-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnested_subqueries_and_CTEs.sql
More file actions
502 lines (402 loc) · 11.7 KB
/
nested_subqueries_and_CTEs.sql
File metadata and controls
502 lines (402 loc) · 11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
/* The following questions are queried using data collected about soccer matches in Europe.
There are four tables in the data set: country (with just the country code to country), teams (giving the team code and long/short names), leagues (giving the league inforamtion for the teams), and matches (with all the match data, including who's playing (by code), home/away score, etc.).
The following queries were used to answer the given questions.
I've listed the queries in reverse order of completion in order to show more advance queries first */
-- Choosing between JOINs, subqueries, nested subqueries, and CTEs
-- Create a table that includes the date, both team names, and both team scores using all the various methods you know (JOINs, subqueries, correlated subqueries, CTEs)
-- My Method: Two JOINs together -- seems easiest? Less code, less computing?
SELECT
m.date,
home.team_long_name AS hometeam,
away.team_long_name AS awayteam,
m.home_goal,
m.away_goal
FROM match AS m
LEFT JOIN team AS home
ON m.hometeam_id=home.team_api_id
LEFT JOIN team AS away
ON m.awayteam_id=away.team_api_id;
-- Method #1: Subqueries (with JOINs within) -- more complicated writing, simple computing
SELECT
m.date,
home.hometeam,
away.awayteam,
m.home_goal,
m.away_goal
FROM match AS m
LEFT JOIN (
-- Using subquery to get hometeam names
SELECT
m.id,
t.team_long_name AS hometeam
FROM match AS m
LEFT JOIN team AS t
ON m.hometeam_id = t.team_api_id
) AS home
ON m.id=home.id
LEFT JOIN (
-- Using subquery to get awayteam names
SELECT
m.id,
t.team_long_name AS awayteam
FROM match AS m
LEFT JOIN team AS t
ON m.awayteam_id = t.team_api_id
) AS away
ON m.id=away.id;
-- Method #2: Correlated subquery (more clearly written, more computing power needed)
SELECT
m.date,
(
SELECT t.team_long_name
FROM team AS t
WHERE t.team_api_id=m.hometeam_id
) AS hometeam,
(
SELECT t.team_long_name
FROM team AS t
WHERE t.team_api_id=m.awayteam_id
) AS awayteam,
m.home_goal,
m.away_goal
FROM match AS m;
-- Method #3: CTEs
WITH home AS (
SELECT
m.id,
team_long_name AS hometeam
FROM team AS t
INNER JOIN match AS m
ON t.team_api_id=m.hometeam_id
),
away AS(
SELECT
m.id,
team_long_name AS awayteam
FROM team AS t
INNER JOIN match AS m
ON t.team_api_id=m.awayteam_id
)
SELECT
m.date,
home.hometeam AS hometeam,
away.awayteam AS awayteam,
m.home_goal,
m.away_goal
FROM match AS m
LEFT JOIN home
USING(id)
LEFT JOIN away
USING(id);
-- CTEs
-- Use a CTE to find the average number of total goals scored in games in August of the 2013-2014 season by league
-- Method #1: Clearer way
WITH match_list AS (
SELECT country_id,
(home_goal+away_goal) AS total_goals
FROM match
WHERE
EXTRACT(MONTH FROM date)=08
AND season='2013/2014'
)
SELECT
l.name AS league,
round(avg(total_goals), 2) AS aug_2013_avg_goals
FROM match_list
LEFT JOIN league AS l
USING(country_id)
GROUP BY l.name;
-- Method #2: CTE with subquery (what datacamp wanted me to do)
WITH match_list AS (
SELECT country_id,
(home_goal+away_goal) AS total_goals
FROM match
WHERE id IN (
SELECT id
FROM match
WHERE season='2013/2014' AND EXTRACT(MONTH FROM date)=08)
)
SELECT
l.name AS league,
round(avg(total_goals), 2) AS aug_2013_avg_goals
FROM match_list
LEFT JOIN league AS l
USING(country_id)
GROUP BY l.name;
-- Use a CTE to find the number of times each league has played in matches with 10 or more goals
-- Method #1: Join in the main query
WITH match_list AS (
SELECT country_id, id
FROM match
WHERE (home_goal+away_goal)>=10
)
SELECT
l.name AS league,
COUNT(match_list.id) AS matches
FROM league AS l
LEFT JOIN match_list
ON l.country_id=match_list.country_id
GROUP BY l.name;
-- METHOD #2: Join in the CTE
WITH match_list AS (
SELECT
l.name AS league,
date,
home_goal,
away_goal,
(home_goal+away_goal) AS total_goals
FROM match AS m
LEFT JOIN league AS l
USING(country_id)
)
SELECT
league,
date,
home_goal,
away_goal
FROM match_list
WHERE total_goals>=10;
-- NESTED SUBQUERIES
-- Use nested subqueries to answer this question: How do the average number of matches per season where a team scored 5 or more goals differ by country?
SELECT
c.name AS country,
round(avg(outer_s.matches),2) AS avg_seasonal_high_scores
FROM country AS c
LEFT JOIN (
-- Use subquery to create derived table of a count of each country's total number of high scoring games
SELECT
country_id,
season,
count(id) AS matches
FROM (
-- Use nested subquery to create derived table of matches with one side's score as 5 or more
SELECT
country_id,
season,
id
FROM match
WHERE
home_goal>=5
OR away_goal>=5
) AS sub
GROUP BY
country_id,
season
) AS outer_s
ON c.id=outer_s.country_id
GROUP BY c.name;
-- Use nested subqueries to identify the max number of goals in each season, the max number of goals overall, and the max number goals scored in July.
SELECT
season,
MAX(home_goal+away_goal) AS max_goals,
-- Get max goals for all matches
(SELECT MAX(home_goal+away_goal) FROM match) AS overall_max_goals,
-- Get max goals for matches in July
(
SELECT MAX(home_goal+away_goal)
FROM match
WHERE id IN (
SELECT id
FROM match
WHERE EXTRACT(MONTH FROM date)=07
)
) AS july_max_goals
FROM match
GROUP BY season;
-- CORRELATED SUBQUERIES
-- Use a correlated subquery to identify matches with total scores equaling the max number of goals in a match
SELECT
main.country_id,
main.date,
main.home_goal,
main.away_goal
FROM match AS main
WHERE (main.home_goal+main.away_goal)=(
-- Correlated subquery to find the matches whose total goals equal the max goals of the table
SELECT MAX(sub.home_goal+sub.away_goal)
FROM match AS sub
WHERE
main.country_id=sub.country_id
AND main.season=sub.season
);
-- Use a correlated subquery to identify matches with scores that are abnormally high -- more than three times the average match score
SELECT
main.country_id,
main.date,
main.home_goal,
main.away_goal
FROM match AS main
WHERE (home_goal+away_goal)>(
-- Correlated subquery to find the matches whose total goals were more than three times the average total goals
SELECT avg((sub.home_goal+sub.away_goal)*3)
FROM match AS sub
WHERE main.country_id=sub.country_id
);
-- SUBQUERIES IN ALL CLAUSES
-- Calculate the average goals scored in each stage as compared to the overall average, keeping only the stages in which the stage's average goals is greater than overall average goals
SELECT
s.stage,
ROUND(s.avg_goals, 2) AS avg_goals,
-- Subquery to also list the overall average not aggregated by stage
ROUND((
SELECT AVG(home_goal+away_goal) AS overall_avg
FROM match
WHERE season='2012/2013'
), 2)
FROM (
-- Subquery to pull from an aggregated table
SELECT
stage,
AVG(home_goal+away_goal) AS avg_goals
FROM match
WHERE season='2012/2013'
GROUP BY stage
) AS s
WHERE s.avg_goals>(
-- Subquery to filter to only stages that are above the average
SELECT avg(home_goal+away_goal)
FROM match
WHERE season='2012/2013'
)
ORDER BY s.stage ASC;
-- Calculate the average goals scored in each stage, keeping only the stages in which the stage's average goals is greater than overall average goals
SELECT
s.stage,
ROUND(s.avg_goals, 2) AS avg_goals,
FROM (
-- Subquery to pull from an aggregated table
SELECT
stage,
AVG(home_goal+away_goal) AS avg_goals
FROM match
WHERE season='2012/2013'
GROUP BY stage
) AS s
WHERE s.avg_goals>(
-- Subquery to filter to only stages that are above the average
SELECT avg(home_goal+away_goal)
FROM match
WHERE season='2012/2013'
)
ORDER BY s.stage ASC;
-- Create a data set listing the average total of goals in each match stage in the 2012/2013 season as compared to the overall goals
SELECT
stage,
ROUND(avg(home_goal+away_goal), 2) AS avg_goals,
ROUND((
-- Subquery to get overall average outside of group by
SELECT avg(home_goal+away_goal)
FROM match
WHERE season='2012/2013'), 2) AS overall_goals
FROM match
WHERE season='2012/2013'
GROUP BY stage
ORDER BY stage;
-- SUBQUERIES IN SELECT CLAUSE
-- Calculate the average number of goals per match in each country's league and its difference from the overall average, both in 2013-2014
SELECT
l.name AS league,
round(avg(m.home_goal+m.away_goal), 2) AS avg_goals,
round(avg(m.home_goal+m.away_goal)-(
-- Subquery calculates average total goals for 2013-14 season
SELECT
(avg(home_goal+away_goal))
FROM match
WHERE season='2013/2014'
), 2) AS diff
FROM league AS l
LEFT JOIN match AS m
ON l.country_id=m.country_id
WHERE season='2013/2014'
GROUP BY l.name;
-- Calculate the average number of goals per match in each country's league as compared to the overall average
SELECT
l.name AS league,
round(avg(m.home_goal+m.away_goal), 2) AS avg_goals,
round(avg(m.home_goal+m.away_goal)-(
-- Subquery calculates average total goals for 2013-14 season
SELECT
(avg(home_goal+away_goal))
FROM match
WHERE season='2013/2014'
), 2) AS diff
FROM league AS l
LEFT JOIN match AS m
ON l.country_id=m.country_id
WHERE season='2013/2014'
GROUP BY l.name;
-- SUBQUERIES IN SELECT CLAUSE
-- Identify the number of matches played by each country in which there were a total of 10 or more goals.
SELECT
name AS country_name,
count(name) AS matches
FROM country AS c
INNER JOIN (
-- Subquery filters out any games without total scores of more than or equal to 10 goals
SELECT
country_id
FROM match
WHERE (home_goal+away_goal)>=10
) AS sub
ON c.id=sub.country_id
GROUP BY country_name;
-- Identify the country, date, and respective goals from matches in which the total score was above 10.
-- Method #1: Using subquery in the FROM (datacamp method)
SELECT
country,
date,
home_goal,
away_goal
FROM
-- Subquery calculates/saves the total_goals for each match so the outside WHERE statement can use it
(SELECT c.name AS country,
m.date,
m.home_goal,
m.away_goal,
(m.home_goal + m.away_goal) AS total_goals
FROM match AS m
LEFT JOIN country AS c
ON m.country_id = c.id) AS sub
WHERE total_goals >=10;
-- Method #2: Just putting the calculation in the WHERE clause
SELECT c.name AS country,
m.date,
m.home_goal,
m.away_goal
FROM match AS m
LEFT JOIN country AS c
ON m.country_id = c.id
WHERE (m.home_goal + m.away_goal) >=10;
-- SUBQUERIES IN WHERE CLAUSE
-- Identify the teams who have, in a single home game, scored 8 or more points
SELECT
team_long_name,
team_short_name
FROM team
WHERE team_api_id IN (
-- Subquery filters for teams who have scored more than 8 goals at home
SELECT hometeam_id
FROM match
WHERE home_goal>=8
)
-- Identify the teams who have never played a game at home
SELECT
team_long_name,
team_short_name
FROM team
WHERE team_api_id<> NOT IN (
-- Subquery filters to show teams that have played home games according to the match table
SELECT DISTINCT hometeam_id
FROM match
);
-- Identify the games played in which the total score was three times the average total score.
SELECT
date,
home_goal,
away_goal
FROM matches_2013_2014
WHERE (home_goal+away_goal)>(
-- Subquery filters to show matches where the total score was over 3 times the average total score
SELECT avg(home_goal+away_goal)*3
FROM matches_2013_2014
);