-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmaster_list.sql
More file actions
3382 lines (2776 loc) · 83 KB
/
master_list.sql
File metadata and controls
3382 lines (2776 loc) · 83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* The following questions are queried using data collected about soccer matches in Europe.
There are three tables in the data set: country (with just the country code to country), teams (giving the team code and long/short names), and matches (with all the match data, including who's playing (by code), home/away score, etc.).
This data was then partitioned off by country, resulting in tables for each country (e.g., matches_spain, teams_germany, etc.).
The following queries were used to answer the given questions.
I've listed the queries in reverse order of difficulty (focusing on the full-data queries first and then looking at the country based ones). */
-- Identify the number of matches played by each country during the three different seasons.
-- Method #1: Using CASE (all dialects)
SELECT
c.name AS country,
COUNT(
CASE
WHEN m.season='2012/2013' THEN m.ID
ELSE NULL
END
) AS matches_2012_2013,
COUNT(
CASE
WHEN m.season='2013/2014' THEN m.ID
ELSE NULL
END
) AS matches_2013_2014,COUNT(
CASE
WHEN m.season='2014/2015' THEN m.ID
ELSE NULL
END
) AS matches_2014_2015
FROM country AS c
LEFT JOIN matches AS m
ON c.id=m.country_id
GROUP BY country
-- Method #2: Using FILTER (not available in MySQL)
SELECT
c.name AS country,
COUNT(m.season) FILTER(WHERE m.season='2012/2013') AS matches_2012_2013,
COUNT(m.season) FILTER(WHERE m.season='2013/2014') AS matches_2013_2014,
COUNT(m.season) FILTER(WHERE m.season='2014/2015') AS matches_2014_2015
FROM country AS c
LEFT JOIN match AS m
ON c.id=m.country_id
GROUP BY country;
-- Count of home wins, away wins, and ties in each country
SELECT
c.name AS country,
COUNT(
CASE
WHEN m.home_goal > m.away_goal THEN m.id
END) AS home_wins,
COUNT(
CASE
WHEN m.home_goal < m.away_goal THEN m.id
END) AS away_wins,
COUNT(
CASE
WHEN m.home_goal = m.away_goal THEN m.id
END) AS ties
FROM country AS c
LEFT JOIN matches AS m
ON c.id = m.country_id
GROUP BY country;
-- Calculate the percentage of ties the occur in each country, separated by the 2013-14 and 2014-15 seasons.
SELECT
c.name AS country,
ROUND(AVG(
CASE
WHEN m.season='2013/2014' AND m.home_goal = m.away_goal THEN 1
WHEN m.season='2013/2014' AND m.home_goal != m.away_goal THEN 0
END), 2) AS pct_ties_2013_2014,
ROUND(AVG(
CASE
WHEN m.season='2014/2015' AND m.home_goal = m.away_goal THEN 1
WHEN m.season='2014/2015' AND m.home_goal != m.away_goal THEN 0
END), 2) AS pct_ties_2014_2015
FROM country AS c
LEFT JOIN matches AS m
ON c.id = m.country_id
GROUP BY country;
-- Now total only the home games within each country
SELECT
c.name AS country,
SUM(
CASE
WHEN home_goal>away_goal AND season='2012/2013' THEN 1
ELSE 0
END) AS home_wins_2012_2013,
SUM(
CASE
WHEN home_goal>away_goal AND season='2013/2014' THEN 1
ELSE 0
END) AS home_wins_2013_2014,
SUM(
CASE
WHEN home_goal>away_goal AND season='2014/2015' THEN 1
ELSE 0
END) AS home_wins_2014_2015
FROM match AS m
LEFT JOIN country AS c
ON m.country_id=c.id
GROUP BY country
-- Identify matches in which Bologna won and specify if it was at home or away
-- Identify Bologna's ID
SELECT
team_long_name,
team_api_id
FROM teams_italy
WHERE team_long_name='Bologna';
-- Identify the date/season of Balogna's winning matches
-- Method #1: Using CASE as directed
SELECT
season,
date,
home_goal,
away_goal
FROM matches_italy
WHERE
-- Exclude games not won by Bologna
CASE WHEN hometeam_id = 9857 AND home_goal > away_goal THEN 'Bologna Win'
WHEN awayteam_id = 9857 AND away_goal > home_goal THEN 'Bologna Win'
END IS NOT NULL;
-- Method #2: Clearer way to code it with just boolean operators
SELECT
season,
date,
home_goal,
away_goal
FROM matches_italy
WHERE
home_goal>away_goal AND hometeam_id=9857
OR home_goal<away_goal AND awayteam_id=9857
-- Query a list of matches played between the two rivals, Barcelona and Real Madrid, in El Clásico matches, indicating who was home/away, and won in each.
WITH el_clasico AS (
SELECT
date,
CASE
WHEN hometeam_id = 8634 THEN 'FC Barcelona'
ELSE 'Real Madrid CF'
END AS home,
CASE
WHEN awayteam_id = 8634 THEN 'FC Barcelona'
ELSE 'Real Madrid CF'
END AS away,
home_goal,
away_goal
FROM matches_spain
WHERE (awayteam_id = 8634 OR hometeam_id = 8634)
AND (awayteam_id = 8633 OR hometeam_id = 8633))
SELECT
date,
home,
away,
CASE
WHEN home_goal > away_goal THEN home
WHEN away_goal > home_goal THEN away
ELSE 'Tie'
END AS victor
FROM el_clasico
ORDER BY date;
-- Identify the win/loss status of Barcelona's matches.
SELECT
m.date,
t.team_long_name AS opponent,
CASE
WHEN m.home_goal>m.away_goal THEN 'Barcelona win'
WHEN m.home_goal<m.away_goal THEN 'Barcelona loss'
ELSE 'Tie'
END AS outcome
FROM matches_spain AS m
LEFT JOIN teams_spain AS t
ON m.awayteam_Id=t.team_api_Id
WHERE m.hometeam_Id=8634
UNION
SELECT
m.date,
t.team_long_name AS opponent,
CASE
WHEN m.home_goal<m.away_goal THEN 'Barcelona win'
WHEN m.home_goal>m.away_goal THEN 'Barcelona loss'
ELSE 'Tie'
END AS outcome
FROM matches_spain AS m
LEFT JOIN teams_spain AS t
ON m.hometeam_id=t.team_api_Id
WHERE m.awayteam_id=8634;
-- Count the number of matches that FC Schalke 04 and FC Bayern Munich have each played at home using the data split across the filtered teams_germany and matches_germany datasets.
-- Identifying the corresponding API id
SELECT
team_api_id,
team_long_name
FROM teams_germany
WHERE team_long_name IN ('FC Schalke 04', 'FC Bayern Munich');
-- Counting the numbers of each team's home games
SELECT
CASE
WHEN hometeam_id=10189 THEN 'FC Schalke 04'
WHEN hometeam_id=9823 THEN 'FC Bayern Munich'
ELSE 'Other'
END AS home_team,
COUNT(id) AS total_matches
FROM matches_germany
GROUP BY home_team;
-- Find the currencies used by the countries in Oceania
SELECT basic_unit
FROM currencies
WHERE code IN (
SELECT code
FROM countries
WHERE continent='Oceania'
);
-- Identify Oceanic countries listed in countries table but not in currencies table
SELECT
code,
name
FROM countries
WHERE continent = 'Oceania'
AND code NOT IN (
SELECT code
FROM currencies
);
-- Identify which countries had higher average life expectancies (more than 1.15x) in 2015
SELECT *
FROM populations
WHERE year = 2015
AND life_expectancy > 1.15 *
(SELECT AVG(life_expectancy)
FROM populations
WHERE year = 2015) ;
-- Identify the largest city populations of only capital cities
SELECT
name,
urbanarea_pop
FROM cities
WHERE name IN (
SELECT capital
FROM countries
)
ORDER BY urbanarea_pop DESC;
-- Identify the countries with the most documented city populations in them
--Method 1: Joins
SELECT
c1.name AS country,
count(c2.name) AS cities_num
FROM countries AS c1
LEFT JOIN cities AS c2
ON c1.code=c2.country_code
GROUP BY c1.name
ORDER BY
cities_num DESC,
country
LIMIT 9;
--Method 2: Subquery
SELECT
countries.name AS country,
(
SELECT count(*)
FROM cities
WHERE cities.country_code=countries.code
) AS cities_num
FROM countries
ORDER BY cities_num DESC, country
LIMIT 9;
-- Identify the number of languages spoken in each country, identifying with its local name
--Method #1: Subquery within SELECT (same answer but more processing power)
SELECT
(
SELECT local_name
FROM countries
WHERE languages.code=countries.code
),
count(name) AS lang_num
FROM languages
GROUP BY code
ORDER BY lang_num DESC;
--Method #2: Subquery within SELECT
SELECT
local_name,
lang_num
FROM
countries,
(SELECT code, COUNT(*) AS lang_num
FROM languages
GROUP BY code) AS sub
-- Where codes match
WHERE countries.code=sub.code
ORDER BY lang_num DESC;
-- Identify the 2015 inflation and unemployment rate for Republics and Monarchies
-- Method #1: Subquery in FROM statement (my solution)
SELECT
economies.code,
inflation_rate,
unemployment_rate
FROM
economies,
(
SELECT *
FROM countries
WHERE
gov_form IN ('Republic','Monarchy')
) AS sub
WHERE economies.code=sub.code
AND year=2015
ORDER BY inflation_rate;
-- Method #2: Subquery in WHERE (their solution, which I came to once hearing the "WHERE" part)
SELECt
code,
inflation_rate,
unemployment_rate
FROM economies
WHERE year = 2015
AND code IN
(
SELECT code
FROM countries
WHERE gov_form LIKE '%Republic%'
OR gov_form LIKE '%Monarchy%'
)
ORDER BY inflation_rate;
-- Your task is to determine the top 10 capital cities in Europe and the Americas by city_perc, a metric you'll calculate. city_perc is a percentage that calculates the "proper" population in a city as a percentage of the total population in the wider metro area, as follows: city_proper_pop / metroarea_pop * 100
SELECT
name AS city,
country_code,
city_proper_pop,
metroarea_pop,
city_proper_pop/metroarea_pop * 100 AS city_perc
FROM
cities
WHERE name IN (
SELECT capital
FROM countries
WHERE
continent = 'Europe'
OR continent LIKE '%America'
) AND metroarea_pop IS NOT NULL
ORDER BY city_perc DESC
LIMIT 10;
/* The following questions are queried using data collected about soccer matches in Europe.
There are four tables in the data set: country (with just the country code to country), teams (giving the team code and long/short names), leagues (giving the league inforamtion for the teams), and matches (with all the match data, including who's playing (by code), home/away score, etc.).
The following queries were used to answer the given questions.
I've listed the queries in reverse order of completion in order to show more advance queries first */
-- Choosing between JOINs, subqueries, nested subqueries, and CTEs
-- Create a table that includes the date, both team names, and both team scores using all the various methods you know (JOINs, subqueries, correlated subqueries, CTEs)
-- My Method: Two JOINs together -- seems easiest? Less code, less computing?
SELECT
m.date,
home.team_long_name AS hometeam,
away.team_long_name AS awayteam,
m.home_goal,
m.away_goal
FROM match AS m
LEFT JOIN team AS home
ON m.hometeam_id=home.team_api_id
LEFT JOIN team AS away
ON m.awayteam_id=away.team_api_id;
-- Method #1: Subqueries (with JOINs within) -- more complicated writing, simple computing
SELECT
m.date,
home.hometeam,
away.awayteam,
m.home_goal,
m.away_goal
FROM match AS m
LEFT JOIN (
-- Using subquery to get hometeam names
SELECT
m.id,
t.team_long_name AS hometeam
FROM match AS m
LEFT JOIN team AS t
ON m.hometeam_id = t.team_api_id
) AS home
ON m.id=home.id
LEFT JOIN (
-- Using subquery to get awayteam names
SELECT
m.id,
t.team_long_name AS awayteam
FROM match AS m
LEFT JOIN team AS t
ON m.awayteam_id = t.team_api_id
) AS away
ON m.id=away.id;
-- Method #2: Correlated subquery (more clearly written, more computing power needed)
SELECT
m.date,
(
SELECT t.team_long_name
FROM team AS t
WHERE t.team_api_id=m.hometeam_id
) AS hometeam,
(
SELECT t.team_long_name
FROM team AS t
WHERE t.team_api_id=m.awayteam_id
) AS awayteam,
m.home_goal,
m.away_goal
FROM match AS m;
-- Method #3: CTEs
WITH home AS (
SELECT
m.id,
team_long_name AS hometeam
FROM team AS t
INNER JOIN match AS m
ON t.team_api_id=m.hometeam_id
),
away AS(
SELECT
m.id,
team_long_name AS awayteam
FROM team AS t
INNER JOIN match AS m
ON t.team_api_id=m.awayteam_id
)
SELECT
m.date,
home.hometeam AS hometeam,
away.awayteam AS awayteam,
m.home_goal,
m.away_goal
FROM match AS m
LEFT JOIN home
USING(id)
LEFT JOIN away
USING(id);
-- CTEs
-- Use a CTE to find the average number of total goals scored in games in August of the 2013-2014 season by league
-- Method #1: Clearer way
WITH match_list AS (
SELECT country_id,
(home_goal+away_goal) AS total_goals
FROM match
WHERE
EXTRACT(MONTH FROM date)=08
AND season='2013/2014'
)
SELECT
l.name AS league,
round(avg(total_goals), 2) AS aug_2013_avg_goals
FROM match_list
LEFT JOIN league AS l
USING(country_id)
GROUP BY l.name;
-- Method #2: CTE with subquery (what datacamp wanted me to do)
WITH match_list AS (
SELECT country_id,
(home_goal+away_goal) AS total_goals
FROM match
WHERE id IN (
SELECT id
FROM match
WHERE season='2013/2014' AND EXTRACT(MONTH FROM date)=08)
)
SELECT
l.name AS league,
round(avg(total_goals), 2) AS aug_2013_avg_goals
FROM match_list
LEFT JOIN league AS l
USING(country_id)
GROUP BY l.name;
-- Use a CTE to find the number of times each league has played in matches with 10 or more goals
-- Method #1: Join in the main query
WITH match_list AS (
SELECT country_id, id
FROM match
WHERE (home_goal+away_goal)>=10
)
SELECT
l.name AS league,
COUNT(match_list.id) AS matches
FROM league AS l
LEFT JOIN match_list
ON l.country_id=match_list.country_id
GROUP BY l.name;
-- METHOD #2: Join in the CTE
WITH match_list AS (
SELECT
l.name AS league,
date,
home_goal,
away_goal,
(home_goal+away_goal) AS total_goals
FROM match AS m
LEFT JOIN league AS l
USING(country_id)
)
SELECT
league,
date,
home_goal,
away_goal
FROM match_list
WHERE total_goals>=10;
-- NESTED SUBQUERIES
-- Use nested subqueries to answer this question: How do the average number of matches per season where a team scored 5 or more goals differ by country?
SELECT
c.name AS country,
round(avg(outer_s.matches),2) AS avg_seasonal_high_scores
FROM country AS c
LEFT JOIN (
-- Use subquery to create derived table of a count of each country's total number of high scoring games
SELECT
country_id,
season,
count(id) AS matches
FROM (
-- Use nested subquery to create derived table of matches with one side's score as 5 or more
SELECT
country_id,
season,
id
FROM match
WHERE
home_goal>=5
OR away_goal>=5
) AS sub
GROUP BY
country_id,
season
) AS outer_s
ON c.id=outer_s.country_id
GROUP BY c.name;
-- Use nested subqueries to identify the max number of goals in each season, the max number of goals overall, and the max number goals scored in July.
SELECT
season,
MAX(home_goal+away_goal) AS max_goals,
-- Get max goals for all matches
(SELECT MAX(home_goal+away_goal) FROM match) AS overall_max_goals,
-- Get max goals for matches in July
(
SELECT MAX(home_goal+away_goal)
FROM match
WHERE id IN (
SELECT id
FROM match
WHERE EXTRACT(MONTH FROM date)=07
)
) AS july_max_goals
FROM match
GROUP BY season;
-- CORRELATED SUBQUERIES
-- Use a correlated subquery to identify matches with total scores equaling the max number of goals in a match
SELECT
main.country_id,
main.date,
main.home_goal,
main.away_goal
FROM match AS main
WHERE (main.home_goal+main.away_goal)=(
-- Correlated subquery to find the matches whose total goals equal the max goals of the table
SELECT MAX(sub.home_goal+sub.away_goal)
FROM match AS sub
WHERE
main.country_id=sub.country_id
AND main.season=sub.season
);
-- Use a correlated subquery to identify matches with scores that are abnormally high -- more than three times the average match score
SELECT
main.country_id,
main.date,
main.home_goal,
main.away_goal
FROM match AS main
WHERE (home_goal+away_goal)>(
-- Correlated subquery to find the matches whose total goals were more than three times the average total goals
SELECT avg((sub.home_goal+sub.away_goal)*3)
FROM match AS sub
WHERE main.country_id=sub.country_id
);
-- SUBQUERIES IN ALL CLAUSES
-- Calculate the average goals scored in each stage as compared to the overall average, keeping only the stages in which the stage's average goals is greater than overall average goals
SELECT
s.stage,
ROUND(s.avg_goals, 2) AS avg_goals,
-- Subquery to also list the overall average not aggregated by stage
ROUND((
SELECT AVG(home_goal+away_goal) AS overall_avg
FROM match
WHERE season='2012/2013'
), 2)
FROM (
-- Subquery to pull from an aggregated table
SELECT
stage,
AVG(home_goal+away_goal) AS avg_goals
FROM match
WHERE season='2012/2013'
GROUP BY stage
) AS s
WHERE s.avg_goals>(
-- Subquery to filter to only stages that are above the average
SELECT avg(home_goal+away_goal)
FROM match
WHERE season='2012/2013'
)
ORDER BY s.stage ASC;
-- Calculate the average goals scored in each stage, keeping only the stages in which the stage's average goals is greater than overall average goals
SELECT
s.stage,
ROUND(s.avg_goals, 2) AS avg_goals,
FROM (
-- Subquery to pull from an aggregated table
SELECT
stage,
AVG(home_goal+away_goal) AS avg_goals
FROM match
WHERE season='2012/2013'
GROUP BY stage
) AS s
WHERE s.avg_goals>(
-- Subquery to filter to only stages that are above the average
SELECT avg(home_goal+away_goal)
FROM match
WHERE season='2012/2013'
)
ORDER BY s.stage ASC;
-- Create a data set listing the average total of goals in each match stage in the 2012/2013 season as compared to the overall goals
SELECT
stage,
ROUND(avg(home_goal+away_goal), 2) AS avg_goals,
ROUND((
-- Subquery to get overall average outside of group by
SELECT avg(home_goal+away_goal)
FROM match
WHERE season='2012/2013'), 2) AS overall_goals
FROM match
WHERE season='2012/2013'
GROUP BY stage
ORDER BY stage;
-- SUBQUERIES IN SELECT CLAUSE
-- Calculate the average number of goals per match in each country's league and its difference from the overall average, both in 2013-2014
SELECT
l.name AS league,
round(avg(m.home_goal+m.away_goal), 2) AS avg_goals,
round(avg(m.home_goal+m.away_goal)-(
-- Subquery calculates average total goals for 2013-14 season
SELECT
(avg(home_goal+away_goal))
FROM match
WHERE season='2013/2014'
), 2) AS diff
FROM league AS l
LEFT JOIN match AS m
ON l.country_id=m.country_id
WHERE season='2013/2014'
GROUP BY l.name;
-- Calculate the average number of goals per match in each country's league as compared to the overall average
SELECT
l.name AS league,
round(avg(m.home_goal+m.away_goal), 2) AS avg_goals,
round(avg(m.home_goal+m.away_goal)-(
-- Subquery calculates average total goals for 2013-14 season
SELECT
(avg(home_goal+away_goal))
FROM match
WHERE season='2013/2014'
), 2) AS diff
FROM league AS l
LEFT JOIN match AS m
ON l.country_id=m.country_id
WHERE season='2013/2014'
GROUP BY l.name;
-- SUBQUERIES IN SELECT CLAUSE
-- Identify the number of matches played by each country in which there were a total of 10 or more goals.
SELECT
name AS country_name,
count(name) AS matches
FROM country AS c
INNER JOIN (
-- Subquery filters out any games without total scores of more than or equal to 10 goals
SELECT
country_id
FROM match
WHERE (home_goal+away_goal)>=10
) AS sub
ON c.id=sub.country_id
GROUP BY country_name;
-- Identify the country, date, and respective goals from matches in which the total score was above 10.
-- Method #1: Using subquery in the FROM (datacamp method)
SELECT
country,
date,
home_goal,
away_goal
FROM
-- Subquery calculates/saves the total_goals for each match so the outside WHERE statement can use it
(SELECT c.name AS country,
m.date,
m.home_goal,
m.away_goal,
(m.home_goal + m.away_goal) AS total_goals
FROM match AS m
LEFT JOIN country AS c
ON m.country_id = c.id) AS sub
WHERE total_goals >=10;
-- Method #2: Just putting the calculation in the WHERE clause
SELECT c.name AS country,
m.date,
m.home_goal,
m.away_goal
FROM match AS m
LEFT JOIN country AS c
ON m.country_id = c.id
WHERE (m.home_goal + m.away_goal) >=10;
-- SUBQUERIES IN WHERE CLAUSE
-- Identify the teams who have, in a single home game, scored 8 or more points
SELECT
team_long_name,
team_short_name
FROM team
WHERE team_api_id IN (
-- Subquery filters for teams who have scored more than 8 goals at home
SELECT hometeam_id
FROM match
WHERE home_goal>=8
)
-- Identify the teams who have never played a game at home
SELECT
team_long_name,
team_short_name
FROM team
WHERE team_api_id<> NOT IN (
-- Subquery filters to show teams that have played home games according to the match table
SELECT DISTINCT hometeam_id
FROM match
);
-- Identify the games played in which the total score was three times the average total score.
SELECT
date,
home_goal,
away_goal
FROM matches_2013_2014
WHERE (home_goal+away_goal)>(
-- Subquery filters to show matches where the total score was over 3 times the average total score
SELECT avg(home_goal+away_goal)*3
FROM matches_2013_2014
);
/* The following questions are queried using data collected about soccer matches in Europe.
There are four tables in the data set: country (with just the country code to country), teams (giving the team code and long/short names), leagues (giving the league inforamtion for the teams), and matches (with all the match data, including who's playing (by code), home/away score, etc.).
The following queries were used to answer the given questions.
I've listed the queries in reverse order of completion in order to show more advance queries first */
'
-- CASE STUDY: Identify Manchester United's losses in the 2014/2015 season and rank the losses by how much they lost by
-- My method: Use CTE, UNION home and away games, filter for losses, RANK by severity of loss
WITH mu_matches AS (
SELECT
m.date,
m.id,
awayteam_id AS opponent_id,
t.team_long_name,
home_goal AS mu_goal,
away_goal AS opponent_goal,
CASE
WHEN t.team_long_name='Manchester United' THEN 'home'
ELSE 'error'
END AS home_or_away,
CASE
WHEN home_goal>away_goal THEN 'MU Win'
WHEN home_goal<away_goal THEN 'MU Loss'
ELSE 'MU Tie'
END AS outcome
FROM match AS m
LEFT JOIN team AS t
ON m.hometeam_id=t.team_api_id
WHERE
season='2014/2015'
AND t.team_long_name='Manchester United'
UNION
SELECT
m.date,
m.id,
hometeam_id AS opponent_id,
t.team_long_name,
away_goal AS mu_goal,
home_goal AS opponent_goal,
CASE
WHEN t.team_long_name='Manchester United' THEN 'away'
ELSE 'error'
END AS home_or_away,
CASE
WHEN home_goal<away_goal THEN 'MU Win'
WHEN home_goal>away_goal THEN 'MU Loss'
ELSE 'MU Tie'
END AS outcome
FROM match AS m
LEFT JOIN team AS t
ON m.awayteam_id=t.team_api_id
WHERE
season='2014/2015'
AND t.team_long_name='Manchester United'
)
SELECT DISTINCT
date,
t.team_long_name,
home_or_away,
mu_goal,
opponent_goal,
RANK() OVER (
ORDER BY opponent_goal-mu_goal DESC
) AS loss_severity
FROM mu_matches AS mu
LEFT JOIN team AS t
ON mu.opponent_id=t.team_api_id
WHERE outcome='MU Loss';
-- As directed by datacamp (two different CTEs, join with both of them)
-- Set up the home team CTE
WITH home AS (
SELECT m.id,
t.team_long_name,
CASE WHEN m.home_goal > m.away_goal THEN 'MU Win'
WHEN m.home_goal < m.away_goal THEN 'MU Loss'
ELSE 'Tie' END AS outcome
FROM match AS m
LEFT JOIN team AS t ON m.hometeam_id = t.team_api_id),
-- Set up the away team CTE
away AS (
SELECT m.id,
t.team_long_name,
CASE WHEN m.home_goal > m.away_goal THEN 'MU Loss'
WHEN m.home_goal < m.away_goal THEN 'MU Win'
ELSE 'Tie' END AS outcome
FROM match AS m
LEFT JOIN team AS t ON m.awayteam_id = t.team_api_id)
-- Select columns and and rank the matches by goal difference
SELECT DISTINCT
m.date,
home.team_long_name AS home_team,
away.team_long_name AS away_team,
m.home_goal,
m.away_goal,
RANK() OVER(ORDER BY ABS(home_goal - away_goal) DESC) as match_rank
-- Join the CTEs onto the match table
FROM match AS m
LEFT JOIN home ON m.id = home.id
LEFT JOIN away ON m.id = away.id
WHERE m.season = '2014/2015'
AND ((home.team_long_name = 'Manchester United' AND home.outcome = 'MU Loss')