-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfinal_project.sql
More file actions
206 lines (177 loc) · 6.07 KB
/
Copy pathfinal_project.sql
File metadata and controls
206 lines (177 loc) · 6.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
-- Using Postgresql
-- PART I: SCHOOL ANALYSIS
-- 1. View the schools and school details tables
SELECT * FROM schools;
SELECT * FROM school_details;
-- 2. In each decade, how many schools were there that produced players?
SELECT (yearid/10) * 10 AS decade,
COUNT(DISTINCT(schoolid)) AS num_schools
FROM schools
GROUP BY decade
ORDER BY decade;
-- 3. What are the names of the top 5 schools that produced the most players?
SELECT sd.name_full,
COUNT(DISTINCT(s.playerid)) AS num_players
FROM schools s LEFT JOIN school_details sd
ON s.schoolid= sd.schoolid
GROUP BY sd.name_full
ORDER BY num_players DESC
LIMIT 5;
-- 4. For each decade, what were the names of the top 3 schools that produced the most players?
WITH school_counts AS (
SELECT
(s.yearid/10) * 10 AS decade,
sd.name_full AS school_name,
COUNT(distinct(s.playerid)) AS num_players
FROM schools s LEFT JOIN school_details sd
ON s.schoolid = sd.schoolid
GROUP BY decade, sd.name_full
),
ranked AS (
SELECT
decade,
school_name,
num_players,
ROW_NUMBER() OVER (PARTITION BY decade ORDER BY num_players DESC, school_name) AS rn
FROM school_counts
)
SELECT
decade,
school_name,
num_players
FROM ranked
WHERE rn <= 3
ORDER BY decade DESC, rn;
-- PART II: SALARY ANALYSIS
-- 1. View the salaries table
SELECT * FROM salaries;
-- 2. Return the top 20% of teams in terms of average annual spending
WITH ts AS (SELECT yearid,
teamid,
SUM(salary) AS total_spend
FROM salaries
GROUP BY yearid, teamid),
pct AS (SELECT teamid,
AVG(total_spend) AS avg_spend,
NTILE(5) OVER(ORDER BY AVG(total_spend) DESC) AS spend_pct
FROM ts
GROUP BY teamid)
SELECT teamid,
ROUND(avg_spend / 1000000, 1) AS avg_spend_millions
FROM pct
WHERE spend_pct = 1;
-- 3. For each team, show the cumulative sum of spending over the years
WITH ts AS (SELECT yearid,
teamid,
SUM(salary) AS total_spend
FROM salaries
GROUP BY yearid, teamid)
SELECT yearid,
teamid,
ROUND(SUM(total_spend / 1000000.0) OVER(ORDER BY teamid, yearid), 1) AS cumlative_sum_millions
FROM ts;
-- 4. Return the first year that each team's cumulative spending surpassed 1 billion
WITH ts AS (SELECT yearid,
teamid,
SUM(salary) AS total_spend
FROM salaries
GROUP BY yearid, teamid),
csb AS (SELECT yearid,
teamid,
ROUND(SUM(total_spend / 1000000000.0) OVER(PARTITION BY teamid ORDER BY yearid), 2) AS cumlative_sum_billions
FROM ts
ORDER BY teamid),
srnk AS (SELECT teamid,
yearid,
cumlative_sum_billions,
ROW_NUMBER() OVER(PARTITION BY teamid ORDER BY cumlative_sum_billions) AS rnk
FROM csb
WHERE cumlative_sum_billions >= 1)
SELECT teamId,
yearid,
cumlative_sum_billions
FROM srnk
WHERE rnk = 1
ORDER BY teamid, yearid DESC;
-- PART III: PLAYER CAREER ANALYSIS
-- 1. View the players table and find the number of players in the table
SELECT * FROM players;
SELECT COUNT(playerid) AS num_players FROM players;
-- 2. For each player, calculate their age at their first game, their last game, and their career length (all in years). Sort from longest career to shortest career.
WITH fullbd AS (SELECT playerid,
MAKE_DATE(birthyear, birthmonth, birthday) AS bday,
namegiven,
debut,
finalgame
FROM players)
SELECT namegiven,
EXTRACT(YEAR FROM AGE(debut, bday))::INT AS starting_age,
EXTRACT(YEAR FROM AGE(finalgame, bday))::INT AS ending_age,
EXTRACT(YEAR FROM AGE(finalgame, debut))::INT AS career_length
FROM fullbd
WHERE debut IS NOT NULL
ORDER BY career_length DESC;
-- 3. What team did each player play on for their starting and ending years?
SELECT
nameGiven,
EXTRACT(YEAR FROM AGE(debut, make_date(birthYear, birthMonth, birthDay))) AS starting_age,
EXTRACT(YEAR FROM AGE(finalGame, make_date(birthYear, birthMonth, birthDay))) AS ending_age,
EXTRACT(YEAR FROM AGE(finalGame, debut)) AS career_length
FROM players
WHERE debut IS NOT NULL
ORDER BY career_length DESC;
-- 4. How many players started and ended on the same team and also played for over a decade?
SELECT
p.nameGiven,
s.yearID AS starting_year,
s.teamID AS starting_team,
e.yearID AS ending_year,
e.teamID AS ending_team
FROM players p
INNER JOIN salaries s
ON p.playerID = s.playerID
AND EXTRACT(YEAR FROM p.debut) = s.yearID
INNER JOIN salaries e
ON p.playerID = e.playerID
AND EXTRACT(YEAR FROM p.finalGame) = e.yearID
WHERE s.teamID = e.teamID
AND e.yearID - s.yearID > 10;
-- PART IV: PLAYER COMPARISON ANALYSIS
-- 1. View the players table
SELECT * FROM players;
-- 2. Which players have the same birthday?
WITH bd AS (SELECT CAST(CONCAT(birthyear, '-', birthmonth, '-', birthday) AS DATE) AS birthdate,
namegiven
FROM players
WHERE birthyear IS NOT NULL
AND birthmonth IS NOT NULL
AND birthday IS NOT NULL)
SELECT birthdate,
STRING_AGG(namegiven, ',') AS players
FROM bd
WHERE EXTRACT(YEAR FROM birthdate) BETWEEN 1980 AND 1990
GROUP BY birthdate
ORDER BY birthdate;
-- 3. Create a summary table that shows for each team, what percent of players bat right, left and both
SELECT
s.teamid,
ROUND((SUM(CASE WHEN p.bats = 'R' THEN 1 ELSE 0 END) * 100.0) / COUNT(*),1) AS pct_r,
ROUND((SUM(CASE WHEN p.bats = 'L' THEN 1 ELSE 0 END) * 100.0) / COUNT(*),1) AS pct_l,
ROUND((SUM(CASE WHEN p.bats = 'B' THEN 1 ELSE 0 END) * 100.0) / COUNT(*),1) AS pct_b
FROM players p
JOIN salaries s ON p.playerid = s.playerid
GROUP BY s.teamid
ORDER BY s.teamid;
-- 4. How have average height and weight at debut game changed over the years, and what's the decade-over-decade difference?
WITH awh AS (SELECT (EXTRACT(YEAR FROM debut)::int/10) * 10 AS decade,
AVG(weight) AS avg_weight,
AVG(height) AS avg_height
FROM players
WHERE debut IS NOT NULL AND
weight IS NOT NULL AND
height IS NOT NULL
GROUP BY (EXTRACT(YEAR FROM debut)::int/10) * 10)
SELECT decade,
ROUND(avg_height - LAG(avg_height) OVER(ORDER BY decade), 4) AS avg_height_diff,
ROUND(avg_weight - LAG(avg_weight) OVER(ORDER BY decade), 4) AS avg_weight_diff
FROM awh;