-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmanipulating_data_structure.sql
More file actions
349 lines (273 loc) · 9.41 KB
/
manipulating_data_structure.sql
File metadata and controls
349 lines (273 loc) · 9.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
-- All data comes from the Sakila database, a fictional DVD rental company database. Many of these functions are PostgreSQL-specific.
-- Build a data set for exploring whether the words and phrases used to
-- describe a film have an impact on the number of rentals: keep the films
-- whose description matches both search terms and rank them by their
-- similarity to the search phrase, most similar first.
SELECT
    f.title,
    f.description,
    -- Similarity score between the description and the search phrase
    similarity(f.description, 'Astounding & Drama')
FROM film AS f
WHERE to_tsvector(f.description) @@ to_tsquery('Astounding & Drama')
ORDER BY similarity(f.description, 'Astounding & Drama') DESC;
-- Use Levenshtein edit distance to find the titles closest to "JET NEIGHBOR",
-- listing the nearest match (smallest distance) first.
-- NOTE(review): levenshtein() is not a core function; it presumably comes from
-- the fuzzystrmatch extension, which this script never creates — confirm it is
-- installed.
SELECT
    f.title,
    f.description,
    levenshtein(f.title, 'JET NEIGHBOR') AS distance
FROM film AS f
-- Sort by the named alias rather than a column position, and terminate the
-- statement so it does not run into the query that follows.
ORDER BY distance;
-- Check the similarity between the title and description columns.
-- Method #1: similarity
SELECT
    f.title,
    f.description,
    -- Similarity score between each film's title and its own description
    similarity(f.title, f.description)
FROM film AS f;
-- Load the pg_trgm extension (skipped if it is already installed), then list
-- the installed extensions to verify that it is present.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

SELECT ext.*
FROM pg_extension AS ext;
-- Use the user-created function inventory_held_by_customer() to check which
-- movies are currently checked out by a customer.
SELECT
    f.title,
    i.inventory_id,
    -- Value returned by the function for this inventory item
    -- (presumably the holding customer's id — verify against the function).
    inventory_held_by_customer(i.inventory_id) AS held_by_cust
FROM film AS f
INNER JOIN inventory AS i
    ON f.film_id = i.film_id
-- Rows for which the function returns NULL are dropped.
WHERE inventory_held_by_customer(i.inventory_id) IS NOT NULL;
-- Select all columns from the pg_type system table where the type name is
-- equal to mpaa_rating.
SELECT *
FROM pg_type
WHERE typname = 'mpaa_rating';
-- Show the column name, data type and UDT name recorded in
-- INFORMATION_SCHEMA.COLUMNS for the rating column of the film table.
SELECT
    col.column_name,
    col.data_type,
    col.udt_name
FROM information_schema.columns AS col
WHERE col.table_name = 'film'
  AND col.column_name = 'rating';
-- Define the enumerated data type compass_position, then confirm that it now
-- appears in the pg_type system table.
CREATE TYPE compass_position AS ENUM ('North', 'South', 'East', 'West');

SELECT
    t.typname,
    t.typcategory
FROM pg_type AS t
WHERE t.typname = 'compass_position';
-- Full-text search for "elf" in the title, returning the title and
-- description of each matching film.
SELECT
    f.title,
    f.description
FROM film AS f
WHERE to_tsvector(f.title) @@ to_tsquery('elf');
-- Show every film description converted to a tsvector.
SELECT
    to_tsvector(f.description)
FROM film AS f;
-- Concatenate the category name and the film title, and generate a shortened
-- description that is at most 50 characters long and does not cut off a word.
SELECT
    CONCAT(c.name, ' ', f.title) AS film_category,
    CASE
        -- A description that already fits is returned whole. Applying the
        -- fixed "50 - position" arithmetic to a shorter string can land in
        -- the middle of its last word.
        WHEN LENGTH(f.description) <= 50 THEN f.description
        -- Otherwise take the first 50 characters and step back to the last
        -- space inside them: POSITION in the REVERSEd prefix is the distance
        -- of that space from the end. If the prefix contains no space,
        -- POSITION is 0 and the plain 50-character prefix is returned.
        ELSE LEFT(
            f.description,
            50 - POSITION(' ' IN REVERSE(LEFT(f.description, 50)))
        )
    END AS short_desc
FROM film AS f
INNER JOIN film_category AS fc
    ON f.film_id = fc.film_id
INNER JOIN category AS c
    ON fc.category_id = c.category_id;
-- Convert the film category name to uppercase and combine it with the title.
-- Truncate the description to 50 characters and strip any leading/trailing
-- whitespace from the result.
SELECT
    CONCAT(UPPER(cat.name), ': ', flm.title) AS film_category,
    TRIM(LEFT(flm.description, 50)) AS film_desc
FROM film AS flm
INNER JOIN film_category AS lnk
    ON flm.film_id = lnk.film_id
INNER JOIN category AS cat
    ON lnk.category_id = cat.category_id;
-- Generate a combined first/last name, using padding to insert the separator.
-- Method #1: right-pad the first name to one character more than its length.
SELECT
    RPAD(cust.first_name, LENGTH(cust.first_name) + 1) || cust.last_name AS full_name
FROM customer AS cust;

-- Method #2: left-pad the last name to one character more than its length.
SELECT
    cust.first_name || LPAD(cust.last_name, LENGTH(cust.last_name) + 1) AS full_name
FROM customer AS cust;
-- Split each email address into the username (before the '@') and the domain
-- name (after the '@').
SELECT
    SUBSTRING(cust.email FROM 1 FOR POSITION('@' IN cust.email) - 1) AS username,
    SUBSTRING(cust.email FROM POSITION('@' IN cust.email) + 1 FOR LENGTH(cust.email)) AS domain
FROM customer AS cust;
-- Identify only the street name (not including the house number) from the
-- address column by keeping everything after the first space.
SELECT
    SUBSTRING(a.address, POSITION(' ' IN a.address) + 1, LENGTH(a.address))
FROM address AS a;
-- Shorten the movie descriptions to just the first 50 characters.
SELECT
    LEFT(f.description, 50) AS short_desc
FROM film AS f;
-- Report the number of characters in each film description.
SELECT
    f.title,
    f.description,
    LENGTH(f.description) AS desc_len
FROM film AS f;
-- Replace each space in the film title with an underscore.
SELECT
    REPLACE(f.title, ' ', '_') AS title
FROM film AS f;
-- Adjust the case of the genre, title, and description, combining the genre
-- and title into one concatenated cell.
-- Method #1: concatenate with the || operator.
SELECT
    UPPER(cat.name) || ': ' || INITCAP(flm.title) AS film_category,
    LOWER(flm.description) AS description
FROM film AS flm
INNER JOIN film_category AS lnk
    ON flm.film_id = lnk.film_id
INNER JOIN category AS cat
    ON lnk.category_id = cat.category_id;

-- Method #2: concatenate with CONCAT() in place of the || operator.
SELECT
    CONCAT(UPPER(cat.name), ': ', INITCAP(flm.title)) AS film_category,
    LOWER(flm.description) AS description
FROM film AS flm
INNER JOIN film_category AS lnk
    ON flm.film_id = lnk.film_id
INNER JOIN category AS cat
    ON lnk.category_id = cat.category_id;
-- Put this all together. For each rental, identify the full name of the
-- customer, the movie title, the rental date, the day of the week, the time
-- rented, and whether or not the movie was overdue when it was turned in.
-- Only rentals made in the 90 days starting May 1 2005 are included.
SELECT
    CONCAT(c.first_name, ' ', c.last_name) AS full_name,
    f.title,
    r.rental_date,
    EXTRACT(dow FROM r.rental_date) AS dayofweek,
    -- AGE() yields an interval, not an integer number of days.
    AGE(r.return_date, r.rental_date) AS rental_days,
    CASE
        -- Compare the whole-day part of the time rented with the allowed
        -- rental duration expressed as an interval. A rental with no
        -- return_date compares as NULL and therefore falls through to 'False'.
        WHEN DATE_TRUNC('day', AGE(r.return_date, r.rental_date))
             > f.rental_duration * INTERVAL '1 day' THEN 'True'
        ELSE 'False'
    END AS past_due
FROM film AS f
INNER JOIN inventory AS i
    ON f.film_id = i.film_id
INNER JOIN rental AS r
    ON i.inventory_id = r.inventory_id
INNER JOIN customer AS c
    ON r.customer_id = c.customer_id
-- Half-open range: BETWEEN is closed at both ends and would also admit a
-- rental stamped exactly 90 days after the start date.
WHERE r.rental_date >= DATE '2005-05-01'
  AND r.rental_date < DATE '2005-05-01' + INTERVAL '90 days';
-- Count rentals per day.
-- Method #1: total rentals for each day of the week (EXTRACT(dow ...)).
SELECT
    EXTRACT(dow FROM r.rental_date) AS dayofweek,
    COUNT(*) AS total_rentals
FROM rental AS r
GROUP BY EXTRACT(dow FROM r.rental_date);

-- Method #2: total rentals for each calendar day (rental_date truncated to
-- the day), rather than for each weekday.
SELECT
    DATE_TRUNC('day', r.rental_date) AS rental_day,
    COUNT(*) AS rentals
FROM rental AS r
GROUP BY DATE_TRUNC('day', r.rental_date);
-- Calculate a timestamp five days from now, with both values measured to the
-- second (zero fractional digits).
SELECT
    CAST(CURRENT_TIMESTAMP(0) AS timestamp) AS right_now,
    CURRENT_TIMESTAMP(0) + INTERVAL '5 days' AS five_days_from_now;
-- Select the current timestamp without a time zone, first from NOW() and then
-- from CURRENT_TIMESTAMP.
SELECT NOW()::timestamp;

SELECT CAST(CURRENT_TIMESTAMP AS timestamp) AS right_now;
-- Calculate the expected return date of each rental: the rental date plus the
-- film's rental_duration counted in days.
SELECT
    flm.title,
    rnt.rental_date,
    flm.rental_duration,
    rnt.rental_date + flm.rental_duration * INTERVAL '1 day' AS expected_return_date,
    rnt.return_date
FROM film AS flm
INNER JOIN inventory AS inv
    ON flm.film_id = inv.film_id
INNER JOIN rental AS rnt
    ON inv.inventory_id = rnt.inventory_id
ORDER BY flm.title;
-- Exclude rentals that are still checked out (no return date) and express the
-- rental_duration as an INTERVAL alongside the time actually rented.
SELECT
    flm.title,
    flm.rental_duration * INTERVAL '1 day',
    rnt.return_date - rnt.rental_date AS days_rented
FROM film AS flm
INNER JOIN inventory AS inv
    ON flm.film_id = inv.film_id
INNER JOIN rental AS rnt
    ON inv.inventory_id = rnt.inventory_id
WHERE rnt.return_date IS NOT NULL
ORDER BY flm.title;
-- Determine the length of each rental using both AGE() and subtraction.
-- Method #1: AGE() function
SELECT
    flm.title,
    flm.rental_duration,
    -- Time between the rental and its return
    AGE(rnt.return_date, rnt.rental_date) AS days_rented
FROM film AS flm
INNER JOIN inventory AS inv
    ON flm.film_id = inv.film_id
INNER JOIN rental AS rnt
    ON inv.inventory_id = rnt.inventory_id
ORDER BY flm.title;
-- Method #2: Basic subtraction — subtract the rental date from the return
-- date to get the length of each rental.
SELECT
    f.title,
    f.rental_duration,
    r.return_date - r.rental_date AS days_rented
FROM rental AS r
INNER JOIN inventory AS i USING (inventory_id)
INNER JOIN film AS f USING (film_id)
ORDER BY f.title;
-- Use the contains operator (@>) to find films whose special_features array
-- includes the text Deleted Scenes.
SELECT
    f.title,
    f.special_features
FROM film AS f
WHERE f.special_features @> ARRAY['Deleted Scenes'];
-- Match 'Trailers' at any index of the special_features array, regardless of
-- position.
SELECT
    f.title,
    f.special_features
FROM film AS f
WHERE 'Trailers' = ANY (f.special_features);
-- Select all films that have Deleted Scenes at the second index of the
-- special_features array.
SELECT
    f.title,
    f.special_features
FROM film AS f
WHERE f.special_features[2] = 'Deleted Scenes';
-- Select all films that have the special feature Trailers at the first index
-- of the special_features array.
SELECT
    f.title,
    f.special_features
FROM film AS f
WHERE f.special_features[1] = 'Trailers';
-- Select the rental date and return date from the rental table. Add an
-- INTERVAL of 3 days to the rental_date to calculate the expected return date.
SELECT
    r.rental_date,
    r.rental_date + INTERVAL '3 DAY' AS expected_return_date,
    r.return_date
FROM rental AS r;
-- List the column name and data type of every column of the customer table,
-- as recorded in INFORMATION_SCHEMA.COLUMNS.
SELECT
    col.column_name,
    col.data_type
FROM information_schema.columns AS col
WHERE col.table_name = 'customer';
-- Select every INFORMATION_SCHEMA.COLUMNS field for the columns of the actor
-- table.
SELECT col.*
FROM information_schema.columns AS col
WHERE col.table_name = 'actor';
-- Select every INFORMATION_SCHEMA.TABLES field for the tables whose
-- table_schema is public.
SELECT tbl.*
FROM information_schema.tables AS tbl
WHERE tbl.table_schema = 'public';