-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate_data_set.sql
More file actions
74 lines (59 loc) · 1.65 KB
/
create_data_set.sql
File metadata and controls
74 lines (59 loc) · 1.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
-- create datawarehouse
CREATE WAREHOUSE
demo_wh WITH
WAREHOUSE_SIZE = 'XSMALL'
AUTO_SUSPEND = 60
AUTO_RESUME = TRUE
INITIALLY_SUSPENDED = TRUE;
-- Create a new database
CREATE DATABASE demo_db;
-- Switch to use this database
USE DATABASE demo_db;
-- Create a schema within the database
CREATE SCHEMA sales_schema;
-- Set this as the active schema
USE SCHEMA sales_schema;
-- Create a table to hold sales data
CREATE OR REPLACE
TABLE sales
(order_id INT,order_date DATE,region STRING,product STRING,amount FLOAT );
-- Generate and insert 1 million random sales records
INSERT INTO sales
SELECT
-- Unique Order ID
ROW_NUMBER() OVER (ORDER BY SEQ8()) AS order_id,
-- Random Order Date (Between 2023-01-01 and ~2 years)
DATEADD(
day,
UNIFORM(1, 730, RANDOM()),
'2023-01-01'::DATE
) AS order_date,
-- Random Region
CASE UNIFORM(1, 5, RANDOM())
WHEN 1 THEN 'North'
WHEN 2 THEN 'South'
WHEN 3 THEN 'East'
WHEN 4 THEN 'West'
ELSE 'Central'
END AS region,
-- Random Product
CASE UNIFORM(1, 4, RANDOM())
WHEN 1 THEN 'Laptop'
WHEN 2 THEN 'Phone'
WHEN 3 THEN 'Tablet'
ELSE 'Monitor'
END AS product,
-- Random Sales Amount
UNIFORM(100, 5000, RANDOM()) AS amount
FROM TABLE(GENERATOR(ROWCOUNT => 1000000));
-- Check total row count
SELECT COUNT(*) as total_rows FROM sales;
-- View sample records
SELECT * FROM sales LIMIT 10;
-- Data distribution by region
SELECT
region,
COUNT(*) as row_count,
ROUND(COUNT(*) / 10000.0, 1) as percentage FROM sales
GROUP BY region
ORDER BY region;