Skip to content

Commit 4ee58a1

Browse files
author
Aniket Mokashi
committed
Minor edits to python script and Readme
1 parent f67597d commit 4ee58a1

File tree

2 files changed

+58
-26
lines changed

2 files changed

+58
-26
lines changed

bigquery/README.md

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
As of 2025, Google BigQuery allows publishing benchmark results, which was not the case earlier.
2-
3-
Download Google Cloud CLI:
1+
Download Google Cloud CLI and configure your project settings:
42
```
53
wget --continue --progress=dot:giga https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-linux-x86_64.tar.gz
64
tar -xf google-cloud-cli-linux-x86_64.tar.gz
@@ -9,12 +7,12 @@ source .bashrc
97
./google-cloud-sdk/bin/gcloud init
108
```
119

12-
Create the dataset and table:
10+
Create the dataset and table in BigQuery:
1311
```
1412
./create.sh
1513
```
1614

17-
Load the data:
15+
Load the data into the table:
1816
```
1917
wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz'
2018
gzip -d -f hits.csv.gz
@@ -24,8 +22,7 @@ command time -f '%e' bq load --source_format CSV --allow_quoted_newlines=1 test.
2422
```
2523

2624
Run the benchmark:
27-
2825
```
2926
pip install google-cloud-bigquery
30-
python3 run_queries.py > results.txt 2> log2.txt
27+
python3 run_queries.py > results.txt 2> log.txt
3128
```

bigquery/run_queries.py

Lines changed: 54 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,39 @@
22
from google.cloud.bigquery.enums import JobCreationMode
33

44
import sys
5+
from typing import TextIO, Any
6+
7+
def log(*objects: Any, sep: str = ' ', end: str = '\n', file: TextIO = sys.stderr, severity: str = 'INFO') -> None:
8+
"""
9+
Mimics the built-in print() function signature but prepends a
10+
timestamp and a configurable severity level to the output.
11+
12+
Args:
13+
*objects: The objects to be printed (converted to strings).
14+
sep (str): Separator inserted between values, default a space.
15+
end (str): String appended after the last value, default a newline.
16+
file (TextIO): Object with a write(string) method, default sys.stderr.
17+
severity (str): The log level (e.g., "INFO", "WARNING", "ERROR").
18+
"""
19+
# 1. Prepare the standard print content
20+
# Join the stringified objects with the specified separator
21+
message = sep.join(str(obj) for obj in objects)
22+
23+
# 2. Prepare the log prefix
24+
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
25+
prefix = f"[{timestamp}] [{severity.upper()}]: "
26+
27+
# 3. Combine the prefix and the message
28+
full_message = prefix + message
29+
30+
# 4. Use the file.write method to output the content
31+
# The 'end' argument is handled explicitly here
32+
file.write(full_message + end)
33+
34+
# Ensure the buffer is flushed (important for file/stream output)
35+
if file is not sys.stdout and file is not sys.stderr:
36+
file.flush()
37+
538

639
job_config = bigquery.QueryJobConfig()
740
job_config.use_query_cache = False
@@ -15,29 +48,31 @@
1548
query = query.strip()
1649
print("[", end='')
1750
for i in range(TRIES):
18-
print(f"\n[{i}]: {query}", file=sys.stderr)
51+
log(f"\n[{i}]: {query}")
1952
try:
2053
query_job = client.query(query, job_config=job_config)
2154
results = query_job.result()
22-
print(f"Job ID: **{query_job.job_id}**", file=sys.stderr)
23-
print(f"State: **{query_job.state}**", file=sys.stderr)
24-
print(f"Results Fetched from Cache: {query_job.cache_hit}", file=sys.stderr)
25-
print(f"Created Time: {query_job.created}", file=sys.stderr)
26-
print(f"Start Time: {query_job.started}", file=sys.stderr)
27-
print(f"End Time: {query_job.ended}", file=sys.stderr)
28-
totalTime = query_job.ended - query_job.started
29-
execTime = query_job.ended - query_job.created
30-
print(f"Execution Time: {totalTime}", file=sys.stderr)
31-
print(f"Total Time: {execTime}", file=sys.stderr)
32-
print(f"Total Rows Returned: {results.total_rows}", file=sys.stderr)
33-
34-
execSeconds = execTime.total_seconds()
55+
execution_time = query_job.ended - query_job.started
56+
total_time = query_job.ended - query_job.created
57+
total_time_secs = total_time.total_seconds()
3558
endstr = "],\n" if i == 2 else ","
36-
print(f"{execSeconds}", end=endstr)
59+
print(f"{total_time_secs}", end=endstr)
60+
61+
log(f"Job ID: **{query_job.job_id}**")
62+
log(f"Query ID: **{query_job.query_id}**")
63+
log(f"State: **{query_job.state}**")
64+
log(f"Results Fetched from Cache: {query_job.cache_hit}")
65+
log(f"Created Time: {query_job.created}")
66+
log(f"Start Time: {query_job.started}")
67+
log(f"End Time: {query_job.ended}")
68+
log(f"Execution Time: {execution_time}")
69+
log(f"Total Time: {total_time}")
70+
log(f"Total Rows Returned: {results.total_rows}")
71+
3772
except Exception as e:
38-
print(f"Job failed with error: {e}", file=sys.stderr)
73+
log(f"Job failed with error: {e}", severity="ERROR")
3974
# Print error details from the job itself
4075
if query_job.error_result:
41-
print("\n--- Job Error Details ---", file=sys.stderr)
42-
print(f"Reason: {query_job.error_result.get('reason')}", file=sys.stderr)
43-
print(f"Message: {query_job.error_result.get('message')}", file=sys.stderr)
76+
log("\n--- Job Error Details ---")
77+
log(f"Reason: {query_job.error_result.get('reason')}")
78+
log(f"Message: {query_job.error_result.get('message')}")

0 commit comments

Comments
 (0)