From ea02d17a2448824ad18a35f555d4893c51d29ff3 Mon Sep 17 00:00:00 2001 From: Aniket Mokashi Date: Wed, 29 Oct 2025 03:44:45 +0000 Subject: [PATCH 1/6] Add a new script to run bigquery queries with python client --- bigquery/create.sql | 3 ++- bigquery/queries.sql | 2 +- bigquery/run_queries.py | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 bigquery/run_queries.py diff --git a/bigquery/create.sql b/bigquery/create.sql index 9012df89a..63ebbcd2e 100644 --- a/bigquery/create.sql +++ b/bigquery/create.sql @@ -104,5 +104,6 @@ CREATE TABLE test.hits HasGCLID SMALLINT NOT NULL, RefererHash BIGINT NOT NULL, URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL + CLID INTEGER NOT NULL, + PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) NOT ENFORCED ); diff --git a/bigquery/queries.sql b/bigquery/queries.sql index 3dc8f405a..a97258890 100644 --- a/bigquery/queries.sql +++ b/bigquery/queries.sql @@ -26,7 +26,7 @@ SELECT SearchPhrase FROM test.hits WHERE SearchPhrase <> '' ORDER BY EventTime L SELECT SearchPhrase FROM test.hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; SELECT SearchPhrase FROM test.hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM test.hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM test.hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\\.)?([^/]+)/.*$', '\\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM test.hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), 
SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), 
SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM test.hits; SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM test.hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM test.hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; diff --git a/bigquery/run_queries.py b/bigquery/run_queries.py new file mode 100644 index 000000000..1585f931b --- /dev/null +++ b/bigquery/run_queries.py @@ -0,0 +1,41 @@ +from google.cloud import bigquery +from google.cloud.bigquery.enums import JobCreationMode + +import sys + +client = bigquery.Client( + default_job_creation_mode=JobCreationMode.JOB_CREATION_OPTIONAL +) + +file = open('queries.sql', 'r') +TRIES = 3 +for query in file: + query = query.strip() + print("[", end='') + for i in range(TRIES): + print(f"\n[{i}]: {query}", file=sys.stderr) + try: + query_job = client.query(query) + results = query_job.result() + print(f"Job ID: **{query_job.job_id}**", file=sys.stderr) + print(f"State: **{query_job.state}**", file=sys.stderr) + print(f"Results Fetched from Cache: {query_job.cache_hit}", file=sys.stderr) + print(f"Created Time: {query_job.created}", file=sys.stderr) + print(f"Start Time: {query_job.started}", file=sys.stderr) + print(f"End Time: {query_job.ended}", file=sys.stderr) + totalTime = query_job.ended - query_job.started + execTime = query_job.ended - query_job.created + print(f"Execution Time: {totalTime}", file=sys.stderr) + print(f"Total Time: {execTime}", file=sys.stderr) + print(f"Total Rows Returned: {results.total_rows}", file=sys.stderr) + + execSeconds = 
execTime.total_seconds() + endstr = "],\n" if i == 2 else "," + print(f"{execSeconds}", end=endstr) + except Exception as e: + print(f"Job failed with error: {e}", file=sys.stderr) + # Print error details from the job itself + if query_job.error_result: + print("\n--- Job Error Details ---", file=sys.stderr) + print(f"Reason: {query_job.error_result.get('reason')}", file=sys.stderr) + print(f"Message: {query_job.error_result.get('message')}", file=sys.stderr) From c20bb97c7ea4705ee87e72ff23764262e155ccec Mon Sep 17 00:00:00 2001 From: Aniket Mokashi Date: Wed, 29 Oct 2025 04:53:01 +0000 Subject: [PATCH 2/6] Add results after a run on a project with no reservation (ondemand) --- bigquery/create.sh | 5 ++ bigquery/results/result.json | 90 ++++++++++++++++++------------------ bigquery/run_queries.py | 4 +- 3 files changed, 53 insertions(+), 46 deletions(-) create mode 100755 bigquery/create.sh diff --git a/bigquery/create.sh b/bigquery/create.sh new file mode 100755 index 000000000..6f9c26788 --- /dev/null +++ b/bigquery/create.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +bq mk --dataset test + +bq query --use_legacy_sql=false < create.sql diff --git a/bigquery/results/result.json b/bigquery/results/result.json index c46efe88b..7707cb533 100644 --- a/bigquery/results/result.json +++ b/bigquery/results/result.json @@ -1,6 +1,6 @@ { "system": "Bigquery", - "date": "2025-04-09", + "date": "2025-10-28", "machine": "serverless", "cluster_size": "serverless", "proprietary": "yes", @@ -9,52 +9,52 @@ "tags": ["serverless", "column-oriented", "gcp", "managed"], - "load_time": 1146, + "load_time": 781.50, "data_size": 8760000000, "result": [ -[4.862,4.001,3.921], -[4.268,4.113,4.467], -[4.341,4.15,4.219], -[4.124,3.996,4.337], -[4.553,4.36,4.349], -[4.565,4.4,4.661], -[4.089,4.132,3.974], -[4.514,4.296,4.312], -[6.183,6.155,4.557], -[6.068,6.106,6.259], -[4.109,4.082,4.165], -[4.24,3.981,4.054], -[4.295,4.301,4.283], -[6.03,6.079,6.094], -[4.383,4.399,4.218], -[4.304,4.23,4.189], 
-[4.849,4.86,4.62], -[4.309,4.371,4.393], -[6.096,6.109,6.071], -[3.838,3.89,3.938], -[4.249,4.037,4.136], -[4.337,4.196,4.264], -[4.493,4.603,4.435], -[6.125,4.667,4.559], -[4.039,4.039,3.942], -[3.903,4.239,4.003], -[4.013,4.108,4.073], -[4.524,4.474,4.498], -[null,null,null], -[4.866,4.862,6.063], -[4.271,4.403,4.34], -[4.39,4.314,4.566], -[7.233,7.322,7.241], -[7.39,7.382,7.298], -[6.05,6.084,6.362], -[4.31,4.222,4.254], -[4.181,4.003,3.95], -[3.98,3.988,3.982], -[4.017,4.004,3.987], -[4.334,4.322,4.445], -[4.126,3.853,3.982], -[4.214,3.931,3.921], -[4.033,3.913,3.866] +[0.714,0.503,0.405], +[0.53,0.33,0.426], +[0.73,0.408,0.432], +[0.799,0.33,0.281], +[0.623,0.494,0.437], +[0.738,0.553,0.546], +[0.344,0.359,0.407], +[0.434,0.772,0.379], +[0.993,0.654,0.63], +[0.855,0.855,0.82], +[0.494,0.384,0.411], +[0.585,0.564,0.596], +[0.669,0.516,0.536], +[2.199,2.128,1.78], +[0.791,0.635,0.703], +[0.571,0.561,0.546], +[0.774,0.725,0.742], +[0.577,0.896,0.517], +[1.704,1.341,1.182], +[0.31,0.289,0.331], +[1.033,0.499,0.536], +[0.588,0.62,0.544], +[1.015,0.689,0.784], +[1.399,0.754,0.903], +[0.37,0.491,0.363], +[0.369,0.348,0.34], +[0.395,0.404,0.391], +[0.651,0.667,0.818], +[1.304,1.163,1.158], +[0.659,0.425,0.529], +[0.684,0.553,0.542], +[0.876,0.57,0.492], +[1.024,1.022,1.119], +[1.261,1.257,1.374], +[1.057,0.918,0.893], +[0.556,0.585,0.522], +[0.57,0.498,0.532], +[0.473,0.411,0.411], +[0.387,0.453,0.38], +[0.703,0.623,0.626], +[0.442,0.408,0.337], +[0.372,0.361,0.442], +[0.403,0.358,0.388] ] } diff --git a/bigquery/run_queries.py b/bigquery/run_queries.py index 1585f931b..e31275095 100644 --- a/bigquery/run_queries.py +++ b/bigquery/run_queries.py @@ -3,6 +3,8 @@ import sys +job_config = bigquery.QueryJobConfig() +job_config.use_query_cache = False client = bigquery.Client( default_job_creation_mode=JobCreationMode.JOB_CREATION_OPTIONAL ) @@ -15,7 +17,7 @@ for i in range(TRIES): print(f"\n[{i}]: {query}", file=sys.stderr) try: - query_job = client.query(query) + 
query_job = client.query(query, job_config=job_config) results = query_job.result() print(f"Job ID: **{query_job.job_id}**", file=sys.stderr) print(f"State: **{query_job.state}**", file=sys.stderr) From fa9d63d30358d20da9e279ecb38dc7907d3fce6d Mon Sep 17 00:00:00 2001 From: Aniket Mokashi Date: Wed, 29 Oct 2025 04:55:48 +0000 Subject: [PATCH 3/6] update readme with updated instructions on running benchmark --- bigquery/README.md | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/bigquery/README.md b/bigquery/README.md index cdf99566e..717c8c6b6 100644 --- a/bigquery/README.md +++ b/bigquery/README.md @@ -1,12 +1,5 @@ As of 2025, Google Bigquery allow publishing benchmark results, which was not the case earlier. -It's very difficult to find, how to create a database. -Databases are named "datasets". You need to press on `⋮` near project. - -Create dataset `test`. -Go to the query editor and paste the contents of `create.sql`. -It will take two seconds to create a table. - Download Google Cloud CLI: ``` wget --continue --progress=dot:giga https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-linux-x86_64.tar.gz @@ -16,6 +9,11 @@ source .bashrc ./google-cloud-sdk/bin/gcloud init ``` +Create the dataset and table: +``` +./create.sh +``` + Load the data: ``` wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz' @@ -28,11 +26,6 @@ command time -f '%e' bq load --source_format CSV --allow_quoted_newlines=1 test. Run the benchmark: ``` -./run.sh 2>&1 | tee log.txt - -cat log.txt | - grep -P '^real|^Error' | - sed -r -e 's/^Error.*$/null/; s/^real\s*([0-9.]+)m([0-9.]+)s$/\1 \2/' | - awk '{ if ($2 != "") { print $1 * 60 + $2 } else { print $1 } }' | - awk '{ if ($1 == "null") { skip = 1 } else { if (i % 3 == 0) { printf "[" }; printf skip ? 
"null" : $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; skip = 0; } }' +pip install google-cloud-bigquery +python3 run_queries.py > results.txt 2> log2.txt ``` From 1a92778d6d9de6d3054d63fd3802368c6341b770 Mon Sep 17 00:00:00 2001 From: Aniket Mokashi Date: Fri, 14 Nov 2025 00:01:12 +0000 Subject: [PATCH 4/6] Minor edits to python script and Readme --- bigquery/README.md | 11 +++---- bigquery/run_queries.py | 73 ++++++++++++++++++++++++++++++----------- 2 files changed, 58 insertions(+), 26 deletions(-) diff --git a/bigquery/README.md b/bigquery/README.md index 717c8c6b6..48b4e8a1e 100644 --- a/bigquery/README.md +++ b/bigquery/README.md @@ -1,6 +1,4 @@ -As of 2025, Google Bigquery allow publishing benchmark results, which was not the case earlier. - -Download Google Cloud CLI: +Download Google Cloud CLI and configure your project settings: ``` wget --continue --progress=dot:giga https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-linux-x86_64.tar.gz tar -xf google-cloud-cli-linux-x86_64.tar.gz @@ -9,12 +7,12 @@ source .bashrc ./google-cloud-sdk/bin/gcloud init ``` -Create the dataset and table: +Create the dataset and table in BigQuery: ``` ./create.sh ``` -Load the data: +Load the data in the table: ``` wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz' gzip -d -f hits.csv.gz @@ -24,8 +22,7 @@ command time -f '%e' bq load --source_format CSV --allow_quoted_newlines=1 test. 
``` Run the benchmark: - ``` pip install google-cloud-bigquery -python3 run_queries.py > results.txt 2> log2.txt +python3 run_queries.py > results.txt 2> log.txt ``` diff --git a/bigquery/run_queries.py b/bigquery/run_queries.py index e31275095..1425d03fd 100644 --- a/bigquery/run_queries.py +++ b/bigquery/run_queries.py @@ -2,6 +2,39 @@ from google.cloud.bigquery.enums import JobCreationMode import sys +from typing import TextIO, Any + +def log(*objects: Any, sep: str = ' ', end: str = '\n', file: TextIO = sys.stderr, severity: str = 'INFO') -> None: + """ + Mimics the built-in print() function signature but prepends a + timestamp and a configurable severity level to the output. + + Args: + *objects: The objects to be printed (converted to strings). + sep (str): Separator inserted between values, default a space. + end (str): String appended after the last value, default a newline. + file (TextIO): Object with a write(string) method, default sys.stderr. + severity (str): The log level (e.g., "INFO", "WARNING", "ERROR"). + """ + # 1. Prepare the standard print content + # Join the stringified objects with the specified separator + message = sep.join(str(obj) for obj in objects) + + # 2. Prepare the log prefix + timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + prefix = f"[{timestamp}] [{severity.upper()}]: " + + # 3. Combine the prefix and the message + full_message = prefix + message + + # 4. 
Use the file.write method to output the content + # The 'end' argument is handled explicitly here + file.write(full_message + end) + + # Ensure the buffer is flushed (important for file/stream output) + if file is not sys.stdout and file is not sys.stderr: + file.flush() + job_config = bigquery.QueryJobConfig() job_config.use_query_cache = False @@ -15,29 +48,31 @@ query = query.strip() print("[", end='') for i in range(TRIES): - print(f"\n[{i}]: {query}", file=sys.stderr) + log(f"\n[{i}]: {query}") try: query_job = client.query(query, job_config=job_config) results = query_job.result() - print(f"Job ID: **{query_job.job_id}**", file=sys.stderr) - print(f"State: **{query_job.state}**", file=sys.stderr) - print(f"Results Fetched from Cache: {query_job.cache_hit}", file=sys.stderr) - print(f"Created Time: {query_job.created}", file=sys.stderr) - print(f"Start Time: {query_job.started}", file=sys.stderr) - print(f"End Time: {query_job.ended}", file=sys.stderr) - totalTime = query_job.ended - query_job.started - execTime = query_job.ended - query_job.created - print(f"Execution Time: {totalTime}", file=sys.stderr) - print(f"Total Time: {execTime}", file=sys.stderr) - print(f"Total Rows Returned: {results.total_rows}", file=sys.stderr) - - execSeconds = execTime.total_seconds() + execution_time = query_job.ended - query_job.started + total_time = query_job.ended - query_job.created + total_time_secs = total_time.total_seconds() endstr = "],\n" if i == 2 else "," - print(f"{execSeconds}", end=endstr) + print(f"{total_time_secs}", end=endstr) + + log(f"Job ID: **{query_job.job_id}**") + log(f"Query ID: **{query_job.query_id}**") + log(f"State: **{query_job.state}**") + log(f"Results Fetched from Cache: {query_job.cache_hit}") + log(f"Created Time: {query_job.created}") + log(f"Start Time: {query_job.started}") + log(f"End Time: {query_job.ended}") + log(f"Execution Time: {execution_time}") + log(f"Total Time: {total_time}") + log(f"Total Rows Returned: 
{results.total_rows}") + except Exception as e: - print(f"Job failed with error: {e}", file=sys.stderr) + log(f"Job failed with error: {e}", severity="ERROR") # Print error details from the job itself if query_job.error_result: - print("\n--- Job Error Details ---", file=sys.stderr) - print(f"Reason: {query_job.error_result.get('reason')}", file=sys.stderr) - print(f"Message: {query_job.error_result.get('message')}", file=sys.stderr) + log("\n--- Job Error Details ---") + log(f"Reason: {query_job.error_result.get('reason')}") + log(f"Message: {query_job.error_result.get('message')}") From 1ad61f1e1de2c6c023fb8ceacb9fa271eebbd556 Mon Sep 17 00:00:00 2001 From: Aniket Mokashi Date: Fri, 14 Nov 2025 00:14:57 +0000 Subject: [PATCH 5/6] Minor edits to python script and Readme --- bigquery/README.md | 2 +- bigquery/run_queries.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/bigquery/README.md b/bigquery/README.md index 48b4e8a1e..9079561d3 100644 --- a/bigquery/README.md +++ b/bigquery/README.md @@ -1,4 +1,4 @@ -Download Google Cloud CLI and configure your project settings: +Download Google Cloud CLI and configure your project settings. 
You can skip this step if you are using [Cloud shell](https://docs.cloud.google.com/shell/docs/launching-cloud-shell): ``` wget --continue --progress=dot:giga https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-linux-x86_64.tar.gz tar -xf google-cloud-cli-linux-x86_64.tar.gz diff --git a/bigquery/run_queries.py b/bigquery/run_queries.py index 1425d03fd..64f639077 100644 --- a/bigquery/run_queries.py +++ b/bigquery/run_queries.py @@ -3,6 +3,7 @@ import sys from typing import TextIO, Any +from datetime import datetime def log(*objects: Any, sep: str = ' ', end: str = '\n', file: TextIO = sys.stderr, severity: str = 'INFO') -> None: """ @@ -50,14 +51,18 @@ def log(*objects: Any, sep: str = ' ', end: str = '\n', file: TextIO = sys.stder for i in range(TRIES): log(f"\n[{i}]: {query}") try: + client_start_time = datetime.now() query_job = client.query(query, job_config=job_config) results = query_job.result() + client_end_time = datetime.now() + execution_time = query_job.ended - query_job.started total_time = query_job.ended - query_job.created total_time_secs = total_time.total_seconds() endstr = "],\n" if i == 2 else "," print(f"{total_time_secs}", end=endstr) + client_time = client_end_time - client_start_time log(f"Job ID: **{query_job.job_id}**") log(f"Query ID: **{query_job.query_id}**") log(f"State: **{query_job.state}**") @@ -67,6 +72,7 @@ def log(*objects: Any, sep: str = ' ', end: str = '\n', file: TextIO = sys.stder log(f"End Time: {query_job.ended}") log(f"Execution Time: {execution_time}") log(f"Total Time: {total_time}") + log(f"Client Time: {client_time}") log(f"Total Rows Returned: {results.total_rows}") except Exception as e: From db478efd151427d51623727a3cf1396be3759746 Mon Sep 17 00:00:00 2001 From: Aniket Mokashi Date: Fri, 14 Nov 2025 19:45:47 +0000 Subject: [PATCH 6/6] Use only client time and update results --- bigquery/results/result.json | 88 ++++++++++++++++++------------------ bigquery/run_queries.py | 32 ++++--------- 
2 files changed, 52 insertions(+), 68 deletions(-) diff --git a/bigquery/results/result.json b/bigquery/results/result.json index 7707cb533..9875c53d1 100644 --- a/bigquery/results/result.json +++ b/bigquery/results/result.json @@ -9,52 +9,52 @@ "tags": ["serverless", "column-oriented", "gcp", "managed"], - "load_time": 781.50, + "load_time": 776.91, "data_size": 8760000000, "result": [ -[0.714,0.503,0.405], -[0.53,0.33,0.426], -[0.73,0.408,0.432], -[0.799,0.33,0.281], -[0.623,0.494,0.437], -[0.738,0.553,0.546], -[0.344,0.359,0.407], -[0.434,0.772,0.379], -[0.993,0.654,0.63], -[0.855,0.855,0.82], -[0.494,0.384,0.411], -[0.585,0.564,0.596], -[0.669,0.516,0.536], -[2.199,2.128,1.78], -[0.791,0.635,0.703], -[0.571,0.561,0.546], -[0.774,0.725,0.742], -[0.577,0.896,0.517], -[1.704,1.341,1.182], -[0.31,0.289,0.331], -[1.033,0.499,0.536], -[0.588,0.62,0.544], -[1.015,0.689,0.784], -[1.399,0.754,0.903], -[0.37,0.491,0.363], -[0.369,0.348,0.34], -[0.395,0.404,0.391], -[0.651,0.667,0.818], -[1.304,1.163,1.158], -[0.659,0.425,0.529], -[0.684,0.553,0.542], -[0.876,0.57,0.492], -[1.024,1.022,1.119], -[1.261,1.257,1.374], -[1.057,0.918,0.893], -[0.556,0.585,0.522], -[0.57,0.498,0.532], -[0.473,0.411,0.411], -[0.387,0.453,0.38], -[0.703,0.623,0.626], -[0.442,0.408,0.337], -[0.372,0.361,0.442], -[0.403,0.358,0.388] +[0.383933,0.402355,0.370758], +[0.334439,0.433776,0.416341], +[0.469506,0.359557,0.386433], +[0.491417,0.333208,0.4758], +[0.552464,0.652322,0.555889], +[0.581302,0.603089,0.674999], +[1.087835,0.639649,0.360542], +[0.438221,0.759105,0.497731], +[0.702109,0.712533,0.678109], +[0.857454,0.968303,0.995039], +[0.547042,0.479513,0.475109], +[0.547026,0.549529,0.614708], +[0.686315,0.580551,0.630673], +[1.792573,2.034019,1.845895], +[0.610674,0.677655,0.643796], +[0.580303,0.729024,0.622044], +[0.760401,0.809858,0.822725], +[0.721757,0.611165,0.744566], +[1.49368,1.372045,1.498892], +[0.363523,0.383959,0.366856], +[0.625735,0.49802,0.473233], +[0.513777,0.508772,0.527258], 
+[0.895406,0.874879,0.799704], +[0.909036,0.679151,0.730413], +[0.358434,0.509104,0.467827], +[0.421586,0.428603,0.33761], +[0.54752,0.364919,0.444499], +[0.691434,0.674469,0.930067], +[1.143579,1.034013,1.105913], +[0.569294,0.444362,0.463864], +[0.517151,0.53565,0.523663], +[0.56208,0.573,0.543899], +[1.409102,1.116484,1.295522], +[1.413902,1.346194,1.406088], +[1.068575,0.985308,1.194028], +[0.781501,0.524615,0.664192], +[0.678144,0.666519,0.548661], +[0.477265,0.445584,0.469621], +[0.554599,0.530927,0.551336], +[0.777017,0.696796,0.810055], +[0.427604,0.43113,0.449339], +[0.434927,0.407959,0.435918], +[0.478507,0.425838,0.541504] ] } diff --git a/bigquery/run_queries.py b/bigquery/run_queries.py index 64f639077..76d75e38c 100644 --- a/bigquery/run_queries.py +++ b/bigquery/run_queries.py @@ -22,7 +22,7 @@ def log(*objects: Any, sep: str = ' ', end: str = '\n', file: TextIO = sys.stder message = sep.join(str(obj) for obj in objects) # 2. Prepare the log prefix - timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") prefix = f"[{timestamp}] [{severity.upper()}]: " # 3. 
Combine the prefix and the message @@ -52,33 +52,17 @@ def log(*objects: Any, sep: str = ' ', end: str = '\n', file: TextIO = sys.stder log(f"\n[{i}]: {query}") try: client_start_time = datetime.now() - query_job = client.query(query, job_config=job_config) - results = query_job.result() + results = client.query_and_wait(query, job_config=job_config) client_end_time = datetime.now() - execution_time = query_job.ended - query_job.started - total_time = query_job.ended - query_job.created - total_time_secs = total_time.total_seconds() + client_time = client_end_time - client_start_time + client_time_secs = client_time.total_seconds() endstr = "],\n" if i == 2 else "," - print(f"{total_time_secs}", end=endstr) + print(f"{client_time_secs}", end=endstr) - client_time = client_end_time - client_start_time - log(f"Job ID: **{query_job.job_id}**") - log(f"Query ID: **{query_job.query_id}**") - log(f"State: **{query_job.state}**") - log(f"Results Fetched from Cache: {query_job.cache_hit}") - log(f"Created Time: {query_job.created}") - log(f"Start Time: {query_job.started}") - log(f"End Time: {query_job.ended}") - log(f"Execution Time: {execution_time}") - log(f"Total Time: {total_time}") - log(f"Client Time: {client_time}") - log(f"Total Rows Returned: {results.total_rows}") + log(f"Job ID: **{results.job_id}**") + log(f"Query ID: **{results.query_id}**") + log(f"Client time: **{client_time}**") except Exception as e: log(f"Job failed with error: {e}", severity="ERROR") - # Print error details from the job itself - if query_job.error_result: - log("\n--- Job Error Details ---") - log(f"Reason: {query_job.error_result.get('reason')}") - log(f"Message: {query_job.error_result.get('message')}")