Skip to content

Commit 4ee58a1

Browse files
author
Aniket Mokashi
committed
Minor edits to python script and Readme
1 parent f67597d commit 4ee58a1

File tree

2 files changed

+58
-26
lines changed

2 files changed

+58
-26
lines changed

bigquery/README.md

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
As of 2025, Google BigQuery allows publishing benchmark results, which was not the case earlier.
2-
3-
Download Google Cloud CLI:
1+
Download Google Cloud CLI and configure your project settings:
42
```
53
wget --continue --progress=dot:giga https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-linux-x86_64.tar.gz
64
tar -xf google-cloud-cli-linux-x86_64.tar.gz
@@ -9,12 +7,12 @@ source .bashrc
97
./google-cloud-sdk/bin/gcloud init
108
```
119

12-
Create the dataset and table:
10+
Create the dataset and table in BigQuery:
1311
```
1412
./create.sh
1513
```
1614

17-
Load the data:
15+
Load the data into the table:
1816
```
1917
wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz'
2018
gzip -d -f hits.csv.gz
@@ -24,8 +22,7 @@ command time -f '%e' bq load --source_format CSV --allow_quoted_newlines=1 test.
2422
```
2523

2624
Run the benchmark:
27-
2825
```
2926
pip install google-cloud-bigquery
30-
python3 run_queries.py > results.txt 2> log2.txt
27+
python3 run_queries.py > results.txt 2> log.txt
3128
```

bigquery/run_queries.py

Lines changed: 54 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,39 @@
22
from google.cloud.bigquery.enums import JobCreationMode
33

44
import sys
5+
from typing import TextIO, Any
6+
7+
def log(*objects: Any, sep: str = ' ', end: str = '\n', file: TextIO = sys.stderr, severity: str = 'INFO') -> None:
8+
"""
9+
Mimics the built-in print() function signature but prepends a
10+
timestamp and a configurable severity level to the output.
11+
12+
Args:
13+
*objects: The objects to be printed (converted to strings).
14+
sep (str): Separator inserted between values, default a space.
15+
end (str): String appended after the last value, default a newline.
16+
file (TextIO): Object with a write(string) method, default sys.stderr.
17+
severity (str): The log level (e.g., "INFO", "WARNING", "ERROR").
18+
"""
19+
# 1. Prepare the standard print content
20+
# Join the stringified objects with the specified separator
21+
message = sep.join(str(obj) for obj in objects)
22+
23+
# 2. Prepare the log prefix
24+
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
25+
prefix = f"[{timestamp}] [{severity.upper()}]: "
26+
27+
# 3. Combine the prefix and the message
28+
full_message = prefix + message
29+
30+
# 4. Use the file.write method to output the content
31+
# The 'end' argument is handled explicitly here
32+
file.write(full_message + end)
33+
34+
# Ensure the buffer is flushed (important for file/stream output)
35+
if file is not sys.stdout and file is not sys.stderr:
36+
file.flush()
37+
538

639
job_config = bigquery.QueryJobConfig()
740
job_config.use_query_cache = False
@@ -15,29 +48,31 @@
1548
query = query.strip()
1649
print("[", end='')
1750
for i in range(TRIES):
18-
print(f"\n[{i}]: {query}", file=sys.stderr)
51+
log(f"\n[{i}]: {query}")
1952
try:
2053
query_job = client.query(query, job_config=job_config)
2154
results = query_job.result()
22-
print(f"Job ID: **{query_job.job_id}**", file=sys.stderr)
23-
print(f"State: **{query_job.state}**", file=sys.stderr)
24-
print(f"Results Fetched from Cache: {query_job.cache_hit}", file=sys.stderr)
25-
print(f"Created Time: {query_job.created}", file=sys.stderr)
26-
print(f"Start Time: {query_job.started}", file=sys.stderr)
27-
print(f"End Time: {query_job.ended}", file=sys.stderr)
28-
totalTime = query_job.ended - query_job.started
29-
execTime = query_job.ended - query_job.created
30-
print(f"Execution Time: {totalTime}", file=sys.stderr)
31-
print(f"Total Time: {execTime}", file=sys.stderr)
32-
print(f"Total Rows Returned: {results.total_rows}", file=sys.stderr)
33-
34-
execSeconds = execTime.total_seconds()
55+
execution_time = query_job.ended - query_job.started
56+
total_time = query_job.ended - query_job.created
57+
total_time_secs = total_time.total_seconds()
3558
endstr = "],\n" if i == 2 else ","
36-
print(f"{execSeconds}", end=endstr)
59+
print(f"{total_time_secs}", end=endstr)
60+
61+
log(f"Job ID: **{query_job.job_id}**")
62+
log(f"Query ID: **{query_job.query_id}**")
63+
log(f"State: **{query_job.state}**")
64+
log(f"Results Fetched from Cache: {query_job.cache_hit}")
65+
log(f"Created Time: {query_job.created}")
66+
log(f"Start Time: {query_job.started}")
67+
log(f"End Time: {query_job.ended}")
68+
log(f"Execution Time: {execution_time}")
69+
log(f"Total Time: {total_time}")
70+
log(f"Total Rows Returned: {results.total_rows}")
71+
3772
except Exception as e:
38-
print(f"Job failed with error: {e}", file=sys.stderr)
73+
log(f"Job failed with error: {e}", severity="ERROR")
3974
# Print error details from the job itself
4075
if query_job.error_result:
41-
print("\n--- Job Error Details ---", file=sys.stderr)
42-
print(f"Reason: {query_job.error_result.get('reason')}", file=sys.stderr)
43-
print(f"Message: {query_job.error_result.get('message')}", file=sys.stderr)
76+
log("\n--- Job Error Details ---")
77+
log(f"Reason: {query_job.error_result.get('reason')}")
78+
log(f"Message: {query_job.error_result.get('message')}")

0 commit comments

Comments
 (0)