From d3397f58edc972916ee587af80b525860e3fb1c4 Mon Sep 17 00:00:00 2001 From: Daniel Song Date: Mon, 30 Mar 2026 02:04:42 -0700 Subject: [PATCH 1/2] fix: implement set_timezone_to_utc() for BigQuery and ClickHouse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - BigQuery: use SET @@time_zone = 'UTC' session variable - ClickHouse: use SET session_timezone = 'UTC' - Elevate _connect.py timezone warning from DEBUG to WARNING level Previously, when these databases couldn't set timezone to UTC, it was silently logged at DEBUG level. For a data comparison tool, timezone mismatches can produce incorrect diff results — users need to know. Closes #29 Co-Authored-By: Claude Opus 4.6 --- data_diff/databases/_connect.py | 2 +- data_diff/databases/bigquery.py | 2 +- data_diff/databases/clickhouse.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/data_diff/databases/_connect.py b/data_diff/databases/_connect.py index 31ed9329..7dc11e09 100644 --- a/data_diff/databases/_connect.py +++ b/data_diff/databases/_connect.py @@ -298,7 +298,7 @@ def _connection_created(self, db): try: db.query(db.dialect.set_timezone_to_utc()) except NotImplementedError: - logging.debug( + logging.warning( f"Database '{db}' does not allow setting timezone. We recommend making sure it's set to 'UTC'." ) return db diff --git a/data_diff/databases/bigquery.py b/data_diff/databases/bigquery.py index d518aa7c..de630ef8 100644 --- a/data_diff/databases/bigquery.py +++ b/data_diff/databases/bigquery.py @@ -152,7 +152,7 @@ def to_comparable(self, value: str, coltype: ColType) -> str: return super().to_comparable(value, coltype) def set_timezone_to_utc(self) -> str: - raise NotImplementedError() + return "SET @@time_zone = 'UTC'" def parse_table_name(self, name: str) -> DbPath: path = parse_table_name(name) diff --git a/data_diff/databases/clickhouse.py b/data_diff/databases/clickhouse.py index d26ca93b..3d042aad 100644 --- a/data_diff/databases/clickhouse.py +++ b/data_diff/databases/clickhouse.py @@ -96,7 +96,7 @@ def parse_type(self, table_path: DbPath, info: RawColumnInfo) -> ColType: # return f"'{str(t)[:19]}'" def set_timezone_to_utc(self) -> str: - raise NotImplementedError() + return "SET session_timezone = 'UTC'" def current_timestamp(self) -> str: return "now()" From df0cfa2142a9967234df9b638f06b2123e959d0e Mon Sep 17 00:00:00 2001 From: Daniel Song Date: Mon, 30 Mar 2026 02:14:53 -0700 Subject: [PATCH 2/2] fix: revert BigQuery set_timezone_to_utc to NotImplementedError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BigQuery uses stateless REST jobs — SET @@time_zone only affects the single job it runs in, not subsequent queries. The elevated WARNING in _connect.py will inform users. BigQuery stores timestamps in UTC internally, so this is safe. Co-Authored-By: Claude Opus 4.6 --- data_diff/databases/bigquery.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/data_diff/databases/bigquery.py b/data_diff/databases/bigquery.py index de630ef8..6d13b78c 100644 --- a/data_diff/databases/bigquery.py +++ b/data_diff/databases/bigquery.py @@ -152,7 +152,9 @@ def to_comparable(self, value: str, coltype: ColType) -> str: return super().to_comparable(value, coltype) def set_timezone_to_utc(self) -> str: - return "SET @@time_zone = 'UTC'" + # BigQuery uses stateless REST jobs; SET @@time_zone only affects a single job, + # not subsequent queries. BigQuery stores timestamps in UTC internally. + raise NotImplementedError("BigQuery uses stateless jobs; session SET has no cross-query effect.") def parse_table_name(self, name: str) -> DbPath: path = parse_table_name(name)