From 7d62aedf23b3bea77fe81fba6e933438c275cf8c Mon Sep 17 00:00:00 2001 From: Daniel Song Date: Mon, 30 Mar 2026 02:04:54 -0700 Subject: [PATCH 1/2] fix: MsSQL timezone-safe timestamp normalization via AT TIME ZONE Wrap MsSQL normalize_timestamp() with explicit UTC conversion using CAST(value AS DATETIMEOFFSET) AT TIME ZONE 'UTC'. MsSQL cannot set a session timezone, so we normalize explicitly in the SQL to ensure consistent cross-database timestamp comparisons. Closes #30 Co-Authored-By: Claude Opus 4.6 --- data_diff/databases/mssql.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/data_diff/databases/mssql.py b/data_diff/databases/mssql.py index d84eab94..3e04226e 100644 --- a/data_diff/databases/mssql.py +++ b/data_diff/databases/mssql.py @@ -138,13 +138,18 @@ def constant_values(self, rows) -> str: return f"VALUES {values}" def normalize_timestamp(self, value: str, coltype: TemporalType) -> str: + # Convert to UTC to ensure consistent cross-database comparisons. + # MsSQL cannot set a session timezone, so we normalize explicitly. + # Cast to datetimeoffset (assumes server local time), then convert to UTC. + utc_value = f"CAST({value} AS DATETIMEOFFSET) AT TIME ZONE 'UTC'" + if coltype.precision > 0: formatted_value = ( - f"FORMAT({value}, 'yyyy-MM-dd HH:mm:ss') + '.' + " - f"SUBSTRING(FORMAT({value}, 'fffffff'), 1, {coltype.precision})" + f"FORMAT({utc_value}, 'yyyy-MM-dd HH:mm:ss') + '.' + " + f"SUBSTRING(FORMAT({utc_value}, 'fffffff'), 1, {coltype.precision})" ) else: - formatted_value = f"FORMAT({value}, 'yyyy-MM-dd HH:mm:ss')" + formatted_value = f"FORMAT({utc_value}, 'yyyy-MM-dd HH:mm:ss')" return formatted_value From 71740cb24d899a63ad30e665e4bed569a6f271e4 Mon Sep 17 00:00:00 2001 From: Daniel Song Date: Mon, 30 Mar 2026 02:15:27 -0700 Subject: [PATCH 2/2] fix: only apply UTC conversion for TimestampTZ columns in MsSQL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CAST(datetime AS DATETIMEOFFSET) assigns +00:00 unconditionally, not the server's local timezone. UTC conversion only helps for datetimeoffset columns (TimestampTZ). For timezone-naive datetime/datetime2 columns, the conversion is a no-op — values are used as-is since the source timezone is unknown. Co-Authored-By: Claude Opus 4.6 --- data_diff/databases/mssql.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/data_diff/databases/mssql.py b/data_diff/databases/mssql.py index 3e04226e..fc0b8f58 100644 --- a/data_diff/databases/mssql.py +++ b/data_diff/databases/mssql.py @@ -138,18 +138,20 @@ def constant_values(self, rows) -> str: return f"VALUES {values}" def normalize_timestamp(self, value: str, coltype: TemporalType) -> str: - # Convert to UTC to ensure consistent cross-database comparisons. - # MsSQL cannot set a session timezone, so we normalize explicitly. - # Cast to datetimeoffset (assumes server local time), then convert to UTC. - utc_value = f"CAST({value} AS DATETIMEOFFSET) AT TIME ZONE 'UTC'" + # For timezone-aware columns (datetimeoffset), convert to UTC explicitly + # since MsSQL cannot set a session timezone. + # For timezone-naive columns (datetime/datetime2), no conversion is possible + # without knowing the source timezone — values are used as-is. + if isinstance(coltype, TimestampTZ): + value = f"{value} AT TIME ZONE 'UTC'" if coltype.precision > 0: formatted_value = ( - f"FORMAT({utc_value}, 'yyyy-MM-dd HH:mm:ss') + '.' + " - f"SUBSTRING(FORMAT({utc_value}, 'fffffff'), 1, {coltype.precision})" + f"FORMAT({value}, 'yyyy-MM-dd HH:mm:ss') + '.' + " + f"SUBSTRING(FORMAT({value}, 'fffffff'), 1, {coltype.precision})" ) else: - formatted_value = f"FORMAT({utc_value}, 'yyyy-MM-dd HH:mm:ss')" + formatted_value = f"FORMAT({value}, 'yyyy-MM-dd HH:mm:ss')" return formatted_value