Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 2d5db0f

Browse files
authored
Merge pull request #541 from dlawin/issue_479_2
output cols added/removed/types changed
2 parents e0060e1 + 4241471 commit 2d5db0f

File tree

3 files changed

+46
-13
lines changed

3 files changed

+46
-13
lines changed

data_diff/cloud/datafold_api.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ class TSummaryResultSchemaStats(pydantic.BaseModel):
134134
column_type_mismatches: int
135135
column_reorders: int
136136
column_counts: Tuple[int, int]
137+
column_type_differs: List[str]
138+
exclusive_columns: Tuple[List[str], List[str]]
137139

138140

139141
class TCloudApiDataDiffSummaryResult(pydantic.BaseModel):

data_diff/dbt.py

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,13 @@
66
from rich.prompt import Confirm
77

88
from typing import List, Optional, Dict
9-
from .utils import dbt_diff_string_template, getLogger
9+
from .utils import (
10+
dbt_diff_string_template,
11+
getLogger,
12+
columns_added_template,
13+
columns_removed_template,
14+
no_differences_template,
15+
)
1016
from pathlib import Path
1117

1218
import keyring
@@ -175,7 +181,6 @@ def _get_diff_vars(
175181

176182

177183
def _local_diff(diff_vars: TDiffVars) -> None:
178-
column_diffs_str = ""
179184
dev_qualified_str = ".".join(diff_vars.dev_path)
180185
prod_qualified_str = ".".join(diff_vars.prod_path)
181186
diff_output_str = _diff_output_base(dev_qualified_str, prod_qualified_str)
@@ -196,14 +201,14 @@ def _local_diff(diff_vars: TDiffVars) -> None:
196201
return
197202

198203
column_set = set(table1_columns).intersection(table2_columns)
199-
table1_diff = set(table1_columns).difference(table2_columns)
200-
table2_diff = set(table2_columns).difference(table1_columns)
204+
columns_added = set(table1_columns).difference(table2_columns)
205+
columns_removed = set(table2_columns).difference(table1_columns)
201206

202-
if table1_diff:
203-
column_diffs_str += f"Column(s) added: {table1_diff}\n"
207+
if columns_added:
208+
diff_output_str += columns_added_template(columns_added)
204209

205-
if table2_diff:
206-
column_diffs_str += f"Column(s) removed: {table2_diff}\n"
210+
if columns_removed:
211+
diff_output_str += columns_removed_template(columns_removed)
207212

208213
column_set = column_set - set(diff_vars.primary_keys)
209214

@@ -225,10 +230,10 @@ def _local_diff(diff_vars: TDiffVars) -> None:
225230
)
226231

227232
if list(diff):
228-
diff_output_str += f"{column_diffs_str}{diff.get_stats_string(is_dbt=True)} \n"
233+
diff_output_str += f"{diff.get_stats_string(is_dbt=True)} \n"
229234
rich.print(diff_output_str)
230235
else:
231-
diff_output_str += f"{column_diffs_str}[bold][green]No row differences[/][/] \n"
236+
diff_output_str += no_differences_template()
232237
rich.print(diff_output_str)
233238

234239

@@ -304,6 +309,18 @@ def _cloud_diff(diff_vars: TDiffVars, datasource_id: int, api: DatafoldAPI) -> N
304309
diff_percent_list = {
305310
x.column_name: str(x.match) + "%" for x in diff_results.values.columns_diff_stats if x.match != 100.0
306311
}
312+
columns_added = diff_results.schema_.exclusive_columns[1]
313+
columns_removed = diff_results.schema_.exclusive_columns[0]
314+
column_type_changes = diff_results.schema_.column_type_differs
315+
316+
if columns_added:
317+
diff_output_str += columns_added_template(columns_added)
318+
319+
if columns_removed:
320+
diff_output_str += columns_removed_template(columns_removed)
321+
322+
if column_type_changes:
323+
diff_output_str += "Type change: " + str(column_type_changes) + "\n"
307324

308325
if any([rows_added_count, rows_removed_count, rows_updated]):
309326
diff_output = dbt_diff_string_template(
@@ -314,10 +331,10 @@ def _cloud_diff(diff_vars: TDiffVars, datasource_id: int, api: DatafoldAPI) -> N
314331
diff_percent_list,
315332
"Value Match Percent:",
316333
)
317-
diff_output_str += f"{diff_url}\n {diff_output} \n"
334+
diff_output_str += f"\n{diff_url}\n {diff_output} \n"
318335
rich.print(diff_output_str)
319336
else:
320-
diff_output_str += f"{diff_url}\n [green]No row differences[/] \n"
337+
diff_output_str += f"\n{diff_url}\n{no_differences_template()}\n"
321338
rich.print(diff_output_str)
322339

323340
except BaseException as ex: # Catch KeyboardInterrupt too

data_diff/utils.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ def _jsons_equiv(a: str, b: str):
156156

157157
def diffs_are_equiv_jsons(diff: list, json_cols: dict):
158158
overriden_diff_cols = set()
159-
if (len(diff) != 2) or ({diff[0][0], diff[1][0]} != {'+', '-'}):
159+
if (len(diff) != 2) or ({diff[0][0], diff[1][0]} != {"+", "-"}):
160160
return False, overriden_diff_cols
161161
match = True
162162
for i, (col_a, col_b) in enumerate(safezip(diff[0][1][1:], diff[1][1][1:])): # index 0 is extra_columns first elem
@@ -169,3 +169,17 @@ def diffs_are_equiv_jsons(diff: list, json_cols: dict):
169169
if not match:
170170
break
171171
return match, overriden_diff_cols
172+
173+
174+
def columns_removed_template(table2_set_diff) -> str:
    """Build the "Column(s) removed" line of the diff output.

    The argument is rendered with ``str()`` exactly as given (the callers
    visible in this commit pass a set or a list of column names), and the
    returned line ends with a newline.
    """
    return f"Column(s) removed: {table2_set_diff!s}\n"
177+
178+
179+
def columns_added_template(table1_set_diff) -> str:
    """Build the "Column(s) added" line of the diff output.

    The argument is rendered with ``str()`` exactly as given (the callers
    visible in this commit pass a set or a list of column names), and the
    returned line ends with a newline.
    """
    return f"Column(s) added: {table1_set_diff!s}\n"
182+
183+
184+
def no_differences_template() -> str:
    """Return the "No row differences" line, wrapped in bold/green markup tags.

    The result ends with a newline so it can be appended directly to the
    diff output string.
    """
    message = "[bold][green]No row differences[/][/]\n"
    return message

0 commit comments

Comments
 (0)