Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 16f4ee0

Browse files
authored
Merge pull request #274 from datafold/nov2_better_tests
Tests: Improvements to CI flow + fixes
2 parents 735f523 + f216ecb commit 16f4ee0

File tree

8 files changed

+102
-22
lines changed

8 files changed

+102
-22
lines changed

.github/workflows/ci.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: CI
1+
name: CI-COVER-VERSIONS
22

33
on:
44
push:
@@ -46,8 +46,9 @@ jobs:
4646
env:
4747
DATADIFF_SNOWFLAKE_URI: '${{ secrets.DATADIFF_SNOWFLAKE_URI }}'
4848
DATADIFF_PRESTO_URI: '${{ secrets.DATADIFF_PRESTO_URI }}'
49+
DATADIFF_TRINO_URI: '${{ secrets.DATADIFF_TRINO_URI }}'
4950
DATADIFF_CLICKHOUSE_URI: 'clickhouse://clickhouse:Password1@localhost:9000/clickhouse'
5051
DATADIFF_VERTICA_URI: 'vertica://vertica:Password1@localhost:5433/vertica'
5152
run: |
5253
chmod +x tests/waiting_for_stack_up.sh
53-
./tests/waiting_for_stack_up.sh && poetry run unittest-parallel -j 16
54+
./tests/waiting_for_stack_up.sh && TEST_ACROSS_ALL_DBS=0 poetry run unittest-parallel -j 16

.github/workflows/ci_full.yml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
name: CI-COVER-DATABASES
2+
3+
on:
4+
push:
5+
paths:
6+
- '**.py'
7+
- '.github/workflows/**'
8+
- '!dev/**'
9+
pull_request:
10+
branches: [ master ]
11+
12+
workflow_dispatch:
13+
14+
jobs:
15+
unit_tests:
16+
strategy:
17+
fail-fast: false
18+
matrix:
19+
os: [ubuntu-latest]
20+
python-version:
21+
- "3.10"
22+
23+
name: Check Python ${{ matrix.python-version }} on ${{ matrix.os }}
24+
runs-on: ${{ matrix.os }}
25+
steps:
26+
- uses: actions/checkout@v3
27+
28+
- name: Setup Python ${{ matrix.python-version }}
29+
uses: actions/setup-python@v3
30+
with:
31+
python-version: ${{ matrix.python-version }}
32+
33+
- name: Build the stack
34+
run: docker-compose up -d mysql postgres presto trino clickhouse vertica
35+
36+
- name: Install Poetry
37+
run: pip install poetry
38+
39+
- name: Install package
40+
run: "poetry install"
41+
42+
- name: Run unit tests
43+
env:
44+
DATADIFF_SNOWFLAKE_URI: '${{ secrets.DATADIFF_SNOWFLAKE_URI }}'
45+
DATADIFF_PRESTO_URI: '${{ secrets.DATADIFF_PRESTO_URI }}'
46+
DATADIFF_CLICKHOUSE_URI: 'clickhouse://clickhouse:Password1@localhost:9000/clickhouse'
47+
DATADIFF_VERTICA_URI: 'vertica://vertica:Password1@localhost:5433/vertica'
48+
run: |
49+
chmod +x tests/waiting_for_stack_up.sh
50+
./tests/waiting_for_stack_up.sh && TEST_ACROSS_ALL_DBS=full poetry run unittest-parallel -j 16

data_diff/databases/base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,9 @@ def query(self, sql_ast: Union[Expr, Generator], res_type: type = list):
267267
if res is None: # May happen due to sum() of 0 items
268268
return None
269269
return int(res)
270+
elif res_type is datetime:
271+
res = _one(_one(res))
272+
return res # XXX parse timestamp?
270273
elif res_type is tuple:
271274
assert len(res) == 1, (sql_code, res)
272275
return res[0]

data_diff/databases/clickhouse.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ class Dialect(BaseDialect):
5555
"DateTime64": Timestamp,
5656
}
5757

58-
5958
def normalize_number(self, value: str, coltype: FractionalType) -> str:
6059
# If a decimal value has trailing zeros in a fractional part, when casting to string they are dropped.
6160
# For example:

data_diff/databases/oracle.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,7 @@ def parse_type(
127127
precision = int(m.group(1))
128128
return t_cls(precision=precision, rounds=self.ROUNDS_ON_PREC_LOSS)
129129

130-
return super().parse_type(
131-
table_path, col_name, type_repr, datetime_precision, numeric_precision, numeric_scale
132-
)
130+
return super().parse_type(table_path, col_name, type_repr, datetime_precision, numeric_precision, numeric_scale)
133131

134132

135133
class Oracle(ThreadedDatabase):

tests/common.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
N_SAMPLES = int(os.environ.get("N_SAMPLES", DEFAULT_N_SAMPLES))
3737
BENCHMARK = os.environ.get("BENCHMARK", False)
3838
N_THREADS = int(os.environ.get("N_THREADS", 1))
39+
TEST_ACROSS_ALL_DBS = os.environ.get("TEST_ACROSS_ALL_DBS", True) # Should we run the full db<->db test suite?
3940

4041

4142
def get_git_revision_short_hash() -> str:
@@ -94,6 +95,10 @@ def _print_used_dbs():
9495
logging.info(f"Testing databases: {', '.join(used)}")
9596
if unused:
9697
logging.info(f"Connection not configured; skipping tests for: {', '.join(unused)}")
98+
if TEST_ACROSS_ALL_DBS:
99+
logging.info(
100+
f"Full tests enabled (every db<->db). May take very long when many dbs are involved. ={TEST_ACROSS_ALL_DBS}"
101+
)
97102

98103

99104
_print_used_dbs()

tests/test_cli.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,10 @@ def setUp(self) -> None:
3737

3838
src_table = table(table_src_name, schema={"id": int, "datetime": datetime, "text_comment": str})
3939
self.conn.query(src_table.create())
40-
self.now = now = arrow.get(datetime.now())
40+
41+
self.conn.query("SET @@session.time_zone='+00:00'")
42+
db_time = self.conn.query("select now()", datetime)
43+
self.now = now = arrow.get(db_time)
4144

4245
rows = [
4346
(now, "now"),

tests/test_database_types.py

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
N_THREADS,
2626
BENCHMARK,
2727
GIT_REVISION,
28+
TEST_ACROSS_ALL_DBS,
2829
get_conn,
2930
random_table_suffix,
3031
)
@@ -418,22 +419,42 @@ def __iter__(self):
418419
"uuid": UUID_Faker(N_SAMPLES),
419420
}
420421

422+
423+
def _get_test_db_pairs():
424+
if str(TEST_ACROSS_ALL_DBS).lower() == "full":
425+
for source_db in DATABASE_TYPES:
426+
for target_db in DATABASE_TYPES:
427+
yield source_db, target_db
428+
elif int(TEST_ACROSS_ALL_DBS):
429+
for db_cls in DATABASE_TYPES:
430+
yield db_cls, db.PostgreSQL
431+
yield db.PostgreSQL, db_cls
432+
yield db_cls, db.Snowflake
433+
yield db.Snowflake, db_cls
434+
else:
435+
yield db.PostgreSQL, db.PostgreSQL
436+
437+
438+
def get_test_db_pairs():
439+
active_pairs = {(db1, db2) for db1, db2 in _get_test_db_pairs() if db1 in CONN_STRINGS and db2 in CONN_STRINGS}
440+
for db1, db2 in active_pairs:
441+
yield db1, DATABASE_TYPES[db1], db2, DATABASE_TYPES[db2]
442+
443+
421444
type_pairs = []
422-
for source_db, source_type_categories in DATABASE_TYPES.items():
423-
for target_db, target_type_categories in DATABASE_TYPES.items():
424-
if CONN_STRINGS.get(source_db, False) and CONN_STRINGS.get(target_db, False):
425-
for type_category, source_types in source_type_categories.items(): # int, datetime, ..
426-
for source_type in source_types:
427-
for target_type in target_type_categories[type_category]:
428-
type_pairs.append(
429-
(
430-
source_db,
431-
target_db,
432-
source_type,
433-
target_type,
434-
type_category,
435-
)
436-
)
445+
for source_db, source_type_categories, target_db, target_type_categories in get_test_db_pairs():
446+
for type_category, source_types in source_type_categories.items(): # int, datetime, ..
447+
for source_type in source_types:
448+
for target_type in target_type_categories[type_category]:
449+
type_pairs.append(
450+
(
451+
source_db,
452+
target_db,
453+
source_type,
454+
target_type,
455+
type_category,
456+
)
457+
)
437458

438459

439460
def sanitize(name):

0 commit comments

Comments (0)