Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions pipeline/tests/test_validate_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,36 @@ def multi_drift(samples, **kwargs):
data = json.loads(report_path.read_text())
assert result == 1
assert len(data["drift_records"]) == data["sample_size"]



def test_driver_skips_listed_shard(tmp_path: Path) -> None:
    """Shards in SKIP_REVALIDATION are skipped — revalidator is not called.

    A synthetic entry is patched into the registry for the duration of the
    test, so the test does not depend on which shards production currently
    lists and keeps working if the registry becomes empty.
    """
    from unittest.mock import patch

    from validate.driver import SKIP_REVALIDATION

    db = _make_minimal_shard(tmp_path)
    report_path = tmp_path / "report.json"

    called = False

    def must_not_be_called(samples, **kwargs):
        # Records the call instead of raising so the failure shows up as a
        # plain assertion below.
        nonlocal called
        called = True
        return [], []

    skipped_shard = "test-skipped-shard"
    skip_reason = "synthetic skip entry registered by the test"

    # patch.dict restores the registry's original contents on exit, even if
    # run_validation raises.
    with patch.dict(SKIP_REVALIDATION, {skipped_shard: skip_reason}):
        result = run_validation(
            shard=skipped_shard,
            shard_db=db,
            budget=5,
            report=report_path,
            revalidator=must_not_be_called,
            seed=42,
        )

    assert result == 0
    assert not called

    data = json.loads(report_path.read_text())
    assert data["shard"] == skipped_shard
    assert data["exit"] == "skipped"
    # The report must carry the registered reason verbatim, not merely some
    # non-empty value.
    assert data["skip_reason"] == skip_reason
    assert data["sample_size"] == 0
    assert data["drift_records"] == []
36 changes: 35 additions & 1 deletion pipeline/validate/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,22 @@
logger = logging.getLogger(__name__)


# Registry of shards that the validator must not revalidate, keyed by shard
# name with a human-readable justification as the value.
#
# A shard belongs here when catalog rows cannot be compared one-to-one with
# the upstream API: the catalog stores synthesized values (e.g. gcp-gce
# machine totals built from per-vCPU + per-GiB component prices) while the
# upstream API only exposes the components. For a listed shard the validator
# skips revalidation, writes a report marked "skipped" with the reason
# recorded, and returns success.
SKIP_REVALIDATION: dict[str, str] = {
    "gcp-gce": (
        "ingest synthesizes machine totals from per-vCPU and per-GiB component "
        "SKUs (see pipeline/ingest/gcp_gce.py); validator compares against the "
        "raw component unitPrice, producing false-positive drift. Re-enable "
        "once a sidecar or component-aware comparison lands."
    ),
}


# ---------------------------------------------------------------------------
# Types
# ---------------------------------------------------------------------------
Expand All @@ -52,7 +68,8 @@ class ValidationReport:
drift_records: list[dict]
missing_upstream: list[str]
vantage_drift: list[dict]
exit: str # "pass" | "fail"
exit: str # "pass" | "fail" | "skipped"
skip_reason: str | None = None

def as_dict(self) -> dict:
    """Return this report's fields as a plain ``dict``.

    The conversion is delegated to ``dataclasses.asdict``.
    """
    report_fields = dataclasses.asdict(self)
    return report_fields
Expand Down Expand Up @@ -123,6 +140,23 @@ def run_validation(
int
0 on pass or when the shard is skipped, 1 on fail.
"""
if shard in SKIP_REVALIDATION:
reason = SKIP_REVALIDATION[shard]
logger.info("Skipping revalidation for %s: %s", shard, reason)
report_data = ValidationReport(
shard=shard,
generated_at=datetime.now(UTC).isoformat(),
sample_size=0,
drift_records=[],
missing_upstream=[],
vantage_drift=[],
exit="skipped",
skip_reason=reason,
)
report.parent.mkdir(parents=True, exist_ok=True)
report.write_text(json.dumps(report_data.as_dict(), indent=2))
return 0

if revalidator is None:
revalidator = _default_revalidator(shard)

Expand Down
Loading