diff --git a/src/ContentProcessor/src/libs/pipeline/handlers/save_handler.py b/src/ContentProcessor/src/libs/pipeline/handlers/save_handler.py index 15c90f56..15f8b878 100644 --- a/src/ContentProcessor/src/libs/pipeline/handlers/save_handler.py +++ b/src/ContentProcessor/src/libs/pipeline/handlers/save_handler.py @@ -112,20 +112,14 @@ def find_process_result(step_name: str): ) ) - total_evaluated_fields_count = evaluated_result.confidence.get( - "total_evaluated_fields_count", 0 - ) - schema_score = ( - 0 - if total_evaluated_fields_count == 0 - else round( - ( - len(evaluated_result.comparison_result.items) - - evaluated_result.confidence["zero_confidence_fields_count"] - ) - / len(evaluated_result.comparison_result.items), - 3, - ) + # Compute the aggregate scores. Successful (Completed) processing + # always yields numeric scores: when probabilistic confidence is + # available (logprobs from non-reasoning models / Content Understanding + # signal) we use it; otherwise we fall back to a structural + # completeness score (fraction of expected fields actually filled). + # Failed runs and genuinely empty extractions remain at ``0.0``. 
+ entity_score, schema_score, min_extracted_entity_score = ( + self._derive_aggregate_scores(evaluated_result) ) processed_result = ContentProcess( @@ -143,11 +137,9 @@ def find_process_result(step_name: str): self._current_message_context.data_pipeline.pipeline_status.creation_time, "%Y-%m-%dT%H:%M:%S.%fZ", ), - entity_score=evaluated_result.confidence["overall_confidence"], + entity_score=entity_score, schema_score=schema_score, - min_extracted_entity_score=evaluated_result.confidence[ - "min_extracted_field_confidence" - ], + min_extracted_entity_score=min_extracted_entity_score, prompt_tokens=evaluated_result.prompt_tokens, completion_tokens=evaluated_result.completion_tokens, target_schema=Schema.get_schema( @@ -241,3 +233,85 @@ def _summarize_processed_time(self, step_results: list[StepResult]) -> str: # Format the total elapsed time as a string formatted_elapsed_time = f"{total_hours:02}:{total_minutes:02}:{total_seconds:02}.{total_milliseconds:03}" return formatted_elapsed_time + + @staticmethod + def _is_filled_value(value: object) -> bool: + """Heuristic: does an extracted value count as "actually filled"? + + Treats ``None``, empty strings, whitespace-only strings, and empty + containers as *not* filled. Recursively descends into dicts/lists so a + nested object that contains only nulls is still counted as empty. + """ + if value is None: + return False + if isinstance(value, bool): + return True + if isinstance(value, str): + return value.strip() != "" + if isinstance(value, dict): + return any(SaveHandler._is_filled_value(v) for v in value.values()) + if isinstance(value, (list, tuple, set)): + return any(SaveHandler._is_filled_value(v) for v in value) + return True + + @staticmethod + def _derive_aggregate_scores( + evaluated_result: DataExtractionResult, + ) -> tuple[float, float, float]: + """Compute ``(entity_score, schema_score, min_extracted_entity_score)``. + + Score selection order: + + 1. 
**Probabilistic confidence** — when the evaluate step produced + per-field confidence (``total_evaluated_fields_count > 0``), use the + probabilistic ``overall_confidence`` plus the ratio of + above-threshold fields. This is the highest-fidelity signal. + + 2. **Structural completeness fallback** — when no probabilistic + signal was produced (e.g. reasoning models like ``gpt-5``/``o1``/``o3`` + don't return logprobs, and image-only flow has no Content + Understanding signal), but extraction still produced a comparison + table, score by *how much of the schema was actually filled*. This + replaces the old behaviour of falsely emitting ``0%`` for completed + runs that simply lacked logprobs. + + 3. **Zero** — only when there is literally no extraction data + (failed pipeline / genuinely empty result). Failed processing + continues to surface as ``0`` so the UI consistently renders + ``0%`` for failures and genuine zeros. + """ + confidence = evaluated_result.confidence or {} + total_evaluated_fields_count = confidence.get( + "total_evaluated_fields_count", 0 + ) + comparison_items = ( + evaluated_result.comparison_result.items + if evaluated_result.comparison_result is not None + else [] + ) + + # Path 1: probabilistic confidence + if total_evaluated_fields_count > 0 and comparison_items: + zero_count = confidence.get("zero_confidence_fields_count", 0) + schema_score = round( + (len(comparison_items) - zero_count) / len(comparison_items), + 3, + ) + entity_score = float(confidence.get("overall_confidence") or 0.0) + min_extracted_entity_score = float( + confidence.get("min_extracted_field_confidence") or 0.0 + ) + return (entity_score, schema_score, min_extracted_entity_score) + + # Path 2: structural completeness fallback + if comparison_items: + filled = sum( + 1 + for item in comparison_items + if SaveHandler._is_filled_value(item.Extracted) + ) + ratio = round(filled / len(comparison_items), 3) + return (ratio, ratio, ratio) + + # Path 3: nothing to score on + 
return (0.0, 0.0, 0.0) diff --git a/src/ContentProcessor/src/libs/utils/azure_credential_utils.py b/src/ContentProcessor/src/libs/utils/azure_credential_utils.py index 5d711e85..3344379c 100644 --- a/src/ContentProcessor/src/libs/utils/azure_credential_utils.py +++ b/src/ContentProcessor/src/libs/utils/azure_credential_utils.py @@ -19,7 +19,6 @@ from azure.identity import ( AzureCliCredential, AzureDeveloperCliCredential, - DefaultAzureCredential, ManagedIdentityCredential, ) from azure.identity import ( diff --git a/src/ContentProcessor/src/libs/utils/credential_util.py b/src/ContentProcessor/src/libs/utils/credential_util.py index 1efcaab7..791ab42c 100644 --- a/src/ContentProcessor/src/libs/utils/credential_util.py +++ b/src/ContentProcessor/src/libs/utils/credential_util.py @@ -19,7 +19,6 @@ from azure.identity import ( AzureCliCredential, AzureDeveloperCliCredential, - DefaultAzureCredential, ManagedIdentityCredential, ) from azure.identity import ( diff --git a/src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py b/src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py new file mode 100644 index 00000000..be9649d6 --- /dev/null +++ b/src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py @@ -0,0 +1,236 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Tests for ``SaveHandler._derive_aggregate_scores``. + +Covers the score-derivation contract: +- probabilistic confidence flows through verbatim when available +- structural completeness fallback fires for Completed runs without logprobs + (e.g. 
"""Unit tests for ``SaveHandler._derive_aggregate_scores``.

Contract under test:
- probabilistic confidence values are passed through unchanged
- runs without a probabilistic signal fall back to a structural
  completeness score instead of reporting 0%
- a genuine probabilistic zero stays ``0.0``
- runs with no comparison items return ``0.0``
"""

from __future__ import annotations

from libs.pipeline.handlers.logics.evaluate_handler.comparison import (
    ExtractionComparisonData,
    ExtractionComparisonItem,
)
from libs.pipeline.handlers.logics.evaluate_handler.model import DataExtractionResult
from libs.pipeline.handlers.save_handler import SaveHandler


def _item(
    field: str,
    extracted: object,
    confidence: str = "0.00%",
    above_threshold: str = "False",
) -> ExtractionComparisonItem:
    """Build one comparison row; defaults describe a row with no signal."""
    return ExtractionComparisonItem(
        Field=field,
        Extracted=extracted,
        Confidence=confidence,
        IsAboveThreshold=above_threshold,
    )


def _make_result(
    *,
    items: list[ExtractionComparisonItem],
    confidence: dict,
) -> DataExtractionResult:
    """Wrap rows and a confidence payload in a minimal evaluate-step result."""
    table = ExtractionComparisonData(items=items)
    return DataExtractionResult(
        extracted_result={},
        confidence=confidence,
        comparison_result=table,
        prompt_tokens=0,
        completion_tokens=0,
        execution_time=0,
    )


class TestProbabilisticPath:
    """Scores when the evaluate step reported evaluated fields."""

    def test_valid_scores_flow_through(self):
        """A normal evaluate-step result must produce numeric scores."""
        rows = [
            _item("a", "x", "90.00%", "True"),
            _item("b", "y", "80.00%", "True"),
            _item("c", "z", "0.00%", "False"),
        ]
        payload = {
            "total_evaluated_fields_count": 3,
            "overall_confidence": 0.567,
            "min_extracted_field_confidence": 0.0,
            "zero_confidence_fields_count": 1,
        }
        scores = SaveHandler._derive_aggregate_scores(
            _make_result(items=rows, confidence=payload)
        )
        # One of the three rows is counted as zero-confidence → 2/3.
        assert scores == (0.567, round(2 / 3, 3), 0.0)

    def test_all_fields_above_threshold(self):
        """No zero-confidence rows gives a schema score of 1.0."""
        rows = [
            _item("a", "x", "95.00%", "True"),
            _item("b", "y", "90.00%", "True"),
        ]
        payload = {
            "total_evaluated_fields_count": 2,
            "overall_confidence": 0.925,
            "min_extracted_field_confidence": 0.9,
            "zero_confidence_fields_count": 0,
        }
        scores = SaveHandler._derive_aggregate_scores(
            _make_result(items=rows, confidence=payload)
        )
        assert scores == (0.925, 1.0, 0.9)


class TestStructuralFallback:
    """With ``total_evaluated_fields_count == 0`` but rows present, the
    scores come from how many rows carry a filled ``Extracted`` value."""

    def test_all_fields_filled_yields_one(self):
        rows = [_item("a", "x"), _item("b", "y"), _item("c", 42)]
        # No probabilistic signal: total_evaluated_fields_count == 0
        payload = {
            "total_evaluated_fields_count": 0,
            "overall_confidence": 0.0,
            "min_extracted_field_confidence": 0.0,
            "zero_confidence_fields_count": 0,
        }
        scores = SaveHandler._derive_aggregate_scores(
            _make_result(items=rows, confidence=payload)
        )
        assert scores == (1.0, 1.0, 1.0)

    def test_partial_fill_yields_ratio(self):
        rows = [
            _item("a", "x"),
            _item("b", None),
            _item("c", ""),
            _item("d", "z"),
        ]
        payload = {"total_evaluated_fields_count": 0}
        scores = SaveHandler._derive_aggregate_scores(
            _make_result(items=rows, confidence=payload)
        )
        # Two of the four rows are filled → 0.5
        assert scores == (0.5, 0.5, 0.5)

    def test_all_fields_empty_yields_zero(self):
        """Genuine-empty extraction: structural fallback collapses to ``0.0``."""
        rows = [_item("a", None), _item("b", ""), _item("c", " ")]
        payload = {"total_evaluated_fields_count": 0}
        scores = SaveHandler._derive_aggregate_scores(
            _make_result(items=rows, confidence=payload)
        )
        assert scores == (0.0, 0.0, 0.0)


class TestZeroPath:
    """Cases that must end at ``0.0`` for every score."""

    def test_no_comparison_items_returns_zero(self):
        """No extraction data at all (failed pipeline) → ``0.0``."""
        payload = {
            "total_evaluated_fields_count": 0,
            "overall_confidence": 0.0,
            "min_extracted_field_confidence": 0.0,
            "zero_confidence_fields_count": 0,
        }
        scores = SaveHandler._derive_aggregate_scores(
            _make_result(items=[], confidence=payload)
        )
        assert scores == (0.0, 0.0, 0.0)

    def test_genuine_zero_probabilistic_score_preserved(self):
        """A real ``0`` confidence must NOT be replaced by the structural
        fallback, even though the row's ``Extracted`` value is filled."""
        rows = [_item("a", "x", "0.00%", "False")]
        payload = {
            "total_evaluated_fields_count": 1,
            "overall_confidence": 0.0,
            "min_extracted_field_confidence": 0.0,
            "zero_confidence_fields_count": 1,
        }
        scores = SaveHandler._derive_aggregate_scores(
            _make_result(items=rows, confidence=payload)
        )
        assert scores == (0.0, 0.0, 0.0)


class TestIsFilledValue:
    """Coverage for the ``_is_filled_value`` helper used by the structural fallback."""

    def test_none_is_empty(self):
        assert SaveHandler._is_filled_value(None) is False

    def test_empty_string_is_empty(self):
        for blank in ("", " "):
            assert SaveHandler._is_filled_value(blank) is False

    def test_non_empty_string_is_filled(self):
        assert SaveHandler._is_filled_value("x") is True

    def test_zero_int_is_filled(self):
        # A literal ``0`` is a valid extracted value (e.g. count fields).
        assert SaveHandler._is_filled_value(0) is True

    def test_bool_is_filled(self):
        for flag in (False, True):
            assert SaveHandler._is_filled_value(flag) is True

    def test_empty_container_is_empty(self):
        for container in ([], {}):
            assert SaveHandler._is_filled_value(container) is False

    def test_nested_all_null_is_empty(self):
        for nested in ({"a": None, "b": ""}, [None, "", {"c": None}]):
            assert SaveHandler._is_filled_value(nested) is False

    def test_nested_with_value_is_filled(self):
        for nested in ({"a": None, "b": "x"}, [None, "x"]):
            assert SaveHandler._is_filled_value(nested) is True
For Completed runs this is either the probabilistic confidence (when logprobs are available) or a structural completeness fallback (fraction of expected fields actually filled). Failed runs and genuinely empty extractions remain at ``0.0``.", default=0.0, ) schema_score: float = Field( - description="Score indicating the quality of schema matching for the content", + description="Score indicating the quality of schema matching for the content. For Completed runs this is either the probabilistic above-threshold ratio or a structural completeness fallback. Failed runs remain at ``0.0``.", default=0.0, ) status: Optional[str] = Field( diff --git a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGrid.tsx b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGrid.tsx index 0581b3ac..e9026ddb 100644 --- a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGrid.tsx +++ b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGrid.tsx @@ -373,7 +373,11 @@ const ProcessQueueGrid: React.FC = () => { @@ -382,7 +386,11 @@ const ProcessQueueGrid: React.FC = () => { diff --git a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGridTypes.ts b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGridTypes.ts index d441eb5a..3ffc2409 100644 --- a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGridTypes.ts +++ b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGridTypes.ts @@ -17,7 +17,7 @@ export interface ProcessedDocument { readonly file_name: string; /** MIME type of the document. */ readonly mime_type: string; - /** Entity extraction confidence score (0–1). */ + /** Entity extraction score (0–1). */ readonly entity_score: number; /** Schema compliance score (0–1). 
*/ readonly schema_score: number; diff --git a/src/ContentProcessorWorkflow/src/repositories/model/claim_process.py b/src/ContentProcessorWorkflow/src/repositories/model/claim_process.py index 470a946c..75ce41ba 100644 --- a/src/ContentProcessorWorkflow/src/repositories/model/claim_process.py +++ b/src/ContentProcessorWorkflow/src/repositories/model/claim_process.py @@ -78,11 +78,11 @@ class Content_Process(EntityBase): description="MIME type of the processed content file", default=None ) entity_score: float = Field( - description="Score indicating the quality of entity extraction from the content", + description="Score indicating the quality of entity extraction (0.0–1.0). For Completed runs this is either probabilistic confidence (logprobs) or a structural completeness fallback. Failed runs remain at ``0.0``.", default=0.0, ) schema_score: float = Field( - description="Score indicating the quality of schema matching for the content", + description="Score indicating the quality of schema matching (0.0–1.0). Failed runs remain at ``0.0``.", default=0.0, ) status: Optional[str] = Field( diff --git a/src/ContentProcessorWorkflow/src/steps/document_process/executor/document_process_executor.py b/src/ContentProcessorWorkflow/src/steps/document_process/executor/document_process_executor.py index f131c1a2..68a81b97 100644 --- a/src/ContentProcessorWorkflow/src/steps/document_process/executor/document_process_executor.py +++ b/src/ContentProcessorWorkflow/src/steps/document_process/executor/document_process_executor.py @@ -242,8 +242,12 @@ async def _on_poll(poll_data: dict) -> None: status_text = poll_result.get("status", "Failed") - schema_score_f = 0.0 - entity_score_f = 0.0 + # Failed / not-yet-scored documents default to ``0.0``; + # save_handler always emits numeric scores for Completed + # runs (probabilistic if available, otherwise structural + # completeness fallback). 
def _coerce_score(value: object) -> float:
    """Coerce a raw score payload to a finite ``float`` (default ``0.0``).

    Args:
        value: The raw ``schema_score`` / ``entity_score`` entry read from
            the result payload; may be ``None``, a number, or a string.

    Returns:
        ``float(value)`` when that conversion succeeds and is finite;
        ``0.0`` for ``None``, unconvertible values, integers too large
        for a float, and NaN/infinity.
    """
    import math  # local import: only this helper needs it

    if value is None:
        return 0.0
    try:
        score = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: ``float()`` on an int outside the float range.
        return 0.0
    # A non-finite score cannot be rendered as a percentage; use the default.
    return score if math.isfinite(score) else 0.0
def test_explicit_zero_score_preserved(self):
    """Scores passed explicitly as ``0.0`` are stored as ``0.0``."""
    zero_scored = Content_Process(
        process_id="p1",
        file_name="doc.pdf",
        entity_score=0.0,
        schema_score=0.0,
    )
    assert (zero_scored.entity_score, zero_scored.schema_score) == (0.0, 0.0)

def test_failed_processing_keeps_default_zero(self):
    """A record created with ``status="Failed"`` and no scores keeps the
    ``0.0`` defaults for both score fields."""
    failed = Content_Process(
        process_id="p1",
        file_name="doc.pdf",
        status="Failed",
    )
    assert failed.status == "Failed"
    assert (failed.entity_score, failed.schema_score) == (0.0, 0.0)
def test_explicit_zero_score_preserved(self):
    """Scores passed explicitly as ``0.0`` are stored as ``0.0``."""
    zero_scored = ContentProcessRecord(
        id="r1",
        process_id="r1",
        entity_score=0.0,
        schema_score=0.0,
    )
    assert (zero_scored.entity_score, zero_scored.schema_score) == (0.0, 0.0)