From 979548e19a812d14d7fecfa99f7e6c831f348d4f Mon Sep 17 00:00:00 2001 From: Paul Lizer Date: Thu, 9 Apr 2026 14:59:10 -0400 Subject: [PATCH] fix bug for media enhanced citations --- application/single_app/functions_documents.py | 35 +++- .../single_app/route_enhanced_citations.py | 6 +- .../MEDIA_ENHANCED_CITATION_BADGE_FIX.md | 48 ++++++ docs/explanation/release_notes.md | 6 + ..._media_enhanced_citations_metadata_flag.py | 153 ++++++++++++++++++ 5 files changed, 240 insertions(+), 8 deletions(-) create mode 100644 docs/explanation/fixes/MEDIA_ENHANCED_CITATION_BADGE_FIX.md create mode 100644 functional_tests/test_media_enhanced_citations_metadata_flag.py diff --git a/application/single_app/functions_documents.py b/application/single_app/functions_documents.py index 2f2f46e3..7c6e4a27 100644 --- a/application/single_app/functions_documents.py +++ b/application/single_app/functions_documents.py @@ -94,6 +94,27 @@ def get_document_blob_storage_info(document_item, user_id=None, group_id=None, p ) +def _has_persisted_blob_reference(document_item): + if not document_item: + return False + + if document_item.get("blob_path"): + return True + + return ( + document_item.get("blob_path_mode") == ARCHIVED_REVISION_BLOB_PATH_MODE + and bool(document_item.get("archived_blob_path")) + ) + + +def _normalize_document_enhanced_citations(document_item): + if not document_item: + return document_item + + document_item["enhanced_citations"] = _has_persisted_blob_reference(document_item) + return document_item + + def get_document_blob_delete_targets(document_item, user_id=None, group_id=None, public_workspace_id=None): targets = [] seen = set() @@ -317,7 +338,9 @@ def select_current_documents(documents): current_documents = [] for family_documents in families.values(): - current_documents.append(_choose_current_document(family_documents)) + current_documents.append( + _normalize_document_enhanced_citations(_choose_current_document(family_documents)) + ) return current_documents @@ 
-666,6 +689,7 @@ def create_document(file_name, user_id, document_id, num_file_chunks, status, gr "status": status, "percentage_complete": 0, "document_classification": carried_forward.get("document_classification", "None"), + "enhanced_citations": False, "type": "document_metadata", "public_workspace_id": public_workspace_id, "user_id": user_id, @@ -697,6 +721,7 @@ def create_document(file_name, user_id, document_id, num_file_chunks, status, gr "status": status, "percentage_complete": 0, "document_classification": carried_forward.get("document_classification", "None"), + "enhanced_citations": False, "type": "document_metadata", "group_id": group_id, "blob_container": _get_blob_container_name(group_id=group_id), @@ -728,6 +753,7 @@ def create_document(file_name, user_id, document_id, num_file_chunks, status, gr "status": status, "percentage_complete": 0, "document_classification": carried_forward.get("document_classification", "None"), + "enhanced_citations": False, "type": "document_metadata", "user_id": user_id, "blob_container": _get_blob_container_name(), @@ -823,7 +849,7 @@ def get_document_metadata(document_id, user_id, group_id=None, public_workspace_ user_id=public_workspace_id if is_public_workspace else (group_id if is_group else user_id), content=f"Document metadata retrieved: {document_items}." 
) - return document_items[0] if document_items else None + return _normalize_document_enhanced_citations(document_items[0]) if document_items else None except Exception as e: print(f"Error retrieving document metadata: {repr(e)}\nTraceback:\n{traceback.format_exc()}") @@ -2775,7 +2801,7 @@ def get_document(user_id, document_id, group_id=None, public_workspace_id=None): if not document_results: return jsonify({'error': 'Document not found or access denied'}), 404 - return jsonify(document_results[0]), 200 + return jsonify(_normalize_document_enhanced_citations(document_results[0])), 200 except Exception as e: return jsonify({'error': f'Error retrieving document: {str(e)}'}), 500 @@ -2863,7 +2889,7 @@ def get_document_version(user_id, document_id, version, group_id=None, public_wo if not document_results: return jsonify({'error': 'Document version not found'}), 404 - return jsonify(document_results[0]), 200 + return jsonify(_normalize_document_enhanced_citations(document_results[0])), 200 except Exception as e: return jsonify({'error': f'Error retrieving document version: {str(e)}'}), 500 @@ -4158,6 +4184,7 @@ def upload_to_blob(temp_file_path, user_id, document_id, blob_filename, update_c current_document["blob_container"] = storage_account_container_name current_document["blob_path"] = blob_path current_document["blob_path_mode"] = CURRENT_ALIAS_BLOB_PATH_MODE + current_document["enhanced_citations"] = True if current_document.get("archived_blob_path") is None: current_document["archived_blob_path"] = None cosmos_container.upsert_item(current_document) diff --git a/application/single_app/route_enhanced_citations.py b/application/single_app/route_enhanced_citations.py index 29de8313..ca1b9e48 100644 --- a/application/single_app/route_enhanced_citations.py +++ b/application/single_app/route_enhanced_citations.py @@ -12,7 +12,7 @@ from functions_authentication import login_required, user_required, get_current_user_id from functions_settings import get_settings, 
enabled_required -from functions_documents import get_document_metadata, get_document_blob_storage_info +from functions_documents import get_document_metadata from functions_group import get_user_groups from functions_public_workspaces import get_user_visible_public_workspace_ids_from_settings from swagger_wrapper import swagger_route, get_auth_security @@ -90,15 +90,13 @@ def get_enhanced_citation_document_metadata(): return doc_response, status_code raw_doc = doc_response.get_json() - _, blob_path = get_document_blob_storage_info(raw_doc) - return jsonify({ "id": raw_doc.get("id"), "document_id": raw_doc.get("id"), "file_name": raw_doc.get("file_name"), "version": raw_doc.get("version"), "is_current_version": raw_doc.get("is_current_version"), - "enhanced_citations": bool(blob_path), + "enhanced_citations": bool(raw_doc.get("enhanced_citations", False)), }), 200 except Exception as e: diff --git a/docs/explanation/fixes/MEDIA_ENHANCED_CITATION_BADGE_FIX.md b/docs/explanation/fixes/MEDIA_ENHANCED_CITATION_BADGE_FIX.md new file mode 100644 index 00000000..01879805 --- /dev/null +++ b/docs/explanation/fixes/MEDIA_ENHANCED_CITATION_BADGE_FIX.md @@ -0,0 +1,48 @@ +# Media Enhanced Citation Badge Fix + +Fixed/Implemented in version: **0.241.007** + +## Issue Description + +Audio and video files uploaded while Enhanced Citations was enabled were stored in Azure Blob Storage and could open through the enhanced citation experience on the chat page, but the workspace document details panel still showed the citation mode as Standard. + +## Root Cause Analysis + +The workspace document list renders the citation badge from the persisted `enhanced_citations` field on the document metadata record. + +Audio and video processing uploaded originals to blob storage, but the metadata record was not updated to set `enhanced_citations` to `true`. 
+ +At the same time, the chat-side enhanced citation metadata endpoint could still infer enhanced support from blob-backed document state, so chat behavior and workspace metadata drifted apart. + +## Technical Details + +Files modified: `application/single_app/functions_documents.py`, `application/single_app/route_enhanced_citations.py`, `application/single_app/config.py`, `functional_tests/test_media_enhanced_citations_metadata_flag.py` + +Code changes summary: + +- Added normalization helpers so blob-backed documents read back with `enhanced_citations=True` even when older records are missing that field. +- Updated `upload_to_blob()` to stamp `enhanced_citations=True` on the stored document metadata for new blob-backed uploads. +- Initialized new document metadata records with `enhanced_citations=False` so the field is always explicit. +- Updated the enhanced citation document metadata route to use the normalized per-document flag instead of inferring state from a derived blob path. + +Impact analysis: + +- Existing audio and video documents that already have persisted blob references now render the Enhanced badge in workspace details without requiring re-upload. +- New blob-backed uploads keep workspace metadata aligned with the chat enhanced citation experience. + +## Validation + +Test coverage: `functional_tests/test_media_enhanced_citations_metadata_flag.py` + +Test results: + +- Validates normalization of current and archived blob-backed documents to `enhanced_citations=True`. +- Validates that blob uploads stamp the document metadata with the enhanced citation flag. +- Validates that document list/detail reads and the enhanced citation metadata route use the normalized value. + +Before/after comparison: + +- Before: Blob-backed media could behave as enhanced in chat while still displaying Standard in workspace details. +- After: Workspace details and chat enhanced citation behavior use the same normalized document metadata state. 
+ +Related config.py version update: `VERSION = "0.241.007"` \ No newline at end of file diff --git a/docs/explanation/release_notes.md b/docs/explanation/release_notes.md index f971e003..da34cbad 100644 --- a/docs/explanation/release_notes.md +++ b/docs/explanation/release_notes.md @@ -29,6 +29,12 @@ For feature-focused and fix-focused drill-downs by version, see [Features by Ver * Added functional and UI regression coverage for the guarded prompt-role path so future changes do not reintroduce the same startup failure. * (Ref: `group_workspaces.html`, `test_group_workspace_prompt_role_ui_guard.py`, `test_group_workspace_prompt_role_containers_ui.py`) +* **Audio and Video Enhanced Citation Badge Consistency** + * Fixed blob-backed audio and video documents showing Standard citations in workspace details even when Enhanced Citations was enabled and the same files already opened through the enhanced citation experience on the chat page. + * Document metadata now persists and normalizes the `enhanced_citations` flag from blob-backed storage state so existing media uploads and new uploads both render the correct Enhanced badge across workspace and chat flows. + * Added regression coverage and fix documentation for the metadata normalization path. + * (Ref: `functions_documents.py`, `route_enhanced_citations.py`, `test_media_enhanced_citations_metadata_flag.py`, `MEDIA_ENHANCED_CITATION_BADGE_FIX.md`) + #### User Interface Enhancements * **AI Voice Conversations Setup Guide** diff --git a/functional_tests/test_media_enhanced_citations_metadata_flag.py b/functional_tests/test_media_enhanced_citations_metadata_flag.py new file mode 100644 index 00000000..bf57a829 --- /dev/null +++ b/functional_tests/test_media_enhanced_citations_metadata_flag.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +# test_media_enhanced_citations_metadata_flag.py +""" +Functional test for media enhanced citation metadata normalization. 
+Version: 0.241.007 +Implemented in: 0.241.007 + +This test ensures blob-backed audio and video documents are marked as +enhanced citations in stored metadata so workspace badges match chat behavior. +""" + +import ast +import os +import re +import sys + + +ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +SINGLE_APP_ROOT = os.path.join(ROOT_DIR, 'application', 'single_app') +FUNCTIONS_DOCUMENTS_FILE = os.path.join(SINGLE_APP_ROOT, 'functions_documents.py') +ROUTE_FILE = os.path.join(SINGLE_APP_ROOT, 'route_enhanced_citations.py') +CONFIG_FILE = os.path.join(SINGLE_APP_ROOT, 'config.py') + + +def read_file(path): + with open(path, 'r', encoding='utf-8') as file_handle: + return file_handle.read() + + +def load_normalization_helpers(): + """Compile the normalization helpers directly from source for focused validation.""" + source = read_file(FUNCTIONS_DOCUMENTS_FILE) + module_ast = ast.parse(source, filename=FUNCTIONS_DOCUMENTS_FILE) + + helper_names = { + '_has_persisted_blob_reference', + '_normalize_document_enhanced_citations', + } + helper_nodes = [ + node for node in module_ast.body + if isinstance(node, ast.FunctionDef) and node.name in helper_names + ] + + missing_helpers = helper_names.difference({node.name for node in helper_nodes}) + assert not missing_helpers, f'Missing normalization helpers: {sorted(missing_helpers)}' + + isolated_module = ast.Module(body=helper_nodes, type_ignores=[]) + namespace = {'ARCHIVED_REVISION_BLOB_PATH_MODE': 'archived_revision'} + exec(compile(isolated_module, FUNCTIONS_DOCUMENTS_FILE, 'exec'), namespace) + return namespace['_normalize_document_enhanced_citations'] + + +def test_blob_backed_documents_normalize_to_enhanced(): + """Verify legacy and current blob-backed documents normalize to enhanced citations.""" + print('🔍 Testing blob-backed document normalization...') + + normalize_document = load_normalization_helpers() + + current_blob_doc = {'id': 'audio-doc', 'blob_path': 'user/audio.mp3'} + 
normalized_current = normalize_document(dict(current_blob_doc)) + assert normalized_current['enhanced_citations'] is True, 'Current blob path should normalize to enhanced citations' + + archived_blob_doc = { + 'id': 'video-doc', + 'blob_path': None, + 'blob_path_mode': 'archived_revision', + 'archived_blob_path': 'user/family/video.mp4', + } + normalized_archived = normalize_document(dict(archived_blob_doc)) + assert normalized_archived['enhanced_citations'] is True, 'Archived blob path should normalize to enhanced citations' + + text_only_doc = {'id': 'text-doc', 'blob_path': None, 'archived_blob_path': None} + normalized_text = normalize_document(dict(text_only_doc)) + assert normalized_text['enhanced_citations'] is False, 'Documents without persisted blob references should stay standard' + + print('✅ Blob-backed document normalization passed') + return True + + +def test_blob_upload_persists_enhanced_flag(): + """Verify uploads stamp the document metadata with enhanced_citations=True.""" + print('🔍 Testing blob upload metadata stamping...') + + source = read_file(FUNCTIONS_DOCUMENTS_FILE) + required_snippets = [ + 'current_document["enhanced_citations"] = True', + '"enhanced_citations": False,', + ] + + missing = [snippet for snippet in required_snippets if snippet not in source] + assert not missing, f'Missing upload/create metadata snippets: {missing}' + + print('✅ Blob upload metadata stamping passed') + return True + + +def test_document_reads_use_normalized_enhanced_flag(): + """Verify document list/detail reads expose normalized enhanced citation state.""" + print('🔍 Testing document read normalization and enhanced citation metadata route...') + + documents_source = read_file(FUNCTIONS_DOCUMENTS_FILE) + route_source = read_file(ROUTE_FILE) + + required_document_snippets = [ + '_normalize_document_enhanced_citations(_choose_current_document(family_documents))', + 'return jsonify(_normalize_document_enhanced_citations(document_results[0])), 200', + 
'return _normalize_document_enhanced_citations(document_items[0]) if document_items else None', + ] + missing_document_snippets = [ + snippet for snippet in required_document_snippets if snippet not in documents_source + ] + assert not missing_document_snippets, ( + 'Missing document normalization snippets: ' + f'{missing_document_snippets}' + ) + + route_snippet = '"enhanced_citations": bool(raw_doc.get("enhanced_citations", False))' + assert route_snippet in route_source, 'Enhanced citation metadata route should use normalized per-document flag' + assert 'bool(blob_path)' not in route_source, 'Metadata route should no longer infer enhanced citations from a derived blob path' + + print('✅ Document read normalization passed') + return True + + +def test_config_version_bumped_for_media_citation_fix(): + """Verify config.py version was bumped for this fix.""" + print('🔍 Testing config version bump...') + + config_source = read_file(CONFIG_FILE) + version_match = re.search(r'VERSION = "([0-9.]+)"', config_source) + assert version_match, 'Could not find VERSION in config.py' + assert version_match.group(1) == '0.241.007', 'Expected config.py version 0.241.007' + + print('✅ Config version bump passed') + return True + + +if __name__ == '__main__': + tests = [ + test_blob_backed_documents_normalize_to_enhanced, + test_blob_upload_persists_enhanced_flag, + test_document_reads_use_normalized_enhanced_flag, + test_config_version_bumped_for_media_citation_fix, + ] + + results = [] + for test in tests: + print(f'\n🧪 Running {test.__name__}...') + results.append(test()) + + success = all(results) + print(f'\n📊 Results: {sum(results)}/{len(results)} tests passed') + sys.exit(0 if success else 1) \ No newline at end of file