Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 31 additions & 4 deletions application/single_app/functions_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,27 @@ def get_document_blob_storage_info(document_item, user_id=None, group_id=None, p
)


def _has_persisted_blob_reference(document_item):
    """Return whether a document record stores a blob reference.

    A record qualifies when it has a truthy ``blob_path``, or when its
    ``blob_path_mode`` equals ``ARCHIVED_REVISION_BLOB_PATH_MODE`` and it has
    a truthy ``archived_blob_path``. A falsy record (``None`` or an empty
    mapping) never qualifies.

    Args:
        document_item: Document metadata mapping, or ``None``.

    Returns:
        bool: ``True`` if a persisted blob reference is present.
    """
    if not document_item:
        return False

    current_blob_path = document_item.get("blob_path")
    if current_blob_path:
        return True

    # The archived path only counts when the record is explicitly in
    # archived-revision mode.
    if document_item.get("blob_path_mode") != ARCHIVED_REVISION_BLOB_PATH_MODE:
        return False
    return bool(document_item.get("archived_blob_path"))


def _normalize_document_enhanced_citations(document_item):
    """Set ``enhanced_citations`` on a document record from its blob state.

    The flag is overwritten in place with the result of
    ``_has_persisted_blob_reference`` and the same object is returned. Falsy
    input (``None`` or an empty mapping) is returned untouched.

    Args:
        document_item: Mutable document metadata mapping, or ``None``.

    Returns:
        The same ``document_item`` object that was passed in.
    """
    if document_item:
        has_blob_reference = _has_persisted_blob_reference(document_item)
        document_item["enhanced_citations"] = has_blob_reference
    return document_item


def get_document_blob_delete_targets(document_item, user_id=None, group_id=None, public_workspace_id=None):
targets = []
seen = set()
Expand Down Expand Up @@ -317,7 +338,9 @@ def select_current_documents(documents):

current_documents = []
for family_documents in families.values():
current_documents.append(_choose_current_document(family_documents))
current_documents.append(
_normalize_document_enhanced_citations(_choose_current_document(family_documents))
)

return current_documents

Expand Down Expand Up @@ -666,6 +689,7 @@ def create_document(file_name, user_id, document_id, num_file_chunks, status, gr
"status": status,
"percentage_complete": 0,
"document_classification": carried_forward.get("document_classification", "None"),
"enhanced_citations": False,
"type": "document_metadata",
"public_workspace_id": public_workspace_id,
"user_id": user_id,
Expand Down Expand Up @@ -697,6 +721,7 @@ def create_document(file_name, user_id, document_id, num_file_chunks, status, gr
"status": status,
"percentage_complete": 0,
"document_classification": carried_forward.get("document_classification", "None"),
"enhanced_citations": False,
"type": "document_metadata",
"group_id": group_id,
"blob_container": _get_blob_container_name(group_id=group_id),
Expand Down Expand Up @@ -728,6 +753,7 @@ def create_document(file_name, user_id, document_id, num_file_chunks, status, gr
"status": status,
"percentage_complete": 0,
"document_classification": carried_forward.get("document_classification", "None"),
"enhanced_citations": False,
"type": "document_metadata",
"user_id": user_id,
"blob_container": _get_blob_container_name(),
Expand Down Expand Up @@ -823,7 +849,7 @@ def get_document_metadata(document_id, user_id, group_id=None, public_workspace_
user_id=public_workspace_id if is_public_workspace else (group_id if is_group else user_id),
content=f"Document metadata retrieved: {document_items}."
)
return document_items[0] if document_items else None
return _normalize_document_enhanced_citations(document_items[0]) if document_items else None

except Exception as e:
print(f"Error retrieving document metadata: {repr(e)}\nTraceback:\n{traceback.format_exc()}")
Expand Down Expand Up @@ -2775,7 +2801,7 @@ def get_document(user_id, document_id, group_id=None, public_workspace_id=None):
if not document_results:
return jsonify({'error': 'Document not found or access denied'}), 404

return jsonify(document_results[0]), 200
return jsonify(_normalize_document_enhanced_citations(document_results[0])), 200

except Exception as e:
return jsonify({'error': f'Error retrieving document: {str(e)}'}), 500
Expand Down Expand Up @@ -2863,7 +2889,7 @@ def get_document_version(user_id, document_id, version, group_id=None, public_wo
if not document_results:
return jsonify({'error': 'Document version not found'}), 404

return jsonify(document_results[0]), 200
return jsonify(_normalize_document_enhanced_citations(document_results[0])), 200

except Exception as e:
return jsonify({'error': f'Error retrieving document version: {str(e)}'}), 500
Expand Down Expand Up @@ -4158,6 +4184,7 @@ def upload_to_blob(temp_file_path, user_id, document_id, blob_filename, update_c
current_document["blob_container"] = storage_account_container_name
current_document["blob_path"] = blob_path
current_document["blob_path_mode"] = CURRENT_ALIAS_BLOB_PATH_MODE
current_document["enhanced_citations"] = True
if current_document.get("archived_blob_path") is None:
current_document["archived_blob_path"] = None
cosmos_container.upsert_item(current_document)
Expand Down
6 changes: 2 additions & 4 deletions application/single_app/route_enhanced_citations.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from functions_authentication import login_required, user_required, get_current_user_id
from functions_settings import get_settings, enabled_required
from functions_documents import get_document_metadata, get_document_blob_storage_info
from functions_documents import get_document_metadata
from functions_group import get_user_groups
from functions_public_workspaces import get_user_visible_public_workspace_ids_from_settings
from swagger_wrapper import swagger_route, get_auth_security
Expand Down Expand Up @@ -90,15 +90,13 @@ def get_enhanced_citation_document_metadata():
return doc_response, status_code

raw_doc = doc_response.get_json()
_, blob_path = get_document_blob_storage_info(raw_doc)

return jsonify({
"id": raw_doc.get("id"),
"document_id": raw_doc.get("id"),
"file_name": raw_doc.get("file_name"),
"version": raw_doc.get("version"),
"is_current_version": raw_doc.get("is_current_version"),
"enhanced_citations": bool(blob_path),
"enhanced_citations": bool(raw_doc.get("enhanced_citations", False)),
}), 200

except Exception as e:
Expand Down
48 changes: 48 additions & 0 deletions docs/explanation/fixes/MEDIA_ENHANCED_CITATION_BADGE_FIX.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Media Enhanced Citation Badge Fix

Fixed/Implemented in version: **0.241.007**

## Issue Description

Audio and video files uploaded while Enhanced Citations was enabled were stored in Azure Blob Storage and could open through the enhanced citation experience on the chat page, but the workspace document details panel still showed the citation mode as Standard.

## Root Cause Analysis

The workspace document list renders the citation badge from the persisted `enhanced_citations` field on the document metadata record.

Audio and video processing uploaded originals to blob storage, but the metadata record was not updated to set `enhanced_citations` to `true`.

At the same time, the chat-side enhanced citation metadata endpoint could still infer enhanced support from blob-backed document state, so chat behavior and workspace metadata drifted apart.

## Technical Details

Files modified: `application/single_app/functions_documents.py`, `application/single_app/route_enhanced_citations.py`, `application/single_app/config.py`, `functional_tests/test_media_enhanced_citations_metadata_flag.py`

Code changes summary:

- Added normalization helpers so blob-backed documents read back with `enhanced_citations=True` even when older records are missing that field.
- Updated `upload_to_blob()` to stamp `enhanced_citations=True` on the stored document metadata for new blob-backed uploads.
- Initialized new document metadata records with `enhanced_citations=False` so the field is always explicit.
- Updated the enhanced citation document metadata route to use the normalized per-document flag instead of inferring state from a derived blob path.

Impact analysis:

- Existing audio and video documents that already have persisted blob references now render the Enhanced badge in workspace details without requiring re-upload.
- New blob-backed uploads keep workspace metadata aligned with the chat enhanced citation experience.

## Validation

Test coverage: `functional_tests/test_media_enhanced_citations_metadata_flag.py`

Test results:

- Validates normalization of current and archived blob-backed documents to `enhanced_citations=True`.
- Validates that blob uploads stamp the document metadata with the enhanced citation flag.
- Validates that document list/detail reads and the enhanced citation metadata route use the normalized value.

Before/after comparison:

- Before: Blob-backed media could behave as enhanced in chat while still displaying Standard in workspace details.
- After: Workspace details and chat enhanced citation behavior use the same normalized document metadata state.

Related config.py version update: `VERSION = "0.241.007"`
6 changes: 6 additions & 0 deletions docs/explanation/release_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ For feature-focused and fix-focused drill-downs by version, see [Features by Ver
* Added functional and UI regression coverage for the guarded prompt-role path so future changes do not reintroduce the same startup failure.
* (Ref: `group_workspaces.html`, `test_group_workspace_prompt_role_ui_guard.py`, `test_group_workspace_prompt_role_containers_ui.py`)

* **Audio and Video Enhanced Citation Badge Consistency**
* Fixed blob-backed audio and video documents showing Standard citations in workspace details even when Enhanced Citations was enabled and the same files already opened through the enhanced citation experience on the chat page.
* Document metadata now persists and normalizes the `enhanced_citations` flag from blob-backed storage state so existing media uploads and new uploads both render the correct Enhanced badge across workspace and chat flows.
* Added regression coverage and fix documentation for the metadata normalization path.
* (Ref: `functions_documents.py`, `route_enhanced_citations.py`, `test_media_enhanced_citations_metadata_flag.py`, `MEDIA_ENHANCED_CITATION_BADGE_FIX.md`)

#### User Interface Enhancements

* **AI Voice Conversations Setup Guide**
Expand Down
153 changes: 153 additions & 0 deletions functional_tests/test_media_enhanced_citations_metadata_flag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#!/usr/bin/env python3
# test_media_enhanced_citations_metadata_flag.py
"""
Functional test for media enhanced citation metadata normalization.
Version: 0.241.007
Implemented in: 0.241.007

This test ensures blob-backed audio and video documents are marked as
enhanced citations in stored metadata so workspace badges match chat behavior.
"""

import ast
import os
import re
import sys


# Directory that contains this test file.
_TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
# Repository root: the parent of the directory containing this test file.
ROOT_DIR = os.path.dirname(_TESTS_DIR)
# Application sources inspected by the tests below.
SINGLE_APP_ROOT = os.path.join(os.path.join(ROOT_DIR, 'application'), 'single_app')
FUNCTIONS_DOCUMENTS_FILE = os.path.join(SINGLE_APP_ROOT, 'functions_documents.py')
ROUTE_FILE = os.path.join(SINGLE_APP_ROOT, 'route_enhanced_citations.py')
CONFIG_FILE = os.path.join(SINGLE_APP_ROOT, 'config.py')


def read_file(path):
    """Return the entire contents of ``path`` decoded as UTF-8 text."""
    with open(path, mode='r', encoding='utf-8') as source_handle:
        contents = source_handle.read()
    return contents


def load_normalization_helpers():
    """Build the normalization helpers from source without importing the module.

    Parses ``functions_documents.py``, keeps only the two helper function
    definitions, and executes them in a private namespace in which
    ``ARCHIVED_REVISION_BLOB_PATH_MODE`` is stubbed as ``'archived_revision'``.

    Returns:
        The compiled ``_normalize_document_enhanced_citations`` callable.

    Raises:
        AssertionError: If either helper is absent from the source file.
    """
    wanted_names = {
        '_normalize_document_enhanced_citations',
        '_has_persisted_blob_reference',
    }
    parsed_module = ast.parse(
        read_file(FUNCTIONS_DOCUMENTS_FILE),
        filename=FUNCTIONS_DOCUMENTS_FILE,
    )

    # Only top-level function definitions are considered.
    helper_nodes = []
    for candidate in parsed_module.body:
        if isinstance(candidate, ast.FunctionDef) and candidate.name in wanted_names:
            helper_nodes.append(candidate)

    found_names = {candidate.name for candidate in helper_nodes}
    missing_helpers = wanted_names - found_names
    assert not missing_helpers, f'Missing normalization helpers: {sorted(missing_helpers)}'

    namespace = {'ARCHIVED_REVISION_BLOB_PATH_MODE': 'archived_revision'}
    helper_code = compile(
        ast.Module(body=helper_nodes, type_ignores=[]),
        FUNCTIONS_DOCUMENTS_FILE,
        'exec',
    )
    exec(helper_code, namespace)
    return namespace['_normalize_document_enhanced_citations']


def test_blob_backed_documents_normalize_to_enhanced():
    """Check that persisted blob references drive the normalized flag.

    Covers a record with a current ``blob_path``, a record in
    archived-revision mode with an ``archived_blob_path``, and a record with
    no blob reference at all.
    """
    print('🔍 Testing blob-backed document normalization...')

    normalize = load_normalization_helpers()

    with_current_blob = normalize({'id': 'audio-doc', 'blob_path': 'user/audio.mp3'})
    assert with_current_blob['enhanced_citations'] is True, 'Current blob path should normalize to enhanced citations'

    with_archived_blob = normalize({
        'id': 'video-doc',
        'blob_path': None,
        'blob_path_mode': 'archived_revision',
        'archived_blob_path': 'user/family/video.mp4',
    })
    assert with_archived_blob['enhanced_citations'] is True, 'Archived blob path should normalize to enhanced citations'

    without_blob = normalize({'id': 'text-doc', 'blob_path': None, 'archived_blob_path': None})
    assert without_blob['enhanced_citations'] is False, 'Documents without persisted blob references should stay standard'

    print('✅ Blob-backed document normalization passed')
    return True


def test_blob_upload_persists_enhanced_flag():
    """Check the source for the upload stamp and the create-time default.

    This is a textual check: it looks for the literal assignment that sets
    the flag to ``True`` and the literal dict entry that defaults it to
    ``False`` inside ``functions_documents.py``.
    """
    print('🔍 Testing blob upload metadata stamping...')

    documents_source = read_file(FUNCTIONS_DOCUMENTS_FILE)

    missing = []
    for snippet in (
        'current_document["enhanced_citations"] = True',
        '"enhanced_citations": False,',
    ):
        if snippet not in documents_source:
            missing.append(snippet)
    assert not missing, f'Missing upload/create metadata snippets: {missing}'

    print('✅ Blob upload metadata stamping passed')
    return True


def test_document_reads_use_normalized_enhanced_flag():
    """Check that read paths and the metadata route use the normalized flag.

    This is a textual check against ``functions_documents.py`` and
    ``route_enhanced_citations.py``: the normalization calls must be present
    and the route must no longer contain ``bool(blob_path)``.
    """
    print('🔍 Testing document read normalization and enhanced citation metadata route...')

    documents_source = read_file(FUNCTIONS_DOCUMENTS_FILE)
    route_source = read_file(ROUTE_FILE)

    expected_in_documents = (
        '_normalize_document_enhanced_citations(_choose_current_document(family_documents))',
        'return jsonify(_normalize_document_enhanced_citations(document_results[0])), 200',
        'return _normalize_document_enhanced_citations(document_items[0]) if document_items else None',
    )
    missing_document_snippets = []
    for snippet in expected_in_documents:
        if snippet not in documents_source:
            missing_document_snippets.append(snippet)
    assert not missing_document_snippets, (
        'Missing document normalization snippets: '
        f'{missing_document_snippets}'
    )

    expected_route_line = '"enhanced_citations": bool(raw_doc.get("enhanced_citations", False))'
    assert expected_route_line in route_source, 'Enhanced citation metadata route should use normalized per-document flag'
    assert 'bool(blob_path)' not in route_source, 'Metadata route should no longer infer enhanced citations from a derived blob path'

    print('✅ Document read normalization passed')
    return True


def test_config_version_bumped_for_media_citation_fix():
    """Check that ``config.py`` declares ``VERSION = "0.241.007"``."""
    print('🔍 Testing config version bump...')

    version_match = re.search(r'VERSION = "([0-9.]+)"', read_file(CONFIG_FILE))
    assert version_match, 'Could not find VERSION in config.py'

    declared_version = version_match.group(1)
    assert declared_version == '0.241.007', 'Expected config.py version 0.241.007'

    print('✅ Config version bump passed')
    return True


if __name__ == '__main__':
    # Script entry point: run every test, print a pass/fail summary, and exit
    # non-zero if any test failed.
    import traceback

    tests = [
        test_blob_backed_documents_normalize_to_enhanced,
        test_blob_upload_persists_enhanced_flag,
        test_document_reads_use_normalized_enhanced_flag,
        test_config_version_bumped_for_media_citation_fix,
    ]

    results = []
    for test in tests:
        print(f'\n🧪 Running {test.__name__}...')
        # The tests report failure by raising (mostly AssertionError) rather
        # than returning False. Catch per test so one failure does not abort
        # the remaining tests and the summary below reflects real results.
        try:
            results.append(bool(test()))
        except Exception as exc:
            print(f'❌ {test.__name__} failed: {exc!r}')
            traceback.print_exc()
            results.append(False)

    success = all(results)
    print(f'\n📊 Results: {sum(results)}/{len(results)} tests passed')
    sys.exit(0 if success else 1)
Loading