diff --git a/core/common/checksums.py b/core/common/checksums.py index 59a4596f..2c146a52 100644 --- a/core/common/checksums.py +++ b/core/common/checksums.py @@ -4,6 +4,22 @@ from pydash import get +# Return only aggregate counts for changed resource groups. +SUMMARY_VERBOSITY = 0 + +# Include aggregate counts for resources that stayed the same. +SAME_STATS_VERBOSITY = 1 + +# Include resource IDs for new, removed, retired, and changed resources. +DIFF_RESOURCE_IDS_VERBOSITY = 2 + +# Include resource IDs for resources that stayed the same. +SAME_RESOURCE_IDS_VERBOSITY = 3 + +# Include expanded changelog fields such as names, descriptions, and previous values. +CHANGELOG_ENRICHMENT_VERBOSITY = 4 + + class ChecksumModel(models.Model): class Meta: abstract = True @@ -224,17 +240,17 @@ def common(self): @property def is_verbose(self): # include same stats, count only - return self.verbosity >= 1 + return self.verbosity >= SAME_STATS_VERBOSITY @property def is_very_verbose(self): # include IDS of new/changed/removed/retired - return self.verbosity >= 2 + return self.verbosity >= DIFF_RESOURCE_IDS_VERBOSITY @property def is_very_very_verbose(self): # include IDS of same - return self.verbosity >= 3 + return self.verbosity >= SAME_RESOURCE_IDS_VERBOSITY @property def include_same_stats(self): @@ -262,6 +278,13 @@ def populate_diff_from_common(self): self.same_standard[key] = info def get_struct(self, values, is_same=False): + """ + Return either a simple count or a detailed structure for a diff group. + + Changed groups include resource IDs at verbosity>=2, while unchanged + groups only include resource IDs at verbosity>=3 to keep lower + verbosity responses compact. 
+ """ total = len(values or []) include_ids = self.is_very_very_verbose if is_same else self.is_very_verbose if include_ids: @@ -330,14 +353,15 @@ def get_db_id_for(self, diff_key, identity): class ChecksumChangelog: - def __init__(self, concepts_diff, mappings_diff, identity='mnemonic'): # pylint: disable=too-many-arguments + def __init__(self, concepts_diff, mappings_diff, identity='mnemonic', verbosity=0): # pylint: disable=too-many-arguments self.concepts_diff = concepts_diff self.mappings_diff = mappings_diff self.identity = identity + self.verbosity = verbosity self.result = {} - def get_mapping_summary(self, mapping, mapping_id=None): - return { + def get_mapping_summary(self, mapping, mapping_id=None, v1_mapping=None): + summary = { 'id': mapping_id or get(mapping, self.identity), 'from_concept': mapping.from_concept_code or get(mapping.from_concept, 'mnemonic'), 'from_source': mapping.from_source_url, @@ -345,6 +369,80 @@ def get_mapping_summary(self, mapping, mapping_id=None): 'to_source': mapping.to_source_url, 'map_type': mapping.map_type, } + if self.verbosity >= CHANGELOG_ENRICHMENT_VERBOSITY: + summary['external_id'] = getattr(mapping, 'external_id', None) + if v1_mapping is not None: + summary['prev_to_concept'] = ( + v1_mapping.to_concept_code or get(v1_mapping.to_concept, 'mnemonic') + ) + summary['prev_to_source'] = v1_mapping.to_source_url + summary['prev_map_type'] = v1_mapping.map_type + return summary + + def _collect_v1_v2_ids(self, diff, diff_obj): + """For each mnemonic in each category, collect the v2 and v1 DB ids needed for enrichment.""" + ids = set() + for key in diff: + entry = diff[key] + if not isinstance(entry, dict): + continue + for mnemonic in entry[self.identity]: + db_id = diff_obj.get_db_id_for(key, mnemonic) + if db_id: + ids.add(db_id) + if key in ('changed_major', 'changed_minor'): + v1_id = get(diff_obj.resources1_map, f'{mnemonic}.id') + if v1_id and v1_id != db_id: + ids.add(v1_id) + return ids + + def 
_build_concepts_cache(self, diff_keys): + """Batch-fetch concepts with names/descriptions for enrich mode (avoids N+1 queries).""" + from core.concepts.models import Concept + ids = self._collect_v1_v2_ids( + {k: self.concepts_diff.result.get(k, False) for k in diff_keys}, + self.concepts_diff, + ) + return { + c.id: c + for c in Concept.objects.filter(id__in=ids).prefetch_related('names', 'descriptions') + } + + def _build_mappings_cache(self, diff_keys): + """Batch-fetch mappings for enrich mode (to resolve v1 state of changed mappings).""" + from core.mappings.models import Mapping + ids = self._collect_v1_v2_ids( + {k: self.mappings_diff.result.get(k, False) for k in diff_keys}, + self.mappings_diff, + ) + return {m.id: m for m in Mapping.objects.filter(id__in=ids)} + + @staticmethod + def _names_list(concept): + return [ + { + 'external_id': n.external_id, + 'name': n.name, + 'type': n.type, + 'locale': n.locale, + 'locale_preferred': n.locale_preferred, + } + for n in concept.names.all() + ] + + @staticmethod + def _descriptions_list(concept): + return [ + {'external_id': d.external_id, 'description': d.name, 'type': d.type, 'locale': d.locale} + for d in concept.descriptions.all() + ] + + def _v1_mapping_for(self, mnemonic, mapping_db_id, mappings_cache): + """Look up the v1 mapping instance for a changed mapping (for prev_* fields).""" + v1_id = get(self.mappings_diff.resources1_map, f'{mnemonic}.id') + if v1_id and v1_id != mapping_db_id: + return mappings_cache.get(v1_id) + return None def process(self): # pylint: disable=too-many-locals,too-many-branches,too-many-statements from core.mappings.models import Mapping @@ -354,6 +452,11 @@ def process(self): # pylint: disable=too-many-locals,too-many-branches,too-many traversed_mappings = set() traversed_concepts = set() diff_keys = ['new', 'removed', 'changed_retired', 'changed_major', 'changed_minor'] + include_changelog_enrichment = self.verbosity >= CHANGELOG_ENRICHMENT_VERBOSITY + + concepts_cache = 
self._build_concepts_cache(diff_keys) if include_changelog_enrichment else {} + mappings_cache = self._build_mappings_cache(diff_keys) if include_changelog_enrichment else {} + for key in diff_keys: # pylint: disable=too-many-nested-blocks diff = self.concepts_diff.result.get(key, False) if isinstance(diff, dict): @@ -363,7 +466,10 @@ def process(self): # pylint: disable=too-many-locals,too-many-branches,too-many continue traversed_concepts.add(concept_id) concept_db_id = self.concepts_diff.get_db_id_for(key, concept_id) - concept = Concept.objects.filter(id=concept_db_id).first() + if include_changelog_enrichment: + concept = concepts_cache.get(concept_db_id) + else: + concept = Concept.objects.filter(id=concept_db_id).first() concept_display_name = get(concept, 'display_name') if concept_display_name: concept_display_name = concept_display_name.replace('"', "'") @@ -371,6 +477,20 @@ def process(self): # pylint: disable=too-many-locals,too-many-branches,too-many 'id': concept_id, 'display_name': concept_display_name } + if include_changelog_enrichment and concept: + summary['concept_class'] = getattr(concept, 'concept_class', None) + summary['datatype'] = getattr(concept, 'datatype', None) + summary['names'] = self._names_list(concept) + summary['descriptions'] = self._descriptions_list(concept) + if key in ('changed_major', 'changed_minor'): + v1_id = get(self.concepts_diff.resources1_map, f'{concept_id}.id') + if v1_id and v1_id != concept_db_id: + v1_concept = concepts_cache.get(v1_id) + if v1_concept: + summary['prev_names'] = self._names_list(v1_concept) + summary['prev_descriptions'] = self._descriptions_list(v1_concept) + summary['prev_concept_class'] = getattr(v1_concept, 'concept_class', None) + summary['prev_datatype'] = getattr(v1_concept, 'datatype', None) mappings_diff_summary = {} for mapping_diff_key in diff_keys: mapping_ids = get(self.mappings_diff.result, f'{mapping_diff_key}.{self.identity}') @@ -382,7 +502,17 @@ def process(self): # pylint: 
disable=too-many-locals,too-many-branches,too-many for mapping in mappings: if mapping_diff_key not in mappings_diff_summary: mappings_diff_summary[mapping_diff_key] = [] - mappings_diff_summary[mapping_diff_key].append(self.get_mapping_summary(mapping)) + v1_mapping = None + if ( + include_changelog_enrichment and + mapping_diff_key in ('changed_major', 'changed_minor') + ): + v1_mapping = self._v1_mapping_for( + get(mapping, self.identity), mapping.id, mappings_cache + ) + mappings_diff_summary[mapping_diff_key].append( + self.get_mapping_summary(mapping, v1_mapping=v1_mapping) + ) traversed_mappings.add(get(mapping, self.identity)) if mappings_diff_summary: summary['mappings'] = mappings_diff_summary @@ -403,29 +533,27 @@ def process(self): # pylint: disable=too-many-locals,too-many-branches,too-many traversed_mappings.add(mapping_id) mapping_db_id = self.mappings_diff.get_db_id_for(key, mapping_id) mapping = Mapping.objects.filter(id=mapping_db_id).first() + v1_mapping = None + if include_changelog_enrichment and key in ('changed_major', 'changed_minor'): + v1_mapping = self._v1_mapping_for(mapping_id, mapping_db_id, mappings_cache) + mapping_summary = self.get_mapping_summary(mapping, mapping_id, v1_mapping=v1_mapping) from_concept_code = get(mapping, 'from_concept_code') or get(mapping.from_concept, 'mnemonic') - if from_concept_code: - concept_id = from_concept_code - if concept_id in same_concept_ids: - if 'changed_mappings_only' not in concepts_result: - concepts_result['changed_mappings_only'] = {} - if concept_id not in concepts_result['changed_mappings_only']: - concept_display_name = get(mapping.from_concept, 'display_name') - if concept_display_name: - concept_display_name = concept_display_name.replace('"', "'") - concepts_result['changed_mappings_only'][concept_id] = { - 'id': concept_id, - 'display_name': concept_display_name, - 'mappings': {} - } - if key not in concepts_result['changed_mappings_only'][concept_id]['mappings']: - 
concepts_result['changed_mappings_only'][concept_id]['mappings'][key] = [] - concepts_result['changed_mappings_only'][concept_id]['mappings'][key].append( - self.get_mapping_summary(mapping, mapping_id)) - else: - section_summary[mapping_id] = self.get_mapping_summary(mapping, mapping_id) + if from_concept_code and from_concept_code in same_concept_ids: + if 'changed_mappings_only' not in concepts_result: + concepts_result['changed_mappings_only'] = {} + if from_concept_code not in concepts_result['changed_mappings_only']: + concept_display_name = get(mapping.from_concept, 'display_name') + if concept_display_name: + concept_display_name = concept_display_name.replace('"', "'") + concepts_result['changed_mappings_only'][from_concept_code] = { + 'id': from_concept_code, + 'display_name': concept_display_name, + 'mappings': {} + } + mappings_dict = concepts_result['changed_mappings_only'][from_concept_code]['mappings'] + mappings_dict.setdefault(key, []).append(mapping_summary) else: - section_summary[mapping_id] = self.get_mapping_summary(mapping, mapping_id) + section_summary[mapping_id] = mapping_summary if section_summary: mappings_result[key] = section_summary self.result = { diff --git a/core/common/tasks.py b/core/common/tasks.py index 2f033c08..e01769b4 100644 --- a/core/common/tasks.py +++ b/core/common/tasks.py @@ -880,11 +880,14 @@ def generate_key(*args, **kwargs): return "|".join(key_parts) @app.task(base=QueueOnceCustomTask) -def source_version_compare(version1_uri, version2_uri, is_changelog, verbosity, ignore_cache=False): +def source_version_compare(version1_uri, version2_uri, is_changelog, verbosity, ignore_cache=False, format_type='json'): ignore_cache = ignore_cache or get(settings, 'TEST_MODE', False) + # Include format_type in the cache key only when it differs from the default + # to avoid invalidating existing cached JSON results. 
+ cache_key_suffix = f'|format={format_type}' if format_type != 'json' else '' if not ignore_cache: cache_key = generate_key( - 'source_version_compare', version1_uri, version2_uri, is_changelog, verbosity) + 'source_version_compare', version1_uri, version2_uri, is_changelog, verbosity) + cache_key_suffix result = cache.get(cache_key) if result: return result @@ -892,8 +895,15 @@ def source_version_compare(version1_uri, version2_uri, is_changelog, verbosity, from core.sources.models import Source version1 = Source.objects.get(uri=version1_uri) version2 = Source.objects.get(uri=version2_uri) - fn = Source.changelog if is_changelog else Source.compare - result = fn(version1, version2, verbosity) + + if is_changelog: + result = Source.changelog(version1, version2, verbosity) + if format_type == 'markdown': + from core.sources.changelog_markdown import ChangelogMarkdownGenerator + result['markdown'] = ChangelogMarkdownGenerator(result).generate() + else: + result = Source.compare(version1, version2, verbosity) + if not ignore_cache: cache.set(cache_key, result, timeout=60*60*24*4) return result diff --git a/core/integration_tests/tests_sources.py b/core/integration_tests/tests_sources.py index 50b9f7aa..ada6a500 100644 --- a/core/integration_tests/tests_sources.py +++ b/core/integration_tests/tests_sources.py @@ -18,7 +18,7 @@ from core.concepts.documents import ConceptDocument from core.concepts.models import Concept from core.concepts.serializers import ConceptVersionExportSerializer -from core.concepts.tests.factories import ConceptFactory, ConceptNameFactory +from core.concepts.tests.factories import ConceptDescriptionFactory, ConceptFactory, ConceptNameFactory from core.mappings.documents import MappingDocument from core.mappings.models import Mapping from core.mappings.serializers import MappingDetailSerializer @@ -2017,6 +2017,177 @@ def test_post_success(self, bundle_clone_mock): **parameters ) +class SourceVersionsChangelogOutputViewTest(OCLAPITestCase): + def 
_build_changelog_output_fixture(self): + source = OrganizationSourceFactory() + source_v1 = OrganizationSourceFactory(mnemonic=source.mnemonic, organization=source.organization, version='v1') + source_v2 = OrganizationSourceFactory(mnemonic=source.mnemonic, organization=source.organization, version='v2') + concept_v1 = ConceptFactory( + parent=source, + mnemonic='concept-detailed', + concept_class='Diagnosis', + datatype='None', + names=[ + ConceptNameFactory.build(name='Detailed name v1', locale='en', locale_preferred=True), + ], + descriptions=[ + ConceptDescriptionFactory.build(name='Detailed description v1', locale='en'), + ], + ) + concept_v2 = ConceptFactory( + parent=source, + mnemonic=concept_v1.mnemonic, + version='v2', + concept_class='Drug', + datatype='Text', + names=[ + ConceptNameFactory.build(name='Detailed name v2', locale='en', locale_preferred=True), + ConceptNameFactory.build(name='Nome detalhado', locale='pt', locale_preferred=False), + ], + descriptions=[ + ConceptDescriptionFactory.build(name='Detailed description v2', locale='en'), + ], + ) + mapping_from_concept = ConceptFactory(parent=source, mnemonic='mapping-from-concept') + mapping_target_v1 = ConceptFactory(parent=source, mnemonic='mapping-target-v1') + mapping_target_v2 = ConceptFactory(parent=source, mnemonic='mapping-target-v2') + mapping_v1 = MappingFactory( + parent=source, + mnemonic='mapping-detailed', + from_concept=mapping_from_concept, + to_concept=mapping_target_v1, + external_id='mapping-v1', + ) + mapping_v2 = MappingFactory( + parent=source, + mnemonic=mapping_v1.mnemonic, + version='v2', + from_concept=mapping_from_concept, + to_concept=mapping_target_v2, + map_type='NARROWER-THAN', + external_id='mapping-v2', + ) + + source_v1.concepts.add(concept_v1) + source_v2.concepts.add(concept_v2) + source_v1.mappings.add(mapping_v1) + source_v2.mappings.add(mapping_v2) + + for concept in Concept.objects.filter(parent=source): + concept.set_checksums() + + for mapping in 
Mapping.objects.filter(parent=source): + mapping.set_checksums() + + return { + 'concept_v1': concept_v1, + 'concept_v2': concept_v2, + 'mapping_v1': mapping_v1, + 'mapping_v2': mapping_v2, + 'source': source, + 'source_v1': source_v1, + 'source_v2': source_v2, + } + + def _post_changelog(self, source_v1, source_v2, token, verbosity, output): # pylint: disable=too-many-arguments + return self.client.post( + f'/sources/$changelog/?inline=true&output={output}', + { + 'version1': source_v1.uri, + 'version2': source_v2.uri, + 'verbosity': verbosity, + }, + HTTP_AUTHORIZATION=f'Token {token}', + format='json' + ) + + def test_json_output_for_all_verbosity_levels(self): + data = self._build_changelog_output_fixture() + token = data['source'].created_by.get_token() + + for verbosity in range(1, 5): + with self.subTest(output='json', verbosity=verbosity): + response = self._post_changelog( + data['source_v1'], data['source_v2'], token, verbosity, output='json' + ) + + self.assertEqual(response.status_code, 200) + self.assertNotIn('markdown', response.data) + self.assertEqual(response.data['meta']['diff']['concepts']['changed_major'], 1) + self.assertEqual(response.data['meta']['diff']['mappings']['changed_major'], 1) + + changed_concept = response.data['concepts']['changed_major']['concept-detailed'] + changed_mapping = response.data['mappings']['changed_major']['mapping-detailed'] + + if verbosity >= 4: + self.assertEqual(changed_concept['concept_class'], data['concept_v2'].concept_class) + self.assertEqual(changed_concept['prev_concept_class'], data['concept_v1'].concept_class) + self.assertEqual(changed_concept['datatype'], data['concept_v2'].datatype) + self.assertEqual(changed_concept['prev_datatype'], data['concept_v1'].datatype) + self.assertIn('Detailed name v2', [name['name'] for name in changed_concept['names']]) + self.assertIn('Nome detalhado', [name['name'] for name in changed_concept['names']]) + self.assertIn('Detailed name v1', [name['name'] for name in 
changed_concept['prev_names']]) + self.assertIn( + 'Detailed description v2', + [description['description'] for description in changed_concept['descriptions']] + ) + self.assertIn( + 'Detailed description v1', + [description['description'] for description in changed_concept['prev_descriptions']] + ) + self.assertEqual(changed_mapping['external_id'], data['mapping_v2'].external_id) + self.assertEqual(changed_mapping['prev_to_concept'], data['mapping_v1'].to_concept.mnemonic) + self.assertEqual(changed_mapping['to_concept'], data['mapping_v2'].to_concept.mnemonic) + self.assertEqual(changed_mapping['prev_map_type'], data['mapping_v1'].map_type) + self.assertEqual(changed_mapping['map_type'], data['mapping_v2'].map_type) + else: + self.assertNotIn('concept_class', changed_concept) + self.assertNotIn('prev_concept_class', changed_concept) + self.assertNotIn('datatype', changed_concept) + self.assertNotIn('prev_datatype', changed_concept) + self.assertNotIn('names', changed_concept) + self.assertNotIn('prev_names', changed_concept) + self.assertNotIn('descriptions', changed_concept) + self.assertNotIn('prev_descriptions', changed_concept) + self.assertNotIn('external_id', changed_mapping) + self.assertNotIn('prev_to_concept', changed_mapping) + self.assertNotIn('prev_map_type', changed_mapping) + + def test_markdown_output_for_all_verbosity_levels(self): + data = self._build_changelog_output_fixture() + token = data['source'].created_by.get_token() + + for verbosity in range(1, 5): + with self.subTest(output='markdown', verbosity=verbosity): + response = self._post_changelog( + data['source_v1'], data['source_v2'], token, verbosity, output='markdown' + ) + + self.assertEqual(response.status_code, 200) + self.assertIn('markdown', response.data) + self.assertEqual(response.data['meta']['diff']['concepts']['changed_major'], 1) + self.assertEqual(response.data['meta']['diff']['mappings']['changed_major'], 1) + + markdown_output = response.data['markdown'] + self.assertIn('# 
v2 Changelog', markdown_output) + self.assertIn('## Concepts', markdown_output) + self.assertIn('## Mappings', markdown_output) + + if verbosity >= 4: + self.assertNotIn('without enrichment', markdown_output) + self.assertIn('## Names', markdown_output) + self.assertIn('## Translations', markdown_output) + self.assertIn('Detailed name v1', markdown_output) + self.assertIn('Detailed name v2', markdown_output) + self.assertIn('Nome detalhado', markdown_output) + self.assertIn('Previous To Concept', markdown_output) + self.assertIn('mapping-target-v1', markdown_output) + self.assertIn('mapping-target-v2', markdown_output) + else: + self.assertIn('without enrichment', markdown_output) + self.assertNotIn('## Names', markdown_output) + self.assertNotIn('## Translations', markdown_output) + class SourceVersionsComparisonViewTest(OCLAPITestCase): def test_post_200(self): # pylint: disable=too-many-locals,too-many-statements diff --git a/core/sources/changelog_markdown.py b/core/sources/changelog_markdown.py new file mode 100644 index 00000000..005fb98d --- /dev/null +++ b/core/sources/changelog_markdown.py @@ -0,0 +1,1087 @@ +""" +Markdown changelog generator for source version diffs. + +Transforms $changelog JSON into a human-readable markdown document structured +after LOINC/SNOMED release notes. Requests with ``verbosity>=4`` include +enriched before/after details for names, descriptions, and mappings. + +LLM-based "Release Note Highlights" and per-section narrative summaries are +intentionally NOT implemented here. +# TODO: Integrate litellm (anthropic/claude-haiku-4.5) once ANTHROPIC_API_KEY +# is available in this deployment. Replace _static_highlight() calls with +# LLM completions (max_tokens=1000 for the top-level highlight, +# max_tokens=500 per section highlight, temperature=0.3, English output). 
+""" + +from datetime import date +from urllib.parse import unquote + +from django.conf import settings + + +class ChangelogMarkdownGenerator: + """ + Generates a markdown changelog from enriched $changelog JSON data. + + Usage:: + + generator = ChangelogMarkdownGenerator(changelog_data) + markdown_string = generator.generate() + + The ``changelog_data`` dict is the value returned by ``Source.changelog()``. + With ``verbosity>=4`` it carries extra fields used for detail tables. It has + the shape:: + + { + "meta": { + "version1": {"uri": "...", "concepts": N, "mappings": M}, + "version2": {"uri": "...", "concepts": N, "mappings": M}, + }, + "concepts": { + "new": {"id": {"id": "...", "display_name": "...", "names": [...], ...}}, + "removed": {...}, + "changed_retired": {...}, + "changed_major": {...}, + "changed_minor": {...}, + "changed_mappings_only": {...}, + }, + "mappings": { + "new": {"id": {"id": "...", "from_concept": "...", ...}}, + ... + }, + } + """ + + def __init__(self, changelog_data, default_locale='en'): + self.data = changelog_data + self.meta = changelog_data.get('meta', {}) + self.concepts = changelog_data.get('concepts', {}) + self.mappings = changelog_data.get('mappings', {}) + self.default_locale = default_locale + self._v1_meta = self.meta.get('version1', {}) + self._v2_meta = self.meta.get('version2', {}) + self._source_prefix = self._extract_source_prefix(self._v2_meta.get('uri', '')) + self._mapping_collections_cache = None + self.is_enriched = self._detect_enrichment() + + def _detect_enrichment(self): + """ + Detect whether the input JSON carries verbosity>=4 enrichment data + (names[], descriptions[], prev_*). Drives section-by-section adaptive + rendering so a low-verbosity input still yields a useful document + (Summary + Added/Removed/Retired/Updated lists) rather than an empty one. 
+ """ + for key in ('new', 'changed_major', 'changed_minor', 'removed', 'changed_retired'): + for info in (self.concepts.get(key) or {}).values(): + if info.get('names') is not None or info.get('prev_names') is not None: + return True + for key in ('changed_major', 'changed_minor', 'new'): + for m in (self.mappings.get(key) or {}).values(): + if self._mapping_is_enriched(m): + return True + for concepts in self.concepts.values(): + if not isinstance(concepts, dict): + continue + for info in concepts.values(): + for mappings in (info.get('mappings') or {}).values(): + for _, mapping in self._mapping_items(mappings): + if self._mapping_is_enriched(mapping): + return True + return False + + @staticmethod + def _mapping_is_enriched(mapping): + """Return whether a mapping summary includes verbosity>=4 details.""" + return ( + mapping.get('external_id') is not None + or mapping.get('prev_to_concept') is not None + or mapping.get('prev_to_source') is not None + or mapping.get('prev_map_type') is not None + ) + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + def generate(self): + # Precompute row collections once — used by both TOC (to decide which + # subsections to list) and the section renderers (to emit tables). 
+ empty_rows = ([], [], []) + self._name_rows = self._collect_name_rows(self.default_locale, 'names') if self.is_enriched else empty_rows + self._description_rows = self._collect_description_rows() if self.is_enriched else empty_rows + self._translation_rows = self._collect_translation_rows() if self.is_enriched else empty_rows + + sections = [ + self._header(), + self._notice(), + self._overview(), + self._summary_table(), + self._toc(), + self._concepts_section(), + self._names_section(), + self._descriptions_section(), + self._translations_section(), + self._mappings_section(), + ] + return '\n\n'.join(s for s in sections if s) + + def _visible_changed(self, items): + """Filter out checksum-only changes when enriched data lets us detect them.""" + return ( + {cid: info for cid, info in items.items() if self._changed_axes(info)} + if self.is_enriched else items + ) + + @staticmethod + def _anchor(anchor_id): + return f'<a id="{anchor_id}"></a>' + + def _notice(self): + """Warn the reader when enrichment data is missing — only when it matters.""" + if self.is_enriched: + return '' + return ( + '> **Note:** this changelog was generated without enrichment data ' + '(`verbosity<4`). Name, description, translation, and before/after ' + 'mapping details are not included. Re-run with `?verbosity=4` for ' + 'full detail.' + ) + + # ------------------------------------------------------------------ + # Header / meta + # ------------------------------------------------------------------ + + def _header(self): + v1_uri = self._v1_meta.get('uri', '') + v2_uri = self._v2_meta.get('uri', '') + v1_version = self._version_label(v1_uri) + v2_version = self._version_label(v2_uri) + today = date.today().isoformat() + return ( + f'# {v2_version} Changelog\n\n' + f'*Compared to {v1_version} — Generated on {today}*' + ) + + def _overview(self): + """ + High-level version comparison table placed immediately after the header. 
+ + Shows total concept and mapping counts for each version side-by-side with + add/remove/change deltas so readers can instantly gauge the release scope. + """ + v1_label = self._version_label(self._v1_meta.get('uri', '')) + v2_label = self._version_label(self._v2_meta.get('uri', '')) + + v1_concepts = self._v1_meta.get('concepts', 0) + v2_concepts = self._v2_meta.get('concepts', 0) + v1_mappings = self._v1_meta.get('mappings', 0) + v2_mappings = self._v2_meta.get('mappings', 0) + + concepts_added = len(self.concepts.get('new') or {}) + # "Removed" in the overview encompasses both hard-removes and retirements. + concepts_removed = ( + len(self.concepts.get('removed') or {}) + + len(self.concepts.get('changed_retired') or {}) + ) + concepts_changed = ( + len(self.concepts.get('changed_major') or {}) + + len(self.concepts.get('changed_minor') or {}) + + len(self.concepts.get('changed_mappings_only') or {}) + ) + + mappings_added, mappings_removed, mappings_changed = self._mapping_collections() + + lines = [ + '## Overview', + '', + f'**{v1_label} → {v2_label}**', + '', + f'| Category | {v1_label} | {v2_label} | Added | Removed | Changed |', + '|----------|----------:|----------:|------:|--------:|--------:|', + ( + f'| Concepts | {v1_concepts:,} | {v2_concepts:,} | {concepts_added:,} ' + f'| {concepts_removed:,} | {concepts_changed:,} |' + ), + ( + f'| Mappings | {v1_mappings:,} | {v2_mappings:,} | {len(mappings_added):,} ' + f'| {len(mappings_removed):,} | {len(mappings_changed):,} |' + ), + '', + '---', + ] + return '\n'.join(lines) + + def _summary_table(self): + concepts_new = len(self.concepts.get('new') or {}) + concepts_removed = len(self.concepts.get('removed') or {}) + concepts_retired = len(self.concepts.get('changed_retired') or {}) + concepts_major = len(self.concepts.get('changed_major') or {}) + concepts_minor = len(self.concepts.get('changed_minor') or {}) + concepts_mappings_only = len(self.concepts.get('changed_mappings_only') or {}) + + 
mappings_added, mappings_removed, _ = self._mapping_collections() + + v1_uri = self._v1_meta.get('uri', '') + v2_uri = self._v2_meta.get('uri', '') + base = getattr(settings, 'API_BASE_URL', '') + diff_url = f'{base}/sources/$changelog/?version1={v1_uri}&version2={v2_uri}' + + lines = [ + '## Summary', + '', + '| Category | Count |', + '|----------|------:|', + f'| New concepts | {concepts_new:,} |', + f'| Removed concepts | {concepts_removed:,} |', + f'| Retired concepts | {concepts_retired:,} |', + f'| Major changes | {concepts_major:,} |', + f'| Minor changes | {concepts_minor:,} |', + f'| Mapping-only changes | {concepts_mappings_only:,} |', + f'| Mappings added | {len(mappings_added):,} |', + f'| Mappings removed | {len(mappings_removed):,} |', + '', + f'[Download full JSON diff]({diff_url})', + '', + '---', + ] + return '\n'.join(lines) + + def _toc(self): + entries = [] + for label, anchor, subs in self._toc_sections(): + if not subs: + continue + entries.append(f'- [{label}](#{anchor})') + for sub_label, sub_anchor in subs: + entries.append(f' - [{sub_label}](#{sub_anchor})') + if not entries: + return '' + return '\n'.join(['## Contents', '', *entries, '', '---']) + + def _toc_sections(self): + """Return only post-Contents sections, each with visible subsections.""" + return [ + ('Concepts', 'concepts', self._concept_subsections()), + ('Names', 'names', self._name_subsections()), + ('Descriptions', 'descriptions', self._description_subsections()), + ('Translations', 'translations', self._translation_subsections()), + ('Mappings', 'mappings', self._mapping_subsections()), + ] + + def _concept_subsections(self): + subs = [] + if self.concepts.get('new'): + subs.append(('Added', 'concepts-added')) + if self.concepts.get('removed'): + subs.append(('Removed', 'concepts-removed')) + if self.concepts.get('changed_retired'): + subs.append(('Retired', 'concepts-retired')) + if self._visible_changed(self.concepts.get('changed_major') or {}): + subs.append(('Updated 
(Major)', 'concepts-updated-major')) + if self._visible_changed(self.concepts.get('changed_minor') or {}): + subs.append(('Updated (Minor)', 'concepts-updated-minor')) + return subs + + def _name_subsections(self): + added, updated, removed = self._name_rows + subs = [] + if added: + subs.append(('Added', 'names-added')) + if updated: + subs.append(('Updated', 'names-updated')) + if removed: + subs.append(('Removed', 'names-removed')) + return subs + + def _description_subsections(self): + added, updated, removed = self._description_rows + subs = [] + if added: + subs.append(('Added', 'descriptions-added')) + if updated: + subs.append(('Updated', 'descriptions-updated')) + if removed: + subs.append(('Removed', 'descriptions-removed')) + return subs + + def _translation_subsections(self): + added, updated, removed = self._translation_rows + subs = [] + if added: + subs.append(('Added', 'translations-added')) + if updated: + subs.append(('Updated', 'translations-updated')) + if removed: + subs.append(('Removed', 'translations-removed')) + return subs + + def _mapping_subsections(self): + added, removed, changed = self._mapping_collections() + subs = [] + if added: + subs.append(('Added', 'mappings-added')) + if removed: + subs.append(('Removed', 'mappings-removed')) + if changed: + subs.append(('Updated', 'mappings-updated')) + return subs + + # ------------------------------------------------------------------ + # Concepts section + # ------------------------------------------------------------------ + + def _concepts_section(self): + if not self._has_concepts(): + return '' + + added = self.concepts.get('new') or {} + removed = self.concepts.get('removed') or {} + retired = self.concepts.get('changed_retired') or {} + changed_major = self.concepts.get('changed_major') or {} + changed_minor = self.concepts.get('changed_minor') or {} + + visible_major = self._visible_changed(changed_major) + visible_minor = self._visible_changed(changed_minor) + + highlight = 
self._static_highlight( + 'Concepts', + added=len(added), + removed=len(removed), + retired=len(retired), + changed=len(visible_major) + len(visible_minor), + ) + + parts = ['## Concepts', '', f'*{highlight}*'] + + if added: + parts += ['', self._anchor('concepts-added'), '### Added', ''] + parts += self._concept_table(added) + if removed: + parts += ['', self._anchor('concepts-removed'), '### Removed', ''] + parts += self._concept_table(removed) + if retired: + parts += ['', self._anchor('concepts-retired'), '### Retired', ''] + parts += self._concept_table(retired) + if visible_major: + parts += ['', self._anchor('concepts-updated-major'), '### Updated (Major)', ''] + parts += self._concept_table( + visible_major, show_changes=self.is_enriched, axes_as_links=self.is_enriched + ) + if visible_minor: + parts += ['', self._anchor('concepts-updated-minor'), '### Updated (Minor)', ''] + parts += self._concept_table( + visible_minor, show_changes=self.is_enriched, axes_as_links=self.is_enriched + ) + + parts += ['', '---'] + return '\n'.join(parts) + + def _concept_table(self, concepts_dict, show_changes=False, axes_as_links=False): + if show_changes: + rows = [ + '| Concept ID | Display Name | Concept Class | Changed |', + '|-----------:|-------------|---------------|---------|', + ] + else: + rows = [ + '| Concept ID | Display Name | Concept Class |', + '|-----------:|-------------|---------------|', + ] + for concept_id, info in concepts_dict.items(): + display = info.get('display_name') or '' + concept_class = info.get('concept_class') or '' + link = self._concept_link(concept_id) + if show_changes: + axes = self._changed_axes( + info, + as_links=axes_as_links, + concept_id=concept_id if axes_as_links else None + ) + changed_str = ', '.join(axes) if axes else '—' + rows.append( + f'| {link} | {self._escape(display)} | {self._escape(concept_class)} | {changed_str} |' + ) + else: + rows.append(f'| {link} | {self._escape(display)} | {self._escape(concept_class)} |') + 
return rows + + @staticmethod + def _names_changed(prev_names, curr_names, locale, invert=False): + """ + Return True if there is any real name change between prev and curr for + the given locale (or all non-locale names when invert=True). + + Uses external_id as stable key when available; falls back to comparing + the full set of (type, locale, text) tuples otherwise. + """ + def _matches(n): + return n.get('locale') != locale if invert else n.get('locale') == locale + + prev = [n for n in prev_names if _matches(n)] + curr = [n for n in curr_names if _matches(n)] + + prev_eid = {n['external_id']: n.get('name') for n in prev if n.get('external_id')} + curr_eid = {n['external_id']: n.get('name') for n in curr if n.get('external_id')} + + # Any addition/removal/change in external_id space? + if set(prev_eid) != set(curr_eid): + return True + for eid in prev_eid: + if prev_eid[eid] != curr_eid.get(eid): + return True + + # Fallback for names without external_id + prev_fallback = frozenset( + (n.get('type'), n.get('locale'), n.get('name')) + for n in prev if not n.get('external_id') + ) + curr_fallback = frozenset( + (n.get('type'), n.get('locale'), n.get('name')) + for n in curr if not n.get('external_id') + ) + return prev_fallback != curr_fallback + + def _changed_axes(self, info, as_links=False, concept_id=None): + """ + Compute which axes changed for a concept in changed_major/changed_minor. + + When ``as_links=True`` and ``concept_id`` is provided, each axis label + becomes a markdown anchor link pointing directly to the first row for + that concept in the relevant section table + (e.g. ``[Names](#names-139061)``). + + Default-locale names and non-default-locale names (Translations) are + checked separately so the link targets the correct section. + Metadata has no dedicated section and is kept as plain text. 
+ """ + axes = [] + + def _link(label, section): + if as_links and concept_id: + return f'[{label}](#{section}-{concept_id})' + if as_links: + return f'[{label}](#{section})' + return label + + # Default-locale names → Names section + if self._names_changed(info.get('prev_names') or [], info.get('names') or [], self.default_locale): + axes.append(_link('Names', 'names')) + + # Non-default-locale names → Translations section + if self._names_changed(info.get('prev_names') or [], info.get('names') or [], self.default_locale, invert=True): + axes.append(_link('Translations', 'translations')) + + # Descriptions + prev_descs = frozenset( + (d.get('type'), d.get('locale'), d.get('description')) + for d in info.get('prev_descriptions') or [] + ) + curr_descs = frozenset( + (d.get('type'), d.get('locale'), d.get('description')) + for d in info.get('descriptions') or [] + ) + if prev_descs != curr_descs: + axes.append(_link('Descriptions', 'descriptions')) + + # Metadata (class or datatype changed) — no dedicated section + if ( + info.get('prev_concept_class') is not None + and info.get('concept_class') != info.get('prev_concept_class') + ) or ( + info.get('prev_datatype') is not None + and info.get('datatype') != info.get('prev_datatype') + ): + axes.append('Metadata') + + # Mappings + if info.get('mappings'): + axes.append(_link('Mappings', 'mappings')) + + return axes + + # ------------------------------------------------------------------ + # Names section + # ------------------------------------------------------------------ + + def _names_section(self): + added_rows, updated_rows, removed_rows = self._name_rows + if not (added_rows or updated_rows or removed_rows): + return '' + + highlight = self._static_highlight( + 'Names', + added=len(added_rows), updated=len(updated_rows), removed=len(removed_rows), + ) + parts = ['## Names', '', f'*{highlight}*'] + if added_rows: + parts += ['', self._anchor('names-added'), '### Added', ''] + parts += 
self._names_added_table(added_rows) + if updated_rows: + parts += ['', self._anchor('names-updated'), '### Updated', ''] + parts += self._names_updated_table(updated_rows) + if removed_rows: + parts += ['', self._anchor('names-removed'), '### Removed', ''] + parts += self._names_added_table(removed_rows) + parts += ['', '---'] + return '\n'.join(parts) + + def _collect_name_rows(self, locale_filter, section='names'): + """Return (added, updated, removed) rows for names at the given locale.""" + return self._collect_name_like_rows( + lambda n: n.get('locale') == locale_filter, section + ) + + def _collect_translation_name_rows(self): + """Return (added, updated, removed) rows for names in non-default locales.""" + return self._collect_name_like_rows( + lambda n: n.get('locale') != self.default_locale, 'translations' + ) + + def _collect_name_like_rows(self, matches_locale, section): # pylint: disable=too-many-locals,too-many-branches + """ + Collect name add/update/remove rows grouped by change type. + + Comparison strategy for changed concepts: + 1. Primary: ``external_id`` is the stable key across versions. Same + external_id with different text → Updated; only-in-v1 → Removed; + only-in-v2 → Added. + 2. Fallback (for names lacking ``external_id``): group by + ``(type, locale)`` and compare text sets. A single replacement + within a slot (1 removed + 1 added) is treated as Updated. + + The first row per concept carries an HTML anchor so the Concepts "Changed" + column can deep-link to it. 
+ """ + from collections import defaultdict + + added_rows, updated_rows, removed_rows = [], [], [] + anchored = set() + + def anchored_link(concept_id): + base = self._concept_link(concept_id) + if concept_id in anchored: + return base + anchored.add(concept_id) + return f'{base}' + + def row_tuple(cid, name, ntype, locale): + return (anchored_link(cid), name or '', ntype or '', locale or '') + + # Added/Removed from new/removed concepts (all their names at this locale) + for concept_id, info in (self.concepts.get('new') or {}).items(): + for n in info.get('names') or []: + if matches_locale(n): + added_rows.append(row_tuple(concept_id, n.get('name'), n.get('type'), n.get('locale'))) + for concept_id, info in (self.concepts.get('removed') or {}).items(): + for n in info.get('names') or []: + if matches_locale(n): + removed_rows.append(row_tuple(concept_id, n.get('name'), n.get('type'), n.get('locale'))) + + # Compare prev vs current names for changed concepts + for key in ('changed_major', 'changed_minor'): + for concept_id, info in (self.concepts.get(key) or {}).items(): + prev_names = [n for n in (info.get('prev_names') or []) if matches_locale(n)] + curr_names = [n for n in (info.get('names') or []) if matches_locale(n)] + + prev_eid = {n['external_id']: n for n in prev_names if n.get('external_id')} + curr_eid = {n['external_id']: n for n in curr_names if n.get('external_id')} + + for eid in set(prev_eid) | set(curr_eid): + p, c = prev_eid.get(eid), curr_eid.get(eid) + if p and c: + if p.get('name') != c.get('name'): + updated_rows.append(( + anchored_link(concept_id), + p.get('name', ''), c.get('name', ''), + c.get('type') or '', c.get('locale') or '', + )) + elif c: + added_rows.append(row_tuple(concept_id, c.get('name'), c.get('type'), c.get('locale'))) + else: + removed_rows.append(row_tuple(concept_id, p.get('name'), p.get('type'), p.get('locale'))) + + # Fallback for names without external_id + prev_no_eid = [n for n in prev_names if not 
n.get('external_id')] + curr_no_eid = [n for n in curr_names if not n.get('external_id')] + if not (prev_no_eid or curr_no_eid): + continue + + prev_by_key = defaultdict(set) + for n in prev_no_eid: + prev_by_key[(n.get('type'), n.get('locale'))].add(n.get('name', '')) + curr_by_key = defaultdict(set) + for n in curr_no_eid: + curr_by_key[(n.get('type'), n.get('locale'))].add(n.get('name', '')) + for (ntype, nloc) in set(prev_by_key) | set(curr_by_key): + prev_texts = prev_by_key.get((ntype, nloc), set()) + curr_texts = curr_by_key.get((ntype, nloc), set()) + if prev_texts == curr_texts: + continue + added_texts = curr_texts - prev_texts + removed_texts = prev_texts - curr_texts + if len(added_texts) == 1 and len(removed_texts) == 1: + updated_rows.append(( + anchored_link(concept_id), + next(iter(removed_texts)), next(iter(added_texts)), + ntype or '', nloc or '', + )) + else: + for text in added_texts: + added_rows.append(row_tuple(concept_id, text, ntype, nloc)) + for text in removed_texts: + removed_rows.append(row_tuple(concept_id, text, ntype, nloc)) + + return added_rows, updated_rows, removed_rows + + @staticmethod + def _names_added_table(rows): + lines = [ + '| Concept ID | Name | Name Type | Locale |', + '|-----------:|------|-----------|--------|', + ] + for link, name, name_type, locale in rows: + lines.append( + f'| {link} | {ChangelogMarkdownGenerator._escape(name)} ' + f'| {ChangelogMarkdownGenerator._escape(name_type)} | {locale} |' + ) + return lines + + @staticmethod + def _names_updated_table(rows): + lines = [ + '| Concept ID | Previous Name | Updated Name | Name Type | Locale |', + '|-----------:|--------------|-------------|-----------|--------|', + ] + for link, prev, curr, name_type, locale in rows: + lines.append( + f'| {link} | {ChangelogMarkdownGenerator._escape(prev)} ' + f'| {ChangelogMarkdownGenerator._escape(curr)} ' + f'| {ChangelogMarkdownGenerator._escape(name_type)} | {locale} |' + ) + return lines + + # 
------------------------------------------------------------------ + # Descriptions section + # ------------------------------------------------------------------ + + def _descriptions_section(self): + added_rows, updated_rows, removed_rows = self._description_rows + if not (added_rows or updated_rows or removed_rows): + return '' + + highlight = self._static_highlight( + 'Descriptions', + added=len(added_rows), updated=len(updated_rows), removed=len(removed_rows), + ) + parts = ['## Descriptions', '', f'*{highlight}*'] + if added_rows: + parts += ['', self._anchor('descriptions-added'), '### Added', ''] + parts += self._descriptions_added_table(added_rows) + if updated_rows: + parts += ['', self._anchor('descriptions-updated'), '### Updated', ''] + parts += self._descriptions_updated_table(updated_rows) + if removed_rows: + parts += ['', self._anchor('descriptions-removed'), '### Removed', ''] + parts += self._descriptions_added_table(removed_rows) + parts += ['', '---'] + return '\n'.join(parts) + + def _collect_description_rows(self): # pylint: disable=too-many-locals + added_rows = [] + updated_rows = [] + removed_rows = [] + anchored = set() + + def anchored_link(concept_id): + base = self._concept_link(concept_id) + if concept_id not in anchored: + anchored.add(concept_id) + return f'{base}' + return base + + for concept_id, info in (self.concepts.get('new') or {}).items(): + first = True + for desc in info.get('descriptions') or []: + link = anchored_link(concept_id) if first else self._concept_link(concept_id) + first = False + added_rows.append((link, desc.get('description', ''), desc.get('type', ''), desc.get('locale', ''))) + + for concept_id, info in (self.concepts.get('removed') or {}).items(): + first = True + for desc in info.get('descriptions') or []: + link = anchored_link(concept_id) if first else self._concept_link(concept_id) + first = False + removed_rows.append((link, desc.get('description', ''), desc.get('type', ''), desc.get('locale', ''))) 
+ + for key in ('changed_major', 'changed_minor'): + for concept_id, info in (self.concepts.get(key) or {}).items(): + prev_descs = info.get('prev_descriptions') or [] + curr_descs = info.get('descriptions') or [] + prev_map = {(d.get('type'), d.get('locale')): d.get('description', '') for d in prev_descs} + curr_map = {(d.get('type'), d.get('locale')): d.get('description', '') for d in curr_descs} + for key_tuple, curr_text in curr_map.items(): + prev_text = prev_map.get(key_tuple) + dtype = key_tuple[0] or '' + dloc = key_tuple[1] or '' + link = anchored_link(concept_id) + if prev_text is None: + added_rows.append((link, curr_text, dtype, dloc)) + elif prev_text != curr_text: + updated_rows.append((link, prev_text, curr_text, dtype, dloc)) + for key_tuple, prev_text in prev_map.items(): + if key_tuple not in curr_map: + link = anchored_link(concept_id) + removed_rows.append((link, prev_text, key_tuple[0] or '', key_tuple[1] or '')) + + return added_rows, updated_rows, removed_rows + + @staticmethod + def _descriptions_added_table(rows): + lines = [ + '| Concept ID | Description | Description Type | Locale |', + '|-----------:|------------|-----------------|--------|', + ] + for link, desc, dtype, locale in rows: + lines.append( + f'| {link} | {ChangelogMarkdownGenerator._escape(desc)} ' + f'| {ChangelogMarkdownGenerator._escape(dtype)} | {locale} |' + ) + return lines + + @staticmethod + def _descriptions_updated_table(rows): + lines = [ + '| Concept ID | Previous Description | Updated Description | Locale |', + '|-----------:|---------------------|-------------------|--------|', + ] + for link, prev, curr, dtype, locale in rows: # pylint: disable=unused-variable + lines.append( + f'| {link} | {ChangelogMarkdownGenerator._escape(prev)} ' + f'| {ChangelogMarkdownGenerator._escape(curr)} | {locale} |' + ) + return lines + + # ------------------------------------------------------------------ + # Translations section + # 
------------------------------------------------------------------ + + def _translations_section(self): + added_rows, updated_rows, removed_rows = self._translation_rows + if not (added_rows or updated_rows or removed_rows): + return '' + + # Group by locale for the highlight. + # added/removed rows have 4 elements (locale at index 3); + # updated rows have 5 elements (locale at index 4 == last). + # Using row[-1] (last element) works for both, and filtering None/empty. + all_locales = sorted({row[-1] for row in added_rows + updated_rows + removed_rows if row[-1]}) + locales_str = ', '.join(all_locales) if all_locales else '' + highlight = self._static_highlight( + 'Translations', + added=len(added_rows), updated=len(updated_rows), removed=len(removed_rows), + extra=f'Locales: {locales_str}' if locales_str else None, + ) + parts = ['## Translations', '', f'*{highlight}*'] + if added_rows: + parts += ['', self._anchor('translations-added'), '### Added', ''] + parts += self._translations_table(added_rows) + if updated_rows: + parts += ['', self._anchor('translations-updated'), '### Updated', ''] + parts += self._translations_updated_table(updated_rows) + if removed_rows: + parts += ['', self._anchor('translations-removed'), '### Removed', ''] + parts += self._translations_table(removed_rows) + parts += ['', '---'] + return '\n'.join(parts) + + def _collect_translation_rows(self): + """Alias for backward-compat / clarity — delegates to the unified helper.""" + return self._collect_translation_name_rows() + + @staticmethod + def _translations_table(rows): + lines = [ + '| Concept ID | Name | Locale | Name Type |', + '|-----------:|------|--------|-----------|', + ] + for link, name, name_type, locale in rows: + lines.append( + f'| {link} | {ChangelogMarkdownGenerator._escape(name)} | {locale} ' + f'| {ChangelogMarkdownGenerator._escape(name_type)} |' + ) + return lines + + @staticmethod + def _translations_updated_table(rows): + lines = [ + '| Concept ID | Previous 
Name | Updated Name | Locale | Name Type |', + '|-----------:|--------------|-------------|--------|-----------|', + ] + for link, prev, curr, name_type, locale in rows: + lines.append( + f'| {link} | {ChangelogMarkdownGenerator._escape(prev)} ' + f'| {ChangelogMarkdownGenerator._escape(curr)} ' + f'| {locale} | {ChangelogMarkdownGenerator._escape(name_type)} |' + ) + return lines + + # ------------------------------------------------------------------ + # Mappings section + # ------------------------------------------------------------------ + + @staticmethod + def _mapping_bucket_for(change_key): + if change_key == 'new': + return 'added' + if change_key == 'removed': + return 'removed' + return 'changed' + + @staticmethod + def _mapping_items(mapping_list_or_dict): + """Yield ``(id, mapping)`` pairs without inventing ids for list items.""" + if isinstance(mapping_list_or_dict, dict): + return mapping_list_or_dict.items() + return ((m.get('id'), m) for m in (mapping_list_or_dict or [])) + + def _normalize_mapping(self, mapping, mapping_id=None, from_concept=None): + normalized = dict(mapping or {}) + if mapping_id and not normalized.get('id'): + normalized['id'] = mapping_id + if from_concept and not normalized.get('from_concept'): + normalized['from_concept'] = from_concept + return normalized + + def _add_mapping_to_collection(self, collection, mapping, mapping_id=None, from_concept=None): + normalized = self._normalize_mapping(mapping, mapping_id=mapping_id, from_concept=from_concept) + key = normalized.get('id') or ( + f'{from_concept or ""}:{normalized.get("to_source") or ""}:' + f'{normalized.get("to_concept") or ""}:{normalized.get("map_type") or ""}' + ) + if key in collection: + # Prefer the richer normalized record when the same mapping appears both + # top-level and embedded under a concept. 
+ collection[key] = {**collection[key], **normalized} + else: + collection[key] = normalized + + def _mapping_collections(self): + """ + Return added/removed/changed mappings across both top-level mapping diffs + and mappings embedded inside concept sections. + + Enriched changelogs intentionally attach mapping diffs to their owning + concepts so concept rows can deep-link to the relevant mapping rows. The + overview, summary, TOC, and Mappings section must therefore count/render + embedded mappings as first-class mapping changes too. + """ + if self._mapping_collections_cache is not None: + return self._mapping_collections_cache + + added, removed, changed = {}, {}, {} + collections = {'added': added, 'removed': removed, 'changed': changed} + + for change_key, bucket in ( + ('new', added), + ('removed', removed), + ('changed_major', changed), + ('changed_minor', changed), + ('changed_retired', changed), + ): + for mapping_id, mapping in (self.mappings.get(change_key) or {}).items(): + self._add_mapping_to_collection(bucket, mapping, mapping_id=mapping_id) + + for concepts in self.concepts.values(): + if not isinstance(concepts, dict): + continue + for concept_id, info in concepts.items(): + for change_key, mapping_list in (info.get('mappings') or {}).items(): + bucket = collections[self._mapping_bucket_for(change_key)] + for mapping_id, mapping in self._mapping_items(mapping_list): + self._add_mapping_to_collection( + bucket, mapping, mapping_id=mapping_id, from_concept=concept_id + ) + + self._mapping_collections_cache = added, removed, changed + return self._mapping_collections_cache + + def _mappings_section(self): + if not self._has_mappings(): + return '' + + added, removed, changed = self._mapping_collections() + + highlight = self._static_highlight( + 'Mappings', + added=len(added), + removed=len(removed), + changed=len(changed), + ) + + parts = ['## Mappings', '', f'*{highlight}*'] + if added: + parts += ['', self._anchor('mappings-added'), '### Added', 
''] + parts += self._mappings_table(added) + if removed: + parts += ['', self._anchor('mappings-removed'), '### Removed', ''] + parts += self._mappings_table(removed) + if changed: + parts += ['', self._anchor('mappings-updated'), '### Updated', ''] + parts += self._mappings_updated_table(changed) + + return '\n'.join(parts) + + def _from_link_builder(self): + """Build a function that emits anchored from_concept links once per concept.""" + anchored = set() + + def builder(from_concept): + if not from_concept: + return '' + base = self._concept_link(from_concept) + if from_concept in anchored: + return base + anchored.add(from_concept) + return f'{base}' + return builder + + def _mappings_table(self, mappings_dict): + rows = [ + '| From Concept | To Concept | To Source | Map Type |', + '|-------------|-----------|----------|---------|', + ] + make_link = self._from_link_builder() + for m in mappings_dict.values(): + rows.append( + f'| {make_link(m.get("from_concept"))} ' + f'| {self._display_code(m.get("to_concept"))} ' + f'| {self._escape(m.get("to_source") or "")} ' + f'| {self._escape(m.get("map_type") or "")} |' + ) + return rows + + def _mappings_updated_table(self, mappings_dict): + """Before/after table for changed mappings, highlighting fields that changed.""" + rows = [ + '| From Concept | Previous To Concept | Updated To Concept | ' + 'Previous Map Type | Updated Map Type | To Source |', + '|-------------|--------------------|--------------------|' + '------------------|------------------|----------|', + ] + make_link = self._from_link_builder() + for m in mappings_dict.values(): + to_concept = m.get('to_concept') or '' + map_type = m.get('map_type') or '' + prev_to = m.get('prev_to_concept') + prev_mt = m.get('prev_map_type') + # Fall back to current value when prev is missing (e.g. verbosity<4 consumer). 
+ prev_to_display = prev_to if prev_to is not None else to_concept + prev_mt_display = prev_mt if prev_mt is not None else map_type + rows.append( + f'| {make_link(m.get("from_concept"))} ' + f'| {self._display_code(prev_to_display)} ' + f'| {self._display_code(to_concept)} ' + f'| {self._escape(prev_mt_display)} ' + f'| {self._escape(map_type)} ' + f'| {self._escape(m.get("to_source") or "")} |' + ) + return rows + + # ------------------------------------------------------------------ + # Presence checks + # ------------------------------------------------------------------ + + def _has_concepts(self): + for key in ('new', 'removed', 'changed_retired', 'changed_major', 'changed_minor'): + if self.concepts.get(key): + return True + return False + + def _has_mappings(self): + added, removed, changed = self._mapping_collections() + return bool(added or removed or changed) + + # ------------------------------------------------------------------ + # Deterministic highlight (placeholder for future LLM integration) + # ------------------------------------------------------------------ + + @staticmethod + def _static_highlight( # pylint: disable=too-many-arguments + section, added=0, removed=0, changed=0, updated=0, retired=0, extra=None + ): + """ + Returns a short descriptive string for a changelog section. + + # TODO: Replace with LLM-generated summary (anthropic/claude-haiku-4.5, + # max_tokens=500, temperature=0.3) once ANTHROPIC_API_KEY is available. + """ + parts = [] + if added: + parts.append(f'{added:,} addition{"s" if added != 1 else ""}') + if updated or changed: + count = updated or changed + parts.append(f'{count:,} update{"s" if count != 1 else ""}') + if removed: + parts.append(f'{removed:,} removal{"s" if removed != 1 else ""}') + if retired: + parts.append(f'{retired:,} retirement{"s" if retired != 1 else ""}') + summary = ', '.join(parts) if parts else 'No changes' + if extra: + summary += f'. {extra}.' + return f'{section}: {summary}.' 
+ + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + @staticmethod + def _extract_source_prefix(version_uri): + """ + Extract the base source path from a version URI. + E.g. '/orgs/CIEL/sources/CIEL/v20260101/' → '/orgs/CIEL/sources/CIEL/' + """ + if not version_uri: + return '' + parts = version_uri.strip('/').split('/') + try: + src_idx = parts.index('sources') + return '/' + '/'.join(parts[:src_idx + 2]) + '/' + except (ValueError, IndexError): + return '' + + def _concept_link(self, concept_id): + if not concept_id: + return '' + base = getattr(settings, 'API_BASE_URL', '') + if self._source_prefix: + url = f'{base}{self._source_prefix}concepts/{concept_id}/' + return f'[#{concept_id}]({url})' + return f'#{concept_id}' + + @staticmethod + def _version_label(uri): + """Extract the version identifier from a version URI.""" + if not uri: + return 'Unknown' + parts = uri.strip('/').split('/') + return parts[-1] if parts else uri + + @staticmethod + def _escape(text): + """Escape pipe characters so they don't break markdown tables.""" + if not text: + return '' + return str(text).replace('|', '\\|') + + @staticmethod + def _display_code(code): + """ + URL-decode a concept/mapping code for human-readable display. + + ICD-11 extension codes use ``&`` as a separator (e.g. ``2B31.2Z&XH75E6``), + but they are stored in the database URL-encoded (``2B31.2Z%26XH75E6``). + Decoding here ensures the markdown shows the canonical human-readable form. 
+ """ + if not code: + return '' + return ChangelogMarkdownGenerator._escape(unquote(str(code))) diff --git a/core/sources/models.py b/core/sources/models.py index c8061263..ee657d38 100644 --- a/core/sources/models.py +++ b/core/sources/models.py @@ -985,8 +985,14 @@ def changelog(version1, version2, verbosity=0): """ version1 is the older version version2 is the newer version + + verbosity >= 4 enables full enrichment: concept_class, datatype, names[] + (with external_id), descriptions[], and prev_* fields for changed concepts + and mappings. """ from core.common.checksums import ChecksumDiff + # Internal diff always runs at verbosity=3 to collect IDs of every category; + # per-resource enrichment is a concern of ChecksumChangelog (verbosity>=4). concepts_diff = ChecksumDiff( resources1=version1.get_concepts_queryset().only('mnemonic', 'checksums', 'retired'), resources2=version2.get_concepts_queryset().only('mnemonic', 'checksums', 'retired'), @@ -999,7 +1005,7 @@ def changelog(version1, version2, verbosity=0): ) concepts_diff.process() mappings_diff.process() - log = ChecksumChangelog(concepts_diff, mappings_diff) + log = ChecksumChangelog(concepts_diff, mappings_diff, verbosity=verbosity) log.process() result = { 'meta': { diff --git a/core/sources/tests/test_changelog_markdown.py b/core/sources/tests/test_changelog_markdown.py new file mode 100644 index 00000000..fb08b8ed --- /dev/null +++ b/core/sources/tests/test_changelog_markdown.py @@ -0,0 +1,803 @@ +# pylint: disable=protected-access,too-many-arguments + +from django.test import SimpleTestCase + +from core.sources.changelog_markdown import ChangelogMarkdownGenerator + + +def _make_data( + v1_uri='/orgs/CIEL/sources/CIEL/v20250101/', + v2_uri='/orgs/CIEL/sources/CIEL/v20260101/', + v1_concepts=100, + v2_concepts=102, + v1_mappings=50, + v2_mappings=52, + concepts=None, + mappings=None, +): + return { + 'meta': { + 'version1': {'uri': v1_uri, 'concepts': v1_concepts, 'mappings': v1_mappings}, + 
'version2': {'uri': v2_uri, 'concepts': v2_concepts, 'mappings': v2_mappings}, + }, + 'concepts': concepts or {}, + 'mappings': mappings or {}, + } + + +def _make_mapping(mapping_id='m1', from_concept='c1', to_concept='x', map_type='SAME-AS', to_source=None): + return { + 'id': mapping_id, + 'from_concept': from_concept, + 'to_concept': to_concept, + 'to_source': to_source, + 'map_type': map_type, + } + + +class TestChangelogMarkdownGeneratorHeader(SimpleTestCase): + def test_header_contains_version_labels(self): + data = _make_data() + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('v20260101', md) + self.assertIn('v20250101', md) + + def test_header_h1(self): + data = _make_data() + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('# v20260101 Changelog', md) + + +class TestChangelogMarkdownGeneratorSummaryTable(SimpleTestCase): + def test_summary_table_present(self): + data = _make_data() + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('## Summary', md) + + def test_summary_counts(self): + data = _make_data( + concepts={ + 'new': {'c1': {'id': 'c1', 'display_name': 'Foo'}}, + 'removed': {'c2': {'id': 'c2', 'display_name': 'Bar'}, 'c3': {'id': 'c3', 'display_name': 'Baz'}}, + } + ) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('| New concepts | 1 |', md) + self.assertIn('| Removed concepts | 2 |', md) + + def test_summary_breakdown_rows(self): + data = _make_data( + concepts={ + 'changed_major': {'c1': {'id': 'c1', 'display_name': 'Major'}}, + 'changed_minor': {'c2': {'id': 'c2', 'display_name': 'Minor'}}, + 'changed_retired': {'c3': {'id': 'c3', 'display_name': 'Retired'}}, + 'changed_mappings_only': {'c4': {'id': 'c4', 'display_name': 'Map only'}}, + }, + mappings={ + 'new': {'m1': _make_mapping('m1', 'c1', 'x')}, + 'removed': {'m2': _make_mapping('m2', 'c2', 'y')}, + }, + ) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('| Major changes | 1 |', md) + self.assertIn('| 
Minor changes | 1 |', md) + self.assertIn('| Retired concepts | 1 |', md) + self.assertIn('| Mapping-only changes | 1 |', md) + self.assertIn('| Mappings added | 1 |', md) + self.assertIn('| Mappings removed | 1 |', md) + + def test_summary_counts_embedded_mappings(self): + data = _make_data( + concepts={ + 'changed_major': { + 'c1': { + 'id': 'c1', + 'display_name': 'Major', + 'mappings': { + 'new': [ + {'id': 'm1', 'to_concept': 'x', 'to_source': None, 'map_type': 'SAME-AS'}, + {'id': 'm2', 'to_concept': 'y', 'to_source': None, 'map_type': 'NARROWER-THAN'}, + ], + }, + }, + }, + 'changed_mappings_only': { + 'c2': { + 'id': 'c2', + 'display_name': 'Mapping only', + 'mappings': { + 'new': [ + {'id': 'm3', 'to_concept': 'z', 'to_source': None, 'map_type': 'SAME-AS'}, + ], + }, + }, + }, + }, + ) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('| Mappings added | 3 |', md) + + def test_summary_no_version_range_header(self): + # Version range belongs to Overview only; it must not repeat in Summary. + data = _make_data() + md = ChangelogMarkdownGenerator(data).generate() + summary_start = md.index('## Summary') + summary_block = md[summary_start:] + self.assertNotIn('→', summary_block) + + def test_summary_no_totals_table(self): + # The v1/v2 totals table belongs to Overview only; it must not repeat in Summary. 
+ data = _make_data(v1_concepts=100, v2_concepts=102, v1_mappings=50, v2_mappings=52) + md = ChangelogMarkdownGenerator(data).generate() + summary_start = md.index('## Summary') + summary_block = md[summary_start:] + self.assertNotIn('| Concepts |', summary_block) + self.assertNotIn('| Mappings |', summary_block) + + def test_json_diff_download_link(self): + data = _make_data() + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('Download full JSON diff', md) + self.assertIn('/sources/$changelog/', md) + + +class TestChangelogMarkdownGeneratorOverview(SimpleTestCase): + def test_overview_section_present(self): + data = _make_data() + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('## Overview', md) + + def test_overview_before_summary(self): + data = _make_data() + md = ChangelogMarkdownGenerator(data).generate() + self.assertLess(md.index('## Overview'), md.index('## Summary')) + + def test_overview_version_range_header(self): + data = _make_data() + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('**v20250101 → v20260101**', md) + + def test_overview_table_columns(self): + data = _make_data(v1_concepts=80_000, v2_concepts=80_200, v1_mappings=120_000, v2_mappings=120_450) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('80,000', md) + self.assertIn('80,200', md) + self.assertIn('120,000', md) + self.assertIn('120,450', md) + + def test_overview_concepts_counts(self): + data = _make_data( + concepts={ + 'new': {'c1': {'id': 'c1', 'display_name': 'A'}, 'c2': {'id': 'c2', 'display_name': 'B'}}, + 'removed': {'c3': {'id': 'c3', 'display_name': 'C'}}, + 'changed_retired': {'c4': {'id': 'c4', 'display_name': 'D'}}, + 'changed_major': {'c5': {'id': 'c5', 'display_name': 'E'}}, + 'changed_minor': {'c6': {'id': 'c6', 'display_name': 'F'}}, + 'changed_mappings_only': {'c7': {'id': 'c7', 'display_name': 'G'}}, + } + ) + md = ChangelogMarkdownGenerator(data).generate() + # Added=2, Removed=1+1=2 
(removed+retired), Changed=1+1+1=3 + self.assertIn('| Concepts |', md) + overview_line = [l for l in md.splitlines() if l.startswith('| Concepts |')][0] + self.assertIn('| 2 |', overview_line) # Added + self.assertIn('| 2 |', overview_line) # Removed (removed + retired) + self.assertIn('| 3 |', overview_line) # Changed + + def test_overview_mappings_counts(self): + data = _make_data( + mappings={ + 'new': {'m1': _make_mapping('m1', 'c1', 'x')}, + 'removed': {'m2': _make_mapping('m2', 'c2', 'y')}, + 'changed_minor': {'m3': _make_mapping('m3', 'c3', 'z')}, + } + ) + md = ChangelogMarkdownGenerator(data).generate() + mappings_line = [l for l in md.splitlines() if l.startswith('| Mappings |')][0] + # Added=1, Removed=1, Changed=1 + self.assertIn('| 1 |', mappings_line) + + def test_overview_counts_embedded_mappings(self): + data = _make_data( + concepts={ + 'new': { + 'c1': { + 'id': 'c1', + 'display_name': 'New', + 'mappings': { + 'new': [ + {'id': 'm1', 'to_concept': 'x', 'to_source': None, 'map_type': 'SAME-AS'}, + ], + }, + }, + }, + 'changed_major': { + 'c2': { + 'id': 'c2', + 'display_name': 'Major', + 'mappings': { + 'new': [ + {'id': 'm2', 'to_concept': 'y', 'to_source': None, 'map_type': 'SAME-AS'}, + ], + }, + }, + }, + }, + ) + md = ChangelogMarkdownGenerator(data).generate() + mappings_line = [l for l in md.splitlines() if l.startswith('| Mappings |')][0] + self.assertIn('| 2 | 0 | 0 |', mappings_line) + + +class TestChangelogMarkdownGeneratorTOC(SimpleTestCase): + def test_toc_omitted_without_following_sections(self): + data = _make_data() + md = ChangelogMarkdownGenerator(data).generate() + self.assertNotIn('## Contents', md) + + def test_toc_shows_only_present_sections(self): + data = _make_data( + concepts={ + 'new': {'c1': {'id': 'c1', 'display_name': 'New concept'}}, + }, + mappings={ + 'new': {'m1': _make_mapping('m1', 'c1', 'x')}, + } + ) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('[Concepts]', md) + 
self.assertIn('[Mappings]', md) + # No names/descriptions/translations data → should not appear in TOC + self.assertNotIn('[Names]', md) + self.assertNotIn('[Descriptions]', md) + self.assertNotIn('[Translations]', md) + + def test_toc_excludes_sections_before_contents(self): + data = _make_data( + concepts={'new': {'c1': {'id': 'c1', 'display_name': 'A'}}}, + mappings={ + 'new': { + 'm1': { + 'id': 'm1', + 'from_concept': 'c1', + 'to_concept': 'x', + 'to_source': None, + 'map_type': 'SAME-AS', + } + } + }, + ) + md = ChangelogMarkdownGenerator(data).generate() + toc_block = md[md.index('## Contents'):md.index('## Concepts')] + self.assertNotIn('Changelog](#', toc_block) + self.assertNotIn('[Overview]', toc_block) + self.assertNotIn('[Summary]', toc_block) + self.assertNotIn('[Contents]', toc_block) + self.assertIn('- [Concepts](#concepts)', toc_block) + self.assertIn('- [Mappings](#mappings)', toc_block) + + def test_toc_lists_concept_subsections(self): + data = _make_data( + concepts={ + 'new': {'c1': {'id': 'c1', 'display_name': 'A'}}, + 'changed_retired': {'c2': {'id': 'c2', 'display_name': 'B'}}, + 'changed_major': {'c3': {'id': 'c3', 'display_name': 'C'}}, + } + ) + md = ChangelogMarkdownGenerator(data).generate() + # Top-level entry points to #concepts; subitems use section-prefixed anchors + self.assertIn('- [Concepts](#concepts)', md) + self.assertIn(' - [Added](#concepts-added)', md) + self.assertIn(' - [Retired](#concepts-retired)', md) + self.assertIn(' - [Updated (Major)](#concepts-updated-major)', md) + # 'Removed' and 'Updated (Minor)' are absent in the data → absent from TOC + self.assertNotIn('[Removed](#concepts-removed)', md) + self.assertNotIn('[Updated (Minor)]', md) + + def test_toc_lists_mapping_subsections(self): + data = _make_data(mappings={ + 'new': {'m1': _make_mapping('m1', 'c1', 'x')}, + 'changed_minor': {'m2': _make_mapping('m2', 'c2', 'y')}, + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('- 
[Mappings](#mappings)', md) + self.assertIn(' - [Added](#mappings-added)', md) + self.assertIn(' - [Updated](#mappings-updated)', md) + self.assertNotIn('[Removed](#mappings-removed)', md) + + def test_toc_lists_embedded_mapping_subsections(self): + data = _make_data(concepts={ + 'changed_major': { + 'c1': { + 'id': 'c1', + 'display_name': 'Major', + 'mappings': { + 'new': [ + {'id': 'm1', 'to_concept': 'x', 'to_source': None, 'map_type': 'SAME-AS'}, + ], + }, + }, + }, + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('- [Mappings](#mappings)', md) + self.assertIn(' - [Added](#mappings-added)', md) + + def test_subsection_anchors_emitted_in_body(self): + data = _make_data( + concepts={'new': {'c1': {'id': 'c1', 'display_name': 'A'}}}, + mappings={'new': {'m1': _make_mapping('m1', 'c1', 'x')}}, + ) + md = ChangelogMarkdownGenerator(data).generate() + # Each subsection TOC link must resolve to an anchor in the body + self.assertIn('', md) + self.assertIn('', md) + + def test_toc_lists_name_subsections_when_enriched(self): + data = _make_data(concepts={ + 'new': {'c1': { + 'id': 'c1', 'display_name': 'New', + 'names': [{'name': 'Foo', 'type': 'FULLY_SPECIFIED', 'locale': 'en'}], + 'descriptions': [], + }}, + 'changed_minor': {'c2': { + 'id': 'c2', 'display_name': 'Changed', + 'names': [{'external_id': 'e1', 'name': 'Bar v2', 'type': 'FULLY_SPECIFIED', 'locale': 'en'}], + 'prev_names': [{'external_id': 'e1', 'name': 'Bar v1', 'type': 'FULLY_SPECIFIED', 'locale': 'en'}], + 'descriptions': [], + }}, + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('- [Names](#names)', md) + self.assertIn(' - [Added](#names-added)', md) + self.assertIn(' - [Updated](#names-updated)', md) + self.assertIn('', md) + self.assertIn('', md) + + +class TestChangelogMarkdownGeneratorConceptsSection(SimpleTestCase): + def test_concepts_added_table(self): + data = _make_data(concepts={ + 'new': { + 'c1': {'id': 'c1', 'display_name': 'Malaria Test', 
'concept_class': 'Test'}, + } + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('## Concepts', md) + self.assertIn('### Added', md) + self.assertIn('#c1', md) + self.assertIn('Malaria Test', md) + self.assertIn('Test', md) + + def test_concept_link_format(self): + data = _make_data(concepts={ + 'new': {'42': {'id': '42', 'display_name': 'Something'}} + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('[#42]', md) + self.assertIn('/orgs/CIEL/sources/CIEL/concepts/42/', md) + + def test_concepts_removed_section(self): + data = _make_data(concepts={ + 'removed': {'c5': {'id': 'c5', 'display_name': 'Old concept'}} + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('### Removed', md) + self.assertIn('#c5', md) + + def test_concepts_retired_section(self): + data = _make_data(concepts={ + 'changed_retired': {'c3': {'id': 'c3', 'display_name': 'Retired concept'}} + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('### Retired', md) + + def test_empty_concepts_section_omitted(self): + data = _make_data() + md = ChangelogMarkdownGenerator(data).generate() + self.assertNotIn('## Concepts', md) + + def test_pipe_in_display_name_escaped(self): + data = _make_data(concepts={ + 'new': {'c1': {'id': 'c1', 'display_name': 'A|B name'}} + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('A\\|B name', md) + + +class TestChangelogMarkdownGeneratorNamesSection(SimpleTestCase): + def _data_with_names(self): + return _make_data(concepts={ + 'new': { + 'c1': { + 'id': 'c1', + 'display_name': 'New concept', + 'names': [ + {'name': 'Lorem ipsum', 'type': 'FULLY_SPECIFIED', 'locale': 'en', 'locale_preferred': True}, + {'name': 'Ipsum lorem', 'type': 'SHORT', 'locale': 'en', 'locale_preferred': False}, + ], + 'descriptions': [], + } + }, + 'removed': { + 'c2': { + 'id': 'c2', + 'display_name': 'Removed concept', + 'names': [ + {'name': 'Old name', 'type': 'FULLY_SPECIFIED', 'locale': 'en', 
'locale_preferred': True}, + ], + 'descriptions': [], + } + }, + }) + + def test_names_section_present(self): + md = ChangelogMarkdownGenerator(self._data_with_names()).generate() + self.assertIn('## Names', md) + + def test_names_added_table(self): + md = ChangelogMarkdownGenerator(self._data_with_names()).generate() + self.assertIn('### Added', md) + self.assertIn('Lorem ipsum', md) + + def test_names_removed_table(self): + md = ChangelogMarkdownGenerator(self._data_with_names()).generate() + self.assertIn('### Removed', md) + self.assertIn('Old name', md) + + def test_names_updated_table(self): + data = _make_data(concepts={ + 'changed_minor': { + 'c1': { + 'id': 'c1', + 'display_name': 'Fixed', + 'names': [ + {'name': 'Lorem ipsum', 'type': 'FULLY_SPECIFIED', 'locale': 'en', 'locale_preferred': True}, + ], + 'prev_names': [ + {'name': 'Lorem IPsum', 'type': 'FULLY_SPECIFIED', 'locale': 'en', 'locale_preferred': True}, + ], + 'descriptions': [], + } + } + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('## Names', md) + self.assertIn('### Updated', md) + self.assertIn('Lorem IPsum', md) + self.assertIn('Lorem ipsum', md) + + def test_names_section_omitted_if_no_names(self): + data = _make_data(concepts={ + 'new': {'c1': {'id': 'c1', 'display_name': 'No names'}} + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertNotIn('## Names', md) + + +class TestChangelogMarkdownGeneratorTranslationsSection(SimpleTestCase): + def test_translations_section_present(self): + data = _make_data(concepts={ + 'new': { + 'c1': { + 'id': 'c1', + 'display_name': 'New', + 'names': [ + { + 'name': 'English name', + 'type': 'FULLY_SPECIFIED', + 'locale': 'en', + 'locale_preferred': True, + }, + { + 'name': 'Nome em português', + 'type': 'FULLY_SPECIFIED', + 'locale': 'pt', + 'locale_preferred': False, + }, + ], + 'descriptions': [], + } + } + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('## Translations', md) + 
self.assertIn('Nome em português', md) + self.assertNotIn('English name', md.split('## Translations')[1]) + + def test_translations_omitted_if_only_default_locale(self): + data = _make_data(concepts={ + 'new': { + 'c1': { + 'id': 'c1', + 'display_name': 'English only', + 'names': [ + {'name': 'English name', 'type': 'FULLY_SPECIFIED', 'locale': 'en', 'locale_preferred': True}, + ], + 'descriptions': [], + } + } + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertNotIn('## Translations', md) + + +class TestChangelogMarkdownGeneratorMappingsSection(SimpleTestCase): + def test_mappings_added_table(self): + data = _make_data(mappings={ + 'new': { + 'm1': { + 'id': 'm1', + 'from_concept': 'c1', + 'from_source': None, + 'to_concept': '12345', + 'to_source': 'http://snomed.info/sct', + 'map_type': 'SAME-AS', + } + } + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('## Mappings', md) + self.assertIn('### Added', md) + self.assertIn('SAME-AS', md) + self.assertIn('12345', md) + + def test_mappings_section_omitted_if_empty(self): + data = _make_data() + md = ChangelogMarkdownGenerator(data).generate() + self.assertNotIn('## Mappings', md) + + def test_mappings_updated_table_shows_prev_fields(self): + data = _make_data(mappings={ + 'changed_major': { + 'm99': { + 'id': 'm99', + 'external_id': '250260ABBBBBBBBBBBBBBBBBBBBBBBBBBBBB', + 'from_concept': 'c10', + 'from_source': None, + 'to_concept': '719709', + 'to_source': 'IMO', + 'map_type': 'SAME-AS', + 'prev_to_concept': '719700', + 'prev_to_source': 'IMO', + 'prev_map_type': 'NARROWER-THAN', + } + } + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('### Updated', md) + # Previous and current values both appear + self.assertIn('719700', md) + self.assertIn('719709', md) + self.assertIn('NARROWER-THAN', md) + self.assertIn('SAME-AS', md) + # Column headers indicate before/after + self.assertIn('Previous To Concept', md) + self.assertIn('Updated To Concept', md) + 
self.assertIn('Previous Map Type', md) + self.assertIn('Updated Map Type', md) + + def test_mappings_updated_without_prev_falls_back_to_current(self): + # Legacy data without prev_* fields (verbosity<4) still renders gracefully + data = _make_data(mappings={ + 'changed_minor': { + 'm1': { + 'id': 'm1', + 'from_concept': 'c1', + 'to_concept': 'xyz', + 'to_source': None, + 'map_type': 'SAME-AS', + } + } + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('### Updated', md) + self.assertIn('xyz', md) + self.assertIn('SAME-AS', md) + + def test_changed_mappings_only_included(self): + data = _make_data(concepts={ + 'changed_mappings_only': { + 'c7': { + 'id': 'c7', + 'display_name': 'Concept 7', + 'mappings': { + 'changed_minor': [ + { + 'id': 'mapping7', + 'from_concept': 'c7', + 'from_source': None, + 'to_concept': 'xyz', + 'to_source': None, + 'map_type': 'SAME-AS', + } + ] + } + } + } + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('## Mappings', md) + + def test_embedded_mappings_from_changed_major_are_rendered(self): + data = _make_data(concepts={ + 'changed_major': { + '170670': { + 'id': '170670', + 'display_name': 'Dolutegravir / emtricitabine / tenofovir alafenamide', + 'names': [], + 'prev_names': [], + 'descriptions': [], + 'prev_descriptions': [], + 'mappings': { + 'new': [ + { + 'id': '17333738', + 'to_concept': '714767001', + 'to_source': '/orgs/IHTSDO/sources/SNOMED-CT/', + 'map_type': 'NARROWER-THAN', + }, + ], + }, + }, + }, + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('## Mappings', md) + self.assertIn('[#170670]', md) + self.assertIn('714767001', md) + self.assertIn('NARROWER-THAN', md) + + def test_embedded_mapping_lists_do_not_invent_numeric_ids(self): + data = _make_data(concepts={ + 'changed_mappings_only': { + 'c1': { + 'id': 'c1', + 'display_name': 'Concept 1', + 'mappings': { + 'new': [ + {'to_concept': 'x', 'to_source': None, 'map_type': 'SAME-AS'}, + {'to_concept': 'y', 
'to_source': None, 'map_type': 'SAME-AS'}, + ], + }, + }, + }, + }) + added, _, _ = ChangelogMarkdownGenerator(data)._mapping_collections() + self.assertNotIn(1, added) + + +class TestChangelogMarkdownGeneratorStaticHighlight(SimpleTestCase): + def test_additions_only(self): + result = ChangelogMarkdownGenerator._static_highlight('Concepts', added=5) + self.assertIn('5 additions', result) + + def test_all_counts(self): + result = ChangelogMarkdownGenerator._static_highlight('Names', added=30, updated=60, removed=2) + self.assertIn('30 additions', result) + self.assertIn('60 updates', result) + self.assertIn('2 removals', result) + + def test_no_changes(self): + result = ChangelogMarkdownGenerator._static_highlight('Names') + self.assertIn('No changes', result) + + def test_singular_forms(self): + result = ChangelogMarkdownGenerator._static_highlight('Concepts', added=1, removed=1) + self.assertIn('1 addition', result) + self.assertIn('1 removal', result) + self.assertNotIn('additions', result) + self.assertNotIn('removals', result) + + +class TestChangelogMarkdownGeneratorEnrichmentDetection(SimpleTestCase): + def test_not_enriched_when_no_names_or_prev(self): + data = _make_data(concepts={ + 'changed_major': {'c1': {'id': 'c1', 'display_name': 'A'}}, + 'changed_minor': {'c2': {'id': 'c2', 'display_name': 'B'}}, + }) + gen = ChangelogMarkdownGenerator(data) + self.assertFalse(gen.is_enriched) + + def test_enriched_when_names_present(self): + data = _make_data(concepts={ + 'changed_major': { + 'c1': { + 'id': 'c1', 'display_name': 'A', + 'names': [{'name': 'Foo', 'type': 'FULLY_SPECIFIED', 'locale': 'en'}], + 'prev_names': [], + }, + }, + }) + self.assertTrue(ChangelogMarkdownGenerator(data).is_enriched) + + def test_enriched_when_mapping_has_external_id(self): + data = _make_data(mappings={ + 'new': {'m1': { + 'id': 'm1', 'from_concept': 'c1', 'to_concept': 'x', 'to_source': None, + 'map_type': 'SAME-AS', 'external_id': 'abc-123', + }}, + }) + 
self.assertTrue(ChangelogMarkdownGenerator(data).is_enriched) + + def test_enriched_when_embedded_mapping_has_prev_fields(self): + data = _make_data(concepts={ + 'changed_mappings_only': { + 'c1': { + 'id': 'c1', + 'display_name': 'Concept 1', + 'mappings': { + 'changed_minor': [ + { + 'id': 'm1', + 'to_concept': 'new', + 'prev_to_concept': 'old', + 'map_type': 'SAME-AS', + }, + ], + }, + }, + }, + }) + self.assertTrue(ChangelogMarkdownGenerator(data).is_enriched) + + def test_notice_banner_present_only_when_not_enriched(self): + non_enriched = _make_data(concepts={ + 'changed_major': {'c1': {'id': 'c1', 'display_name': 'A'}}, + }) + self.assertIn('without enrichment', ChangelogMarkdownGenerator(non_enriched).generate()) + + enriched = _make_data(concepts={ + 'changed_major': {'c1': { + 'id': 'c1', 'display_name': 'A', + 'names': [{'name': 'Foo', 'type': 'FULLY_SPECIFIED', 'locale': 'en'}], + 'prev_names': [], + }}, + }) + self.assertNotIn('without enrichment', ChangelogMarkdownGenerator(enriched).generate()) + + def test_changed_concepts_still_rendered_without_enrichment(self): + # Key guarantee: non-enriched input must not produce an empty "Updated" section. + data = _make_data(concepts={ + 'changed_major': { + 'c1': {'id': 'c1', 'display_name': 'Concept 1'}, + 'c2': {'id': 'c2', 'display_name': 'Concept 2'}, + }, + 'changed_minor': { + 'c3': {'id': 'c3', 'display_name': 'Concept 3'}, + }, + }) + md = ChangelogMarkdownGenerator(data).generate() + self.assertIn('### Updated (Major)', md) + self.assertIn('### Updated (Minor)', md) + self.assertIn('#c1', md) + self.assertIn('#c2', md) + self.assertIn('#c3', md) + # Without enrichment, the concepts table must not have a "Changed" column. + # The Overview table does have a "Changed" header, so we check specifically + # for the concept-table header pattern (which includes "Concept ID"). 
+ self.assertNotIn('| Concept ID | Display Name | Concept Class | Changed |', md) + + +class TestChangelogMarkdownGeneratorHelpers(SimpleTestCase): + def test_extract_source_prefix(self): + gen = ChangelogMarkdownGenerator.__new__(ChangelogMarkdownGenerator) + self.assertEqual( + gen._extract_source_prefix('/orgs/CIEL/sources/CIEL/v20260101/'), + '/orgs/CIEL/sources/CIEL/' + ) + + def test_extract_source_prefix_empty(self): + gen = ChangelogMarkdownGenerator.__new__(ChangelogMarkdownGenerator) + self.assertEqual(gen._extract_source_prefix(''), '') + + def test_escape_pipe(self): + self.assertEqual(ChangelogMarkdownGenerator._escape('A|B'), 'A\\|B') + self.assertEqual(ChangelogMarkdownGenerator._escape('Normal'), 'Normal') + self.assertEqual(ChangelogMarkdownGenerator._escape(None), '') + self.assertEqual(ChangelogMarkdownGenerator._escape(''), '') + + def test_version_label(self): + gen = ChangelogMarkdownGenerator.__new__(ChangelogMarkdownGenerator) + self.assertEqual(gen._version_label('/orgs/CIEL/sources/CIEL/v20260101/'), 'v20260101') + self.assertEqual(gen._version_label(''), 'Unknown') diff --git a/core/sources/views.py b/core/sources/views.py index 53c6c084..04d2d902 100644 --- a/core/sources/views.py +++ b/core/sources/views.py @@ -658,12 +658,13 @@ class AbstractSourceVersionsDiffView(BaseAPIView, TaskMixin): changelog = False def get_objects(self): - version1_uri = self.request.data.get('version1') # older version - version2_uri = self.request.data.get('version2') # newer version + version1_uri = self.request.data.get('version1') # caller-supplied older version + version2_uri = self.request.data.get('version2') # caller-supplied newer version version1 = get_object_or_404(Source.objects.filter(uri=version1_uri)) version2 = get_object_or_404(Source.objects.filter(uri=version2_uri)) - if version1.created_at > version2.created_at: - raise Http400('version1 must be older than version2') + # Auto-swap to ensure version1 is always the older release using 
created_at. + if version1.created_at and version2.created_at and version1.created_at > version2.created_at: + version1, version2 = version2, version1 self.check_object_permissions(self.request, version1) self.check_object_permissions(self.request, version2) return version1, version2 @@ -674,12 +675,17 @@ def get_verbosity(self): except: # pylint: disable=bare-except return 0 - def post(self, _): + def get_format_type(self): + # NOTE: DRF reserves '?format=' for content negotiation, so we use '?output=' instead. + return self.request.query_params.get('output', 'json') + + def post(self, _, **kwargs): # pylint: disable=unused-argument version1, version2 = self.get_objects() ignore_cache = bool(version1.is_head or version2.is_head) + format_type = self.get_format_type() if self.changelog else 'json' result = self.perform_task( source_version_compare, - (version1.uri, version2.uri, self.changelog, self.get_verbosity(), ignore_cache) + (version1.uri, version2.uri, self.changelog, self.get_verbosity(), ignore_cache, format_type) ) if isinstance(result, Response):