From deaa01f430e7017c2f7620524c8e1ef8a4de3967 Mon Sep 17 00:00:00 2001 From: Mary Gwozdz Date: Wed, 29 Apr 2026 17:14:46 -0700 Subject: [PATCH 1/8] feat: switch approach to clearing nested tag fields Co-authored-by: Copilot --- .../djangoapps/content/search/documents.py | 17 ++- .../content/search/tests/test_api.py | 120 ++++++++++++++---- 2 files changed, 103 insertions(+), 34 deletions(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 678281191eeb..a04b84f0d7c6 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -380,15 +380,18 @@ def searchable_doc_tags(object_id: OpaqueKey) -> dict: # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the # tags for each component separately. all_tags = tagging_api.get_object_tags(str(object_id)).all() - if not all_tags: - # Clear out tags in the index when unselecting all tags for the block, otherwise - # it would remain the last value if a cleared Fields.tags field is not included - return {Fields.tags: {}} result = { Fields.tags_taxonomy: [], Fields.tags_level0: [], - # ... other levels added as needed + Fields.tags_level1: [], + Fields.tags_level2: [], + Fields.tags_level3: [], } + if not all_tags: + # Clear out tags in the index when unselecting all tags for the block, otherwise + # it would remain the last value if a cleared Fields.tags field is not included + return {Fields.tags: result} + for obj_tag in all_tags: # Add the taxonomy name: if obj_tag.taxonomy.name not in result[Fields.tags_taxonomy]: @@ -594,8 +597,8 @@ def searchable_doc_for_container( found using faceted search. If no container is found for the given container key, the returned document - will contain only basic information derived from the container key, and no - Fields.type value will be included in the returned dict. + will contain only basic information derived from the container key, and some + fields like Fields.display_name will be missing from the returned dict. """ doc = { Fields.id: meili_id_from_opaque_key(container_key), diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py index e28d62f682dc..3fd6859cafd8 100644 --- a/openedx/core/djangoapps/content/search/tests/test_api.py +++ b/openedx/core/djangoapps/content/search/tests/test_api.py @@ -33,6 +33,13 @@ SearchAccess = {} STUDIO_SEARCH_ENDPOINT_URL = "/api/content_search/v2/studio/" +EMPTY_TAGS = { + "taxonomy": [], + "level0": [], + "level1": [], + "level2": [], + "level3": [], +} @ddt.ddt @@ -342,29 +349,29 @@ def test_reindex_meilisearch(self, mock_meilisearch) -> None: # Add tags field to doc, since reindex calls includes tags doc_sequential = copy.deepcopy(self.doc_sequential) - doc_sequential["tags"] = {} + doc_sequential["tags"] = copy.deepcopy(EMPTY_TAGS) doc_vertical = copy.deepcopy(self.doc_vertical) - doc_vertical["tags"] = {} + doc_vertical["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem1 = copy.deepcopy(self.doc_problem1) - doc_problem1["tags"] = {} + doc_problem1["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem1["collections"] = {'display_name': [], 'key': []} doc_problem1["units"] = {'display_name': [], 'key': []} doc_problem2 = copy.deepcopy(self.doc_problem2) - doc_problem2["tags"] = {} + doc_problem2["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem2["collections"] = {'display_name': [], 'key': []} doc_problem2["units"] = {'display_name': [], 'key': []} doc_collection = copy.deepcopy(self.collection_dict) - doc_collection["tags"] = {} + doc_collection["tags"] = copy.deepcopy(EMPTY_TAGS) doc_unit = copy.deepcopy(self.unit_dict) - doc_unit["tags"] = {} + doc_unit["tags"] = copy.deepcopy(EMPTY_TAGS) doc_unit["collections"] = {'display_name': [], 'key': []} doc_unit["subsections"] = {'display_name': ['Subsection 1'], 'key': ['lct:org1:lib:subsection:subsection-1']} doc_subsection = copy.deepcopy(self.subsection_dict) - doc_subsection["tags"] = {} + doc_subsection["tags"] = copy.deepcopy(EMPTY_TAGS) doc_subsection["collections"] = {'display_name': [], 'key': []} doc_subsection["sections"] = {'display_name': ['Section 1'], 'key': ['lct:org1:lib:section:section-1']} doc_section = copy.deepcopy(self.section_dict) - doc_section["tags"] = {} + doc_section["tags"] = copy.deepcopy(EMPTY_TAGS) doc_section["collections"] = {'display_name': [], 'key': []} api.rebuild_index() @@ -384,29 +391,29 @@ def test_reindex_meilisearch_incremental(self, mock_meilisearch) -> None: # Add tags field to doc, since reindex calls includes tags doc_sequential = copy.deepcopy(self.doc_sequential) - doc_sequential["tags"] = {} + doc_sequential["tags"] = copy.deepcopy(EMPTY_TAGS) doc_vertical = copy.deepcopy(self.doc_vertical) - doc_vertical["tags"] = {} + doc_vertical["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem1 = copy.deepcopy(self.doc_problem1) - doc_problem1["tags"] = {} + doc_problem1["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem1["collections"] = {"display_name": [], "key": []} doc_problem1["units"] = {'display_name': [], 'key': []} doc_problem2 = copy.deepcopy(self.doc_problem2) - doc_problem2["tags"] = {} + doc_problem2["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem2["collections"] = {"display_name": [], "key": []} doc_problem2["units"] = {'display_name': [], 'key': []} doc_collection = copy.deepcopy(self.collection_dict) - doc_collection["tags"] = {} + doc_collection["tags"] = copy.deepcopy(EMPTY_TAGS) doc_unit = copy.deepcopy(self.unit_dict) - doc_unit["tags"] = {} + doc_unit["tags"] = copy.deepcopy(EMPTY_TAGS) doc_unit["collections"] = {"display_name": [], "key": []} doc_unit["subsections"] = {'display_name': ['Subsection 1'], 'key': ['lct:org1:lib:subsection:subsection-1']} doc_subsection = copy.deepcopy(self.subsection_dict) - doc_subsection["tags"] = {} + doc_subsection["tags"] = copy.deepcopy(EMPTY_TAGS) doc_subsection["collections"] = {'display_name': [], 'key': []} doc_subsection["sections"] = {'display_name': ['Section 1'], 'key': ['lct:org1:lib:section:section-1']} doc_section = copy.deepcopy(self.section_dict) - doc_section["tags"] = {} + doc_section["tags"] = copy.deepcopy(EMPTY_TAGS) doc_section["collections"] = {'display_name': [], 'key': []} api.rebuild_index(incremental=True) @@ -524,11 +531,11 @@ def test_reindex_meilisearch_library_block_error(self, mock_meilisearch) -> None # Add tags field to doc, since reindex calls includes tags doc_sequential = copy.deepcopy(self.doc_sequential) - doc_sequential["tags"] = {} + doc_sequential["tags"] = copy.deepcopy(EMPTY_TAGS) doc_vertical = copy.deepcopy(self.doc_vertical) - doc_vertical["tags"] = {} + doc_vertical["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem2 = copy.deepcopy(self.doc_problem2) - doc_problem2["tags"] = {} + doc_problem2["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem2["collections"] = {'display_name': [], 'key': []} doc_problem2["units"] = {'display_name': [], 'key': []} @@ -611,14 +618,20 @@ def test_index_xblock_tags(self, mock_meilisearch) -> None: "id": self.doc_sequential["id"], "tags": { 'taxonomy': ['A'], - 'level0': ['A > one', 'A > two'] + 'level0': ['A > one', 'A > two'], + 'level1': [], + 'level2': [], + 'level3': [], } } doc_sequential_with_tags2 = { "id": self.doc_sequential["id"], "tags": { 'taxonomy': ['A', 'B'], - 'level0': ['A > one', 'A > two', 'B > four', 'B > three'] + 'level0': ['A > one', 'A > two', 'B > four', 'B > three'], + 'level1': [], + 'level2': [], + 'level3': [], } } @@ -631,6 +644,41 @@ def test_index_xblock_tags(self, mock_meilisearch) -> None: any_order=True, ) + @override_settings(MEILISEARCH_ENABLED=True) + def test_remove_xblock_tag_clears_index_tags(self, mock_meilisearch) -> None: + """ + Test that removing tags from an XBlock clears tag facets in the index. + """ + # Add a tag first so we can verify it is later cleared. + tagging_api.tag_object(str(self.sequential.usage_key), self.taxonomyA, ["one"]) + + # Remove all tags for taxonomyA from this object. + tagging_api.tag_object(str(self.sequential.usage_key), self.taxonomyA, []) + + doc_with_tag = { + "id": self.doc_sequential["id"], + "tags": { + "taxonomy": ["A"], + "level0": ["A > one"], + "level1": [], + "level2": [], + "level3": [], + }, + } + doc_without_tags = { + "id": self.doc_sequential["id"], + "tags": copy.deepcopy(EMPTY_TAGS), + } + + assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2 + mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls( + [ + call([doc_with_tag]), + call([doc_without_tags]), + ], + any_order=True, + ) + @override_settings(MEILISEARCH_ENABLED=True) def test_delete_index_xblock(self, mock_meilisearch) -> None: """ @@ -670,14 +718,20 @@ def test_index_library_block_tags(self, mock_meilisearch) -> None: "id": self.doc_problem1["id"], "tags": { 'taxonomy': ['A'], - 'level0': ['A > one', 'A > two'] + 'level0': ['A > one', 'A > two'], + 'level1': [], + 'level2': [], + 'level3': [], } } doc_problem_with_tags2 = { "id": self.doc_problem1["id"], "tags": { 'taxonomy': ['A', 'B'], - 'level0': ['A > one', 'A > two', 'B > four', 'B > three'] + 'level0': ['A > one', 'A > two', 'B > four', 'B > three'], + 'level1': [], + 'level2': [], + 'level3': [], } } @@ -876,14 +930,20 @@ def test_index_tags_in_collections(self, mock_meilisearch) -> None: "id": "lib-collectionorg1libmycol-5b647617", "tags": { 'taxonomy': ['A'], - 'level0': ['A > one', 'A > two'] + 'level0': ['A > one', 'A > two'], + 'level1': [], + 'level2': [], + 'level3': [], } } doc_collection_with_tags2 = { "id": "lib-collectionorg1libmycol-5b647617", "tags": { 'taxonomy': ['A', 'B'], - 'level0': ['A > one', 'A > two', 'B > four', 'B > three'] + 'level0': ['A > one', 'A > two', 'B > four', 'B > three'], + 'level1': [], + 'level2': [], + 'level3': [], } } @@ -1148,14 +1208,20 @@ def test_index_tags_in_containers(self, container_type, container_id, mock_meili "id": container_id, "tags": { 'taxonomy': ['A'], - 'level0': ['A > one', 'A > two'] + 'level0': ['A > one', 'A > two'], + 'level1': [], + 'level2': [], + 'level3': [], } } doc_unit_with_tags2 = { "id": container_id, "tags": { 'taxonomy': ['A', 'B'], - 'level0': ['A > one', 'A > two', 'B > four', 'B > three'] + 'level0': ['A > one', 'A > two', 'B > four', 'B > three'], + 'level1': [], + 'level2': [], + 'level3': [], } } From c47df6749f4a7465fd4db2ff5b4f520e7de2971a Mon Sep 17 00:00:00 2001 From: Mary Gwozdz Date: Wed, 29 Apr 2026 17:23:07 -0700 Subject: [PATCH 2/8] feat: bump openedx-core version --- requirements/constraints.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 775df3b74554..cc5c2e579e13 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -65,7 +65,6 @@ numpy<2.0.0 # from openedx-maintainers. Minor and patch versions can roll out automatically. # Issue for unpinning: https://github.com/openedx/edx-platform/issues/35269 openedx-core<2 - # Date: 2023-11-29 # Open AI version 1.0.0 dropped support for openai.ChatCompletion which is currently in use in enterprise. # Issue for unpinning: https://github.com/openedx/edx-platform/issues/35268 From 73224b72c6ad6131846ac82c45bc37626c0cfb25 Mon Sep 17 00:00:00 2001 From: Mary Gwozdz Date: Thu, 30 Apr 2026 06:43:52 -0700 Subject: [PATCH 3/8] fix: test_documents should expect output on all levels for tags Co-authored-by: Copilot --- .../content/search/tests/test_documents.py | 53 ++++++++++++++++--- 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py index ee4a1d613f61..5bba56d4b7df 100644 --- a/openedx/core/djangoapps/content/search/tests/test_documents.py +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -229,6 +229,9 @@ def test_problem_block(self): "tags": { "taxonomy": ["Difficulty"], "level0": ["Difficulty > Easy"], + "level1": [], + "level2": [], + "level3": [], }, } @@ -276,6 +279,7 @@ def test_html_block(self): "level0": ["Difficulty > Normal", "Subject > Hypertext", "Subject > Linguistics"], "level1": ["Subject > Hypertext > Jump Links", "Subject > Linguistics > Asian Languages"], "level2": ["Subject > Linguistics > Asian Languages > Chinese"], + "level3": [], }, } @@ -285,7 +289,9 @@ def test_video_block_untagged(self): """ block_usage_key = self.toy_course_key.make_usage_key("video", "Welcome") block = self.store.get_item(block_usage_key) - doc = searchable_doc_for_course_block(block) + doc = {} + doc.update(searchable_doc_for_course_block(block)) + doc.update(searchable_doc_tags(block.usage_key)) assert doc == { "id": "block-v1edxtoy2012_falltypevideoblockwelcome-0c9fd626", "type": "course_block", @@ -307,7 +313,13 @@ def test_video_block_untagged(self): ], "content": {}, "modified": self.created_date.timestamp(), - # This video has no tags. + "tags": { + "taxonomy": [], + "level0": [], + "level1": [], + "level2": [], + "level3": [], + }, } def test_html_library_block(self): @@ -346,6 +358,9 @@ def test_html_library_block(self): "tags": { "taxonomy": ["Difficulty"], "level0": ["Difficulty > Normal"], + "level1": [], + "level2": [], + "level3": [], }, "publish_status": "never", } @@ -385,6 +400,9 @@ def test_html_published_library_block(self): "tags": { "taxonomy": ["Difficulty"], "level0": ["Difficulty > Normal"], + "level1": [], + "level2": [], + "level3": [], }, 'published': {'display_name': 'Text'}, "publish_status": "published", @@ -427,6 +445,9 @@ def test_html_published_library_block(self): "tags": { "taxonomy": ["Difficulty"], "level0": ["Difficulty > Normal"], + "level1": [], + "level2": [], + "level3": [], }, "published": {"display_name": "Text"}, "publish_status": "published", @@ -467,6 +488,9 @@ def test_html_published_library_block(self): "tags": { "taxonomy": ["Difficulty"], "level0": ["Difficulty > Normal"], + "level1": [], + "level2": [], + "level3": [], }, "published": { "display_name": "Text 2", @@ -506,7 +530,10 @@ def test_collection_with_library(self): "modified": 1680674828.0, 'tags': { 'taxonomy': ['Difficulty'], - 'level0': ['Difficulty > Normal'] + 'level0': ['Difficulty > Normal'], + 'level1': [], + 'level2': [], + 'level3': [], }, "published": { "num_children": 0 @@ -535,7 +562,10 @@ def test_collection_with_published_library(self): "modified": 1680674828.0, 'tags': { 'taxonomy': ['Difficulty'], - 'level0': ['Difficulty > Normal'] + 'level0': ['Difficulty > Normal'], + 'level1': [], + 'level2': [], + 'level3': [], }, "published": { "num_children": 1 @@ -579,7 +609,10 @@ def test_draft_container(self, container_name, container_slug, container_type, d "last_published": None, "tags": { "taxonomy": ["Difficulty"], - "level0": ["Difficulty > Normal"] + "level0": ["Difficulty > Normal"], + "level1": [], + "level2": [], + "level3": [], }, # "published" is not set since we haven't published it yet } @@ -627,7 +660,10 @@ def test_published_container(self): "last_published": 1680674828.0, "tags": { "taxonomy": ["Difficulty"], - "level0": ["Difficulty > Normal"] + "level0": ["Difficulty > Normal"], + "level1": [], + "level2": [], + "level3": [], }, "published": { "num_children": 1, @@ -701,7 +737,10 @@ def test_published_container_with_changes(self): "last_published": 1680674828.0, "tags": { "taxonomy": ["Difficulty"], - "level0": ["Difficulty > Normal"] + "level0": ["Difficulty > Normal"], + "level1": [], + "level2": [], + "level3": [], }, "published": { "num_children": 1, From 640dabadafa95a774be82f1f1187c08cdbfbce1c Mon Sep 17 00:00:00 2001 From: Mary Gwozdz Date: Thu, 30 Apr 2026 06:51:35 -0700 Subject: [PATCH 4/8] fix: put back whitespace --- requirements/constraints.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/constraints.txt b/requirements/constraints.txt index cc5c2e579e13..775df3b74554 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -65,6 +65,7 @@ numpy<2.0.0 # from openedx-maintainers. Minor and patch versions can roll out automatically. # Issue for unpinning: https://github.com/openedx/edx-platform/issues/35269 openedx-core<2 + # Date: 2023-11-29 # Open AI version 1.0.0 dropped support for openai.ChatCompletion which is currently in use in enterprise. # Issue for unpinning: https://github.com/openedx/edx-platform/issues/35268 From 51afb9144086a1c50bee9d7803c03f7efe1f0e2a Mon Sep 17 00:00:00 2001 From: Mary Gwozdz Date: Thu, 30 Apr 2026 08:31:18 -0700 Subject: [PATCH 5/8] fix: trigger rebuild From 691a4393a68c35afd7092f5df6d203edecdcb7e9 Mon Sep 17 00:00:00 2001 From: Mary Gwozdz Date: Thu, 30 Apr 2026 10:51:30 -0700 Subject: [PATCH 6/8] fix: trigger rebuild From 974b562fbea776e0ee17b9295f6004fb9c378fc8 Mon Sep 17 00:00:00 2001 From: Mary Gwozdz Date: Thu, 30 Apr 2026 11:09:15 -0700 Subject: [PATCH 7/8] fix: trigger rebuild From 4f61142d2c9a22d8d5065ec543f52e9913c6daef Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 30 Apr 2026 14:47:51 -0700 Subject: [PATCH 8/8] docs: expand comment about why we're using this approach --- openedx/core/djangoapps/content/search/documents.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index a04b84f0d7c6..4623b814d0a0 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -388,8 +388,13 @@ def searchable_doc_tags(object_id: OpaqueKey) -> dict: Fields.tags_level3: [], } if not all_tags: - # Clear out tags in the index when unselecting all tags for the block, otherwise - # it would remain the last value if a cleared Fields.tags field is not included + # Clear out tags in the index when the block has no tags (anymore) + # Note: due to a bug in Meilisearch, just setting `{Fields.tags: {}}` + # does not properly clear previously-set values within the tags field, + # like tags.level0, so we explicitly set `{Field.tags: { level0: [], ... }}` + # etc. to work around that and ensure tags are removed properly. + # In the future, if Meili's bug is fixed, we can perhaps simplify this + # and go back to just setting {Fields.tags: {}}` when there are no tags. return {Fields.tags: result} for obj_tag in all_tags: