diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 678281191eeb..4623b814d0a0 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -380,15 +380,23 @@ def searchable_doc_tags(object_id: OpaqueKey) -> dict: # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the # tags for each component separately. all_tags = tagging_api.get_object_tags(str(object_id)).all() - if not all_tags: - # Clear out tags in the index when unselecting all tags for the block, otherwise - # it would remain the last value if a cleared Fields.tags field is not included - return {Fields.tags: {}} result = { Fields.tags_taxonomy: [], Fields.tags_level0: [], - # ... other levels added as needed + Fields.tags_level1: [], + Fields.tags_level2: [], + Fields.tags_level3: [], } + if not all_tags: + # Clear out tags in the index when the block has no tags (anymore) + # Note: due to a bug in Meilisearch, just setting `{Fields.tags: {}}` + # does not properly clear previously-set values within the tags field, + # like tags.level0, so we explicitly set `{Field.tags: { level0: [], ... }}` + # etc. to work around that and ensure tags are removed properly. + # In the future, if Meili's bug is fixed, we can perhaps simplify this + # and go back to just setting {Fields.tags: {}}` when there are no tags. + return {Fields.tags: result} + for obj_tag in all_tags: # Add the taxonomy name: if obj_tag.taxonomy.name not in result[Fields.tags_taxonomy]: @@ -594,8 +602,8 @@ def searchable_doc_for_container( found using faceted search. If no container is found for the given container key, the returned document - will contain only basic information derived from the container key, and no - Fields.type value will be included in the returned dict. + will contain only basic information derived from the container key, and some + fields like Fields.display_name will be missing from the returned dict. """ doc = { Fields.id: meili_id_from_opaque_key(container_key), diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py index e28d62f682dc..3fd6859cafd8 100644 --- a/openedx/core/djangoapps/content/search/tests/test_api.py +++ b/openedx/core/djangoapps/content/search/tests/test_api.py @@ -33,6 +33,13 @@ SearchAccess = {} STUDIO_SEARCH_ENDPOINT_URL = "/api/content_search/v2/studio/" +EMPTY_TAGS = { + "taxonomy": [], + "level0": [], + "level1": [], + "level2": [], + "level3": [], +} @ddt.ddt @@ -342,29 +349,29 @@ def test_reindex_meilisearch(self, mock_meilisearch) -> None: # Add tags field to doc, since reindex calls includes tags doc_sequential = copy.deepcopy(self.doc_sequential) - doc_sequential["tags"] = {} + doc_sequential["tags"] = copy.deepcopy(EMPTY_TAGS) doc_vertical = copy.deepcopy(self.doc_vertical) - doc_vertical["tags"] = {} + doc_vertical["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem1 = copy.deepcopy(self.doc_problem1) - doc_problem1["tags"] = {} + doc_problem1["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem1["collections"] = {'display_name': [], 'key': []} doc_problem1["units"] = {'display_name': [], 'key': []} doc_problem2 = copy.deepcopy(self.doc_problem2) - doc_problem2["tags"] = {} + doc_problem2["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem2["collections"] = {'display_name': [], 'key': []} doc_problem2["units"] = {'display_name': [], 'key': []} doc_collection = copy.deepcopy(self.collection_dict) - doc_collection["tags"] = {} + doc_collection["tags"] = copy.deepcopy(EMPTY_TAGS) doc_unit = copy.deepcopy(self.unit_dict) - doc_unit["tags"] = {} + doc_unit["tags"] = copy.deepcopy(EMPTY_TAGS) doc_unit["collections"] = {'display_name': [], 'key': []} doc_unit["subsections"] = {'display_name': ['Subsection 1'], 'key': ['lct:org1:lib:subsection:subsection-1']} doc_subsection = copy.deepcopy(self.subsection_dict) - doc_subsection["tags"] = {} + doc_subsection["tags"] = copy.deepcopy(EMPTY_TAGS) doc_subsection["collections"] = {'display_name': [], 'key': []} doc_subsection["sections"] = {'display_name': ['Section 1'], 'key': ['lct:org1:lib:section:section-1']} doc_section = copy.deepcopy(self.section_dict) - doc_section["tags"] = {} + doc_section["tags"] = copy.deepcopy(EMPTY_TAGS) doc_section["collections"] = {'display_name': [], 'key': []} api.rebuild_index() @@ -384,29 +391,29 @@ def test_reindex_meilisearch_incremental(self, mock_meilisearch) -> None: # Add tags field to doc, since reindex calls includes tags doc_sequential = copy.deepcopy(self.doc_sequential) - doc_sequential["tags"] = {} + doc_sequential["tags"] = copy.deepcopy(EMPTY_TAGS) doc_vertical = copy.deepcopy(self.doc_vertical) - doc_vertical["tags"] = {} + doc_vertical["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem1 = copy.deepcopy(self.doc_problem1) - doc_problem1["tags"] = {} + doc_problem1["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem1["collections"] = {"display_name": [], "key": []} doc_problem1["units"] = {'display_name': [], 'key': []} doc_problem2 = copy.deepcopy(self.doc_problem2) - doc_problem2["tags"] = {} + doc_problem2["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem2["collections"] = {"display_name": [], "key": []} doc_problem2["units"] = {'display_name': [], 'key': []} doc_collection = copy.deepcopy(self.collection_dict) - doc_collection["tags"] = {} + doc_collection["tags"] = copy.deepcopy(EMPTY_TAGS) doc_unit = copy.deepcopy(self.unit_dict) - doc_unit["tags"] = {} + doc_unit["tags"] = copy.deepcopy(EMPTY_TAGS) doc_unit["collections"] = {"display_name": [], "key": []} doc_unit["subsections"] = {'display_name': ['Subsection 1'], 'key': ['lct:org1:lib:subsection:subsection-1']} doc_subsection = copy.deepcopy(self.subsection_dict) - doc_subsection["tags"] = {} + doc_subsection["tags"] = copy.deepcopy(EMPTY_TAGS) doc_subsection["collections"] = {'display_name': [], 'key': []} doc_subsection["sections"] = {'display_name': ['Section 1'], 'key': ['lct:org1:lib:section:section-1']} doc_section = copy.deepcopy(self.section_dict) - doc_section["tags"] = {} + doc_section["tags"] = copy.deepcopy(EMPTY_TAGS) doc_section["collections"] = {'display_name': [], 'key': []} api.rebuild_index(incremental=True) @@ -524,11 +531,11 @@ def test_reindex_meilisearch_library_block_error(self, mock_meilisearch) -> None # Add tags field to doc, since reindex calls includes tags doc_sequential = copy.deepcopy(self.doc_sequential) - doc_sequential["tags"] = {} + doc_sequential["tags"] = copy.deepcopy(EMPTY_TAGS) doc_vertical = copy.deepcopy(self.doc_vertical) - doc_vertical["tags"] = {} + doc_vertical["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem2 = copy.deepcopy(self.doc_problem2) - doc_problem2["tags"] = {} + doc_problem2["tags"] = copy.deepcopy(EMPTY_TAGS) doc_problem2["collections"] = {'display_name': [], 'key': []} doc_problem2["units"] = {'display_name': [], 'key': []} @@ -611,14 +618,20 @@ def test_index_xblock_tags(self, mock_meilisearch) -> None: "id": self.doc_sequential["id"], "tags": { 'taxonomy': ['A'], - 'level0': ['A > one', 'A > two'] + 'level0': ['A > one', 'A > two'], + 'level1': [], + 'level2': [], + 'level3': [], } } doc_sequential_with_tags2 = { "id": self.doc_sequential["id"], "tags": { 'taxonomy': ['A', 'B'], - 'level0': ['A > one', 'A > two', 'B > four', 'B > three'] + 'level0': ['A > one', 'A > two', 'B > four', 'B > three'], + 'level1': [], + 'level2': [], + 'level3': [], } } @@ -631,6 +644,41 @@ def test_index_xblock_tags(self, mock_meilisearch) -> None: any_order=True, ) + @override_settings(MEILISEARCH_ENABLED=True) + def test_remove_xblock_tag_clears_index_tags(self, mock_meilisearch) -> None: + """ + Test that removing tags from an XBlock clears tag facets in the index. + """ + # Add a tag first so we can verify it is later cleared. + tagging_api.tag_object(str(self.sequential.usage_key), self.taxonomyA, ["one"]) + + # Remove all tags for taxonomyA from this object. + tagging_api.tag_object(str(self.sequential.usage_key), self.taxonomyA, []) + + doc_with_tag = { + "id": self.doc_sequential["id"], + "tags": { + "taxonomy": ["A"], + "level0": ["A > one"], + "level1": [], + "level2": [], + "level3": [], + }, + } + doc_without_tags = { + "id": self.doc_sequential["id"], + "tags": copy.deepcopy(EMPTY_TAGS), + } + + assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2 + mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls( + [ + call([doc_with_tag]), + call([doc_without_tags]), + ], + any_order=True, + ) + @override_settings(MEILISEARCH_ENABLED=True) def test_delete_index_xblock(self, mock_meilisearch) -> None: """ @@ -670,14 +718,20 @@ def test_index_library_block_tags(self, mock_meilisearch) -> None: "id": self.doc_problem1["id"], "tags": { 'taxonomy': ['A'], - 'level0': ['A > one', 'A > two'] + 'level0': ['A > one', 'A > two'], + 'level1': [], + 'level2': [], + 'level3': [], } } doc_problem_with_tags2 = { "id": self.doc_problem1["id"], "tags": { 'taxonomy': ['A', 'B'], - 'level0': ['A > one', 'A > two', 'B > four', 'B > three'] + 'level0': ['A > one', 'A > two', 'B > four', 'B > three'], + 'level1': [], + 'level2': [], + 'level3': [], } } @@ -876,14 +930,20 @@ def test_index_tags_in_collections(self, mock_meilisearch) -> None: "id": "lib-collectionorg1libmycol-5b647617", "tags": { 'taxonomy': ['A'], - 'level0': ['A > one', 'A > two'] + 'level0': ['A > one', 'A > two'], + 'level1': [], + 'level2': [], + 'level3': [], } } doc_collection_with_tags2 = { "id": "lib-collectionorg1libmycol-5b647617", "tags": { 'taxonomy': ['A', 'B'], - 'level0': ['A > one', 'A > two', 'B > four', 'B > three'] + 'level0': ['A > one', 'A > two', 'B > four', 'B > three'], + 'level1': [], + 'level2': [], + 'level3': [], } } @@ -1148,14 +1208,20 @@ def test_index_tags_in_containers(self, container_type, container_id, mock_meili "id": container_id, "tags": { 'taxonomy': ['A'], - 'level0': ['A > one', 'A > two'] + 'level0': ['A > one', 'A > two'], + 'level1': [], + 'level2': [], + 'level3': [], } } doc_unit_with_tags2 = { "id": container_id, "tags": { 'taxonomy': ['A', 'B'], - 'level0': ['A > one', 'A > two', 'B > four', 'B > three'] + 'level0': ['A > one', 'A > two', 'B > four', 'B > three'], + 'level1': [], + 'level2': [], + 'level3': [], } } diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py index ee4a1d613f61..5bba56d4b7df 100644 --- a/openedx/core/djangoapps/content/search/tests/test_documents.py +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -229,6 +229,9 @@ def test_problem_block(self): "tags": { "taxonomy": ["Difficulty"], "level0": ["Difficulty > Easy"], + "level1": [], + "level2": [], + "level3": [], }, } @@ -276,6 +279,7 @@ def test_html_block(self): "level0": ["Difficulty > Normal", "Subject > Hypertext", "Subject > Linguistics"], "level1": ["Subject > Hypertext > Jump Links", "Subject > Linguistics > Asian Languages"], "level2": ["Subject > Linguistics > Asian Languages > Chinese"], + "level3": [], }, } @@ -285,7 +289,9 @@ def test_video_block_untagged(self): """ block_usage_key = self.toy_course_key.make_usage_key("video", "Welcome") block = self.store.get_item(block_usage_key) - doc = searchable_doc_for_course_block(block) + doc = {} + doc.update(searchable_doc_for_course_block(block)) + doc.update(searchable_doc_tags(block.usage_key)) assert doc == { "id": "block-v1edxtoy2012_falltypevideoblockwelcome-0c9fd626", "type": "course_block", @@ -307,7 +313,13 @@ def test_video_block_untagged(self): ], "content": {}, "modified": self.created_date.timestamp(), - # This video has no tags. + "tags": { + "taxonomy": [], + "level0": [], + "level1": [], + "level2": [], + "level3": [], + }, } def test_html_library_block(self): @@ -346,6 +358,9 @@ def test_html_library_block(self): "tags": { "taxonomy": ["Difficulty"], "level0": ["Difficulty > Normal"], + "level1": [], + "level2": [], + "level3": [], }, "publish_status": "never", } @@ -385,6 +400,9 @@ def test_html_published_library_block(self): "tags": { "taxonomy": ["Difficulty"], "level0": ["Difficulty > Normal"], + "level1": [], + "level2": [], + "level3": [], }, 'published': {'display_name': 'Text'}, "publish_status": "published", @@ -427,6 +445,9 @@ def test_html_published_library_block(self): "tags": { "taxonomy": ["Difficulty"], "level0": ["Difficulty > Normal"], + "level1": [], + "level2": [], + "level3": [], }, "published": {"display_name": "Text"}, "publish_status": "published", @@ -467,6 +488,9 @@ def test_html_published_library_block(self): "tags": { "taxonomy": ["Difficulty"], "level0": ["Difficulty > Normal"], + "level1": [], + "level2": [], + "level3": [], }, "published": { "display_name": "Text 2", @@ -506,7 +530,10 @@ def test_collection_with_library(self): "modified": 1680674828.0, 'tags': { 'taxonomy': ['Difficulty'], - 'level0': ['Difficulty > Normal'] + 'level0': ['Difficulty > Normal'], + 'level1': [], + 'level2': [], + 'level3': [], }, "published": { "num_children": 0 @@ -535,7 +562,10 @@ def test_collection_with_published_library(self): "modified": 1680674828.0, 'tags': { 'taxonomy': ['Difficulty'], - 'level0': ['Difficulty > Normal'] + 'level0': ['Difficulty > Normal'], + 'level1': [], + 'level2': [], + 'level3': [], }, "published": { "num_children": 1 @@ -579,7 +609,10 @@ def test_draft_container(self, container_name, container_slug, container_type, d "last_published": None, "tags": { "taxonomy": ["Difficulty"], - "level0": ["Difficulty > Normal"] + "level0": ["Difficulty > Normal"], + "level1": [], + "level2": [], + "level3": [], }, # "published" is not set since we haven't published it yet } @@ -627,7 +660,10 @@ def test_published_container(self): "last_published": 1680674828.0, "tags": { "taxonomy": ["Difficulty"], - "level0": ["Difficulty > Normal"] + "level0": ["Difficulty > Normal"], + "level1": [], + "level2": [], + "level3": [], }, "published": { "num_children": 1, @@ -701,7 +737,10 @@ def test_published_container_with_changes(self): "last_published": 1680674828.0, "tags": { "taxonomy": ["Difficulty"], - "level0": ["Difficulty > Normal"] + "level0": ["Difficulty > Normal"], + "level1": [], + "level2": [], + "level3": [], }, "published": { "num_children": 1,