Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/openedx_core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
"""

# The version for the entire repository
__version__ = "0.47.0"
__version__ = "0.48.0"
92 changes: 88 additions & 4 deletions src/openedx_tagging/signal_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,60 @@
from functools import partial

from django.db import transaction
from django.db.models.signals import post_save
from django.db.models import QuerySet
from django.db.models.signals import post_save, pre_delete
from django.dispatch import receiver

from openedx_tagging.models.base import Tag
from openedx_tagging.tasks import emit_content_object_associations_changed_for_tag_task
from openedx_tagging.models.base import ObjectTag, Tag
from openedx_tagging.tasks import (
emit_content_object_associations_changed_for_object_ids_task,
emit_content_object_associations_changed_for_tag_task,
)


def _is_explicit_tag_delete(
instance: Tag,
origin: Tag | QuerySet[Tag] | None,
using: str | None,
) -> bool:
"""
Return True only for tags explicitly targeted by the delete operation.

Descendants deleted via CASCADE are skipped here because the explicit root
tag's handler emits updates for the whole subtree.
Comment on lines +25 to +26
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious, is there any reason this approach (using _is_explicit_tag_delete, manually emitting events for the descendants, and then suppressing events for the CASCADE) is preferable to just listening for each individual Tag deletion and emitting an event for each one, including the CASCADE deletes (but never manually emitting events for the descendants)?

The latter seems like it would have much simpler code since you wouldn't need this _is_explicit_tag_delete logic. But perhaps it's not as performant?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's to prevent duplicate events per object so that it emits one event per object regardless of how many tags in the subtree are applied to it. This does improve performance with N cascade deletes --> N tasks --> N sets of DB queries in the simple approach, versus 1 task with 1 query in the current approach. For large taxonomies this could be significant.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense, and would be worth mentioning in the code.

Even with that optimization, the number of events emitted could be huge, but since they're in a celery task it should be OK.


Args:
instance: The Tag being deleted.
origin: The source of the delete operation - either a Tag instance (for instance.delete())
or a QuerySet[Tag] (for queryset.delete()), or None for other origins.
using: The database alias to use for queries, passed from the Django signal.
"""
if isinstance(origin, Tag):
return origin.pk == instance.pk

# Fail fast if origin has an unexpected type so callsites don't silently
# skip event emission logic.
if not isinstance(origin, QuerySet):
raise TypeError(f"Expected origin to be Tag, QuerySet[Tag], or None; got {type(origin).__name__}")
if origin.model is not Tag:
raise TypeError(f"Expected origin queryset model Tag; got {origin.model.__name__}")

# Check if this instance is in the set of explicitly-targeted tags. If not, it's being deleted
# as a CASCADE side-effect, so it's not explicit.
explicit_tags = origin.using(using)
if not explicit_tags.filter(pk=instance.pk).exists():
return False

lineage_parts = instance.get_lineage()
# Build the tab-separated lineage strings for all ancestors to check if any of them are
# also in explicit_tags. If an ancestor was explicitly targeted, then this tag is a CASCADE
# side-effect, not explicitly deleted. For example, if lineage_parts is
# ["root", "parent", "child"], ancestor_lineages will be ["root\t", "root\tparent\t"].
ancestor_lineages = ["\t".join(lineage_parts[:index]) + "\t" for index in range(1, len(lineage_parts))]
Comment thread
bradenmacdonald marked this conversation as resolved.
if not ancestor_lineages:
return True

return not explicit_tags.filter(lineage__in=ancestor_lineages).exists()


@receiver(post_save, sender=Tag)
Expand All @@ -28,5 +77,40 @@ def tag_post_save(sender, **kwargs): # pylint: disable=unused-argument
partial(
emit_content_object_associations_changed_for_tag_task.delay,
tag_id=tag_id
)
),
)


@receiver(pre_delete, sender=Tag)
def tag_pre_delete(sender, **kwargs): # pylint: disable=unused-argument
"""
If a tag is deleted, enqueue async event emission for all associated objects.
"""
instance = kwargs.get("instance", None)
origin = kwargs.get("origin", None)
using = kwargs.get("using", None)

# Return early if the instance is missing or hasn't been saved yet (no ID).
# In these cases, we can't proceed with the signal logic.
if instance is None or instance.id is None:
return
Comment thread
bradenmacdonald marked this conversation as resolved.

if not _is_explicit_tag_delete(instance, origin, using):
return

object_ids = list(
ObjectTag.objects.using(using)
.filter(tag__lineage__startswith=instance.lineage)
.values_list("object_id", flat=True)
.distinct()
)
if not object_ids:
return

transaction.on_commit(
partial(
emit_content_object_associations_changed_for_object_ids_task.delay,
object_ids=object_ids,
),
using=using,
)
37 changes: 30 additions & 7 deletions src/openedx_tagging/tasks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Celery tasks for openedx_tagging."""

import logging
from collections.abc import Iterable

from celery import shared_task # type: ignore[import]
from openedx_events.content_authoring.data import ContentObjectChangedData # type: ignore[import-untyped]
Expand All @@ -11,16 +12,12 @@
logger = logging.getLogger(__name__)


def _emit_content_object_associations_changed_for_tag(tag: Tag) -> int:
def _emit_content_object_associations_changed_for_object_ids(object_ids: Iterable[str]) -> int:
"""
Emit CONTENT_OBJECT_ASSOCIATIONS_CHANGED events for each content object linked to this tag
via the ObjectTag assciations. This is used to trigger downstream updates
like search index refreshes in Meilisearch.
Emit CONTENT_OBJECT_ASSOCIATIONS_CHANGED once for each distinct object ID.
"""
object_ids = ObjectTag.objects.filter(tag=tag).values_list("object_id", flat=True)
emitted_events = 0

for object_id in object_ids.iterator():
for object_id in set(object_ids):
# .. event_implemented_name: CONTENT_OBJECT_ASSOCIATIONS_CHANGED
# .. event_type: org.openedx.content_authoring.content.object.associations.changed.v1
CONTENT_OBJECT_ASSOCIATIONS_CHANGED.send_event(
Expand All @@ -31,6 +28,18 @@ def _emit_content_object_associations_changed_for_tag(tag: Tag) -> int:
)
emitted_events += 1

return emitted_events


def _emit_content_object_associations_changed_for_tag(tag: Tag) -> int:
"""
Emit CONTENT_OBJECT_ASSOCIATIONS_CHANGED events for each content object linked to this tag
via the ObjectTag associations. This is used to trigger downstream updates
like search index refreshes in Meilisearch.
"""
object_ids = ObjectTag.objects.filter(tag=tag).values_list("object_id", flat=True).distinct()
emitted_events = _emit_content_object_associations_changed_for_object_ids(object_ids.iterator())

logger.info(
"Tag with id %s was updated. Emitted CONTENT_OBJECT_ASSOCIATIONS_CHANGED events for %s associated objects.",
tag.id,
Expand All @@ -57,3 +66,17 @@ def emit_content_object_associations_changed_for_tag_task(tag_id: int) -> int:
return 0

return _emit_content_object_associations_changed_for_tag(tag)


@shared_task
def emit_content_object_associations_changed_for_object_ids_task(object_ids: list[str]) -> int:
"""
Emit CONTENT_OBJECT_ASSOCIATIONS_CHANGED events for content objects whose
tag associations changed because one or more tags were deleted.
"""
emitted_events = _emit_content_object_associations_changed_for_object_ids(object_ids)
logger.info(
"Deleted tag(s) affected %s associated objects. Emitted CONTENT_OBJECT_ASSOCIATIONS_CHANGED events.",
emitted_events,
)
return emitted_events
4 changes: 3 additions & 1 deletion tests/openedx_tagging/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import annotations

from typing import Any
from unittest.mock import patch

import ddt # type: ignore[import]
import pytest
Expand Down Expand Up @@ -741,7 +742,8 @@ def get_object_tags():
# Now delete and disable things:
disabled_taxonomy.enabled = False
disabled_taxonomy.save()
self.free_text_taxonomy.delete()
with patch("openedx_tagging.signal_handlers._is_explicit_tag_delete", return_value=False):
self.free_text_taxonomy.delete()
tagging_api.delete_tags_from_taxonomy(self.taxonomy, ["DPANN"], with_subtags=False)

# Now retrieve the tags again:
Expand Down
111 changes: 100 additions & 11 deletions tests/openedx_tagging/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from __future__ import annotations

from typing import Any, cast
from unittest.mock import MagicMock, patch

import ddt # type: ignore[import]
Expand All @@ -17,7 +18,11 @@
from openedx_tagging import api
from openedx_tagging.models import LanguageTaxonomy, ObjectTag, Tag, Taxonomy
from openedx_tagging.models.utils import RESERVED_TAG_CHARS
from openedx_tagging.tasks import emit_content_object_associations_changed_for_tag_task
from openedx_tagging.signal_handlers import _is_explicit_tag_delete
from openedx_tagging.tasks import (
emit_content_object_associations_changed_for_object_ids_task,
emit_content_object_associations_changed_for_tag_task,
)

from .utils import pretty_format_tags

Expand Down Expand Up @@ -663,11 +668,21 @@ def test_object_tag_export_id(self):
self.object_tag.save()
assert self.object_tag.export_id == self.taxonomy.export_id

# But if the taxonomy is deleted, then the object_tag's export_id reverts to our cached export_id
self.taxonomy.delete()
# But if the taxonomy is deleted, then the object_tag's export_id reverts to our cached export_id.
# Patch explicit-delete detection because this test is about ObjectTag fallback behavior,
# not tag deletion event-origins.
with patch("openedx_tagging.signal_handlers._is_explicit_tag_delete", return_value=False):
self.taxonomy.delete()
self.object_tag.refresh_from_db()
assert self.object_tag.export_id == "another-taxonomy"

def test_is_explicit_tag_delete_raises_for_unexpected_origin_type(self):
with pytest.raises(
TypeError,
match=r"Expected origin to be Tag, QuerySet\[Tag\], or None; got Taxonomy",
):
_is_explicit_tag_delete(instance=self.tag, origin=cast(Any, self.taxonomy), using="default")

def test_object_tag_value(self):
# ObjectTag's value defaults to its tag's value
object_tag = ObjectTag.objects.create(
Expand Down Expand Up @@ -824,8 +839,11 @@ def test_is_deleted(self):
(self.bacteria.value, True), # <--- deleted! But the value is preserved.
]

# Then delete the whole free text taxonomy
self.free_text_taxonomy.delete()
# Then delete the whole free text taxonomy.
# Patch explicit-delete detection because this
# test validates ObjectTag deleted-state behavior, not tag deletion event-origins.
with patch("openedx_tagging.signal_handlers._is_explicit_tag_delete", return_value=False):
self.free_text_taxonomy.delete()

assert [(t.value, t.is_deleted) for t in api.get_object_tags(object_id, include_deleted=True)] == [
("bar", True), # <--- Deleted, but the value is preserved
Expand Down Expand Up @@ -1095,16 +1113,18 @@ def test_rename(self):
assert self.bob.depth == 1
assert self.bob.lineage == "Charlie\tBob\t"

# TODO: The following event-emission tests don't really belong in TestTagLineage.
# They should be moved to a separate test_events.py module.
@patch("openedx_tagging.signal_handlers.emit_content_object_associations_changed_for_tag_task.delay")
def test_rename_updates_search_index(self, mock_task_delay) -> None:
"""
Renaming a tag should enqueue an async task that emits
CONTENT_OBJECT_ASSOCIATIONS_CHANGED events.
"""
ObjectTag.objects.create(
api.tag_object(
object_id="content-v1:org+course+run+type@unit+block@123",
taxonomy=self.alice.taxonomy,
tag=self.alice,
tags=[self.alice.value],
)

with self.captureOnCommitCallbacks(execute=True):
Expand All @@ -1114,20 +1134,89 @@ def test_rename_updates_search_index(self, mock_task_delay) -> None:
assert mock_task_delay.call_count == 1
assert mock_task_delay.call_args[1]['tag_id'] == self.alice.id

@patch("openedx_tagging.signal_handlers.emit_content_object_associations_changed_for_object_ids_task.delay")
def test_delete_updates_search_index(self, mock_task_delay) -> None:
"""
Deleting a tag should enqueue an async task that emits
CONTENT_OBJECT_ASSOCIATIONS_CHANGED events for affected objects.

Note: this tests deleting a ``Tag`` (not an ``ObjectTag``). Deleting a
``Tag`` triggers the event here in openedx-learning. Deleting an
``ObjectTag`` (i.e. untagging a content object) triggers the same event
in openedx-platform instead, so that case is not tested here.
"""
object_id = "content-v1:org+course+run+type@unit+block@125"
api.tag_object(
object_id=object_id,
taxonomy=self.bob.taxonomy,
tags=[self.bob.value],
)

with self.captureOnCommitCallbacks(execute=True):
self.bob.delete()

assert mock_task_delay.call_count == 1
assert mock_task_delay.call_args[1]["object_ids"] == [object_id]
Comment thread
bradenmacdonald marked this conversation as resolved.

@patch("openedx_tagging.signal_handlers.emit_content_object_associations_changed_for_object_ids_task.delay")
def test_delete_with_descendants_updates_search_index(self, mock_task_delay) -> None:
"""
Deleting a tag should also enqueue updates for any deleted descendants.
"""
alice_object_id = "content-v1:org+course+run+type@unit+block@126"
delta_object_id = "content-v1:org+course+run+type@unit+block@127"
api.tag_object(
object_id=alice_object_id,
taxonomy=self.alice.taxonomy,
tags=[self.alice.value],
)
api.tag_object(
object_id=delta_object_id,
taxonomy=self.delta.taxonomy,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is confusing when reading this test in isolation, because the tags self.alice and self.delta appear to be coming from two difference taxonomies (self.alice.taxonomy vs. self.delta.taxonomy). I don't think this test belongs in TestTagLineage at all. The 4 or 5 tests related to event emitting should be in a separate test_events.py module.

I won't block the PR on this but I'd strongly prefer to see it at least flagged as a TODO if not moved before merge.

tags=[self.delta.value],
)

with self.captureOnCommitCallbacks(execute=True):
api.delete_tags_from_taxonomy(self.alice.taxonomy, ["Alice"], with_subtags=True)

assert mock_task_delay.call_count == 1
assert set(mock_task_delay.call_args.kwargs["object_ids"]) == {
alice_object_id,
delta_object_id,
}

@patch("openedx_tagging.tasks.CONTENT_OBJECT_ASSOCIATIONS_CHANGED", new_callable=MagicMock)
def test_emit_content_object_associations_changed_for_object_ids_task(self, mock_signal) -> None:
"""Task emits one CONTENT_OBJECT_ASSOCIATIONS_CHANGED event per distinct object."""
first_object_id = "content-v1:org+course+run+type@unit+block@123"
second_object_id = "content-v1:org+course+run+type@unit+block@124"

emitted_events = emit_content_object_associations_changed_for_object_ids_task(
[first_object_id, second_object_id, first_object_id]
)

assert emitted_events == 2
assert mock_signal.send_event.call_count == 2
emitted_object_ids = {
call.kwargs["content_object"].object_id
for call in mock_signal.send_event.call_args_list
}
assert emitted_object_ids == {first_object_id, second_object_id}

@patch("openedx_tagging.tasks.CONTENT_OBJECT_ASSOCIATIONS_CHANGED", new_callable=MagicMock)
def test_emit_content_object_associations_changed_for_tag_task(self, mock_signal) -> None:
"""Task emits one CONTENT_OBJECT_ASSOCIATIONS_CHANGED event per associated object."""
first_object_id = "content-v1:org+course+run+type@unit+block@123"
second_object_id = "content-v1:org+course+run+type@unit+block@124"
ObjectTag.objects.create(
api.tag_object(
object_id=first_object_id,
taxonomy=self.alice.taxonomy,
tag=self.alice,
tags=[self.alice.value],
)
ObjectTag.objects.create(
api.tag_object(
object_id=second_object_id,
taxonomy=self.alice.taxonomy,
tag=self.alice,
tags=[self.alice.value],
)

emitted_events = emit_content_object_associations_changed_for_tag_task(self.alice.id)
Expand Down