From 44983b78348dc642cb1398d0eef5036e269921e5 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Tue, 26 May 2026 15:00:27 +0100 Subject: [PATCH 1/4] fix(Segment membership): Zero out (segment, env) pairs that stopped matching `refresh_project_segment_counts` builds the upsert list from `compute_segment_counts_for_project`, which only returns pairs with at least one match. Combined with `bulk_create(update_conflicts=True)` -- which only touches rows it's given -- a pair that previously matched but no longer does keeps the stale non-zero count forever. Saw this in practice on staging: editing a segment rule so that a previously-matching env dropped to zero left the old count visible in the UI through the next refresh cycle. Append explicit zero-count rows for every (segment, env) pair the project already has in the table but didn't appear in this run's result. The next bulk_create then drives those down to zero. Pairs that have never matched stay absent (no row pre-population). Log the zeroed-row count alongside the existing membership_counts__count on `refresh.project.completed` so this path is observable. beep boop --- api/segment_membership/tasks.py | 24 ++++++- .../test_unit_segment_membership_tasks.py | 62 +++++++++++++++++++ .../observability/_events-catalogue.md | 3 +- 3 files changed, 87 insertions(+), 2 deletions(-) diff --git a/api/segment_membership/tasks.py b/api/segment_membership/tasks.py index 13c056127948..08d83156a5ac 100644 --- a/api/segment_membership/tasks.py +++ b/api/segment_membership/tasks.py @@ -161,8 +161,29 @@ def refresh_project_segment_counts(project_id: int) -> None: now = timezone.now() for m in membership_counts: m.last_synced_at = now + + # Pairs that previously matched but no longer do (rule edited, traits + # drifted, identities deleted) won't appear in `membership_counts`. + # `bulk_create(update_conflicts=True)` only touches rows it's handed, + # so the stale count would linger. Append explicit zeros for those + # missing pairs so the next refresh writes them down to zero. + new_pairs = {(m.segment_id, m.environment_id) for m in membership_counts} + existing_pairs = SegmentMembershipCount.objects.filter( + segment__project=project, + ).values_list("segment_id", "environment_id") + zeroed_counts = [ + SegmentMembershipCount( + segment_id=segment_id, + environment_id=environment_id, + count=0, + last_synced_at=now, + ) + for segment_id, environment_id in existing_pairs + if (segment_id, environment_id) not in new_pairs + ] + SegmentMembershipCount.objects.bulk_create( - membership_counts, + membership_counts + zeroed_counts, update_conflicts=True, unique_fields=["segment", "environment"], update_fields=["count", "last_synced_at"], @@ -171,4 +192,5 @@ def refresh_project_segment_counts(project_id: int) -> None: "refresh.project.completed", project__id=project_id, membership_counts__count=len(membership_counts), + zeroed_counts__count=len(zeroed_counts), ) diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py index 9a57ae884f2a..ee75a4701411 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py @@ -1,5 +1,6 @@ from unittest.mock import MagicMock +from django.utils import timezone from pytest_django.fixtures import SettingsWrapper from pytest_mock import MockerFixture from pytest_structlog import StructuredLogCapture @@ -295,3 +296,64 @@ def test_refresh_project_segment_counts__counts_returned__upserts_per_env_rows( f":project_{project.id}" ) ) + + +def test_refresh_project_segment_counts__previously_matching_pair_drops_to_zero__row_zeroed( + mocker: MockerFixture, + settings: SettingsWrapper, + project: Project, + environment: Environment, + segment: Segment, + enable_features: EnableFeaturesFixture, +) -> None: + # Given a prior refresh that landed a non-zero count for (segment, env) + enable_features("segment_membership_inspection") + settings.CLICKHOUSE_ENABLED = True + SegmentMembershipCount.objects.create( + segment=segment, + environment=environment, + count=15, + last_synced_at=timezone.now(), + ) + cursor = MagicMock() + open_cursor = mocker.patch.object(tasks, "open_clickhouse_cursor") + open_cursor.return_value.__enter__.return_value = cursor + # ... and a new compute that returns no matches for the same pair (the + # rule was edited, the identity set drifted, etc.). + mocker.patch.object(tasks, "compute_segment_counts_for_project", return_value=[]) + + # When + refresh_project_segment_counts(project.id) + + # Then the stale row is reset to zero rather than left at 15. + membership = SegmentMembershipCount.objects.get( + segment=segment, environment=environment + ) + assert membership.count == 0 + assert membership.last_synced_at is not None + + +def test_refresh_project_segment_counts__never_matched_pair__no_row_written( + mocker: MockerFixture, + settings: SettingsWrapper, + project: Project, + environment: Environment, + segment: Segment, + enable_features: EnableFeaturesFixture, +) -> None: + # Given a project with no prior membership rows + enable_features("segment_membership_inspection") + settings.CLICKHOUSE_ENABLED = True + cursor = MagicMock() + open_cursor = mocker.patch.object(tasks, "open_clickhouse_cursor") + open_cursor.return_value.__enter__.return_value = cursor + mocker.patch.object(tasks, "compute_segment_counts_for_project", return_value=[]) + + # When + refresh_project_segment_counts(project.id) + + # Then no row is written: the zero-fill only resets previously-matching + # pairs, it doesn't pre-populate every (segment, env) combination. + assert not SegmentMembershipCount.objects.filter( + segment=segment, environment=environment + ).exists() diff --git a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md index 2ddb52d2f39b..8f10a0a51a02 100644 --- a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md +++ b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md @@ -368,11 +368,12 @@ Attributes: ### `segment_membership.refresh.project.completed` Logged at `info` from: - - `api/segment_membership/tasks.py:170` + - `api/segment_membership/tasks.py:191` Attributes: - `membership_counts.count` - `project.id` + - `zeroed_counts.count` ### `segment_membership.refresh.project.failed` From b60fd51ea7b82e383a1b58ce2c3724c16dcbd766 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Tue, 26 May 2026 15:15:49 +0100 Subject: [PATCH 2/4] refactor(Segment membership): Skip already-zeroed rows on refresh Filter `existing_pairs` by `count__gt=0` so steady-state refreshes don't re-write zeros over zeros. Per gemini-code-assist on #7600. beep boop --- api/segment_membership/tasks.py | 3 ++ .../test_unit_segment_membership_tasks.py | 35 +++++++++++++++++++ .../observability/_events-catalogue.md | 2 +- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/api/segment_membership/tasks.py b/api/segment_membership/tasks.py index 08d83156a5ac..baacc3a842e3 100644 --- a/api/segment_membership/tasks.py +++ b/api/segment_membership/tasks.py @@ -168,8 +168,11 @@ def refresh_project_segment_counts(project_id: int) -> None: # so the stale count would linger. Append explicit zeros for those # missing pairs so the next refresh writes them down to zero. new_pairs = {(m.segment_id, m.environment_id) for m in membership_counts} + # `count__gt=0` keeps steady-state refreshes off rows that are already + # zeroed -- writing zero over zero on every run is just churn. existing_pairs = SegmentMembershipCount.objects.filter( segment__project=project, + count__gt=0, ).values_list("segment_id", "environment_id") zeroed_counts = [ SegmentMembershipCount( diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py index ee75a4701411..249283b35545 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py @@ -1,3 +1,4 @@ +from datetime import timedelta from unittest.mock import MagicMock from django.utils import timezone @@ -333,6 +334,40 @@ def test_refresh_project_segment_counts__previously_matching_pair_drops_to_zero_ assert membership.last_synced_at is not None +def test_refresh_project_segment_counts__already_zeroed_pair__not_rewritten( + mocker: MockerFixture, + settings: SettingsWrapper, + project: Project, + environment: Environment, + segment: Segment, + enable_features: EnableFeaturesFixture, +) -> None: + # Given a row that's already at zero from a previous refresh + enable_features("segment_membership_inspection") + settings.CLICKHOUSE_ENABLED = True + zeroed_at = timezone.now() - timedelta(hours=1) + SegmentMembershipCount.objects.create( + segment=segment, + environment=environment, + count=0, + last_synced_at=zeroed_at, + ) + cursor = MagicMock() + open_cursor = mocker.patch.object(tasks, "open_clickhouse_cursor") + open_cursor.return_value.__enter__.return_value = cursor + mocker.patch.object(tasks, "compute_segment_counts_for_project", return_value=[]) + + # When a refresh runs that still produces zero matches + refresh_project_segment_counts(project.id) + + # Then the row is left alone -- zeroing zero again is just write churn. + membership = SegmentMembershipCount.objects.get( + segment=segment, environment=environment + ) + assert membership.count == 0 + assert membership.last_synced_at == zeroed_at + + def test_refresh_project_segment_counts__never_matched_pair__no_row_written( mocker: MockerFixture, settings: SettingsWrapper, diff --git a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md index 8f10a0a51a02..191e46e0a960 100644 --- a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md +++ b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md @@ -368,7 +368,7 @@ Attributes: ### `segment_membership.refresh.project.completed` Logged at `info` from: - - `api/segment_membership/tasks.py:191` + - `api/segment_membership/tasks.py:194` Attributes: - `membership_counts.count` From 8d259ad289e990b83a972cf2267ffd9334e7d4b2 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Tue, 26 May 2026 15:47:54 +0100 Subject: [PATCH 3/4] refactor(Segment membership): Delete pairs that stopped matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per matthewelwell on #7600: zero-out rows just for the next refresh to read them isn't useful — we're not surfacing history anywhere — so drop them outright. The frontend already renders no chip when a membership row is absent, which is the same UX the explicit zero was reaching for. beep boop --- api/segment_membership/tasks.py | 35 ++++++------- .../test_unit_segment_membership_tasks.py | 50 +++---------------- .../observability/_events-catalogue.md | 4 +- 3 files changed, 24 insertions(+), 65 deletions(-) diff --git a/api/segment_membership/tasks.py b/api/segment_membership/tasks.py index baacc3a842e3..314880cae28e 100644 --- a/api/segment_membership/tasks.py +++ b/api/segment_membership/tasks.py @@ -162,31 +162,26 @@ def refresh_project_segment_counts(project_id: int) -> None: for m in membership_counts: m.last_synced_at = now - # Pairs that previously matched but no longer do (rule edited, traits - # drifted, identities deleted) won't appear in `membership_counts`. - # `bulk_create(update_conflicts=True)` only touches rows it's handed, - # so the stale count would linger. Append explicit zeros for those - # missing pairs so the next refresh writes them down to zero. + # Drop pairs that stopped matching this run so the next refresh + # doesn't carry stale counts forward. new_pairs = {(m.segment_id, m.environment_id) for m in membership_counts} - # `count__gt=0` keeps steady-state refreshes off rows that are already - # zeroed -- writing zero over zero on every run is just churn. - existing_pairs = SegmentMembershipCount.objects.filter( - segment__project=project, - count__gt=0, - ).values_list("segment_id", "environment_id") - zeroed_counts = [ - SegmentMembershipCount( - segment_id=segment_id, - environment_id=environment_id, - count=0, - last_synced_at=now, + stale_ids = [ + pk + for pk, segment_id, environment_id in ( + SegmentMembershipCount.objects.filter( + segment__project=project + ).values_list("id", "segment_id", "environment_id") ) - for segment_id, environment_id in existing_pairs if (segment_id, environment_id) not in new_pairs ] + stale_deleted, _ = ( + SegmentMembershipCount.objects.filter(id__in=stale_ids).delete() + if stale_ids + else (0, {}) + ) SegmentMembershipCount.objects.bulk_create( - membership_counts + zeroed_counts, + membership_counts, update_conflicts=True, unique_fields=["segment", "environment"], update_fields=["count", "last_synced_at"], @@ -195,5 +190,5 @@ def refresh_project_segment_counts(project_id: int) -> None: "refresh.project.completed", project__id=project_id, membership_counts__count=len(membership_counts), - zeroed_counts__count=len(zeroed_counts), + stale_counts__count=stale_deleted, ) diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py index 249283b35545..3eff7ba86c38 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py @@ -1,4 +1,3 @@ -from datetime import timedelta from unittest.mock import MagicMock from django.utils import timezone @@ -299,7 +298,7 @@ def test_refresh_project_segment_counts__counts_returned__upserts_per_env_rows( ) -def test_refresh_project_segment_counts__previously_matching_pair_drops_to_zero__row_zeroed( +def test_refresh_project_segment_counts__previously_matching_pair_drops_to_zero__row_deleted( mocker: MockerFixture, settings: SettingsWrapper, project: Project, @@ -326,46 +325,11 @@ def test_refresh_project_segment_counts__previously_matching_pair_drops_to_zero_ # When refresh_project_segment_counts(project.id) - # Then the stale row is reset to zero rather than left at 15. - membership = SegmentMembershipCount.objects.get( - segment=segment, environment=environment - ) - assert membership.count == 0 - assert membership.last_synced_at is not None - - -def test_refresh_project_segment_counts__already_zeroed_pair__not_rewritten( - mocker: MockerFixture, - settings: SettingsWrapper, - project: Project, - environment: Environment, - segment: Segment, - enable_features: EnableFeaturesFixture, -) -> None: - # Given a row that's already at zero from a previous refresh - enable_features("segment_membership_inspection") - settings.CLICKHOUSE_ENABLED = True - zeroed_at = timezone.now() - timedelta(hours=1) - SegmentMembershipCount.objects.create( - segment=segment, - environment=environment, - count=0, - last_synced_at=zeroed_at, - ) - cursor = MagicMock() - open_cursor = mocker.patch.object(tasks, "open_clickhouse_cursor") - open_cursor.return_value.__enter__.return_value = cursor - mocker.patch.object(tasks, "compute_segment_counts_for_project", return_value=[]) - - # When a refresh runs that still produces zero matches - refresh_project_segment_counts(project.id) - - # Then the row is left alone -- zeroing zero again is just write churn. - membership = SegmentMembershipCount.objects.get( + # Then the stale row is gone -- pairs that no longer match drop out of + # the table entirely rather than lingering at the previous count. + assert not SegmentMembershipCount.objects.filter( segment=segment, environment=environment - ) - assert membership.count == 0 - assert membership.last_synced_at == zeroed_at + ).exists() def test_refresh_project_segment_counts__never_matched_pair__no_row_written( @@ -387,8 +351,8 @@ def test_refresh_project_segment_counts__never_matched_pair__no_row_written( # When refresh_project_segment_counts(project.id) - # Then no row is written: the zero-fill only resets previously-matching - # pairs, it doesn't pre-populate every (segment, env) combination. + # Then no row is written: refresh upserts matches, drops misses, and + # leaves never-matched pairs untouched. assert not SegmentMembershipCount.objects.filter( segment=segment, environment=environment ).exists() diff --git a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md index 191e46e0a960..580c78d14ee1 100644 --- a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md +++ b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md @@ -368,12 +368,12 @@ Attributes: ### `segment_membership.refresh.project.completed` Logged at `info` from: - - `api/segment_membership/tasks.py:194` + - `api/segment_membership/tasks.py:189` Attributes: - `membership_counts.count` - `project.id` - - `zeroed_counts.count` + - `stale_counts.count` ### `segment_membership.refresh.project.failed` From 5da46f1f531d3063b0e0aa4d64e8b469211f8ce1 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Tue, 26 May 2026 17:00:30 +0100 Subject: [PATCH 4/4] refactor(Segment membership): Trim refresh-stale-delete Per matthewelwell on #7600: comment was carrying old context, and the `if stale_ids else (0, {})` guard is unnecessary since `.delete()` on an empty `id__in` filter already returns `(0, {})` without issuing SQL. beep boop --- api/segment_membership/tasks.py | 10 +++------- .../observability/_events-catalogue.md | 2 +- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/api/segment_membership/tasks.py b/api/segment_membership/tasks.py index 314880cae28e..704ef7782ab6 100644 --- a/api/segment_membership/tasks.py +++ b/api/segment_membership/tasks.py @@ -162,8 +162,6 @@ def refresh_project_segment_counts(project_id: int) -> None: for m in membership_counts: m.last_synced_at = now - # Drop pairs that stopped matching this run so the next refresh - # doesn't carry stale counts forward. new_pairs = {(m.segment_id, m.environment_id) for m in membership_counts} stale_ids = [ pk @@ -174,11 +172,9 @@ def refresh_project_segment_counts(project_id: int) -> None: ) if (segment_id, environment_id) not in new_pairs ] - stale_deleted, _ = ( - SegmentMembershipCount.objects.filter(id__in=stale_ids).delete() - if stale_ids - else (0, {}) - ) + stale_deleted, _ = SegmentMembershipCount.objects.filter( + id__in=stale_ids, + ).delete() SegmentMembershipCount.objects.bulk_create( membership_counts, diff --git a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md index 580c78d14ee1..eb0ad2df9312 100644 --- a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md +++ b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md @@ -368,7 +368,7 @@ Attributes: ### `segment_membership.refresh.project.completed` Logged at `info` from: - - `api/segment_membership/tasks.py:189` + - `api/segment_membership/tasks.py:185` Attributes: - `membership_counts.count`