Skip to content

Commit 7f72a70

Browse files
authored
ref(grouping): Add training mode for similarity model rollout (#102623)
Introduce training_mode parameter to send dual embeddings during model upgrades. Centralize model version config and add should_send_new_model_embeddings() to track which groups need new embeddings. Rename feature flag to be version-agnostic.
1 parent 723fddb commit 7f72a70

File tree

10 files changed

+571
-67
lines changed

10 files changed

+571
-67
lines changed

src/sentry/event_manager.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,10 @@
6565
run_primary_grouping,
6666
)
6767
from sentry.grouping.ingest.metrics import record_hash_calculation_metrics, record_new_group_metrics
68-
from sentry.grouping.ingest.seer import maybe_check_seer_for_matching_grouphash
68+
from sentry.grouping.ingest.seer import (
69+
maybe_check_seer_for_matching_grouphash,
70+
maybe_send_seer_for_new_model_training,
71+
)
6972
from sentry.grouping.ingest.utils import (
7073
add_group_id_to_grouphashes,
7174
check_for_group_creation_load_shed,
@@ -1287,6 +1290,7 @@ def assign_event_to_group(
12871290
if primary.existing_grouphash:
12881291
group_info = handle_existing_grouphash(job, primary.existing_grouphash, primary.grouphashes)
12891292
result = "found_primary"
1293+
maybe_send_seer_for_new_model_training(event, primary.existing_grouphash, primary.variants)
12901294
# If we haven't, try again using the secondary config. (If there is no secondary config, or
12911295
# we're out of the transition period, we'll get back the empty `NULL_GROUPHASH_INFO`.)
12921296
else:
@@ -1298,6 +1302,9 @@ def assign_event_to_group(
12981302
job, secondary.existing_grouphash, all_grouphashes
12991303
)
13001304
result = "found_secondary"
1305+
maybe_send_seer_for_new_model_training(
1306+
event, secondary.existing_grouphash, secondary.variants
1307+
)
13011308

13021309
# If we still haven't found a group, ask Seer for a match (if enabled for the event's platform)
13031310
else:

src/sentry/features/temporary.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -646,8 +646,8 @@ def register_temporary_features(manager: FeatureManager) -> None:
646646
manager.add("projects:similarity-embeddings", ProjectFeature, FeatureHandlerStrategy.INTERNAL, default=False, api_expose=True)
647647
manager.add("projects:similarity-indexing", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=False)
648648
manager.add("projects:similarity-view", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=True)
649-
# Enable v2 similarity grouping model (part of v2 grouping rollout)
650-
manager.add("projects:similarity-grouping-v2-model", ProjectFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
649+
# Enable new similarity grouping model upgrade (version-agnostic rollout)
650+
manager.add("projects:similarity-grouping-model-upgrade", ProjectFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
651651
# Starfish: extract metrics from the spans
652652
manager.add("projects:span-metrics-extraction", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=True)
653653
manager.add("projects:span-metrics-extraction-addons", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=False)

src/sentry/grouping/ingest/seer.py

Lines changed: 73 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@
66
from django.conf import settings
77
from django.utils import timezone
88

9-
from sentry import features, options
9+
from sentry import options
1010
from sentry import ratelimits as ratelimiter
11-
from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
1211
from sentry.grouping.grouping_info import get_grouping_info_from_variants_legacy
1312
from sentry.grouping.ingest.grouphash_metadata import (
1413
check_grouphashes_for_positive_fingerprint_match,
@@ -17,8 +16,12 @@
1716
from sentry.grouping.variants import BaseVariant
1817
from sentry.models.grouphash import GroupHash
1918
from sentry.models.project import Project
19+
from sentry.seer.similarity.config import (
20+
get_grouping_model_version,
21+
should_send_new_model_embeddings,
22+
)
2023
from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer
21-
from sentry.seer.similarity.types import GroupingVersion, SimilarIssuesEmbeddingsRequest
24+
from sentry.seer.similarity.types import SimilarIssuesEmbeddingsRequest
2225
from sentry.seer.similarity.utils import (
2326
SEER_INELIGIBLE_EVENT_PLATFORMS,
2427
ReferrerOptions,
@@ -257,10 +260,17 @@ def get_seer_similar_issues(
257260
event: Event,
258261
event_grouphash: GroupHash,
259262
variants: dict[str, BaseVariant],
263+
training_mode: bool = False,
260264
) -> tuple[float | None, GroupHash | None]:
261265
"""
262266
Ask Seer for the given event's nearest neighbor(s) and return the stacktrace distance and
263267
matching GroupHash of the closest match (if any), or `(None, None)` if no match found.
268+
269+
Args:
270+
event: The event being grouped
271+
event_grouphash: The grouphash for this event
272+
variants: Grouping variants for the event
273+
training_mode: If True, only possibly insert embedding without returning matches
264274
"""
265275
event_hash = event.get_primary_hash()
266276
exception_type = get_path(event.data, "exception", "values", -1, "type")
@@ -272,10 +282,7 @@ def get_seer_similar_issues(
272282
get_stacktrace_string(get_grouping_info_from_variants_legacy(variants)),
273283
)
274284

275-
# Get model configuration from feature flags
276-
use_v2_model = features.has("projects:similarity-grouping-v2-model", event.project)
277-
model_version = GroupingVersion.V2 if use_v2_model else GroupingVersion.V1
278-
training_mode = False # PR #B will add the smart logic
285+
model_version = get_grouping_model_version(event.project)
279286

280287
request_data: SimilarIssuesEmbeddingsRequest = {
281288
"event_id": event.event_id,
@@ -392,12 +399,20 @@ def get_seer_similar_issues(
392399
"grouping.similarity.seer_results_returned",
393400
len(seer_results),
394401
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
395-
tags={**metrics_tags, "is_hybrid": is_hybrid_fingerprint_case},
402+
tags={
403+
**metrics_tags,
404+
"is_hybrid": is_hybrid_fingerprint_case,
405+
"training_mode": training_mode,
406+
},
396407
)
397408
metrics.incr(
398409
"grouping.similarity.get_seer_similar_issues",
399410
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
400-
tags={**metrics_tags, "is_hybrid": is_hybrid_fingerprint_case},
411+
tags={
412+
**metrics_tags,
413+
"is_hybrid": is_hybrid_fingerprint_case,
414+
"training_mode": training_mode,
415+
},
401416
)
402417

403418
logger.info(
@@ -528,6 +543,8 @@ def maybe_check_seer_for_matching_grouphash(
528543

529544
timestamp = timezone.now()
530545

546+
model_version = get_grouping_model_version(event.project)
547+
531548
gh_metadata.update(
532549
# Technically the time of the metadata record creation and the time of the Seer
533550
# request will be some milliseconds apart, but a) the difference isn't meaningful
@@ -541,9 +558,55 @@ def maybe_check_seer_for_matching_grouphash(
541558
date_added=gh_metadata.date_added or timestamp,
542559
seer_date_sent=gh_metadata.date_added or timestamp,
543560
seer_event_sent=event.event_id,
544-
seer_model=SEER_SIMILARITY_MODEL_VERSION,
561+
seer_model=model_version.value,
545562
seer_matched_grouphash=seer_matched_grouphash,
546563
seer_match_distance=seer_match_distance,
547564
)
548565

549566
return seer_matched_grouphash
567+
568+
569+
@sentry_sdk.tracing.trace
570+
def maybe_send_seer_for_new_model_training(
571+
event: Event,
572+
existing_grouphash: GroupHash,
573+
variants: dict[str, BaseVariant],
574+
) -> None:
575+
"""
576+
Send a training_mode=true request to Seer to build embeddings for the new model
577+
version if the existing grouphash hasn't been sent to the new version yet.
578+
579+
This only happens for projects that have the new model rolled out. It helps
580+
build embeddings for existing groups without affecting production grouping decisions.
581+
582+
Args:
583+
event: The event being grouped
584+
existing_grouphash: The grouphash that was found for this event
585+
variants: Grouping variants for the event
586+
"""
587+
588+
# Check if we should send embeddings for the new model
589+
gh_metadata = existing_grouphash.metadata
590+
grouphash_seer_model = gh_metadata.seer_model if gh_metadata else None
591+
592+
if not should_send_new_model_embeddings(event.project, grouphash_seer_model):
593+
return
594+
595+
# Send training mode request (honor all checks like rate limits, circuit breaker, etc.)
596+
if not should_call_seer_for_grouping(event, variants, existing_grouphash):
597+
return
598+
599+
record_did_call_seer_metric(event, call_made=True, blocker="none", training_mode=True)
600+
601+
try:
602+
# Call Seer with training_mode=True (results won't be used for grouping)
603+
get_seer_similar_issues(event, existing_grouphash, variants, training_mode=True)
604+
except Exception as e:
605+
sentry_sdk.capture_exception(
606+
e,
607+
tags={
608+
"event": event.event_id,
609+
"project": event.project.id,
610+
"grouphash": existing_grouphash.hash,
611+
},
612+
)

src/sentry/issues/endpoints/group_similar_issues_embeddings.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from rest_framework.request import Request
77
from rest_framework.response import Response
88

9-
from sentry import analytics, features, options
9+
from sentry import analytics, options
1010
from sentry.api.analytics import GroupSimilarIssuesEmbeddingsCountEvent
1111
from sentry.api.api_owners import ApiOwner
1212
from sentry.api.api_publish_status import ApiPublishStatus
@@ -16,12 +16,9 @@
1616
from sentry.issues.endpoints.bases.group import GroupEndpoint
1717
from sentry.models.group import Group
1818
from sentry.models.grouphash import GroupHash
19+
from sentry.seer.similarity.config import get_grouping_model_version
1920
from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer
20-
from sentry.seer.similarity.types import (
21-
GroupingVersion,
22-
SeerSimilarIssueData,
23-
SimilarIssuesEmbeddingsRequest,
24-
)
21+
from sentry.seer.similarity.types import SeerSimilarIssueData, SimilarIssuesEmbeddingsRequest
2522
from sentry.seer.similarity.utils import (
2623
ReferrerOptions,
2724
event_content_has_stacktrace,
@@ -104,10 +101,7 @@ def get(self, request: Request, group: Group) -> Response:
104101
if not stacktrace_string or not latest_event:
105102
return Response([]) # No exception, stacktrace or in-app frames, or event
106103

107-
# Get model configuration from feature flags
108-
use_v2_model = features.has("projects:similarity-grouping-v2-model", group.project)
109-
model_version = GroupingVersion.V2 if use_v2_model else GroupingVersion.V1
110-
training_mode = False # TODO: currently hardcoded, follow up PR will add the logic
104+
model_version = get_grouping_model_version(group.project)
111105

112106
similar_issues_params: SimilarIssuesEmbeddingsRequest = {
113107
"event_id": latest_event.event_id,
@@ -119,7 +113,7 @@ def get(self, request: Request, group: Group) -> Response:
119113
"referrer": "similar_issues",
120114
"use_reranking": options.get("seer.similarity.similar_issues.use_reranking"),
121115
"model": model_version,
122-
"training_mode": training_mode,
116+
"training_mode": False,
123117
}
124118
# Add optional parameters
125119
if request.GET.get("k"):
src/sentry/seer/similarity/config.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
"""
2+
Configuration for similarity grouping model versions.
3+
4+
This module defines which model versions are used for similarity grouping
5+
and provides helper functions for determining the appropriate version to use.
6+
"""
7+
8+
from sentry import features
9+
from sentry.models.project import Project
10+
from sentry.seer.similarity.types import GroupingVersion
11+
12+
# Stable model version - used for ALL requests for non-rolled-out projects
13+
SEER_GROUPING_STABLE_VERSION = GroupingVersion.V1
14+
15+
# New model version being rolled out
16+
# - Rolled-out projects: Use this for ALL requests (both grouping and embeddings)
17+
# - Non-rolled-out projects: Never use this (use stable version for everything)
18+
# Set to None to disable rollout entirely
19+
SEER_GROUPING_NEW_VERSION: GroupingVersion | None = GroupingVersion.V2
20+
21+
# Feature flag name (version-agnostic)
22+
SEER_GROUPING_NEW_MODEL_ROLLOUT_FEATURE = "projects:similarity-grouping-model-upgrade"
23+
24+
25+
def get_grouping_model_version(project: Project) -> GroupingVersion:
26+
"""
27+
Get the model version to use for grouping decisions for this project.
28+
29+
Returns:
30+
- New version if rollout is enabled for this project
31+
- Stable version otherwise
32+
"""
33+
# Early return if no new version configured
34+
if SEER_GROUPING_NEW_VERSION is None:
35+
return SEER_GROUPING_STABLE_VERSION
36+
37+
# Type is narrowed to GroupingVersion here
38+
if features.has(SEER_GROUPING_NEW_MODEL_ROLLOUT_FEATURE, project):
39+
return SEER_GROUPING_NEW_VERSION
40+
return SEER_GROUPING_STABLE_VERSION
41+
42+
43+
def is_new_model_rolled_out(project: Project) -> bool:
44+
"""
45+
Check if the new model version is rolled out for this project.
46+
47+
Returns False if:
48+
- No new version is configured (rollout disabled globally)
49+
- Feature flag is not enabled for this project
50+
"""
51+
if SEER_GROUPING_NEW_VERSION is None:
52+
return False
53+
54+
return features.has(SEER_GROUPING_NEW_MODEL_ROLLOUT_FEATURE, project)
55+
56+
57+
def get_new_model_version() -> GroupingVersion | None:
58+
"""
59+
Get the new model version being rolled out, if any.
60+
Returns None if no rollout is in progress.
61+
"""
62+
return SEER_GROUPING_NEW_VERSION
63+
64+
65+
def should_send_new_model_embeddings(
66+
project: Project,
67+
grouphash_seer_model: str | None,
68+
) -> bool:
69+
"""
70+
Check if we should send training_mode=true request to build embeddings
71+
for the new model version for an existing group.
72+
73+
This is true when:
74+
1. A new version is being rolled out
75+
2. The project has the rollout feature enabled
76+
3. The grouphash hasn't been sent to the new version yet
77+
78+
Args:
79+
project: The project
80+
grouphash_seer_model: The seer_model value from grouphash metadata
81+
82+
Returns:
83+
True if we should send a training_mode=true request
84+
"""
85+
new_version = get_new_model_version()
86+
if new_version is None:
87+
# No rollout in progress
88+
return False
89+
90+
if not is_new_model_rolled_out(project):
91+
# Rollout not enabled for this project
92+
return False
93+
94+
if grouphash_seer_model is None:
95+
# Never sent to Seer at all
96+
return True
97+
98+
# Check if it was sent to the new version
99+
return grouphash_seer_model != new_version.value

src/sentry/seer/similarity/utils.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,11 +314,18 @@ def event_content_has_stacktrace(event: GroupEvent | Event) -> bool:
314314
return exception_stacktrace or threads_stacktrace or only_stacktrace
315315

316316

317-
def record_did_call_seer_metric(event: Event, *, call_made: bool, blocker: str) -> None:
317+
def record_did_call_seer_metric(
318+
event: Event, *, call_made: bool, blocker: str, training_mode: bool = False
319+
) -> None:
318320
metrics.incr(
319321
"grouping.similarity.did_call_seer",
320322
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
321-
tags={"call_made": call_made, "blocker": blocker, "platform": event.platform},
323+
tags={
324+
"call_made": call_made,
325+
"blocker": blocker,
326+
"platform": event.platform,
327+
"training_mode": training_mode,
328+
},
322329
)
323330

324331

tests/sentry/event_manager/grouping/test_seer_grouping.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,10 @@
33
from typing import Any
44
from unittest.mock import MagicMock, patch
55

6-
from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
76
from sentry.grouping.ingest.grouphash_metadata import create_or_update_grouphash_metadata_if_needed
87
from sentry.grouping.ingest.seer import get_seer_similar_issues, should_call_seer_for_grouping
98
from sentry.models.grouphash import GroupHash
10-
from sentry.seer.similarity.types import SeerSimilarIssueData
9+
from sentry.seer.similarity.types import GroupingVersion, SeerSimilarIssueData
1110
from sentry.testutils.cases import TestCase
1211
from sentry.testutils.helpers.eventprocessing import save_new_event
1312
from sentry.testutils.pytest.mocking import capture_results
@@ -205,7 +204,7 @@ def test_group_with_no_seer_match(self, _: MagicMock) -> None:
205204
event_grouphash,
206205
event_grouphash.metadata.date_added,
207206
event.event_id,
208-
SEER_SIMILARITY_MODEL_VERSION,
207+
GroupingVersion.V1.value,
209208
None,
210209
None,
211210
)
@@ -248,7 +247,7 @@ def test_group_with_seer_match(self, _: MagicMock) -> None:
248247
new_event_grouphash,
249248
new_event_grouphash.metadata.date_added,
250249
new_event.event_id,
251-
SEER_SIMILARITY_MODEL_VERSION,
250+
GroupingVersion.V1.value,
252251
existing_event_grouphash,
253252
seer_result_data.stacktrace_distance,
254253
)

0 commit comments

Comments
 (0)