Commit 68f3e89

committed
optimize push health query
1 parent 2f4909f commit 68f3e89

File tree

20 files changed, +1306 -342 lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@ __pycache__
 .venv/
 venv/
 .scratch/
+.vscode/settings.json
 
 # vi
 *.swp

tests/ui/job-view/App_test.jsx

Lines changed: 7 additions & 0 deletions
@@ -131,6 +131,13 @@ describe('App', () => {
       ),
       [],
     );
+    fetchMock.get(
+      getProjectUrl(
+        '/push/health_summary_new_failures/?revision=3333333333335143b8df3f4b3e9b504dfbc589a0',
+        'try',
+      ),
+      [],
+    );
     fetchMock.get(getProjectUrl('/push/?full=true&count=10', 'try'), {
       results: [
         {

tests/ui/push-health/CommitHistory_test.jsx

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ import { toDateStr } from '../../../ui/helpers/display';
 
 beforeEach(() => {
   fetchMock.get(
-    '/api/project/autoland/push/health_summary/?revision=eeb6fd68c0223a72d8714734a34d3e6da69995e1&with_in_progress_tests=true',
+    '/api/project/autoland/push/health_summary_new_failures/?revision=eeb6fd68c0223a72d8714734a34d3e6da69995e1',
     [{ needInvestigation: 87, unsupported: 8 }],
   );
 });

tests/ui/push-health/Health_test.jsx

Lines changed: 28 additions & 0 deletions
@@ -112,6 +112,34 @@ describe('Health', () => {
       ),
       [],
     );
+    fetchMock.get(
+      getProjectUrl(
+        '/push/health_summary_new_failures/?revision=cd02b96bdce57d9ae53b632ca4740c871d3ecc32',
+        repo,
+      ),
+      {
+        revision: 'cd02b96bdce57d9ae53b632ca4740c871d3ecc32',
+        id: 630337,
+        metrics: {
+          commitHistory: pushHealth.metrics.commitHistory,
+        },
+        status: { running: 0, pending: 0, completed: 200 },
+      },
+    );
+    fetchMock.get(
+      getProjectUrl(
+        '/push/health_summary_new_failures/?revision=eeb6fd68c0223a72d8714734a34d3e6da69995e1',
+        repo,
+      ),
+      [],
+    );
+    fetchMock.get(
+      getProjectUrl(
+        '/push/health_details_new_failures/?revision=cd02b96bdce57d9ae53b632ca4740c871d3ecc32&limit=50&classification_ids=6',
+        repo,
+      ),
+      pushHealth,
+    );
   });
 
   afterAll(() => {

tests/ui/shared/PushHealthSummary_test.jsx

Lines changed: 2 additions & 2 deletions
@@ -8,7 +8,7 @@ import { getProjectUrl } from '../../../ui/helpers/location';
 beforeEach(() => {
   fetchMock.get(
     getProjectUrl(
-      '/push/health_summary/?revision=failed&with_in_progress_tests=true',
+      '/push/health_summary_new_failures/?revision=failed',
       'autoland',
     ),
     [
@@ -22,7 +22,7 @@ beforeEach(() => {
   );
   fetchMock.get(
     getProjectUrl(
-      '/push/health_summary/?revision=passed&with_in_progress_tests=true',
+      '/push/health_summary_new_failures/?revision=passed',
       'autoland',
     ),
     [

tests/webapp/api/test_push_api.py

Lines changed: 90 additions & 0 deletions
@@ -585,3 +585,93 @@ def test_push_status(client, test_job, test_user):
     assert resp.status_code == 200
     assert isinstance(resp.json(), dict)
     assert resp.json() == {"completed": 0, "pending": 0, "running": 0}
+
+
+def test_push_health_new_failures(client, test_repository, test_push, test_job):
+    """
+    test retrieving the health_new_failures endpoint which filters by failure_classification_id=6
+    """
+    # Set the job to have failure_classification_id=6 (new failure not classified)
+    new_failure_classification = FailureClassification.objects.get(
+        name="new failure not classified"
+    )
+    test_job.failure_classification = new_failure_classification
+    test_job.result = "testfailed"
+    test_job.save()
+
+    resp = client.get(
+        reverse("push-health_new_failures", kwargs={"project": test_repository.name}),
+        {"revision": test_push.revision},
+    )
+    assert resp.status_code == 200
+    data = resp.json()
+
+    # Verify response structure matches the health endpoint
+    assert "revision" in data
+    assert "id" in data
+    assert "result" in data
+    assert "jobs" in data
+    assert "metrics" in data
+    assert "status" in data
+
+    # Verify metrics structure
+    assert "commitHistory" in data["metrics"]
+    assert "linting" in data["metrics"]
+    assert "tests" in data["metrics"]
+    assert "builds" in data["metrics"]
+
+    # Verify commit history is returned for hg repos (test_repository is hg by default)
+    assert data["metrics"]["commitHistory"]["name"] == "Commit History"
+    assert data["metrics"]["commitHistory"]["result"] == "none"
+    # commit history details should be populated for hg repos
+    assert "details" in data["metrics"]["commitHistory"]
+
+    # Verify the response contains the revision
+    assert data["revision"] == test_push.revision
+    assert data["id"] == test_push.id
+
+
+def test_push_health_new_failures_excludes_other_classifications(
+    client, test_repository, test_push, test_job, test_job_2
+):
+    """
+    test that health_new_failures only includes jobs with failure_classification_id=6
+    """
+    # Set one job to failure_classification_id=6
+    new_failure_classification = FailureClassification.objects.get(
+        name="new failure not classified"
+    )
+    test_job.failure_classification = new_failure_classification
+    test_job.result = "testfailed"
+    test_job.save()
+
+    # Set another job to a different failure_classification_id (e.g., intermittent = 4)
+    intermittent_classification = FailureClassification.objects.get(name="intermittent")
+    test_job_2.failure_classification = intermittent_classification
+    test_job_2.result = "testfailed"
+    test_job_2.save()
+
+    resp = client.get(
+        reverse("push-health_new_failures", kwargs={"project": test_repository.name}),
+        {"revision": test_push.revision},
+    )
+    assert resp.status_code == 200
+    data = resp.json()
+
+    # The response should only include the job with failure_classification_id=6
+    jobs_dict = data["jobs"]
+
+    # Verify we have job data
+    assert isinstance(jobs_dict, dict)
+
+    # Collect all job IDs from the response
+    all_job_ids = set()
+    for job_type_name, job_list in jobs_dict.items():
+        for job in job_list:
+            all_job_ids.add(job["id"])
+
+    # Verify that only the job with classification_id=6 is included
+    assert test_job.id in all_job_ids, "Job with classification_id=6 should be included"
+    assert test_job_2.id not in all_job_ids, (
+        "Job with classification_id=4 (intermittent) should be excluded"
+    )

treeherder/push_health/builds.py

Lines changed: 42 additions & 7 deletions
@@ -1,17 +1,52 @@
-from django.db.models import Q
-
-from treeherder.model.models import Job, JobType
+from treeherder.model.models import Job
 from treeherder.push_health.utils import get_job_results
 
 
 def get_build_failures(push):
-    build_types = JobType.objects.filter(Q(name__icontains="build"))
-
+    # Filter jobs directly by job_type__name instead of fetching JobType objects first.
+    # This is more efficient because it only queries jobs for the specific push,
+    # and the JobType filter happens on the join with an already-filtered dataset
+    # (jobs in this push), rather than scanning the entire JobType table first.
     build_results = Job.objects.filter(
         push=push,
         tier__lte=2,
-        job_type__in=build_types,
-    ).select_related("machine_platform", "taskcluster_metadata")
+        job_type__name__icontains="build",
+    ).select_related("machine_platform", "taskcluster_metadata", "job_type")
+
+    result, failures, in_progress_count = get_job_results(build_results, "busted")
+
+    return (result, failures, in_progress_count)
+
+
+def get_build_failures_by_classification(push, classification_ids=None, limit=None):
+    """
+    Get build failures filtered by failure classification IDs with optional limit.
+
+    Args:
+        push: Push object
+        classification_ids: List of classification IDs (default: [6] for new failures only)
+            Common values: 1=fixed by commit, 6=new failure, 8=intermittent
+        limit: Optional limit on number of failures to return
+    """
+    if classification_ids is None:
+        classification_ids = [6]
+
+    # Filter jobs directly by job_type__name instead of fetching JobType objects first.
+    # This is more efficient because it only queries jobs for the specific push,
+    # and the JobType filter happens on the join with an already-filtered dataset
+    # (jobs in this push), rather than scanning the entire JobType table first.
+    build_query = Job.objects.filter(
+        push=push,
+        tier__lte=2,
+        job_type__name__icontains="build",
+        failure_classification_id__in=classification_ids,
+    ).select_related("machine_platform", "taskcluster_metadata", "job_type")
+
+    # Apply limit if specified
+    if limit:
+        build_results = build_query[:limit]
+    else:
+        build_results = build_query
 
     result, failures, in_progress_count = get_job_results(build_results, "busted")
 
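For context, a minimal sketch of how a caller might use the new helper. Only get_build_failures_by_classification and its parameters come from this commit; the Push lookup, the wrapper function, and the constant name below are illustrative assumptions, and the (result, failures, in_progress_count) return shape mirrors get_build_failures above.

# Illustrative sketch only -- not part of this commit.
from treeherder.model.models import Push
from treeherder.push_health.builds import get_build_failures_by_classification

NEW_FAILURE_NOT_CLASSIFIED = 6  # assumed constant for the "new failure not classified" id

def build_failures_for_revision(project, revision):
    # Look up the push for this revision (assumed lookup, matching the fields used in the tests).
    push = Push.objects.get(repository__name=project, revision=revision)
    # Only jobs classified as "new failure not classified" (id 6), capped at 50 results,
    # mirroring the limit seen in the health_details_new_failures mock URL above.
    return get_build_failures_by_classification(
        push, classification_ids=[NEW_FAILURE_NOT_CLASSIFIED], limit=50
    )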

treeherder/push_health/classification.py

Lines changed: 19 additions & 8 deletions
@@ -67,7 +67,7 @@ def get_log_lines(failure):
     return messages
 
 
-def get_grouped(failures):
+def get_grouped(failures, skip_ratio_classification=False):
     classified = {
         NEED_INVESTIGATION: [],
         KNOWN_ISSUES: [],
@@ -76,13 +76,24 @@ def get_grouped(failures):
     for failure in failures:
         is_intermittent = failure["suggestedClassification"] == "intermittent"
 
-        if (is_intermittent and failure["confidence"] == 100) or failure["totalFailures"] / failure[
-            "totalJobs"
-        ] <= 0.5:
-            classified[KNOWN_ISSUES].append(failure)
+        # When data is limited (skip_ratio_classification=True), we can't trust the totalFailures/totalJobs
+        # ratio because we may not have seen all failures. In this case, only use the intermittent
+        # classification from history, not the ratio-based classification.
+        if skip_ratio_classification:
+            # Only classify as Known Issue if we have high confidence from history
+            if is_intermittent and failure["confidence"] == 100:
+                classified[KNOWN_ISSUES].append(failure)
+            else:
+                classified[NEED_INVESTIGATION].append(failure)
+                failure["confidence"] = min(failure["confidence"], 90)
         else:
-            classified[NEED_INVESTIGATION].append(failure)
-            # If it needs investigation, we, by definition, don't have 100% confidence.
-            failure["confidence"] = min(failure["confidence"], 90)
+            # Full classification using both history and failure ratio
+            if (is_intermittent and failure["confidence"] == 100) or failure[
+                "totalFailures"
+            ] / failure["totalJobs"] <= 0.5:
+                classified[KNOWN_ISSUES].append(failure)
+            else:
+                classified[NEED_INVESTIGATION].append(failure)
+                failure["confidence"] = min(failure["confidence"], 90)
 
     return classified
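A small sketch of the behavioral difference introduced by skip_ratio_classification, using only the keys this function reads (suggestedClassification, confidence, totalFailures, totalJobs); real failure dicts carry more fields, and the example values below are made up.

# Illustrative sketch only -- not part of this commit.
from treeherder.push_health.classification import get_grouped

failure = {
    "suggestedClassification": "New Failure",  # anything other than "intermittent"
    "confidence": 75,
    "totalFailures": 1,
    "totalJobs": 10,  # 10% failure ratio
}

# With full data, the low failure ratio (1/10 <= 0.5) files this under known issues.
grouped_full = get_grouped([dict(failure)])

# With limited data (the *_new_failures endpoints fetch fewer jobs), the ratio is not
# trustworthy, so the same failure stays under "need investigation" and its
# confidence is capped at 90.
grouped_limited = get_grouped([dict(failure)], skip_ratio_classification=True)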

treeherder/push_health/compare.py

Lines changed: 9 additions & 5 deletions
@@ -11,19 +11,23 @@
 
 
 def get_commit_history(repository, revision, push):
-    from mozci.errors import ParentPushNotFound
+    from mozci.errors import ParentPushNotFound, PushNotFound
     from mozci.push import Push as MozciPush
 
-    mozci_push = MozciPush([revision], repository.name)
     parent = None
     parent_sha = None
     parent_repo = None
    parent_push = None
 
     try:
-        parent = mozci_push.parent
-    except ParentPushNotFound:
-        pass
+        mozci_push = MozciPush([revision], repository.name)
+        try:
+            parent = mozci_push.parent
+        except ParentPushNotFound:
+            pass
+    except PushNotFound as e:
+        logger.warning(f"Could not fetch mozci push data for {revision}: {e}")
+        # Continue without parent push information
 
     if parent:
         parent_sha = parent.revs[-1]

treeherder/push_health/linting.py

Lines changed: 32 additions & 1 deletion
@@ -9,7 +9,38 @@ def get_lint_failures(push):
         Q(machine_platform__platform="lint") | Q(job_type__symbol="mozlint"),
         push=push,
         tier__lte=2,
-    ).select_related("machine_platform", "taskcluster_metadata")
+    ).select_related("machine_platform", "taskcluster_metadata", "job_type")
+
+    result, failures, in_progress_count = get_job_results(lint_results, "testfailed")
+
+    return (result, failures, in_progress_count)
+
+
+def get_lint_failures_by_classification(push, classification_ids=None, limit=None):
+    """
+    Get lint failures filtered by failure classification IDs with optional limit.
+
+    Args:
+        push: Push object
+        classification_ids: List of classification IDs (default: [6] for new failures only)
+            Common values: 1=fixed by commit, 6=new failure, 8=intermittent
+        limit: Optional limit on number of failures to return
+    """
+    if classification_ids is None:
+        classification_ids = [6]
+
+    lint_query = Job.objects.filter(
+        Q(machine_platform__platform="lint") | Q(job_type__symbol="mozlint"),
+        push=push,
+        tier__lte=2,
+        failure_classification_id__in=classification_ids,
+    ).select_related("machine_platform", "taskcluster_metadata", "job_type")
+
+    # Apply limit if specified
+    if limit:
+        lint_results = lint_query[:limit]
+    else:
+        lint_results = lint_query
 
     result, failures, in_progress_count = get_job_results(lint_results, "testfailed")
 
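The lint helper mirrors get_build_failures_by_classification above. The view behind the new health_summary_new_failures and health_details_new_failures routes is not part of this excerpt, so the sketch below only illustrates, under that assumption, how the per-area helpers could feed one summary payload; the key names are taken from the metrics asserted in test_push_health_new_failures, and everything else is hypothetical.

# Illustrative sketch only -- the real view code is not shown in this commit excerpt.
from treeherder.push_health.builds import get_build_failures_by_classification
from treeherder.push_health.linting import get_lint_failures_by_classification

def new_failure_metrics(push, limit=50):
    build_result, build_failures, _ = get_build_failures_by_classification(
        push, classification_ids=[6], limit=limit
    )
    lint_result, lint_failures, _ = get_lint_failures_by_classification(
        push, classification_ids=[6], limit=limit
    )
    # "builds" and "linting" match the metric keys asserted in test_push_health_new_failures;
    # the "tests" and "commitHistory" metrics come from other helpers not shown here.
    return {
        "builds": {"result": build_result, "details": build_failures},
        "linting": {"result": lint_result, "details": lint_failures},
    }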
