63 changes: 60 additions & 3 deletions premerge/advisor/advisor_lib.py
@@ -36,6 +36,7 @@ class TestExplanationRequest(TypedDict):
}

EXPLAINED_HEAD_MAX_COMMIT_INDEX_DIFFERENCE = 5
EXPLAINED_FLAKY_MIN_COMMIT_RANGE = 200


def _create_table(table_name: str, connection: sqlite3.Connection):
@@ -131,20 +132,76 @@ def _try_explain_failing_at_head(
return None


def _try_explain_flaky_failure(
Contributor:
Maybe a comment describing the particular definition of "flaky" this function is using would be handy?

It looks to me like it's testing if this failure has been seen before over at least a 200-commit range between oldest/newest sighting? What's to stop that being "it's been failing at HEAD for at least 200 revisions"?

Contributor Author:

Added a function-level docstring.

I've never seen a non-flaky test failure that has been failing for more than ~20 commits. I think an OOM more in range should be safe. I thought about also adding a heuristic around the longest-running failing subsequence to catch cases like this, but I think it adds complexity for little benefit:

  1. As mentioned, 200 is about an OOM bigger than the ranges we've seen in practice.
  2. We're exactly matching the failure message, so the test would need to fail in the exact same way to trigger this.
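(For example, with sightings at commit indices 1 and 202 the range is 201 > 200, so the failure would be reported as flaky, while sightings at indices 1 and 2 span a range of 1 and would not be.)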

Contributor:

I'd probably still go for a test that verifies the failures are discontiguous from ToT rather than a heuristic that guesses that something seems unlikely to be a long-standing failure at HEAD - seems cheap/simple enough to do, and bad advice can be pretty confusing. (A sketch of such a check follows the function below.)

But I guess it's a start.

Contributor Author:

Yeah, I'll add in that functionality soon. It would be good to have it for the failing-at-head case too.

db_connection: sqlite3.Connection,
test_failure: TestFailure,
platform: str,
) -> FailureExplanation | None:
"""See if a failure is flaky at head.

    This function looks at a test failure and tries to determine whether it
    is a known flake at head. It does this heuristically, by checking whether
    the same failure has been seen at least twice across a range of more than
    200 commits. This heuristic has the advantage of being simple and
    performant. We do not explicitly handle the case where a test has been
    failing continuously for this long, as 200 commits is an order of
    magnitude larger than the range any non-flaky test has stayed broken in
    tree.

Args:
db_connection: The database connection.
test_failure: The test failure to try and explain.
platform: The platform the test failed on.

Returns:
Either None, if the test could not be explained as flaky, or a
FailureExplanation object explaining the test failure.
"""
    postcommit_failures = db_connection.execute(
        "SELECT failure_message, commit_index FROM failures WHERE source_type='postcommit' AND platform=?",
        (platform,),
    ).fetchall()
    # Only sightings with an exactly matching failure message count as the
    # same flake.
    commit_indices = [
        commit_index
        for failure_message, commit_index in postcommit_failures
        if failure_message == test_failure["message"]
    ]
    if not commit_indices:
        return None
commit_range = max(commit_indices) - min(commit_indices)
if commit_range > EXPLAINED_FLAKY_MIN_COMMIT_RANGE:
return {
"name": test_failure["name"],
"explained": True,
"reason": "This test is flaky in main.",
}
return None
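
A minimal sketch of the "discontiguous from ToT" check proposed in the review thread above; it is not part of this PR, and every name in it is hypothetical. It assumes the caller can supply both the commit indices at which this exact failure message was seen and the commit indices of every postcommit run on the platform, so that a passing run between sightings can be detected:

def _is_discontiguous_from_tot(
    failure_indices: list[int], run_indices: list[int]
) -> bool:
    """Hypothetical helper: returns True unless the sightings form one
    failing streak that reaches ToT.

    failure_indices are the commit indices where the exact failure message
    was seen; run_indices are the commit indices of every postcommit run on
    the platform.
    """
    if not failure_indices:
        return False
    failures = set(failure_indices)
    oldest_sighting = min(failures)
    # Walk backwards from the newest postcommit run. A run that did not
    # reproduce the failure proves the failures do not extend contiguously
    # to ToT, so the test looks flaky rather than broken at head.
    for index in sorted(run_indices, reverse=True):
        if index not in failures:
            return True
        if index <= oldest_sighting:
            break
    # Every run from ToT back to the oldest sighting reproduced the failure,
    # which is consistent with a long-standing failure at head.
    return False

_try_explain_flaky_failure could then require this check to pass in addition to the commit-range heuristic before reporting a flake.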


def explain_failures(
explanation_request: TestExplanationRequest,
repository_path: str,
db_connection: sqlite3.Connection,
) -> list[FailureExplanation]:
explanations = []
for test_failure in explanation_request["failures"]:
commit_index = git_utils.get_commit_index(
explanation_request["base_commit_sha"], repository_path, db_connection
)
        # We want to try to explain flaky failures first. Otherwise we might
        # explain a flaky failure as a failure at head if there is a recent
        # failure in the last couple of commits.
explained_as_flaky = _try_explain_flaky_failure(
db_connection,
test_failure,
explanation_request["platform"],
)
if explained_as_flaky:
explanations.append(explained_as_flaky)
continue
        explained_at_head = _try_explain_failing_at_head(
            db_connection,
            test_failure,
            explanation_request["base_commit_sha"],
            commit_index,
            explanation_request["platform"],
        )
if explained_at_head:
103 changes: 103 additions & 0 deletions premerge/advisor/advisor_lib_test.py
@@ -86,6 +86,9 @@ def setUp(self):
[
("8d29a3bb6f3d92d65bf5811b53bf42bf63685359", 1),
("6b7064686b706f7064656d6f6e68756e74657273", 2),
("6a6f73687561747265656a6f7368756174726565", 201),
("6269677375726269677375726269677375726269", 202),
("6d746c616e676c65796d746c616e676c65796d74", 203),
],
)
self.repository_path_dir = tempfile.TemporaryDirectory()
@@ -280,3 +283,103 @@ def test_no_explain_different_message(self):
}
],
)

def _setup_flaky_test_info(
self,
source_type="postcommit",
message="failed in way 1",
second_failure_sha="6269677375726269677375726269677375726269",
):
failures_info = [
{
"source_type": source_type,
"base_commit_sha": "8d29a3bb6f3d92d65bf5811b53bf42bf63685359",
"source_id": "10000",
"failures": [
{"name": "a.ll", "message": message},
],
"platform": "linux-x86_64",
},
{
"source_type": source_type,
"base_commit_sha": second_failure_sha,
"source_id": "100001",
"failures": [
{"name": "a.ll", "message": message},
],
"platform": "linux-x86_64",
},
]
for failure_info in failures_info:
advisor_lib.upload_failures(
failure_info, self.db_connection, self.repository_path
)

def _get_flaky_test_explanations(self):
explanation_request = {
"failures": [{"name": "a.ll", "message": "failed in way 1"}],
"base_commit_sha": "6d746c616e676c65796d746c616e676c65796d74",
"platform": "linux-x86_64",
}
return advisor_lib.explain_failures(
explanation_request, self.repository_path, self.db_connection
)

def test_explain_flaky(self):
self._setup_flaky_test_info()
self.assertListEqual(
self._get_flaky_test_explanations(),
[
{
"name": "a.ll",
"explained": True,
"reason": "This test is flaky in main.",
}
],
)

def test_no_explain_flaky_different_message(self):
self._setup_flaky_test_info(message="failed in way 2")
self.assertListEqual(
self._get_flaky_test_explanations(),
[
{
"name": "a.ll",
"explained": False,
"reason": None,
}
],
)

# Test that we do not explain away flaky failures from pull request data.
# PRs might have the same failures multiple times across a large span of
# base commits, which might accidentally trigger the heuristic.
def test_no_explain_flaky_pullrequest_data(self):
self._setup_flaky_test_info(source_type="pull_request")
self.assertListEqual(
self._get_flaky_test_explanations(),
[
{
"name": "a.ll",
"explained": False,
"reason": None,
}
],
)

# Test that if all of the flaky failures are within a small range, we do
# not report this as a flaky failure.
def test_no_explain_flaky_small_range(self):
self._setup_flaky_test_info(
second_failure_sha="6b7064686b706f7064656d6f6e68756e74657273"
)
self.assertListEqual(
self._get_flaky_test_explanations(),
[
{
"name": "a.ll",
"explained": False,
"reason": None,
}
],
)