From b5cecd8785848c2ec1d3d4bedce5e88f561ef285 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 21:08:45 +0100 Subject: [PATCH 01/20] feat: docgen events scanner scaffold (literal domain, single emit) First red-green cycle for #201. Introduces `common.core.docgen.events.get_event_entries_from_source` plus `EventEntry` / `SourceLocation` data classes, handling the simplest shape: a literal-domain `structlog.get_logger("X")` assignment followed by `var.("evt")` produces one entry. Further cycles layer `__name__` domain resolution, the level allowlist, bind mechanics, cross-site merging, filesystem walk, and the management command + template. beep boop --- src/common/core/docgen/__init__.py | 0 src/common/core/docgen/events.py | 90 ++++++++++++++++++++ tests/unit/common/core/test_docgen_events.py | 39 +++++++++ 3 files changed, 129 insertions(+) create mode 100644 src/common/core/docgen/__init__.py create mode 100644 src/common/core/docgen/events.py create mode 100644 tests/unit/common/core/test_docgen_events.py diff --git a/src/common/core/docgen/__init__.py b/src/common/core/docgen/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py new file mode 100644 index 00000000..5a080333 --- /dev/null +++ b/src/common/core/docgen/events.py @@ -0,0 +1,90 @@ +import ast +from dataclasses import dataclass, field +from pathlib import Path +from typing import Iterator + + +@dataclass(frozen=True) +class SourceLocation: + path: Path + line: int + + +@dataclass +class EventEntry: + name: str + level: str + attributes: frozenset[str] + locations: list[SourceLocation] = field(default_factory=list) + + +def get_event_entries_from_source( + source: str, + *, + module_dotted: str, + path: Path, +) -> Iterator[EventEntry]: + tree = ast.parse(source) + logger_domains = _collect_logger_domains(tree) + + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + if entry := _build_entry_from_emit_call(node, logger_domains, path): + yield entry + + +def _collect_logger_domains(tree: ast.AST) -> dict[str, str]: + logger_domains: dict[str, str] = {} + for node in ast.walk(tree): + if not _is_logger_assignment(node): + continue + assert isinstance(node, ast.Assign) + target = node.targets[0] + assert isinstance(target, ast.Name) + domain_arg = node.value.args[0] # type: ignore[attr-defined] + if isinstance(domain_arg, ast.Constant) and isinstance(domain_arg.value, str): + logger_domains[target.id] = domain_arg.value + return logger_domains + + +def _build_entry_from_emit_call( + node: ast.Call, + logger_domains: dict[str, str], + path: Path, +) -> EventEntry | None: + func = node.func + if not isinstance(func, ast.Attribute): + return None + value = func.value + if not isinstance(value, ast.Name) or value.id not in logger_domains: + return None + if not node.args: + return None + event_arg = node.args[0] + if not (isinstance(event_arg, ast.Constant) and isinstance(event_arg.value, str)): + return None + return EventEntry( + name=f"{logger_domains[value.id]}.{event_arg.value}", + level=func.attr, + attributes=frozenset(), + locations=[SourceLocation(path=path, line=node.lineno)], + ) + + +def _is_logger_assignment(node: ast.AST) -> bool: + if not isinstance(node, ast.Assign): + return False + if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name): + return False + call = node.value + if not isinstance(call, ast.Call): + return False + func = call.func + if not isinstance(func, ast.Attribute): + return False + if func.attr != "get_logger": + return False + if not isinstance(func.value, ast.Name) or func.value.id != "structlog": + return False + return bool(call.args) diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py new file mode 100644 index 00000000..91322f03 --- /dev/null +++ b/tests/unit/common/core/test_docgen_events.py @@ -0,0 +1,39 @@ +from pathlib import Path + +from common.core.docgen.events import ( + EventEntry, + SourceLocation, + get_event_entries_from_source, +) + + +def test_get_event_entries_from_source__literal_domain_plain_info__records_single_entry() -> ( + None +): + # Given + source = ( + "import structlog\n" + "\n" + 'logger = structlog.get_logger("code_references")\n' + 'logger.info("scan.created")\n' + ) + path = Path("projects/code_references/views.py") + + # When + entries = list( + get_event_entries_from_source( + source, + module_dotted="projects.code_references.views", + path=path, + ) + ) + + # Then + assert entries == [ + EventEntry( + name="code_references.scan.created", + level="info", + attributes=frozenset(), + locations=[SourceLocation(path=path, line=4)], + ), + ] From ceae9d69b6b1364ad944310e00a032d81b896797 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 21:18:22 +0100 Subject: [PATCH 02/20] feat: collect structlog emit kwargs into event attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second cycle for #201. Kwargs on the emit call become the entry's `attributes` set, with `__` → `.` substitution so the catalogue reads in the dotted form product stakeholders expect. beep boop --- src/common/core/docgen/events.py | 5 +- tests/unit/common/core/test_docgen_events.py | 84 +++++++++++++++----- 2 files changed, 66 insertions(+), 23 deletions(-) diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py index 5a080333..975c24cd 100644 --- a/src/common/core/docgen/events.py +++ b/src/common/core/docgen/events.py @@ -64,10 +64,13 @@ def _build_entry_from_emit_call( event_arg = node.args[0] if not (isinstance(event_arg, ast.Constant) and isinstance(event_arg.value, str)): return None + attributes = frozenset( + kw.arg.replace("__", ".") for kw in node.keywords if kw.arg is not None + ) return EventEntry( name=f"{logger_domains[value.id]}.{event_arg.value}", level=func.attr, - attributes=frozenset(), + attributes=attributes, locations=[SourceLocation(path=path, line=node.lineno)], ) diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index 91322f03..7cc09245 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -1,39 +1,79 @@ from pathlib import Path +import pytest + from common.core.docgen.events import ( EventEntry, SourceLocation, get_event_entries_from_source, ) +PATH = Path("projects/code_references/views.py") +MODULE_DOTTED = "projects.code_references.views" -def test_get_event_entries_from_source__literal_domain_plain_info__records_single_entry() -> ( - None -): - # Given - source = ( - "import structlog\n" - "\n" - 'logger = structlog.get_logger("code_references")\n' - 'logger.info("scan.created")\n' - ) - path = Path("projects/code_references/views.py") - # When +@pytest.mark.parametrize( + "source, expected_entries", + [ + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +logger.info("scan.created") +""", + [ + EventEntry( + name="code_references.scan.created", + level="info", + attributes=frozenset(), + locations=[SourceLocation(path=PATH, line=4)], + ), + ], + id="plain-info-no-kwargs", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +logger.info( + "scan.created", + organisation__id=1, + code_references__count=2, + feature__count=3, +) +""", + [ + EventEntry( + name="code_references.scan.created", + level="info", + attributes=frozenset( + { + "organisation.id", + "code_references.count", + "feature.count", + } + ), + locations=[SourceLocation(path=PATH, line=4)], + ), + ], + id="kwargs-with-double-underscore-substitution", + ), + ], +) +def test_get_event_entries_from_source__emit_log__expected_entries( + source: str, + expected_entries: list[EventEntry], +) -> None: + # Given / When entries = list( get_event_entries_from_source( source, - module_dotted="projects.code_references.views", - path=path, + module_dotted=MODULE_DOTTED, + path=PATH, ) ) # Then - assert entries == [ - EventEntry( - name="code_references.scan.created", - level="info", - attributes=frozenset(), - locations=[SourceLocation(path=path, line=4)], - ), - ] + assert entries == expected_entries From 4806185055fbe29741bab8dbd820c434070d2c57 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 21:19:04 +0100 Subject: [PATCH 03/20] feat: resolve `__name__` domain to module dotted path Third cycle for #201. A `structlog.get_logger(__name__)` call now resolves the domain statically to the module dotted path passed into the scanner, so loggers without an explicit string domain still produce catalogue entries. beep boop --- src/common/core/docgen/events.py | 16 ++++++++++++---- tests/unit/common/core/test_docgen_events.py | 17 +++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py index 975c24cd..93095a7b 100644 --- a/src/common/core/docgen/events.py +++ b/src/common/core/docgen/events.py @@ -25,7 +25,7 @@ def get_event_entries_from_source( path: Path, ) -> Iterator[EventEntry]: tree = ast.parse(source) - logger_domains = _collect_logger_domains(tree) + logger_domains = _collect_logger_domains(tree, module_dotted=module_dotted) for node in ast.walk(tree): if not isinstance(node, ast.Call): @@ -34,7 +34,7 @@ def get_event_entries_from_source( yield entry -def _collect_logger_domains(tree: ast.AST) -> dict[str, str]: +def _collect_logger_domains(tree: ast.AST, *, module_dotted: str) -> dict[str, str]: logger_domains: dict[str, str] = {} for node in ast.walk(tree): if not _is_logger_assignment(node): @@ -43,11 +43,19 @@ def _collect_logger_domains(tree: ast.AST) -> dict[str, str]: target = node.targets[0] assert isinstance(target, ast.Name) domain_arg = node.value.args[0] # type: ignore[attr-defined] - if isinstance(domain_arg, ast.Constant) and isinstance(domain_arg.value, str): - logger_domains[target.id] = domain_arg.value + if domain := _resolve_domain(domain_arg, module_dotted=module_dotted): + logger_domains[target.id] = domain return logger_domains +def _resolve_domain(node: ast.expr, *, module_dotted: str) -> str | None: + if isinstance(node, ast.Constant) and isinstance(node.value, str): + return node.value + if isinstance(node, ast.Name) and node.id == "__name__": + return module_dotted + return None + + def _build_entry_from_emit_call( node: ast.Call, logger_domains: dict[str, str], diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index 7cc09245..40c2a036 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -60,6 +60,23 @@ ], id="kwargs-with-double-underscore-substitution", ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger(__name__) +logger.info("something.happened") +""", + [ + EventEntry( + name=f"{MODULE_DOTTED}.something.happened", + level="info", + attributes=frozenset(), + locations=[SourceLocation(path=PATH, line=4)], + ), + ], + id="dunder-name-resolves-to-module-dotted", + ), ], ) def test_get_event_entries_from_source__emit_log__expected_entries( From bc2b22a733085396229bd80929f5dd9ebacc7173 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 21:19:57 +0100 Subject: [PATCH 04/20] feat: warn on unresolvable structlog logger domain Fourth cycle for #201. Adds `DocgenEventsWarning` and emits it with file + line context when a `structlog.get_logger(X)` argument is neither a string literal nor `__name__`. The logger is skipped rather than crashing the build, per the issue's acceptance contract. beep boop --- src/common/core/docgen/events.py | 25 ++++++++++++++--- tests/unit/common/core/test_docgen_events.py | 29 ++++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py index 93095a7b..20e34acb 100644 --- a/src/common/core/docgen/events.py +++ b/src/common/core/docgen/events.py @@ -1,9 +1,14 @@ import ast +import warnings from dataclasses import dataclass, field from pathlib import Path from typing import Iterator +class DocgenEventsWarning(UserWarning): + """Raised by the events scanner when a call site can't be resolved.""" + + @dataclass(frozen=True) class SourceLocation: path: Path @@ -25,7 +30,9 @@ def get_event_entries_from_source( path: Path, ) -> Iterator[EventEntry]: tree = ast.parse(source) - logger_domains = _collect_logger_domains(tree, module_dotted=module_dotted) + logger_domains = _collect_logger_domains( + tree, module_dotted=module_dotted, path=path + ) for node in ast.walk(tree): if not isinstance(node, ast.Call): @@ -34,7 +41,9 @@ def get_event_entries_from_source( yield entry -def _collect_logger_domains(tree: ast.AST, *, module_dotted: str) -> dict[str, str]: +def _collect_logger_domains( + tree: ast.AST, *, module_dotted: str, path: Path +) -> dict[str, str]: logger_domains: dict[str, str] = {} for node in ast.walk(tree): if not _is_logger_assignment(node): @@ -43,8 +52,16 @@ def _collect_logger_domains(tree: ast.AST, *, module_dotted: str) -> dict[str, s target = node.targets[0] assert isinstance(target, ast.Name) domain_arg = node.value.args[0] # type: ignore[attr-defined] - if domain := _resolve_domain(domain_arg, module_dotted=module_dotted): - logger_domains[target.id] = domain + domain = _resolve_domain(domain_arg, module_dotted=module_dotted) + if domain is None: + warnings.warn( + f"{path}:{node.lineno}: cannot statically resolve logger domain" + f" for `{target.id}`; skipping its events.", + DocgenEventsWarning, + stacklevel=2, + ) + continue + logger_domains[target.id] = domain return logger_domains diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index 40c2a036..7b93a517 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -3,6 +3,7 @@ import pytest from common.core.docgen.events import ( + DocgenEventsWarning, EventEntry, SourceLocation, get_event_entries_from_source, @@ -94,3 +95,31 @@ def test_get_event_entries_from_source__emit_log__expected_entries( # Then assert entries == expected_entries + + +def test_get_event_entries_from_source__non_resolvable_domain__warns_and_skips() -> ( + None +): + # Given + source = """\ +import structlog + +domain = "sneaky" +logger = structlog.get_logger(domain) +logger.info("something.happened") +""" + + # When + with pytest.warns(DocgenEventsWarning, match="domain") as warning_records: + entries = list( + get_event_entries_from_source( + source, + module_dotted=MODULE_DOTTED, + path=PATH, + ) + ) + + # Then + assert entries == [] + assert len(warning_records) == 1 + assert str(PATH) in str(warning_records[0].message) From ddf67a69e5cc1969addd9fea745afe7d125f18c6 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 21:20:38 +0100 Subject: [PATCH 05/20] test: pin that stdlib logging.getLogger callers stay out of catalogue Fifth cycle for #201, characterization-only. The `structlog.`-prefix gate on logger assignments already excludes stdlib loggers; this test pins that contract so a future refactor can't silently widen the scan to include them. beep boop --- tests/unit/common/core/test_docgen_events.py | 25 ++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index 7b93a517..3c780761 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -1,3 +1,4 @@ +import warnings from pathlib import Path import pytest @@ -97,6 +98,30 @@ def test_get_event_entries_from_source__emit_log__expected_entries( assert entries == expected_entries +def test_get_event_entries_from_source__stdlib_logging__ignored_silently() -> None: + # Given + source = """\ +import logging + +logger = logging.getLogger("code_references") +logger.info("scan.created", extra={"organisation": 1}) +""" + + # When + with warnings.catch_warnings(): + warnings.simplefilter("error", DocgenEventsWarning) + entries = list( + get_event_entries_from_source( + source, + module_dotted=MODULE_DOTTED, + path=PATH, + ) + ) + + # Then + assert entries == [] + + def test_get_event_entries_from_source__non_resolvable_domain__warns_and_skips() -> ( None ): From 15a302a2acd1785ac9b65e075d0f3058ff067b75 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 21:23:10 +0100 Subject: [PATCH 06/20] feat: warn on dynamic structlog event names Sixth cycle for #201. An emit call whose first positional argument is not a string literal now emits `DocgenEventsWarning` with the call site rather than being silently dropped. The warning suggests the `# docgen: event=` annotation as a future escape hatch for intentional dynamic names. beep boop --- src/common/core/docgen/events.py | 8 ++++++ tests/unit/common/core/test_docgen_events.py | 26 ++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py index 20e34acb..c7db6ee0 100644 --- a/src/common/core/docgen/events.py +++ b/src/common/core/docgen/events.py @@ -88,6 +88,14 @@ def _build_entry_from_emit_call( return None event_arg = node.args[0] if not (isinstance(event_arg, ast.Constant) and isinstance(event_arg.value, str)): + warnings.warn( + f"{path}:{node.lineno}: cannot statically resolve event name" + f" for `{value.id}.{func.attr}(...)`; skipping." + " Consider annotating the call site with a `# docgen: event=`" + " comment so the catalogue can still pick it up.", + DocgenEventsWarning, + stacklevel=2, + ) return None attributes = frozenset( kw.arg.replace("__", ".") for kw in node.keywords if kw.arg is not None diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index 3c780761..cebee35b 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -148,3 +148,29 @@ def test_get_event_entries_from_source__non_resolvable_domain__warns_and_skips() assert entries == [] assert len(warning_records) == 1 assert str(PATH) in str(warning_records[0].message) + + +def test_get_event_entries_from_source__dynamic_event_name__warns_and_skips() -> None: + # Given + source = """\ +import structlog + +logger = structlog.get_logger("code_references") +event_name = "scan." + "created" +logger.info(event_name) +""" + + # When + with pytest.warns(DocgenEventsWarning, match="event name") as warning_records: + entries = list( + get_event_entries_from_source( + source, + module_dotted=MODULE_DOTTED, + path=PATH, + ) + ) + + # Then + assert entries == [] + assert len(warning_records) == 1 + assert str(PATH) in str(warning_records[0].message) From 1eaf272fad1cd4baf564466cd37d1534d7afa595 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 21:34:03 +0100 Subject: [PATCH 07/20] feat: restrict emit detection to structlog.stdlib.BoundLogger methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Seventh cycle for #201. Attribute calls on a tracked logger now only produce entries when the method name is one of the emission methods that `structlog.stdlib.BoundLogger` actually exposes (`debug`, `info`, `warning`, `warn`, `error`, `critical`, `fatal`, `exception`, `msg`). Other methods are ignored silently so helper calls don't contaminate the catalogue. `exception` is retained because `launch_darkly/tasks.py` — one of the five acceptance-listed domains — emits `log.exception("import-failed")` as its canonical error pattern. `log(level, msg, ...)` is excluded: its first arg is the level, not the event name. beep boop --- src/common/core/docgen/events.py | 21 ++++++++ tests/unit/common/core/test_docgen_events.py | 51 ++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py index c7db6ee0..407930d4 100644 --- a/src/common/core/docgen/events.py +++ b/src/common/core/docgen/events.py @@ -9,6 +9,25 @@ class DocgenEventsWarning(UserWarning): """Raised by the events scanner when a call site can't be resolved.""" +# Emission methods exposed by `structlog.stdlib.BoundLogger` whose first +# positional argument is the event name. `log` is excluded because its +# first argument is the level, not the event; `bind`/`unbind`/`new` are +# not emissions. +EMIT_METHOD_NAMES = frozenset( + { + "debug", + "info", + "warning", + "warn", + "error", + "critical", + "fatal", + "exception", + "msg", + } +) + + @dataclass(frozen=True) class SourceLocation: path: Path @@ -81,6 +100,8 @@ def _build_entry_from_emit_call( func = node.func if not isinstance(func, ast.Attribute): return None + if func.attr not in EMIT_METHOD_NAMES: + return None value = func.value if not isinstance(value, ast.Name) or value.id not in logger_domains: return None diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index cebee35b..3c9add4c 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -150,6 +150,57 @@ def test_get_event_entries_from_source__non_resolvable_domain__warns_and_skips() assert str(PATH) in str(warning_records[0].message) +@pytest.mark.parametrize( + "source, expected_entries", + [ + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +logger.exception("import.failed") +""", + [ + EventEntry( + name="code_references.import.failed", + level="exception", + attributes=frozenset(), + locations=[SourceLocation(path=PATH, line=4)], + ), + ], + id="exception-method-is-recognised", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +logger.bind_or_whatever("not.an.event") +""", + [], + id="non-level-method-is-ignored-silently", + ), + ], +) +def test_get_event_entries_from_source__level_method__allowlist_enforced( + source: str, + expected_entries: list[EventEntry], +) -> None: + # Given / When + with warnings.catch_warnings(): + warnings.simplefilter("error", DocgenEventsWarning) + entries = list( + get_event_entries_from_source( + source, + module_dotted=MODULE_DOTTED, + path=PATH, + ) + ) + + # Then + assert entries == expected_entries + + def test_get_event_entries_from_source__dynamic_event_name__warns_and_skips() -> None: # Given source = """\ From a9758f8da2d2cce31ba82565481a7485ab3d2b74 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 21:45:41 +0100 Subject: [PATCH 08/20] test: unify scanner tests into one parametrised test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase A closure cycle for #201. Every scanner behaviour — positive, silently-ignored, and warn-and-skip — now lives in a single `test_get_event_entries_from_source__emit_log__expected_entries` parametrise. Each case specifies the source, expected entries, and expected `DocgenEventsWarning` instances (exact message match), so message regressions fail the suite rather than slipping through a substring match. Branch coverage on `events.py` stays at 100%. beep boop --- tests/unit/common/core/test_docgen_events.py | 219 ++++++++++--------- 1 file changed, 115 insertions(+), 104 deletions(-) diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index 3c9add4c..0b078a60 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -15,7 +15,7 @@ @pytest.mark.parametrize( - "source, expected_entries", + "source, expected_entries, expected_warnings", [ pytest.param( """\ @@ -32,6 +32,7 @@ locations=[SourceLocation(path=PATH, line=4)], ), ], + [], id="plain-info-no-kwargs", ), pytest.param( @@ -60,6 +61,7 @@ locations=[SourceLocation(path=PATH, line=4)], ), ], + [], id="kwargs-with-double-underscore-substitution", ), pytest.param( @@ -77,82 +79,9 @@ locations=[SourceLocation(path=PATH, line=4)], ), ], + [], id="dunder-name-resolves-to-module-dotted", ), - ], -) -def test_get_event_entries_from_source__emit_log__expected_entries( - source: str, - expected_entries: list[EventEntry], -) -> None: - # Given / When - entries = list( - get_event_entries_from_source( - source, - module_dotted=MODULE_DOTTED, - path=PATH, - ) - ) - - # Then - assert entries == expected_entries - - -def test_get_event_entries_from_source__stdlib_logging__ignored_silently() -> None: - # Given - source = """\ -import logging - -logger = logging.getLogger("code_references") -logger.info("scan.created", extra={"organisation": 1}) -""" - - # When - with warnings.catch_warnings(): - warnings.simplefilter("error", DocgenEventsWarning) - entries = list( - get_event_entries_from_source( - source, - module_dotted=MODULE_DOTTED, - path=PATH, - ) - ) - - # Then - assert entries == [] - - -def test_get_event_entries_from_source__non_resolvable_domain__warns_and_skips() -> ( - None -): - # Given - source = """\ -import structlog - -domain = "sneaky" -logger = structlog.get_logger(domain) -logger.info("something.happened") -""" - - # When - with pytest.warns(DocgenEventsWarning, match="domain") as warning_records: - entries = list( - get_event_entries_from_source( - source, - module_dotted=MODULE_DOTTED, - path=PATH, - ) - ) - - # Then - assert entries == [] - assert len(warning_records) == 1 - assert str(PATH) in str(warning_records[0].message) - - -@pytest.mark.parametrize( - "source, expected_entries", - [ pytest.param( """\ import structlog @@ -168,27 +97,132 @@ def test_get_event_entries_from_source__non_resolvable_domain__warns_and_skips() locations=[SourceLocation(path=PATH, line=4)], ), ], + [], id="exception-method-is-recognised", ), pytest.param( """\ import structlog +print("not a logger call") +""", + [], + [], + id="bare-function-call-ignored", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +logger.info() +""", + [], + [], + id="emit-without-event-name-ignored", + ), + pytest.param( + """\ +import structlog + +logger, spare = structlog.get_logger("a"), structlog.get_logger("b") +logger.info("scan.created") +""", + [], + [], + id="tuple-destructuring-ignored", + ), + pytest.param( + """\ +import structlog + +logger = unrelated() +logger.info("scan.created") +""", + [], + [], + id="assignment-from-non-attribute-call-ignored", + ), + pytest.param( + """\ +import structlog + +logger = factory.get_logger("code_references") +logger.info("scan.created") +""", + [], + [], + id="get_logger-on-non-structlog-ignored", + ), + pytest.param( + """\ +import logging + +logger = logging.getLogger("code_references") +logger.info("scan.created", extra={"organisation": 1}) +""", + [], + [], + id="stdlib-logging-getLogger-ignored", + ), + pytest.param( + """\ +import structlog + logger = structlog.get_logger("code_references") logger.bind_or_whatever("not.an.event") """, + [], [], id="non-level-method-is-ignored-silently", ), + pytest.param( + """\ +import structlog + +domain = "sneaky" +logger = structlog.get_logger(domain) +logger.info("something.happened") +""", + [], + [ + DocgenEventsWarning( + f"{PATH}:4: cannot statically resolve logger domain" + f" for `logger`; skipping its events." + ), + ], + id="non-resolvable-domain-warns-and-skips", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +event_name = "scan." + "created" +logger.info(event_name) +""", + [], + [ + DocgenEventsWarning( + f"{PATH}:5: cannot statically resolve event name" + f" for `logger.info(...)`; skipping." + " Consider annotating the call site with a" + " `# docgen: event=` comment so the catalogue" + " can still pick it up." + ), + ], + id="dynamic-event-name-warns-and-skips", + ), ], ) -def test_get_event_entries_from_source__level_method__allowlist_enforced( +def test_get_event_entries_from_source__emit_log__expected_entries( source: str, expected_entries: list[EventEntry], + expected_warnings: list[DocgenEventsWarning], ) -> None: # Given / When - with warnings.catch_warnings(): - warnings.simplefilter("error", DocgenEventsWarning) + with warnings.catch_warnings(record=True) as recorded: + warnings.simplefilter("always", DocgenEventsWarning) entries = list( get_event_entries_from_source( source, @@ -199,29 +233,6 @@ def test_get_event_entries_from_source__level_method__allowlist_enforced( # Then assert entries == expected_entries - - -def test_get_event_entries_from_source__dynamic_event_name__warns_and_skips() -> None: - # Given - source = """\ -import structlog - -logger = structlog.get_logger("code_references") -event_name = "scan." + "created" -logger.info(event_name) -""" - - # When - with pytest.warns(DocgenEventsWarning, match="event name") as warning_records: - entries = list( - get_event_entries_from_source( - source, - module_dotted=MODULE_DOTTED, - path=PATH, - ) - ) - - # Then - assert entries == [] - assert len(warning_records) == 1 - assert str(PATH) in str(warning_records[0].message) + assert [(w.category, str(w.message)) for w in recorded] == [ + (type(expected), str(expected)) for expected in expected_warnings + ] From 846eb062d4903b8b3787b635dd3851b8368ac541 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 21:51:53 +0100 Subject: [PATCH 09/20] feat: track bind-assigned loggers and merge bound attrs into emits First cycle of phase B for #201. Introduces the internal `_LoggerScope(domain, bound_attrs)` and replaces the flat `logger_domains` map with `logger_scopes`. An assignment of the shape `log = .bind(k=v, ...)` now registers `log` as a new scope carrying the parent's domain plus the extra bound attributes. Emission calls on `log` union those bound attrs with the call's own kwargs, so a `log.info("success", response_status=200)` rendered after `log = logger.bind(sentry_action=..., feature_name=...)` captures all three attributes in the catalogue. Further phase-B cycles will cover chained binds, multi-chained binds, self-reassignment, and cross-function scope isolation. beep boop --- src/common/core/docgen/events.py | 128 ++++++++++++------- tests/unit/common/core/test_docgen_events.py | 24 ++++ 2 files changed, 106 insertions(+), 46 deletions(-) diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py index 407930d4..b0823c90 100644 --- a/src/common/core/docgen/events.py +++ b/src/common/core/docgen/events.py @@ -42,6 +42,12 @@ class EventEntry: locations: list[SourceLocation] = field(default_factory=list) +@dataclass(frozen=True) +class _LoggerScope: + domain: str + bound_attrs: frozenset[str] + + def get_event_entries_from_source( source: str, *, @@ -49,39 +55,84 @@ def get_event_entries_from_source( path: Path, ) -> Iterator[EventEntry]: tree = ast.parse(source) - logger_domains = _collect_logger_domains( - tree, module_dotted=module_dotted, path=path - ) + logger_scopes = _collect_logger_scopes(tree, module_dotted=module_dotted, path=path) for node in ast.walk(tree): if not isinstance(node, ast.Call): continue - if entry := _build_entry_from_emit_call(node, logger_domains, path): + if entry := _build_entry_from_emit_call(node, logger_scopes, path): yield entry -def _collect_logger_domains( +def _collect_logger_scopes( tree: ast.AST, *, module_dotted: str, path: Path -) -> dict[str, str]: - logger_domains: dict[str, str] = {} +) -> dict[str, _LoggerScope]: + logger_scopes: dict[str, _LoggerScope] = {} for node in ast.walk(tree): - if not _is_logger_assignment(node): + if not isinstance(node, ast.Assign): continue - assert isinstance(node, ast.Assign) - target = node.targets[0] - assert isinstance(target, ast.Name) - domain_arg = node.value.args[0] # type: ignore[attr-defined] - domain = _resolve_domain(domain_arg, module_dotted=module_dotted) - if domain is None: - warnings.warn( - f"{path}:{node.lineno}: cannot statically resolve logger domain" - f" for `{target.id}`; skipping its events.", - DocgenEventsWarning, - stacklevel=2, - ) + if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name): continue - logger_domains[target.id] = domain - return logger_domains + target_id = node.targets[0].id + if scope := _resolve_seed( + node, logger_scopes=logger_scopes, module_dotted=module_dotted, path=path + ): + logger_scopes[target_id] = scope + elif scope := _resolve_bind(node, logger_scopes=logger_scopes): + logger_scopes[target_id] = scope + return logger_scopes + + +def _resolve_seed( + node: ast.Assign, + *, + logger_scopes: dict[str, _LoggerScope], + module_dotted: str, + path: Path, +) -> _LoggerScope | None: + call = node.value + if not isinstance(call, ast.Call): + return None + func = call.func + if not isinstance(func, ast.Attribute) or func.attr != "get_logger": + return None + if not isinstance(func.value, ast.Name) or func.value.id != "structlog": + return None + if not call.args: + return None + target = node.targets[0] + assert isinstance(target, ast.Name) + domain = _resolve_domain(call.args[0], module_dotted=module_dotted) + if domain is None: + warnings.warn( + f"{path}:{node.lineno}: cannot statically resolve logger domain" + f" for `{target.id}`; skipping its events.", + DocgenEventsWarning, + stacklevel=2, + ) + return None + return _LoggerScope(domain=domain, bound_attrs=frozenset()) + + +def _resolve_bind( + node: ast.Assign, + *, + logger_scopes: dict[str, _LoggerScope], +) -> _LoggerScope | None: + call = node.value + if not isinstance(call, ast.Call): + return None + func = call.func + if not isinstance(func, ast.Attribute) or func.attr != "bind": + return None + if not isinstance(func.value, ast.Name) or func.value.id not in logger_scopes: + return None + parent = logger_scopes[func.value.id] + new_attrs = _kwargs_as_attributes(call.keywords) + return _LoggerScope( + domain=parent.domain, + bound_attrs=parent.bound_attrs | new_attrs, + ) def _resolve_domain(node: ast.expr, *, module_dotted: str) -> str | None: @@ -92,9 +143,13 @@ def _resolve_domain(node: ast.expr, *, module_dotted: str) -> str | None: return None +def _kwargs_as_attributes(keywords: list[ast.keyword]) -> frozenset[str]: + return frozenset(kw.arg.replace("__", ".") for kw in keywords if kw.arg is not None) + + def _build_entry_from_emit_call( node: ast.Call, - logger_domains: dict[str, str], + logger_scopes: dict[str, _LoggerScope], path: Path, ) -> EventEntry | None: func = node.func @@ -103,7 +158,7 @@ def _build_entry_from_emit_call( if func.attr not in EMIT_METHOD_NAMES: return None value = func.value - if not isinstance(value, ast.Name) or value.id not in logger_domains: + if not isinstance(value, ast.Name) or value.id not in logger_scopes: return None if not node.args: return None @@ -118,30 +173,11 @@ def _build_entry_from_emit_call( stacklevel=2, ) return None - attributes = frozenset( - kw.arg.replace("__", ".") for kw in node.keywords if kw.arg is not None - ) + scope = logger_scopes[value.id] + attributes = scope.bound_attrs | _kwargs_as_attributes(node.keywords) return EventEntry( - name=f"{logger_domains[value.id]}.{event_arg.value}", + name=f"{scope.domain}.{event_arg.value}", level=func.attr, attributes=attributes, locations=[SourceLocation(path=path, line=node.lineno)], ) - - -def _is_logger_assignment(node: ast.AST) -> bool: - if not isinstance(node, ast.Assign): - return False - if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name): - return False - call = node.value - if not isinstance(call, ast.Call): - return False - func = call.func - if not isinstance(func, ast.Attribute): - return False - if func.attr != "get_logger": - return False - if not isinstance(func.value, ast.Name) or func.value.id != "structlog": - return False - return bool(call.args) diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index 0b078a60..868ee306 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -104,6 +104,30 @@ """\ import structlog +logger = structlog.get_logger("sentry_change_tracking") + + +def track() -> None: + log = logger.bind(sentry_action="trigger", feature_name="flag") + log.info("success", response_status=200) +""", + [ + EventEntry( + name="sentry_change_tracking.success", + level="info", + attributes=frozenset( + {"sentry_action", "feature_name", "response_status"} + ), + locations=[SourceLocation(path=PATH, line=8)], + ), + ], + [], + id="reassigned-bind-merges-attrs", + ), + pytest.param( + """\ +import structlog + print("not a logger call") """, [], From f4db7a8d1e6cb6c882ed22baeb6516ddb9174d8a Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 21:53:25 +0100 Subject: [PATCH 10/20] feat: handle chained bind on structlog emit calls Cycle B2 for #201. Emission calls may now target a `.bind(...)` chain rather than only a bare Name, so `logger.bind(a=1).info("e", b=2)` picks up both `a` and `b` as attributes. Works recursively, setting up multi-chain support in the next cycle. The dynamic-event-name warning now describes the chain shape (e.g. ``logger.bind(...).info(...)``) so the call-site is locatable from the message alone. beep boop --- src/common/core/docgen/events.py | 42 +++++++++++++++++--- tests/unit/common/core/test_docgen_events.py | 21 ++++++++++ 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py index b0823c90..96851911 100644 --- a/src/common/core/docgen/events.py +++ b/src/common/core/docgen/events.py @@ -157,8 +157,8 @@ def _build_entry_from_emit_call( return None if func.attr not in EMIT_METHOD_NAMES: return None - value = func.value - if not isinstance(value, ast.Name) or value.id not in logger_scopes: + scope = _scope_for_emit_target(func.value, logger_scopes) + if scope is None: return None if not node.args: return None @@ -166,14 +166,14 @@ def _build_entry_from_emit_call( if not (isinstance(event_arg, ast.Constant) and isinstance(event_arg.value, str)): warnings.warn( f"{path}:{node.lineno}: cannot statically resolve event name" - f" for `{value.id}.{func.attr}(...)`; skipping." - " Consider annotating the call site with a `# docgen: event=`" - " comment so the catalogue can still pick it up.", + f" for `{_describe_emit_target(func.value)}.{func.attr}(...)`;" + " skipping. Consider annotating the call site with a" + " `# docgen: event=` comment so the catalogue can still" + " pick it up.", DocgenEventsWarning, stacklevel=2, ) return None - scope = logger_scopes[value.id] attributes = scope.bound_attrs | _kwargs_as_attributes(node.keywords) return EventEntry( name=f"{scope.domain}.{event_arg.value}", @@ -181,3 +181,33 @@ def _build_entry_from_emit_call( attributes=attributes, locations=[SourceLocation(path=path, line=node.lineno)], ) + + +def _scope_for_emit_target( + target: ast.expr, + logger_scopes: dict[str, _LoggerScope], +) -> _LoggerScope | None: + if isinstance(target, ast.Name): + return logger_scopes.get(target.id) + if isinstance(target, ast.Call): + func = target.func + if not isinstance(func, ast.Attribute) or func.attr != "bind": + return None + parent = _scope_for_emit_target(func.value, logger_scopes) + if parent is None: + return None + return _LoggerScope( + domain=parent.domain, + bound_attrs=parent.bound_attrs | _kwargs_as_attributes(target.keywords), + ) + return None + + +def _describe_emit_target(target: ast.expr) -> str: + if isinstance(target, ast.Name): + return target.id + if isinstance(target, ast.Call): + func = target.func + if isinstance(func, ast.Attribute): + return f"{_describe_emit_target(func.value)}.{func.attr}(...)" + return "" diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index 868ee306..83a76788 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -128,6 +128,27 @@ def track() -> None: """\ import structlog +logger = structlog.get_logger("workflows") + + +def publish() -> None: + logger.bind(workflow_id=1).info("published", status="ok") +""", + [ + EventEntry( + name="workflows.published", + level="info", + attributes=frozenset({"workflow_id", "status"}), + locations=[SourceLocation(path=PATH, line=7)], + ), + ], + [], + id="chained-bind-merges-attrs", + ), + pytest.param( + """\ +import structlog + print("not a logger call") """, [], From 868262e4ca03723b0dfb358ba314ee3ef2bcb0f1 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 21:54:18 +0100 Subject: [PATCH 11/20] test: pin multi-chained bind merges attrs from every hop Cycle B3 for #201, characterization-only. The recursive `_scope_for_emit_target` added in the previous cycle already handles arbitrary bind depth; this test pins that contract so `logger.bind(a=1).bind(b=2).info("e", c=3)` can't silently regress to only capturing the innermost bind. beep boop --- tests/unit/common/core/test_docgen_events.py | 21 ++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index 83a76788..616cb6ef 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -149,6 +149,27 @@ def publish() -> None: """\ import structlog +logger = structlog.get_logger("workflows") + + +def publish() -> None: + logger.bind(workflow_id=1).bind(actor_id=2).info("published", status="ok") +""", + [ + EventEntry( + name="workflows.published", + level="info", + attributes=frozenset({"workflow_id", "actor_id", "status"}), + locations=[SourceLocation(path=PATH, line=7)], + ), + ], + [], + id="multi-chained-bind-merges-attrs", + ), + pytest.param( + """\ +import structlog + print("not a logger call") """, [], From 2cc44fe0c2986a5e7d856142488dc375ca1a6c5d Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 21:56:27 +0100 Subject: [PATCH 12/20] test: pin self-reassigned bind layers further attributes Cycle B4 for #201, characterization-only. `log = logger.bind(a=1); log = log.bind(b=2)` already works because `_resolve_bind` looks up the right-hand side in `logger_scopes` and the second assignment finds the previously registered `log`. This test pins that chain so a refactor can't silently drop the second bind. beep boop --- tests/unit/common/core/test_docgen_events.py | 23 ++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index 616cb6ef..ca3d1463 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -170,6 +170,29 @@ def publish() -> None: """\ import structlog +logger = structlog.get_logger("workflows") + + +def publish() -> None: + log = logger.bind(workflow_id=1) + log = log.bind(actor_id=2) + log.info("published", status="ok") +""", + [ + EventEntry( + name="workflows.published", + level="info", + attributes=frozenset({"workflow_id", "actor_id", "status"}), + locations=[SourceLocation(path=PATH, line=9)], + ), + ], + [], + id="self-reassigned-bind-layers-further", + ), + pytest.param( + """\ +import structlog + print("not a logger call") """, [], From d79d5ab621e44e59a318073cc3371bc700be716a Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 22:00:00 +0100 Subject: [PATCH 13/20] feat: isolate bind scopes per function with a stack-walking visitor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cycle B5 for #201. Replaces the single-pass flat-dict collection with an `ast.NodeVisitor` that pushes a copy of the enclosing scope on entry to a `FunctionDef`/`AsyncFunctionDef` and pops on exit. Binds inside one function no longer overwrite the same name in a sibling function — so a pair of `def`s that each do `log = logger.bind(...)` with different kwargs now produce two entries with their own attribute sets, rather than the second bind leaking into the first. Assignments and calls are processed in source order by the visitor, which also removes the two-pass design in favour of a single walk. beep boop --- src/common/core/docgen/events.py | 76 +++++++++++++------- tests/unit/common/core/test_docgen_events.py | 33 +++++++++ 2 files changed, 82 insertions(+), 27 deletions(-) diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py index 96851911..ba9ce59b 100644 --- a/src/common/core/docgen/events.py +++ b/src/common/core/docgen/events.py @@ -55,38 +55,60 @@ def get_event_entries_from_source( path: Path, ) -> Iterator[EventEntry]: tree = ast.parse(source) - logger_scopes = _collect_logger_scopes(tree, module_dotted=module_dotted, path=path) - - for node in ast.walk(tree): - if not isinstance(node, ast.Call): - continue - if entry := _build_entry_from_emit_call(node, logger_scopes, path): - yield entry - - -def _collect_logger_scopes( - tree: ast.AST, *, module_dotted: str, path: Path -) -> dict[str, _LoggerScope]: - logger_scopes: dict[str, _LoggerScope] = {} - for node in ast.walk(tree): - if not isinstance(node, ast.Assign): - continue - if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name): - continue - target_id = node.targets[0].id - if scope := _resolve_seed( - node, logger_scopes=logger_scopes, module_dotted=module_dotted, path=path - ): - logger_scopes[target_id] = scope - elif scope := _resolve_bind(node, logger_scopes=logger_scopes): - logger_scopes[target_id] = scope - return logger_scopes + visitor = _ScopeVisitor(module_dotted=module_dotted, path=path) + visitor.visit(tree) + yield from visitor.entries + + +class _ScopeVisitor(ast.NodeVisitor): + """Walks the AST in source order with a stack of logger scopes. + + Entering a function body pushes a copy of the enclosing scope so + binds that happen inside the function don't leak to sibling scopes. + """ + + def __init__(self, *, module_dotted: str, path: Path) -> None: + self.module_dotted = module_dotted + self.path = path + self._scope_stack: list[dict[str, _LoggerScope]] = [{}] + self.entries: list[EventEntry] = [] + + @property + def _scope(self) -> dict[str, _LoggerScope]: + return self._scope_stack[-1] + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + self._scope_stack.append(dict(self._scope)) + self.generic_visit(node) + self._scope_stack.pop() + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: + self._scope_stack.append(dict(self._scope)) + self.generic_visit(node) + self._scope_stack.pop() + + def visit_Assign(self, node: ast.Assign) -> None: + if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name): + target_id = node.targets[0].id + if scope := _resolve_seed( + node, + module_dotted=self.module_dotted, + path=self.path, + ): + self._scope[target_id] = scope + elif scope := _resolve_bind(node, logger_scopes=self._scope): + self._scope[target_id] = scope + self.generic_visit(node) + + def visit_Call(self, node: ast.Call) -> None: + if entry := _build_entry_from_emit_call(node, self._scope, self.path): + self.entries.append(entry) + self.generic_visit(node) def _resolve_seed( node: ast.Assign, *, - logger_scopes: dict[str, _LoggerScope], module_dotted: str, path: Path, ) -> _LoggerScope | None: diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index ca3d1463..5c6f1204 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -193,6 +193,39 @@ def publish() -> None: """\ import structlog +logger = structlog.get_logger("workflows") + + +def publish_one() -> None: + log = logger.bind(one_attr=1) + log.info("evt.one", kwarg_one=True) + + +def publish_two() -> None: + log = logger.bind(two_attr=2) + log.info("evt.two", kwarg_two=True) +""", + [ + EventEntry( + name="workflows.evt.one", + level="info", + attributes=frozenset({"one_attr", "kwarg_one"}), + locations=[SourceLocation(path=PATH, line=8)], + ), + EventEntry( + name="workflows.evt.two", + level="info", + attributes=frozenset({"two_attr", "kwarg_two"}), + locations=[SourceLocation(path=PATH, line=13)], + ), + ], + [], + id="binds-in-different-functions-do-not-leak", + ), + pytest.param( + """\ +import structlog + print("not a logger call") """, [], From 49a1a1b46422b65839be19471bf2fc1ed232c3c7 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 22:13:45 +0100 Subject: [PATCH 14/20] feat: register domainless structlog loggers in the catalogue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase B closure for #201. `structlog.get_logger()` (no args) is not a skippable case — the OTel structlog processor in `common.core.otel` emits those events without a namespace prefix (see the `event_dict.get("logger")` branch there), so the scanner registers them with an empty domain and emits entries whose name is the bare event string. Also adds the six other parametrised cases needed to close Phase B branch coverage: a bind inside `async def`, a dynamic event name rendered through the chain describer (warning text reads ``logger.bind(...).info(...)``), bind on an untracked name, chain with a non-bind middle attribute, chain whose base isn't a tracked logger, and a subscript target that should just be ignored. The unreachable `` fallback in `_describe_emit_target` goes too — scope resolution already narrows the input to a Name or a bind-chain Call, so the helper asserts that invariant rather than papering over it. Branch coverage on `events.py` back to 100% before phase C (cross-site merging). beep boop --- src/common/core/docgen/events.py | 16 +-- tests/unit/common/core/test_docgen_events.py | 103 +++++++++++++++++++ 2 files changed, 112 insertions(+), 7 deletions(-) diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py index ba9ce59b..dda9e84d 100644 --- a/src/common/core/docgen/events.py +++ b/src/common/core/docgen/events.py @@ -121,7 +121,7 @@ def _resolve_seed( if not isinstance(func.value, ast.Name) or func.value.id != "structlog": return None if not call.args: - return None + return _LoggerScope(domain="", bound_attrs=frozenset()) target = node.targets[0] assert isinstance(target, ast.Name) domain = _resolve_domain(call.args[0], module_dotted=module_dotted) @@ -197,8 +197,9 @@ def _build_entry_from_emit_call( ) return None attributes = scope.bound_attrs | _kwargs_as_attributes(node.keywords) + name = f"{scope.domain}.{event_arg.value}" if scope.domain else event_arg.value return EventEntry( - name=f"{scope.domain}.{event_arg.value}", + name=name, level=func.attr, attributes=attributes, locations=[SourceLocation(path=path, line=node.lineno)], @@ -228,8 +229,9 @@ def _scope_for_emit_target( def _describe_emit_target(target: ast.expr) -> str: if isinstance(target, ast.Name): return target.id - if isinstance(target, ast.Call): - func = target.func - if isinstance(func, ast.Attribute): - return f"{_describe_emit_target(func.value)}.{func.attr}(...)" - return "" + # `_scope_for_emit_target` already narrowed `target` to a bind-chain Call + # before any caller reaches this helper. + assert isinstance(target, ast.Call) + func = target.func + assert isinstance(func, ast.Attribute) + return f"{_describe_emit_target(func.value)}.{func.attr}(...)" diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index 5c6f1204..5ff6af36 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -226,6 +226,109 @@ def publish_two() -> None: """\ import structlog +logger = structlog.get_logger("workflows") + + +async def publish() -> None: + log = logger.bind(workflow_id=1) + log.info("published", status="ok") +""", + [ + EventEntry( + name="workflows.published", + level="info", + attributes=frozenset({"workflow_id", "status"}), + locations=[SourceLocation(path=PATH, line=8)], + ), + ], + [], + id="bind-inside-async-function", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +event_name = "dyn" +logger.bind(a=1).info(event_name) +""", + [], + [ + DocgenEventsWarning( + f"{PATH}:5: cannot statically resolve event name" + f" for `logger.bind(...).info(...)`; skipping." + " Consider annotating the call site with a" + " `# docgen: event=` comment so the catalogue" + " can still pick it up." + ), + ], + id="dynamic-event-name-in-chain-describes-chain", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger() +logger.info("evt", x=1) +""", + [ + EventEntry( + name="evt", + level="info", + attributes=frozenset({"x"}), + locations=[SourceLocation(path=PATH, line=4)], + ), + ], + [], + id="structlog-get_logger-no-args-produces-domainless-entry", + ), + pytest.param( + """\ +import structlog + +log = unknown.bind(x=1) +log.info("evt") +""", + [], + [], + id="bind-on-untracked-name-ignored", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +logger.unbind("x").info("evt", y=1) +""", + [], + [], + id="chain-with-non-bind-attr-ignored", + ), + pytest.param( + """\ +import structlog + +unknown.bind(a=1).info("evt") +""", + [], + [], + id="chain-bind-on-untracked-ignored", + ), + pytest.param( + """\ +import structlog + +loggers = {} +loggers["x"].info("evt") +""", + [], + [], + id="subscript-target-ignored", + ), + pytest.param( + """\ +import structlog + print("not a logger call") """, [], From 401c2a2dfdf3852c9d597b4e9833ff9157c9d1d8 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 22:27:59 +0100 Subject: [PATCH 15/20] feat: register class methods that return bound loggers Handles the common flagsmith pattern where a class method is used as a bound-logger accessor: class Worker: def logger(self, project_id): return logger.bind(project__id=project_id) def do_work(self, project_id): self.logger(project_id=project_id).info("project.worked") `visit_ClassDef` pre-scans the body for methods whose only non-docstring statement is `return .bind(...)` (or a resolvable bind chain) and registers the method name in a per-class scope. `_scope_for_emit_target` learns to recognise `self.` and `self.(...)` (also `cls.` for classmethods) and resolves them against that class scope. The emit's kwargs still union with the method's bound attrs. Limitations called out in later cycles: no cross-class inheritance yet (C-edge-2 adds same-file inheritance), no cross-file (won't ship in v1), no warning for unresolvable self/cls emits yet (C-edge-3). beep boop --- src/common/core/docgen/events.py | 81 ++++++++++++++++++-- tests/unit/common/core/test_docgen_events.py | 30 ++++++++ 2 files changed, 105 insertions(+), 6 deletions(-) diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py index dda9e84d..b8e07f8e 100644 --- a/src/common/core/docgen/events.py +++ b/src/common/core/docgen/events.py @@ -71,12 +71,27 @@ def __init__(self, *, module_dotted: str, path: Path) -> None: self.module_dotted = module_dotted self.path = path self._scope_stack: list[dict[str, _LoggerScope]] = [{}] + self._class_stack: list[dict[str, _LoggerScope]] = [] self.entries: list[EventEntry] = [] @property def _scope(self) -> dict[str, _LoggerScope]: return self._scope_stack[-1] + @property + def _class_scope(self) -> dict[str, _LoggerScope] | None: + return self._class_stack[-1] if self._class_stack else None + + def visit_ClassDef(self, node: ast.ClassDef) -> None: + class_scope: dict[str, _LoggerScope] = {} + for stmt in node.body: + if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)): + if accessor := _resolve_method_accessor(stmt, outer_scopes=self._scope): + class_scope[stmt.name] = accessor + self._class_stack.append(class_scope) + self.generic_visit(node) + self._class_stack.pop() + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: self._scope_stack.append(dict(self._scope)) self.generic_visit(node) @@ -101,7 +116,9 @@ def visit_Assign(self, node: ast.Assign) -> None: self.generic_visit(node) def visit_Call(self, node: ast.Call) -> None: - if entry := _build_entry_from_emit_call(node, self._scope, self.path): + if entry := _build_entry_from_emit_call( + node, self._scope, self.path, class_scope=self._class_scope + ): self.entries.append(entry) self.generic_visit(node) @@ -173,13 +190,15 @@ def _build_entry_from_emit_call( node: ast.Call, logger_scopes: dict[str, _LoggerScope], path: Path, + *, + class_scope: dict[str, _LoggerScope] | None = None, ) -> EventEntry | None: func = node.func if not isinstance(func, ast.Attribute): return None if func.attr not in EMIT_METHOD_NAMES: return None - scope = _scope_for_emit_target(func.value, logger_scopes) + scope = _scope_for_emit_target(func.value, logger_scopes, class_scope=class_scope) if scope is None: return None if not node.args: @@ -209,14 +228,38 @@ def _build_entry_from_emit_call( def _scope_for_emit_target( target: ast.expr, logger_scopes: dict[str, _LoggerScope], + *, + class_scope: dict[str, _LoggerScope] | None = None, ) -> _LoggerScope | None: if isinstance(target, ast.Name): return logger_scopes.get(target.id) + if isinstance(target, ast.Attribute): + # `self.` — method/property accessor on the enclosing class. + if ( + class_scope is not None + and isinstance(target.value, ast.Name) + and target.value.id in _SELF_OR_CLS + and target.attr in class_scope + ): + return class_scope[target.attr] + return None if isinstance(target, ast.Call): func = target.func - if not isinstance(func, ast.Attribute) or func.attr != "bind": + if not isinstance(func, ast.Attribute): return None - parent = _scope_for_emit_target(func.value, logger_scopes) + # `self.(...)` — method accessor invocation. + if ( + class_scope is not None + and isinstance(func.value, ast.Name) + and func.value.id in _SELF_OR_CLS + and func.attr in class_scope + ): + return class_scope[func.attr] + if func.attr != "bind": + return None + parent = _scope_for_emit_target( + func.value, logger_scopes, class_scope=class_scope + ) if parent is None: return None return _LoggerScope( @@ -226,11 +269,37 @@ def _scope_for_emit_target( return None +_SELF_OR_CLS = frozenset({"self", "cls"}) + + +def _resolve_method_accessor( + func_def: ast.FunctionDef | ast.AsyncFunctionDef, + *, + outer_scopes: dict[str, _LoggerScope], +) -> _LoggerScope | None: + """Return a scope for a method that just returns a bound logger.""" + body = list(func_def.body) + # Allow a leading docstring. + if ( + body + and isinstance(body[0], ast.Expr) + and isinstance(body[0].value, ast.Constant) + and isinstance(body[0].value.value, str) + ): + body = body[1:] + if len(body) != 1: + return None + stmt = body[0] + if not isinstance(stmt, ast.Return) or stmt.value is None: + return None + return _scope_for_emit_target(stmt.value, outer_scopes) + + def _describe_emit_target(target: ast.expr) -> str: if isinstance(target, ast.Name): return target.id - # `_scope_for_emit_target` already narrowed `target` to a bind-chain Call - # before any caller reaches this helper. + if isinstance(target, ast.Attribute): + return f"{_describe_emit_target(target.value)}.{target.attr}" assert isinstance(target, ast.Call) func = target.func assert isinstance(func, ast.Attribute) diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index 5ff6af36..54d9406d 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -438,6 +438,36 @@ async def publish() -> None: ], id="dynamic-event-name-warns-and-skips", ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("projects") + + +class ProjectWorker: + organisation_id: str + + def logger(self, project_id: str) -> structlog.BoundLogger: + return logger.bind( + organisation__id=self.organisation_id, + project__id=project_id, + ) + + def do_work(self, project_id: str) -> None: + self.logger(project_id=project_id).info("project.worked") +""", + [ + EventEntry( + name="projects.project.worked", + level="info", + attributes=frozenset({"organisation.id", "project.id"}), + locations=[SourceLocation(path=PATH, line=16)], + ), + ], + [], + id="self-method-accessor-resolves-via-class-scope", + ), ], ) def test_get_event_entries_from_source__emit_log__expected_entries( From a31fcc440540c1713bac18c09d581ca8c05bcd9a Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 22:33:07 +0100 Subject: [PATCH 16/20] feat: same-file inheritance + classmethods + unresolvable self/cls warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends class-scope method-accessor support: * `visit_ClassDef` registers each class in a module-level `{class_name: class_scope}` map and, when a child class's bases include a same-file Name-typed parent, inherits that parent's accessors (child's own methods override). * `cls` is already treated like `self` for accessor lookup (classmethod-friendly). * Property-style `self..info(...)` access now resolves through the same class scope as the call-form `self.(...).info(...)`. * Methods with leading docstrings register as accessors. * Unresolved `self..("literal", ...)` or `self.(...).("literal", ...)` emissions now emit a `DocgenEventsWarning` pointing the engineer at the inline-bind or move-accessor-to-same-file escape hatches. Cross-file inheritance is deliberately out of scope for v1 — the warning ensures those misses aren't silent. beep boop --- src/common/core/docgen/events.py | 31 ++++ tests/unit/common/core/test_docgen_events.py | 161 +++++++++++++++++++ 2 files changed, 192 insertions(+) diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py index b8e07f8e..e087b474 100644 --- a/src/common/core/docgen/events.py +++ b/src/common/core/docgen/events.py @@ -72,6 +72,7 @@ def __init__(self, *, module_dotted: str, path: Path) -> None: self.path = path self._scope_stack: list[dict[str, _LoggerScope]] = [{}] self._class_stack: list[dict[str, _LoggerScope]] = [] + self._module_classes: dict[str, dict[str, _LoggerScope]] = {} self.entries: list[EventEntry] = [] @property @@ -84,10 +85,17 @@ def _class_scope(self) -> dict[str, _LoggerScope] | None: def visit_ClassDef(self, node: ast.ClassDef) -> None: class_scope: dict[str, _LoggerScope] = {} + # Own methods take precedence — register them first. for stmt in node.body: if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)): if accessor := _resolve_method_accessor(stmt, outer_scopes=self._scope): class_scope[stmt.name] = accessor + # Inherit from same-file parents declared earlier (Name-typed bases only). + for base in node.bases: + if isinstance(base, ast.Name) and base.id in self._module_classes: + for method_name, method_scope in self._module_classes[base.id].items(): + class_scope.setdefault(method_name, method_scope) + self._module_classes[node.name] = class_scope self._class_stack.append(class_scope) self.generic_visit(node) self._class_stack.pop() @@ -200,6 +208,16 @@ def _build_entry_from_emit_call( return None scope = _scope_for_emit_target(func.value, logger_scopes, class_scope=class_scope) if scope is None: + if accessor_name := _self_cls_accessor_name(func.value): + warnings.warn( + f"{path}:{node.lineno}: cannot resolve" + f" `{_describe_emit_target(func.value)}.{func.attr}(...)`:" + f" `{accessor_name}` isn't a tracked accessor on this class" + " or any same-file parent. Consider inlining the bind at the" + " call site or moving the accessor into this file.", + DocgenEventsWarning, + stacklevel=2, + ) return None if not node.args: return None @@ -272,6 +290,19 @@ def _scope_for_emit_target( _SELF_OR_CLS = frozenset({"self", "cls"}) +def _self_cls_accessor_name(target: ast.expr) -> str | None: + """Name of the accessor in a `self.` / `cls.(...)` emit shape, else None.""" + if isinstance(target, ast.Attribute): + if isinstance(target.value, ast.Name) and target.value.id in _SELF_OR_CLS: + return target.attr + if isinstance(target, ast.Call): + func = target.func + if isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name): + if func.value.id in _SELF_OR_CLS: + return func.attr + return None + + def _resolve_method_accessor( func_def: ast.FunctionDef | ast.AsyncFunctionDef, *, diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index 54d9406d..f75d4887 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -468,6 +468,167 @@ def do_work(self, project_id: str) -> None: [], id="self-method-accessor-resolves-via-class-scope", ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("projects") + + +class BaseWorker: + organisation_id: str + + def logger(self, project_id: str) -> structlog.BoundLogger: + return logger.bind( + organisation__id=self.organisation_id, + project__id=project_id, + ) + + +class ProjectWorker(BaseWorker): + def do_work(self, project_id: str) -> None: + self.logger(project_id=project_id).info("project.worked") +""", + [ + EventEntry( + name="projects.project.worked", + level="info", + attributes=frozenset({"organisation.id", "project.id"}), + locations=[SourceLocation(path=PATH, line=18)], + ), + ], + [], + id="self-method-accessor-inherited-from-same-file-parent", + ), + pytest.param( + """\ +import structlog + + +class Worker: + def do_work(self) -> None: + self.logger.info("project.worked") + self.logger_factory().info("project.factoried") +""", + [], + [ + DocgenEventsWarning( + f"{PATH}:6: cannot resolve `self.logger.info(...)`:" + " `logger` isn't a tracked accessor on this class or any" + " same-file parent. Consider inlining the bind at the" + " call site or moving the accessor into this file." + ), + DocgenEventsWarning( + f"{PATH}:7: cannot resolve" + " `self.logger_factory(...).info(...)`:" + " `logger_factory` isn't a tracked accessor on this class" + " or any same-file parent. Consider inlining the bind at" + " the call site or moving the accessor into this file." + ), + ], + id="unresolvable-self-emit-warns-and-skips", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("projects") + + +class ProjectWorker: + @classmethod + def logger(cls, project_id: str) -> structlog.BoundLogger: + return logger.bind(project__id=project_id) + + @classmethod + def do_work(cls, project_id: str) -> None: + cls.logger(project_id=project_id).info("project.worked") +""", + [ + EventEntry( + name="projects.project.worked", + level="info", + attributes=frozenset({"project.id"}), + locations=[SourceLocation(path=PATH, line=13)], + ), + ], + [], + id="cls-method-accessor-resolves-via-class-scope", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("projects") + + +class ProjectWorker: + @property + def logger(self) -> structlog.BoundLogger: + \"\"\"Accessor with a docstring.\"\"\" + return logger.bind(worker="project") + + def do_work(self) -> None: + self.logger.info("project.worked") +""", + [ + EventEntry( + name="projects.project.worked", + level="info", + attributes=frozenset({"worker"}), + locations=[SourceLocation(path=PATH, line=13)], + ), + ], + [], + id="property-accessor-with-docstring-resolves-via-class-scope", + ), + pytest.param( + """\ +import structlog + +foo.bar.info("evt") +""", + [], + [], + id="non-self-attribute-chain-ignored", + ), + pytest.param( + """\ +import structlog + +some_factory().info("evt") +""", + [], + [], + id="non-attribute-func-target-ignored", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("projects") + + +class Mixin: + def logger(self) -> structlog.BoundLogger: + return logger.bind(from_mixin=1) + + +class ProjectWorker(UnknownExternalBase, Mixin): + def do_work(self) -> None: + self.logger().info("project.worked") +""", + [ + EventEntry( + name="projects.project.worked", + level="info", + attributes=frozenset({"from_mixin"}), + locations=[SourceLocation(path=PATH, line=13)], + ), + ], + [], + id="multiple-bases-skip-unknown-and-inherit-from-known", + ), ], ) def test_get_event_entries_from_source__emit_log__expected_entries( From 6de4b08e47724d81bfefe986d208610a0b361f5a Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 22:36:53 +0100 Subject: [PATCH 17/20] feat: merge event entries across call sites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `merge_event_entries(entries)` which collapses entries sharing an event name: unions the attribute sets, concatenates the locations, keeps the first-seen log level, and returns the result sorted alphabetically by event name. Diverging log levels for the same event name emit a `DocgenEventsWarning` naming both call sites and the level that won, so the engineer can reconcile the emission sites. Sets up phase D (filesystem walk) — which will feed per-file entries through this merger to produce the single, deduplicated catalogue the docgen command renders. beep boop --- src/common/core/docgen/events.py | 35 ++++- tests/unit/common/core/test_docgen_events.py | 136 +++++++++++++++++++ 2 files changed, 170 insertions(+), 1 deletion(-) diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py index e087b474..10e24a52 100644 --- a/src/common/core/docgen/events.py +++ b/src/common/core/docgen/events.py @@ -2,7 +2,7 @@ import warnings from dataclasses import dataclass, field from pathlib import Path -from typing import Iterator +from typing import Iterable, Iterator class DocgenEventsWarning(UserWarning): @@ -48,6 +48,39 @@ class _LoggerScope: bound_attrs: frozenset[str] +def merge_event_entries(entries: Iterable[EventEntry]) -> list[EventEntry]: + """Collapse entries sharing an event name: union attributes and locations. + + Diverging log levels trigger a `DocgenEventsWarning`; the first-seen level + wins. Output is sorted alphabetically by event name. + """ + merged: dict[str, EventEntry] = {} + for entry in entries: + if existing := merged.get(entry.name): + if entry.level != existing.level: + original_location = existing.locations[0] + new_location = entry.locations[0] + warnings.warn( + f"`{entry.name}` is emitted at diverging log levels:" + f" `{existing.level}` at {original_location.path}:{original_location.line}," + f" `{entry.level}` at {new_location.path}:{new_location.line}." + f" Keeping first-seen level `{existing.level}`; reconcile" + " the emission sites to silence this warning.", + DocgenEventsWarning, + stacklevel=2, + ) + existing.attributes = existing.attributes | entry.attributes + existing.locations = existing.locations + entry.locations + else: + merged[entry.name] = EventEntry( + name=entry.name, + level=entry.level, + attributes=entry.attributes, + locations=list(entry.locations), + ) + return sorted(merged.values(), key=lambda e: e.name) + + def get_event_entries_from_source( source: str, *, diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index f75d4887..f3007e9e 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -8,6 +8,7 @@ EventEntry, SourceLocation, get_event_entries_from_source, + merge_event_entries, ) PATH = Path("projects/code_references/views.py") @@ -652,3 +653,138 @@ def test_get_event_entries_from_source__emit_log__expected_entries( assert [(w.category, str(w.message)) for w in recorded] == [ (type(expected), str(expected)) for expected in expected_warnings ] + + +OTHER_PATH = Path("projects/other.py") + + +@pytest.mark.parametrize( + "entries, expected_merged, expected_warnings", + [ + pytest.param( + [ + EventEntry( + name="code_references.scan.created", + level="info", + attributes=frozenset({"organisation.id"}), + locations=[SourceLocation(path=PATH, line=10)], + ), + EventEntry( + name="code_references.scan.created", + level="info", + attributes=frozenset({"code_references.count"}), + locations=[SourceLocation(path=OTHER_PATH, line=20)], + ), + ], + [ + EventEntry( + name="code_references.scan.created", + level="info", + attributes=frozenset({"organisation.id", "code_references.count"}), + locations=[ + SourceLocation(path=PATH, line=10), + SourceLocation(path=OTHER_PATH, line=20), + ], + ), + ], + [], + id="same-event-two-sites-collapses-with-union-attrs", + ), + pytest.param( + [ + EventEntry( + name="code_references.scan.created", + level="info", + attributes=frozenset(), + locations=[SourceLocation(path=PATH, line=10)], + ), + EventEntry( + name="code_references.scan.created", + level="debug", + attributes=frozenset(), + locations=[SourceLocation(path=OTHER_PATH, line=20)], + ), + ], + [ + EventEntry( + name="code_references.scan.created", + level="info", + attributes=frozenset(), + locations=[ + SourceLocation(path=PATH, line=10), + SourceLocation(path=OTHER_PATH, line=20), + ], + ), + ], + [ + DocgenEventsWarning( + "`code_references.scan.created` is emitted at diverging" + f" log levels: `info` at {PATH}:10," + f" `debug` at {OTHER_PATH}:20." + " Keeping first-seen level `info`; reconcile the emission" + " sites to silence this warning." + ), + ], + id="diverging-level-warns-and-first-wins", + ), + pytest.param( + [ + EventEntry( + name="zulu", + level="info", + attributes=frozenset(), + locations=[SourceLocation(path=PATH, line=3)], + ), + EventEntry( + name="alpha", + level="info", + attributes=frozenset(), + locations=[SourceLocation(path=PATH, line=1)], + ), + EventEntry( + name="mike", + level="info", + attributes=frozenset(), + locations=[SourceLocation(path=PATH, line=2)], + ), + ], + [ + EventEntry( + name="alpha", + level="info", + attributes=frozenset(), + locations=[SourceLocation(path=PATH, line=1)], + ), + EventEntry( + name="mike", + level="info", + attributes=frozenset(), + locations=[SourceLocation(path=PATH, line=2)], + ), + EventEntry( + name="zulu", + level="info", + attributes=frozenset(), + locations=[SourceLocation(path=PATH, line=3)], + ), + ], + [], + id="output-sorted-alphabetically-by-event-name", + ), + ], +) +def test_merge_event_entries__various_entries__expected_merged_and_warnings( + entries: list[EventEntry], + expected_merged: list[EventEntry], + expected_warnings: list[DocgenEventsWarning], +) -> None: + # Given / When + with warnings.catch_warnings(record=True) as recorded: + warnings.simplefilter("always", DocgenEventsWarning) + merged = merge_event_entries(entries) + + # Then + assert merged == expected_merged + assert [(w.category, str(w.message)) for w in recorded] == [ + (type(expected), str(expected)) for expected in expected_warnings + ] From 4b5e9da9cb23a489b6c2996ec69c587e132f704b Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 22:42:57 +0100 Subject: [PATCH 18/20] feat: walk an app tree and yield every scanned event entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `get_event_entries_from_tree(root, *, app_label, module_prefix)` which rglobs every `*.py` under `root`, computes the module dotted path for each file (via `module_prefix`), and pipes it through `get_event_entries_from_source`. Skips: - `migrations/` and `tests/` anywhere in the path. - `conftest.py` and `test_*.py` at any depth. - `management/commands/` unless `app_label == "task_processor"` — the task processor app emits operationally important runner-loop events from that directory. Combined with `merge_event_entries` and the per-source scanner, this is everything the docgen management command needs from the scanner layer. Phase F next: template + `flagsmith docgen events` subcommand + integration snapshot test. beep boop --- src/common/core/docgen/events.py | 46 ++++++ tests/unit/common/core/test_docgen_events.py | 161 +++++++++++++++++++ 2 files changed, 207 insertions(+) diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py index 10e24a52..0a2fee91 100644 --- a/src/common/core/docgen/events.py +++ b/src/common/core/docgen/events.py @@ -48,6 +48,52 @@ class _LoggerScope: bound_attrs: frozenset[str] +_EXCLUDED_DIR_NAMES = frozenset({"migrations", "tests"}) +_EXCLUDED_MANAGEMENT_DIR = ("management", "commands") +_TASK_PROCESSOR_APP_LABEL = "task_processor" + + +def get_event_entries_from_tree( + root: Path, + *, + app_label: str, + module_prefix: str, +) -> Iterator[EventEntry]: + """Walk every `*.py` under `root` and yield its scanned event entries. + + Skips `migrations/`, `tests/`, `conftest.py`, and `test_*.py`. Also skips + `management/commands/` unless `app_label == "task_processor"`, where the + runner loop's events are operationally important. + """ + for file_path in sorted(root.rglob("*.py")): + if _should_skip(file_path.relative_to(root), app_label=app_label): + continue + rel_parts = file_path.relative_to(root).with_suffix("").parts + module_dotted = ".".join((module_prefix, *rel_parts)) + yield from get_event_entries_from_source( + file_path.read_text(), + module_dotted=module_dotted, + path=file_path, + ) + + +def _should_skip(relative: Path, *, app_label: str) -> bool: + parts = relative.parts + if any(part in _EXCLUDED_DIR_NAMES for part in parts[:-1]): + return True + filename = parts[-1] + if filename == "conftest.py" or filename.startswith("test_"): + return True + if ( + len(parts) >= 3 + and parts[0] == _EXCLUDED_MANAGEMENT_DIR[0] + and parts[1] == _EXCLUDED_MANAGEMENT_DIR[1] + and app_label != _TASK_PROCESSOR_APP_LABEL + ): + return True + return False + + def merge_event_entries(entries: Iterable[EventEntry]) -> list[EventEntry]: """Collapse entries sharing an event name: union attributes and locations. diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index f3007e9e..77074305 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -1,13 +1,16 @@ import warnings from pathlib import Path +from unittest.mock import ANY import pytest +from pyfakefs.fake_filesystem import FakeFilesystem from common.core.docgen.events import ( DocgenEventsWarning, EventEntry, SourceLocation, get_event_entries_from_source, + get_event_entries_from_tree, merge_event_entries, ) @@ -788,3 +791,161 @@ def test_merge_event_entries__various_entries__expected_merged_and_warnings( assert [(w.category, str(w.message)) for w in recorded] == [ (type(expected), str(expected)) for expected in expected_warnings ] + + +ROOT = Path("/fake/myapp") + + +@pytest.mark.parametrize( + "tree, app_label, expected_entries", + [ + pytest.param( + { + "views.py": """\ +import structlog +logger = structlog.get_logger("myapp") +logger.info("viewed") +""", + "nested/models.py": """\ +import structlog +logger = structlog.get_logger("myapp") +logger.info("modeled") +""", + }, + "myapp", + [ + EventEntry( + name="myapp.modeled", + level=ANY, + attributes=ANY, + locations=[ + SourceLocation(path=ROOT / "nested/models.py", line=3), + ], + ), + EventEntry( + name="myapp.viewed", + level=ANY, + attributes=ANY, + locations=[SourceLocation(path=ROOT / "views.py", line=3)], + ), + ], + id="walks-all-py-files-across-subdirs", + ), + pytest.param( + { + "views.py": """\ +import structlog +logger = structlog.get_logger("myapp") +logger.info("included") +""", + "migrations/0001_initial.py": """\ +import structlog +logger = structlog.get_logger("myapp") +logger.info("from_migration") +""", + "tests/test_views.py": """\ +import structlog +logger = structlog.get_logger("myapp") +logger.info("from_test_subdir") +""", + "conftest.py": """\ +import structlog +logger = structlog.get_logger("myapp") +logger.info("from_conftest") +""", + "test_thing.py": """\ +import structlog +logger = structlog.get_logger("myapp") +logger.info("from_test_module") +""", + }, + "myapp", + [ + EventEntry( + name="myapp.included", + level=ANY, + attributes=ANY, + locations=[SourceLocation(path=ROOT / "views.py", line=3)], + ), + ], + id="skips-migrations-tests-conftest-test-modules", + ), + pytest.param( + { + "views.py": """\ +import structlog +logger = structlog.get_logger("myapp") +logger.info("viewed") +""", + "management/commands/do_thing.py": """\ +import structlog +logger = structlog.get_logger("myapp") +logger.info("from_command") +""", + }, + "myapp", + [ + EventEntry( + name="myapp.viewed", + level=ANY, + attributes=ANY, + locations=[SourceLocation(path=ROOT / "views.py", line=3)], + ), + ], + id="skips-management-commands-by-default", + ), + pytest.param( + { + "views.py": """\ +import structlog +logger = structlog.get_logger("task_processor") +logger.info("setup") +""", + "management/commands/run_processor.py": """\ +import structlog +logger = structlog.get_logger("task_processor") +logger.info("task.dispatched") +""", + }, + "task_processor", + [ + EventEntry( + name="task_processor.setup", + level=ANY, + attributes=ANY, + locations=[SourceLocation(path=ROOT / "views.py", line=3)], + ), + EventEntry( + name="task_processor.task.dispatched", + level=ANY, + attributes=ANY, + locations=[ + SourceLocation( + path=ROOT / "management/commands/run_processor.py", + line=3, + ), + ], + ), + ], + id="includes-management-commands-for-task-processor-app", + ), + ], +) +def test_get_event_entries_from_tree__various_trees__expected_entries( + fs: FakeFilesystem, + tree: dict[str, str], + app_label: str, + expected_entries: list[EventEntry], +) -> None: + # Given + for relative, source in tree.items(): + fs.create_file(str(ROOT / relative), contents=source) + + # When + entries = sorted( + get_event_entries_from_tree(ROOT, app_label=app_label, module_prefix=app_label), + key=lambda e: e.name, + ) + + # Then + assert entries == expected_entries From 866fc59bbc6953d2c1af0c52db739f28337f7c6c Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 23:28:40 +0100 Subject: [PATCH 19/20] feat: wire `flagsmith docgen events` subcommand + integration coverage Final layer of #201. Adds a Jinja template at `templates/docgen-events.md` mirroring the existing metrics template, and a new `events` subparser on the `docgen` management command that walks every app returned by `apps.get_app_configs()`, merges entries with `merge_event_entries`, and renders the template. Source paths are rendered relative to the git repo root (queried via `git rev-parse --show-toplevel`), with a CWD fallback when the command is invoked outside a git repo. Paths that aren't under the resolved root stay absolute. Three integration tests cover each path through the public CLI, no private helpers under test: - `fixture_app` / `patched_apps` pytest fixtures stand up a two-file tree under `tmp_path/api/fixture_app` and wire the docgen command to scan only it. - `test_docgen__events__runs_expected` git-init's the fixture tree and snapshot-compares stdout against the rendered catalogue. - `test_docgen__events__falls_back_to_cwd_when_not_in_git_repo` omits the `git init`; asserts cwd-relative paths are emitted. - `test_docgen__events__keeps_absolute_path_when_app_outside_repo_root` init's a sibling subtree as the git repo; asserts absolute paths pass through unchanged. Both `events.py` and `docgen.py` at 100% branch coverage. beep boop --- src/common/core/management/commands/docgen.py | 77 ++++++++++++ src/common/core/templates/docgen-events.md | 20 +++ .../test_docgen__events__runs_expected.txt | 30 +++++ tests/integration/core/test_commands.py | 117 ++++++++++++++++++ 4 files changed, 244 insertions(+) create mode 100644 src/common/core/templates/docgen-events.md create mode 100644 tests/integration/core/snapshots/test_docgen__events__runs_expected.txt diff --git a/src/common/core/management/commands/docgen.py b/src/common/core/management/commands/docgen.py index 07bbdfe7..2d87b875 100644 --- a/src/common/core/management/commands/docgen.py +++ b/src/common/core/management/commands/docgen.py @@ -1,12 +1,21 @@ +import subprocess from operator import itemgetter +from pathlib import Path from typing import Any, Callable import prometheus_client +from django.apps import apps from django.core.management import BaseCommand, CommandParser from django.template.loader import get_template from django.utils.module_loading import autodiscover_modules from prometheus_client.metrics import MetricWrapperBase +from common.core.docgen.events import ( + EventEntry, + get_event_entries_from_tree, + merge_event_entries, +) + class Command(BaseCommand): help = "Generate documentation for the Flagsmith codebase." @@ -23,6 +32,12 @@ def add_arguments(self, parser: CommandParser) -> None: ) metric_parser.set_defaults(handle_method=self.handle_metrics) + events_parser = subparsers.add_parser( + "events", + help="Generate structlog events documentation.", + ) + events_parser.set_defaults(handle_method=self.handle_events) + def initialise(self) -> None: from common.gunicorn import metrics # noqa: F401 @@ -61,3 +76,65 @@ def handle_metrics(self, *args: Any, **options: Any) -> None: context={"flagsmith_metrics": flagsmith_metrics}, ) ) + + def handle_events(self, *args: Any, **options: Any) -> None: + template = get_template("docgen-events.md") + + repo_root = _get_repo_root() + entries: list[EventEntry] = [] + for app_config in apps.get_app_configs(): + entries.extend( + get_event_entries_from_tree( + Path(app_config.path), + app_label=app_config.label, + module_prefix=app_config.name, + ) + ) + merged = merge_event_entries(entries) + + flagsmith_events = [ + { + "name": entry.name, + "level": entry.level, + "locations": [ + { + "path": _relative_if_under(location.path, repo_root), + "line": location.line, + } + for location in entry.locations + ], + "attributes": sorted(entry.attributes), + } + for entry in merged + ] + + self.stdout.write( + template.render( + context={"flagsmith_events": flagsmith_events}, + ) + ) + + +def _get_repo_root() -> Path: + """Resolve the git repo root for emitted source paths. + + Falls back to the current working directory when git isn't available or + the CWD isn't inside a repo. + """ + try: + result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + capture_output=True, + text=True, + check=True, + ) + except (subprocess.CalledProcessError, FileNotFoundError): + return Path.cwd() + return Path(result.stdout.strip()) + + +def _relative_if_under(path: Path, base: Path) -> Path: + try: + return path.relative_to(base) + except ValueError: + return path diff --git a/src/common/core/templates/docgen-events.md b/src/common/core/templates/docgen-events.md new file mode 100644 index 00000000..b5531075 --- /dev/null +++ b/src/common/core/templates/docgen-events.md @@ -0,0 +1,20 @@ +--- +title: Events +sidebar_label: Events +sidebar_position: 30 +--- + +Flagsmith backend emits [OpenTelemetry events](https://opentelemetry.io/docs/specs/otel/logs/data-model/#events) +that can be ingested to downstream observability systems and/or a data warehouse of your choice via OTLP. +To learn how to configure this, see [OpenTelemetry](deployment-self-hosting/scaling-and-performance/opentelemetry). + +## Event catalog +{% for event in flagsmith_events %} +### `{{ event.name }}` + +Logged at `{{ event.level }}` from: +{% for location in event.locations %} - `{{ location.path }}:{{ location.line }}` +{% endfor %} +Attributes: +{% for attr in event.attributes %} - `{{ attr }}` +{% endfor %}{% endfor %} diff --git a/tests/integration/core/snapshots/test_docgen__events__runs_expected.txt b/tests/integration/core/snapshots/test_docgen__events__runs_expected.txt new file mode 100644 index 00000000..e4ad7293 --- /dev/null +++ b/tests/integration/core/snapshots/test_docgen__events__runs_expected.txt @@ -0,0 +1,30 @@ +--- +title: Events +sidebar_label: Events +sidebar_position: 30 +--- + +Flagsmith backend emits [OpenTelemetry events](https://opentelemetry.io/docs/specs/otel/logs/data-model/#events) +that can be ingested to downstream observability systems and/or a data warehouse of your choice via OTLP. +To learn how to configure this, see [OpenTelemetry](deployment-self-hosting/scaling-and-performance/opentelemetry). + +## Event catalog + +### `code_references.scan.created` + +Logged at `info` from: + - `api/fixture_app/views.py:7` + +Attributes: + - `code_references.count` + - `organisation.id` + +### `workflows.published` + +Logged at `info` from: + - `api/fixture_app/nested/workflows.py:7` + +Attributes: + - `status` + - `workflow_id` + diff --git a/tests/integration/core/test_commands.py b/tests/integration/core/test_commands.py index 97bbf43d..93d349d8 100644 --- a/tests/integration/core/test_commands.py +++ b/tests/integration/core/test_commands.py @@ -1,6 +1,10 @@ +import subprocess +from pathlib import Path + import prometheus_client import pytest from django.core.management import call_command +from pytest_mock import MockerFixture from common.test_tools import SnapshotFixture @@ -20,3 +24,116 @@ def test_docgen__metrics__runs_expected( # Then assert capsys.readouterr().out == expected_stdout + + +@pytest.fixture +def fixture_app(tmp_path: Path) -> Path: + """Write a two-file fixture app under `tmp_path/api/fixture_app`.""" + app_path = tmp_path / "api" / "fixture_app" + app_path.mkdir(parents=True) + (app_path / "views.py").write_text( + """\ +import structlog + +logger = structlog.get_logger("code_references") + + +def perform_create() -> None: + logger.info( + "scan.created", + organisation__id=1, + code_references__count=2, + ) +""" + ) + (app_path / "nested").mkdir() + (app_path / "nested/workflows.py").write_text( + """\ +import structlog + +logger = structlog.get_logger("workflows") + + +def publish() -> None: + logger.bind(workflow_id=1).info("published", status="ok") +""" + ) + return app_path + + +@pytest.fixture +def patched_apps(fixture_app: Path, mocker: MockerFixture) -> None: + """Patch `docgen.apps` so only the fixture app is scanned.""" + fake_config = mocker.Mock() + fake_config.path = str(fixture_app) + fake_config.label = "fixture_app" + fake_config.name = "fixture_app" + mock_apps = mocker.patch("common.core.management.commands.docgen.apps") + mock_apps.get_app_configs.return_value = [fake_config] + + +@pytest.mark.django_db +def test_docgen__events__runs_expected( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], + monkeypatch: pytest.MonkeyPatch, + patched_apps: None, + snapshot: SnapshotFixture, +) -> None: + # Given — `tmp_path` is a real git repo, so `_get_repo_root` resolves + # paths against it without mocking the subprocess call. + subprocess.run(["git", "init"], cwd=tmp_path, check=True, capture_output=True) + monkeypatch.chdir(tmp_path) + expected_stdout = snapshot() + + # When + call_command("docgen", "events") + + # Then + assert capsys.readouterr().out == expected_stdout + + +@pytest.mark.django_db +def test_docgen__events_not_in_git_repo__falls_back_to_cwd( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], + monkeypatch: pytest.MonkeyPatch, + patched_apps: None, +) -> None: + # Given — no `git init`; `_get_repo_root` swallows the + # `CalledProcessError` and falls back to `Path.cwd()`, which we pin to + # `tmp_path` via `monkeypatch.chdir`. + monkeypatch.chdir(tmp_path) + + # When + call_command("docgen", "events") + + # Then + stdout = capsys.readouterr().out + assert " - `api/fixture_app/views.py:7`" in stdout + assert " - `api/fixture_app/nested/workflows.py:7`" in stdout + + +@pytest.mark.django_db +def test_docgen__events_app_outside_repo_root__keeps_absolute_path( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], + monkeypatch: pytest.MonkeyPatch, + fixture_app: Path, + patched_apps: None, +) -> None: + # Given — the resolved repo root and the fixture app live in *different* + # subtrees under `tmp_path`, so `_relative_if_under` hits its `ValueError` + # branch and keeps the absolute path on each emitted location. + repo_root = tmp_path / "repo" + repo_root.mkdir() + subprocess.run(["git", "init"], cwd=repo_root, check=True, capture_output=True) + monkeypatch.chdir(repo_root) + + # When + call_command("docgen", "events") + + # Then + stdout = capsys.readouterr().out + assert f" - `{fixture_app}/views.py:7`" in stdout + assert f" - `{fixture_app}/nested/workflows.py:7`" in stdout From d1669ef0f9f01cb2f515b5fe08acb3a6fc58803e Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 17 Apr 2026 23:55:45 +0100 Subject: [PATCH 20/20] fix: dedupe identical source locations when merging event entries When Django's INSTALLED_APPS contains both a parent app and a nested sub-app, the same source file gets scanned via both app roots and emits two identical EventEntry objects. The merge step then stacked their location lists, producing duplicate `path:line` entries in the rendered catalogue. Collapse identical locations on merge so overlapping app layouts stay idempotent. beep boop --- src/common/core/docgen/events.py | 6 ++++- tests/unit/common/core/test_docgen_events.py | 26 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py index 0a2fee91..828f0136 100644 --- a/src/common/core/docgen/events.py +++ b/src/common/core/docgen/events.py @@ -116,7 +116,11 @@ def merge_event_entries(entries: Iterable[EventEntry]) -> list[EventEntry]: stacklevel=2, ) existing.attributes = existing.attributes | entry.attributes - existing.locations = existing.locations + entry.locations + existing_locations = set(existing.locations) + for location in entry.locations: + if location not in existing_locations: + existing.locations.append(location) + existing_locations.add(location) else: merged[entry.name] = EventEntry( name=entry.name, diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py index 77074305..d8b9c9ef 100644 --- a/tests/unit/common/core/test_docgen_events.py +++ b/tests/unit/common/core/test_docgen_events.py @@ -693,6 +693,32 @@ def test_get_event_entries_from_source__emit_log__expected_entries( [], id="same-event-two-sites-collapses-with-union-attrs", ), + pytest.param( + [ + EventEntry( + name="code_references.scan.created", + level="info", + attributes=frozenset({"organisation.id"}), + locations=[SourceLocation(path=PATH, line=10)], + ), + EventEntry( + name="code_references.scan.created", + level="info", + attributes=frozenset({"organisation.id"}), + locations=[SourceLocation(path=PATH, line=10)], + ), + ], + [ + EventEntry( + name="code_references.scan.created", + level="info", + attributes=frozenset({"organisation.id"}), + locations=[SourceLocation(path=PATH, line=10)], + ), + ], + [], + id="identical-locations-deduped", + ), pytest.param( [ EventEntry(