diff --git a/src/common/core/docgen/__init__.py b/src/common/core/docgen/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/common/core/docgen/events.py b/src/common/core/docgen/events.py new file mode 100644 index 00000000..828f0136 --- /dev/null +++ b/src/common/core/docgen/events.py @@ -0,0 +1,420 @@ +import ast +import warnings +from dataclasses import dataclass, field +from pathlib import Path +from typing import Iterable, Iterator + + +class DocgenEventsWarning(UserWarning): + """Raised by the events scanner when a call site can't be resolved.""" + + +# Emission methods exposed by `structlog.stdlib.BoundLogger` whose first +# positional argument is the event name. `log` is excluded because its +# first argument is the level, not the event; `bind`/`unbind`/`new` are +# not emissions. +EMIT_METHOD_NAMES = frozenset( + { + "debug", + "info", + "warning", + "warn", + "error", + "critical", + "fatal", + "exception", + "msg", + } +) + + +@dataclass(frozen=True) +class SourceLocation: + path: Path + line: int + + +@dataclass +class EventEntry: + name: str + level: str + attributes: frozenset[str] + locations: list[SourceLocation] = field(default_factory=list) + + +@dataclass(frozen=True) +class _LoggerScope: + domain: str + bound_attrs: frozenset[str] + + +_EXCLUDED_DIR_NAMES = frozenset({"migrations", "tests"}) +_EXCLUDED_MANAGEMENT_DIR = ("management", "commands") +_TASK_PROCESSOR_APP_LABEL = "task_processor" + + +def get_event_entries_from_tree( + root: Path, + *, + app_label: str, + module_prefix: str, +) -> Iterator[EventEntry]: + """Walk every `*.py` under `root` and yield its scanned event entries. + + Skips `migrations/`, `tests/`, `conftest.py`, and `test_*.py`. Also skips + `management/commands/` unless `app_label == "task_processor"`, where the + runner loop's events are operationally important. 
+ """ + for file_path in sorted(root.rglob("*.py")): + if _should_skip(file_path.relative_to(root), app_label=app_label): + continue + rel_parts = file_path.relative_to(root).with_suffix("").parts + module_dotted = ".".join((module_prefix, *rel_parts)) + yield from get_event_entries_from_source( + file_path.read_text(), + module_dotted=module_dotted, + path=file_path, + ) + + +def _should_skip(relative: Path, *, app_label: str) -> bool: + parts = relative.parts + if any(part in _EXCLUDED_DIR_NAMES for part in parts[:-1]): + return True + filename = parts[-1] + if filename == "conftest.py" or filename.startswith("test_"): + return True + if ( + len(parts) >= 3 + and parts[0] == _EXCLUDED_MANAGEMENT_DIR[0] + and parts[1] == _EXCLUDED_MANAGEMENT_DIR[1] + and app_label != _TASK_PROCESSOR_APP_LABEL + ): + return True + return False + + +def merge_event_entries(entries: Iterable[EventEntry]) -> list[EventEntry]: + """Collapse entries sharing an event name: union attributes and locations. + + Diverging log levels trigger a `DocgenEventsWarning`; the first-seen level + wins. Output is sorted alphabetically by event name. + """ + merged: dict[str, EventEntry] = {} + for entry in entries: + if existing := merged.get(entry.name): + if entry.level != existing.level: + original_location = existing.locations[0] + new_location = entry.locations[0] + warnings.warn( + f"`{entry.name}` is emitted at diverging log levels:" + f" `{existing.level}` at {original_location.path}:{original_location.line}," + f" `{entry.level}` at {new_location.path}:{new_location.line}." 
+ f" Keeping first-seen level `{existing.level}`; reconcile" + " the emission sites to silence this warning.", + DocgenEventsWarning, + stacklevel=2, + ) + existing.attributes = existing.attributes | entry.attributes + existing_locations = set(existing.locations) + for location in entry.locations: + if location not in existing_locations: + existing.locations.append(location) + existing_locations.add(location) + else: + merged[entry.name] = EventEntry( + name=entry.name, + level=entry.level, + attributes=entry.attributes, + locations=list(entry.locations), + ) + return sorted(merged.values(), key=lambda e: e.name) + + +def get_event_entries_from_source( + source: str, + *, + module_dotted: str, + path: Path, +) -> Iterator[EventEntry]: + tree = ast.parse(source) + visitor = _ScopeVisitor(module_dotted=module_dotted, path=path) + visitor.visit(tree) + yield from visitor.entries + + +class _ScopeVisitor(ast.NodeVisitor): + """Walks the AST in source order with a stack of logger scopes. + + Entering a function body pushes a copy of the enclosing scope so + binds that happen inside the function don't leak to sibling scopes. + """ + + def __init__(self, *, module_dotted: str, path: Path) -> None: + self.module_dotted = module_dotted + self.path = path + self._scope_stack: list[dict[str, _LoggerScope]] = [{}] + self._class_stack: list[dict[str, _LoggerScope]] = [] + self._module_classes: dict[str, dict[str, _LoggerScope]] = {} + self.entries: list[EventEntry] = [] + + @property + def _scope(self) -> dict[str, _LoggerScope]: + return self._scope_stack[-1] + + @property + def _class_scope(self) -> dict[str, _LoggerScope] | None: + return self._class_stack[-1] if self._class_stack else None + + def visit_ClassDef(self, node: ast.ClassDef) -> None: + class_scope: dict[str, _LoggerScope] = {} + # Own methods take precedence — register them first. 
+ for stmt in node.body: + if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)): + if accessor := _resolve_method_accessor(stmt, outer_scopes=self._scope): + class_scope[stmt.name] = accessor + # Inherit from same-file parents declared earlier (Name-typed bases only). + for base in node.bases: + if isinstance(base, ast.Name) and base.id in self._module_classes: + for method_name, method_scope in self._module_classes[base.id].items(): + class_scope.setdefault(method_name, method_scope) + self._module_classes[node.name] = class_scope + self._class_stack.append(class_scope) + self.generic_visit(node) + self._class_stack.pop() + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + self._scope_stack.append(dict(self._scope)) + self.generic_visit(node) + self._scope_stack.pop() + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: + self._scope_stack.append(dict(self._scope)) + self.generic_visit(node) + self._scope_stack.pop() + + def visit_Assign(self, node: ast.Assign) -> None: + if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name): + target_id = node.targets[0].id + if scope := _resolve_seed( + node, + module_dotted=self.module_dotted, + path=self.path, + ): + self._scope[target_id] = scope + elif scope := _resolve_bind(node, logger_scopes=self._scope): + self._scope[target_id] = scope + self.generic_visit(node) + + def visit_Call(self, node: ast.Call) -> None: + if entry := _build_entry_from_emit_call( + node, self._scope, self.path, class_scope=self._class_scope + ): + self.entries.append(entry) + self.generic_visit(node) + + +def _resolve_seed( + node: ast.Assign, + *, + module_dotted: str, + path: Path, +) -> _LoggerScope | None: + call = node.value + if not isinstance(call, ast.Call): + return None + func = call.func + if not isinstance(func, ast.Attribute) or func.attr != "get_logger": + return None + if not isinstance(func.value, ast.Name) or func.value.id != "structlog": + return None + if not call.args: + 
return _LoggerScope(domain="", bound_attrs=frozenset()) + target = node.targets[0] + assert isinstance(target, ast.Name) + domain = _resolve_domain(call.args[0], module_dotted=module_dotted) + if domain is None: + warnings.warn( + f"{path}:{node.lineno}: cannot statically resolve logger domain" + f" for `{target.id}`; skipping its events.", + DocgenEventsWarning, + stacklevel=2, + ) + return None + return _LoggerScope(domain=domain, bound_attrs=frozenset()) + + +def _resolve_bind( + node: ast.Assign, + *, + logger_scopes: dict[str, _LoggerScope], +) -> _LoggerScope | None: + call = node.value + if not isinstance(call, ast.Call): + return None + func = call.func + if not isinstance(func, ast.Attribute) or func.attr != "bind": + return None + if not isinstance(func.value, ast.Name) or func.value.id not in logger_scopes: + return None + parent = logger_scopes[func.value.id] + new_attrs = _kwargs_as_attributes(call.keywords) + return _LoggerScope( + domain=parent.domain, + bound_attrs=parent.bound_attrs | new_attrs, + ) + + +def _resolve_domain(node: ast.expr, *, module_dotted: str) -> str | None: + if isinstance(node, ast.Constant) and isinstance(node.value, str): + return node.value + if isinstance(node, ast.Name) and node.id == "__name__": + return module_dotted + return None + + +def _kwargs_as_attributes(keywords: list[ast.keyword]) -> frozenset[str]: + return frozenset(kw.arg.replace("__", ".") for kw in keywords if kw.arg is not None) + + +def _build_entry_from_emit_call( + node: ast.Call, + logger_scopes: dict[str, _LoggerScope], + path: Path, + *, + class_scope: dict[str, _LoggerScope] | None = None, +) -> EventEntry | None: + func = node.func + if not isinstance(func, ast.Attribute): + return None + if func.attr not in EMIT_METHOD_NAMES: + return None + scope = _scope_for_emit_target(func.value, logger_scopes, class_scope=class_scope) + if scope is None: + if accessor_name := _self_cls_accessor_name(func.value): + warnings.warn( + f"{path}:{node.lineno}: 
cannot resolve" + f" `{_describe_emit_target(func.value)}.{func.attr}(...)`:" + f" `{accessor_name}` isn't a tracked accessor on this class" + " or any same-file parent. Consider inlining the bind at the" + " call site or moving the accessor into this file.", + DocgenEventsWarning, + stacklevel=2, + ) + return None + if not node.args: + return None + event_arg = node.args[0] + if not (isinstance(event_arg, ast.Constant) and isinstance(event_arg.value, str)): + warnings.warn( + f"{path}:{node.lineno}: cannot statically resolve event name" + f" for `{_describe_emit_target(func.value)}.{func.attr}(...)`;" + " skipping. Consider annotating the call site with a" + " `# docgen: event=` comment so the catalogue can still" + " pick it up.", + DocgenEventsWarning, + stacklevel=2, + ) + return None + attributes = scope.bound_attrs | _kwargs_as_attributes(node.keywords) + name = f"{scope.domain}.{event_arg.value}" if scope.domain else event_arg.value + return EventEntry( + name=name, + level=func.attr, + attributes=attributes, + locations=[SourceLocation(path=path, line=node.lineno)], + ) + + +def _scope_for_emit_target( + target: ast.expr, + logger_scopes: dict[str, _LoggerScope], + *, + class_scope: dict[str, _LoggerScope] | None = None, +) -> _LoggerScope | None: + if isinstance(target, ast.Name): + return logger_scopes.get(target.id) + if isinstance(target, ast.Attribute): + # `self.` — method/property accessor on the enclosing class. + if ( + class_scope is not None + and isinstance(target.value, ast.Name) + and target.value.id in _SELF_OR_CLS + and target.attr in class_scope + ): + return class_scope[target.attr] + return None + if isinstance(target, ast.Call): + func = target.func + if not isinstance(func, ast.Attribute): + return None + # `self.(...)` — method accessor invocation. 
+ if ( + class_scope is not None + and isinstance(func.value, ast.Name) + and func.value.id in _SELF_OR_CLS + and func.attr in class_scope + ): + return class_scope[func.attr] + if func.attr != "bind": + return None + parent = _scope_for_emit_target( + func.value, logger_scopes, class_scope=class_scope + ) + if parent is None: + return None + return _LoggerScope( + domain=parent.domain, + bound_attrs=parent.bound_attrs | _kwargs_as_attributes(target.keywords), + ) + return None + + +_SELF_OR_CLS = frozenset({"self", "cls"}) + + +def _self_cls_accessor_name(target: ast.expr) -> str | None: + """Name of the accessor in a `self.` / `cls.(...)` emit shape, else None.""" + if isinstance(target, ast.Attribute): + if isinstance(target.value, ast.Name) and target.value.id in _SELF_OR_CLS: + return target.attr + if isinstance(target, ast.Call): + func = target.func + if isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name): + if func.value.id in _SELF_OR_CLS: + return func.attr + return None + + +def _resolve_method_accessor( + func_def: ast.FunctionDef | ast.AsyncFunctionDef, + *, + outer_scopes: dict[str, _LoggerScope], +) -> _LoggerScope | None: + """Return a scope for a method that just returns a bound logger.""" + body = list(func_def.body) + # Allow a leading docstring. 
+ if ( + body + and isinstance(body[0], ast.Expr) + and isinstance(body[0].value, ast.Constant) + and isinstance(body[0].value.value, str) + ): + body = body[1:] + if len(body) != 1: + return None + stmt = body[0] + if not isinstance(stmt, ast.Return) or stmt.value is None: + return None + return _scope_for_emit_target(stmt.value, outer_scopes) + + +def _describe_emit_target(target: ast.expr) -> str: + if isinstance(target, ast.Name): + return target.id + if isinstance(target, ast.Attribute): + return f"{_describe_emit_target(target.value)}.{target.attr}" + assert isinstance(target, ast.Call) + func = target.func + assert isinstance(func, ast.Attribute) + return f"{_describe_emit_target(func.value)}.{func.attr}(...)" diff --git a/src/common/core/management/commands/docgen.py b/src/common/core/management/commands/docgen.py index 07bbdfe7..2d87b875 100644 --- a/src/common/core/management/commands/docgen.py +++ b/src/common/core/management/commands/docgen.py @@ -1,12 +1,21 @@ +import subprocess from operator import itemgetter +from pathlib import Path from typing import Any, Callable import prometheus_client +from django.apps import apps from django.core.management import BaseCommand, CommandParser from django.template.loader import get_template from django.utils.module_loading import autodiscover_modules from prometheus_client.metrics import MetricWrapperBase +from common.core.docgen.events import ( + EventEntry, + get_event_entries_from_tree, + merge_event_entries, +) + class Command(BaseCommand): help = "Generate documentation for the Flagsmith codebase." 
@@ -23,6 +32,12 @@ def add_arguments(self, parser: CommandParser) -> None: ) metric_parser.set_defaults(handle_method=self.handle_metrics) + events_parser = subparsers.add_parser( + "events", + help="Generate structlog events documentation.", + ) + events_parser.set_defaults(handle_method=self.handle_events) + def initialise(self) -> None: from common.gunicorn import metrics # noqa: F401 @@ -61,3 +76,65 @@ def handle_metrics(self, *args: Any, **options: Any) -> None: context={"flagsmith_metrics": flagsmith_metrics}, ) ) + + def handle_events(self, *args: Any, **options: Any) -> None: + template = get_template("docgen-events.md") + + repo_root = _get_repo_root() + entries: list[EventEntry] = [] + for app_config in apps.get_app_configs(): + entries.extend( + get_event_entries_from_tree( + Path(app_config.path), + app_label=app_config.label, + module_prefix=app_config.name, + ) + ) + merged = merge_event_entries(entries) + + flagsmith_events = [ + { + "name": entry.name, + "level": entry.level, + "locations": [ + { + "path": _relative_if_under(location.path, repo_root), + "line": location.line, + } + for location in entry.locations + ], + "attributes": sorted(entry.attributes), + } + for entry in merged + ] + + self.stdout.write( + template.render( + context={"flagsmith_events": flagsmith_events}, + ) + ) + + +def _get_repo_root() -> Path: + """Resolve the git repo root for emitted source paths. + + Falls back to the current working directory when git isn't available or + the CWD isn't inside a repo. 
+ """ + try: + result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + capture_output=True, + text=True, + check=True, + ) + except (subprocess.CalledProcessError, FileNotFoundError): + return Path.cwd() + return Path(result.stdout.strip()) + + +def _relative_if_under(path: Path, base: Path) -> Path: + try: + return path.relative_to(base) + except ValueError: + return path diff --git a/src/common/core/templates/docgen-events.md b/src/common/core/templates/docgen-events.md new file mode 100644 index 00000000..b5531075 --- /dev/null +++ b/src/common/core/templates/docgen-events.md @@ -0,0 +1,20 @@ +--- +title: Events +sidebar_label: Events +sidebar_position: 30 +--- + +Flagsmith backend emits [OpenTelemetry events](https://opentelemetry.io/docs/specs/otel/logs/data-model/#events) +that can be ingested to downstream observability systems and/or a data warehouse of your choice via OTLP. +To learn how to configure this, see [OpenTelemetry](deployment-self-hosting/scaling-and-performance/opentelemetry). + +## Event catalog +{% for event in flagsmith_events %} +### `{{ event.name }}` + +Logged at `{{ event.level }}` from: +{% for location in event.locations %} - `{{ location.path }}:{{ location.line }}` +{% endfor %} +Attributes: +{% for attr in event.attributes %} - `{{ attr }}` +{% endfor %}{% endfor %} diff --git a/tests/integration/core/snapshots/test_docgen__events__runs_expected.txt b/tests/integration/core/snapshots/test_docgen__events__runs_expected.txt new file mode 100644 index 00000000..e4ad7293 --- /dev/null +++ b/tests/integration/core/snapshots/test_docgen__events__runs_expected.txt @@ -0,0 +1,30 @@ +--- +title: Events +sidebar_label: Events +sidebar_position: 30 +--- + +Flagsmith backend emits [OpenTelemetry events](https://opentelemetry.io/docs/specs/otel/logs/data-model/#events) +that can be ingested to downstream observability systems and/or a data warehouse of your choice via OTLP. 
+To learn how to configure this, see [OpenTelemetry](deployment-self-hosting/scaling-and-performance/opentelemetry). + +## Event catalog + +### `code_references.scan.created` + +Logged at `info` from: + - `api/fixture_app/views.py:7` + +Attributes: + - `code_references.count` + - `organisation.id` + +### `workflows.published` + +Logged at `info` from: + - `api/fixture_app/nested/workflows.py:7` + +Attributes: + - `status` + - `workflow_id` + diff --git a/tests/integration/core/test_commands.py b/tests/integration/core/test_commands.py index 97bbf43d..93d349d8 100644 --- a/tests/integration/core/test_commands.py +++ b/tests/integration/core/test_commands.py @@ -1,6 +1,10 @@ +import subprocess +from pathlib import Path + import prometheus_client import pytest from django.core.management import call_command +from pytest_mock import MockerFixture from common.test_tools import SnapshotFixture @@ -20,3 +24,116 @@ def test_docgen__metrics__runs_expected( # Then assert capsys.readouterr().out == expected_stdout + + +@pytest.fixture +def fixture_app(tmp_path: Path) -> Path: + """Write a two-file fixture app under `tmp_path/api/fixture_app`.""" + app_path = tmp_path / "api" / "fixture_app" + app_path.mkdir(parents=True) + (app_path / "views.py").write_text( + """\ +import structlog + +logger = structlog.get_logger("code_references") + + +def perform_create() -> None: + logger.info( + "scan.created", + organisation__id=1, + code_references__count=2, + ) +""" + ) + (app_path / "nested").mkdir() + (app_path / "nested/workflows.py").write_text( + """\ +import structlog + +logger = structlog.get_logger("workflows") + + +def publish() -> None: + logger.bind(workflow_id=1).info("published", status="ok") +""" + ) + return app_path + + +@pytest.fixture +def patched_apps(fixture_app: Path, mocker: MockerFixture) -> None: + """Patch `docgen.apps` so only the fixture app is scanned.""" + fake_config = mocker.Mock() + fake_config.path = str(fixture_app) + fake_config.label = 
"fixture_app" + fake_config.name = "fixture_app" + mock_apps = mocker.patch("common.core.management.commands.docgen.apps") + mock_apps.get_app_configs.return_value = [fake_config] + + +@pytest.mark.django_db +def test_docgen__events__runs_expected( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], + monkeypatch: pytest.MonkeyPatch, + patched_apps: None, + snapshot: SnapshotFixture, +) -> None: + # Given — `tmp_path` is a real git repo, so `_get_repo_root` resolves + # paths against it without mocking the subprocess call. + subprocess.run(["git", "init"], cwd=tmp_path, check=True, capture_output=True) + monkeypatch.chdir(tmp_path) + expected_stdout = snapshot() + + # When + call_command("docgen", "events") + + # Then + assert capsys.readouterr().out == expected_stdout + + +@pytest.mark.django_db +def test_docgen__events_not_in_git_repo__falls_back_to_cwd( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], + monkeypatch: pytest.MonkeyPatch, + patched_apps: None, +) -> None: + # Given — no `git init`; `_get_repo_root` swallows the + # `CalledProcessError` and falls back to `Path.cwd()`, which we pin to + # `tmp_path` via `monkeypatch.chdir`. + monkeypatch.chdir(tmp_path) + + # When + call_command("docgen", "events") + + # Then + stdout = capsys.readouterr().out + assert " - `api/fixture_app/views.py:7`" in stdout + assert " - `api/fixture_app/nested/workflows.py:7`" in stdout + + +@pytest.mark.django_db +def test_docgen__events_app_outside_repo_root__keeps_absolute_path( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], + monkeypatch: pytest.MonkeyPatch, + fixture_app: Path, + patched_apps: None, +) -> None: + # Given — the resolved repo root and the fixture app live in *different* + # subtrees under `tmp_path`, so `_relative_if_under` hits its `ValueError` + # branch and keeps the absolute path on each emitted location. 
+ repo_root = tmp_path / "repo" + repo_root.mkdir() + subprocess.run(["git", "init"], cwd=repo_root, check=True, capture_output=True) + monkeypatch.chdir(repo_root) + + # When + call_command("docgen", "events") + + # Then + stdout = capsys.readouterr().out + assert f" - `{fixture_app}/views.py:7`" in stdout + assert f" - `{fixture_app}/nested/workflows.py:7`" in stdout diff --git a/tests/unit/common/core/test_docgen_events.py b/tests/unit/common/core/test_docgen_events.py new file mode 100644 index 00000000..d8b9c9ef --- /dev/null +++ b/tests/unit/common/core/test_docgen_events.py @@ -0,0 +1,977 @@ +import warnings +from pathlib import Path +from unittest.mock import ANY + +import pytest +from pyfakefs.fake_filesystem import FakeFilesystem + +from common.core.docgen.events import ( + DocgenEventsWarning, + EventEntry, + SourceLocation, + get_event_entries_from_source, + get_event_entries_from_tree, + merge_event_entries, +) + +PATH = Path("projects/code_references/views.py") +MODULE_DOTTED = "projects.code_references.views" + + +@pytest.mark.parametrize( + "source, expected_entries, expected_warnings", + [ + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +logger.info("scan.created") +""", + [ + EventEntry( + name="code_references.scan.created", + level="info", + attributes=frozenset(), + locations=[SourceLocation(path=PATH, line=4)], + ), + ], + [], + id="plain-info-no-kwargs", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +logger.info( + "scan.created", + organisation__id=1, + code_references__count=2, + feature__count=3, +) +""", + [ + EventEntry( + name="code_references.scan.created", + level="info", + attributes=frozenset( + { + "organisation.id", + "code_references.count", + "feature.count", + } + ), + locations=[SourceLocation(path=PATH, line=4)], + ), + ], + [], + id="kwargs-with-double-underscore-substitution", + ), + pytest.param( + """\ +import structlog + 
+logger = structlog.get_logger(__name__) +logger.info("something.happened") +""", + [ + EventEntry( + name=f"{MODULE_DOTTED}.something.happened", + level="info", + attributes=frozenset(), + locations=[SourceLocation(path=PATH, line=4)], + ), + ], + [], + id="dunder-name-resolves-to-module-dotted", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +logger.exception("import.failed") +""", + [ + EventEntry( + name="code_references.import.failed", + level="exception", + attributes=frozenset(), + locations=[SourceLocation(path=PATH, line=4)], + ), + ], + [], + id="exception-method-is-recognised", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("sentry_change_tracking") + + +def track() -> None: + log = logger.bind(sentry_action="trigger", feature_name="flag") + log.info("success", response_status=200) +""", + [ + EventEntry( + name="sentry_change_tracking.success", + level="info", + attributes=frozenset( + {"sentry_action", "feature_name", "response_status"} + ), + locations=[SourceLocation(path=PATH, line=8)], + ), + ], + [], + id="reassigned-bind-merges-attrs", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("workflows") + + +def publish() -> None: + logger.bind(workflow_id=1).info("published", status="ok") +""", + [ + EventEntry( + name="workflows.published", + level="info", + attributes=frozenset({"workflow_id", "status"}), + locations=[SourceLocation(path=PATH, line=7)], + ), + ], + [], + id="chained-bind-merges-attrs", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("workflows") + + +def publish() -> None: + logger.bind(workflow_id=1).bind(actor_id=2).info("published", status="ok") +""", + [ + EventEntry( + name="workflows.published", + level="info", + attributes=frozenset({"workflow_id", "actor_id", "status"}), + locations=[SourceLocation(path=PATH, line=7)], + ), + ], + [], + id="multi-chained-bind-merges-attrs", + ), + 
pytest.param( + """\ +import structlog + +logger = structlog.get_logger("workflows") + + +def publish() -> None: + log = logger.bind(workflow_id=1) + log = log.bind(actor_id=2) + log.info("published", status="ok") +""", + [ + EventEntry( + name="workflows.published", + level="info", + attributes=frozenset({"workflow_id", "actor_id", "status"}), + locations=[SourceLocation(path=PATH, line=9)], + ), + ], + [], + id="self-reassigned-bind-layers-further", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("workflows") + + +def publish_one() -> None: + log = logger.bind(one_attr=1) + log.info("evt.one", kwarg_one=True) + + +def publish_two() -> None: + log = logger.bind(two_attr=2) + log.info("evt.two", kwarg_two=True) +""", + [ + EventEntry( + name="workflows.evt.one", + level="info", + attributes=frozenset({"one_attr", "kwarg_one"}), + locations=[SourceLocation(path=PATH, line=8)], + ), + EventEntry( + name="workflows.evt.two", + level="info", + attributes=frozenset({"two_attr", "kwarg_two"}), + locations=[SourceLocation(path=PATH, line=13)], + ), + ], + [], + id="binds-in-different-functions-do-not-leak", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("workflows") + + +async def publish() -> None: + log = logger.bind(workflow_id=1) + log.info("published", status="ok") +""", + [ + EventEntry( + name="workflows.published", + level="info", + attributes=frozenset({"workflow_id", "status"}), + locations=[SourceLocation(path=PATH, line=8)], + ), + ], + [], + id="bind-inside-async-function", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +event_name = "dyn" +logger.bind(a=1).info(event_name) +""", + [], + [ + DocgenEventsWarning( + f"{PATH}:5: cannot statically resolve event name" + f" for `logger.bind(...).info(...)`; skipping." + " Consider annotating the call site with a" + " `# docgen: event=` comment so the catalogue" + " can still pick it up." 
+ ), + ], + id="dynamic-event-name-in-chain-describes-chain", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger() +logger.info("evt", x=1) +""", + [ + EventEntry( + name="evt", + level="info", + attributes=frozenset({"x"}), + locations=[SourceLocation(path=PATH, line=4)], + ), + ], + [], + id="structlog-get_logger-no-args-produces-domainless-entry", + ), + pytest.param( + """\ +import structlog + +log = unknown.bind(x=1) +log.info("evt") +""", + [], + [], + id="bind-on-untracked-name-ignored", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +logger.unbind("x").info("evt", y=1) +""", + [], + [], + id="chain-with-non-bind-attr-ignored", + ), + pytest.param( + """\ +import structlog + +unknown.bind(a=1).info("evt") +""", + [], + [], + id="chain-bind-on-untracked-ignored", + ), + pytest.param( + """\ +import structlog + +loggers = {} +loggers["x"].info("evt") +""", + [], + [], + id="subscript-target-ignored", + ), + pytest.param( + """\ +import structlog + +print("not a logger call") +""", + [], + [], + id="bare-function-call-ignored", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +logger.info() +""", + [], + [], + id="emit-without-event-name-ignored", + ), + pytest.param( + """\ +import structlog + +logger, spare = structlog.get_logger("a"), structlog.get_logger("b") +logger.info("scan.created") +""", + [], + [], + id="tuple-destructuring-ignored", + ), + pytest.param( + """\ +import structlog + +logger = unrelated() +logger.info("scan.created") +""", + [], + [], + id="assignment-from-non-attribute-call-ignored", + ), + pytest.param( + """\ +import structlog + +logger = factory.get_logger("code_references") +logger.info("scan.created") +""", + [], + [], + id="get_logger-on-non-structlog-ignored", + ), + pytest.param( + """\ +import logging + +logger = logging.getLogger("code_references") +logger.info("scan.created", extra={"organisation": 
1}) +""", + [], + [], + id="stdlib-logging-getLogger-ignored", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +logger.bind_or_whatever("not.an.event") +""", + [], + [], + id="non-level-method-is-ignored-silently", + ), + pytest.param( + """\ +import structlog + +domain = "sneaky" +logger = structlog.get_logger(domain) +logger.info("something.happened") +""", + [], + [ + DocgenEventsWarning( + f"{PATH}:4: cannot statically resolve logger domain" + f" for `logger`; skipping its events." + ), + ], + id="non-resolvable-domain-warns-and-skips", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("code_references") +event_name = "scan." + "created" +logger.info(event_name) +""", + [], + [ + DocgenEventsWarning( + f"{PATH}:5: cannot statically resolve event name" + f" for `logger.info(...)`; skipping." + " Consider annotating the call site with a" + " `# docgen: event=` comment so the catalogue" + " can still pick it up." 
+ ), + ], + id="dynamic-event-name-warns-and-skips", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("projects") + + +class ProjectWorker: + organisation_id: str + + def logger(self, project_id: str) -> structlog.BoundLogger: + return logger.bind( + organisation__id=self.organisation_id, + project__id=project_id, + ) + + def do_work(self, project_id: str) -> None: + self.logger(project_id=project_id).info("project.worked") +""", + [ + EventEntry( + name="projects.project.worked", + level="info", + attributes=frozenset({"organisation.id", "project.id"}), + locations=[SourceLocation(path=PATH, line=16)], + ), + ], + [], + id="self-method-accessor-resolves-via-class-scope", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("projects") + + +class BaseWorker: + organisation_id: str + + def logger(self, project_id: str) -> structlog.BoundLogger: + return logger.bind( + organisation__id=self.organisation_id, + project__id=project_id, + ) + + +class ProjectWorker(BaseWorker): + def do_work(self, project_id: str) -> None: + self.logger(project_id=project_id).info("project.worked") +""", + [ + EventEntry( + name="projects.project.worked", + level="info", + attributes=frozenset({"organisation.id", "project.id"}), + locations=[SourceLocation(path=PATH, line=18)], + ), + ], + [], + id="self-method-accessor-inherited-from-same-file-parent", + ), + pytest.param( + """\ +import structlog + + +class Worker: + def do_work(self) -> None: + self.logger.info("project.worked") + self.logger_factory().info("project.factoried") +""", + [], + [ + DocgenEventsWarning( + f"{PATH}:6: cannot resolve `self.logger.info(...)`:" + " `logger` isn't a tracked accessor on this class or any" + " same-file parent. Consider inlining the bind at the" + " call site or moving the accessor into this file." 
+ ), + DocgenEventsWarning( + f"{PATH}:7: cannot resolve" + " `self.logger_factory(...).info(...)`:" + " `logger_factory` isn't a tracked accessor on this class" + " or any same-file parent. Consider inlining the bind at" + " the call site or moving the accessor into this file." + ), + ], + id="unresolvable-self-emit-warns-and-skips", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("projects") + + +class ProjectWorker: + @classmethod + def logger(cls, project_id: str) -> structlog.BoundLogger: + return logger.bind(project__id=project_id) + + @classmethod + def do_work(cls, project_id: str) -> None: + cls.logger(project_id=project_id).info("project.worked") +""", + [ + EventEntry( + name="projects.project.worked", + level="info", + attributes=frozenset({"project.id"}), + locations=[SourceLocation(path=PATH, line=13)], + ), + ], + [], + id="cls-method-accessor-resolves-via-class-scope", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("projects") + + +class ProjectWorker: + @property + def logger(self) -> structlog.BoundLogger: + \"\"\"Accessor with a docstring.\"\"\" + return logger.bind(worker="project") + + def do_work(self) -> None: + self.logger.info("project.worked") +""", + [ + EventEntry( + name="projects.project.worked", + level="info", + attributes=frozenset({"worker"}), + locations=[SourceLocation(path=PATH, line=13)], + ), + ], + [], + id="property-accessor-with-docstring-resolves-via-class-scope", + ), + pytest.param( + """\ +import structlog + +foo.bar.info("evt") +""", + [], + [], + id="non-self-attribute-chain-ignored", + ), + pytest.param( + """\ +import structlog + +some_factory().info("evt") +""", + [], + [], + id="non-attribute-func-target-ignored", + ), + pytest.param( + """\ +import structlog + +logger = structlog.get_logger("projects") + + +class Mixin: + def logger(self) -> structlog.BoundLogger: + return logger.bind(from_mixin=1) + + +class ProjectWorker(UnknownExternalBase, Mixin): 
    def do_work(self) -> None:
        self.logger().info("project.worked")
""",
            [
                EventEntry(
                    name="projects.project.worked",
                    level="info",
                    attributes=frozenset({"from_mixin"}),
                    locations=[SourceLocation(path=PATH, line=13)],
                ),
            ],
            [],
            id="multiple-bases-skip-unknown-and-inherit-from-known",
        ),
    ],
)
def test_get_event_entries_from_source__emit_log__expected_entries(
    source: str,
    expected_entries: list[EventEntry],
    expected_warnings: list[DocgenEventsWarning],
) -> None:
    """Scan `source` and assert both the yielded entries and emitted warnings.

    Warnings are captured with `warnings.catch_warnings(record=True)` and
    compared as (category, message) pairs, so the exact warning wording is
    pinned as well as the warning class.
    """
    # Given / When
    with warnings.catch_warnings(record=True) as recorded:
        warnings.simplefilter("always", DocgenEventsWarning)
        entries = list(
            get_event_entries_from_source(
                source,
                module_dotted=MODULE_DOTTED,
                path=PATH,
            )
        )

    # Then
    assert entries == expected_entries
    assert [(w.category, str(w.message)) for w in recorded] == [
        (type(expected), str(expected)) for expected in expected_warnings
    ]


# A second source path, used by the merge tests to exercise cross-file collapsing.
OTHER_PATH = Path("projects/other.py")


@pytest.mark.parametrize(
    "entries, expected_merged, expected_warnings",
    [
        # Two sites for one event: attributes are unioned, locations appended.
        pytest.param(
            [
                EventEntry(
                    name="code_references.scan.created",
                    level="info",
                    attributes=frozenset({"organisation.id"}),
                    locations=[SourceLocation(path=PATH, line=10)],
                ),
                EventEntry(
                    name="code_references.scan.created",
                    level="info",
                    attributes=frozenset({"code_references.count"}),
                    locations=[SourceLocation(path=OTHER_PATH, line=20)],
                ),
            ],
            [
                EventEntry(
                    name="code_references.scan.created",
                    level="info",
                    attributes=frozenset({"organisation.id", "code_references.count"}),
                    locations=[
                        SourceLocation(path=PATH, line=10),
                        SourceLocation(path=OTHER_PATH, line=20),
                    ],
                ),
            ],
            [],
            id="same-event-two-sites-collapses-with-union-attrs",
        ),
        pytest.param(
            [
                EventEntry(
                    name="code_references.scan.created",
                    level="info",
                    attributes=frozenset({"organisation.id"}),
                    locations=[SourceLocation(path=PATH, line=10)],
                ),
                EventEntry(
                    name="code_references.scan.created",
                    level="info",
                    attributes=frozenset({"organisation.id"}),
                    locations=[SourceLocation(path=PATH, line=10)],
                ),
            ],
            [
                EventEntry(
                    name="code_references.scan.created",
                    level="info",
                    attributes=frozenset({"organisation.id"}),
                    locations=[SourceLocation(path=PATH, line=10)],
                ),
            ],
            [],
            id="identical-locations-deduped",
        ),
        # Same event at diverging levels: first-seen level wins, and a
        # DocgenEventsWarning pinpoints both emission sites.
        pytest.param(
            [
                EventEntry(
                    name="code_references.scan.created",
                    level="info",
                    attributes=frozenset(),
                    locations=[SourceLocation(path=PATH, line=10)],
                ),
                EventEntry(
                    name="code_references.scan.created",
                    level="debug",
                    attributes=frozenset(),
                    locations=[SourceLocation(path=OTHER_PATH, line=20)],
                ),
            ],
            [
                EventEntry(
                    name="code_references.scan.created",
                    level="info",
                    attributes=frozenset(),
                    locations=[
                        SourceLocation(path=PATH, line=10),
                        SourceLocation(path=OTHER_PATH, line=20),
                    ],
                ),
            ],
            [
                DocgenEventsWarning(
                    "`code_references.scan.created` is emitted at diverging"
                    f" log levels: `info` at {PATH}:10,"
                    f" `debug` at {OTHER_PATH}:20."
                    " Keeping first-seen level `info`; reconcile the emission"
                    " sites to silence this warning."
                ),
            ],
            id="diverging-level-warns-and-first-wins",
        ),
        # Output order is alphabetical by event name, regardless of input order.
        pytest.param(
            [
                EventEntry(
                    name="zulu",
                    level="info",
                    attributes=frozenset(),
                    locations=[SourceLocation(path=PATH, line=3)],
                ),
                EventEntry(
                    name="alpha",
                    level="info",
                    attributes=frozenset(),
                    locations=[SourceLocation(path=PATH, line=1)],
                ),
                EventEntry(
                    name="mike",
                    level="info",
                    attributes=frozenset(),
                    locations=[SourceLocation(path=PATH, line=2)],
                ),
            ],
            [
                EventEntry(
                    name="alpha",
                    level="info",
                    attributes=frozenset(),
                    locations=[SourceLocation(path=PATH, line=1)],
                ),
                EventEntry(
                    name="mike",
                    level="info",
                    attributes=frozenset(),
                    locations=[SourceLocation(path=PATH, line=2)],
                ),
                EventEntry(
                    name="zulu",
                    level="info",
                    attributes=frozenset(),
                    locations=[SourceLocation(path=PATH, line=3)],
                ),
            ],
            [],
            id="output-sorted-alphabetically-by-event-name",
        ),
    ],
)
def test_merge_event_entries__various_entries__expected_merged_and_warnings(
    entries: list[EventEntry],
    expected_merged: list[EventEntry],
    expected_warnings: list[DocgenEventsWarning],
) -> None:
    """Merge `entries` and assert the merged list plus any level-clash warnings."""
    # Given / When
    with warnings.catch_warnings(record=True) as recorded:
        warnings.simplefilter("always", DocgenEventsWarning)
        merged = merge_event_entries(entries)

    # Then
    assert merged == expected_merged
    assert [(w.category, str(w.message)) for w in recorded] == [
        (type(expected), str(expected)) for expected in expected_warnings
    ]


# Fake app root for the tree-walking tests (created on the fake filesystem).
ROOT = Path("/fake/myapp")


@pytest.mark.parametrize(
    "tree, app_label, expected_entries",
    [
        pytest.param(
            {
                "views.py": """\
import structlog
logger = structlog.get_logger("myapp")
logger.info("viewed")
""",
                "nested/models.py": """\
import structlog
logger = structlog.get_logger("myapp")
logger.info("modeled")
""",
            },
            "myapp",
            [
                EventEntry(
                    name="myapp.modeled",
                    level=ANY,
                    attributes=ANY,
                    locations=[
                        SourceLocation(path=ROOT / "nested/models.py", line=3),
                    ],
                ),
                EventEntry(
                    name="myapp.viewed",
                    level=ANY,
                    attributes=ANY,
                    locations=[SourceLocation(path=ROOT / "views.py", line=3)],
                ),
            ],
            id="walks-all-py-files-across-subdirs",
        ),
        # Excluded paths: migrations/, tests/, conftest.py, and test_* modules.
        pytest.param(
            {
                "views.py": """\
import structlog
logger = structlog.get_logger("myapp")
logger.info("included")
""",
                "migrations/0001_initial.py": """\
import structlog
logger = structlog.get_logger("myapp")
logger.info("from_migration")
""",
                "tests/test_views.py": """\
import structlog
logger = structlog.get_logger("myapp")
logger.info("from_test_subdir")
""",
                "conftest.py": """\
import structlog
logger = structlog.get_logger("myapp")
logger.info("from_conftest")
""",
                "test_thing.py": """\
import structlog
logger = structlog.get_logger("myapp")
logger.info("from_test_module")
""",
            },
            "myapp",
            [
                EventEntry(
                    name="myapp.included",
                    level=ANY,
                    attributes=ANY,
                    locations=[SourceLocation(path=ROOT / "views.py", line=3)],
                ),
            ],
            id="skips-migrations-tests-conftest-test-modules",
        ),
        pytest.param(
            {
                "views.py": """\
import structlog
logger = structlog.get_logger("myapp")
logger.info("viewed")
""",
                "management/commands/do_thing.py": """\
import structlog
logger = structlog.get_logger("myapp")
logger.info("from_command")
""",
            },
            "myapp",
            [
                EventEntry(
                    name="myapp.viewed",
                    level=ANY,
                    attributes=ANY,
                    locations=[SourceLocation(path=ROOT / "views.py", line=3)],
                ),
            ],
            id="skips-management-commands-by-default",
        ),
        # task_processor is the one app whose management commands are scanned.
        pytest.param(
            {
                "views.py": """\
import structlog
logger = structlog.get_logger("task_processor")
logger.info("setup")
""",
                "management/commands/run_processor.py": """\
import structlog
logger = structlog.get_logger("task_processor")
logger.info("task.dispatched")
""",
            },
            "task_processor",
            [
                EventEntry(
                    name="task_processor.setup",
                    level=ANY,
                    attributes=ANY,
                    locations=[SourceLocation(path=ROOT / "views.py", line=3)],
                ),
                EventEntry(
                    name="task_processor.task.dispatched",
                    level=ANY,
                    attributes=ANY,
                    locations=[
                        SourceLocation(
                            path=ROOT / "management/commands/run_processor.py",
                            line=3,
                        ),
                    ],
                ),
            ],
            id="includes-management-commands-for-task-processor-app",
        ),
    ],
)
def test_get_event_entries_from_tree__various_trees__expected_entries(
    fs: FakeFilesystem,
    tree: dict[str, str],
    app_label: str,
    expected_entries: list[EventEntry],
) -> None:
    """Materialise `tree` on the fake filesystem under ROOT, then scan it.

    Entries are sorted by event name before comparison because the scanner
    yields in file-path order, not event-name order.
    """
    # Given
    for relative, source in tree.items():
        fs.create_file(str(ROOT / relative), contents=source)

    # When
    entries = sorted(
        get_event_entries_from_tree(ROOT, app_label=app_label, module_prefix=app_label),
        key=lambda e: e.name,
    )

    # Then
    assert entries == expected_entries