diff --git a/docs/api/configuration.md b/docs/api/configuration.md index 963c9e5..3ab8338 100644 --- a/docs/api/configuration.md +++ b/docs/api/configuration.md @@ -19,7 +19,7 @@ from botanu.sdk.config import BotanuConfig | `auto_detect_resources` | `bool` | `True` | Auto-detect cloud resources | | `otlp_endpoint` | `str` | From env / auto-configured when `BOTANU_API_KEY` is set / `"http://localhost:4318"` | OTLP endpoint | | `otlp_headers` | `dict` | `None` | Custom headers for OTLP exporter — always honored | -| `content_capture_rate` | `float` | `0.0` | Prompt/response capture rate (0.0–1.0). See [Content Capture](../tracking/content-capture.md). | +| `content_capture_rate` | `float` | `0.10` | Prompt/response capture rate (0.0–1.0). Default 10% sample. See [Content Capture](../tracking/content-capture.md). | | `pii_scrub_enabled` | `bool` | `True` | In-process PII scrub of captured content | | `pii_scrub_use_presidio` | `bool` | `False` | Add Microsoft Presidio NER to the scrub pipeline | | `max_export_batch_size` | `int` | `512` | Max spans per batch | diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md index b64fbcb..a7c3f2e 100644 --- a/docs/getting-started/configuration.md +++ b/docs/getting-started/configuration.md @@ -80,7 +80,7 @@ class BotanuConfig: schedule_delay_millis: int = 5000 export_timeout_millis: int = 30000 - content_capture_rate: float = 0.0 + content_capture_rate: float = 0.10 ``` `BOTANU_API_KEY` is not a field on the dataclass — when the env var is set, `BotanuConfig` auto-configures `otlp_endpoint` + `otlp_headers` for the botanu-trusted endpoint. diff --git a/scripts/pre_publish_check.py b/scripts/pre_publish_check.py index ce36153..1fba724 100644 --- a/scripts/pre_publish_check.py +++ b/scripts/pre_publish_check.py @@ -97,7 +97,7 @@ def run( capture: bool = True, ) -> Tuple[int, str, str]: """Run a command and return (returncode, stdout, stderr).""" - result = subprocess.run( + result = subprocess.run( # noqa: S603 cmd, cwd=str(cwd) if cwd else None, env=env, @@ -249,7 +249,7 @@ def check_api_surface(venv: Path) -> bool: print(DIM + (err or out)[-1500:] + RESET) return False if "MISSING:" in out and "ALL OK" not in out: - missing_line = [line for line in out.split("\n") if "MISSING:" in line][0] + missing_line = next(line for line in out.split("\n") if "MISSING:" in line) fail(missing_line) return False exports = [line for line in out.split("\n") if line.startswith("EXPORTS:")] @@ -346,7 +346,7 @@ def check_smoke_test(venv: Path) -> bool: "OTEL_LOGS_EXPORTER": "console", "OTEL_METRICS_EXPORTER": "none", } - code, out, err = run([str(py), "-c", SMOKE_TEST_SCRIPT], env=env) + _code, out, err = run([str(py), "-c", SMOKE_TEST_SCRIPT], env=env) if "SMOKE_OK" in out: ok("decorator + outcome + validation all pass") return True @@ -383,12 +383,12 @@ def main() -> int: step(3, total, "python -m build") if not check_build(): - return summarize(results + [False]) + return summarize([*results, False]) results.append(True) step(4, total, "twine check") if not check_twine(): - return summarize(results + [False]) + return summarize([*results, False]) results.append(True) step(5, total, "create clean venv + install wheel") @@ -396,24 +396,24 @@ def main() -> int: venv_dir = make_venv() except RuntimeError as e: fail(str(e)) - return summarize(results + [False]) + return summarize([*results, False]) if not check_install(venv_dir): - return summarize(results + [False]) + return summarize([*results, False]) results.append(True) step(6, total, "version string") if not check_version(venv_dir): - return summarize(results + [False]) + return summarize([*results, False]) results.append(True) step(7, total, "public API surface (__all__)") if not check_api_surface(venv_dir): - return summarize(results + [False]) + return summarize([*results, False]) results.append(True) step(8, total, "end-to-end smoke test") if not check_smoke_test(venv_dir): - return summarize(results + [False]) + return summarize([*results, False]) results.append(True) finally: diff --git a/src/botanu/models/run_context.py b/src/botanu/models/run_context.py index e5283ec..f2752c3 100644 --- a/src/botanu/models/run_context.py +++ b/src/botanu/models/run_context.py @@ -279,10 +279,6 @@ def to_span_attributes(self) -> Dict[str, Union[str, float, int, bool]]: if self.cancelled_at: attrs["botanu.run.cancelled_at"] = self.cancelled_at if self.outcome: - # `botanu.outcome.status` is NOT emitted (removed 2026-04-16): - # customer-reported outcome is trivially fakeable. Event outcome - # is derived from eval verdict rollup / HITL / SoR instead. - # Remaining fields are diagnostic only and stay for debugging. if self.outcome.reason_code: attrs["botanu.outcome.reason_code"] = self.outcome.reason_code if self.outcome.error_class: diff --git a/src/botanu/sdk/bootstrap.py b/src/botanu/sdk/bootstrap.py index e508817..05ae3e9 100644 --- a/src/botanu/sdk/bootstrap.py +++ b/src/botanu/sdk/bootstrap.py @@ -163,11 +163,22 @@ def enable( from botanu.sdk.config import _redact_url_credentials logger.info( - "Initializing Botanu SDK: service=%s, env=%s, endpoint=%s", + "Initializing Botanu SDK: service=%s, env=%s, endpoint=%s, content_capture_rate=%s", cfg.service_name, cfg.deployment_environment, _redact_url_credentials(traces_endpoint), + cfg.content_capture_rate, ) + if cfg.content_capture_rate <= 0.0: + # Louder signal when the customer explicitly turned capture off — + # evaluator judge will no-op on every span. Not a failure, just a + # disable-by-choice worth flagging once at startup so a bug hunt + # for "why are eval rollups empty" ends here (Codex 2026-04-24 P0 #1). + logger.warning( + "Botanu content_capture_rate=0.0 — set_input_content / set_output_content " + "will not write span attributes, and the L2 evaluator judge will return " + "no test case for any span. See docs/tracking/content-capture.md to enable." + ) try: from opentelemetry import trace diff --git a/src/botanu/sdk/config.py b/src/botanu/sdk/config.py index 4c0e537..7e6c923 100644 --- a/src/botanu/sdk/config.py +++ b/src/botanu/sdk/config.py @@ -126,14 +126,23 @@ class BotanuConfig: schedule_delay_millis: int = 5000 export_timeout_millis: int = 30000 - # Content capture for eval — 0.0 disables entirely (default, privacy-safe). - # Set to 1.0 for sandbox/shadow, 0.10-0.20 for production. Customers must also - # call set_input_content() / set_output_content() on their trackers; this rate - # gates whether those calls actually write to span attributes. In-process PII - # scrubbing runs on the captured text before it hits the span (see - # pii_scrub_* fields below); collector regex + evaluator Presidio NER - # remain belt-and-suspenders. - content_capture_rate: float = 0.0 + # Content capture for eval — 0.10 default (~10% sample). Pre-2026-04-24 + # this defaulted to 0.0, which meant the default install silently + # produced no judgeable content: set_input_content / set_output_content + # no-op'd, the L2 judge returned empty test cases, and eval rollups + # stayed pending forever. Customers had to read the docs AND flip a + # config flag before the "SDK in, eval out" loop worked at all. + # + # 0.10 is what the configuration docs recommend for production; it's + # enough traffic for the judge to have real cases while keeping PII + # exposure + storage cost bounded. Three PII-defense layers still + # fire on every captured attribute: + # 1. SDK in-process scrub (pii_scrub_enabled — default True below) + # 2. Collector credential-regex scrub on content attribute prefixes + # 3. Evaluator Presidio NER pass before judge calls + # Set to 1.0 for sandbox/shadow, 0.0 to fully disable (e.g. HIPAA + # where any content capture is a legal hazard). + content_capture_rate: float = 0.10 # In-process PII scrubbing — runs on text passed to set_input_content / # set_output_content / set_retrieval_content before the span attribute is @@ -424,7 +433,7 @@ def _from_dict( max_queue_size=export.get("queue_size", 65536), schedule_delay_millis=export.get("delay_ms", 5000), export_timeout_millis=export.get("export_timeout_ms", 30000), - content_capture_rate=max(0.0, min(1.0, float(eval_cfg.get("content_capture_rate", 0.0)))), + content_capture_rate=max(0.0, min(1.0, float(eval_cfg.get("content_capture_rate", 0.10)))), pii_scrub_enabled=bool(pii_cfg.get("enabled", True)), pii_scrub_disable_patterns=pii_cfg.get("disable_patterns"), pii_scrub_custom_patterns=pii_cfg.get("custom_patterns"), diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 23c8d50..f609d26 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -454,12 +454,20 @@ def test_default_packages(self): class TestContentCaptureRate: """Tests for the content_capture_rate field.""" - def test_default_is_zero(self): - """Privacy-safe default: no content captured unless explicitly enabled.""" + def test_default_is_ten_percent(self): + """Default changed from 0.0 to 0.10 on 2026-04-24 (Codex P0 #1). + + Pre-change: SDK worked out of the box except the evaluator judge + silently returned no test case for every span because the helper + methods (set_input_content / set_output_content) gated on rate > 0.0. + 0.10 matches the docs' recommended production value; three PII-defense + layers (SDK scrub + collector credential scrub + Presidio NER) still + fire on every captured attribute. + """ with mock.patch.dict(os.environ, {}, clear=True): os.environ.pop("BOTANU_CONTENT_CAPTURE_RATE", None) config = BotanuConfig() - assert config.content_capture_rate == 0.0 + assert config.content_capture_rate == 0.10 def test_explicit_value_respected(self): config = BotanuConfig(content_capture_rate=0.15) @@ -486,7 +494,8 @@ def test_env_var_invalid_ignored(self): """Invalid env values are ignored (default retained).""" with mock.patch.dict(os.environ, {"BOTANU_CONTENT_CAPTURE_RATE": "not_a_number"}): config = BotanuConfig() - assert config.content_capture_rate == 0.0 + # Default since 2026-04-24 is 0.10, not 0.0. + assert config.content_capture_rate == 0.10 def test_to_dict_roundtrip(self): config = BotanuConfig(content_capture_rate=0.1)