Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/api/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ from botanu.sdk.config import BotanuConfig
| `auto_detect_resources` | `bool` | `True` | Auto-detect cloud resources |
| `otlp_endpoint` | `str` | From env / auto-configured when `BOTANU_API_KEY` is set / `"http://localhost:4318"` | OTLP endpoint |
| `otlp_headers` | `dict` | `None` | Custom headers for OTLP exporter — always honored |
| `content_capture_rate` | `float` | `0.0` | Prompt/response capture rate (0.0–1.0). See [Content Capture](../tracking/content-capture.md). |
| `content_capture_rate` | `float` | `0.10` | Prompt/response capture rate (0.0–1.0). Defaults to a 10% sample. See [Content Capture](../tracking/content-capture.md). |
| `pii_scrub_enabled` | `bool` | `True` | In-process PII scrub of captured content |
| `pii_scrub_use_presidio` | `bool` | `False` | Add Microsoft Presidio NER to the scrub pipeline |
| `max_export_batch_size` | `int` | `512` | Max spans per batch |
Expand Down
2 changes: 1 addition & 1 deletion docs/getting-started/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class BotanuConfig:
schedule_delay_millis: int = 5000
export_timeout_millis: int = 30000

content_capture_rate: float = 0.0
content_capture_rate: float = 0.10
```

`BOTANU_API_KEY` is not a field on the dataclass — when the environment variable is set, `BotanuConfig` auto-configures `otlp_endpoint` and `otlp_headers` for the botanu-trusted endpoint.
Expand Down
20 changes: 10 additions & 10 deletions scripts/pre_publish_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def run(
capture: bool = True,
) -> Tuple[int, str, str]:
"""Run a command and return (returncode, stdout, stderr)."""
result = subprocess.run(
result = subprocess.run( # noqa: S603
cmd,
cwd=str(cwd) if cwd else None,
env=env,
Expand Down Expand Up @@ -249,7 +249,7 @@ def check_api_surface(venv: Path) -> bool:
print(DIM + (err or out)[-1500:] + RESET)
return False
if "MISSING:" in out and "ALL OK" not in out:
missing_line = [line for line in out.split("\n") if "MISSING:" in line][0]
missing_line = next(line for line in out.split("\n") if "MISSING:" in line)
fail(missing_line)
return False
exports = [line for line in out.split("\n") if line.startswith("EXPORTS:")]
Expand Down Expand Up @@ -346,7 +346,7 @@ def check_smoke_test(venv: Path) -> bool:
"OTEL_LOGS_EXPORTER": "console",
"OTEL_METRICS_EXPORTER": "none",
}
code, out, err = run([str(py), "-c", SMOKE_TEST_SCRIPT], env=env)
_code, out, err = run([str(py), "-c", SMOKE_TEST_SCRIPT], env=env)
if "SMOKE_OK" in out:
ok("decorator + outcome + validation all pass")
return True
Expand Down Expand Up @@ -383,37 +383,37 @@ def main() -> int:

step(3, total, "python -m build")
if not check_build():
return summarize(results + [False])
return summarize([*results, False])
results.append(True)

step(4, total, "twine check")
if not check_twine():
return summarize(results + [False])
return summarize([*results, False])
results.append(True)

step(5, total, "create clean venv + install wheel")
try:
venv_dir = make_venv()
except RuntimeError as e:
fail(str(e))
return summarize(results + [False])
return summarize([*results, False])
if not check_install(venv_dir):
return summarize(results + [False])
return summarize([*results, False])
results.append(True)

step(6, total, "version string")
if not check_version(venv_dir):
return summarize(results + [False])
return summarize([*results, False])
results.append(True)

step(7, total, "public API surface (__all__)")
if not check_api_surface(venv_dir):
return summarize(results + [False])
return summarize([*results, False])
results.append(True)

step(8, total, "end-to-end smoke test")
if not check_smoke_test(venv_dir):
return summarize(results + [False])
return summarize([*results, False])
results.append(True)

finally:
Expand Down
4 changes: 0 additions & 4 deletions src/botanu/models/run_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,10 +279,6 @@ def to_span_attributes(self) -> Dict[str, Union[str, float, int, bool]]:
if self.cancelled_at:
attrs["botanu.run.cancelled_at"] = self.cancelled_at
if self.outcome:
# `botanu.outcome.status` is NOT emitted (removed 2026-04-16):
# customer-reported outcome is trivially fakeable. Event outcome
# is derived from eval verdict rollup / HITL / SoR instead.
# Remaining fields are diagnostic only and stay for debugging.
if self.outcome.reason_code:
attrs["botanu.outcome.reason_code"] = self.outcome.reason_code
if self.outcome.error_class:
Expand Down
13 changes: 12 additions & 1 deletion src/botanu/sdk/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,11 +163,22 @@ def enable(
from botanu.sdk.config import _redact_url_credentials

logger.info(
"Initializing Botanu SDK: service=%s, env=%s, endpoint=%s",
"Initializing Botanu SDK: service=%s, env=%s, endpoint=%s, content_capture_rate=%s",
cfg.service_name,
cfg.deployment_environment,
_redact_url_credentials(traces_endpoint),
cfg.content_capture_rate,
)
if cfg.content_capture_rate <= 0.0:
# Louder signal when the customer explicitly turned capture off —
# evaluator judge will no-op on every span. Not a failure, just a
# disable-by-choice worth flagging once at startup so a bug hunt
# for "why are eval rollups empty" ends here (Codex 2026-04-24 P0 #1).
logger.warning(
"Botanu content_capture_rate=0.0 — set_input_content / set_output_content "
"will not write span attributes, and the L2 evaluator judge will return "
"no test case for any span. See docs/tracking/content-capture.md to enable."
)

try:
from opentelemetry import trace
Expand Down
27 changes: 18 additions & 9 deletions src/botanu/sdk/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,23 @@ class BotanuConfig:
schedule_delay_millis: int = 5000
export_timeout_millis: int = 30000

# Content capture for eval — 0.0 disables entirely (default, privacy-safe).
# Set to 1.0 for sandbox/shadow, 0.10-0.20 for production. Customers must also
# call set_input_content() / set_output_content() on their trackers; this rate
# gates whether those calls actually write to span attributes. In-process PII
# scrubbing runs on the captured text before it hits the span (see
# pii_scrub_* fields below); collector regex + evaluator Presidio NER
# remain belt-and-suspenders.
content_capture_rate: float = 0.0
# Content capture for eval — 0.10 default (~10% sample). Pre-2026-04-24
# this defaulted to 0.0, which meant the default install silently
# produced no judgeable content: set_input_content / set_output_content
# no-op'd, the L2 judge returned empty test cases, and eval rollups
# stayed pending forever. Customers had to read the docs AND flip a
# config flag before the "SDK in, eval out" loop worked at all.
#
# 0.10 is what the configuration docs recommend for production; it's
# enough traffic for the judge to have real cases while keeping PII
# exposure + storage cost bounded. Three PII-defense layers still
# fire on every captured attribute:
# 1. SDK in-process scrub (pii_scrub_enabled — default True below)
# 2. Collector credential-regex scrub on content attribute prefixes
# 3. Evaluator Presidio NER pass before judge calls
# Set to 1.0 for sandbox/shadow, or to 0.0 to fully disable (e.g. for
# HIPAA-regulated workloads, where any content capture is a legal hazard).
content_capture_rate: float = 0.10

# In-process PII scrubbing — runs on text passed to set_input_content /
# set_output_content / set_retrieval_content before the span attribute is
Expand Down Expand Up @@ -424,7 +433,7 @@ def _from_dict(
max_queue_size=export.get("queue_size", 65536),
schedule_delay_millis=export.get("delay_ms", 5000),
export_timeout_millis=export.get("export_timeout_ms", 30000),
content_capture_rate=max(0.0, min(1.0, float(eval_cfg.get("content_capture_rate", 0.0)))),
content_capture_rate=max(0.0, min(1.0, float(eval_cfg.get("content_capture_rate", 0.10)))),
pii_scrub_enabled=bool(pii_cfg.get("enabled", True)),
pii_scrub_disable_patterns=pii_cfg.get("disable_patterns"),
pii_scrub_custom_patterns=pii_cfg.get("custom_patterns"),
Expand Down
17 changes: 13 additions & 4 deletions tests/unit/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,12 +454,20 @@ def test_default_packages(self):
class TestContentCaptureRate:
"""Tests for the content_capture_rate field."""

def test_default_is_zero(self):
"""Privacy-safe default: no content captured unless explicitly enabled."""
def test_default_is_ten_percent(self):
"""Default changed from 0.0 to 0.10 on 2026-04-24 (Codex P0 #1).

Pre-change, the SDK worked out of the box except that the evaluator judge
silently returned no test case for every span, because the helper
methods (set_input_content / set_output_content) gated on rate > 0.0.
0.10 matches the docs' recommended production value; three PII-defense
layers (SDK scrub + collector credential scrub + Presidio NER) still
fire on every captured attribute.
"""
with mock.patch.dict(os.environ, {}, clear=True):
os.environ.pop("BOTANU_CONTENT_CAPTURE_RATE", None)
config = BotanuConfig()
assert config.content_capture_rate == 0.0
assert config.content_capture_rate == 0.10

def test_explicit_value_respected(self):
config = BotanuConfig(content_capture_rate=0.15)
Expand All @@ -486,7 +494,8 @@ def test_env_var_invalid_ignored(self):
"""Invalid env values are ignored (default retained)."""
with mock.patch.dict(os.environ, {"BOTANU_CONTENT_CAPTURE_RATE": "not_a_number"}):
config = BotanuConfig()
assert config.content_capture_rate == 0.0
# Default since 2026-04-24 is 0.10, not 0.0.
assert config.content_capture_rate == 0.10

def test_to_dict_roundtrip(self):
config = BotanuConfig(content_capture_rate=0.1)
Expand Down
Loading