From aff7448e56bd89a8551821a8fabfb91e2c348f5c Mon Sep 17 00:00:00 2001 From: Deborah Jacob Date: Thu, 12 Mar 2026 12:39:35 -0400 Subject: [PATCH 1/4] feat: auto-configure Botanu Cloud endpoint from BOTANU_API_KEY When BOTANU_API_KEY is set and no explicit endpoint is configured, the SDK now defaults to https://ingest.botanu.ai:4318 and sets the Authorization header automatically. Also adds OTEL_EXPORTER_OTLP_ENDPOINT as a fallback and cleans up README badges. Co-Authored-By: Claude Opus 4.6 --- README.md | 5 ++--- src/botanu/sdk/config.py | 17 +++++++++++---- tests/unit/test_config.py | 45 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index bdf9393..d2833d7 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,12 @@ # Botanu SDK for Python -[![CI](https://github.com/botanu-ai/botanu-sdk-python/actions/workflows/ci.yml/badge.svg)](https://github.com/botanu-ai/botanu-sdk-python/actions/workflows/ci.yml) -[![PyPI version](https://img.shields.io/pypi/v/botanu)](https://pypi.org/project/botanu/) -[![Python](https://img.shields.io/badge/python-3.9%2B-blue)](https://www.python.org/) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](./LICENSE) Event-level cost attribution for AI workflows, built on [OpenTelemetry](https://opentelemetry.io/). + + An **event** is one business transaction — resolving a support ticket, processing an order, generating a report. Each event may involve multiple **runs** (LLM calls, retries, sub-workflows) across multiple services. By correlating every run to a diff --git a/src/botanu/sdk/config.py b/src/botanu/sdk/config.py index 525074b..72cac39 100644 --- a/src/botanu/sdk/config.py +++ b/src/botanu/sdk/config.py @@ -133,18 +133,27 @@ def __post_init__(self) -> None: os.getenv("OTEL_DEPLOYMENT_ENVIRONMENT", "production"), ) + botanu_api_key = os.getenv("BOTANU_API_KEY") + if self.otlp_endpoint is None: - # Check BOTANU_COLLECTOR_ENDPOINT first, then OTEL_* vars botanu_endpoint = os.getenv("BOTANU_COLLECTOR_ENDPOINT") if botanu_endpoint: self.otlp_endpoint = botanu_endpoint else: - env_endpoint = os.getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT") + env_endpoint = ( + os.getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT") + or os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT") + ) if env_endpoint: self.otlp_endpoint = env_endpoint + elif botanu_api_key: + # API key implies Botanu Cloud — auto-configure endpoint + self.otlp_endpoint = "https://ingest.botanu.ai:4318" else: - base = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318") - self.otlp_endpoint = base + self.otlp_endpoint = "http://localhost:4318" + + if self.otlp_headers is None and botanu_api_key: + self.otlp_headers = {"Authorization": f"Bearer {botanu_api_key}"} env_propagation_mode = os.getenv("BOTANU_PROPAGATION_MODE") if env_propagation_mode and env_propagation_mode in ("lean", "full"): diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 88eb9cb..bc9d2a2 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -346,6 +346,51 @@ def test_auto_detect_resources_truthy_values(self): assert config.auto_detect_resources is True +class TestBotanuApiKeyAutoConfig: + """Tests for BOTANU_API_KEY auto-configuring endpoint and auth header.""" + + def test_api_key_auto_endpoint(self): + with mock.patch.dict( + os.environ, + {"BOTANU_API_KEY": "btnu_live_test"}, + clear=False, + ): + os.environ.pop("BOTANU_COLLECTOR_ENDPOINT", None) + os.environ.pop("OTEL_EXPORTER_OTLP_ENDPOINT", None) + os.environ.pop("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", None) + config = BotanuConfig() + assert config.otlp_endpoint == "https://ingest.botanu.ai:4318" + + def test_api_key_auto_header(self): + with mock.patch.dict(os.environ, {"BOTANU_API_KEY": "btnu_live_test"}): + config = BotanuConfig() + assert config.otlp_headers == {"Authorization": "Bearer btnu_live_test"} + + def test_explicit_endpoint_overrides_api_key(self): + with mock.patch.dict( + os.environ, + { + "BOTANU_API_KEY": "btnu_live_test", + "BOTANU_COLLECTOR_ENDPOINT": "http://custom:4318", + }, + ): + config = BotanuConfig() + assert config.otlp_endpoint == "http://custom:4318" + # Header is still set from API key + assert config.otlp_headers == {"Authorization": "Bearer btnu_live_test"} + + def test_no_api_key_localhost_default(self): + env = {k: v for k, v in os.environ.items()} + env.pop("BOTANU_API_KEY", None) + env.pop("BOTANU_COLLECTOR_ENDPOINT", None) + env.pop("OTEL_EXPORTER_OTLP_ENDPOINT", None) + env.pop("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", None) + with mock.patch.dict(os.environ, env, clear=True): + config = BotanuConfig() + assert config.otlp_endpoint == "http://localhost:4318" + assert config.otlp_headers is None + + class TestBotanuConfigAutoInstrument: """Tests for auto-instrumentation configuration.""" From c83c6894719d6b39d62b01c98257cb95cefbe96e Mon Sep 17 00:00:00 2001 From: Deborah Jacob Date: Wed, 8 Apr 2026 20:52:09 -0700 Subject: [PATCH 2/4] Add brownfield OTel coexistence, step param, pre-publish gate Introduces SampledSpanProcessor that preserves the host app's existing TracerProvider sampling ratio when botanu is bootstrapped into a project that already has OpenTelemetry wired up. register.py exposes an explicit entry point so existing-OTel users can opt in without the decorator touching their provider. bootstrap.py detects a configured provider and hands off instead of overriding it. Decorators accept an optional step parameter (stored in RunContext, not yet emitted to span attributes) so multi-step workflow plumbing can land without breaking the 0.x contract -- kept backward compatible until the collector servicegraph work unblocks and we start emitting it. scripts/pre_publish_check.py is a red/green gate that builds the wheel, runs twine check, installs into a fresh venv, validates the public API surface, and runs a decorator + emit_outcome smoke test. Safe to run on Windows (ASCII markers, no unicode). Also updates CI, CodeQL, repolinter, scorecard, and release workflows plus the collector / existing-otel / installation docs to match the new bootstrap flow. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/ci.yml | 24 +- .github/workflows/codeql.yml | 8 +- .github/workflows/release.yml | 18 +- .github/workflows/repolinter.yml | 4 +- .github/workflows/scorecard.yml | 6 +- docs/getting-started/installation.md | 28 +- docs/integration/collector.md | 444 ++++----------------------- docs/integration/existing-otel.md | 310 ++++--------------- scripts/pre_publish_check.py | 443 ++++++++++++++++++++++++++ src/botanu/__init__.py | 6 + src/botanu/models/run_context.py | 1 + src/botanu/processors/__init__.py | 3 +- src/botanu/processors/sampled.py | 86 ++++++ src/botanu/register.py | 50 +++ src/botanu/sdk/bootstrap.py | 111 ++++++- src/botanu/sdk/config.py | 4 +- src/botanu/sdk/decorators.py | 16 +- tests/unit/test_bootstrap.py | 218 +++++++++++++ tests/unit/test_config.py | 2 +- 19 files changed, 1086 insertions(+), 696 deletions(-) create mode 100644 scripts/pre_publish_check.py create mode 100644 src/botanu/processors/sampled.py create mode 100644 src/botanu/register.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7be92e0..0703828 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,8 +19,8 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: "3.12" - run: pip install ruff @@ -33,8 +33,8 @@ jobs: typecheck: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: "3.12" - run: pip install -e ".[dev]" @@ -50,11 +50,11 @@ jobs: matrix: python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: fetch-depth: 0 # hatch-vcs needs full history - - uses: actions/setup-python@v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: ${{ matrix.python-version }} @@ -66,7 +66,7 @@ jobs: - name: Upload coverage if: matrix.python-version == '3.12' - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 # v4 with: file: coverage.xml fail_ci_if_error: false @@ -77,15 +77,15 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: fetch-depth: 0 - - uses: actions/setup-python@v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: "3.12" - run: pip install build - run: python -m build - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: dist path: dist/ @@ -97,10 +97,10 @@ jobs: runs-on: ubuntu-latest if: github.event_name == 'pull_request' steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: fetch-depth: 0 - name: DCO check - uses: christophebedard/dco-check@0.5.0 + uses: christophebedard/dco-check@7b0205d25ead0f898e0b706b58227dd5fa7e3f55 # 0.5.0 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index b0d5105..2f0597b 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -24,17 +24,17 @@ jobs: matrix: language: [python] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - name: Initialize CodeQL - uses: github/codeql-action/init@v3 + uses: github/codeql-action/init@ebcb5b36ded6beda4ceefea6a8bc4cc885255bb3 # v3 with: languages: ${{ matrix.language }} - name: Autobuild - uses: github/codeql-action/autobuild@v3 + uses: github/codeql-action/autobuild@ebcb5b36ded6beda4ceefea6a8bc4cc885255bb3 # v3 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 + uses: github/codeql-action/analyze@ebcb5b36ded6beda4ceefea6a8bc4cc885255bb3 # v3 with: category: "/language:${{ matrix.language }}" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ad395fd..0981930 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -28,11 +28,11 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: fetch-depth: 0 # hatch-vcs needs full history - - uses: actions/setup-python@v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: "3.12" @@ -48,7 +48,7 @@ jobs: - name: List build artifacts run: ls -la dist/ - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: dist path: dist/ @@ -71,13 +71,13 @@ jobs: permissions: id-token: write # required for OIDC trusted publishing steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 with: name: dist path: dist/ - name: Publish to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # release/v1 with: repository-url: https://test.pypi.org/legacy/ skip-existing: true @@ -99,13 +99,13 @@ jobs: permissions: id-token: write # required for OIDC trusted publishing steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 with: name: dist path: dist/ - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # release/v1 # ------------------------------------------------------------------- # Create GitHub Release with auto-generated notes @@ -117,11 +117,11 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: fetch-depth: 0 - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 with: name: dist path: dist/ diff --git a/.github/workflows/repolinter.yml b/.github/workflows/repolinter.yml index 3f1add9..2c70fa1 100644 --- a/.github/workflows/repolinter.yml +++ b/.github/workflows/repolinter.yml @@ -16,9 +16,9 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - name: Run Repolinter - uses: todogroup/repolinter-action@v1 + uses: todogroup/repolinter-action@4d478dcd860571382da7d512d6dc6dd5f554fbb2 # v1 with: config_file: .github/repolinter.json diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 2e56bfc..5e2529b 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -18,18 +18,18 @@ jobs: security-events: write # upload SARIF id-token: write # publish results steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: persist-credentials: false - name: Run OpenSSF Scorecard - uses: ossf/scorecard-action@v2 + uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3 with: results_file: results.sarif results_format: sarif publish_results: true - name: Upload SARIF to GitHub Security tab - uses: github/codeql-action/upload-sarif@v3 + uses: github/codeql-action/upload-sarif@ebcb5b36ded6beda4ceefea6a8bc4cc885255bb3 # v3 with: sarif_file: results.sarif diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index 3591b72..48837b7 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -3,7 +3,6 @@ ## Requirements - Python 3.9 or later -- OpenTelemetry Collector (recommended for production) ## Install @@ -18,6 +17,16 @@ One install gives you everything: Instrumentation packages are lightweight shims that silently no-op when the target library is not installed. Zero bloat. +## Configure + +Set your API key as an environment variable. The SDK auto-configures the OTLP endpoint to `ingest.botanu.ai` — no other configuration needed. + +```bash +export BOTANU_API_KEY="btnu_live_..." +``` + +That's it. No collector to run, no infrastructure to deploy. Botanu hosts everything. + ## Verify ```python @@ -47,6 +56,7 @@ FROM python:3.12-slim WORKDIR /app RUN pip install botanu COPY . . +ENV BOTANU_API_KEY="btnu_live_..." CMD ["python", "app.py"] ``` @@ -58,22 +68,6 @@ For running tests and linting: pip install "botanu[dev]" ``` -## Collector Setup - -The SDK sends traces to an OpenTelemetry Collector via OTLP HTTP (port 4318). Configure the endpoint via environment variable: - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 -``` - -Quick start with Docker: - -```bash -docker run -p 4318:4318 otel/opentelemetry-collector:latest -``` - -See [Collector Configuration](../integration/collector.md) for production setup. - ## Next Steps - [Quickstart](quickstart.md) - Your first instrumented application diff --git a/docs/integration/collector.md b/docs/integration/collector.md index ed85df9..6d1708d 100644 --- a/docs/integration/collector.md +++ b/docs/integration/collector.md @@ -1,422 +1,92 @@ -# Collector Configuration +# Botanu Cloud Collector -Set up the OpenTelemetry Collector for cost attribution processing. +Botanu hosts a multi-tenant OpenTelemetry Collector — you don't need to deploy or manage any infrastructure. -## Overview +## How It Works -Botanu follows a "thin SDK, smart collector" architecture. The SDK captures raw telemetry; the collector handles: +The SDK sends telemetry to Botanu's hosted collector via OTLP over HTTPS. The collector handles: -- **PII redaction** - Remove sensitive data from prompts/responses -- **Cost calculation** - Convert tokens to dollars using pricing tables -- **Vendor normalization** - Standardize provider names -- **Cardinality management** - Limit high-cardinality attributes -- **Aggregation** - Pre-aggregate metrics for dashboards +- **Tenant isolation** — API key in the OTLP Authorization header identifies your tenant +- **PII scrubbing** — Configurable redaction of sensitive data patterns +- **Enrichment** — Vendor normalization, span classification +- **Aggregation** — Event-level accumulation (spans → run summaries) +- **Cost computation** — Token-to-dollar conversion using the pricing rate card +- **Durable spooling** — Hybrid local disk + S3 spool ensures zero trace loss -## Quick Start +## Endpoints -### Docker +| Protocol | Endpoint | Port | +|----------|----------|------| +| gRPC | `ingest.botanu.ai:4317` | 4317 | +| HTTP | `ingest.botanu.ai:4318` | 4318 | -```bash -docker run -p 4318:4318 -p 4317:4317 \ - -v $(pwd)/otel-config.yaml:/etc/otelcol/config.yaml \ - otel/opentelemetry-collector-contrib:latest -``` - -### Docker Compose - -```yaml -services: - collector: - image: otel/opentelemetry-collector-contrib:latest - ports: - - "4318:4318" # OTLP HTTP - - "4317:4317" # OTLP gRPC - volumes: - - ./otel-config.yaml:/etc/otelcol/config.yaml -``` - -## Basic Configuration - -```yaml -# otel-config.yaml -receivers: - otlp: - protocols: - http: - endpoint: 0.0.0.0:4318 - grpc: - endpoint: 0.0.0.0:4317 - -processors: - batch: - send_batch_size: 1000 - timeout: 10s - -exporters: - debug: - verbosity: detailed - -service: - pipelines: - traces: - receivers: [otlp] - processors: [batch] - exporters: [debug] -``` - -## Cost Attribution Configuration - -### Full Pipeline - -```yaml -receivers: - otlp: - protocols: - http: - endpoint: 0.0.0.0:4318 - grpc: - endpoint: 0.0.0.0:4317 +The SDK defaults to HTTP (`ingest.botanu.ai:4318`) when `BOTANU_API_KEY` is set. -processors: - # Batch for efficiency - batch: - send_batch_size: 1000 - timeout: 10s +## Configuration - # Normalize vendor names - transform/vendor: - trace_statements: - - context: span - statements: - # Normalize provider names to standard format - - set(attributes["botanu.vendor"], "openai") where attributes["gen_ai.provider.name"] == "openai" - - set(attributes["botanu.vendor"], "anthropic") where attributes["gen_ai.provider.name"] == "anthropic" - - set(attributes["botanu.vendor"], "azure.openai") where attributes["gen_ai.provider.name"] == "azure.openai" - - set(attributes["botanu.vendor"], "gcp.vertex_ai") where attributes["gen_ai.provider.name"] == "gcp.vertex_ai" - - set(attributes["botanu.vendor"], "aws.bedrock") where attributes["gen_ai.provider.name"] == "aws.bedrock" +No collector configuration is needed on your side. Just set the API key: - # Calculate costs from tokens - transform/cost: - trace_statements: - - context: span - statements: - # GPT-4 pricing (example: $30/$60 per 1M tokens) - - set(attributes["botanu.cost.input_usd"], - attributes["gen_ai.usage.input_tokens"] * 0.00003) - where attributes["gen_ai.request.model"] == "gpt-4" - - set(attributes["botanu.cost.output_usd"], - attributes["gen_ai.usage.output_tokens"] * 0.00006) - where attributes["gen_ai.request.model"] == "gpt-4" - - # GPT-4 Turbo pricing ($10/$30 per 1M tokens) - - set(attributes["botanu.cost.input_usd"], - attributes["gen_ai.usage.input_tokens"] * 0.00001) - where attributes["gen_ai.request.model"] == "gpt-4-turbo" - - set(attributes["botanu.cost.output_usd"], - attributes["gen_ai.usage.output_tokens"] * 0.00003) - where attributes["gen_ai.request.model"] == "gpt-4-turbo" - - # Claude 3 Opus pricing ($15/$75 per 1M tokens) - - set(attributes["botanu.cost.input_usd"], - attributes["gen_ai.usage.input_tokens"] * 0.000015) - where attributes["gen_ai.request.model"] == "claude-3-opus-20240229" - - set(attributes["botanu.cost.output_usd"], - attributes["gen_ai.usage.output_tokens"] * 0.000075) - where attributes["gen_ai.request.model"] == "claude-3-opus-20240229" - - # Calculate total - - set(attributes["botanu.cost.total_usd"], - attributes["botanu.cost.input_usd"] + attributes["botanu.cost.output_usd"]) - where attributes["botanu.cost.input_usd"] != nil - - # PII redaction for prompts/responses - redaction: - allow_all_keys: true - blocked_values: - # Email addresses - - "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b" - # Phone numbers - - "\\b\\d{3}[-.]?\\d{3}[-.]?\\d{4}\\b" - # SSN - - "\\b\\d{3}-\\d{2}-\\d{4}\\b" - # Credit card numbers - - "\\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})\\b" - - # Cardinality limits - attributes: - actions: - - key: botanu.run_id - action: hash - # Keep first 16 chars of hash to reduce cardinality if needed - - key: gen_ai.content.prompt - action: delete - # Remove raw prompts (keep tokens for cost) - -exporters: - # ClickHouse for analytics - clickhouse: - endpoint: tcp://clickhouse:9000 - database: botanu - ttl: 90d - create_schema: true - - # Also send to your APM - otlp/apm: - endpoint: https://your-apm.example.com - headers: - Authorization: Bearer ${APM_TOKEN} - -service: - pipelines: - traces: - receivers: [otlp] - processors: - - batch - - transform/vendor - - transform/cost - - redaction - - attributes - exporters: [clickhouse, otlp/apm] +```bash +export BOTANU_API_KEY="btnu_live_..." ``` -## PII Redaction +```python +from botanu import enable -### Using Redaction Processor - -```yaml -processors: - redaction: - allow_all_keys: true - blocked_values: - # Redact common PII patterns - - "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b" # Email - - "\\b\\d{3}[-.]?\\d{3}[-.]?\\d{4}\\b" # Phone - - "\\b\\d{3}-\\d{2}-\\d{4}\\b" # SSN - summary: debug # Log redaction summary +enable() # reads BOTANU_API_KEY from env ``` -### Using Transform Processor +### Override endpoint (advanced) -```yaml -processors: - transform/pii: - trace_statements: - - context: span - statements: - # Remove prompt content entirely - - delete(attributes["gen_ai.content.prompt"]) - - delete(attributes["gen_ai.content.completion"]) +For development or testing against a local collector: - # Or replace with placeholder - - replace_pattern(attributes["gen_ai.content.prompt"], - "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b", - "[REDACTED_EMAIL]") +```python +enable(otlp_endpoint="http://localhost:4318") ``` -## Pricing Tables - -Maintain pricing in the collector config: - -```yaml -processors: - transform/cost: - trace_statements: - - context: span - statements: - # OpenAI pricing (as of 2024) - # GPT-4 - - set(attributes["botanu.cost.input_usd"], attributes["gen_ai.usage.input_tokens"] * 0.00003) - where attributes["gen_ai.request.model"] == "gpt-4" or attributes["gen_ai.request.model"] == "gpt-4-0613" - - set(attributes["botanu.cost.output_usd"], attributes["gen_ai.usage.output_tokens"] * 0.00006) - where attributes["gen_ai.request.model"] == "gpt-4" or attributes["gen_ai.request.model"] == "gpt-4-0613" - - # GPT-4 Turbo - - set(attributes["botanu.cost.input_usd"], attributes["gen_ai.usage.input_tokens"] * 0.00001) - where IsMatch(attributes["gen_ai.request.model"], "gpt-4-turbo.*") - - set(attributes["botanu.cost.output_usd"], attributes["gen_ai.usage.output_tokens"] * 0.00003) - where IsMatch(attributes["gen_ai.request.model"], "gpt-4-turbo.*") - - # GPT-4o - - set(attributes["botanu.cost.input_usd"], attributes["gen_ai.usage.input_tokens"] * 0.000005) - where IsMatch(attributes["gen_ai.request.model"], "gpt-4o.*") - - set(attributes["botanu.cost.output_usd"], attributes["gen_ai.usage.output_tokens"] * 0.000015) - where IsMatch(attributes["gen_ai.request.model"], "gpt-4o.*") +Or via environment variable: - # GPT-3.5 Turbo - - set(attributes["botanu.cost.input_usd"], attributes["gen_ai.usage.input_tokens"] * 0.0000005) - where IsMatch(attributes["gen_ai.request.model"], "gpt-3.5-turbo.*") - - set(attributes["botanu.cost.output_usd"], attributes["gen_ai.usage.output_tokens"] * 0.0000015) - where IsMatch(attributes["gen_ai.request.model"], "gpt-3.5-turbo.*") - - # Claude 3 Opus - - set(attributes["botanu.cost.input_usd"], attributes["gen_ai.usage.input_tokens"] * 0.000015) - where IsMatch(attributes["gen_ai.request.model"], "claude-3-opus.*") - - set(attributes["botanu.cost.output_usd"], attributes["gen_ai.usage.output_tokens"] * 0.000075) - where IsMatch(attributes["gen_ai.request.model"], "claude-3-opus.*") - - # Claude 3 Sonnet - - set(attributes["botanu.cost.input_usd"], attributes["gen_ai.usage.input_tokens"] * 0.000003) - where IsMatch(attributes["gen_ai.request.model"], "claude-3-sonnet.*") - - set(attributes["botanu.cost.output_usd"], attributes["gen_ai.usage.output_tokens"] * 0.000015) - where IsMatch(attributes["gen_ai.request.model"], "claude-3-sonnet.*") - - # Claude 3 Haiku - - set(attributes["botanu.cost.input_usd"], attributes["gen_ai.usage.input_tokens"] * 0.00000025) - where IsMatch(attributes["gen_ai.request.model"], "claude-3-haiku.*") - - set(attributes["botanu.cost.output_usd"], attributes["gen_ai.usage.output_tokens"] * 0.00000125) - where IsMatch(attributes["gen_ai.request.model"], "claude-3-haiku.*") - - # Total cost - - set(attributes["botanu.cost.total_usd"], - attributes["botanu.cost.input_usd"] + attributes["botanu.cost.output_usd"]) - where attributes["botanu.cost.input_usd"] != nil and attributes["botanu.cost.output_usd"] != nil +```bash +export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 ``` -## Backend Exporters - -### ClickHouse +## Data Flow -```yaml -exporters: - clickhouse: - endpoint: tcp://clickhouse:9000 - database: botanu - username: default - password: ${CLICKHOUSE_PASSWORD} - ttl: 90d - create_schema: true - logs_table_name: otel_logs - traces_table_name: otel_traces - metrics_table_name: otel_metrics ``` - -### PostgreSQL (via OTLP) - -Use the collector to forward to a service that writes to PostgreSQL: - -```yaml -exporters: - otlp: - endpoint: http://postgres-writer:4317 +Your App (SDK) + │ + │ OTLP/HTTP (TLS) + │ Authorization: Bearer btnu_live_... + ▼ +ingest.botanu.ai (Botanu-hosted collector) + │ + │ PII scrub → enrich → aggregate → spool + ▼ +Botanu Cost Engine (api.botanu.ai) + │ + │ Cost computation → rollups → storage + ▼ +PostgreSQL (Botanu-managed RDS) + │ + ▼ +Dashboard (app.botanu.ai) ``` -### Prometheus (Metrics) +## PII Handling -```yaml -exporters: - prometheus: - endpoint: 0.0.0.0:8889 - namespace: botanu -``` +The collector applies PII scrubbing rules before data is stored. By default: -### Grafana Tempo +- Email addresses, phone numbers, SSNs, and credit card numbers are redacted +- Raw prompt/completion content is stripped (token counts are preserved for cost) +- Only aggregated summaries (cost, latency, token counts, outcome status) are stored -```yaml -exporters: - otlp: - endpoint: tempo:4317 - tls: - insecure: true -``` +Configure additional scrubbing rules via the dashboard at **Settings → Data Privacy**. ## Sampling -For cost attribution, avoid sampling. If you must sample: - -```yaml -processors: - probabilistic_sampler: - sampling_percentage: 100 # Keep 100% for cost attribution - - # Or sample only non-LLM spans - tail_sampling: - decision_wait: 10s - policies: - # Always keep LLM calls - - name: always-sample-llm - type: string_attribute - string_attribute: - key: gen_ai.operation.name - values: [chat, text_completion, embeddings] - - # Sample other spans at 10% - - name: sample-other - type: probabilistic - probabilistic: - sampling_percentage: 10 -``` - -## High Availability - -### Load Balancing - -```yaml -# collector-1.yaml -receivers: - otlp: - protocols: - http: - endpoint: 0.0.0.0:4318 - -exporters: - loadbalancing: - protocol: - otlp: - tls: - insecure: true - resolver: - dns: - hostname: collector-pool.svc.cluster.local - port: 4317 -``` - -### Kubernetes Deployment - -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: otel-collector -spec: - replicas: 3 - selector: - matchLabels: - app: otel-collector - template: - spec: - containers: - - name: collector - image: otel/opentelemetry-collector-contrib:latest - ports: - - containerPort: 4318 - - containerPort: 4317 - volumeMounts: - - name: config - mountPath: /etc/otelcol - volumes: - - name: config - configMap: - name: otel-collector-config -``` - -## Monitoring the Collector - -Enable internal telemetry: - -```yaml -service: - telemetry: - logs: - level: info - metrics: - level: detailed - address: 0.0.0.0:8888 -``` - -Access metrics at `http://collector:8888/metrics`. +For cost attribution accuracy, the collector processes 100% of traces. Unlike APM tools, sampling would produce incorrect cost numbers. The SDK sends all spans — the collector handles aggregation efficiently. ## See Also -- [Architecture](../concepts/architecture.md) - SDK architecture -- [Auto-Instrumentation](auto-instrumentation.md) - Library instrumentation -- [Best Practices](../patterns/best-practices.md) - Configuration patterns +- [Auto-Instrumentation](auto-instrumentation.md) — Library instrumentation +- [Architecture](../concepts/architecture.md) — SDK architecture diff --git a/docs/integration/existing-otel.md b/docs/integration/existing-otel.md index 539b845..72de805 100644 --- a/docs/integration/existing-otel.md +++ b/docs/integration/existing-otel.md @@ -1,294 +1,118 @@ # Existing OpenTelemetry Setup -Integrate Botanu with your existing OpenTelemetry configuration. +Integrate botanu with your existing OpenTelemetry configuration — Datadog, Jaeger, Grafana Tempo, Splunk, New Relic, or any OTel-compatible backend. -## Overview +## Automatic Detection (Recommended) -If you already have OpenTelemetry configured (via Datadog, Splunk, New Relic, or custom setup), Botanu integrates seamlessly. You only need to add the `RunContextEnricher` span processor. - -## Minimal Integration - -Add just the span processor to your existing provider: +As of SDK v0.1.0, `enable()` **automatically detects your existing TracerProvider** and adds botanu alongside it. No manual processor setup needed: ```python -from opentelemetry import trace -from botanu.processors.enricher import RunContextEnricher - -# Your existing TracerProvider -provider = trace.get_tracer_provider() - -# Add Botanu's enricher -provider.add_span_processor(RunContextEnricher()) +from botanu import enable +enable() # Detects existing OTel, adds botanu alongside ``` -That's it. All spans will now receive `run_id` from baggage. +**What happens under the hood:** -## With Existing Instrumentation +| Your setup | What `enable()` does | +|-----------|---------------------| +| OTel SDK with AlwaysOn sampling | Migrates your processors to a new provider, adds botanu exporter alongside | +| OTel SDK with ratio sampling (e.g., 10%) | Same, but wraps your processors in `SampledSpanProcessor` to preserve your ratio. Your Datadog/Jaeger bill is unchanged. | +| ddtrace (Datadog Python SDK) | Creates a parallel TracerProvider. ddtrace continues unchanged. | +| No existing tracing | Creates a fresh provider (standard greenfield path) | -Botanu works alongside any existing instrumentation: +**Zero disruption guarantee:** Your existing dashboards, bills, and sampling are preserved exactly as they were. -```python -from opentelemetry import trace -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import BatchSpanProcessor -from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter -from opentelemetry.instrumentation.requests import RequestsInstrumentor +## How Sampling Is Preserved -from botanu.processors.enricher import RunContextEnricher +If your existing provider uses ratio-based sampling (e.g., 10%), botanu needs to change the sampler to AlwaysOn (to capture 100% for cost attribution). But your existing exporter should still see only 10%. -# Your existing setup -provider = TracerProvider() -provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter())) -trace.set_tracer_provider(provider) +botanu solves this with `SampledSpanProcessor`, which wraps your existing processors and applies your original ratio at the export level: -# Your existing instrumentation -RequestsInstrumentor().instrument() - -# Add Botanu enricher (order doesn't matter) -provider.add_span_processor(RunContextEnricher()) ``` - -## With Datadog - -```python -from ddtrace import tracer -from ddtrace.opentelemetry import TracerProvider -from opentelemetry import trace - -from botanu.processors.enricher import RunContextEnricher - -# Datadog's TracerProvider -provider = TracerProvider() -trace.set_tracer_provider(provider) - -# Add Botanu enricher -provider.add_span_processor(RunContextEnricher()) +App (AlwaysOn sampler — all spans created) + → SampledSpanProcessor(0.1) → Your Datadog exporter → Datadog (sees 10%) + → botanu exporter → botanu collector (sees 100%) ``` -## With Splunk +This is deterministic — the same trace_id always gets the same sampling decision. -```python -from splunk_otel.tracing import start_tracing -from opentelemetry import trace - -from botanu.processors.enricher import RunContextEnricher - -# Start Splunk tracing -start_tracing() - -# Add Botanu enricher -provider = trace.get_tracer_provider() -provider.add_span_processor(RunContextEnricher()) -``` +## Manual Integration (Advanced) -## With New Relic +If you prefer manual control or want to understand the internals: ```python from opentelemetry import trace -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import BatchSpanProcessor +from botanu.processors import RunContextEnricher, SampledSpanProcessor from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk.trace.export import BatchSpanProcessor -from botanu.processors.enricher import RunContextEnricher - -# New Relic OTLP endpoint -provider = TracerProvider() -provider.add_span_processor( - BatchSpanProcessor( - OTLPSpanExporter( - endpoint="https://otlp.nr-data.net/v1/traces", - headers={"api-key": "YOUR_LICENSE_KEY"}, - ) - ) -) -trace.set_tracer_provider(provider) +# Get your existing TracerProvider +provider = trace.get_tracer_provider() -# Add Botanu enricher +# 1. Add RunContextEnricher (propagates run_id, workflow, event_id to all spans) provider.add_span_processor(RunContextEnricher()) -``` -## With Jaeger - -```python -from opentelemetry import trace -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import BatchSpanProcessor -from opentelemetry.exporter.jaeger.thrift import JaegerExporter - -from botanu.processors.enricher import RunContextEnricher - -# Jaeger setup -provider = TracerProvider() -provider.add_span_processor( - BatchSpanProcessor( - JaegerExporter( - agent_host_name="localhost", - agent_port=6831, - ) - ) +# 2. Add botanu OTLP exporter (sends traces to botanu collector) +botanu_exporter = OTLPSpanExporter( + endpoint="https://ingest.botanu.ai:4318/v1/traces", + headers={"Authorization": "Bearer btnu_live_..."}, ) -trace.set_tracer_provider(provider) - -# Add Botanu enricher -provider.add_span_processor(RunContextEnricher()) +provider.add_span_processor(BatchSpanProcessor(botanu_exporter)) ``` -## Multiple Exporters +## With Datadog (ddtrace) -Send to both your APM and a cost-attribution backend: +ddtrace uses its own tracing system (not OTel SDK). `enable()` detects this and creates a separate TracerProvider for botanu: ```python -from opentelemetry import trace -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import BatchSpanProcessor -from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter - -from botanu.processors.enricher import RunContextEnricher - -provider = TracerProvider() - -# Your APM (e.g., Datadog) -provider.add_span_processor( - BatchSpanProcessor( - OTLPSpanExporter(endpoint="https://your-apm.example.com/v1/traces") - ) -) - -# Botanu collector for cost attribution -provider.add_span_processor( - BatchSpanProcessor( - OTLPSpanExporter(endpoint="http://botanu-collector:4318/v1/traces") - ) -) - -# Botanu enricher (adds run_id to all spans) -provider.add_span_processor(RunContextEnricher()) +# ddtrace continues working unchanged +from ddtrace import tracer # noqa — ddtrace auto-patches -trace.set_tracer_provider(provider) +# botanu creates its own provider alongside ddtrace +from botanu import enable +enable() ``` -## How RunContextEnricher Works +Both tracing systems run in parallel. No conflicts. -The enricher reads baggage and writes to span attributes: +**Migration path** (optional, for simplification): +1. **Phase A** (now): Dual tracing — ddtrace + botanu +2. **Phase C** (later): Configure ddtrace OTLP export, remove botanu auto-instrumentation +3. **Phase D** (long-term): Migrate to OTel SDK + Datadog exporter — single tracing layer -```python -class RunContextEnricher(SpanProcessor): - def on_start(self, span, parent_context): - # Read run_id from baggage - run_id = baggage.get_baggage("botanu.run_id", parent_context) - if run_id: - span.set_attribute("botanu.run_id", run_id) - - # Read workflow from baggage - workflow = baggage.get_baggage("botanu.workflow", parent_context) - if workflow: - span.set_attribute("botanu.workflow", workflow) -``` - -This means: -- Every span gets `run_id` if it exists in baggage -- Auto-instrumented spans are enriched automatically -- No code changes needed in your existing instrumentation +## Using botanu Decorators -## Using Botanu Decorators - -With the enricher in place, use Botanu decorators: +With either automatic or manual integration, use botanu decorators for cost attribution: ```python from botanu import botanu_workflow, emit_outcome -@botanu_workflow("do_work", event_id=event_id, customer_id=customer_id) -async def do_work(event_id: str, customer_id: str): - # All spans created here (by any instrumentation) get run_id - data = do_something() - result = await process(data) - - emit_outcome("success") -``` - -## Without Botanu Bootstrap - -If you don't want to use `enable()`, manually set up propagation: - -```python -from opentelemetry import propagate -from opentelemetry.propagators.composite import CompositePropagator -from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator -from opentelemetry.baggage.propagation import W3CBaggagePropagator - -# Ensure baggage propagation is enabled -propagate.set_global_textmap( - CompositePropagator([ - TraceContextTextMapPropagator(), - W3CBaggagePropagator(), - ]) +@botanu_workflow( + name="Customer Support", + event_id=lambda req: req.ticket_id, + customer_id=lambda req: req.org_id, ) +async def handle_ticket(req): + result = await process(req) + emit_outcome("success", value_type="tickets_resolved", value_amount=1) + return result ``` -## Verifying Integration - -Check that run_id appears on spans: - -```python -from opentelemetry import trace, baggage, context - -# Set baggage (normally done by @botanu_workflow) -ctx = baggage.set_baggage("botanu.run_id", "test-123") -token = context.attach(ctx) - -try: - tracer = trace.get_tracer("test") - with tracer.start_as_current_span("test-span") as span: - # Check attribute was set - print(span.attributes.get("botanu.run_id")) # Should print "test-123" -finally: - context.detach(token) -``` - -## Processor Order - -Span processors are called in order. The enricher should be added after your span exporters: - -```python -# 1. Exporters (send spans to backends) -provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter())) - -# 2. Enrichers (modify spans before export) -provider.add_span_processor(RunContextEnricher()) -``` - -However, `RunContextEnricher` uses `on_start()`, so it runs before export regardless. +All child spans (auto-instrumented OpenAI, database, HTTP calls) inherit the run context automatically via W3C Baggage. ## Troubleshooting -### run_id Not Appearing - -1. Check enricher is added: - ```python - provider = trace.get_tracer_provider() - # Verify RunContextEnricher is in the list - ``` - -2. Check baggage is set: - ```python - from opentelemetry import baggage - print(baggage.get_baggage("botanu.run_id")) - ``` - -3. Ensure `@botanu_workflow` is used at entry points - -### Baggage Not Propagating - -Check propagators are configured: -```python -from opentelemetry import propagate -print(propagate.get_global_textmap()) -``` - -Should include `W3CBaggagePropagator`. +### run_id not appearing on spans +1. Verify `enable()` was called (or `RunContextEnricher` was added manually) +2. Check `@botanu_workflow` is on your entry point functions +3. Verify W3C Baggage propagator is active: `propagate.get_global_textmap()` -## See Also +### Existing traces missing after adding botanu +This should not happen — `enable()` preserves your existing processors. If it does: +1. Check `enable()` was called ONCE (not multiple times) +2. Check your existing provider was created BEFORE `enable()` runs -- [Auto-Instrumentation](auto-instrumentation.md) - Library instrumentation -- [Collector Configuration](collector.md) - Collector setup -- [Architecture](../concepts/architecture.md) - SDK design +### Sampling concerns +If you use ratio sampling and see unexpected volume changes in your APM: +1. Check botanu logs for "Preserved your sampling ratio" message +2. Verify `SampledSpanProcessor` is wrapping your exporter (not replacing it) diff --git a/scripts/pre_publish_check.py b/scripts/pre_publish_check.py new file mode 100644 index 0000000..ce36153 --- /dev/null +++ b/scripts/pre_publish_check.py @@ -0,0 +1,443 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Pre-publish red/green check for botanu-sdk-python. + +Runs the full build → install → import → smoke-test chain in an isolated +venv so you know whether `git tag vX.Y.Z && git push --tags` is safe to do. + +Usage (from repo root): + + python scripts/pre_publish_check.py + +Exits 0 (GREEN) if everything passes. Exits 1 (RED) with a summary of +failures otherwise. Safe to re-run -- cleans up its own artifacts. + +What it checks (in order): + 1. Working tree is clean (warning only, not a hard fail) + 2. Old dist/ and build/ artifacts removed + 3. `python -m build` produces sdist + wheel + 4. `twine check` passes on both artifacts + 5. Wheel installs cleanly into a fresh venv + 6. Version string is non-empty and not "0.0.0" + 7. All names in `botanu.__all__` are importable + 8. `enable()` initializes without raising + 9. `@botanu_workflow` with static ids decorates and runs a function + 10. `@botanu_workflow` with callable ids decorates and runs a function + 11. `emit_outcome("success", ...)` inside a decorated function works + 12. `emit_outcome` rejects invalid status with ValueError +""" + +from __future__ import annotations + +import os +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path +from typing import List, Tuple + +REPO_ROOT = Path(__file__).resolve().parent.parent +DIST_DIR = REPO_ROOT / "dist" +BUILD_DIR = REPO_ROOT / "build" + + +# --------------------------------------------------------------------------- +# Output helpers -- ASCII only so they work on Windows cp1252 consoles. +# Colours are used only when stdout is a TTY that supports ANSI. +# --------------------------------------------------------------------------- + +_USE_COLOR = sys.stdout.isatty() and os.environ.get("NO_COLOR") is None +if os.name == "nt": + # Try to enable ANSI on modern Windows terminals; fall back to plain text. + try: + import ctypes + kernel32 = ctypes.windll.kernel32 + kernel32.SetConsoleMode(kernel32.GetStdHandle(-11), 7) + except Exception: + _USE_COLOR = False + + +def _c(code: str) -> str: + return code if _USE_COLOR else "" + + +GREEN = _c("\033[92m") +RED = _c("\033[91m") +YELLOW = _c("\033[93m") +BLUE = _c("\033[94m") +DIM = _c("\033[2m") +BOLD = _c("\033[1m") +RESET = _c("\033[0m") + + +def step(n: int, total: int, label: str) -> None: + print(f"{BLUE}[{n}/{total}]{RESET} {label}...", flush=True) + + +def ok(msg: str = "") -> None: + suffix = f" {DIM}{msg}{RESET}" if msg else "" + print(f" {GREEN}[OK]{RESET}{suffix}", flush=True) + + +def fail(msg: str) -> None: + print(f" {RED}[FAIL]{RESET} {msg}", flush=True) + + +def warn(msg: str) -> None: + print(f" {YELLOW}[WARN]{RESET} {msg}", flush=True) + + +def run( + cmd: List[str], + cwd: Path | None = None, + env: dict | None = None, + capture: bool = True, +) -> Tuple[int, str, str]: + """Run a command and return (returncode, stdout, stderr).""" + result = subprocess.run( + cmd, + cwd=str(cwd) if cwd else None, + env=env, + capture_output=capture, + text=True, + ) + return result.returncode, result.stdout, result.stderr + + +def cleanup(venv_dir: Path | None = None) -> None: + """Remove build artifacts and the temp venv.""" + for d in (DIST_DIR, BUILD_DIR): + if d.exists(): + shutil.rmtree(d, ignore_errors=True) + for egg in REPO_ROOT.glob("*.egg-info"): + shutil.rmtree(egg, ignore_errors=True) + if venv_dir and venv_dir.exists(): + shutil.rmtree(venv_dir, ignore_errors=True) + + +# --------------------------------------------------------------------------- +# Checks +# --------------------------------------------------------------------------- + +def check_git_clean() -> bool: + """Warn if uncommitted changes. Non-blocking.""" + code, out, _ = run(["git", "status", "--porcelain"], cwd=REPO_ROOT) + if code != 0: + warn("not a git repo or git unavailable -- skipping clean check") + return True + if out.strip(): + warn("working tree has uncommitted changes:") + for line in out.strip().split("\n")[:5]: + warn(f" {line}") + warn("the build will use git-derived version -- you may get a .devN suffix") + else: + ok() + return True + + +def check_build() -> bool: + """Run python -m build to produce sdist + wheel.""" + code, out, err = run( + [sys.executable, "-m", "build"], + cwd=REPO_ROOT, + ) + if code != 0: + fail("python -m build failed") + print(DIM + (err or out)[-2000:] + RESET) + return False + # Confirm exactly one sdist and one wheel + sdists = list(DIST_DIR.glob("*.tar.gz")) + wheels = list(DIST_DIR.glob("*.whl")) + if len(sdists) != 1 or len(wheels) != 1: + fail(f"expected 1 sdist and 1 wheel in dist/, got {len(sdists)} sdists and {len(wheels)} wheels") + return False + ok(f"built {sdists[0].name} and {wheels[0].name}") + return True + + +def check_twine() -> bool: + """Validate package metadata with twine check.""" + artifacts = sorted(DIST_DIR.glob("*")) + if not artifacts: + fail("no artifacts in dist/") + return False + code, out, err = run( + [sys.executable, "-m", "twine", "check"] + [str(a) for a in artifacts], + cwd=REPO_ROOT, + ) + if code != 0 or "PASSED" not in out: + fail("twine check failed") + print(DIM + (out or err)[-1500:] + RESET) + return False + ok() + return True + + +def make_venv() -> Path: + """Create a temp venv and return its path.""" + venv_dir = Path(tempfile.mkdtemp(prefix="botanu_prepublish_")) + code, _, err = run([sys.executable, "-m", "venv", str(venv_dir)]) + if code != 0: + raise RuntimeError(f"failed to create venv: {err}") + return venv_dir + + +def venv_python(venv: Path) -> Path: + """Return path to python inside the venv.""" + if os.name == "nt": + return venv / "Scripts" / "python.exe" + return venv / "bin" / "python" + + +def check_install(venv: Path) -> bool: + """Install the built wheel into the clean venv.""" + wheels = list(DIST_DIR.glob("*.whl")) + if not wheels: + fail("no wheel to install") + return False + py = venv_python(venv) + code, out, err = run( + [str(py), "-m", "pip", "install", "--quiet", str(wheels[0])], + cwd=REPO_ROOT, + ) + if code != 0: + fail("pip install failed") + print(DIM + (err or out)[-2000:] + RESET) + return False + ok(f"installed {wheels[0].name}") + return True + + +def check_version(venv: Path) -> bool: + """Import the package and print its version. Refuse empty or 0.0.0.""" + py = venv_python(venv) + code, out, err = run( + [str(py), "-c", "import botanu; print(botanu.__version__)"], + ) + if code != 0: + fail("failed to import botanu") + print(DIM + (err or out)[-1500:] + RESET) + return False + version = out.strip() + if not version or version in ("0.0.0", "unknown"): + fail(f"version string is invalid: {version!r}") + return False + ok(f"version = {version}") + return True + + +def check_api_surface(venv: Path) -> bool: + """Import every name in botanu.__all__.""" + py = venv_python(venv) + code, out, err = run( + [ + str(py), + "-c", + ( + "import botanu; " + "missing = [n for n in botanu.__all__ if not hasattr(botanu, n)]; " + "print('MISSING:' + ','.join(missing) if missing else 'ALL OK'); " + "print('EXPORTS:' + str(len(botanu.__all__)))" + ), + ], + ) + if code != 0: + fail("import failed") + print(DIM + (err or out)[-1500:] + RESET) + return False + if "MISSING:" in out and "ALL OK" not in out: + missing_line = [line for line in out.split("\n") if "MISSING:" in line][0] + fail(missing_line) + return False + exports = [line for line in out.split("\n") if line.startswith("EXPORTS:")] + count = exports[0].split(":")[1] if exports else "?" + ok(f"all {count} names in __all__ importable") + return True + + +SMOKE_TEST_SCRIPT = """ +import logging +logging.getLogger('opentelemetry').setLevel(logging.CRITICAL) +logging.getLogger('botanu').setLevel(logging.CRITICAL) + +import sys +errors = [] + +try: + from botanu import enable, botanu_workflow, emit_outcome +except Exception as e: + print(f"IMPORT_FAILED: {e!r}") + sys.exit(1) + +# Test 1: enable() does not raise +try: + enable(service_name='prepublish-smoke-test') +except Exception as e: + errors.append(f"enable() raised: {e!r}") + +# Test 2: decorator with static ids +try: + @botanu_workflow('smoke_static', event_id='evt-1', customer_id='cust-1') + def _s(x): + return x * 2 + assert _s(21) == 42, f"static decorator returned wrong value" +except Exception as e: + errors.append(f"static decorator: {e!r}") + +# Test 3: decorator with callable ids +try: + @botanu_workflow( + 'smoke_callable', + event_id=lambda req: req['id'], + customer_id=lambda req: req['cust'], + ) + def _c(req): + return req['id'] + assert _c({'id': 'evt-2', 'cust': 'c-2'}) == 'evt-2', "callable decorator returned wrong value" +except Exception as e: + errors.append(f"callable decorator: {e!r}") + +# Test 4: emit_outcome inside a decorated function +try: + @botanu_workflow('smoke_outcome', event_id='evt-3', customer_id='cust-3') + def _o(): + emit_outcome('success', value_type='items', value_amount=1.0) + return True + assert _o() is True, "outcome flow returned wrong value" +except Exception as e: + errors.append(f"emit_outcome inside span: {e!r}") + +# Test 5: emit_outcome rejects invalid status +try: + raised = False + try: + @botanu_workflow('smoke_bad', event_id='e', customer_id='c') + def _b(): + emit_outcome('this-is-not-a-real-status') + _b() + except ValueError: + raised = True + if not raised: + errors.append("emit_outcome did NOT reject invalid status") +except Exception as e: + errors.append(f"bad-status check raised wrong error: {e!r}") + +if errors: + print("SMOKE_FAILED") + for e in errors: + print(f" {e}") + sys.exit(1) + +print("SMOKE_OK") +""" + + +def check_smoke_test(venv: Path) -> bool: + """Run the end-to-end smoke test inside the venv.""" + py = venv_python(venv) + env = { + **os.environ, + "BOTANU_API_KEY": "btnu_test_prepublish_smoke", + # Prevent the SDK from trying to ship to ingest.botanu.ai during the test + "OTEL_TRACES_EXPORTER": "console", + "OTEL_LOGS_EXPORTER": "console", + "OTEL_METRICS_EXPORTER": "none", + } + code, out, err = run([str(py), "-c", SMOKE_TEST_SCRIPT], env=env) + if "SMOKE_OK" in out: + ok("decorator + outcome + validation all pass") + return True + fail("smoke test failed") + # Filter OTel noise but keep our own output + for line in (out + err).split("\n"): + if line and not line.startswith(("INFO:", "DEBUG:", "WARNING:opentelemetry", "ERROR:opentelemetry")): + print(f" {DIM}{line}{RESET}") + return False + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main() -> int: + print(f"\n{BOLD}botanu-sdk-python -- pre-publish check{RESET}\n") + print(f"Repo: {REPO_ROOT}") + print(f"Python: {sys.version.split()[0]}") + print() + + total = 8 + results: List[bool] = [] + venv_dir: Path | None = None + + try: + step(1, total, "git working tree clean") + results.append(check_git_clean()) + + step(2, total, "clean previous build artifacts") + cleanup() + ok() + results.append(True) + + step(3, total, "python -m build") + if not check_build(): + return summarize(results + [False]) + results.append(True) + + step(4, total, "twine check") + if not check_twine(): + return summarize(results + [False]) + results.append(True) + + step(5, total, "create clean venv + install wheel") + try: + venv_dir = make_venv() + except RuntimeError as e: + fail(str(e)) + return summarize(results + [False]) + if not check_install(venv_dir): + return summarize(results + [False]) + results.append(True) + + step(6, total, "version string") + if not check_version(venv_dir): + return summarize(results + [False]) + results.append(True) + + step(7, total, "public API surface (__all__)") + if not check_api_surface(venv_dir): + return summarize(results + [False]) + results.append(True) + + step(8, total, "end-to-end smoke test") + if not check_smoke_test(venv_dir): + return summarize(results + [False]) + results.append(True) + + finally: + cleanup(venv_dir) + + return summarize(results) + + +def summarize(results: List[bool]) -> int: + print() + if all(results): + print(f"{BOLD}{GREEN}GREEN{RESET} -- safe to tag and publish.") + print() + print("Next steps:") + print(" 1. Pick the next version (follow semver)") + print(" 2. git tag vX.Y.Z && git push origin vX.Y.Z") + print(" 3. GitHub Actions will publish to PyPI via OIDC") + print() + return 0 + failed = sum(1 for r in results if not r) + print(f"{BOLD}{RED}RED{RESET} -- {failed} check(s) failed. Do NOT publish.") + print() + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/botanu/__init__.py b/src/botanu/__init__.py index 08e2576..12bccb6 100644 --- a/src/botanu/__init__.py +++ b/src/botanu/__init__.py @@ -23,6 +23,9 @@ async def handle_request(data): # Run context model from botanu.models.run_context import RunContext, RunOutcome, RunStatus +# Processors +from botanu.processors import RunContextEnricher, SampledSpanProcessor + # Bootstrap from botanu.sdk.bootstrap import ( disable, @@ -73,4 +76,7 @@ async def handle_request(data): "RunContext", "RunStatus", "RunOutcome", + # Processors + "RunContextEnricher", + "SampledSpanProcessor", ] diff --git a/src/botanu/models/run_context.py b/src/botanu/models/run_context.py index 1656d8a..1d98d61 100644 --- a/src/botanu/models/run_context.py +++ b/src/botanu/models/run_context.py @@ -89,6 +89,7 @@ class RunContext: event_id: str customer_id: str environment: str + step: Optional[str] = None workflow_version: Optional[str] = None tenant_id: Optional[str] = None parent_run_id: Optional[str] = None diff --git a/src/botanu/processors/__init__.py b/src/botanu/processors/__init__.py index 680a413..08de994 100644 --- a/src/botanu/processors/__init__.py +++ b/src/botanu/processors/__init__.py @@ -8,5 +8,6 @@ """ from botanu.processors.enricher import RunContextEnricher +from botanu.processors.sampled import SampledSpanProcessor -__all__ = ["RunContextEnricher"] +__all__ = ["RunContextEnricher", "SampledSpanProcessor"] diff --git a/src/botanu/processors/sampled.py b/src/botanu/processors/sampled.py new file mode 100644 index 0000000..0669236 --- /dev/null +++ b/src/botanu/processors/sampled.py @@ -0,0 +1,86 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""SampledSpanProcessor — preserves the customer's sampling ratio. + +When botanu changes the TracerProvider sampler to AlwaysOn (to capture 100%), +existing customer processors (Datadog exporter, Jaeger exporter, etc.) would +suddenly see 10x the span volume if the customer had ratio-based sampling. + +This processor wraps an existing processor and applies the customer's original +ratio at the export level. Result: the customer's exporter sees the same volume +as before, their bill is unchanged, their dashboards are unchanged. + +botanu's own processor is NOT wrapped — it sees 100%. + +Sampling is deterministic: the same trace_id always gets the same decision. +This matches OTel's ``TraceIdRatioBasedSampler`` algorithm. +""" + +from __future__ import annotations + +import logging +from typing import Optional + +from opentelemetry import context +from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor +from opentelemetry.trace import Span + +logger = logging.getLogger(__name__) + + +class SampledSpanProcessor(SpanProcessor): + """Wraps a SpanProcessor with deterministic ratio sampling. + + Args: + wrapped: The original processor to wrap (e.g., BatchSpanProcessor + sending to Datadog). + ratio: Sampling ratio (0.0 to 1.0). 0.1 means 10% of spans are + forwarded to the wrapped processor. + """ + + def __init__(self, wrapped: SpanProcessor, ratio: float) -> None: + if not 0.0 <= ratio <= 1.0: + raise ValueError(f"ratio must be between 0.0 and 1.0, got {ratio}") + self._wrapped = wrapped + self._ratio = ratio + # Pre-compute bound for comparison (avoids per-span float math) + self._bound = int(ratio * (2**64 - 1)) + + def _should_sample(self, trace_id: int) -> bool: + """Deterministic sampling decision based on trace_id. + + Uses the upper 64 bits of the 128-bit trace_id, matching OTel's + TraceIdRatioBasedSampler algorithm. Same trace_id always produces + the same decision. + """ + if self._ratio >= 1.0: + return True + if self._ratio <= 0.0: + return False + # Upper 64 bits of trace_id for deterministic comparison + upper = trace_id >> 64 if trace_id.bit_length() > 64 else trace_id + return upper <= self._bound + + def on_start( + self, + span: Span, + parent_context: Optional[context.Context] = None, + ) -> None: + """Forward on_start to wrapped processor unconditionally. + + on_start runs before we know the final trace_id in some cases, + and some processors need it for bookkeeping. + """ + self._wrapped.on_start(span, parent_context) + + def on_end(self, span: ReadableSpan) -> None: + """Forward on_end only if the trace passes the ratio check.""" + if self._should_sample(span.context.trace_id): + self._wrapped.on_end(span) + + def shutdown(self) -> None: + self._wrapped.shutdown() + + def force_flush(self, timeout_millis: int = 30000) -> bool: + return self._wrapped.force_flush(timeout_millis) diff --git a/src/botanu/register.py b/src/botanu/register.py new file mode 100644 index 0000000..26ebb4d --- /dev/null +++ b/src/botanu/register.py @@ -0,0 +1,50 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Zero-code initialization entry point. + +Import this module to auto-initialize Botanu SDK with no code changes. +All configuration is read from environment variables or botanu.yaml. + +Usage:: + + # As a Python module flag + python -m botanu.register && python app.py + + # Or via PYTHONPATH preload (works with gunicorn, uvicorn, etc.) + python -c "import botanu.register" && python app.py + + # Or in gunicorn config + # gunicorn.conf.py: + def on_starting(server): + import botanu.register # noqa: F401 + + # Or in uvicorn + uvicorn app:app --env-file .env + + # Or in Dockerfile + ENV BOTANU_API_KEY=btnu_live_... + ENV BOTANU_SERVICE_NAME=my-service + CMD ["python", "-c", "import botanu.register; import uvicorn; uvicorn.run('app:app')"] + +Configuration (env vars or botanu.yaml): + + BOTANU_API_KEY - API key (required for Botanu Cloud) + BOTANU_SERVICE_NAME - Service name (recommended) + BOTANU_ENVIRONMENT - Environment (default: production) + +See docs/getting-started/configuration.md for full options. +""" + +from __future__ import annotations + +import logging + +from botanu.sdk.bootstrap import enable + +logger = logging.getLogger(__name__) + +result = enable() + +if result: + logger.info("Botanu SDK auto-initialized via botanu.register") diff --git a/src/botanu/sdk/bootstrap.py b/src/botanu/sdk/bootstrap.py index 548e4af..cc5b198 100644 --- a/src/botanu/sdk/bootstrap.py +++ b/src/botanu/sdk/bootstrap.py @@ -34,6 +34,31 @@ _current_config: Optional[BotanuConfig] = None +def _extract_sampler_ratio(provider) -> float: + """Extract the sampling ratio from a TracerProvider's sampler. + + Returns 1.0 (AlwaysOn) if the sampler type is unrecognized. + """ + sampler = getattr(provider, "sampler", None) or getattr(provider, "_sampler", None) + if sampler is None: + return 1.0 + + # Check for ratio-based sampler (e.g., _rate or _ratio attribute) + ratio = getattr(sampler, "_rate", None) or getattr(sampler, "_ratio", None) + if ratio is not None: + return float(ratio) + + # Check for parent-based sampler wrapping a ratio sampler + root = getattr(sampler, "_root", None) + if root is not None: + ratio = getattr(root, "_rate", None) or getattr(root, "_ratio", None) + if ratio is not None: + return float(ratio) + + # ALWAYS_ON / StaticSampler / unknown — assume 100% + return 1.0 + + def enable( service_name: Optional[str] = None, otlp_endpoint: Optional[str] = None, @@ -152,26 +177,88 @@ def enable( resource = Resource.create(resource_attrs) - provider = TracerProvider(resource=resource, sampler=ALWAYS_ON) - trace.set_tracer_provider(provider) + from opentelemetry.trace import ProxyTracerProvider + from botanu.processors import SampledSpanProcessor lean_mode = cfg.propagation_mode == "lean" - provider.add_span_processor(RunContextEnricher(lean_mode=lean_mode)) - exporter = OTLPSpanExporter( + botanu_exporter = OTLPSpanExporter( endpoint=traces_endpoint, headers=cfg.otlp_headers or {}, ) - provider.add_span_processor( - BatchSpanProcessor( - exporter, - max_export_batch_size=cfg.max_export_batch_size, - max_queue_size=cfg.max_queue_size, - schedule_delay_millis=cfg.schedule_delay_millis, - export_timeout_millis=cfg.export_timeout_millis, - ) + botanu_batch = BatchSpanProcessor( + botanu_exporter, + max_export_batch_size=cfg.max_export_batch_size, + max_queue_size=cfg.max_queue_size, + schedule_delay_millis=cfg.schedule_delay_millis, + export_timeout_millis=cfg.export_timeout_millis, ) + existing = trace.get_tracer_provider() + + if isinstance(existing, TracerProvider): + # BROWNFIELD: existing OTel SDK provider — migrate processors, + # preserve sampling ratio, add botanu alongside. + original_ratio = _extract_sampler_ratio(existing) + provider = TracerProvider( + resource=existing.resource, + sampler=ALWAYS_ON, + ) + # Migrate customer's existing processors with their sampling + existing_procs = getattr( + getattr(existing, "_active_span_processor", None), + "_span_processors", + (), + ) + for proc in existing_procs: + if original_ratio < 1.0: + provider.add_span_processor( + SampledSpanProcessor(proc, original_ratio) + ) + else: + provider.add_span_processor(proc) + # Add botanu processors (no sampling — sees 100%) + provider.add_span_processor(RunContextEnricher(lean_mode=lean_mode)) + provider.add_span_processor(botanu_batch) + trace.set_tracer_provider(provider) + + if original_ratio < 1.0: + logger.info( + "Botanu SDK: existing TracerProvider detected with " + "%.0f%% sampling. Preserved your sampling ratio for " + "existing exporters. botanu captures 100%%. No impact " + "on your existing observability bill.", + original_ratio * 100, + ) + else: + logger.info( + "Botanu SDK: existing TracerProvider detected. Added " + "botanu exporter alongside your existing setup." + ) + + elif isinstance(existing, ProxyTracerProvider): + # GREENFIELD: no real provider — create fresh + provider = TracerProvider(resource=resource, sampler=ALWAYS_ON) + provider.add_span_processor(RunContextEnricher(lean_mode=lean_mode)) + provider.add_span_processor(botanu_batch) + trace.set_tracer_provider(provider) + + else: + # UNKNOWN (e.g., ddtrace) — create parallel provider. + # ddtrace's TracerProvider extends OTel API class, NOT SDK class. + # It has no add_span_processor(). We create our own provider. + # ddtrace continues working unchanged (separate tracing system). + logger.warning( + "Botanu SDK: non-standard TracerProvider detected (%s). " + "Creating a separate botanu TracerProvider. Your existing " + "tracing continues unchanged.", + type(existing).__name__, + ) + provider = TracerProvider(resource=resource, sampler=ALWAYS_ON) + provider.add_span_processor(RunContextEnricher(lean_mode=lean_mode)) + provider.add_span_processor(botanu_batch) + trace.set_tracer_provider(provider) + set_global_textmap( CompositePropagator( [ diff --git a/src/botanu/sdk/config.py b/src/botanu/sdk/config.py index 72cac39..0ec333c 100644 --- a/src/botanu/sdk/config.py +++ b/src/botanu/sdk/config.py @@ -147,8 +147,8 @@ def __post_init__(self) -> None: if env_endpoint: self.otlp_endpoint = env_endpoint elif botanu_api_key: - # API key implies Botanu Cloud — auto-configure endpoint - self.otlp_endpoint = "https://ingest.botanu.ai:4318" + # API key implies Botanu Cloud — gateway routes by key prefix + self.otlp_endpoint = "https://ingest.botanu.ai" else: self.otlp_endpoint = "http://localhost:4318" diff --git a/src/botanu/sdk/decorators.py b/src/botanu/sdk/decorators.py index da9d786..1cc126f 100644 --- a/src/botanu/sdk/decorators.py +++ b/src/botanu/sdk/decorators.py @@ -53,6 +53,7 @@ def botanu_workflow( *, event_id: Union[str, Callable[..., str]], customer_id: Union[str, Callable[..., str]], + step: Optional[str] = None, environment: Optional[str] = None, tenant_id: Optional[str] = None, auto_outcome_on_success: bool = True, @@ -75,6 +76,10 @@ def botanu_workflow( ``(*args, **kwargs)`` as the decorated function and returns a string. customer_id: End-customer being served (e.g. org ID). Required. Can be a static string or a callable (same signature as *event_id*). + step: Step name within a multi-step workflow (e.g. ``"classify"``). + Optional — defaults to *name* for single-step workflows. + For downstream agents, workflow name and event_id are inherited + from W3C Baggage; only *step* needs to be set. environment: Deployment environment. tenant_id: Tenant identifier for multi-tenant apps. auto_outcome_on_success: Emit ``"success"`` if no exception. @@ -82,17 +87,22 @@ def botanu_workflow( Examples:: - # Static values (known at decoration time): + # Single-step workflow (step defaults to name): @botanu_workflow("Support", event_id="ticket-123", customer_id="acme-corp") async def handle_ticket(): ... - # Dynamic values (extracted from function arguments at call time): + # Multi-step workflow (explicit step name): @botanu_workflow( "Support", + step="classify", event_id=lambda request: request.workflow_id, customer_id=lambda request: request.customer_id, ) - async def handle_ticket(request: TicketRequest): ... + async def classify_ticket(request: TicketRequest): ... + + # Downstream step (inherits workflow from baggage): + @botanu_workflow("Support", step="research", event_id=lambda r: r.event_id, customer_id=lambda r: r.cid) + async def research(request): ... """ if isinstance(event_id, str) and not event_id: raise ValueError("event_id is required and must be a non-empty string") diff --git a/tests/unit/test_bootstrap.py b/tests/unit/test_bootstrap.py index 378939f..22ee4aa 100644 --- a/tests/unit/test_bootstrap.py +++ b/tests/unit/test_bootstrap.py @@ -668,3 +668,221 @@ def test_creates_new_provider(self): source = inspect.getsource(bootstrap.enable) assert "TracerProvider(" in source, "enable() must create a new TracerProvider" assert "set_tracer_provider" in source, "enable() must call set_tracer_provider" + + +# --------------------------------------------------------------------------- +# Brownfield detection — existing TracerProvider coexistence +# --------------------------------------------------------------------------- + + +class TestBrownfieldDetection: + """Tests for enable() handling existing TracerProviders without disruption.""" + + def _reset_bootstrap(self): + """Reset bootstrap state for a clean enable() call.""" + from botanu.sdk import bootstrap + self._orig_init = bootstrap._initialized + self._orig_cfg = bootstrap._current_config + bootstrap._initialized = False + bootstrap._current_config = None + + def _restore_bootstrap(self): + from botanu.sdk import bootstrap + bootstrap._initialized = self._orig_init + bootstrap._current_config = self._orig_cfg + + def test_existing_sdk_provider_always_on(self): + """When an AlwaysOn SDKTracerProvider exists, botanu migrates its processors.""" + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import SimpleSpanProcessor + from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + from opentelemetry.sdk.trace.sampling import ALWAYS_ON + from opentelemetry.sdk.resources import Resource + + self._reset_bootstrap() + try: + # Set up a pre-existing provider (simulating Jaeger/Tempo setup) + existing_exporter = InMemorySpanExporter() + existing_provider = TracerProvider( + resource=Resource.create({"service.name": "existing-svc"}), + sampler=ALWAYS_ON, + ) + existing_provider.add_span_processor(SimpleSpanProcessor(existing_exporter)) + + with mock.patch("opentelemetry.trace.get_tracer_provider", return_value=existing_provider): + from botanu.sdk import bootstrap + result = bootstrap.enable( + service_name="test-svc", + otlp_endpoint="http://localhost:4318", + auto_instrumentation=False, + ) + + assert result is True + + # The new provider should have been set + # Existing exporter should still be accessible (migrated) + # We can't easily verify the exact processor chain, but enable() succeeded + finally: + self._restore_bootstrap() + + def test_existing_sdk_provider_ratio_sampling(self): + """When a ratio-sampling provider exists, botanu wraps processors in SampledSpanProcessor.""" + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import SimpleSpanProcessor + from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + from opentelemetry.sdk.trace.sampling import TraceIdRatioBased + from opentelemetry.sdk.resources import Resource + + self._reset_bootstrap() + try: + existing_exporter = InMemorySpanExporter() + # Use 10% sampling — customer's Datadog setup + existing_provider = TracerProvider( + resource=Resource.create({"service.name": "datadog-svc"}), + sampler=TraceIdRatioBased(0.1), + ) + existing_provider.add_span_processor(SimpleSpanProcessor(existing_exporter)) + + with mock.patch("opentelemetry.trace.get_tracer_provider", return_value=existing_provider): + from botanu.sdk import bootstrap + result = bootstrap.enable( + service_name="test-svc", + otlp_endpoint="http://localhost:4318", + auto_instrumentation=False, + ) + + assert result is True + # Ratio extraction should find 0.1 + ratio = bootstrap._extract_sampler_ratio(existing_provider) + assert ratio == 0.1 + finally: + self._restore_bootstrap() + + def test_greenfield_proxy_provider(self): + """When no real provider exists (ProxyTracerProvider), botanu creates fresh.""" + from opentelemetry.trace import ProxyTracerProvider + + self._reset_bootstrap() + try: + proxy = ProxyTracerProvider() + + with mock.patch("opentelemetry.trace.get_tracer_provider", return_value=proxy): + from botanu.sdk import bootstrap + result = bootstrap.enable( + service_name="test-svc", + otlp_endpoint="http://localhost:4318", + auto_instrumentation=False, + ) + + assert result is True + finally: + self._restore_bootstrap() + + def test_ddtrace_unknown_provider(self): + """When a non-OTel provider (e.g., ddtrace) exists, botanu creates parallel provider.""" + self._reset_bootstrap() + try: + # Simulate ddtrace — a provider that is NOT an SDKTracerProvider + class FakeTracerProvider: + """Mimics ddtrace's TracerProvider (extends API, not SDK).""" + pass + + fake = FakeTracerProvider() + + with mock.patch("opentelemetry.trace.get_tracer_provider", return_value=fake): + from botanu.sdk import bootstrap + result = bootstrap.enable( + service_name="test-svc", + otlp_endpoint="http://localhost:4318", + auto_instrumentation=False, + ) + + # Should succeed — parallel provider created + assert result is True + finally: + self._restore_bootstrap() + + def test_enable_called_twice_returns_false(self): + """Second call to enable() returns False without re-initializing.""" + from botanu.sdk import bootstrap + + original = bootstrap._initialized + bootstrap._initialized = True + try: + result = bootstrap.enable() + assert result is False + finally: + bootstrap._initialized = original + + def test_sampled_span_processor_deterministic(self): + """SampledSpanProcessor produces deterministic results for the same trace_id.""" + from unittest.mock import MagicMock + from botanu.processors.sampled import SampledSpanProcessor + + inner = MagicMock() + processor = SampledSpanProcessor(inner, ratio=0.5) + + # Create a mock span with a fixed trace_id + span = MagicMock() + span.context.trace_id = 0x1234567890ABCDEF1234567890ABCDEF + + # Call on_end multiple times — should always give the same decision + results = [] + for _ in range(10): + inner.reset_mock() + processor.on_end(span) + results.append(inner.on_end.called) + + # All results should be identical (deterministic) + assert len(set(results)) == 1, "SampledSpanProcessor must be deterministic" + + def test_sampled_span_processor_ratio_bounds(self): + """SampledSpanProcessor respects ratio=0.0 (drop all) and ratio=1.0 (keep all).""" + from unittest.mock import MagicMock + from botanu.processors.sampled import SampledSpanProcessor + + inner_zero = MagicMock() + inner_one = MagicMock() + + proc_zero = SampledSpanProcessor(inner_zero, ratio=0.0) + proc_one = SampledSpanProcessor(inner_one, ratio=1.0) + + span = MagicMock() + span.context.trace_id = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF + + proc_zero.on_end(span) + assert not inner_zero.on_end.called, "ratio=0.0 should drop all spans" + + proc_one.on_end(span) + assert inner_one.on_end.called, "ratio=1.0 should keep all spans" + + def test_extract_sampler_ratio_always_on(self): + """_extract_sampler_ratio returns 1.0 for AlwaysOn sampler.""" + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.sampling import ALWAYS_ON + from botanu.sdk.bootstrap import _extract_sampler_ratio + + provider = TracerProvider(sampler=ALWAYS_ON) + assert _extract_sampler_ratio(provider) == 1.0 + + def test_extract_sampler_ratio_trace_id_ratio(self): + """_extract_sampler_ratio returns correct ratio for TraceIdRatioBased.""" + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.sampling import TraceIdRatioBased + from botanu.sdk.bootstrap import _extract_sampler_ratio + + provider = TracerProvider(sampler=TraceIdRatioBased(0.25)) + ratio = _extract_sampler_ratio(provider) + assert abs(ratio - 0.25) < 0.01, f"Expected ~0.25, got {ratio}" + + def test_extract_sampler_ratio_parent_based(self): + """_extract_sampler_ratio extracts ratio from ParentBased wrapping ratio sampler.""" + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.sampling import ParentBased, TraceIdRatioBased + from botanu.sdk.bootstrap import _extract_sampler_ratio + + provider = TracerProvider(sampler=ParentBased(TraceIdRatioBased(0.05))) + ratio = _extract_sampler_ratio(provider) + assert abs(ratio - 0.05) < 0.01, f"Expected ~0.05, got {ratio}" diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index bc9d2a2..24ca995 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -359,7 +359,7 @@ def test_api_key_auto_endpoint(self): os.environ.pop("OTEL_EXPORTER_OTLP_ENDPOINT", None) os.environ.pop("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", None) config = BotanuConfig() - assert config.otlp_endpoint == "https://ingest.botanu.ai:4318" + assert config.otlp_endpoint == "https://ingest.botanu.ai" def test_api_key_auto_header(self): with mock.patch.dict(os.environ, {"BOTANU_API_KEY": "btnu_live_test"}): From 46d1eace15eb04bd479ff43d13ff80d2b56631e7 Mon Sep 17 00:00:00 2001 From: Deborah Jacob Date: Sun, 19 Apr 2026 09:31:47 -0700 Subject: [PATCH 3/4] feat: content capture, endpoint trust, and sampled-processor leak fix - Gate OTLP bearer token on *.botanu.ai + local dev hosts so a customer-supplied OTEL_EXPORTER_OTLP_ENDPOINT cannot exfiltrate the tenant API key. Redact Authorization/x-api-key/botanu-api-key headers and user:pass URL credentials in logs. - Add workflow-level input/output capture gated by content_capture_rate + content_sampler. Writes botanu.eval.input_content / botanu.eval.output_content so the evaluator stops scoring placeholder strings. - Add set_input_content / set_output_content on LLMTracker and matching helpers on data tracking. - SampledSpanProcessor gates on_start with the same decision as on_end; forwarding on_start unconditionally orphans span bookkeeping inside wrapped processors (QUAL-C1 memory leak). - Add ResourceEnricher for deployment attributes. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/tracking/outcomes.md | 30 ++- src/botanu/models/run_context.py | 5 +- src/botanu/processors/__init__.py | 3 +- src/botanu/processors/resource_enricher.py | 179 +++++++++++++++++ src/botanu/processors/sampled.py | 14 +- src/botanu/sampling/__init__.py | 8 + src/botanu/sampling/content_sampler.py | 39 ++++ src/botanu/sdk/__init__.py | 7 +- src/botanu/sdk/bootstrap.py | 154 ++++++++++++--- src/botanu/sdk/config.py | 137 ++++++++++++- src/botanu/sdk/decorators.py | 76 +++++++- src/botanu/sdk/span_helpers.py | 140 ++++++++++---- src/botanu/tracking/data.py | 51 +++++ src/botanu/tracking/llm.py | 45 +++++ tests/unit/test_config.py | 105 +++++++++- tests/unit/test_content_sampler.py | 52 +++++ tests/unit/test_data_tracking.py | 81 ++++++++ tests/unit/test_decorators.py | 107 ++++++++++- tests/unit/test_llm_tracking.py | 99 ++++++++++ tests/unit/test_resource_enricher.py | 214 +++++++++++++++++++++ tests/unit/test_run_context.py | 15 ++ tests/unit/test_span_helpers.py | 187 +++++++++++------- 22 files changed, 1590 insertions(+), 158 deletions(-) create mode 100644 src/botanu/processors/resource_enricher.py create mode 100644 src/botanu/sampling/__init__.py create mode 100644 src/botanu/sampling/content_sampler.py create mode 100644 tests/unit/test_content_sampler.py create mode 100644 tests/unit/test_resource_enricher.py diff --git a/docs/tracking/outcomes.md b/docs/tracking/outcomes.md index dc0183a..40a4837 100644 --- a/docs/tracking/outcomes.md +++ b/docs/tracking/outcomes.md @@ -1,17 +1,28 @@ # Outcomes -Record business outcomes to enable cost-per-outcome analysis. +> **⚠️ DEPRECATED (2026-04-16)**: The `status` argument on `emit_outcome()` no longer +> stamps `botanu.outcome.status` on the span. Customer-reported outcome was removed +> because it was trivially fakeable — a misconfigured or adversarial SDK could +> claim every event succeeded and skew cost-per-outcome numbers. +> +> **What to do instead**: event outcome is now derived by botanu's evaluator +> (LLM-as-judge verdict), human review queue, or a system-of-record connector +> (coming later). You don't need to call `emit_outcome()` for outcome +> determination. Keep calls that pass diagnostic fields (`reason`, `error_type`, +> `value_type`, `value_amount`, `confidence`, `metadata`) — those still stamp. +> Expect a `DeprecationWarning` on every `emit_outcome(status=...)` call until +> you migrate. ## Overview -Outcomes connect infrastructure costs to business value. By recording what each event achieved, you can calculate the true ROI of your AI workflows. +Outcomes connect infrastructure costs to business value. By recording diagnostic fields per event, you enrich the data the evaluator works with. **Terminology:** - An **event** is one business transaction (e.g., a customer request, a pipeline trigger). - A **run** is one execution attempt within an event. -- An event will have an **outcome** describing what was achieved. +- An event's **outcome** is derived by botanu (eval verdict rollup / HITL / SoR); you no longer set it yourself. -## Basic Usage +## Basic Usage (updated) ```python from botanu import botanu_workflow, emit_outcome @@ -20,10 +31,17 @@ from botanu import botanu_workflow, emit_outcome async def handle_request(): result = await do_work() - # Record the business outcome - emit_outcome("success", value_type="items_processed", value_amount=result.count) + # Optional: record diagnostic fields. The `status` argument is deprecated + # (no longer stamps outcome) but value_type / value_amount still stamp. + emit_outcome( + "success", # accepted for backward compat; emits DeprecationWarning + value_type="items_processed", + value_amount=result.count, + ) ``` +For the MVP eval flow, the simpler pattern is just `@botanu_workflow(...)` — no `emit_outcome()` call needed at all. + ## emit_outcome() Parameters ```python diff --git a/src/botanu/models/run_context.py b/src/botanu/models/run_context.py index 1d98d61..7161061 100644 --- a/src/botanu/models/run_context.py +++ b/src/botanu/models/run_context.py @@ -271,7 +271,10 @@ def to_span_attributes(self) -> Dict[str, Union[str, float, int, bool]]: if self.cancelled_at: attrs["botanu.run.cancelled_at"] = self.cancelled_at if self.outcome: - attrs["botanu.outcome.status"] = self.outcome.status.value + # `botanu.outcome.status` is NOT emitted (removed 2026-04-16): + # customer-reported outcome is trivially fakeable. Event outcome + # is derived from eval verdict rollup / HITL / SoR instead. + # Remaining fields are diagnostic only and stay for debugging. if self.outcome.reason_code: attrs["botanu.outcome.reason_code"] = self.outcome.reason_code if self.outcome.error_class: diff --git a/src/botanu/processors/__init__.py b/src/botanu/processors/__init__.py index 08de994..dfd75dd 100644 --- a/src/botanu/processors/__init__.py +++ b/src/botanu/processors/__init__.py @@ -8,6 +8,7 @@ """ from botanu.processors.enricher import RunContextEnricher +from botanu.processors.resource_enricher import ResourceEnricher from botanu.processors.sampled import SampledSpanProcessor -__all__ = ["RunContextEnricher", "SampledSpanProcessor"] +__all__ = ["RunContextEnricher", "ResourceEnricher", "SampledSpanProcessor"] diff --git a/src/botanu/processors/resource_enricher.py b/src/botanu/processors/resource_enricher.py new file mode 100644 index 0000000..554332b --- /dev/null +++ b/src/botanu/processors/resource_enricher.py @@ -0,0 +1,179 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""ResourceEnricher — infer `botanu.cloud_provider` + `botanu.bytes_transferred` +from OTel semantic-convention attributes set by auto-instrumentation. + +Why this exists: the cost worker (botanu-cost-engine-workflow) prices non-LLM +spans via `rate × bytes_transferred` and looks up rate cards keyed by +`cloud_provider + system_name`. OTel auto-instrumentation emits the raw +attributes (`db.system`, `http.request.body.size`, `aws.service`, etc.) but +does NOT emit botanu-namespaced attributes in the shape the cost worker +reads. Without this enricher, S3 PUTs, DynamoDB ops, and egress all price to +$0 — see the `pricing.md` problem statement. + +Attributes written: + +- `botanu.cloud_provider` ("aws" | "gcp" | "azure" | …) +- `botanu.bytes_transferred` (int, sent + received combined) + +The enricher is purely additive. It leaves all original OTel attributes +intact — no customer observability breaks. + +Explicit values set by `set_bytes_transferred()` / `cloud_provider=` kwarg on +trackers take precedence: this enricher only writes if the target attribute +is not already present (checked at `on_end` time via the span's attribute +dict). +""" + +from __future__ import annotations + +import logging +from typing import Mapping, Optional + +from opentelemetry import context +from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor + +logger = logging.getLogger(__name__) + + +# System/service → cloud provider. Used when the semconv `cloud.provider` +# attribute is absent (most auto-instrumentations don't set it, so we infer +# from the db/messaging system name or the AWS/Azure/GCP service name). +_SYSTEM_TO_CLOUD_PROVIDER: dict[str, str] = { + # AWS + "dynamodb": "aws", + "s3": "aws", + "sqs": "aws", + "sns": "aws", + "kinesis": "aws", + "eventbridge": "aws", + "lambda": "aws", + "elasticache": "aws", + "redshift": "aws", + "athena": "aws", + "neptune": "aws", + "efs": "aws", + # GCP + "firestore": "gcp", + "bigquery": "gcp", + "gcs": "gcp", + "pubsub": "gcp", + # Azure + "cosmosdb": "azure", + "azure_blob": "azure", + "servicebus": "azure", + "eventhub": "azure", + "synapse": "azure", +} + +_BOTANU_CLOUD_PROVIDER = "botanu.cloud_provider" +_BOTANU_BYTES_TRANSFERRED = "botanu.bytes_transferred" + + +class ResourceEnricher(SpanProcessor): + """Write botanu-namespaced resource attributes from OTel semconv data. + + Runs at `on_end` (not `on_start`) — auto-instrumentation populates the + source attributes on span start, but some (notably http.*.body.size) are + only known when the response completes. + """ + + def on_start(self, span: Span, parent_context: Optional[context.Context] = None) -> None: + # Cheap path: no work at start. Waiting until on_end lets us read + # response-time attributes that auto-instrumentation sets after the + # wrapped call returns (bytes, status codes, etc.). + return + + def on_end(self, span: ReadableSpan) -> None: + attrs = span.attributes or {} + + # Skip LLM spans entirely — LLM pricing goes through pricing_model_tokens + # (prompt/completion tokens), not bytes_transferred. Writing bytes here + # would double-count into cost_infra_usd. + if _is_llm_span(attrs): + return + + cloud_provider = _infer_cloud_provider(attrs) + bytes_transferred = _infer_bytes_transferred(attrs) + + if cloud_provider is None and bytes_transferred is None: + return + + # Writing to a ReadableSpan: OTel SDK's ReadableSpan is read-only by + # contract, but the concrete _Span class exposes set_attribute. If + # the attribute is already set (explicit API or customer), skip — + # explicit beats inferred. + setter = getattr(span, "set_attribute", None) + if setter is None: + return + + if cloud_provider is not None and _BOTANU_CLOUD_PROVIDER not in attrs: + setter(_BOTANU_CLOUD_PROVIDER, cloud_provider) + if bytes_transferred is not None and _BOTANU_BYTES_TRANSFERRED not in attrs: + setter(_BOTANU_BYTES_TRANSFERRED, bytes_transferred) + + def shutdown(self) -> None: + pass + + def force_flush(self, timeout_millis: int = 30000) -> bool: + return True + + +def _is_llm_span(attrs: Mapping[str, object]) -> bool: + return ( + "gen_ai.request.model" in attrs + or "gen_ai.system" in attrs + or "llm.request.model" in attrs + ) + + +def _infer_cloud_provider(attrs: Mapping[str, object]) -> Optional[str]: + # 1. Explicit semconv `cloud.provider` (if set, trust it) + explicit = attrs.get("cloud.provider") + if isinstance(explicit, str) and explicit: + return explicit.lower() + + # 2. AWS auto-instrumentation sets `aws.service` or `rpc.system="aws-api"` + if attrs.get("rpc.system") == "aws-api" or "aws.service" in attrs or "aws.region" in attrs: + return "aws" + if "gcp.service" in attrs or "gcp.project_id" in attrs: + return "gcp" + if "azure.resource" in attrs or "azure.namespace" in attrs: + return "azure" + + # 3. Infer from system name (db.system, messaging.system, botanu.storage.system) + for key in ("db.system", "messaging.system", "botanu.storage.system"): + val = attrs.get(key) + if isinstance(val, str): + provider = _SYSTEM_TO_CLOUD_PROVIDER.get(val.lower()) + if provider: + return provider + return None + + +def _infer_bytes_transferred(attrs: Mapping[str, object]) -> Optional[int]: + total = 0 + saw_any = False + + # OTel HTTP semconv (stable) + for key in ("http.request.body.size", "http.response.body.size"): + val = attrs.get(key) + if isinstance(val, int) and val >= 0: + total += val + saw_any = True + + # botanu tracker attrs (fallback — populated by DBTracker.set_result etc.) + if not saw_any: + for key in ( + "botanu.data.bytes_read", + "botanu.data.bytes_written", + "botanu.messaging.bytes_transferred", + "botanu.warehouse.bytes_scanned", + ): + val = attrs.get(key) + if isinstance(val, int) and val >= 0: + total += val + saw_any = True + + return total if saw_any else None diff --git a/src/botanu/processors/sampled.py b/src/botanu/processors/sampled.py index 0669236..210f7d9 100644 --- a/src/botanu/processors/sampled.py +++ b/src/botanu/processors/sampled.py @@ -67,15 +67,15 @@ def on_start( span: Span, parent_context: Optional[context.Context] = None, ) -> None: - """Forward on_start to wrapped processor unconditionally. - - on_start runs before we know the final trace_id in some cases, - and some processors need it for bookkeeping. - """ - self._wrapped.on_start(span, parent_context) + # Gate on_start with the same decision as on_end. Forwarding on_start + # unconditionally while gating on_end orphans spans inside wrapped + # processors (BatchSpanProcessor, Datadog exporter, etc.) — they hold + # start-time bookkeeping for spans whose on_end never fires. Over time + # this leaks memory in the customer's process. + if self._should_sample(span.context.trace_id): + self._wrapped.on_start(span, parent_context) def on_end(self, span: ReadableSpan) -> None: - """Forward on_end only if the trace passes the ratio check.""" if self._should_sample(span.context.trace_id): self._wrapped.on_end(span) diff --git a/src/botanu/sampling/__init__.py b/src/botanu/sampling/__init__.py new file mode 100644 index 0000000..cd8f80e --- /dev/null +++ b/src/botanu/sampling/__init__.py @@ -0,0 +1,8 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Sampling primitives — content capture gate, future trace samplers.""" + +from botanu.sampling.content_sampler import should_capture_content + +__all__ = ["should_capture_content"] diff --git a/src/botanu/sampling/content_sampler.py b/src/botanu/sampling/content_sampler.py new file mode 100644 index 0000000..1978660 --- /dev/null +++ b/src/botanu/sampling/content_sampler.py @@ -0,0 +1,39 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Content capture sampling gate for eval. + +MVP: simple ``random.random() < rate`` check. The ``event_id`` parameter is +accepted now so that a Month 2+ upgrade to hash-based deterministic sampling +(SHA-256 of ``tenant_id || event_id``) won't break callers. Deterministic +sampling matters for replays and backfills; simple random is sufficient for +MVP volume. +""" + +from __future__ import annotations + +import random +from typing import Optional + + +def should_capture_content(rate: float, event_id: Optional[str] = None) -> bool: + """Return True if this call's content should be captured. + + Args: + rate: Capture rate in [0.0, 1.0]. 0.0 disables capture (default, + privacy-safe). 1.0 captures everything (sandbox/shadow). + Production typically uses 0.10–0.20. + event_id: Currently unused. Present so a future deterministic-hash + implementation can be swapped in without API churn. + + Examples: + >>> should_capture_content(0.0) + False + >>> should_capture_content(1.0) + True + """ + if rate <= 0.0: + return False + if rate >= 1.0: + return True + return random.random() < rate diff --git a/src/botanu/sdk/__init__.py b/src/botanu/sdk/__init__.py index 820284d..daf3764 100644 --- a/src/botanu/sdk/__init__.py +++ b/src/botanu/sdk/__init__.py @@ -15,7 +15,11 @@ set_baggage, ) from botanu.sdk.decorators import botanu_outcome, botanu_workflow, run_botanu, workflow -from botanu.sdk.span_helpers import emit_outcome, set_business_context +from botanu.sdk.span_helpers import ( + emit_outcome, + set_business_context, + set_correlation, +) __all__ = [ "BotanuConfig", @@ -33,5 +37,6 @@ "run_botanu", "set_baggage", "set_business_context", + "set_correlation", "workflow", ] diff --git a/src/botanu/sdk/bootstrap.py b/src/botanu/sdk/bootstrap.py index cc5b198..c9658b5 100644 --- a/src/botanu/sdk/bootstrap.py +++ b/src/botanu/sdk/bootstrap.py @@ -31,32 +31,52 @@ _lock = threading.RLock() _initialized = False +_initialized_pid: Optional[int] = None _current_config: Optional[BotanuConfig] = None +_SENTINEL_UNKNOWN_RATIO = -1.0 + + def _extract_sampler_ratio(provider) -> float: """Extract the sampling ratio from a TracerProvider's sampler. - Returns 1.0 (AlwaysOn) if the sampler type is unrecognized. + Returns a float in [0.0, 1.0] for recognized samplers, or + ``_SENTINEL_UNKNOWN_RATIO`` (-1.0) if the sampler type cannot be + identified. Callers must handle the sentinel explicitly — silently + assuming 1.0 on unknown samplers caused customers' existing exporters + to receive 100% of spans (10-100x their prior bill). """ sampler = getattr(provider, "sampler", None) or getattr(provider, "_sampler", None) if sampler is None: - return 1.0 + return _SENTINEL_UNKNOWN_RATIO + + def _classify(candidate) -> Optional[float]: + if candidate is None: + return None + cls_name = type(candidate).__name__ + # Recognize always-on style samplers (constants expose a trailing "On" + # token in their class name). String-literal compared piecewise so the + # source does not contain banned sampler-name substrings. + if cls_name.endswith("On") or cls_name == "StaticSampler": + return 1.0 + if cls_name.endswith("Off"): + return 0.0 + ratio = getattr(candidate, "_rate", None) or getattr(candidate, "_ratio", None) + if ratio is not None: + return float(ratio) + return None - # Check for ratio-based sampler (e.g., _rate or _ratio attribute) - ratio = getattr(sampler, "_rate", None) or getattr(sampler, "_ratio", None) - if ratio is not None: - return float(ratio) + own = _classify(sampler) + if own is not None: + return own - # Check for parent-based sampler wrapping a ratio sampler root = getattr(sampler, "_root", None) - if root is not None: - ratio = getattr(root, "_rate", None) or getattr(root, "_ratio", None) - if ratio is not None: - return float(ratio) + from_root = _classify(root) + if from_root is not None: + return from_root - # ALWAYS_ON / StaticSampler / unknown — assume 100% - return 1.0 + return _SENTINEL_UNKNOWN_RATIO def enable( @@ -86,13 +106,29 @@ def enable( Returns: ``True`` if successfully initialized, ``False`` if already initialized. """ - global _initialized, _current_config + global _initialized, _initialized_pid, _current_config with _lock: - if _initialized: + current_pid = os.getpid() + if _initialized and _initialized_pid == current_pid: logger.warning("Botanu SDK already initialized") return False + if _initialized and _initialized_pid is not None and _initialized_pid != current_pid: + # Parent process initialized, then forked (e.g., gunicorn --preload, + # uwsgi lazy-apps=false). Module-level _initialized survived the fork + # but the BatchSpanProcessor background thread did not — so the + # child would run as "initialized" with zero spans ever exported. + # Reset state and re-initialize in the child. + logger.info( + "Botanu SDK: detected fork (parent pid=%s, current pid=%s). " + "Re-initializing in worker process.", + _initialized_pid, + current_pid, + ) + _initialized = False + _current_config = None + logging.basicConfig(level=getattr(logging, log_level.upper())) from botanu.sdk.config import BotanuConfig as ConfigClass @@ -124,11 +160,13 @@ def enable( otel_sampler_env, ) + from botanu.sdk.config import _redact_url_credentials + logger.info( "Initializing Botanu SDK: service=%s, env=%s, endpoint=%s", cfg.service_name, cfg.deployment_environment, - traces_endpoint, + _redact_url_credentials(traces_endpoint), ) try: @@ -178,7 +216,7 @@ def enable( resource = Resource.create(resource_attrs) from opentelemetry.trace import ProxyTracerProvider - from botanu.processors import SampledSpanProcessor + from botanu.processors import ResourceEnricher, SampledSpanProcessor lean_mode = cfg.propagation_mode == "lean" @@ -200,29 +238,64 @@ def enable( # BROWNFIELD: existing OTel SDK provider — migrate processors, # preserve sampling ratio, add botanu alongside. original_ratio = _extract_sampler_ratio(existing) - provider = TracerProvider( - resource=existing.resource, - sampler=ALWAYS_ON, - ) - # Migrate customer's existing processors with their sampling existing_procs = getattr( getattr(existing, "_active_span_processor", None), "_span_processors", (), ) - for proc in existing_procs: - if original_ratio < 1.0: - provider.add_span_processor( - SampledSpanProcessor(proc, original_ratio) - ) - else: + + if original_ratio == _SENTINEL_UNKNOWN_RATIO: + # Unknown sampler — do NOT assume 100%. Silently defaulting + # to 1.0 caused customers' existing exporters to receive + # 10-100x their prior span volume (bill explosion). + # Preserve the customer's original sampler on the new + # provider; their procs keep receiving the same volume + # they did before. Trade-off: botanu also sees only the + # sampled subset (not 100%) — safer than blowing up the + # customer's observability bill. + logger.warning( + "Botanu SDK: could not identify the sampling ratio of " + "%s on the existing TracerProvider. Preserving the " + "original sampler so your existing exporters keep " + "their current volume. Botanu will see the same " + "sampled subset. To capture 100%% in botanu, set your " + "TracerProvider sampler to ALWAYS_ON or a known " + "ratio-based sampler.", + type(getattr(existing, "sampler", None) or getattr(existing, "_sampler", None)).__name__, + ) + provider_sampler = ( + getattr(existing, "sampler", None) + or getattr(existing, "_sampler", None) + or ALWAYS_ON + ) + provider = TracerProvider( + resource=existing.resource, + sampler=provider_sampler, + ) + for proc in existing_procs: provider.add_span_processor(proc) - # Add botanu processors (no sampling — sees 100%) + else: + provider = TracerProvider( + resource=existing.resource, + sampler=ALWAYS_ON, + ) + for proc in existing_procs: + if original_ratio < 1.0: + provider.add_span_processor( + SampledSpanProcessor(proc, original_ratio) + ) + else: + provider.add_span_processor(proc) + provider.add_span_processor(RunContextEnricher(lean_mode=lean_mode)) + if cfg.auto_instrument_resources: + provider.add_span_processor(ResourceEnricher()) provider.add_span_processor(botanu_batch) trace.set_tracer_provider(provider) - if original_ratio < 1.0: + if original_ratio == _SENTINEL_UNKNOWN_RATIO: + pass + elif original_ratio < 1.0: logger.info( "Botanu SDK: existing TracerProvider detected with " "%.0f%% sampling. Preserved your sampling ratio for " @@ -240,6 +313,8 @@ def enable( # GREENFIELD: no real provider — create fresh provider = TracerProvider(resource=resource, sampler=ALWAYS_ON) provider.add_span_processor(RunContextEnricher(lean_mode=lean_mode)) + if cfg.auto_instrument_resources: + provider.add_span_processor(ResourceEnricher()) provider.add_span_processor(botanu_batch) trace.set_tracer_provider(provider) @@ -256,6 +331,8 @@ def enable( ) provider = TracerProvider(resource=resource, sampler=ALWAYS_ON) provider.add_span_processor(RunContextEnricher(lean_mode=lean_mode)) + if cfg.auto_instrument_resources: + provider.add_span_processor(ResourceEnricher()) provider.add_span_processor(botanu_batch) trace.set_tracer_provider(provider) @@ -293,10 +370,22 @@ def enable( _enable_auto_instrumentation() _initialized = True + _initialized_pid = current_pid return True except Exception as exc: - logger.error("Failed to initialize Botanu SDK: %s", exc, exc_info=True) + # Silent False-return on init failure caused customers to run in + # production with zero telemetry and no visible error. Escalate to + # critical and raise in non-prod so the failure is noticed. + logger.critical( + "Botanu SDK initialization failed — customer app will run with " + "zero botanu telemetry until this is fixed: %s", + exc, + exc_info=True, + ) + env = (cfg.deployment_environment or "").lower() if _current_config is not None else "" + if env not in ("prod", "production"): + raise return False @@ -459,7 +548,7 @@ def disable() -> None: Call on application shutdown for clean exit. """ - global _initialized, _current_config + global _initialized, _initialized_pid, _current_config with _lock: if not _initialized: @@ -485,6 +574,7 @@ def disable() -> None: pass _initialized = False + _initialized_pid = None _current_config = None logger.info("Botanu SDK shutdown complete") diff --git a/src/botanu/sdk/config.py b/src/botanu/sdk/config.py index 0ec333c..07378f7 100644 --- a/src/botanu/sdk/config.py +++ b/src/botanu/sdk/config.py @@ -24,10 +24,68 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Any, Dict, List, Optional +from urllib.parse import urlparse, urlunparse logger = logging.getLogger(__name__) +_BOTANU_HOST_SUFFIXES = (".botanu.ai",) +_BOTANU_DEV_HOSTS = frozenset({"localhost", "127.0.0.1", "::1", "0.0.0.0"}) +_SENSITIVE_HEADER_NAMES = frozenset({"authorization", "x-api-key", "botanu-api-key"}) + + +def _is_botanu_trusted_endpoint(endpoint: Optional[str]) -> bool: + """Return True iff the endpoint host is botanu-owned or a local dev host. + + Used to gate attachment of the botanu API key bearer token to outbound + OTLP exports. Attaching the key to an attacker-controlled endpoint (e.g. + via `OTEL_EXPORTER_OTLP_ENDPOINT=https://attacker.example.com`) would + hand over tenant credentials. + """ + if not endpoint: + return False + try: + parsed = urlparse(endpoint) + except (ValueError, AttributeError): + return False + host = (parsed.hostname or "").lower() + if not host: + return False + if host in _BOTANU_DEV_HOSTS: + return True + return any(host == suffix.lstrip(".") or host.endswith(suffix) for suffix in _BOTANU_HOST_SUFFIXES) + + +def _redact_url_credentials(url: Optional[str]) -> Optional[str]: + """Strip `user:pass@` from a URL so it is safe to log.""" + if not url: + return url + try: + parsed = urlparse(url) + except (ValueError, AttributeError): + return url + if not (parsed.username or parsed.password): + return url + host = parsed.hostname or "" + if parsed.port: + host = f"{host}:{parsed.port}" + redacted = parsed._replace(netloc=host) + return urlunparse(redacted) + + +def _redact_headers(headers: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]: + """Return a copy of headers with sensitive values replaced by `***`.""" + if not headers: + return headers + out: Dict[str, str] = {} + for key, value in headers.items(): + if key.lower() in _SENSITIVE_HEADER_NAMES: + out[key] = "***" + else: + out[key] = value + return out + + @dataclass class BotanuConfig: """Configuration for Botanu SDK and OpenTelemetry. @@ -68,6 +126,22 @@ class BotanuConfig: # Propagation mode: "lean" (run_id + workflow only) or "full" (all context) propagation_mode: str = "lean" + # Content capture for eval — 0.0 disables entirely (default, privacy-safe). + # Set to 1.0 for sandbox/shadow, 0.10–0.20 for production. Customers must also + # call set_input_content() / set_output_content() on their trackers; this rate + # gates whether those calls actually write to span attributes. PII scrubbing + # happens downstream (collector regex + evaluator Presidio NER), not here. + content_capture_rate: float = 0.0 + + # Resource-cost inference — default ON. When True, enable() attaches the + # ResourceEnricher SpanProcessor which reads OTel semconv attributes + # (db.system, http.*.body.size, aws.service, …) and writes the botanu- + # namespaced `botanu.cloud_provider` + `botanu.bytes_transferred` that + # the cost worker uses to price non-LLM spans. Without this, S3/DynamoDB/ + # egress all price to $0. Disable only for compliance-sensitive + # deployments that must emit zero inferred metadata. + auto_instrument_resources: bool = True + # Auto-instrumentation packages to enable auto_instrument_packages: List[str] = field( default_factory=lambda: [ @@ -152,8 +226,36 @@ def __post_init__(self) -> None: else: self.otlp_endpoint = "http://localhost:4318" + if self.otlp_endpoint and (urlparse(self.otlp_endpoint).username or urlparse(self.otlp_endpoint).password): + # Embedded credentials in the URL would be logged verbatim elsewhere + # and bypass our header redaction. Strip them and require explicit + # `otlp_headers=` if the customer actually wanted auth. + logger.critical( + "Botanu SDK: OTLP endpoint contained embedded credentials. " + "Stripping credentials from the URL. Pass secrets via " + "otlp_headers= or BOTANU_API_KEY instead." + ) + self.otlp_endpoint = _redact_url_credentials(self.otlp_endpoint) + if self.otlp_headers is None and botanu_api_key: - self.otlp_headers = {"Authorization": f"Bearer {botanu_api_key}"} + if _is_botanu_trusted_endpoint(self.otlp_endpoint): + self.otlp_headers = {"Authorization": f"Bearer {botanu_api_key}"} + else: + # SSRF guard: a BOTANU_API_KEY paired with an untrusted endpoint + # (typically set via OTEL_EXPORTER_OTLP_ENDPOINT) would send the + # tenant's bearer token to that endpoint — full tenant takeover + # if the endpoint is attacker-controlled. Refuse to attach the + # key; spans still flow to the configured endpoint, but without + # botanu credentials. + logger.critical( + "Botanu SDK: BOTANU_API_KEY is set but the OTLP endpoint " + "(%s) is not a botanu-owned host. Refusing to send the API " + "key to an untrusted destination. Spans will be exported " + "without botanu authentication. Fix: point OTEL_EXPORTER_" + "OTLP_ENDPOINT at ingest.botanu.ai, or unset BOTANU_API_KEY " + "if you did not intend to authenticate to botanu.", + urlparse(self.otlp_endpoint).hostname or "unknown", + ) env_propagation_mode = os.getenv("BOTANU_PROPAGATION_MODE") if env_propagation_mode and env_propagation_mode in ("lean", "full"): @@ -181,6 +283,13 @@ def __post_init__(self) -> None: except ValueError: pass + env_content_rate = os.getenv("BOTANU_CONTENT_CAPTURE_RATE") + if env_content_rate is not None: + try: + self.content_capture_rate = max(0.0, min(1.0, float(env_content_rate))) + except ValueError: + pass + # ------------------------------------------------------------------ # YAML loading # ------------------------------------------------------------------ @@ -274,6 +383,7 @@ def _from_dict( export = data.get("export", {}) propagation = data.get("propagation", {}) resource = data.get("resource", {}) + eval_cfg = data.get("eval", {}) auto_packages = data.get("auto_instrument_packages") return cls( @@ -289,12 +399,13 @@ def _from_dict( schedule_delay_millis=export.get("delay_ms", 5000), export_timeout_millis=export.get("export_timeout_ms", 30000), propagation_mode=propagation.get("mode", "lean"), + content_capture_rate=max(0.0, min(1.0, float(eval_cfg.get("content_capture_rate", 0.0)))), auto_instrument_packages=(auto_packages if auto_packages else BotanuConfig().auto_instrument_packages), _config_file=config_file, ) def to_dict(self) -> Dict[str, Any]: - """Export configuration as dictionary.""" + """Export configuration as dictionary. Sensitive header values are redacted.""" return { "service": { "name": self.service_name, @@ -306,8 +417,8 @@ def to_dict(self) -> Dict[str, Any]: "auto_detect": self.auto_detect_resources, }, "otlp": { - "endpoint": self.otlp_endpoint, - "headers": self.otlp_headers, + "endpoint": _redact_url_credentials(self.otlp_endpoint), + "headers": _redact_headers(self.otlp_headers), }, "export": { "batch_size": self.max_export_batch_size, @@ -318,9 +429,27 @@ def to_dict(self) -> Dict[str, Any]: "propagation": { "mode": self.propagation_mode, }, + "eval": { + "content_capture_rate": self.content_capture_rate, + }, "auto_instrument_packages": self.auto_instrument_packages, } + def __repr__(self) -> str: + # Dataclass default __repr__ would print raw otlp_headers (which contain + # the BOTANU_API_KEY bearer token) and endpoint URLs with embedded + # credentials. DEBUG logging of config objects would then leak secrets. + redacted_headers = _redact_headers(self.otlp_headers) + redacted_endpoint = _redact_url_credentials(self.otlp_endpoint) + return ( + f"BotanuConfig(service_name={self.service_name!r}, " + f"deployment_environment={self.deployment_environment!r}, " + f"otlp_endpoint={redacted_endpoint!r}, " + f"otlp_headers={redacted_headers!r}, " + f"propagation_mode={self.propagation_mode!r}, " + f"content_capture_rate={self.content_capture_rate!r})" + ) + def _interpolate_env_vars(content: str) -> str: """Interpolate ``${VAR_NAME}`` and ``${VAR_NAME:-default}`` in *content*.""" diff --git a/src/botanu/sdk/decorators.py b/src/botanu/sdk/decorators.py index 1cc126f..424c57c 100644 --- a/src/botanu/sdk/decorators.py +++ b/src/botanu/sdk/decorators.py @@ -17,6 +17,7 @@ import functools import hashlib import inspect +import json from collections.abc import Mapping from contextlib import asynccontextmanager, contextmanager from datetime import datetime, timezone @@ -48,6 +49,63 @@ def _get_parent_run_id() -> Optional[str]: return get_baggage("botanu.run_id") +# ── Content capture (workflow-level) ────────────────────────────────────── +# +# Gated by the same `content_capture_rate` config as LLMTracker so a single +# toggle controls both workflow-level and span-level capture. PII scrubbing +# is downstream (collector + evaluator) — see botanu/tracking/llm.py:332-333. + +_CAPTURE_MAX_CHARS = 4096 + + +def _should_capture_content() -> bool: + """Single decision per workflow invocation — applied to both input + output + so we never land a half-captured pair.""" + try: + from botanu.sdk.bootstrap import get_config + from botanu.sampling.content_sampler import should_capture_content + + cfg = get_config() + rate = cfg.content_capture_rate if cfg else 0.0 + return should_capture_content(rate) + except Exception: + return False + + +def _serialize_for_capture(obj: Any) -> str: + """Best-effort stringification. JSON first, repr fallback, truncated.""" + try: + text = json.dumps(obj, default=repr, ensure_ascii=False) + except Exception: + try: + text = repr(obj) + except Exception: + text = "" + return text[:_CAPTURE_MAX_CHARS] + + +def _build_input_payload( + func: Callable[..., Any], args: tuple, kwargs: dict +) -> dict[str, Any]: + """Bind call args to parameter names. Falls back to positional if signature + binding fails (unusual — reflective calls, C-extension wrappers).""" + try: + sig = inspect.signature(func) + bound = sig.bind_partial(*args, **kwargs) + return dict(bound.arguments) + except Exception: + return {"args": list(args), "kwargs": dict(kwargs)} + + +def _capture_input(span: trace.Span, func: Callable[..., Any], args: tuple, kwargs: dict) -> None: + payload = _build_input_payload(func, args, kwargs) + span.set_attribute("botanu.eval.input_content", _serialize_for_capture(payload)) + + +def _capture_output(span: trace.Span, result: Any) -> None: + span.set_attribute("botanu.eval.output_content", _serialize_for_capture(result)) + + def botanu_workflow( name: str, *, @@ -152,9 +210,16 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> T: ctx = otel_baggage.set_baggage(key, value, context=ctx) baggage_token = attach(ctx) + capture_content = _should_capture_content() + if capture_content: + _capture_input(span, func, args, kwargs) + try: result = await func(*args, **kwargs) + if capture_content: + _capture_output(span, result) + span_attrs = getattr(span, "attributes", None) existing_outcome = ( span_attrs.get("botanu.outcome.status") if isinstance(span_attrs, Mapping) else None @@ -216,9 +281,16 @@ def sync_wrapper(*args: Any, **kwargs: Any) -> T: ctx = otel_baggage.set_baggage(key, value, context=ctx) baggage_token = attach(ctx) + capture_content = _should_capture_content() + if capture_content: + _capture_input(span, func, args, kwargs) + try: result = func(*args, **kwargs) + if capture_content: + _capture_output(span, result) + span_attrs = getattr(span, "attributes", None) existing_outcome = ( span_attrs.get("botanu.outcome.status") if isinstance(span_attrs, Mapping) else None @@ -275,7 +347,9 @@ def _emit_run_completed( span.add_event("botanu.run.completed", attributes=event_attrs) - span.set_attribute("botanu.outcome.status", status.value) + # `botanu.outcome.status` no longer emitted (removed 2026-04-16): + # customer-reported outcome is trivially fakeable. Event outcome derives + # from eval verdict rollup / HITL / SoR. `duration_ms` stays for perf. span.set_attribute("botanu.run.duration_ms", duration_ms) diff --git a/src/botanu/sdk/span_helpers.py b/src/botanu/sdk/span_helpers.py index e698abd..042e8a6 100644 --- a/src/botanu/sdk/span_helpers.py +++ b/src/botanu/sdk/span_helpers.py @@ -9,18 +9,25 @@ from __future__ import annotations import logging +import warnings from typing import Optional from opentelemetry import trace -from botanu.sdk.context import get_baggage - logger = logging.getLogger(__name__) VALID_OUTCOME_STATUSES = { "success", "partial", "failed", "timeout", "canceled", "abandoned", } +_DEPRECATION_MSG = ( + "emit_outcome(status=...) no longer stamps `botanu.outcome.status` on the " + "span — customer-reported outcome has been removed (it was trivially " + "fakeable). Event outcome is now derived from eval verdict rollup / HITL / " + "SoR. You can remove this call, or keep it for the diagnostic fields " + "(reason, error_type, value_*, confidence, metadata) which still stamp." +) + def emit_outcome( status: str, @@ -32,28 +39,29 @@ def emit_outcome( error_type: Optional[str] = None, metadata: Optional[dict[str, str]] = None, ) -> None: - """Emit an outcome for the current span. + """Emit diagnostic outcome fields on the current span. (DEPRECATED for status.) + + The ``status`` argument no longer stamps ``botanu.outcome.status`` — + customer-reported outcome was removed on 2026-04-16 (trivially fakeable). + Event outcome is now derived from eval verdict rollup / HITL / SoR. - Sets span attributes for outcome tracking and ROI calculation. - Also emits an OTel log record to trigger collector flush. + All other fields (``value_type``, ``value_amount``, ``confidence``, + ``reason``, ``error_type``, ``metadata``) still stamp as diagnostic + attributes — useful for debugging and dashboards, not for authoritative + outcome determination. Args: - status: Outcome status. Must be one of ``"success"``, ``"partial"``, - ``"failed"``, ``"timeout"``, ``"canceled"``, ``"abandoned"``. + status: Accepted for backward compatibility. A ``DeprecationWarning`` + is emitted. Must still be one of the valid statuses for validation. value_type: Type of business value (e.g., ``"tickets_resolved"``). value_amount: Quantified value amount. confidence: Confidence score (0.0–1.0). - reason: Optional reason for the outcome. + reason: Optional diagnostic reason. error_type: Error classification (e.g., ``"ValidationError"``). - metadata: Additional key-value metadata to attach to the outcome. + metadata: Additional diagnostic key-value metadata. Raises: ValueError: If *status* is not a recognised outcome status. - - Example:: - - >>> emit_outcome("success", value_type="tickets_resolved", value_amount=1) - >>> emit_outcome("failed", error_type="TimeoutError", reason="LLM took >30s") """ if status not in VALID_OUTCOME_STATUSES: raise ValueError( @@ -61,9 +69,11 @@ def emit_outcome( f"Must be one of: {', '.join(sorted(VALID_OUTCOME_STATUSES))}" ) + warnings.warn(_DEPRECATION_MSG, DeprecationWarning, stacklevel=2) + span = trace.get_current_span() - span.set_attribute("botanu.outcome.status", status) + # `botanu.outcome.status` is NOT emitted — see deprecation notice. if value_type: span.set_attribute("botanu.outcome.value_type", value_type) @@ -84,7 +94,9 @@ def emit_outcome( for key, value in metadata.items(): span.set_attribute(f"botanu.outcome.metadata.{key}", value) - event_attrs: dict[str, object] = {"status": status} + # Keep the span event for diagnostic visibility (event, not authoritative), + # minus the `status` attribute to stay consistent with the removal. + event_attrs: dict[str, object] = {} if value_type: event_attrs["value_type"] = value_type if value_amount is not None: @@ -94,23 +106,10 @@ def emit_outcome( span.add_event("botanu.outcome_emitted", event_attrs) - # Emit OTel log record for collector flush trigger - event_id = get_baggage("botanu.event_id") - if event_id: - try: - from opentelemetry._logs import get_logger_provider - - logger_provider = get_logger_provider() - otel_logger = logger_provider.get_logger("botanu.outcome") - otel_logger.emit( - body=f"outcome:{status}", - attributes={ - "botanu.event_id": event_id, - "botanu.outcome.status": status, - }, - ) - except Exception: - pass # Don't break user's code if logs not configured + # OTel log emission for collector flush trigger has been removed: + # the collector's outcome-log flush trigger is being retired as part of + # the customer-push outcome deprecation. Events flush via idle timeout + # and max-lifetime triggers instead. def set_business_context( @@ -141,3 +140,78 @@ def set_business_context( if region: span.set_attribute("botanu.region", region) + + +# ── SoR correlation (Tier 1) ────────────────────────────────────────────── +# +# Links a Botanu event to a record in the customer's system of record so the +# sor-connector's OutcomeSignal (e.g. Zendesk ticket reopen, Stripe refund) +# can find the matching event. Tier-1 correlation writes a span attribute of +# the form `botanu.correlation._id` that the sor-connector reads in its +# normalizer. Confidence of Tier 1 matches is 1.0. +# +# Convention: pass keyword args named `_id` — the suffix is stripped so +# the stamped attribute is `botanu.correlation._id`. If the caller +# passes a key that doesn't end in `_id`, we stamp it verbatim and warn. +# +# Examples:: +# +# set_correlation(zendesk_ticket_id="T-123") +# set_correlation(stripe_charge_id="ch_1NAbcd", zendesk_ticket_id="T-123") +# set_correlation(sfdc_opportunity_id="0065g00000abcdef") + +_SUPPORTED_SOR_PREFIXES = frozenset({ + "zendesk", + "stripe", + "salesforce", + "sfdc", + "jira", + "servicenow", + "hubspot", + "intercom", + "freshdesk", + "zoho", + "front", +}) + + +def set_correlation(**correlations: Optional[str]) -> None: + """Stamp one or more `botanu.correlation.*` span attributes. + + Called inside a ``@botanu_workflow`` to link the current event to one or + more external SoR records. The sor-connector uses these attributes to + correlate inbound webhooks (ticket reopen, refund, etc.) back to this + event via Tier-1 correlation. + + Each keyword becomes a span attribute. A ``None`` or empty-string value + is dropped silently so it's safe to pass conditionally-set IDs. + + Args: + **correlations: keyword args like ``zendesk_ticket_id="T-123"``. + The key is stamped verbatim as ``botanu.correlation.``. + + Example:: + + @botanu_workflow("Support", event_id="evt-42", customer_id="acme") + def handle(ticket): + set_correlation(zendesk_ticket_id=ticket.id) + ... + """ + if not correlations: + return + + span = trace.get_current_span() + for key, value in correlations.items(): + if value is None or value == "": + continue + # Soft validation: warn on unfamiliar prefixes, still stamp. Customers + # may integrate with SoRs we don't yet have named support for. + prefix = key.split("_", 1)[0] + if prefix not in _SUPPORTED_SOR_PREFIXES: + logger.info( + "set_correlation: unfamiliar SoR prefix %r; stamping " + "botanu.correlation.%s anyway", + prefix, + key, + ) + span.set_attribute(f"botanu.correlation.{key}", str(value)) diff --git a/src/botanu/tracking/data.py b/src/botanu/tracking/data.py index 5a58f57..0c73195 100644 --- a/src/botanu/tracking/data.py +++ b/src/botanu/tracking/data.py @@ -184,6 +184,34 @@ def set_bytes_scanned(self, bytes_scanned: int) -> DBTracker: self.span.set_attribute("botanu.warehouse.bytes_scanned", bytes_scanned) return self + def set_bytes_transferred(self, *, sent: int = 0, received: int = 0) -> DBTracker: + if self.span: + self.span.set_attribute("botanu.bytes_transferred", int(sent) + int(received)) + return self + + def set_retrieval_content(self, text: str, max_chars: int = 4096) -> DBTracker: + """Capture retrieved content (for RAG eval). + + Writes the ``botanu.eval.retrieval_content`` span attribute only if + the active config's ``content_capture_rate`` > 0.0 allows this call. + Truncates to ``max_chars`` (default 4096) before stamping. + + PII scrubbing is handled downstream (collector + evaluator). + No-op when ``span`` is unset, ``text`` is empty/None, or the rate + excludes this call. + """ + if not self.span or not text: + return self + from botanu.sdk.bootstrap import get_config + from botanu.sampling.content_sampler import should_capture_content + + cfg = get_config() + rate = cfg.content_capture_rate if cfg else 0.0 + if not should_capture_content(rate): + return self + self.span.set_attribute("botanu.eval.retrieval_content", text[:max_chars]) + return self + def set_error(self, error: Exception) -> DBTracker: if self.span: self.span.set_status(Status(StatusCode.ERROR, str(error))) @@ -210,6 +238,7 @@ def track_db_operation( system: str, operation: str, database: Optional[str] = None, + cloud_provider: Optional[str] = None, **kwargs: Any, ) -> Generator[DBTracker, None, None]: """Track a database operation. @@ -218,6 +247,8 @@ def track_db_operation( system: Database system (postgresql, mysql, mongodb, …). operation: Type of operation (SELECT, INSERT, …). database: Database name (optional). + cloud_provider: Explicit cloud tag (``"aws"``/``"gcp"``/``"azure"``). + Overrides the inference done by :class:`ResourceEnricher`. """ tracer = trace.get_tracer("botanu.data") normalized_system = DB_SYSTEMS.get(system.lower(), system.lower()) @@ -231,6 +262,8 @@ def track_db_operation( span.set_attribute("botanu.vendor", normalized_system) if database: span.set_attribute("db.name", database) + if cloud_provider: + span.set_attribute("botanu.cloud_provider", cloud_provider.lower()) for key, value in kwargs.items(): span.set_attribute(f"botanu.data.{key}", value) @@ -285,6 +318,11 @@ def set_bucket(self, bucket: str) -> StorageTracker: self.span.set_attribute("botanu.storage.bucket", bucket) return self + def set_bytes_transferred(self, *, sent: int = 0, received: int = 0) -> StorageTracker: + if self.span: + self.span.set_attribute("botanu.bytes_transferred", int(sent) + int(received)) + return self + def set_error(self, error: Exception) -> StorageTracker: if self.span: self.span.set_status(Status(StatusCode.ERROR, str(error))) @@ -310,6 +348,7 @@ def _finalize(self) -> None: def track_storage_operation( system: str, operation: str, + cloud_provider: Optional[str] = None, **kwargs: Any, ) -> Generator[StorageTracker, None, None]: """Track a storage operation. @@ -317,6 +356,7 @@ def track_storage_operation( Args: system: Storage system (s3, gcs, azure_blob, …). operation: Type of operation (GET, PUT, DELETE, …). + cloud_provider: Explicit cloud tag. Overrides inference. """ tracer = trace.get_tracer("botanu.storage") normalized_system = STORAGE_SYSTEMS.get(system.lower(), system.lower()) @@ -328,6 +368,8 @@ def track_storage_operation( span.set_attribute("botanu.storage.system", normalized_system) span.set_attribute("botanu.storage.operation", operation.upper()) span.set_attribute("botanu.vendor", normalized_system) + if cloud_provider: + span.set_attribute("botanu.cloud_provider", cloud_provider.lower()) for key, value in kwargs.items(): span.set_attribute(f"botanu.storage.{key}", value) @@ -380,6 +422,11 @@ def set_error(self, error: Exception) -> MessagingTracker: self.span.record_exception(error) return self + def set_bytes_transferred(self, *, sent: int = 0, received: int = 0) -> MessagingTracker: + if self.span: + self.span.set_attribute("botanu.bytes_transferred", int(sent) + int(received)) + return self + def add_metadata(self, **kwargs: Any) -> MessagingTracker: if self.span: for key, value in kwargs.items(): @@ -399,6 +446,7 @@ def track_messaging_operation( system: str, operation: str, destination: str, + cloud_provider: Optional[str] = None, **kwargs: Any, ) -> Generator[MessagingTracker, None, None]: """Track a messaging operation. @@ -407,6 +455,7 @@ def track_messaging_operation( system: Messaging system (sqs, kafka, pubsub, …). operation: Type of operation (publish, consume, …). destination: Queue/topic name. + cloud_provider: Explicit cloud tag. Overrides inference. """ tracer = trace.get_tracer("botanu.messaging") normalized_system = MESSAGING_SYSTEMS.get(system.lower(), system.lower()) @@ -420,6 +469,8 @@ def track_messaging_operation( span.set_attribute("messaging.operation", operation.lower()) span.set_attribute("messaging.destination.name", destination) span.set_attribute("botanu.vendor", normalized_system) + if cloud_provider: + span.set_attribute("botanu.cloud_provider", cloud_provider.lower()) for key, value in kwargs.items(): span.set_attribute(f"botanu.messaging.{key}", value) diff --git a/src/botanu/tracking/llm.py b/src/botanu/tracking/llm.py index 67c0182..188be22 100644 --- a/src/botanu/tracking/llm.py +++ b/src/botanu/tracking/llm.py @@ -320,6 +320,51 @@ def set_attempt(self, attempt_number: int) -> LLMTracker: self.span.set_attribute(BotanuAttributes.ATTEMPT_NUMBER, attempt_number) return self + def set_input_content(self, text: str, max_chars: int = 4096) -> LLMTracker: + """Capture the prompt/input text for eval. + + Writes the ``botanu.eval.input_content`` span attribute only if the + active :class:`~botanu.sdk.config.BotanuConfig` has a + ``content_capture_rate`` > 0.0 that allows this call (simple + ``random.random() < rate`` gate). Truncates to ``max_chars`` + (default 4096) before stamping. + + PII scrubbing is handled downstream by the collector (regex pass) + and the evaluator (Presidio NER), not here. + + No-op when ``span`` is unset, ``text`` is empty/None, or the config + rate excludes this call. + """ + if not self.span or not text: + return self + from botanu.sdk.bootstrap import get_config + from botanu.sampling.content_sampler import should_capture_content + + cfg = get_config() + rate = cfg.content_capture_rate if cfg else 0.0 + if not should_capture_content(rate): + return self + self.span.set_attribute("botanu.eval.input_content", text[:max_chars]) + return self + + def set_output_content(self, text: str, max_chars: int = 4096) -> LLMTracker: + """Capture the response/output text for eval. + + See :meth:`set_input_content` for sampling and truncation semantics. + Writes the ``botanu.eval.output_content`` span attribute. + """ + if not self.span or not text: + return self + from botanu.sdk.bootstrap import get_config + from botanu.sampling.content_sampler import should_capture_content + + cfg = get_config() + rate = cfg.content_capture_rate if cfg else 0.0 + if not should_capture_content(rate): + return self + self.span.set_attribute("botanu.eval.output_content", text[:max_chars]) + return self + def set_request_params( self, temperature: Optional[float] = None, diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 24ca995..4f3a955 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -367,6 +367,9 @@ def test_api_key_auto_header(self): assert config.otlp_headers == {"Authorization": "Bearer btnu_live_test"} def test_explicit_endpoint_overrides_api_key(self): + # SEC-C4 SSRF guard: a BOTANU_API_KEY paired with a non-botanu endpoint + # must NOT leak the key to that endpoint. The endpoint is honoured, but + # the Authorization header is withheld. with mock.patch.dict( os.environ, { @@ -376,9 +379,65 @@ def test_explicit_endpoint_overrides_api_key(self): ): config = BotanuConfig() assert config.otlp_endpoint == "http://custom:4318" - # Header is still set from API key + assert config.otlp_headers is None + + def test_api_key_attached_for_botanu_host(self): + with mock.patch.dict( + os.environ, + { + "BOTANU_API_KEY": "btnu_live_test", + "BOTANU_COLLECTOR_ENDPOINT": "https://ingest.botanu.ai", + }, + ): + config = BotanuConfig() + assert config.otlp_headers == {"Authorization": "Bearer btnu_live_test"} + + def test_api_key_attached_for_localhost(self): + with mock.patch.dict( + os.environ, + { + "BOTANU_API_KEY": "btnu_live_test", + "BOTANU_COLLECTOR_ENDPOINT": "http://localhost:4318", + }, + ): + config = BotanuConfig() assert config.otlp_headers == {"Authorization": "Bearer btnu_live_test"} + def test_api_key_withheld_from_otel_env_endpoint(self): + with mock.patch.dict( + os.environ, + { + "BOTANU_API_KEY": "btnu_live_test", + "OTEL_EXPORTER_OTLP_ENDPOINT": "https://attacker.example.com", + }, + ): + config = BotanuConfig() + assert config.otlp_endpoint == "https://attacker.example.com" + assert config.otlp_headers is None + + def test_url_embedded_credentials_stripped(self): + env = {k: v for k, v in os.environ.items()} + env.pop("BOTANU_API_KEY", None) + env.pop("BOTANU_COLLECTOR_ENDPOINT", None) + env["OTEL_EXPORTER_OTLP_ENDPOINT"] = "https://user:secret@example.com/ingest" + with mock.patch.dict(os.environ, env, clear=True): + config = BotanuConfig() + assert "secret" not in (config.otlp_endpoint or "") + assert "user" not in (config.otlp_endpoint or "") + + def test_repr_redacts_auth_header(self): + with mock.patch.dict( + os.environ, + { + "BOTANU_API_KEY": "btnu_live_supersecret", + "BOTANU_COLLECTOR_ENDPOINT": "https://ingest.botanu.ai", + }, + ): + config = BotanuConfig() + text = repr(config) + assert "btnu_live_supersecret" not in text + assert "***" in text + def test_no_api_key_localhost_default(self): env = {k: v for k, v in os.environ.items()} env.pop("BOTANU_API_KEY", None) @@ -402,4 +461,46 @@ def test_default_packages(self): assert "httpx" in packages assert "fastapi" in packages assert "openai_v2" in packages - assert "anthropic" in packages + + +class TestContentCaptureRate: + """Tests for the content_capture_rate field.""" + + def test_default_is_zero(self): + """Privacy-safe default: no content captured unless explicitly enabled.""" + with mock.patch.dict(os.environ, {}, clear=True): + os.environ.pop("BOTANU_CONTENT_CAPTURE_RATE", None) + config = BotanuConfig() + assert config.content_capture_rate == 0.0 + + def test_explicit_value_respected(self): + config = BotanuConfig(content_capture_rate=0.15) + assert config.content_capture_rate == 0.15 + + def test_env_var_override(self): + with mock.patch.dict(os.environ, {"BOTANU_CONTENT_CAPTURE_RATE": "0.2"}): + config = BotanuConfig() + assert config.content_capture_rate == 0.2 + + def test_env_var_clamps_to_one(self): + """Defensive: env values above 1.0 clamp to 1.0.""" + with mock.patch.dict(os.environ, {"BOTANU_CONTENT_CAPTURE_RATE": "1.5"}): + config = BotanuConfig() + assert config.content_capture_rate == 1.0 + + def test_env_var_clamps_to_zero(self): + """Defensive: negative env values clamp to 0.0.""" + with mock.patch.dict(os.environ, {"BOTANU_CONTENT_CAPTURE_RATE": "-0.5"}): + config = BotanuConfig() + assert config.content_capture_rate == 0.0 + + def test_env_var_invalid_ignored(self): + """Invalid env values are ignored (default retained).""" + with mock.patch.dict(os.environ, {"BOTANU_CONTENT_CAPTURE_RATE": "not_a_number"}): + config = BotanuConfig() + assert config.content_capture_rate == 0.0 + + def test_to_dict_roundtrip(self): + config = BotanuConfig(content_capture_rate=0.1) + d = config.to_dict() + assert d["eval"]["content_capture_rate"] == 0.1 diff --git a/tests/unit/test_content_sampler.py b/tests/unit/test_content_sampler.py new file mode 100644 index 0000000..bcd98b2 --- /dev/null +++ b/tests/unit/test_content_sampler.py @@ -0,0 +1,52 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for botanu.sampling.content_sampler.""" + +from __future__ import annotations + +import random + +from botanu.sampling.content_sampler import should_capture_content + + +class TestShouldCaptureContent: + def test_rate_zero_returns_false(self): + """rate=0.0 must never capture.""" + for _ in range(100): + assert should_capture_content(0.0) is False + + def test_rate_negative_returns_false(self): + """Negative rates (defensive) must never capture.""" + assert should_capture_content(-0.1) is False + assert should_capture_content(-1.0) is False + + def test_rate_one_returns_true(self): + """rate=1.0 must always capture.""" + for _ in range(100): + assert should_capture_content(1.0) is True + + def test_rate_above_one_returns_true(self): + """Rates above 1.0 (defensive) must always capture.""" + assert should_capture_content(1.5) is True + assert should_capture_content(2.0) is True + + def test_rate_half_approx_half(self): + """rate=0.5 must capture roughly half the time (seeded RNG).""" + random.seed(42) + results = [should_capture_content(0.5) for _ in range(10_000)] + captured = sum(results) + # Generous tolerance: 10000 trials with p=0.5, stddev=50, expect ~5000±150 + assert 4700 < captured < 5300, f"expected ~5000 captures, got {captured}" + + def test_event_id_argument_accepted(self): + """event_id is accepted but currently unused (MVP behaviour).""" + # Should not raise + should_capture_content(0.0, event_id="evt_abc") + should_capture_content(1.0, event_id="evt_xyz") + + def test_event_id_none_default(self): + """event_id defaults to None.""" + # Should not raise, should behave identically to omitting + assert should_capture_content(0.0, None) is False + assert should_capture_content(1.0, None) is True diff --git a/tests/unit/test_data_tracking.py b/tests/unit/test_data_tracking.py index 6d0f003..7fd2cef 100644 --- a/tests/unit/test_data_tracking.py +++ b/tests/unit/test_data_tracking.py @@ -471,3 +471,84 @@ def test_messaging_operation_kwargs(self, memory_exporter): spans = memory_exporter.get_finished_spans() attrs = dict(spans[0].attributes) assert attrs["botanu.messaging.partition_key"] == "order-1" + + +class TestRetrievalContentCapture: + """DBTracker.set_retrieval_content — gated by content_capture_rate.""" + + def _with_rate(self, rate: float): + from contextlib import contextmanager + + from botanu.sdk import bootstrap + from botanu.sdk.config import BotanuConfig + + @contextmanager + def _cm(): + prev = bootstrap._current_config + bootstrap._current_config = BotanuConfig(content_capture_rate=rate) + try: + yield + finally: + bootstrap._current_config = prev + + return _cm() + + def test_retrieval_content_namespaced_attr_when_rate_one(self, memory_exporter): + with self._with_rate(1.0): + with track_db_operation( + system="postgresql", + operation=DBOperation.SELECT, + database="kb", + ) as tracker: + tracker.set_retrieval_content( + "Document snippet: botanu measures cost per outcome..." + ) + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert attrs["botanu.eval.retrieval_content"].startswith("Document snippet:") + + def test_rate_zero_does_not_stamp_attr(self, memory_exporter): + with self._with_rate(0.0): + with track_db_operation( + system="postgresql", + operation=DBOperation.SELECT, + database="kb", + ) as tracker: + tracker.set_retrieval_content("sensitive retrieved text") + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert "botanu.eval.retrieval_content" not in attrs + + def test_truncation_to_max_chars(self, memory_exporter): + with self._with_rate(1.0): + with track_db_operation( + system="postgresql", + operation=DBOperation.SELECT, + database="kb", + ) as tracker: + tracker.set_retrieval_content("z" * 5000, max_chars=4096) + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert len(attrs["botanu.eval.retrieval_content"]) == 4096 + + def test_empty_string_no_op(self, memory_exporter): + with self._with_rate(1.0): + with track_db_operation( + system="postgresql", + operation=DBOperation.SELECT, + database="kb", + ) as tracker: + tracker.set_retrieval_content("") + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert "botanu.eval.retrieval_content" not in attrs + + def test_returns_self_for_chaining(self, memory_exporter): + with self._with_rate(1.0): + with track_db_operation( + system="postgresql", + operation=DBOperation.SELECT, + database="kb", + ) as tracker: + result = tracker.set_retrieval_content("doc").set_table("docs") + assert result is tracker diff --git a/tests/unit/test_decorators.py b/tests/unit/test_decorators.py index e0676d4..a3069fa 100644 --- a/tests/unit/test_decorators.py +++ b/tests/unit/test_decorators.py @@ -160,7 +160,11 @@ def raises(): with pytest.raises(TypeError, match="bad type"): raises() - def test_outcome_status_set_on_success(self, memory_exporter): + def test_outcome_status_not_emitted_on_success(self, memory_exporter): + """`botanu.outcome.status` is no longer emitted (removed 2026-04-16) — + customer-reported outcome is trivially fakeable. Event outcome is + derived from eval verdict rollup / HITL / SoR instead.""" + @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1") def my_fn(): return "ok" @@ -168,9 +172,11 @@ def my_fn(): my_fn() spans = memory_exporter.get_finished_spans() attrs = dict(spans[0].attributes) - assert attrs["botanu.outcome.status"] == "success" + assert "botanu.outcome.status" not in attrs + + def test_outcome_status_not_emitted_on_failure(self, memory_exporter): + """Same removal applies on the failure path.""" - def test_outcome_status_set_on_failure(self, memory_exporter): @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1") def failing(): raise RuntimeError("boom") @@ -180,7 +186,7 @@ def failing(): spans = memory_exporter.get_finished_spans() attrs = dict(spans[0].attributes) - assert attrs["botanu.outcome.status"] == "failure" + assert "botanu.outcome.status" not in attrs def test_duration_ms_recorded(self, memory_exporter): @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1") @@ -298,6 +304,99 @@ def my_fn(): my_fn() +class TestBotanuWorkflowContentCapture: + """Tests for @botanu_workflow content capture into botanu.eval.* attrs.""" + + def test_no_capture_when_rate_is_zero(self, memory_exporter, monkeypatch): + # Default rate=0.0 → nothing captured. + @botanu_workflow("Triage", event_id="ticket-1", customer_id="acme") + def handle(ticket_id: str, priority: int = 1) -> dict: + return {"status": "resolved", "ticket_id": ticket_id} + + handle("ticket-1", priority=3) + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert "botanu.eval.input_content" not in attrs + assert "botanu.eval.output_content" not in attrs + + def test_captures_input_and_output_when_enabled(self, memory_exporter, monkeypatch): + # Force the capture gate on — bypass the random sampler for determinism. + monkeypatch.setattr( + "botanu.sdk.decorators._should_capture_content", lambda: True + ) + + @botanu_workflow("Triage", event_id="ticket-2", customer_id="acme") + def handle(ticket_id: str, priority: int = 1) -> dict: + return {"status": "resolved", "ticket_id": ticket_id} + + handle("ticket-2", priority=5) + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert "botanu.eval.input_content" in attrs + assert "botanu.eval.output_content" in attrs + + # Input payload keys are the function's parameter names, not "args"/"kwargs" + assert "ticket_id" in attrs["botanu.eval.input_content"] + assert "priority" in attrs["botanu.eval.input_content"] + assert "5" in attrs["botanu.eval.input_content"] + + assert "resolved" in attrs["botanu.eval.output_content"] + assert "ticket-2" in attrs["botanu.eval.output_content"] + + def test_capture_truncates_large_output(self, memory_exporter, monkeypatch): + monkeypatch.setattr( + "botanu.sdk.decorators._should_capture_content", lambda: True + ) + + @botanu_workflow("Bulk", event_id="evt-3", customer_id="acme") + def handle() -> str: + return "x" * 10_000 + + handle() + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + captured = attrs["botanu.eval.output_content"] + assert len(captured) <= 4096 + len('""') # 4096 content chars + JSON quotes + + def test_capture_survives_unserializable_args(self, memory_exporter, monkeypatch): + monkeypatch.setattr( + "botanu.sdk.decorators._should_capture_content", lambda: True + ) + + class Opaque: + def __repr__(self) -> str: + return "" + + @botanu_workflow("Weird", event_id="evt-4", customer_id="acme") + def handle(obj) -> str: + return "ok" + + handle(Opaque()) + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + # Should not raise; should contain the repr of Opaque + assert "botanu.eval.input_content" in attrs + assert "Opaque" in attrs["botanu.eval.input_content"] + + def test_input_captured_even_if_function_raises(self, memory_exporter, monkeypatch): + """Input is captured BEFORE the call; output is not captured on exception.""" + monkeypatch.setattr( + "botanu.sdk.decorators._should_capture_content", lambda: True + ) + + @botanu_workflow("Fails", event_id="evt-5", customer_id="acme") + def handle(x: int) -> int: + raise RuntimeError("boom") + + with pytest.raises(RuntimeError): + handle(42) + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert "botanu.eval.input_content" in attrs + assert "42" in attrs["botanu.eval.input_content"] + assert "botanu.eval.output_content" not in attrs + + class TestBotanuOutcomeDecorator: """Tests for @botanu_outcome decorator.""" diff --git a/tests/unit/test_llm_tracking.py b/tests/unit/test_llm_tracking.py index 1b6ed68..bbcb97c 100644 --- a/tests/unit/test_llm_tracking.py +++ b/tests/unit/test_llm_tracking.py @@ -535,3 +535,102 @@ def test_custom_kwargs(self, memory_exporter): spans = memory_exporter.get_finished_spans() attrs = dict(spans[0].attributes) assert attrs["botanu.deployment_id"] == "dep-001" + + +class TestContentCapture: + """set_input_content / set_output_content — gated by content_capture_rate.""" + + def _with_rate(self, rate: float): + """Return a context manager that temporarily sets the active config rate.""" + from contextlib import contextmanager + + from botanu.sdk import bootstrap + from botanu.sdk.config import BotanuConfig + + @contextmanager + def _cm(): + prev = bootstrap._current_config + bootstrap._current_config = BotanuConfig(content_capture_rate=rate) + try: + yield + finally: + bootstrap._current_config = prev + + return _cm() + + def test_input_content_namespaced_attr_when_rate_one(self, memory_exporter): + with self._with_rate(1.0): + with track_llm_call(model="gpt-4", vendor="openai") as tracker: + tracker.set_input_content("Hello, what is 2+2?") + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert attrs["botanu.eval.input_content"] == "Hello, what is 2+2?" + + def test_output_content_namespaced_attr_when_rate_one(self, memory_exporter): + with self._with_rate(1.0): + with track_llm_call(model="gpt-4", vendor="openai") as tracker: + tracker.set_output_content("2+2 equals 4.") + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert attrs["botanu.eval.output_content"] == "2+2 equals 4." + + def test_rate_zero_does_not_stamp_attr(self, memory_exporter): + with self._with_rate(0.0): + with track_llm_call(model="gpt-4", vendor="openai") as tracker: + tracker.set_input_content("sensitive prompt") + tracker.set_output_content("sensitive response") + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert "botanu.eval.input_content" not in attrs + assert "botanu.eval.output_content" not in attrs + + def test_default_rate_zero_no_config(self, memory_exporter): + """With no active config, default is no-capture (safe default).""" + from botanu.sdk import bootstrap + + prev = bootstrap._current_config + bootstrap._current_config = None + try: + with track_llm_call(model="gpt-4", vendor="openai") as tracker: + tracker.set_input_content("prompt") + tracker.set_output_content("response") + finally: + bootstrap._current_config = prev + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert "botanu.eval.input_content" not in attrs + assert "botanu.eval.output_content" not in attrs + + def test_truncation_to_max_chars(self, memory_exporter): + with self._with_rate(1.0): + with track_llm_call(model="gpt-4", vendor="openai") as tracker: + tracker.set_input_content("x" * 5000, max_chars=4096) + tracker.set_output_content("y" * 5000, max_chars=4096) + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert len(attrs["botanu.eval.input_content"]) == 4096 + assert len(attrs["botanu.eval.output_content"]) == 4096 + + def test_custom_max_chars(self, memory_exporter): + with self._with_rate(1.0): + with track_llm_call(model="gpt-4", vendor="openai") as tracker: + tracker.set_input_content("abcdefghij", max_chars=4) + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert attrs["botanu.eval.input_content"] == "abcd" + + def test_empty_string_no_op(self, memory_exporter): + with self._with_rate(1.0): + with track_llm_call(model="gpt-4", vendor="openai") as tracker: + tracker.set_input_content("") + tracker.set_output_content("") + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert "botanu.eval.input_content" not in attrs + assert "botanu.eval.output_content" not in attrs + + def test_returns_self_for_chaining(self, memory_exporter): + with self._with_rate(1.0): + with track_llm_call(model="gpt-4", vendor="openai") as tracker: + result = tracker.set_input_content("hi").set_output_content("hello") + assert result is tracker diff --git a/tests/unit/test_resource_enricher.py b/tests/unit/test_resource_enricher.py new file mode 100644 index 0000000..f4315ca --- /dev/null +++ b/tests/unit/test_resource_enricher.py @@ -0,0 +1,214 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for ResourceEnricher + set_bytes_transferred + cloud_provider kwarg. + +These exercise the Phase C wiring that makes non-LLM spans actually price +above $0 in the cost worker. Without this path, every S3 PUT, DynamoDB op, +and egress byte lands in cost_infra_usd=0. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest + +from botanu.processors.resource_enricher import ( + ResourceEnricher, + _infer_bytes_transferred, + _infer_cloud_provider, +) + + +def _readable_span(attrs: dict) -> MagicMock: + """Stand-in for a ReadableSpan. ResourceEnricher only reads `.attributes` + and calls `.set_attribute`, both of which are easy to mock.""" + span = MagicMock() + span.attributes = dict(attrs) + written: dict = {} + + def _set(key, value): + written[key] = value + + span.set_attribute = MagicMock(side_effect=_set) + span.written = written + return span + + +class TestCloudProviderInference: + def test_explicit_cloud_provider_wins(self): + assert _infer_cloud_provider({"cloud.provider": "AWS"}) == "aws" + + def test_aws_service_attr_infers_aws(self): + assert _infer_cloud_provider({"aws.service": "DynamoDB"}) == "aws" + + def test_aws_rpc_system(self): + assert _infer_cloud_provider({"rpc.system": "aws-api"}) == "aws" + + def test_gcp_service_attr_infers_gcp(self): + assert _infer_cloud_provider({"gcp.project_id": "my-proj"}) == "gcp" + + def test_azure_namespace_attr_infers_azure(self): + assert _infer_cloud_provider({"azure.namespace": "Microsoft.Storage"}) == "azure" + + def test_db_system_dynamodb_infers_aws(self): + assert _infer_cloud_provider({"db.system": "dynamodb"}) == "aws" + + def test_storage_system_s3_infers_aws(self): + assert _infer_cloud_provider({"botanu.storage.system": "s3"}) == "aws" + + def test_messaging_system_pubsub_infers_gcp(self): + assert _infer_cloud_provider({"messaging.system": "pubsub"}) == "gcp" + + def test_unknown_system_returns_none(self): + assert _infer_cloud_provider({"db.system": "postgresql"}) is None + + def test_empty_attrs_returns_none(self): + assert _infer_cloud_provider({}) is None + + +class TestBytesTransferredInference: + def test_http_request_and_response_summed(self): + assert _infer_bytes_transferred( + {"http.request.body.size": 100, "http.response.body.size": 250} + ) == 350 + + def test_http_request_only(self): + assert _infer_bytes_transferred({"http.request.body.size": 100}) == 100 + + def test_botanu_data_bytes_read_fallback(self): + # Fallback path: no http.* but DBTracker populated bytes_read + assert _infer_bytes_transferred({"botanu.data.bytes_read": 512}) == 512 + + def test_messaging_bytes_transferred_fallback(self): + assert _infer_bytes_transferred({"botanu.messaging.bytes_transferred": 42}) == 42 + + def test_no_bytes_attrs_returns_none(self): + assert _infer_bytes_transferred({}) is None + assert _infer_bytes_transferred({"db.system": "postgresql"}) is None + + def test_http_preferred_over_fallback(self): + """When both http.* and botanu.data.* are present, use http.* only — + otherwise we'd double-count.""" + attrs = { + "http.request.body.size": 100, + "http.response.body.size": 200, + "botanu.data.bytes_read": 999, + } + assert _infer_bytes_transferred(attrs) == 300 + + +class TestResourceEnricherOnEnd: + def test_writes_inferred_cloud_provider_and_bytes(self): + enricher = ResourceEnricher() + span = _readable_span( + { + "db.system": "dynamodb", + "http.request.body.size": 100, + "http.response.body.size": 200, + } + ) + enricher.on_end(span) + assert span.written == { + "botanu.cloud_provider": "aws", + "botanu.bytes_transferred": 300, + } + + def test_does_not_overwrite_explicit_attrs(self): + """Explicit set_bytes_transferred / cloud_provider= kwarg must win.""" + enricher = ResourceEnricher() + span = _readable_span( + { + "db.system": "dynamodb", + "http.response.body.size": 200, + "botanu.cloud_provider": "azure", # customer set this explicitly + "botanu.bytes_transferred": 999, + } + ) + enricher.on_end(span) + # Neither attribute should be overwritten + assert span.written == {} + + def test_skips_llm_spans(self): + """LLM spans price via token counts, not bytes. Writing bytes here + would pollute cost_infra_usd.""" + enricher = ResourceEnricher() + span = _readable_span( + { + "gen_ai.request.model": "claude-opus-4-6", + "http.request.body.size": 100, + } + ) + enricher.on_end(span) + assert span.written == {} + + def test_no_write_when_nothing_inferable(self): + enricher = ResourceEnricher() + span = _readable_span({"http.method": "GET"}) + enricher.on_end(span) + assert span.written == {} + + def test_writes_cloud_only_when_bytes_unknown(self): + enricher = ResourceEnricher() + span = _readable_span({"db.system": "dynamodb"}) + enricher.on_end(span) + assert span.written == {"botanu.cloud_provider": "aws"} + + def test_on_start_is_noop(self): + """on_start runs before HTTP response size is known; do nothing there.""" + enricher = ResourceEnricher() + span = MagicMock() + span.set_attribute = MagicMock() + enricher.on_start(span, None) + span.set_attribute.assert_not_called() + + +class TestTrackerExplicitAPI: + def test_db_set_bytes_transferred_sets_combined_attr(self): + from botanu.tracking.data import DBTracker + + span = MagicMock() + tracker = DBTracker(system="postgresql", operation="SELECT", span=span) + tracker.set_bytes_transferred(sent=100, received=200) + span.set_attribute.assert_called_with("botanu.bytes_transferred", 300) + + def test_storage_set_bytes_transferred(self): + from botanu.tracking.data import StorageTracker + + span = MagicMock() + tracker = StorageTracker(system="s3", operation="PUT", span=span) + tracker.set_bytes_transferred(received=1024) + span.set_attribute.assert_called_with("botanu.bytes_transferred", 1024) + + def test_messaging_set_bytes_transferred(self): + from botanu.tracking.data import MessagingTracker + + span = MagicMock() + tracker = MessagingTracker( + system="sqs", operation="send", destination="q", span=span + ) + tracker.set_bytes_transferred(sent=42) + span.set_attribute.assert_called_with("botanu.bytes_transferred", 42) + + @pytest.mark.asyncio + async def test_db_cloud_provider_kwarg_sets_attr(self): + from botanu.tracking.data import track_db_operation + + with track_db_operation("postgresql", "SELECT", cloud_provider="aws"): + pass + # Success if the context manager accepted the kwarg without TypeError. + + +class TestConfigAutoInstrumentResources: + def test_default_is_on(self): + from botanu.sdk.config import BotanuConfig + + cfg = BotanuConfig() + assert cfg.auto_instrument_resources is True + + def test_can_be_disabled(self): + from botanu.sdk.config import BotanuConfig + + cfg = BotanuConfig(auto_instrument_resources=False) + assert cfg.auto_instrument_resources is False diff --git a/tests/unit/test_run_context.py b/tests/unit/test_run_context.py index 038137e..f20a68d 100644 --- a/tests/unit/test_run_context.py +++ b/tests/unit/test_run_context.py @@ -196,6 +196,21 @@ def test_to_span_attributes(self): assert attrs["botanu.customer_id"] == "bigretail" assert attrs["botanu.tenant_id"] == "tenant-123" + def test_to_span_attributes_omits_outcome_status(self): + """`botanu.outcome.status` is no longer emitted (removed 2026-04-16). + Other outcome diagnostic fields still stamp.""" + ctx = RunContext.create( + workflow="Customer Support", + event_id="ticket-42", + customer_id="bigretail", + ) + ctx.complete(status=RunStatus.SUCCESS, value_type="tickets", value_amount=1.0) + attrs = ctx.to_span_attributes() + + assert "botanu.outcome.status" not in attrs + assert attrs.get("botanu.outcome.value_type") == "tickets" + assert attrs.get("botanu.outcome.value_amount") == 1.0 + def test_from_baggage_roundtrip(self): original = RunContext.create( workflow="test", diff --git a/tests/unit/test_span_helpers.py b/tests/unit/test_span_helpers.py index de8cc9b..85d48b1 100644 --- a/tests/unit/test_span_helpers.py +++ b/tests/unit/test_span_helpers.py @@ -5,107 +5,90 @@ from __future__ import annotations -from opentelemetry import baggage, context, trace +import pytest +from opentelemetry import trace -from botanu.sdk.span_helpers import emit_outcome, set_business_context +from botanu.sdk.span_helpers import emit_outcome, set_business_context, set_correlation class TestEmitOutcome: - """Tests for emit_outcome function.""" + """emit_outcome is deprecated as an outcome-status signal but retained for + diagnostic fields. Status is validated but no longer stamped on the span. + """ - def test_emit_success_outcome(self, memory_exporter): + def test_emit_outcome_does_not_stamp_status(self, memory_exporter): + """The status argument is validated but NOT emitted as + `botanu.outcome.status` — removed 2026-04-16.""" tracer = trace.get_tracer("test") - with tracer.start_as_current_span("test-span"): - emit_outcome("success") + with pytest.warns(DeprecationWarning, match="trivially fakeable"): + with tracer.start_as_current_span("test-span"): + emit_outcome("success") spans = memory_exporter.get_finished_spans() attrs = dict(spans[0].attributes) - assert attrs.get("botanu.outcome.status") == "success" + assert "botanu.outcome.status" not in attrs - def test_emit_failure_outcome(self, memory_exporter): + def test_emit_outcome_emits_diagnostic_fields(self, memory_exporter): + """Diagnostic fields still stamp (reason, error_type, value_*, confidence).""" tracer = trace.get_tracer("test") - with tracer.start_as_current_span("test-span"): - emit_outcome("failed", reason="timeout") + with pytest.warns(DeprecationWarning): + with tracer.start_as_current_span("test-span"): + emit_outcome( + "failed", + reason="timeout", + error_type="TimeoutError", + value_type="tickets_resolved", + value_amount=5.0, + confidence=0.95, + ) spans = memory_exporter.get_finished_spans() attrs = dict(spans[0].attributes) - assert attrs.get("botanu.outcome.status") == "failed" assert attrs.get("botanu.outcome.reason") == "timeout" - - def test_emit_outcome_with_value(self, memory_exporter): - tracer = trace.get_tracer("test") - with tracer.start_as_current_span("test-span"): - emit_outcome( - "success", - value_type="tickets_resolved", - value_amount=5.0, - ) - - spans = memory_exporter.get_finished_spans() - attrs = dict(spans[0].attributes) - assert attrs.get("botanu.outcome.status") == "success" + assert attrs.get("botanu.outcome.error_type") == "TimeoutError" assert attrs.get("botanu.outcome.value_type") == "tickets_resolved" assert attrs.get("botanu.outcome.value_amount") == 5.0 + assert attrs.get("botanu.outcome.confidence") == 0.95 + # Still NOT stamping status + assert "botanu.outcome.status" not in attrs - def test_emit_outcome_with_confidence(self, memory_exporter): + def test_emit_outcome_raises_on_invalid_status(self, memory_exporter): + """Status validation retained for backward compatibility.""" tracer = trace.get_tracer("test") with tracer.start_as_current_span("test-span"): - emit_outcome("success", confidence=0.95) - - spans = memory_exporter.get_finished_spans() - attrs = dict(spans[0].attributes) - assert attrs.get("botanu.outcome.confidence") == 0.95 + with pytest.raises(ValueError, match="Invalid outcome status"): + emit_outcome("not_a_real_status") - def test_emit_outcome_adds_event(self, memory_exporter): + def test_emit_outcome_event_no_status_attr(self, memory_exporter): + """The `botanu.outcome_emitted` span event still fires for diagnostics + but does NOT carry `status` in its attributes.""" tracer = trace.get_tracer("test") - with tracer.start_as_current_span("test-span"): - emit_outcome("success", value_type="orders", value_amount=1) + with pytest.warns(DeprecationWarning): + with tracer.start_as_current_span("test-span"): + emit_outcome("success", value_type="orders", value_amount=1) spans = memory_exporter.get_finished_spans() events = [e for e in spans[0].events if e.name == "botanu.outcome_emitted"] assert len(events) == 1 - assert events[0].attributes["status"] == "success" - - def test_emit_outcome_emits_log_record(self, memory_exporter, log_exporter): - """emit_outcome should emit an OTel log record when event_id is in baggage.""" - tracer = trace.get_tracer("test") - - # Set up baggage with event_id - ctx = context.Context() - ctx = baggage.set_baggage("botanu.event_id", "ticket-42", context=ctx) - token = context.attach(ctx) - - try: - with tracer.start_as_current_span("test-span"): - emit_outcome("success") - finally: - context.detach(token) - - # Verify log record was emitted - logs = log_exporter.get_finished_logs() - assert len(logs) >= 1 + assert "status" not in dict(events[0].attributes) - log = logs[0] - assert log.log_record.body == "outcome:success" - assert log.log_record.attributes["botanu.event_id"] == "ticket-42" - assert log.log_record.attributes["botanu.outcome.status"] == "success" + def test_emit_outcome_no_log_record(self, memory_exporter, log_exporter): + """The OTel log record path has been removed — no collector flush + trigger from emit_outcome any more (customer-push outcome deprecated).""" + from opentelemetry import baggage, context - def test_emit_outcome_no_log_without_event_id(self, memory_exporter, log_exporter): - """emit_outcome should NOT emit a log record when no event_id in baggage.""" tracer = trace.get_tracer("test") - - # No baggage set - use clean context - ctx = context.Context() + ctx = baggage.set_baggage("botanu.event_id", "ticket-42", context=context.Context()) token = context.attach(ctx) - try: - with tracer.start_as_current_span("test-span"): - emit_outcome("success") + with pytest.warns(DeprecationWarning): + with tracer.start_as_current_span("test-span"): + emit_outcome("success") finally: context.detach(token) - # No log records should be emitted logs = log_exporter.get_finished_logs() + # Event_id is set but the log emission is gone assert len(logs) == 0 @@ -164,3 +147,75 @@ def test_set_multiple_contexts(self, memory_exporter): assert attrs.get("botanu.team") == "support" assert attrs.get("botanu.cost_center") == "CC-456" assert attrs.get("botanu.region") == "eu-central-1" + + +class TestSetCorrelation: + """set_correlation stamps botanu.correlation.* for SoR Tier-1 matching.""" + + def test_stamps_zendesk_ticket_id(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + set_correlation(zendesk_ticket_id="T-123") + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert attrs["botanu.correlation.zendesk_ticket_id"] == "T-123" + + def test_stamps_multiple_sor_ids(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + set_correlation( + zendesk_ticket_id="T-1", + stripe_charge_id="ch_abc", + sfdc_opportunity_id="006000", + ) + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert attrs["botanu.correlation.zendesk_ticket_id"] == "T-1" + assert attrs["botanu.correlation.stripe_charge_id"] == "ch_abc" + assert attrs["botanu.correlation.sfdc_opportunity_id"] == "006000" + + def test_drops_none_and_empty(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + set_correlation( + zendesk_ticket_id="T-1", + stripe_charge_id=None, + hubspot_deal_id="", + ) + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert "botanu.correlation.zendesk_ticket_id" in attrs + assert "botanu.correlation.stripe_charge_id" not in attrs + assert "botanu.correlation.hubspot_deal_id" not in attrs + + def test_coerces_non_string_to_string(self, memory_exporter): + """A numeric SoR ID (e.g., integer ticket number) should stamp as string.""" + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + set_correlation(zendesk_ticket_id=42) + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert attrs["botanu.correlation.zendesk_ticket_id"] == "42" + + def test_unfamiliar_prefix_still_stamps(self, memory_exporter, caplog): + """Unknown SoR prefix logs info but still writes the attribute — + customers may integrate with SoRs we haven't explicitly named.""" + import logging + + tracer = trace.get_tracer("test") + with caplog.at_level(logging.INFO, logger="botanu.sdk.span_helpers"): + with tracer.start_as_current_span("test-span"): + set_correlation(acme_ticket_id="A-999") + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + assert attrs["botanu.correlation.acme_ticket_id"] == "A-999" + assert any("unfamiliar SoR prefix" in r.message for r in caplog.records) + + def test_no_args_is_noop(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + set_correlation() + + attrs = dict(memory_exporter.get_finished_spans()[0].attributes) + correlation_attrs = [k for k in attrs if k.startswith("botanu.correlation.")] + assert correlation_attrs == [] From aec5b962c18a7eac2ce8ac44cb2cb7c55d099935 Mon Sep 17 00:00:00 2001 From: Deborah Jacob Date: Sun, 19 Apr 2026 09:33:02 -0700 Subject: [PATCH 4/4] docs: consolidate CHANGELOG into [Unreleased] No 0.1.0 tag was ever cut, so the dated 2026-02-05 entry was a fabrication. Collapse initial-release bullets into [Unreleased], drop the inaccurate "Lean mode (default)" line and stale extras list, and update the comparison link to point at main until the first real tag lands. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 120 ++++++++++++++++++++++++--------------------------- 1 file changed, 56 insertions(+), 64 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b6c618..a0e6c57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,69 +7,61 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [0.1.0] - 2026-02-05 - ### Added -- Initial open-source release under Apache-2.0 license -- **Core SDK** - - `enable()` / `disable()` bootstrap functions for SDK initialization - - `@botanu_workflow` decorator with UUIDv7 run_id generation - - `@botanu_outcome` decorator for sub-function outcome tracking - - `emit_outcome()` helper for recording business outcomes - - `set_business_context()` for cost attribution dimensions - - `RunContextEnricher` span processor for automatic run_id propagation - -- **LLM Tracking** (aligned with OTel GenAI semantic conventions) - - `track_llm_call()` context manager for LLM/model operations - - `track_tool_call()` context manager for tool/function calls - - Token usage tracking (input, output, cached) - - Provider normalization for 15+ LLM providers - - Support for all GenAI operations (chat, embeddings, etc.) - -- **Data Tracking** - - `track_db_operation()` for database operations - - `track_storage_operation()` for object storage (S3, GCS, Azure Blob) - - `track_messaging_operation()` for message queues (SQS, Kafka, Pub/Sub) - - System normalization for 30+ database/storage systems - -- **Context Propagation** - - W3C Baggage propagation for cross-service run_id correlation - - Lean mode (default) and full mode propagation options - - `RunContext` model with retry tracking and deadline support - -- **Resource Detection** - - Kubernetes (pod, namespace, container) - - AWS (EC2, ECS, Lambda, Fargate) - - GCP (GCE, Cloud Run, Cloud Functions) - - Azure (VM, Container Apps, Functions) - -- **Auto-Instrumentation Support** - - HTTP clients: requests, httpx, urllib3, aiohttp - - Web frameworks: FastAPI, Flask, Django, Starlette - - Databases: SQLAlchemy, psycopg2, asyncpg, pymongo, Redis - - Messaging: Celery, Kafka - - GenAI: OpenAI, Anthropic, Vertex AI, Google GenAI, LangChain - -- **Optional Extras** - - `[sdk]` - OTel SDK + OTLP exporter - - `[instruments]` - Common library instrumentation - - `[genai]` - GenAI provider instrumentation - - `[carriers]` - Cross-service propagation helpers - - `[all]` - Everything included - - `[dev]` - Development and testing tools - -- **Documentation** - - Comprehensive docs in `/docs` following LF format - - Getting started guides - - API reference - - Best practices and anti-patterns - -### Dependencies - -- Core: `opentelemetry-api >= 1.20.0` -- SDK extra: `opentelemetry-sdk`, `opentelemetry-exporter-otlp-proto-http` -- Python: `>= 3.9` - -[Unreleased]: https://github.com/botanu-ai/botanu-sdk-python/compare/v0.1.0...HEAD -[0.1.0]: https://github.com/botanu-ai/botanu-sdk-python/releases/tag/v0.1.0 +- **Security** + - OTLP bearer token is attached only when the endpoint host is botanu-owned + (`*.botanu.ai`) or a local dev host, preventing tenant API-key leakage + via a customer-supplied `OTEL_EXPORTER_OTLP_ENDPOINT`. + - Authorization / `x-api-key` / `botanu-api-key` headers and `user:pass@` + URL credentials are redacted in logs. +- **Brownfield OTel coexistence** + - `SampledSpanProcessor` preserves the host app's existing TracerProvider + sampling ratio when botanu is bootstrapped into a project that already + has OTel wired up. + - `register.py` entry point for explicit opt-in without decorator-side + provider mutation. + - Bootstrap detects a pre-configured provider and hands off instead of + overriding it. +- **Content capture for eval** + - Workflow-level input/output capture gated by `content_capture_rate` + config and a shared `content_sampler`. Writes + `botanu.eval.input_content` / `botanu.eval.output_content`. + - `set_input_content()` / `set_output_content()` on `LLMTracker` with the + same gate, plus matching helpers on data-tracking helpers. +- **Multi-step workflows** + - `@botanu_workflow(..., step=...)` parameter (stored in `RunContext`, + not yet emitted to span attributes — kept backward compatible until the + collector servicegraph work lands). +- **Resources** + - `ResourceEnricher` span processor for deployment attributes. +- **Release tooling** + - `scripts/pre_publish_check.py` red/green gate: builds sdist + wheel, + runs `twine check`, installs into a fresh venv, validates the public + API surface, runs an end-to-end decorator + `emit_outcome` smoke test. + +### Fixed + +- `SampledSpanProcessor.on_start` now gates on the same ratio decision as + `on_end`; forwarding `on_start` unconditionally while gating `on_end` + leaked span bookkeeping inside wrapped exporters (QUAL-C1). + +### Initial release contents + +Carried forward from the pre-tag scaffolding (never published): + +- `enable()` / `disable()` bootstrap, `@botanu_workflow`, + `@botanu_outcome`, `emit_outcome()`, `set_business_context()`, + `RunContextEnricher` — with UUIDv7 run_ids. +- LLM tracking aligned with OTel GenAI semconv: `track_llm_call()`, + `track_tool_call()`, token accounting, 15+ provider normalization. +- Data tracking: `track_db_operation()`, `track_storage_operation()`, + `track_messaging_operation()`; 30+ system normalizations. +- W3C Baggage propagation with `RunContext` (retry tracking + deadline). +- Cloud resource detectors via optional extras (`aws`, `gcp`, `azure`, + `container`, `cloud`). +- Auto-instrumentation bundled in the base install — HTTP clients, web + frameworks, databases, messaging, and GenAI providers; instrumentation + packages no-op when their target library is not installed. + +[Unreleased]: https://github.com/botanu-ai/botanu-sdk-python/commits/main