From 25691e362a54776083304bfbe2322ac8918db334 Mon Sep 17 00:00:00 2001 From: bussyjd <145845+bussyjd@users.noreply.github.com> Date: Fri, 12 Jun 2026 14:20:12 +0400 Subject: [PATCH] =?UTF-8?q?feat(agent):=20sellable=20smoke-test=20agent=20?= =?UTF-8?q?=E2=80=94=20read-only=20probes,=20GitHub=20reports,=20Validatio?= =?UTF-8?q?nRegistry=20verdict=20calldata?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - internal/embed/skills/smoke-test: SKILL.md + smoke.py (read-only x402/catalog probes, report.md + results.json, score 0-100) + gh_post.py (seller-owned public report repo, contents API, no-redirect token guard, Retry-After backoff) - internal/erc8004: SmokeTestRequestHash ("obol/smoke-test/v1||", golden-tested) reusing the existing validationResponse encoder - cmd/obol: 'obol smoke calldata' mirroring the bounty calldata UX (operator submits; agent never signs); GITHUB_TOKEN rides the existing optional hermes-env Secret — zero render/RBAC/admission changes - flows/flow-20-smoke-agent.sh (cluster/GitHub gated, skips clean) + docs/guides/smoke-test-agent.md - review: high finding (Bearer across redirects) fixed; dots-only run-id rejected post-review --- cmd/obol/main.go | 1 + cmd/obol/smoke.go | 140 +++++ cmd/obol/smoke_test.go | 225 ++++++++ docs/guides/smoke-test-agent.md | 301 ++++++++++ flows/flow-20-smoke-agent.sh | 367 ++++++++++++ internal/embed/skills/smoke-test/SKILL.md | 184 ++++++ .../skills/smoke-test/scripts/gh_post.py | 338 +++++++++++ .../embed/skills/smoke-test/scripts/smoke.py | 523 ++++++++++++++++++ internal/erc8004/smoke.go | 36 ++ internal/erc8004/smoke_test.go | 47 ++ internal/erc8004/validation.go | 401 ++++++++++++++ internal/erc8004/validation_registry.abi.json | 272 +++++++++ internal/erc8004/validation_test.go | 404 ++++++++++++++ tests/test_gh_post_no_redirect.py | 157 ++++++ 14 files changed, 3396 insertions(+) create mode 100644 cmd/obol/smoke.go create mode 100644 cmd/obol/smoke_test.go 
create mode 100644 docs/guides/smoke-test-agent.md create mode 100755 flows/flow-20-smoke-agent.sh create mode 100644 internal/embed/skills/smoke-test/SKILL.md create mode 100644 internal/embed/skills/smoke-test/scripts/gh_post.py create mode 100644 internal/embed/skills/smoke-test/scripts/smoke.py create mode 100644 internal/erc8004/smoke.go create mode 100644 internal/erc8004/smoke_test.go create mode 100644 internal/erc8004/validation.go create mode 100644 internal/erc8004/validation_registry.abi.json create mode 100644 internal/erc8004/validation_test.go create mode 100644 tests/test_gh_post_no_redirect.py diff --git a/cmd/obol/main.go b/cmd/obol/main.go index d3432da5..c21abf5f 100644 --- a/cmd/obol/main.go +++ b/cmd/obol/main.go @@ -325,6 +325,7 @@ GLOBAL OPTIONS:{{template "visibleFlagTemplate" .}}{{end}} openclawCommand(cfg), sellCommand(cfg), buyCommand(cfg), + smokeCommand(cfg), modelCommand(cfg), { Name: "app", diff --git a/cmd/obol/smoke.go b/cmd/obol/smoke.go new file mode 100644 index 00000000..d4356a47 --- /dev/null +++ b/cmd/obol/smoke.go @@ -0,0 +1,140 @@ +package main + +import ( + "context" + "fmt" + "regexp" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/erc8004" + "github.com/ethereum/go-ethereum/common" + "github.com/urfave/cli/v3" +) + +// smokeTestTag is the default validationResponse tag for smoke-test verdicts; +// it matches the erc8004 smoke-test request-hash domain. +const smokeTestTag = "obol/smoke-test/v1" + +// smokeBytes32Re matches a 0x-prefixed bytes32 hex string (the sha256 of the +// committed report.md, or an explicit request-hash override). +var smokeBytes32Re = regexp.MustCompile(`^0x[0-9a-fA-F]{64}$`) + +// smokeCommand groups the smoke-test agent's operator verbs. 
v0 carries only +// `calldata`: derive ERC-8004 validationResponse calldata for a finished +// smoke run so the operator can submit it with THEIR OWN wallet — the agent +// and the controller NEVER sign validation transactions (same stance as +// `obol bounty eval calldata`). +func smokeCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "smoke", + Usage: "Smoke-test agent verbs: derive ERC-8004 verdict calldata for a run", + Commands: []*cli.Command{ + smokeCalldataCommand(cfg), + }, + } +} + +// smokeCalldataCommand prints ERC-8004 validationResponse calldata for one +// smoke-test run. The request hash is derived as +// keccak256("obol/smoke-test/v1|<normalized-target>|<run-id>") unless an explicit +// --request-hash override is given (mirrors `obol bounty eval calldata`). +func smokeCalldataCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "calldata", + Usage: "Print ERC-8004 validationResponse calldata for a smoke run, for YOUR wallet to submit (the agent NEVER signs)", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "target", Usage: "[REQUIRED] Smoke target base URL (normalized: trimmed, trailing slashes dropped)", Required: true}, + &cli.StringFlag{Name: "run-id", Usage: "[REQUIRED] Run ID from the smoke report (results.json runId)", Required: true}, + &cli.StringFlag{Name: "request-hash", Usage: "Explicit validation request hash (bytes32, 0x...) — overrides --target/--run-id derivation"}, + &cli.IntFlag{Name: "response", Usage: "[REQUIRED] Verdict score 0-100 (results.json score100; the registry reverts above 100)", Required: true}, + &cli.StringFlag{Name: "response-uri", Usage: "Commit-pinned GitHub permalink of the committed report.md"}, + &cli.StringFlag{Name: "response-hash", Usage: "sha256 of the committed report.md bytes (0x + 64 hex; results.json reportSha256). 
Optional, zero allowed"}, + &cli.StringFlag{Name: "tag", Usage: "Validation tag", Value: smokeTestTag}, + &cli.StringFlag{Name: "network", Usage: "Chain", Value: "base-sepolia"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + res, err := buildSmokeCalldata(smokeCalldataInput{ + Target: cmd.String("target"), + RunID: cmd.String("run-id"), + RequestHashOverride: cmd.String("request-hash"), + Response: int(cmd.Int("response")), + ResponseURI: cmd.String("response-uri"), + ResponseHash: cmd.String("response-hash"), + Tag: cmd.String("tag"), + Network: cmd.String("network"), + }) + if err != nil { + return err + } + fmt.Printf("Request hash: %s\n", res.RequestHash.Hex()) + fmt.Printf("ValidationRegistry (%s): %s\n", cmd.String("network"), res.Registry) + fmt.Printf("Calldata: 0x%x\n", res.Calldata) + fmt.Println("Submit with YOUR wallet (e.g. the agent remote-signer or cast send) — the smoke agent and the controller NEVER sign validation transactions.") + return nil + }, + } +} + +// smokeCalldataInput carries the raw flag values for one calldata derivation. +type smokeCalldataInput struct { + Target string + RunID string + RequestHashOverride string + Response int + ResponseURI string + ResponseHash string + Tag string + Network string +} + +// smokeCalldataResult is the derived submit-ready transaction material. +type smokeCalldataResult struct { + RequestHash common.Hash + Registry string + Calldata []byte +} + +// buildSmokeCalldata validates the inputs and packs validationResponse +// calldata via the shared erc8004 encoder. Kept free of CLI plumbing so the +// golden test can pin the exact bytes. 
+func buildSmokeCalldata(in smokeCalldataInput) (smokeCalldataResult, error) { + if in.Response < 0 || in.Response > erc8004.MaxValidationResponse { + return smokeCalldataResult{}, fmt.Errorf("--response %d out of range 0-%d (the deployed registry reverts above %d; submit results.json score100, not score255)", + in.Response, erc8004.MaxValidationResponse, erc8004.MaxValidationResponse) + } + + requestHash := erc8004.SmokeTestRequestHash(in.Target, in.RunID) + if raw := strings.TrimSpace(in.RequestHashOverride); raw != "" { + if !smokeBytes32Re.MatchString(raw) { + return smokeCalldataResult{}, fmt.Errorf("--request-hash %q is not a bytes32 hex string (0x + 64 hex chars)", raw) + } + requestHash = common.HexToHash(raw) + } + + responseHash := common.Hash{} + if raw := strings.TrimSpace(in.ResponseHash); raw != "" { + if !smokeBytes32Re.MatchString(raw) { + return smokeCalldataResult{}, fmt.Errorf("--response-hash %q is not a sha256 hex string (0x + 64 hex chars)", raw) + } + responseHash = common.HexToHash(raw) + } + + registry, err := erc8004.ValidationRegistryAddress(in.Network) + if err != nil { + return smokeCalldataResult{}, err + } + + calldata, err := erc8004.EncodeValidationResponse( + requestHash, + uint8(in.Response), + in.ResponseURI, + responseHash, + in.Tag, + ) + if err != nil { + return smokeCalldataResult{}, err + } + + return smokeCalldataResult{RequestHash: requestHash, Registry: registry, Calldata: calldata}, nil +} diff --git a/cmd/obol/smoke_test.go b/cmd/obol/smoke_test.go new file mode 100644 index 00000000..1efe4e9b --- /dev/null +++ b/cmd/obol/smoke_test.go @@ -0,0 +1,225 @@ +package main + +import ( + "encoding/hex" + "strings" + "testing" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/erc8004" + "github.com/urfave/cli/v3" +) + +// ───────────────────────────────────────────────────────────────────────────── +// Command structure (house style: sell_test.go) +// 
───────────────────────────────────────────────────────────────────────────── + +func testSmokeCommand(t *testing.T) *cli.Command { + t.Helper() + return smokeCommand(&config.Config{}) +} + +func TestSmokeCalldataCommand_Flags(t *testing.T) { + calldata := findSubcommand(t, testSmokeCommand(t), "calldata") + flags := flagMap(calldata) + + requireFlags(t, flags, "target", "run-id", "request-hash", "response", "response-uri", "response-hash", "tag", "network") + assertFlagRequired(t, flags, "target") + assertFlagRequired(t, flags, "run-id") + assertFlagRequired(t, flags, "response") + assertStringDefault(t, flags, "network", "base-sepolia") + assertStringDefault(t, flags, "tag", "obol/smoke-test/v1") + + // --request-hash is an optional OVERRIDE (mirrors bounty eval calldata): + // the default derivation comes from --target/--run-id. + if f, ok := flags["request-hash"].(*cli.StringFlag); !ok || f.Required { + t.Errorf("--request-hash must be an optional override (derive via --target/--run-id), got required=%v", ok && f.Required) + } + if f, ok := flags["response-hash"].(*cli.StringFlag); !ok || f.Required { + t.Errorf("--response-hash must be optional (zero responseHash is allowed), got required=%v", ok && f.Required) + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Golden calldata +// ───────────────────────────────────────────────────────────────────────────── + +// TestBuildSmokeCalldata_Golden pins the full validationResponse calldata for +// fixed inputs: the 4-byte selector (validationResponse(bytes32,uint8,string, +// bytes32,string) == 0x3d659a96), the derived request hash (the erc8004 +// smoke golden vector), and the exact ABI-encoded bytes. Any drift here +// changes what operators submit on-chain, so the hex is hardcoded. 
+func TestBuildSmokeCalldata_Golden(t *testing.T) { + const ( + target = "http://obol.stack:8080" + runID = "20260101T000000Z-ab12cd" + responseURI = "https://github.com/example/obol-smoke-reports/blob/0011223344556677889900112233445566778899/reports/obol.stack-8080/20260101T000000Z-ab12cd.md" + responseHash = "0x9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" + + goldenRequestHash = "0x2a28aa12a52a28414de4933bbe8d1e52e42828ba08006748f544596823ce7a57" + goldenSelector = "3d659a96" + goldenCalldata = "3d659a96" + + "2a28aa12a52a28414de4933bbe8d1e52e42828ba08006748f544596823ce7a57" + + "0000000000000000000000000000000000000000000000000000000000000054" + + "00000000000000000000000000000000000000000000000000000000000000a0" + + "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" + + "0000000000000000000000000000000000000000000000000000000000000160" + + "000000000000000000000000000000000000000000000000000000000000008e" + + "68747470733a2f2f6769746875622e636f6d2f6578616d706c652f6f626f6c2d" + + "736d6f6b652d7265706f7274732f626c6f622f303031313232333334343535363" + + "637373838393930303131323233333434353536363737383839392f7265706f72" + + "74732f6f626f6c2e737461636b2d383038302f3230323630313031543030303030" + + "305a2d6162313263642e6d64000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000012" + + "6f626f6c2f736d6f6b652d746573742f76310000000000000000000000000000" + ) + + res, err := buildSmokeCalldata(smokeCalldataInput{ + Target: target, + RunID: runID, + Response: 84, + ResponseURI: responseURI, + ResponseHash: responseHash, + Tag: "obol/smoke-test/v1", + Network: "base-sepolia", + }) + if err != nil { + t.Fatalf("buildSmokeCalldata: %v", err) + } + + if res.RequestHash.Hex() != goldenRequestHash { + t.Errorf("request hash = %s, want %s", res.RequestHash.Hex(), goldenRequestHash) + } + if res.Registry != erc8004.ValidationRegistryV2BaseSepolia { + t.Errorf("registry = %s, want %s", 
res.Registry, erc8004.ValidationRegistryV2BaseSepolia) + } + + got := hex.EncodeToString(res.Calldata) + if !strings.HasPrefix(got, goldenSelector) { + t.Errorf("selector = 0x%s, want 0x%s (validationResponse)", got[:8], goldenSelector) + } + if got != goldenCalldata { + t.Errorf("calldata drifted:\n got 0x%s\nwant 0x%s", got, goldenCalldata) + } + + // Round-trip through the shared decoder: every field the operator submits + // must come back exactly. + decoded, err := erc8004.DecodeValidationResponseCalldata(res.Calldata) + if err != nil { + t.Fatalf("DecodeValidationResponseCalldata: %v", err) + } + if decoded.RequestHash.Hex() != goldenRequestHash { + t.Errorf("decoded request hash = %s, want %s", decoded.RequestHash.Hex(), goldenRequestHash) + } + if decoded.Response != 84 { + t.Errorf("decoded response = %d, want 84", decoded.Response) + } + if decoded.ResponseURI != responseURI { + t.Errorf("decoded responseURI = %q, want %q", decoded.ResponseURI, responseURI) + } + if decoded.ResponseHash.Hex() != responseHash { + t.Errorf("decoded responseHash = %s, want %s", decoded.ResponseHash.Hex(), responseHash) + } + if decoded.Tag != "obol/smoke-test/v1" { + t.Errorf("decoded tag = %q, want obol/smoke-test/v1", decoded.Tag) + } +} + +// TestBuildSmokeCalldata_RequestHashOverride proves --request-hash wins over +// the --target/--run-id derivation, mirroring bounty eval calldata. 
+func TestBuildSmokeCalldata_RequestHashOverride(t *testing.T) { + const override = "0x1111111111111111111111111111111111111111111111111111111111111111" + + res, err := buildSmokeCalldata(smokeCalldataInput{ + Target: "http://obol.stack:8080", + RunID: "20260101T000000Z-ab12cd", + RequestHashOverride: override, + Response: 100, + Network: "base-sepolia", + }) + if err != nil { + t.Fatalf("buildSmokeCalldata: %v", err) + } + if res.RequestHash.Hex() != override { + t.Errorf("request hash = %s, want override %s", res.RequestHash.Hex(), override) + } + + if _, err := buildSmokeCalldata(smokeCalldataInput{ + Target: "http://obol.stack:8080", + RunID: "20260101T000000Z-ab12cd", + RequestHashOverride: "0x1234", + Response: 100, + Network: "base-sepolia", + }); err == nil { + t.Error("expected error for malformed --request-hash override") + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Flag validation +// ───────────────────────────────────────────────────────────────────────────── + +func TestBuildSmokeCalldata_RejectsResponseOutOfRange(t *testing.T) { + base := smokeCalldataInput{ + Target: "http://obol.stack:8080", + RunID: "20260101T000000Z-ab12cd", + Network: "base-sepolia", + } + + for _, response := range []int{-1, 101, 255} { + in := base + in.Response = response + if _, err := buildSmokeCalldata(in); err == nil { + t.Errorf("response %d: expected out-of-range error (registry reverts above %d)", response, erc8004.MaxValidationResponse) + } + } + + // Boundary values must pass. 
+ for _, response := range []int{0, 100} { + in := base + in.Response = response + if _, err := buildSmokeCalldata(in); err != nil { + t.Errorf("response %d: unexpected error: %v", response, err) + } + } +} + +func TestBuildSmokeCalldata_RejectsMalformedResponseHash(t *testing.T) { + base := smokeCalldataInput{ + Target: "http://obol.stack:8080", + RunID: "20260101T000000Z-ab12cd", + Response: 50, + Network: "base-sepolia", + } + + for _, malformed := range []string{ + "0x1234", // too short + "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08", // missing 0x + "0x9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00aZZ", // non-hex + "0x9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a0800", // too long + } { + in := base + in.ResponseHash = malformed + if _, err := buildSmokeCalldata(in); err == nil { + t.Errorf("response hash %q: expected malformed-hash error", malformed) + } + } + + // Empty response hash is explicitly allowed (zero responseHash per spec). + in := base + in.ResponseHash = "" + if _, err := buildSmokeCalldata(in); err != nil { + t.Errorf("empty response hash should be allowed (zero hash): %v", err) + } +} + +func TestBuildSmokeCalldata_RejectsUnknownNetwork(t *testing.T) { + if _, err := buildSmokeCalldata(smokeCalldataInput{ + Target: "http://obol.stack:8080", + RunID: "20260101T000000Z-ab12cd", + Response: 50, + Network: "not-a-chain", + }); err == nil { + t.Error("expected error for a network without a verified validation registry deployment") + } +} diff --git a/docs/guides/smoke-test-agent.md b/docs/guides/smoke-test-agent.md new file mode 100644 index 00000000..28b8ae34 --- /dev/null +++ b/docs/guides/smoke-test-agent.md @@ -0,0 +1,301 @@ +# Selling a Smoke-Test Agent + +This guide walks you (the **seller/operator**) through provisioning, selling, +and operating the **smoke-test agent**: a payment-gated agent that buyers hire +per run to health-check the public surface of an Obol Stack deployment. 
+ +For each paid run, the agent: + +1. **Probes** a buyer-supplied target stack URL — strictly **read-only** GETs + against the published public routes (`/skill.md`, `/api/services.json`, + each advertised `/services/<name>/*` 402 challenge, and the informational + `/.well-known/agent-registration.json`). It never sends an `X-PAYMENT` + header and never writes anything to the target. +2. **Writes a report** — `report.md` (the canonical committed bytes) and + `results.json` (machine-readable scores) in its workspace. +3. **Commits the report** to a **seller-owned public GitHub repo** at + `reports/<target-slug>/<run-id>.md` and streams the buyer the + `results.json` plus a commit-pinned permalink. +4. Leaves you with everything needed to **submit an ERC-8004 + ValidationRegistry verdict** from your own wallet via + `obol smoke calldata`. The agent and the controller never sign on-chain + transactions — same stance as the bounty pipeline. + +> [!IMPORTANT] +> The monetize subsystem is alpha software. If you encounter an issue, please +> open a [GitHub issue](https://github.com/ObolNetwork/obol-stack/issues). + +> [!WARNING] +> The buyer drives a prompt-injectable agent that holds a GitHub token in its +> environment. Scope that token to **one public report repo, contents +> read/write, nothing else** (see [Step 2](#step-2--create-the-github-secret) +> and [Production guidance](#production-guidance)). The accepted v0 blast +> radius is "an attacker can write junk to the one public report repo" — +> nothing more. 
+ +## System overview + +``` +BUYER (any x402 wallet) SELLER (your obol stack) + +buy.py pay-agent ── x402 payment ──> Traefik /services/smoke-tester/* + "smoke-test <target-url>" └─> x402-verifier ─> Hermes agent + │ smoke-test skill + │ + read-only GETs (no X-PAYMENT, ever) ▼ +TARGET stack public surface <────────────────────── smoke.py probe + /skill.md smoke.py post ──> GitHub + /api/services.json reports/<target-slug>/<run-id>.md + /services/<name>/* (expect 402) + /.well-known/agent-registration.json (informational) + +OPERATOR (you, out of band) + obol smoke calldata ──> validationResponse calldata ──> cast send (YOUR wallet) + ERC-8004 ValidationRegistry +``` + +## Prerequisites + +- A running Obol Stack (`obol stack up`) with the Cloudflare tunnel active so + `/services/*` is publicly reachable. +- A **public** GitHub repository you own for reports (e.g. + `<owner>/stack-smoke-reports`). +- A GitHub credential scoped to that repo only (see Step 2). +- For the on-chain verdict: a wallet with ETH for gas on the target chain + (default `base-sepolia`). + +## Step 1 — Provision the agent + +Declare the agent with the `smoke-test` skill. No `--create-wallet` is needed +for v0: the agent never signs anything; you submit the verdict from your own +wallet. + +```bash +obol agent new smoke-tester \ + --skills smoke-test \ + --objective "You are a smoke-test agent. When a buyer says 'smoke-test <target-url>', run the smoke-test skill: probe the target read-only, then post the report, then reply with results.json and the permalink." +``` + +This creates an Agent CR in namespace `agent-smoke-tester`; the +serviceoffer-controller provisions a Hermes runtime with the skill mounted at +`/data/.hermes/obol-skills/smoke-test/`. + +## Step 2 — Create the GitHub Secret + +The agent reads `GITHUB_TOKEN` and `GITHUB_REPORT_REPO` from its environment. +Both ride the **existing `hermes-env` Secret** — the runtime-env-override hook +every CRD agent already mounts (`envFrom`, optional). 
Do **not** invent a new +Secret name: `hermes-env` is the one whitelisted by the admission policy and +RBAC. + +Create a **fine-grained personal access token** (GitHub → Settings → +Developer settings → Fine-grained tokens): + +- **Repository access**: only the report repo (e.g. + `<owner>/stack-smoke-reports`). +- **Permissions**: Contents → Read and write. Nothing else. +- **Expiration**: short (30–90 days) and rotate. + +Then create the Secret and restart the agent (the Deployment's checksum +annotation only covers `hermes-config`, so a Secret change needs an explicit +restart): + +```bash +obol kubectl -n agent-smoke-tester create secret generic hermes-env \ + --from-literal=GITHUB_TOKEN=github_pat_XXXXXXXXXXXXXXXXXXXXXX \ + --from-literal=GITHUB_REPORT_REPO=<owner>/stack-smoke-reports \ + --dry-run=client -o yaml | obol kubectl apply -f - + +obol kubectl -n agent-smoke-tester rollout restart deploy/hermes +``` + +> [!CAUTION] +> The token lives **only** in the Secret's data. Never put it in the Agent +> CR spec, annotations, labels, status, or any file the agent commits. +> Explicit `env` entries on the Hermes container (e.g. `API_SERVER_KEY`, +> `REMOTE_SIGNER_TOKEN`) always take precedence over `envFrom`, so +> `hermes-env` cannot clobber the runtime's own credentials. + +To rotate: re-run the same two commands with the new token. + +## Step 3 — Sell it + +```bash +obol sell agent smoke-tester \ + --price 0.05 \ + --token USDC \ + --chain base-sepolia \ + --pay-to 0xYourRevenueWallet \ + --description "Paid smoke test: read-only probe of an Obol Stack public surface, report committed to a public GitHub repo" +``` + +This wraps the agent in a `type=agent` ServiceOffer. Check progress with +`obol sell status smoke-tester -n agent-smoke-tester`; once +`UpstreamHealthy`, `PaymentGateReady`, and `RoutePublished` are `True`, the +agent is purchasable at `/services/smoke-tester/v1/chat/completions` on your +tunnel hostname. 
+ +## Step 4 — Buyer journey + +The buyer pays per run with the `buy-x402` skill's one-shot streaming call. +From any buyer agent pod: + +```bash +# 1. Discover pricing + the agent model id (extra.agentModel in the 402 body) +python3 ${OBOL_SKILLS_DIR:-/data/.openclaw/skills}/buy-x402/scripts/buy.py probe \ + https://<seller-tunnel-host>/services/smoke-tester/v1/chat/completions --type agent + +# 2. Pay for one run (streaming; agent runs can be slow, prefer pay-agent) +python3 ${OBOL_SKILLS_DIR:-/data/.openclaw/skills}/buy-x402/scripts/buy.py pay-agent \ + https://<seller-tunnel-host>/services/smoke-tester/v1/chat/completions \ + --model "<agentModel>" \ + --message 'smoke-test https://target-stack.example.com' +``` + +The message contract is `smoke-test <target-url>`. The agent generates a +run id of the form `<UTC-timestamp>-<6 hex>` (a buyer may suggest one +in the message; it must match `^[A-Za-z0-9._-]+$`). The streamed reply +contains the full `results.json` — including `passed`/`total`, `score255`, +`score100`, `reportSha256` — and the commit-pinned GitHub permalink. + +Note for buyers: the report lands in the **seller's** public repo, so the +result is publicly auditable but the buyer needs no GitHub credentials. The +buyer's verification path is: fetch the permalink, check +`sha256(report bytes) == reportSha256`, and (once submitted) check the +on-chain validation entry. + +## Step 5 — Where reports live + +In the seller-owned report repo: + +| Path | Content | +|---|---| +| `reports/<target-slug>/<run-id>.md` | The canonical per-run report (committed bytes are what `reportSha256` covers) | +| `reports/<target-slug>/latest.md` | Best-effort pointer: run id, score line, permalink of the latest run | + +`<target-slug>` is the lowercase target hostname with `:` rewritten to +`-` (e.g. `obol.stack:8080` → `obol.stack-8080`). The permalink the +buyer receives is commit-pinned +(`https://github.com/<owner>/<repo>/blob/<commit-sha>/reports/...`), so later +runs can never silently rewrite what the buyer was shown. 
+ +Each run performs at most **two** repo writes: one commit for the report, +one best-effort commit for `latest.md`. + +## Step 6 — Submit the on-chain verdict + +The run's identity on-chain is: + +``` +requestHash = keccak256("obol/smoke-test/v1|<normalized-target>|<run-id>") +``` + +with the target normalized exactly like the report (`strip()` whitespace, +strip trailing `/`). `results.json` deliberately does **not** contain the +request hash (the agent pod has no keccak256); `obol smoke calldata` derives +it for you: + +```bash +obol smoke calldata \ + --target https://target-stack.example.com \ + --run-id 20260612T093000Z-3fa9c2 \ + --response 87 \ + --response-uri "https://github.com/<owner>/stack-smoke-reports/blob/<commit-sha>/reports/target-stack.example.com/20260612T093000Z-3fa9c2.md" \ + --response-hash 0x<reportSha256> \ + --network base-sepolia +``` + +Flag-to-report mapping: + +| Flag | Source | +|---|---| +| `--target`, `--run-id` | `results.json` `target` + `runId` (the same normalized values) | +| `--response` | `results.json` **`score100`** — the on-chain value. The deployed registry reverts above 100, so `score255` stays an off-chain field | +| `--response-uri` | the commit-pinned permalink | +| `--response-hash` | `0x` + `results.json` `reportSha256` (sha256 of the committed `report.md` bytes; optional, zero allowed) | + +The command prints the request hash, the ValidationRegistry address for the +chosen network, and the ready-to-submit `validationResponse` calldata +(selector `0x3d659a96`). Submit it with **your own wallet** — never the +agent's: + +```bash +cast send <validation-registry-address> <calldata> \ + --rpc-url <rpc-url> \ + --private-key "$OPERATOR_KEY" +``` + +(Use an environment variable or a hardware/keystore signer — never paste a +private key inline.) + +Anyone can then independently verify the verdict: recompute the request hash +from the public target + run id, fetch the permalink, and check +`sha256(report.md bytes)` against the submitted `responseHash`. 
+ +## Production guidance + +> [!IMPORTANT] +> Read this section before selling runs for real money. It captures the v0 +> trust model and the GitHub operational limits. + +### Prefer GitHub App installation tokens over PATs + +For production, replace the fine-grained PAT with a **GitHub App installation +token**: + +- **Short-lived**: installation tokens expire after ~1 hour, so a leaked + token (the realistic failure mode for a prompt-injected agent) has a small + window. PATs live until rotated. +- **Per-repo by installation**: install the App on only the report repo; + the token cannot be over-scoped by mistake. +- **Higher, separately-bucketed rate limits** than user PATs. + +The trade-off is operational: something must mint a fresh installation token +and refresh the `hermes-env` Secret (`GITHUB_TOKEN`) on a schedule (e.g. a +host-side cron re-running the Step 2 commands). The agent contract is +unchanged — it just reads `GITHUB_TOKEN` from env. + +### v0 trust model: seller-owned repo only + +v0 posts to the **seller-owned public report repo**. There is deliberately +**no buyer token handoff** — a buyer cannot ask the agent to commit into a +buyer-owned repo, and the agent must never accept credentials passed through +chat. Buyer-repo delivery is explicitly out of scope for v0 and is planned as +a v1 feature with a proper credential channel. If a buyer needs a copy, the +report is public — mirror the permalink. + +### GitHub rate limits and acceptable use + +The posting script is built to stay well inside GitHub's +[acceptable use](https://docs.github.com/en/site-policy/acceptable-use-policies) +and secondary rate limits, and you should keep it that way: + +- **Batch: one report commit per run** (plus one best-effort `latest.md` + write) — never per-check or per-probe commits. 
+- Content writes are the expensive, secondary-rate-limited operation on + GitHub's side; the script honors `Retry-After` (falling back to + `x-ratelimit-reset`) on 403/429, retries 5xx with short exponential + backoff, and gives up within a bounded budget rather than hammering. +- If you operate many sellers against one report repo, expect concurrent- + write 409s (the script re-fetches the blob sha and retries once); beyond + light contention, shard by repo. +- A failed post never loses the run: the report stays in the agent workspace + and `post` is re-runnable. + +### Blast radius recap + +- The smoke agent **never signs or settles anything** — probe-only buyer + side, no `X-PAYMENT`; the operator submits the validation transaction. +- The GitHub token is the only credential it holds; with the scoping above, + the worst case from a hostile buyer prompt is junk commits in one public + report repo. Rotate the token and clean up the repo history if it happens. + +## CI / smoke coverage + +`flows/flow-20-smoke-agent.sh` gates this feature: it compiles the skill +scripts, runs a probe-only self-smoke against the local stack's own public +catalog surface (validating `report.md`/`results.json` and the +`reportSha256` binding), exercises GitHub posting **only** when +`GITHUB_TOKEN` + `GITHUB_REPORT_REPO` are exported (explicit SKIP otherwise, +so CI never needs GitHub), and asserts `obol smoke calldata` emits +`validationResponse` calldata with selector `0x3d659a96`. diff --git a/flows/flow-20-smoke-agent.sh b/flows/flow-20-smoke-agent.sh new file mode 100755 index 00000000..6dd2d888 --- /dev/null +++ b/flows/flow-20-smoke-agent.sh @@ -0,0 +1,367 @@ +#!/bin/bash +# Flow 20: Smoke-test agent — sellable read-only prober for Obol Stack +# public surfaces (skill: internal/embed/skills/smoke-test). +# +# Coverage: +# §1 Host-side syntax gate — python3 -m py_compile of the embedded +# smoke-test skill scripts (smoke.py + gh_post.py). 
+# §2 Self-smoke (cluster-gated) — run smoke.py probe against THIS +# stack's public catalog surface through the Traefik ingress; +# assert report.md + results.json are well-formed, score255/score100 +# are in range and internally consistent, and reportSha256 matches +# the exact bytes of report.md on disk. +# §3 GitHub posting (env-gated) — ONLY when GITHUB_TOKEN and +# GITHUB_REPORT_REPO are both set; posts the §2 report and asserts a +# commit-pinned permalink. Explicit SKIP otherwise so CI never needs +# GitHub credentials. +# §4 Verdict calldata — build obol, run `obol smoke calldata` with +# fixed inputs, assert non-empty calldata carrying the +# validationResponse(bytes32,uint8,string,bytes32,string) selector +# 0x3d659a96, and that target normalization (trailing slash) does +# not change the derived request hash. +# +# The probe path is strictly read-only: GET-only requests against the +# published public routes (/skill.md, /api/services.json, /services/*, +# /.well-known/agent-registration.json). No X-PAYMENT header is ever sent +# and nothing in the cluster is mutated. +# +# scrub_secrets-safe: GITHUB_TOKEN is read from the environment only — +# never echoed, never placed in argv — and every captured output that +# could embed it is redacted before printing. 
+# +# Env overrides: +# FLOW20_TARGET probe target base URL (default: this stack's +# ingress with obol.stack rewritten to 127.0.0.1 so +# python3/urllib needs no special DNS resolution) +# GITHUB_TOKEN fine-grained PAT for the seller-owned report repo +# GITHUB_REPORT_REPO / public report repository +source "$(dirname "$0")/lib.sh" + +require_tool python3 + +SKILL_SCRIPTS_DIR="$OBOL_ROOT/internal/embed/skills/smoke-test/scripts" +SMOKE_PY="$SKILL_SCRIPTS_DIR/smoke.py" +GH_POST_PY="$SKILL_SCRIPTS_DIR/gh_post.py" + +FLOW_STATE_DIR="$OBOL_ROOT/.workspace/state/flows" +RUN_ROOT="$FLOW_STATE_DIR/flow20-$(date +%Y%m%d-%H%M%S)-$$" +mkdir -p "$RUN_ROOT" + +# Redact the GitHub token from any captured output before it is printed. +# scrub_secrets (lib.sh) does not know about GitHub PATs, so this flow +# guarantees the token never reaches stdout/stderr on its own. +redact_gh_token() { + local text="$1" + if [ -n "${GITHUB_TOKEN:-}" ]; then + text="${text//${GITHUB_TOKEN}/[REDACTED-GH-TOKEN]}" + fi + printf '%s' "$text" +} + +# py_compile with an explicit cfile in the flow workspace — the default +# cfile would drop a __pycache__/ dir inside internal/embed/skills/, +# polluting the repo checkout the flow runs from. +py_compile_check() { + python3 -c 'import py_compile, sys; py_compile.compile(sys.argv[1], cfile=sys.argv[2], doraise=True)' \ + "$1" "$RUN_ROOT/$(basename "$1").pyc" +} + +# §1: Host-side compile gate — the skill scripts must at least be valid +# python3 before anything ships them into an agent PVC. +step "smoke.py compiles (python3 -m py_compile)" +if [ ! -f "$SMOKE_PY" ]; then + fail "smoke-test skill script missing: $SMOKE_PY" +else + compile_out="" + if compile_out=$(py_compile_check "$SMOKE_PY" 2>&1); then + pass "smoke.py compiles" + else + fail "smoke.py failed py_compile — ${compile_out:0:200}" + fi +fi + +step "gh_post.py compiles (python3 -m py_compile)" +if [ ! 
-f "$GH_POST_PY" ]; then + # Tolerated layout difference: posting may live in `smoke.py post` + # instead of a dedicated gh_post.py. §3 falls back accordingly. + skip "gh_post.py not present at $GH_POST_PY — assuming posting lives in 'smoke.py post'" +else + compile_out="" + if compile_out=$(py_compile_check "$GH_POST_PY" 2>&1); then + pass "gh_post.py compiles" + else + fail "gh_post.py failed py_compile — ${compile_out:0:200}" + fi +fi + +# §2: Self-smoke against this stack's public catalog surface. +# Cluster-gated: every step here SKIPs cleanly when no local stack is up. +CLUSTER_UP="" +if [ -x "$OBOL" ] \ + && [ -f "$OBOL_CONFIG_DIR/.stack-id" ] \ + && [ -f "$OBOL_CONFIG_DIR/kubeconfig.yaml" ] \ + && "$OBOL" kubectl cluster-info >/dev/null 2>&1; then + CLUSTER_UP="1" +fi + +TARGET="" +SELF_SMOKE_READY="" +step "Public catalog surface reachable (GET /api/services.json)" +if [ -z "$CLUSTER_UP" ]; then + skip "no local stack (config/kubeconfig/cluster unreachable) — self-smoke steps skipped" +else + refresh_obol_ingress_env + # python3/urllib cannot use curl's --resolve, so default the probe + # target to the loopback form of the ingress. The catalog routes have + # no hostname restriction (public by design), so Host: 127.0.0.1 is + # routed identically to obol.stack. + TARGET="${FLOW20_TARGET:-${OBOL_INGRESS_URL/obol.stack/127.0.0.1}}" + TARGET="${TARGET%/}" + catalog_code="" + # Small retry: the controller-served catalog can lag right after the + # route is wired (same first-request race as flows 07/08). 
+ for _ in 1 2 3 4 5 6; do + catalog_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 8 \ + "$TARGET/api/services.json" 2>/dev/null) || true + [ "$catalog_code" = "200" ] && break + sleep 5 + done + if [ "$catalog_code" = "200" ]; then + pass "catalog surface up at $TARGET/api/services.json" + SELF_SMOKE_READY="1" + else + fail "catalog surface not reachable at $TARGET/api/services.json (HTTP ${catalog_code:-none})" + fi +fi + +RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)-flow20" +RUN_DIR="" +step "smoke.py probe ${TARGET:-} (run id $RUN_ID)" +if [ -z "$SELF_SMOKE_READY" ]; then + skip "self-smoke target unavailable — probe skipped" +elif [ ! -f "$SMOKE_PY" ]; then + skip "smoke.py missing — probe skipped (§1 already failed)" +else + probe_rc=0 + # Worst case per the skill budget: 8 probes x 8s + report writes; 180s + # is comfortably above that without masking a hang. -B because smoke.py + # imports gh_post — without it python drops a __pycache__/ dir into + # internal/embed/skills/smoke-test/scripts/ in this checkout. + probe_out=$(cd "$RUN_ROOT" && run_with_timeout 180 \ + python3 -B "$SMOKE_PY" probe "$TARGET" --run-id "$RUN_ID" 2>&1) || probe_rc=$? 
+ if [ "$probe_rc" -eq 0 ]; then + RUN_DIR=$(find "$RUN_ROOT/smoke" -type d -name "$RUN_ID" 2>/dev/null | head -1 || true) + if [ -n "$RUN_DIR" ] && [ -f "$RUN_DIR/results.json" ] && [ -f "$RUN_DIR/report.md" ]; then + pass "probe wrote $RUN_DIR/{report.md,results.json}" + else + RUN_DIR="" + fail "probe exited 0 but run dir/artifacts not found under $RUN_ROOT/smoke — ${probe_out:0:200}" + fi + else + fail "smoke.py probe exited $probe_rc — ${probe_out:0:300}" + fi +fi + +step "results.json + report.md well-formed (scores in range, reportSha256 matches file)" +if [ -z "$RUN_DIR" ]; then + skip "no probe artifacts — validation skipped" +else + validate_rc=0 + validate_out=$(python3 - "$RUN_DIR" "$RUN_ID" <<'PY' 2>&1 +import hashlib +import json +import re +import sys + +run_dir, run_id = sys.argv[1], sys.argv[2] + +with open(run_dir + "/results.json", encoding="utf-8") as fh: + results = json.load(fh) +with open(run_dir + "/report.md", "rb") as fh: + report = fh.read() + +assert report.startswith(b"# Obol Stack Smoke Report"), "report.md missing canonical header" + +assert results.get("version") == "obol/smoke-test/v1", f"version={results.get('version')!r}" +assert results.get("runId") == run_id, f"runId={results.get('runId')!r} want {run_id!r}" +assert isinstance(results.get("target"), str) and results["target"], "target missing" +assert not results["target"].endswith("/"), "target not normalized (trailing slash)" + +checks = results.get("checks") +assert isinstance(checks, list) and checks, "checks missing/empty" +for c in checks: + assert isinstance(c, dict) and c.get("name"), "check entry malformed" + assert isinstance(c.get("ok"), bool), f"check {c.get('name')}: ok not bool" + assert isinstance(c.get("ms"), (int, float)) and c["ms"] >= 0, f"check {c.get('name')}: ms invalid" + +names = {c["name"] for c in checks} +assert "skill-md" in names, "skill-md check missing" +assert "services-json" in names, "services-json check missing" + +counted = [c for c in checks 
if not c.get("informational")] +passed, total = results.get("passed"), results.get("total") +assert total == len(counted), f"total={total} != counted checks {len(counted)}" +assert total >= 2, f"total={total} < 2 (skill-md + services-json are always counted)" +assert passed == sum(1 for c in counted if c["ok"]), "passed != recount of ok counted checks" +assert 0 <= passed <= total, f"passed={passed} out of range" + +score255, score100 = results.get("score255"), results.get("score100") +assert score255 == (255 * passed) // total, f"score255={score255} != floor(255*{passed}/{total})" +assert 0 <= score255 <= 255, f"score255={score255} out of range" +assert score100 == (100 * passed) // total, f"score100={score100} != floor(100*{passed}/{total})" +assert 0 <= score100 <= 100, f"score100={score100} out of range" + +sha = results.get("reportSha256", "") +assert re.fullmatch(r"[0-9a-f]{64}", sha or ""), f"reportSha256 not 64 lowercase hex: {sha!r}" +assert sha == hashlib.sha256(report).hexdigest(), "reportSha256 does not match report.md bytes" + +# Probe-only run: permalink stays empty until a post succeeds. +assert results.get("permalink", "") == "", "permalink non-empty before post" + +print(f"OK passed={passed} total={total} score255={score255} score100={score100} sha256={sha[:12]}…") +PY + ) || validate_rc=$? + if [ "$validate_rc" -eq 0 ]; then + pass "artifacts valid — $validate_out" + else + fail "artifact validation failed — ${validate_out:0:300}" + fi +fi + +# §3: GitHub posting — strictly env-gated. CI runs probe-only and must see +# an explicit SKIP here, never an attempted network write. 
+step "Post report to GitHub (gh_post)" +if [ -z "${GITHUB_TOKEN:-}" ] || [ -z "${GITHUB_REPORT_REPO:-}" ]; then + skip "GITHUB_TOKEN / GITHUB_REPORT_REPO not set — GitHub posting step skipped (probe-only mode)" +elif [ -z "$RUN_DIR" ]; then + skip "no successful probe run dir — nothing to post" +else + post_rc=0 + if [ -f "$GH_POST_PY" ]; then + post_out=$(cd "$RUN_ROOT" && run_with_timeout 60 \ + python3 -B "$GH_POST_PY" "$RUN_DIR" 2>&1) || post_rc=$? + else + post_out=$(cd "$RUN_ROOT" && run_with_timeout 60 \ + python3 -B "$SMOKE_PY" post "$RUN_DIR" 2>&1) || post_rc=$? + fi + post_out=$(redact_gh_token "$post_out") + if [ "$post_rc" -ne 0 ]; then + fail "GitHub post exited $post_rc — ${post_out:0:300}" + else + permalink_rc=0 + permalink_out=$(python3 - "$RUN_DIR" <<'PY' 2>&1 +import json +import re +import sys + +with open(sys.argv[1] + "/results.json", encoding="utf-8") as fh: + results = json.load(fh) + +permalink = results.get("permalink", "") +# Commit-pinned blob URL — never a branch-floating html_url. +assert re.match(r"^https://github\.com/[^/]+/[^/]+/blob/[0-9a-f]{7,40}/", permalink or ""), \ + f"permalink not a commit-pinned GitHub blob URL: {permalink!r}" +print(f"OK permalink={permalink}") +PY + ) || permalink_rc=$? + if [ "$permalink_rc" -eq 0 ]; then + pass "report posted — $permalink_out" + else + fail "post succeeded but permalink invalid — ${permalink_out:0:300}" + fi + fi +fi + +# §4: Verdict calldata derivation — `obol smoke calldata` must emit +# validationResponse(bytes32,uint8,string,bytes32,string) calldata +# (selector 0x3d659a96) for the operator to submit with their own wallet. +CALLDATA_OBOL="" +step "Build obol for calldata derivation" +if ! command -v go >/dev/null 2>&1; then + skip "go toolchain not on PATH — calldata steps skipped" +else + build_rc=0 + build_out=$(cd "$OBOL_ROOT" && run_with_timeout 600 \ + go build -o "$RUN_ROOT/obol" ./cmd/obol 2>&1) || build_rc=$? 
+ if [ "$build_rc" -eq 0 ] && [ -x "$RUN_ROOT/obol" ]; then + CALLDATA_OBOL="$RUN_ROOT/obol" + pass "obol built at $RUN_ROOT/obol" + else + fail "go build ./cmd/obol failed — ${build_out:0:300}" + fi +fi + +# Fixed inputs — deterministic across runs so the request-hash stability +# assertion below is meaningful. The response hash is the sha256 of the +# empty string (a recognizable, obviously-synthetic 32-byte value). +FIXED_TARGET="http://obol.stack:8080" +FIXED_RUN_ID="20260101T000000Z-cafe01" +FIXED_RESPONSE_HASH="0xe3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" +FIXED_RESPONSE_URI="https://github.com/example-org/example-reports/blob/0000000000000000000000000000000000000000/reports/obol.stack-8080/${FIXED_RUN_ID}.md" + +step "obol smoke calldata emits selector 0x3d659a96" +if [ -z "$CALLDATA_OBOL" ]; then + skip "obol binary unavailable — calldata derivation skipped" +else + calldata_rc=0 + # Trailing slash on --target on purpose: the request hash must be + # derived from the NORMALIZED target (trailing slash stripped). + calldata_out=$(run_with_timeout 30 "$CALLDATA_OBOL" smoke calldata \ + --target "$FIXED_TARGET/" \ + --run-id "$FIXED_RUN_ID" \ + --response 100 \ + --response-uri "$FIXED_RESPONSE_URI" \ + --response-hash "$FIXED_RESPONSE_HASH" \ + --network base-sepolia 2>&1) || calldata_rc=$? + request_hash=$(echo "$calldata_out" | grep -oE 'Request hash: 0x[0-9a-fA-F]{64}' | head -1 | awk '{print $3}' || true) + if [ "$calldata_rc" -ne 0 ]; then + fail "obol smoke calldata exited $calldata_rc — ${calldata_out:0:300}" + elif [ -z "$request_hash" ] || [ "$request_hash" = "0x0000000000000000000000000000000000000000000000000000000000000000" ]; then + fail "request hash missing or zero — ${calldata_out:0:300}" + elif ! 
echo "$calldata_out" | grep -qE 'ValidationRegistry \(base-sepolia\): 0x[0-9a-fA-F]{40}'; then + fail "ValidationRegistry address line missing — ${calldata_out:0:300}" + elif echo "$calldata_out" | grep -qE 'Calldata: 0x3d659a96[0-9a-fA-F]+'; then + pass "calldata carries validationResponse selector 0x3d659a96 (request hash $request_hash)" + else + fail "calldata missing or wrong selector (want 0x3d659a96) — ${calldata_out:0:300}" + fi +fi + +step "Request hash stable under target normalization (trailing slash)" +if [ -z "$CALLDATA_OBOL" ] || [ -z "${request_hash:-}" ]; then + skip "no baseline request hash — normalization check skipped" +else + norm_rc=0 + norm_out=$(run_with_timeout 30 "$CALLDATA_OBOL" smoke calldata \ + --target "$FIXED_TARGET" \ + --run-id "$FIXED_RUN_ID" \ + --response 100 \ + --response-uri "$FIXED_RESPONSE_URI" \ + --response-hash "$FIXED_RESPONSE_HASH" \ + --network base-sepolia 2>&1) || norm_rc=$? + norm_hash=$(echo "$norm_out" | grep -oE 'Request hash: 0x[0-9a-fA-F]{64}' | head -1 | awk '{print $3}' || true) + if [ "$norm_rc" -eq 0 ] && [ -n "$norm_hash" ] && [ "$norm_hash" = "$request_hash" ]; then + pass "trailing-slash and bare target derive the same request hash" + else + fail "request hash drifted under normalization: with-slash=$request_hash bare=${norm_hash:-none} (exit $norm_rc)" + fi +fi + +step "obol smoke calldata rejects --response > 100" +if [ -z "$CALLDATA_OBOL" ]; then + skip "obol binary unavailable — bounds check skipped" +else + bounds_rc=0 + bounds_out=$(run_with_timeout 30 "$CALLDATA_OBOL" smoke calldata \ + --target "$FIXED_TARGET" \ + --run-id "$FIXED_RUN_ID" \ + --response 101 \ + --network base-sepolia 2>&1) || bounds_rc=$? 
+ if [ "$bounds_rc" -ne 0 ]; then + pass "--response 101 rejected (the deployed registry reverts above 100)" + else + fail "--response 101 was accepted — ${bounds_out:0:200}" + fi +fi + +emit_metrics diff --git a/internal/embed/skills/smoke-test/SKILL.md b/internal/embed/skills/smoke-test/SKILL.md new file mode 100644 index 00000000..be219cc2 --- /dev/null +++ b/internal/embed/skills/smoke-test/SKILL.md @@ -0,0 +1,184 @@ +--- +name: smoke-test +description: "Sellable read-only smoke test of an Obol Stack public surface. The buyer pays per run (x402); the agent GET-probes the target's discovery + payment-gating endpoints, writes a scored report, commits it to the seller-owned public GitHub repo, and hands back the exact `obol smoke calldata` command the OPERATOR runs to derive the ERC-8004 validationResponse calldata. The agent never pays, never signs, never submits chain transactions." +metadata: { "openclaw": { "emoji": "🔍", "requires": { "bins": ["python3"] } } } +--- + +# Smoke Test + +Probe a TARGET Obol Stack public surface **read-only**, score it, publish the +report, and emit the verdict-grounding command. You are the seller side of a +paid smoke-test service: a buyer paid (via x402) for one run against one target. + +Hard rules — these are the product's trust model, never break them: + +- **GET only.** Never send an `X-PAYMENT` header, never sign anything, never + settle anything, never submit a chain transaction. The OPERATOR submits the + on-chain validationResponse from their own wallet — identical to the bounty + stance (the agent/controller never signs validation txs). +- **Never probe cross-host.** The scripts reduce catalog endpoints to their + path and re-join onto the target base URL. Don't hand-probe URLs from the + target's responses. +- **Never echo `GITHUB_TOKEN`** (or any `Authorization` header), never pass it + on a command line. The scripts read it from env only and redact it from + errors. 
To check it's configured, test presence only: + `[ -n "$GITHUB_TOKEN" ] && [ -n "$GITHUB_REPORT_REPO" ] && echo configured || echo missing` +- **At most 2 GitHub writes per run** (report.md + best-effort latest.md); + `results.json` is never committed. + +## Inputs (from the buyer message) + +The buyer message looks like `smoke-test <target>`, optionally with a +run id. + +- **target** (required): an absolute http(s) base URL, e.g. + `https://<host>` or `http://obol.stack:8080`. If the buyer gave a + bare host, prepend the scheme (`https://` for public hostnames, `http://` + for local stack addresses) BEFORE running the script — the normalized + target (whitespace-stripped, trailing `/` stripped) is hashed into the + on-chain requestHash, so it must be unambiguous. +- **run id** (optional): must match `^[A-Za-z0-9._-]+$`. When absent the + script generates `<UTC timestamp>-<6 hex>`. + +## Run procedure — TWO separate terminal calls + +Terminal calls on CRD agents time out at 80s. The probe alone can take up to +~60s (up to 8 checks × 8s). **Never combine probe and post in one call.** + +**Call 1 — probe (no network writes):** + +```bash +python3 ${OBOL_SKILLS_DIR:-/data/.hermes/obol-skills}/smoke-test/scripts/smoke.py probe <target> [--run-id <run-id>] +``` + +Prints `results.json` to stdout and writes +`./smoke/<target-host>/<run-id>/{report.md,results.json}` in the workspace +(`<target-host>` = lowercase hostname with `:` → `-`, e.g. +`obol.stack:8080` → `obol.stack-8080`). Exit 0 even when checks fail — the +score IS the verdict. Non-zero only on operational errors. + +**Call 2 — post (only when BOTH `GITHUB_TOKEN` and `GITHUB_REPORT_REPO` are set):** + +```bash +python3 ${OBOL_SKILLS_DIR:-/data/.hermes/obol-skills}/smoke-test/scripts/gh_post.py ./smoke/<target-host>/<run-id> +``` + +Commits `report.md` to `reports/<target-host>/<run-id>.md` in the seller repo, +updates the local `results.json` with the commit-pinned permalink, best-effort +updates `reports/<target-host>/latest.md`.
The only stdout payload lines are: + +``` +permalink: https://github.com/<owner>/<repo>/blob/<commit-sha>/reports/<target-host>/<run-id>.md +content-sha: <blob-sha> +``` + +If the GitHub env is absent, **degrade gracefully**: skip Call 2, tell the +buyer the report is local-only (no permalink), and still return the full +results + calldata command (without `--response-uri`). + +If Call 2 fails (non-zero exit), the report stays local and `post` is +re-runnable: `python3 .../scripts/smoke.py post ./smoke/<target-host>/<run-id>` +(prints the updated results.json). + +## What gets probed + +All checks are GET-only, 8s timeout, 1 MiB body cap, no redirects, User-Agent +`obol-smoke-test/1.0 (+https://github.com/ObolNetwork/obol-stack)`: + +1. `skill-md` — `/skill.md` → 200 + non-empty body (counted) +2. `services-json` — `/api/services.json` → 200 + bare JSON **list** + of objects with non-empty string `name` and `endpoint` (counted; an empty + catalog passes) +3. `x402-402:<name>` — per advertised service (first 5, sorted by name), + GET the endpoint's **path** on the target → 402 with a valid x402 body: + `x402Version` present, non-empty `accepts`, each entry with non-empty + `scheme`/`network`, `0x` + 40-hex `payTo`/`asset`, and a positive digits-only + `maxAmountRequired` or `amount` (one counted check per service) +4. `agent-registration` — `/.well-known/agent-registration.json` → + 200 + JSON object (**informational** — excluded from passed/total/score) + +Scoring over counted checks only: `score100 = floor(100*passed/total)` (the +on-chain value — the deployed registry rejects responses above 100) and +`score255 = floor(255*passed/total)` (off-chain field kept in results.json). + +## Reply to the buyer + +After the run, reply with — in this order: + +1. The check table (from `report.md`): check name, ok, latency, detail. +2. The score line: `<passed>/<total> checks passed — score <score100>/100` + (mention `score255` from results.json as the off-chain value). +3.
The GitHub permalink (when posted) and the `reportSha256` from + results.json (sha256 of the exact committed `report.md` bytes). +4. The full `results.json` content. +5. The EXACT command the operator runs to derive the ERC-8004 + validationResponse calldata (fill in the real values; the agent itself + NEVER runs this and never submits the transaction): + +```bash +obol smoke calldata \ + --target "<target>" \ + --run-id "<run-id>" \ + --response <score100> \ + --response-uri "<permalink>" \ + --response-hash 0x<reportSha256> \ + --network base-sepolia +``` + +Notes for that command: + +- It derives `requestHash = keccak256("obol/smoke-test/v1|<target>|<run-id>")` — keccak256 is computed by the CLI, not in-pod + (there is no reliable in-pod keccak; `hashlib.sha3_256` is NIST SHA-3, NOT + keccak256). That is why `requestHash` is deliberately absent from + results.json. +- `--response` is **score100** (0–100), not score255. +- `--response-hash` is `0x` + the 64-hex `reportSha256` (sha256 of the + committed report.md bytes). Omit `--response-uri`/`--response-hash` when the + GitHub post didn't run (a zero response hash is allowed). +- The CLI prints the ValidationRegistry address + calldata; the operator + submits with THEIR wallet. + +## Seller/operator setup (one-time, host side — not the agent) + +GitHub credentials ride the existing `hermes-env` Secret (already whitelisted +by the admission policy and RBAC — do NOT invent a new Secret name): + +```bash +obol kubectl -n agent-<name> create secret generic hermes-env \ + --from-literal=GITHUB_TOKEN=<fine-grained-PAT> \ + --from-literal=GITHUB_REPORT_REPO=<owner>/<repo> \ + --dry-run=client -o yaml | obol kubectl apply -f - +obol kubectl -n agent-<name> rollout restart deploy/hermes +``` + +**Token scope is the blast radius.** The buyer drives a prompt-injectable +agent that holds this token in env, so it MUST be a fine-grained PAT scoped to +ONLY the one public report repo, with `contents: read+write` and nothing +else. Accepted v0 worst case: an attacker writes junk to that one public +repo.
Never use a classic PAT or broader scopes. The token lives only in +Secret data — never in the Agent CR spec/annotations/status. + +Sell the agent: + +```bash +obol agent new --skills smoke-test --objective "Paid read-only smoke tests of Obol Stack public surfaces" +obol sell agent --per-request <price> --chain <chain> --pay-to 0x<address> +``` + +Buyers reach it via `buy.py pay-agent --model <model> --message "smoke-test <target>"` +(streaming). v0: no buyer token handoff — reports always land in the +seller-owned repo. + +## Artifacts + +``` +./smoke/<target-host>/<run-id>/ +├── report.md # canonical committed bytes; sha256 = reportSha256 +└── results.json # version obol/smoke-test/v1; stays local + in chat reply +``` + +results.json fields: `version`, `target` (normalized), `runId`, `timestamp`, +`checks[]` (`name`, `ok`, `detail`, `ms`, optional `informational`), +`passed`, `total`, `score255`, `score100`, `reportSha256` (64 hex, no 0x), +`permalink` (empty until post succeeds). diff --git a/internal/embed/skills/smoke-test/scripts/gh_post.py b/internal/embed/skills/smoke-test/scripts/gh_post.py new file mode 100644 index 00000000..54438e93 --- /dev/null +++ b/internal/embed/skills/smoke-test/scripts/gh_post.py @@ -0,0 +1,338 @@ +#!/usr/bin/env python3 +"""gh_post.py — commit a smoke report.md to the seller-owned public GitHub repo. + +Posting contract: + - Base https://api.github.com (override with GITHUB_API_BASE for tests only). + - Repo from env GITHUB_REPORT_REPO, validated against + ^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$. + - Token from env GITHUB_TOKEN ONLY. By construction this script never + prints headers or env values, never puts the token in argv, and redacts + the token from every error string before it can reach stderr. + - Headers on every call: Authorization Bearer, Accept + application/vnd.github+json, X-GitHub-Api-Version 2022-11-28, and the + obol-smoke-test User-Agent.
+ - Redirects are NEVER followed (the default opener would replay the + Authorization header to the redirect target, even cross-host); a 3xx + from the API is surfaced as the final status and is a hard failure. + - Path: reports/<target-host>/<run-id>.md (target-host = lowercase hostname + with ":" rewritten to "-"). + - Create-or-update: GET contents for the existing blob sha (a read, not a + write; other-than-200/404 retried once then abort), then PUT. On 409 + re-GET the sha once and retry the PUT once. On 403/429 honor Retry-After + (fallback: x-ratelimit-reset delta), sleep min(value, 30s), max 2 + retries. On 5xx/connection errors exponential 2s/4s, max 2 retries. + Total post budget 25s; on exhaustion exit non-zero with a re-run hint. + - Permalink = https://github.com/{o}/{r}/blob/{PUT .commit.sha}/{path} + (commit-pinned, NOT the branch-floating .content.html_url). + - Write #2 (best-effort, failure never fails the run): + reports/<target-host>/latest.md with only runId, score line, permalink. + At most 2 writes per run; results.json is NEVER committed. + +Usage: + GITHUB_TOKEN=... GITHUB_REPORT_REPO=owner/repo \ + python3 gh_post.py <run-dir> + +stdout payload is exactly two lines (everything else goes to stderr): + permalink: <commit-pinned blob URL> + content-sha: <blob sha> + +Exit codes: 0 on success (even if the best-effort latest.md write failed), +non-zero when the report could not be committed (report stays local; `post` +is re-runnable). + +Stdlib only: argparse/base64/json/os/re/sys/time/urllib. +""" + +import argparse +import base64 +import json +import os +import re +import sys +import time +import urllib.error +import urllib.parse +import urllib.request + +# Shared normalization with the probe — same skill scripts/ dir.
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from smoke import USER_AGENT, host_slug # noqa: E402 + +API_BASE = os.environ.get("GITHUB_API_BASE", "https://api.github.com").rstrip("/") +REPO_RE = re.compile(r"^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$") +POST_BUDGET_SECONDS = 25 +MAX_SLEEP_SECONDS = 30 + + +class PostError(Exception): + """Operational posting failure. Messages are pre-redacted.""" + + +class _NoRedirect(urllib.request.HTTPRedirectHandler): + """Never follow redirects: the default handler re-sends every request + header — including Authorization — to the redirect target, even + cross-host, which would leak the token (mirrors smoke.py's _NoRedirect; + here the request carries the only credential). A 3xx comes back as the + final status and the retry ladder treats it as a hard failure.""" + + def redirect_request(self, req, fp, code, msg, headers, newurl): + return None + + +_OPENER = urllib.request.build_opener(_NoRedirect) + + +def _log(msg): + print(msg, file=sys.stderr) + + +def _redact(text, token): + text = str(text) + return text.replace(token, "[REDACTED]") if token else text + + +def _remaining(deadline): + return deadline - time.monotonic() + + +def _check_deadline(deadline): + if _remaining(deadline) <= 0: + raise PostError( + "post budget (%ds) exhausted; report remains local — re-run `post `" + % POST_BUDGET_SECONDS + ) + + +def _sleep_within(seconds, deadline): + seconds = min(seconds, MAX_SLEEP_SECONDS) + if seconds >= _remaining(deadline): + raise PostError( + "post budget (%ds) exhausted while backing off; report remains local — " + "re-run `post `" % POST_BUDGET_SECONDS + ) + time.sleep(seconds) + + +def _retry_after_seconds(headers): + """Retry-After seconds, falling back to the x-ratelimit-reset delta.""" + raw = headers.get("Retry-After") or headers.get("retry-after") + if raw: + try: + return max(1, int(float(raw))) + except ValueError: + pass + reset = headers.get("x-ratelimit-reset") or 
headers.get("X-RateLimit-Reset") + if reset: + try: + return max(1, int(float(reset)) - int(time.time())) + except ValueError: + pass + return 2 + + +def _gh_request(method, url, token, payload=None, deadline=None): + """One GitHub API call. Returns (status, headers_dict, body_bytes). + status == 0 means no HTTP response (connection-level failure); the body + then carries a redacted reason. Never raises, never logs headers.""" + headers = { + "Authorization": "Bearer " + token, + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + "User-Agent": USER_AGENT, + } + data = None + if payload is not None: + data = json.dumps(payload).encode("utf-8") + headers["Content-Type"] = "application/json" + timeout = 10.0 + if deadline is not None: + timeout = max(1.0, min(10.0, _remaining(deadline))) + req = urllib.request.Request(url, data=data, method=method, headers=headers) + try: + with _OPENER.open(req, timeout=timeout) as resp: + return resp.getcode(), dict(resp.headers), resp.read() + except urllib.error.HTTPError as exc: + try: + body = exc.read() + except Exception: + body = b"" + return exc.code, dict(exc.headers or {}), body + except Exception as exc: # URLError, timeout, ConnectionError, ... + return 0, {}, _redact(exc, token).encode("utf-8") + + +def _body_snippet(body, token): + return _redact(body.decode("utf-8", "replace")[:200], token) + + +def _contents_url(owner_repo, path): + return "%s/repos/%s/contents/%s" % (API_BASE, owner_repo, urllib.parse.quote(path, safe="/")) + + +def _get_existing_sha(owner_repo, path, token, deadline): + """Existing blob sha for create-or-update. 
200 -> sha, 404 -> None, + anything else retried once then abort.""" + url = _contents_url(owner_repo, path) + for attempt in (1, 2): + _check_deadline(deadline) + status, _, body = _gh_request("GET", url, token, deadline=deadline) + if status == 200: + try: + return json.loads(body).get("sha") or None + except ValueError: + return None + if status == 404: + return None + if attempt == 1: + continue + raise PostError( + "GET contents %s failed (status %s): %s" % (path, status, _body_snippet(body, token)) + ) + + +def _put_file(owner_repo, path, message, content_bytes, sha, token, deadline): + """PUT one file via the contents API with the contract's retry ladder. + Returns the parsed PUT response JSON.""" + url = _contents_url(owner_repo, path) + body = {"message": message, "content": base64.b64encode(content_bytes).decode("ascii")} + if sha: + body["sha"] = sha + rate_retries = 0 + server_retries = 0 + conflict_retried = False + while True: + _check_deadline(deadline) + status, headers, raw = _gh_request("PUT", url, token, payload=body, deadline=deadline) + if status in (200, 201): + try: + return json.loads(raw) + except ValueError: + raise PostError("PUT %s returned %d but unparseable JSON" % (path, status)) + if status == 409 and not conflict_retried: + conflict_retried = True + new_sha = _get_existing_sha(owner_repo, path, token, deadline) + if new_sha: + body["sha"] = new_sha + else: + body.pop("sha", None) + continue + if status in (403, 429) and rate_retries < 2: + rate_retries += 1 + _sleep_within(_retry_after_seconds(headers), deadline) + continue + if (status >= 500 or status == 0) and server_retries < 2: + server_retries += 1 + _sleep_within(2 ** server_retries, deadline) # 2s, then 4s + continue + raise PostError( + "PUT %s failed (status %s): %s" % (path, status, _body_snippet(raw, token)) + ) + + +def post_run(run_dir): + """Commit /report.md per the contract, update results.json with + the commit-pinned permalink, best-effort update latest.md. 
+ Returns (results_dict, permalink, content_sha). Raises PostError on + operational failure (report stays local; re-runnable).""" + token = os.environ.get("GITHUB_TOKEN", "").strip() + owner_repo = os.environ.get("GITHUB_REPORT_REPO", "").strip() + if not token: + raise PostError("GITHUB_TOKEN is not set (provision it via the hermes-env Secret)") + if not REPO_RE.match(owner_repo): + raise PostError( + "GITHUB_REPORT_REPO=%r is not / " + "(^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$)" % owner_repo + ) + + results_path = os.path.join(run_dir, "results.json") + report_path = os.path.join(run_dir, "report.md") + try: + with open(results_path, "r", encoding="utf-8") as fh: + results = json.load(fh) + with open(report_path, "rb") as fh: + report_bytes = fh.read() + except (OSError, ValueError) as exc: + raise PostError("cannot load run dir %s: %s" % (run_dir, _redact(exc, token))) + + run_id = str(results.get("runId", "")).strip() + target = str(results.get("target", "")).strip() + if not run_id or not target: + raise PostError("results.json missing runId/target — re-run the probe") + passed = int(results.get("passed", 0)) + total = int(results.get("total", 0)) + score100 = int(results.get("score100", 0)) + + target_host = host_slug(target) + report_repo_path = "reports/%s/%s.md" % (target_host, run_id) + deadline = time.monotonic() + POST_BUDGET_SECONDS + + # Write #1 — the report itself (create-or-update). 
+ _log("posting %s to %s:%s" % (report_path, owner_repo, report_repo_path)) + sha = _get_existing_sha(owner_repo, report_repo_path, token, deadline) + put = _put_file( + owner_repo, + report_repo_path, + "smoke: %s %s %d/%d" % (target_host, run_id, passed, total), + report_bytes, + sha, + token, + deadline, + ) + try: + commit_sha = put["commit"]["sha"] + content_sha = put["content"]["sha"] + except (KeyError, TypeError): + raise PostError("PUT response missing commit/content sha") + + permalink = "https://github.com/%s/blob/%s/%s" % (owner_repo, commit_sha, report_repo_path) + results["permalink"] = permalink + try: + with open(results_path, "w", encoding="utf-8") as fh: + fh.write(json.dumps(results, indent=2) + "\n") + except OSError as exc: + _log("warning: could not update results.json: %s" % _redact(exc, token)) + + # Write #2 — best-effort latest.md pointer; failure does NOT fail the run. + latest_repo_path = "reports/%s/latest.md" % target_host + latest_bytes = ( + "Run ID: %s\nResult: %d/%d checks passed — score %d/100\nReport: %s\n" + % (run_id, passed, total, score100, permalink) + ).encode("utf-8") + try: + latest_sha = _get_existing_sha(owner_repo, latest_repo_path, token, deadline) + _put_file( + owner_repo, + latest_repo_path, + "smoke: %s latest %s" % (target_host, run_id), + latest_bytes, + latest_sha, + token, + deadline, + ) + except PostError as exc: + _log("warning: latest.md update skipped: %s" % exc) + + return results, permalink, content_sha + + +def main(argv=None): + parser = argparse.ArgumentParser( + prog="gh_post.py", + description="Commit a smoke run's report.md to the seller-owned public report repo.", + ) + parser.add_argument("run_dir", help="run dir written by smoke.py probe, e.g. 
./smoke//") + args = parser.parse_args(argv) + try: + _, permalink, content_sha = post_run(args.run_dir) + except PostError as exc: + _log("error: %s" % exc) + _log("report remains local; re-run: python3 gh_post.py %s" % args.run_dir) + return 1 + # The ONLY stdout payload lines: + print("permalink: %s" % permalink) + print("content-sha: %s" % content_sha) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/internal/embed/skills/smoke-test/scripts/smoke.py b/internal/embed/skills/smoke-test/scripts/smoke.py new file mode 100644 index 00000000..c476db68 --- /dev/null +++ b/internal/embed/skills/smoke-test/scripts/smoke.py @@ -0,0 +1,523 @@ +#!/usr/bin/env python3 +"""smoke.py — read-only smoke probe of an Obol Stack public surface. + +Probes a TARGET base URL's public discovery + payment-gating surface with +plain GETs, writes a markdown report + machine-readable results, and (via +`post` / gh_post.py) commits the report to a seller-owned public GitHub repo. + +Safety contract (non-negotiable): + - GET only. NEVER sends an X-PAYMENT header, never signs anything, never + settles anything, never submits chain transactions. + - Never probes a cross-host URL: catalog endpoints are reduced to their + PATH and re-joined onto the target base URL. + - Response bodies capped at 1 MiB; per-check timeout 8s; one retry on + connection-level errors only (refused/reset — fast failures), never on + timeouts or HTTP errors. + - Redirects are not followed (a 3xx counts as the final status). + +Checks (counted unless marked informational): + 1. skill-md GET /skill.md -> 200 + non-empty body + 2. services-json GET /api/services.json -> 200 + bare JSON LIST of + objects with non-empty string `name` and `endpoint` + 3. 
x402-402: per advertised service (first 5 sorted by name): + GET -> 402 + valid x402 body + (x402Version present; accepts non-empty; each entry has + scheme/network non-empty, payTo/asset 0x40-hex, and a + positive digits-only maxAmountRequired OR amount) + 4. agent-registration GET /.well-known/agent-registration.json + -> 200 + JSON object (INFORMATIONAL — excluded from score) + +Scoring: passed/total over counted checks only (total >= 2). + score255 = floor(255*passed/total) (off-chain, task-spec field) + score100 = floor(100*passed/total) (THE on-chain value — registry caps at 100) + +Usage: + python3 smoke.py probe [--run-id ] [--out-dir ] + python3 smoke.py post + python3 smoke.py run [--run-id ] [--out-dir ] + +`probe` performs NO network writes; it writes report.md + results.json under +/// (default ./smoke/...) and prints results.json +to stdout. `post` commits an existing report to GitHub (env GITHUB_TOKEN + +GITHUB_REPORT_REPO required) and prints the updated results.json. `run` is +probe+post one-shot for host/manual use; it degrades to probe-only when the +GitHub env is absent. + +Exit codes: 0 even when checks fail (the score carries the verdict); +non-zero only on operational errors (bad args, unwritable workspace, +GitHub post failure). + +Stdlib only: argparse/hashlib/json/re/secrets/socket/time/urllib. +""" + +import argparse +import hashlib +import json +import os +import re +import secrets +import socket +import sys +import time +import urllib.error +import urllib.parse +import urllib.request + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +VERSION = "obol/smoke-test/v1" + +# Same Cloudflare-WAF-safe UA convention as buy-x402's buy.py. 
USER_AGENT = "obol-smoke-test/1.0 (+https://github.com/ObolNetwork/obol-stack)"

PER_CHECK_TIMEOUT = 8  # seconds, per attempt
MAX_BODY_BYTES = 1024 * 1024  # 1 MiB body cap on every response
MAX_SERVICES = 5  # probe at most the first 5 services sorted by name

ADDR_RE = re.compile(r"^0x[0-9a-fA-F]{40}$")
DIGITS_RE = re.compile(r"^[0-9]+$")
RUN_ID_RE = re.compile(r"^[A-Za-z0-9._-]+$")
MAX_DETAIL_LEN = 200  # max characters kept in a check's `detail` string

# Slug substituted when a target yields no usable host component (empty or
# dots-only), so the slug can never be "", "." or ".." when used as a path
# segment.
_FALLBACK_HOST_SLUG = "unknown-host"


def log(msg):
    """Diagnostics go to stderr; stdout is reserved for results.json."""
    print(msg, file=sys.stderr)


# ---------------------------------------------------------------------------
# Normalization (MUST stay in lockstep with Go: erc8004 normalizeSmokeTarget)
# ---------------------------------------------------------------------------

def normalize_target(url):
    """Canonicalize the target base URL: strip surrounding whitespace, then
    strip trailing slashes (identical to the Go-side normalization).

    The normalized form is what `obol smoke calldata` hashes into the
    ERC-8004 requestHash preimage
    ("obol/smoke-test/v1|<normalized-target>|<run-id>"). This script never
    computes keccak256 itself (no reliable in-pod keccak; hashlib.sha3_256
    is NIST SHA-3, NOT keccak256).
    """
    return url.strip().rstrip("/")


def host_slug(target):
    """Return a filesystem/URL-path-safe slug for the target's host.

    The netloc is lowercased, any "user:pass@" prefix is dropped, and every
    character outside [a-z0-9._-] (notably ":") is rewritten to "-", e.g.
    obol.stack:8080 -> obol.stack-8080. Used for the local run dir AND the
    GitHub report path (gh_post.py imports this — keep behavior stable).

    An empty or dots-only host (e.g. "http://" or "http://..") maps to
    "unknown-host": the slug is used as a single path segment, and "" / "."
    / ".." would collapse or escape the per-host report directory.
    """
    netloc = urllib.parse.urlsplit(target).netloc
    host = netloc.rsplit("@", 1)[-1].lower()
    slug = re.sub(r"[^a-z0-9._-]", "-", host)
    if not slug.strip("."):
        return _FALLBACK_HOST_SLUG
    return slug


def default_run_id():
    """Return "<UTC timestamp YYYYmmddTHHMMSSZ>-<6 lowercase hex>"."""
    return time.strftime("%Y%m%dT%H%M%SZ", time.gmtime()) + "-" + secrets.token_hex(3)


# ---------------------------------------------------------------------------
# HTTP (GET only — by construction this module cannot send a payment)
# ---------------------------------------------------------------------------

class _NoRedirect(urllib.request.HTTPRedirectHandler):
    """Never follow redirects: a redirect could send the probe cross-host.
    A 3xx is returned as the final status of the check."""

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        # Returning None makes urllib surface the 3xx as an HTTPError, which
        # _fetch reports as the final status.
        return None


_OPENER = urllib.request.build_opener(_NoRedirect)


def _fetch(url):
    """GET url. Returns (status, body_bytes, error_str). Never raises.

    status == 0 with a non-empty error_str means no usable HTTP response.
    One retry on connection-level errors only (ConnectionError — refused /
    reset fail fast); timeouts and DNS failures are NOT retried so the
    worst-case probe budget stays bounded under the agent's 80s terminal
    timeout.

    Malformed URLs and protocol-level failures (http.client.HTTPException,
    ValueError/UnicodeError — e.g. a catalog path containing a space, a bad
    status line, an un-encodable host) are reported as a failed fetch
    instead of propagating: the URL is partly built from the untrusted
    catalog, and one bad entry must not abort the whole probe.
    """
    # Function-scope import: urllib.request already loads http.client, and
    # only this function needs its exception base class.
    import http.client

    try:
        req = urllib.request.Request(
            url,
            method="GET",
            headers={"User-Agent": USER_AGENT, "Accept": "*/*"},
        )
    except ValueError as exc:
        return 0, b"", "invalid URL: %s" % exc

    for attempt in (1, 2):
        may_retry = attempt == 1
        try:
            with _OPENER.open(req, timeout=PER_CHECK_TIMEOUT) as resp:
                return resp.getcode(), resp.read(MAX_BODY_BYTES), ""
        except urllib.error.HTTPError as exc:
            # Any non-2xx (including un-followed 3xx) is a real HTTP answer.
            try:
                body = exc.read(MAX_BODY_BYTES)
            except Exception:
                body = b""
            return exc.code, body, ""
        except urllib.error.URLError as exc:
            reason = getattr(exc, "reason", exc)
            if may_retry and isinstance(reason, ConnectionError):
                time.sleep(1)
                continue
            if isinstance(reason, (socket.timeout, TimeoutError)):
                return 0, b"", "timeout after %ds" % PER_CHECK_TIMEOUT
            return 0, b"", "connection failed: %s" % reason
        except (socket.timeout, TimeoutError):
            return 0, b"", "timeout after %ds" % PER_CHECK_TIMEOUT
        except ConnectionError as exc:
            if may_retry:
                time.sleep(1)
                continue
            return 0, b"", "connection failed: %s" % exc
        except OSError as exc:
            return 0, b"", "network error: %s" % exc
        except (http.client.HTTPException, ValueError) as exc:
            # Not OSError subclasses, so urllib does not wrap them.
            return 0, b"", "invalid HTTP exchange: %s" % exc
    # Defensive: every path of the second attempt returns above.
    return 0, b"", "connection failed: retry budget exhausted"


def _clip(detail):
    """Stringify detail and cap it at MAX_DETAIL_LEN characters, marking a
    truncation with a trailing ellipsis."""
    detail = str(detail)
    if len(detail) > MAX_DETAIL_LEN:
        detail = detail[: MAX_DETAIL_LEN - 1] + "…"
    return detail


def _check(name, ok, detail, ms, informational=False):
    """Build one check record for results.json. The `informational` key is
    only present (and True) for checks excluded from the score."""
    entry = {"name": name, "ok": bool(ok), "detail": _clip(detail), "ms": int(ms)}
    if informational:
        entry["informational"] = True
    return entry


def _timed(name, fn, informational=False):
    """Run fn() -> (ok, detail, extra) and return (check_record, extra).

    ms = wall-clock per check (includes the single connection-error retry).
    """
    t0 = time.monotonic()
    ok, detail, extra = fn()
    ms = round((time.monotonic() - t0) * 1000)
    return _check(name, ok, detail, ms, informational=informational), extra


#
# ---------------------------------------------------------------------------
# Checks
# ---------------------------------------------------------------------------

def _load_json(body):
    """Decode body (UTF-8, undecodable bytes replaced) and parse it as JSON.

    Returns (value, None) on success or (None, exc) on failure. Besides
    ValueError this also traps RecursionError: the body comes from the
    probed (untrusted) server, and a deeply nested document must count as a
    failed check rather than abort the probe.
    """
    try:
        return json.loads(body.decode("utf-8", "replace")), None
    except (ValueError, RecursionError) as exc:
        return None, exc


def check_skill_md(target):
    """Counted check: GET <target>/skill.md must be 200 with a body that is
    non-empty after stripping whitespace. Returns the check record."""

    def run():
        status, body, err = _fetch(target + "/skill.md")
        if err:
            return False, err, None
        if status != 200:
            return False, "expected 200, got %d" % status, None
        if not body.decode("utf-8", "replace").strip():
            return False, "200 but body empty after strip", None
        return True, "200, %d bytes" % len(body), None

    return _timed("skill-md", run)[0]


def check_services_json(target):
    """Counted check: GET <target>/api/services.json must be 200 with a bare
    JSON list of objects, each with non-empty string `name` and `endpoint`.

    Returns (check, services). services is the validated advertised list
    (possibly empty) when the check passed, else [].
    """

    def run():
        status, body, err = _fetch(target + "/api/services.json")
        if err:
            return False, err, []
        if status != 200:
            return False, "expected 200, got %d" % status, []
        parsed, json_err = _load_json(body)
        if json_err is not None:
            return False, "invalid JSON: %s" % json_err, []
        # The catalog is a BARE JSON array of entries — not {"services": [...]}.
        if not isinstance(parsed, list):
            return False, "top-level JSON is not a list", []
        for i, entry in enumerate(parsed):
            if not isinstance(entry, dict):
                return False, "entry %d is not an object" % i, []
            name = entry.get("name")
            endpoint = entry.get("endpoint")
            if not isinstance(name, str) or not name.strip():
                return False, "entry %d missing non-empty string `name`" % i, []
            if not isinstance(endpoint, str) or not endpoint.strip():
                return False, "entry %d (%s) missing non-empty string `endpoint`" % (i, name), []
        return True, "200, %d service(s) advertised" % len(parsed), parsed

    return _timed("services-json", run)


def _validate_accepts_entry(entry, idx):
    """Returns failure reason or '' for one entry of the 402 `accepts` list.

    Requires non-empty string scheme/network, payTo/asset that are exactly
    "0x" + 40 hex characters, and a positive digits-only amount. Amount uses
    the same v1/v2 dual lookup as buy.py: maxAmountRequired falling back to
    amount.
    """
    if not isinstance(entry, dict):
        return "accepts[%d] is not an object" % idx
    for field in ("scheme", "network"):
        value = entry.get(field)
        if not isinstance(value, str) or not value.strip():
            return "accepts[%d].%s missing or empty" % (idx, field)
    for field in ("payTo", "asset"):
        value = entry.get(field)
        # fullmatch, not match: with match() the pattern's "$" would also
        # accept an address followed by a trailing newline.
        if not isinstance(value, str) or not ADDR_RE.fullmatch(value):
            return "accepts[%d].%s is not a 0x..40-hex address" % (idx, field)
    raw = entry.get("maxAmountRequired")
    if raw is None or not str(raw).strip():
        raw = entry.get("amount")
    amount = str(raw if raw is not None else "").strip()
    if not DIGITS_RE.fullmatch(amount) or int(amount) <= 0:
        return "accepts[%d] has no positive digits-only maxAmountRequired/amount" % idx
    return ""


def check_service_402(target, service):
    """One counted check per advertised service: an unpaid GET must answer
    402 with a valid x402 body.

    Probes ONLY the path of the catalog endpoint joined onto the target base
    URL — never a cross-host URL the catalog hands us. Returns the check
    record.
    """
    name = service["name"].strip()

    def run():
        # Keep just the path component; scheme/host/query of the advertised
        # endpoint are discarded.
        path = urllib.parse.urlsplit(service["endpoint"].strip()).path
        if not path.startswith("/"):
            path = "/" + path
        status, body, err = _fetch(target + path)
        if err:
            return False, err, None
        if status != 402:
            return False, "expected 402, got %d" % status, None
        parsed, json_err = _load_json(body)
        if json_err is not None:
            return False, "402 body is not JSON: %s" % json_err, None
        if not isinstance(parsed, dict):
            return False, "402 body is not a JSON object", None
        if "x402Version" not in parsed:
            return False, "402 body missing x402Version", None
        accepts = parsed.get("accepts")
        if not isinstance(accepts, list) or not accepts:
            return False, "402 body has no non-empty accepts list", None
        for i, entry in enumerate(accepts):
            reason = _validate_accepts_entry(entry, i)
            if reason:
                return False, reason, None
        return True, "402, %d payment option(s)" % len(accepts), None

    return _timed("x402-402:" + name, run)[0]


def check_agent_registration(target):
    """INFORMATIONAL — recorded but excluded from passed/total/score.

    GET <target>/.well-known/agent-registration.json must be 200 with a JSON
    object body. Returns the check record (flagged informational).
    """

    def run():
        status, body, err = _fetch(target + "/.well-known/agent-registration.json")
        if err:
            return False, err, None
        if status != 200:
            return False, "expected 200, got %d" % status, None
        parsed, json_err = _load_json(body)
        if json_err is not None:
            return False, "invalid JSON: %s" % json_err, None
        if not isinstance(parsed, dict):
            return False, "200 but body is not a JSON object", None
        return True, "200, JSON object", None

    return _timed("agent-registration", run, informational=True)[0]


# ---------------------------------------------------------------------------
# Report rendering
# ---------------------------------------------------------------------------

def _md_cell(text):
    """Make text safe for one markdown table cell: escape pipes and flatten
    CR/LF to spaces so a value cannot break the row."""
    return str(text).replace("|", "\\|").replace("\n", " ").replace("\r", " ")


def build_report(results, probed_count, advertised_count):
    """Render the markdown report for one run.

    results is the results.json dict (reads target, runId, timestamp,
    passed, total, score100, checks). A trailing "Probed X of Y" note is
    added only when more services were advertised than probed. The returned
    text always ends with a single newline.
    """
    lines = [
        "# Obol Stack Smoke Report",
        "",
        "- Target: %s" % results["target"],
        "- Run ID: %s" % results["runId"],
        "- Timestamp: %s" % results["timestamp"],
        "- Result: %d/%d checks passed — score %d/100"
        % (results["passed"], results["total"], results["score100"]),
        "",
        "| Check | OK | Latency (ms) | Detail |",
        "|---|---|---|---|",
    ]
    for check in results["checks"]:
        name = check["name"]
        if check.get("informational"):
            name += " (info)"
        lines.append(
            "| %s | %s | %d | %s |"
            % (_md_cell(name), "yes" if check["ok"] else "no", check["ms"], _md_cell(check["detail"]))
        )
    if advertised_count > probed_count:
        lines.append("")
        lines.append("Probed %d of %d advertised services" % (probed_count, advertised_count))
    return "\n".join(lines) + "\n"


# ---------------------------------------------------------------------------
# Probe driver
#
--------------------------------------------------------------------------- + +def run_probe(target_raw, run_id, out_base): + target = normalize_target(target_raw) + if not target.startswith(("http://", "https://")): + raise SystemExit( + "error: target must be an absolute http(s) URL (got %r) — the " + "normalized target is hashed into the on-chain requestHash, so " + "always pass the scheme explicitly" % target_raw + ) + if run_id is None or not str(run_id).strip(): + run_id = default_run_id() + run_id = str(run_id).strip() + if not RUN_ID_RE.match(run_id) or set(run_id) == {"."}: + # A buyer can suggest the run id; "." / ".." would escape the + # per-run directory under the report root. + raise SystemExit("error: --run-id must match ^[A-Za-z0-9._-]+$ and not be dots-only (got %r)" % run_id) + + log("smoke probe: target=%s run-id=%s" % (target, run_id)) + + checks = [] + checks.append(check_skill_md(target)) + log(" [%s] skill-md: %s" % ("ok" if checks[-1]["ok"] else "FAIL", checks[-1]["detail"])) + + services_check, services = check_services_json(target) + checks.append(services_check) + log(" [%s] services-json: %s" % ("ok" if services_check["ok"] else "FAIL", services_check["detail"])) + + advertised = len(services) if services_check["ok"] else 0 + probed = 0 + if services_check["ok"] and services: + for service in sorted(services, key=lambda s: s["name"])[:MAX_SERVICES]: + check = check_service_402(target, service) + checks.append(check) + probed += 1 + log(" [%s] %s: %s" % ("ok" if check["ok"] else "FAIL", check["name"], check["detail"])) + + info = check_agent_registration(target) + checks.append(info) + log(" [%s] agent-registration (info): %s" % ("ok" if info["ok"] else "FAIL", info["detail"])) + + counted = [c for c in checks if not c.get("informational")] + passed = sum(1 for c in counted if c["ok"]) + total = len(counted) # always >= 2 (skill-md + services-json) + + results = { + "version": VERSION, + "target": target, + "runId": run_id, + "timestamp": 
time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), + "checks": checks, + "passed": passed, + "total": total, + "score255": (255 * passed) // total, + "score100": (100 * passed) // total, + "reportSha256": "", + "permalink": "", + } + + run_dir = os.path.join(out_base, host_slug(target), run_id) + os.makedirs(run_dir, exist_ok=True) + + report = build_report(results, probed, advertised) + report_bytes = report.encode("utf-8") + report_path = os.path.join(run_dir, "report.md") + with open(report_path, "wb") as fh: + fh.write(report_bytes) + # reportSha256 = sha256 over the EXACT bytes written to disk (the same + # bytes gh_post.py base64s into the GitHub PUT). Computed after the final + # report write, before results.json. + results["reportSha256"] = hashlib.sha256(report_bytes).hexdigest() + + with open(os.path.join(run_dir, "results.json"), "w", encoding="utf-8") as fh: + fh.write(json.dumps(results, indent=2) + "\n") + + log("run dir: %s" % os.path.abspath(run_dir)) + return results, run_dir + + +# --------------------------------------------------------------------------- +# Commands +# --------------------------------------------------------------------------- + +def _load_gh_post(): + sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + import gh_post # noqa: E402 (sibling script in this skill) + + return gh_post + + +def cmd_probe(args): + results, _ = run_probe(args.target, args.run_id, args.out_dir) + print(json.dumps(results, indent=2)) + return 0 + + +def cmd_post(args): + gh_post = _load_gh_post() + try: + results, _, _ = gh_post.post_run(args.run_dir) + except gh_post.PostError as exc: + log("error: %s" % exc) + log("report remains local; re-run: python3 smoke.py post %s" % args.run_dir) + return 1 + print(json.dumps(results, indent=2)) + return 0 + + +def cmd_run(args): + results, run_dir = run_probe(args.target, args.run_id, args.out_dir) + if os.environ.get("GITHUB_TOKEN", "").strip() and os.environ.get("GITHUB_REPORT_REPO", "").strip(): 
+ gh_post = _load_gh_post() + try: + results, _, _ = gh_post.post_run(run_dir) + except gh_post.PostError as exc: + print(json.dumps(results, indent=2)) + log("error: %s" % exc) + log("report remains local; re-run: python3 smoke.py post %s" % run_dir) + return 1 + else: + log("GITHUB_TOKEN/GITHUB_REPORT_REPO not set; report kept local (no GitHub post)") + print(json.dumps(results, indent=2)) + return 0 + + +def main(argv=None): + parser = argparse.ArgumentParser( + prog="smoke.py", + description="Read-only smoke probe of an Obol Stack public surface (never pays, never signs).", + ) + sub = parser.add_subparsers(dest="command", required=True) + + p_probe = sub.add_parser("probe", help="probe checks only; writes report.md + results.json, no network writes") + p_probe.add_argument("target", nargs="?", help="target base URL, e.g. https:// or http://obol.stack:8080") + p_probe.add_argument("--target", dest="target_flag", help="alternative to the positional target") + p_probe.add_argument("--run-id", help="run identifier (^[A-Za-z0-9._-]+$); default -<6hex>") + p_probe.add_argument("--out-dir", default="./smoke", help="base output dir (default ./smoke)") + p_probe.set_defaults(func=cmd_probe) + + p_post = sub.add_parser("post", help="commit an existing run dir's report.md to GitHub (env GITHUB_TOKEN + GITHUB_REPORT_REPO)") + p_post.add_argument("run_dir", help="run dir written by probe, e.g. 
./smoke//") + p_post.set_defaults(func=cmd_post) + + p_run = sub.add_parser("run", help="probe + post one-shot (host/manual use; agents should run probe then post)") + p_run.add_argument("target", nargs="?", help="target base URL") + p_run.add_argument("--target", dest="target_flag", help="alternative to the positional target") + p_run.add_argument("--run-id", help="run identifier") + p_run.add_argument("--out-dir", default="./smoke", help="base output dir (default ./smoke)") + p_run.set_defaults(func=cmd_run) + + args = parser.parse_args(argv) + if hasattr(args, "target"): + target = args.target or getattr(args, "target_flag", None) + if not target: + parser.error("a target base URL is required (positional or --target)") + args.target = target + return args.func(args) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/internal/erc8004/smoke.go b/internal/erc8004/smoke.go new file mode 100644 index 00000000..9f50faf8 --- /dev/null +++ b/internal/erc8004/smoke.go @@ -0,0 +1,36 @@ +// Smoke-test ↔ ERC-8004 grounding: the smoke-test request hash binds an +// operator's on-chain validationResponse to one specific (target, run) pair, +// so a published smoke report (committed to a public GitHub repo) can be +// checked against a chain-anchored verdict entry. + +package erc8004 + +import ( + "strings" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +// smokeTestDomain is the versioned domain prefix for smoke-test request +// hashes. Changing it is a breaking change for every published verdict. +const smokeTestDomain = "obol/smoke-test/v1" + +// normalizeSmokeTarget canonicalizes the probed base URL exactly the way the +// in-pod smoke-test skill does (python `.strip().rstrip("/")`): surrounding +// whitespace and trailing slashes never change the request hash. 
+func normalizeSmokeTarget(u string) string { + return strings.TrimRight(strings.TrimSpace(u), "/") +} + +// SmokeTestRequestHash derives the ERC-8004 validation request hash for one +// smoke-test run: keccak256 of the exact ASCII bytes +// "obol/smoke-test/v1||". The CLI +// (`obol smoke calldata`, operator side) and any grounding consumer MUST +// compute this identically. The in-pod skill never computes it — there is no +// reliable keccak256 in the pod's python stdlib (hashlib.sha3_256 is NIST +// SHA-3, not keccak256) — it only echoes the normalized target into +// results.json. +func SmokeTestRequestHash(targetBaseURL, runID string) common.Hash { + return crypto.Keccak256Hash([]byte(smokeTestDomain + "|" + normalizeSmokeTarget(targetBaseURL) + "|" + strings.TrimSpace(runID))) +} diff --git a/internal/erc8004/smoke_test.go b/internal/erc8004/smoke_test.go new file mode 100644 index 00000000..49a63737 --- /dev/null +++ b/internal/erc8004/smoke_test.go @@ -0,0 +1,47 @@ +package erc8004 + +import "testing" + +// TestSmokeTestRequestHash_Golden pins the exact preimage layout +// ("obol/smoke-test/v1||"). The operator +// submits validationResponses against this hash via `obol smoke calldata` and +// grounding consumers match it on-chain — any drift silently breaks +// grounding, so the vector is hardcoded, not recomputed. +func TestSmokeTestRequestHash_Golden(t *testing.T) { + const ( + target = "http://obol.stack:8080" + runID = "20260101T000000Z-ab12cd" + golden = "0x2a28aa12a52a28414de4933bbe8d1e52e42828ba08006748f544596823ce7a57" + ) + + if got := SmokeTestRequestHash(target, runID).Hex(); got != golden { + t.Errorf("SmokeTestRequestHash = %s, want %s", got, golden) + } + + // The target is normalized exactly like the in-pod skill's + // `.strip().rstrip("/")`: trailing-slash and surrounding-whitespace + // variants of the same target MUST hash identically, and a padded runId + // is trimmed. 
+ variants := []struct { + name, target, runID string + }{ + {"trailing slash", target + "/", runID}, + {"double trailing slash", target + "//", runID}, + {"surrounding whitespace", " " + target + " \n", runID}, + {"whitespace + slash", " " + target + "/ ", runID}, + {"padded runId", target, " " + runID + "\t"}, + } + for _, v := range variants { + if got := SmokeTestRequestHash(v.target, v.runID).Hex(); got != golden { + t.Errorf("%s: hash = %s, want %s (normalization must be hash-invariant)", v.name, got, golden) + } + } + + // Different target or runId must never collide with the golden pair. + if SmokeTestRequestHash("http://other.example:8080", runID).Hex() == golden { + t.Error("different target produced the golden hash") + } + if SmokeTestRequestHash(target, "20260101T000000Z-ffffff").Hex() == golden { + t.Error("different runId produced the golden hash") + } +} diff --git a/internal/erc8004/validation.go b/internal/erc8004/validation.go new file mode 100644 index 00000000..8aa46a8b --- /dev/null +++ b/internal/erc8004/validation.go @@ -0,0 +1,401 @@ +package erc8004 + +// ERC-8004 Validation Registry (v2.0.0) calldata builders and read helpers. +// +// IMPORTANT — signing model: the serviceoffer/servicebounty controller NEVER +// signs validation transactions. Poster agents submit validationRequest and +// evaluator agents submit validationResponse with THEIR OWN wallets; this +// package only builds calldata for them and reads/records results on-chain. 
+// +// Function signatures verified against: +// - Spec: https://eips.ethereum.org/EIPS/eip-8004 (Validation Registry) +// - Reference impl + official ABI: +// https://github.com/erc-8004/erc-8004-contracts +// (abis/ValidationRegistry.json, contracts/ValidationRegistryUpgradeable.sol, +// getVersion() == "2.0.0") +// +// validationRequest(address validatorAddress, uint256 agentId, string requestURI, bytes32 requestHash) +// validationResponse(bytes32 requestHash, uint8 response, string responseURI, bytes32 responseHash, string tag) +// getValidationStatus(bytes32 requestHash) -> (address, uint256, uint8, bytes32, string, uint256) +// getSummary(uint256 agentId, address[] validatorAddresses, string tag) -> (uint64 count, uint8 avgResponse) +// getAgentValidations(uint256 agentId) -> bytes32[] +// getValidatorRequests(address validatorAddress) -> bytes32[] + +import ( + "bytes" + "context" + _ "embed" + "fmt" + "math/big" + "strings" + "sync" + + "github.com/ethereum/go-ethereum/accounts/abi" + "github.com/ethereum/go-ethereum/accounts/abi/bind" + "github.com/ethereum/go-ethereum/common" +) + +//go:embed validation_registry.abi.json +var validationRegistryABI string + +const ( + // ValidationRegistryV2BaseSepolia is the ERC-8004 v2.0.0 Validation + // Registry on Base Sepolia (CREATE2 vanity proxy, same address on all + // supported testnets). + // + // NOTE: this intentionally differs from the legacy + // ValidationRegistryBaseSepolia constant in abi.go + // (0x8004CB39f29c09145F24Ad9dDe2A108C1A2cdfC5): that address has NO code + // on Base Sepolia — it is a v1.0.0 deployment that only exists on + // Ethereum Sepolia (verified via eth_getCode + getVersion(), 2026-06-10). + // Source: https://github.com/erc-8004/erc-8004-contracts + // (scripts/addresses.ts TESTNET_ADDRESSES.validationRegistry); on-chain: + // getVersion() == "2.0.0", getIdentityRegistry() == + // IdentityRegistryBaseSepolia. 
+ ValidationRegistryV2BaseSepolia = "0x8004Cb1BF31DAf7788923b405b754f57acEB4272" + + // ValidationRegistryV2Mainnet is the ERC-8004 v2.0.0 Validation Registry + // on Ethereum mainnet and Base mainnet (deployed at the same address via + // CREATE2). Source: https://github.com/erc-8004/erc-8004-contracts + // (scripts/addresses.ts MAINNET_ADDRESSES.validationRegistry); on-chain: + // code present on both chains, getVersion() == "2.0.0", + // getIdentityRegistry() == IdentityRegistryMainnet. + ValidationRegistryV2Mainnet = "0x8004Cc8439f36fd5F9F049D9fF86523Df6dAAB58" + + // MaxValidationResponse is the maximum validationResponse score. The + // contract reverts with "resp>100" above this. + MaxValidationResponse = 100 +) + +var ( + validationABIOnce sync.Once + validationABIParsed abi.ABI + validationABIErr error +) + +// validationABI lazily parses the embedded Validation Registry ABI once. +func validationABI() (abi.ABI, error) { + validationABIOnce.Do(func() { + validationABIParsed, validationABIErr = abi.JSON(strings.NewReader(validationRegistryABI)) + }) + if validationABIErr != nil { + return abi.ABI{}, fmt.Errorf("erc8004: parse validation registry abi: %w", validationABIErr) + } + return validationABIParsed, nil +} + +// ValidationRegistryAddress maps a supported network name to the deployed +// ERC-8004 v2.0.0 Validation Registry address. It accepts the same aliases as +// ResolveNetwork. Networks without an on-chain-verified deployment return an +// error rather than a guessed address. 
+func ValidationRegistryAddress(network string) (string, error) { + net, err := ResolveNetwork(network) + if err != nil { + return "", fmt.Errorf("erc8004: validation registry: %w", err) + } + switch net.Name { + case BaseSepolia.Name: + return ValidationRegistryV2BaseSepolia, nil + case Base.Name, Ethereum.Name: + return ValidationRegistryV2Mainnet, nil + default: + return "", fmt.Errorf("erc8004: no verified validation registry deployment for network %q", net.Name) + } +} + +// checkAgentID rejects agent ids that cannot be ABI-encoded as uint256. +func checkAgentID(agentID *big.Int) error { + if agentID == nil { + return fmt.Errorf("erc8004: agentId must not be nil") + } + if agentID.Sign() < 0 { + return fmt.Errorf("erc8004: agentId must not be negative (got %s)", agentID) + } + if agentID.BitLen() > 256 { + return fmt.Errorf("erc8004: agentId does not fit in uint256") + } + return nil +} + +// unpackCalldata verifies the 4-byte selector against the named method and +// unpacks the argument payload. +func unpackCalldata(parsed abi.ABI, name string, data []byte) ([]interface{}, error) { + method, ok := parsed.Methods[name] + if !ok { + return nil, fmt.Errorf("erc8004: method %q not in ABI", name) + } + if len(data) < 4 { + return nil, fmt.Errorf("erc8004: calldata too short (%d bytes, need at least 4)", len(data)) + } + if !bytes.Equal(data[:4], method.ID) { + return nil, fmt.Errorf("erc8004: selector mismatch: got 0x%x, want 0x%x (%s)", data[:4], method.ID, method.Sig) + } + values, err := method.Inputs.Unpack(data[4:]) + if err != nil { + return nil, fmt.Errorf("erc8004: unpack %s calldata: %w", name, err) + } + return values, nil +} + +// EncodeValidationRequest builds calldata for +// validationRequest(address,uint256,string,bytes32). The transaction must be +// submitted by the owner or an approved operator of agentId (the poster +// agent's own wallet) — never by the controller. 
+func EncodeValidationRequest(validatorAddress common.Address, agentID *big.Int, requestURI string, requestHash common.Hash) ([]byte, error) { + if validatorAddress == (common.Address{}) { + return nil, fmt.Errorf("erc8004: validatorAddress must not be the zero address") + } + if err := checkAgentID(agentID); err != nil { + return nil, err + } + if requestHash == (common.Hash{}) { + return nil, fmt.Errorf("erc8004: requestHash must not be the zero hash") + } + + parsed, err := validationABI() + if err != nil { + return nil, err + } + data, err := parsed.Pack("validationRequest", validatorAddress, agentID, requestURI, requestHash) + if err != nil { + return nil, fmt.Errorf("erc8004: pack validationRequest: %w", err) + } + return data, nil +} + +// EncodeValidationResponse builds calldata for +// validationResponse(bytes32,uint8,string,bytes32,string). response is the +// 0-100 score; the transaction must be submitted by the validator address +// named in the matching validationRequest (the evaluator's own wallet) — +// never by the controller. responseURI, responseHash, and tag are optional +// per spec and may be zero values. +func EncodeValidationResponse(requestHash common.Hash, response uint8, responseURI string, responseHash common.Hash, tag string) ([]byte, error) { + if requestHash == (common.Hash{}) { + return nil, fmt.Errorf("erc8004: requestHash must not be the zero hash") + } + if response > MaxValidationResponse { + return nil, fmt.Errorf("erc8004: response %d out of range [0,%d]", response, MaxValidationResponse) + } + + parsed, err := validationABI() + if err != nil { + return nil, err + } + data, err := parsed.Pack("validationResponse", requestHash, response, responseURI, responseHash, tag) + if err != nil { + return nil, fmt.Errorf("erc8004: pack validationResponse: %w", err) + } + return data, nil +} + +// ValidationRequestCall is the decoded argument set of a validationRequest call. 
+type ValidationRequestCall struct { + ValidatorAddress common.Address + AgentID *big.Int + RequestURI string + RequestHash common.Hash +} + +// DecodeValidationRequestCalldata decodes validationRequest calldata +// (selector + ABI-encoded args). Useful for provenance checks on observed +// transactions and for tests. +func DecodeValidationRequestCalldata(data []byte) (ValidationRequestCall, error) { + parsed, err := validationABI() + if err != nil { + return ValidationRequestCall{}, err + } + values, err := unpackCalldata(parsed, "validationRequest", data) + if err != nil { + return ValidationRequestCall{}, err + } + if len(values) != 4 { + return ValidationRequestCall{}, fmt.Errorf("erc8004: validationRequest arg count = %d, want 4", len(values)) + } + + out := ValidationRequestCall{} + var ok bool + if out.ValidatorAddress, ok = values[0].(common.Address); !ok { + return ValidationRequestCall{}, fmt.Errorf("erc8004: validatorAddress type = %T", values[0]) + } + if out.AgentID, ok = values[1].(*big.Int); !ok { + return ValidationRequestCall{}, fmt.Errorf("erc8004: agentId type = %T", values[1]) + } + if out.RequestURI, ok = values[2].(string); !ok { + return ValidationRequestCall{}, fmt.Errorf("erc8004: requestURI type = %T", values[2]) + } + hash, ok := values[3].([32]byte) + if !ok { + return ValidationRequestCall{}, fmt.Errorf("erc8004: requestHash type = %T", values[3]) + } + out.RequestHash = common.Hash(hash) + return out, nil +} + +// ValidationResponseCall is the decoded argument set of a validationResponse call. +type ValidationResponseCall struct { + RequestHash common.Hash + Response uint8 + ResponseURI string + ResponseHash common.Hash + Tag string +} + +// DecodeValidationResponseCalldata decodes validationResponse calldata +// (selector + ABI-encoded args). Useful for provenance checks on observed +// evaluator transactions and for tests. 
+func DecodeValidationResponseCalldata(data []byte) (ValidationResponseCall, error) { + parsed, err := validationABI() + if err != nil { + return ValidationResponseCall{}, err + } + values, err := unpackCalldata(parsed, "validationResponse", data) + if err != nil { + return ValidationResponseCall{}, err + } + if len(values) != 5 { + return ValidationResponseCall{}, fmt.Errorf("erc8004: validationResponse arg count = %d, want 5", len(values)) + } + + out := ValidationResponseCall{} + reqHash, ok := values[0].([32]byte) + if !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: requestHash type = %T", values[0]) + } + out.RequestHash = common.Hash(reqHash) + if out.Response, ok = values[1].(uint8); !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: response type = %T", values[1]) + } + if out.ResponseURI, ok = values[2].(string); !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: responseURI type = %T", values[2]) + } + respHash, ok := values[3].([32]byte) + if !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: responseHash type = %T", values[3]) + } + out.ResponseHash = common.Hash(respHash) + if out.Tag, ok = values[4].(string); !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: tag type = %T", values[4]) + } + return out, nil +} + +// ValidationStatus mirrors getValidationStatus(bytes32) return values. +type ValidationStatus struct { + ValidatorAddress common.Address + AgentID *big.Int + Response uint8 + ResponseHash common.Hash + Tag string + LastUpdate *big.Int +} + +// ValidationReader provides read-only access to a Validation Registry. The +// controller uses it to observe evaluator responses; it holds no signer. +type ValidationReader struct { + contract *bind.BoundContract +} + +// NewValidationReader binds a read-only Validation Registry at +// registryAddress. caller is typically (*erc8004.Client).ETH() or any +// *ethclient.Client. 
+func NewValidationReader(caller bind.ContractCaller, registryAddress string) (*ValidationReader, error) { + if caller == nil { + return nil, fmt.Errorf("erc8004: validation reader: caller must not be nil") + } + if !common.IsHexAddress(registryAddress) { + return nil, fmt.Errorf("erc8004: validation reader: invalid registry address %q", registryAddress) + } + parsed, err := validationABI() + if err != nil { + return nil, err + } + return &ValidationReader{ + contract: bind.NewBoundContract(common.HexToAddress(registryAddress), parsed, caller, nil, nil), + }, nil +} + +// ValidationStatus reads getValidationStatus(requestHash). +func (r *ValidationReader) ValidationStatus(ctx context.Context, requestHash common.Hash) (ValidationStatus, error) { + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "getValidationStatus", requestHash); err != nil { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus: %w", err) + } + if len(out) != 6 { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus returned %d values, want 6", len(out)) + } + + status := ValidationStatus{} + var ok bool + if status.ValidatorAddress, ok = out[0].(common.Address); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus validatorAddress type = %T", out[0]) + } + if status.AgentID, ok = out[1].(*big.Int); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus agentId type = %T", out[1]) + } + if status.Response, ok = out[2].(uint8); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus response type = %T", out[2]) + } + respHash, ok := out[3].([32]byte) + if !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus responseHash type = %T", out[3]) + } + status.ResponseHash = common.Hash(respHash) + if status.Tag, ok = out[4].(string); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus tag type = %T", out[4]) + } + if 
status.LastUpdate, ok = out[5].(*big.Int); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus lastUpdate type = %T", out[5]) + } + return status, nil +} + +// Summary reads getSummary(agentId, validatorAddresses, tag) and returns the +// response count and 0-100 average. +func (r *ValidationReader) Summary(ctx context.Context, agentID *big.Int, validatorAddresses []common.Address, tag string) (count uint64, avgResponse uint8, err error) { + if err := checkAgentID(agentID); err != nil { + return 0, 0, err + } + if validatorAddresses == nil { + validatorAddresses = []common.Address{} + } + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "getSummary", agentID, validatorAddresses, tag); err != nil { + return 0, 0, fmt.Errorf("erc8004: validation getSummary: %w", err) + } + if len(out) != 2 { + return 0, 0, fmt.Errorf("erc8004: validation getSummary returned %d values, want 2", len(out)) + } + count, ok := out[0].(uint64) + if !ok { + return 0, 0, fmt.Errorf("erc8004: validation getSummary count type = %T", out[0]) + } + avgResponse, ok = out[1].(uint8) + if !ok { + return 0, 0, fmt.Errorf("erc8004: validation getSummary avgResponse type = %T", out[1]) + } + return count, avgResponse, nil +} + +// AgentValidations reads getAgentValidations(agentId) — all request hashes +// recorded for the agent. 
+func (r *ValidationReader) AgentValidations(ctx context.Context, agentID *big.Int) ([]common.Hash, error) { + if err := checkAgentID(agentID); err != nil { + return nil, err + } + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "getAgentValidations", agentID); err != nil { + return nil, fmt.Errorf("erc8004: getAgentValidations: %w", err) + } + if len(out) != 1 { + return nil, fmt.Errorf("erc8004: getAgentValidations returned %d values, want 1", len(out)) + } + raw, ok := out[0].([][32]byte) + if !ok { + return nil, fmt.Errorf("erc8004: getAgentValidations type = %T", out[0]) + } + hashes := make([]common.Hash, len(raw)) + for i, h := range raw { + hashes[i] = common.Hash(h) + } + return hashes, nil +} diff --git a/internal/erc8004/validation_registry.abi.json b/internal/erc8004/validation_registry.abi.json new file mode 100644 index 00000000..a73a65bb --- /dev/null +++ b/internal/erc8004/validation_registry.abi.json @@ -0,0 +1,272 @@ +[ + { + "inputs": [ + { + "internalType": "address", + "name": "validatorAddress", + "type": "address" + }, + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "string", + "name": "requestURI", + "type": "string" + }, + { + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + } + ], + "name": "validationRequest", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + }, + { + "internalType": "uint8", + "name": "response", + "type": "uint8" + }, + { + "internalType": "string", + "name": "responseURI", + "type": "string" + }, + { + "internalType": "bytes32", + "name": "responseHash", + "type": "bytes32" + }, + { + "internalType": "string", + "name": "tag", + "type": "string" + } + ], + "name": "validationResponse", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + }, + { + "inputs": [ + 
{ + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + } + ], + "name": "getValidationStatus", + "outputs": [ + { + "internalType": "address", + "name": "validatorAddress", + "type": "address" + }, + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "uint8", + "name": "response", + "type": "uint8" + }, + { + "internalType": "bytes32", + "name": "responseHash", + "type": "bytes32" + }, + { + "internalType": "string", + "name": "tag", + "type": "string" + }, + { + "internalType": "uint256", + "name": "lastUpdate", + "type": "uint256" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "address[]", + "name": "validatorAddresses", + "type": "address[]" + }, + { + "internalType": "string", + "name": "tag", + "type": "string" + } + ], + "name": "getSummary", + "outputs": [ + { + "internalType": "uint64", + "name": "count", + "type": "uint64" + }, + { + "internalType": "uint8", + "name": "avgResponse", + "type": "uint8" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + } + ], + "name": "getAgentValidations", + "outputs": [ + { + "internalType": "bytes32[]", + "name": "", + "type": "bytes32[]" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "address", + "name": "validatorAddress", + "type": "address" + } + ], + "name": "getValidatorRequests", + "outputs": [ + { + "internalType": "bytes32[]", + "name": "", + "type": "bytes32[]" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [], + "name": "getIdentityRegistry", + "outputs": [ + { + "internalType": "address", + "name": "", + "type": "address" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "anonymous": false, + 
"inputs": [ + { + "indexed": true, + "internalType": "address", + "name": "validatorAddress", + "type": "address" + }, + { + "indexed": true, + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "indexed": false, + "internalType": "string", + "name": "requestURI", + "type": "string" + }, + { + "indexed": true, + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + } + ], + "name": "ValidationRequest", + "type": "event" + }, + { + "anonymous": false, + "inputs": [ + { + "indexed": true, + "internalType": "address", + "name": "validatorAddress", + "type": "address" + }, + { + "indexed": true, + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "indexed": true, + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + }, + { + "indexed": false, + "internalType": "uint8", + "name": "response", + "type": "uint8" + }, + { + "indexed": false, + "internalType": "string", + "name": "responseURI", + "type": "string" + }, + { + "indexed": false, + "internalType": "bytes32", + "name": "responseHash", + "type": "bytes32" + }, + { + "indexed": false, + "internalType": "string", + "name": "tag", + "type": "string" + } + ], + "name": "ValidationResponse", + "type": "event" + } +] diff --git a/internal/erc8004/validation_test.go b/internal/erc8004/validation_test.go new file mode 100644 index 00000000..939bbf5a --- /dev/null +++ b/internal/erc8004/validation_test.go @@ -0,0 +1,404 @@ +package erc8004 + +import ( + "context" + "encoding/hex" + "math/big" + "strings" + "testing" + + ethereum "github.com/ethereum/go-ethereum" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +// stubCaller is a bind.ContractCaller that returns canned ABI-encoded output. +// Shared by validation and reputation reader tests. Never hits the network. 
+type stubCaller struct { + ret []byte + err error + lastCall ethereum.CallMsg +} + +func (s *stubCaller) CodeAt(_ context.Context, _ common.Address, _ *big.Int) ([]byte, error) { + return []byte{0x01}, nil +} + +func (s *stubCaller) CallContract(_ context.Context, call ethereum.CallMsg, _ *big.Int) ([]byte, error) { + s.lastCall = call + return s.ret, s.err +} + +func TestValidationABI_Parses(t *testing.T) { + if _, err := validationABI(); err != nil { + t.Fatalf("embedded validation ABI failed to parse: %v", err) + } +} + +// TestValidationABI_SelectorGoldenValues pins the 4-byte selectors of the +// verified v2.0.0 signatures (spec: https://eips.ethereum.org/EIPS/eip-8004; +// ABI: https://github.com/erc-8004/erc-8004-contracts). Each golden value is +// cross-checked against keccak256 of the canonical signature string and the +// parsed ABI method. +func TestValidationABI_SelectorGoldenValues(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + + tests := []struct { + method string + sig string + selector string + }{ + {"validationRequest", "validationRequest(address,uint256,string,bytes32)", "aaf400c4"}, + {"validationResponse", "validationResponse(bytes32,uint8,string,bytes32,string)", "3d659a96"}, + {"getValidationStatus", "getValidationStatus(bytes32)", "ff2febfc"}, + {"getSummary", "getSummary(uint256,address[],string)", "1b7cabd6"}, + {"getAgentValidations", "getAgentValidations(uint256)", "8d5d0c2d"}, + {"getValidatorRequests", "getValidatorRequests(address)", "4bf3158c"}, + {"getIdentityRegistry", "getIdentityRegistry()", "bc4d861b"}, + } + + for _, tt := range tests { + t.Run(tt.method, func(t *testing.T) { + m, ok := parsed.Methods[tt.method] + if !ok { + t.Fatalf("method %q missing from parsed ABI", tt.method) + } + if m.Sig != tt.sig { + t.Errorf("signature = %q, want %q", m.Sig, tt.sig) + } + if got := hex.EncodeToString(m.ID); got != tt.selector { + t.Errorf("parsed selector = 0x%s, want 0x%s", got, tt.selector) 
+ } + if got := hex.EncodeToString(crypto.Keccak256([]byte(tt.sig))[:4]); got != tt.selector { + t.Errorf("keccak256(%q)[:4] = 0x%s, want 0x%s", tt.sig, got, tt.selector) + } + }) + } +} + +func TestValidationABI_EventsPresent(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + for _, name := range []string{"ValidationRequest", "ValidationResponse"} { + if _, ok := parsed.Events[name]; !ok { + t.Errorf("missing event %q in parsed ABI", name) + } + } +} + +func TestEncodeValidationRequest_RoundTrip(t *testing.T) { + validator := common.HexToAddress("0x1111111111111111111111111111111111111111") + agentID := big.NewInt(42) + requestURI := "https://example.org/bounty/42/request.json" + requestHash := crypto.Keccak256Hash([]byte("request payload")) + + data, err := EncodeValidationRequest(validator, agentID, requestURI, requestHash) + if err != nil { + t.Fatalf("EncodeValidationRequest: %v", err) + } + if got := hex.EncodeToString(data[:4]); got != "aaf400c4" { + t.Errorf("selector = 0x%s, want 0xaaf400c4", got) + } + + decoded, err := DecodeValidationRequestCalldata(data) + if err != nil { + t.Fatalf("DecodeValidationRequestCalldata: %v", err) + } + if decoded.ValidatorAddress != validator { + t.Errorf("validatorAddress = %s, want %s", decoded.ValidatorAddress, validator) + } + if decoded.AgentID.Cmp(agentID) != 0 { + t.Errorf("agentId = %s, want %s", decoded.AgentID, agentID) + } + if decoded.RequestURI != requestURI { + t.Errorf("requestURI = %q, want %q", decoded.RequestURI, requestURI) + } + if decoded.RequestHash != requestHash { + t.Errorf("requestHash = %s, want %s", decoded.RequestHash, requestHash) + } +} + +func TestEncodeValidationResponse_RoundTrip(t *testing.T) { + requestHash := crypto.Keccak256Hash([]byte("request payload")) + responseHash := crypto.Keccak256Hash([]byte("evaluation artifact")) + + data, err := EncodeValidationResponse(requestHash, 87, "ipfs://bafy.../eval.json", responseHash, "code-review") + if err != 
nil { + t.Fatalf("EncodeValidationResponse: %v", err) + } + if got := hex.EncodeToString(data[:4]); got != "3d659a96" { + t.Errorf("selector = 0x%s, want 0x3d659a96", got) + } + + decoded, err := DecodeValidationResponseCalldata(data) + if err != nil { + t.Fatalf("DecodeValidationResponseCalldata: %v", err) + } + if decoded.RequestHash != requestHash { + t.Errorf("requestHash = %s, want %s", decoded.RequestHash, requestHash) + } + if decoded.Response != 87 { + t.Errorf("response = %d, want 87", decoded.Response) + } + if decoded.ResponseURI != "ipfs://bafy.../eval.json" { + t.Errorf("responseURI = %q", decoded.ResponseURI) + } + if decoded.ResponseHash != responseHash { + t.Errorf("responseHash = %s, want %s", decoded.ResponseHash, responseHash) + } + if decoded.Tag != "code-review" { + t.Errorf("tag = %q, want %q", decoded.Tag, "code-review") + } +} + +func TestEncodeValidationResponse_OptionalFieldsZero(t *testing.T) { + requestHash := crypto.Keccak256Hash([]byte("req")) + data, err := EncodeValidationResponse(requestHash, 0, "", common.Hash{}, "") + if err != nil { + t.Fatalf("EncodeValidationResponse with zero optionals: %v", err) + } + decoded, err := DecodeValidationResponseCalldata(data) + if err != nil { + t.Fatalf("decode: %v", err) + } + if decoded.Response != 0 || decoded.ResponseURI != "" || decoded.Tag != "" || decoded.ResponseHash != (common.Hash{}) { + t.Errorf("zero optionals did not round-trip: %+v", decoded) + } +} + +func TestEncodeValidationRequest_BadInput(t *testing.T) { + validator := common.HexToAddress("0x1111111111111111111111111111111111111111") + hash := crypto.Keccak256Hash([]byte("x")) + + tests := []struct { + name string + fn func() ([]byte, error) + }{ + {"zero validator", func() ([]byte, error) { + return EncodeValidationRequest(common.Address{}, big.NewInt(1), "u", hash) + }}, + {"nil agentId", func() ([]byte, error) { + return EncodeValidationRequest(validator, nil, "u", hash) + }}, + {"negative agentId", func() ([]byte, error) { 
+ return EncodeValidationRequest(validator, big.NewInt(-1), "u", hash) + }}, + {"zero requestHash", func() ([]byte, error) { + return EncodeValidationRequest(validator, big.NewInt(1), "u", common.Hash{}) + }}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if _, err := tt.fn(); err == nil { + t.Error("expected error, got nil") + } + }) + } +} + +func TestEncodeValidationResponse_BadInput(t *testing.T) { + hash := crypto.Keccak256Hash([]byte("x")) + + if _, err := EncodeValidationResponse(common.Hash{}, 50, "", common.Hash{}, ""); err == nil { + t.Error("zero requestHash: expected error, got nil") + } + if _, err := EncodeValidationResponse(hash, 101, "", common.Hash{}, ""); err == nil { + t.Error("response 101: expected error, got nil") + } + if _, err := EncodeValidationResponse(hash, MaxValidationResponse, "", common.Hash{}, ""); err != nil { + t.Errorf("response 100 should be accepted: %v", err) + } +} + +func TestDecodeValidationCalldata_Errors(t *testing.T) { + t.Run("too short", func(t *testing.T) { + if _, err := DecodeValidationResponseCalldata([]byte{0x3d, 0x65}); err == nil { + t.Error("expected error for short calldata") + } + }) + + t.Run("wrong selector", func(t *testing.T) { + // validationRequest calldata fed to the validationResponse decoder. 
+ data, err := EncodeValidationRequest( + common.HexToAddress("0x2222222222222222222222222222222222222222"), + big.NewInt(7), "u", crypto.Keccak256Hash([]byte("y"))) + if err != nil { + t.Fatal(err) + } + if _, err := DecodeValidationResponseCalldata(data); err == nil { + t.Error("expected selector mismatch error") + } else if !strings.Contains(err.Error(), "selector mismatch") { + t.Errorf("error = %v, want selector mismatch", err) + } + }) + + t.Run("truncated args", func(t *testing.T) { + data, err := EncodeValidationResponse(crypto.Keccak256Hash([]byte("z")), 10, "uri", common.Hash{}, "tag") + if err != nil { + t.Fatal(err) + } + if _, err := DecodeValidationResponseCalldata(data[:len(data)-40]); err == nil { + t.Error("expected error for truncated calldata") + } + }) +} + +func TestValidationRegistryAddress(t *testing.T) { + tests := []struct { + network string + want string + wantErr bool + }{ + {"base-sepolia", ValidationRegistryV2BaseSepolia, false}, + {" Base-Sepolia ", ValidationRegistryV2BaseSepolia, false}, + {"base", ValidationRegistryV2Mainnet, false}, + {"base-mainnet", ValidationRegistryV2Mainnet, false}, + {"ethereum", ValidationRegistryV2Mainnet, false}, + {"mainnet", ValidationRegistryV2Mainnet, false}, + {"solana", "", true}, + {"", "", true}, + } + for _, tt := range tests { + t.Run(tt.network, func(t *testing.T) { + got, err := ValidationRegistryAddress(tt.network) + if tt.wantErr { + if err == nil { + t.Errorf("expected error for %q, got address %s", tt.network, got) + } + return + } + if err != nil { + t.Fatalf("ValidationRegistryAddress(%q): %v", tt.network, err) + } + if got != tt.want { + t.Errorf("address = %s, want %s", got, tt.want) + } + }) + } +} + +func TestNewValidationReader_BadInput(t *testing.T) { + if _, err := NewValidationReader(nil, ValidationRegistryV2BaseSepolia); err == nil { + t.Error("nil caller: expected error") + } + if _, err := NewValidationReader(&stubCaller{}, "not-an-address"); err == nil { + t.Error("bad 
address: expected error") + } +} + +func TestValidationReader_ValidationStatus(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + + validator := common.HexToAddress("0x3333333333333333333333333333333333333333") + agentID := big.NewInt(42) + respHash := crypto.Keccak256Hash([]byte("artifact")) + lastUpdate := big.NewInt(1765432100) + + ret, err := parsed.Methods["getValidationStatus"].Outputs.Pack( + validator, agentID, uint8(91), [32]byte(respHash), "code-review", lastUpdate) + if err != nil { + t.Fatalf("pack outputs: %v", err) + } + + caller := &stubCaller{ret: ret} + reader, err := NewValidationReader(caller, ValidationRegistryV2BaseSepolia) + if err != nil { + t.Fatal(err) + } + + reqHash := crypto.Keccak256Hash([]byte("request")) + status, err := reader.ValidationStatus(context.Background(), reqHash) + if err != nil { + t.Fatalf("ValidationStatus: %v", err) + } + + if status.ValidatorAddress != validator { + t.Errorf("validatorAddress = %s, want %s", status.ValidatorAddress, validator) + } + if status.AgentID.Cmp(agentID) != 0 { + t.Errorf("agentId = %s, want %s", status.AgentID, agentID) + } + if status.Response != 91 { + t.Errorf("response = %d, want 91", status.Response) + } + if status.ResponseHash != respHash { + t.Errorf("responseHash = %s, want %s", status.ResponseHash, respHash) + } + if status.Tag != "code-review" { + t.Errorf("tag = %q, want %q", status.Tag, "code-review") + } + if status.LastUpdate.Cmp(lastUpdate) != 0 { + t.Errorf("lastUpdate = %s, want %s", status.LastUpdate, lastUpdate) + } + + // The reader must have issued a getValidationStatus(requestHash) call. 
+ wantData, err := parsed.Pack("getValidationStatus", reqHash) + if err != nil { + t.Fatal(err) + } + if hex.EncodeToString(caller.lastCall.Data) != hex.EncodeToString(wantData) { + t.Errorf("call data = 0x%x, want 0x%x", caller.lastCall.Data, wantData) + } +} + +func TestValidationReader_Summary(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + ret, err := parsed.Methods["getSummary"].Outputs.Pack(uint64(5), uint8(78)) + if err != nil { + t.Fatal(err) + } + + reader, err := NewValidationReader(&stubCaller{ret: ret}, ValidationRegistryV2BaseSepolia) + if err != nil { + t.Fatal(err) + } + + count, avg, err := reader.Summary(context.Background(), big.NewInt(42), nil, "") + if err != nil { + t.Fatalf("Summary: %v", err) + } + if count != 5 || avg != 78 { + t.Errorf("summary = (%d, %d), want (5, 78)", count, avg) + } + + if _, _, err := reader.Summary(context.Background(), nil, nil, ""); err == nil { + t.Error("nil agentId: expected error") + } +} + +func TestValidationReader_AgentValidations(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + h1 := crypto.Keccak256Hash([]byte("a")) + h2 := crypto.Keccak256Hash([]byte("b")) + ret, err := parsed.Methods["getAgentValidations"].Outputs.Pack([][32]byte{h1, h2}) + if err != nil { + t.Fatal(err) + } + + reader, err := NewValidationReader(&stubCaller{ret: ret}, ValidationRegistryV2BaseSepolia) + if err != nil { + t.Fatal(err) + } + + hashes, err := reader.AgentValidations(context.Background(), big.NewInt(42)) + if err != nil { + t.Fatalf("AgentValidations: %v", err) + } + if len(hashes) != 2 || hashes[0] != h1 || hashes[1] != h2 { + t.Errorf("hashes = %v, want [%s %s]", hashes, h1, h2) + } +} diff --git a/tests/test_gh_post_no_redirect.py b/tests/test_gh_post_no_redirect.py new file mode 100644 index 00000000..6b19d5be --- /dev/null +++ b/tests/test_gh_post_no_redirect.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 +"""Unit tests for gh_post.py's 
no-redirect guard. + +Regression suite for the token-exfiltration vector where ``_gh_request`` +used urllib's DEFAULT opener: CPython's HTTPRedirectHandler re-sends every +request header — including the bearer-token auth header — to a 3xx +redirect target, with no cross-origin stripping. A redirecting (or +attacker-influenced) GITHUB_API_BASE could therefore receive the literal +token. gh_post.py now routes every call through a module-level opener +whose ``redirect_request`` returns None (mirroring smoke.py's +``_NoRedirect``), so a 3xx is surfaced as the final status and the token +never leaves the intended endpoint. +""" +import http.server +import importlib.util +import sys +import threading +import time +import unittest +from pathlib import Path + +MODULE_PATH = ( + Path(__file__).resolve().parents[1] + / "internal" + / "embed" + / "skills" + / "smoke-test" + / "scripts" + / "gh_post.py" +) + +TOKEN = "ghp_test_secret_token_do_not_leak" + + +def load_gh_post_module(): + spec = importlib.util.spec_from_file_location("gh_post_smoke", MODULE_PATH) + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +class _RecordingHandler(http.server.BaseHTTPRequestHandler): + """Records every request (method, path, Authorization header).""" + + requests = None # set per-server below + + def _record_and_respond(self, status, extra_headers=()): + self.requests.append( + (self.command, self.path, self.headers.get("Authorization")) + ) + self.send_response(status) + for name, value in extra_headers: + self.send_header(name, value) + self.send_header("Content-Length", "2") + self.end_headers() + self.wfile.write(b"{}") + + def log_message(self, *args): # keep test output clean + pass + + +def _start_server(handler_cls): + server = http.server.ThreadingHTTPServer(("127.0.0.1", 0), handler_cls) + thread = threading.Thread(target=server.serve_forever, daemon=True) + 
thread.start() + return server + + +class GhPostNoRedirectTests(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.gh_post = load_gh_post_module() + + # "Attacker" server on a different origin — must NEVER be contacted. + attacker_requests = [] + + class AttackerHandler(_RecordingHandler): + requests = attacker_requests + + def do_GET(self): + self._record_and_respond(200) + + do_PUT = do_GET + + cls.attacker_requests = attacker_requests + cls.attacker = _start_server(AttackerHandler) + attacker_url = "http://127.0.0.1:%d/leak" % cls.attacker.server_address[1] + + # Redirector: answers every request with 302 -> attacker origin. + redirector_requests = [] + + class RedirectorHandler(_RecordingHandler): + requests = redirector_requests + + def do_GET(self): + self._record_and_respond(302, [("Location", attacker_url)]) + + do_PUT = do_GET + + cls.redirector_requests = redirector_requests + cls.redirector = _start_server(RedirectorHandler) + cls.redirector_base = "http://127.0.0.1:%d" % cls.redirector.server_address[1] + + @classmethod + def tearDownClass(cls): + for server in (cls.attacker, cls.redirector): + server.shutdown() + server.server_close() + + def setUp(self): + del self.attacker_requests[:] + del self.redirector_requests[:] + # _put_file builds its URL from module-level API_BASE; point it at + # the redirector for the duration of each test. 
+ self._orig_api_base = self.gh_post.API_BASE + self.gh_post.API_BASE = self.redirector_base + + def tearDown(self): + self.gh_post.API_BASE = self._orig_api_base + + # ── the opener refuses redirects outright ───────────────────────────── + + def test_no_redirect_handler_returns_none(self): + handler = self.gh_post._NoRedirect() + self.assertIsNone( + handler.redirect_request(None, None, 302, "Found", {}, "http://evil") + ) + + # ── empirical: a 3xx is final and the token never crosses origins ───── + + def test_get_does_not_follow_redirect_or_leak_token(self): + status, _, _ = self.gh_post._gh_request( + "GET", self.redirector_base + "/repos/o/r/contents/x", TOKEN + ) + self.assertEqual(status, 302) + self.assertEqual( + self.attacker_requests, [], "redirect target must never be contacted" + ) + # Sanity: the intended endpoint did see the Bearer header once. + self.assertEqual(len(self.redirector_requests), 1) + self.assertEqual(self.redirector_requests[0][2], "Bearer " + TOKEN) + + def test_put_treats_redirect_as_hard_failure(self): + deadline = time.monotonic() + 5 + with self.assertRaises(self.gh_post.PostError) as ctx: + self.gh_post._put_file( + "o/r", "reports/x/y.md", "msg", b"body", None, TOKEN, deadline + ) + self.assertIn("status 302", str(ctx.exception)) + self.assertNotIn(TOKEN, str(ctx.exception)) + self.assertEqual(self.attacker_requests, []) + + +if __name__ == "__main__": + unittest.main()