diff --git a/cmd/obol/bounty.go b/cmd/obol/bounty.go new file mode 100644 index 00000000..99ec84b3 --- /dev/null +++ b/cmd/obol/bounty.go @@ -0,0 +1,720 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "slices" + "strconv" + "strings" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/ObolNetwork/obol-stack/internal/bounty" + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/erc8004" + "github.com/ObolNetwork/obol-stack/internal/kubectl" + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/ui" + "github.com/ethereum/go-ethereum/common" + "github.com/urfave/cli/v3" +) + +// bountyCommand is the demand-side counterpart to `obol sell`: post a +// ServiceBounty (escrowed reward for work) instead of a ServiceOffer. Task +// types are discovered dynamically from the embedded catalog — exactly like +// `obol network install <network>` builds a subcommand per embedded network — so +// `obol bounty post` lists only the types live in this release. 
+func bountyCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "bounty", + Usage: "Post and manage ServiceBounties (demand-side: pay for benchmarks, fine-tunes, serving)", + Commands: []*cli.Command{ + { + Name: "post", + Usage: "Post a bounty for a task type (run `obol bounty post` to list the available types)", + Commands: buildBountyPostCommands(cfg), + Action: func(ctx context.Context, cmd *cli.Command) error { + return cli.ShowSubcommandHelp(cmd) + }, + }, + bountyTypesCommand(cfg), + bountyListCommand(cfg), + bountyStatusCommand(cfg), + bountyClaimCommand(cfg), + bountySubmitCommand(cfg), + bountyVerdictCommand(cfg, "accept", "Accept the submission (poster verdict; releases the escrowed reward)"), + bountyVerdictCommand(cfg, "reject", "Reject the submission (poster verdict; escrow stays held until deadline refund)"), + bountyEvalCommand(cfg), + }, + } +} + +// bountyEvalCommand carries the evaluator-side commit-reveal verbs. Commitments +// are address-bound (hash includes the evaluator address) and the controller +// opens the reveal window only after K commitments are in — committing first +// and revealing later is the protocol, not a convenience. 
+func bountyEvalCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "eval", + Usage: "Evaluator verbs: enroll in the pool, commit and reveal quorum scores", + Commands: []*cli.Command{ + { + Name: "enroll", + Usage: "Enroll as an evaluator (joins the selection pool at the Shadow tier)", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "address", Usage: "[REQUIRED] Evaluator payout/identity address (0x...)", Required: true}, + &cli.StringFlag{Name: "task-types", Usage: "Comma-separated task-type refs you can re-run", Value: "benchmark@v1"}, + &cli.StringFlag{Name: "attestation-scheme", Usage: "Device attestation scheme [none|secure-enclave]", Value: "none"}, + &cli.BoolFlag{Name: "dry-run", Usage: "Print the EvaluatorEnrollment manifest instead of applying it"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing enrollment name: obol bounty eval enroll --address 0x...") + } + enrollment := monetizeapi.EvaluatorEnrollment{ + TypeMeta: metav1.TypeMeta{ + APIVersion: monetizeapi.Group + "/" + monetizeapi.Version, + Kind: monetizeapi.EvaluatorEnrollmentKind, + }, + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: cmd.String("namespace")}, + Spec: monetizeapi.EvaluatorEnrollmentSpec{ + Address: cmd.String("address"), + TaskTypes: strings.Split(cmd.String("task-types"), ","), + Attestation: monetizeapi.EvaluatorAttestation{Scheme: cmd.String("attestation-scheme")}, + }, + } + if cmd.Bool("dry-run") { + out, err := json.MarshalIndent(enrollment, "", " ") + if err != nil { + return err + } + fmt.Printf("# EvaluatorEnrollment (dry-run)\n%s\n", out) + return nil + } + out, err := kubectlApplyOutput(cfg, enrollment) + if err != nil { + return fmt.Errorf("apply EvaluatorEnrollment: %w", err) + } + fmt.Print(out) + fmt.Println("Enrolled at 
the Shadow tier: you'll be randomly assigned shadow seats; agreements with the quorum median climb the ladder.") + return nil + }, + }, + { + Name: "pool", + Usage: "List the enrolled evaluator pool", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace (default: all namespaces)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + bin, kc := kubectl.Paths(cfg) + args := []string{"get", "evaluatorenrollments.obol.org", "-o", "wide"} + if ns := cmd.String("namespace"); ns != "" { + args = append(args, "-n", ns) + } else { + args = append(args, "-A") + } + out, err := kubectl.Output(bin, kc, args...) + if err != nil { + return err + } + fmt.Print(out) + return nil + }, + }, + { + Name: "commit", + Usage: "Commit your score (only the address-bound hash is published; keep the salt for reveal)", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "address", Usage: "[REQUIRED] Evaluator address (0x...)", Required: true}, + &cli.IntFlag{Name: "score", Usage: "[REQUIRED] Verdict score 0-100 (>=50 verifies)", Required: true}, + &cli.StringFlag{Name: "salt", Usage: "[REQUIRED] Random salt — KEEP IT; the reveal is unverifiable without it", Required: true}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty eval commit --address 0x... 
--score N --salt s") + } + score := int64(cmd.Int("score")) + if score < 0 || score > 100 { + return fmt.Errorf("--score %d out of range 0-100", score) + } + addr := strings.ToLower(cmd.String("address")) + hash := monetizeapi.EvalCommitHash(score, cmd.String("salt"), addr) + fmt.Printf("Committing %s (score and salt stay local — reveal with the SAME --score and --salt)\n", hash) + return annotateBountyCLI(cfg, cmd.String("namespace"), name, + []string{"obol.org/eval-commit-" + addr + "=" + hash}) + }, + }, + { + Name: "reveal", + Usage: "Reveal your committed score (accepted once K commitments are in)", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "address", Usage: "[REQUIRED] Evaluator address (0x...)", Required: true}, + &cli.IntFlag{Name: "score", Usage: "[REQUIRED] The committed score", Required: true}, + &cli.StringFlag{Name: "salt", Usage: "[REQUIRED] The committed salt", Required: true}, + &cli.StringFlag{Name: "validation-tx", Usage: "Optional ERC-8004 validationResponse tx hash you submitted on-chain (recorded as provenance)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty eval reveal --address 0x... 
--score N --salt s") + } + payload := map[string]any{ + "score": int64(cmd.Int("score")), + "salt": cmd.String("salt"), + } + if tx := cmd.String("validation-tx"); tx != "" { + payload["validationTx"] = tx + } + raw, err := json.Marshal(payload) + if err != nil { + return err + } + addr := strings.ToLower(cmd.String("address")) + return annotateBountyCLI(cfg, cmd.String("namespace"), name, + []string{"obol.org/eval-reveal-" + addr + "=" + string(raw)}) + }, + }, + { + Name: "calldata", + Usage: "Print ERC-8004 validationResponse calldata for your wallet to submit (the controller NEVER signs)", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "network", Usage: "Chain", Value: "base-sepolia"}, + &cli.StringFlag{Name: "request-hash", Usage: "[REQUIRED] The validation request hash (bytes32, 0x...)", Required: true}, + &cli.IntFlag{Name: "response", Usage: "[REQUIRED] Your 0-100 verdict score", Required: true}, + &cli.StringFlag{Name: "response-uri", Usage: "Optional URI of your evaluation report"}, + &cli.StringFlag{Name: "tag", Usage: "Optional tag (e.g. the task type ref)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + response := cmd.Int("response") + if response < 0 || response > 100 { + return fmt.Errorf("--response %d out of range 0-100", response) + } + registry, err := erc8004.ValidationRegistryAddress(cmd.String("network")) + if err != nil { + return err + } + calldata, err := erc8004.EncodeValidationResponse( + common.HexToHash(cmd.String("request-hash")), + uint8(response), + cmd.String("response-uri"), + common.Hash{}, + cmd.String("tag"), + ) + if err != nil { + return err + } + fmt.Printf("ValidationRegistry (%s): %s\n", cmd.String("network"), registry) + fmt.Printf("Calldata: 0x%x\n", calldata) + fmt.Println("Submit with YOUR wallet (e.g. 
the agent remote-signer or cast send) — then pass the tx hash to `obol bounty eval reveal --validation-tx`.") + return nil + }, + }, + }, + } +} + +// bountyTypesCommand lists the enabled task-type catalog with its eval/pricing +// policy, so an operator can see what bounties are postable and on what terms. +func bountyTypesCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "types", + Usage: "List the available ServiceBounty task types (the dynamic catalog)", + Action: func(ctx context.Context, cmd *cli.Command) error { + types, err := bounty.Enabled() + if err != nil { + return err + } + if len(types) == 0 { + fmt.Println("No bounty task types are enabled in this release.") + return nil + } + for _, t := range types { + fmt.Printf("• %-14s %s\n", t.Ref(), t.Summary) + fmt.Printf(" runner=%s acceptance=%s eval-k=%d paid-in=%s/%s hardware-proof=%s\n", + t.Runner, t.Acceptance.Method, t.Eval.DefaultK, + t.Eval.Payment.Asset, t.Eval.Payment.Settle, t.HardwareProof) + } + return nil + }, + } +} + +// commonBountyFlags are shared by every `obol bounty post ` subcommand. +// The bounty name is positional (`obol bounty post benchmark `), matching +// `obol sell http `. +func commonBountyFlags() []cli.Flag { + return []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace for the ServiceBounty", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "model", Usage: "Target model id (spec.task.targetModel.name)"}, + &cli.StringFlag{Name: "runtime", Usage: "Target model runtime", Value: "vllm"}, + &cli.StringFlag{Name: "reward", Usage: "[REQUIRED] Reward amount in human units (e.g. 
500.00)", Required: true}, + &cli.StringFlag{Name: "asset", Usage: "Reward asset symbol", Value: "USDC"}, + &cli.StringFlag{Name: "chain", Usage: "Payment network", Value: "base"}, + &cli.StringFlag{Name: "pay-to", Usage: "Escrow-return / poster address (0x...)"}, + &cli.StringFlag{Name: "escrow-scheme", Usage: "x402 escrow scheme [upto|authCapture]", Value: "upto"}, + &cli.StringFlag{Name: "facilitator", Usage: "x402 facilitator URL", Value: "https://x402.gcp.obol.tech"}, + &cli.StringFlag{Name: "deadline", Usage: "RFC3339 deadline (e.g. 2026-07-01T00:00:00Z)"}, + &cli.IntFlag{Name: "max-fulfillers", Usage: "Max paid fulfillers (1 = single-winner)", Value: 1}, + &cli.IntFlag{Name: "eval-k", Usage: "Evaluators to sample (defaults to the task type's defaultK)"}, + &cli.BoolFlag{Name: "dangerously-skip-verification", Usage: "Skip the evaluator quorum: poster-as-judge, bounty marked unverified, no reputation feedback emitted"}, + &cli.StringFlag{Name: "hardware-proof", Usage: "Hardware proof strength [self-report|gpu-attestation|evaluator-measured] (defaults to the task type's policy)"}, + &cli.StringFlag{Name: "tolerance", Usage: "Per-metric acceptance bands, metric=band pairs (e.g. totalScore=0.05,mmlu=0.01); overlays the task type's defaults"}, + &cli.StringFlag{Name: "dataset-commit", Usage: "Merkle root committing the (partially private) eval dataset"}, + &cli.StringFlag{Name: "private-fraction", Usage: "Fraction of dataset rows kept private, 0..1 (e.g. 0.2); revealed only to sampled evaluators"}, + &cli.StringFlag{Name: "bond", Usage: "Optional refundable self-bond amount (own funds; never slashed)"}, + &cli.BoolFlag{Name: "yes", Aliases: []string{"y"}, Usage: "Skip the cost-preview confirmation"}, + &cli.BoolFlag{Name: "dry-run", Usage: "Print the ServiceBounty manifest instead of applying it"}, + } +} + +// buildBountyPostCommands creates one `post` subcommand per ENABLED task type, +// with flags generated from that type's param schema. 
+func buildBountyPostCommands(cfg *config.Config) []*cli.Command { + types, err := bounty.Enabled() + if err != nil { + return nil + } + + var commands []*cli.Command + for _, t := range types { + flags := commonBountyFlags() + for _, p := range t.Params { + usage := p.Description + if usage == "" { + usage = "Set " + p.Name + } + if len(p.Enum) > 0 { + usage += fmt.Sprintf(" [options: %s]", strings.Join(p.Enum, ", ")) + } + required := p.Required && p.Default == "" + if required { + usage = "[REQUIRED] " + usage + } + flags = append(flags, &cli.StringFlag{ + Name: paramFlagName(p.Name), + Usage: usage, + Value: p.Default, + Required: required, + }) + } + + tt := t // capture for the closure + commands = append(commands, &cli.Command{ + Name: tt.ID, + Usage: tt.Summary, + ArgsUsage: "", + Flags: flags, + Action: func(ctx context.Context, cmd *cli.Command) error { + return postBounty(cfg, ui.New(false), cmd, tt) + }, + }) + } + + return commands +} + +// paramFlagName converts a task-package param name to the CLI's kebab-case +// flag convention, e.g. hardwareClass -> hardware-class (the same mapping +// network.fieldNameToFlagName applies to template fields). +func paramFlagName(param string) string { + var b strings.Builder + for i, r := range param { + if i > 0 && r >= 'A' && r <= 'Z' { + b.WriteRune('-') + } + b.WriteRune(r) + } + + return strings.ToLower(b.String()) +} + +// postBounty builds a ServiceBounty CR from the flags + task-type defaults, +// shows the two-leg cost preview (reward escrow + OBOL eval bill), confirms in +// a TTY, and applies the manifest. +func postBounty(cfg *config.Config, u *ui.UI, cmd *cli.Command, t bounty.TaskType) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty post %s [flags]", t.ID) + } + + // Collect + validate the type's params against its schema. Flags are the + // kebab-case form of the param name; the CR keeps the package's name. 
+ params := make(map[string]string) + for _, p := range t.Params { + flag := paramFlagName(p.Name) + v := cmd.String(flag) + if v == "" { + v = p.Default + } + if p.Required && v == "" { + return fmt.Errorf("--%s is required for task type %s", flag, t.Ref()) + } + if len(p.Enum) > 0 && v != "" && !slices.Contains(p.Enum, v) { + return fmt.Errorf("--%s=%q is not one of [%s]", flag, v, strings.Join(p.Enum, ", ")) + } + if v != "" { + params[p.Name] = v + } + } + + evalK := int64(cmd.Int("eval-k")) + if evalK == 0 { + evalK = int64(t.Eval.DefaultK) + } + + evalMode := monetizeapi.EvalModeRequired + if cmd.Bool("dangerously-skip-verification") { + evalMode = monetizeapi.EvalModeDangerouslySkipped + } + + hardwareProof := cmd.String("hardware-proof") + if hardwareProof == "" { + hardwareProof = t.HardwareProof + } + switch hardwareProof { + case "", "self-report", "gpu-attestation", "evaluator-measured": + default: + return fmt.Errorf("--hardware-proof=%q is not one of [self-report, gpu-attestation, evaluator-measured]", hardwareProof) + } + + // Tolerance: the task type's bands, overlaid by --tolerance metric=band + // pairs (BenchLocal-style packs have their own metric keys). + tolerance := make(map[string]string, len(t.Acceptance.Tolerance)) + for k, v := range t.Acceptance.Tolerance { + tolerance[k] = v + } + if raw := cmd.String("tolerance"); raw != "" { + for _, pair := range strings.Split(raw, ",") { + metric, band, ok := strings.Cut(strings.TrimSpace(pair), "=") + if !ok || metric == "" || band == "" { + return fmt.Errorf("--tolerance entry %q is not metric=band", pair) + } + tolerance[metric] = band + } + } + + var deadline *metav1.Time + if d := cmd.String("deadline"); d != "" { + parsed, err := time.Parse(time.RFC3339, d) + if err != nil { + return fmt.Errorf("--deadline %q is not RFC3339 (e.g. 
2026-07-01T00:00:00Z): %w", d, err) + } + deadline = &metav1.Time{Time: parsed} + } + + sb := monetizeapi.ServiceBounty{ + TypeMeta: metav1.TypeMeta{ + APIVersion: monetizeapi.Group + "/" + monetizeapi.Version, + Kind: monetizeapi.ServiceBountyKind, + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: cmd.String("namespace"), + }, + Spec: monetizeapi.ServiceBountySpec{ + Task: monetizeapi.ServiceBountyTask{ + TypeRef: t.Ref(), + Params: params, + TargetModel: monetizeapi.ServiceOfferModel{Name: cmd.String("model"), Runtime: cmd.String("runtime")}, + HardwareProof: hardwareProof, + DatasetCommit: monetizeapi.ServiceBountyDatasetCommit{ + Root: cmd.String("dataset-commit"), + PrivateFraction: cmd.String("private-fraction"), + }, + }, + Acceptance: monetizeapi.ServiceBountyAcceptance{ + Method: t.Acceptance.Method, + Tolerance: tolerance, + CommitReveal: t.Acceptance.CommitReveal, + }, + Reward: monetizeapi.ServiceBountyReward{ + Network: cmd.String("chain"), + PayTo: cmd.String("pay-to"), + Asset: monetizeapi.ServiceOfferAsset{Symbol: cmd.String("asset")}, + Amount: cmd.String("reward"), + Escrow: monetizeapi.ServiceBountyEscrow{ + Scheme: cmd.String("escrow-scheme"), + Facilitator: cmd.String("facilitator"), + Mode: "auto", + }, + }, + Eval: monetizeapi.ServiceBountyEval{ + K: evalK, + Mode: evalMode, + Selection: t.Eval.Selection, + Payment: monetizeapi.ServiceBountyEvalPayment{ + Asset: t.Eval.Payment.Asset, + PerEvaluator: t.Eval.Payment.PerEvaluator, + FundedBy: t.Eval.Payment.FundedBy, + Settle: t.Eval.Payment.Settle, + }, + }, + Trust: monetizeapi.ServiceBountyTrust{ReputationGate: true}, + Deadline: deadline, + MaxFulfillers: int64(cmd.Int("max-fulfillers")), + }, + } + + if bond := cmd.String("bond"); bond != "" { + sb.Spec.Trust.SelfBond = monetizeapi.ServiceBountySelfBond{Required: true, Amount: bond, Token: cmd.String("asset")} + } + + if cmd.Bool("dry-run") { + out, err := json.MarshalIndent(sb, "", " ") + if err != nil { + return err + } + 
fmt.Printf("# ServiceBounty (dry-run)\n%s\n", out) + return nil + } + + printBountyCostPreview(u, &sb, t) + if !cmd.Bool("yes") && !u.Confirm("Proceed?", true) { + return fmt.Errorf("aborted") + } + + applyOut, err := kubectlApplyOutput(cfg, sb) + if err != nil { + return fmt.Errorf("apply ServiceBounty: %w", err) + } + fmt.Print(applyOut) + fmt.Printf("\nBounty posted. Check status: obol bounty status %s -n %s\n", name, sb.Namespace) + return nil +} + +// printBountyCostPreview shows the poster's full commitment before apply: the +// escrowed reward leg AND the OBOL eval bill (k × perEvaluator, paid to +// evaluators win-or-lose). Verification-by-default means the eval line is the +// part posters haven't already priced in — never let it surprise them. +func printBountyCostPreview(u *ui.UI, sb *monetizeapi.ServiceBounty, t bounty.TaskType) { + u.Print("──────────────────────────────────────────────────────────────") + u.Print(fmt.Sprintf(" Bounty: %s (%s)", sb.Name, sb.Spec.Task.TypeRef)) + u.Print(fmt.Sprintf(" Reward: %s %s on %s (%s escrow)", + sb.Spec.Reward.Amount, sb.Spec.Reward.Asset.Symbol, sb.Spec.Reward.Network, sb.Spec.Reward.Escrow.Scheme)) + if sb.Spec.Eval.Mode == monetizeapi.EvalModeDangerouslySkipped { + u.Warnf(" Verification: SKIPPED (--dangerously-skip-verification) — poster-as-judge, bounty marked unverified, no reputation feedback") + } else { + per := sb.Spec.Eval.Payment.PerEvaluator + line := fmt.Sprintf(" Verification: %d evaluators × %s %s", sb.Spec.Eval.K, per, sb.Spec.Eval.Payment.Asset) + if perF, err := strconv.ParseFloat(per, 64); err == nil { + line += fmt.Sprintf(" = %.2f %s", float64(sb.Spec.Eval.K)*perF, sb.Spec.Eval.Payment.Asset) + } + u.Print(line + " (poster-funded, paid win-or-lose)") + } + if sb.Spec.Trust.SelfBond.Required { + u.Print(fmt.Sprintf(" Fulfiller bond: %s %s (refundable; forfeited on rejected work)", sb.Spec.Trust.SelfBond.Amount, sb.Spec.Trust.SelfBond.Token)) + } + if sb.Spec.Deadline != nil { + 
u.Print(fmt.Sprintf(" Deadline: %s (auto-refund past it)", sb.Spec.Deadline.UTC().Format(time.RFC3339))) + } + u.Print("──────────────────────────────────────────────────────────────") +} + +// ── lifecycle verbs ───────────────────────────────────────────────────────── +// +// claim/submit/accept/reject write the controller's annotation channel +// (obol.org/claim|commit|submit|verdict); the reconcile loop validates and +// promotes them into controller-owned status. + +func bountyResource() string { return "servicebounties.obol.org" } + +func bountyListCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "list", + Usage: "List ServiceBounties", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace (default: all namespaces)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + bin, kc := kubectl.Paths(cfg) + args := []string{"get", bountyResource()} + if ns := cmd.String("namespace"); ns != "" { + args = append(args, "-n", ns) + } else { + args = append(args, "-A") + } + out, err := kubectl.Output(bin, kc, args...) 
+ if err != nil { + return err + } + fmt.Print(out) + return nil + }, + } +} + +func bountyStatusCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "status", + Usage: "Show a bounty's phase, conditions, claims, and escrow state", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty status ") + } + bin, kc := kubectl.Paths(cfg) + out, err := kubectl.Output(bin, kc, "get", bountyResource(), name, "-n", cmd.String("namespace"), "-o", "json") + if err != nil { + return err + } + + var sb monetizeapi.ServiceBounty + if err := json.Unmarshal([]byte(out), &sb); err != nil { + return fmt.Errorf("decode bounty: %w", err) + } + + fmt.Printf("%s (%s)\n", sb.Name, sb.Spec.Task.TypeRef) + fmt.Printf(" Phase: %s\n", sb.Status.Phase) + fmt.Printf(" Reward: %s %s on %s (escrow: %s)\n", sb.Spec.Reward.Amount, sb.Spec.Reward.Asset.Symbol, sb.Spec.Reward.Network, valueOr(sb.Status.EscrowState, "not reserved")) + if sb.Status.CaptureTxHash != "" { + fmt.Printf(" Payout: %s\n", sb.Status.CaptureTxHash) + } + if sb.Status.RefundTxHash != "" { + fmt.Printf(" Refund: %s\n", sb.Status.RefundTxHash) + } + if sb.Status.ReportURI != "" { + fmt.Printf(" Report: %s\n", sb.Status.ReportURI) + } + for _, claim := range sb.Status.Claims { + fmt.Printf(" Claim: %s phase=%s commit=%s\n", claim.FulfillerAddress, claim.Phase, valueOr(claim.CommitHash, "-")) + } + if sb.Status.BondState != "" { + fmt.Printf(" Bond: %s\n", sb.Status.BondState) + } + if len(sb.Status.Evaluations) > 0 { + fmt.Printf(" Evaluations (quorum k=%d, median>=50 verifies):\n", sb.Spec.Eval.K) + if sb.Status.RevealDeadline != nil { + fmt.Printf(" reveal window closes %s\n", sb.Status.RevealDeadline.UTC().Format(time.RFC3339)) + } + for _, ev := range 
sb.Status.Evaluations { + score := "-" + if ev.Phase == "Revealed" { + score = fmt.Sprintf("%d", ev.Score) + } + fmt.Printf(" %s seat=%-9s phase=%-10s score=%-4s withinBand=%-5v paid=%v\n", + ev.Address, valueOr(ev.Seat, "open"), ev.Phase, score, ev.WithinBand, ev.Paid) + } + if sb.Status.EvalBudgetState != "" { + fmt.Printf(" eval budget: %s", sb.Status.EvalBudgetState) + if sb.Status.EvalPayoutTxHash != "" { + fmt.Printf(" payout=%s", sb.Status.EvalPayoutTxHash) + } + fmt.Println() + } + } + fmt.Println(" Conditions:") + for _, condition := range sb.Status.Conditions { + fmt.Printf(" %-15s %-5s %-22s %s\n", condition.Type, condition.Status, condition.Reason, condition.Message) + } + return nil + }, + } +} + +func bountyClaimCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "claim", + Usage: "Claim a bounty as a fulfiller (binds your payout address)", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "address", Usage: "[REQUIRED] Fulfiller payout address (0x...)", Required: true}, + &cli.StringFlag{Name: "commit", Usage: "Optional commit hash (binds you to a specific deliverable before reveal)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty claim --address 0x...") + } + annotations := []string{"obol.org/claim=" + cmd.String("address")} + if commit := cmd.String("commit"); commit != "" { + annotations = append(annotations, "obol.org/commit="+commit) + } + return annotateBountyCLI(cfg, cmd.String("namespace"), name, annotations) + }, + } +} + +func bountySubmitCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "submit", + Usage: "Submit a deliverable for a claimed bounty", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: 
"Namespace", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "result-hash", Usage: "[REQUIRED] Hash of the deliverable (reveals the commit)", Required: true}, + &cli.StringFlag{Name: "report-uri", Usage: "URI of the A2UI report (local agent hierarchy in v1)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty submit --result-hash 0x...") + } + submission, err := json.Marshal(map[string]string{ + "resultHash": cmd.String("result-hash"), + "reportURI": cmd.String("report-uri"), + }) + if err != nil { + return err + } + return annotateBountyCLI(cfg, cmd.String("namespace"), name, []string{"obol.org/submit=" + string(submission)}) + }, + } +} + +func bountyVerdictCommand(cfg *config.Config, verdict, usage string) *cli.Command { + return &cli.Command{ + Name: verdict, + Usage: usage, + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "reason", Usage: "Reason (recorded in the Verified condition; reject only)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty %s ", verdict) + } + value := verdict + if verdict == "reject" { + value = "reject:" + cmd.String("reason") + } + return annotateBountyCLI(cfg, cmd.String("namespace"), name, []string{"obol.org/verdict=" + value}) + }, + } +} + +func annotateBountyCLI(cfg *config.Config, namespace, name string, annotations []string) error { + bin, kc := kubectl.Paths(cfg) + args := append([]string{"annotate", bountyResource(), name, "-n", namespace, "--overwrite"}, annotations...) + out, err := kubectl.Output(bin, kc, args...) 
+ if err != nil { + return err + } + fmt.Print(out) + fmt.Printf("Check status: obol bounty status %s -n %s\n", name, namespace) + return nil +} + +func valueOr(value, fallback string) string { + if strings.TrimSpace(value) == "" { + return fallback + } + return value +} diff --git a/cmd/obol/main.go b/cmd/obol/main.go index d3432da5..97092493 100644 --- a/cmd/obol/main.go +++ b/cmd/obol/main.go @@ -325,6 +325,7 @@ GLOBAL OPTIONS:{{template "visibleFlagTemplate" .}}{{end}} openclawCommand(cfg), sellCommand(cfg), buyCommand(cfg), + bountyCommand(cfg), modelCommand(cfg), { Name: "app", diff --git a/cmd/obol/sell.go b/cmd/obol/sell.go index 6e1287c7..e52ef949 100644 --- a/cmd/obol/sell.go +++ b/cmd/obol/sell.go @@ -602,6 +602,10 @@ Examples: Name: "facilitator", Usage: "x402 facilitator URL (verify/settle)", }, + &cli.StringFlag{ + Name: "bounty-reports-dir", + Usage: "Directory serving ServiceBounty A2UI reports via the free bounty_report tool (default: $OBOL_DATA_DIR/bounty-reports)", + }, }, Action: func(ctx context.Context, cmd *cli.Command) error { u := getUI(cmd) @@ -624,18 +628,24 @@ Examples: return err } + reportsDir := cmd.String("bounty-reports-dir") + if reportsDir == "" { + reportsDir = filepath.Join(cfg.DataDir, "bounty-reports") + } + u.Infof("Starting paid MCP server %q on port %d (Ctrl-C to stop)", name, cmd.Int("port")) return x402mcp.Serve(ctx, x402mcp.Options{ - Name: name, - ToolName: cmd.String("tool-name"), - Description: cmd.String("description"), - Port: cmd.Int("port"), - PayTo: payTo, - Price: cmd.String("price"), - Chain: cmd.String("chain"), - FacilitatorURL: facilitator, - Upstream: cmd.String("upstream"), - UpstreamHeaders: headers, + Name: name, + ToolName: cmd.String("tool-name"), + Description: cmd.String("description"), + Port: cmd.Int("port"), + PayTo: payTo, + Price: cmd.String("price"), + Chain: cmd.String("chain"), + FacilitatorURL: facilitator, + Upstream: cmd.String("upstream"), + UpstreamHeaders: headers, + BountyReportsDir: 
reportsDir, }) }, } diff --git a/internal/bounty/registry.go b/internal/bounty/registry.go new file mode 100644 index 00000000..a0cc4f1e --- /dev/null +++ b/internal/bounty/registry.go @@ -0,0 +1,215 @@ +// Package bounty loads the embedded, versioned ServiceBounty task-type +// packages (internal/embed/bountytasks//task.yaml). A task type is a +// self-describing unit — param schema, eval method + tolerance, OBOL eval +// pricing, hardware-proof policy, and the A2UI report schema — discovered +// dynamically the same way networks are (internal/embed/networks). Adding a +// task type is dropping in a directory; the CRD and controller never change. +package bounty + +import ( + "fmt" + "sort" + + "gopkg.in/yaml.v3" + + "github.com/ObolNetwork/obol-stack/internal/embed" +) + +// Param is one knob in a task type's schema. It generates a CLI flag for +// `obol bounty post ` and is validated against spec.task.params. +type Param struct { + Name string `yaml:"name"` + Type string `yaml:"type"` // string | int | enum + Default string `yaml:"default"` + Enum []string `yaml:"enum"` + Required bool `yaml:"required"` // missing/empty value rejects the bounty at admission + Description string `yaml:"description"` +} + +// EvalPayment is the OBOL-denominated evaluator payment leg (separate from the +// reward — x402 cannot splice a fee out of the reward auth). +type EvalPayment struct { + Asset string `yaml:"asset"` + PerEvaluator string `yaml:"perEvaluator"` + FundedBy string `yaml:"fundedBy"` + Settle string `yaml:"settle"` +} + +// Ladder is the evaluator cold-start ladder (design doc §11.4): Shadow (free, +// randomly assigned, graded against the quorum median but never counted) → +// Probation (one reserved quorum seat at reduced pay, value-capped bounties +// only) → Full. Thresholds are per-task-type constants, not protocol globals. +type Ladder struct { + // ShadowAgreements within tolerance of the quorum median promote a + // shadow evaluator to Probation. 
+ ShadowAgreements int `yaml:"shadowAgreements"` + + // ProbationEvals without divergence promote a probationer to Full. + ProbationEvals int `yaml:"probationEvals"` + + // ProbationValueCap is the reward (human units) above which no probation + // seat is offered — high-value bounties get an all-Full quorum. + ProbationValueCap string `yaml:"probationValueCap"` + + // RevealWindow is the commit→reveal duration; every commit closes before + // any reveal opens (selective-revelation guard). + RevealWindow string `yaml:"revealWindow"` + + // NonRevealPenalty grades a missing reveal; "outlier" treats it as a + // worst-case divergence so silent abstention is never the cheap exit. + NonRevealPenalty string `yaml:"nonRevealPenalty"` +} + +type Eval struct { + DefaultK int `yaml:"defaultK"` + Selection string `yaml:"selection"` + Payment EvalPayment `yaml:"payment"` + Ladder Ladder `yaml:"ladder"` +} + +type Acceptance struct { + Method string `yaml:"method"` + CommitReveal bool `yaml:"commitReveal"` + Tolerance map[string]string `yaml:"tolerance"` +} + +type Artifact struct { + Name string `yaml:"name"` + Kind string `yaml:"kind"` + Required bool `yaml:"required"` +} + +// ReportVariant is one A2UI rendering of the deliverable's result data. +// kind=declarative is the lean default: an operations file (create_surface → +// update_components → update_data_model) the client renders natively from its +// compiled-in catalog — no custom code, no iframes. kind=mcp-app is the +// MCP-Apps escape hatch: `surface` is self-contained HTML served url_encoded +// inside an A2UI `custom` McpApp node's properties.content; the CLIENT +// supplies the double-iframe isolation (sandbox proxy + srcdoc inner frame, +// never allow-same-origin) — the server only ever returns JSON. 
+type ReportVariant struct {
+	Kind      string `yaml:"kind"`      // declarative | mcp-app
+	Surface   string `yaml:"surface"`   // file in the task package
+	CatalogID string `yaml:"catalogId"` // stable id negotiated against the client's supportedCatalogIds
+}
+
+// Report carries the variants in preference order. The serving side (FE
+// locally, the stack MCP server cross-party) picks the first variant whose
+// catalogId the client advertises (a2ui catalog negotiation, locked per
+// surface); a client matching nothing falls back to the raw artifacts.
+type Report struct {
+	Variants []ReportVariant `yaml:"variants"`
+}
+
+// Deliverable is the `deliverable` section of task.yaml: the report
+// renderings, the gate in front of them, and the artifacts that accompany a
+// submission.
+type Deliverable struct {
+	Report    Report     `yaml:"report"`
+	Gate      string     `yaml:"gate"` // local | mcp-x402 | sign-in-with-x
+	Artifacts []Artifact `yaml:"artifacts"`
+}
+
+// TaskType is a parsed task-type package.
+type TaskType struct {
+	// ID and Version together form the portable reference returned by Ref.
+	ID      string `yaml:"id"`
+	Version int    `yaml:"version"`
+	// Runner names the host-side runner that fulfills the task.
+	Runner string `yaml:"runner"`
+	// Enabled is false for packages that ship staged but are not yet live.
+	Enabled bool   `yaml:"enabled"`
+	Summary string `yaml:"summary"`
+	// Requires lists the capability tags a fulfiller must advertise to claim.
+	Requires   []string   `yaml:"requires"`
+	Params     []Param    `yaml:"params"`
+	Acceptance Acceptance `yaml:"acceptance"`
+	Eval       Eval       `yaml:"eval"`
+	// HardwareProof is the default proof policy, e.g. "self-report".
+	HardwareProof string      `yaml:"hardwareProof"`
+	Deliverable   Deliverable `yaml:"deliverable"`
+}
+
+// Ref is the portable, versioned reference written into
+// ServiceBounty.spec.task.typeRef, e.g. "benchmark@v1".
+func (t TaskType) Ref() string {
+	// Format is "<id>@v<version>"; Version is printed as a plain decimal.
+	return fmt.Sprintf("%s@v%d", t.ID, t.Version)
+}
+
+// Load reads and parses a single embedded task-type package by directory name.
+func Load(name string) (TaskType, error) {
+	// task.yaml is the package manifest; a read failure is returned unchanged.
+	manifest, readErr := embed.ReadEmbeddedBountyTaskFile(name, "task.yaml")
+	if readErr != nil {
+		return TaskType{}, readErr
+	}
+
+	var parsed TaskType
+	if parseErr := yaml.Unmarshal(manifest, &parsed); parseErr != nil {
+		return TaskType{}, fmt.Errorf("parse task type %q: %w", name, parseErr)
+	}
+
+	// The id is what Ref() prints and what lookups match on, so a manifest
+	// without one is rejected outright.
+	if parsed.ID == "" {
+		return TaskType{}, fmt.Errorf("task type %q: missing id", name)
+	}
+
+	return parsed, nil
+}
+
+// Available returns every embedded task type (enabled or not), sorted by id.
+// The first package that fails to load aborts the whole listing.
+func Available() ([]TaskType, error) {
+	dirs, err := embed.GetAvailableBountyTasks()
+	if err != nil {
+		return nil, err
+	}
+
+	loaded := make([]TaskType, 0, len(dirs))
+	for _, dir := range dirs {
+		task, loadErr := Load(dir)
+		if loadErr != nil {
+			return nil, loadErr
+		}
+		loaded = append(loaded, task)
+	}
+
+	// Order by id rather than by directory name so output is stable even if
+	// a package's id differs from its directory.
+	byID := func(a, b int) bool { return loaded[a].ID < loaded[b].ID }
+	sort.Slice(loaded, byID)
+
+	return loaded, nil
+}
+
+// Enabled returns only the task types live in this release. Shipping a type
+// with enabled:false stages it (e.g. finetune) before it is turned on.
+func Enabled() ([]TaskType, error) {
+	every, err := Available()
+	if err != nil {
+		return nil, err
+	}
+
+	live := make([]TaskType, 0, len(every))
+	for _, task := range every {
+		if !task.Enabled {
+			continue
+		}
+		live = append(live, task)
+	}
+
+	return live, nil
+}
+
+// Resolve resolves an `id` ("benchmark") or a versioned ref ("benchmark@v1")
+// to its task type. It errors if the type is unknown or disabled.
+func Resolve(ref string) (TaskType, error) {
+	// Everything before the first '@' is the id. A ref without '@' is a bare
+	// id and matches whichever version is embedded.
+	id := ref
+	for i := 0; i < len(ref); i++ {
+		if ref[i] == '@' {
+			id = ref[:i]
+			break
+		}
+	}
+
+	all, err := Available()
+	if err != nil {
+		return TaskType{}, err
+	}
+
+	for _, t := range all {
+		if t.ID != id {
+			continue
+		}
+		if !t.Enabled {
+			return TaskType{}, fmt.Errorf("task type %q is not enabled in this release", id)
+		}
+		// A versioned ref must name the embedded version exactly. Resolving
+		// "benchmark@v2" (or a malformed "benchmark@") to the v1 package
+		// would apply v1's schema and tolerances under a different label.
+		if ref != id && ref != t.Ref() {
+			return TaskType{}, fmt.Errorf("task type ref %q does not match the embedded version %q", ref, t.Ref())
+		}
+		return t, nil
+	}
+
+	return TaskType{}, fmt.Errorf("unknown task type %q", ref)
+}
diff --git a/internal/bounty/registry_test.go b/internal/bounty/registry_test.go
new file mode 100644
index 00000000..13a722cc
--- /dev/null
+++ b/internal/bounty/registry_test.go
@@ -0,0 +1,173 @@
+package bounty
+
+import (
+	"testing"
+
+	"github.com/ObolNetwork/obol-stack/internal/embed"
+)
+
+// TestEnabled_IncludesBenchmark pins the benchmark package's shipped policy.
+func TestEnabled_IncludesBenchmark(t *testing.T) {
+	types, err := Enabled()
+	if err != nil {
+		t.Fatalf("Enabled: %v", err)
+	}
+
+	var bench *TaskType
+	for i := range types {
+		if types[i].ID == "benchmark" {
+			bench = &types[i]
+			break
+		}
+	}
+	if bench == nil {
+		t.Fatalf("benchmark task type not enabled; got %d types", len(types))
+	}
+
+	if got := bench.Ref(); got != "benchmark@v1" {
+		t.Errorf("Ref() = %q, want benchmark@v1", got)
+	}
+	if bench.Acceptance.Method != "rerun-tolerance" {
+		t.Errorf("acceptance.method = %q, want rerun-tolerance (benchmarks are not bit-exact)", bench.Acceptance.Method)
+	}
+	if bench.Eval.Payment.Asset != "OBOL" {
+		t.Errorf("eval paid in %q, want OBOL (separate eval leg)", bench.Eval.Payment.Asset)
+	}
+	if bench.Eval.Payment.Settle != "batch-settlement" {
+		t.Errorf("eval settle = %q, want batch-settlement", bench.Eval.Payment.Settle)
+	}
+	if len(bench.Params) == 0 {
+		t.Error("benchmark has no params; CLI flags would be empty")
+	}
+
+	// Median-of-k quorum: k must be >=3 whenever a probation seat can be
+	// occupied — the median absorbing one outlier is what makes the newcomer
+	// seat verdict-safe (design doc §11.4).
+	if bench.Eval.DefaultK < 3 {
+		t.Errorf("eval.defaultK = %d, want >=3 (median-of-k with a probation seat)", bench.Eval.DefaultK)
+	}
+
+	// Ladder thresholds are per-type constants; zero values would make the
+	// cold-start ladder unclimbable (no promotions) or the reveal window
+	// degenerate (no selective-revelation guard).
+	ladder := bench.Eval.Ladder
+	if ladder.ShadowAgreements <= 0 {
+		t.Errorf("ladder.shadowAgreements = %d, want >0", ladder.ShadowAgreements)
+	}
+	if ladder.ProbationEvals <= 0 {
+		t.Errorf("ladder.probationEvals = %d, want >0", ladder.ProbationEvals)
+	}
+	if ladder.ProbationValueCap == "" {
+		t.Error("ladder.probationValueCap is empty; probation seats would be unbounded by value")
+	}
+	if ladder.RevealWindow == "" {
+		t.Error("ladder.revealWindow is empty; commits and reveals would not be separated")
+	}
+	if ladder.NonRevealPenalty != "outlier" {
+		t.Errorf("ladder.nonRevealPenalty = %q, want outlier (non-reveal must cost >= divergence)", ladder.NonRevealPenalty)
+	}
+
+	// Report variants drive a2ui catalog negotiation: the first variant whose
+	// catalogId the client advertises wins. The lean default is declarative;
+	// the mcp-app variant is what generic MCP-Apps hosts render (the server
+	// only serves JSON — double-iframe isolation is the client's job).
+	variants := bench.Deliverable.Report.Variants
+	if len(variants) < 2 {
+		t.Fatalf("report has %d variants, want >=2 (declarative + mcp-app)", len(variants))
+	}
+	if variants[0].Kind != "declarative" {
+		t.Errorf("first variant kind = %q, want declarative (the lean default must win negotiation)", variants[0].Kind)
+	}
+	hasMCPApp := false
+	for _, v := range variants {
+		if v.Kind == "mcp-app" {
+			hasMCPApp = true
+		}
+		if v.CatalogID == "" {
+			t.Errorf("variant %s/%s has empty catalogId; negotiation would never select it", v.Kind, v.Surface)
+		}
+		if _, err := embed.ReadEmbeddedBountyTaskFile("benchmark", v.Surface); err != nil {
+			t.Errorf("variant surface %q is not in the embedded package: %v", v.Surface, err)
+		}
+	}
+	if !hasMCPApp {
+		t.Error("no mcp-app variant; generic MCP-Apps clients would have no rendering")
+	}
+}
+
+// TestResolve covers bare ids, matching versioned refs, and the refusals.
+func TestResolve(t *testing.T) {
+	for _, ref := range []string{"benchmark", "benchmark@v1"} {
+		got, err := Resolve(ref)
+		if err != nil {
+			t.Errorf("Resolve(%q): %v", ref, err)
+			continue
+		}
+		if got.ID != "benchmark" {
+			t.Errorf("Resolve(%q).ID = %q", ref, got.ID)
+		}
+	}
+
+	if _, err := Resolve("does-not-exist"); err == nil {
+		t.Error("Resolve(unknown) should error")
+	}
+
+	// A versioned ref must match the embedded version; a stale, future, or
+	// malformed ref must not silently resolve to whatever ships.
+	for _, ref := range []string{"benchmark@v99", "benchmark@"} {
+		if _, err := Resolve(ref); err == nil {
+			t.Errorf("Resolve(%q) should error on a version mismatch", ref)
+		}
+	}
+}
+
+// benchlocal@v1 wraps third-party BenchLocal packs — pack code IS the scorer
+// and the BenchLocal registry has no checksums, so packCommit MUST be a
+// required param: without a byte pin, rerun-tolerance verification is theater.
+func TestEnabled_BenchlocalRequiresPackCommit(t *testing.T) { + bl, err := Resolve("benchlocal@v1") + if err != nil { + t.Fatalf("Resolve(benchlocal@v1): %v", err) + } + + required := map[string]bool{} + for _, p := range bl.Params { + if p.Required { + required[p.Name] = true + } + } + for _, name := range []string{"pack", "packVersion", "packCommit"} { + if !required[name] { + t.Errorf("param %s must be required (pack bytes are unpinned without it)", name) + } + } + if _, ok := bl.Acceptance.Tolerance["totalScore"]; !ok { + t.Error("benchlocal tolerance must key on totalScore (the BenchmarkScore primary metric)") + } +} + +// finetune@v1 ships staged: present in Available (schema reviewable), absent +// from Enabled (not postable), refused by Resolve (not claimable/admittable). +// This is the registry's whole staging mechanism — pin it. +func TestStaging_FinetuneShippedButDisabled(t *testing.T) { + all, err := Available() + if err != nil { + t.Fatalf("Available: %v", err) + } + var staged *TaskType + for i := range all { + if all[i].ID == "finetune" { + staged = &all[i] + } + } + if staged == nil { + t.Fatal("finetune package missing from Available — staging mechanism has nothing staged") + } + if staged.Enabled { + t.Fatal("finetune must ship enabled:false until the MLX-LoRA runner + held-out re-eval land") + } + + enabled, err := Enabled() + if err != nil { + t.Fatalf("Enabled: %v", err) + } + for _, e := range enabled { + if e.ID == "finetune" { + t.Error("Enabled() must exclude disabled packages") + } + } + + if _, err := Resolve("finetune"); err == nil { + t.Error("Resolve(finetune) must refuse disabled types at admission") + } +} diff --git a/internal/embed/bountytasks/benchlocal/report.a2ui.json b/internal/embed/bountytasks/benchlocal/report.a2ui.json new file mode 100644 index 00000000..fd53f2a1 --- /dev/null +++ b/internal/embed/bountytasks/benchlocal/report.a2ui.json @@ -0,0 +1,65 @@ +{ + "$comment": "benchlocal@v1 deliverable as an ordered A2UI 
v1.0-candidate message list against the STANDARD basic catalog. The runner fills updateDataModel from the pack's BenchmarkScore (totalScore + categories) with display-ready strings; machine truth lives in the results.json artifact. Category rows bind relative to each /categories item.", + "messages": [ + { + "version": "v1.0", + "createSurface": { + "surfaceId": "obol-bounty-benchlocal-report", + "catalogId": "https://a2ui.org/specification/v1_0/catalogs/basic/catalog.json" + } + }, + { + "version": "v1.0", + "updateComponents": { + "surfaceId": "obol-bounty-benchlocal-report", + "components": [ + { "id": "root", "component": "Card", "child": "layout" }, + { + "id": "layout", "component": "Column", + "children": [ + "title", "subtitle", "verdict", "div-score", + "total-score", "categories-list", "scenarios-summary", + "div-provenance", "provenance" + ] + }, + { "id": "title", "component": "Text", "variant": "h2", "text": { "path": "/model/name" } }, + { "id": "subtitle", "component": "Text", "variant": "caption", "text": { "path": "/model/subtitle" } }, + { "id": "verdict", "component": "Text", "variant": "h3", "text": { "path": "/verdict/label" } }, + { "id": "div-score", "component": "Divider" }, + { "id": "total-score", "component": "Text", "variant": "h3", "text": { "path": "/totalScore" } }, + { + "id": "categories-list", "component": "List", "direction": "vertical", + "children": { "componentId": "category-row", "path": "/categories" } + }, + { + "id": "category-row", "component": "Row", + "children": ["c-label", "c-score"] + }, + { "id": "c-label", "component": "Text", "text": { "path": "/label" } }, + { "id": "c-score", "component": "Text", "text": { "path": "/score" } }, + { "id": "scenarios-summary", "component": "Text", "variant": "caption", "text": { "path": "/scenarios/summary" } }, + { "id": "div-provenance", "component": "Divider" }, + { "id": "provenance", "component": "Text", "variant": "caption", "text": { "path": "/provenance/line" } } + ] + } + 
}, + { + "version": "v1.0", + "updateDataModel": { + "surfaceId": "obol-bounty-benchlocal-report", + "path": "/", + "value": { + "$comment": "Filled by the runner; the shape below is the contract (display-ready placeholders).", + "model": { "name": "", "subtitle": "" }, + "verdict": { "label": "" }, + "totalScore": "", + "categories": [ + { "label": "", "score": "" } + ], + "scenarios": { "summary": "" }, + "provenance": { "line": "" } + } + } + } + ] +} diff --git a/internal/embed/bountytasks/benchlocal/report.app.html b/internal/embed/bountytasks/benchlocal/report.app.html new file mode 100644 index 00000000..d1978d75 --- /dev/null +++ b/internal/embed/bountytasks/benchlocal/report.app.html @@ -0,0 +1,37 @@ + + + + + + {{model.name}} — benchlocal@v1 + + + +

{{model.name}} {{model.subtitle}}

+

{{verdict.label}}

+

{{totalScore}}

+ + + {{categories.rows}} +
categoryscore
+

{{scenarios.summary}}

+
{{provenance.line}}
+ + diff --git a/internal/embed/bountytasks/benchlocal/task.yaml b/internal/embed/bountytasks/benchlocal/task.yaml new file mode 100644 index 00000000..7686201d --- /dev/null +++ b/internal/embed/bountytasks/benchlocal/task.yaml @@ -0,0 +1,91 @@ +# benchlocal@v1 — a ServiceBounty task type wrapping BenchLocal bench packs +# (github.com/stevibe/BenchLocal): installable TypeScript packs (ToolCall-15, +# CLI-40, BugFind-15, …) run against any OpenAI-compatible endpoint, emitting +# per-scenario pass|partial|fail and an aggregated BenchmarkScore +# {totalScore, categories[{id,label,score,weight}]}. +# +# Integrity note (the reason packCommit is REQUIRED): the BenchLocal registry +# resolves packs as {id, version, source:{repo, tag}} with NO checksums or +# signatures, and a git tag is mutable. A pack is CODE — the scorer ships +# inside it — so rerun-tolerance verification is meaningless unless poster, +# fulfiller, and evaluators all run byte-identical pack bytes. The commit SHA +# (or archive sha256) is that pin. +id: benchlocal +version: 1 +runner: benchlocal # host-side runner drives the BenchLocal agent control API +enabled: true +summary: Run a pinned BenchLocal bench pack against a model on declared hardware; verified by independent re-run within tolerance. + +requires: + - benchlocal + +params: + - name: pack + type: string + required: true + description: BenchLocal pack id from the registry (e.g. toolcall-15, cli-40, bugfind-15). + - name: packVersion + type: string + required: true + description: Pack version (the registry source.tag, e.g. 1.0.0). + - name: packCommit + type: string + required: true + description: Git commit SHA (or archive sha256) pinning the pack bytes — the registry has no checksums and tags are mutable; evaluators must re-run identical scoring code. + - name: scenarios + type: string + description: Optional comma-separated scenario-id filter (default = the whole pack). 
+ - name: hardwareClass + type: string + default: any + description: Declared hardware class — free-form so specific GPUs are expressible (e.g. RTX-4090, H100, M4-Max). + - name: temperature + type: string + default: "0" + description: Sampling temperature; BenchLocal packs default to 0 (greedy) — keep 0 for reproducible scoring. There is no seed surface in the bench protocol. + +# BenchmarkScore.totalScore is the primary gate; per-category bands (metric = +# the category id, e.g. overall) can be added per-bounty via --tolerance. +acceptance: + method: rerun-tolerance + commitReveal: true + tolerance: + totalScore: "0.05" + +eval: + defaultK: 3 # median-of-k; k>=3 whenever a probation seat is occupied + selection: vrf-reputation-weighted + payment: + asset: OBOL + perEvaluator: "2.00" + fundedBy: poster + settle: batch-settlement + ladder: + shadowAgreements: 5 + probationEvals: 10 + probationValueCap: "50.00" + revealWindow: 10m + nonRevealPenalty: outlier + +# Pack scores are hardware-agnostic (pass/fail scoring), so self-report is the +# honest default; bounties pinning a specific GPU should post with +# --hardware-proof gpu-attestation or evaluator-measured. 
+hardwareProof: self-report + +deliverable: + report: + variants: + - kind: declarative + surface: report.a2ui.json + catalogId: https://a2ui.org/specification/v1_0/catalogs/basic/catalog.json + - kind: mcp-app + surface: report.app.html + catalogId: obol.org:mcp-app/v1 + gate: local # local | mcp-x402 | sign-in-with-x + artifacts: + - name: results.json + kind: eval-report + required: true + - name: run.manifest + kind: provenance + required: true diff --git a/internal/embed/bountytasks/benchmark/report.a2ui.json b/internal/embed/bountytasks/benchmark/report.a2ui.json new file mode 100644 index 00000000..7cbc1140 --- /dev/null +++ b/internal/embed/bountytasks/benchmark/report.a2ui.json @@ -0,0 +1,80 @@ +{ + "$comment": "benchmark@v1 deliverable as an ordered A2UI v1.0-candidate message list (a2ui-project/a2ui, specification/v1_0). Targets the STANDARD basic catalog, so any v1.0 renderer (obol FE react renderer included) draws the scorecard — no custom catalog required. The runner fills updateDataModel with display-ready strings; machine-readable numbers live in the results.json artifact (kind: eval-report), which is the agent-facing source of truth. 
List rows bind relative to each /scores item.", + "messages": [ + { + "version": "v1.0", + "createSurface": { + "surfaceId": "obol-bounty-benchmark-report", + "catalogId": "https://a2ui.org/specification/v1_0/catalogs/basic/catalog.json" + } + }, + { + "version": "v1.0", + "updateComponents": { + "surfaceId": "obol-bounty-benchmark-report", + "components": [ + { "id": "root", "component": "Card", "child": "layout" }, + { + "id": "layout", "component": "Column", + "children": [ + "title", "subtitle", "verdict", "div-scores", + "scores-header", "scores-list", "div-hardware", + "hw-title", "hw-class", "hw-proof", "hw-throughput", + "div-provenance", "provenance" + ] + }, + { "id": "title", "component": "Text", "variant": "h2", "text": { "path": "/model/name" } }, + { "id": "subtitle", "component": "Text", "variant": "caption", "text": { "path": "/model/subtitle" } }, + { "id": "verdict", "component": "Text", "variant": "h3", "text": { "path": "/verdict/label" } }, + { "id": "div-scores", "component": "Divider" }, + { + "id": "scores-header", "component": "Row", + "children": ["h-task", "h-claimed", "h-verified", "h-tolerance", "h-pass"] + }, + { "id": "h-task", "component": "Text", "variant": "caption", "text": "task" }, + { "id": "h-claimed", "component": "Text", "variant": "caption", "text": "claimed" }, + { "id": "h-verified", "component": "Text", "variant": "caption", "text": "verified" }, + { "id": "h-tolerance", "component": "Text", "variant": "caption", "text": "tolerance" }, + { "id": "h-pass", "component": "Text", "variant": "caption", "text": "pass" }, + { + "id": "scores-list", "component": "List", "direction": "vertical", + "children": { "componentId": "score-row", "path": "/scores" } + }, + { + "id": "score-row", "component": "Row", + "children": ["s-task", "s-claimed", "s-verified", "s-tolerance", "s-pass"] + }, + { "id": "s-task", "component": "Text", "text": { "path": "/task" } }, + { "id": "s-claimed", "component": "Text", "text": { "path": "/claimed" 
} }, + { "id": "s-verified", "component": "Text", "text": { "path": "/verified" } }, + { "id": "s-tolerance", "component": "Text", "text": { "path": "/tolerance" } }, + { "id": "s-pass", "component": "Text", "text": { "path": "/pass" } }, + { "id": "div-hardware", "component": "Divider" }, + { "id": "hw-title", "component": "Text", "variant": "h3", "text": "Hardware" }, + { "id": "hw-class", "component": "Text", "text": { "path": "/hardware/class" } }, + { "id": "hw-proof", "component": "Text", "text": { "path": "/hardware/proof" } }, + { "id": "hw-throughput", "component": "Text", "text": { "path": "/hardware/throughput" } }, + { "id": "div-provenance", "component": "Divider" }, + { "id": "provenance", "component": "Text", "variant": "caption", "text": { "path": "/provenance/line" } } + ] + } + }, + { + "version": "v1.0", + "updateDataModel": { + "surfaceId": "obol-bounty-benchmark-report", + "path": "/", + "value": { + "$comment": "Filled by the runner; the shape below is the contract (display-ready placeholders).", + "model": { "name": "", "subtitle": "" }, + "verdict": { "label": "" }, + "scores": [ + { "task": "", "claimed": "", "verified": "", "tolerance": "", "pass": "" } + ], + "hardware": { "class": "", "proof": "", "throughput": "" }, + "provenance": { "line": "" } + } + } + } + ] +} diff --git a/internal/embed/bountytasks/benchmark/report.app.html b/internal/embed/bountytasks/benchmark/report.app.html new file mode 100644 index 00000000..3a0140d9 --- /dev/null +++ b/internal/embed/bountytasks/benchmark/report.app.html @@ -0,0 +1,40 @@ + + + + + + {{model.name}} — benchmark@v1 + + + +

{{model.name}} {{model.subtitle}}

+

{{verdict.label}}

+ + + {{scores.rows}} +
taskclaimedverifiedtolerancepass
+
+
hardwareClass
{{hardware.class}}
+
proof
{{hardware.proof}}
+
throughput
{{hardware.throughput}}
+
+
{{provenance.line}}
+ + diff --git a/internal/embed/bountytasks/benchmark/task.yaml b/internal/embed/bountytasks/benchmark/task.yaml new file mode 100644 index 00000000..33bd1d2d --- /dev/null +++ b/internal/embed/bountytasks/benchmark/task.yaml @@ -0,0 +1,115 @@ +# benchmark@v1 — a ServiceBounty task type. +# +# A task type is a self-describing, prepackaged unit (mirrors the dynamic +# network-install registry in internal/embed/networks/). It owns: the param +# schema that generates `obol bounty post benchmark` flags, the eval method + +# tolerance, the OBOL eval pricing, the hardware-proof policy, and the A2UI +# report schema. New task types drop in as a directory here — no CRD or CLI +# change. `enabled: false` ships a type that isn't live yet (e.g. finetune). +id: benchmark +version: 1 +runner: bench # the BountyRunner that fulfills it (host-side) +enabled: true +summary: Benchmark a model on a pinned harness; verified by independent re-run within tolerance. + +# Capability tags a fulfiller node must advertise to claim. +requires: + - benchmark + +# params → generated `obol bounty post benchmark` flags, validated against +# spec.task.params at admission. type: string|int|enum; enum lists choices. +params: + - name: tasks + type: string + default: "mmlu,gsm8k" + description: Comma-separated harness tasks to run. + - name: harness + type: string + default: "lm-eval-harness@v0.4.3" + description: Pinned eval harness (name@version), content-addressed. + - name: hardwareClass + type: string + default: any + description: Declared hardware class — free-form so specific GPUs are expressible (e.g. any, M4-Max, H100, B200, RTX-4090); informational unless hardwareProof requires more. + - name: seed + type: string + default: "1234" + description: Decode seed; pinned to shrink nondeterminism (greedy decode assumed). + - name: dtype + type: enum + enum: [fp16, bf16, fp8, int8] + default: fp16 + description: Inference dtype. + +# acceptance — benchmarks are NOT bit-exact. 
An independent re-run must +# reproduce the claimed score within `tolerance`. commitHash is integrity +# (anti bait-and-switch), never a determinism gate. +acceptance: + method: rerun-tolerance + commitReveal: true + tolerance: # per-metric absolute score band; poster may tighten + mmlu: "0.01" + gsm8k: "0.015" + humaneval: "0.02" + +# eval market — a SEPARATE OBOL payment leg from the reward (x402 can't splice +# a fee out of the reward auth). Evaluators are paid for the WORK, pass or fail. +eval: + defaultK: 3 # median-of-k; k>=3 whenever a probation seat is occupied + selection: vrf-reputation-weighted + payment: + asset: OBOL + perEvaluator: "2.00" + fundedBy: poster + settle: batch-settlement + # ladder — evaluator cold-start (design doc §11.4): Shadow (free, randomly + # assigned, graded against the quorum median but never counted) → Probation + # (one reserved quorum seat at ~50% pay, value-capped bounties only) → Full. + # Thresholds are per-type constants so each task type tunes its own on-ramp. + ladder: + shadowAgreements: 5 # shadow verdicts within tolerance of the quorum median → Probation + probationEvals: 10 # paid evals without divergence → Full + probationValueCap: "50.00" # reward (human units) above which no probation seat is offered + revealWindow: 10m # commit→reveal window; every commit closes before any reveal opens + nonRevealPenalty: outlier # non-reveal is graded as a worst-case outlier (>= divergence penalty) + +# hardwareProof — self-report is a reputation-backed CLAIM (forgeable text). +# Throughput-flavored bounties should require gpu-attestation or +# evaluator-measured; a score (e.g. mmlu) is hardware-agnostic so self-report +# is fine there. +hardwareProof: self-report + +# deliverable — A2UI renderings of the SAME verified result data, in +# preference order. 
The serving side (FE locally, the stack MCP server +# cross-party) picks the first variant whose catalogId the client advertises +# (a2ui catalog negotiation, locked per surface); no match → raw artifacts. +# declarative: operations JSON rendered natively from the client's +# compiled-in catalog — no custom code, no iframes (the lean default). +# mcp-app: MCP-Apps escape hatch for clients that don't know our catalog — +# self-contained HTML served url_encoded in a `custom` McpApp node's +# properties.content. The CLIENT supplies double-iframe isolation +# (sandbox proxy + srcdoc inner frame, never allow-same-origin); the +# server only ever returns JSON. Keep the HTML self-contained: the inner +# frame has no storage, no cookies, no same-origin access. +# Locally the agent persists the report under its hierarchy and the FE reads +# it; cross-party serves gate it behind mcp-x402 / SIWx (+payment for resale). +deliverable: + report: + variants: + - kind: declarative # A2UI v1.0-candidate messages against the + surface: report.a2ui.json # STANDARD basic catalog — any v1.0 renderer + catalogId: https://a2ui.org/specification/v1_0/catalogs/basic/catalog.json + - kind: mcp-app + surface: report.app.html # any MCP-Apps host renders this, sandboxed + catalogId: obol.org:mcp-app/v1 # domain-prefixed id (spec convention) for the McpApp custom-node rendering + gate: local # local | mcp-x402 | sign-in-with-x + artifacts: + - name: results.json + kind: eval-report + required: true + - name: run.manifest + kind: provenance + required: true + - name: hw-attestation.json + kind: hardware-proof + required: false diff --git a/internal/embed/bountytasks/finetune/report.a2ui.json b/internal/embed/bountytasks/finetune/report.a2ui.json new file mode 100644 index 00000000..c03f6e0f --- /dev/null +++ b/internal/embed/bountytasks/finetune/report.a2ui.json @@ -0,0 +1,54 @@ +{ + "$comment": "finetune@v1 deliverable as an ordered A2UI v1.0-candidate message list against the STANDARD basic 
catalog. STAGED with the package (enabled: false); the runner fills updateDataModel from the held-out re-eval. Metric rows bind relative to each /metrics item.", + "messages": [ + { + "version": "v1.0", + "createSurface": { + "surfaceId": "obol-bounty-finetune-report", + "catalogId": "https://a2ui.org/specification/v1_0/catalogs/basic/catalog.json" + } + }, + { + "version": "v1.0", + "updateComponents": { + "surfaceId": "obol-bounty-finetune-report", + "components": [ + { "id": "root", "component": "Card", "child": "layout" }, + { + "id": "layout", "component": "Column", + "children": ["title", "subtitle", "verdict", "div-metrics", "metrics-list", "div-provenance", "provenance"] + }, + { "id": "title", "component": "Text", "variant": "h2", "text": { "path": "/model/name" } }, + { "id": "subtitle", "component": "Text", "variant": "caption", "text": { "path": "/model/subtitle" } }, + { "id": "verdict", "component": "Text", "variant": "h3", "text": { "path": "/verdict/label" } }, + { "id": "div-metrics", "component": "Divider" }, + { + "id": "metrics-list", "component": "List", "direction": "vertical", + "children": { "componentId": "metric-row", "path": "/metrics" } + }, + { "id": "metric-row", "component": "Row", "children": ["m-name", "m-value"] }, + { "id": "m-name", "component": "Text", "text": { "path": "/name" } }, + { "id": "m-value", "component": "Text", "text": { "path": "/value" } }, + { "id": "div-provenance", "component": "Divider" }, + { "id": "provenance", "component": "Text", "variant": "caption", "text": { "path": "/provenance/line" } } + ] + } + }, + { + "version": "v1.0", + "updateDataModel": { + "surfaceId": "obol-bounty-finetune-report", + "path": "/", + "value": { + "$comment": "Filled by the runner; the shape below is the contract (display-ready placeholders).", + "model": { "name": "", "subtitle": "" }, + "verdict": { "label": "" }, + "metrics": [ + { "name": "", "value": "" } + ], + "provenance": { "line": "" } + } + } + } + ] +} diff --git 
a/internal/embed/bountytasks/finetune/task.yaml b/internal/embed/bountytasks/finetune/task.yaml new file mode 100644 index 00000000..02a3eb31 --- /dev/null +++ b/internal/embed/bountytasks/finetune/task.yaml @@ -0,0 +1,80 @@ +# finetune@v1 — STAGED (enabled: false). Ships in the binary so the schema and +# eval policy are reviewable, but it is not postable: `obol bounty post` only +# generates subcommands for enabled types and bounty.Resolve refuses disabled +# refs at admission. Flipped on when the MLX-LoRA runner + held-out re-eval +# verification land (fine-tunes verify by inference-only re-eval of the +# committed checkpoint — never by re-training, which is non-deterministic and +# cost-prohibitive). +id: finetune +version: 1 +runner: mlx-lora +enabled: false +summary: Fine-tune a model on a committed dataset; verified by held-out re-eval of the committed checkpoint. + +requires: + - finetune.mlx + +params: + - name: dataset + type: string + required: true + description: Content-addressed training dataset URI (e.g. ipfs://… or https://…#sha256=…). + - name: epochs + type: string + default: "3" + description: Training epochs. + - name: learningRate + type: string + default: "1e-4" + description: Learning rate. + - name: loraRank + type: string + default: "32" + description: LoRA adapter rank. + - name: hardwareClass + type: string + default: any + description: Declared hardware class — free-form so specific GPUs are expressible. + +# harness-rerun: evaluators re-eval the COMMITTED checkpoint (modelHash binds +# it at submit) on the held-out fraction; scores must land within tolerance. 
+acceptance: + method: harness-rerun + commitReveal: true + tolerance: + evalLoss: "0.05" + +eval: + defaultK: 3 + selection: vrf-reputation-weighted + payment: + asset: OBOL + perEvaluator: "2.00" + fundedBy: poster + settle: batch-settlement + ladder: + shadowAgreements: 5 + probationEvals: 10 + probationValueCap: "50.00" + revealWindow: 10m + nonRevealPenalty: outlier + +hardwareProof: self-report + +deliverable: + report: + variants: + - kind: declarative + surface: report.a2ui.json + catalogId: https://a2ui.org/specification/v1_0/catalogs/basic/catalog.json + gate: local + artifacts: + - name: adapter.safetensors + kind: weights + required: true + - name: eval.json + kind: eval-report + required: true + - name: run.manifest + kind: provenance + required: true diff --git a/internal/embed/embed.go b/internal/embed/embed.go index 1deb3df4..f2f1df76 100644 --- a/internal/embed/embed.go +++ b/internal/embed/embed.go @@ -29,6 +29,9 @@ var networksFS embed.FS //go:embed all:skills var skillsFS embed.FS +//go:embed all:bountytasks +var bountyTasksFS embed.FS + // InfrastructureDigest returns a stable digest of the embedded infrastructure // assets. Callers use this to decide whether an existing copied defaults tree // needs to be refreshed from the current binary. @@ -145,6 +148,40 @@ func ReadEmbeddedNetworkFile(networkName, filename string) ([]byte, error) { return content, nil } +// GetAvailableBountyTasks returns the names of all embedded ServiceBounty +// task-type packages (one directory per type under bountytasks/), e.g. +// "benchmark". Mirrors GetAvailableNetworks — drop in a directory to add a +// task type. 
+func GetAvailableBountyTasks() ([]string, error) {
+	entries, err := fs.ReadDir(bountyTasksFS, "bountytasks")
+	if err != nil {
+		return nil, fmt.Errorf("failed to read embedded bountytasks directory: %w", err)
+	}
+
+	var tasks []string
+
+	// Only directories are task-type packages; loose files at the top level
+	// of bountytasks/ are ignored.
+	for _, entry := range entries {
+		if entry.IsDir() {
+			tasks = append(tasks, entry.Name())
+		}
+	}
+
+	return tasks, nil
+}
+
+// ReadEmbeddedBountyTaskFile reads a file (e.g. "task.yaml",
+// "report.a2ui.json") from an embedded task-type package.
+//
+// taskName is the package directory under bountytasks/ and filename is the
+// file inside it. A failed read is returned wrapped with both names.
+func ReadEmbeddedBountyTaskFile(taskName, filename string) ([]byte, error) {
+	// embed.FS paths are slash-separated on every OS. filepath.Join emits
+	// backslashes on Windows, which the FS treats as part of a file name, so
+	// normalize the separators before the lookup.
+	path := filepath.ToSlash(filepath.Join("bountytasks", taskName, filename))
+
+	content, err := bountyTasksFS.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read %s from bounty task %s: %w", filename, taskName, err)
+	}
+
+	return content, nil
+}
+
 // ReadInfrastructureFile reads a file from the embedded infrastructure directory
 func ReadInfrastructureFile(path string) ([]byte, error) {
 	content, err := infrastructureFS.ReadFile(filepath.Join("infrastructure", path))
diff --git a/internal/embed/embed_bounty_crd_parity_test.go b/internal/embed/embed_bounty_crd_parity_test.go
new file mode 100644
index 00000000..7a581a48
--- /dev/null
+++ b/internal/embed/embed_bounty_crd_parity_test.go
@@ -0,0 +1,181 @@
+package embed
+
+import (
+	"fmt"
+	"reflect"
+	"sort"
+	"strings"
+	"testing"
+
+	"gopkg.in/yaml.v3"
+
+	"github.com/ObolNetwork/obol-stack/internal/monetizeapi"
+)
+
+// The ServiceBounty CRD manifest is hand-written (no controller-gen run), so
+// every new Go field needs a matching schema property and vice versa. This
+// test walks both directions: a Go json tag without a CRD property means
+// kubectl silently strips the field on apply (structural-schema pruning); a
+// CRD property without a Go field means stale schema the controller can never
+// reconcile. spec.eval.mode was added by hand in two places — this makes that
+// class of drift impossible.
// leafTypes are struct types serialized as scalars in the CRD schema.
// Keys are reflect.Type.String() values (package-qualified type names).
var leafTypes = map[string]bool{
	"v1.Time":     true,
	"v1.Duration": true,
}

// collectGoPaths walks a struct type and records every reachable json path in
// out, each one prefixed with prefix.
//
// Pointers are dereferenced at every level. Slices and fixed-size arrays
// descend through "[]", including when their elements are pointers to structs;
// maps are leaves (additionalProperties). Types listed in leafTypes and
// non-struct types contribute no nested paths.
//
// Fields whose json tag has an empty name or the name "-" are skipped, with one
// exception: an embedded struct without a json name is flattened into the
// parent, matching how encoding/json promotes the fields of anonymous structs.
func collectGoPaths(t reflect.Type, prefix string, out map[string]bool) {
	for t.Kind() == reflect.Pointer {
		t = t.Elem()
	}
	if t.Kind() != reflect.Struct || leafTypes[t.String()] {
		return
	}
	for i := 0; i < t.NumField(); i++ {
		f := t.Field(i)

		ft := f.Type
		for ft.Kind() == reflect.Pointer {
			ft = ft.Elem()
		}

		// Only the name part of the tag matters; options such as
		// "omitempty" or "inline" follow the first comma.
		tag, _, _ := strings.Cut(f.Tag.Get("json"), ",")
		if tag == "" && f.Anonymous && ft.Kind() == reflect.Struct {
			// Embedded struct with no json name: its fields appear at the
			// parent's level in the serialized object, so keep the prefix.
			collectGoPaths(ft, prefix, out)
			continue
		}
		if tag == "" || tag == "-" {
			continue
		}
		path := prefix + tag
		out[path] = true

		switch ft.Kind() {
		case reflect.Struct:
			collectGoPaths(ft, path+".", out)
		case reflect.Slice, reflect.Array:
			// The recursive call dereferences pointer elements and returns
			// immediately for scalar or leaf element types.
			collectGoPaths(ft.Elem(), path+"[].", out)
		}
	}
}

// collectSchemaPaths walks an openAPIV3Schema properties tree.
+func collectSchemaPaths(schema map[string]any, prefix string, out map[string]bool) { + props, _ := schema["properties"].(map[string]any) + for name, raw := range props { + path := prefix + name + out[path] = true + node, ok := raw.(map[string]any) + if !ok { + continue + } + if items, ok := node["items"].(map[string]any); ok { + collectSchemaPaths(items, path+"[].", out) + continue + } + collectSchemaPaths(node, path+".", out) + } +} + +func loadBountySchema(t *testing.T) map[string]any { + t.Helper() + return loadCRDSchema(t, "base/templates/servicebounty-crd.yaml") +} + +func loadCRDSchema(t *testing.T, path string) map[string]any { + t.Helper() + data, err := ReadInfrastructureFile(path) + if err != nil { + t.Fatalf("ReadInfrastructureFile: %v", err) + } + var crd map[string]any + if err := yaml.Unmarshal(data, &crd); err != nil { + t.Fatalf("parse CRD: %v", err) + } + versions, _ := nested(crd, "spec", "versions").([]any) + if len(versions) == 0 { + t.Fatal("CRD has no versions") + } + v0, _ := versions[0].(map[string]any) + schema, _ := nested(v0, "schema", "openAPIV3Schema").(map[string]any) + if schema == nil { + t.Fatal("CRD has no openAPIV3Schema") + } + return schema +} + +func TestServiceBountyCRD_GoSchemaParity(t *testing.T) { + assertCRDParity(t, loadBountySchema(t), + reflect.TypeOf(monetizeapi.ServiceBountySpec{}), + reflect.TypeOf(monetizeapi.ServiceBountyStatus{})) +} + +// The EvaluatorEnrollment CRD is hand-written too — same drift class, same +// bidirectional pin. 
+func TestEvaluatorEnrollmentCRD_GoSchemaParity(t *testing.T) { + assertCRDParity(t, loadCRDSchema(t, "base/templates/evaluatorenrollment-crd.yaml"), + reflect.TypeOf(monetizeapi.EvaluatorEnrollmentSpec{}), + reflect.TypeOf(monetizeapi.EvaluatorEnrollmentStatus{})) +} + +func assertCRDParity(t *testing.T, schema map[string]any, specType, statusType reflect.Type) { + t.Helper() + for _, section := range []struct { + name string + goType reflect.Type + }{ + {"spec", specType}, + {"status", statusType}, + } { + sectionSchema, _ := nested(schema, "properties", section.name).(map[string]any) + if sectionSchema == nil { + t.Fatalf("CRD schema missing .%s", section.name) + } + + goPaths := map[string]bool{} + collectGoPaths(section.goType, "", goPaths) + schemaPaths := map[string]bool{} + collectSchemaPaths(sectionSchema, "", schemaPaths) + + var missing, stale []string + for p := range goPaths { + if !schemaPaths[p] { + missing = append(missing, p) + } + } + for p := range schemaPaths { + if !goPaths[p] { + stale = append(stale, p) + } + } + sort.Strings(missing) + sort.Strings(stale) + + for _, p := range missing { + t.Errorf("%s.%s exists in Go but not in the CRD schema — kubectl apply would silently prune it", section.name, p) + } + for _, p := range stale { + t.Errorf("%s.%s exists in the CRD schema but not in Go — stale property the controller can never reconcile", section.name, p) + } + } +} + +// TestServiceBountyCRD_EvalModeEnum pins the verification-gate enum: required +// must stay the default and dangerouslySkipped the only opt-out. 
+func TestServiceBountyCRD_EvalModeEnum(t *testing.T) { + schema := loadBountySchema(t) + mode, _ := nested(schema, "properties", "spec", "properties", "eval", "properties", "mode").(map[string]any) + if mode == nil { + t.Fatal("spec.eval.mode missing from CRD schema") + } + if d, _ := mode["default"].(string); d != monetizeapi.EvalModeRequired { + t.Errorf("spec.eval.mode default = %q, want %q (verification is on by default)", d, monetizeapi.EvalModeRequired) + } + enum, _ := mode["enum"].([]any) + got := fmt.Sprintf("%v", enum) + want := fmt.Sprintf("%v", []any{monetizeapi.EvalModeRequired, monetizeapi.EvalModeDangerouslySkipped}) + if got != want { + t.Errorf("spec.eval.mode enum = %s, want %s", got, want) + } +} diff --git a/internal/embed/embed_bounty_rbac_test.go b/internal/embed/embed_bounty_rbac_test.go new file mode 100644 index 00000000..ab22c4cf --- /dev/null +++ b/internal/embed/embed_bounty_rbac_test.go @@ -0,0 +1,119 @@ +package embed + +import ( + "strings" + "testing" +) + +// The bounty RBAC posture is a reviewed security decision (see +// plans/bounty-ane-marketplace-design.md, review fix #2): the controller gets +// cluster-wide watch/status on servicebounties, the AGENT grant is a +// NAMESPACED Role in the hermes mother namespace — never the cluster-wide +// openclaw-monetize-write ClusterRole. These tests pin that decision. 
+ +func TestBountyRBAC_ControllerClusterRoleIncludesServiceBounties(t *testing.T) { + data, err := ReadInfrastructureFile("base/templates/x402.yaml") + if err != nil { + t.Fatalf("ReadInfrastructureFile: %v", err) + } + + docs := multiDoc(data) + var controllerRole map[string]any + for _, d := range docs { + if d["kind"] == "ClusterRole" && nested(d, "metadata", "name") == "serviceoffer-controller" { + controllerRole = d + break + } + } + if controllerRole == nil { + t.Fatal("serviceoffer-controller ClusterRole not found in x402.yaml") + } + + var hasBounties, hasBountyStatus bool + var hasEnrollments, hasEnrollmentStatus bool + var enrollmentVerbs []any + rules, _ := controllerRole["rules"].([]any) + for _, r := range rules { + rule, _ := r.(map[string]any) + resources, _ := rule["resources"].([]any) + for _, res := range resources { + switch res { + case "servicebounties": + hasBounties = true + case "servicebounties/status": + hasBountyStatus = true + case "evaluatorenrollments": + hasEnrollments = true + enrollmentVerbs, _ = rule["verbs"].([]any) + case "evaluatorenrollments/status": + hasEnrollmentStatus = true + } + } + } + if !hasBounties || !hasBountyStatus { + t.Errorf("serviceoffer-controller ClusterRole missing servicebounties (%v) or servicebounties/status (%v)", hasBounties, hasBountyStatus) + } + if !hasEnrollments || !hasEnrollmentStatus { + t.Errorf("serviceoffer-controller ClusterRole missing evaluatorenrollments (%v) or evaluatorenrollments/status (%v)", hasEnrollments, hasEnrollmentStatus) + } + // The controller READS the pool and writes ladder STATE only — it never + // creates or deletes enrollments (evaluators own their enrollment). 
+ for _, verb := range enrollmentVerbs { + if verb == "create" || verb == "delete" { + t.Errorf("controller must not %v evaluatorenrollments — the pool is evaluator-owned", verb) + } + } +} + +func TestBountyRBAC_AgentGrantIsNamespacedNotClusterWide(t *testing.T) { + data, err := ReadInfrastructureFile("base/templates/obol-agent-monetize-rbac.yaml") + if err != nil { + t.Fatalf("ReadInfrastructureFile: %v", err) + } + + docs := multiDoc(data) + + // 1. The cluster-wide write ClusterRole must NOT mention servicebounties. + for _, d := range docs { + if d["kind"] != "ClusterRole" { + continue + } + name, _ := nested(d, "metadata", "name").(string) + rules, _ := d["rules"].([]any) + for _, r := range rules { + rule, _ := r.(map[string]any) + resources, _ := rule["resources"].([]any) + for _, res := range resources { + if s, _ := res.(string); strings.Contains(s, "servicebounties") { + t.Errorf("ClusterRole %q grants %q — bounty write must stay a namespaced Role", name, s) + } + } + } + } + + // 2. The namespaced Role exists, in the hermes mother namespace. 
+ var role map[string]any + for _, d := range docs { + if d["kind"] == "Role" && nested(d, "metadata", "name") == "hermes-bounty-write" { + role = d + break + } + } + if role == nil { + t.Fatal("namespaced Role hermes-bounty-write not found") + } + if ns := nested(role, "metadata", "namespace"); ns != "hermes-obol-agent" { + t.Errorf("hermes-bounty-write namespace = %v, want hermes-obol-agent", ns) + } + + var binding map[string]any + for _, d := range docs { + if d["kind"] == "RoleBinding" && nested(d, "metadata", "name") == "hermes-bounty-write-binding" { + binding = d + break + } + } + if binding == nil { + t.Fatal("RoleBinding hermes-bounty-write-binding not found") + } +} diff --git a/internal/embed/embed_servicebounty_crd_test.go b/internal/embed/embed_servicebounty_crd_test.go new file mode 100644 index 00000000..a26100ba --- /dev/null +++ b/internal/embed/embed_servicebounty_crd_test.go @@ -0,0 +1,103 @@ +package embed + +import "testing" + +// ───────────────────────────────────────────────────────────────────────────── +// ServiceBounty CRD tests +// ───────────────────────────────────────────────────────────────────────────── + +func TestServiceBountyCRD_Parses(t *testing.T) { + data, err := ReadInfrastructureFile("base/templates/servicebounty-crd.yaml") + if err != nil { + t.Fatalf("ReadInfrastructureFile: %v", err) + } + + crd := findDoc(multiDoc(data), "CustomResourceDefinition") + if crd == nil { + t.Fatal("no CustomResourceDefinition document found") + } + + if got := nested(crd, "metadata", "name"); got != "servicebounties.obol.org" { + t.Errorf("metadata.name = %v, want servicebounties.obol.org", got) + } + if got := nested(crd, "spec", "group"); got != "obol.org" { + t.Errorf("spec.group = %v, want obol.org", got) + } + if got := nested(crd, "spec", "names", "kind"); got != "ServiceBounty" { + t.Errorf("spec.names.kind = %v, want ServiceBounty", got) + } + if got := nested(crd, "spec", "scope"); got != "Namespaced" { + t.Errorf("spec.scope = %v, 
want Namespaced", got) + } + + short, _ := nested(crd, "spec", "names", "shortNames").([]any) + found := false + for _, s := range short { + if s == "sb" { + found = true + } + } + if !found { + t.Errorf("shortNames = %v, want it to include sb", short) + } +} + +func TestServiceBountyCRD_KeyFields(t *testing.T) { + data, err := ReadInfrastructureFile("base/templates/servicebounty-crd.yaml") + if err != nil { + t.Fatalf("ReadInfrastructureFile: %v", err) + } + + crd := findDoc(multiDoc(data), "CustomResourceDefinition") + if crd == nil { + t.Fatal("no CRD doc") + } + + versions, ok := nested(crd, "spec", "versions").([]any) + if !ok || len(versions) == 0 { + t.Fatal("spec.versions missing") + } + v0, _ := versions[0].(map[string]any) + + // status subresource present (the controller patches status). + if nested(v0, "subresources", "status") == nil { + t.Error("v1alpha1 missing status subresource") + } + + specProps := nested(v0, "schema", "openAPIV3Schema", "properties", "spec", "properties") + sp, ok := specProps.(map[string]any) + if !ok { + t.Fatal("spec.properties not an object") + } + + // spec.task.typeRef is the modular task-type anchor. + if nested(sp, "task", "properties", "typeRef") == nil { + t.Error("spec.task.typeRef missing — task-type modularity anchor") + } + + // hardwareProof enum present. + hw, _ := nested(sp, "task", "properties", "hardwareProof", "enum").([]any) + if len(hw) == 0 { + t.Error("spec.task.hardwareProof enum missing") + } + + // escrow scheme enum includes the live + future rails. + scheme, _ := nested(sp, "reward", "properties", "escrow", "properties", "scheme", "enum").([]any) + var hasUpto bool + for _, s := range scheme { + if s == "upto" { + hasUpto = true + } + } + if !hasUpto { + t.Errorf("reward.escrow.scheme enum = %v, want it to include upto", scheme) + } + + // reward carries the payment envelope needed to construct the upto auth: + // the chain it settles on and the poster's refund address. 
+ for _, f := range []string{"network", "payTo"} { + if nested(sp, "reward", "properties", f) == nil { + t.Errorf("spec.reward.%s missing — required to build the escrow authorization", f) + } + } +} diff --git a/internal/embed/infrastructure/base/templates/evaluatorenrollment-crd.yaml b/internal/embed/infrastructure/base/templates/evaluatorenrollment-crd.yaml new file mode 100644 index 00000000..ec372bd6 --- /dev/null +++ b/internal/embed/infrastructure/base/templates/evaluatorenrollment-crd.yaml @@ -0,0 +1,143 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + name: evaluatorenrollments.obol.org +spec: + group: obol.org + names: + kind: EvaluatorEnrollment + listKind: EvaluatorEnrollmentList + plural: evaluatorenrollments + shortNames: + - ee + singular: evaluatorenrollment + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.address + name: Address + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: EvaluatorEnrollment opts an evaluator into the eval market. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + address: + description: |- + Address is the evaluator's payout/identity address — the same address + used in eval-commit/eval-reveal annotations and bound into commitments. + pattern: ^0x[a-fA-F0-9]{40}$ + type: string + attestation: + description: |- + Attestation is the device-binding claim. v1 RECORDS it (sybil cost is + real hardware per identity once verification lands with the Secure + Enclave wiring); scheme "none" is honest-unattested. + properties: + publicKey: + description: PublicKey is the attestation public key (secure-enclave + scheme). + type: string + scheme: + description: 'Scheme: none (unattested) | secure-enclave (device-bound + P-256 key).' + enum: + - none + - secure-enclave + type: string + signature: + description: Signature is the enrollment signature over the address + (scheme-defined). + type: string + type: object + taskTypes: + description: TaskTypes this evaluator can re-run (versioned refs, + e.g. benchmark@v1). + items: + type: string + type: array + required: + - address + - taskTypes + type: object + status: + description: EvaluatorEnrollmentStatus is controller-owned ladder state. + properties: + observedGeneration: + format: int64 + type: integer + records: + description: |- + Records hold per-task-type ladder progress (reputation is per task + type — benchmark@v1 rep says nothing about finetune@v1). + items: + description: EvaluatorLadderRecord is one task type's ladder progress. + properties: + completed: + description: Completed counts all settled panel seats (any tier). + format: int64 + type: integer + divergences: + description: |- + Divergences counts settled seats graded out of band (incl. non/bad + reveals) — the negative reputation signal. 
+ format: int64 + type: integer + probationEvals: + description: |- + ProbationEvals counts paid in-band evals while on Probation (promotion + to Full at the package threshold). + format: int64 + type: integer + recentFulfillers: + description: |- + RecentFulfillers are the last few fulfiller addresses this evaluator + judged — the pair-diversity rule down-weights repeat pairings. + items: + type: string + type: array + shadowAgreements: + description: |- + ShadowAgreements counts shadow verdicts within tolerance of the quorum + median (promotion to Probation at the task package's threshold). + format: int64 + type: integer + taskType: + type: string + tier: + description: 'Tier: Shadow | Probation | Full. New enrollments + start Shadow.' + type: string + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml b/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml index bf2890af..5492a12b 100644 --- a/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml +++ b/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml @@ -156,3 +156,50 @@ subjects: - kind: ServiceAccount name: openclaw namespace: openclaw-obol-agent + +--- +#------------------------------------------------------------------------------ +# Role (NAMESPACED) - ServiceBounty demand-side write, hermes mother ns only +# +# Deliberately NOT added to the cluster-wide openclaw-monetize-write +# ClusterRole: that would hand every agent write access to every namespace's +# bounties (and therefore their claim/submit/verdict annotation channel and +# escrow lifecycle). The poster agent only ever needs to manage bounties in +# its own namespace. See plans/bounty-ane-marketplace-design.md (review fix #2). 
+#------------------------------------------------------------------------------ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: hermes-bounty-write + namespace: hermes-obol-agent +rules: + - apiGroups: ["obol.org"] + resources: ["servicebounties"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["obol.org"] + resources: ["servicebounties/status"] + verbs: ["get"] + # Evaluator enrollment is namespaced for the same reason bounty writes are: + # an agent manages its OWN enrollment, never the cluster's pool. Ladder + # state (status) stays controller-owned — read-only here. + - apiGroups: ["obol.org"] + resources: ["evaluatorenrollments"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["obol.org"] + resources: ["evaluatorenrollments/status"] + verbs: ["get"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: hermes-bounty-write-binding + namespace: hermes-obol-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: hermes-bounty-write +subjects: + - kind: ServiceAccount + name: hermes + namespace: hermes-obol-agent diff --git a/internal/embed/infrastructure/base/templates/servicebounty-crd.yaml b/internal/embed/infrastructure/base/templates/servicebounty-crd.yaml new file mode 100644 index 00000000..211c0348 --- /dev/null +++ b/internal/embed/infrastructure/base/templates/servicebounty-crd.yaml @@ -0,0 +1,508 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + name: servicebounties.obol.org +spec: + group: obol.org + names: + kind: ServiceBounty + listKind: ServiceBountyList + plural: servicebounties + shortNames: + - sb + singular: servicebounty + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.task.typeRef + name: Task + type: string + - jsonPath: .spec.reward.amount + name: 
Reward + type: string + - jsonPath: .spec.eval.mode + name: Verification + type: string + - jsonPath: .status.phase + name: Phase + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + ServiceBounty declares a unit of paid work (benchmark, fine-tune, serve, …) + with an escrowed reward released on an accepted verdict. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + acceptance: + description: |- + Acceptance is how a submission is judged. Defaults come from the task + type; the poster may tighten them. + properties: + commitReveal: + description: |- + CommitReveal requires evaluators to commit then reveal scores, so they + can't pre-agree on a number. + type: boolean + method: + description: |- + Method judges a submission. Benchmarks are NOT bit-exact: rerun-tolerance + re-runs the harness and accepts a score within tolerance. The commitHash + is integrity (anti bait-and-switch), not a determinism gate. + enum: + - rerun-tolerance + - harness-rerun + - sla-probe + - poster-manual + type: string + tolerance: + additionalProperties: + type: string + description: Tolerance per metric (e.g. {"mmlu":"0.01"}). Default + from the task type. 
+ type: object + type: object + deadline: + description: 'Deadline: past it with no accepted verdict → Expired + → Refunded.' + format: date-time + type: string + eval: + description: |- + Eval configures the OBOL-paid evaluation market (a SEPARATE payment leg + from the reward — x402 cannot splice a fee out of the reward auth). + properties: + k: + default: 1 + description: |- + K evaluators: median-of-k quorum; k≥3 whenever a probation seat is + occupied (the median absorbs one outlier). + format: int64 + type: integer + mode: + default: required + description: |- + Mode gates verification. 'required' (default) routes acceptance through + the evaluator quorum once the eval market is wired — until then a poster + verdict is recorded as PosterOverride. 'dangerouslySkipped' declares + poster-as-judge up front: same override path, but the bounty is marked + unverified and produces no reputation signal. + enum: + - required + - dangerouslySkipped + type: string + payment: + description: Payment for evaluators — a separate leg from the + reward. + properties: + asset: + default: OBOL + description: Asset defaults to OBOL (verification is an OBOL + utility sink). + type: string + fundedBy: + default: poster + description: 'FundedBy: ''poster'' (separate poster-funded + eval budget).' + type: string + perEvaluator: + description: PerEvaluator fee (human units). + type: string + settle: + default: batch-settlement + description: 'Settle: ''batch-settlement'' pays all K evaluators + in one tx.' + type: string + type: object + selection: + description: |- + Selection: VRF-sampled after submission, reputation-weighted; the poster + cannot hand-pick. + enum: + - vrf-reputation-weighted + - poster-manual + type: string + type: object + maxFulfillers: + default: 1 + description: 'MaxFulfillers: 1 = single-winner (default); >1 = first-N-valid + paid.' 
+ format: int64 + type: integer + reward: + description: Reward is the escrowed payment released to the fulfiller + on acceptance. + properties: + amount: + description: Amount is the lump-sum reward (human units, e.g. + "500.00"). + type: string + asset: + description: Asset reuses ServiceOffer's asset shape (USDC eip3009 + / OBOL permit2). + properties: + address: + description: ERC-20 contract address. + pattern: ^0x[0-9a-fA-F]{40}$ + type: string + decimals: + description: Token decimals in atomic units. + format: int64 + maximum: 255 + minimum: 0 + type: integer + eip712Name: + description: EIP-712 domain name used by the token. + type: string + eip712Version: + description: EIP-712 domain version used by the token. + type: string + symbol: + description: Human-friendly token symbol (e.g. USDC, OBOL). + type: string + transferMethod: + description: x402 transfer method for the asset. + enum: + - eip3009 + - permit2 + type: string + type: object + escrow: + description: Escrow selects the x402 settlement rail + reputation-driven + mode. + properties: + facilitator: + description: |- + Facilitator URL (our own facilitator acts as the bounded settlement + trigger; payTo is signed into the auth so it can never redirect funds). + type: string + mode: + description: |- + Mode is selected by the fulfiller's reputation: 'auto' (optimistic), + 'facilitator-check' (deterministic re-run), 'onchain-lock' (authCapture). + enum: + - auto + - facilitator-check + - onchain-lock + type: string + scheme: + description: |- + Scheme: 'upto' (live: facilitator holds a recipient-bound auth, settles + ≤ max) or 'authCapture' (funds-locked, used above valueCap once the Go + impl lands — x402-foundation/x402#2298). + enum: + - upto + - authCapture + type: string + valueCapMicros: + description: 'ValueCapMicros: above this the escrow must use + an on-chain lock.' + type: string + type: object + network: + description: Payment network (e.g. "base", "base-sepolia"). 
+ type: string + payTo: + description: |- + PayTo is the poster's address: the escrow-return / refund destination. + The fulfiller payout address is bound at claim time (witness.to in the + upto auth), not here. + pattern: ^0x[a-fA-F0-9]{40}$ + type: string + type: object + task: + description: |- + Task describes the work. spec.task.typeRef selects an embedded, + versioned task-type package; spec.task.params is validated against + that package's schema at admission. + properties: + datasetCommit: + description: |- + DatasetCommit pins the eval dataset (committed root + the fraction kept + private so a public re-run can't leak answers / enable train-on-test). + properties: + privateFraction: + description: |- + PrivateFraction (0..1, as a string to keep schema stable) of rows kept + secret and revealed only to sampled evaluators at eval time. + type: string + root: + description: Root is a Merkle root committing the (partially + private) eval dataset. + type: string + type: object + hardwareProof: + description: |- + HardwareProof strength required of the fulfiller. self-report is a + reputation-backed claim (forgeable); gpu-attestation is cryptographic + (NVIDIA CC / enclave-binding); evaluator-measured moves the throughput + measurement onto attested evaluator hardware. + enum: + - self-report + - gpu-attestation + - evaluator-measured + type: string + params: + additionalProperties: + type: string + description: Free-form knobs validated against the task type's + param schema. + type: object + targetModel: + description: Target model metadata (reuses ServiceOffer's model + shape). + properties: + name: + description: Model identifier (e.g. qwen3.5:35b). + type: string + runtime: + description: Runtime serving the model. + enum: + - ollama + - vllm + - tgi + type: string + required: + - name + - runtime + type: object + typeRef: + description: TypeRef resolves an embedded task-type package, e.g. + "benchmark@v1". 
+ type: string + required: + - typeRef + type: object + trust: + description: |- + Trust selects the reputation gate + optional refundable self-bond. No + validator stake, no slashing — reputation (lost future income) is the + only collateral. + properties: + reputationGate: + description: |- + ReputationGate derives the fulfiller's maxBountyValue from ERC-8004 + getSummary (read with a curated, trusted client filter). + type: boolean + selfBond: + description: |- + SelfBond is an OPTIONAL refundable bond the fulfiller posts from their + OWN funds (returned on success). It is never slashed to a validator set. + properties: + amount: + type: string + required: + type: boolean + token: + type: string + type: object + type: object + required: + - reward + - task + type: object + status: + description: |- + ServiceBountyStatus mirrors the AND-rollup condition idiom used by + ServiceOffer. Machine truth is the condition set; Phase is the human rollup. + properties: + bondState: + description: |- + BondState tracks the fulfiller self-bond at the escrow gateway: + Reserved | Returned (success/honest timeout) | Forfeited (rejected work, + offsets the poster's burned eval budget). + type: string + captureTxHash: + description: CaptureTxHash / RefundTxHash record the settled reward + or refund. + type: string + claims: + description: |- + Claims are observed fulfiller bindings (single-winner is the common case, + so claims live in status, not a separate CR). + items: + properties: + claimedAt: + format: date-time + type: string + commitHash: + description: |- + CommitHash binds the worker to a specific model + outputs (anti + bait-and-switch), revealed at submit. + type: string + fulfillerAddress: + type: string + phase: + description: 'Phase: Claimed | Submitted | Verified | Rejected.' + type: string + type: object + type: array + conditions: + items: + properties: + lastTransitionTime: + description: Last time the condition transitioned. 
+ format: date-time + type: string + message: + description: Human-readable message with details. + type: string + reason: + description: Machine-readable reason for the condition. + type: string + status: + description: Status of the condition. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: Condition type. + type: string + required: + - status + - type + type: object + type: array + escrowState: + description: 'EscrowState: Reserved | Captured | Voided (held auth + at the facilitator).' + type: string + evalBudgetState: + description: |- + EvalBudgetState tracks the poster-funded OBOL eval budget + (k × perEvaluator) at the escrow gateway: Reserved | Captured | Voided. + Evaluators are paid for the WORK, pass or fail. + type: string + evalPayoutTxHash: + description: EvalPayoutTxHash records the batch-settlement receipt + for the eval leg. + type: string + evaluations: + description: |- + Evaluations are the eval-market verdicts promoted from the + obol.org/eval-commit- / eval-reveal- annotation channel. + items: + description: |- + ServiceBountyEvaluation is one evaluator's commit-reveal record. WithinBand + is the per-bounty ladder bookkeeping hook: divergence from the quorum median + (or a missing/invalid reveal) is what future reputation feedback keys on. + properties: + address: + description: Address is the evaluator's payout/identity address + (annotation key suffix). + type: string + commitHash: + description: CommitHash = EvalCommitHash(score, salt, address), + promoted first-write-wins. + type: string + paid: + description: |- + Paid marks inclusion in the eval-budget batch settlement (counting + seats that revealed validly; shadows evaluate free). + type: boolean + phase: + description: 'Phase: Committed | Revealed | BadReveal | NonReveal.' + type: string + revealedAt: + description: RevealedAt records when a valid reveal was promoted. 
+ format: date-time + type: string + score: + description: Score is the revealed 0-100 verdict (ERC-8004 validationResponse + semantics). + format: int64 + type: integer + seat: + description: |- + Seat mirrors the panel seat kind (full | probation | shadow); empty in + open-door mode. + type: string + validationTxHash: + description: |- + ValidationTxHash is the evaluator-submitted ERC-8004 validationResponse + transaction, recorded as provenance (the evaluator's OWN wallet signs; + the controller never does). + type: string + withinBand: + description: |- + WithinBand is false for NonReveal/BadReveal and for revealed scores + outside the outlier band around the quorum median. + type: boolean + type: object + type: array + evaluatorPanel: + description: |- + EvaluatorPanel is the controller-selected seat assignment (deterministic + per-bounty sampling from enrolled evaluators). Empty panel = open-door + fallback (insufficient pool) — any address may evaluate, as in early v1. + items: + description: ServiceBountyPanelSeat is one selected evaluator seat. + properties: + address: + description: Address is the enrolled evaluator's address. + type: string + seat: + description: 'Seat: full | probation | shadow.' + type: string + type: object + type: array + ladderRecorded: + description: |- + LadderRecorded latches the one-shot cross-bounty ladder bookkeeping so + repeated reconciles after quorum never double-count. + type: boolean + observedGeneration: + format: int64 + type: integer + phase: + type: string + refundTxHash: + type: string + reportURI: + description: ReportURI points at the SIWx/local-gated A2UI report + (deliverable). + type: string + revealDeadline: + description: |- + RevealDeadline opens once K commitments are in: every commit closes + before any reveal opens, and a missing reveal past this instant is + graded as a worst-case outlier (nonRevealPenalty). 
+ format: date-time + type: string + weightedScore: + description: WeightedScore is the reputation-weighted eval verdict + (0-100). + format: int64 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/internal/embed/infrastructure/base/templates/x402.yaml b/internal/embed/infrastructure/base/templates/x402.yaml index d3209cec..7c28f02a 100644 --- a/internal/embed/infrastructure/base/templates/x402.yaml +++ b/internal/embed/infrastructure/base/templates/x402.yaml @@ -130,6 +130,24 @@ rules: - apiGroups: ["obol.org"] resources: ["purchaserequests/status"] verbs: ["get", "update", "patch"] + # ServiceBounty demand-side reconcile: watch + finalizer updates + status. + # The controller never CREATES bounties (posters do), and the bounty pass + # creates no routes/Middleware/Secrets — a bounty must never become ingress. + - apiGroups: ["obol.org"] + resources: ["servicebounties"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: ["obol.org"] + resources: ["servicebounties/status"] + verbs: ["get", "update", "patch"] + # Evaluator pool: the controller READS enrollments for panel selection and + # writes only the controller-owned ladder STATE (status). It never creates + # or deletes enrollments — evaluators do. + - apiGroups: ["obol.org"] + resources: ["evaluatorenrollments"] + verbs: ["get", "list", "watch"] + - apiGroups: ["obol.org"] + resources: ["evaluatorenrollments/status"] + verbs: ["get", "update", "patch"] - apiGroups: ["obol.org"] resources: ["agents"] verbs: ["get", "list", "watch", "update", "patch"] diff --git a/internal/erc8004/abi.go b/internal/erc8004/abi.go index 7598181c..be1de094 100644 --- a/internal/erc8004/abi.go +++ b/internal/erc8004/abi.go @@ -16,7 +16,13 @@ const ( // ReputationRegistryBaseSepolia is the ERC-8004 Reputation Registry on Base Sepolia. 
ReputationRegistryBaseSepolia = "0x8004B663056A597Dffe9eCcC1965A193B7388713" - // ValidationRegistryBaseSepolia is the ERC-8004 Validation Registry on Base Sepolia. + // ValidationRegistryBaseSepolia is the v1.0.0 ERC-8004 Validation Registry + // address from the pre-v2 draft. + // + // Deprecated: this address has NO CODE on Base Sepolia (it was an Ethereum + // Sepolia deployment). Use ValidationRegistryAddress(network) / + // ValidationRegistryV2BaseSepolia (validation.go) — verified on-chain, + // getVersion()=="2.0.0". ValidationRegistryBaseSepolia = "0x8004CB39f29c09145F24Ad9dDe2A108C1A2cdfC5" // DefaultRPCBase is the default JSON-RPC base URL the controller uses to diff --git a/internal/erc8004/reputation.go b/internal/erc8004/reputation.go new file mode 100644 index 00000000..a9765289 --- /dev/null +++ b/internal/erc8004/reputation.go @@ -0,0 +1,437 @@ +package erc8004 + +// ERC-8004 Reputation Registry (v2.0.0) calldata builders and read helpers. +// +// IMPORTANT — signing model: the serviceoffer/servicebounty controller NEVER +// signs feedback transactions. Client agents submit giveFeedback (and +// revokeFeedback) with THEIR OWN wallets; agent operators submit +// appendResponse with theirs. This package only builds calldata and reads +// recorded feedback. 
+// +// Function signatures verified against: +// - Spec: https://eips.ethereum.org/EIPS/eip-8004 (Reputation Registry) +// - Reference impl + official ABI: +// https://github.com/erc-8004/erc-8004-contracts +// (abis/ReputationRegistry.json, contracts/ReputationRegistryUpgradeable.sol, +// getVersion() == "2.0.0") +// +// giveFeedback(uint256 agentId, int128 value, uint8 valueDecimals, string tag1, string tag2, string endpoint, string feedbackURI, bytes32 feedbackHash) +// revokeFeedback(uint256 agentId, uint64 feedbackIndex) +// appendResponse(uint256 agentId, address clientAddress, uint64 feedbackIndex, string responseURI, bytes32 responseHash) +// getSummary(uint256 agentId, address[] clientAddresses, string tag1, string tag2) -> (uint64 count, int128 summaryValue, uint8 summaryValueDecimals) +// readFeedback(uint256 agentId, address clientAddress, uint64 feedbackIndex) -> (int128, uint8, string, string, bool) +// getLastIndex(uint256 agentId, address clientAddress) -> uint64 +// getClients(uint256 agentId) -> address[] + +import ( + "context" + _ "embed" + "fmt" + "math/big" + "strings" + "sync" + + "github.com/ethereum/go-ethereum/accounts/abi" + "github.com/ethereum/go-ethereum/accounts/abi/bind" + "github.com/ethereum/go-ethereum/common" +) + +//go:embed reputation_registry.abi.json +var reputationRegistryABI string + +// ReputationRegistryMainnet is the ERC-8004 v2.0.0 Reputation Registry on +// Ethereum mainnet and Base mainnet (deployed at the same address via +// CREATE2). The Base Sepolia deployment is the existing +// ReputationRegistryBaseSepolia constant in abi.go. +// Source: https://github.com/erc-8004/erc-8004-contracts README + +// scripts/addresses.ts; on-chain: code present on both chains, +// getVersion() == "2.0.0". +const ReputationRegistryMainnet = "0x8004BAa17C55a88189AE136b182e5fdA19dE9b63" + +// MaxFeedbackValueDecimals is the maximum valueDecimals accepted by +// giveFeedback. The contract reverts with "too many decimals" above this. 
+const MaxFeedbackValueDecimals = 18 + +// maxFeedbackAbsValue mirrors the contract's MAX_ABS_VALUE = 1e38 bound on +// the int128 feedback value. +var maxFeedbackAbsValue = new(big.Int).Exp(big.NewInt(10), big.NewInt(38), nil) + +var ( + reputationABIOnce sync.Once + reputationABIParsed abi.ABI + reputationABIErr error +) + +// reputationABI lazily parses the embedded Reputation Registry ABI once. +func reputationABI() (abi.ABI, error) { + reputationABIOnce.Do(func() { + reputationABIParsed, reputationABIErr = abi.JSON(strings.NewReader(reputationRegistryABI)) + }) + if reputationABIErr != nil { + return abi.ABI{}, fmt.Errorf("erc8004: parse reputation registry abi: %w", reputationABIErr) + } + return reputationABIParsed, nil +} + +// ReputationRegistryAddress maps a supported network name to the deployed +// ERC-8004 v2.0.0 Reputation Registry address. It accepts the same aliases +// as ResolveNetwork. Networks without an on-chain-verified deployment return +// an error rather than a guessed address. +func ReputationRegistryAddress(network string) (string, error) { + net, err := ResolveNetwork(network) + if err != nil { + return "", fmt.Errorf("erc8004: reputation registry: %w", err) + } + switch net.Name { + case BaseSepolia.Name: + return ReputationRegistryBaseSepolia, nil + case Base.Name, Ethereum.Name: + return ReputationRegistryMainnet, nil + default: + return "", fmt.Errorf("erc8004: no verified reputation registry deployment for network %q", net.Name) + } +} + +// EncodeGiveFeedback builds calldata for +// giveFeedback(uint256,int128,uint8,string,string,string,string,bytes32). +// value is a fixed-point score scaled by 10^valueDecimals (|value| <= 1e38, +// valueDecimals <= 18). The transaction must be submitted by the client +// agent's own wallet — the contract forbids self-feedback from the agent's +// owner/operators, and the controller never signs. tag1, tag2, endpoint, +// feedbackURI, and feedbackHash are optional per spec and may be zero values. 
+func EncodeGiveFeedback(agentID *big.Int, value *big.Int, valueDecimals uint8, tag1, tag2, endpoint, feedbackURI string, feedbackHash common.Hash) ([]byte, error) { + if err := checkAgentID(agentID); err != nil { + return nil, err + } + if value == nil { + return nil, fmt.Errorf("erc8004: feedback value must not be nil") + } + if value.CmpAbs(maxFeedbackAbsValue) > 0 { + return nil, fmt.Errorf("erc8004: feedback value %s out of range [-1e38, 1e38]", value) + } + if valueDecimals > MaxFeedbackValueDecimals { + return nil, fmt.Errorf("erc8004: valueDecimals %d out of range [0,%d]", valueDecimals, MaxFeedbackValueDecimals) + } + + parsed, err := reputationABI() + if err != nil { + return nil, err + } + data, err := parsed.Pack("giveFeedback", agentID, value, valueDecimals, tag1, tag2, endpoint, feedbackURI, feedbackHash) + if err != nil { + return nil, fmt.Errorf("erc8004: pack giveFeedback: %w", err) + } + return data, nil +} + +// EncodeRevokeFeedback builds calldata for revokeFeedback(uint256,uint64). +// Must be submitted by the wallet that gave the feedback. Feedback indices +// are 1-based. +func EncodeRevokeFeedback(agentID *big.Int, feedbackIndex uint64) ([]byte, error) { + if err := checkAgentID(agentID); err != nil { + return nil, err + } + if feedbackIndex == 0 { + return nil, fmt.Errorf("erc8004: feedbackIndex must be > 0 (indices are 1-based)") + } + + parsed, err := reputationABI() + if err != nil { + return nil, err + } + data, err := parsed.Pack("revokeFeedback", agentID, feedbackIndex) + if err != nil { + return nil, fmt.Errorf("erc8004: pack revokeFeedback: %w", err) + } + return data, nil +} + +// EncodeAppendResponse builds calldata for +// appendResponse(uint256,address,uint64,string,bytes32) — an on-chain reply +// to existing feedback. Submitted by the responder's own wallet. 
+func EncodeAppendResponse(agentID *big.Int, clientAddress common.Address, feedbackIndex uint64, responseURI string, responseHash common.Hash) ([]byte, error) { + if err := checkAgentID(agentID); err != nil { + return nil, err + } + if clientAddress == (common.Address{}) { + return nil, fmt.Errorf("erc8004: clientAddress must not be the zero address") + } + if feedbackIndex == 0 { + return nil, fmt.Errorf("erc8004: feedbackIndex must be > 0 (indices are 1-based)") + } + if responseURI == "" { + return nil, fmt.Errorf("erc8004: responseURI must not be empty") + } + + parsed, err := reputationABI() + if err != nil { + return nil, err + } + data, err := parsed.Pack("appendResponse", agentID, clientAddress, feedbackIndex, responseURI, responseHash) + if err != nil { + return nil, fmt.Errorf("erc8004: pack appendResponse: %w", err) + } + return data, nil +} + +// GiveFeedbackCall is the decoded argument set of a giveFeedback call. +type GiveFeedbackCall struct { + AgentID *big.Int + Value *big.Int + ValueDecimals uint8 + Tag1 string + Tag2 string + Endpoint string + FeedbackURI string + FeedbackHash common.Hash +} + +// DecodeGiveFeedbackCalldata decodes giveFeedback calldata (selector + +// ABI-encoded args). Useful for provenance checks on observed transactions +// and for tests. 
+func DecodeGiveFeedbackCalldata(data []byte) (GiveFeedbackCall, error) { + parsed, err := reputationABI() + if err != nil { + return GiveFeedbackCall{}, err + } + values, err := unpackCalldata(parsed, "giveFeedback", data) + if err != nil { + return GiveFeedbackCall{}, err + } + if len(values) != 8 { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: giveFeedback arg count = %d, want 8", len(values)) + } + + out := GiveFeedbackCall{} + var ok bool + if out.AgentID, ok = values[0].(*big.Int); !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: agentId type = %T", values[0]) + } + if out.Value, ok = values[1].(*big.Int); !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: value type = %T", values[1]) + } + if out.ValueDecimals, ok = values[2].(uint8); !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: valueDecimals type = %T", values[2]) + } + if out.Tag1, ok = values[3].(string); !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: tag1 type = %T", values[3]) + } + if out.Tag2, ok = values[4].(string); !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: tag2 type = %T", values[4]) + } + if out.Endpoint, ok = values[5].(string); !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: endpoint type = %T", values[5]) + } + if out.FeedbackURI, ok = values[6].(string); !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: feedbackURI type = %T", values[6]) + } + hash, ok := values[7].([32]byte) + if !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: feedbackHash type = %T", values[7]) + } + out.FeedbackHash = common.Hash(hash) + return out, nil +} + +// RevokeFeedbackCall is the decoded argument set of a revokeFeedback call. +type RevokeFeedbackCall struct { + AgentID *big.Int + FeedbackIndex uint64 +} + +// DecodeRevokeFeedbackCalldata decodes revokeFeedback calldata. 
+func DecodeRevokeFeedbackCalldata(data []byte) (RevokeFeedbackCall, error) { + parsed, err := reputationABI() + if err != nil { + return RevokeFeedbackCall{}, err + } + values, err := unpackCalldata(parsed, "revokeFeedback", data) + if err != nil { + return RevokeFeedbackCall{}, err + } + if len(values) != 2 { + return RevokeFeedbackCall{}, fmt.Errorf("erc8004: revokeFeedback arg count = %d, want 2", len(values)) + } + + out := RevokeFeedbackCall{} + var ok bool + if out.AgentID, ok = values[0].(*big.Int); !ok { + return RevokeFeedbackCall{}, fmt.Errorf("erc8004: agentId type = %T", values[0]) + } + if out.FeedbackIndex, ok = values[1].(uint64); !ok { + return RevokeFeedbackCall{}, fmt.Errorf("erc8004: feedbackIndex type = %T", values[1]) + } + return out, nil +} + +// AppendResponseCall is the decoded argument set of an appendResponse call. +type AppendResponseCall struct { + AgentID *big.Int + ClientAddress common.Address + FeedbackIndex uint64 + ResponseURI string + ResponseHash common.Hash +} + +// DecodeAppendResponseCalldata decodes appendResponse calldata. 
+func DecodeAppendResponseCalldata(data []byte) (AppendResponseCall, error) { + parsed, err := reputationABI() + if err != nil { + return AppendResponseCall{}, err + } + values, err := unpackCalldata(parsed, "appendResponse", data) + if err != nil { + return AppendResponseCall{}, err + } + if len(values) != 5 { + return AppendResponseCall{}, fmt.Errorf("erc8004: appendResponse arg count = %d, want 5", len(values)) + } + + out := AppendResponseCall{} + var ok bool + if out.AgentID, ok = values[0].(*big.Int); !ok { + return AppendResponseCall{}, fmt.Errorf("erc8004: agentId type = %T", values[0]) + } + if out.ClientAddress, ok = values[1].(common.Address); !ok { + return AppendResponseCall{}, fmt.Errorf("erc8004: clientAddress type = %T", values[1]) + } + if out.FeedbackIndex, ok = values[2].(uint64); !ok { + return AppendResponseCall{}, fmt.Errorf("erc8004: feedbackIndex type = %T", values[2]) + } + if out.ResponseURI, ok = values[3].(string); !ok { + return AppendResponseCall{}, fmt.Errorf("erc8004: responseURI type = %T", values[3]) + } + hash, ok := values[4].([32]byte) + if !ok { + return AppendResponseCall{}, fmt.Errorf("erc8004: responseHash type = %T", values[4]) + } + out.ResponseHash = common.Hash(hash) + return out, nil +} + +// FeedbackSummary mirrors the reputation getSummary return values. The +// aggregate score is SummaryValue scaled by 10^-SummaryValueDecimals. +type FeedbackSummary struct { + Count uint64 + SummaryValue *big.Int + SummaryValueDecimals uint8 +} + +// FeedbackEntry mirrors readFeedback return values. +type FeedbackEntry struct { + Value *big.Int + ValueDecimals uint8 + Tag1 string + Tag2 string + IsRevoked bool +} + +// ReputationReader provides read-only access to a Reputation Registry. The +// controller uses it to observe recorded feedback; it holds no signer. +type ReputationReader struct { + contract *bind.BoundContract +} + +// NewReputationReader binds a read-only Reputation Registry at +// registryAddress. 
caller is typically (*erc8004.Client).ETH() or any +// *ethclient.Client. +func NewReputationReader(caller bind.ContractCaller, registryAddress string) (*ReputationReader, error) { + if caller == nil { + return nil, fmt.Errorf("erc8004: reputation reader: caller must not be nil") + } + if !common.IsHexAddress(registryAddress) { + return nil, fmt.Errorf("erc8004: reputation reader: invalid registry address %q", registryAddress) + } + parsed, err := reputationABI() + if err != nil { + return nil, err + } + return &ReputationReader{ + contract: bind.NewBoundContract(common.HexToAddress(registryAddress), parsed, caller, nil, nil), + }, nil +} + +// Summary reads getSummary(agentId, clientAddresses, tag1, tag2). +func (r *ReputationReader) Summary(ctx context.Context, agentID *big.Int, clientAddresses []common.Address, tag1, tag2 string) (FeedbackSummary, error) { + if err := checkAgentID(agentID); err != nil { + return FeedbackSummary{}, err + } + if clientAddresses == nil { + clientAddresses = []common.Address{} + } + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "getSummary", agentID, clientAddresses, tag1, tag2); err != nil { + return FeedbackSummary{}, fmt.Errorf("erc8004: reputation getSummary: %w", err) + } + if len(out) != 3 { + return FeedbackSummary{}, fmt.Errorf("erc8004: reputation getSummary returned %d values, want 3", len(out)) + } + + summary := FeedbackSummary{} + var ok bool + if summary.Count, ok = out[0].(uint64); !ok { + return FeedbackSummary{}, fmt.Errorf("erc8004: reputation getSummary count type = %T", out[0]) + } + if summary.SummaryValue, ok = out[1].(*big.Int); !ok { + return FeedbackSummary{}, fmt.Errorf("erc8004: reputation getSummary summaryValue type = %T", out[1]) + } + if summary.SummaryValueDecimals, ok = out[2].(uint8); !ok { + return FeedbackSummary{}, fmt.Errorf("erc8004: reputation getSummary summaryValueDecimals type = %T", out[2]) + } + return summary, nil +} + +// ReadFeedback reads 
readFeedback(agentId, clientAddress, feedbackIndex). +// Feedback indices are 1-based. +func (r *ReputationReader) ReadFeedback(ctx context.Context, agentID *big.Int, clientAddress common.Address, feedbackIndex uint64) (FeedbackEntry, error) { + if err := checkAgentID(agentID); err != nil { + return FeedbackEntry{}, err + } + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "readFeedback", agentID, clientAddress, feedbackIndex); err != nil { + return FeedbackEntry{}, fmt.Errorf("erc8004: readFeedback: %w", err) + } + if len(out) != 5 { + return FeedbackEntry{}, fmt.Errorf("erc8004: readFeedback returned %d values, want 5", len(out)) + } + + entry := FeedbackEntry{} + var ok bool + if entry.Value, ok = out[0].(*big.Int); !ok { + return FeedbackEntry{}, fmt.Errorf("erc8004: readFeedback value type = %T", out[0]) + } + if entry.ValueDecimals, ok = out[1].(uint8); !ok { + return FeedbackEntry{}, fmt.Errorf("erc8004: readFeedback valueDecimals type = %T", out[1]) + } + if entry.Tag1, ok = out[2].(string); !ok { + return FeedbackEntry{}, fmt.Errorf("erc8004: readFeedback tag1 type = %T", out[2]) + } + if entry.Tag2, ok = out[3].(string); !ok { + return FeedbackEntry{}, fmt.Errorf("erc8004: readFeedback tag2 type = %T", out[3]) + } + if entry.IsRevoked, ok = out[4].(bool); !ok { + return FeedbackEntry{}, fmt.Errorf("erc8004: readFeedback isRevoked type = %T", out[4]) + } + return entry, nil +} + +// LastIndex reads getLastIndex(agentId, clientAddress) — the most recent +// (1-based) feedback index the client has submitted for the agent; 0 when +// none. 
+func (r *ReputationReader) LastIndex(ctx context.Context, agentID *big.Int, clientAddress common.Address) (uint64, error) { + if err := checkAgentID(agentID); err != nil { + return 0, err + } + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "getLastIndex", agentID, clientAddress); err != nil { + return 0, fmt.Errorf("erc8004: getLastIndex: %w", err) + } + if len(out) != 1 { + return 0, fmt.Errorf("erc8004: getLastIndex returned %d values, want 1", len(out)) + } + idx, ok := out[0].(uint64) + if !ok { + return 0, fmt.Errorf("erc8004: getLastIndex type = %T", out[0]) + } + return idx, nil +} diff --git a/internal/erc8004/reputation_registry.abi.json b/internal/erc8004/reputation_registry.abi.json new file mode 100644 index 00000000..9948315b --- /dev/null +++ b/internal/erc8004/reputation_registry.abi.json @@ -0,0 +1,391 @@ +[ + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "int128", + "name": "value", + "type": "int128" + }, + { + "internalType": "uint8", + "name": "valueDecimals", + "type": "uint8" + }, + { + "internalType": "string", + "name": "tag1", + "type": "string" + }, + { + "internalType": "string", + "name": "tag2", + "type": "string" + }, + { + "internalType": "string", + "name": "endpoint", + "type": "string" + }, + { + "internalType": "string", + "name": "feedbackURI", + "type": "string" + }, + { + "internalType": "bytes32", + "name": "feedbackHash", + "type": "bytes32" + } + ], + "name": "giveFeedback", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "uint64", + "name": "feedbackIndex", + "type": "uint64" + } + ], + "name": "revokeFeedback", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + 
"type": "uint256" + }, + { + "internalType": "address", + "name": "clientAddress", + "type": "address" + }, + { + "internalType": "uint64", + "name": "feedbackIndex", + "type": "uint64" + }, + { + "internalType": "string", + "name": "responseURI", + "type": "string" + }, + { + "internalType": "bytes32", + "name": "responseHash", + "type": "bytes32" + } + ], + "name": "appendResponse", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "address[]", + "name": "clientAddresses", + "type": "address[]" + }, + { + "internalType": "string", + "name": "tag1", + "type": "string" + }, + { + "internalType": "string", + "name": "tag2", + "type": "string" + } + ], + "name": "getSummary", + "outputs": [ + { + "internalType": "uint64", + "name": "count", + "type": "uint64" + }, + { + "internalType": "int128", + "name": "summaryValue", + "type": "int128" + }, + { + "internalType": "uint8", + "name": "summaryValueDecimals", + "type": "uint8" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "address", + "name": "clientAddress", + "type": "address" + }, + { + "internalType": "uint64", + "name": "feedbackIndex", + "type": "uint64" + } + ], + "name": "readFeedback", + "outputs": [ + { + "internalType": "int128", + "name": "value", + "type": "int128" + }, + { + "internalType": "uint8", + "name": "valueDecimals", + "type": "uint8" + }, + { + "internalType": "string", + "name": "tag1", + "type": "string" + }, + { + "internalType": "string", + "name": "tag2", + "type": "string" + }, + { + "internalType": "bool", + "name": "isRevoked", + "type": "bool" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + 
"internalType": "address", + "name": "clientAddress", + "type": "address" + } + ], + "name": "getLastIndex", + "outputs": [ + { + "internalType": "uint64", + "name": "", + "type": "uint64" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + } + ], + "name": "getClients", + "outputs": [ + { + "internalType": "address[]", + "name": "", + "type": "address[]" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [], + "name": "getIdentityRegistry", + "outputs": [ + { + "internalType": "address", + "name": "", + "type": "address" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "anonymous": false, + "inputs": [ + { + "indexed": true, + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "indexed": true, + "internalType": "address", + "name": "clientAddress", + "type": "address" + }, + { + "indexed": false, + "internalType": "uint64", + "name": "feedbackIndex", + "type": "uint64" + }, + { + "indexed": false, + "internalType": "int128", + "name": "value", + "type": "int128" + }, + { + "indexed": false, + "internalType": "uint8", + "name": "valueDecimals", + "type": "uint8" + }, + { + "indexed": true, + "internalType": "string", + "name": "indexedTag1", + "type": "string" + }, + { + "indexed": false, + "internalType": "string", + "name": "tag1", + "type": "string" + }, + { + "indexed": false, + "internalType": "string", + "name": "tag2", + "type": "string" + }, + { + "indexed": false, + "internalType": "string", + "name": "endpoint", + "type": "string" + }, + { + "indexed": false, + "internalType": "string", + "name": "feedbackURI", + "type": "string" + }, + { + "indexed": false, + "internalType": "bytes32", + "name": "feedbackHash", + "type": "bytes32" + } + ], + "name": "NewFeedback", + "type": "event" + }, + { + "anonymous": false, + "inputs": [ + { + "indexed": true, + "internalType": 
"uint256", + "name": "agentId", + "type": "uint256" + }, + { + "indexed": true, + "internalType": "address", + "name": "clientAddress", + "type": "address" + }, + { + "indexed": true, + "internalType": "uint64", + "name": "feedbackIndex", + "type": "uint64" + } + ], + "name": "FeedbackRevoked", + "type": "event" + }, + { + "anonymous": false, + "inputs": [ + { + "indexed": true, + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "indexed": true, + "internalType": "address", + "name": "clientAddress", + "type": "address" + }, + { + "indexed": false, + "internalType": "uint64", + "name": "feedbackIndex", + "type": "uint64" + }, + { + "indexed": true, + "internalType": "address", + "name": "responder", + "type": "address" + }, + { + "indexed": false, + "internalType": "string", + "name": "responseURI", + "type": "string" + }, + { + "indexed": false, + "internalType": "bytes32", + "name": "responseHash", + "type": "bytes32" + } + ], + "name": "ResponseAppended", + "type": "event" + } +] diff --git a/internal/erc8004/reputation_test.go b/internal/erc8004/reputation_test.go new file mode 100644 index 00000000..a225b5e0 --- /dev/null +++ b/internal/erc8004/reputation_test.go @@ -0,0 +1,410 @@ +package erc8004 + +import ( + "context" + "encoding/hex" + "math/big" + "strings" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +func TestReputationABI_Parses(t *testing.T) { + if _, err := reputationABI(); err != nil { + t.Fatalf("embedded reputation ABI failed to parse: %v", err) + } +} + +// TestReputationABI_SelectorGoldenValues pins the 4-byte selectors of the +// verified v2.0.0 signatures (spec: https://eips.ethereum.org/EIPS/eip-8004; +// ABI: https://github.com/erc-8004/erc-8004-contracts). Each golden value is +// cross-checked against keccak256 of the canonical signature string and the +// parsed ABI method. 
+func TestReputationABI_SelectorGoldenValues(t *testing.T) { + parsed, err := reputationABI() + if err != nil { + t.Fatal(err) + } + + tests := []struct { + method string + sig string + selector string + }{ + {"giveFeedback", "giveFeedback(uint256,int128,uint8,string,string,string,string,bytes32)", "3c036a7e"}, + {"revokeFeedback", "revokeFeedback(uint256,uint64)", "4ab3ca99"}, + {"appendResponse", "appendResponse(uint256,address,uint64,string,bytes32)", "c2349ab2"}, + {"getSummary", "getSummary(uint256,address[],string,string)", "81bbba58"}, + {"readFeedback", "readFeedback(uint256,address,uint64)", "232b0810"}, + {"getLastIndex", "getLastIndex(uint256,address)", "f2d81759"}, + {"getClients", "getClients(uint256)", "42dd519c"}, + {"getIdentityRegistry", "getIdentityRegistry()", "bc4d861b"}, + } + + for _, tt := range tests { + t.Run(tt.method, func(t *testing.T) { + m, ok := parsed.Methods[tt.method] + if !ok { + t.Fatalf("method %q missing from parsed ABI", tt.method) + } + if m.Sig != tt.sig { + t.Errorf("signature = %q, want %q", m.Sig, tt.sig) + } + if got := hex.EncodeToString(m.ID); got != tt.selector { + t.Errorf("parsed selector = 0x%s, want 0x%s", got, tt.selector) + } + if got := hex.EncodeToString(crypto.Keccak256([]byte(tt.sig))[:4]); got != tt.selector { + t.Errorf("keccak256(%q)[:4] = 0x%s, want 0x%s", tt.sig, got, tt.selector) + } + }) + } +} + +func TestReputationABI_EventsPresent(t *testing.T) { + parsed, err := reputationABI() + if err != nil { + t.Fatal(err) + } + for _, name := range []string{"NewFeedback", "FeedbackRevoked", "ResponseAppended"} { + if _, ok := parsed.Events[name]; !ok { + t.Errorf("missing event %q in parsed ABI", name) + } + } +} + +func TestEncodeGiveFeedback_RoundTrip(t *testing.T) { + agentID := big.NewInt(42) + value := big.NewInt(-875) // -87.5 with valueDecimals=1 + feedbackHash := crypto.Keccak256Hash([]byte("feedback payload")) + + data, err := EncodeGiveFeedback(agentID, value, 1, "code-review", "go", 
"https://agent.example/v1", "ipfs://bafy.../fb.json", feedbackHash) + if err != nil { + t.Fatalf("EncodeGiveFeedback: %v", err) + } + if got := hex.EncodeToString(data[:4]); got != "3c036a7e" { + t.Errorf("selector = 0x%s, want 0x3c036a7e", got) + } + + decoded, err := DecodeGiveFeedbackCalldata(data) + if err != nil { + t.Fatalf("DecodeGiveFeedbackCalldata: %v", err) + } + if decoded.AgentID.Cmp(agentID) != 0 { + t.Errorf("agentId = %s, want %s", decoded.AgentID, agentID) + } + if decoded.Value.Cmp(value) != 0 { + t.Errorf("value = %s, want %s", decoded.Value, value) + } + if decoded.ValueDecimals != 1 { + t.Errorf("valueDecimals = %d, want 1", decoded.ValueDecimals) + } + if decoded.Tag1 != "code-review" || decoded.Tag2 != "go" { + t.Errorf("tags = (%q, %q), want (code-review, go)", decoded.Tag1, decoded.Tag2) + } + if decoded.Endpoint != "https://agent.example/v1" { + t.Errorf("endpoint = %q", decoded.Endpoint) + } + if decoded.FeedbackURI != "ipfs://bafy.../fb.json" { + t.Errorf("feedbackURI = %q", decoded.FeedbackURI) + } + if decoded.FeedbackHash != feedbackHash { + t.Errorf("feedbackHash = %s, want %s", decoded.FeedbackHash, feedbackHash) + } +} + +func TestEncodeRevokeFeedback_RoundTrip(t *testing.T) { + data, err := EncodeRevokeFeedback(big.NewInt(42), 7) + if err != nil { + t.Fatalf("EncodeRevokeFeedback: %v", err) + } + if got := hex.EncodeToString(data[:4]); got != "4ab3ca99" { + t.Errorf("selector = 0x%s, want 0x4ab3ca99", got) + } + + decoded, err := DecodeRevokeFeedbackCalldata(data) + if err != nil { + t.Fatalf("DecodeRevokeFeedbackCalldata: %v", err) + } + if decoded.AgentID.Cmp(big.NewInt(42)) != 0 || decoded.FeedbackIndex != 7 { + t.Errorf("decoded = %+v, want agentId=42 feedbackIndex=7", decoded) + } +} + +func TestEncodeAppendResponse_RoundTrip(t *testing.T) { + client := common.HexToAddress("0x4444444444444444444444444444444444444444") + respHash := crypto.Keccak256Hash([]byte("response payload")) + + data, err := 
EncodeAppendResponse(big.NewInt(42), client, 7, "ipfs://bafy.../resp.json", respHash) + if err != nil { + t.Fatalf("EncodeAppendResponse: %v", err) + } + if got := hex.EncodeToString(data[:4]); got != "c2349ab2" { + t.Errorf("selector = 0x%s, want 0xc2349ab2", got) + } + + decoded, err := DecodeAppendResponseCalldata(data) + if err != nil { + t.Fatalf("DecodeAppendResponseCalldata: %v", err) + } + if decoded.AgentID.Cmp(big.NewInt(42)) != 0 { + t.Errorf("agentId = %s, want 42", decoded.AgentID) + } + if decoded.ClientAddress != client { + t.Errorf("clientAddress = %s, want %s", decoded.ClientAddress, client) + } + if decoded.FeedbackIndex != 7 { + t.Errorf("feedbackIndex = %d, want 7", decoded.FeedbackIndex) + } + if decoded.ResponseURI != "ipfs://bafy.../resp.json" { + t.Errorf("responseURI = %q", decoded.ResponseURI) + } + if decoded.ResponseHash != respHash { + t.Errorf("responseHash = %s, want %s", decoded.ResponseHash, respHash) + } +} + +func TestEncodeGiveFeedback_BadInput(t *testing.T) { + hash := crypto.Keccak256Hash([]byte("x")) + overMax := new(big.Int).Add(maxFeedbackAbsValue, big.NewInt(1)) + underMin := new(big.Int).Neg(overMax) + + tests := []struct { + name string + fn func() ([]byte, error) + }{ + {"nil agentId", func() ([]byte, error) { + return EncodeGiveFeedback(nil, big.NewInt(1), 0, "", "", "", "", hash) + }}, + {"negative agentId", func() ([]byte, error) { + return EncodeGiveFeedback(big.NewInt(-1), big.NewInt(1), 0, "", "", "", "", hash) + }}, + {"nil value", func() ([]byte, error) { + return EncodeGiveFeedback(big.NewInt(1), nil, 0, "", "", "", "", hash) + }}, + {"value over 1e38", func() ([]byte, error) { + return EncodeGiveFeedback(big.NewInt(1), overMax, 0, "", "", "", "", hash) + }}, + {"value under -1e38", func() ([]byte, error) { + return EncodeGiveFeedback(big.NewInt(1), underMin, 0, "", "", "", "", hash) + }}, + {"valueDecimals 19", func() ([]byte, error) { + return EncodeGiveFeedback(big.NewInt(1), big.NewInt(1), 19, "", "", "", 
"", hash) + }}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if _, err := tt.fn(); err == nil { + t.Error("expected error, got nil") + } + }) + } + + // Boundary values must be accepted. + if _, err := EncodeGiveFeedback(big.NewInt(1), maxFeedbackAbsValue, MaxFeedbackValueDecimals, "", "", "", "", common.Hash{}); err != nil { + t.Errorf("value 1e38, decimals 18 should be accepted: %v", err) + } + if _, err := EncodeGiveFeedback(big.NewInt(1), new(big.Int).Neg(maxFeedbackAbsValue), 0, "", "", "", "", common.Hash{}); err != nil { + t.Errorf("value -1e38 should be accepted: %v", err) + } +} + +func TestEncodeRevokeFeedback_BadInput(t *testing.T) { + if _, err := EncodeRevokeFeedback(nil, 1); err == nil { + t.Error("nil agentId: expected error") + } + if _, err := EncodeRevokeFeedback(big.NewInt(1), 0); err == nil { + t.Error("feedbackIndex 0: expected error") + } +} + +func TestEncodeAppendResponse_BadInput(t *testing.T) { + client := common.HexToAddress("0x4444444444444444444444444444444444444444") + if _, err := EncodeAppendResponse(nil, client, 1, "u", common.Hash{}); err == nil { + t.Error("nil agentId: expected error") + } + if _, err := EncodeAppendResponse(big.NewInt(1), common.Address{}, 1, "u", common.Hash{}); err == nil { + t.Error("zero clientAddress: expected error") + } + if _, err := EncodeAppendResponse(big.NewInt(1), client, 0, "u", common.Hash{}); err == nil { + t.Error("feedbackIndex 0: expected error") + } + if _, err := EncodeAppendResponse(big.NewInt(1), client, 1, "", common.Hash{}); err == nil { + t.Error("empty responseURI: expected error") + } +} + +func TestDecodeReputationCalldata_Errors(t *testing.T) { + t.Run("too short", func(t *testing.T) { + if _, err := DecodeGiveFeedbackCalldata([]byte{0x3c}); err == nil { + t.Error("expected error for short calldata") + } + }) + + t.Run("wrong selector", func(t *testing.T) { + data, err := EncodeRevokeFeedback(big.NewInt(1), 1) + if err != nil { + t.Fatal(err) + } + if _, 
err := DecodeGiveFeedbackCalldata(data); err == nil { + t.Error("expected selector mismatch error") + } else if !strings.Contains(err.Error(), "selector mismatch") { + t.Errorf("error = %v, want selector mismatch", err) + } + }) + + t.Run("truncated args", func(t *testing.T) { + data, err := EncodeGiveFeedback(big.NewInt(1), big.NewInt(50), 0, "t1", "t2", "e", "u", common.Hash{}) + if err != nil { + t.Fatal(err) + } + // Cut the entire trailing dynamic section so the feedbackURI offset + // points past the end of the payload. + if _, err := DecodeGiveFeedbackCalldata(data[:len(data)-96]); err == nil { + t.Error("expected error for truncated calldata") + } + }) +} + +func TestReputationRegistryAddress(t *testing.T) { + tests := []struct { + network string + want string + wantErr bool + }{ + {"base-sepolia", ReputationRegistryBaseSepolia, false}, + {"base", ReputationRegistryMainnet, false}, + {"base-mainnet", ReputationRegistryMainnet, false}, + {"ethereum", ReputationRegistryMainnet, false}, + {"mainnet", ReputationRegistryMainnet, false}, + {"solana", "", true}, + {"", "", true}, + } + for _, tt := range tests { + t.Run(tt.network, func(t *testing.T) { + got, err := ReputationRegistryAddress(tt.network) + if tt.wantErr { + if err == nil { + t.Errorf("expected error for %q, got address %s", tt.network, got) + } + return + } + if err != nil { + t.Fatalf("ReputationRegistryAddress(%q): %v", tt.network, err) + } + if got != tt.want { + t.Errorf("address = %s, want %s", got, tt.want) + } + }) + } +} + +func TestNewReputationReader_BadInput(t *testing.T) { + if _, err := NewReputationReader(nil, ReputationRegistryBaseSepolia); err == nil { + t.Error("nil caller: expected error") + } + if _, err := NewReputationReader(&stubCaller{}, "0xZZ"); err == nil { + t.Error("bad address: expected error") + } +} + +func TestReputationReader_Summary(t *testing.T) { + parsed, err := reputationABI() + if err != nil { + t.Fatal(err) + } + ret, err := 
parsed.Methods["getSummary"].Outputs.Pack(uint64(12), big.NewInt(925), uint8(1)) + if err != nil { + t.Fatal(err) + } + + caller := &stubCaller{ret: ret} + reader, err := NewReputationReader(caller, ReputationRegistryBaseSepolia) + if err != nil { + t.Fatal(err) + } + + summary, err := reader.Summary(context.Background(), big.NewInt(42), nil, "code-review", "") + if err != nil { + t.Fatalf("Summary: %v", err) + } + if summary.Count != 12 { + t.Errorf("count = %d, want 12", summary.Count) + } + if summary.SummaryValue.Cmp(big.NewInt(925)) != 0 { + t.Errorf("summaryValue = %s, want 925", summary.SummaryValue) + } + if summary.SummaryValueDecimals != 1 { + t.Errorf("summaryValueDecimals = %d, want 1", summary.SummaryValueDecimals) + } + + wantData, err := parsed.Pack("getSummary", big.NewInt(42), []common.Address{}, "code-review", "") + if err != nil { + t.Fatal(err) + } + if hex.EncodeToString(caller.lastCall.Data) != hex.EncodeToString(wantData) { + t.Errorf("call data = 0x%x, want 0x%x", caller.lastCall.Data, wantData) + } + + if _, err := reader.Summary(context.Background(), nil, nil, "", ""); err == nil { + t.Error("nil agentId: expected error") + } +} + +func TestReputationReader_ReadFeedback(t *testing.T) { + parsed, err := reputationABI() + if err != nil { + t.Fatal(err) + } + ret, err := parsed.Methods["readFeedback"].Outputs.Pack(big.NewInt(-50), uint8(0), "code-review", "go", true) + if err != nil { + t.Fatal(err) + } + + reader, err := NewReputationReader(&stubCaller{ret: ret}, ReputationRegistryBaseSepolia) + if err != nil { + t.Fatal(err) + } + + entry, err := reader.ReadFeedback(context.Background(), big.NewInt(42), common.HexToAddress("0x4444444444444444444444444444444444444444"), 3) + if err != nil { + t.Fatalf("ReadFeedback: %v", err) + } + if entry.Value.Cmp(big.NewInt(-50)) != 0 { + t.Errorf("value = %s, want -50", entry.Value) + } + if entry.ValueDecimals != 0 { + t.Errorf("valueDecimals = %d, want 0", entry.ValueDecimals) + } + if entry.Tag1 != 
"code-review" || entry.Tag2 != "go" { + t.Errorf("tags = (%q, %q)", entry.Tag1, entry.Tag2) + } + if !entry.IsRevoked { + t.Error("isRevoked = false, want true") + } +} + +func TestReputationReader_LastIndex(t *testing.T) { + parsed, err := reputationABI() + if err != nil { + t.Fatal(err) + } + ret, err := parsed.Methods["getLastIndex"].Outputs.Pack(uint64(9)) + if err != nil { + t.Fatal(err) + } + + reader, err := NewReputationReader(&stubCaller{ret: ret}, ReputationRegistryBaseSepolia) + if err != nil { + t.Fatal(err) + } + + idx, err := reader.LastIndex(context.Background(), big.NewInt(42), common.HexToAddress("0x4444444444444444444444444444444444444444")) + if err != nil { + t.Fatalf("LastIndex: %v", err) + } + if idx != 9 { + t.Errorf("lastIndex = %d, want 9", idx) + } +} diff --git a/internal/erc8004/validation.go b/internal/erc8004/validation.go new file mode 100644 index 00000000..8aa46a8b --- /dev/null +++ b/internal/erc8004/validation.go @@ -0,0 +1,401 @@ +package erc8004 + +// ERC-8004 Validation Registry (v2.0.0) calldata builders and read helpers. +// +// IMPORTANT — signing model: the serviceoffer/servicebounty controller NEVER +// signs validation transactions. Poster agents submit validationRequest and +// evaluator agents submit validationResponse with THEIR OWN wallets; this +// package only builds calldata for them and reads/records results on-chain. 
+// +// Function signatures verified against: +// - Spec: https://eips.ethereum.org/EIPS/eip-8004 (Validation Registry) +// - Reference impl + official ABI: +// https://github.com/erc-8004/erc-8004-contracts +// (abis/ValidationRegistry.json, contracts/ValidationRegistryUpgradeable.sol, +// getVersion() == "2.0.0") +// +// validationRequest(address validatorAddress, uint256 agentId, string requestURI, bytes32 requestHash) +// validationResponse(bytes32 requestHash, uint8 response, string responseURI, bytes32 responseHash, string tag) +// getValidationStatus(bytes32 requestHash) -> (address, uint256, uint8, bytes32, string, uint256) +// getSummary(uint256 agentId, address[] validatorAddresses, string tag) -> (uint64 count, uint8 avgResponse) +// getAgentValidations(uint256 agentId) -> bytes32[] +// getValidatorRequests(address validatorAddress) -> bytes32[] + +import ( + "bytes" + "context" + _ "embed" + "fmt" + "math/big" + "strings" + "sync" + + "github.com/ethereum/go-ethereum/accounts/abi" + "github.com/ethereum/go-ethereum/accounts/abi/bind" + "github.com/ethereum/go-ethereum/common" +) + +//go:embed validation_registry.abi.json +var validationRegistryABI string + +const ( + // ValidationRegistryV2BaseSepolia is the ERC-8004 v2.0.0 Validation + // Registry on Base Sepolia (CREATE2 vanity proxy, same address on all + // supported testnets). + // + // NOTE: this intentionally differs from the legacy + // ValidationRegistryBaseSepolia constant in abi.go + // (0x8004CB39f29c09145F24Ad9dDe2A108C1A2cdfC5): that address has NO code + // on Base Sepolia — it is a v1.0.0 deployment that only exists on + // Ethereum Sepolia (verified via eth_getCode + getVersion(), 2026-06-10). + // Source: https://github.com/erc-8004/erc-8004-contracts + // (scripts/addresses.ts TESTNET_ADDRESSES.validationRegistry); on-chain: + // getVersion() == "2.0.0", getIdentityRegistry() == + // IdentityRegistryBaseSepolia. 
+ ValidationRegistryV2BaseSepolia = "0x8004Cb1BF31DAf7788923b405b754f57acEB4272" + + // ValidationRegistryV2Mainnet is the ERC-8004 v2.0.0 Validation Registry + // on Ethereum mainnet and Base mainnet (deployed at the same address via + // CREATE2). Source: https://github.com/erc-8004/erc-8004-contracts + // (scripts/addresses.ts MAINNET_ADDRESSES.validationRegistry); on-chain: + // code present on both chains, getVersion() == "2.0.0", + // getIdentityRegistry() == IdentityRegistryMainnet. + ValidationRegistryV2Mainnet = "0x8004Cc8439f36fd5F9F049D9fF86523Df6dAAB58" + + // MaxValidationResponse is the maximum validationResponse score. The + // contract reverts with "resp>100" above this. + MaxValidationResponse = 100 +) + +var ( + validationABIOnce sync.Once + validationABIParsed abi.ABI + validationABIErr error +) + +// validationABI lazily parses the embedded Validation Registry ABI once. +func validationABI() (abi.ABI, error) { + validationABIOnce.Do(func() { + validationABIParsed, validationABIErr = abi.JSON(strings.NewReader(validationRegistryABI)) + }) + if validationABIErr != nil { + return abi.ABI{}, fmt.Errorf("erc8004: parse validation registry abi: %w", validationABIErr) + } + return validationABIParsed, nil +} + +// ValidationRegistryAddress maps a supported network name to the deployed +// ERC-8004 v2.0.0 Validation Registry address. It accepts the same aliases as +// ResolveNetwork. Networks without an on-chain-verified deployment return an +// error rather than a guessed address. 
+func ValidationRegistryAddress(network string) (string, error) { + net, err := ResolveNetwork(network) + if err != nil { + return "", fmt.Errorf("erc8004: validation registry: %w", err) + } + switch net.Name { + case BaseSepolia.Name: + return ValidationRegistryV2BaseSepolia, nil + case Base.Name, Ethereum.Name: + return ValidationRegistryV2Mainnet, nil + default: + return "", fmt.Errorf("erc8004: no verified validation registry deployment for network %q", net.Name) + } +} + +// checkAgentID rejects agent ids that cannot be ABI-encoded as uint256. +func checkAgentID(agentID *big.Int) error { + if agentID == nil { + return fmt.Errorf("erc8004: agentId must not be nil") + } + if agentID.Sign() < 0 { + return fmt.Errorf("erc8004: agentId must not be negative (got %s)", agentID) + } + if agentID.BitLen() > 256 { + return fmt.Errorf("erc8004: agentId does not fit in uint256") + } + return nil +} + +// unpackCalldata verifies the 4-byte selector against the named method and +// unpacks the argument payload. +func unpackCalldata(parsed abi.ABI, name string, data []byte) ([]interface{}, error) { + method, ok := parsed.Methods[name] + if !ok { + return nil, fmt.Errorf("erc8004: method %q not in ABI", name) + } + if len(data) < 4 { + return nil, fmt.Errorf("erc8004: calldata too short (%d bytes, need at least 4)", len(data)) + } + if !bytes.Equal(data[:4], method.ID) { + return nil, fmt.Errorf("erc8004: selector mismatch: got 0x%x, want 0x%x (%s)", data[:4], method.ID, method.Sig) + } + values, err := method.Inputs.Unpack(data[4:]) + if err != nil { + return nil, fmt.Errorf("erc8004: unpack %s calldata: %w", name, err) + } + return values, nil +} + +// EncodeValidationRequest builds calldata for +// validationRequest(address,uint256,string,bytes32). The transaction must be +// submitted by the owner or an approved operator of agentId (the poster +// agent's own wallet) — never by the controller. 
+func EncodeValidationRequest(validatorAddress common.Address, agentID *big.Int, requestURI string, requestHash common.Hash) ([]byte, error) { + if validatorAddress == (common.Address{}) { + return nil, fmt.Errorf("erc8004: validatorAddress must not be the zero address") + } + if err := checkAgentID(agentID); err != nil { + return nil, err + } + if requestHash == (common.Hash{}) { + return nil, fmt.Errorf("erc8004: requestHash must not be the zero hash") + } + + parsed, err := validationABI() + if err != nil { + return nil, err + } + data, err := parsed.Pack("validationRequest", validatorAddress, agentID, requestURI, requestHash) + if err != nil { + return nil, fmt.Errorf("erc8004: pack validationRequest: %w", err) + } + return data, nil +} + +// EncodeValidationResponse builds calldata for +// validationResponse(bytes32,uint8,string,bytes32,string). response is the +// 0-100 score; the transaction must be submitted by the validator address +// named in the matching validationRequest (the evaluator's own wallet) — +// never by the controller. responseURI, responseHash, and tag are optional +// per spec and may be zero values. +func EncodeValidationResponse(requestHash common.Hash, response uint8, responseURI string, responseHash common.Hash, tag string) ([]byte, error) { + if requestHash == (common.Hash{}) { + return nil, fmt.Errorf("erc8004: requestHash must not be the zero hash") + } + if response > MaxValidationResponse { + return nil, fmt.Errorf("erc8004: response %d out of range [0,%d]", response, MaxValidationResponse) + } + + parsed, err := validationABI() + if err != nil { + return nil, err + } + data, err := parsed.Pack("validationResponse", requestHash, response, responseURI, responseHash, tag) + if err != nil { + return nil, fmt.Errorf("erc8004: pack validationResponse: %w", err) + } + return data, nil +} + +// ValidationRequestCall is the decoded argument set of a validationRequest call. 
+type ValidationRequestCall struct { + ValidatorAddress common.Address + AgentID *big.Int + RequestURI string + RequestHash common.Hash +} + +// DecodeValidationRequestCalldata decodes validationRequest calldata +// (selector + ABI-encoded args). Useful for provenance checks on observed +// transactions and for tests. +func DecodeValidationRequestCalldata(data []byte) (ValidationRequestCall, error) { + parsed, err := validationABI() + if err != nil { + return ValidationRequestCall{}, err + } + values, err := unpackCalldata(parsed, "validationRequest", data) + if err != nil { + return ValidationRequestCall{}, err + } + if len(values) != 4 { + return ValidationRequestCall{}, fmt.Errorf("erc8004: validationRequest arg count = %d, want 4", len(values)) + } + + out := ValidationRequestCall{} + var ok bool + if out.ValidatorAddress, ok = values[0].(common.Address); !ok { + return ValidationRequestCall{}, fmt.Errorf("erc8004: validatorAddress type = %T", values[0]) + } + if out.AgentID, ok = values[1].(*big.Int); !ok { + return ValidationRequestCall{}, fmt.Errorf("erc8004: agentId type = %T", values[1]) + } + if out.RequestURI, ok = values[2].(string); !ok { + return ValidationRequestCall{}, fmt.Errorf("erc8004: requestURI type = %T", values[2]) + } + hash, ok := values[3].([32]byte) + if !ok { + return ValidationRequestCall{}, fmt.Errorf("erc8004: requestHash type = %T", values[3]) + } + out.RequestHash = common.Hash(hash) + return out, nil +} + +// ValidationResponseCall is the decoded argument set of a validationResponse call. +type ValidationResponseCall struct { + RequestHash common.Hash + Response uint8 + ResponseURI string + ResponseHash common.Hash + Tag string +} + +// DecodeValidationResponseCalldata decodes validationResponse calldata +// (selector + ABI-encoded args). Useful for provenance checks on observed +// evaluator transactions and for tests. 
+func DecodeValidationResponseCalldata(data []byte) (ValidationResponseCall, error) { + parsed, err := validationABI() + if err != nil { + return ValidationResponseCall{}, err + } + values, err := unpackCalldata(parsed, "validationResponse", data) + if err != nil { + return ValidationResponseCall{}, err + } + if len(values) != 5 { + return ValidationResponseCall{}, fmt.Errorf("erc8004: validationResponse arg count = %d, want 5", len(values)) + } + + out := ValidationResponseCall{} + reqHash, ok := values[0].([32]byte) + if !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: requestHash type = %T", values[0]) + } + out.RequestHash = common.Hash(reqHash) + if out.Response, ok = values[1].(uint8); !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: response type = %T", values[1]) + } + if out.ResponseURI, ok = values[2].(string); !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: responseURI type = %T", values[2]) + } + respHash, ok := values[3].([32]byte) + if !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: responseHash type = %T", values[3]) + } + out.ResponseHash = common.Hash(respHash) + if out.Tag, ok = values[4].(string); !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: tag type = %T", values[4]) + } + return out, nil +} + +// ValidationStatus mirrors getValidationStatus(bytes32) return values. +type ValidationStatus struct { + ValidatorAddress common.Address + AgentID *big.Int + Response uint8 + ResponseHash common.Hash + Tag string + LastUpdate *big.Int +} + +// ValidationReader provides read-only access to a Validation Registry. The +// controller uses it to observe evaluator responses; it holds no signer. +type ValidationReader struct { + contract *bind.BoundContract +} + +// NewValidationReader binds a read-only Validation Registry at +// registryAddress. caller is typically (*erc8004.Client).ETH() or any +// *ethclient.Client. 
+func NewValidationReader(caller bind.ContractCaller, registryAddress string) (*ValidationReader, error) { + if caller == nil { + return nil, fmt.Errorf("erc8004: validation reader: caller must not be nil") + } + if !common.IsHexAddress(registryAddress) { + return nil, fmt.Errorf("erc8004: validation reader: invalid registry address %q", registryAddress) + } + parsed, err := validationABI() + if err != nil { + return nil, err + } + return &ValidationReader{ + contract: bind.NewBoundContract(common.HexToAddress(registryAddress), parsed, caller, nil, nil), + }, nil +} + +// ValidationStatus reads getValidationStatus(requestHash). +func (r *ValidationReader) ValidationStatus(ctx context.Context, requestHash common.Hash) (ValidationStatus, error) { + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "getValidationStatus", requestHash); err != nil { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus: %w", err) + } + if len(out) != 6 { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus returned %d values, want 6", len(out)) + } + + status := ValidationStatus{} + var ok bool + if status.ValidatorAddress, ok = out[0].(common.Address); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus validatorAddress type = %T", out[0]) + } + if status.AgentID, ok = out[1].(*big.Int); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus agentId type = %T", out[1]) + } + if status.Response, ok = out[2].(uint8); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus response type = %T", out[2]) + } + respHash, ok := out[3].([32]byte) + if !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus responseHash type = %T", out[3]) + } + status.ResponseHash = common.Hash(respHash) + if status.Tag, ok = out[4].(string); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus tag type = %T", out[4]) + } + if 
status.LastUpdate, ok = out[5].(*big.Int); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus lastUpdate type = %T", out[5]) + } + return status, nil +} + +// Summary reads getSummary(agentId, validatorAddresses, tag) and returns the +// response count and 0-100 average. +func (r *ValidationReader) Summary(ctx context.Context, agentID *big.Int, validatorAddresses []common.Address, tag string) (count uint64, avgResponse uint8, err error) { + if err := checkAgentID(agentID); err != nil { + return 0, 0, err + } + if validatorAddresses == nil { + validatorAddresses = []common.Address{} + } + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "getSummary", agentID, validatorAddresses, tag); err != nil { + return 0, 0, fmt.Errorf("erc8004: validation getSummary: %w", err) + } + if len(out) != 2 { + return 0, 0, fmt.Errorf("erc8004: validation getSummary returned %d values, want 2", len(out)) + } + count, ok := out[0].(uint64) + if !ok { + return 0, 0, fmt.Errorf("erc8004: validation getSummary count type = %T", out[0]) + } + avgResponse, ok = out[1].(uint8) + if !ok { + return 0, 0, fmt.Errorf("erc8004: validation getSummary avgResponse type = %T", out[1]) + } + return count, avgResponse, nil +} + +// AgentValidations reads getAgentValidations(agentId) — all request hashes +// recorded for the agent. 
+func (r *ValidationReader) AgentValidations(ctx context.Context, agentID *big.Int) ([]common.Hash, error) { + if err := checkAgentID(agentID); err != nil { + return nil, err + } + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "getAgentValidations", agentID); err != nil { + return nil, fmt.Errorf("erc8004: getAgentValidations: %w", err) + } + if len(out) != 1 { + return nil, fmt.Errorf("erc8004: getAgentValidations returned %d values, want 1", len(out)) + } + raw, ok := out[0].([][32]byte) + if !ok { + return nil, fmt.Errorf("erc8004: getAgentValidations type = %T", out[0]) + } + hashes := make([]common.Hash, len(raw)) + for i, h := range raw { + hashes[i] = common.Hash(h) + } + return hashes, nil +} diff --git a/internal/erc8004/validation_registry.abi.json b/internal/erc8004/validation_registry.abi.json new file mode 100644 index 00000000..a73a65bb --- /dev/null +++ b/internal/erc8004/validation_registry.abi.json @@ -0,0 +1,272 @@ +[ + { + "inputs": [ + { + "internalType": "address", + "name": "validatorAddress", + "type": "address" + }, + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "string", + "name": "requestURI", + "type": "string" + }, + { + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + } + ], + "name": "validationRequest", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + }, + { + "internalType": "uint8", + "name": "response", + "type": "uint8" + }, + { + "internalType": "string", + "name": "responseURI", + "type": "string" + }, + { + "internalType": "bytes32", + "name": "responseHash", + "type": "bytes32" + }, + { + "internalType": "string", + "name": "tag", + "type": "string" + } + ], + "name": "validationResponse", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + }, + { + "inputs": [ + 
{ + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + } + ], + "name": "getValidationStatus", + "outputs": [ + { + "internalType": "address", + "name": "validatorAddress", + "type": "address" + }, + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "uint8", + "name": "response", + "type": "uint8" + }, + { + "internalType": "bytes32", + "name": "responseHash", + "type": "bytes32" + }, + { + "internalType": "string", + "name": "tag", + "type": "string" + }, + { + "internalType": "uint256", + "name": "lastUpdate", + "type": "uint256" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "address[]", + "name": "validatorAddresses", + "type": "address[]" + }, + { + "internalType": "string", + "name": "tag", + "type": "string" + } + ], + "name": "getSummary", + "outputs": [ + { + "internalType": "uint64", + "name": "count", + "type": "uint64" + }, + { + "internalType": "uint8", + "name": "avgResponse", + "type": "uint8" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + } + ], + "name": "getAgentValidations", + "outputs": [ + { + "internalType": "bytes32[]", + "name": "", + "type": "bytes32[]" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "address", + "name": "validatorAddress", + "type": "address" + } + ], + "name": "getValidatorRequests", + "outputs": [ + { + "internalType": "bytes32[]", + "name": "", + "type": "bytes32[]" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [], + "name": "getIdentityRegistry", + "outputs": [ + { + "internalType": "address", + "name": "", + "type": "address" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "anonymous": false, + 
"inputs": [ + { + "indexed": true, + "internalType": "address", + "name": "validatorAddress", + "type": "address" + }, + { + "indexed": true, + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "indexed": false, + "internalType": "string", + "name": "requestURI", + "type": "string" + }, + { + "indexed": true, + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + } + ], + "name": "ValidationRequest", + "type": "event" + }, + { + "anonymous": false, + "inputs": [ + { + "indexed": true, + "internalType": "address", + "name": "validatorAddress", + "type": "address" + }, + { + "indexed": true, + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "indexed": true, + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + }, + { + "indexed": false, + "internalType": "uint8", + "name": "response", + "type": "uint8" + }, + { + "indexed": false, + "internalType": "string", + "name": "responseURI", + "type": "string" + }, + { + "indexed": false, + "internalType": "bytes32", + "name": "responseHash", + "type": "bytes32" + }, + { + "indexed": false, + "internalType": "string", + "name": "tag", + "type": "string" + } + ], + "name": "ValidationResponse", + "type": "event" + } +] diff --git a/internal/erc8004/validation_test.go b/internal/erc8004/validation_test.go new file mode 100644 index 00000000..939bbf5a --- /dev/null +++ b/internal/erc8004/validation_test.go @@ -0,0 +1,404 @@ +package erc8004 + +import ( + "context" + "encoding/hex" + "math/big" + "strings" + "testing" + + ethereum "github.com/ethereum/go-ethereum" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +// stubCaller is a bind.ContractCaller that returns canned ABI-encoded output. +// Shared by validation and reputation reader tests. Never hits the network. 
+type stubCaller struct { + ret []byte + err error + lastCall ethereum.CallMsg +} + +func (s *stubCaller) CodeAt(_ context.Context, _ common.Address, _ *big.Int) ([]byte, error) { + return []byte{0x01}, nil +} + +func (s *stubCaller) CallContract(_ context.Context, call ethereum.CallMsg, _ *big.Int) ([]byte, error) { + s.lastCall = call + return s.ret, s.err +} + +func TestValidationABI_Parses(t *testing.T) { + if _, err := validationABI(); err != nil { + t.Fatalf("embedded validation ABI failed to parse: %v", err) + } +} + +// TestValidationABI_SelectorGoldenValues pins the 4-byte selectors of the +// verified v2.0.0 signatures (spec: https://eips.ethereum.org/EIPS/eip-8004; +// ABI: https://github.com/erc-8004/erc-8004-contracts). Each golden value is +// cross-checked against keccak256 of the canonical signature string and the +// parsed ABI method. +func TestValidationABI_SelectorGoldenValues(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + + tests := []struct { + method string + sig string + selector string + }{ + {"validationRequest", "validationRequest(address,uint256,string,bytes32)", "aaf400c4"}, + {"validationResponse", "validationResponse(bytes32,uint8,string,bytes32,string)", "3d659a96"}, + {"getValidationStatus", "getValidationStatus(bytes32)", "ff2febfc"}, + {"getSummary", "getSummary(uint256,address[],string)", "1b7cabd6"}, + {"getAgentValidations", "getAgentValidations(uint256)", "8d5d0c2d"}, + {"getValidatorRequests", "getValidatorRequests(address)", "4bf3158c"}, + {"getIdentityRegistry", "getIdentityRegistry()", "bc4d861b"}, + } + + for _, tt := range tests { + t.Run(tt.method, func(t *testing.T) { + m, ok := parsed.Methods[tt.method] + if !ok { + t.Fatalf("method %q missing from parsed ABI", tt.method) + } + if m.Sig != tt.sig { + t.Errorf("signature = %q, want %q", m.Sig, tt.sig) + } + if got := hex.EncodeToString(m.ID); got != tt.selector { + t.Errorf("parsed selector = 0x%s, want 0x%s", got, tt.selector) 
+ } + if got := hex.EncodeToString(crypto.Keccak256([]byte(tt.sig))[:4]); got != tt.selector { + t.Errorf("keccak256(%q)[:4] = 0x%s, want 0x%s", tt.sig, got, tt.selector) + } + }) + } +} + +func TestValidationABI_EventsPresent(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + for _, name := range []string{"ValidationRequest", "ValidationResponse"} { + if _, ok := parsed.Events[name]; !ok { + t.Errorf("missing event %q in parsed ABI", name) + } + } +} + +func TestEncodeValidationRequest_RoundTrip(t *testing.T) { + validator := common.HexToAddress("0x1111111111111111111111111111111111111111") + agentID := big.NewInt(42) + requestURI := "https://example.org/bounty/42/request.json" + requestHash := crypto.Keccak256Hash([]byte("request payload")) + + data, err := EncodeValidationRequest(validator, agentID, requestURI, requestHash) + if err != nil { + t.Fatalf("EncodeValidationRequest: %v", err) + } + if got := hex.EncodeToString(data[:4]); got != "aaf400c4" { + t.Errorf("selector = 0x%s, want 0xaaf400c4", got) + } + + decoded, err := DecodeValidationRequestCalldata(data) + if err != nil { + t.Fatalf("DecodeValidationRequestCalldata: %v", err) + } + if decoded.ValidatorAddress != validator { + t.Errorf("validatorAddress = %s, want %s", decoded.ValidatorAddress, validator) + } + if decoded.AgentID.Cmp(agentID) != 0 { + t.Errorf("agentId = %s, want %s", decoded.AgentID, agentID) + } + if decoded.RequestURI != requestURI { + t.Errorf("requestURI = %q, want %q", decoded.RequestURI, requestURI) + } + if decoded.RequestHash != requestHash { + t.Errorf("requestHash = %s, want %s", decoded.RequestHash, requestHash) + } +} + +func TestEncodeValidationResponse_RoundTrip(t *testing.T) { + requestHash := crypto.Keccak256Hash([]byte("request payload")) + responseHash := crypto.Keccak256Hash([]byte("evaluation artifact")) + + data, err := EncodeValidationResponse(requestHash, 87, "ipfs://bafy.../eval.json", responseHash, "code-review") + if err != 
nil { + t.Fatalf("EncodeValidationResponse: %v", err) + } + if got := hex.EncodeToString(data[:4]); got != "3d659a96" { + t.Errorf("selector = 0x%s, want 0x3d659a96", got) + } + + decoded, err := DecodeValidationResponseCalldata(data) + if err != nil { + t.Fatalf("DecodeValidationResponseCalldata: %v", err) + } + if decoded.RequestHash != requestHash { + t.Errorf("requestHash = %s, want %s", decoded.RequestHash, requestHash) + } + if decoded.Response != 87 { + t.Errorf("response = %d, want 87", decoded.Response) + } + if decoded.ResponseURI != "ipfs://bafy.../eval.json" { + t.Errorf("responseURI = %q", decoded.ResponseURI) + } + if decoded.ResponseHash != responseHash { + t.Errorf("responseHash = %s, want %s", decoded.ResponseHash, responseHash) + } + if decoded.Tag != "code-review" { + t.Errorf("tag = %q, want %q", decoded.Tag, "code-review") + } +} + +func TestEncodeValidationResponse_OptionalFieldsZero(t *testing.T) { + requestHash := crypto.Keccak256Hash([]byte("req")) + data, err := EncodeValidationResponse(requestHash, 0, "", common.Hash{}, "") + if err != nil { + t.Fatalf("EncodeValidationResponse with zero optionals: %v", err) + } + decoded, err := DecodeValidationResponseCalldata(data) + if err != nil { + t.Fatalf("decode: %v", err) + } + if decoded.Response != 0 || decoded.ResponseURI != "" || decoded.Tag != "" || decoded.ResponseHash != (common.Hash{}) { + t.Errorf("zero optionals did not round-trip: %+v", decoded) + } +} + +func TestEncodeValidationRequest_BadInput(t *testing.T) { + validator := common.HexToAddress("0x1111111111111111111111111111111111111111") + hash := crypto.Keccak256Hash([]byte("x")) + + tests := []struct { + name string + fn func() ([]byte, error) + }{ + {"zero validator", func() ([]byte, error) { + return EncodeValidationRequest(common.Address{}, big.NewInt(1), "u", hash) + }}, + {"nil agentId", func() ([]byte, error) { + return EncodeValidationRequest(validator, nil, "u", hash) + }}, + {"negative agentId", func() ([]byte, error) { 
+ return EncodeValidationRequest(validator, big.NewInt(-1), "u", hash) + }}, + {"zero requestHash", func() ([]byte, error) { + return EncodeValidationRequest(validator, big.NewInt(1), "u", common.Hash{}) + }}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if _, err := tt.fn(); err == nil { + t.Error("expected error, got nil") + } + }) + } +} + +func TestEncodeValidationResponse_BadInput(t *testing.T) { + hash := crypto.Keccak256Hash([]byte("x")) + + if _, err := EncodeValidationResponse(common.Hash{}, 50, "", common.Hash{}, ""); err == nil { + t.Error("zero requestHash: expected error, got nil") + } + if _, err := EncodeValidationResponse(hash, 101, "", common.Hash{}, ""); err == nil { + t.Error("response 101: expected error, got nil") + } + if _, err := EncodeValidationResponse(hash, MaxValidationResponse, "", common.Hash{}, ""); err != nil { + t.Errorf("response 100 should be accepted: %v", err) + } +} + +func TestDecodeValidationCalldata_Errors(t *testing.T) { + t.Run("too short", func(t *testing.T) { + if _, err := DecodeValidationResponseCalldata([]byte{0x3d, 0x65}); err == nil { + t.Error("expected error for short calldata") + } + }) + + t.Run("wrong selector", func(t *testing.T) { + // validationRequest calldata fed to the validationResponse decoder. 
+ data, err := EncodeValidationRequest( + common.HexToAddress("0x2222222222222222222222222222222222222222"), + big.NewInt(7), "u", crypto.Keccak256Hash([]byte("y"))) + if err != nil { + t.Fatal(err) + } + if _, err := DecodeValidationResponseCalldata(data); err == nil { + t.Error("expected selector mismatch error") + } else if !strings.Contains(err.Error(), "selector mismatch") { + t.Errorf("error = %v, want selector mismatch", err) + } + }) + + t.Run("truncated args", func(t *testing.T) { + data, err := EncodeValidationResponse(crypto.Keccak256Hash([]byte("z")), 10, "uri", common.Hash{}, "tag") + if err != nil { + t.Fatal(err) + } + if _, err := DecodeValidationResponseCalldata(data[:len(data)-40]); err == nil { + t.Error("expected error for truncated calldata") + } + }) +} + +func TestValidationRegistryAddress(t *testing.T) { + tests := []struct { + network string + want string + wantErr bool + }{ + {"base-sepolia", ValidationRegistryV2BaseSepolia, false}, + {" Base-Sepolia ", ValidationRegistryV2BaseSepolia, false}, + {"base", ValidationRegistryV2Mainnet, false}, + {"base-mainnet", ValidationRegistryV2Mainnet, false}, + {"ethereum", ValidationRegistryV2Mainnet, false}, + {"mainnet", ValidationRegistryV2Mainnet, false}, + {"solana", "", true}, + {"", "", true}, + } + for _, tt := range tests { + t.Run(tt.network, func(t *testing.T) { + got, err := ValidationRegistryAddress(tt.network) + if tt.wantErr { + if err == nil { + t.Errorf("expected error for %q, got address %s", tt.network, got) + } + return + } + if err != nil { + t.Fatalf("ValidationRegistryAddress(%q): %v", tt.network, err) + } + if got != tt.want { + t.Errorf("address = %s, want %s", got, tt.want) + } + }) + } +} + +func TestNewValidationReader_BadInput(t *testing.T) { + if _, err := NewValidationReader(nil, ValidationRegistryV2BaseSepolia); err == nil { + t.Error("nil caller: expected error") + } + if _, err := NewValidationReader(&stubCaller{}, "not-an-address"); err == nil { + t.Error("bad 
address: expected error") + } +} + +func TestValidationReader_ValidationStatus(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + + validator := common.HexToAddress("0x3333333333333333333333333333333333333333") + agentID := big.NewInt(42) + respHash := crypto.Keccak256Hash([]byte("artifact")) + lastUpdate := big.NewInt(1765432100) + + ret, err := parsed.Methods["getValidationStatus"].Outputs.Pack( + validator, agentID, uint8(91), [32]byte(respHash), "code-review", lastUpdate) + if err != nil { + t.Fatalf("pack outputs: %v", err) + } + + caller := &stubCaller{ret: ret} + reader, err := NewValidationReader(caller, ValidationRegistryV2BaseSepolia) + if err != nil { + t.Fatal(err) + } + + reqHash := crypto.Keccak256Hash([]byte("request")) + status, err := reader.ValidationStatus(context.Background(), reqHash) + if err != nil { + t.Fatalf("ValidationStatus: %v", err) + } + + if status.ValidatorAddress != validator { + t.Errorf("validatorAddress = %s, want %s", status.ValidatorAddress, validator) + } + if status.AgentID.Cmp(agentID) != 0 { + t.Errorf("agentId = %s, want %s", status.AgentID, agentID) + } + if status.Response != 91 { + t.Errorf("response = %d, want 91", status.Response) + } + if status.ResponseHash != respHash { + t.Errorf("responseHash = %s, want %s", status.ResponseHash, respHash) + } + if status.Tag != "code-review" { + t.Errorf("tag = %q, want %q", status.Tag, "code-review") + } + if status.LastUpdate.Cmp(lastUpdate) != 0 { + t.Errorf("lastUpdate = %s, want %s", status.LastUpdate, lastUpdate) + } + + // The reader must have issued a getValidationStatus(requestHash) call. 
+ wantData, err := parsed.Pack("getValidationStatus", reqHash) + if err != nil { + t.Fatal(err) + } + if hex.EncodeToString(caller.lastCall.Data) != hex.EncodeToString(wantData) { + t.Errorf("call data = 0x%x, want 0x%x", caller.lastCall.Data, wantData) + } +} + +func TestValidationReader_Summary(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + ret, err := parsed.Methods["getSummary"].Outputs.Pack(uint64(5), uint8(78)) + if err != nil { + t.Fatal(err) + } + + reader, err := NewValidationReader(&stubCaller{ret: ret}, ValidationRegistryV2BaseSepolia) + if err != nil { + t.Fatal(err) + } + + count, avg, err := reader.Summary(context.Background(), big.NewInt(42), nil, "") + if err != nil { + t.Fatalf("Summary: %v", err) + } + if count != 5 || avg != 78 { + t.Errorf("summary = (%d, %d), want (5, 78)", count, avg) + } + + if _, _, err := reader.Summary(context.Background(), nil, nil, ""); err == nil { + t.Error("nil agentId: expected error") + } +} + +func TestValidationReader_AgentValidations(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + h1 := crypto.Keccak256Hash([]byte("a")) + h2 := crypto.Keccak256Hash([]byte("b")) + ret, err := parsed.Methods["getAgentValidations"].Outputs.Pack([][32]byte{h1, h2}) + if err != nil { + t.Fatal(err) + } + + reader, err := NewValidationReader(&stubCaller{ret: ret}, ValidationRegistryV2BaseSepolia) + if err != nil { + t.Fatal(err) + } + + hashes, err := reader.AgentValidations(context.Background(), big.NewInt(42)) + if err != nil { + t.Fatalf("AgentValidations: %v", err) + } + if len(hashes) != 2 || hashes[0] != h1 || hashes[1] != h2 { + t.Errorf("hashes = %v, want [%s %s]", hashes, h1, h2) + } +} diff --git a/internal/monetizeapi/evaluatorenrollment.go b/internal/monetizeapi/evaluatorenrollment.go new file mode 100644 index 00000000..ae352b6c --- /dev/null +++ b/internal/monetizeapi/evaluatorenrollment.go @@ -0,0 +1,192 @@ +package monetizeapi + +import ( + 
// ── EvaluatorEnrollment ─────────────────────────────────────────────────────
//
// EvaluatorEnrollment is an evaluator's opt-in to the OBOL eval market: an
// address + the task types it can re-run + an optional device attestation.
// The spec is evaluator-written; the LADDER STATE in status is controller-
// owned (Shadow → Probation → Full, per task type — design doc §11.4). No
// staking: the only collateral is the future income a reputation earns.

// Evaluator ladder tiers.
const (
	EvaluatorTierShadow    = "Shadow"
	EvaluatorTierProbation = "Probation"
	EvaluatorTierFull      = "Full"
)

// +kubebuilder:object:root=true
// +kubebuilder:resource:scope=Namespaced,shortName=ee
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="Address",type=string,JSONPath=`.spec.address`
// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`

// EvaluatorEnrollment opts an evaluator into the eval market.
type EvaluatorEnrollment struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`
	Spec              EvaluatorEnrollmentSpec   `json:"spec,omitempty"`
	Status            EvaluatorEnrollmentStatus `json:"status,omitempty"`
}

// +kubebuilder:object:root=true

// EvaluatorEnrollmentList is the list form.
type EvaluatorEnrollmentList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []EvaluatorEnrollment `json:"items"`
}

// EvaluatorEnrollmentSpec is the evaluator-written half of an enrollment: the
// evaluator's address, the task types it offers to re-run, and an optional
// device attestation.
type EvaluatorEnrollmentSpec struct {
	// Address is the evaluator's payout/identity address — the same address
	// used in eval-commit/eval-reveal annotations and bound into commitments.
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:Pattern=`^0x[a-fA-F0-9]{40}$`
	Address string `json:"address"`

	// TaskTypes this evaluator can re-run (versioned refs, e.g. benchmark@v1).
	// +kubebuilder:validation:Required
	TaskTypes []string `json:"taskTypes"`

	// Attestation is the device-binding claim. v1 RECORDS it (sybil cost is
	// real hardware per identity once verification lands with the Secure
	// Enclave wiring); scheme "none" is honest-unattested.
	Attestation EvaluatorAttestation `json:"attestation,omitempty"`
}

// EvaluatorAttestation is the device-binding claim carried in the enrollment
// spec: a scheme name plus the scheme's public key and signature.
type EvaluatorAttestation struct {
	// Scheme: none (unattested) | secure-enclave (device-bound P-256 key).
	// +kubebuilder:validation:Enum=none;secure-enclave
	Scheme string `json:"scheme,omitempty"`

	// PublicKey is the attestation public key (secure-enclave scheme).
	PublicKey string `json:"publicKey,omitempty"`

	// Signature is the enrollment signature over the address (scheme-defined).
	Signature string `json:"signature,omitempty"`
}

// EvaluatorEnrollmentStatus is controller-owned ladder state.
type EvaluatorEnrollmentStatus struct {
	// ObservedGeneration is presumably the metadata.generation the controller
	// last reconciled — NOTE(review): confirm against the controller.
	ObservedGeneration int64 `json:"observedGeneration,omitempty"`

	// Records hold per-task-type ladder progress (reputation is per task
	// type — benchmark@v1 rep says nothing about finetune@v1).
	Records []EvaluatorLadderRecord `json:"records,omitempty"`
}
non/bad + // reveals) — the negative reputation signal. + Divergences int64 `json:"divergences,omitempty"` + + // RecentFulfillers are the last few fulfiller addresses this evaluator + // judged — the pair-diversity rule down-weights repeat pairings. + RecentFulfillers []string `json:"recentFulfillers,omitempty"` +} + +// ── deepcopy (hand-written, matching the package idiom) ───────────────────── + +func (in *EvaluatorEnrollment) DeepCopyInto(out *EvaluatorEnrollment) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +func (in *EvaluatorEnrollment) DeepCopy() *EvaluatorEnrollment { + if in == nil { + return nil + } + out := new(EvaluatorEnrollment) + in.DeepCopyInto(out) + return out +} + +func (in *EvaluatorEnrollment) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +func (in *EvaluatorEnrollmentList) DeepCopyInto(out *EvaluatorEnrollmentList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + l, m := &in.Items, &out.Items + *m = make([]EvaluatorEnrollment, len(*l)) + for i := range *l { + (*l)[i].DeepCopyInto(&(*m)[i]) + } + } +} + +func (in *EvaluatorEnrollmentList) DeepCopy() *EvaluatorEnrollmentList { + if in == nil { + return nil + } + out := new(EvaluatorEnrollmentList) + in.DeepCopyInto(out) + return out +} + +func (in *EvaluatorEnrollmentList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +func (in *EvaluatorEnrollmentSpec) DeepCopyInto(out *EvaluatorEnrollmentSpec) { + *out = *in + if in.TaskTypes != nil { + out.TaskTypes = make([]string, len(in.TaskTypes)) + copy(out.TaskTypes, in.TaskTypes) + } + out.Attestation = in.Attestation +} + +func (in *EvaluatorEnrollmentStatus) DeepCopyInto(out *EvaluatorEnrollmentStatus) { + *out = *in + if in.Records != nil { + l, m := 
// EvalCommitHash returns the commitment an evaluator publishes before
// revealing a score. The digest is
//
//	sha256("<score>|<salt>|<lowercased address>")
//
// rendered as "0x" followed by lowercase hex. Because the address is part of
// the preimage, a commitment cannot be copied by another evaluator and
// replayed with the original's revealed {score, salt} (Kleros whitepaper
// §4.3). CLI and controller MUST compute this identically.
func EvalCommitHash(score int64, salt, address string) string {
	// Lower-casing makes the hash independent of the address's hex casing.
	preimage := fmt.Sprintf("%d|%s|%s", score, salt, strings.ToLower(address))
	digest := sha256.Sum256([]byte(preimage))
	return "0x" + hex.EncodeToString(digest[:])
}
// +kubebuilder:object:root=true
// +kubebuilder:resource:scope=Namespaced,shortName=sb
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="Task",type=string,JSONPath=`.spec.task.typeRef`
// +kubebuilder:printcolumn:name="Reward",type=string,JSONPath=`.spec.reward.amount`
// +kubebuilder:printcolumn:name="Verification",type=string,JSONPath=`.spec.eval.mode`
// +kubebuilder:printcolumn:name="Phase",type=string,JSONPath=`.status.phase`
// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`

// ServiceBounty declares a unit of paid work (benchmark, fine-tune, serve, …)
// with an escrowed reward released on an accepted verdict.
type ServiceBounty struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`
	Spec              ServiceBountySpec   `json:"spec,omitempty"`
	Status            ServiceBountyStatus `json:"status,omitempty"`
}

// +kubebuilder:object:root=true

// ServiceBountyList is the list form for kubectl/list operations.
type ServiceBountyList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []ServiceBounty `json:"items"`
}

// ServiceBountySpec is the declared side of a bounty: the task, how a
// submission is accepted, the escrowed reward, the evaluation market settings,
// the trust policy, and the deadline / fulfiller limits.
type ServiceBountySpec struct {
	// Task describes the work. spec.task.typeRef selects an embedded,
	// versioned task-type package; spec.task.params is validated against
	// that package's schema at admission.
	// +kubebuilder:validation:Required
	Task ServiceBountyTask `json:"task"`

	// Acceptance is how a submission is judged. Defaults come from the task
	// type; the poster may tighten them.
	Acceptance ServiceBountyAcceptance `json:"acceptance,omitempty"`

	// Reward is the escrowed payment released to the fulfiller on acceptance.
	// +kubebuilder:validation:Required
	Reward ServiceBountyReward `json:"reward"`

	// Eval configures the OBOL-paid evaluation market (a SEPARATE payment leg
	// from the reward — x402 cannot splice a fee out of the reward auth).
	Eval ServiceBountyEval `json:"eval,omitempty"`

	// Trust selects the reputation gate + optional refundable self-bond. No
	// validator stake, no slashing — reputation (lost future income) is the
	// only collateral.
	Trust ServiceBountyTrust `json:"trust,omitempty"`

	// Deadline: past it with no accepted verdict → Expired → Refunded.
	Deadline *metav1.Time `json:"deadline,omitempty"`

	// MaxFulfillers: 1 = single-winner (default); >1 = first-N-valid paid.
	// +kubebuilder:default=1
	MaxFulfillers int64 `json:"maxFulfillers,omitempty"`
}
// ServiceBountyDatasetCommit pins the eval dataset for a task: a committed
// root plus the fraction of rows kept private.
type ServiceBountyDatasetCommit struct {
	// Root is a Merkle root committing the (partially private) eval dataset.
	Root string `json:"root,omitempty"`
	// PrivateFraction (0..1, as a string to keep schema stable) of rows kept
	// secret and revealed only to sampled evaluators at eval time.
	PrivateFraction string `json:"privateFraction,omitempty"`
}

// ServiceBountyAcceptance describes how a submission is judged: the method,
// the per-metric tolerance, and whether evaluators must commit before reveal.
type ServiceBountyAcceptance struct {
	// Method judges a submission. Benchmarks are NOT bit-exact: rerun-tolerance
	// re-runs the harness and accepts a score within tolerance. The commitHash
	// is integrity (anti bait-and-switch), not a determinism gate.
	// +kubebuilder:validation:Enum=rerun-tolerance;harness-rerun;sla-probe;poster-manual
	Method string `json:"method,omitempty"`

	// Tolerance per metric (e.g. {"mmlu":"0.01"}). Default from the task type.
	Tolerance map[string]string `json:"tolerance,omitempty"`

	// CommitReveal requires evaluators to commit then reveal scores, so they
	// can't pre-agree on a number.
	CommitReveal bool `json:"commitReveal,omitempty"`
}

// ServiceBountyReward mirrors the ServiceOfferPayment envelope (network +
// payTo + asset) so buy/sell/bounty all read the same way, plus the amount and
// the escrow rail. Network + PayTo are required to construct the upto
// authorization: the chain it settles on and the poster's refund address.
type ServiceBountyReward struct {
	// Payment network (e.g. "base", "base-sepolia").
	Network string `json:"network,omitempty"`

	// PayTo is the poster's address: the escrow-return / refund destination.
	// The fulfiller payout address is bound at claim time (witness.to in the
	// upto auth), not here.
	// +kubebuilder:validation:Pattern=`^0x[a-fA-F0-9]{40}$`
	PayTo string `json:"payTo,omitempty"`

	// Asset reuses ServiceOffer's asset shape (USDC eip3009 / OBOL permit2).
	Asset ServiceOfferAsset `json:"asset,omitempty"`

	// Amount is the lump-sum reward (human units, e.g. "500.00").
	Amount string `json:"amount,omitempty"`

	// Escrow selects the x402 settlement rail + reputation-driven mode.
	Escrow ServiceBountyEscrow `json:"escrow,omitempty"`
}

// ServiceBountyEscrow selects the settlement scheme, the facilitator, the
// reputation-driven escrow mode, and the value cap for the reward escrow.
type ServiceBountyEscrow struct {
	// Scheme: 'upto' (live: facilitator holds a recipient-bound auth, settles
	// ≤ max) or 'authCapture' (funds-locked, used above valueCap once the Go
	// impl lands — x402-foundation/x402#2298).
	// +kubebuilder:validation:Enum=upto;authCapture
	Scheme string `json:"scheme,omitempty"`

	// Facilitator URL (our own facilitator acts as the bounded settlement
	// trigger; payTo is signed into the auth so it can never redirect funds).
	Facilitator string `json:"facilitator,omitempty"`

	// Mode is selected by the fulfiller's reputation: 'auto' (optimistic),
	// 'facilitator-check' (deterministic re-run), 'onchain-lock' (authCapture).
	// +kubebuilder:validation:Enum=auto;facilitator-check;onchain-lock
	Mode string `json:"mode,omitempty"`

	// ValueCapMicros: above this the escrow must use an on-chain lock.
	ValueCapMicros string `json:"valueCapMicros,omitempty"`
}

// Eval verification modes. Verification is ON by default; skipping is an
// explicit, labeled act (--dangerously-skip-verification) — a skipped bounty
// emits no ERC-8004 validation entries and its reputation feedback is
// suppressed, so it can never be farmed for reputation.
const (
	EvalModeRequired           = "required"
	EvalModeDangerouslySkipped = "dangerouslySkipped"
)

// ServiceBountyEval is the OBOL-paid evaluation market. Evaluators are paid for
// the WORK (pass or fail), selected by reputation (not stake), and paid in OBOL
// by default via x402 batch-settlement.
type ServiceBountyEval struct {
	// K evaluators: median-of-k quorum; k≥3 whenever a probation seat is
	// occupied (the median absorbs one outlier).
	// +kubebuilder:default=1
	K int64 `json:"k,omitempty"`

	// Mode gates verification. 'required' (default) routes acceptance through
	// the evaluator quorum once the eval market is wired — until then a poster
	// verdict is recorded as PosterOverride. 'dangerouslySkipped' declares
	// poster-as-judge up front: same override path, but the bounty is marked
	// unverified and produces no reputation signal.
	// +kubebuilder:default="required"
	// +kubebuilder:validation:Enum=required;dangerouslySkipped
	Mode string `json:"mode,omitempty"`

	// Selection: VRF-sampled after submission, reputation-weighted; the poster
	// cannot hand-pick.
	// NOTE(review): the enum below also admits 'poster-manual', which reads as
	// the opposite of "cannot hand-pick" — confirm when that value is allowed.
	// +kubebuilder:validation:Enum=vrf-reputation-weighted;poster-manual
	Selection string `json:"selection,omitempty"`

	// Payment for evaluators — a separate leg from the reward.
	Payment ServiceBountyEvalPayment `json:"payment,omitempty"`
}

// ServiceBountyEvalPayment describes the evaluator payment leg: the asset, the
// per-evaluator fee, who funds it, and how it is settled.
type ServiceBountyEvalPayment struct {
	// Asset defaults to OBOL (verification is an OBOL utility sink).
	// +kubebuilder:default="OBOL"
	Asset string `json:"asset,omitempty"`

	// PerEvaluator fee (human units).
	PerEvaluator string `json:"perEvaluator,omitempty"`

	// FundedBy: 'poster' (separate poster-funded eval budget).
	// +kubebuilder:default="poster"
	FundedBy string `json:"fundedBy,omitempty"`

	// Settle: 'batch-settlement' pays all K evaluators in one tx.
	// +kubebuilder:default="batch-settlement"
	Settle string `json:"settle,omitempty"`
}
// ServiceBountySelfBond describes the optional fulfiller self-bond.
type ServiceBountySelfBond struct {
	// Required presumably marks whether a fulfiller must post the bond —
	// NOTE(review): confirm against the controller.
	Required bool `json:"required,omitempty"`
	// Amount of the bond; presumably human units like the reward amount —
	// NOTE(review): confirm.
	Amount string `json:"amount,omitempty"`
	// Token the bond is posted in; presumably a token symbol or address —
	// NOTE(review): confirm.
	Token string `json:"token,omitempty"`
}

// ServiceBountyStatus mirrors the AND-rollup condition idiom used by
// ServiceOffer. Machine truth is the condition set; Phase is the human rollup.
type ServiceBountyStatus struct {
	// ObservedGeneration is presumably the metadata.generation the controller
	// last reconciled — NOTE(review): confirm against the controller.
	ObservedGeneration int64 `json:"observedGeneration,omitempty"`
	// Phase is the human-readable rollup of Conditions.
	Phase string `json:"phase,omitempty"`
	// Conditions is the machine-truth condition set.
	Conditions []Condition `json:"conditions,omitempty"`

	// EscrowState: Reserved | Captured | Voided (held auth at the facilitator).
	EscrowState string `json:"escrowState,omitempty"`

	// WeightedScore is the reputation-weighted eval verdict (0-100).
	WeightedScore int64 `json:"weightedScore,omitempty"`

	// CaptureTxHash / RefundTxHash record the settled reward or refund.
	CaptureTxHash string `json:"captureTxHash,omitempty"`
	RefundTxHash  string `json:"refundTxHash,omitempty"`

	// ReportURI points at the SIWx/local-gated A2UI report (deliverable).
	ReportURI string `json:"reportURI,omitempty"`

	// Claims are observed fulfiller bindings (single-winner is the common case,
	// so claims live in status, not a separate CR).
	Claims []ServiceBountyClaim `json:"claims,omitempty"`

	// EvaluatorPanel is the controller-selected seat assignment (deterministic
	// per-bounty sampling from enrolled evaluators). Empty panel = open-door
	// fallback (insufficient pool) — any address may evaluate, as in early v1.
	EvaluatorPanel []ServiceBountyPanelSeat `json:"evaluatorPanel,omitempty"`

	// Evaluations are the eval-market verdicts promoted from the
	// obol.org/eval-commit-<address> / eval-reveal-<address> annotation
	// channel (the evaluator address is the annotation key suffix).
	Evaluations []ServiceBountyEvaluation `json:"evaluations,omitempty"`

	// EvalBudgetState tracks the poster-funded OBOL eval budget
	// (k × perEvaluator) at the escrow gateway: Reserved | Captured | Voided.
	// Evaluators are paid for the WORK, pass or fail.
	EvalBudgetState string `json:"evalBudgetState,omitempty"`

	// EvalPayoutTxHash records the batch-settlement receipt for the eval leg.
	EvalPayoutTxHash string `json:"evalPayoutTxHash,omitempty"`

	// LadderRecorded latches the one-shot cross-bounty ladder bookkeeping so
	// repeated reconciles after quorum never double-count.
	LadderRecorded bool `json:"ladderRecorded,omitempty"`

	// RevealDeadline opens once K commitments are in: every commit closes
	// before any reveal opens, and a missing reveal past this instant is
	// graded as a worst-case outlier (nonRevealPenalty).
	RevealDeadline *metav1.Time `json:"revealDeadline,omitempty"`

	// BondState tracks the fulfiller self-bond at the escrow gateway:
	// Reserved | Returned (success/honest timeout) | Forfeited (rejected work,
	// offsets the poster's burned eval budget).
	BondState string `json:"bondState,omitempty"`
}

// Panel seat kinds (design doc §11.4): full and probation seats count in the
// median-of-k quorum; shadow seats are graded against the median but never
// counted (the free reputation on-ramp).
const (
	PanelSeatFull      = "full"
	PanelSeatProbation = "probation"
	PanelSeatShadow    = "shadow"
)

// ServiceBountyPanelSeat is one selected evaluator seat.
type ServiceBountyPanelSeat struct {
	// Address is the enrolled evaluator's address.
	Address string `json:"address,omitempty"`

	// Seat: full | probation | shadow.
	Seat string `json:"seat,omitempty"`
}

// ServiceBountyEvaluation is one evaluator's commit-reveal record. WithinBand
// is the per-bounty ladder bookkeeping hook: divergence from the quorum median
// (or a missing/invalid reveal) is what future reputation feedback keys on.
type ServiceBountyEvaluation struct {
	// Address is the evaluator's payout/identity address (annotation key suffix).
	Address string `json:"address,omitempty"`

	// CommitHash = EvalCommitHash(score, salt, address), promoted first-write-wins.
	CommitHash string `json:"commitHash,omitempty"`

	// Score is the revealed 0-100 verdict (ERC-8004 validationResponse semantics).
	Score int64 `json:"score,omitempty"`

	// RevealedAt records when a valid reveal was promoted.
	RevealedAt *metav1.Time `json:"revealedAt,omitempty"`

	// WithinBand is false for NonReveal/BadReveal and for revealed scores
	// outside the outlier band around the quorum median.
	WithinBand bool `json:"withinBand,omitempty"`

	// Phase: Committed | Revealed | BadReveal | NonReveal.
	Phase string `json:"phase,omitempty"`

	// Seat mirrors the panel seat kind (full | probation | shadow); empty in
	// open-door mode.
	Seat string `json:"seat,omitempty"`

	// Paid marks inclusion in the eval-budget batch settlement (counting
	// seats that revealed validly; shadows evaluate free).
	Paid bool `json:"paid,omitempty"`

	// ValidationTxHash is the evaluator-submitted ERC-8004 validationResponse
	// transaction, recorded as provenance (the evaluator's OWN wallet signs;
	// the controller never does).
	ValidationTxHash string `json:"validationTxHash,omitempty"`
}
+ Phase string `json:"phase,omitempty"` +} + +// ── deepcopy (hand-written to match controller-gen idioms in +// zz_generated.deepcopy.go; the Reward/Eval/Trust sub-trees are pure value +// structs so the shallow `*out = *in` is already a deep copy for them) ───── + +func (in *ServiceBounty) DeepCopyInto(out *ServiceBounty) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +func (in *ServiceBounty) DeepCopy() *ServiceBounty { + if in == nil { + return nil + } + out := new(ServiceBounty) + in.DeepCopyInto(out) + return out +} + +func (in *ServiceBounty) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +func (in *ServiceBountyList) DeepCopyInto(out *ServiceBountyList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + l, m := &in.Items, &out.Items + *m = make([]ServiceBounty, len(*l)) + for i := range *l { + (*l)[i].DeepCopyInto(&(*m)[i]) + } + } +} + +func (in *ServiceBountyList) DeepCopy() *ServiceBountyList { + if in == nil { + return nil + } + out := new(ServiceBountyList) + in.DeepCopyInto(out) + return out +} + +func (in *ServiceBountyList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +func (in *ServiceBountySpec) DeepCopyInto(out *ServiceBountySpec) { + *out = *in + in.Task.DeepCopyInto(&out.Task) + in.Acceptance.DeepCopyInto(&out.Acceptance) + out.Reward = in.Reward + out.Eval = in.Eval + out.Trust = in.Trust + if in.Deadline != nil { + l, m := &in.Deadline, &out.Deadline + *m = (*l).DeepCopy() + } +} + +func (in *ServiceBountyTask) DeepCopyInto(out *ServiceBountyTask) { + *out = *in + if in.Params != nil { + out.Params = make(map[string]string, len(in.Params)) + for k, v := range in.Params { + out.Params[k] = v + } + } + out.TargetModel = in.TargetModel + out.DatasetCommit = 
in.DatasetCommit +} + +func (in *ServiceBountyAcceptance) DeepCopyInto(out *ServiceBountyAcceptance) { + *out = *in + if in.Tolerance != nil { + out.Tolerance = make(map[string]string, len(in.Tolerance)) + for k, v := range in.Tolerance { + out.Tolerance[k] = v + } + } +} + +func (in *ServiceBountyStatus) DeepCopyInto(out *ServiceBountyStatus) { + *out = *in + if in.Conditions != nil { + l, m := &in.Conditions, &out.Conditions + *m = make([]Condition, len(*l)) + for i := range *l { + (*l)[i].DeepCopyInto(&(*m)[i]) + } + } + if in.Claims != nil { + l, m := &in.Claims, &out.Claims + *m = make([]ServiceBountyClaim, len(*l)) + for i := range *l { + (*l)[i].DeepCopyInto(&(*m)[i]) + } + } + if in.EvaluatorPanel != nil { + out.EvaluatorPanel = make([]ServiceBountyPanelSeat, len(in.EvaluatorPanel)) + copy(out.EvaluatorPanel, in.EvaluatorPanel) + } + if in.Evaluations != nil { + l, m := &in.Evaluations, &out.Evaluations + *m = make([]ServiceBountyEvaluation, len(*l)) + for i := range *l { + (*l)[i].DeepCopyInto(&(*m)[i]) + } + } + if in.RevealDeadline != nil { + l, m := &in.RevealDeadline, &out.RevealDeadline + *m = (*l).DeepCopy() + } +} + +func (in *ServiceBountyEvaluation) DeepCopyInto(out *ServiceBountyEvaluation) { + *out = *in + if in.RevealedAt != nil { + l, m := &in.RevealedAt, &out.RevealedAt + *m = (*l).DeepCopy() + } +} + +func (in *ServiceBountyClaim) DeepCopyInto(out *ServiceBountyClaim) { + *out = *in + if in.ClaimedAt != nil { + l, m := &in.ClaimedAt, &out.ClaimedAt + *m = (*l).DeepCopy() + } +} diff --git a/internal/monetizeapi/types.go b/internal/monetizeapi/types.go index 2efb439b..3825a564 100644 --- a/internal/monetizeapi/types.go +++ b/internal/monetizeapi/types.go @@ -20,16 +20,20 @@ const ( Version = "v1alpha1" ServiceOfferKind = "ServiceOffer" + ServiceBountyKind = "ServiceBounty" RegistrationRequestKind = "RegistrationRequest" PurchaseRequestKind = "PurchaseRequest" AgentKind = "Agent" AgentIdentityKind = "AgentIdentity" + EvaluatorEnrollmentKind = 
"EvaluatorEnrollment" ServiceOfferResource = "serviceoffers" + ServiceBountyResource = "servicebounties" RegistrationRequestResource = "registrationrequests" PurchaseRequestResource = "purchaserequests" AgentResource = "agents" AgentIdentityResource = "agentidentities" + EvaluatorEnrollmentResource = "evaluatorenrollments" // Default identity used for the operator's public ERC-8004 registration // file. The registration file can contain multiple per-chain registrations. @@ -46,6 +50,8 @@ const ( var ( ServiceOfferGVR = schema.GroupVersionResource{Group: Group, Version: Version, Resource: ServiceOfferResource} + ServiceBountyGVR = schema.GroupVersionResource{Group: Group, Version: Version, Resource: ServiceBountyResource} + EvaluatorEnrollmentGVR = schema.GroupVersionResource{Group: Group, Version: Version, Resource: EvaluatorEnrollmentResource} RegistrationRequestGVR = schema.GroupVersionResource{Group: Group, Version: Version, Resource: RegistrationRequestResource} PurchaseRequestGVR = schema.GroupVersionResource{Group: Group, Version: Version, Resource: PurchaseRequestResource} AgentGVR = schema.GroupVersionResource{Group: Group, Version: Version, Resource: AgentResource} diff --git a/internal/monetizeapi/zz_generated.deepcopy.go b/internal/monetizeapi/zz_generated.deepcopy.go index 3c0207f3..78674df4 100644 --- a/internal/monetizeapi/zz_generated.deepcopy.go +++ b/internal/monetizeapi/zz_generated.deepcopy.go @@ -8,7 +8,7 @@ package monetizeapi import ( "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime" ) // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. @@ -253,6 +253,51 @@ func (in *Condition) DeepCopy() *Condition { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *EvaluatorAttestation) DeepCopyInto(out *EvaluatorAttestation) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EvaluatorAttestation. +func (in *EvaluatorAttestation) DeepCopy() *EvaluatorAttestation { + if in == nil { + return nil + } + out := new(EvaluatorAttestation) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EvaluatorEnrollmentSpec. +func (in *EvaluatorEnrollmentSpec) DeepCopy() *EvaluatorEnrollmentSpec { + if in == nil { + return nil + } + out := new(EvaluatorEnrollmentSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EvaluatorEnrollmentStatus. +func (in *EvaluatorEnrollmentStatus) DeepCopy() *EvaluatorEnrollmentStatus { + if in == nil { + return nil + } + out := new(EvaluatorEnrollmentStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EvaluatorLadderRecord. +func (in *EvaluatorLadderRecord) DeepCopy() *EvaluatorLadderRecord { + if in == nil { + return nil + } + out := new(EvaluatorLadderRecord) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PurchaseAutoRefill) DeepCopyInto(out *PurchaseAutoRefill) { *out = *in @@ -477,6 +522,190 @@ func (in *RegistrationRequestStatus) DeepCopy() *RegistrationRequestStatus { return out } +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyAcceptance. 
+func (in *ServiceBountyAcceptance) DeepCopy() *ServiceBountyAcceptance { + if in == nil { + return nil + } + out := new(ServiceBountyAcceptance) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyClaim. +func (in *ServiceBountyClaim) DeepCopy() *ServiceBountyClaim { + if in == nil { + return nil + } + out := new(ServiceBountyClaim) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceBountyDatasetCommit) DeepCopyInto(out *ServiceBountyDatasetCommit) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyDatasetCommit. +func (in *ServiceBountyDatasetCommit) DeepCopy() *ServiceBountyDatasetCommit { + if in == nil { + return nil + } + out := new(ServiceBountyDatasetCommit) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceBountyEscrow) DeepCopyInto(out *ServiceBountyEscrow) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyEscrow. +func (in *ServiceBountyEscrow) DeepCopy() *ServiceBountyEscrow { + if in == nil { + return nil + } + out := new(ServiceBountyEscrow) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceBountyEval) DeepCopyInto(out *ServiceBountyEval) { + *out = *in + out.Payment = in.Payment +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyEval. 
+func (in *ServiceBountyEval) DeepCopy() *ServiceBountyEval { + if in == nil { + return nil + } + out := new(ServiceBountyEval) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceBountyEvalPayment) DeepCopyInto(out *ServiceBountyEvalPayment) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyEvalPayment. +func (in *ServiceBountyEvalPayment) DeepCopy() *ServiceBountyEvalPayment { + if in == nil { + return nil + } + out := new(ServiceBountyEvalPayment) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyEvaluation. +func (in *ServiceBountyEvaluation) DeepCopy() *ServiceBountyEvaluation { + if in == nil { + return nil + } + out := new(ServiceBountyEvaluation) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceBountyPanelSeat) DeepCopyInto(out *ServiceBountyPanelSeat) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyPanelSeat. +func (in *ServiceBountyPanelSeat) DeepCopy() *ServiceBountyPanelSeat { + if in == nil { + return nil + } + out := new(ServiceBountyPanelSeat) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceBountyReward) DeepCopyInto(out *ServiceBountyReward) { + *out = *in + out.Asset = in.Asset + out.Escrow = in.Escrow +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyReward. 
+func (in *ServiceBountyReward) DeepCopy() *ServiceBountyReward { + if in == nil { + return nil + } + out := new(ServiceBountyReward) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceBountySelfBond) DeepCopyInto(out *ServiceBountySelfBond) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountySelfBond. +func (in *ServiceBountySelfBond) DeepCopy() *ServiceBountySelfBond { + if in == nil { + return nil + } + out := new(ServiceBountySelfBond) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountySpec. +func (in *ServiceBountySpec) DeepCopy() *ServiceBountySpec { + if in == nil { + return nil + } + out := new(ServiceBountySpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyStatus. +func (in *ServiceBountyStatus) DeepCopy() *ServiceBountyStatus { + if in == nil { + return nil + } + out := new(ServiceBountyStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyTask. +func (in *ServiceBountyTask) DeepCopy() *ServiceBountyTask { + if in == nil { + return nil + } + out := new(ServiceBountyTask) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceBountyTrust) DeepCopyInto(out *ServiceBountyTrust) { + *out = *in + out.SelfBond = in.SelfBond +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyTrust. 
+func (in *ServiceBountyTrust) DeepCopy() *ServiceBountyTrust { + if in == nil { + return nil + } + out := new(ServiceBountyTrust) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ServiceOffer) DeepCopyInto(out *ServiceOffer) { *out = *in diff --git a/internal/serviceoffercontroller/bounty.go b/internal/serviceoffercontroller/bounty.go new file mode 100644 index 00000000..42e092a0 --- /dev/null +++ b/internal/serviceoffercontroller/bounty.go @@ -0,0 +1,532 @@ +package serviceoffercontroller + +// ServiceBounty reconcile — the demand-side sibling pass, following the +// RegistrationRequest/PurchaseRequest precedent: one more informer + queue + +// worker on the same Controller, in the same binary. +// +// Lifecycle: Open → Claimed → Submitted → Verified → Paid, with Expired → +// Refunded on deadline and Rejected on a poster verdict. Machine truth is the +// condition set (TaskValid, EscrowReserved, Claimed, Submitted, Verified, +// Paid); status.phase is the human rollup. +// +// Claim/submit/verdict arrive as ANNOTATIONS on the CR (the k8s-native write +// channel for agents/CLI, validated and promoted into controller-owned +// status). v1 trust posture is the design doc's v0: escrow via the Gateway +// seam (dev-ledger locally until the facilitator routes ship) and +// poster-as-judge acceptance; the OBOL eval market replaces the poster verdict +// in a later slice. The controller signs NOTHING — see internal/x402/escrow. 

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"os"
	"slices"
	"strings"
	"time"

	"github.com/ObolNetwork/obol-stack/internal/bounty"
	"github.com/ObolNetwork/obol-stack/internal/monetizeapi"
	"github.com/ObolNetwork/obol-stack/internal/x402/escrow"
	"github.com/ethereum/go-ethereum/common"
	"k8s.io/apimachinery/pkg/api/equality"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/tools/cache"
)

const (
	// serviceBountyFinalizer is added to every ServiceBounty by the reconcile
	// loop; on deletion the controller attempts to void any still-reserved
	// escrow holds before removing it.
	serviceBountyFinalizer = "obol.org/servicebounty-finalizer"

	// Annotation write-channel (agent/CLI → controller). The reconcile loop
	// reads these from the CR's annotations and promotes them into status.
	bountyClaimAnnotation   = "obol.org/claim"   // fulfiller payout address (0x…)
	bountyCommitAnnotation  = "obol.org/commit"  // commit hash (anti bait-and-switch)
	bountySubmitAnnotation  = "obol.org/submit"  // JSON {"resultHash":"…","reportURI":"…"}
	bountyVerdictAnnotation = "obol.org/verdict" // "accept", or "reject" optionally followed by ":<reason>"

	// Phase names. They are written to status.phase and, for the claim
	// lifecycle, to the phase of the first entry in status.claims.
	bountyPhaseInvalid   = "Invalid"
	bountyPhaseOpen      = "Open"
	bountyPhaseClaimed   = "Claimed"
	bountyPhaseSubmitted = "Submitted"
	bountyPhaseVerified  = "Verified"
	bountyPhasePaid      = "Paid"
	bountyPhaseRejected  = "Rejected"
	bountyPhaseExpired   = "Expired"
	bountyPhaseRefunded  = "Refunded"
)

// bountySubmission is the bountySubmitAnnotation payload.
+type bountySubmission struct { + ResultHash string `json:"resultHash"` + ReportURI string `json:"reportURI"` +} + +func (c *Controller) enqueueBounty(obj any) { + key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) + if err != nil { + log.Printf("serviceoffer-controller: build bounty queue key: %v", err) + return + } + c.bountyQueue.Add(key) +} + +func (c *Controller) processNextBounty(ctx context.Context) bool { + key, shutdown := c.bountyQueue.Get() + if shutdown { + return false + } + defer c.bountyQueue.Done(key) + + if err := c.reconcileBounty(ctx, key); err != nil { + log.Printf("serviceoffer-controller: reconcile bounty %s: %v", key, err) + c.bountyQueue.AddRateLimited(key) + return true + } + + c.bountyQueue.Forget(key) + return true +} + +func (c *Controller) reconcileBounty(ctx context.Context, key string) error { + namespace, name, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + return err + } + + raw, err := c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(namespace).Get(ctx, name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return nil + } + if err != nil { + return err + } + + var sb monetizeapi.ServiceBounty + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(raw.Object, &sb); err != nil { + return fmt.Errorf("decode ServiceBounty: %w", err) + } + + // Deletion: best-effort escrow void (poster keeps funds), then release + // the finalizer. A captured escrow is final — void refuses, and we still + // remove the finalizer (the reward was legitimately paid). 
+ if raw.GetDeletionTimestamp() != nil { + if !slices.Contains(raw.GetFinalizers(), serviceBountyFinalizer) { + return nil + } + if sb.Status.EscrowState == escrow.StateReserved { + if _, err := c.escrowGateway().Void(ctx, string(sb.UID)); err != nil { + log.Printf("serviceoffer-controller: void escrow for deleting bounty %s: %v", key, err) + } + } + if sb.Status.BondState == escrow.StateReserved { + if _, err := c.escrowGateway().Void(ctx, string(sb.UID)+"-bond"); err != nil { + log.Printf("serviceoffer-controller: void bond for deleting bounty %s: %v", key, err) + } + } + if sb.Status.EvalBudgetState == escrow.StateReserved { + if _, err := c.escrowGateway().Void(ctx, string(sb.UID)+"-eval"); err != nil { + log.Printf("serviceoffer-controller: void eval budget for deleting bounty %s: %v", key, err) + } + } + return c.removeBountyFinalizer(ctx, raw) + } + + if !slices.Contains(raw.GetFinalizers(), serviceBountyFinalizer) { + patched := raw.DeepCopy() + patched.SetFinalizers(append(patched.GetFinalizers(), serviceBountyFinalizer)) + _, err := c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(namespace).Update(ctx, patched, metav1.UpdateOptions{}) + return err + } + + status := sb.Status + status.ObservedGeneration = sb.Generation + + // 1. Task validity — typeRef must resolve against the embedded registry + // and params must satisfy the package's schema. Invalid bounties park + // (no requeue): only a spec change can fix them. + if err := validateBountyTask(&sb); err != nil { + setPurchaseCondition(&status.Conditions, "TaskValid", "False", "InvalidTask", truncateMessage(err.Error())) + status.Phase = bountyPhaseInvalid + return c.updateBountyStatus(ctx, raw, status) + } + setPurchaseCondition(&status.Conditions, "TaskValid", "True", "Resolved", fmt.Sprintf("Task type %s resolved", sb.Spec.Task.TypeRef)) + + // 2. Deadline — past it with no accepted verdict, the escrow is returned + // to the poster. 
Requeue at expiry so the refund happens on time without + // any spec mutation (the DrainAt requeue discipline). + if sb.Spec.Deadline != nil && !bountyConditionIsTrue(status.Conditions, "Verified") { + now := time.Now() + if now.After(sb.Spec.Deadline.Time) { + return c.refundBounty(ctx, raw, &sb, status, "DeadlineExpired", + fmt.Sprintf("Deadline %s passed without an accepted submission", sb.Spec.Deadline.UTC().Format(time.RFC3339))) + } + if delay := time.Until(sb.Spec.Deadline.Time) + time.Second; delay > 0 { + c.bountyQueue.AddAfter(key, delay) + } + } + + // 3. Escrow reserve — hold the reward before any claim is admitted, so a + // fulfiller never starts work against an unfunded bounty. + if status.EscrowState == "" { + receipt, err := c.escrowGateway().Reserve(ctx, escrow.ReserveRequest{ + ID: string(sb.UID), + Network: sb.Spec.Reward.Network, + PayTo: sb.Spec.Reward.PayTo, + Asset: sb.Spec.Reward.Asset.Symbol, + Amount: sb.Spec.Reward.Amount, + Scheme: sb.Spec.Reward.Escrow.Scheme, + }) + if err != nil { + setPurchaseCondition(&status.Conditions, "EscrowReserved", "False", "FacilitatorError", truncateMessage(err.Error())) + status.Phase = bountyPhaseOpen + if statusErr := c.updateBountyStatus(ctx, raw, status); statusErr != nil { + return statusErr + } + return err // rate-limited retry + } + status.EscrowState = receipt.State + } + setPurchaseCondition(&status.Conditions, "EscrowReserved", "True", "Reserved", escrowReason(c.escrowGateway())) + + // 4. Claim — promote the claim annotation into controller-owned status. 
+ annotations := raw.GetAnnotations() + if claim := strings.TrimSpace(annotations[bountyClaimAnnotation]); claim != "" && len(status.Claims) == 0 { + if !common.IsHexAddress(claim) { + setPurchaseCondition(&status.Conditions, "Claimed", "False", "InvalidAddress", + fmt.Sprintf("claim annotation %q is not a hex address", claim)) + status.Phase = bountyPhaseOpen + return c.updateBountyStatus(ctx, raw, status) + } + now := metav1.Now() + status.Claims = []monetizeapi.ServiceBountyClaim{{ + FulfillerAddress: common.HexToAddress(claim).Hex(), + ClaimedAt: &now, + CommitHash: strings.TrimSpace(annotations[bountyCommitAnnotation]), + Phase: bountyPhaseClaimed, + }} + } + if len(status.Claims) > 0 { + setPurchaseCondition(&status.Conditions, "Claimed", "True", "Claimed", + fmt.Sprintf("Claimed by %s", status.Claims[0].FulfillerAddress)) + // Late commit: the commit annotation may land after the claim. + if commit := strings.TrimSpace(annotations[bountyCommitAnnotation]); commit != "" && status.Claims[0].CommitHash == "" { + status.Claims[0].CommitHash = commit + } + } else { + setPurchaseCondition(&status.Conditions, "Claimed", "False", "Open", "No fulfiller has claimed this bounty") + } + + // 4b. Self-bond — held at the escrow gateway against the fulfiller's own + // funds at claim time (anti-griefing: returned on success or honest + // timeout, forfeited on rejected work to offset the poster's eval spend). 
+ if sb.Spec.Trust.SelfBond.Required && len(status.Claims) > 0 && status.BondState == "" { + receipt, err := c.escrowGateway().Reserve(ctx, escrow.ReserveRequest{ + ID: string(sb.UID) + "-bond", + Network: sb.Spec.Reward.Network, + PayTo: status.Claims[0].FulfillerAddress, + Asset: sb.Spec.Trust.SelfBond.Token, + Amount: sb.Spec.Trust.SelfBond.Amount, + Scheme: sb.Spec.Reward.Escrow.Scheme, + }) + if err != nil { + if statusErr := c.updateBountyStatus(ctx, raw, status); statusErr != nil { + return statusErr + } + return err // rate-limited retry + } + status.BondState = receipt.State + } + + // 5. Submit — parse the submission annotation, advance the claim. + if subRaw := strings.TrimSpace(annotations[bountySubmitAnnotation]); subRaw != "" && len(status.Claims) > 0 { + var sub bountySubmission + if err := json.Unmarshal([]byte(subRaw), &sub); err != nil { + setPurchaseCondition(&status.Conditions, "Submitted", "False", "InvalidSubmission", truncateMessage(err.Error())) + } else { + if status.Claims[0].Phase == bountyPhaseClaimed { + status.Claims[0].Phase = bountyPhaseSubmitted + } + status.ReportURI = sub.ReportURI + setPurchaseCondition(&status.Conditions, "Submitted", "True", "Submitted", + fmt.Sprintf("Result hash %s", sub.ResultHash)) + } + } else if !bountyConditionIsTrue(status.Conditions, "Submitted") { + setPurchaseCondition(&status.Conditions, "Submitted", "False", "AwaitingSubmission", "No submission yet") + } + + // 5b. Eval market — verification-by-default: once a submission exists and + // the bounty is not dangerously skipped (nor poster-manual), the + // commit-reveal quorum drives Verified (reason=EvaluatorQuorum). The + // poster verdict annotation below still overrides either way. + if evalMarketActive(&sb) && bountyConditionIsTrue(status.Conditions, "Submitted") { + if requeue := c.reconcileEvalMarket(ctx, &sb, annotations, &status, time.Now()); requeue > 0 { + c.bountyQueue.AddAfter(key, requeue) + } + } + + // 6. 
Verdict — the poster verdict annotation. With the eval market active + // it is an explicit OVERRIDE on top of (or instead of) the quorum; for + // poster-manual or dangerously-skipped bounties it is the designed path. + verdict := strings.TrimSpace(annotations[bountyVerdictAnnotation]) + quorumSpoke := conditionReason(status.Conditions, "Verified") == "EvaluatorQuorum" + switch { + case verdict == "accept" && bountyConditionIsTrue(status.Conditions, "Submitted"): + reason := "PosterAccepted" + if sb.Spec.Acceptance.Method != "poster-manual" && !bountyConditionIsTrue(status.Conditions, "Verified") { + reason = "PosterOverride" + } + if !bountyConditionIsTrue(status.Conditions, "Verified") { + setPurchaseCondition(&status.Conditions, "Verified", "True", reason, "Submission accepted by poster") + status.WeightedScore = 100 + } + if len(status.Claims) > 0 { + status.Claims[0].Phase = bountyPhaseVerified + } + case strings.HasPrefix(verdict, "reject"): + reason := strings.TrimPrefix(strings.TrimPrefix(verdict, "reject"), ":") + if reason == "" { + reason = "rejected by poster" + } + setPurchaseCondition(&status.Conditions, "Verified", "False", "PosterRejected", truncateMessage(reason)) + if len(status.Claims) > 0 { + status.Claims[0].Phase = bountyPhaseRejected + } + case bountyConditionIsTrue(status.Conditions, "Submitted") && !bountyConditionIsTrue(status.Conditions, "Verified") && !quorumSpoke: + setPurchaseCondition(&status.Conditions, "Verified", "False", "AwaitingVerdict", + awaitingVerdictMessage(sb.Spec.Acceptance.Method, sb.Spec.Eval.Mode)) + case !bountyConditionIsTrue(status.Conditions, "Verified") && !quorumSpoke: + setPurchaseCondition(&status.Conditions, "Verified", "False", "AwaitingSubmission", "No submission to verify") + } + + // 6b. Self-bond settlement: returned on an accepted verdict, forfeited on + // rejected work (poster or quorum). Deadline expiry returns it (honest + // timeout) via refundBounty. 
+ if status.BondState == escrow.StateReserved { + switch { + case bountyConditionIsTrue(status.Conditions, "Verified"): + if _, err := c.escrowGateway().Void(ctx, string(sb.UID)+"-bond"); err == nil { + status.BondState = "Returned" + } + case len(status.Claims) > 0 && status.Claims[0].Phase == bountyPhaseRejected: + if _, err := c.escrowGateway().Capture(ctx, string(sb.UID)+"-bond"); err == nil { + status.BondState = "Forfeited" + } + } + } + + // 7. Payout — Verified + a held escrow → capture to the fulfiller. + if bountyConditionIsTrue(status.Conditions, "Verified") && status.EscrowState == escrow.StateReserved { + receipt, err := c.escrowGateway().Capture(ctx, string(sb.UID)) + if err != nil { + setPurchaseCondition(&status.Conditions, "Paid", "False", "CaptureFailed", truncateMessage(err.Error())) + if statusErr := c.updateBountyStatus(ctx, raw, status); statusErr != nil { + return statusErr + } + return err // verified-but-unpaid is a retryable, worker-protecting state + } + status.EscrowState = receipt.State + status.CaptureTxHash = receipt.TxHash + } + if status.EscrowState == escrow.StateCaptured { + setPurchaseCondition(&status.Conditions, "Paid", "True", "Captured", "Reward released to fulfiller") + if len(status.Claims) > 0 && status.Claims[0].Phase == bountyPhaseVerified { + status.Claims[0].Phase = bountyPhasePaid + } + } else if !bountyConditionIsTrue(status.Conditions, "Paid") { + setPurchaseCondition(&status.Conditions, "Paid", "False", "AwaitingVerification", "Escrow capture follows an accepted verdict") + } + + status.Phase = bountyPhaseRollup(status) + return c.updateBountyStatus(ctx, raw, status) +} + +// refundBounty voids the escrow and parks the bounty in Expired/Refunded. +// A held self-bond is returned — deadline expiry is an honest timeout, not +// rejected work. 
+func (c *Controller) refundBounty(ctx context.Context, raw *unstructured.Unstructured, sb *monetizeapi.ServiceBounty, status monetizeapi.ServiceBountyStatus, reason, message string) error { + if status.BondState == escrow.StateReserved { + if _, err := c.escrowGateway().Void(ctx, string(sb.UID)+"-bond"); err == nil { + status.BondState = "Returned" + } + } + if status.EvalBudgetState == escrow.StateReserved { + if _, err := c.escrowGateway().Void(ctx, string(sb.UID)+"-eval"); err == nil { + status.EvalBudgetState = escrow.StateVoided + } + } + if status.EscrowState == escrow.StateReserved { + receipt, err := c.escrowGateway().Void(ctx, string(sb.UID)) + if err != nil { + setPurchaseCondition(&status.Conditions, "Paid", "False", "RefundFailed", truncateMessage(err.Error())) + if statusErr := c.updateBountyStatus(ctx, raw, status); statusErr != nil { + return statusErr + } + return err + } + status.EscrowState = receipt.State + status.RefundTxHash = receipt.TxHash + } + setPurchaseCondition(&status.Conditions, "Verified", "False", reason, message) + setPurchaseCondition(&status.Conditions, "Paid", "False", reason, "Escrow returned to poster") + status.Phase = bountyPhaseRefunded + if status.EscrowState == "" { + status.Phase = bountyPhaseExpired + } + return c.updateBountyStatus(ctx, raw, status) +} + +// bountyPhaseRollup derives the human phase from the condition machine truth. 
+func bountyPhaseRollup(status monetizeapi.ServiceBountyStatus) string { + conditions := status.Conditions + claimRejected := len(status.Claims) > 0 && status.Claims[0].Phase == bountyPhaseRejected + switch { + case bountyConditionIsTrue(conditions, "Paid"): + return bountyPhasePaid + case bountyConditionIsTrue(conditions, "Verified"): + return bountyPhaseVerified + case conditionReason(conditions, "Verified") == "PosterRejected" || claimRejected: + return bountyPhaseRejected + case bountyConditionIsTrue(conditions, "Submitted"): + return bountyPhaseSubmitted + case bountyConditionIsTrue(conditions, "Claimed"): + return bountyPhaseClaimed + default: + return bountyPhaseOpen + } +} + +// validateBountyTask resolves spec.task.typeRef against the embedded registry +// and validates params + the reward envelope needed to construct the escrow. +// Admission is strict: a gate that silently accepts what it doesn't understand +// is not a gate (unknown params are typo'd intent, not extensibility). +func validateBountyTask(sb *monetizeapi.ServiceBounty) error { + t, err := bounty.Resolve(sb.Spec.Task.TypeRef) + if err != nil { + return err + } + + known := make(map[string]bool, len(t.Params)) + for _, p := range t.Params { + known[p.Name] = true + } + for name := range sb.Spec.Task.Params { + if !known[name] { + return fmt.Errorf("unknown param %q for task type %s", name, t.Ref()) + } + } + + for _, p := range t.Params { + v := sb.Spec.Task.Params[p.Name] + if p.Required && strings.TrimSpace(v) == "" { + return fmt.Errorf("param %s is required for task type %s", p.Name, t.Ref()) + } + if v == "" { + continue + } + if len(p.Enum) > 0 && !slices.Contains(p.Enum, v) { + return fmt.Errorf("param %s=%q is not one of [%s]", p.Name, v, strings.Join(p.Enum, ", ")) + } + } + + // Single-winner guard: the controller admits one claim at a time. Honoring + // >1 silently would promise a race/split semantic that does not exist yet. 
+ if sb.Spec.MaxFulfillers > 1 { + return fmt.Errorf("maxFulfillers=%d is not supported yet — v1 bounties are single-winner", sb.Spec.MaxFulfillers) + } + + if strings.TrimSpace(sb.Spec.Reward.Amount) == "" { + return fmt.Errorf("reward.amount is required") + } + if strings.TrimSpace(sb.Spec.Reward.Network) == "" { + return fmt.Errorf("reward.network is required to construct the escrow authorization") + } + + return nil +} + +func awaitingVerdictMessage(method, evalMode string) string { + if method == "poster-manual" || evalMode == monetizeapi.EvalModeDangerouslySkipped { + return "Awaiting poster verdict — accept with `obol bounty accept `" + } + return fmt.Sprintf("Eval market for %s is not wired yet; poster may override with `obol bounty accept `", method) +} + +func bountyConditionIsTrue(conditions []monetizeapi.Condition, conditionType string) bool { + for _, condition := range conditions { + if condition.Type == conditionType { + return condition.Status == "True" + } + } + return false +} + +func conditionReason(conditions []monetizeapi.Condition, conditionType string) string { + for _, condition := range conditions { + if condition.Type == conditionType { + return condition.Reason + } + } + return "" +} + +// newBountyEscrowGateway selects the escrow backend from controller-level +// configuration, NOT from spec.reward.escrow.facilitator: the gateway carries +// the controller's release-authority bearer token, and honoring an arbitrary +// per-bounty URL would let a poster exfiltrate that token to a server they +// control. The spec field stays advisory/documentary. 
+func newBountyEscrowGateway() escrow.Gateway { + if base := strings.TrimSpace(os.Getenv("OBOL_BOUNTY_ESCROW_URL")); base != "" { + return &escrow.HTTPGateway{ + Base: base, + Token: strings.TrimSpace(os.Getenv("OBOL_BOUNTY_ESCROW_TOKEN")), + Client: &http.Client{Timeout: 10 * time.Second}, + } + } + return escrow.NewLedgerGateway() +} + +// defaultBountyLedger backs Controllers constructed without an explicit +// gateway (struct-literal tests); New() always sets bountyEscrow. +var defaultBountyLedger = escrow.NewLedgerGateway() + +// escrowGateway returns the configured gateway, defaulting to the dev ledger. +// The dev ledger is escrow theater for local-first stacks — receipts are +// labeled dev-ledger and the EscrowReserved reason says so. +func (c *Controller) escrowGateway() escrow.Gateway { + if c.bountyEscrow != nil { + return c.bountyEscrow + } + return defaultBountyLedger +} + +func escrowReason(g escrow.Gateway) string { + if _, ok := g.(*escrow.LedgerGateway); ok { + return "Reward hold recorded in dev ledger (no funds held — local dev mode)" + } + return "Reward authorization held at facilitator" +} + +func (c *Controller) removeBountyFinalizer(ctx context.Context, raw *unstructured.Unstructured) error { + patched := raw.DeepCopy() + patched.SetFinalizers(slices.DeleteFunc(patched.GetFinalizers(), func(s string) bool { return s == serviceBountyFinalizer })) + _, err := c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(patched.GetNamespace()).Update(ctx, patched, metav1.UpdateOptions{}) + return err +} + +func (c *Controller) updateBountyStatus(ctx context.Context, raw *unstructured.Unstructured, status monetizeapi.ServiceBountyStatus) error { + patched := raw.DeepCopy() + statusObject, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&status) + if err != nil { + return err + } + if existing, found := patched.Object["status"]; found && equality.Semantic.DeepEqual(existing, statusObject) { + return nil + } + patched.Object["status"] 
= statusObject + _, err = c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(patched.GetNamespace()).UpdateStatus(ctx, patched, metav1.UpdateOptions{}) + return err +} diff --git a/internal/serviceoffercontroller/bounty_eval.go b/internal/serviceoffercontroller/bounty_eval.go new file mode 100644 index 00000000..5585acb4 --- /dev/null +++ b/internal/serviceoffercontroller/bounty_eval.go @@ -0,0 +1,408 @@ +package serviceoffercontroller + +// Eval-market pass — the verification-by-default slice (design doc §11). +// +// Evaluators interact through per-address annotations (the same k8s-native +// write channel as claim/submit, keyed per evaluator so concurrent writers +// never last-write-wins each other): +// +// obol.org/eval-commit- = EvalCommitHash(score, salt, addr) +// obol.org/eval-reveal- = {"score":N,"salt":"…"} +// +// Discipline (the research amendments, plans/evaluator-market-research-notes.md): +// - commitments are ADDRESS-BOUND (Kleros §4.3) — copying another +// evaluator's commit hash makes your own reveal unverifiable; +// - no reveal is processed until K commitments are in (commit window +// closes before any reveal opens); +// - a missing reveal past the reveal window is graded as a worst-case +// outlier (nonRevealPenalty) — silent abstention is never the cheap exit; +// - quorum = MEDIAN of revealed scores (robust to one outlier, which is +// what makes the future probation seat verdict-safe); +// - WithinBand records divergence from the median per evaluator — the +// per-bounty bookkeeping hook the reputation ladder will key on. +// +// Deliberately NOT here yet: evaluator selection (needs an enrollment pool), +// the OBOL eval-payment leg (signed by the poster's agent at selection time, +// batch-settled at the facilitator — never by this controller), and +// cross-bounty ladder state. The controller signs NOTHING. 
+ +import ( + "context" + "encoding/json" + "fmt" + "log" + "sort" + "strconv" + "strings" + "time" + + "github.com/ObolNetwork/obol-stack/internal/bounty" + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/x402/escrow" + "github.com/ethereum/go-ethereum/common" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + bountyEvalCommitPrefix = "obol.org/eval-commit-" + bountyEvalRevealPrefix = "obol.org/eval-reveal-" + + evalPhaseCommitted = "Committed" + evalPhaseRevealed = "Revealed" + evalPhaseBadReveal = "BadReveal" + evalPhaseNonReveal = "NonReveal" + + // evalPassThreshold: median revealed score (0-100, ERC-8004 + // validationResponse semantics) at or above this verifies the submission. + evalPassThreshold = 50 + + // evalOutlierBand: a revealed score further than this from the median is + // marked WithinBand=false (the divergence penalty reputation keys on). + evalOutlierBand = 20 + + // defaultRevealWindow guards against a task package with a missing or + // unparseable ladder.revealWindow. + defaultRevealWindow = 10 * time.Minute +) + +// bountyEvalReveal is the eval-reveal annotation payload. ValidationTx is the +// optional ERC-8004 validationResponse transaction the evaluator submitted +// with their OWN wallet — recorded as provenance, never required. +type bountyEvalReveal struct { + Score int64 `json:"score"` + Salt string `json:"salt"` + ValidationTx string `json:"validationTx,omitempty"` +} + +// evalMarketActive reports whether quorum verification applies: skipped mode +// and poster-manual acceptance both leave the poster as the judge. +func evalMarketActive(sb *monetizeapi.ServiceBounty) bool { + return sb.Spec.Eval.Mode != monetizeapi.EvalModeDangerouslySkipped && + sb.Spec.Acceptance.Method != "poster-manual" +} + +// reconcileEvalMarket promotes commit/reveal annotations into status and, once +// the quorum settles, writes the Verified condition with reason +// EvaluatorQuorum. 
Returns a positive duration when the bounty should be +// requeued (reveal-window expiry). +func (c *Controller) reconcileEvalMarket(ctx context.Context, sb *monetizeapi.ServiceBounty, annotations map[string]string, status *monetizeapi.ServiceBountyStatus, now time.Time) time.Duration { + // 0. Panel selection (once) + eval-budget reservation (once). The budget + // is the SEPARATE OBOL leg: k × perEvaluator, poster-funded, paid to + // evaluators win-or-lose. + c.ensurePanel(ctx, sb, status) + c.reserveEvalBudget(ctx, sb, status) + + // Seat lookup is by CANONICAL (EIP-55) address — enrollments may carry any + // case, annotations another; HexToAddress.Hex() is the one true form. + panelSeats := make(map[string]string, len(status.EvaluatorPanel)) + for _, seat := range status.EvaluatorPanel { + panelSeats[common.HexToAddress(seat.Address).Hex()] = seat.Seat + } + + // 1. Promote commitments (first write wins per address — a commitment is + // binding; later annotation edits must not rewrite history). With a panel + // selected, only panel members are admitted; shadows are admitted but + // never counted. + for key, value := range annotations { + addr, ok := strings.CutPrefix(key, bountyEvalCommitPrefix) + if !ok || !common.IsHexAddress(addr) { + continue + } + canonical := common.HexToAddress(addr).Hex() + seat := "" + if len(panelSeats) > 0 { + s, selected := panelSeats[canonical] + if !selected { + continue // not on the panel — the open door is closed + } + seat = s + } + if findEvaluation(status.Evaluations, canonical) != nil { + continue + } + status.Evaluations = append(status.Evaluations, monetizeapi.ServiceBountyEvaluation{ + Address: canonical, + CommitHash: strings.TrimSpace(value), + Phase: evalPhaseCommitted, + Seat: seat, + }) + } + sort.Slice(status.Evaluations, func(i, j int) bool { + return status.Evaluations[i].Address < status.Evaluations[j].Address + }) + + k := sb.Spec.Eval.K + if k < 1 { + k = 1 + } + + // 2. 
The commit window closes (and the reveal window opens) only when K + // COUNTING commitments are in (shadows never gate the window). No reveal + // is graded before that instant. + var requeue time.Duration + if status.RevealDeadline == nil { + counting := int64(0) + for _, evaluation := range status.Evaluations { + if evaluation.Seat != monetizeapi.PanelSeatShadow { + counting++ + } + } + if counting < k { + return 0 + } + deadline := metav1.NewTime(now.Add(revealWindow(sb))) + status.RevealDeadline = &deadline + requeue = time.Until(deadline.Time) + time.Second + } + + // 3. Grade reveals against the address-bound commitment. + for key, value := range annotations { + addr, ok := strings.CutPrefix(key, bountyEvalRevealPrefix) + if !ok || !common.IsHexAddress(addr) { + continue + } + evaluation := findEvaluation(status.Evaluations, common.HexToAddress(addr).Hex()) + if evaluation == nil || evaluation.Phase != evalPhaseCommitted { + continue + } + var reveal bountyEvalReveal + if err := json.Unmarshal([]byte(value), &reveal); err != nil { + evaluation.Phase = evalPhaseBadReveal + continue + } + if monetizeapi.EvalCommitHash(reveal.Score, reveal.Salt, evaluation.Address) != evaluation.CommitHash { + evaluation.Phase = evalPhaseBadReveal + continue + } + revealedAt := metav1.NewTime(now) + evaluation.Phase = evalPhaseRevealed + evaluation.Score = reveal.Score + evaluation.RevealedAt = &revealedAt + evaluation.ValidationTxHash = strings.TrimSpace(reveal.ValidationTx) + } + + // 4. Past the reveal window, missing reveals become worst-case outliers. + deadlinePassed := now.After(status.RevealDeadline.Time) + if deadlinePassed { + for i := range status.Evaluations { + if status.Evaluations[i].Phase == evalPhaseCommitted { + status.Evaluations[i].Phase = evalPhaseNonReveal + } + } + } + + // 5. Quorum settles when every commitment is graded (all revealed early) + // or the reveal window has closed. 
+ settled := deadlinePassed + if !settled { + settled = true + for _, evaluation := range status.Evaluations { + if evaluation.Phase == evalPhaseCommitted { + settled = false + break + } + } + } + if !settled { + return requeue + } + + // Median over COUNTING reveals only — shadows are graded against it but + // never move it (the free reputation on-ramp can't sway verdicts). + var scores []int64 + for _, evaluation := range status.Evaluations { + if evaluation.Phase == evalPhaseRevealed && evaluation.Seat != monetizeapi.PanelSeatShadow { + scores = append(scores, evaluation.Score) + } + } + if len(scores) == 0 { + setPurchaseCondition(&status.Conditions, "Verified", "False", "EvaluatorQuorum", + "No valid reveals — submission unverifiable; poster may override or the deadline refunds") + return requeue + } + + median := medianInt64(scores) + for i := range status.Evaluations { + evaluation := &status.Evaluations[i] + switch evaluation.Phase { + case evalPhaseRevealed: + diff := evaluation.Score - median + if diff < 0 { + diff = -diff + } + evaluation.WithinBand = diff <= evalOutlierBand + default: + evaluation.WithinBand = false + } + } + + status.WeightedScore = median + if median >= evalPassThreshold { + setPurchaseCondition(&status.Conditions, "Verified", "True", "EvaluatorQuorum", + fmt.Sprintf("Median score %d/100 from %d reveal(s) meets the %d threshold", median, len(scores), evalPassThreshold)) + if len(status.Claims) > 0 && status.Claims[0].Phase == bountyPhaseSubmitted { + status.Claims[0].Phase = bountyPhaseVerified + } + } else { + setPurchaseCondition(&status.Conditions, "Verified", "False", "EvaluatorQuorum", + fmt.Sprintf("Median score %d/100 from %d reveal(s) is below the %d threshold", median, len(scores), evalPassThreshold)) + if len(status.Claims) > 0 && status.Claims[0].Phase == bountyPhaseSubmitted { + status.Claims[0].Phase = bountyPhaseRejected + } + } + + // 6. 
Settlement side-effects, once per bounty: pay the evaluators + // (win-or-lose — they did the work) and record the cross-bounty ladder. + c.settleEvalBudget(ctx, sb, status) + if !status.LadderRecorded { + if err := c.recordLadder(ctx, sb, status); err != nil { + log.Printf("serviceoffer-controller: record evaluator ladder for %s/%s: %v", sb.Namespace, sb.Name, err) + } else { + status.LadderRecorded = true + } + } + return requeue +} + +// reserveEvalBudget holds the poster-funded OBOL eval budget (k × perEvaluator, +// minus the newcomer discount when a probation seat is sitting) at the escrow +// gateway under -eval. Errors are non-fatal: evaluation proceeds and the +// reserve retries on the next reconcile. +func (c *Controller) reserveEvalBudget(ctx context.Context, sb *monetizeapi.ServiceBounty, status *monetizeapi.ServiceBountyStatus) { + if status.EvalBudgetState != "" || sb.Spec.Eval.Payment.PerEvaluator == "" { + return + } + total := evalBudgetTotal(sb, status) + if total == "" { + return + } + receipt, err := c.escrowGateway().Reserve(ctx, escrow.ReserveRequest{ + ID: string(sb.UID) + "-eval", + Network: sb.Spec.Reward.Network, + PayTo: sb.Spec.Reward.PayTo, // poster refund address + Asset: sb.Spec.Eval.Payment.Asset, + Amount: total, + Scheme: sb.Spec.Reward.Escrow.Scheme, + }) + if err != nil { + log.Printf("serviceoffer-controller: reserve eval budget for %s/%s: %v", sb.Namespace, sb.Name, err) + return + } + status.EvalBudgetState = receipt.State +} + +// settleEvalBudget batch-settles the held eval budget to every counting +// evaluator with a valid reveal (probation seats at half price — the discount +// already went to the poster at reserve time). Shadows evaluate free; non/bad +// reveals earn nothing (the monetary edge of the non-reveal penalty). 
+func (c *Controller) settleEvalBudget(ctx context.Context, sb *monetizeapi.ServiceBounty, status *monetizeapi.ServiceBountyStatus) { + if status.EvalBudgetState != escrow.StateReserved { + return + } + per, err := strconv.ParseFloat(strings.TrimSpace(sb.Spec.Eval.Payment.PerEvaluator), 64) + if err != nil || per <= 0 { + return + } + + var recipients []escrow.BatchRecipient + paid := make(map[string]bool) + k := sb.Spec.Eval.K + if k < 1 { + k = 1 + } + for i := range status.Evaluations { + evaluation := &status.Evaluations[i] + if evaluation.Phase != evalPhaseRevealed || evaluation.Seat == monetizeapi.PanelSeatShadow { + continue + } + if int64(len(recipients)) >= k { + break // open-door can over-subscribe; the budget pays k seats + } + amount := per + if evaluation.Seat == monetizeapi.PanelSeatProbation { + amount = per / 2 + } + recipients = append(recipients, escrow.BatchRecipient{ + Address: evaluation.Address, + Amount: strconv.FormatFloat(amount, 'f', 2, 64), + }) + paid[evaluation.Address] = true + } + if len(recipients) == 0 { + return // nothing to pay; refund path voids the budget + } + + var receipt escrow.Receipt + if batch, ok := c.escrowGateway().(escrow.BatchGateway); ok { + receipt, err = batch.CaptureBatch(ctx, string(sb.UID)+"-eval", recipients) + } else { + receipt, err = c.escrowGateway().Capture(ctx, string(sb.UID)+"-eval") + } + if err != nil { + log.Printf("serviceoffer-controller: settle eval budget for %s/%s: %v", sb.Namespace, sb.Name, err) + return + } + status.EvalBudgetState = receipt.State + status.EvalPayoutTxHash = receipt.TxHash + for i := range status.Evaluations { + if paid[status.Evaluations[i].Address] { + status.Evaluations[i].Paid = true + } + } +} + +// evalBudgetTotal computes k × perEvaluator with the probation seat at half +// price (the newcomer discount is passed to the poster). 
+func evalBudgetTotal(sb *monetizeapi.ServiceBounty, status *monetizeapi.ServiceBountyStatus) string { + per, err := strconv.ParseFloat(strings.TrimSpace(sb.Spec.Eval.Payment.PerEvaluator), 64) + if err != nil || per <= 0 { + return "" + } + k := sb.Spec.Eval.K + if k < 1 { + k = 1 + } + total := float64(k) * per + for _, seat := range status.EvaluatorPanel { + if seat.Seat == monetizeapi.PanelSeatProbation { + total -= per / 2 + break + } + } + return strconv.FormatFloat(total, 'f', 2, 64) +} + +func findEvaluation(evaluations []monetizeapi.ServiceBountyEvaluation, address string) *monetizeapi.ServiceBountyEvaluation { + for i := range evaluations { + if evaluations[i].Address == address { + return &evaluations[i] + } + } + return nil +} + +// revealWindow resolves the task package's ladder.revealWindow. +func revealWindow(sb *monetizeapi.ServiceBounty) time.Duration { + t, err := bounty.Resolve(sb.Spec.Task.TypeRef) + if err != nil { + return defaultRevealWindow + } + window, err := time.ParseDuration(t.Eval.Ladder.RevealWindow) + if err != nil || window <= 0 { + return defaultRevealWindow + } + return window +} + +// medianInt64 returns the median (lower-middle average for even counts) — +// robust to one outlier, which is what makes a newcomer seat verdict-safe. 
+func medianInt64(values []int64) int64 { + sorted := make([]int64, len(values)) + copy(sorted, values) + sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] }) + mid := len(sorted) / 2 + if len(sorted)%2 == 1 { + return sorted[mid] + } + return (sorted[mid-1] + sorted[mid]) / 2 +} diff --git a/internal/serviceoffercontroller/bounty_eval_test.go b/internal/serviceoffercontroller/bounty_eval_test.go new file mode 100644 index 00000000..ec5b8963 --- /dev/null +++ b/internal/serviceoffercontroller/bounty_eval_test.go @@ -0,0 +1,272 @@ +package serviceoffercontroller + +import ( + "fmt" + "strings" + "testing" + + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/x402/escrow" +) + +const ( + evalA = "0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + evalB = "0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" + evalC = "0xcccccccccccccccccccccccccccccccccccccccc" +) + +// testEvalBounty is a quorum-verified (mode=required, rerun-tolerance) bounty +// with k=3, claimed and submitted. 
+func testEvalBounty(name string) *monetizeapi.ServiceBounty { + sb := testBounty(name) + sb.Spec.Acceptance.Method = "rerun-tolerance" + sb.Spec.Eval = monetizeapi.ServiceBountyEval{ + K: 3, + Mode: monetizeapi.EvalModeRequired, + Payment: monetizeapi.ServiceBountyEvalPayment{ + Asset: "OBOL", PerEvaluator: "2.00", FundedBy: "poster", Settle: "batch-settlement", + }, + } + return sb +} + +func claimAndSubmit(t *testing.T, c *Controller, ns, name string) { + t.Helper() + key := ns + "/" + name + reconcileBountyUntilSettled(t, c, key) + annotateBounty(t, c, ns, name, map[string]string{ + "obol.org/claim": "0x2222222222222222222222222222222222222222", + }) + reconcileBountyUntilSettled(t, c, key) + annotateBounty(t, c, ns, name, map[string]string{ + "obol.org/submit": `{"resultHash":"0xbeef","reportURI":"file:///r.json"}`, + }) + reconcileBountyUntilSettled(t, c, key) +} + +func commitAndReveal(t *testing.T, c *Controller, ns, name string, scores map[string]int64) { + t.Helper() + key := ns + "/" + name + // Commit phase: all evaluators commit before anyone reveals. + for addr, score := range scores { + annotateBounty(t, c, ns, name, map[string]string{ + "obol.org/eval-commit-" + addr: monetizeapi.EvalCommitHash(score, "salt-"+addr, addr), + }) + } + reconcileBountyUntilSettled(t, c, key) + // Reveal phase. 
+ for addr, score := range scores { + annotateBounty(t, c, ns, name, map[string]string{ + "obol.org/eval-reveal-" + addr: fmt.Sprintf(`{"score":%d,"salt":"salt-%s"}`, score, addr), + }) + } + reconcileBountyUntilSettled(t, c, key) +} + +func TestEvalMarket_QuorumPassToPaid(t *testing.T) { + sb := testEvalBounty("quorum-pass") + c := newBountyTestController(t, sb) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "quorum-pass") + commitAndReveal(t, c, ns, "quorum-pass", map[string]int64{evalA: 90, evalB: 85, evalC: 40}) + + got := getBounty(t, c, ns, "quorum-pass") + if reason := conditionReason(got.Status.Conditions, "Verified"); reason != "EvaluatorQuorum" { + t.Fatalf("Verified reason = %q, want EvaluatorQuorum", reason) + } + if !bountyConditionIsTrue(got.Status.Conditions, "Verified") { + t.Fatal("median 85 >= 50 must verify") + } + if got.Status.WeightedScore != 85 { + t.Errorf("WeightedScore = %d, want median 85", got.Status.WeightedScore) + } + if got.Status.Phase != bountyPhasePaid { + t.Errorf("phase = %q, want Paid (quorum verdict releases the escrow)", got.Status.Phase) + } + // The 40 is >20 from the median 85 → out of band; the others in band. 
+ for _, ev := range got.Status.Evaluations { + wantBand := ev.Score >= 65 + if ev.WithinBand != wantBand { + t.Errorf("evaluator %s score %d withinBand = %v, want %v", ev.Address, ev.Score, ev.WithinBand, wantBand) + } + } +} + +func TestEvalMarket_QuorumRejects(t *testing.T) { + sb := testEvalBounty("quorum-reject") + c := newBountyTestController(t, sb) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "quorum-reject") + commitAndReveal(t, c, ns, "quorum-reject", map[string]int64{evalA: 10, evalB: 20, evalC: 90}) + + got := getBounty(t, c, ns, "quorum-reject") + if bountyConditionIsTrue(got.Status.Conditions, "Verified") { + t.Fatal("median 20 < 50 must not verify") + } + if reason := conditionReason(got.Status.Conditions, "Verified"); reason != "EvaluatorQuorum" { + t.Fatalf("Verified reason = %q, want EvaluatorQuorum", reason) + } + if got.Status.Phase != bountyPhaseRejected { + t.Errorf("phase = %q, want Rejected", got.Status.Phase) + } + if bountyConditionIsTrue(got.Status.Conditions, "Paid") { + t.Fatal("rejected bounty must not pay") + } +} + +// The Kleros address-binding steal: evaluator C copies B's commitment hash, +// then replays B's revealed {score, salt}. The hash binds B's address, so C's +// reveal cannot verify — C grades BadReveal and is excluded from the median. +func TestEvalMarket_CommitBoundToAddress(t *testing.T) { + sb := testEvalBounty("copycat") + c := newBountyTestController(t, sb) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "copycat") + + honest := map[string]int64{evalA: 80, evalB: 75} + for addr, score := range honest { + annotateBounty(t, c, ns, "copycat", map[string]string{ + "obol.org/eval-commit-" + addr: monetizeapi.EvalCommitHash(score, "salt-"+addr, addr), + }) + } + // C copies B's commitment verbatim. 
+ annotateBounty(t, c, ns, "copycat", map[string]string{ + "obol.org/eval-commit-" + evalC: monetizeapi.EvalCommitHash(75, "salt-"+evalB, evalB), + }) + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/copycat") + + for addr, score := range honest { + annotateBounty(t, c, ns, "copycat", map[string]string{ + "obol.org/eval-reveal-" + addr: fmt.Sprintf(`{"score":%d,"salt":"salt-%s"}`, score, addr), + }) + } + // C replays B's reveal. + annotateBounty(t, c, ns, "copycat", map[string]string{ + "obol.org/eval-reveal-" + evalC: fmt.Sprintf(`{"score":75,"salt":"salt-%s"}`, evalB), + }) + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/copycat") + + got := getBounty(t, c, ns, "copycat") + var copycat *monetizeapi.ServiceBountyEvaluation + for i := range got.Status.Evaluations { + if strings.EqualFold(got.Status.Evaluations[i].Address, evalC) { + copycat = &got.Status.Evaluations[i] + } + } + if copycat == nil { + t.Fatal("copycat evaluation not found") + } + if copycat.Phase != evalPhaseBadReveal { + t.Fatalf("copycat phase = %q, want BadReveal (commitment is address-bound)", copycat.Phase) + } + if !bountyConditionIsTrue(got.Status.Conditions, "Verified") { + t.Error("honest median (80,75 → 77) must still verify") + } +} + +// Reveals posted before K commitments are in must be ignored: every commit +// closes before any reveal opens. 
+func TestEvalMarket_RevealBeforeWindowIgnored(t *testing.T) { + sb := testEvalBounty("early-reveal") + c := newBountyTestController(t, sb) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "early-reveal") + annotateBounty(t, c, ns, "early-reveal", map[string]string{ + "obol.org/eval-commit-" + evalA: monetizeapi.EvalCommitHash(90, "salt-"+evalA, evalA), + "obol.org/eval-reveal-" + evalA: fmt.Sprintf(`{"score":90,"salt":"salt-%s"}`, evalA), + }) + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/early-reveal") + + got := getBounty(t, c, ns, "early-reveal") + if got.Status.RevealDeadline != nil { + t.Fatal("reveal window must not open before k=3 commitments") + } + for _, ev := range got.Status.Evaluations { + if ev.Phase != evalPhaseCommitted { + t.Errorf("evaluation %s phase = %q, want Committed (reveal ignored before the window opens)", ev.Address, ev.Phase) + } + } + if bountyConditionIsTrue(got.Status.Conditions, "Verified") { + t.Fatal("no quorum yet") + } +} + +func TestEvalMarket_SelfBondReturnedOnPass(t *testing.T) { + sb := testEvalBounty("bonded-pass") + sb.Spec.Trust.SelfBond = monetizeapi.ServiceBountySelfBond{Required: true, Amount: "10.00", Token: "OBOL"} + c := newBountyTestController(t, sb) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "bonded-pass") + if got := getBounty(t, c, ns, "bonded-pass"); got.Status.BondState != escrow.StateReserved { + t.Fatalf("bond state after claim = %q, want Reserved", got.Status.BondState) + } + + commitAndReveal(t, c, ns, "bonded-pass", map[string]int64{evalA: 90, evalB: 85, evalC: 80}) + got := getBounty(t, c, ns, "bonded-pass") + if got.Status.BondState != "Returned" { + t.Errorf("bond state = %q, want Returned (accepted work returns the bond)", got.Status.BondState) + } + if got.Status.Phase != bountyPhasePaid { + t.Errorf("phase = %q, want Paid", got.Status.Phase) + } +} + +func TestEvalMarket_SelfBondForfeitedOnReject(t *testing.T) { + sb := testEvalBounty("bonded-reject") + 
sb.Spec.Trust.SelfBond = monetizeapi.ServiceBountySelfBond{Required: true, Amount: "10.00", Token: "OBOL"} + c := newBountyTestController(t, sb) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "bonded-reject") + commitAndReveal(t, c, ns, "bonded-reject", map[string]int64{evalA: 10, evalB: 15, evalC: 20}) + + got := getBounty(t, c, ns, "bonded-reject") + if got.Status.BondState != "Forfeited" { + t.Errorf("bond state = %q, want Forfeited (rejected work forfeits the bond)", got.Status.BondState) + } + if got.Status.Phase != bountyPhaseRejected { + t.Errorf("phase = %q, want Rejected", got.Status.Phase) + } +} + +// Poster override on top of an active eval market: an explicit accept verdict +// wins even before the quorum settles. +func TestEvalMarket_PosterOverrideStillWins(t *testing.T) { + sb := testEvalBounty("override") + c := newBountyTestController(t, sb) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "override") + annotateBounty(t, c, ns, "override", map[string]string{"obol.org/verdict": "accept"}) + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/override") + + got := getBounty(t, c, ns, "override") + if reason := conditionReason(got.Status.Conditions, "Verified"); reason != "PosterOverride" { + t.Fatalf("Verified reason = %q, want PosterOverride", reason) + } + if got.Status.Phase != bountyPhasePaid { + t.Errorf("phase = %q, want Paid", got.Status.Phase) + } +} + +func TestMedianInt64(t *testing.T) { + cases := []struct { + in []int64 + want int64 + }{ + {[]int64{90}, 90}, + {[]int64{90, 40}, 65}, + {[]int64{90, 85, 40}, 85}, + {[]int64{1, 2, 3, 100}, 2}, + } + for _, tc := range cases { + if got := medianInt64(tc.in); got != tc.want { + t.Errorf("median(%v) = %d, want %d", tc.in, got, tc.want) + } + } +} diff --git a/internal/serviceoffercontroller/bounty_lifecycle_test.go b/internal/serviceoffercontroller/bounty_lifecycle_test.go new file mode 100644 index 00000000..76baa835 --- /dev/null +++ 
b/internal/serviceoffercontroller/bounty_lifecycle_test.go @@ -0,0 +1,315 @@ +package serviceoffercontroller + +import ( + "context" + "strings" + "testing" + "time" + + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/x402/escrow" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/dynamic/fake" + "k8s.io/client-go/util/workqueue" +) + +func newBountyTestController(t *testing.T, bounties ...*monetizeapi.ServiceBounty) *Controller { + t.Helper() + + objects := make([]runtime.Object, 0, len(bounties)) + for _, sb := range bounties { + objects = append(objects, mustBountyObject(t, sb)) + } + + dynClient := fake.NewSimpleDynamicClientWithCustomListKinds( + runtime.NewScheme(), + map[schema.GroupVersionResource]string{ + monetizeapi.ServiceBountyGVR: "ServiceBountyList", + monetizeapi.EvaluatorEnrollmentGVR: "EvaluatorEnrollmentList", + }, + objects..., + ) + + return &Controller{ + dynClient: dynClient, + bountyQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), + bountyEscrow: escrow.NewLedgerGateway(), + } +} + +func mustBountyObject(t *testing.T, sb *monetizeapi.ServiceBounty) *unstructured.Unstructured { + t.Helper() + + sb.TypeMeta = metav1.TypeMeta{ + APIVersion: monetizeapi.Group + "/" + monetizeapi.Version, + Kind: monetizeapi.ServiceBountyKind, + } + obj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(sb) + if err != nil { + t.Fatalf("to unstructured: %v", err) + } + return &unstructured.Unstructured{Object: obj} +} + +func testBounty(name string) *monetizeapi.ServiceBounty { + return &monetizeapi.ServiceBounty{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: "hermes-obol-agent", + UID: types.UID("uid-" + name), + }, + Spec: monetizeapi.ServiceBountySpec{ + 
Task: monetizeapi.ServiceBountyTask{ + TypeRef: "benchmark@v1", + Params: map[string]string{"dtype": "fp16"}, + }, + Acceptance: monetizeapi.ServiceBountyAcceptance{Method: "poster-manual"}, + Reward: monetizeapi.ServiceBountyReward{ + Network: "base", + PayTo: "0x1111111111111111111111111111111111111111", + Asset: monetizeapi.ServiceOfferAsset{Symbol: "USDC"}, + Amount: "500.00", + Escrow: monetizeapi.ServiceBountyEscrow{Scheme: "upto"}, + }, + MaxFulfillers: 1, + }, + } +} + +// reconcileBountyUntilSettled runs reconcile twice: the first pass may only +// add the finalizer (it returns early, the informer event re-queues in prod). +func reconcileBountyUntilSettled(t *testing.T, c *Controller, key string) { + t.Helper() + for i := 0; i < 2; i++ { + if err := c.reconcileBounty(context.Background(), key); err != nil { + t.Fatalf("reconcile %s (pass %d): %v", key, i, err) + } + } +} + +func getBounty(t *testing.T, c *Controller, namespace, name string) *monetizeapi.ServiceBounty { + t.Helper() + + raw, err := c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(namespace).Get(context.Background(), name, metav1.GetOptions{}) + if err != nil { + t.Fatalf("get bounty %s/%s: %v", namespace, name, err) + } + var sb monetizeapi.ServiceBounty + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(raw.Object, &sb); err != nil { + t.Fatalf("decode bounty: %v", err) + } + return &sb +} + +func annotateBounty(t *testing.T, c *Controller, namespace, name string, annotations map[string]string) { + t.Helper() + + raw, err := c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(namespace).Get(context.Background(), name, metav1.GetOptions{}) + if err != nil { + t.Fatalf("get bounty for annotate: %v", err) + } + existing := raw.GetAnnotations() + if existing == nil { + existing = map[string]string{} + } + for k, v := range annotations { + existing[k] = v + } + raw.SetAnnotations(existing) + if _, err := 
c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(namespace).Update(context.Background(), raw, metav1.UpdateOptions{}); err != nil { + t.Fatalf("annotate bounty: %v", err) + } +} + +func TestBountyLifecycle_OpenToPaid(t *testing.T) { + c := newBountyTestController(t, testBounty("bench")) + key := "hermes-obol-agent/bench" + + // Open: finalizer + task validation + escrow reserve. + reconcileBountyUntilSettled(t, c, key) + sb := getBounty(t, c, "hermes-obol-agent", "bench") + if !bountyConditionIsTrue(sb.Status.Conditions, "TaskValid") { + t.Fatalf("TaskValid not true: %+v", sb.Status.Conditions) + } + if !bountyConditionIsTrue(sb.Status.Conditions, "EscrowReserved") { + t.Fatalf("EscrowReserved not true: %+v", sb.Status.Conditions) + } + if sb.Status.EscrowState != escrow.StateReserved { + t.Fatalf("EscrowState = %q, want Reserved", sb.Status.EscrowState) + } + if sb.Status.Phase != bountyPhaseOpen { + t.Fatalf("phase = %q, want Open", sb.Status.Phase) + } + + // Claim. + annotateBounty(t, c, "hermes-obol-agent", "bench", map[string]string{ + bountyClaimAnnotation: "0x2222222222222222222222222222222222222222", + bountyCommitAnnotation: "0xc0ffee", + }) + reconcileBountyUntilSettled(t, c, key) + sb = getBounty(t, c, "hermes-obol-agent", "bench") + if sb.Status.Phase != bountyPhaseClaimed { + t.Fatalf("phase = %q, want Claimed", sb.Status.Phase) + } + if len(sb.Status.Claims) != 1 || sb.Status.Claims[0].CommitHash != "0xc0ffee" { + t.Fatalf("claims = %+v", sb.Status.Claims) + } + + // Submit. 
+ annotateBounty(t, c, "hermes-obol-agent", "bench", map[string]string{ + bountySubmitAnnotation: `{"resultHash":"0xbeef","reportURI":"http://hermes.local/results/bench.a2ui.json"}`, + }) + reconcileBountyUntilSettled(t, c, key) + sb = getBounty(t, c, "hermes-obol-agent", "bench") + if sb.Status.Phase != bountyPhaseSubmitted { + t.Fatalf("phase = %q, want Submitted", sb.Status.Phase) + } + if sb.Status.ReportURI == "" { + t.Fatal("ReportURI not promoted from submission") + } + + // Poster accepts → Verified + Paid (ledger capture). + annotateBounty(t, c, "hermes-obol-agent", "bench", map[string]string{ + bountyVerdictAnnotation: "accept", + }) + reconcileBountyUntilSettled(t, c, key) + sb = getBounty(t, c, "hermes-obol-agent", "bench") + if !bountyConditionIsTrue(sb.Status.Conditions, "Verified") { + t.Fatalf("Verified not true: %+v", sb.Status.Conditions) + } + if !bountyConditionIsTrue(sb.Status.Conditions, "Paid") { + t.Fatalf("Paid not true: %+v", sb.Status.Conditions) + } + if sb.Status.Phase != bountyPhasePaid { + t.Fatalf("phase = %q, want Paid", sb.Status.Phase) + } + if sb.Status.WeightedScore != 100 { + t.Fatalf("weightedScore = %d, want 100", sb.Status.WeightedScore) + } + if !strings.HasPrefix(sb.Status.CaptureTxHash, "dev-ledger:") { + t.Fatalf("CaptureTxHash = %q, want dev-ledger label (never mistakable for settlement)", sb.Status.CaptureTxHash) + } + if len(sb.Status.Claims) != 1 || sb.Status.Claims[0].Phase != bountyPhasePaid { + t.Fatalf("claim phase = %+v, want Paid", sb.Status.Claims) + } +} + +func TestBountyLifecycle_InvalidTaskParks(t *testing.T) { + sb := testBounty("bad") + sb.Spec.Task.TypeRef = "does-not-exist@v9" + c := newBountyTestController(t, sb) + + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/bad") + got := getBounty(t, c, "hermes-obol-agent", "bad") + if got.Status.Phase != bountyPhaseInvalid { + t.Fatalf("phase = %q, want Invalid", got.Status.Phase) + } + if bountyConditionIsTrue(got.Status.Conditions, "TaskValid") { + 
t.Fatal("TaskValid should be false for unknown typeRef") + } +} + +func TestBountyLifecycle_BadParamEnumParks(t *testing.T) { + sb := testBounty("bad-param") + sb.Spec.Task.Params = map[string]string{"dtype": "fp64"} + c := newBountyTestController(t, sb) + + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/bad-param") + got := getBounty(t, c, "hermes-obol-agent", "bad-param") + if got.Status.Phase != bountyPhaseInvalid { + t.Fatalf("phase = %q, want Invalid", got.Status.Phase) + } +} + +func TestBountyLifecycle_UnknownParamParks(t *testing.T) { + sb := testBounty("typo-param") + sb.Spec.Task.Params = map[string]string{"hardwreClass": "H100"} + c := newBountyTestController(t, sb) + + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/typo-param") + got := getBounty(t, c, "hermes-obol-agent", "typo-param") + if got.Status.Phase != bountyPhaseInvalid { + t.Fatalf("phase = %q, want Invalid (unknown params are typo'd intent, not extensibility)", got.Status.Phase) + } +} + +func TestBountyLifecycle_MultiFulfillerParks(t *testing.T) { + sb := testBounty("multi") + sb.Spec.MaxFulfillers = 3 + c := newBountyTestController(t, sb) + + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/multi") + got := getBounty(t, c, "hermes-obol-agent", "multi") + if got.Status.Phase != bountyPhaseInvalid { + t.Fatalf("phase = %q, want Invalid (v1 is single-winner; silently honoring >1 promises a race semantic that doesn't exist)", got.Status.Phase) + } +} + +func TestBountyLifecycle_DeadlineRefunds(t *testing.T) { + sb := testBounty("late") + past := metav1.NewTime(time.Now().Add(-time.Hour)) + sb.Spec.Deadline = &past + c := newBountyTestController(t, sb) + key := "hermes-obol-agent/late" + + // First pass adds the finalizer; the next passes reserve then refund. 
+ for i := 0; i < 3; i++ { + if err := c.reconcileBounty(context.Background(), key); err != nil { + t.Fatalf("reconcile pass %d: %v", i, err) + } + } + got := getBounty(t, c, "hermes-obol-agent", "late") + if got.Status.Phase != bountyPhaseExpired && got.Status.Phase != bountyPhaseRefunded { + t.Fatalf("phase = %q, want Expired or Refunded", got.Status.Phase) + } + if bountyConditionIsTrue(got.Status.Conditions, "Paid") { + t.Fatal("expired bounty must not pay") + } +} + +func TestBountyLifecycle_RejectVerdict(t *testing.T) { + c := newBountyTestController(t, testBounty("rejected")) + key := "hermes-obol-agent/rejected" + + reconcileBountyUntilSettled(t, c, key) + annotateBounty(t, c, "hermes-obol-agent", "rejected", map[string]string{ + bountyClaimAnnotation: "0x3333333333333333333333333333333333333333", + bountySubmitAnnotation: `{"resultHash":"0x1","reportURI":"http://x"}`, + bountyVerdictAnnotation: "reject:scores out of tolerance", + }) + reconcileBountyUntilSettled(t, c, key) + + got := getBounty(t, c, "hermes-obol-agent", "rejected") + if got.Status.Phase != bountyPhaseRejected { + t.Fatalf("phase = %q, want Rejected", got.Status.Phase) + } + if bountyConditionIsTrue(got.Status.Conditions, "Paid") { + t.Fatal("rejected bounty must not pay") + } + if got.Status.EscrowState != escrow.StateReserved { + t.Fatalf("EscrowState = %q; rejection keeps the hold until deadline refund or poster delete", got.Status.EscrowState) + } +} + +func TestBountyLifecycle_InvalidClaimAddress(t *testing.T) { + c := newBountyTestController(t, testBounty("badclaim")) + key := "hermes-obol-agent/badclaim" + + reconcileBountyUntilSettled(t, c, key) + annotateBounty(t, c, "hermes-obol-agent", "badclaim", map[string]string{ + bountyClaimAnnotation: "not-an-address", + }) + reconcileBountyUntilSettled(t, c, key) + + got := getBounty(t, c, "hermes-obol-agent", "badclaim") + if len(got.Status.Claims) != 0 { + t.Fatalf("claims = %+v, want none for invalid address", got.Status.Claims) + } + 
if got.Status.Phase != bountyPhaseOpen { + t.Fatalf("phase = %q, want Open", got.Status.Phase) + } +} diff --git a/internal/serviceoffercontroller/bounty_panel.go b/internal/serviceoffercontroller/bounty_panel.go new file mode 100644 index 00000000..4cea6490 --- /dev/null +++ b/internal/serviceoffercontroller/bounty_panel.go @@ -0,0 +1,323 @@ +package serviceoffercontroller + +// Evaluator panel selection + ladder bookkeeping (design doc §11.4). +// +// Selection is controller-side weighted sampling — the honest local-first +// stand-in for VRF (the swap seam is exactly this function). It is +// DETERMINISTIC per bounty: seeded from the bounty UID so every reconcile +// computes the same panel (idempotence), and the poster cannot re-roll +// evaluators by touching the spec. +// +// Seats: k counting seats (Full tier, plus at most ONE Probation seat on +// value-capped bounties — the median absorbs one outlier, which is what makes +// the newcomer seat verdict-safe) + up to two free Shadow seats, randomly +// ASSIGNED (a sybil can't choose where to warm reputation). If the enrolled +// pool can't fill k counting seats the bounty falls back to open-door (any +// address may evaluate), and ladder bookkeeping still applies to enrolled +// participants — open-door participation is how the first evaluators climb +// out of Shadow. + +import ( + "context" + "crypto/sha256" + "encoding/binary" + "fmt" + "math/rand" + "slices" + "sort" + "strconv" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/bounty" + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" +) + +const ( + maxShadowSeats = 2 + recentFulfillersKept = 5 + // pairDiversityWeight down-weights an evaluator who recently judged the + // same fulfiller (anti-collusion: break up cozy evaluator↔fulfiller pairs). 
+ pairDiversityWeight = 0.25 +) + +// evaluatorCandidate is one enrolled evaluator considered for selection. +type evaluatorCandidate struct { + Address string + Record monetizeapi.EvaluatorLadderRecord +} + +// listEnrollmentsForTask returns the enrolled evaluators for a task type in +// the bounty's namespace. +func (c *Controller) listEnrollmentsForTask(ctx context.Context, namespace, taskRef string) ([]monetizeapi.EvaluatorEnrollment, error) { + raw, err := c.dynClient.Resource(monetizeapi.EvaluatorEnrollmentGVR).Namespace(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, err + } + var out []monetizeapi.EvaluatorEnrollment + for i := range raw.Items { + var enrollment monetizeapi.EvaluatorEnrollment + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(raw.Items[i].Object, &enrollment); err != nil { + continue + } + if slices.Contains(enrollment.Spec.TaskTypes, taskRef) { + out = append(out, enrollment) + } + } + sort.Slice(out, func(i, j int) bool { return out[i].Spec.Address < out[j].Spec.Address }) + return out, nil +} + +// ladderRecordFor returns the enrollment's ladder record for taskRef; new +// enrollments start at Shadow. +func ladderRecordFor(enrollment *monetizeapi.EvaluatorEnrollment, taskRef string) monetizeapi.EvaluatorLadderRecord { + for _, r := range enrollment.Status.Records { + if r.TaskType == taskRef { + return r + } + } + return monetizeapi.EvaluatorLadderRecord{TaskType: taskRef, Tier: monetizeapi.EvaluatorTierShadow} +} + +// selectEvaluatorPanel performs the deterministic weighted sampling. Returns +// nil when the counting pool (Full+Probation) cannot fill k seats — the +// open-door fallback. 
+func selectEvaluatorPanel(uid string, pool []monetizeapi.EvaluatorEnrollment, taskRef string, k int64, rewardAmount, probationValueCap, fulfiller string) []monetizeapi.ServiceBountyPanelSeat { + var full, probation, shadow []evaluatorCandidate + for i := range pool { + candidate := evaluatorCandidate{ + Address: pool[i].Spec.Address, + Record: ladderRecordFor(&pool[i], taskRef), + } + switch candidate.Record.Tier { + case monetizeapi.EvaluatorTierFull: + full = append(full, candidate) + case monetizeapi.EvaluatorTierProbation: + probation = append(probation, candidate) + default: + shadow = append(shadow, candidate) + } + } + + counting := len(full) + len(probation) + if int64(counting) < k { + return nil // open-door fallback + } + + // Deterministic seed: same bounty → same panel, every reconcile. + sum := sha256.Sum256([]byte(uid)) + rng := rand.New(rand.NewSource(int64(binary.BigEndian.Uint64(sum[:8])))) //nolint:gosec // deterministic-by-design selection, not crypto + + weight := func(candidate evaluatorCandidate) float64 { + w := 1.0 + 0.1*float64(candidate.Record.Completed-candidate.Record.Divergences) + if w < 0.1 { + w = 0.1 + } + if slices.Contains(candidate.Record.RecentFulfillers, fulfiller) { + w *= pairDiversityWeight + } + return w + } + + var seats []monetizeapi.ServiceBountyPanelSeat + + // One reserved probation seat on value-capped bounties: the median-of-k + // absorbs one outlier, so the newcomer seat is verdict-safe by + // construction — and only offered where the value cap allows. + remaining := k + if len(probation) > 0 && withinValueCap(rewardAmount, probationValueCap) && k >= 3 { + pick := weightedPick(rng, probation, weight) + seats = append(seats, monetizeapi.ServiceBountyPanelSeat{Address: pick.Address, Seat: monetizeapi.PanelSeatProbation}) + probation = removeCandidate(probation, pick.Address) + remaining-- + } + + countingPool := append(append([]evaluatorCandidate{}, full...), probation...) 
+ for remaining > 0 && len(countingPool) > 0 { + pick := weightedPick(rng, countingPool, weight) + seats = append(seats, monetizeapi.ServiceBountyPanelSeat{Address: pick.Address, Seat: monetizeapi.PanelSeatFull}) + countingPool = removeCandidate(countingPool, pick.Address) + remaining-- + } + if remaining > 0 { + return nil // pool shrank under us — open-door + } + + // Shadows are randomly ASSIGNED, never chosen by the evaluator. + for i := 0; i < maxShadowSeats && len(shadow) > 0; i++ { + pick := shadow[rng.Intn(len(shadow))] + seats = append(seats, monetizeapi.ServiceBountyPanelSeat{Address: pick.Address, Seat: monetizeapi.PanelSeatShadow}) + shadow = removeCandidate(shadow, pick.Address) + } + + sort.Slice(seats, func(i, j int) bool { return seats[i].Address < seats[j].Address }) + return seats +} + +func weightedPick(rng *rand.Rand, pool []evaluatorCandidate, weight func(evaluatorCandidate) float64) evaluatorCandidate { + total := 0.0 + for _, candidate := range pool { + total += weight(candidate) + } + target := rng.Float64() * total + for _, candidate := range pool { + target -= weight(candidate) + if target <= 0 { + return candidate + } + } + return pool[len(pool)-1] +} + +func removeCandidate(pool []evaluatorCandidate, address string) []evaluatorCandidate { + out := pool[:0] + for _, candidate := range pool { + if candidate.Address != address { + out = append(out, candidate) + } + } + return out +} + +func withinValueCap(amount, cap string) bool { + a, errA := strconv.ParseFloat(strings.TrimSpace(amount), 64) + c, errC := strconv.ParseFloat(strings.TrimSpace(cap), 64) + if errA != nil || errC != nil || c <= 0 { + return false + } + return a <= c +} + +// ensurePanel runs selection exactly once per bounty (latched by the +// PanelSelected condition so a growing pool can never re-gate a bounty whose +// evaluation already started). 
+func (c *Controller) ensurePanel(ctx context.Context, sb *monetizeapi.ServiceBounty, status *monetizeapi.ServiceBountyStatus) { + for _, condition := range status.Conditions { + if condition.Type == "PanelSelected" { + return + } + } + + taskRef := sb.Spec.Task.TypeRef + pool, err := c.listEnrollmentsForTask(ctx, sb.Namespace, taskRef) + if err != nil { + // Missing CRD / transient list error → open-door, recorded as such. + setPurchaseCondition(&status.Conditions, "PanelSelected", "False", "OpenDoor", + truncateMessage(fmt.Sprintf("enrollment pool unavailable (%v) — open-door evaluation", err))) + return + } + + k := sb.Spec.Eval.K + if k < 1 { + k = 1 + } + cap := "" + if t, err := bounty.Resolve(taskRef); err == nil { + cap = t.Eval.Ladder.ProbationValueCap + } + fulfiller := "" + if len(status.Claims) > 0 { + fulfiller = status.Claims[0].FulfillerAddress + } + + seats := selectEvaluatorPanel(string(sb.UID), pool, taskRef, k, sb.Spec.Reward.Amount, cap, fulfiller) + if seats == nil { + setPurchaseCondition(&status.Conditions, "PanelSelected", "False", "OpenDoor", + fmt.Sprintf("Enrolled pool has fewer than %d counting evaluators — open-door evaluation", k)) + return + } + status.EvaluatorPanel = seats + setPurchaseCondition(&status.Conditions, "PanelSelected", "True", "Selected", + fmt.Sprintf("%d counting seat(s) + %d shadow(s) selected from %d enrolled", k, len(seats)-int(k), len(pool))) +} + +// recordLadder applies the one-shot cross-bounty bookkeeping after the quorum +// settles: completion/divergence counters, shadow agreements, probation +// progress, tier promotions, and the pair-diversity history. 
+func (c *Controller) recordLadder(ctx context.Context, sb *monetizeapi.ServiceBounty, status *monetizeapi.ServiceBountyStatus) error { + taskRef := sb.Spec.Task.TypeRef + thresholds := bounty.Ladder{ShadowAgreements: 5, ProbationEvals: 10} + if t, err := bounty.Resolve(taskRef); err == nil && t.Eval.Ladder.ShadowAgreements > 0 { + thresholds = t.Eval.Ladder + } + fulfiller := "" + if len(status.Claims) > 0 { + fulfiller = status.Claims[0].FulfillerAddress + } + + for _, evaluation := range status.Evaluations { + raw, err := c.findEnrollmentByAddress(ctx, sb.Namespace, evaluation.Address) + if err != nil || raw == nil { + continue // unenrolled open-door participant — nothing to record + } + var enrollment monetizeapi.EvaluatorEnrollment + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(raw.Object, &enrollment); err != nil { + continue + } + + record := ladderRecordFor(&enrollment, taskRef) + record.Completed++ + if !evaluation.WithinBand { + record.Divergences++ + } + switch record.Tier { + case monetizeapi.EvaluatorTierShadow: + if evaluation.WithinBand { + record.ShadowAgreements++ + } + if record.ShadowAgreements >= int64(thresholds.ShadowAgreements) { + record.Tier = monetizeapi.EvaluatorTierProbation + } + case monetizeapi.EvaluatorTierProbation: + if evaluation.WithinBand { + record.ProbationEvals++ + } + if record.ProbationEvals >= int64(thresholds.ProbationEvals) { + record.Tier = monetizeapi.EvaluatorTierFull + } + } + if fulfiller != "" { + record.RecentFulfillers = append([]string{fulfiller}, record.RecentFulfillers...) 
+ if len(record.RecentFulfillers) > recentFulfillersKept { + record.RecentFulfillers = record.RecentFulfillers[:recentFulfillersKept] + } + } + + replaced := false + for i := range enrollment.Status.Records { + if enrollment.Status.Records[i].TaskType == taskRef { + enrollment.Status.Records[i] = record + replaced = true + } + } + if !replaced { + enrollment.Status.Records = append(enrollment.Status.Records, record) + } + + statusObject, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&enrollment.Status) + if err != nil { + return err + } + patched := raw.DeepCopy() + patched.Object["status"] = statusObject + if _, err := c.dynClient.Resource(monetizeapi.EvaluatorEnrollmentGVR).Namespace(sb.Namespace).UpdateStatus(ctx, patched, metav1.UpdateOptions{}); err != nil { + return fmt.Errorf("update ladder for %s: %w", evaluation.Address, err) + } + } + return nil +} + +func (c *Controller) findEnrollmentByAddress(ctx context.Context, namespace, address string) (*unstructured.Unstructured, error) { + list, err := c.dynClient.Resource(monetizeapi.EvaluatorEnrollmentGVR).Namespace(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, err + } + for i := range list.Items { + if addr, _, _ := unstructured.NestedString(list.Items[i].Object, "spec", "address"); strings.EqualFold(addr, address) { + return &list.Items[i], nil + } + } + return nil, nil +} diff --git a/internal/serviceoffercontroller/bounty_panel_test.go b/internal/serviceoffercontroller/bounty_panel_test.go new file mode 100644 index 00000000..6abea0dd --- /dev/null +++ b/internal/serviceoffercontroller/bounty_panel_test.go @@ -0,0 +1,317 @@ +package serviceoffercontroller + +import ( + "context" + "fmt" + "reflect" + "strings" + "testing" + + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/x402/escrow" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + 
"k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic/fake" + "k8s.io/client-go/util/workqueue" +) + +func testEnrollment(t *testing.T, name, address, tier string) *unstructured.Unstructured { + t.Helper() + enrollment := monetizeapi.EvaluatorEnrollment{ + TypeMeta: metav1.TypeMeta{ + APIVersion: monetizeapi.Group + "/" + monetizeapi.Version, + Kind: monetizeapi.EvaluatorEnrollmentKind, + }, + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: "hermes-obol-agent"}, + Spec: monetizeapi.EvaluatorEnrollmentSpec{ + Address: address, + TaskTypes: []string{"benchmark@v1"}, + }, + } + if tier != "" { + enrollment.Status.Records = []monetizeapi.EvaluatorLadderRecord{{TaskType: "benchmark@v1", Tier: tier}} + } + obj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&enrollment) + if err != nil { + t.Fatalf("enrollment to unstructured: %v", err) + } + return &unstructured.Unstructured{Object: obj} +} + +func newPanelTestController(t *testing.T, sb *monetizeapi.ServiceBounty, enrollments ...*unstructured.Unstructured) *Controller { + t.Helper() + objects := []runtime.Object{mustBountyObject(t, sb)} + for _, e := range enrollments { + objects = append(objects, e) + } + dynClient := fake.NewSimpleDynamicClientWithCustomListKinds( + runtime.NewScheme(), + map[schema.GroupVersionResource]string{ + monetizeapi.ServiceBountyGVR: "ServiceBountyList", + monetizeapi.EvaluatorEnrollmentGVR: "EvaluatorEnrollmentList", + }, + objects..., + ) + return &Controller{ + dynClient: dynClient, + bountyQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), + bountyEscrow: escrow.NewLedgerGateway(), + } +} + +func TestSelectEvaluatorPanel_DeterministicPerBounty(t *testing.T) { + pool := []monetizeapi.EvaluatorEnrollment{} + for i := 0; i < 6; i++ { + addr := fmt.Sprintf("0x%040d", i) + pool = append(pool, monetizeapi.EvaluatorEnrollment{ + Spec: monetizeapi.EvaluatorEnrollmentSpec{Address: 
addr, TaskTypes: []string{"benchmark@v1"}}, + Status: monetizeapi.EvaluatorEnrollmentStatus{Records: []monetizeapi.EvaluatorLadderRecord{ + {TaskType: "benchmark@v1", Tier: monetizeapi.EvaluatorTierFull}, + }}, + }) + } + + a := selectEvaluatorPanel("uid-1", pool, "benchmark@v1", 3, "5.00", "50.00", "0xf") + b := selectEvaluatorPanel("uid-1", pool, "benchmark@v1", 3, "5.00", "50.00", "0xf") + if !reflect.DeepEqual(a, b) { + t.Fatalf("selection must be deterministic per bounty UID:\n%v\n%v", a, b) + } + if len(a) != 3 { + t.Fatalf("got %d seats, want 3", len(a)) + } +} + +func TestSelectEvaluatorPanel_OpenDoorWhenPoolThin(t *testing.T) { + pool := []monetizeapi.EvaluatorEnrollment{ + { + Spec: monetizeapi.EvaluatorEnrollmentSpec{Address: "0x" + strings.Repeat("1", 40), TaskTypes: []string{"benchmark@v1"}}, + Status: monetizeapi.EvaluatorEnrollmentStatus{Records: []monetizeapi.EvaluatorLadderRecord{{TaskType: "benchmark@v1", Tier: monetizeapi.EvaluatorTierFull}}}, + }, + // Shadows are not counting candidates. 
+ {Spec: monetizeapi.EvaluatorEnrollmentSpec{Address: "0x" + strings.Repeat("2", 40), TaskTypes: []string{"benchmark@v1"}}}, + } + if seats := selectEvaluatorPanel("uid", pool, "benchmark@v1", 3, "5.00", "50.00", ""); seats != nil { + t.Fatalf("thin pool must fall back to open-door, got %v", seats) + } +} + +func TestSelectEvaluatorPanel_ProbationSeatValueCapped(t *testing.T) { + pool := []monetizeapi.EvaluatorEnrollment{} + for i := 0; i < 4; i++ { + pool = append(pool, monetizeapi.EvaluatorEnrollment{ + Spec: monetizeapi.EvaluatorEnrollmentSpec{Address: fmt.Sprintf("0x%040d", i), TaskTypes: []string{"benchmark@v1"}}, + Status: monetizeapi.EvaluatorEnrollmentStatus{Records: []monetizeapi.EvaluatorLadderRecord{ + {TaskType: "benchmark@v1", Tier: monetizeapi.EvaluatorTierFull}, + }}, + }) + } + pool = append(pool, monetizeapi.EvaluatorEnrollment{ + Spec: monetizeapi.EvaluatorEnrollmentSpec{Address: "0x" + strings.Repeat("9", 40), TaskTypes: []string{"benchmark@v1"}}, + Status: monetizeapi.EvaluatorEnrollmentStatus{Records: []monetizeapi.EvaluatorLadderRecord{ + {TaskType: "benchmark@v1", Tier: monetizeapi.EvaluatorTierProbation}, + }}, + }) + + countProbation := func(seats []monetizeapi.ServiceBountyPanelSeat) int { + n := 0 + for _, s := range seats { + if s.Seat == monetizeapi.PanelSeatProbation { + n++ + } + } + return n + } + + under := selectEvaluatorPanel("uid", pool, "benchmark@v1", 3, "5.00", "50.00", "") + if countProbation(under) != 1 { + t.Errorf("reward under the cap must seat exactly one probationer, got %d (%v)", countProbation(under), under) + } + over := selectEvaluatorPanel("uid", pool, "benchmark@v1", 3, "500.00", "50.00", "") + if countProbation(over) != 0 { + t.Errorf("reward above the cap must seat no probationer, got %d (%v)", countProbation(over), over) + } +} + +// Full panel-mode lifecycle: panel gates out a non-panel commit, the shadow is +// graded but not counted, evaluators get paid, the ladder records. 
+func TestEvalMarket_PanelMode(t *testing.T) { + sb := testEvalBounty("panel") + sb.Spec.Trust.SelfBond = monetizeapi.ServiceBountySelfBond{} + pool := []*unstructured.Unstructured{ + testEnrollment(t, "ev-a", evalA, monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-b", evalB, monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-c", evalC, monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-shadow", "0xdddddddddddddddddddddddddddddddddddddddd", ""), + } + c := newPanelTestController(t, sb, pool...) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "panel") + + got := getBounty(t, c, ns, "panel") + if len(got.Status.EvaluatorPanel) != 4 { + t.Fatalf("panel = %v, want 3 counting + 1 shadow", got.Status.EvaluatorPanel) + } + seatOf := map[string]string{} + for _, seat := range got.Status.EvaluatorPanel { + seatOf[strings.ToLower(seat.Address)] = seat.Seat + } + if seatOf["0xdddddddddddddddddddddddddddddddddddddddd"] != monetizeapi.PanelSeatShadow { + t.Fatalf("the Shadow-tier enrollee must hold the shadow seat: %v", seatOf) + } + if got.Status.EvalBudgetState != escrow.StateReserved { + t.Fatalf("eval budget state = %q, want Reserved at panel selection", got.Status.EvalBudgetState) + } + + // A non-panel outsider tries to commit — must be ignored. + outsider := "0xeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee" + annotateBounty(t, c, ns, "panel", map[string]string{ + "obol.org/eval-commit-" + outsider: monetizeapi.EvalCommitHash(99, "x", outsider), + }) + + // Panel members (incl. the shadow) commit and reveal. 
+ scores := map[string]int64{evalA: 90, evalB: 85, evalC: 80, "0xdddddddddddddddddddddddddddddddddddddddd": 10} + for addr, score := range scores { + annotateBounty(t, c, ns, "panel", map[string]string{ + "obol.org/eval-commit-" + addr: monetizeapi.EvalCommitHash(score, "salt-"+addr, addr), + }) + } + reconcileBountyUntilSettled(t, c, ns+"/panel") + for addr, score := range scores { + annotateBounty(t, c, ns, "panel", map[string]string{ + "obol.org/eval-reveal-" + addr: fmt.Sprintf(`{"score":%d,"salt":"salt-%s"}`, score, addr), + }) + } + reconcileBountyUntilSettled(t, c, ns+"/panel") + + got = getBounty(t, c, ns, "panel") + for _, ev := range got.Status.Evaluations { + if strings.EqualFold(ev.Address, outsider) { + t.Error("non-panel commit must be ignored in panel mode") + } + } + if got.Status.WeightedScore != 85 { + t.Errorf("WeightedScore = %d, want 85 (shadow's 10 must not move the median)", got.Status.WeightedScore) + } + if got.Status.Phase != bountyPhasePaid { + t.Fatalf("phase = %q, want Paid", got.Status.Phase) + } + if got.Status.EvalBudgetState != escrow.StateCaptured || got.Status.EvalPayoutTxHash == "" { + t.Errorf("eval budget = %q payout=%q, want Captured with a batch receipt", got.Status.EvalBudgetState, got.Status.EvalPayoutTxHash) + } + for _, ev := range got.Status.Evaluations { + isShadow := ev.Seat == monetizeapi.PanelSeatShadow + if ev.Paid == isShadow { + t.Errorf("evaluator %s (seat=%s) paid=%v — counting seats are paid, shadows are free", ev.Address, ev.Seat, ev.Paid) + } + } + if !got.Status.LadderRecorded { + t.Fatal("ladder bookkeeping must latch after settle") + } + + // Ladder: the shadow diverged (10 vs median 85, out of band) → no + // agreement; counting members completed in band. 
+ shadowRecord := ladderStatusOf(t, c, ns, "ev-shadow") + if shadowRecord.ShadowAgreements != 0 || shadowRecord.Completed != 1 || shadowRecord.Divergences != 1 { + t.Errorf("shadow record = %+v, want completed=1 divergences=1 agreements=0", shadowRecord) + } + fullRecord := ladderStatusOf(t, c, ns, "ev-a") + if fullRecord.Completed != 1 || fullRecord.Divergences != 0 { + t.Errorf("full record = %+v, want completed=1 divergences=0", fullRecord) + } + if len(fullRecord.RecentFulfillers) == 0 { + t.Error("pair-diversity history must record the fulfiller") + } +} + +// A shadow agreeing with the median climbs toward Probation. +func TestEvalMarket_ShadowAgreementClimbs(t *testing.T) { + sb := testEvalBounty("shadow-climb") + pool := []*unstructured.Unstructured{ + testEnrollment(t, "ev-a", evalA, monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-b", evalB, monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-c", evalC, monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-shadow", "0xdddddddddddddddddddddddddddddddddddddddd", ""), + } + c := newPanelTestController(t, sb, pool...) 
+ ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "shadow-climb") + scores := map[string]int64{evalA: 90, evalB: 85, evalC: 80, "0xdddddddddddddddddddddddddddddddddddddddd": 88} + for addr, score := range scores { + annotateBounty(t, c, ns, "shadow-climb", map[string]string{ + "obol.org/eval-commit-" + addr: monetizeapi.EvalCommitHash(score, "salt-"+addr, addr), + }) + } + reconcileBountyUntilSettled(t, c, ns+"/shadow-climb") + for addr, score := range scores { + annotateBounty(t, c, ns, "shadow-climb", map[string]string{ + "obol.org/eval-reveal-" + addr: fmt.Sprintf(`{"score":%d,"salt":"salt-%s"}`, score, addr), + }) + } + reconcileBountyUntilSettled(t, c, ns+"/shadow-climb") + + record := ladderStatusOf(t, c, ns, "ev-shadow") + if record.ShadowAgreements != 1 { + t.Errorf("shadow within band must earn an agreement, got %+v", record) + } + if record.Tier != monetizeapi.EvaluatorTierShadow { + t.Errorf("one agreement must not yet promote (threshold 5), got tier %s", record.Tier) + } +} + +// The probation seat is half price and the discount goes to the POSTER: the +// reserved budget shrinks by per/2 when a probationer is seated. 
+func TestEvalBudgetTotal_ProbationDiscount(t *testing.T) { + sb := testEvalBounty("x") + sb.Spec.Eval.Payment.PerEvaluator = "2.00" + sb.Spec.Eval.K = 3 + + status := &monetizeapi.ServiceBountyStatus{} + if got := evalBudgetTotal(sb, status); got != "6.00" { + t.Errorf("all-full budget = %q, want 6.00", got) + } + + status.EvaluatorPanel = []monetizeapi.ServiceBountyPanelSeat{ + {Address: evalA, Seat: monetizeapi.PanelSeatFull}, + {Address: evalB, Seat: monetizeapi.PanelSeatFull}, + {Address: evalC, Seat: monetizeapi.PanelSeatProbation}, + } + if got := evalBudgetTotal(sb, status); got != "5.00" { + t.Errorf("probation-seated budget = %q, want 5.00 (2+2+1)", got) + } +} + +func TestLedgerGateway_CaptureBatch(t *testing.T) { + g := escrow.NewLedgerGateway() + if _, err := g.Reserve(context.Background(), escrow.ReserveRequest{ID: "b-eval", Asset: "OBOL", Amount: "6.00"}); err != nil { + t.Fatal(err) + } + receipt, err := g.CaptureBatch(context.Background(), "b-eval", []escrow.BatchRecipient{ + {Address: evalA, Amount: "2.00"}, {Address: evalB, Amount: "2.00"}, + }) + if err != nil { + t.Fatal(err) + } + if receipt.State != escrow.StateCaptured || !strings.Contains(receipt.TxHash, "batch[2]") { + t.Errorf("receipt = %+v, want Captured dev-ledger batch[2]", receipt) + } +} + +func ladderStatusOf(t *testing.T, c *Controller, namespace, name string) monetizeapi.EvaluatorLadderRecord { + t.Helper() + raw, err := c.dynClient.Resource(monetizeapi.EvaluatorEnrollmentGVR).Namespace(namespace).Get(context.Background(), name, metav1.GetOptions{}) + if err != nil { + t.Fatalf("get enrollment %s: %v", name, err) + } + var enrollment monetizeapi.EvaluatorEnrollment + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(raw.Object, &enrollment); err != nil { + t.Fatalf("decode enrollment: %v", err) + } + if len(enrollment.Status.Records) == 0 { + return monetizeapi.EvaluatorLadderRecord{} + } + return enrollment.Status.Records[0] +} diff --git 
a/internal/serviceoffercontroller/bounty_structure_test.go b/internal/serviceoffercontroller/bounty_structure_test.go new file mode 100644 index 00000000..19427217 --- /dev/null +++ b/internal/serviceoffercontroller/bounty_structure_test.go @@ -0,0 +1,24 @@ +package serviceoffercontroller + +import ( + "os" + "regexp" + "testing" +) + +// TestBountyReconcile_NeverCreatesIngressOrSecrets pins the review invariant +// that a ServiceBounty must never become public ingress and the bounty pass +// must never broker credentials: the reconcile source must not touch +// HTTPRoute, Middleware, ReferenceGrant, or Secret resources. (The structural +// source-check style follows internal/x402/setup_structure_test.go.) +func TestBountyReconcile_NeverCreatesIngressOrSecrets(t *testing.T) { + src, err := os.ReadFile("bounty.go") + if err != nil { + t.Fatalf("read bounty.go: %v", err) + } + + forbidden := regexp.MustCompile(`HTTPRouteGVR|MiddlewareGVR|ReferenceGrantGVR|SecretGVR|c\.httpRoutes|c\.middlewares|c\.referenceGrants`) + if match := forbidden.Find(src); match != nil { + t.Fatalf("bounty.go references %q — the bounty reconcile must never create routes, middlewares, reference grants, or secrets (a bounty must never become ingress)", match) + } +} diff --git a/internal/serviceoffercontroller/controller.go b/internal/serviceoffercontroller/controller.go index be6b7cfe..732f7fda 100644 --- a/internal/serviceoffercontroller/controller.go +++ b/internal/serviceoffercontroller/controller.go @@ -16,6 +16,7 @@ import ( "github.com/ObolNetwork/obol-stack/internal/erc8004" "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/x402/escrow" "github.com/ethereum/go-ethereum/common" "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -67,14 +68,21 @@ type Controller struct { identityInformer cache.SharedIndexInformer purchaseInformer cache.SharedIndexInformer agentInformer cache.SharedIndexInformer + 
bountyInformer cache.SharedIndexInformer configMapInformer cache.SharedIndexInformer offerQueue workqueue.TypedRateLimitingInterface[string] registrationQueue workqueue.TypedRateLimitingInterface[string] identityQueue workqueue.TypedRateLimitingInterface[string] purchaseQueue workqueue.TypedRateLimitingInterface[string] agentQueue workqueue.TypedRateLimitingInterface[string] + bountyQueue workqueue.TypedRateLimitingInterface[string] catalogMu sync.Mutex + // bountyEscrow is the Hold/Release/Refund seam for ServiceBounty rewards. + // Configured at construction (env), never from a bounty's spec — see + // newBountyEscrowGateway for why. + bountyEscrow escrow.Gateway + pendingAuths sync.Map // key: "ns/name" → []map[string]string httpClient *http.Client @@ -107,6 +115,27 @@ func New(cfg *rest.Config) (*Controller, error) { identityInformer := factory.ForResource(monetizeapi.AgentIdentityGVR).Informer() purchaseInformer := factory.ForResource(monetizeapi.PurchaseRequestGVR).Informer() agentInformer := factory.ForResource(monetizeapi.AgentGVR).Informer() + + // ServiceBounty is newer than the other CRDs. Guard on discovery so a + // controller image rolled onto a cluster that hasn't applied the CRD yet + // degrades to a log line instead of blocking every informer cache sync. + // Only a definitive "group served, resource absent" answer disables the + // pass — a transient discovery error keeps it on (the CRD ships in the + // same release train). 
+ var bountyInformer cache.SharedIndexInformer + if resources, err := kubeClient.Discovery().ServerResourcesForGroupVersion(monetizeapi.Group + "/" + monetizeapi.Version); err == nil { + for _, r := range resources.APIResources { + if r.Name == monetizeapi.ServiceBountyResource { + bountyInformer = factory.ForResource(monetizeapi.ServiceBountyGVR).Informer() + break + } + } + if bountyInformer == nil { + log.Printf("serviceoffer-controller: ServiceBounty CRD not installed; bounty reconcile disabled") + } + } else { + bountyInformer = factory.ForResource(monetizeapi.ServiceBountyGVR).Informer() + } configMapFactory := dynamicinformer.NewFilteredDynamicSharedInformerFactory(client, 0, "obol-frontend", func(options *metav1.ListOptions) { options.FieldSelector = fields.OneTermEqualSelector("metadata.name", "obol-stack-config").String() }) @@ -131,12 +160,15 @@ func New(cfg *rest.Config) (*Controller, error) { identityInformer: identityInformer, purchaseInformer: purchaseInformer, agentInformer: agentInformer, + bountyInformer: bountyInformer, configMapInformer: configMapInformer, offerQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), registrationQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), identityQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), purchaseQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), agentQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), + bountyQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), + bountyEscrow: newBountyEscrowGateway(), httpClient: &http.Client{Timeout: 3 * time.Second}, registrationRPCBase: getenvDefault("ERC8004_RPC_BASE", erc8004.DefaultRPCBase), baseURLOverride: strings.TrimRight(os.Getenv("AGENT_BASE_URL"), "/"), @@ -201,6 +233,13 @@ 
func New(cfg *rest.Config) (*Controller, error) { UpdateFunc: func(_, newObj any) { controller.enqueueDiscoveryRefresh(newObj) }, DeleteFunc: controller.enqueueDiscoveryRefresh, }) + if bountyInformer != nil { + bountyInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: controller.enqueueBounty, + UpdateFunc: func(_, newObj any) { controller.enqueueBounty(newObj) }, + DeleteFunc: controller.enqueueBounty, + }) + } return controller, nil } @@ -211,6 +250,7 @@ func (c *Controller) Run(ctx context.Context, workers int) error { defer c.identityQueue.ShutDown() defer c.purchaseQueue.ShutDown() defer c.agentQueue.ShutDown() + defer c.bountyQueue.ShutDown() go c.offerInformer.Run(ctx.Done()) go c.registrationInformer.Run(ctx.Done()) @@ -218,14 +258,19 @@ func (c *Controller) Run(ctx context.Context, workers int) error { go c.purchaseInformer.Run(ctx.Done()) go c.agentInformer.Run(ctx.Done()) go c.configMapInformer.Run(ctx.Done()) - if !cache.WaitForCacheSync(ctx.Done(), + syncs := []cache.InformerSynced{ c.offerInformer.HasSynced, c.registrationInformer.HasSynced, c.identityInformer.HasSynced, c.purchaseInformer.HasSynced, c.agentInformer.HasSynced, c.configMapInformer.HasSynced, - ) { + } + if c.bountyInformer != nil { + go c.bountyInformer.Run(ctx.Done()) + syncs = append(syncs, c.bountyInformer.HasSynced) + } + if !cache.WaitForCacheSync(ctx.Done(), syncs...) 
{ return fmt.Errorf("wait for informer sync") } @@ -257,6 +302,12 @@ func (c *Controller) Run(ctx context.Context, workers int) error { for c.processNextAgent(ctx) { } }() + if c.bountyInformer != nil { + go func() { + for c.processNextBounty(ctx) { + } + }() + } } <-ctx.Done() diff --git a/internal/x402/escrow/gateway.go b/internal/x402/escrow/gateway.go new file mode 100644 index 00000000..a19847be --- /dev/null +++ b/internal/x402/escrow/gateway.go @@ -0,0 +1,230 @@ +// Package escrow defines the conditional-settlement seam between the +// servicebounty-controller and the x402 facilitator: hold a reward +// authorization now, release it to the fulfiller on an accepted verdict, or +// return it to the poster on expiry/rejection. +// +// The controller is only a bounded release TRIGGER, never a signer: the +// poster's agent pre-signs the upto authorization (payTo is signed into it, so +// whoever triggers settlement can only release the signed transfer to the +// signed recipient — or nothing). The facilitator holds the auth and performs +// settlement; the controller authenticates to it with a bearer token. This +// preserves the "controller holds no keys" invariant exactly as today. +// +// Two implementations: +// - HTTPGateway: POSTs to the facilitator's /escrow/{reserve,capture,void} +// routes (the ConditionalSettleFacilitator wrapper around x402-rs — the +// next slice on the facilitator side). +// - LedgerGateway: in-memory dev mode for local-first stacks and tests. It +// is escrow THEATER — nothing is held anywhere — and every receipt is +// labeled dev-ledger so it can never be mistaken for settlement. +package escrow + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "sync" +) + +// States reported by a Gateway. They feed ServiceBountyStatus.EscrowState. 
const (
	StateReserved = "Reserved" // reported once the reward auth is held
	StateCaptured = "Captured" // reported once the held auth has been settled
	StateVoided   = "Voided"   // reported once the held auth has been dropped
)

// ReserveRequest identifies the reward authorization the facilitator should
// verify and hold for a bounty.
type ReserveRequest struct {
	// ID is the stable escrow key — the ServiceBounty UID.
	ID string `json:"id"`
	// Network, PayTo, Asset, Amount describe the reward leg (PayTo is the
	// poster's refund address; the fulfiller payout address is bound in the
	// pre-signed auth itself at claim time).
	Network string `json:"network"`
	PayTo   string `json:"payTo"`
	Asset   string `json:"asset"`
	Amount  string `json:"amount"`
	// Scheme is the x402 settlement scheme (upto today, authCapture later).
	Scheme string `json:"scheme"`
}

// Receipt is the gateway's record of an escrow operation.
type Receipt struct {
	State  string `json:"state"`
	TxHash string `json:"txHash,omitempty"`
}

// Gateway is the Hold/Release/Refund seam. Implementations must be safe for
// concurrent use by reconcile workers.
type Gateway interface {
	// Reserve verifies + holds the reward auth for id. Idempotent.
	Reserve(ctx context.Context, req ReserveRequest) (Receipt, error)
	// Capture settles the held auth to the fulfiller. Idempotent: capturing
	// an already-captured id returns the original receipt.
	Capture(ctx context.Context, id string) (Receipt, error)
	// Void drops the held auth (poster keeps funds). Voiding an unknown or
	// already-voided id is not an error — refund flows must be re-runnable.
	Void(ctx context.Context, id string) (Receipt, error)
}

// BatchRecipient is one payee of a split capture (the eval-payment leg: each
// revealed counting evaluator gets perEvaluator from the held budget).
type BatchRecipient struct {
	Address string `json:"address"`
	Amount  string `json:"amount"`
}

// BatchGateway is the x402 batch-settlement seam: one held authorization
// captured to k recipients in one settlement (the eval budget → evaluators).
// Optional — callers type-assert and fall back to plain Capture.
type BatchGateway interface {
	// CaptureBatch settles the held auth for id split across recipients.
	CaptureBatch(ctx context.Context, id string, recipients []BatchRecipient) (Receipt, error)
}

// ── HTTPGateway ─────────────────────────────────────────────────────────────

// HTTPGateway drives the facilitator's escrow routes.
type HTTPGateway struct {
	// Base is the facilitator URL, e.g. https://x402.gcp.obol.tech.
	Base string
	// Token authenticates capture/void (the release-authority credential).
	Token  string
	Client *http.Client
}

// Compile-time checks that HTTPGateway implements both seams.
var (
	_ Gateway      = (*HTTPGateway)(nil)
	_ BatchGateway = (*HTTPGateway)(nil)
)

// Reserve POSTs req to the facilitator's reserve route, keyed by req.ID.
func (g *HTTPGateway) Reserve(ctx context.Context, req ReserveRequest) (Receipt, error) {
	return g.post(ctx, "reserve", req.ID, req)
}

// Capture POSTs to the facilitator's capture route for id with no body.
func (g *HTTPGateway) Capture(ctx context.Context, id string) (Receipt, error) {
	return g.post(ctx, "capture", id, nil)
}

// Void POSTs to the facilitator's void route for id with no body.
func (g *HTTPGateway) Void(ctx context.Context, id string) (Receipt, error) {
	return g.post(ctx, "void", id, nil)
}

// CaptureBatch drives the facilitator's batch-settlement capture: the held
// auth for id is settled to all recipients in one transaction.
func (g *HTTPGateway) CaptureBatch(ctx context.Context, id string, recipients []BatchRecipient) (Receipt, error) {
	return g.post(ctx, "capture", id, map[string]any{"recipients": recipients})
}

// post sends one escrow operation to the facilitator and decodes its receipt.
//
// The request is POST {Base}/escrow/{op}/{id}; body is JSON-encoded when
// non-nil. A bearer Authorization header is attached when Token is set, and
// http.DefaultClient is used when Client is nil. Any non-2xx status is
// returned as an error carrying the status code and the (at most 64 KiB of)
// response body that was read.
//
// id is used verbatim as a URL path segment, so an empty id or one containing
// '/', '?' or '#' is rejected before any request is made: it would otherwise
// address a different facilitator route than the caller intended.
func (g *HTTPGateway) post(ctx context.Context, op, id string, body any) (Receipt, error) {
	if id == "" || strings.ContainsAny(id, "/?#") {
		return Receipt{}, fmt.Errorf("escrow %s: invalid escrow id %q", op, id)
	}

	var payload io.Reader
	if body != nil {
		raw, err := json.Marshal(body)
		if err != nil {
			return Receipt{}, err
		}
		payload = bytes.NewReader(raw)
	}

	url := strings.TrimRight(g.Base, "/") + "/escrow/" + op + "/" + id
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, payload)
	if err != nil {
		return Receipt{}, err
	}
	req.Header.Set("Content-Type", "application/json")
	if g.Token != "" {
		req.Header.Set("Authorization", "Bearer "+g.Token)
	}

	client := g.Client
	if client == nil {
		client = http.DefaultClient
	}
	resp, err := client.Do(req)
	if err != nil {
		return Receipt{}, fmt.Errorf("escrow %s %s: %w", op, id, err)
	}
	defer resp.Body.Close()

	// Cap the read so a misbehaving facilitator cannot make us buffer an
	// unbounded body.
	raw, readErr := io.ReadAll(io.LimitReader(resp.Body, 1<<16))
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Best effort: report whatever part of the error body arrived.
		return Receipt{}, fmt.Errorf("escrow %s %s: facilitator returned %d: %s", op, id, resp.StatusCode, strings.TrimSpace(string(raw)))
	}
	if readErr != nil {
		// A truncated success body must not be reported as a JSON problem.
		return Receipt{}, fmt.Errorf("escrow %s %s: read response: %w", op, id, readErr)
	}

	var receipt Receipt
	if err := json.Unmarshal(raw, &receipt); err != nil {
		return Receipt{}, fmt.Errorf("escrow %s %s: decode receipt: %w", op, id, err)
	}
	return receipt, nil
}

// ── LedgerGateway (dev) ─────────────────────────────────────────────────────

// LedgerGateway records escrow state in memory. Local-first dev mode only —
// no funds are verified or held anywhere. Receipts carry a dev-ledger TxHash
// so downstream surfaces can never present them as settlement.
+type LedgerGateway struct { + mu sync.Mutex + states map[string]Receipt +} + +func NewLedgerGateway() *LedgerGateway { + return &LedgerGateway{states: make(map[string]Receipt)} +} + +func (g *LedgerGateway) Reserve(_ context.Context, req ReserveRequest) (Receipt, error) { + g.mu.Lock() + defer g.mu.Unlock() + if r, ok := g.states[req.ID]; ok { + return r, nil + } + r := Receipt{State: StateReserved, TxHash: "dev-ledger:" + req.ID} + g.states[req.ID] = r + return r, nil +} + +func (g *LedgerGateway) Capture(_ context.Context, id string) (Receipt, error) { + g.mu.Lock() + defer g.mu.Unlock() + r, ok := g.states[id] + if !ok { + return Receipt{}, fmt.Errorf("escrow capture %s: nothing reserved", id) + } + if r.State == StateVoided { + return Receipt{}, fmt.Errorf("escrow capture %s: already voided", id) + } + r.State = StateCaptured + g.states[id] = r + return r, nil +} + +// CaptureBatch marks the held budget captured with a dev-ledger receipt +// naming the recipient count — escrow theater, honestly labeled, like the +// rest of the ledger. +func (g *LedgerGateway) CaptureBatch(ctx context.Context, id string, recipients []BatchRecipient) (Receipt, error) { + r, err := g.Capture(ctx, id) + if err != nil { + return Receipt{}, err + } + r.TxHash = fmt.Sprintf("dev-ledger:%s:batch[%d]", id, len(recipients)) + g.mu.Lock() + g.states[id] = r + g.mu.Unlock() + return r, nil +} + +func (g *LedgerGateway) Void(_ context.Context, id string) (Receipt, error) { + g.mu.Lock() + defer g.mu.Unlock() + r, ok := g.states[id] + if !ok { + // Re-runnable refunds: voiding the unknown is a no-op success. 
+ return Receipt{State: StateVoided, TxHash: "dev-ledger:" + id}, nil + } + if r.State == StateCaptured { + return Receipt{}, fmt.Errorf("escrow void %s: already captured", id) + } + r.State = StateVoided + g.states[id] = r + return r, nil +} diff --git a/internal/x402/escrow/gateway_test.go b/internal/x402/escrow/gateway_test.go new file mode 100644 index 00000000..7525a3fe --- /dev/null +++ b/internal/x402/escrow/gateway_test.go @@ -0,0 +1,105 @@ +package escrow + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +func TestLedgerGateway_Lifecycle(t *testing.T) { + g := NewLedgerGateway() + ctx := context.Background() + + r, err := g.Reserve(ctx, ReserveRequest{ID: "b1", Amount: "500.00"}) + if err != nil || r.State != StateReserved { + t.Fatalf("Reserve = %+v, %v", r, err) + } + if !strings.HasPrefix(r.TxHash, "dev-ledger:") { + t.Fatalf("ledger receipt %q must be labeled dev-ledger", r.TxHash) + } + + // Reserve is idempotent. + again, err := g.Reserve(ctx, ReserveRequest{ID: "b1"}) + if err != nil || again.State != StateReserved { + t.Fatalf("re-Reserve = %+v, %v", again, err) + } + + c, err := g.Capture(ctx, "b1") + if err != nil || c.State != StateCaptured { + t.Fatalf("Capture = %+v, %v", c, err) + } + + // Captured escrow cannot be voided (the reward was legitimately paid). + if _, err := g.Void(ctx, "b1"); err == nil { + t.Fatal("Void after Capture should error") + } +} + +func TestLedgerGateway_VoidPaths(t *testing.T) { + g := NewLedgerGateway() + ctx := context.Background() + + // Voiding the unknown is a no-op success (refunds must be re-runnable). 
+ if r, err := g.Void(ctx, "ghost"); err != nil || r.State != StateVoided { + t.Fatalf("Void(unknown) = %+v, %v", r, err) + } + + if _, err := g.Reserve(ctx, ReserveRequest{ID: "b2"}); err != nil { + t.Fatal(err) + } + if r, err := g.Void(ctx, "b2"); err != nil || r.State != StateVoided { + t.Fatalf("Void(reserved) = %+v, %v", r, err) + } + // Capturing a voided escrow fails. + if _, err := g.Capture(ctx, "b2"); err == nil { + t.Fatal("Capture after Void should error") + } +} + +func TestHTTPGateway_RoutesAndAuth(t *testing.T) { + var gotPath, gotAuth string + var gotBody ReserveRequest + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.Path + gotAuth = r.Header.Get("Authorization") + if r.Body != nil { + _ = json.NewDecoder(r.Body).Decode(&gotBody) + } + _ = json.NewEncoder(w).Encode(Receipt{State: StateReserved, TxHash: "0xabc"}) + })) + defer server.Close() + + g := &HTTPGateway{Base: server.URL + "/", Token: "secret", Client: server.Client()} + r, err := g.Reserve(context.Background(), ReserveRequest{ID: "b3", Network: "base", Amount: "10"}) + if err != nil { + t.Fatalf("Reserve: %v", err) + } + if gotPath != "/escrow/reserve/b3" { + t.Errorf("path = %q, want /escrow/reserve/b3", gotPath) + } + if gotAuth != "Bearer secret" { + t.Errorf("auth = %q, want bearer token", gotAuth) + } + if gotBody.Network != "base" || gotBody.Amount != "10" { + t.Errorf("body = %+v", gotBody) + } + if r.State != StateReserved || r.TxHash != "0xabc" { + t.Errorf("receipt = %+v", r) + } +} + +func TestHTTPGateway_SurfacesFacilitatorErrors(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + http.Error(w, "no such escrow", http.StatusNotFound) + })) + defer server.Close() + + g := &HTTPGateway{Base: server.URL, Client: server.Client()} + if _, err := g.Capture(context.Background(), "missing"); err == nil || !strings.Contains(err.Error(), "404") { + 
t.Fatalf("Capture error = %v, want 404 surfaced", err) + } +} diff --git a/internal/x402mcp/bountyreport.go b/internal/x402mcp/bountyreport.go new file mode 100644 index 00000000..ccd4824a --- /dev/null +++ b/internal/x402mcp/bountyreport.go @@ -0,0 +1,205 @@ +package x402mcp + +// bounty_report — a FREE companion tool on the MCP server: serves the A2UI +// report deliverable of a settled ServiceBounty. Reports are gate:local in v1 +// (the fulfiller's runner persists them on disk under the agent hierarchy); +// the cross-party paid gate (gate: mcp-x402) is this same tool wrapped with +// the existing payment wrapper — no new machinery. +// +// Variant selection is a2ui catalog negotiation: the caller passes its +// supportedCatalogIds in preference order and the first task-package variant +// whose catalogId matches wins. kind=declarative returns the raw A2UI +// v1.0-candidate message-list JSON (native render, no iframes); kind=mcp-app +// wraps the self-contained HTML into a `custom` McpApp node with url_encoded +// content — the CLIENT supplies double-iframe isolation (sandbox proxy + +// srcdoc inner frame, never allow-same-origin); this server only returns JSON. + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net/url" + "os" + "path/filepath" + "strings" + + mcpsdk "github.com/modelcontextprotocol/go-sdk/mcp" + + "github.com/ObolNetwork/obol-stack/internal/bounty" +) + +type bountyReportArgs struct { + Name string `json:"name"` + Namespace string `json:"namespace"` + TaskType string `json:"taskType"` + SupportedCatalogIDs []string `json:"supportedCatalogIds"` +} + +// bountyReportMeta is the optional task.json sidecar the runner writes next to +// the report files, removing task-type inference ambiguity. +type bountyReportMeta struct { + TypeRef string `json:"typeRef"` +} + +// AddBountyReportTool registers the free bounty_report tool. reportsDir layout: +// /// (+ optional task.json +// sidecar {"typeRef":"benchmark@v1"}). 
+func AddBountyReportTool(server *mcpsdk.Server, reportsDir string) { + server.AddTool(&mcpsdk.Tool{ + Name: "bounty_report", + Description: "Fetch a ServiceBounty's A2UI report. Pass supportedCatalogIds in preference " + + "order (a2ui catalog negotiation): a declarative match returns the A2UI v1.0 message list; " + + "obol.org:mcp-app/v1 returns a custom McpApp node (self-contained HTML, render in the " + + "double-iframe sandbox). Args: {name, namespace?, taskType?, supportedCatalogIds?}.", + InputSchema: map[string]any{ + "type": "object", + "required": []string{"name"}, + "properties": map[string]any{ + "name": map[string]any{"type": "string", "description": "Bounty name."}, + "namespace": map[string]any{"type": "string", "description": "Bounty namespace (default hermes-obol-agent)."}, + "taskType": map[string]any{"type": "string", "description": "Task type ref (e.g. benchmark@v1); inferred from the task.json sidecar or the report files when omitted."}, + "supportedCatalogIds": map[string]any{"type": "array", "items": map[string]any{"type": "string"}, "description": "Client-supported catalog ids in preference order."}, + }, + }, + }, func(_ context.Context, req *mcpsdk.CallToolRequest) (*mcpsdk.CallToolResult, error) { + var args bountyReportArgs + if len(req.Params.Arguments) > 0 { + if err := json.Unmarshal(req.Params.Arguments, &args); err != nil { + return errResult(fmt.Sprintf("bad arguments: %v", err)), nil + } + } + out, err := renderBountyReport(reportsDir, args) + if err != nil { + return errResult(err.Error()), nil + } + return textResult(out), nil + }) +} + +// renderBountyReport resolves the report directory, negotiates the variant, +// and renders it. Exposed for tests. 
+func renderBountyReport(reportsDir string, args bountyReportArgs) (string, error) { + if strings.TrimSpace(args.Name) == "" { + return "", errors.New("name is required") + } + if args.Namespace == "" { + args.Namespace = "hermes-obol-agent" + } + // The two path segments come from the caller — never let them escape the + // reports root. + for _, segment := range []string{args.Name, args.Namespace} { + if segment != filepath.Base(segment) || segment == ".." || segment == "." { + return "", fmt.Errorf("invalid path segment %q", segment) + } + } + + dir := filepath.Join(reportsDir, args.Namespace, args.Name) + if info, err := os.Stat(dir); err != nil || !info.IsDir() { + return "", fmt.Errorf("no report found for %s/%s", args.Namespace, args.Name) + } + + t, err := resolveReportTaskType(dir, args.TaskType) + if err != nil { + return "", err + } + + variant, raw, err := negotiateReportVariant(dir, t, args.SupportedCatalogIDs) + if err != nil { + return "", err + } + + if variant.Kind == "mcp-app" { + node := map[string]any{ + "type": "custom", + "name": "McpApp", + "properties": map[string]any{ + "title": fmt.Sprintf("%s — %s report", args.Name, t.Ref()), + // decodeURIComponent-safe percent encoding (QueryEscape's '+' + // for space would corrupt the HTML on decode). + "content": "url_encoded:" + strings.ReplaceAll(url.QueryEscape(string(raw)), "+", "%20"), + }, + } + encoded, err := json.Marshal(node) + if err != nil { + return "", err + } + return string(encoded), nil + } + return string(raw), nil +} + +// resolveReportTaskType picks the task type: explicit arg > task.json sidecar > +// first enabled type with a variant surface present in dir. 
+func resolveReportTaskType(dir, explicit string) (bounty.TaskType, error) { + if explicit != "" { + return bounty.Resolve(explicit) + } + + if raw, err := os.ReadFile(filepath.Join(dir, "task.json")); err == nil { + var meta bountyReportMeta + if err := json.Unmarshal(raw, &meta); err == nil && meta.TypeRef != "" { + return bounty.Resolve(meta.TypeRef) + } + } + + enabled, err := bounty.Enabled() + if err != nil { + return bounty.TaskType{}, err + } + for _, t := range enabled { + for _, v := range t.Deliverable.Report.Variants { + if _, err := os.Stat(filepath.Join(dir, filepath.Base(v.Surface))); err == nil { + return t, nil + } + } + } + return bounty.TaskType{}, fmt.Errorf("cannot infer task type for %s (write a task.json sidecar or pass taskType)", dir) +} + +// negotiateReportVariant applies a2ui catalog negotiation: walk the caller's +// supportedCatalogIds in preference order, return the first variant that +// matches AND whose surface file exists. No ids → first variant present. 
+func negotiateReportVariant(dir string, t bounty.TaskType, supported []string) (bounty.ReportVariant, []byte, error) { + variants := t.Deliverable.Report.Variants + if len(variants) == 0 { + return bounty.ReportVariant{}, nil, fmt.Errorf("task type %s declares no report variants", t.Ref()) + } + + read := func(v bounty.ReportVariant) []byte { + raw, err := os.ReadFile(filepath.Join(dir, filepath.Base(v.Surface))) + if err != nil { + return nil + } + return raw + } + + if len(supported) > 0 { + for _, id := range supported { + for _, v := range variants { + if v.CatalogID == id { + if raw := read(v); raw != nil { + return v, raw, nil + } + } + } + } + return bounty.ReportVariant{}, nil, fmt.Errorf( + "no variant of %s matches supportedCatalogIds %v (available: %s)", t.Ref(), supported, variantCatalogs(variants)) + } + + for _, v := range variants { + if raw := read(v); raw != nil { + return v, raw, nil + } + } + return bounty.ReportVariant{}, nil, fmt.Errorf("no report files present in %s", dir) +} + +func variantCatalogs(variants []bounty.ReportVariant) string { + ids := make([]string, 0, len(variants)) + for _, v := range variants { + ids = append(ids, v.CatalogID) + } + return strings.Join(ids, ", ") +} diff --git a/internal/x402mcp/bountyreport_test.go b/internal/x402mcp/bountyreport_test.go new file mode 100644 index 00000000..8e24d772 --- /dev/null +++ b/internal/x402mcp/bountyreport_test.go @@ -0,0 +1,146 @@ +package x402mcp + +import ( + "encoding/json" + "net/url" + "os" + "path/filepath" + "strings" + "testing" +) + +const ( + basicCatalogID = "https://a2ui.org/specification/v1_0/catalogs/basic/catalog.json" + mcpAppCatalogID = "obol.org:mcp-app/v1" +) + +func writeReportFixture(t *testing.T) string { + t.Helper() + root := t.TempDir() + dir := filepath.Join(root, "hermes-obol-agent", "smoke-bench") + if err := os.MkdirAll(dir, 0o755); err != nil { + t.Fatal(err) + } + files := map[string]string{ + "report.a2ui.json": `{"messages":[{"version":"v1.0"}]}`, 
+ "report.app.html": "score & verdict", + "task.json": `{"typeRef":"benchmark@v1"}`, + } + for name, content := range files { + if err := os.WriteFile(filepath.Join(dir, name), []byte(content), 0o644); err != nil { + t.Fatal(err) + } + } + return root +} + +func TestBountyReport_DeclarativeByDefault(t *testing.T) { + root := writeReportFixture(t) + + out, err := renderBountyReport(root, bountyReportArgs{Name: "smoke-bench"}) + if err != nil { + t.Fatalf("render: %v", err) + } + if !strings.Contains(out, `"version":"v1.0"`) { + t.Errorf("default render should be the raw declarative A2UI JSON, got %q", out) + } +} + +func TestBountyReport_NegotiatesMcpApp(t *testing.T) { + root := writeReportFixture(t) + + out, err := renderBountyReport(root, bountyReportArgs{ + Name: "smoke-bench", + SupportedCatalogIDs: []string{mcpAppCatalogID, basicCatalogID}, + }) + if err != nil { + t.Fatalf("render: %v", err) + } + + var node struct { + Type string `json:"type"` + Name string `json:"name"` + Properties struct { + Content string `json:"content"` + Title string `json:"title"` + } `json:"properties"` + } + if err := json.Unmarshal([]byte(out), &node); err != nil { + t.Fatalf("mcp-app render is not a JSON node: %v", err) + } + if node.Type != "custom" || node.Name != "McpApp" { + t.Errorf("node = %s/%s, want custom/McpApp", node.Type, node.Name) + } + if !strings.HasPrefix(node.Properties.Content, "url_encoded:") { + t.Fatalf("content must be url_encoded:-prefixed, got %q", node.Properties.Content[:20]) + } + decoded, err := url.QueryUnescape(strings.TrimPrefix(node.Properties.Content, "url_encoded:")) + if err != nil { + t.Fatalf("content does not decode: %v", err) + } + if decoded != "score & verdict" { + t.Errorf("decoded content = %q (encoding must be decodeURIComponent-safe)", decoded) + } +} + +func TestBountyReport_PrefersClientOrder(t *testing.T) { + root := writeReportFixture(t) + + out, err := renderBountyReport(root, bountyReportArgs{ + Name: "smoke-bench", + 
SupportedCatalogIDs: []string{basicCatalogID, mcpAppCatalogID}, + }) + if err != nil { + t.Fatalf("render: %v", err) + } + if strings.Contains(out, "McpApp") { + t.Error("client preferred the basic catalog; declarative variant must win") + } +} + +func TestBountyReport_NoCatalogMatch(t *testing.T) { + root := writeReportFixture(t) + + _, err := renderBountyReport(root, bountyReportArgs{ + Name: "smoke-bench", + SupportedCatalogIDs: []string{"example.com:unknown/v9"}, + }) + if err == nil || !strings.Contains(err.Error(), "supportedCatalogIds") { + t.Errorf("no-match must error with the available catalogs, got %v", err) + } +} + +func TestBountyReport_InferenceWithoutSidecar(t *testing.T) { + root := writeReportFixture(t) + if err := os.Remove(filepath.Join(root, "hermes-obol-agent", "smoke-bench", "task.json")); err != nil { + t.Fatal(err) + } + + out, err := renderBountyReport(root, bountyReportArgs{Name: "smoke-bench"}) + if err != nil { + t.Fatalf("inference from report files should work: %v", err) + } + if !strings.Contains(out, `"version":"v1.0"`) { + t.Errorf("unexpected render: %q", out) + } +} + +func TestBountyReport_RejectsPathTraversal(t *testing.T) { + root := writeReportFixture(t) + + for _, name := range []string{"../smoke-bench", "..", "a/b"} { + if _, err := renderBountyReport(root, bountyReportArgs{Name: name}); err == nil { + t.Errorf("name %q must be rejected (path traversal)", name) + } + } + if _, err := renderBountyReport(root, bountyReportArgs{Name: "smoke-bench", Namespace: "../hermes-obol-agent"}); err == nil { + t.Error("namespace traversal must be rejected") + } +} + +func TestBountyReport_MissingBounty(t *testing.T) { + root := writeReportFixture(t) + if _, err := renderBountyReport(root, bountyReportArgs{Name: "nonexistent"}); err == nil { + t.Error("missing report dir must error") + } +} diff --git a/internal/x402mcp/server.go b/internal/x402mcp/server.go index 255254fc..03d71fcd 100644 --- a/internal/x402mcp/server.go +++ 
b/internal/x402mcp/server.go @@ -56,6 +56,11 @@ type Options struct { FacilitatorURL string // x402 facilitator (verify/settle); caller supplies a default Upstream string // backend HTTP service the paid tool POSTs the buyer's JSON args to (e.g. a weather/data API) UpstreamHeaders map[string]string // optional auth headers for the backend (e.g. "X-Api-Key": ""); set server-side, never exposed to buyers + + // BountyReportsDir, when set, registers the free bounty_report tool + // serving ServiceBounty A2UI reports from + // <BountyReportsDir>/<namespace>/<name>/<report file>. + BountyReportsDir string } // Serve builds and runs the x402-paid MCP server in the foreground over @@ -121,6 +126,12 @@ func Serve(ctx context.Context, opts Options) error { return textResult("pong"), nil }) + // Free bounty-report tool (unwrapped — reports are gate:local in v1; the + // mcp-x402 gate wraps this same handler with the payment wrapper later). + if strings.TrimSpace(opts.BountyReportsDir) != "" { + AddBountyReportTool(server, opts.BountyReportsDir) + } + // Paid tool: forward the buyer's JSON arguments to the backend service and // return the response. The arg shape is the backend's own request body — // documented by the operator in opts.Description (e.g. 
a get_weather tool: diff --git a/justfile b/justfile index 802f66b4..95db1126 100644 --- a/justfile +++ b/justfile @@ -152,6 +152,8 @@ generate: purchaserequests) target="purchaserequest-crd.yaml" ;; registrationrequests) target="registrationrequest-crd.yaml" ;; serviceoffers) target="serviceoffer-crd.yaml" ;; + servicebounties) target="servicebounty-crd.yaml" ;; + evaluatorenrollments) target="evaluatorenrollment-crd.yaml" ;; *) target="${plural%s}-crd.yaml" ;; esac mv "$f" "$out/$target" diff --git a/plans/bounty-ane-marketplace-design.md b/plans/bounty-ane-marketplace-design.md new file mode 100644 index 00000000..a6d5ee78 --- /dev/null +++ b/plans/bounty-ane-marketplace-design.md @@ -0,0 +1,765 @@ +# Bounties: a demand-side marketplace for AI work on a distributed ANE fleet + +**Status:** Design / buildable brainstorm · **Owner:** Lead Architect · **Target:** obol-stack `obol.org/v1alpha1` + +> **Naming (locked):** the CRD Kind is **`ServiceBounty`** (plural `servicebounties`, short `sb`) so it sorts beside `ServiceOffer` in `kubectl get crds` and reads as its matched pair. The CLI verb stays **`obol bounty`** — Kind ≠ verb, exactly as `obol sell` creates a `ServiceOffer`. "Bounty" remains the human/CLI/domain concept (e.g. `BountyRunner`, `BountyEscrow.sol`); only the Kubernetes resource carries the `Service…` prefix. + +> ⚠️ **READ FIRST — must-fix corrections (from adversarial review, Appendix B).** The body below is the design exploration; these six corrections OVERRIDE it where they conflict: +> 1. **Payout does NOT reuse the buyer-sidecar.** `internal/x402/buyer/proxy.go` is a request-time `http.RoundTripper` that burns one voucher only when a *live* x402 upstream returns `<400` (`ConfirmSpend`, `signer.go:295`); money flows buyer→seller. A bounty needs escrow→fulfiller-on-verdict — the inverse. 
Honest v0 payout = a **coordinator agent** that, on `Verified`, submits one poster-pre-signed ERC-3009 voucher (`payTo`=fulfiller) to the facilitator `/settle` directly. That coordinator **is a trusted release authority** — say so; don't claim "trustless on shipped code." +> 2. **Agent RBAC is cluster-wide, not namespace-scoped.** `openclaw-monetize-write` is a ClusterRole+ClusterRoleBinding to both agent SAs. Put `servicebounties` in a **namespaced Role/RoleBinding**, or state the cluster-wide posture plainly. +> 3. **Remove `escrowRef.namespace` entirely** — force same-namespace by construction (a runtime string-compare guard is a future-refactor footgun given cluster-wide PurchaseRequest write). +> 4. **Cut the ANE/Ray worker substrate (§6) from v0.** Fulfillment is opaque: any process that emits a signed deliverable. ANE inference is real but niche (≤8B, 2–5× slower than the same Mac's GPU); ANE *training* is research PoC. See Appendix A. +> 5. **Hard invariant + test: a ServiceBounty NEVER creates an HTTPRoute/Middleware/Secret/Namespace.** The servicebounty-controller has zero route/secret creation capability; discovery rides only the existing `/skill.md` + `agent-registration.json`. Extend `internal/embed/embed_crd_test.go`. +> 6. **§5.3's stake-weighted verifier selection/slashing and the §9 v1/v2 stake/juror-committee roadmap are SUPERSEDED by §11 (evaluator market, 2026-06-10).** Post-scaffold design steer: no validator staking, no slashing — verification is an OBOL-paid evaluator market with a median-of-k quorum and a Shadow→Probation→Full reputation ladder, **on by default** with `--dangerously-skip-verification` as the explicit opt-out. Wherever the body says "stake-weighted", "slashing", "challenge bond", or "juror committee" for verification, read §11. + +--- + +## 1. 
Vision + +Someone on the timeline posted a public bounty — *"benchmark DeepSeek-V4-Flash on real hardware, $500"* — and a stranger ran it on their MacBook and got paid $500 USDT on Polygon. That transaction happened on Twitter and a block explorer, not on a marketplace, because no marketplace for it exists. obol-stack already ships the **seller half** of exactly this economy: a `ServiceOffer` declares "I will serve work for pay," the controller publishes an x402-gated route (`internal/serviceoffercontroller/controller.go:528-532`), and the buyer sidecar settles ERC-3009 vouchers after success (`internal/x402/buyer/`). What's missing is the **buyer-initiated half**: a way to *post demand* — "here is money, here is the work, here is how I'll know it's done." This document specifies the **ServiceBounty**: the structural inverse of a ServiceOffer that turns obol-stack into a two-sided marketplace, with a distributed Apple-silicon fleet (GPU-first, ANE where it honestly helps) as the execution substrate. + +--- + +## 2. 
Core insight: the ServiceBounty is the inverse of a ServiceOffer + +A ServiceOffer and a ServiceBounty are the two halves of one market, mirror-imaged on every axis: + +| Axis | `ServiceOffer` (supply) | `ServiceBounty` (demand) | +|---|---|---| +| Who initiates payment | Buyer, at request time | **Poster, up front, escrowed** | +| Money direction | buyer → `payment.payTo` (the seller) | escrow → fulfiller (`payTo` filled at claim) | +| Terminal state | a route that **stays up** serving traffic (`Ready`, controller.go:528-532) | a resource that **settles and closes** (`Paid`/`Refunded`) | +| Work latency | milliseconds | minutes → hours → days | +| Acceptance signal | HTTP `<400` from upstream | a **verifier judgement on a deliverable** | +| Counterparties | 1 buyer ↔ 1 seller | 1 poster ↔ **N fulfillers** (race / split / redundant) | +| Side-effect rail | Middleware + HTTPRoute (controller.go:660/:695) | escrow `PurchaseRequest` payout (types.go:565) | +| Time-box | `DrainAt` graceful teardown (types.go:142) | `deadline` → expiry → refund | +| Sibling CR | `RegistrationRequest` (on-chain side effect, controller.go:802) | `PurchaseRequest` (escrow side effect, types.go:536) | +| Trust vocab | `registration.supportedTrust[]` (types.go:320) | `acceptance.verifier` (same enum, repurposed) | + +**Crisp statement:** *A ServiceOffer is standing supply that converges to one live route and stays up. A ServiceBounty is time-boxed demand that converges to one paid deliverable and closes. They are the same state-machine skeleton run in opposite directions, sharing the same money rail (x402/ERC-3009), the same identity rail (ERC-8004), and the same controller plumbing — and together they are a complete marketplace.* + +This symmetry is the spine of the whole design. Everywhere a perspective proposed new machinery, I checked whether the *inverse* of existing ServiceOffer machinery already does the job. Usually it does. + +--- + +## 3. 
Data model — the `ServiceBounty` CRD + +**The call: a new top-level `ServiceBounty` CRD in `obol.org/v1alpha1`, plus a co-located `servicebounty-controller` in the existing `serviceoffer-controller` binary, reusing `PurchaseRequest` as the escrow/payout primitive.** + +### 3.1 Why a new CRD and not `type=bounty` or a generic `WorkRequest` + +I considered three designs and reject two: + +- **Rejected: `ServiceOffer.type=bounty`.** ServiceOffer's entire reconcile loop converges toward *keeping an HTTPRoute + Traefik Middleware live* (`reconcilePaymentGate` controller.go:660, `reconcileRoute` controller.go:695). A bounty has **no upstream Service to route to** and **inverts the meaning of `payment.payTo`** (seller-receives → escrow-pays-out). Overloading the enum forces every consumer — the verifier's `serviceoffer_source`, `/skill.md`, `obol sell list/status`, the `IsInference()/IsAgent()` helpers — to learn a type with no route, no upstream, and reversed money flow. Large blast radius for a leaky abstraction. +- **Rejected: a generic `WorkRequest`.** Too abstract to validate. The whole value of a CRD is that `kubectl`/the API server enforce a schema; a `map[string]any` task blob defers all validation to the controller and loses the per-type acceptance gates that make verification possible. +- **Chosen: a dedicated `ServiceBounty` CRD.** It mirrors the ServiceOffer shape exactly (group `obol.org`, version `v1alpha1`, status conditions, finalizers), so it inherits the codebase's conventions and RBAC posture, while keeping the demand-side lifecycle (`Open → Claimed → … → Paid`, with `Disputed/Expired/Refunded`) cleanly separate from the route-publication loop. This is the *same architectural precedent the codebase already chose* for `RegistrationRequest`: a sibling CR + a sibling reconcile pass in the same binary (controller.go:524, :802), isolating a side-effecting concern from the main loop. 
+ +The unimplemented `fine-tuning` ServiceOffer enum value (types.go:105) is the **supply-side dual** ("I sell fine-tuning capacity") of a demand-side `fine-tune` bounty. I leave that enum untouched and make `ServiceBounty.spec.task.type` include `fine-tune`, so the two meet in the middle without entangling. + +### 3.2 Field table + +`ServiceBounty` is **namespaced** (mirrors ServiceOffer, inherits per-namespace RBAC). Register `ServiceBountyKind = "ServiceBounty"`, `ServiceBountyResource = "servicebounties"`, `ServiceBountyGVR` next to the existing GVRs (types.go:48-67) — **plural `servicebounties`, singular `servicebounty`, shortName `sb`**, so it sorts beside `serviceoffers`. + +| Field | Type | Reuses (file:line) | Notes | +|---|---|---|---| +| `spec.task.type` | enum `benchmark\|fine-tune\|serve\|http\|generic` | `ServiceOfferSpec.Type` (types.go:105) | `fine-tune` deliberately mirrors the unimplemented `fine-tuning` supply hook. | +| `spec.task.runner` | string | new | `BountyRunner` plugin id (§6), e.g. `mlx-lora`, `anemll-serve`. Opaque to the controller. | +| `spec.task.requires[]` | `[]string` | new | Capability tags the fulfiller node must advertise (e.g. `serve.ane`, `finetune.mlx`). Matched at claim. | +| `spec.task.targetModel` | `{name, runtime}` | `ServiceOfferModel` (types.go:166-174) | Reused verbatim. Runtime enum `ollama\|vllm\|tgi`. | +| `spec.task.datasetRef` | `{uri, hash, format}` | new | Content-addressed dataset pointer; hash makes verification deterministic. | +| `spec.task.harnessRef` | `{name, uri, version}` | new | Pinned eval harness / trainer image, content-addressed. | +| `spec.task.params` | `map[string]string` | `ServiceOfferSpec.Provenance` shape (types.go:129) | Free-form knobs (`epochs`, `lr`, `seqlen`, `tasks`). Keeps schema stable across task types. | +| `spec.acceptance.criteria[]` | `[]{metric, op, threshold, weight}` | new | Machine-checkable gates. The bounty's *raison d'être*. 
| +| `spec.acceptance.verifier` | enum `self-attested\|harness-rerun\|tee-attestation\|consensus\|poster-manual` | `supportedTrust[]` vocab (types.go:320) | How a submission is checked (§5). | +| `spec.acceptance.deliverableSchema` | `{artifacts[]{name,kind,required}, resultHashRequired}` | new | Declares a valid submission's contents. | +| `spec.reward` | `ServiceOfferPayment` | `ServiceOfferPayment` (types.go:211-247) | **Reused whole.** `method=crypto\|card` (#608), `network`, `asset`, `card{...}`, `price`. `payTo` here = escrow-return address. | +| `spec.reward.price.perRequest` | string | `PriceTable.PerRequest` (types.go:299) | The flat lump-sum reward (the "$500"). | +| `spec.reward.price.perEpoch` | string | `PriceTable.PerEpoch` (types.go:305) | Milestone/staged payout for fine-tunes. | +| `spec.escrowRef` | `{name, namespace}` → `PurchaseRequest` | `PurchaseRequest` (types.go:536) + `AgentRef` shape (types.go:159-164) | Poster's pre-signed reward auths (`PreSignedAuths[]`, types.go:565). **Confused-deputy guard: namespace MUST equal bounty namespace** (copy agent_resolver.go:46). | +| `spec.deadline` | `*metav1.Time` | `DrainAt` pattern (types.go:142) | Past deadline + no `Verified` → `Expired` → `Refunded`. Reuse requeue-at-expiry logic. | +| `spec.claimGracePeriod` | `*metav1.Duration` | `DrainGracePeriod` (types.go:148) | How long a `Claimed` fulfiller has before the claim lapses and the bounty re-opens. | +| `spec.maxFulfillers` | `int64` (default 1) | new | `1` = single-winner; `>1` = first-N-valid paid (split/redundant). | +| `spec.firstValidWins` | `bool` (default true) | new | First submission passing `acceptance` is auto-paid; controller stops accepting claims. | +| `spec.bond` | `{required, amount, token}` | new | Fulfiller anti-griefing stake (§4, §5). | +| `spec.registration` | `ServiceOfferRegistration` | `ServiceOfferRegistration` (types.go:308-333) | Optional ERC-8004 publication of the bounty as **discoverable demand**. 
| +| `spec.provenance` | `map[string]string` | `ServiceOfferSpec.Provenance` (types.go:129) | Why this bounty exists. | + +**Status** reuses the shared `Condition` type and the `isConditionTrue` AND-rollup idiom (controller.go:528-532): + +```go +type ServiceBountyStatus struct { + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + Phase string `json:"phase,omitempty"` // human rollup, like AgentStatus.Phase (types.go:718) + Conditions []Condition `json:"conditions,omitempty"` // shared type + Claims []ServiceBountyClaim `json:"claims,omitempty"` // observed fulfiller bindings + EscrowFunded bool `json:"escrowFunded,omitempty"` + EscrowRemaining string `json:"escrowRemaining,omitempty"`// mirrors PurchaseRequest.Status.Remaining (types.go:638) + WinningClaim string `json:"winningClaim,omitempty"` + PayoutTxHash string `json:"payoutTxHash,omitempty"` // like RegistrationTxHash (types.go:355) + RefundTxHash string `json:"refundTxHash,omitempty"` +} +``` + +Claims are *observed* facts → they live in `status.claims[]`, not spec (a separate `Claim` CRD over-engineers the common single-winner case). Each `ServiceBountyClaim` binds `{fulfillerAddress, fulfillerAgentRef, claimedAt, submission{artifacts,resultHash,metrics,submittedAt}, phase, payoutRef}`. + +**Lifecycle** (machine truth = condition set; `phase` is the human rollup): + +``` + ┌─────────────► Expired ──► Refunded + │ (deadline, no Verified) +Open ──► Claimed ──► InProgress ──► Submitted ──► Verified ──► Paid + ▲ │ │ │ + └─────────┘ (claimGracePeriod lapses) └─► Rejected └─► Disputed ──► (Verified | Refunded) +``` + +Condition set, each mirroring an inverse ServiceOffer condition: `EscrowFunded` (inverse of `PaymentGateReady`), `Open`, `Claimed`, `Submitted`, `Verified` (the core gate), `Paid` (inverse of `Registered`). 
The `done` rollup: + +```go +done := isConditionTrue(status,"Verified") && isConditionTrue(status,"Paid") // mirrors controller.go:528-532 +``` + +### 3.3 Three example YAMLs + +**(a) Benchmark — the motivating $500 case** + +```yaml +apiVersion: obol.org/v1alpha1 +kind: ServiceBounty +metadata: { name: bench-deepseek-v4-flash, namespace: hermes-obol-agent } +spec: + task: + type: benchmark + runner: bench + requires: ["benchmark"] + targetModel: { name: "deepseek-v4-flash", runtime: vllm } # ServiceOfferModel, types.go:166 + harnessRef: { name: lm-eval-harness, uri: "ghcr.io/eleutherai/lm-eval-harness", version: v0.4.3 } + params: { tasks: "mmlu,gsm8k,humaneval", hardwareClass: "M4-Max-40c-128g", seed: "1234", dtype: fp16 } + acceptance: + criteria: + - { metric: mmlu, op: ">=", threshold: "0.0", weight: 1 } # report-only; eval SCORE is the verifiable gold case + verifier: consensus # N-of-M re-run on committed dataset (§5) + deliverableSchema: + resultHashRequired: true + artifacts: + - { name: results.json, kind: eval-report, required: true } + - { name: run.manifest, kind: provenance, required: true } # signed run-manifest (§5.0) + reward: # ServiceOfferPayment, types.go:211 + method: crypto + network: base + payTo: "0xPOSTER...aaaa" # escrow-return addr + asset: { symbol: USDT, decimals: 6, transferMethod: eip3009 } + price: { perRequest: "500.00" } # the $500 lump sum + escrowRef: { name: bench-deepseek-escrow, namespace: hermes-obol-agent } # PurchaseRequest, types.go:536 + deadline: "2026-07-01T00:00:00Z" # DrainAt pattern, types.go:142 + claimGracePeriod: "72h" + maxFulfillers: 1 + firstValidWins: true + bond: { required: true, amount: "750.00", token: USDT } # 1.5x → lying is -EV + registration: + enabled: true + name: "Benchmark DeepSeek-V4-Flash" + skills: ["evaluation/benchmarking"] + supportedTrust: ["reputation"] +``` + +**(b) Fine-tune — staged, pay-per-epoch** + +```yaml +apiVersion: obol.org/v1alpha1 +kind: ServiceBounty +metadata: { name: 
ft-qwen-coder, namespace: hermes-obol-agent } +spec: + task: + type: fine-tune # mirrors the unimplemented supply hook, types.go:105 + runner: mlx-lora # MLX GPU trainer (NOT ane-train; see §6) + requires: ["finetune.mlx"] + targetModel: { name: "qwen3.5:9b", runtime: vllm } + datasetRef: { uri: "ipfs://bafy.../sql-pairs-v2.jsonl", hash: "sha256:9f2c...", format: jsonl } + harnessRef: { name: mlx-lm.lora, uri: "ghcr.io/obol/mlx-tune", version: 0.6.0 } + params: { epochs: "3", lr: "1e-4", loraRank: "32", seqlen: "4096" } + acceptance: + criteria: + - { metric: sql_exec_acc, op: ">=", threshold: "0.78", weight: 3 } # held-out execution accuracy + - { metric: eval_loss, op: "<=", threshold: "0.85", weight: 2 } + verifier: harness-rerun # held-out re-eval on committed checkpoint hash (§5) + deliverableSchema: + resultHashRequired: true + artifacts: + - { name: adapter.safetensors, kind: weights, required: true } + - { name: eval.json, kind: eval-report, required: true } + reward: + method: crypto + network: base-sepolia + payTo: "0xPOSTER...bbbb" + asset: { symbol: USDC, decimals: 6, transferMethod: eip3009 } + price: { perEpoch: "40.00", perRequest: "120.00" } # PerEpoch staged (types.go:305) + 120 on final pass + escrowRef: { name: ft-qwen-escrow, namespace: hermes-obol-agent } + deadline: "2026-06-20T00:00:00Z" + claimGracePeriod: "168h" + maxFulfillers: 1 + firstValidWins: false # poster reviews before final release + bond: { required: true, amount: "200.00", token: USDC } +``` + +**(c) Serve — keep ComfyUI up, pay with a credit card (MPP #608)** + +```yaml +apiVersion: obol.org/v1alpha1 +kind: ServiceBounty +metadata: { name: host-comfyui-sdxl, namespace: hermes-obol-agent } +spec: + task: + type: serve + runner: comfyui + requires: ["render"] + targetModel: { name: "sdxl-comfyui", runtime: tgi } + harnessRef: { name: comfyui, uri: "ghcr.io/comfyanonymous/comfyui", version: v0.3 } + params: { workflow: "txt2img-sdxl", endpoint_kind: openai-compat, uptime_window: 
30d } + acceptance: + criteria: + - { metric: uptime_pct, op: ">=", threshold: "99.5", weight: 3 } # Prometheus SLA, automatic + - { metric: p95_latency_ms, op: "<=", threshold: "4000", weight: 2 } + verifier: tee-attestation # enclave-bound device identity (§5) + deliverableSchema: + resultHashRequired: false + artifacts: + - { name: served-endpoint, kind: http-endpoint, required: true } # a live URL, monitored + - { name: attestation.json, kind: tee-quote, required: true } + reward: + method: card # MPP credit-card, #608, types.go:216 + card: { provider: stripe, account: "acct_1ObolHostExample", currency: usd } + price: { perHour: "0.50", perRequest: "300.00" } # PerHour for serving window (types.go:303) + escrowRef: { name: host-comfyui-escrow, namespace: hermes-obol-agent } # Stripe manual-capture hold (§4) + deadline: "2026-08-01T00:00:00Z" + claimGracePeriod: "24h" + maxFulfillers: 3 # up to 3 redundant hosts paid + firstValidWins: true +``` + +--- + +## 4. Escrow & payment + +**The invariant that survives every phase: the controller never holds keys.** This is already enforced — the controller's only secret access is `secretRef` plumbing, and the agent-resolver confused-deputy guard (agent_resolver.go:46) exists precisely to stop it brokering credentials it shouldn't. All signing lives in agent wallets / remote-signer `:9000` (`internal/openclaw/wallet.go`) / Secure Enclave (`internal/enclave/enclave_darwin.go`). The controller is declarative: it watches `ServiceBounty`/`PurchaseRequest`, drives the state machine, and **observes** tx hashes it never produces. We keep this absolute. + +The hard problem is the **temporal gap**. The shipped x402 path is a request-time micropayment: work completes in milliseconds, so the voucher *is* the conditional release and no custody is needed. A bounty inverts this — funds must commit *up front* and release *hours or days later* on a deliverable. That gap is what forces an escrow design. 
+ +### 4.1 The call: MVP = conditional-voucher escrow; end-state = on-chain `BountyEscrow` contract — and settlement is pluggable + +I evaluated three options. Resolving the disagreement between Perspectives A and B (A reuses `PurchaseRequest` as-is; B notes a bare ERC-3009 voucher has *no native condition*): + +- **Option 1 — On-chain `BountyEscrow.sol`.** Poster `lock()`s USDC/OBOL into a contract; release on a verifier EIP-712 signature; refund on timeout; native milestones + bond/slash. **Real custody, trust-minimized — but needs an audited contract per chain.** Cannot directly hold card funds. +- **Option 2 — Pre-signed conditional ERC-3009 voucher held by a coordinator agent.** Reuses `PurchaseRequest.PreSignedAuths[]` (types.go:565) verbatim. **Ships this week on existing code, zero new contracts.** *Honest limit B surfaced and A glossed:* an ERC-3009 voucher is a bearer instrument valid for its whole `validBefore` window — it has no on-chain condition. So the coordinator agent is a *de facto* custodian of the release *decision* (never of the funds-bearing key), and refund = the poster calling `cancelAuthorization(nonce)`. The poster's balance is not actually reserved. **This is escrow theater for trust-minimization — acceptable for low-value / reputation-gated pairs, fenced by a bond + value cap.** +- **Option 3 — x402-as-settlement (deliverable-as-a-sale).** The fulfiller "sells" the verified deliverable as a ServiceOffer; the poster "buys" it through the buyer sidecar. **Zero new payment code — but it provides no lock leg at all** (the fulfiller works on a promise). It is a *settlement rail*, not an escrow. + +**Decision:** +- **MVP = Option 2** for the lock, **gated to low-value / reputation-vetted fulfillers**, so we ship on shipped code. 
+- **End-state = Option 1** for custody + native milestones + slashing, with **Option 3 as the release rail** (the payout txn can be modeled as the poster buying the deliverable), and **ERC-8004 reputation progressively replacing escrow** as trust accrues. + +Critically, **the CRD surface is identical across phases** — only the *settlement adapter* swaps (`voucherAdapter` → `escrowContractAdapter` → `cardAuthAdapter`). This mirrors exactly how MPP #608 made payment *methods* pluggable (`Method: crypto|card`, types.go:216). ServiceBounty settlement becomes pluggable in the same way, across four rails: `voucher | escrow | sale | cardAuth`. **One switch, four rails, one invariant: the controller never signs.** + +### 4.2 Who signs what (no-signer invariant, made explicit) + +| Action | Signer | Where | +|---|---|---| +| `lock` / voucher pre-sign | **Poster's agent wallet** | remote-signer `:9000`, poster ns (wallet.go) | +| `release` verifier signature | **Verifier agent / oracle** | its own wallet, or **Secure Enclave** for attestable trust (enclave_darwin.go) | +| voucher submission / `release()` call | **Coordinator agent** (MVP) or any submitter holding the verifier sig (contract verifies EIP-712) | agent ns; submitter is untrusted in the contract case | +| Fulfiller bond | **Fulfiller's agent wallet** | fulfiller ns remote-signer | + +### 4.3 Milestone / per-epoch release + +`PriceTable.PerEpoch` already exists (types.go:305, marked "Fine-tuning only") — bounties finally exercise it. **One milestone = one epoch = one release tranche.** + +- **MVP:** poster pre-signs **one voucher per epoch** into the escrow `PurchaseRequest` (this is the *exact* N-auth fan-out the buyer sidecar already does, `PreSignedAuths[]`). Verifier signs off on epoch *k*'s checkpoint → coordinator submits voucher *k*. Refund = cancel the unspent epochs' nonces. +- **End-state:** `release(id, fraction)` callable per milestone; contract tracks `releasedFraction`. 
+ +For a 5-epoch fine-tune at `perEpoch: 40`, a fulfiller who completes 3/5 and then fails keeps 60% — incentive-aligned, and **poster loss is bounded to one unverified epoch**. This is the same bounded-loss discipline the buyer sidecar already enforces (`max loss = N × price`, `internal/x402/buyer/`). + +### 4.4 Fee, bond, payout + +- **Platform fee** — `feeBps` + `feeRecipient`. Contract: deducted atomically in `release()`. Voucher-MVP: a *second* pre-signed voucher poster→`feeRecipient` submitted alongside. Card: Stripe `application_fee_amount`. +- **Fulfiller bond** (anti-griefing) — `spec.bond`, staked before `Claimed`, returned on accepted proof or honest timeout, **slashed** to `feeRecipient`/poster on bad-faith submission. Bond ≥ verifier's marginal verification cost so spamming is never profitable. Sized so `bond × P(detected) > reward` → lying is always −EV. +- **Payout token** — reuses `ServiceOfferPayment.Method`/`Asset`: `eip3009` (USDC), `permit2` (OBOL), or `Method: card`. **Card rewards cannot fund an on-chain escrow**, so for `reward.method: card` the lock is a **Stripe manual-capture `PaymentIntent`** (`capture_method: manual`): authorize up front (lock), capture on accepted proof (release), cancel on timeout (refund) — the off-chain mirror of `cancelAuthorization`, slotting into the same rail switch as `cardAuth`. + +### 4.5 Reconcile loop extension + +Add a sibling `reconcileServiceBounty` pass to `cmd/serviceoffer-controller/main.go`, cloned structurally from `reconcileOffer` (controller.go:386-559) — *not* an extension of it, following the `RegistrationRequest` precedent (one binary, now three controllers): + +1. **Finalizer + decode** — identical to controller.go:400-428; on delete, refund escrow (tombstone cleanup). +2. **`reconcileEscrow`** (replaces `reconcileUpstream`) — resolve `escrowRef`, apply the confused-deputy guard `escrowRef.Namespace == bounty.Namespace` copied from agent_resolver.go:46. 
Set `EscrowFunded` when `status.remaining > 0`. +3. **`reconcileClaims`** (replaces gate/route) — **no Middleware, no HTTPRoute**; admit claims up to `maxFulfillers`, lapse stale claims past `claimGracePeriod` using the `DrainEndsAt`/`DrainExpired` time math (types.go:498-519) + requeue-at-expiry. +4. **`reconcileVerification`** (new, the core) — run `acceptance.verifier` per submitted claim (§5). +5. **`reconcilePayout`** (replaces `reconcileRegistrationStatus`) — on `Verified`, trigger the existing buyer-sidecar settlement path against the escrow `PurchaseRequest`; for `card`, route through `internal/x402/card.go`. Set `Paid` + `PayoutTxHash`. +6. **Rollup** — `done := isConditionTrue("Verified") && isConditionTrue("Paid")`; on deadline-past-no-Verified → `Expired` → `reconcileRefund` → `Refunded`. + +--- + +## 5. Verification — from trusted coordinator to trust-minimized consensus + +**The honest premise up front: most ML deliverables are not cryptographically verifiable.** You cannot prove a tok/s number with a SNARK or that a model "is good" with a hash. What you *can* do is make cheating **expensive, attributable, and slashable** — reducing the trusted surface from "trust a person" to "trust a hash + a quorum + a bond." + +### 5.0 Shared primitives (all bounty types) + +- **Commit–reveal.** Fulfiller posts `H = hash(deliverable ‖ manifest ‖ salt)` *before* escrow logic runs, then reveals. Defeats "report a good number, ship a different model." Costs one 32-byte commitment. +- **Signed run-manifest.** Every deliverable ships `{datasetCommit, modelHash, harnessCommit, seed, params, hardwareClass, result, resultHash, fulfillerSig, enclaveSig?}`, signed by the fulfiller's ERC-8004 agent wallet. A bare "47 tok/s" is unfalsifiable; the manifest makes it **re-runnable**, which is the whole game. +- **Optimistic-by-default with a bonded challenge window**; pessimistic N-of-M consensus only for a low-reputation agent's first job or above a value threshold. 
**Reputation (ERC-8004) is the throttle** — it sets the verification tax: high-rep fulfillers clear with a short window and no upfront re-run; new agents are fully re-verified. This is a *policy dial*, not a protocol fork. + +### 5.1 Per-type verification + +| Type | What's verifiable | Mechanism | Honest limit | +|---|---|---|---| +| **Benchmark — eval *score*** | ✅ Strongly | Deterministic re-run on a *committed* held-out dataset (root committed at creation, rows revealed post-commit so they can't be trained on) + pinned harness + greedy/seed decode → **agreement within ε on the rounded score** (not bit-exact logits), N-of-M consensus on the rounded `resultHash`. **The flagship MVP case.** | Floating-point nondeterminism across GPUs → consensus on rounded scalar, never raw logits. | +| **Benchmark — tok/s** | ⚠️ Hardware-relative | Bind every claim to a `hardwareClass`; **reference-task calibration** (`normalized = claimed / referenceTokPerSec`) neutralizes silicon-lottery; verifier re-runs on *same-class* hardware and checks `verified ≥ claimed × (1−tol)`. Verifiable as **lower-bound + comparative ranking**, never a portable absolute. | Needs same-class verifiers in the pool. Frame as "verified ≥ claimed on declared class." | +| **Benchmark — tok/s/W** | ❌ Trust-only | — | No remote wattmeter attestation. Reputation + spot-audit only. Directly bites ANE "47–62 tok/s @ 2W" claims: throughput checkable, watts not. | +| **Fine-tune — checkpoint** | ✅ Strongly | Commit `modelHash` (no bait-and-switch), then **held-out eval re-run** on the committed checkpoint against `criteria` thresholds. Verification is **inference-only and orders of magnitude cheaper than the fine-tune itself** → optimistic verification is viable (an honest challenger can always afford to call a bluff). | **Never re-train** (non-deterministic, prohibitive). 
Data-contamination ("trained on test") is reputation/audit, not crypto — mitigate with a rotating never-revealed gold subset + reputation decay. | +| **Serving — SLA** | ✅ Automatic | **Reuse the deployed PodMonitor → Prometheus** (`internal/embed/infrastructure/base/templates/x402.yaml`, `llm.yaml`): liveness probes, quality canaries (known-good prompts vs committed reference), p50/p95/error-rate vs SLA. **Real paid x402 traffic doubles as liveness+quality proof** — a successful paid request *is* a datapoint. The most trust-minimizable type: machines decide payout, not people. Epoch payout via `price.perEpoch`/`perHour`; `drainAt`/`drainGracePeriod` give graceful teardown. | Graded/open-ended output quality isn't machine-verifiable → buyer dispute + reputation. Latency claims are probe-vantage-dependent → pin probe locations. | + +### 5.2 TEE / Secure Enclave — what it does and does NOT buy you + +The stack has **real** Secure Enclave signing (`enclave_darwin.go:330`, `Key.Sign(digest)` over P-256, hardware-bound, non-exportable, SIP-checked). Resolving a temptation across perspectives: **this is device/identity attestation, not computation attestation.** + +- ✅ It proves: "this result was signed by a key that physically lives in *a* Secure Enclave and never left the chip." Strong **sybil-resistance + device-binding** (one enclave key = one device). +- ❌ It does NOT prove the *computation* (the inference, the tok/s) ran in a TEE. The ANE/GPU compute runs *outside* any TEE; there is **no macOS TEE that attests an LLM forward pass.** + +**Correct use:** the enclave signature is a **reputation multiplier and challenge-window reducer** (more expensive to fake at scale because you need real distinct devices), *not* an oracle. **Don't claim TEE-verified inference.** + +### 5.3 Collusion & the oracle problem + +> **Superseded by §11** where this subsection leans on stake-weighting or slashing. 
The layered-defense framing survives; the *levers* changed (reputation ladder + random assignment + commit-reveal + escalation, not stake). + +Even with re-run + consensus: who watches the watchers? Layered defenses, none sufficient alone: + +1. **VRF-sampled, stake- and reputation-weighted verifier selection** *after* the result is committed — the fulfiller can't pre-select friendly verifiers. +2. **ERC-8004 reputation + stake** (`OnChainReg.AgentID`, ERC-721) — verifiers overturned by challenge/audit are slashed and lose reputation; sybils with no history carry near-zero weight. +3. **Enclave-bound verifier identity** — sybil farms now cost real hardware per identity. +4. **Disagreement → escalation, not blind majority** — escalate to a larger fresh pessimistic panel; collusion must win *every* escalation while the cost of being caught (full bond) dominates. +5. **Poster-as-oracle (MVP) → stake-weighted juror committee (v2)** for non-deterministic deliverables. + +**Honest floor (said out loud to users):** for deterministic deliverables (eval scores, checkpoint held-out re-run, SLA metrics) the oracle *is the re-run* — trust-minimizable. For non-deterministic deliverables (subjective quality, "is this a good fine-tune") **there is no cryptographic oracle** — you are buying a stake-weighted, slashable human/committee judgment. Power/watt, absolute cross-hardware tok/s, and "didn't train on the test set" rest permanently on reputation + stake + audit. + +--- + +## 6. ANE execution substrate + +**The honest framing, baked into the design (not an afterthought):** the verified ANE landscape says ANE = Core ML/ANEMLL **inference** for ≤8B models at ≤4K context, ~2–5× *slower* than the same Mac's GPU but ~10× more power-efficient. 
**No mainstream runtime (MLX, llama.cpp, vLLM, Ollama) dispatches LLM matmul to the ANE — they all run on the Metal GPU.** ANE *training* is reverse-engineering research only (maderix/ANE, Orion — PoC at 5–9% of peak, "does NOT replace GPU training"). Nobody clusters ANEs; real Mac fleets (exo) shard across the GPU/MLX. Ray multi-node on macOS is officially unsupported (Linux-only). + +So the fabric is, honestly named: **"distributed Mac *GPU* inference with optional per-node ANE for low-power small-model inference."** It advertises three capability classes and dispatches each to the substrate the research says actually works: + +| ServiceBounty class | Real substrate (today) | ANE role | Pluggable future | +|---|---|---|---| +| **serve** | MLX-GPU (`vllm-metal`/`llama.cpp`) for throughput; **ANEMLL→ANE** for ≤8B/≤4K low-power | ANE *is real here* for battery-bound nodes | — | +| **fine-tune** | **MLX GPU** (`mlx-lm.lora`/`mlx-tune`) | ANE only to *eval* checkpoints | `ane-train` (Orion) behind `OBOL_EXPERIMENTAL_ANE_TRAIN=1`, default OFF | +| **benchmark** | whatever engine the bounty names | ANE as a *measured target* (report ANE tok/s honestly: ~19 TFLOPS FP16, never "38 TOPS INT8" or "16×") | — | + +The fabric **never claims ANE training.** A fine-tune bounty demanding `task.requires: ["finetune.ane"]` is **rejected at claim time** unless the node opted into the experimental gate. + +### 6.1 Where Ray runs — host-side, NOT in-cluster (the load-bearing decision) + +**The ANE and Metal GPU are only reachable from host processes.** k3d nodes are Linux containers with no ANE, no Metal, no Core ML. Putting Ray workers in-cluster strands them on Linux with neither accelerator. obol-stack *already* solves this exact seam: the standalone inference gateway (`internal/inference/gateway.go`) and the Secure Enclave signer (`enclave_darwin.go`) **run on the Mac host**, and the cluster reaches them via `host.k3d.internal`. We reuse it. 
+ +Because **Ray multi-node on macOS is unsupported**, the **Ray head runs on Linux** (a small k3d pod) while **Mac nodes run host-side Ray worker processes** that join it — the facts' recommended "Ray-head-on-Linux + Mac workers" pattern. Single-node degenerate case needs no cluster at all: `ray.init()` local mode. + +``` +┌──────────────── Mac host ─────────────────┐ ┌──── k3d (Linux) ─────┐ +│ obol runner (Agent runtime=worker) │ │ Ray HEAD pod │ +│ ├─ Ray WORKER ───────────────────────────┼────▶│ (GCS, scheduler) │ +│ │ ├─ Ray Serve → MLX-GPU / ANEMLL-ANE │ │ serviceoffer- + │ +│ │ ├─ Ray Train → MLX trainer │ │ servicebounty-controller, │ +│ │ └─ benchmark task → harness │ │ x402, LiteLLM, │ +│ ├─ Secure Enclave signer │◀────┼─ Traefik (reach host │ +│ └─ obol sell inference (host gateway) │ │ via host.k3d.internal)│ +└────────────────────────────────────────────┘ └──────────────────────┘ +``` + +Control plane (bounty board, x402 verify/settle, ERC-8004) stays **in-cluster**. Ray + accelerators stay **on the host**. The runner is the bridge. + +### 6.2 Node identity — reuse the `Agent` CR (one schema change) + +A Mac joins by creating **one `Agent` CR** (its identity + payout wallet). The Agent CR already gives a namespaced identity, an optional remote-signer wallet (`AgentWallet.Create` → `GenerateWallet()` in wallet.go), and a status block with `WalletAddress`/`Endpoint`/`Phase` (types.go:715-727). The **only schema change to an existing CRD** is extending the runtime enum: + +```go +// AgentSpec.Runtime — types.go:686-690 +// +kubebuilder:validation:Enum=hermes;worker // ← add "worker" +Runtime string `json:"runtime,omitempty"` +``` + +`EffectiveRuntime()` (types.go:731) already defaults to hermes, so this is additive. A `runtime: worker` Agent is **not a Hermes pod** — it's a host-side runner process whose `Status.Endpoint` points at its Ray Serve / control port and whose wallet is the **payout address**. 
+ +**Capability is measured, not declared** (every Mac *claims* an ANE). A one-time onboarding probe writes a `WorkerProfile` into the existing ERC-8004 `Metadata`/`Provenance` maps (types.go:249-274; published via the `RegistrationRequest` path, controller.go:802) — measured per-engine tok/s, chip, RAM, cached model inventory, context ceiling, and a `capabilities[]` list (`serve.ane`, `serve.gpu`, `finetune.mlx`, `benchmark`, `render`). **`finetune.ane` is deliberately absent** unless the experimental gate is on. A node that lies (claims `serve.ane`, has no ANE) fails the deterministic benchmark gate (§5) and loses reputation. No new CRD field — capability rides the free-form metadata maps. + +### 6.3 The `BountyRunner` plugin interface + +New task types must drop in **without touching the controller or the `ServiceBounty` CRD**. The controller only ever sees an opaque `spec.task.runner` + `spec.task` blob, a verifiable `Proof`, and a settlement trigger. All task semantics live host-side behind a `BountyRunner` interface keyed by `spec.task.runner`: + +```go +// internal/worker/runner.go (host-side; controller never imports this) +type BountyRunner interface { + ID() string // matches ServiceBounty.spec.task.runner + Capabilities() []Capability // must intersect spec.task.requires + Validate(spec ServiceBountySpec, node WorkerProfile) error // ANE limits enforced HERE: + // serve.ane rejects params>8B or ctx>4K; finetune rejects ane-train unless gated + Resolve(ctx, spec) (ResolvedInputs, error) // pull content-addressed model/dataset + Run(ctx, in, progress chan<- ProgressEvent) (outputs map[string]string, error) // streams 1→n + Prove(in, outputs, sign Signer) (Proof, error) // controller verifies generically +} + +register(MLXServeRunner{}) // serve.gpu +register(ANEMLLServeRunner{}) // serve.ane — ≤8B / ≤4K only +register(MLXLoRARunner{}) // finetune.mlx — GPU +register(BenchmarkRunner{}) // benchmark — doubles as the anti-lying gate (§5) 
+register(ComfyRenderRunner{}) // render — wraps ComfyUI, exposed via `obol sell http` +if experimentalANETrain { register(OrionANETrainRunner{}) } // finetune.ane — gated, OFF by default +``` + +`Run` streams per-step `{step, loss, tok_s, etaSec}` over **the SSE flush seam that already exists** (`x402-verifier.HandleProxy` flushes per-write; `statusRecorder.Flush` must forward to the underlying `http.Flusher`, `internal/x402/verifier.go`, regression `TestVerifier_HandleProxy_StreamsSSEChunks`). This is not cosmetic: a 500-step job streams keepalive progress so it survives the Cloudflare quick-tunnel ~100s idle ceiling — the exact reason CLAUDE.md prefers `stream: true`. + +Adding RL, eval, or embeddings = write one `BountyRunner`, register it, advertise its `Capabilities()`. CRD, controller, x402 settlement, ERC-8004 — untouched. This is the same polymorphism the controller already uses for `ServiceOffer.Type` (the `agent` resolver synthesizes upstream without the rest of the pipeline branching, agent_resolver.go:33). **The ANE-training gate is the whole modularity payoff:** if Orion ever graduates, flip the env flag, `finetune.ane` appears, `ane-train` bounties start matching — with no controller or CRD change. Until then it's vapor and the fabric correctly refuses to schedule it. + +### 6.4 One Mac, end-to-end (no Ray cluster needed) + +1. `obol stack up` (k3d + controllers + x402 + LiteLLM, as today). +2. `obol agent new worker-x --runtime worker --create-wallet` → one Agent CR + wallet. +3. Runner probes the box, publishes `WorkerProfile` via a `RegistrationRequest`. +4. `ray.init()` local mode — no head pod. +5. A `ServiceBounty` is claimed, executed against **MLX-GPU (finetune/serve) or ANEMLL-ANE (small-model serve)**, proven with the Enclave key, and either pinned (IPFS) or handed off as a `ServiceOffer` (§7). 
**A real ANE-served bounty is demoable on one MacBook today.** + +### 6.5 What changes at N nodes + +| Concern | 1 node | N nodes | +|---|---|---| +| Ray topology | `ray.init()` local | **Head on Linux**; Mac runners are host-side workers (forced: macOS multi-node unsupported) | +| Scheduling | trivial | Ray places by **custom resources = `capabilities[]`**: small-model low-power → ANE nodes, GPU jobs → Max chips | +| Claim contention | none | Controller lease (`status.claimedBy` + finalizer) — single-writer, same discipline as serviceoffer-controller | +| Fine-tune scale-out | single worker | Ray Train `num_workers>1` **on MLX-GPU** — *distributed GPU* (the real path), **not** distributed ANE | +| Serve scale-out | 1 replica | Ray Serve `num_replicas=N`, fronted by **one ServiceOffer** → Traefik load-balances `/services/<name>/*` over multiple Endpoints (ClusterIP, per the ExternalName-avoidance rule) | + +**Invariant across the growth curve: Ray scales the *GPU* fabric; the ANE is always a per-node, small-model, low-power inference/eval accelerator — never a cluster-wide training pool.** That is the only design the landscape supports. + +--- + +## 7. Three worked examples (post → claim → run → verify → pay) + +**Proposed CLI surface.** Demand side: `obol bounty post|list|claim|submit|status|cancel`. Fulfiller side: `obol fulfill <bounty>` (the runner loop), `obol worker onboard` (probe + register, an alias over `agent new --runtime worker`). Reuse `obol buy inference` (#607) for consuming served bounties and `obol sell mcp` (#609) for verifier-as-a-tool. 
+ +### 7.1 Benchmark (the $500 case) + +```bash +# POST — poster escrows $500 as pre-signed vouchers into a PurchaseRequest, creates the ServiceBounty +obol bounty post bench-deepseek-v4-flash --type benchmark --runner bench \ + --model deepseek-v4-flash --hardware-class M4-Max-40c-128g \ + --reward 500 --asset USDT --chain base --bond 750 \ + --verifier consensus --harness lm-eval-harness@v0.4.3 \ + --criteria "mmlu>=0,gsm8k>=0,humaneval>=0" +# → escrow PurchaseRequest (PreSignedAuths[]) + ServiceBounty CR, phase=Open + +# CLAIM — a fulfiller's runner sees the board, stakes the bond, leases the bounty +obol bounty list --requires benchmark +obol fulfill bench-deepseek-v4-flash # sets status.claimedBy, stakes bond, phase=Claimed + +# RUN — runner commits H, runs the pinned harness, signs the run-manifest +# (BenchmarkRunner; engine reported honestly: GPU or ANE) +# SUBMIT +obol bounty submit bench-deepseek-v4-flash \ + --artifact results.json --artifact run.manifest # phase=Submitted (committed first) + +# VERIFY — N-of-M VRF-sampled same-class verifiers re-run on the committed dataset, +# agree within ε on the rounded eval-score hash → controller sets Verified +# PAY — reconcilePayout releases one escrow voucher to the fulfiller's wallet → phase=Paid +obol bounty status bench-deepseek-v4-flash # Verified=True, Paid=True, PayoutTxHash=0x... 
+``` + +### 7.2 Fine-tune (staged, pay-per-epoch) + +```bash +obol bounty post ft-qwen-coder --type fine-tune --runner mlx-lora \ + --model qwen3.5:9b --dataset ipfs://bafy.../sql.jsonl --epochs 3 \ + --reward-per-epoch 40 --reward 120 --asset USDC --chain base-sepolia --bond 200 \ + --verifier harness-rerun --criteria "sql_exec_acc>=0.78,eval_loss<=0.85" \ + --no-first-valid-wins # poster reviews before final release + +obol fulfill ft-qwen-coder +# runner trains on MLX GPU (NOT ANE), streams {step,loss,tok_s} via SSE through HandleProxy +# after each epoch's checkpoint: verifier does held-out re-eval (inference-only, cheap) +# → controller releases that epoch's $40 voucher; 3 epochs = $120 + $120 final = $240 +obol bounty status ft-qwen-coder # shows EscrowRemaining shrinking per accepted epoch +``` + +### 7.3 Serve (ComfyUI, card-paid, becomes a sellable endpoint) + +```bash +obol bounty post host-comfyui-sdxl --type serve --runner comfyui \ + --model sdxl-comfyui --reward-per-hour 0.50 --pay-with card \ + --verifier tee-attestation --criteria "uptime_pct>=99.5,p95_latency_ms<=4000" \ + --max-fulfillers 3 # 3 redundant hosts + +obol fulfill host-comfyui-sdxl +# runner stands up Ray Serve → ComfyUI, then runs the HANDOFF that closes the loop: +obol sell inference bounty-svc-host-comfyui --model sdxl-comfyui \ + --pay-to 0xWORKER... --per-mtok 0.05 --chain base +# → ServiceOffer → controller: ModelReady→...→Ready (controller.go:528-532) +# → Traefik routes /services/bounty-svc-host-comfyui/* via x402 to the host listener + +# VERIFY — continuous, automatic: PodMonitor→Prometheus checks uptime/p95; canary probes; +# real paid traffic doubles as liveness. SLA met → epoch payout via Stripe capture (#608). 
+# CONSUME — the bounty produced a DURABLE revenue endpoint, not a one-shot: +obol buy inference http://obol.stack:8080/services/bounty-svc-host-comfyui # #607 UX +``` + +The serve example is the marketplace's keystone: **a fulfilled serve bounty becomes a `ServiceOffer`**, so the bounty doesn't just pay once — it spins up standing supply that anyone can then buy. Demand creates supply. That is the two-sided market closing on itself. + +--- + +## 8. Modularity & growth + +- **New task types** drop in as a single `BountyRunner` (§6.3) advertising new `capabilities[]`. The CRD, controller, x402 rail, and ERC-8004 path never change — they operate on the opaque `spec.task.runner` + a generic `Proof`. RL, eval, embeddings, render: one file each. +- **New payment methods** drop in as a settlement adapter (`voucher|escrow|sale|cardAuth`), exactly as MPP #608 made `Method: crypto|card` pluggable. The controller never signs in any of them. +- **Composition with `obol sell mcp` (#609):** a verifier exposes "verify-this-bounty" as a **paid MCP tool over x402** (`internal/x402mcp/server.go`) — verification becomes a permissionless, per-job-compensated market, and submission/verification ride the same in-band `_meta` x402 rail. +- **Composition with card payments (#608):** rewards payable in USDC/OBOL/card via the same pluggable `cardSettleFunc` (`internal/x402/card.go`); card escrow = Stripe manual-capture. +- **Composition with buy-inference (#607):** fulfillers *discover* demand via the same `/skill.md` + `/api/services.json` feeds and `internal/buy/discover.go`; posters *consume* served-bounty endpoints with the new positional-URL `obol buy inference` UX (`internal/buy/{balance,discover,purchases}.go`). + +The marketplace is therefore **closed under composition**: a ServiceBounty can be fulfilled by an Agent (`obol sell agent`), served as a ServiceOffer, consumed via buy-inference, verified via a paid MCP tool, and paid in fiat — all on machinery that already ships. 
+ +--- + +## 9. Phased roadmap + +**Smallest shippable slice — v0 (target: the deterministic-eval happy path on one Mac):** + +1. **`ServiceBounty` CRD + GVR registration** — `internal/monetizeapi/types.go` (add `ServiceBountyKind`/`ServiceBountyResource`/`ServiceBountyGVR` near :48-67; `ServiceBountySpec`/`ServiceBountyStatus`/`ServiceBountyClaim`; clone `DrainEndsAt`/`DrainExpired` as `EffectiveDeadline`/`ClaimExpired` from :498-519). Ship the CRD manifest in `internal/embed/infrastructure/base/templates/` beside `serviceoffer-crd.yaml`; extend `internal/embed/embed_crd_test.go`. +2. **`runtime: worker` enum** — one-line additive change at `types.go:686-690`. +3. **`reconcileServiceBounty` sibling pass** — `internal/serviceoffercontroller/` (new `bounty_controller.go` + `bounty_render.go`), wired into `cmd/serviceoffer-controller/main.go` as a third queue, cloned from `reconcileOffer` (controller.go:386-559). Includes the confused-deputy escrow guard (copy agent_resolver.go:46). +4. **Escrow via Option 2 (voucher)** — reuse `PurchaseRequest.PreSignedAuths[]` (types.go:565); release through the existing buyer-sidecar settlement path. **Trusted coordinator + poster-as-judge** for acceptance; single re-run for deterministic types. No consensus yet. +5. **CLI** — `obol bounty post|list|claim|submit|status` in `cmd/obol/` (new `bounty.go`); `obol worker onboard` as an alias over `agent new --runtime worker`. +6. **Single-Mac runner** — `internal/worker/` (new): `runner.go` (the `BountyRunner` interface + loop) with `BenchmarkRunner`, `MLXServeRunner`, `ANEMLLServeRunner`, `MLXLoRARunner`. `ray.init()` local mode. Reuse `enclave_darwin.go` for proof signing, `inference/gateway.go` for serve handoff. +7. 
**RBAC** — add `servicebounties` + `servicebounties/status` to the agent role in `internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml` — as a **namespaced Role/RoleBinding** (NOT the existing cluster-wide `openclaw-monetize-write` ClusterRole; see Appendix B, "Sharpest 5 fixes" #2), beside `serviceoffers`/`purchaserequests`. + +**Flagship v0 bounty types:** deterministic **eval-score benchmark** (near-trust-minimized for free) and **serving SLA** (automatic via existing PodMonitor/Prometheus). v0 is *honest about trust*: you trust the coordinator and the poster. + +**v1 / v2 verification roadmap — superseded by §11.** The paragraphs below predate the no-staking steer; the canonical eval roadmap is §11.7. Kept for the non-verification items only (hardware-class binding, `BountyEscrow.sol`, MCP composition). + +**v1 — verifier consensus + optimistic challenge market** *(superseded where stake-weighted)*: ~~VRF-sampled, stake-weighted N-of-M consensus~~ → median-of-k OBOL-paid evaluator quorum (§11); hardware-class binding + reference normalization for tok/s; probabilistic full-audit for fine-tunes. Coordinator becomes a dumb router. + +**v2 — trust-minimized** *(superseded where stake-weighted)*: on-chain `BountyEscrow.sol` removes fund custody; enclave-bound evaluator identities for real sybil cost; ~~a stake-weighted juror committee~~ → disagreement-triggered escalation panels (§11.7); ERC-8004 reputation sets the verification tax (high-rep → short optimistic window; low-rep → mandatory pessimistic re-run); verifier-as-a-paid-MCP-tool (#609) makes the evaluator market permissionless. Flip `OBOL_EXPERIMENTAL_ANE_TRAIN=1` *only if* Orion ever leaves PoC. + +**Build it Monday:** items 1–7 are the v0 cut. The only genuinely new code is the `ServiceBounty` CRD, the `reconcileServiceBounty` pass, the `cmd/obol/bounty.go` CLI, and the `internal/worker/` runner. 
Everything else — escrow vouchers, x402 settlement, ERC-8004 identity, the Enclave signer, the serve handoff, PodMonitor SLA — already ships. + +--- + +## 10. Honest risks & open questions + +1. **Voucher-MVP is escrow theater.** Option 2 gives the coordinator control over a *release decision* without funds custody, and the poster's balance isn't actually reserved (the voucher bounces if the poster spends elsewhere). This is deliberate, fenced by a value cap + bond + reputation gating, and retired the moment `BountyEscrow.sol` ships. **Don't market it as custody.** +2. **No cryptographic oracle for non-deterministic deliverables.** Open-ended quality, tok/s/W, absolute cross-hardware throughput, and "didn't train on the test set" rest *permanently* on reputation + stake + audit. The product must label each bounty's class so posters know what they're buying. +3. **TEE attests the signer, not the computation.** There is no macOS TEE for an LLM forward pass. Overselling "TEE-verified inference" would be a lie; the enclave is a sybil-resistance multiplier only. +4. **Same-class verifier liquidity.** Verifying an M4-Max tok/s claim needs M4-Max verifiers in the pool. Bootstrapping that pool per hardware class is a real operational cost; until it exists, tok/s bounties fall back to reputation. +5. **ANE training is vapor today.** The `finetune.ane` path is gated off for a reason (Orion: GPT-2-124M, 5–9% peak). If we ever ship it on, we must re-validate the landscape — building product on it now would be dishonest. +6. **Ray-head-on-Linux is an extra moving part.** macOS multi-node being unsupported forces a Linux head; this complicates the N-node story and the demo. Single-node `ray.init()` is the safe default; clustering is a v1+ concern. +7. 
**Cross-namespace escrow is a confused-deputy footgun.** The `escrowRef.Namespace == bounty.Namespace` guard (mirroring agent_resolver.go:46) is **load-bearing** — without it a poster in ns A could drain a `PurchaseRequest`'s pre-signed auths in ns B. This must ship with the CRD, not after. +8. **Open question: does the coordinator hold the verifier release key in MVP?** If yes, it's a single point of compromise for all open bounties (mitigate: per-bounty keys / threshold / move to on-chain EIP-712 release ASAP). If no, who submits the voucher after verification? Resolve before v0 ships. +9. **Open question: dispute resolution latency.** The challenge window trades payout speed against safety. What window length per type, and who funds the watcher incentive at low volume before challenger rewards self-sustain? *(Partially superseded: §11.7's escalation panel is the new dispute path; window economics still open.)* + +--- + +## 11. Evaluator market — verification by default (canonical, 2026-06-10) + +> **This section is the canonical verification design.** It supersedes §5.3's stake-weighted machinery and the §9 v1/v2 stake/slashing roadmap. Design steer after the v1 scaffold shipped: **no validator staking, no slashing — we are not rebuilding EigenLayer.** Verification is a separate OBOL-paid evaluator market anchored on ERC-8004 reputation. Full research citations: `plans/evaluator-market-research-notes.md`. 
+ +### 11.1 Trust model and money legs + +The poster funds **two legs** at post time; the controller tallies but never signs: + +| Leg | Token | Signed by | When | On pass | On fail | +|---|---|---|---|---|---| +| Reward | USDC | Poster at post (`upto`, recipient bound at claim via `witness.to`) | Escrowed at post | Captured → fulfiller | Voided → refund poster | +| Eval budget | OBOL | Poster's **agent** at selection time (Permit2, `witness.to` = each evaluator) | Reserved at post, signed when evaluator set is known | Batch-settled to k evaluators (one tx) | Batch-settled to k evaluators | +| Self-bond | OBOL | Fulfiller at claim (`ServiceBountySelfBond`) | Held with claim | Returned | Forfeited → offsets poster's eval spend (anti-griefing) | + +The eval leg **cannot be pre-signed at post** — `witness.to` needs evaluator addresses that don't exist until selection. The poster's agent signs at selection (buy.py-process-loop style); bounded to exactly k × the per-eval price approved at post. Evaluators submit ERC-8004 `validationResponse` (0–100) with **their own agent wallets**; the controller reads and tallies. Per-eval price, k, and tolerance bands come from the task package (`task.yaml`), not per-bounty negotiation. + +### 11.2 Defaults and the dangerous flag + +Verification is **on by default**. `obol bounty post` shows a cost preview (reward + k × evalPrice) and confirms in a TTY. Opt-out is explicit and never silent: + +- `--dangerously-skip-verification` (house precedent: `dangerouslyDisableDeviceAuth`) → `spec.eval.mode: dangerouslySkipped`, printer column `VERIFIED: no`, `Verified` condition keeps `reason=PosterOverride` — the shipped v1 scaffold's poster-as-judge path **is** the skipped path, correctly labeled, nothing retrofits. +- Skipped bounties write no ERC-8004 validation entries and their reputation feedback is suppressed/discounted — an unverified bounty cannot be farmed for reputation. +- Non-TTY: no prompt, but skipping still requires the flag. 
+- `--evaluators N` raises k above the package default; `--no-newcomer-seat` buys an all-veteran quorum at full price (§11.4). + +### 11.3 Lifecycle with the EVALUATING phase + +``` + post ─► Open ─claim─► Claimed ─submit─► Submitted + │ + ┌─────────────┴──────────────┐ + │ EVALUATING │ + enrolled evaluator pool ──►│ 1. SELECT k evaluators, │ + (ERC-8004 id + enclave │ reputation-weighted │ + attestation, per task │ 2. COMMIT hash(score ‖ │◄─ each re-runs the + type) │ salt ‖ evaluatorAddr)│ private dataset + │ 3. REVEAL scores + salt │ fraction locally + │ 4. QUORUM median within │ + │ tolerance band? │ + └──────┬──────────────┬──────┘ + pass │ │ fail + ▼ ▼ + Verified=True Rejected + reason= (reward voids → refund, + EvaluatorQuorum self-bond forfeits) + │ + ▼ + Paid: reward → fulfiller (capture) + eval budget → k evaluators (batch-settlement) +``` + +Evaluators claim slots and post verdicts through the same annotation write-channel as fulfillers (`obol.org/eval-claim|eval-commit|eval-verdict`), validated and promoted by the controller. The eval an evaluator runs is **the same embedded task package** — they re-run and compare, they don't grade freestyle. + +### 11.4 The ladder: Shadow → Probation → Full (cold-start without ossification) + +Quorum = **median of k** is what makes this safe: a median is robust to one outlier by construction, so one newcomer seat cannot flip a verdict even if malicious. 
+ +``` + ┌─────────────────────────────────────────────┐ + │ SEAT COMPOSITION OF A k=3 QUORUM │ + TIER 2 · FULL ──────►│ Seat 1 high-rep full price counts │ + rep-weighted lottery │ Seat 2 high-rep full price counts │ + TIER 1 · PROBATION ─►│ Seat 3 newcomer ~50% price counts │ + reserved seat, │ (median absorbs one outlier; │ + value-capped bounties│ discount passed to poster) │ + TIER 0 · SHADOW ────►│ +1..2 shadow free scored │ + random assignment, │ commit-reveal alongside, verdict │ + can't pick bounties │ graded against quorum median │ + └─────────────────────────────────────────────┘ + PROMOTION Shadow ──(N agreements within tolerance)──► Probation + Probation ──(M paid evals, no divergence)──► Full + DEMOTION divergence → rep hit → weight drops; inactivity → decay +``` + +- **Tier 0 Shadow (free)**: enroll = ERC-8004 identity + Secure Enclave device attestation, per task type. Randomly *assigned* to live bounties (can't park sybils where you want them); commits and reveals in the same window; verdict counts for nothing, pays nothing; graded against the quorum median → ERC-8004 feedback anchored to the settled bounty. Farming cost = real GPU time per attested device. +- **Tier 1 Probation**: one reserved seat of k, counts fully (median protects the verdict), ~50% pay with the **discount passed to the poster** — posters gain from hosting newcomer seats. Only on bounties below a value cap. Requires k≥3 whenever seated. +- **Tier 2 Full**: reputation-weighted lottery, full price, all values. v1 selection is controller-side weighted sampling (honest about local-first centralization); the selection function is the swap seam for VRF when cross-party. +- Promotion thresholds live in the task package: `eval.ladder: {shadowAgreements, probationEvals, probationValueCap}`. +- Anti-collusion: random shadow assignment, commit-reveal, **pair-diversity** (down-weight repeat evaluator↔fulfiller pairs), device-binding, rep decay. Reputation is **per task type**. 
+ +### 11.5 What adjacent protocols taught us (deep-research 2026-06-10, all claims 3-vote verified) + +**The no-stake bet is vindicated.** Bittensor's stake-weighted Yuma Consensus is governed by capital, not quality: top 1% of wallets held a median ~90% of stake across 64 subnets; >half of subnets 51%-attackable by <1% of wallets; rewards correlate with stake at r≈0.80–0.95 vs r≈0.50 with consensus quality. The cold-start corollary transfers: low-participant markets are trivially capturable — benchmark the ladder against small-coalition takeover in the early phase. + +**Three confirmed weaknesses:** +1. **Median-proximity free-riding** (Bittensor weight-copying, production-exploited: copiers out-earned honest validators). Commit-reveal only stops *same-round* copying — Bittensor's own docs concede that for static ground truth "nothing can prevent weight copying." For repeated bounty types, copying last round's revealed median works. Fix = **make the answer move** (rotate the private fraction), not longer concealment. +2. **p+epsilon bribery** (executed on Kleros mainnet, 2018 Doges on Trial: the bribe won rounds 1–2 of disputeID 75 and was reversed only by an appeal to a fresh 14-juror panel). Attacker pledges P+ε conditional on the dishonest outcome *losing* → everyone complies → bribe never paid → zero realized cost. The two defenses that work — slashable deposits and escalating appeals (O(N²) attacker lockup) — are both absent from our v1. Our bribery floor = per-task reward + discounted reputation-stream value; commit-reveal is *load-bearing* in a no-appeal design. +3. **Attestation-only sybil resistance has no production precedent.** Kleros is explicit that stake IS the sybil defense for random sortition. Device attestation + rep decay carry that burden alone; the free Shadow tier is the attack surface — cost-per-attested-device must exceed the value of walking a sybil to a Full seat. 
+ +**Plus**: base-rate guessing beats coherence reputation (Kleros: ~70% Reject skew → zero-effort base-rate voting looks ~88% coherent). If most bounties pass, rubber-stamp "pass" votes climb the ladder. + +### 11.6 Mechanisms stolen verbatim + +| Steal | From | Fixes | +|---|---|---| +| `hash(score ‖ salt ‖ evaluatorAddress)` commitments | Kleros §4.3 | Commitment copy/replay between evaluators | +| Non-reveal penalty ≥ outlier penalty | Kleros incentive system | Silent abstention as the cheap exit | +| Automated reveals (Drand time-lock) or non-reveal = worst case | Bittensor CR4 | Selective revelation | +| EV-balance tuning (no-effort evaluation must be EV-negative) | Kleros parameterization | Lazy rubber-stamping; our lever is rep decay, not voteStake | +| Difficulty-weighted rep (reward correct-minority, not easy unanimity) | derived from Kleros base-rate data | Base-rate climbing | +| Known-fail canaries in the private fraction | derived | Makes rubber-stampers detectably wrong | +| Disagreement-triggered escalation to a larger fresh panel | Kleros appeals | The only defense that beat p+epsilon in production | + +### 11.7 Amendments (folded into the build plan) + +**v1 (ship in the ladder slice):** +1. Commitment format = `hash(score ‖ salt ‖ evaluatorAddress)`. +2. Fixed reveal window; non-reveal = worst-case outlier (rep penalty ≥ divergence penalty). `task.yaml` ladder block gains `revealWindow` + `nonRevealPenalty`. +3. Seed `datasetCommit.privateFraction` with known-fail canaries; **rotate the private fraction per round** for repeatable bounty types. +4. Reputation gains weighted by disagreement/difficulty — unanimous easy agreement earns ~0; correct minority positions earn most. + +**v2 (design before cross-party):** +5. **Disagreement-triggered escalation**: revealed scores straddling the tolerance band → re-run with a larger fresh panel (2k+1); poster pre-approves an escalation budget cap at post. 
Weaker than Kleros's (no loser-deposit redistribution funds it) — cost falls on the eval budget. +6. **Quantify the bribery floor in OBOL**: the discounted value of a Full seat's future income stream is our analog of Kleros's O(N²) lockup. If corrupting ⌊k/2⌋+1 of the k evaluators (enough to move the median) costs less than plausible bounty values, raise k or tighten value caps. +7. Drand-style time-lock reveals when cross-party. + +**Open questions carried forward:** OBOL value of a Full-tier reputation stream (unquantified); empirical adequacy of device attestation as a sybil bound (no production precedent anywhere); which task types have static-enough ground truth that commit-reveal is structurally insufficient → rotation cadence; how Truebit/Gensyn/Numerai/Chainlink handle non-deterministic verification (didn't survive this research round — re-research before a verifiable-compute task type ships). + +--- + +*Relevant code anchors reused throughout: `internal/monetizeapi/types.go` (Type enum :105, Model :166-174, Payment/PriceTable :211-247/:299-305, card :216, registration/supportedTrust :308-333, drain time-math :498-519, PurchaseRequest/PreSignedAuths :536/:565/:638, Agent runtime/status :686-690/:715-727/:731); `internal/serviceoffercontroller/controller.go` (reconcile loop :386-559, Ready rollup :528-532, gate/route :660/:695, registration sibling :802); `internal/serviceoffercontroller/agent_resolver.go:33,:46` (polymorphic upstream + confused-deputy guard); `internal/x402/verifier.go` (SSE flush seam); `internal/x402/buyer/` (bounded settle-after-success); `internal/inference/gateway.go` (host gateway/NoPaymentGate); `internal/enclave/enclave_darwin.go:330` (real Secure Enclave Sign); `internal/openclaw/wallet.go` (payout wallet); `internal/erc8004/types.go:15-47` (OnChainReg.AgentID, SupportedTrust[]); `internal/embed/infrastructure/base/templates/{x402.yaml,llm.yaml}` (PodMonitor→Prometheus), `obol-agent-monetize-rbac.yaml` (agent RBAC); `internal/x402/card.go` (#608), 
`internal/x402mcp/server.go` (#609), `internal/buy/` (#607).* + + +--- + +## Appendix A — Verified ANE landscape (live research, 2026-06-09) + +**Feasibility verdict.** INFERENCE on ANE: REAL but niche. Running small LLMs (<=8B) on the ANE works today via Apple Core ML (ANEMLL is the leading open pipeline, Beta 0.3.5). It is power-efficient but 2-5x SLOWER than the same Mac's GPU. No mainstream runtime (MLX, llama.cpp, vLLM, Ollama) dispatches LLM matmul to the ANE; they all run on the Metal GPU and leave the ANE idle. TRAINING on ANE: REAL only as research PoC. Two reverse-engineered projects (maderix/ANE and its successor mechramc/Orion) genuinely run forward+backward passes on the ANE via private _ANEClient/_ANECompiler APIs, but the authors themselves say it does NOT replace GPU training and runs at ~5-9% of peak. DISTRIBUTED ANE / Mac fleets: No one clusters ANEs. Real Mac clusters (exo) shard models across the GPU/MLX, not the ANE. Ray multi-node on macOS is officially UNSUPPORTED (Linux-only; macOS multi-node is 'untested', needs an at-your-own-risk env flag). A 'distributed ANE access platform' for training is NOT buildable today; a distributed GPU-based Mac inference cluster IS. + + +**Detailed findings.** Skeptical verdict after cross-verifying every pasted claim against primary sources (GitHub repos/issues, an arXiv preprint, Apple research, and independent benchmarks). + +INFERENCE on the ANE is real but a niche, low-power play: ANEMLL (Beta 0.3.5) genuinely runs <=8B LLMs (Llama/Qwen/Gemma/DeepSeek-distill) through Core ML on the ANE at ~512-4K context, but it is 2-5x SLOWER than the same Mac's GPU (e.g. ~47-62 tok/s @2W on Llama-3.2-1B vs ~204 tok/s @20W on GPU). 
Crucially, NO mainstream runtime uses the ANE for LLMs: MLX (issue #18 open), llama.cpp (issue #10453 is an OPEN proposal, nothing merged; discussion #336 is exploratory), vLLM (vllm-metal/vllm-mlx are real but GPU-via-MLX), Ollama and LM Studio all run on the Metal GPU and leave the ANE idle. Unsloth has no ANE support ('in the works'); 'Unsloth-MLX' was renamed mlx-tune and trains on the GPU. + +The Anemll 'Flash-MoE' / anemll-flash-llama.cpp fork is real and IS a llama.cpp fork, but it streams MoE experts from SSD to the Metal GPU — not the ANE. + +TRAINING on the ANE is real ONLY as reverse-engineering research. maderix/ANE genuinely does forward+backward, Adam, dynamic weight patching, and zero-copy GPU<->ANE via private _ANEClient/_ANECompiler + MIL — but the author labels it a PoC at ~5-9% of peak that 'does NOT replace GPU training.' Its successor mechramc/Orion (backed by arXiv 2603.06728, Mar 2026) extends this with LoRA hot-swap and a compiler, but still on tiny GPT-2-124M/Stories-110M models. These prove the inference-only restriction is a software policy, not silicon — but ANE training is nowhere near production. + +DISTRIBUTED: no one clusters ANEs. Real Mac fleets (exo, ~38k stars, RDMA-over-Thunderbolt 5) shard across the GPU/MLX, not the ANE. Ray multi-node is officially Linux-only; macOS multi-node is 'untested' behind RAY_ENABLE_WINDOWS_OR_OSX_CLUSTER=1. + +For a 'distributed ANE access platform': building it on ANE *training* is not feasible today. The realistic build is a distributed Mac *GPU* inference platform (exo or Ray-head-on-Linux + Mac workers, MLX/vllm-metal/llama.cpp per node), with optional per-node ANEMLL for low-power small-model inference. Numbers to distrust: the '16x speedup' has no source (fabricated), and Apple's '38 TOPS INT8' is a 2x-convention over a measured ~19 TFLOPS FP16 peak with no real INT8 compute speedup for LLM matmul. 
+ + +--- + +## Appendix B — Adversarial red-team + +### Biggest risk + +The whole design rests on a load-bearing falsehood: that bounty payout "reuses the existing buyer-sidecar settlement path against the escrow PurchaseRequest" (sec 4.5 step 5, sec 9 item 4, sec 7.1 PAY). It does not, and cannot, without net-new payment code, which collapses the doc's central "build it Monday, only 4 new files" thesis. Verified against internal/x402/buyer/proxy.go and signer.go: the buyer sidecar is an http.RoundTripper (proxy.go:580-649) that consumes exactly one pre-signed ERC-3009 voucher when, and only when, a LIVE x402-gated HTTP upstream returns <400 to a per-request micropayment (proxy.go:241, 605; ConfirmSpend at signer.go:295-297 "persists a nonce as consumed after a successful paid upstream response"). There is no primitive for "hold N vouchers, then release one to a fulfiller address on a verifier verdict." The buyer-side money flow is buyer->seller-at-request-time; a bounty needs escrow->fulfiller-on-acceptance, the inverse direction the sidecar has no code path for. Worse, EffectiveBuyerNamespace() hard-returns "llm" (types.go:649-651), so every PurchaseRequest's auths are written into the single shared llm-namespace buyer pool; there is no per-poster, per-fulfiller payout isolation primitive at all. The doc's own sec 10.1 admits the voucher is "escrow theater" (no on-chain condition, poster balance not reserved, refund = poster racing to cancelAuthorization), but then still routes the actual release through machinery that physically performs the opposite operation. Net: the "v0 ships this week on shipped code" claim is the single biggest reason this fails; the only honest v0 is "trusted coordinator manually triggers an off-band transfer," exactly the centralized-custodian design the doc claims to avoid. + + +### Sharpest 5 fixes + +1. DELETE the 'reuse buyer-sidecar settlement' claim everywhere (sec 4.5 step5, sec 7, sec 9). 
The buyer sidecar is a request-time micropayment RoundTripper (proxy.go:580-649, ConfirmSpend signer.go:295) that consumes a voucher on a live upstream 2xx; it has no release-on-verdict path and EffectiveBuyerNamespace() pins everything to 'llm' (types.go:649). Honest v0 = a coordinator agent that, on Verified, submits a single poster-pre-signed ERC-3009 voucher (payTo=fulfiller) by calling the facilitator /settle directly. State that this coordinator IS a trusted release authority and that sec 10.8's open question is a v0 blocker, not a v2 nicety. +2. FIX the RBAC claim, which is factually wrong and security-relevant. The doc says agent bounty RBAC is 'namespace-scoped' (sec 9 item7, sec 2). Verified: serviceoffers and purchaserequests are granted via ClusterRole 'openclaw-monetize-write' + ClusterRoleBinding to BOTH Hermes and OpenClaw SAs (obol-agent-monetize-rbac.yaml); cluster-wide create/update/delete. Adding 'bounties' there gives every agent cluster-wide write on all bounties/escrow refs in every namespace. Either move bounties to a namespaced Role/RoleBinding or state plainly the posture is cluster-wide and design the confused-deputy guard accordingly. +3. KILL the cross-namespace escrowRef before it ships. sec 10.7 calls the escrowRef.Namespace==bounty.Namespace guard 'load-bearing' but the field is {name,namespace} with namespace settable (sec 3.2). Given cluster-wide PurchaseRequest write, an attacker posts a ServiceBounty in ns A whose escrowRef points at a victim PurchaseRequest in ns B and drains its pre-signed auths. Mitigation: REMOVE the namespace field from escrowRef entirely (force same-namespace by construction) rather than relying on a runtime string compare a future refactor can drop. +4. CUT the entire ANE/Ray/worker substrate (sec 6) from v0. It is the largest, least-buildable surface (host-side Ray worker join with macOS multi-node officially unsupported, WorkerProfile probes, BountyRunner plugin registry, ane-train gating). 
v0 needs none of it: a bounty is fulfilled by any process that can produce a signed deliverable. Ship ServiceBounty CRD + reconcile + CLI + a single deterministic verifier (eval-score re-run OR PodMonitor SLA) and let fulfillment be opaque. Re-introduce the substrate only after money/verification rails are proven. +5. ADD a hard admission invariant that a ServiceBounty NEVER produces an HTTPRoute, Middleware, or any tunnel-exposed route, and that the servicebounty-controller has zero route/Secret/Namespace creation capability. Make it a test (extend embed_crd_test.go) so a ServiceBounty can never become unintended public ingress, and ensure registration.enabled discovery rides only the existing /skill.md + agent-registration.json surfaces, never a new public path. + + +### Economic / trust attacks + +- **[HIGH] Escrow griefing: poster never accepts (poster-as-oracle for non-deterministic deliverables). Fulfiller burns real compute on a fine-tune, submits a valid checkpoint, poster stalls (no deadline pressure on the poster) or rejects in bad faith. With voucher-MVP the poster's funds were never reserved (sec 10.1), so the poster's downside is zero while the fulfiller ate the compute. firstValidWins=false (sec 3.3b) makes this the DEFAULT for fine-tunes.** + - _Mitigation:_ Symmetric bonds: poster must also bond. On a deterministic-verifier pass the controller auto-releases with NO poster discretion. Reserve poster-manual strictly for explicitly-labeled subjective bounties, and on poster non-response past a review deadline auto-release to the fulfiller. Require real on-chain lock (BountyEscrow.sol) above a low value cap so the poster has skin in the game. +- **[HIGH] Reward front-running / claim-then-copy. firstValidWins=true + maxFulfillers + readable submissions: a watcher sees fulfiller A's revealed deliverable (or the payout tx in the mempool) and submits a copy to win the race. 
Commit-reveal (sec 5.0) is described but submissions are still readable and payout is an observable tx the coordinator submits.** + - _Mitigation:_ Enforce commit-reveal as a HARD protocol gate: H=hash(deliverable||salt) committed before any reveal, reward binds to the address that committed first so a copied reveal pays the original committer. Encrypt the deliverable to the poster or use threshold reveal so a watcher cannot lift it. +- **[HIGH] Sybil fulfillers + verifier collusion on tok/s and 'didn't train on test' claims. New agents carry near-zero ERC-8004 reputation but a sybil farm spins up many Agent CRs cheaply (agent new --create-wallet is free), self-claims, self-verifies in a consensus pool, and splits rewards. VRF-sampled stake-weighted selection assumes a deep honest same-hardware-class verifier pool that does not exist at launch (sec 10.4).** + - _Mitigation:_ At low pool depth fall back to a single trusted coordinator re-run (deterministic types only) and REFUSE non-deterministic bounties until verifier liquidity exists. Gate consensus weight on enclave-bound identity (one Secure Enclave key = one device) so each sybil costs real hardware. Make bond >= reward x (1/P(detected)) a validated admission constraint, not prose. Never pay tok/s/W or contamination claims on consensus; reputation+audit only. +- **[MED] Free-riding via report-vs-ship mismatch / fabricated benchmarks. sec 3.3a example criteria are all 'mmlu>=0, gsm8k>=0, humaneval>=0' (report-only) with verifier:consensus, so the acceptance gate accepts ANY score; only the optional, liquidity-dependent consensus re-run catches it.** + - _Mitigation:_ Reject threshold==0 / report-only acceptance criteria at admission for any reward-bearing bounty (a gate that always passes is not a gate). Require resultHashRequired + a mandatory deterministic re-run (not optional consensus) for eval-score bounties. 
Commit the eval dataset at creation with rows revealed post-commit and make contamination-resistant gold subsets non-optional. +- **[MED] Voucher replay / double-spend across bounties. A pre-signed ERC-3009 voucher is a bearer instrument valid for its whole validBefore window (types.go PreSignedAuth.ValidBefore). The same poster voucher, or one leaked from the shared 'llm' buyer pool (EffectiveBuyerNamespace=llm, types.go:649), could be submitted against multiple bounties or replayed before the poster cancels the nonce.** + - _Mitigation:_ One nonce per (bounty, epoch); controller tracks consumed nonces in status and refuses re-submission. Do not co-mingle bounty reward vouchers in the shared llm buyer ConfigMap pool; give bounty escrow isolated holding. Move to on-chain escrow where the contract enforces single-release per id. +- **[MED] Privilege escalation via serve-bounty handoff. The serve flow has a fulfiller run 'obol sell inference/http' to publish a /services/* route (sec 7.3). Combined with cluster-wide serviceoffers write (ClusterRole openclaw-monetize-write) and agent-factory namespace+secret create rights (hermes-agent-factory-write), a malicious bounty could induce an agent to publish routes or create namespaces/secrets it shouldn't, straining the 'only x402-gated + discovery endpoints public' tunnel invariant.** + - _Mitigation:_ servicebounty-controller must have ZERO route/Middleware/Secret/Namespace creation capability (assert via test). Route publication stays an explicit, separately-authorized 'obol sell' action by the node operator, never auto-driven by bounty reconciliation. Keep the existing hostnames restriction and confirm bounty discovery rides only /skill.md + agent-registration.json. +- **[MED] Card-escrow auth expiry + chargeback griefing (MPP #608 path). sec 4.4 uses a Stripe manual-capture PaymentIntent as the card 'lock'. 
Stripe auth holds expire (~7 days); a long-deadline bounty (sec 3.3c deadline weeks out) outlives the auth, so the lock silently evaporates, and the poster can chargeback after capture, clawing back paid work with no on-chain finality.** + - _Mitigation:_ Reject card-method bounties whose deadline exceeds the provider's max auth-hold window at admission. Treat card rewards as non-final (chargeback-exposed): require fulfiller reputation gating and/or a poster bond for card bounties, and never advertise card payout as settlement-final the way on-chain USDC is. + + +### Architecture conflicts with existing obol-stack invariants + +1. RBAC is NOT namespace-scoped, contrary to the doc's repeated claim. serviceoffers + purchaserequests are granted via ClusterRole 'openclaw-monetize-write' bound by ClusterRoleBinding to BOTH Hermes and OpenClaw SAs (obol-agent-monetize-rbac.yaml); cluster-wide create/update/patch/delete. sec 9 item7 and sec 2 assert namespace-scoped; false today. Adding 'bounties' there is a cluster-wide grant on all bounties and their escrow refs. +2. Payout cannot reuse the existing buyer-sidecar settlement path (sec 4.5 step5, sec 7, sec 9 item4). proxy.go is an http.RoundTripper that consumes one voucher only when a LIVE x402 upstream returns <400 (proxy.go:241,605; ConfirmSpend signer.go:295). It has no release-on-verifier-verdict path. The money direction (buyer->seller at request time) is the inverse of bounty payout (escrow->fulfiller on acceptance). Structural mismatch, not a tweak. +3. Shared-namespace escrow co-mingling. EffectiveBuyerNamespace() hard-returns 'llm' (types.go:649-651). PurchaseRequest auths all land in the single llm-namespace buyer ConfigMap pool built for buyer micropayments. Routing multi-poster bounty REWARD vouchers through PurchaseRequest (sec 3.2 escrowRef, sec 4.1 Option2) puts N posters' payout instruments into one shared pool with no per-bounty isolation; a custody and replay hazard the doc does not acknowledge. +4. 
Cross-namespace confused-deputy reintroduced. The doc adds escrowRef:{name,namespace} with a settable namespace (sec 3.2) and relies on a runtime guard copied from agent_resolver.go:46. Unlike the agent case nothing forces it: combined with cluster-wide PurchaseRequest write, a settable namespace is a drain-victim's-auths footgun. Omit the namespace field (force same-ns by construction). +5. Controller-holds-no-keys preserved in spirit but the doc smuggles a de-facto custodian. sec 4.2 has a coordinator agent submit the voucher / hold the verifier release key (sec 10.8 leaves open whether it holds that key), making a single coordinator a release authority over all open bounties. Strains the purely-declarative posture (agent.go reads only litellm-secrets in 'llm'; agent_resolver guards credential brokering). The bounty coordinator is a new trusted signer the architecture has no slot for. +6. verifyOnly permanence vs serve-bounty. x402.yaml:35 verifyOnly:true is permanent and forwardauth.go:24-36 documents the invariant. The serve handoff to obol sell inference is fine (own in-process settle), but the doc must not let a bounty reconcile flip verifyOnly or settle at the Traefik gate, and should say so explicitly; 'reconcilePayout route through internal/x402/card.go' (sec 4.5) brushes against gate settlement. +7. The 'agent' Type / agent-resolver precedent is mis-cited as a model for opaque polymorphism (sec 6.3). The agent resolver synthesizes a CONCRETE upstream (hermes:8642) so the existing route pipeline runs; it is NOT an opaque task-blob dispatcher. A ServiceBounty has no upstream and no route; the precedent supports 'sibling reconcile pass' but not 'controller operates on an opaque task.runner it never interprets.' + + +### Overstated / unbuildable ANE claims to retract from the body + +1. 
sec 5.1 / sec 3.3a present benchmark eval-score under 'verifier: consensus' as near-trust-minimized, but the consensus re-run depends on same-hardware-class verifier liquidity the doc itself admits does not exist (sec 10.4). For tok/s the doc is mostly honest (lower-bound + ranking only), but the sec 3.3a YAML still labels a tok/s-relevant benchmark 'consensus', overstating verifiability at launch. +2. sec 6.4 claims a 'real ANE-served bounty is demoable on one MacBook today' and lists ANEMLLServeRunner as a v0 deliverable (sec 9 item6). Per verified ANE facts this is real ONLY for <=8B models at <=4K context via ANEMLL Beta 0.3.5, at 2-5x SLOWER than the same Mac's GPU. Shipping an ANE runner in v0 sells a niche, slower-than-GPU path as a headline. The honest v0 substrate is MLX-GPU; ANE should be explicitly deferred, not a v0 runner. +3. The serve example (sec 3.3c, sec 5.1) uses 'verifier: tee-attestation' with a tee-quote artifact, implying TEE-verified serving. sec 5.2 correctly retracts this (attests the signer, not the computation; no macOS TEE for an LLM forward pass), but the example YAML and acceptance enum still advertise tee-attestation as a verifier for the computation/SLA. Rename the enum value (e.g. 'enclave-identity') so the CRD surface can't imply TEE-verified inference. +4. sec 6 keeps ane-train (Orion) as a gated-but-named capability and frames flipping OBOL_EXPERIMENTAL_ANE_TRAIN=1 as a near-term modularity payoff. Per facts Orion is research PoC on GPT-2-124M/Stories-110M at 5-9% of peak that does NOT replace GPU training. Naming finetune.ane in the design (even gated) overstates buildability; drop it from CRD/runner vocabulary until it leaves PoC, not parked behind an env flag. +5. Implicit in sec 6: that Ray gives a distributed Apple-silicon fabric. Verified: Ray multi-node on macOS is officially unsupported (Linux-only, RAY_ENABLE_WINDOWS_OR_OSX_CLUSTER=1 at-your-own-risk). 
The doc acknowledges this (sec 6.1, 6.5) but still lists Ray-head-on-Linux + Mac workers as the N-node story with more confidence than the 'untested' upstream status warrants. The honest N-node claim is GPU sharding via exo/MLX, not Ray. + + +### What the MVP should DROP + +1. The entire ANE/Ray/worker execution substrate (sec 6): host-side Ray worker join (macOS multi-node unsupported), WorkerProfile capability probes, the BountyRunner plugin registry, ANEMLLServeRunner, MLXLoRARunner, ane-train gating. v0 fulfillment can be opaque: any process producing a signed deliverable. The single largest, least-buildable cut. +2. Verifier consensus / VRF-sampled stake-weighted selection (sec 5.3); also drop 'consensus' from the v0 acceptance enum so v0 YAMLs cannot claim it. v0 exposes ONLY deterministic single-re-run (eval-score) and automatic PodMonitor SLA. +3. Fine-tune bounties entirely from v0 (sec 3.3b). They combine the weakest verification (held-out re-eval depends on contamination assumptions + reputation), the worst griefing surface (firstValidWins=false poster-discretion default), milestone/per-epoch voucher fan-out, and the MLX trainer runner. Ship benchmark-eval + serve-SLA first. +4. Card payments / MPP #608 escrow (sec 4.4, sec 3.3c). Stripe manual-capture as 'escrow' adds auth-expiry and chargeback failure modes orthogonal to the core crypto rail. Prove the on-chain/voucher path first; add card later as a pure adapter. +5. maxFulfillers>1, redundant/split payouts, and firstValidWins racing (sec 3.2-3.3). v0 should be single-winner, single-claim only; N-fulfiller contention multiplies front-running and double-spend surface before the basic single-winner flow is proven. +6. ERC-8004 reputation-as-verification-tax dial and the paid-MCP-verifier composition (sec 5, sec 8). v0 has no reputation history to throttle on and no verifier market to meter. Defer. 
+ + +--- + +_Generated via a 7-agent design workflow (live ANE research → 4 parallel design perspectives → synthesis → adversarial red-team) on 2026-06-09._ diff --git a/plans/evaluator-market-research-notes.md b/plans/evaluator-market-research-notes.md new file mode 100644 index 00000000..db562df5 --- /dev/null +++ b/plans/evaluator-market-research-notes.md @@ -0,0 +1,111 @@ +# Evaluator Market — Adjacent-Protocol Research Notes + +> Deep-research pass (2026-06-10) challenging the ServiceBounty evaluator-market design +> (verification-by-default, median-of-k quorum, Shadow→Probation→Full ladder, no staking/slashing) +> against production decentralized-evaluation systems. 23 sources fetched, 114 claims extracted, +> 25 verified by 3-vote adversarial panels — 25 confirmed, 0 refuted. +> +> **Coverage caveat**: only Bittensor, Kleros, and the p+epsilon literature produced claims that +> survived verification. Truebit, Numerai, Chainlink OCR, Gensyn/Prime Intellect, Ritual/Allora, +> and the EigenLayer baseline did not — "state of the art" below means two production systems +> plus one canonical attack paper. Non-deterministic-verification comparisons (research item 3) +> remain only partially answered. + +## Verdict on the core bet + +**No-stake reputation-weighted selection is vindicated by Bittensor's production record.** +Pre-dTAO on-chain analysis of all 64 subnets (6.66M events): top 1% of wallets held a median +~90% of stake; over half of subnets were 51%-attackable by <1% of wallets colluding; validator +rewards correlated with **stake** at r≈0.80–0.95 vs r≈0.50 for consensus quality. Capital, not +evaluation quality, governs stake-weighted systems. 
[arXiv:2507.02951 — note: pre-dTAO snapshot, +FLock.io-affiliated authors, wallet-level not entity-level] + +But the cold-start lesson transfers: **low-participant markets are trivially capturable by tiny +coalitions.** The ladder + random assignment + pair-diversity must be benchmarked against +small-coalition takeover during the early low-evaluator-count phase. + +## Three confirmed weaknesses + +### 1. Median-proximity scoring is gameable by free-riders (Bittensor weight-copying) + +Bittensor validators copied publicly visible weight matrices, computed the stake-weighted median +to predict Yuma Consensus, and **earned higher APY than honest validators** — because rewards flow +from alignment-with-consensus, not evaluation labor. Our median-of-k + rerun-tolerance rewards +proximity-to-median identically. + +Commit-reveal only fixes **same-round** copying. Bittensor's own docs concede (against interest): +*"If the ground truth about miner rankings is overly static... nothing can prevent weight +copying."* For repeated/static bounty types, copying the prior round's revealed median survives +any concealment window. **The countermeasure is making the answer move, not longer concealment**: +rotate the private-dataset fraction per round; per-round task perturbation. +[docs.learnbittensor.org/concepts/weight-copying-in-bittensor, /commit-reveal; Opentensor weight-copier paper May 2024] + +### 2. p+epsilon bribery — executed in production, and our two missing defenses are the ones that worked + +An attacker who credibly pledges to pay P+ε (P = the reward for voting with the winning side) to each evaluator who votes for the dishonest outcome, **conditional on that outcome +losing**, makes dishonest voting dominant; if everyone complies the dishonest outcome wins and the bribe is never paid — the attack +succeeds at **zero realized cost**. "No attacker would spend that much" is invalid: the budget is +pledged, not spent. 
[Buterin 2015, blog.ethereum.org/2015/01/28/p-epsilon-attack] + +Not theoretical: executed on Ethereum mainnet against Kleros (Doges on Trial 2018, disputeIDs +70–76, 94; conditional-bribe contracts at 0xbaf2eb...). In disputeID 75 the bribe **won rounds 1 +and 2** against small panels; reversed only when a community member funded an appeal to a fresh +14-juror round (attacker lost 0-14). [blog.kleros.io/cryptoeconomic-deep-dive-doges-on-trial] + +The two operative defenses are both absent from our v1: +- **Slashable deposits** (bribe must exceed deposit, not per-round reward) — rejected by design. +- **Escalating appeals** — attacker lockup grows O(N²) in panel size (~110M PNK at 2023 General + Court parameters). This is the defense that actually worked in production. + +Our bribery floor = per-task reward + discounted value of k evaluators' future reputation +streams. Partial mitigants (private dataset fraction breaks pure-Schelling structure; coordinating +a continuous median within tolerance is harder than flipping a binary vote) bound but don't +eliminate exposure. Kleros guidance: commit-reveal matters **most** when appeals are unlikely — +in a no-appeal design it is load-bearing, not optional. + +### 3. Stake is the canonical sybil defense for random sortition; attestation-only has no production precedent + +Kleros whitepaper §4.2.1: *"If jurors were simply drawn randomly, a malicious party could create a +high number of addresses... By being drawn more times than all honest jurors, the malicious party +would control the system."* Zero stake = never drawn; no reputation ladder exists in Kleros. +Device attestation + rep decay must absorb the entire sybil burden alone. The **free Shadow tier +is the attack surface**: cost-per-attested-device (emulation, device farms, resale) must exceed +the discounted value of progressing a sybil to a Full seat. 
+ +### Bonus: base-rate guessing defeats coherence-based reputation + +Kleros production data: ~88-89% juror coherence against a ~70%-Reject outcome skew — always +voting the base rate beats random with zero effort. **If most bounty evaluations pass, zero-effort +"pass" votes look reputationally coherent and climb our ladder.** +[blog.kleros.io/parameterization-of-kleros-courts] + +## Mechanisms worth stealing verbatim + +| Steal | From | What it fixes | +|---|---|---| +| `hash(score, salt, address)` — bind evaluator address into the commitment | Kleros §4.3 | Commitment copy/replay between evaluators | +| Reveal-failure penalty ≥ outlier penalty | Kleros incentive system | Silent abstention as cheap exit when your committed score looks bad | +| Automated reveals (Drand time-lock) or non-reveal = penalized worst case | Bittensor CR4 | Selective revelation (validators gamed manual reveals by revealing only when it helped) | +| EV-balance parameterization: tune penalties so no-effort evaluation is EV-negative | Kleros parameterization | Lazy rubber-stamping; portable framework, our lever is rep decay instead of voteStake | +| Difficulty-weighted reputation: reward being right when others were wrong, not easy unanimity | (derived from Kleros base-rate finding) | Base-rate climbing | +| Known-fail canaries seeded into the private dataset fraction | (derived) | Makes rubber-stampers detectably wrong at a measurable rate | +| Disagreement-triggered escalation to a larger fresh panel | Kleros appeals | The only defense that beat p+epsilon in production | + +## Amendments + +**v1 (cheap, do in the ladder slice):** +1. Commitment format = `hash(score ‖ salt ‖ evaluatorAddress)`. +2. Fixed reveal window; non-reveal treated as worst-case outlier (rep penalty ≥ divergence penalty). Ladder constants in `task.yaml` gain `revealWindow` + `nonRevealPenalty`. +3. 
Seed `datasetCommit.privateFraction` with known-fail canaries; rotate the private fraction per round for repeatable bounty types. +4. Reputation gains weighted by disagreement/difficulty — unanimous easy agreement earns ~0; correct minority positions earn most. + +**v2 (design before cross-party):** +5. Disagreement-triggered escalation: if revealed scores straddle the tolerance band, re-run with a larger fresh panel (2k+1); the poster pre-approves an escalation budget cap at post time. Note: weaker than Kleros's version (no loser-deposit redistribution funds it) — escalation cost falls on the eval budget. +6. Quantify the bribery floor in OBOL: discounted value of a Full-tier seat's future income stream is our analog of Kleros's O(N²) lockup. Model it; if corrupting a median-controlling majority (⌊k/2⌋+1 of the k evaluators) costs less than plausible bounty values, raise k or value caps. +7. Drand-style time-lock reveals when cross-party (committer-controlled reveals are an exploit vector even with penalties). + +**Open questions carried forward:** +- OBOL-denominated value of a Full-tier reputation stream (the no-stake bribery floor) — unquantified. +- Is device attestation an adequate sybil bound? No production precedent exists. +- Which task-type registry entries have static-enough ground truth that commit-reveal is structurally insufficient → what rotation cadence makes copying unprofitable? +- How Truebit/Gensyn/Numerai/Chainlink handle non-deterministic verification — didn't survive this round's verification; re-research before the verifiable-compute task type ships.