From 3ab28465e3919b7bb5c302042a74cb693b821216 Mon Sep 17 00:00:00 2001 From: Andrew Cunliffe Date: Thu, 7 May 2026 16:58:03 -0700 Subject: [PATCH 1/2] feat(validation): probe repository URL reachability at publish (#395) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue #395 surfaced a published server (com.pga/pga-golf) whose repository.url was a 404 — the registry had accepted it because semantic validation only verifies URL shape, not reachability. Add a publish-time HTTP HEAD probe that follows up to 5 redirects and rejects 4xx/5xx responses with the URL and status code in the error message. Reuses the prior-art HEAD-probe pattern from internal/validators/registries/mcpb.go. The probe is gated behind a new MCP_REGISTRY_ENABLE_REPOSITORY_REACHABILITY_CHECK config flag (default true; set to false for offline dev), kept independent of MCP_REGISTRY_ENABLE_REGISTRY_VALIDATION so operators can disable just this check. Tests cover 200, 301→200, 404, 410, 5xx, timeout, redirect-chain limit, malformed URL, and unreachable host using net/http/httptest. --- README.md | 2 +- docker-compose.yml | 1 + internal/config/config.go | 3 + internal/validators/export_test.go | 22 +++ .../validators/repository_reachability.go | 68 ++++++++ .../repository_reachability_test.go | 161 ++++++++++++++++++ internal/validators/validators.go | 19 +++ .../docker-compose.integration-test.yml | 1 + 8 files changed, 276 insertions(+), 1 deletion(-) create mode 100644 internal/validators/export_test.go create mode 100644 internal/validators/repository_reachability.go create mode 100644 internal/validators/repository_reachability_test.go diff --git a/README.md b/README.md index cbd9670b5..4d9a7398c 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ This starts the registry at [`localhost:8080`](http://localhost:8080) with Postg **Note:** The registry uses [ko](https://ko.build) to build container images. The `make dev-compose` command automatically builds the registry image with ko and loads it into your local Docker daemon before starting the services. -By default, the registry seeds from the production API with a filtered subset of servers (to keep startup fast). This ensures your local environment mirrors production behavior and all seed data passes validation. For offline development you can seed from a file without validation with `MCP_REGISTRY_SEED_FROM=data/seed.json MCP_REGISTRY_ENABLE_REGISTRY_VALIDATION=false make dev-compose`. +By default, the registry seeds from the production API with a filtered subset of servers (to keep startup fast). This ensures your local environment mirrors production behavior and all seed data passes validation. For offline development you can seed from a file without validation with `MCP_REGISTRY_SEED_FROM=data/seed.json MCP_REGISTRY_ENABLE_REGISTRY_VALIDATION=false MCP_REGISTRY_ENABLE_REPOSITORY_REACHABILITY_CHECK=false make dev-compose`. The setup can be configured with environment variables in [docker-compose.yml](./docker-compose.yml) - see [.env.example](./.env.example) for a reference. diff --git a/docker-compose.yml b/docker-compose.yml index 7eb71c25a..725e9e63e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,6 +23,7 @@ services: - MCP_REGISTRY_OIDC_EDIT_PERMISSIONS=${MCP_REGISTRY_OIDC_EDIT_PERMISSIONS:-*} - MCP_REGISTRY_OIDC_PUBLISH_PERMISSIONS=${MCP_REGISTRY_OIDC_PUBLISH_PERMISSIONS:-*} - MCP_REGISTRY_ENABLE_REGISTRY_VALIDATION=${MCP_REGISTRY_ENABLE_REGISTRY_VALIDATION:-true} + - MCP_REGISTRY_ENABLE_REPOSITORY_REACHABILITY_CHECK=${MCP_REGISTRY_ENABLE_REPOSITORY_REACHABILITY_CHECK:-true} ports: - 8080:8080 volumes: diff --git a/internal/config/config.go b/internal/config/config.go index a15a67a11..30f32a9f2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -16,6 +16,9 @@ type Config struct { JWTPrivateKey string `env:"JWT_PRIVATE_KEY" envDefault:""` EnableAnonymousAuth bool `env:"ENABLE_ANONYMOUS_AUTH" envDefault:"false"` EnableRegistryValidation bool `env:"ENABLE_REGISTRY_VALIDATION" envDefault:"true"` + // EnableRepositoryReachabilityCheck probes the publisher-supplied repository.url + // at publish time and rejects 4xx/5xx. Disable for offline development. + EnableRepositoryReachabilityCheck bool `env:"ENABLE_REPOSITORY_REACHABILITY_CHECK" envDefault:"true"` GitHubOIDCAudience string `env:"GITHUB_OIDC_AUDIENCE" envDefault:""` diff --git a/internal/validators/export_test.go b/internal/validators/export_test.go new file mode 100644 index 000000000..7b00fb84a --- /dev/null +++ b/internal/validators/export_test.go @@ -0,0 +1,22 @@ +package validators + +// Test-only re-exports of internal helpers used by repository_reachability_test.go +// in the external validators_test package. + +import ( + "context" + "net/http" + "time" +) + +// ProbeRepositoryReachableForTest exposes the internal probe with an injectable +// client so tests can drive timeouts and redirect caps deterministically without +// hitting the package-level default client. +func ProbeRepositoryReachableForTest(ctx context.Context, client *http.Client, repoURL string) error { + return probeRepositoryReachable(ctx, client, repoURL) +} + +// NewRepoReachabilityClientForTest exposes the test-tunable client constructor. +func NewRepoReachabilityClientForTest(timeout time.Duration, maxHops int) *http.Client { + return newRepoReachabilityClient(timeout, maxHops) +} diff --git a/internal/validators/repository_reachability.go b/internal/validators/repository_reachability.go new file mode 100644 index 000000000..937669c4e --- /dev/null +++ b/internal/validators/repository_reachability.go @@ -0,0 +1,68 @@ +package validators + +import ( + "context" + "errors" + "fmt" + "net/http" + "time" +) + +// ErrRepositoryURLNotReachable is returned when a publisher-supplied repository +// URL fails a publish-time reachability probe (404, 5xx, network failure, +// timeout, or excessive redirect chain). +var ErrRepositoryURLNotReachable = errors.New("repository URL is not publicly reachable") + +const ( + repoReachabilityTimeout = 10 * time.Second + repoReachabilityMaxHops = 5 + repoReachabilityUserAgent = "MCP-Registry-Validator/1.0" +) + +// defaultRepoReachabilityClient is reused across publishes so connections to +// github.com / gitlab.com can be pooled. +var defaultRepoReachabilityClient = newRepoReachabilityClient(repoReachabilityTimeout, repoReachabilityMaxHops) + +func newRepoReachabilityClient(timeout time.Duration, maxHops int) *http.Client { + return &http.Client{ + Timeout: timeout, + CheckRedirect: func(_ *http.Request, via []*http.Request) error { + if len(via) >= maxHops { + return fmt.Errorf("stopped after %d redirects", maxHops) + } + return nil + }, + } +} + +// ProbeRepositoryReachable issues an HTTP HEAD against repoURL to confirm the +// repository is publicly reachable. Returns nil on 2xx/3xx responses, and an +// error wrapping ErrRepositoryURLNotReachable for 4xx/5xx, network failures, +// timeouts, or redirect chains exceeding the cap. Callers should skip the +// probe when the repository field is unset or reachability checks are +// disabled in config. +func ProbeRepositoryReachable(ctx context.Context, repoURL string) error { + return probeRepositoryReachable(ctx, defaultRepoReachabilityClient, repoURL) +} + +func probeRepositoryReachable(ctx context.Context, client *http.Client, repoURL string) error { + req, err := http.NewRequestWithContext(ctx, http.MethodHead, repoURL, nil) + if err != nil { + return fmt.Errorf("%w: %s: %w", ErrRepositoryURLNotReachable, repoURL, err) + } + req.Header.Set("User-Agent", repoReachabilityUserAgent) + + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("%w: %s: %w", ErrRepositoryURLNotReachable, repoURL, err) + } + defer resp.Body.Close() + + // Accept 2xx and 3xx (matches the spec's "200, 301, 302" with headroom for + // 304/307/308 that GitHub/GitLab also legitimately return). 4xx and 5xx + // are the failure modes the original PGA-golf bug reproduced as 404. + if resp.StatusCode >= 200 && resp.StatusCode < 400 { + return nil + } + return fmt.Errorf("%w: %s (status %d)", ErrRepositoryURLNotReachable, repoURL, resp.StatusCode) +} diff --git a/internal/validators/repository_reachability_test.go b/internal/validators/repository_reachability_test.go new file mode 100644 index 000000000..1eaf4cb6c --- /dev/null +++ b/internal/validators/repository_reachability_test.go @@ -0,0 +1,161 @@ +package validators_test + +import ( + "context" + "errors" + "fmt" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/modelcontextprotocol/registry/internal/validators" +) + +func newTestClient(timeout time.Duration, maxHops int) *http.Client { + return validators.NewRepoReachabilityClientForTest(timeout, maxHops) +} + +func probe(ctx context.Context, client *http.Client, url string) error { + return validators.ProbeRepositoryReachableForTest(ctx, client, url) +} + +func TestProbeRepositoryReachable_OK(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + if err := probe(context.Background(), newTestClient(2*time.Second, 5), srv.URL); err != nil { + t.Fatalf("expected nil for 200, got %v", err) + } +} + +func TestProbeRepositoryReachable_FollowsRedirectThenAccepts(t *testing.T) { + // First server returns 301 → second server returns 200. Default Go transport + // follows the redirect; we should accept the resulting 200. + final := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer final.Close() + + redirector := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Redirect(w, r, final.URL, http.StatusMovedPermanently) + })) + defer redirector.Close() + + if err := probe(context.Background(), newTestClient(2*time.Second, 5), redirector.URL); err != nil { + t.Fatalf("expected nil for 301→200, got %v", err) + } +} + +func TestProbeRepositoryReachable_404(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusNotFound) + })) + defer srv.Close() + + err := probe(context.Background(), newTestClient(2*time.Second, 5), srv.URL) + if err == nil { + t.Fatal("expected error for 404") + } + if !errors.Is(err, validators.ErrRepositoryURLNotReachable) { + t.Fatalf("expected wrapped ErrRepositoryURLNotReachable, got %v", err) + } + if !strings.Contains(err.Error(), "404") { + t.Fatalf("expected error to include status 404, got %q", err.Error()) + } + if !strings.Contains(err.Error(), srv.URL) { + t.Fatalf("expected error to include URL %q, got %q", srv.URL, err.Error()) + } +} + +func TestProbeRepositoryReachable_410And5xx(t *testing.T) { + cases := []int{http.StatusGone, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable} + for _, status := range cases { + t.Run(http.StatusText(status), func(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(status) + })) + defer srv.Close() + + err := probe(context.Background(), newTestClient(2*time.Second, 5), srv.URL) + if err == nil { + t.Fatalf("expected error for status %d", status) + } + if !strings.Contains(err.Error(), fmt.Sprintf("%d", status)) { + t.Fatalf("expected error to include status %d, got %q", status, err.Error()) + } + }) + } +} + +func TestProbeRepositoryReachable_Timeout(t *testing.T) { + // Server holds the connection longer than the client timeout. + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + time.Sleep(300 * time.Millisecond) + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + err := probe(context.Background(), newTestClient(50*time.Millisecond, 5), srv.URL) + if err == nil { + t.Fatal("expected timeout error") + } + if !errors.Is(err, validators.ErrRepositoryURLNotReachable) { + t.Fatalf("expected wrapped ErrRepositoryURLNotReachable, got %v", err) + } +} + +func TestProbeRepositoryReachable_RedirectChainLimit(t *testing.T) { + // Endless redirect server — must trip the redirect cap and surface a + // reachability error rather than spinning until timeout. + var hits atomic.Int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + hits.Add(1) + http.Redirect(w, r, "/next", http.StatusMovedPermanently) + })) + defer srv.Close() + + err := probe(context.Background(), newTestClient(2*time.Second, 3), srv.URL) + if err == nil { + t.Fatal("expected error for excessive redirect chain") + } + if !errors.Is(err, validators.ErrRepositoryURLNotReachable) { + t.Fatalf("expected wrapped ErrRepositoryURLNotReachable, got %v", err) + } + // Net/http counts the initial request hop, then invokes CheckRedirect on + // each redirect: with cap=3 we expect ~3 hits before bailing. + if got := hits.Load(); got > 5 { + t.Fatalf("expected redirect chain to be capped, got %d hits", got) + } +} + +func TestProbeRepositoryReachable_InvalidURL(t *testing.T) { + err := probe(context.Background(), newTestClient(2*time.Second, 5), "://not-a-url") + if err == nil { + t.Fatal("expected error for malformed URL") + } + if !errors.Is(err, validators.ErrRepositoryURLNotReachable) { + t.Fatalf("expected wrapped ErrRepositoryURLNotReachable, got %v", err) + } +} + +func TestProbeRepositoryReachable_NetworkError(t *testing.T) { + // Bind a listener and immediately close it to get a guaranteed-unreachable URL. + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + })) + url := srv.URL + srv.Close() + + err := probe(context.Background(), newTestClient(2*time.Second, 5), url) + if err == nil { + t.Fatal("expected error for unreachable host") + } + if !errors.Is(err, validators.ErrRepositoryURLNotReachable) { + t.Fatalf("expected wrapped ErrRepositoryURLNotReachable, got %v", err) + } +} diff --git a/internal/validators/validators.go b/internal/validators/validators.go index 114ed3c71..16b29e106 100644 --- a/internal/validators/validators.go +++ b/internal/validators/validators.go @@ -647,6 +647,15 @@ func ValidatePublishRequest(ctx context.Context, req apiv0.ServerJSON, cfg *conf return err } + // Probe publisher-supplied repository URL for public reachability so 404s + // like https://github.com/pgahq/mcp-pga-com (issue #395) are caught at + // publish time instead of leaking into production listings. + if cfg.EnableRepositoryReachabilityCheck { + if err := validateRepositoryReachability(ctx, req); err != nil { + return err + } + } + // Validate registry ownership for all packages if validation is enabled if cfg.EnableRegistryValidation { if err := validateRegistryOwnership(ctx, req); err != nil { @@ -669,6 +678,16 @@ func ValidateUpdateRequest(ctx context.Context, req apiv0.ServerJSON, cfg *confi return nil } +func validateRepositoryReachability(ctx context.Context, req apiv0.ServerJSON) error { + if req.Repository == nil || req.Repository.URL == "" { + return nil + } + if err := ProbeRepositoryReachable(ctx, req.Repository.URL); err != nil { + return fmt.Errorf("repository reachability check failed: %w", err) + } + return nil +} + func validateRegistryOwnership(ctx context.Context, req apiv0.ServerJSON) error { for i, pkg := range req.Packages { if err := ValidatePackage(ctx, pkg, req.Name); err != nil { diff --git a/tests/integration/docker-compose.integration-test.yml b/tests/integration/docker-compose.integration-test.yml index 6aa47e7a4..502324e36 100644 --- a/tests/integration/docker-compose.integration-test.yml +++ b/tests/integration/docker-compose.integration-test.yml @@ -4,6 +4,7 @@ services: environment: - MCP_REGISTRY_SEED_FROM= - MCP_REGISTRY_ENABLE_REGISTRY_VALIDATION=false + - MCP_REGISTRY_ENABLE_REPOSITORY_REACHABILITY_CHECK=false healthcheck: test: ["CMD", "wget", "-qO-", "http://localhost:8080/v0/servers"] interval: 1s From 0a2336f43556046ec2bc097dc502220e441ec61a Mon Sep 17 00:00:00 2001 From: Andrew Cunliffe Date: Mon, 25 May 2026 18:13:33 -0700 Subject: [PATCH 2/2] test(telemetry): disable repo reachability probe in TestPrometheusHandler TestPrometheusHandler builds its registry service with config.NewConfig(), which enables the repository reachability check by default, and seeds a fixture server whose repository.url (https://github.com/example/test-server) is a non-resolving placeholder. The publish-time probe therefore made a live HTTP call that 404'd, failing CreateServer and panicking on the nil result. Disable EnableRepositoryReachabilityCheck for this test's service config so it stays hermetic, matching the existing convention of opting out of network- dependent validation (EnableRegistryValidation) in unit tests. --- internal/api/handlers/v0/telemetry_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/api/handlers/v0/telemetry_test.go b/internal/api/handlers/v0/telemetry_test.go index 54b4ad5a3..95839362d 100644 --- a/internal/api/handlers/v0/telemetry_test.go +++ b/internal/api/handlers/v0/telemetry_test.go @@ -22,7 +22,12 @@ import ( ) func TestPrometheusHandler(t *testing.T) { - registryService := service.NewRegistryService(database.NewTestDB(t), config.NewConfig()) + // The repository reachability check issues a live HTTP probe; disable it so + // this telemetry test stays hermetic and does not depend on github.com + // resolving the placeholder repository URL below. + svcConfig := config.NewConfig() + svcConfig.EnableRepositoryReachabilityCheck = false + registryService := service.NewRegistryService(database.NewTestDB(t), svcConfig) server, err := registryService.CreateServer(context.Background(), &apiv0.ServerJSON{ Schema: model.CurrentSchemaURL, Name: "io.github.example/test-server",