diff --git a/README.md b/README.md index cbd9670b..4d9a7398 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ This starts the registry at [`localhost:8080`](http://localhost:8080) with Postg **Note:** The registry uses [ko](https://ko.build) to build container images. The `make dev-compose` command automatically builds the registry image with ko and loads it into your local Docker daemon before starting the services. -By default, the registry seeds from the production API with a filtered subset of servers (to keep startup fast). This ensures your local environment mirrors production behavior and all seed data passes validation. For offline development you can seed from a file without validation with `MCP_REGISTRY_SEED_FROM=data/seed.json MCP_REGISTRY_ENABLE_REGISTRY_VALIDATION=false make dev-compose`. +By default, the registry seeds from the production API with a filtered subset of servers (to keep startup fast). This ensures your local environment mirrors production behavior and all seed data passes validation. For offline development you can seed from a file without validation with `MCP_REGISTRY_SEED_FROM=data/seed.json MCP_REGISTRY_ENABLE_REGISTRY_VALIDATION=false MCP_REGISTRY_ENABLE_REPOSITORY_REACHABILITY_CHECK=false make dev-compose`. The setup can be configured with environment variables in [docker-compose.yml](./docker-compose.yml) - see [.env.example](./.env.example) for a reference. diff --git a/docker-compose.yml b/docker-compose.yml index 7eb71c25..725e9e63 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,6 +23,7 @@ services: - MCP_REGISTRY_OIDC_EDIT_PERMISSIONS=${MCP_REGISTRY_OIDC_EDIT_PERMISSIONS:-*} - MCP_REGISTRY_OIDC_PUBLISH_PERMISSIONS=${MCP_REGISTRY_OIDC_PUBLISH_PERMISSIONS:-*} - MCP_REGISTRY_ENABLE_REGISTRY_VALIDATION=${MCP_REGISTRY_ENABLE_REGISTRY_VALIDATION:-true} + - MCP_REGISTRY_ENABLE_REPOSITORY_REACHABILITY_CHECK=${MCP_REGISTRY_ENABLE_REPOSITORY_REACHABILITY_CHECK:-true} ports: - 8080:8080 volumes: diff --git a/internal/api/handlers/v0/telemetry_test.go b/internal/api/handlers/v0/telemetry_test.go index 54b4ad5a..95839362 100644 --- a/internal/api/handlers/v0/telemetry_test.go +++ b/internal/api/handlers/v0/telemetry_test.go @@ -22,7 +22,12 @@ import ( ) func TestPrometheusHandler(t *testing.T) { - registryService := service.NewRegistryService(database.NewTestDB(t), config.NewConfig()) + // The repository reachability check issues a live HTTP probe; disable it so + // this telemetry test stays hermetic and does not depend on github.com + // resolving the placeholder repository URL below. + svcConfig := config.NewConfig() + svcConfig.EnableRepositoryReachabilityCheck = false + registryService := service.NewRegistryService(database.NewTestDB(t), svcConfig) server, err := registryService.CreateServer(context.Background(), &apiv0.ServerJSON{ Schema: model.CurrentSchemaURL, Name: "io.github.example/test-server", diff --git a/internal/config/config.go b/internal/config/config.go index a15a67a1..30f32a9f 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -16,6 +16,9 @@ type Config struct { JWTPrivateKey string `env:"JWT_PRIVATE_KEY" envDefault:""` EnableAnonymousAuth bool `env:"ENABLE_ANONYMOUS_AUTH" envDefault:"false"` EnableRegistryValidation bool `env:"ENABLE_REGISTRY_VALIDATION" envDefault:"true"` + // EnableRepositoryReachabilityCheck probes the publisher-supplied repository.url + // at publish time and rejects 4xx/5xx. Disable for offline development. + EnableRepositoryReachabilityCheck bool `env:"ENABLE_REPOSITORY_REACHABILITY_CHECK" envDefault:"true"` GitHubOIDCAudience string `env:"GITHUB_OIDC_AUDIENCE" envDefault:""` diff --git a/internal/validators/export_test.go b/internal/validators/export_test.go new file mode 100644 index 00000000..7b00fb84 --- /dev/null +++ b/internal/validators/export_test.go @@ -0,0 +1,22 @@ +package validators + +// Test-only re-exports of internal helpers used by repository_reachability_test.go +// in the external validators_test package. + +import ( + "context" + "net/http" + "time" +) + +// ProbeRepositoryReachableForTest exposes the internal probe with an injectable +// client so tests can drive timeouts and redirect caps deterministically without +// hitting the package-level default client. +func ProbeRepositoryReachableForTest(ctx context.Context, client *http.Client, repoURL string) error { + return probeRepositoryReachable(ctx, client, repoURL) +} + +// NewRepoReachabilityClientForTest exposes the test-tunable client constructor. +func NewRepoReachabilityClientForTest(timeout time.Duration, maxHops int) *http.Client { + return newRepoReachabilityClient(timeout, maxHops) +} diff --git a/internal/validators/repository_reachability.go b/internal/validators/repository_reachability.go new file mode 100644 index 00000000..937669c4 --- /dev/null +++ b/internal/validators/repository_reachability.go @@ -0,0 +1,68 @@ +package validators + +import ( + "context" + "errors" + "fmt" + "net/http" + "time" +) + +// ErrRepositoryURLNotReachable is returned when a publisher-supplied repository +// URL fails a publish-time reachability probe (404, 5xx, network failure, +// timeout, or excessive redirect chain). +var ErrRepositoryURLNotReachable = errors.New("repository URL is not publicly reachable") + +const ( + repoReachabilityTimeout = 10 * time.Second + repoReachabilityMaxHops = 5 + repoReachabilityUserAgent = "MCP-Registry-Validator/1.0" +) + +// defaultRepoReachabilityClient is reused across publishes so connections to +// github.com / gitlab.com can be pooled. +var defaultRepoReachabilityClient = newRepoReachabilityClient(repoReachabilityTimeout, repoReachabilityMaxHops) + +func newRepoReachabilityClient(timeout time.Duration, maxHops int) *http.Client { + return &http.Client{ + Timeout: timeout, + CheckRedirect: func(_ *http.Request, via []*http.Request) error { + if len(via) >= maxHops { + return fmt.Errorf("stopped after %d redirects", maxHops) + } + return nil + }, + } +} + +// ProbeRepositoryReachable issues an HTTP HEAD against repoURL to confirm the +// repository is publicly reachable. Returns nil on 2xx/3xx responses, and an +// error wrapping ErrRepositoryURLNotReachable for 4xx/5xx, network failures, +// timeouts, or redirect chains exceeding the cap. Callers should skip the +// probe when the repository field is unset or reachability checks are +// disabled in config. +func ProbeRepositoryReachable(ctx context.Context, repoURL string) error { + return probeRepositoryReachable(ctx, defaultRepoReachabilityClient, repoURL) +} + +func probeRepositoryReachable(ctx context.Context, client *http.Client, repoURL string) error { + req, err := http.NewRequestWithContext(ctx, http.MethodHead, repoURL, nil) + if err != nil { + return fmt.Errorf("%w: %s: %w", ErrRepositoryURLNotReachable, repoURL, err) + } + req.Header.Set("User-Agent", repoReachabilityUserAgent) + + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("%w: %s: %w", ErrRepositoryURLNotReachable, repoURL, err) + } + defer resp.Body.Close() + + // Accept 2xx and 3xx (matches the spec's "200, 301, 302" with headroom for + // 304/307/308 that GitHub/GitLab also legitimately return). 4xx and 5xx + // are the failure modes the original PGA-golf bug reproduced as 404. + if resp.StatusCode >= 200 && resp.StatusCode < 400 { + return nil + } + return fmt.Errorf("%w: %s (status %d)", ErrRepositoryURLNotReachable, repoURL, resp.StatusCode) +} diff --git a/internal/validators/repository_reachability_test.go b/internal/validators/repository_reachability_test.go new file mode 100644 index 00000000..1eaf4cb6 --- /dev/null +++ b/internal/validators/repository_reachability_test.go @@ -0,0 +1,161 @@ +package validators_test + +import ( + "context" + "errors" + "fmt" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/modelcontextprotocol/registry/internal/validators" +) + +func newTestClient(timeout time.Duration, maxHops int) *http.Client { + return validators.NewRepoReachabilityClientForTest(timeout, maxHops) +} + +func probe(ctx context.Context, client *http.Client, url string) error { + return validators.ProbeRepositoryReachableForTest(ctx, client, url) +} + +func TestProbeRepositoryReachable_OK(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + if err := probe(context.Background(), newTestClient(2*time.Second, 5), srv.URL); err != nil { + t.Fatalf("expected nil for 200, got %v", err) + } +} + +func TestProbeRepositoryReachable_FollowsRedirectThenAccepts(t *testing.T) { + // First server returns 301 → second server returns 200. Default Go transport + // follows the redirect; we should accept the resulting 200. + final := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer final.Close() + + redirector := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Redirect(w, r, final.URL, http.StatusMovedPermanently) + })) + defer redirector.Close() + + if err := probe(context.Background(), newTestClient(2*time.Second, 5), redirector.URL); err != nil { + t.Fatalf("expected nil for 301→200, got %v", err) + } +} + +func TestProbeRepositoryReachable_404(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusNotFound) + })) + defer srv.Close() + + err := probe(context.Background(), newTestClient(2*time.Second, 5), srv.URL) + if err == nil { + t.Fatal("expected error for 404") + } + if !errors.Is(err, validators.ErrRepositoryURLNotReachable) { + t.Fatalf("expected wrapped ErrRepositoryURLNotReachable, got %v", err) + } + if !strings.Contains(err.Error(), "404") { + t.Fatalf("expected error to include status 404, got %q", err.Error()) + } + if !strings.Contains(err.Error(), srv.URL) { + t.Fatalf("expected error to include URL %q, got %q", srv.URL, err.Error()) + } +} + +func TestProbeRepositoryReachable_410And5xx(t *testing.T) { + cases := []int{http.StatusGone, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable} + for _, status := range cases { + t.Run(http.StatusText(status), func(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(status) + })) + defer srv.Close() + + err := probe(context.Background(), newTestClient(2*time.Second, 5), srv.URL) + if err == nil { + t.Fatalf("expected error for status %d", status) + } + if !strings.Contains(err.Error(), fmt.Sprintf("%d", status)) { + t.Fatalf("expected error to include status %d, got %q", status, err.Error()) + } + }) + } +} + +func TestProbeRepositoryReachable_Timeout(t *testing.T) { + // Server holds the connection longer than the client timeout. + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + time.Sleep(300 * time.Millisecond) + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + err := probe(context.Background(), newTestClient(50*time.Millisecond, 5), srv.URL) + if err == nil { + t.Fatal("expected timeout error") + } + if !errors.Is(err, validators.ErrRepositoryURLNotReachable) { + t.Fatalf("expected wrapped ErrRepositoryURLNotReachable, got %v", err) + } +} + +func TestProbeRepositoryReachable_RedirectChainLimit(t *testing.T) { + // Endless redirect server — must trip the redirect cap and surface a + // reachability error rather than spinning until timeout. + var hits atomic.Int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + hits.Add(1) + http.Redirect(w, r, "/next", http.StatusMovedPermanently) + })) + defer srv.Close() + + err := probe(context.Background(), newTestClient(2*time.Second, 3), srv.URL) + if err == nil { + t.Fatal("expected error for excessive redirect chain") + } + if !errors.Is(err, validators.ErrRepositoryURLNotReachable) { + t.Fatalf("expected wrapped ErrRepositoryURLNotReachable, got %v", err) + } + // Net/http counts the initial request hop, then invokes CheckRedirect on + // each redirect: with cap=3 we expect ~3 hits before bailing. + if got := hits.Load(); got > 5 { + t.Fatalf("expected redirect chain to be capped, got %d hits", got) + } +} + +func TestProbeRepositoryReachable_InvalidURL(t *testing.T) { + err := probe(context.Background(), newTestClient(2*time.Second, 5), "://not-a-url") + if err == nil { + t.Fatal("expected error for malformed URL") + } + if !errors.Is(err, validators.ErrRepositoryURLNotReachable) { + t.Fatalf("expected wrapped ErrRepositoryURLNotReachable, got %v", err) + } +} + +func TestProbeRepositoryReachable_NetworkError(t *testing.T) { + // Bind a listener and immediately close it to get a guaranteed-unreachable URL. + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + })) + url := srv.URL + srv.Close() + + err := probe(context.Background(), newTestClient(2*time.Second, 5), url) + if err == nil { + t.Fatal("expected error for unreachable host") + } + if !errors.Is(err, validators.ErrRepositoryURLNotReachable) { + t.Fatalf("expected wrapped ErrRepositoryURLNotReachable, got %v", err) + } +} diff --git a/internal/validators/validators.go b/internal/validators/validators.go index 114ed3c7..16b29e10 100644 --- a/internal/validators/validators.go +++ b/internal/validators/validators.go @@ -647,6 +647,15 @@ func ValidatePublishRequest(ctx context.Context, req apiv0.ServerJSON, cfg *conf return err } + // Probe publisher-supplied repository URL for public reachability so 404s + // like https://github.com/pgahq/mcp-pga-com (issue #395) are caught at + // publish time instead of leaking into production listings. + if cfg.EnableRepositoryReachabilityCheck { + if err := validateRepositoryReachability(ctx, req); err != nil { + return err + } + } + // Validate registry ownership for all packages if validation is enabled if cfg.EnableRegistryValidation { if err := validateRegistryOwnership(ctx, req); err != nil { @@ -669,6 +678,16 @@ func ValidateUpdateRequest(ctx context.Context, req apiv0.ServerJSON, cfg *confi return nil } +func validateRepositoryReachability(ctx context.Context, req apiv0.ServerJSON) error { + if req.Repository == nil || req.Repository.URL == "" { + return nil + } + if err := ProbeRepositoryReachable(ctx, req.Repository.URL); err != nil { + return fmt.Errorf("repository reachability check failed: %w", err) + } + return nil +} + func validateRegistryOwnership(ctx context.Context, req apiv0.ServerJSON) error { for i, pkg := range req.Packages { if err := ValidatePackage(ctx, pkg, req.Name); err != nil { diff --git a/tests/integration/docker-compose.integration-test.yml b/tests/integration/docker-compose.integration-test.yml index 6aa47e7a..502324e3 100644 --- a/tests/integration/docker-compose.integration-test.yml +++ b/tests/integration/docker-compose.integration-test.yml @@ -4,6 +4,7 @@ services: environment: - MCP_REGISTRY_SEED_FROM= - MCP_REGISTRY_ENABLE_REGISTRY_VALIDATION=false + - MCP_REGISTRY_ENABLE_REPOSITORY_REACHABILITY_CHECK=false healthcheck: test: ["CMD", "wget", "-qO-", "http://localhost:8080/v0/servers"] interval: 1s