From 838587aed7e9259f52bfc2675762d9ccef306730 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 10 Jun 2026 14:47:23 +0200 Subject: [PATCH] feat(spec): warn on dubious $ref locations Adds a warning-only check (validateDubiousRefs) over the unexpanded spec's $refs, to flag location patterns that may indicate an unsafe or adversarial spec when running spec validation: - Rule 1: an absolute local $ref (file://, Unix /abs, Windows C:\, UNC) is flagged when it escapes the spec's base path. Absolute refs that stay beneath the base path are legitimate (flatten/expand introduces such anchors for cyclical $refs) and are not flagged. Relative and fragment-only refs are exempt. - Rule 2: when remote (http/https or protocol-relative) $refs resolve to two or more distinct hosts, a single aggregate warning lists them. A single consistent remote host is not flagged. Findings are warnings only and do not affect validity. The check inspects refs as authored via analyzer.AllRefs(), before expansion. The go-openapi/spec path normalization helpers are unexported, so a minimal slash/drive-letter normalize and a lexical "beneath base" containment check are replicated locally (cleanRefPath, isBeneathBase, localBaseDir). Windows drive paths need care so the "beneath base" comparison holds across every form they can be authored in. All of: bare C:\ / C:/, the canonical file:///C:/ URL, and the (invalid but tolerated) hybrid file://C:/ form (whose drive letter parses into the URL host) normalize to the same drive path with no leading slash, matching the base derived from SpecFilePath. Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Frederic BIDON --- spec.go | 3 +- spec_messages.go | 20 +++ spec_ref_warnings.go | 209 ++++++++++++++++++++++++++++ spec_ref_warnings_test.go | 286 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 517 insertions(+), 1 deletion(-) create mode 100644 spec_ref_warnings.go create mode 100644 spec_ref_warnings_test.go diff --git a/spec.go b/spec.go index b85432f..21b5ec8 100644 --- a/spec.go +++ b/spec.go @@ -146,7 +146,8 @@ func (s *SpecValidator) Validate(data any) (*Result, *Result) { errs.Merge(s.validateNonEmptyPathParamNames()) // errs.Merge(s.validateRefNoSibling()) // warning only - errs.Merge(s.validateReferenced()) // warning only + errs.Merge(s.validateReferenced()) // warning only + errs.Merge(s.validateDubiousRefs()) // warning only return errs, warnings } diff --git a/spec_messages.go b/spec_messages.go index 42ce360..eeb8a86 100644 --- a/spec_messages.go +++ b/spec_messages.go @@ -177,6 +177,18 @@ const ( // UnusedResponseWarning ... UnusedResponseWarning = "response %q is not used anywhere" + // DubiousAbsoluteRefWarning flags a $ref pointing to an absolute local file location that escapes the + // spec's base path. Absolute local references are legitimate when they stay beneath the base path + // (flattening/expansion introduces such anchors for cyclical $refs), but an absolute reference that + // escapes the base path - or a file:// reference in a spec with no known base - may indicate an + // unsafe or adversarial spec. + DubiousAbsoluteRefWarning = "$ref %q points to an absolute or local file location that escapes the spec's base path: this may be unsafe with adversarial specs" + + // DubiousMultipleHostsWarning flags a spec whose remote $refs resolve to several distinct hosts. + // A single consistent remote host is common and legitimate; references spread across multiple hosts + // may indicate an unsafe or adversarial spec. + DubiousMultipleHostsWarning = "$ref values point to %d distinct remote hosts (%s): a spec referencing multiple hosts may be unsafe" + InvalidObject = "expected an object in %q.%s" ) @@ -404,3 +416,11 @@ func someParametersBrokenMsg(path, method, operationID string) errors.Error { func refShouldNotHaveSiblingsMsg(path, operationID string) errors.Error { return errors.New(errors.CompositeErrorCode, RefShouldNotHaveSiblingsWarning, operationID, path) } + +func dubiousAbsoluteRefMsg(ref string) errors.Error { + return errors.New(errors.CompositeErrorCode, DubiousAbsoluteRefWarning, ref) +} + +func dubiousMultipleHostsMsg(count int, hosts string) errors.Error { + return errors.New(errors.CompositeErrorCode, DubiousMultipleHostsWarning, count, hosts) +} diff --git a/spec_ref_warnings.go b/spec_ref_warnings.go new file mode 100644 index 0000000..49c7231 --- /dev/null +++ b/spec_ref_warnings.go @@ -0,0 +1,209 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package validate + +import ( + "net/url" + "path" + "sort" + "strings" + + "github.com/go-openapi/spec" +) + +// minDistinctHostsToWarn is the number of distinct remote hosts among $refs at or above which +// Rule 2 emits a host-spread warning. A single consistent remote host is legitimate. +const minDistinctHostsToWarn = 2 + +// validateDubiousRefs emits warnings (never errors) when $ref locations match patterns +// that may indicate an unsafe or adversarial spec. It inspects refs as authored, on the +// UNEXPANDED spec, so it must run before expansion flattens them away. +// +// Two rules are applied over s.analyzer.AllRefs(): +// +// - Rule 1 (absolute local escape): a $ref pointing to an absolute local file location +// (file:// scheme, a Unix absolute path, or a Windows drive path such as C:\) is dubious +// UNLESS it stays beneath the spec's base path. Absolute refs beneath the base are +// legitimate: flattening/expansion in go-openapi/spec and analysis introduces absolute +// anchors to resolve cyclical $refs. Relative and fragment-only refs are always exempt. +// +// - Rule 2 (host spread): when remote (http/https, or protocol-relative) refs resolve to +// two or more distinct hosts, a single aggregate warning lists them. A single consistent +// remote host is common and legitimate, so it is not flagged. +// +// All findings are warnings: they do not affect validity (see Result.IsValid). +func (s *SpecValidator) validateDubiousRefs() *Result { + res := pools.poolOfResults.BorrowResult() + + baseDir, hasBase := s.localBaseDir() + + remoteHosts := make(map[string]struct{}) + for _, r := range s.analyzer.AllRefs() { + u := r.GetURL() + if u == nil { // Safeguard: a valid spec always yields parseable refs + continue + } + + // Rule 1: absolute local reference escaping the base path. + if refPath, isLocalAbs := absoluteLocalRefPath(r, u); isLocalAbs { + if !hasBase || !isBeneathBase(refPath, baseDir) { + res.AddWarnings(dubiousAbsoluteRefMsg(r.String())) + } + continue + } + + // Rule 2: gather remote hosts (http/https and protocol-relative //host/...). + if host := remoteRefHost(u); host != "" { + remoteHosts[host] = struct{}{} + } + } + + if len(remoteHosts) >= minDistinctHostsToWarn { + hosts := make([]string, 0, len(remoteHosts)) + for h := range remoteHosts { + hosts = append(hosts, h) + } + sort.Strings(hosts) + res.AddWarnings(dubiousMultipleHostsMsg(len(hosts), strings.Join(hosts, ", "))) + } + + return res +} + +// absoluteLocalRefPath reports whether r is an absolute LOCAL file reference and, if so, +// returns the cleaned path it points to (without scheme/fragment, drive letter lower-cased). +// +// Classification order matters (see the empirical jsonreference flag behavior): +// - file:// scheme is local, including UNC file://host/share (inherently dubious). +// - a non-empty Host with no file scheme means remote (http/https or protocol-relative +// //host/path) - NOT local; handled by Rule 2. This must be checked before the Unix +// branch, because protocol-relative refs also set HasFullFilePath. +// - len(u.Scheme) == 1 is a Windows drive path (C:\ or C:/), whose drive+path land in +// Scheme/Opaque/Path rather than Path. Checked before the Unix branch because C:/x also +// sets HasFullFilePath, and reconstructed from the authored ref string to keep the drive. +// - !r.HasFullURL && r.HasFullFilePath is a plain Unix absolute path (/abs/models.json). +// +// Relative (./x.json) and fragment-only (#/definitions/X) refs return false. +func absoluteLocalRefPath(r spec.Ref, u *url.URL) (string, bool) { + switch { + case r.HasFileScheme: + return fileRefPath(u), true + case u.Host != "": + // Remote (http/https) or protocol-relative //host/path: handled by Rule 2. + return "", false + case len(u.Scheme) == 1: + // Windows drive letter: reconstruct from the authored ref string. + return cleanRefPath(r.String()), true + case !r.HasFullURL && r.HasFullFilePath: + return cleanRefPath(u.Path), true + default: + return "", false + } +} + +// remoteRefHost returns the host of a remote reference (http/https), or of a protocol-relative +// reference (//host/path). It returns "" for local and fragment-only refs. file:// hosts (UNC) +// are deliberately excluded: those are handled as local-absolute refs by Rule 1. +func remoteRefHost(u *url.URL) string { + switch u.Scheme { + case "http", "https": + return u.Host + case "": + // Protocol-relative //host/path: empty scheme but a host is present. + return u.Host + default: + return "" + } +} + +// localBaseDir returns the directory of the spec file, slash-normalized, when the spec was +// loaded from a local path. It returns ok=false when the base is unknown (in-memory spec) or +// remote (http/https), in which case absolute-local refs cannot be proven beneath a base and +// are treated as dubious. +func (s *SpecValidator) localBaseDir() (string, bool) { + specPath := s.spec.SpecFilePath() + if specPath == "" { + return "", false + } + + // Strip a file:// scheme if present; reject remote bases. + if u, err := url.Parse(specPath); err == nil && u.Scheme != "" { + switch { + case u.Scheme == "file": + specPath = u.Path + case len(u.Scheme) == 1: // Windows drive letter, treat as local + // keep specPath as-is (authored path) + default: // http, https, ... : no local base + return "", false + } + } + + return path.Dir(cleanRefPath(specPath)), true +} + +// isBeneathBase reports whether the cleaned target path is located within baseDir, i.e. it does +// not escape baseDir via "..". Comparison is purely lexical on cleaned paths, which is sufficient +// (and cross-platform safe) for a non-fatal warning. Both sides are expected to already be +// cleanRefPath-normalized (slashes, drive-letter case). +func isBeneathBase(target, baseDir string) bool { + if baseDir == "" { + return false + } + if target == baseDir { + return true + } + if !strings.HasSuffix(baseDir, "/") { + baseDir += "/" + } + return strings.HasPrefix(target, baseDir) +} + +// fileRefPath extracts the local path a file:// reference points to, accounting for the way +// Windows file URLs parse: +// - file:///abs/x -> /abs/x (empty host) +// - file:///C:/dir/x -> /c:/dir/x (empty host; drive sits in the path) +// - file://D:/a/x -> d:/a/x (drive letter lands in Host, rejoin it) +// - file://host/share -> /host/share/x (real UNC host kept visible so it cannot match a +// local base and stays flagged as dubious) +func fileRefPath(u *url.URL) string { + switch { + case u.Host == "": + return cleanRefPath(u.Path) + case isDriveHost(u.Host): + // Windows path authored as file://D:/... : the drive landed in Host (e.g. "d:"). + return cleanRefPath(u.Host + u.Path) + default: + // Real remote/UNC host: keep it in the path so it never matches a local base. + return cleanRefPath("//" + u.Host + u.Path) + } +} + +// isDriveHost reports whether a URL host is actually a Windows drive letter (e.g. "d:"), which +// happens when a Windows path is authored as a two-slash file URL: file://D:/path. +func isDriveHost(host string) bool { + h := strings.TrimSuffix(host, ":") + if len(h) != 1 { + return false + } + c := h[0] + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') +} + +// cleanRefPath normalizes a ref or base path for lexical comparison: backslashes to forward +// slashes, path.Clean, and a lower-cased leading Windows drive letter (matching the behavior of +// go-openapi/spec's normalizer). Plain Unix paths are unaffected, preserving case-sensitivity. +func cleanRefPath(p string) string { + p = path.Clean(strings.ReplaceAll(p, `\`, `/`)) + switch { + case len(p) >= 2 && p[1] == ':': + // drive-letter form: C:/dir -> c:/dir + p = strings.ToLower(p[:1]) + p[1:] + case len(p) >= 3 && p[0] == '/' && p[2] == ':': + // slash-prefixed drive form from canonical file:// URLs: /C:/dir -> c:/dir. + // The leading slash is dropped so this matches the base path derived from + // SpecFilePath (which has no leading slash), and the bare-drive form. + p = strings.ToLower(p[1:2]) + p[2:] + } + return p +} diff --git a/spec_ref_warnings_test.go b/spec_ref_warnings_test.go new file mode 100644 index 0000000..f013c49 --- /dev/null +++ b/spec_ref_warnings_test.go @@ -0,0 +1,286 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package validate + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/go-openapi/analysis" + "github.com/go-openapi/loads" + "github.com/go-openapi/spec" + "github.com/go-openapi/strfmt" + "github.com/go-openapi/testify/v2/assert" + "github.com/go-openapi/testify/v2/require" +) + +const ( + testUnixAbsRef = "/abs/models.json" + testWinDrivePath = "c:/dir/x.json" + testBeneathBaseID = "/base" +) + +func TestAbsoluteLocalRefPath(t *testing.T) { + tests := []struct { + ref string + wantPath string + wantOK bool + }{ + {"#/definitions/Pet", "", false}, // fragment-only + {"./models.json#/definitions/Model", "", false}, // relative + {"models.json", "", false}, // relative, no leading dot + {testUnixAbsRef, testUnixAbsRef, true}, // unix absolute + {"file://" + testUnixAbsRef, testUnixAbsRef, true}, + {"file:///D:/a/x.json", "d:/a/x.json", true}, // windows drive, canonical 3-slash file URL + {"file://D:/a/x.json", "d:/a/x.json", true}, // windows drive lands in URL host (invalid hybrid) + {"file://host/share/x.json", "/host/share/x.json", true}, // UNC: host kept, local-dubious + {`C:\dir\x.json`, testWinDrivePath, true}, // windows drive, backslashes + {"C:/dir/x.json", testWinDrivePath, true}, // windows drive, forward slashes + {"http://example.com/a.json#/x", "", false}, // remote + {"https://other.com/b.json", "", false}, // remote + {"//proto.relative/c.json", "", false}, // protocol-relative => remote, not local + } + for _, tc := range tests { + t.Run(tc.ref, func(t *testing.T) { + r := spec.MustCreateRef(tc.ref) + got, ok := absoluteLocalRefPath(r, r.GetURL()) + assert.Equal(t, tc.wantOK, ok) + assert.Equal(t, tc.wantPath, got) + }) + } +} + +// TestWindowsDriveRefFormsConverge guards the Windows nuance that cannot be exercised on a +// non-Windows runner: the valid bare-drive (C:\, C:/) and canonical file:///C:/ forms, plus the +// invalid-but-tolerated hybrid file://C:/ form (drive lands in the URL host), must all normalize +// to the SAME drive path - and that path must compare correctly against a base derived from +// SpecFilePath (which carries no leading slash). Regression guard for the leading-slash mismatch +// that previously made a legit canonical ref beneath the base spuriously warn. +func TestWindowsDriveRefFormsConverge(t *testing.T) { + const base = "c:/specdir" + const want = "c:/specdir/sub/models.json" + for _, ref := range []string{ + "file:///C:/specdir/sub/models.json", // canonical 3-slash file URL (valid) + "file://C:/specdir/sub/models.json", // hybrid 2-slash, drive in host (invalid, tolerated) + "C:/specdir/sub/models.json", // bare drive, forward slashes (valid) + `C:\specdir\sub\models.json`, // bare drive, backslashes (valid) + } { + t.Run(ref, func(t *testing.T) { + r := spec.MustCreateRef(ref) + got, ok := absoluteLocalRefPath(r, r.GetURL()) + require.True(t, ok) + assert.Equal(t, want, got, "all drive forms must normalize identically") + assert.True(t, isBeneathBase(got, base), "%q should be beneath %q", got, base) + }) + } +} + +func TestRemoteRefHost(t *testing.T) { + tests := []struct { + ref string + wantHost string + }{ + {"http://example.com/a.json", "example.com"}, + {"https://other.com/b.json", "other.com"}, + {"//proto.relative/c.json", "proto.relative"}, + {"/abs/models.json", ""}, + {"#/definitions/Pet", ""}, + {"file:///abs/models.json", ""}, // file scheme is local, not a remote host + } + for _, tc := range tests { + t.Run(tc.ref, func(t *testing.T) { + r := spec.MustCreateRef(tc.ref) + assert.Equal(t, tc.wantHost, remoteRefHost(r.GetURL())) + }) + } +} + +func TestCleanRefPath(t *testing.T) { + tests := []struct{ in, want string }{ + {"/abs/x.json", "/abs/x.json"}, + {`C:\dir\x.json`, testWinDrivePath}, + {testWinDrivePath, testWinDrivePath}, + {"/C:/dir/x.json", "c:/dir/x.json"}, // canonical file:// URL drive: leading slash dropped + {"/abs/../y.json", "/y.json"}, + {"/Case/Sensitive", "/Case/Sensitive"}, // unix case preserved + } + for _, tc := range tests { + t.Run(tc.in, func(t *testing.T) { + assert.Equal(t, tc.want, cleanRefPath(tc.in)) + }) + } +} + +func TestIsBeneathBase(t *testing.T) { + tests := []struct { + target, base string + want bool + }{ + {testBeneathBaseID + "/sub/x.json", testBeneathBaseID, true}, + {testBeneathBaseID, testBeneathBaseID, true}, + {testBeneathBaseID + "/x.json", testBeneathBaseID, true}, + {"/other/x.json", testBeneathBaseID, false}, + {"/baseball/x.json", testBeneathBaseID, false}, // prefix but not a path boundary + {"/x.json", testBeneathBaseID, false}, + {testWinDrivePath, "c:/dir", true}, + {"c:/other/x.json", "c:/dir", false}, + {"", testBeneathBaseID, false}, // no target + {testBeneathBaseID + "/x", "", false}, // no base + } + for _, tc := range tests { + t.Run(tc.target+"|"+tc.base, func(t *testing.T) { + assert.Equal(t, tc.want, isBeneathBase(tc.target, tc.base)) + }) + } +} + +// dubiousValidatorFromJSON builds a SpecValidator wired with an analyzer over an in-memory spec +// (SpecFilePath is empty, so absolute-local refs have no base and are treated as dubious). +func dubiousValidatorFromJSON(t *testing.T, doc string) *SpecValidator { + t.Helper() + d, err := loads.Analyzed(json.RawMessage(doc), "") + require.NoError(t, err) + s := NewSpecValidator(d.Schema(), strfmt.Default) + s.spec = d + s.analyzer = analysis.New(d.Spec()) + return s +} + +func warningMessages(res *Result) []string { + msgs := make([]string, 0, len(res.Warnings)) + for _, w := range res.Warnings { + msgs = append(msgs, w.Error()) + } + return msgs +} + +func TestValidateDubiousRefs_MultipleHosts(t *testing.T) { + doc := `{ + "swagger": "2.0", + "info": {"title": "t", "version": "1"}, + "paths": {}, + "definitions": { + "A": {"$ref": "http://host-one.example/a.json"}, + "B": {"$ref": "https://host-two.example/b.json"} + } + }` + res := dubiousValidatorFromJSON(t, doc).validateDubiousRefs() + msgs := warningMessages(res) + require.Len(t, msgs, 1) + assert.Contains(t, msgs[0], "distinct remote hosts") + assert.Contains(t, msgs[0], "host-one.example") + assert.Contains(t, msgs[0], "host-two.example") +} + +func TestValidateDubiousRefs_SingleHostNoWarning(t *testing.T) { + doc := `{ + "swagger": "2.0", + "info": {"title": "t", "version": "1"}, + "paths": {}, + "definitions": { + "A": {"$ref": "https://only-host.example/a.json"}, + "B": {"$ref": "https://only-host.example/b.json"} + } + }` + res := dubiousValidatorFromJSON(t, doc).validateDubiousRefs() + assert.Empty(t, warningMessages(res), "a single consistent remote host must not warn") +} + +func TestValidateDubiousRefs_AbsoluteLocalNoBase(t *testing.T) { + doc := `{ + "swagger": "2.0", + "info": {"title": "t", "version": "1"}, + "paths": {}, + "definitions": { + "A": {"$ref": "file:///etc/passwd"} + } + }` + res := dubiousValidatorFromJSON(t, doc).validateDubiousRefs() + msgs := warningMessages(res) + require.Len(t, msgs, 1) + assert.Contains(t, msgs[0], "escapes the spec's base path") + assert.Contains(t, msgs[0], "/etc/passwd") +} + +func TestValidateDubiousRefs_FragmentAndRelativeNoWarning(t *testing.T) { + doc := `{ + "swagger": "2.0", + "info": {"title": "t", "version": "1"}, + "paths": {}, + "definitions": { + "A": {"$ref": "#/definitions/B"}, + "B": {"type": "object"}, + "C": {"$ref": "./models.json#/definitions/X"} + } + }` + res := dubiousValidatorFromJSON(t, doc).validateDubiousRefs() + assert.Empty(t, warningMessages(res), "fragment-only and relative refs must not warn") +} + +// TestValidateDubiousRefs_AbsoluteBeneathBase exercises Fred's critical nuance end-to-end: +// an absolute local ref that stays BENEATH the spec's base path is legitimate (flatten/expand +// introduces such anchors for cyclical $refs) and must NOT warn, whereas one that escapes does. +func TestValidateDubiousRefs_AbsoluteBeneathBase(t *testing.T) { + dir := t.TempDir() + specPath := filepath.Join(dir, "spec.json") + beneath := filepath.Join(dir, "models.json") // absolute, beneath base + escape := filepath.Join(filepath.Dir(dir), "outside.json") // absolute, escapes base + + toFileRef := func(p string) string { + return "file://" + filepath.ToSlash(p) + } + + doc := fmt.Sprintf(`{ + "swagger": "2.0", + "info": {"title": "t", "version": "1"}, + "paths": {}, + "definitions": { + "Beneath": {"$ref": %q}, + "Escape": {"$ref": %q} + } + }`, toFileRef(beneath), toFileRef(escape)) + + require.NoError(t, os.WriteFile(specPath, []byte(doc), 0o600)) + + d, err := loads.Spec(specPath) + require.NoError(t, err) + require.NotEmpty(t, d.SpecFilePath()) + + s := NewSpecValidator(d.Schema(), strfmt.Default) + s.spec = d + s.analyzer = analysis.New(d.Spec()) + + msgs := warningMessages(s.validateDubiousRefs()) + require.Len(t, msgs, 1, "only the escaping ref should warn, not the one beneath base") + assert.Contains(t, msgs[0], "outside.json") + for _, m := range msgs { + assert.NotContains(t, m, "models.json", "absolute ref beneath base must not warn") + } +} + +func TestLocalBaseDir(t *testing.T) { + t.Run("empty spec path yields no base", func(t *testing.T) { + d, err := loads.Analyzed(json.RawMessage(`{"swagger":"2.0","info":{"title":"t","version":"1"},"paths":{}}`), "") + require.NoError(t, err) + s := &SpecValidator{spec: d} + _, ok := s.localBaseDir() + assert.False(t, ok) + }) + + t.Run("local file path yields its directory", func(t *testing.T) { + dir := t.TempDir() + specPath := filepath.Join(dir, "spec.json") + require.NoError(t, os.WriteFile(specPath, []byte(`{"swagger":"2.0","info":{"title":"t","version":"1"},"paths":{}}`), 0o600)) + d, err := loads.Spec(specPath) + require.NoError(t, err) + s := &SpecValidator{spec: d} + base, ok := s.localBaseDir() + require.True(t, ok) + assert.True(t, strings.HasSuffix(base, cleanRefPath(dir)), "base %q should end with %q", base, cleanRefPath(dir)) + }) +}