diff --git a/spec.go b/spec.go index b85432f..21b5ec8 100644 --- a/spec.go +++ b/spec.go @@ -146,7 +146,8 @@ func (s *SpecValidator) Validate(data any) (*Result, *Result) { errs.Merge(s.validateNonEmptyPathParamNames()) // errs.Merge(s.validateRefNoSibling()) // warning only - errs.Merge(s.validateReferenced()) // warning only + errs.Merge(s.validateReferenced()) // warning only + errs.Merge(s.validateDubiousRefs()) // warning only return errs, warnings } diff --git a/spec_messages.go b/spec_messages.go index 42ce360..eeb8a86 100644 --- a/spec_messages.go +++ b/spec_messages.go @@ -177,6 +177,18 @@ const ( // UnusedResponseWarning ... UnusedResponseWarning = "response %q is not used anywhere" + // DubiousAbsoluteRefWarning flags a $ref pointing to an absolute local file location that escapes the + // spec's base path. Absolute local references are legitimate when they stay beneath the base path + // (flattening/expansion introduces such anchors for cyclical $refs), but an absolute reference that + // escapes the base path - or a file:// reference in a spec with no known base - may indicate an + // unsafe or adversarial spec. + DubiousAbsoluteRefWarning = "$ref %q points to an absolute or local file location that escapes the spec's base path: this may be unsafe with adversarial specs" + + // DubiousMultipleHostsWarning flags a spec whose remote $refs resolve to several distinct hosts. + // A single consistent remote host is common and legitimate; references spread across multiple hosts + // may indicate an unsafe or adversarial spec. + DubiousMultipleHostsWarning = "$ref values point to %d distinct remote hosts (%s): a spec referencing multiple hosts may be unsafe" + InvalidObject = "expected an object in %q.%s" ) @@ -404,3 +416,11 @@ func someParametersBrokenMsg(path, method, operationID string) errors.Error { func refShouldNotHaveSiblingsMsg(path, operationID string) errors.Error { return errors.New(errors.CompositeErrorCode, RefShouldNotHaveSiblingsWarning, operationID, path) } + +func dubiousAbsoluteRefMsg(ref string) errors.Error { + return errors.New(errors.CompositeErrorCode, DubiousAbsoluteRefWarning, ref) +} + +func dubiousMultipleHostsMsg(count int, hosts string) errors.Error { + return errors.New(errors.CompositeErrorCode, DubiousMultipleHostsWarning, count, hosts) +} diff --git a/spec_ref_warnings.go b/spec_ref_warnings.go new file mode 100644 index 0000000..49c7231 --- /dev/null +++ b/spec_ref_warnings.go @@ -0,0 +1,209 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package validate + +import ( + "net/url" + "path" + "sort" + "strings" + + "github.com/go-openapi/spec" +) + +// minDistinctHostsToWarn is the number of distinct remote hosts among $refs at or above which +// Rule 2 emits a host-spread warning. A single consistent remote host is legitimate. +const minDistinctHostsToWarn = 2 + +// validateDubiousRefs emits warnings (never errors) when $ref locations match patterns +// that may indicate an unsafe or adversarial spec. It inspects refs as authored, on the +// UNEXPANDED spec, so it must run before expansion flattens them away. +// +// Two rules are applied over s.analyzer.AllRefs(): +// +// - Rule 1 (absolute local escape): a $ref pointing to an absolute local file location +// (file:// scheme, a Unix absolute path, or a Windows drive path such as C:\) is dubious +// UNLESS it stays beneath the spec's base path. Absolute refs beneath the base are +// legitimate: flattening/expansion in go-openapi/spec and analysis introduces absolute +// anchors to resolve cyclical $refs. Relative and fragment-only refs are always exempt. +// +// - Rule 2 (host spread): when remote (http/https, or protocol-relative) refs resolve to +// two or more distinct hosts, a single aggregate warning lists them. A single consistent +// remote host is common and legitimate, so it is not flagged. +// +// All findings are warnings: they do not affect validity (see Result.IsValid). +func (s *SpecValidator) validateDubiousRefs() *Result { + res := pools.poolOfResults.BorrowResult() + + baseDir, hasBase := s.localBaseDir() + + remoteHosts := make(map[string]struct{}) + for _, r := range s.analyzer.AllRefs() { + u := r.GetURL() + if u == nil { // Safeguard: a valid spec always yields parseable refs + continue + } + + // Rule 1: absolute local reference escaping the base path. + if refPath, isLocalAbs := absoluteLocalRefPath(r, u); isLocalAbs { + if !hasBase || !isBeneathBase(refPath, baseDir) { + res.AddWarnings(dubiousAbsoluteRefMsg(r.String())) + } + continue + } + + // Rule 2: gather remote hosts (http/https and protocol-relative //host/...). + if host := remoteRefHost(u); host != "" { + remoteHosts[host] = struct{}{} + } + } + + if len(remoteHosts) >= minDistinctHostsToWarn { + hosts := make([]string, 0, len(remoteHosts)) + for h := range remoteHosts { + hosts = append(hosts, h) + } + sort.Strings(hosts) + res.AddWarnings(dubiousMultipleHostsMsg(len(hosts), strings.Join(hosts, ", "))) + } + + return res +} + +// absoluteLocalRefPath reports whether r is an absolute LOCAL file reference and, if so, +// returns the cleaned path it points to (without scheme/fragment, drive letter lower-cased). +// +// Classification order matters (see the empirical jsonreference flag behavior): +// - file:// scheme is local, including UNC file://host/share (inherently dubious). +// - a non-empty Host with no file scheme means remote (http/https or protocol-relative +// //host/path) - NOT local; handled by Rule 2. This must be checked before the Unix +// branch, because protocol-relative refs also set HasFullFilePath. +// - len(u.Scheme) == 1 is a Windows drive path (C:\ or C:/), whose drive+path land in +// Scheme/Opaque/Path rather than Path. Checked before the Unix branch because C:/x also +// sets HasFullFilePath, and reconstructed from the authored ref string to keep the drive. +// - !r.HasFullURL && r.HasFullFilePath is a plain Unix absolute path (/abs/models.json). +// +// Relative (./x.json) and fragment-only (#/definitions/X) refs return false. +func absoluteLocalRefPath(r spec.Ref, u *url.URL) (string, bool) { + switch { + case r.HasFileScheme: + return fileRefPath(u), true + case u.Host != "": + // Remote (http/https) or protocol-relative //host/path: handled by Rule 2. + return "", false + case len(u.Scheme) == 1: + // Windows drive letter: reconstruct from the authored ref string. + return cleanRefPath(r.String()), true + case !r.HasFullURL && r.HasFullFilePath: + return cleanRefPath(u.Path), true + default: + return "", false + } +} + +// remoteRefHost returns the host of a remote reference (http/https), or of a protocol-relative +// reference (//host/path). It returns "" for local and fragment-only refs. file:// hosts (UNC) +// are deliberately excluded: those are handled as local-absolute refs by Rule 1. +func remoteRefHost(u *url.URL) string { + switch u.Scheme { + case "http", "https": + return u.Host + case "": + // Protocol-relative //host/path: empty scheme but a host is present. + return u.Host + default: + return "" + } +} + +// localBaseDir returns the directory of the spec file, slash-normalized, when the spec was +// loaded from a local path. It returns ok=false when the base is unknown (in-memory spec) or +// remote (http/https), in which case absolute-local refs cannot be proven beneath a base and +// are treated as dubious. +func (s *SpecValidator) localBaseDir() (string, bool) { + specPath := s.spec.SpecFilePath() + if specPath == "" { + return "", false + } + + // Strip a file:// scheme if present; reject remote bases. + if u, err := url.Parse(specPath); err == nil && u.Scheme != "" { + switch { + case u.Scheme == "file": + specPath = u.Path + case len(u.Scheme) == 1: // Windows drive letter, treat as local + // keep specPath as-is (authored path) + default: // http, https, ... : no local base + return "", false + } + } + + return path.Dir(cleanRefPath(specPath)), true +} + +// isBeneathBase reports whether the cleaned target path is located within baseDir, i.e. it does +// not escape baseDir via "..". Comparison is purely lexical on cleaned paths, which is sufficient +// (and cross-platform safe) for a non-fatal warning. Both sides are expected to already be +// cleanRefPath-normalized (slashes, drive-letter case). +func isBeneathBase(target, baseDir string) bool { + if baseDir == "" { + return false + } + if target == baseDir { + return true + } + if !strings.HasSuffix(baseDir, "/") { + baseDir += "/" + } + return strings.HasPrefix(target, baseDir) +} + +// fileRefPath extracts the local path a file:// reference points to, accounting for the way +// Windows file URLs parse: +// - file:///abs/x -> /abs/x (empty host) +// - file:///C:/dir/x -> /c:/dir/x (empty host; drive sits in the path) +// - file://D:/a/x -> d:/a/x (drive letter lands in Host, rejoin it) +// - file://host/share -> /host/share/x (real UNC host kept visible so it cannot match a +// local base and stays flagged as dubious) +func fileRefPath(u *url.URL) string { + switch { + case u.Host == "": + return cleanRefPath(u.Path) + case isDriveHost(u.Host): + // Windows path authored as file://D:/... : the drive landed in Host (e.g. "d:"). + return cleanRefPath(u.Host + u.Path) + default: + // Real remote/UNC host: keep it in the path so it never matches a local base. + return cleanRefPath("//" + u.Host + u.Path) + } +} + +// isDriveHost reports whether a URL host is actually a Windows drive letter (e.g. "d:"), which +// happens when a Windows path is authored as a two-slash file URL: file://D:/path. +func isDriveHost(host string) bool { + h := strings.TrimSuffix(host, ":") + if len(h) != 1 { + return false + } + c := h[0] + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') +} + +// cleanRefPath normalizes a ref or base path for lexical comparison: backslashes to forward +// slashes, path.Clean, and a lower-cased leading Windows drive letter (matching the behavior of +// go-openapi/spec's normalizer). Plain Unix paths are unaffected, preserving case-sensitivity. +func cleanRefPath(p string) string { + p = path.Clean(strings.ReplaceAll(p, `\`, `/`)) + switch { + case len(p) >= 2 && p[1] == ':': + // drive-letter form: C:/dir -> c:/dir + p = strings.ToLower(p[:1]) + p[1:] + case len(p) >= 3 && p[0] == '/' && p[2] == ':': + // slash-prefixed drive form from canonical file:// URLs: /C:/dir -> c:/dir. + // The leading slash is dropped so this matches the base path derived from + // SpecFilePath (which has no leading slash), and the bare-drive form. + p = strings.ToLower(p[1:2]) + p[2:] + } + return p +} diff --git a/spec_ref_warnings_test.go b/spec_ref_warnings_test.go new file mode 100644 index 0000000..f013c49 --- /dev/null +++ b/spec_ref_warnings_test.go @@ -0,0 +1,286 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package validate + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/go-openapi/analysis" + "github.com/go-openapi/loads" + "github.com/go-openapi/spec" + "github.com/go-openapi/strfmt" + "github.com/go-openapi/testify/v2/assert" + "github.com/go-openapi/testify/v2/require" +) + +const ( + testUnixAbsRef = "/abs/models.json" + testWinDrivePath = "c:/dir/x.json" + testBeneathBaseID = "/base" +) + +func TestAbsoluteLocalRefPath(t *testing.T) { + tests := []struct { + ref string + wantPath string + wantOK bool + }{ + {"#/definitions/Pet", "", false}, // fragment-only + {"./models.json#/definitions/Model", "", false}, // relative + {"models.json", "", false}, // relative, no leading dot + {testUnixAbsRef, testUnixAbsRef, true}, // unix absolute + {"file://" + testUnixAbsRef, testUnixAbsRef, true}, + {"file:///D:/a/x.json", "d:/a/x.json", true}, // windows drive, canonical 3-slash file URL + {"file://D:/a/x.json", "d:/a/x.json", true}, // windows drive lands in URL host (invalid hybrid) + {"file://host/share/x.json", "/host/share/x.json", true}, // UNC: host kept, local-dubious + {`C:\dir\x.json`, testWinDrivePath, true}, // windows drive, backslashes + {"C:/dir/x.json", testWinDrivePath, true}, // windows drive, forward slashes + {"http://example.com/a.json#/x", "", false}, // remote + {"https://other.com/b.json", "", false}, // remote + {"//proto.relative/c.json", "", false}, // protocol-relative => remote, not local + } + for _, tc := range tests { + t.Run(tc.ref, func(t *testing.T) { + r := spec.MustCreateRef(tc.ref) + got, ok := absoluteLocalRefPath(r, r.GetURL()) + assert.Equal(t, tc.wantOK, ok) + assert.Equal(t, tc.wantPath, got) + }) + } +} + +// TestWindowsDriveRefFormsConverge guards the Windows nuance that cannot be exercised on a +// non-Windows runner: the valid bare-drive (C:\, C:/) and canonical file:///C:/ forms, plus the +// invalid-but-tolerated hybrid file://C:/ form (drive lands in the URL host), must all normalize +// to the SAME drive path - and that path must compare correctly against a base derived from +// SpecFilePath (which carries no leading slash). Regression guard for the leading-slash mismatch +// that previously made a legit canonical ref beneath the base spuriously warn. +func TestWindowsDriveRefFormsConverge(t *testing.T) { + const base = "c:/specdir" + const want = "c:/specdir/sub/models.json" + for _, ref := range []string{ + "file:///C:/specdir/sub/models.json", // canonical 3-slash file URL (valid) + "file://C:/specdir/sub/models.json", // hybrid 2-slash, drive in host (invalid, tolerated) + "C:/specdir/sub/models.json", // bare drive, forward slashes (valid) + `C:\specdir\sub\models.json`, // bare drive, backslashes (valid) + } { + t.Run(ref, func(t *testing.T) { + r := spec.MustCreateRef(ref) + got, ok := absoluteLocalRefPath(r, r.GetURL()) + require.True(t, ok) + assert.Equal(t, want, got, "all drive forms must normalize identically") + assert.True(t, isBeneathBase(got, base), "%q should be beneath %q", got, base) + }) + } +} + +func TestRemoteRefHost(t *testing.T) { + tests := []struct { + ref string + wantHost string + }{ + {"http://example.com/a.json", "example.com"}, + {"https://other.com/b.json", "other.com"}, + {"//proto.relative/c.json", "proto.relative"}, + {"/abs/models.json", ""}, + {"#/definitions/Pet", ""}, + {"file:///abs/models.json", ""}, // file scheme is local, not a remote host + } + for _, tc := range tests { + t.Run(tc.ref, func(t *testing.T) { + r := spec.MustCreateRef(tc.ref) + assert.Equal(t, tc.wantHost, remoteRefHost(r.GetURL())) + }) + } +} + +func TestCleanRefPath(t *testing.T) { + tests := []struct{ in, want string }{ + {"/abs/x.json", "/abs/x.json"}, + {`C:\dir\x.json`, testWinDrivePath}, + {testWinDrivePath, testWinDrivePath}, + {"/C:/dir/x.json", "c:/dir/x.json"}, // canonical file:// URL drive: leading slash dropped + {"/abs/../y.json", "/y.json"}, + {"/Case/Sensitive", "/Case/Sensitive"}, // unix case preserved + } + for _, tc := range tests { + t.Run(tc.in, func(t *testing.T) { + assert.Equal(t, tc.want, cleanRefPath(tc.in)) + }) + } +} + +func TestIsBeneathBase(t *testing.T) { + tests := []struct { + target, base string + want bool + }{ + {testBeneathBaseID + "/sub/x.json", testBeneathBaseID, true}, + {testBeneathBaseID, testBeneathBaseID, true}, + {testBeneathBaseID + "/x.json", testBeneathBaseID, true}, + {"/other/x.json", testBeneathBaseID, false}, + {"/baseball/x.json", testBeneathBaseID, false}, // prefix but not a path boundary + {"/x.json", testBeneathBaseID, false}, + {testWinDrivePath, "c:/dir", true}, + {"c:/other/x.json", "c:/dir", false}, + {"", testBeneathBaseID, false}, // no target + {testBeneathBaseID + "/x", "", false}, // no base + } + for _, tc := range tests { + t.Run(tc.target+"|"+tc.base, func(t *testing.T) { + assert.Equal(t, tc.want, isBeneathBase(tc.target, tc.base)) + }) + } +} + +// dubiousValidatorFromJSON builds a SpecValidator wired with an analyzer over an in-memory spec +// (SpecFilePath is empty, so absolute-local refs have no base and are treated as dubious). +func dubiousValidatorFromJSON(t *testing.T, doc string) *SpecValidator { + t.Helper() + d, err := loads.Analyzed(json.RawMessage(doc), "") + require.NoError(t, err) + s := NewSpecValidator(d.Schema(), strfmt.Default) + s.spec = d + s.analyzer = analysis.New(d.Spec()) + return s +} + +func warningMessages(res *Result) []string { + msgs := make([]string, 0, len(res.Warnings)) + for _, w := range res.Warnings { + msgs = append(msgs, w.Error()) + } + return msgs +} + +func TestValidateDubiousRefs_MultipleHosts(t *testing.T) { + doc := `{ + "swagger": "2.0", + "info": {"title": "t", "version": "1"}, + "paths": {}, + "definitions": { + "A": {"$ref": "http://host-one.example/a.json"}, + "B": {"$ref": "https://host-two.example/b.json"} + } + }` + res := dubiousValidatorFromJSON(t, doc).validateDubiousRefs() + msgs := warningMessages(res) + require.Len(t, msgs, 1) + assert.Contains(t, msgs[0], "distinct remote hosts") + assert.Contains(t, msgs[0], "host-one.example") + assert.Contains(t, msgs[0], "host-two.example") +} + +func TestValidateDubiousRefs_SingleHostNoWarning(t *testing.T) { + doc := `{ + "swagger": "2.0", + "info": {"title": "t", "version": "1"}, + "paths": {}, + "definitions": { + "A": {"$ref": "https://only-host.example/a.json"}, + "B": {"$ref": "https://only-host.example/b.json"} + } + }` + res := dubiousValidatorFromJSON(t, doc).validateDubiousRefs() + assert.Empty(t, warningMessages(res), "a single consistent remote host must not warn") +} + +func TestValidateDubiousRefs_AbsoluteLocalNoBase(t *testing.T) { + doc := `{ + "swagger": "2.0", + "info": {"title": "t", "version": "1"}, + "paths": {}, + "definitions": { + "A": {"$ref": "file:///etc/passwd"} + } + }` + res := dubiousValidatorFromJSON(t, doc).validateDubiousRefs() + msgs := warningMessages(res) + require.Len(t, msgs, 1) + assert.Contains(t, msgs[0], "escapes the spec's base path") + assert.Contains(t, msgs[0], "/etc/passwd") +} + +func TestValidateDubiousRefs_FragmentAndRelativeNoWarning(t *testing.T) { + doc := `{ + "swagger": "2.0", + "info": {"title": "t", "version": "1"}, + "paths": {}, + "definitions": { + "A": {"$ref": "#/definitions/B"}, + "B": {"type": "object"}, + "C": {"$ref": "./models.json#/definitions/X"} + } + }` + res := dubiousValidatorFromJSON(t, doc).validateDubiousRefs() + assert.Empty(t, warningMessages(res), "fragment-only and relative refs must not warn") +} + +// TestValidateDubiousRefs_AbsoluteBeneathBase exercises Fred's critical nuance end-to-end: +// an absolute local ref that stays BENEATH the spec's base path is legitimate (flatten/expand +// introduces such anchors for cyclical $refs) and must NOT warn, whereas one that escapes does. +func TestValidateDubiousRefs_AbsoluteBeneathBase(t *testing.T) { + dir := t.TempDir() + specPath := filepath.Join(dir, "spec.json") + beneath := filepath.Join(dir, "models.json") // absolute, beneath base + escape := filepath.Join(filepath.Dir(dir), "outside.json") // absolute, escapes base + + toFileRef := func(p string) string { + return "file://" + filepath.ToSlash(p) + } + + doc := fmt.Sprintf(`{ + "swagger": "2.0", + "info": {"title": "t", "version": "1"}, + "paths": {}, + "definitions": { + "Beneath": {"$ref": %q}, + "Escape": {"$ref": %q} + } + }`, toFileRef(beneath), toFileRef(escape)) + + require.NoError(t, os.WriteFile(specPath, []byte(doc), 0o600)) + + d, err := loads.Spec(specPath) + require.NoError(t, err) + require.NotEmpty(t, d.SpecFilePath()) + + s := NewSpecValidator(d.Schema(), strfmt.Default) + s.spec = d + s.analyzer = analysis.New(d.Spec()) + + msgs := warningMessages(s.validateDubiousRefs()) + require.Len(t, msgs, 1, "only the escaping ref should warn, not the one beneath base") + assert.Contains(t, msgs[0], "outside.json") + for _, m := range msgs { + assert.NotContains(t, m, "models.json", "absolute ref beneath base must not warn") + } +} + +func TestLocalBaseDir(t *testing.T) { + t.Run("empty spec path yields no base", func(t *testing.T) { + d, err := loads.Analyzed(json.RawMessage(`{"swagger":"2.0","info":{"title":"t","version":"1"},"paths":{}}`), "") + require.NoError(t, err) + s := &SpecValidator{spec: d} + _, ok := s.localBaseDir() + assert.False(t, ok) + }) + + t.Run("local file path yields its directory", func(t *testing.T) { + dir := t.TempDir() + specPath := filepath.Join(dir, "spec.json") + require.NoError(t, os.WriteFile(specPath, []byte(`{"swagger":"2.0","info":{"title":"t","version":"1"},"paths":{}}`), 0o600)) + d, err := loads.Spec(specPath) + require.NoError(t, err) + s := &SpecValidator{spec: d} + base, ok := s.localBaseDir() + require.True(t, ok) + assert.True(t, strings.HasSuffix(base, cleanRefPath(dir)), "base %q should end with %q", base, cleanRefPath(dir)) + }) +}