From ea20c0888890c8f875e04612a049f7ba8cc33a7f Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 13:04:08 +0200 Subject: [PATCH 01/46] feat(grammar): P0.1 skeleton for new annotation parser Place empty package files under internal/parsers/grammar/ as the landing zone for P1 work: preprocess.go, lexer.go, parser.go, ast.go, diagnostic.go, style.go. Each file carries a TODO pointer to the P1 task. No behavior; go build ./... remains clean. See .claude/plans/grammar-parser-architecture.md and .claude/plans/grammar-parser-tasks.md. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/ast.go | 7 +++++++ internal/parsers/grammar/diagnostic.go | 5 +++++ internal/parsers/grammar/lexer.go | 5 +++++ internal/parsers/grammar/parser.go | 11 +++++++++++ internal/parsers/grammar/preprocess.go | 4 ++++ internal/parsers/grammar/style.go | 5 +++++ 6 files changed, 37 insertions(+) create mode 100644 internal/parsers/grammar/ast.go create mode 100644 internal/parsers/grammar/diagnostic.go create mode 100644 internal/parsers/grammar/lexer.go create mode 100644 internal/parsers/grammar/parser.go create mode 100644 internal/parsers/grammar/preprocess.go create mode 100644 internal/parsers/grammar/style.go diff --git a/internal/parsers/grammar/ast.go b/internal/parsers/grammar/ast.go new file mode 100644 index 0000000..2039893 --- /dev/null +++ b/internal/parsers/grammar/ast.go @@ -0,0 +1,7 @@ +package grammar + +// TODO: P1 — Block family. +// Interface: Pos, Title, Description, Diagnostics. +// Typed kinds: ModelBlock, RouteBlock, OperationBlock, ParametersBlock, +// ResponseBlock, MetaBlock, UnboundBlock. +// Iterators: Properties(), YAMLBlocks(), Extensions() as iter.Seq. diff --git a/internal/parsers/grammar/diagnostic.go b/internal/parsers/grammar/diagnostic.go new file mode 100644 index 0000000..0119a5e --- /dev/null +++ b/internal/parsers/grammar/diagnostic.go @@ -0,0 +1,5 @@ +package grammar + +// TODO: P1 — Diagnostic{Pos, Severity, Code, Message}, Severity enum +// (Error/Warning/Hint), code convention (parse.invalid-number, +// parse.unknown-keyword, parse.context-invalid, parse.invalid-extension-name, …). diff --git a/internal/parsers/grammar/lexer.go b/internal/parsers/grammar/lexer.go new file mode 100644 index 0000000..6d9db81 --- /dev/null +++ b/internal/parsers/grammar/lexer.go @@ -0,0 +1,5 @@ +package grammar + +// TODO: P1 — tokenizer: []Line → token stream. +// Token kinds: ANNOTATION, KEYWORD_VALUE, KEYWORD_BLOCK_HEAD, YAML_FENCE, TEXT, BLANK, EOF. +// Keyword lookup via keywords.go (case-insensitive, alias-aware); items.N.X expanded. diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go new file mode 100644 index 0000000..dcf86d9 --- /dev/null +++ b/internal/parsers/grammar/parser.go @@ -0,0 +1,11 @@ +// Package grammar implements the v2 annotation parser. +// +// It replaces the regexp-based parser at internal/parsers/*.go with a +// hand-rolled recursive-descent parser producing a typed Block family +// (see ast.go). +// +// See .claude/plans/grammar-parser-architecture.md for the "why" and +// .claude/plans/grammar-parser-tasks.md for the "how". +package grammar + +// TODO: P1 — recursive-descent envelope parser + Parser interface. diff --git a/internal/parsers/grammar/preprocess.go b/internal/parsers/grammar/preprocess.go new file mode 100644 index 0000000..d1942eb --- /dev/null +++ b/internal/parsers/grammar/preprocess.go @@ -0,0 +1,4 @@ +package grammar + +// TODO: P1 — comment-prefix stripping on top of (*ast.CommentGroup).Text(); +// emits []Line{Text, Pos} with position tracking and markdown table-pipe handling. diff --git a/internal/parsers/grammar/style.go b/internal/parsers/grammar/style.go new file mode 100644 index 0000000..eac7233 --- /dev/null +++ b/internal/parsers/grammar/style.go @@ -0,0 +1,5 @@ +package grammar + +// TODO: P1 — StyleRecognizer plugin interface for annotation-style prefixes +// (swagger:, openapi:, @). v2.0 ships with swagger: only; interface exists +// to make C9 (pluggable styles, v2.x) a data addition rather than a rewrite. From 22e3c90048fcaa3001841be9448b929f4375c8ad Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 13:46:32 +0200 Subject: [PATCH 02/46] feat(grammar): P0.2 keyword table at v1 parity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add keywords.go (types + functional-options constructor + Lookup/Keywords accessors) and keywords_table.go (the authored []Keyword data: 35 entries covering validations, property flags, meta single-line, block headers, plus W5 externalDocs). Design choices, per architecture §3.4 / §2.2.1 and tasks P0.2: - Kind enum lists the 8 sub-contexts where keywords may appear (Param, Header, Schema, Items, Route, Operation, Meta, Response). - ValueType covers the primitive-typed values the parser will convert in-line (Number/Integer/Boolean/StringEnum) plus the deferred categories (String verbatim, CommaList, RawBlock, RawValue). - Option A for docs: each keyword carries per-context doc strings (inParam("…"), inSchema("…"), …) so LSP can show tooltips that match where the cursor sits. - W5 opportunistic: externalDocs entry landed alongside v1 keywords. - W7 opportunistic: per-keyword legal-contexts list is exactly the seed data LSP completion will consult; seeded from observed v1 behavior (regexprs.go + tagger trees). Drop inResponse() and doc() options — no v1 keyword uses them; we re-add if W6/W2 surface the need. Add keywords_test.go covering Lookup (canonical, alias, case/space normalization, unknown) and a shape invariant (every keyword has a name, ≥1 context, and StringEnum implies Values). Also add missing SPDX headers to the P0.1 placeholder files. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/ast.go | 3 + internal/parsers/grammar/diagnostic.go | 3 + internal/parsers/grammar/keywords.go | 158 +++++++++++++ internal/parsers/grammar/keywords_table.go | 255 +++++++++++++++++++++ internal/parsers/grammar/keywords_test.go | 74 ++++++ internal/parsers/grammar/lexer.go | 3 + internal/parsers/grammar/parser.go | 3 + internal/parsers/grammar/preprocess.go | 3 + internal/parsers/grammar/style.go | 3 + 9 files changed, 505 insertions(+) create mode 100644 internal/parsers/grammar/keywords.go create mode 100644 internal/parsers/grammar/keywords_table.go create mode 100644 internal/parsers/grammar/keywords_test.go diff --git a/internal/parsers/grammar/ast.go b/internal/parsers/grammar/ast.go index 2039893..544c292 100644 --- a/internal/parsers/grammar/ast.go +++ b/internal/parsers/grammar/ast.go @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + package grammar // TODO: P1 — Block family. diff --git a/internal/parsers/grammar/diagnostic.go b/internal/parsers/grammar/diagnostic.go index 0119a5e..91f5f27 100644 --- a/internal/parsers/grammar/diagnostic.go +++ b/internal/parsers/grammar/diagnostic.go @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + package grammar // TODO: P1 — Diagnostic{Pos, Severity, Code, Message}, Severity enum diff --git a/internal/parsers/grammar/keywords.go b/internal/parsers/grammar/keywords.go new file mode 100644 index 0000000..bba7baa --- /dev/null +++ b/internal/parsers/grammar/keywords.go @@ -0,0 +1,158 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import "strings" + +// Lookup returns the Keyword matching the given name (or alias), +// case-insensitively. The second return value reports whether a match +// was found. +// +// The lexer (P1.2) uses this to classify `keyword:` lines. It operates +// over the table in keywords_table.go. +func Lookup(name string) (Keyword, bool) { + needle := strings.ToLower(strings.TrimSpace(name)) + if needle == "" { + return Keyword{}, false + } + for _, kw := range keywords { + if strings.EqualFold(kw.Name, needle) { + return kw, true + } + for _, alias := range kw.Aliases { + if strings.EqualFold(alias, needle) { + return kw, true + } + } + } + return Keyword{}, false +} + +// Keywords returns the authoritative keyword table. +// +// The docs generator (gen/main.go) consumes it to emit +// docs/annotation-keywords.md; tooling that introspects the keyword set +// (LSP, `codescan grammar check`) reads the same slice. +func Keywords() []Keyword { + out := make([]Keyword, len(keywords)) + copy(out, keywords) + return out +} + +// Kind identifies a context where `keyword: value` pairs may appear. +// +// These are the sub-scopes within an annotation block (finer-grained than +// the top-level swagger:xxx kind, which lives on the Block family in +// ast.go). The parser is context-free: it recognizes every keyword +// regardless of enclosing annotation, and uses this per-keyword legality +// set to emit non-fatal `parse.context-invalid` diagnostics. +// +// LSP completion consults the same data to filter suggestions. +type Kind int + +const ( + KindUnknown Kind = iota + + KindParam // an individual parameter (field under swagger:parameters) + KindHeader // a response header entry + KindSchema // a schema property (model field or definition) + KindItems // a nested array-items subscope + KindRoute // a metadata line under swagger:route + KindOperation // a metadata line under swagger:operation (non-YAML body) + KindMeta // a metadata line under swagger:meta + KindResponse // a response-level property (status, description, …) +) + +// ValueType categorizes the expected shape of a keyword's value. Primitive +// types (Number, Integer, Boolean, StringEnum) are converted inside the +// parser at parse time; RawValue defers type-conversion to the analyzer +// when the target Go type determines it (e.g., `default:`, `example:`). +type ValueType int + +const ( + ValueNone ValueType = iota + + ValueNumber // decimal; e.g. maximum, minimum, multipleOf + ValueInteger // unsigned count; e.g. maxLength, minItems + ValueBoolean // true/false; e.g. required, readOnly + ValueString // opaque string; e.g. pattern (verbatim, not interpreted) + ValueCommaList // comma-separated values; e.g. enum, schemes + ValueStringEnum // one of a fixed set; e.g. in, collectionFormat + ValueRawBlock // multi-line block body (headers like consumes:, security:) + ValueRawValue // raw string; analyzer type-converts per field Go type +) + +// Keyword describes one recognizable `keyword: value` form. +type Keyword struct { + Name string + Aliases []string + Value Value + Contexts []ContextDoc + Doc string +} + +// Value captures the expected value shape plus any fixed enumeration. +type Value struct { + Type ValueType + Values []string // non-nil when Type == ValueStringEnum +} + +// ContextDoc binds a legal context to a context-specific doc string. +// Separate docs per Kind let LSP show tooltips tailored to where the +// cursor is (a `maximum` in a parameter vs. a `maximum` in a schema). +type ContextDoc struct { + Kind Kind + Doc string +} + +// keyword constructs a Keyword from functional options. Used by the +// table in keywords_table.go. +func keyword(name string, opts ...keywordOpt) Keyword { + kw := Keyword{Name: name} + for _, o := range opts { + o(&kw) + } + return kw +} + +type keywordOpt func(*Keyword) + +// --- value-type options --- + +func asNumber() keywordOpt { return func(kw *Keyword) { kw.Value.Type = ValueNumber } } +func asInteger() keywordOpt { return func(kw *Keyword) { kw.Value.Type = ValueInteger } } +func asBoolean() keywordOpt { return func(kw *Keyword) { kw.Value.Type = ValueBoolean } } +func asString() keywordOpt { return func(kw *Keyword) { kw.Value.Type = ValueString } } +func asCommaList() keywordOpt { return func(kw *Keyword) { kw.Value.Type = ValueCommaList } } +func asRawBlock() keywordOpt { return func(kw *Keyword) { kw.Value.Type = ValueRawBlock } } +func asRawValue() keywordOpt { return func(kw *Keyword) { kw.Value.Type = ValueRawValue } } + +func asStringEnum(values ...string) keywordOpt { + return func(kw *Keyword) { + kw.Value.Type = ValueStringEnum + kw.Value.Values = values + } +} + +// --- alias option --- + +func aka(names ...string) keywordOpt { + return func(kw *Keyword) { kw.Aliases = append(kw.Aliases, names...) } +} + +// --- per-context legality + doc (Option A) --- + +func inParam(doc string) keywordOpt { return legalIn(KindParam, doc) } +func inHeader(doc string) keywordOpt { return legalIn(KindHeader, doc) } +func inSchema(doc string) keywordOpt { return legalIn(KindSchema, doc) } +func inItems(doc string) keywordOpt { return legalIn(KindItems, doc) } +func inRoute(doc string) keywordOpt { return legalIn(KindRoute, doc) } +func inOperation(doc string) keywordOpt { return legalIn(KindOperation, doc) } +func inMeta(doc string) keywordOpt { return legalIn(KindMeta, doc) } + +func legalIn(kind Kind, doc string) keywordOpt { + return func(kw *Keyword) { + kw.Contexts = append(kw.Contexts, ContextDoc{Kind: kind, Doc: doc}) + } +} diff --git a/internal/parsers/grammar/keywords_table.go b/internal/parsers/grammar/keywords_table.go new file mode 100644 index 0000000..ab72cf0 --- /dev/null +++ b/internal/parsers/grammar/keywords_table.go @@ -0,0 +1,255 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +// keywords is the authoritative table of `keyword: value` forms the +// parser recognizes at v1 parity. +// +// OAI 3.x additions and any workshop outcomes (W2 enum, W3 example) +// land as new entries here — no parser change required. +// +// The docs generator (gen/main.go) consumes this slice to emit +// docs/annotation-keywords.md; see P0.4. +// +// Authored canonical table, not mutable state; lives at package level so +// it compiles into the parser binary without per-call allocation. Exposed +// via Lookup / Keywords accessors. +// +//nolint:gochecknoglobals // see godoc above +var keywords = []Keyword{ + // --- numeric validations --- + + keyword("maximum", + aka("max"), + asNumber(), + inParam("Maximum value of the parameter (inclusive by default)."), + inHeader("Maximum value of the header (inclusive by default)."), + inSchema("Maximum value of the property (inclusive by default)."), + inItems("Maximum value of each array item (inclusive by default)."), + ), + keyword("minimum", + aka("min"), + asNumber(), + inParam("Minimum value of the parameter (inclusive by default)."), + inHeader("Minimum value of the header (inclusive by default)."), + inSchema("Minimum value of the property (inclusive by default)."), + inItems("Minimum value of each array item (inclusive by default)."), + ), + keyword("multipleOf", + aka("multiple of", "multiple-of"), + asNumber(), + inParam("Parameter value must be a multiple of this number."), + inHeader("Header value must be a multiple of this number."), + inSchema("Property value must be a multiple of this number."), + inItems("Each array item must be a multiple of this number."), + ), + + // --- string-length validations --- + + keyword("maxLength", + aka("max length", "max-length", "maxLen", "max len", "max-len"), + asInteger(), + inParam("Maximum length of the string parameter."), + inHeader("Maximum length of the header."), + inSchema("Maximum length of the string property."), + inItems("Maximum length of each string item."), + ), + keyword("minLength", + aka("min length", "min-length", "minLen", "min len", "min-len"), + asInteger(), + inParam("Minimum length of the string parameter."), + inHeader("Minimum length of the header."), + inSchema("Minimum length of the string property."), + inItems("Minimum length of each string item."), + ), + keyword("pattern", + asString(), + inParam("Regular expression the parameter must match."), + inHeader("Regular expression the header must match."), + inSchema("Regular expression the property must match."), + inItems("Regular expression each array item must match."), + ), + + // --- array validations --- + + keyword("maxItems", + aka("max items", "max-items", "max.items"), + asInteger(), + inParam("Maximum number of items in the parameter array."), + inHeader("Maximum number of items in the header array."), + inSchema("Maximum number of items in the array property."), + inItems("Maximum number of items at this nesting level."), + ), + keyword("minItems", + aka("min items", "min-items", "min.items"), + asInteger(), + inParam("Minimum number of items in the parameter array."), + inHeader("Minimum number of items in the header array."), + inSchema("Minimum number of items in the array property."), + inItems("Minimum number of items at this nesting level."), + ), + keyword("unique", + asBoolean(), + inParam("Whether items in the parameter array must be unique."), + inHeader("Whether items in the header array must be unique."), + inSchema("Whether items in the array property must be unique."), + inItems("Whether items at this level must be unique."), + ), + keyword("collectionFormat", + aka("collection format", "collection-format"), + asStringEnum("csv", "ssv", "tsv", "pipes", "multi"), + inParam("Array serialization format (csv, ssv, tsv, pipes, multi)."), + inHeader("Array serialization in the header (csv, ssv, tsv, pipes)."), + inItems("Nested-array serialization format."), + ), + + // --- value-bearing keywords (field-type-dependent) --- + + keyword("enum", + asCommaList(), + inParam("Allowed values for the parameter (comma-separated)."), + inHeader("Allowed values for the header (comma-separated)."), + inSchema("Allowed values for the property (comma-separated)."), + inItems("Allowed values for each array item (comma-separated)."), + ), + keyword("default", + asRawValue(), + inParam("Default value when the parameter is omitted."), + inHeader("Default value when the header is absent."), + inSchema("Default value when the property is absent."), + inItems("Default value for each array item."), + ), + keyword("example", + asRawValue(), + inParam("Example value for documentation."), + inHeader("Example value for documentation."), + inSchema("Example value for documentation."), + inItems("Example value for documentation."), + ), + + // --- property flags --- + + keyword("required", + asBoolean(), + inParam("Whether the parameter is required."), + inSchema("Whether the property is required."), + ), + keyword("readOnly", + aka("read only", "read-only"), + asBoolean(), + inSchema("Whether the property is read-only (server-set; clients may not write it)."), + ), + keyword("discriminator", + asBoolean(), + inSchema("Marks this property as the polymorphic-schema discriminator."), + ), + keyword("deprecated", + asBoolean(), + inOperation("Marks this operation as deprecated."), + inRoute("Marks this route as deprecated."), + inSchema("Marks this property as deprecated."), + ), + + // --- parameter-location directive --- + + keyword("in", + asStringEnum("query", "path", "header", "body", "formData"), + inParam("Parameter location: query, path, header, body, or formData."), + ), + + // --- meta single-line keywords --- + + keyword("schemes", + asCommaList(), + inMeta("API schemes (http, https, ws, wss)."), + inRoute("Route-level schemes override."), + inOperation("Operation-level schemes override."), + ), + keyword("version", + asString(), + inMeta("API version string."), + ), + keyword("host", + asString(), + inMeta("Host (and optional port) serving the API."), + ), + keyword("basePath", + aka("base path", "base-path"), + asString(), + inMeta("URL prefix for all API paths."), + ), + keyword("license", + asString(), + inMeta("License information (name, optional URL)."), + ), + keyword("contact", + aka("contact info", "contact-info"), + asString(), + inMeta("Contact information (name, email, URL)."), + ), + + // --- block-header keywords (multi-line bodies) --- + + keyword("consumes", + asRawBlock(), + inMeta("Default MIME types the API consumes."), + inRoute("MIME types this route consumes."), + inOperation("MIME types this operation consumes."), + ), + keyword("produces", + asRawBlock(), + inMeta("Default MIME types the API produces."), + inRoute("MIME types this route produces."), + inOperation("MIME types this operation produces."), + ), + keyword("security", + asRawBlock(), + inMeta("Default security requirements for the API."), + inRoute("Security requirements for this route."), + inOperation("Security requirements for this operation."), + ), + keyword("securityDefinitions", + aka("security definitions", "security-definitions"), + asRawBlock(), + inMeta("Declared security schemes (apiKey, basic, oauth2)."), + ), + keyword("responses", + asRawBlock(), + inRoute("Response mapping: status → response name."), + inOperation("Response mapping: status → response name."), + ), + keyword("parameters", + asRawBlock(), + inRoute("Parameter declarations for this route."), + inOperation("Parameter declarations for this operation."), + ), + keyword("extensions", + asRawBlock(), + inMeta("Custom x-* vendor extensions at the spec level."), + inRoute("Custom x-* vendor extensions on this route."), + inOperation("Custom x-* vendor extensions on this operation."), + inSchema("Custom x-* vendor extensions on this schema."), + ), + keyword("infoExtensions", + aka("info extensions", "info-extensions"), + asRawBlock(), + inMeta("Custom x-* vendor extensions on the info block."), + ), + keyword("tos", + aka("terms of service", "terms-of-service", "termsOfService"), + asRawBlock(), + inMeta("Terms-of-service URL or text."), + ), + + // --- W5 opportunistic: externalDocs --- + + keyword("externalDocs", + aka("external docs", "external-docs"), + asRawBlock(), + inMeta("External documentation reference."), + inRoute("External documentation reference."), + inOperation("External documentation reference."), + inSchema("External documentation reference."), + ), +} diff --git a/internal/parsers/grammar/keywords_test.go b/internal/parsers/grammar/keywords_test.go new file mode 100644 index 0000000..bcc1c2a --- /dev/null +++ b/internal/parsers/grammar/keywords_test.go @@ -0,0 +1,74 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import "testing" + +func TestLookup(t *testing.T) { + cases := []struct { + input string + found bool + want string // canonical name expected + valType ValueType + }{ + {"maximum", true, "maximum", ValueNumber}, + {"MAX", true, "maximum", ValueNumber}, // alias, case-insensitive + {"max-length", true, "maxLength", ValueInteger}, // alias + {"collection format", true, "collectionFormat", ValueStringEnum}, + {"in", true, "in", ValueStringEnum}, + {"not-a-keyword", false, "", ValueNone}, + {"", false, "", ValueNone}, + {" maximum ", true, "maximum", ValueNumber}, // trims whitespace + } + + for _, tc := range cases { + t.Run(tc.input, func(t *testing.T) { + kw, ok := Lookup(tc.input) + if ok != tc.found { + t.Fatalf("Lookup(%q): found=%v want=%v", tc.input, ok, tc.found) + } + if !ok { + return + } + if kw.Name != tc.want { + t.Errorf("Lookup(%q): name=%q want=%q", tc.input, kw.Name, tc.want) + } + if kw.Value.Type != tc.valType { + t.Errorf("Lookup(%q): valueType=%d want=%d", tc.input, kw.Value.Type, tc.valType) + } + }) + } +} + +func TestKeywordsTableShape(t *testing.T) { + kws := Keywords() + if len(kws) < 30 { + t.Fatalf("keyword table unexpectedly small: %d entries", len(kws)) + } + + // Every keyword must have a canonical name and at least one legal context. + for _, kw := range kws { + if kw.Name == "" { + t.Errorf("keyword with empty Name: %+v", kw) + } + if len(kw.Contexts) == 0 { + t.Errorf("keyword %q has no legal contexts", kw.Name) + } + if kw.Value.Type == ValueStringEnum && len(kw.Value.Values) == 0 { + t.Errorf("keyword %q: ValueStringEnum requires Values", kw.Name) + } + } +} + +func TestKeywordsReturnsCopy(t *testing.T) { + a := Keywords() + b := Keywords() + if len(a) == 0 { + t.Fatal("empty keyword table") + } + a[0].Name = "mutated" + if b[0].Name == "mutated" { + t.Error("Keywords() must return a defensive copy") + } +} diff --git a/internal/parsers/grammar/lexer.go b/internal/parsers/grammar/lexer.go index 6d9db81..ed4c7d3 100644 --- a/internal/parsers/grammar/lexer.go +++ b/internal/parsers/grammar/lexer.go @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + package grammar // TODO: P1 — tokenizer: []Line → token stream. diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index dcf86d9..f02a2b0 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + // Package grammar implements the v2 annotation parser. // // It replaces the regexp-based parser at internal/parsers/*.go with a diff --git a/internal/parsers/grammar/preprocess.go b/internal/parsers/grammar/preprocess.go index d1942eb..bf23217 100644 --- a/internal/parsers/grammar/preprocess.go +++ b/internal/parsers/grammar/preprocess.go @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + package grammar // TODO: P1 — comment-prefix stripping on top of (*ast.CommentGroup).Text(); diff --git a/internal/parsers/grammar/style.go b/internal/parsers/grammar/style.go index eac7233..f42bed1 100644 --- a/internal/parsers/grammar/style.go +++ b/internal/parsers/grammar/style.go @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + package grammar // TODO: P1 — StyleRecognizer plugin interface for annotation-style prefixes From cb40a3e0387ddc1982e42e981ad6506514b3ff3c Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 13:51:05 +0200 Subject: [PATCH 03/46] feat(grammar): P0.3 diagnostic types Add Severity (Error/Warning/Hint), Code (dotted stable identifier), and Diagnostic{Pos, Severity, Code, Message}. Codes prefixed "parse." for the grammar layer; sub-parser subpackages use their own prefix so codes stay globally unique. Pre-declare 10 codes the parser and its analyzers will emit in P1/P2 (invalid-number/integer/boolean/string-enum, unknown-keyword, context-invalid, invalid-extension-name, unterminated-yaml, invalid-annotation, malformed-line). The list grows as sites surface. Expose Errorf/Warnf/Hintf constructors (formatted Message) and a compiler-style Diagnostic.String() rendering so editor jump-to-line tooling can consume the output directly. Tests cover Severity.String, each constructor, the render format, and the empty-position fallback. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/diagnostic.go | 95 ++++++++++++++++++++- internal/parsers/grammar/diagnostic_test.go | 62 ++++++++++++++ 2 files changed, 154 insertions(+), 3 deletions(-) create mode 100644 internal/parsers/grammar/diagnostic_test.go diff --git a/internal/parsers/grammar/diagnostic.go b/internal/parsers/grammar/diagnostic.go index 91f5f27..6720c88 100644 --- a/internal/parsers/grammar/diagnostic.go +++ b/internal/parsers/grammar/diagnostic.go @@ -3,6 +3,95 @@ package grammar -// TODO: P1 — Diagnostic{Pos, Severity, Code, Message}, Severity enum -// (Error/Warning/Hint), code convention (parse.invalid-number, -// parse.unknown-keyword, parse.context-invalid, parse.invalid-extension-name, …). +import ( + "fmt" + "go/token" +) + +// Severity classifies a Diagnostic's seriousness. The parser never +// aborts; callers (analyzers, LSP, the CLI) decide policy based on +// severity. +type Severity int + +const ( + SeverityError Severity = iota // parse failed to interpret the line at the reported position + SeverityWarning // parse succeeded but something looks wrong (e.g., deprecated alias) + SeverityHint // informational, e.g., suggest a canonical spelling +) + +// String renders a Severity for logs and CLI output. +func (s Severity) String() string { + switch s { + case SeverityError: + return "error" + case SeverityWarning: + return "warning" + case SeverityHint: + return "hint" + default: + return fmt.Sprintf("severity(%d)", int(s)) + } +} + +// Code is a stable identifier for a class of Diagnostic. LSP clients +// filter and group by Code; tests assert on Code rather than Message. +// +// Codes are dotted, lowercase, and prefixed with "parse." for parser-side +// diagnostics. Downstream sub-parsers (e.g., internal/parsers/yaml/) use +// their own prefix (e.g., "yaml.") so codes remain globally unique. +type Code string + +// Known diagnostic codes emitted by the grammar parser. This list grows +// as P1–P2 land; every new emission site picks an existing code or adds +// a constant here so the set stays discoverable. +const ( + CodeInvalidNumber Code = "parse.invalid-number" + CodeInvalidInteger Code = "parse.invalid-integer" + CodeInvalidBoolean Code = "parse.invalid-boolean" + CodeInvalidStringEnum Code = "parse.invalid-string-enum" + CodeUnknownKeyword Code = "parse.unknown-keyword" + CodeContextInvalid Code = "parse.context-invalid" + CodeInvalidExtension Code = "parse.invalid-extension-name" + CodeUnterminatedYAML Code = "parse.unterminated-yaml" + CodeInvalidAnnotation Code = "parse.invalid-annotation" + CodeMalformedLine Code = "parse.malformed-line" +) + +// Diagnostic is one observation about a comment block. The parser +// accumulates Diagnostics into a slice on the enclosing Block (see +// ast.go) rather than throwing; callers decide whether a given severity +// is fatal for their flow. +type Diagnostic struct { + Pos token.Position + Severity Severity + Code Code + Message string +} + +// String renders a Diagnostic in a compiler-style, one-line form: +// +// file:line:col: severity: message [code] +// +// Keep the format parseable by editor jump-to-line tooling. +func (d Diagnostic) String() string { + loc := d.Pos.String() + if loc == "-" || loc == "" { + loc = "" + } + return fmt.Sprintf("%s: %s: %s [%s]", loc, d.Severity, d.Message, d.Code) +} + +// Errorf builds a SeverityError Diagnostic with a formatted message. +func Errorf(pos token.Position, code Code, format string, args ...any) Diagnostic { + return Diagnostic{Pos: pos, Severity: SeverityError, Code: code, Message: fmt.Sprintf(format, args...)} +} + +// Warnf builds a SeverityWarning Diagnostic with a formatted message. +func Warnf(pos token.Position, code Code, format string, args ...any) Diagnostic { + return Diagnostic{Pos: pos, Severity: SeverityWarning, Code: code, Message: fmt.Sprintf(format, args...)} +} + +// Hintf builds a SeverityHint Diagnostic with a formatted message. +func Hintf(pos token.Position, code Code, format string, args ...any) Diagnostic { + return Diagnostic{Pos: pos, Severity: SeverityHint, Code: code, Message: fmt.Sprintf(format, args...)} +} diff --git a/internal/parsers/grammar/diagnostic_test.go b/internal/parsers/grammar/diagnostic_test.go new file mode 100644 index 0000000..849962b --- /dev/null +++ b/internal/parsers/grammar/diagnostic_test.go @@ -0,0 +1,62 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import ( + "go/token" + "strings" + "testing" +) + +func TestSeverityString(t *testing.T) { + cases := []struct { + in Severity + want string + }{ + {SeverityError, "error"}, + {SeverityWarning, "warning"}, + {SeverityHint, "hint"}, + {Severity(99), "severity(99)"}, + } + for _, tc := range cases { + if got := tc.in.String(); got != tc.want { + t.Errorf("Severity(%d).String() = %q want %q", int(tc.in), got, tc.want) + } + } +} + +func TestDiagnosticConstructors(t *testing.T) { + pos := token.Position{Filename: "foo.go", Line: 12, Column: 3} + + err := Errorf(pos, CodeInvalidNumber, "bad %s", "value") + if err.Severity != SeverityError || err.Code != CodeInvalidNumber || err.Message != "bad value" { + t.Errorf("Errorf built unexpected Diagnostic: %+v", err) + } + + warn := Warnf(pos, CodeContextInvalid, "context mismatch") + if warn.Severity != SeverityWarning || warn.Code != CodeContextInvalid { + t.Errorf("Warnf built unexpected Diagnostic: %+v", warn) + } + + hint := Hintf(pos, CodeUnknownKeyword, "did you mean %q?", "maximum") + if hint.Severity != SeverityHint || hint.Message != `did you mean "maximum"?` { + t.Errorf("Hintf built unexpected Diagnostic: %+v", hint) + } +} + +func TestDiagnosticString(t *testing.T) { + pos := token.Position{Filename: "foo.go", Line: 12, Column: 3} + d := Errorf(pos, CodeInvalidNumber, "bad value") + got := d.String() + want := "foo.go:12:3: error: bad value [parse.invalid-number]" + if got != want { + t.Errorf("Diagnostic.String()\n got: %s\nwant: %s", got, want) + } + + // Position-less diagnostic should still render. + d2 := Errorf(token.Position{}, CodeInvalidNumber, "bad value") + if !strings.Contains(d2.String(), "") { + t.Errorf("empty position should render as : %s", d2.String()) + } +} From d6cf9976217e8bd6f32f16c781072fe228c098dd Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 13:54:34 +0200 Subject: [PATCH 04/46] feat(grammar): P0.4 docs generator for keyword table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add internal/parsers/grammar/gen — a small command that reads the authoritative Keywords() slice at `go generate` time and renders it to docs/annotation-keywords.md (summary table + per-keyword details with aliases, value type, legal contexts, per-context docs). - //go:generate directive lives in keywords_table.go, runs `go run ./gen -out ../../../docs/annotation-keywords.md`. - Output is deterministic (same input -> byte-identical file), which P0.5 will enforce in CI. - Named constants for exit codes and perms avoid mnd lint flags; no nolint directives needed. Supporting additions: - Kind.String() and ValueType.String() — labels used by the generator (and later by P1.7 context-invalid diagnostic messages). - `exhaustive` lint satisfied by explicit `KindUnknown`/`ValueNone` cases with `fallthrough` to default. Generated docs/annotation-keywords.md covers the 34 v1-parity keywords, rendered at 326 lines. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- docs/annotation-keywords.md | 326 +++++++++++++++++++++ internal/parsers/grammar/gen/main.go | 151 ++++++++++ internal/parsers/grammar/keywords.go | 53 ++++ internal/parsers/grammar/keywords_table.go | 2 + 4 files changed, 532 insertions(+) create mode 100644 docs/annotation-keywords.md create mode 100644 internal/parsers/grammar/gen/main.go diff --git a/docs/annotation-keywords.md b/docs/annotation-keywords.md new file mode 100644 index 0000000..db5304a --- /dev/null +++ b/docs/annotation-keywords.md @@ -0,0 +1,326 @@ + + + +# Annotation keywords + +This document catalogs the 34 `keyword: value` forms recognized by +the v2 grammar parser. + +## Summary + +| Keyword | Aliases | Value type | Legal in | +|---------|---------|------------|----------| +| `maximum` | `max` | `number` | param, header, schema, items | +| `minimum` | `min` | `number` | param, header, schema, items | +| `multipleOf` | `multiple of`, `multiple-of` | `number` | param, header, schema, items | +| `maxLength` | `max length`, `max-length`, `maxLen`, `max len`, `max-len` | `integer` | param, header, schema, items | +| `minLength` | `min length`, `min-length`, `minLen`, `min len`, `min-len` | `integer` | param, header, schema, items | +| `pattern` | — | `string` | param, header, schema, items | +| `maxItems` | `max items`, `max-items`, `max.items` | `integer` | param, header, schema, items | +| `minItems` | `min items`, `min-items`, `min.items` | `integer` | param, header, schema, items | +| `unique` | — | `boolean` | param, header, schema, items | +| `collectionFormat` | `collection format`, `collection-format` | `string-enum` | param, header, items | +| `enum` | — | `comma-list` | param, header, schema, items | +| `default` | — | `raw-value` | param, header, schema, items | +| `example` | — | `raw-value` | param, header, schema, items | +| `required` | — | `boolean` | param, schema | +| `readOnly` | `read only`, `read-only` | `boolean` | schema | +| `discriminator` | — | `boolean` | schema | +| `deprecated` | — | `boolean` | operation, route, schema | +| `in` | — | `string-enum` | param | +| `schemes` | — | `comma-list` | meta, route, operation | +| `version` | — | `string` | meta | +| `host` | — | `string` | meta | +| `basePath` | `base path`, `base-path` | `string` | meta | +| `license` | — | `string` | meta | +| `contact` | `contact info`, `contact-info` | `string` | meta | +| `consumes` | — | `raw-block` | meta, route, operation | +| `produces` | — | `raw-block` | meta, route, operation | +| `security` | — | `raw-block` | meta, route, operation | +| `securityDefinitions` | `security definitions`, `security-definitions` | `raw-block` | meta | +| `responses` | — | `raw-block` | route, operation | +| `parameters` | — | `raw-block` | route, operation | +| `extensions` | — | `raw-block` | meta, route, operation, schema | +| `infoExtensions` | `info extensions`, `info-extensions` | `raw-block` | meta | +| `tos` | `terms of service`, `terms-of-service`, `termsOfService` | `raw-block` | meta | +| `externalDocs` | `external docs`, `external-docs` | `raw-block` | meta, route, operation, schema | + +## Details + +### `maximum` + +- **Aliases:** `max` +- **Value type:** `number` +- **Legal contexts:** + - `param` — Maximum value of the parameter (inclusive by default). + - `header` — Maximum value of the header (inclusive by default). + - `schema` — Maximum value of the property (inclusive by default). + - `items` — Maximum value of each array item (inclusive by default). + +### `minimum` + +- **Aliases:** `min` +- **Value type:** `number` +- **Legal contexts:** + - `param` — Minimum value of the parameter (inclusive by default). + - `header` — Minimum value of the header (inclusive by default). + - `schema` — Minimum value of the property (inclusive by default). + - `items` — Minimum value of each array item (inclusive by default). + +### `multipleOf` + +- **Aliases:** `multiple of`, `multiple-of` +- **Value type:** `number` +- **Legal contexts:** + - `param` — Parameter value must be a multiple of this number. + - `header` — Header value must be a multiple of this number. + - `schema` — Property value must be a multiple of this number. + - `items` — Each array item must be a multiple of this number. + +### `maxLength` + +- **Aliases:** `max length`, `max-length`, `maxLen`, `max len`, `max-len` +- **Value type:** `integer` +- **Legal contexts:** + - `param` — Maximum length of the string parameter. + - `header` — Maximum length of the header. + - `schema` — Maximum length of the string property. + - `items` — Maximum length of each string item. + +### `minLength` + +- **Aliases:** `min length`, `min-length`, `minLen`, `min len`, `min-len` +- **Value type:** `integer` +- **Legal contexts:** + - `param` — Minimum length of the string parameter. + - `header` — Minimum length of the header. + - `schema` — Minimum length of the string property. + - `items` — Minimum length of each string item. + +### `pattern` + +- **Value type:** `string` +- **Legal contexts:** + - `param` — Regular expression the parameter must match. + - `header` — Regular expression the header must match. + - `schema` — Regular expression the property must match. + - `items` — Regular expression each array item must match. + +### `maxItems` + +- **Aliases:** `max items`, `max-items`, `max.items` +- **Value type:** `integer` +- **Legal contexts:** + - `param` — Maximum number of items in the parameter array. + - `header` — Maximum number of items in the header array. + - `schema` — Maximum number of items in the array property. + - `items` — Maximum number of items at this nesting level. + +### `minItems` + +- **Aliases:** `min items`, `min-items`, `min.items` +- **Value type:** `integer` +- **Legal contexts:** + - `param` — Minimum number of items in the parameter array. + - `header` — Minimum number of items in the header array. + - `schema` — Minimum number of items in the array property. + - `items` — Minimum number of items at this nesting level. + +### `unique` + +- **Value type:** `boolean` +- **Legal contexts:** + - `param` — Whether items in the parameter array must be unique. + - `header` — Whether items in the header array must be unique. + - `schema` — Whether items in the array property must be unique. + - `items` — Whether items at this level must be unique. + +### `collectionFormat` + +- **Aliases:** `collection format`, `collection-format` +- **Value type:** `string-enum` (one of: `csv`, `ssv`, `tsv`, `pipes`, `multi`) +- **Legal contexts:** + - `param` — Array serialization format (csv, ssv, tsv, pipes, multi). + - `header` — Array serialization in the header (csv, ssv, tsv, pipes). + - `items` — Nested-array serialization format. + +### `enum` + +- **Value type:** `comma-list` +- **Legal contexts:** + - `param` — Allowed values for the parameter (comma-separated). + - `header` — Allowed values for the header (comma-separated). + - `schema` — Allowed values for the property (comma-separated). + - `items` — Allowed values for each array item (comma-separated). + +### `default` + +- **Value type:** `raw-value` +- **Legal contexts:** + - `param` — Default value when the parameter is omitted. + - `header` — Default value when the header is absent. + - `schema` — Default value when the property is absent. + - `items` — Default value for each array item. + +### `example` + +- **Value type:** `raw-value` +- **Legal contexts:** + - `param` — Example value for documentation. + - `header` — Example value for documentation. + - `schema` — Example value for documentation. + - `items` — Example value for documentation. + +### `required` + +- **Value type:** `boolean` +- **Legal contexts:** + - `param` — Whether the parameter is required. + - `schema` — Whether the property is required. + +### `readOnly` + +- **Aliases:** `read only`, `read-only` +- **Value type:** `boolean` +- **Legal contexts:** + - `schema` — Whether the property is read-only (server-set; clients may not write it). + +### `discriminator` + +- **Value type:** `boolean` +- **Legal contexts:** + - `schema` — Marks this property as the polymorphic-schema discriminator. + +### `deprecated` + +- **Value type:** `boolean` +- **Legal contexts:** + - `operation` — Marks this operation as deprecated. + - `route` — Marks this route as deprecated. + - `schema` — Marks this property as deprecated. + +### `in` + +- **Value type:** `string-enum` (one of: `query`, `path`, `header`, `body`, `formData`) +- **Legal contexts:** + - `param` — Parameter location: query, path, header, body, or formData. + +### `schemes` + +- **Value type:** `comma-list` +- **Legal contexts:** + - `meta` — API schemes (http, https, ws, wss). + - `route` — Route-level schemes override. + - `operation` — Operation-level schemes override. + +### `version` + +- **Value type:** `string` +- **Legal contexts:** + - `meta` — API version string. + +### `host` + +- **Value type:** `string` +- **Legal contexts:** + - `meta` — Host (and optional port) serving the API. + +### `basePath` + +- **Aliases:** `base path`, `base-path` +- **Value type:** `string` +- **Legal contexts:** + - `meta` — URL prefix for all API paths. + +### `license` + +- **Value type:** `string` +- **Legal contexts:** + - `meta` — License information (name, optional URL). + +### `contact` + +- **Aliases:** `contact info`, `contact-info` +- **Value type:** `string` +- **Legal contexts:** + - `meta` — Contact information (name, email, URL). + +### `consumes` + +- **Value type:** `raw-block` +- **Legal contexts:** + - `meta` — Default MIME types the API consumes. + - `route` — MIME types this route consumes. + - `operation` — MIME types this operation consumes. + +### `produces` + +- **Value type:** `raw-block` +- **Legal contexts:** + - `meta` — Default MIME types the API produces. + - `route` — MIME types this route produces. + - `operation` — MIME types this operation produces. + +### `security` + +- **Value type:** `raw-block` +- **Legal contexts:** + - `meta` — Default security requirements for the API. + - `route` — Security requirements for this route. + - `operation` — Security requirements for this operation. + +### `securityDefinitions` + +- **Aliases:** `security definitions`, `security-definitions` +- **Value type:** `raw-block` +- **Legal contexts:** + - `meta` — Declared security schemes (apiKey, basic, oauth2). + +### `responses` + +- **Value type:** `raw-block` +- **Legal contexts:** + - `route` — Response mapping: status → response name. + - `operation` — Response mapping: status → response name. + +### `parameters` + +- **Value type:** `raw-block` +- **Legal contexts:** + - `route` — Parameter declarations for this route. + - `operation` — Parameter declarations for this operation. + +### `extensions` + +- **Value type:** `raw-block` +- **Legal contexts:** + - `meta` — Custom x-* vendor extensions at the spec level. + - `route` — Custom x-* vendor extensions on this route. + - `operation` — Custom x-* vendor extensions on this operation. + - `schema` — Custom x-* vendor extensions on this schema. + +### `infoExtensions` + +- **Aliases:** `info extensions`, `info-extensions` +- **Value type:** `raw-block` +- **Legal contexts:** + - `meta` — Custom x-* vendor extensions on the info block. + +### `tos` + +- **Aliases:** `terms of service`, `terms-of-service`, `termsOfService` +- **Value type:** `raw-block` +- **Legal contexts:** + - `meta` — Terms-of-service URL or text. + +### `externalDocs` + +- **Aliases:** `external docs`, `external-docs` +- **Value type:** `raw-block` +- **Legal contexts:** + - `meta` — External documentation reference. + - `route` — External documentation reference. + - `operation` — External documentation reference. + - `schema` — External documentation reference. + diff --git a/internal/parsers/grammar/gen/main.go b/internal/parsers/grammar/gen/main.go new file mode 100644 index 0000000..d453c62 --- /dev/null +++ b/internal/parsers/grammar/gen/main.go @@ -0,0 +1,151 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +// Command gen renders the authoritative keyword table +// (internal/parsers/grammar/keywords_table.go) as a human-readable +// markdown reference at docs/annotation-keywords.md. +// +// It is invoked via `go generate ./internal/parsers/grammar/...` and +// produces output deterministically: the same input produces the same +// output, byte for byte. CI checks that running the generator leaves +// the committed file unchanged. +// +// Usage: +// +// go run ./gen -out ../../../docs/annotation-keywords.md +package main + +import ( + "bytes" + "flag" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "github.com/go-openapi/codescan/internal/parsers/grammar" +) + +const ( + exitUsage = 2 + exitError = 1 + + // Markdown output is committed to git and read by humans / CI; standard + // file/dir perms. The file itself contains no secrets. + fileMode = 0o644 + dirMode = 0o755 +) + +func main() { + out := flag.String("out", "", "output markdown file (required)") + flag.Parse() + + if *out == "" { + fmt.Fprintln(os.Stderr, "gen: -out is required") + os.Exit(exitUsage) + } + + var buf bytes.Buffer + render(&buf, grammar.Keywords()) + + if err := writeFile(*out, buf.Bytes()); err != nil { + fmt.Fprintf(os.Stderr, "gen: %v\n", err) + os.Exit(exitError) + } +} + +func writeFile(path string, data []byte) error { + if err := os.MkdirAll(filepath.Dir(path), dirMode); err != nil { + return err + } + return os.WriteFile(path, data, fileMode) +} + +func render(w io.Writer, kws []grammar.Keyword) { + fmt.Fprintln(w, "") + fmt.Fprintln(w, "") + fmt.Fprintln(w) + fmt.Fprintln(w, "# Annotation keywords") + fmt.Fprintln(w) + fmt.Fprintf(w, "This document catalogs the %d `keyword: value` forms recognized by\n", len(kws)) + fmt.Fprintln(w, "the v2 grammar parser.") + fmt.Fprintln(w) + + renderSummary(w, kws) + renderDetails(w, kws) +} + +func renderSummary(w io.Writer, kws []grammar.Keyword) { + fmt.Fprintln(w, "## Summary") + fmt.Fprintln(w) + fmt.Fprintln(w, "| Keyword | Aliases | Value type | Legal in |") + fmt.Fprintln(w, "|---------|---------|------------|----------|") + for _, kw := range kws { + fmt.Fprintf(w, "| `%s` | %s | `%s` | %s |\n", + kw.Name, + formatAliases(kw.Aliases), + kw.Value.Type, + formatContexts(kw.Contexts), + ) + } + fmt.Fprintln(w) +} + +func renderDetails(w io.Writer, kws []grammar.Keyword) { + fmt.Fprintln(w, "## Details") + fmt.Fprintln(w) + for _, kw := range kws { + fmt.Fprintf(w, "### `%s`\n\n", kw.Name) + + if len(kw.Aliases) > 0 { + fmt.Fprintf(w, "- **Aliases:** %s\n", formatAliases(kw.Aliases)) + } + fmt.Fprintf(w, "- **Value type:** `%s`", kw.Value.Type) + if len(kw.Value.Values) > 0 { + fmt.Fprintf(w, " (one of: %s)", formatEnumValues(kw.Value.Values)) + } + fmt.Fprintln(w) + + fmt.Fprintln(w, "- **Legal contexts:**") + for _, ctx := range kw.Contexts { + fmt.Fprintf(w, " - `%s` — %s\n", ctx.Kind, ctx.Doc) + } + + if kw.Doc != "" { + fmt.Fprintf(w, "\n%s\n", kw.Doc) + } + fmt.Fprintln(w) + } +} + +func formatAliases(aliases []string) string { + if len(aliases) == 0 { + return "—" + } + out := make([]string, len(aliases)) + for i, a := range aliases { + out[i] = "`" + a + "`" + } + return strings.Join(out, ", ") +} + +func formatContexts(ctxs []grammar.ContextDoc) string { + if len(ctxs) == 0 { + return "—" + } + out := make([]string, len(ctxs)) + for i, c := range ctxs { + out[i] = c.Kind.String() + } + return strings.Join(out, ", ") +} + +func formatEnumValues(vs []string) string { + out := make([]string, len(vs)) + for i, v := range vs { + out[i] = "`" + v + "`" + } + return strings.Join(out, ", ") +} diff --git a/internal/parsers/grammar/keywords.go b/internal/parsers/grammar/keywords.go index bba7baa..e00741a 100644 --- a/internal/parsers/grammar/keywords.go +++ b/internal/parsers/grammar/keywords.go @@ -64,6 +64,33 @@ const ( KindResponse // a response-level property (status, description, …) ) +// String renders a Kind as the lowercase label used in diagnostics and +// docs. +func (k Kind) String() string { + switch k { + case KindParam: + return "param" + case KindHeader: + return "header" + case KindSchema: + return "schema" + case KindItems: + return "items" + case KindRoute: + return "route" + case KindOperation: + return "operation" + case KindMeta: + return "meta" + case KindResponse: + return "response" + case KindUnknown: + fallthrough + default: + return "unknown" + } +} + // ValueType categorizes the expected shape of a keyword's value. Primitive // types (Number, Integer, Boolean, StringEnum) are converted inside the // parser at parse time; RawValue defers type-conversion to the analyzer @@ -83,6 +110,32 @@ const ( ValueRawValue // raw string; analyzer type-converts per field Go type ) +// String renders a ValueType as the label used in diagnostics and docs. +func (v ValueType) String() string { + switch v { + case ValueNumber: + return "number" + case ValueInteger: + return "integer" + case ValueBoolean: + return "boolean" + case ValueString: + return "string" + case ValueCommaList: + return "comma-list" + case ValueStringEnum: + return "string-enum" + case ValueRawBlock: + return "raw-block" + case ValueRawValue: + return "raw-value" + case ValueNone: + fallthrough + default: + return "none" + } +} + // Keyword describes one recognizable `keyword: value` form. type Keyword struct { Name string diff --git a/internal/parsers/grammar/keywords_table.go b/internal/parsers/grammar/keywords_table.go index ab72cf0..c3536ed 100644 --- a/internal/parsers/grammar/keywords_table.go +++ b/internal/parsers/grammar/keywords_table.go @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers // SPDX-License-Identifier: Apache-2.0 +//go:generate go run ./gen -out ../../../docs/annotation-keywords.md + package grammar // keywords is the authoritative table of `keyword: value` forms the From 4cfc5a3b70ebdd02e89f6d9eb14adcfec75cea45 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 13:56:18 +0200 Subject: [PATCH 05/46] feat(grammar): P0.5 CI drift check for annotation-keywords.md Expose the generator's Render() function and add gen_test.go with TestGeneratedDocIsCurrent: reads the committed docs/annotation-keywords.md and compares it byte-for-byte against a fresh render of the current keyword table. If the table changes and the doc isn't regenerated, CI fails with an actionable message telling the developer to run go generate and commit. Rationale for putting the check in a _test.go rather than a dedicated workflow: this repo delegates CI plumbing to go-openapi/ci-workflows' shared workflows. Adding a bespoke workflow just for this check would break that pattern for a one-line assertion. A unit test runs as part of the existing go-test job at zero ceremony. Completes P0. Next: P1 core parser pipeline. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/gen/gen_test.go | 42 ++++++++++++++++++++++++ internal/parsers/grammar/gen/main.go | 6 ++-- 2 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 internal/parsers/grammar/gen/gen_test.go diff --git a/internal/parsers/grammar/gen/gen_test.go b/internal/parsers/grammar/gen/gen_test.go new file mode 100644 index 0000000..2ee239d --- /dev/null +++ b/internal/parsers/grammar/gen/gen_test.go @@ -0,0 +1,42 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "bytes" + "os" + "path/filepath" + "testing" + + "github.com/go-openapi/codescan/internal/parsers/grammar" +) + +// TestGeneratedDocIsCurrent verifies that re-running the generator +// against the current keyword table produces a byte-identical +// docs/annotation-keywords.md. If this test fails, run: +// +// go generate ./internal/parsers/grammar/... +// +// and commit the updated docs file. +func TestGeneratedDocIsCurrent(t *testing.T) { + // Test runs in internal/parsers/grammar/gen/, so climb 4 levels. + docPath := filepath.Join("..", "..", "..", "..", "docs", "annotation-keywords.md") + + committed, err := os.ReadFile(docPath) + if err != nil { + t.Fatalf("read committed doc: %v", err) + } + + var got bytes.Buffer + Render(&got, grammar.Keywords()) + + if !bytes.Equal(committed, got.Bytes()) { + t.Fatalf( + "docs/annotation-keywords.md is out of sync with keywords_table.go.\n"+ + "Regenerate with: go generate ./internal/parsers/grammar/...\n"+ + "(committed=%d bytes, generated=%d bytes)", + len(committed), got.Len(), + ) + } +} diff --git a/internal/parsers/grammar/gen/main.go b/internal/parsers/grammar/gen/main.go index d453c62..ae1200b 100644 --- a/internal/parsers/grammar/gen/main.go +++ b/internal/parsers/grammar/gen/main.go @@ -47,7 +47,7 @@ func main() { } var buf bytes.Buffer - render(&buf, grammar.Keywords()) + Render(&buf, grammar.Keywords()) if err := writeFile(*out, buf.Bytes()); err != nil { fmt.Fprintf(os.Stderr, "gen: %v\n", err) @@ -62,7 +62,9 @@ func writeFile(path string, data []byte) error { return os.WriteFile(path, data, fileMode) } -func render(w io.Writer, kws []grammar.Keyword) { +// Render writes the markdown representation of the given keyword set to +// w. Exported so drift-check tests can invoke it without calling main. +func Render(w io.Writer, kws []grammar.Keyword) { fmt.Fprintln(w, "") fmt.Fprintln(w, "") From db73bf74bc54ed524b8294f08a79654b94f8fbcc Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 14:34:15 +0200 Subject: [PATCH 06/46] feat(grammar): P1.1 comment preprocessor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Preprocess(cg, fset) -> []Line{Text, Pos}. Handles both // and /* */ forms, including multi-line blocks with continuation asterisks. Leading godoc decoration (whitespace, *, /, -, optional markdown table pipe) is stripped via trimContentPrefix, mirroring the v1 rxUncommentHeaders regex so fixtures stay parity-compatible at the parse-output level. Position tracking: each Line carries the token.Position of the first character of Text. Continuation lines inside a /* ... */ block report Column=1 for simplicity; precise column reconstruction would require re-tokenising and is deferred until LSP needs it. Fence-body indentation is not preserved here — fence state lives at the lexer layer (P1.2), so the preprocessor stays stateless and position-only. Documented in the godoc. Tests cover: - nil CommentGroup / FileSet returns nil - single-line and multi-line // comments - /* ... */ blocks with leading '*' decorations - markdown table-pipe stripping (and whitespace after the pipe) - embedded whitespace preserved inside Text - multiple *ast.Comment entries in one CommentGroup Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/preprocess.go | 88 +++++++++- internal/parsers/grammar/preprocess_test.go | 174 ++++++++++++++++++++ 2 files changed, 260 insertions(+), 2 deletions(-) create mode 100644 internal/parsers/grammar/preprocess_test.go diff --git a/internal/parsers/grammar/preprocess.go b/internal/parsers/grammar/preprocess.go index bf23217..66d878e 100644 --- a/internal/parsers/grammar/preprocess.go +++ b/internal/parsers/grammar/preprocess.go @@ -3,5 +3,89 @@ package grammar -// TODO: P1 — comment-prefix stripping on top of (*ast.CommentGroup).Text(); -// emits []Line{Text, Pos} with position tracking and markdown table-pipe handling. +import ( + "go/ast" + "go/token" + "strings" +) + +// Line is one preprocessed comment line ready for the lexer. +// +// Text has the Go comment markers (// /* */) stripped, along with +// leading continuation decorations common in godoc comments (spaces, +// tabs, asterisks, slashes, dashes, optional markdown table pipe). +// Internal content and embedded whitespace are preserved — fence-body +// indentation handling lives at the lexer layer where fence state is +// tracked. +// +// Pos is the position of Text's first character in the source file. +// For continuation lines inside a /* … */ block, the column is +// approximated to 1 — exact column reconstruction would require +// re-tokenising the comment body and is deferred until LSP needs it. +type Line struct { + Text string + Pos token.Position +} + +// Preprocess turns a comment group into a position-tagged []Line. +// +// Nil CommentGroup or FileSet returns nil. The function is pure: it +// makes no syscalls, allocates a slice proportional to the number of +// physical lines, and is safe for concurrent use. +// +// See architecture §3.1 (stage diagram) and tasks P1.1. +func Preprocess(cg *ast.CommentGroup, fset *token.FileSet) []Line { + if cg == nil || fset == nil { + return nil + } + var out []Line + for _, c := range cg.List { + out = append(out, stripComment(c.Text, fset.Position(c.Slash))...) + } + return out +} + +// stripComment returns one Line per physical source line of a single +// *ast.Comment. It handles both the `//` line-comment form and the +// `/* … */` block form, including multi-line blocks. +func stripComment(raw string, basePos token.Position) []Line { + switch { + case strings.HasPrefix(raw, "//"): + text := trimContentPrefix(strings.TrimPrefix(raw, "//")) + return []Line{{Text: text, Pos: basePos}} + case strings.HasPrefix(raw, "/*"): + body := strings.TrimSuffix(strings.TrimPrefix(raw, "/*"), "*/") + rawLines := strings.Split(body, "\n") + out := make([]Line, 0, len(rawLines)) + for i, r := range rawLines { + pos := basePos + pos.Line += i + if i > 0 { + pos.Column = 1 + } + out = append(out, Line{Text: trimContentPrefix(r), Pos: pos}) + } + return out + default: + // Not a valid Go comment; preserve input defensively so + // downstream layers can surface a diagnostic rather than + // silently lose data. + return []Line{{Text: raw, Pos: basePos}} + } +} + +// trimContentPrefix removes the leading godoc-style decoration that +// precedes real content on a comment line: +// - whitespace (space, tab) +// - continuation slashes and asterisks (“//“, “ * “, “ * “) +// - dashes (“ -- “) +// - an optional single markdown table pipe “|“ +// +// The set mirrors the v1 parser's rxUncommentHeaders so migrated +// fixtures match byte-for-byte at the parse-output level (pre-P5 +// parity harness). +func trimContentPrefix(s string) string { + s = strings.TrimLeft(s, " \t*/-") + s = strings.TrimPrefix(s, "|") + return strings.TrimLeft(s, " \t") +} diff --git a/internal/parsers/grammar/preprocess_test.go b/internal/parsers/grammar/preprocess_test.go new file mode 100644 index 0000000..01cb82a --- /dev/null +++ b/internal/parsers/grammar/preprocess_test.go @@ -0,0 +1,174 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import ( + "go/ast" + "go/parser" + "go/token" + "testing" +) + +// parseSource is a test helper that parses a Go source file and returns +// the comment group attached to its first top-level declaration, plus +// the FileSet used during parsing. +func parseSource(t *testing.T, src string) (*ast.CommentGroup, *token.FileSet) { + t.Helper() + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, "test.go", src, parser.ParseComments) + if err != nil { + t.Fatalf("parse: %v", err) + } + if len(f.Decls) == 0 { + t.Fatal("no decls in test source") + } + switch d := f.Decls[0].(type) { + case *ast.GenDecl: + if d.Doc != nil { + return d.Doc, fset + } + case *ast.FuncDecl: + if d.Doc != nil { + return d.Doc, fset + } + } + t.Fatal("decl has no doc comment") + return nil, nil +} + +func TestPreprocessNil(t *testing.T) { + if got := Preprocess(nil, token.NewFileSet()); got != nil { + t.Errorf("nil CommentGroup: want nil, got %v", got) + } + cg := &ast.CommentGroup{} + if got := Preprocess(cg, nil); got != nil { + t.Errorf("nil FileSet: want nil, got %v", got) + } +} + +func TestPreprocessSingleLineComment(t *testing.T) { + cg, fset := parseSource(t, "package p\n\n// swagger:model Foo\ntype Foo struct{}\n") + lines := Preprocess(cg, fset) + if len(lines) != 1 { + t.Fatalf("want 1 line, got %d: %+v", len(lines), lines) + } + if lines[0].Text != "swagger:model Foo" { + t.Errorf("text: got %q want %q", lines[0].Text, "swagger:model Foo") + } + if lines[0].Pos.Line != 3 { + t.Errorf("line: got %d want 3", lines[0].Pos.Line) + } +} + +func TestPreprocessMultipleLineComments(t *testing.T) { + src := `package p + +// swagger:model Foo +// maximum: 10 +// minimum: 0 +type Foo int +` + cg, fset := parseSource(t, src) + lines := Preprocess(cg, fset) + want := []string{"swagger:model Foo", "maximum: 10", "minimum: 0"} + if len(lines) != len(want) { + t.Fatalf("want %d lines, got %d", len(want), len(lines)) + } + for i, w := range want { + if lines[i].Text != w { + t.Errorf("line %d text: got %q want %q", i, lines[i].Text, w) + } + if lines[i].Pos.Line != 3+i { + t.Errorf("line %d: pos.Line = %d want %d", i, lines[i].Pos.Line, 3+i) + } + } +} + +func TestPreprocessBlockComment(t *testing.T) { + src := `package p + +/* + * swagger:model Foo + * maximum: 10 + */ +type Foo int +` + cg, fset := parseSource(t, src) + lines := Preprocess(cg, fset) + // Expect 4 lines: empty first, two content, empty last. + if len(lines) != 4 { + t.Fatalf("want 4 lines, got %d: %+v", len(lines), lines) + } + if lines[1].Text != "swagger:model Foo" { + t.Errorf("line 1: got %q want %q", lines[1].Text, "swagger:model Foo") + } + if lines[2].Text != "maximum: 10" { + t.Errorf("line 2: got %q want %q", lines[2].Text, "maximum: 10") + } + // Positions should increment. + if lines[1].Pos.Line != lines[0].Pos.Line+1 { + t.Errorf("block-comment line positions must increment: %+v", lines) + } +} + +func TestPreprocessStripsMarkdownTablePipe(t *testing.T) { + src := `package p + +// | swagger:model Foo | +type Foo int +` + cg, fset := parseSource(t, src) + lines := Preprocess(cg, fset) + if len(lines) != 1 { + t.Fatalf("want 1 line, got %d", len(lines)) + } + // Leading pipe stripped; content (including trailing pipe) preserved. + if lines[0].Text != "swagger:model Foo |" { + t.Errorf("got %q want %q", lines[0].Text, "swagger:model Foo |") + } +} + +func TestPreprocessPreservesEmbeddedWhitespace(t *testing.T) { + src := `package p + +// indented content +type Foo int +` + cg, fset := parseSource(t, src) + lines := Preprocess(cg, fset) + if len(lines) != 1 { + t.Fatalf("want 1 line, got %d", len(lines)) + } + // trimContentPrefix strips leading whitespace; embedded spaces + // inside Text remain. + if lines[0].Text != "indented content" { + t.Errorf("got %q want %q", lines[0].Text, "indented content") + } +} + +func TestPreprocessMultiCommentGroup(t *testing.T) { + // A comment group with multiple *ast.Comment entries separated by + // only whitespace — Go groups them into a single CommentGroup. + src := `package p + +// first +// second +// third +type Foo int +` + cg, fset := parseSource(t, src) + if len(cg.List) < 2 { + t.Fatalf("expected multi-entry CommentGroup, got %d", len(cg.List)) + } + lines := Preprocess(cg, fset) + if len(lines) != 3 { + t.Fatalf("want 3 lines, got %d", len(lines)) + } + want := []string{"first", "second", "third"} + for i, w := range want { + if lines[i].Text != w { + t.Errorf("line %d: got %q want %q", i, lines[i].Text, w) + } + } +} From 34ea09482af99ea3df3c2e58330c31658d162ed8 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 14:59:01 +0200 Subject: [PATCH 07/46] refactor(grammar): precise Column/Offset on every preprocessed Line Tighten stripComment so Line.Pos.Line, .Column, and .Offset are all accurate on every emitted line, including continuation lines inside /* ... */ blocks. Previously the column was approximated to 1 on block-comment continuation lines, which would have forced a retrofit when LSP consumed Line.Pos. Factor the per-line math into stripLine(s, pos) which advances pos by the number of bytes trimContentPrefix consumed. Block-comment paths compute each line's starting Offset by tracking the byte index of the current line within the comment body and adding the "/*" marker length. Add column-precision tests: - // line comment: `foo` sits at Column=4 ("//", space, f) - /* block comment with " * prefix": content after " * " at col 4 - indented block with tab continuation: content at col 4 - offset monotonicity across a multi-line // group Minor: extract `wantModelFoo` const in the test file to satisfy goconst on the now-reused fixture string. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/preprocess.go | 67 +++++++++++--- internal/parsers/grammar/preprocess_test.go | 97 +++++++++++++++++++-- 2 files changed, 145 insertions(+), 19 deletions(-) diff --git a/internal/parsers/grammar/preprocess.go b/internal/parsers/grammar/preprocess.go index 66d878e..382b280 100644 --- a/internal/parsers/grammar/preprocess.go +++ b/internal/parsers/grammar/preprocess.go @@ -18,10 +18,10 @@ import ( // indentation handling lives at the lexer layer where fence state is // tracked. // -// Pos is the position of Text's first character in the source file. -// For continuation lines inside a /* … */ block, the column is -// approximated to 1 — exact column reconstruction would require -// re-tokenising the comment body and is deferred until LSP needs it. +// Pos is the position of Text's first character in the source file: +// Line/Column are accurate on every line (including continuation +// lines inside a /* … */ block) and Offset is the byte offset from +// the start of the file. type Line struct { Text string Pos token.Position @@ -47,25 +47,53 @@ func Preprocess(cg *ast.CommentGroup, fset *token.FileSet) []Line { // stripComment returns one Line per physical source line of a single // *ast.Comment. It handles both the `//` line-comment form and the -// `/* … */` block form, including multi-line blocks. +// `/* … */` block form, including multi-line blocks. Each emitted +// Line's Pos points precisely to the first character of Text in the +// source file (Line, Column, and Offset all accurate). func stripComment(raw string, basePos token.Position) []Line { + const markerLen = 2 // "//" and "/*" are both 2 bytes switch { case strings.HasPrefix(raw, "//"): - text := trimContentPrefix(strings.TrimPrefix(raw, "//")) - return []Line{{Text: text, Pos: basePos}} + pos := basePos + pos.Column += markerLen + pos.Offset += markerLen + return []Line{stripLine(raw[markerLen:], pos)} + case strings.HasPrefix(raw, "/*"): - body := strings.TrimSuffix(strings.TrimPrefix(raw, "/*"), "*/") - rawLines := strings.Split(body, "\n") - out := make([]Line, 0, len(rawLines)) - for i, r := range rawLines { + body := strings.TrimSuffix(raw[markerLen:], "*/") + out := []Line{} + lineOffset := 0 // byte index into body where the current line begins + for lineIdx := 0; ; lineIdx++ { + nl := strings.IndexByte(body[lineOffset:], '\n') + + var segment string + if nl < 0 { + segment = body[lineOffset:] + } else { + segment = body[lineOffset : lineOffset+nl] + } + pos := basePos - pos.Line += i - if i > 0 { + if lineIdx == 0 { + // Same source line as basePos; advance past "/*". + pos.Column += markerLen + pos.Offset += markerLen + } else { + // Continuation line: column restarts at 1; offset is + // the file offset of the first character of this line. + pos.Line += lineIdx pos.Column = 1 + pos.Offset += markerLen + lineOffset } - out = append(out, Line{Text: trimContentPrefix(r), Pos: pos}) + out = append(out, stripLine(segment, pos)) + + if nl < 0 { + break + } + lineOffset += nl + 1 } return out + default: // Not a valid Go comment; preserve input defensively so // downstream layers can surface a diagnostic rather than @@ -74,6 +102,17 @@ func stripComment(raw string, basePos token.Position) []Line { } } +// stripLine trims the leading decoration of a single line and advances +// pos by the number of bytes consumed. pos must already point to the +// first character of the (unstripped) line in the source. +func stripLine(s string, pos token.Position) Line { + stripped := trimContentPrefix(s) + consumed := len(s) - len(stripped) + pos.Column += consumed + pos.Offset += consumed + return Line{Text: stripped, Pos: pos} +} + // trimContentPrefix removes the leading godoc-style decoration that // precedes real content on a comment line: // - whitespace (space, tab) diff --git a/internal/parsers/grammar/preprocess_test.go b/internal/parsers/grammar/preprocess_test.go index 01cb82a..1f1832b 100644 --- a/internal/parsers/grammar/preprocess_test.go +++ b/internal/parsers/grammar/preprocess_test.go @@ -10,6 +10,8 @@ import ( "testing" ) +const wantModelFoo = "swagger:model Foo" + // parseSource is a test helper that parses a Go source file and returns // the comment group attached to its first top-level declaration, plus // the FileSet used during parsing. @@ -53,8 +55,8 @@ func TestPreprocessSingleLineComment(t *testing.T) { if len(lines) != 1 { t.Fatalf("want 1 line, got %d: %+v", len(lines), lines) } - if lines[0].Text != "swagger:model Foo" { - t.Errorf("text: got %q want %q", lines[0].Text, "swagger:model Foo") + if lines[0].Text != wantModelFoo { + t.Errorf("text: got %q want %q", lines[0].Text, wantModelFoo) } if lines[0].Pos.Line != 3 { t.Errorf("line: got %d want 3", lines[0].Pos.Line) @@ -71,7 +73,7 @@ type Foo int ` cg, fset := parseSource(t, src) lines := Preprocess(cg, fset) - want := []string{"swagger:model Foo", "maximum: 10", "minimum: 0"} + want := []string{wantModelFoo, "maximum: 10", "minimum: 0"} if len(lines) != len(want) { t.Fatalf("want %d lines, got %d", len(want), len(lines)) } @@ -100,8 +102,8 @@ type Foo int if len(lines) != 4 { t.Fatalf("want 4 lines, got %d: %+v", len(lines), lines) } - if lines[1].Text != "swagger:model Foo" { - t.Errorf("line 1: got %q want %q", lines[1].Text, "swagger:model Foo") + if lines[1].Text != wantModelFoo { + t.Errorf("line 1: got %q want %q", lines[1].Text, wantModelFoo) } if lines[2].Text != "maximum: 10" { t.Errorf("line 2: got %q want %q", lines[2].Text, "maximum: 10") @@ -147,6 +149,91 @@ type Foo int } } +func TestPreprocessColumnPrecisionLineComment(t *testing.T) { + // "// foo" — 'f' sits at column 4 (slash, slash, space, f). + src := "package p\n\n// foo\ntype Foo int\n" + cg, fset := parseSource(t, src) + lines := Preprocess(cg, fset) + if len(lines) != 1 { + t.Fatalf("want 1 line, got %d", len(lines)) + } + if lines[0].Text != "foo" { + t.Fatalf("text: got %q want %q", lines[0].Text, "foo") + } + if lines[0].Pos.Column != 4 { + t.Errorf("Column: got %d want 4", lines[0].Pos.Column) + } + if lines[0].Pos.Line != 3 { + t.Errorf("Line: got %d want 3", lines[0].Pos.Line) + } +} + +func TestPreprocessColumnPrecisionBlockComment(t *testing.T) { + // Block: + // /* + // * swagger:model Foo + // */ + // Continuation line " * swagger:model Foo" — 's' of "swagger" + // sits at column 4 (space, *, space, s). + src := "package p\n\n/*\n * swagger:model Foo\n */\ntype Foo int\n" + cg, fset := parseSource(t, src) + lines := Preprocess(cg, fset) + // 3 lines: empty opening, content, empty-ish closing. + if len(lines) != 3 { + t.Fatalf("want 3 lines, got %d: %+v", len(lines), lines) + } + content := lines[1] + if content.Text != wantModelFoo { + t.Fatalf("text: got %q want %q", content.Text, wantModelFoo) + } + if content.Pos.Line != 4 { + t.Errorf("Line: got %d want 4", content.Pos.Line) + } + if content.Pos.Column != 4 { + t.Errorf("Column: got %d want 4", content.Pos.Column) + } +} + +func TestPreprocessColumnPrecisionIndentedBlockComment(t *testing.T) { + // Block comment not attached to a decl — parse directly via + // the AST so we can exercise stripComment's continuation-line + // offset math without going through a declaration's Doc field. + src := "package p\n\nvar _ = /*\n\t* bar\n*/ 42\n" + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, "t.go", src, parser.ParseComments) + if err != nil { + t.Fatal(err) + } + if len(f.Comments) == 0 { + t.Fatal("no comments") + } + lines := Preprocess(f.Comments[0], fset) + if len(lines) != 3 { + t.Fatalf("want 3 lines, got %d: %+v", len(lines), lines) + } + // Line 1 content is "bar" (continuation line uses a tab + * + space). + if lines[1].Text != "bar" { + t.Fatalf("text: got %q want %q", lines[1].Text, "bar") + } + // Continuation lines always start at Column=1 in source; 'bar' + // follows "\t* " which is 3 bytes, so Column = 4. + if lines[1].Pos.Column != 4 { + t.Errorf("Column: got %d want 4", lines[1].Pos.Column) + } +} + +func TestPreprocessOffsetAdvancesMonotonically(t *testing.T) { + src := "package p\n\n// one\n// two\n// three\ntype Foo int\n" + cg, fset := parseSource(t, src) + lines := Preprocess(cg, fset) + for i := 1; i < len(lines); i++ { + if lines[i].Pos.Offset <= lines[i-1].Pos.Offset { + t.Errorf("offset did not advance: line %d offset %d, line %d offset %d", + i-1, lines[i-1].Pos.Offset, i, lines[i].Pos.Offset) + } + } +} + func TestPreprocessMultiCommentGroup(t *testing.T) { // A comment group with multiple *ast.Comment entries separated by // only whitespace — Go groups them into a single CommentGroup. From b07bf0bac711e3468ec8dae61ea9165a03b3df62 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 15:21:51 +0200 Subject: [PATCH 08/46] feat(grammar): P1.2 lexer []Line -> tokens MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TokenKind enum (EOF, BLANK, TEXT, ANNOTATION, KEYWORD_VALUE, KEYWORD_BLOCK_HEAD, YAML_FENCE) and Token struct with per-kind fields (Text, Value, Keyword, Args, ItemsDepth). Lex() emits one token per preprocessed line plus a trailing TokenEOF. Classification rules (per line): - empty or whitespace-only -> BLANK - "---" (trim-equal) -> YAML_FENCE - starts with "swagger:" -> ANNOTATION (Text=name, Args=rest) - "[items.]*: " -> KEYWORD_VALUE (Value populated) - "[items.]*:" -> KEYWORD_BLOCK_HEAD - otherwise -> TEXT Keyword recognition goes through grammar.Lookup, which already handles case-insensitivity and aliases — canonical name is written into Token.Text and the matching *Keyword is attached. stripItemsPrefix mirrors rxItemsPrefixFmt: `(?:[Ii]tems[.\s]*)+`. It does NOT overeat: "maxItems" stays a single keyword (prefix check is anchored at position 0 of the token text, not sub-matched); and bare "items:" stays a non-keyword TEXT since nothing in the table matches "items" alone. Position tracking: Token.Pos is advanced past any stripped items. prefix so it points at the keyword's first character. Godoc-identifier-prefix form ("DoFoo swagger:route ...") is NOT handled in the lexer — deferred to P1.4 where the parser orchestrates annotation discovery and can decide case-by-case. Tests cover each token kind, each items-prefix depth variant, the "maxItems must not overeat" edge, canonical-name resolution from aliases (MAX -> maximum, max-length -> maxLength), malformed "swagger:" falling back to TEXT, and Pos advancement after items. prefix stripping. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/lexer.go | 192 +++++++++++++++++++++++- internal/parsers/grammar/lexer_test.go | 199 +++++++++++++++++++++++++ 2 files changed, 388 insertions(+), 3 deletions(-) create mode 100644 internal/parsers/grammar/lexer_test.go diff --git a/internal/parsers/grammar/lexer.go b/internal/parsers/grammar/lexer.go index ed4c7d3..c6a112d 100644 --- a/internal/parsers/grammar/lexer.go +++ b/internal/parsers/grammar/lexer.go @@ -3,6 +3,192 @@ package grammar -// TODO: P1 — tokenizer: []Line → token stream. -// Token kinds: ANNOTATION, KEYWORD_VALUE, KEYWORD_BLOCK_HEAD, YAML_FENCE, TEXT, BLANK, EOF. -// Keyword lookup via keywords.go (case-insensitive, alias-aware); items.N.X expanded. +import ( + "go/token" + "strings" +) + +// TokenKind classifies a preprocessed line. The lexer assigns exactly +// one Kind per line (plus a trailing TokenEOF). Parser dispatch (P1.4) +// is driven off Kind without needing to re-inspect Text. +type TokenKind int + +const ( + TokenEOF TokenKind = iota // end of stream + TokenBlank // empty line (after trim) + TokenText // freeform content (title, description, block body) + TokenAnnotation // "swagger: [args...]" + TokenKeywordValue // ": " + TokenKeywordBlockHead // ":" (value-less; indicates a block follows) + TokenYAMLFence // "---" delimiter +) + +// String renders a TokenKind for debugging and diagnostics. +func (k TokenKind) String() string { + switch k { + case TokenEOF: + return "EOF" + case TokenBlank: + return "BLANK" + case TokenText: + return "TEXT" + case TokenAnnotation: + return "ANNOTATION" + case TokenKeywordValue: + return "KEYWORD_VALUE" + case TokenKeywordBlockHead: + return "KEYWORD_BLOCK_HEAD" + case TokenYAMLFence: + return "YAML_FENCE" + default: + return "?" + } +} + +// Token is one lexed item. Fields are populated per Kind: +// - TokenAnnotation: Text = annotation name (e.g., "model"), Args = positional args. +// - TokenKeywordValue / TokenKeywordBlockHead: Text = canonical keyword name, +// Keyword = table entry, Value = raw value string (empty for BlockHead), +// ItemsDepth = number of leading "items." prefixes (0 = none). +// - TokenText: Text = original line content. +// - TokenBlank / TokenYAMLFence / TokenEOF: Text is empty. +// +// Pos points to the first source character of the meaningful payload +// (the keyword for KEYWORD_*, "swagger:" for ANNOTATION, the fence for +// YAML_FENCE, the start of text for TEXT). +type Token struct { + Kind TokenKind + Pos token.Position + Text string + Value string + Keyword *Keyword + ItemsDepth int + Args []string +} + +// Lex turns a preprocessed line slice into a token stream terminated +// by TokenEOF. The lexer is context-free (no fence/state tracking); +// the parser decides whether a TokenText sits inside a YAML body. +func Lex(lines []Line) []Token { + out := make([]Token, 0, len(lines)+1) + for _, line := range lines { + out = append(out, lexLine(line)) + } + out = append(out, Token{Kind: TokenEOF}) + return out +} + +// lexLine classifies a single preprocessed line. +func lexLine(line Line) Token { + text := strings.TrimRight(line.Text, " \t") + + if text == "" { + return Token{Kind: TokenBlank, Pos: line.Pos} + } + if strings.TrimSpace(text) == "---" { + return Token{Kind: TokenYAMLFence, Pos: line.Pos} + } + if strings.HasPrefix(text, "swagger:") { + return lexAnnotation(text, line.Pos) + } + if tok, ok := lexKeyword(text, line.Pos); ok { + return tok + } + return Token{Kind: TokenText, Text: text, Pos: line.Pos} +} + +// lexAnnotation parses "swagger: [arg1 arg2 ...]". Malformed +// (empty name) falls back to a TEXT token so the parser can emit a +// diagnostic at the analyzer layer. +func lexAnnotation(text string, pos token.Position) Token { + rest := strings.TrimPrefix(text, "swagger:") + fields := strings.Fields(rest) + if len(fields) == 0 { + return Token{Kind: TokenText, Text: text, Pos: pos} + } + return Token{ + Kind: TokenAnnotation, + Pos: pos, + Text: fields[0], + Args: fields[1:], + } +} + +// lexKeyword tries to parse text as a "[items.]*: [value]" +// form. Returns (token, true) on a successful match, (zero, false) +// otherwise — in which case the line is emitted as TEXT upstream. +func lexKeyword(text string, pos token.Position) (Token, bool) { + rest, depth := stripItemsPrefix(text) + + before, after, found := strings.Cut(rest, ":") + if !found { + return Token{}, false + } + + name := strings.TrimSpace(before) + kw, ok := Lookup(name) + if !ok { + return Token{}, false + } + + // Advance Pos past any items. prefix we stripped so it points to + // the keyword itself. + consumed := len(text) - len(rest) + kwPos := pos + kwPos.Column += consumed + kwPos.Offset += consumed + + value := strings.TrimSpace(after) + + kind := TokenKeywordValue + if value == "" { + kind = TokenKeywordBlockHead + } + + return Token{ + Kind: kind, + Pos: kwPos, + Text: kw.Name, + Value: value, + Keyword: &kw, + ItemsDepth: depth, + }, true +} + +// stripItemsPrefix removes leading `items.`, `items `, or `items\t` +// runs from s, counting how many were stripped. The v1 form is +// captured by rxItemsPrefixFmt = `(?:[Ii]tems[\.\p{Zs}]*){%d}`; +// this is the equivalent hand-rolled recognizer. +// +// Notably it does *not* strip `items` with no following separator +// (so "items:" as a standalone keyword remains intact), and it does +// not match inside longer identifiers ("maxItems" stays as a single +// word because "items" doesn't appear at the *start*). +func stripItemsPrefix(s string) (rest string, depth int) { + for { + stripped, ok := stripOneItemsPrefix(s) + if !ok { + return s, depth + } + s = stripped + depth++ + } +} + +func stripOneItemsPrefix(s string) (string, bool) { + const itemsLen = 5 // len("items") + if len(s) < itemsLen { + return s, false + } + if !strings.EqualFold(s[:itemsLen], "items") { + return s, false + } + rest := s[itemsLen:] + trimmed := strings.TrimLeft(rest, ". \t") + if len(trimmed) == len(rest) { + // No separator — "items" is part of a longer identifier + // (e.g., "itemspan") or the bare keyword "items:". + return s, false + } + return trimmed, true +} diff --git a/internal/parsers/grammar/lexer_test.go b/internal/parsers/grammar/lexer_test.go new file mode 100644 index 0000000..3aed229 --- /dev/null +++ b/internal/parsers/grammar/lexer_test.go @@ -0,0 +1,199 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import ( + "go/token" + "reflect" + "testing" +) + +// mkLines turns plain strings into position-carrying Lines with dummy +// but non-zero positions so tests can assert Pos if they care. +func mkLines(texts ...string) []Line { + out := make([]Line, len(texts)) + for i, t := range texts { + out[i] = Line{Text: t, Pos: token.Position{Filename: "t.go", Line: i + 1, Column: 1, Offset: i * 50}} + } + return out +} + +func TestLexEmpty(t *testing.T) { + toks := Lex(nil) + if len(toks) != 1 || toks[0].Kind != TokenEOF { + t.Fatalf("Lex(nil): want [EOF], got %+v", toks) + } +} + +func TestLexBlankAndYAMLFence(t *testing.T) { + toks := Lex(mkLines("", " ", "---", " --- ")) + want := []TokenKind{TokenBlank, TokenBlank, TokenYAMLFence, TokenYAMLFence, TokenEOF} + if len(toks) != len(want) { + t.Fatalf("want %d tokens, got %d: %+v", len(want), len(toks), toks) + } + for i, w := range want { + if toks[i].Kind != w { + t.Errorf("tok %d: got %s want %s", i, toks[i].Kind, w) + } + } +} + +func TestLexAnnotationSimple(t *testing.T) { + toks := Lex(mkLines("swagger:model Foo")) + if toks[0].Kind != TokenAnnotation { + t.Fatalf("want ANNOTATION, got %s: %+v", toks[0].Kind, toks[0]) + } + if toks[0].Text != "model" { + t.Errorf("name: got %q want %q", toks[0].Text, "model") + } + if !reflect.DeepEqual(toks[0].Args, []string{"Foo"}) { + t.Errorf("args: got %v want [Foo]", toks[0].Args) + } +} + +func TestLexAnnotationRoute(t *testing.T) { + toks := Lex(mkLines("swagger:route GET /pets tags listPets")) + if toks[0].Kind != TokenAnnotation { + t.Fatalf("want ANNOTATION, got %s", toks[0].Kind) + } + if toks[0].Text != "route" { + t.Errorf("name: got %q want route", toks[0].Text) + } + want := []string{"GET", "/pets", "tags", "listPets"} + if !reflect.DeepEqual(toks[0].Args, want) { + t.Errorf("args: got %v want %v", toks[0].Args, want) + } +} + +func TestLexAnnotationMalformed(t *testing.T) { + // "swagger:" with no name falls back to TEXT. + toks := Lex(mkLines("swagger:")) + if toks[0].Kind != TokenText { + t.Errorf("want TEXT (malformed annotation), got %s", toks[0].Kind) + } +} + +func TestLexKeywordValue(t *testing.T) { + toks := Lex(mkLines("maximum: 10")) + tok := toks[0] + if tok.Kind != TokenKeywordValue { + t.Fatalf("want KEYWORD_VALUE, got %s", tok.Kind) + } + if tok.Text != "maximum" { + t.Errorf("name: got %q want maximum", tok.Text) + } + if tok.Value != "10" { + t.Errorf("value: got %q want %q", tok.Value, "10") + } + if tok.Keyword == nil || tok.Keyword.Value.Type != ValueNumber { + t.Errorf("expected Keyword resolved to ValueNumber, got %+v", tok.Keyword) + } +} + +func TestLexKeywordCanonicalizesAlias(t *testing.T) { + toks := Lex(mkLines("MAX: 10", "max-length: 5")) + if toks[0].Text != "maximum" { + t.Errorf("MAX → canonical: got %q want maximum", toks[0].Text) + } + if toks[1].Text != "maxLength" { + t.Errorf("max-length → canonical: got %q want maxLength", toks[1].Text) + } +} + +func TestLexKeywordBlockHead(t *testing.T) { + toks := Lex(mkLines("consumes:")) + tok := toks[0] + if tok.Kind != TokenKeywordBlockHead { + t.Fatalf("want KEYWORD_BLOCK_HEAD, got %s", tok.Kind) + } + if tok.Text != "consumes" || tok.Value != "" { + t.Errorf("unexpected token: %+v", tok) + } +} + +func TestLexItemsPrefix(t *testing.T) { + cases := []struct { + in string + wantName string + wantDepth int + }{ + {"maximum: 5", "maximum", 0}, + {"items.maximum: 5", "maximum", 1}, + {"items.items.maximum: 5", "maximum", 2}, + {"Items.Items.maximum: 5", "maximum", 2}, + {"items items maximum: 5", "maximum", 2}, //nolint:dupword // space-separated items prefix is valid per rxItemsPrefixFmt + {"items.items.items.minLength: 1", "minLength", 3}, + } + for _, tc := range cases { + t.Run(tc.in, func(t *testing.T) { + toks := Lex(mkLines(tc.in)) + tok := toks[0] + if tok.Kind != TokenKeywordValue { + t.Fatalf("want KEYWORD_VALUE, got %s", tok.Kind) + } + if tok.Text != tc.wantName { + t.Errorf("name: got %q want %q", tok.Text, tc.wantName) + } + if tok.ItemsDepth != tc.wantDepth { + t.Errorf("depth: got %d want %d", tok.ItemsDepth, tc.wantDepth) + } + }) + } +} + +func TestLexItemsDoesNotOvereat(t *testing.T) { + // "maxItems" must stay a single word — the "items" suffix inside + // it is not a prefix marker. + toks := Lex(mkLines("maxItems: 3")) + if toks[0].Kind != TokenKeywordValue || toks[0].Text != "maxItems" { + t.Errorf("maxItems must remain a single keyword, got %+v", toks[0]) + } + if toks[0].ItemsDepth != 0 { + t.Errorf("maxItems must have depth 0, got %d", toks[0].ItemsDepth) + } +} + +func TestLexUnknownKeywordIsText(t *testing.T) { + toks := Lex(mkLines("not-a-keyword: hello")) + if toks[0].Kind != TokenText { + t.Errorf("want TEXT, got %s", toks[0].Kind) + } +} + +func TestLexPlainText(t *testing.T) { + toks := Lex(mkLines("This is a description line.")) + if toks[0].Kind != TokenText { + t.Errorf("want TEXT, got %s", toks[0].Kind) + } + if toks[0].Text != "This is a description line." { + t.Errorf("text: got %q", toks[0].Text) + } +} + +func TestLexPreservesPositions(t *testing.T) { + toks := Lex(mkLines("swagger:model Foo", "maximum: 5")) + if toks[0].Pos.Line != 1 { + t.Errorf("line 1 token: got Pos.Line %d want 1", toks[0].Pos.Line) + } + if toks[1].Pos.Line != 2 { + t.Errorf("line 2 token: got Pos.Line %d want 2", toks[1].Pos.Line) + } +} + +func TestLexItemsPrefixAdvancesPos(t *testing.T) { + toks := Lex(mkLines("items.maximum: 5")) + tok := toks[0] + // "items." is 6 bytes; Pos must advance past it to point at 'm'. + // Column started at 1 → expect 7. + if tok.Pos.Column != 7 { + t.Errorf("items.maximum Pos.Column: got %d want 7", tok.Pos.Column) + } +} + +func TestLexEOFIsAlwaysLast(t *testing.T) { + toks := Lex(mkLines("swagger:model Foo", "", "maximum: 5")) + if toks[len(toks)-1].Kind != TokenEOF { + t.Errorf("last token must be EOF, got %s", toks[len(toks)-1].Kind) + } +} From 59960c8c6607826c93651eeb0d18209780f1d639 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 15:25:41 +0200 Subject: [PATCH 09/46] feat(grammar): P1.3 typed Block family + iter.Seq helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Block interface and the seven typed kinds the parser will dispatch to (architecture §4.6): - ModelBlock (swagger:model) - RouteBlock (swagger:route) - OperationBlock (swagger:operation) - ParametersBlock (swagger:parameters) - ResponseBlock (swagger:response) - MetaBlock (swagger:meta) - UnboundBlock (no annotation — struct field docstrings) Block interface: Pos, Title, Description, Diagnostics, AnnotationKind, plus iter.Seq-based iterators Properties/YAMLBlocks/Extensions. Using iter.Seq (Go 1.23+, module targets 1.25) per §4.2: iterator form, not Accept/Visit callbacks. Support types: - Property {Keyword, Pos, Value, Typed, ItemsDepth} - TypedValue {Type, Number, Integer, Boolean, String} - RawYAML {Pos, Text} (captured --- body; not parsed here) - Extension {Name, Pos, Value} baseBlock (unexported, pointer-embedded) holds the shared state; typed blocks embed it and add kind-specific positional fields. Exported methods come through the embedding — external callers see the interface surface and the kind-specific fields, nothing else. AnnotationKind enum with String() and AnnotationKindFromName(name). Labels factored into const block (labelRoute, labelModel, …) so the same literal appears once — parser (P1.4) and analyzer will use the same names for diagnostics. Tests cover interface satisfaction (compile-time assertions), label round-trip, full baseBlock accessor surface via a ModelBlock, and iterator early-break semantics. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/ast.go | 345 ++++++++++++++++++++++++++- internal/parsers/grammar/ast_test.go | 149 ++++++++++++ 2 files changed, 489 insertions(+), 5 deletions(-) create mode 100644 internal/parsers/grammar/ast_test.go diff --git a/internal/parsers/grammar/ast.go b/internal/parsers/grammar/ast.go index 544c292..1ae6a80 100644 --- a/internal/parsers/grammar/ast.go +++ b/internal/parsers/grammar/ast.go @@ -3,8 +3,343 @@ package grammar -// TODO: P1 — Block family. -// Interface: Pos, Title, Description, Diagnostics. -// Typed kinds: ModelBlock, RouteBlock, OperationBlock, ParametersBlock, -// ResponseBlock, MetaBlock, UnboundBlock. -// Iterators: Properties(), YAMLBlocks(), Extensions() as iter.Seq. +import ( + "go/token" + "iter" +) + +// Block is the interface implemented by every typed AST node the +// parser emits. One Block corresponds to one comment group's parsed +// content. Typed kinds (ModelBlock, RouteBlock, …) embed *baseBlock +// and add the fields specific to their annotation. +// +// See architecture §4.6. +type Block interface { + // Pos reports the position of the block's defining token — the + // annotation line for annotated blocks, or the first comment line + // for UnboundBlock. + Pos() token.Position + // Title returns the short one-liner extracted from the comment + // group (the first non-annotation paragraph, first line). + Title() string + // Description returns everything between the title paragraph and + // the first keyword/block-header, joined by newlines. + Description() string + // Diagnostics returns the non-fatal observations accumulated + // while parsing this block (unknown keywords, context-invalid + // keywords, malformed values, …). + Diagnostics() []Diagnostic + + // Properties iterates the keyword:value pairs attached to this + // block (flat list in source order). + Properties() iter.Seq[Property] + // YAMLBlocks iterates the --- fenced YAML bodies captured inside + // this block (swagger:operation, swagger:meta bodies). The parser + // does NOT parse YAML; it only isolates the bodies. + YAMLBlocks() iter.Seq[RawYAML] + // Extensions iterates the x-* vendor extensions declared under an + // "extensions:" block inside this Block. + Extensions() iter.Seq[Extension] + + // Kind returns the top-level annotation kind this Block was + // dispatched from (UnboundBlock returns AnnUnknown). Used by + // analyzers to type-switch-check without reflection. + AnnotationKind() AnnotationKind +} + +// AnnotationKind identifies the top-level "swagger:xxx" directive +// that produced this Block. This is distinct from Kind in +// keywords.go (which names the *sub-context* where a keyword may +// appear). AnnotationKind is used for Block dispatch; Kind for +// keyword legality. +type AnnotationKind int + +const ( + AnnUnknown AnnotationKind = iota + + AnnRoute // swagger:route + AnnOperation // swagger:operation + AnnParameters // swagger:parameters + AnnResponse // swagger:response + AnnModel // swagger:model + AnnMeta // swagger:meta + AnnStrfmt // swagger:strfmt + AnnAlias // swagger:alias + AnnName // swagger:name + AnnAllOf // swagger:allOf + AnnEnumDecl // swagger:enum + AnnIgnore // swagger:ignore + AnnDefaultName // swagger:default (name override, not the keyword) + AnnType // swagger:type + AnnFile // swagger:file +) + +// Annotation label strings, one per AnnotationKind. Shared by +// AnnotationKind.String() and AnnotationKindFromName() so there is +// exactly one source of truth per label. +const ( + labelRoute = "route" + labelOperation = "operation" + labelParameters = "parameters" + labelResponse = "response" + labelModel = "model" + labelMeta = "meta" + labelStrfmt = "strfmt" + labelAlias = "alias" + labelName = "name" + labelAllOf = "allOf" + labelEnum = "enum" + labelIgnore = "ignore" + labelDefault = "default" + labelType = "type" + labelFile = "file" + labelUnknown = "unknown" +) + +// String renders an AnnotationKind as its source label. +func (a AnnotationKind) String() string { + switch a { + case AnnRoute: + return labelRoute + case AnnOperation: + return labelOperation + case AnnParameters: + return labelParameters + case AnnResponse: + return labelResponse + case AnnModel: + return labelModel + case AnnMeta: + return labelMeta + case AnnStrfmt: + return labelStrfmt + case AnnAlias: + return labelAlias + case AnnName: + return labelName + case AnnAllOf: + return labelAllOf + case AnnEnumDecl: + return labelEnum + case AnnIgnore: + return labelIgnore + case AnnDefaultName: + return labelDefault + case AnnType: + return labelType + case AnnFile: + return labelFile + case AnnUnknown: + fallthrough + default: + return labelUnknown + } +} + +// AnnotationKindFromName resolves the `swagger:` label (e.g., +// "model", "route") to the matching AnnotationKind. Returns +// AnnUnknown for names the parser does not recognize at v1 parity. +func AnnotationKindFromName(name string) AnnotationKind { + switch name { + case labelRoute: + return AnnRoute + case labelOperation: + return AnnOperation + case labelParameters: + return AnnParameters + case labelResponse: + return AnnResponse + case labelModel: + return AnnModel + case labelMeta: + return AnnMeta + case labelStrfmt: + return AnnStrfmt + case labelAlias: + return AnnAlias + case labelName: + return AnnName + case labelAllOf: + return AnnAllOf + case labelEnum: + return AnnEnumDecl + case labelIgnore: + return AnnIgnore + case labelDefault: + return AnnDefaultName + case labelType: + return AnnType + case labelFile: + return AnnFile + default: + return AnnUnknown + } +} + +// Property is one keyword:value pair inside a Block's body. Value +// is the raw string as it appeared in the comment; Typed carries the +// primitive-converted form when the keyword's ValueType is one of the +// primitives the parser converts at parse time (Number, Integer, +// Boolean, StringEnum). Raw-typed keywords (RawValue, RawBlock, +// String, CommaList) leave Typed at its zero value and the analyzer +// interprets Value. +type Property struct { + Keyword Keyword + Pos token.Position + Value string + Typed TypedValue + ItemsDepth int // 0 = no "items." nesting +} + +// TypedValue carries the primitive-converted form of a keyword's +// value when the keyword's ValueType is Number/Integer/Boolean/ +// StringEnum. For other ValueTypes the fields are zero. +type TypedValue struct { + Type ValueType + Number float64 + Integer int64 + Boolean bool + String string // for StringEnum: the matched enum value (canonical) +} + +// RawYAML is one captured YAML body (between --- fences). The parser +// does not parse the content; it records the bytes and the position +// so the analyzer can hand it to internal/parsers/yaml/. +type RawYAML struct { + Pos token.Position + Text string +} + +// Extension is one x-* vendor extension entry under an +// "extensions:" block. Value is the raw line content; analyzers parse +// it further if needed (e.g., inline YAML via internal/parsers/yaml/). +type Extension struct { + Name string + Pos token.Position + Value string +} + +// baseBlock carries the fields common to every Block kind. Typed +// blocks embed *baseBlock and add kind-specific positional data. +type baseBlock struct { + pos token.Position + title string + description string + kind AnnotationKind + + properties []Property + yamlBlocks []RawYAML + extensions []Extension + diagnostics []Diagnostic +} + +func (b *baseBlock) Pos() token.Position { return b.pos } +func (b *baseBlock) Title() string { return b.title } +func (b *baseBlock) Description() string { return b.description } +func (b *baseBlock) Diagnostics() []Diagnostic { return b.diagnostics } +func (b *baseBlock) AnnotationKind() AnnotationKind { return b.kind } + +func (b *baseBlock) Properties() iter.Seq[Property] { + return func(yield func(Property) bool) { + for _, p := range b.properties { + if !yield(p) { + return + } + } + } +} + +func (b *baseBlock) YAMLBlocks() iter.Seq[RawYAML] { + return func(yield func(RawYAML) bool) { + for _, y := range b.yamlBlocks { + if !yield(y) { + return + } + } + } +} + +func (b *baseBlock) Extensions() iter.Seq[Extension] { + return func(yield func(Extension) bool) { + for _, e := range b.extensions { + if !yield(e) { + return + } + } + } +} + +// --- typed Block kinds --- + +// ModelBlock is produced by `swagger:model [Name]`. Name is the +// optional override declared on the annotation line (empty when the +// declaration uses the Go type's own name). +type ModelBlock struct { + *baseBlock + + Name string +} + +// RouteBlock is produced by `swagger:route METHOD /path [tags] opID`. +// All four positional fields are captured by the parser (P1.6). +type RouteBlock struct { + *baseBlock + + Method string + Path string + Tags string // free-text tags segment (optional) + OpID string +} + +// OperationBlock is produced by `swagger:operation METHOD /path [tags] opID`. +// Positional fields match RouteBlock. The YAML body (if any) is in +// baseBlock.yamlBlocks, reachable via YAMLBlocks(). +type OperationBlock struct { + *baseBlock + + Method string + Path string + Tags string + OpID string +} + +// ParametersBlock is produced by `swagger:parameters T1 T2 ...`. +// Each positional arg names a Go type whose exported fields become +// the parameter set. +type ParametersBlock struct { + *baseBlock + + TargetTypes []string +} + +// ResponseBlock is produced by `swagger:response [Name]`. Name is the +// optional override for how the response is exposed in the spec. +type ResponseBlock struct { + *baseBlock + + Name string +} + +// MetaBlock is produced by `swagger:meta`. The info-block keywords +// (version, host, basePath, license, contact, …) appear as +// Properties; YAML bodies in YAMLBlocks. +type MetaBlock struct { + *baseBlock +} + +// UnboundBlock represents a comment group with no annotation line — +// e.g., a struct field's docstring carrying validations. AnnotationKind +// returns AnnUnknown; analyzers interpret Properties based on the +// enclosing Go declaration (scanner context). +type UnboundBlock struct { + *baseBlock +} + +// --- constructors for the parser (P1.4) --- + +// newBaseBlock initializes a baseBlock for the given annotation kind +// at the given source position. Returns a pointer so typed Block +// kinds can embed it and share the state. +func newBaseBlock(kind AnnotationKind, pos token.Position) *baseBlock { + return &baseBlock{pos: pos, kind: kind} +} diff --git a/internal/parsers/grammar/ast_test.go b/internal/parsers/grammar/ast_test.go new file mode 100644 index 0000000..e000144 --- /dev/null +++ b/internal/parsers/grammar/ast_test.go @@ -0,0 +1,149 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import ( + "go/token" + "slices" + "testing" +) + +func TestAnnotationKindString(t *testing.T) { + cases := []struct { + in AnnotationKind + want string + }{ + {AnnRoute, "route"}, + {AnnOperation, "operation"}, + {AnnModel, "model"}, + {AnnMeta, "meta"}, + {AnnEnumDecl, "enum"}, + {AnnUnknown, "unknown"}, + {AnnotationKind(99), "unknown"}, + } + for _, tc := range cases { + if got := tc.in.String(); got != tc.want { + t.Errorf("%d.String() = %q want %q", int(tc.in), got, tc.want) + } + } +} + +func TestAnnotationKindFromName(t *testing.T) { + cases := []struct { + in string + want AnnotationKind + }{ + {"route", AnnRoute}, + {"operation", AnnOperation}, + {"model", AnnModel}, + {"meta", AnnMeta}, + {"allOf", AnnAllOf}, + {"enum", AnnEnumDecl}, + {"default", AnnDefaultName}, + {"bogus", AnnUnknown}, + {"", AnnUnknown}, + } + for _, tc := range cases { + if got := AnnotationKindFromName(tc.in); got != tc.want { + t.Errorf("FromName(%q) = %v want %v", tc.in, got, tc.want) + } + } +} + +func TestTypedBlocksImplementInterface(_ *testing.T) { + // Compile-time: if these assertions fail, ast.go is broken. + var _ Block = (*ModelBlock)(nil) + var _ Block = (*RouteBlock)(nil) + var _ Block = (*OperationBlock)(nil) + var _ Block = (*ParametersBlock)(nil) + var _ Block = (*ResponseBlock)(nil) + var _ Block = (*MetaBlock)(nil) + var _ Block = (*UnboundBlock)(nil) +} + +func TestBaseBlockAccessors(t *testing.T) { + pos := token.Position{Filename: "t.go", Line: 3, Column: 1} + b := newBaseBlock(AnnModel, pos) + b.title = "A title." + b.description = "A paragraph describing the model." + b.properties = []Property{ + {Keyword: Keyword{Name: "maximum"}, Value: "10"}, + {Keyword: Keyword{Name: "minimum"}, Value: "0"}, + } + b.yamlBlocks = []RawYAML{ + {Text: "foo: bar\n"}, + } + b.extensions = []Extension{ + {Name: "x-custom", Value: "42"}, + } + b.diagnostics = []Diagnostic{ + {Severity: SeverityWarning, Code: CodeUnknownKeyword, Message: "ignored"}, + } + + mb := &ModelBlock{baseBlock: b, Name: "Foo"} + + if mb.Name != "Foo" { + t.Errorf("Name: got %q want Foo", mb.Name) + } + if mb.Pos().Line != 3 { + t.Errorf("Pos.Line: got %d want 3", mb.Pos().Line) + } + if mb.Title() != "A title." { + t.Errorf("Title: got %q", mb.Title()) + } + if mb.AnnotationKind() != AnnModel { + t.Errorf("Kind: got %v want AnnModel", mb.AnnotationKind()) + } + if len(mb.Diagnostics()) != 1 { + t.Errorf("Diagnostics: got %d want 1", len(mb.Diagnostics())) + } + + var props []Property + for p := range mb.Properties() { + props = append(props, p) + } + if len(props) != 2 { + t.Errorf("iterated %d properties want 2", len(props)) + } + if !slices.Equal([]string{props[0].Keyword.Name, props[1].Keyword.Name}, []string{"maximum", "minimum"}) { + t.Errorf("iteration order: got %q,%q", props[0].Keyword.Name, props[1].Keyword.Name) + } + + yamlCount := 0 + for range mb.YAMLBlocks() { + yamlCount++ + } + if yamlCount != 1 { + t.Errorf("YAML blocks: got %d want 1", yamlCount) + } + + extCount := 0 + for range mb.Extensions() { + extCount++ + } + if extCount != 1 { + t.Errorf("extensions: got %d want 1", extCount) + } +} + +func TestIteratorEarlyBreak(t *testing.T) { + b := newBaseBlock(AnnModel, token.Position{}) + b.properties = []Property{ + {Keyword: Keyword{Name: "maximum"}}, + {Keyword: Keyword{Name: "minimum"}}, + {Keyword: Keyword{Name: "pattern"}}, + } + mb := &ModelBlock{baseBlock: b} + + seen := 0 + for range mb.Properties() { + seen++ + if seen == 2 { + break + } + } + if seen != 2 { + t.Errorf("early-break: iterated %d want 2", seen) + } +} From 4ac06fdcd904709e408be69385fc9081d9addbe9 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 15:40:20 +0200 Subject: [PATCH 10/46] feat(grammar): P1.4 recursive-descent parser + YAML fence preservation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire the preprocess → lex → parse pipeline: Parse(cg, fset) returns a typed Block from a Go ast.CommentGroup. ParseTokens(tokens) is the same without the preprocessor, for LSP scenarios where token streams are synthesized. Algorithm: 1. Scan tokens for the first ANNOTATION (or none). 2. Build typed Block via buildTypedBlock dispatch: swagger:model -> ModelBlock{Name} swagger:response -> ResponseBlock{Name} swagger:parameters -> ParametersBlock{TargetTypes} swagger:meta -> MetaBlock swagger:route -> RouteBlock{Method,Path,Tags,OpID} swagger:operation -> OperationBlock{Method,Path,Tags,OpID} anything else -> UnboundBlock carrying the kind 3. parseTitleDesc on tokens before the annotation (first paragraph is title, rest joined as description). 4. parseBody on tokens after the annotation: KEYWORD_VALUE and KEYWORD_BLOCK_HEAD -> Property; YAML_FENCE pairs -> RawYAML body captured via reconstructLine() best-effort. 5. Never panic: unknown tokens are skipped; unmatched YAML fence emits CodeUnterminatedYAML diagnostic but still captures body. Positional args for route/operation (P1.6 scope) are extracted here already since the annotation token already carries them. Malformed (<3 args) emits CodeInvalidAnnotation. Bug fix in the preprocessor surfaced by the YAML fence tests: the previous trimContentPrefix stripped leading `-`, which also ate the `---` fence marker. `-` removed from the strip set. Bullet-list dashes in description now survive to Text (arguably more correct than v1's silent strip — flagged in the godoc). Also renamed the internal `parser` type to `parseState` to avoid a name clash with the go/parser package the tests import. Tests (parser_test.go) cover: - ModelBlock / RouteBlock / ParametersBlock / UnboundBlock dispatch - route malformed -> CodeInvalidAnnotation diagnostic - nil CommentGroup -> empty UnboundBlock - title + description extraction (godoc-style ordering) - properties in order + item-depth preservation - block-head property (consumes:) - balanced YAML fence -> body captured - unterminated YAML fence -> diagnostic + body captured to EOF - "don't panic on anything weird" sweep Known gap (P2.1): YAML bodies are reconstructed from already- classified tokens, so indentation and exact punctuation are lost. Good enough for kind/content assertions; not yet suitable for YAML re-parsing. P2.1 will add fence-state tracking so raw bytes survive. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/parser.go | 319 ++++++++++++++++++++- internal/parsers/grammar/parser_test.go | 366 ++++++++++++++++++++++++ internal/parsers/grammar/preprocess.go | 11 +- 3 files changed, 690 insertions(+), 6 deletions(-) create mode 100644 internal/parsers/grammar/parser_test.go diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index f02a2b0..2c5532f 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -11,4 +11,321 @@ // .claude/plans/grammar-parser-tasks.md for the "how". package grammar -// TODO: P1 — recursive-descent envelope parser + Parser interface. +import ( + "go/ast" + "go/token" + "strings" +) + +// Parse runs the full preprocess → lex → parse pipeline on a comment +// group and returns the typed Block that describes it. Never panics; +// diagnostics accumulate on the returned Block. +// +// A nil CommentGroup produces an empty UnboundBlock — useful for code +// paths that call Parse unconditionally. +// +//nolint:ireturn // Block is a polymorphic family (ModelBlock, RouteBlock, …); concrete type depends on the annotation. +func Parse(cg *ast.CommentGroup, fset *token.FileSet) Block { + lines := Preprocess(cg, fset) + tokens := Lex(lines) + return ParseTokens(tokens) +} + +// ParseTokens runs parser-only on a pre-lexed token stream. Useful +// for tests and for LSP scenarios where the token stream is +// synthesized (e.g., insertion-point completion). +// +//nolint:ireturn // see Parse godoc +func ParseTokens(tokens []Token) Block { + p := &parseState{tokens: tokens} + return p.parse() +} + +type parseState struct { + tokens []Token + diag []Diagnostic +} + +//nolint:ireturn // see Parse godoc +func (p *parseState) parse() Block { + annIdx := findAnnotation(p.tokens) + + var ( + kind AnnotationKind + typed Block + base *baseBlock + pre []Token + post []Token + ) + + if annIdx >= 0 { + annTok := p.tokens[annIdx] + kind = AnnotationKindFromName(annTok.Text) + base = newBaseBlock(kind, annTok.Pos) + typed = p.buildTypedBlock(kind, annTok, base) + pre = p.tokens[:annIdx] + post = p.tokens[annIdx+1:] + } else { + base = newBaseBlock(AnnUnknown, firstMeaningfulPos(p.tokens)) + typed = &UnboundBlock{baseBlock: base} + post = p.tokens + } + + p.parseTitleDesc(base, pre) + p.parseBody(base, post) + + base.diagnostics = append(base.diagnostics, p.diag...) + return typed +} + +// findAnnotation returns the index of the first TokenAnnotation in +// tokens, or -1 if none. Annotations normally occupy the top of a +// comment group, but godoc-style placement (e.g., annotation after a +// description paragraph) is accepted and triggers the same dispatch. +func findAnnotation(tokens []Token) int { + for i, t := range tokens { + if t.Kind == TokenAnnotation { + return i + } + } + return -1 +} + +// firstMeaningfulPos returns the Pos of the first non-blank, non-EOF +// token — i.e., the reasonable "position" of a comment group that has +// no annotation. +func firstMeaningfulPos(tokens []Token) token.Position { + for _, t := range tokens { + if t.Kind != TokenBlank && t.Kind != TokenEOF { + return t.Pos + } + } + return token.Position{} +} + +// buildTypedBlock constructs the typed Block that corresponds to the +// recognized annotation kind, populating kind-specific positional +// fields from the annotation's Args. +// +// Unrecognized or v1-parity-simple annotations (strfmt, alias, name, +// allOf, enum, ignore, default, type, file) return an UnboundBlock +// carrying the AnnotationKind — analyzers type-switch on the kind to +// decide further handling. +// +//nolint:ireturn // see Parse godoc +func (p *parseState) buildTypedBlock(kind AnnotationKind, tok Token, base *baseBlock) Block { + switch kind { + case AnnModel: + return &ModelBlock{baseBlock: base, Name: firstArg(tok.Args)} + + case AnnResponse: + return &ResponseBlock{baseBlock: base, Name: firstArg(tok.Args)} + + case AnnParameters: + return &ParametersBlock{ + baseBlock: base, + TargetTypes: append([]string(nil), tok.Args...), + } + + case AnnMeta: + return &MetaBlock{baseBlock: base} + + case AnnRoute: + rb := &RouteBlock{baseBlock: base} + p.fillOperationArgs(&rb.Method, &rb.Path, &rb.Tags, &rb.OpID, tok) + return rb + + case AnnOperation: + ob := &OperationBlock{baseBlock: base} + p.fillOperationArgs(&ob.Method, &ob.Path, &ob.Tags, &ob.OpID, tok) + return ob + + case AnnUnknown, + AnnStrfmt, AnnAlias, AnnName, AnnAllOf, AnnEnumDecl, + AnnIgnore, AnnDefaultName, AnnType, AnnFile: + return &UnboundBlock{baseBlock: base} + + default: + return &UnboundBlock{baseBlock: base} + } +} + +// fillOperationArgs extracts METHOD, /path, optional tags (free-text +// segment), and opID from the positional args of swagger:route / +// swagger:operation. Matches the v1 regex-based extraction. +func (p *parseState) fillOperationArgs(method, path, tags, opID *string, tok Token) { + args := tok.Args + switch { + case len(args) < minOpArgs: + p.diag = append(p.diag, Errorf(tok.Pos, CodeInvalidAnnotation, + "swagger:%s requires method, path, and operation id (got %d args)", + tok.Text, len(args))) + case len(args) == minOpArgs: + *method, *path, *opID = args[0], args[1], args[2] + default: + *method, *path = args[0], args[1] + *tags = strings.Join(args[2:len(args)-1], " ") + *opID = args[len(args)-1] + } +} + +const minOpArgs = 3 // method + path + opID + +func firstArg(args []string) string { + if len(args) == 0 { + return "" + } + return args[0] +} + +// parseTitleDesc extracts the title (first paragraph) and description +// (remaining paragraphs, joined by blank lines) from the tokens that +// appear before the annotation. +// +// Keyword/YAML/block-head tokens appearing pre-annotation are unusual +// but not fatal — they are ignored with no diagnostic for v1 parity. +func (p *parseState) parseTitleDesc(base *baseBlock, pre []Token) { + var paragraphs []string + var current []string + + flush := func() { + if len(current) > 0 { + paragraphs = append(paragraphs, strings.Join(current, " ")) + current = current[:0] + } + } + + for _, t := range pre { + switch t.Kind { + case TokenBlank: + flush() + case TokenText: + current = append(current, t.Text) + case TokenEOF, + TokenAnnotation, + TokenKeywordValue, TokenKeywordBlockHead, + TokenYAMLFence: + // Ignored in the title/description slice. + default: + // Unreachable at v1; future kinds ignored defensively. + } + } + flush() + + if len(paragraphs) > 0 { + base.title = paragraphs[0] + } + if len(paragraphs) > 1 { + base.description = strings.Join(paragraphs[1:], "\n\n") + } +} + +// parseBody handles post-annotation tokens: properties +// (KEYWORD_VALUE / KEYWORD_BLOCK_HEAD), YAML fenced bodies, and the +// terminal TokenEOF. Error recovery: skip unknown tokens; never abort. +// +// Scope for P1.4: +// - KEYWORD_VALUE / KEYWORD_BLOCK_HEAD → Property +// - YAML_FENCE → capture body between matching fences into RawYAML +// - TEXT / BLANK → dropped silently (multi-line block-body collection +// for consumes/produces/security/etc. is P2.3 scope; preserving +// indentation inside YAML fences is also P2.1 scope) +// - Stray ANNOTATION → non-fatal diagnostic (one annotation per block) +func (p *parseState) parseBody(base *baseBlock, post []Token) { + i := 0 + for i < len(post) { + t := post[i] + switch t.Kind { + case TokenEOF, TokenBlank, TokenText: + i++ + + case TokenKeywordValue: + base.properties = append(base.properties, Property{ + Keyword: *t.Keyword, + Pos: t.Pos, + Value: t.Value, + ItemsDepth: t.ItemsDepth, + }) + i++ + + case TokenKeywordBlockHead: + base.properties = append(base.properties, Property{ + Keyword: *t.Keyword, + Pos: t.Pos, + ItemsDepth: t.ItemsDepth, + }) + i++ + + case TokenYAMLFence: + i = p.collectYAMLBody(base, post, i) + + case TokenAnnotation: + p.diag = append(p.diag, Warnf(t.Pos, CodeInvalidAnnotation, + "additional swagger:%s annotation ignored (one per comment block)", + t.Text)) + i++ + + default: + i++ + } + } +} + +// collectYAMLBody captures everything between a YAML_FENCE opener at +// index i and its matching closer (or EOF). Emits an UnterminatedYAML +// diagnostic if no closer is found. Returns the index past the closer. +// +// NOTE: line reconstruction is best-effort at P1.4 — tokens have been +// classified (so `responses:` became a KEYWORD_BLOCK_HEAD), and +// indentation is lost because the preprocessor already trimmed it. +// P2.1 will add fence-state tracking so raw YAML bytes survive +// verbatim. Until then, YAML bodies captured here are suitable for +// Kind/content detection but not for full YAML re-parsing. +func (p *parseState) collectYAMLBody(base *baseBlock, post []Token, i int) int { + openerPos := post[i].Pos + i++ + + var body []string + for i < len(post) && post[i].Kind != TokenYAMLFence && post[i].Kind != TokenEOF { + body = append(body, reconstructLine(post[i])) + i++ + } + + if i < len(post) && post[i].Kind == TokenYAMLFence { + i++ // consume closer + } else { + p.diag = append(p.diag, Errorf(openerPos, CodeUnterminatedYAML, + "YAML body opened with --- but never closed")) + } + + base.yamlBlocks = append(base.yamlBlocks, RawYAML{ + Pos: openerPos, + Text: strings.Join(body, "\n"), + }) + return i +} + +// reconstructLine returns a best-effort text rendering of a Token as +// it appeared on the source line. Used inside YAML fence capture where +// the classifier has already split `keyword:` into KEYWORD_BLOCK_HEAD. +// Indentation is NOT preserved (preprocessor already stripped it); +// P2.1 will fix this. +func reconstructLine(t Token) string { + switch t.Kind { + case TokenBlank: + return "" + case TokenKeywordValue: + return t.Text + ": " + t.Value + case TokenKeywordBlockHead: + return t.Text + ":" + case TokenAnnotation: + if len(t.Args) > 0 { + return "swagger:" + t.Text + " " + strings.Join(t.Args, " ") + } + return "swagger:" + t.Text + case TokenEOF, TokenYAMLFence, TokenText: + return t.Text + default: + return t.Text + } +} diff --git a/internal/parsers/grammar/parser_test.go b/internal/parsers/grammar/parser_test.go new file mode 100644 index 0000000..b142a0d --- /dev/null +++ b/internal/parsers/grammar/parser_test.go @@ -0,0 +1,366 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import ( + "go/ast" + "go/parser" + "go/token" + "net/http" + "slices" + "testing" +) + +// parseCommentGroup is a test helper: parse a Go snippet with +// comment-preserving mode and return the doc comment of its first +// declaration along with the FileSet. +func parseCommentGroup(t *testing.T, src string) (*ast.CommentGroup, *token.FileSet) { + t.Helper() + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, "t.go", src, parser.ParseComments) + if err != nil { + t.Fatalf("parse: %v", err) + } + if len(f.Decls) == 0 { + t.Fatal("no decls in test source") + } + switch d := f.Decls[0].(type) { + case *ast.GenDecl: + return d.Doc, fset + case *ast.FuncDecl: + return d.Doc, fset + } + t.Fatal("decl has no doc comment") + return nil, nil +} + +// --- dispatch --- + +func TestParseModelBlock(t *testing.T) { + src := `package p + +// swagger:model Foo +type Foo struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + if b.AnnotationKind() != AnnModel { + t.Fatalf("kind: got %v want AnnModel", b.AnnotationKind()) + } + mb, ok := b.(*ModelBlock) + if !ok { + t.Fatalf("want *ModelBlock, got %T", b) + } + if mb.Name != "Foo" { + t.Errorf("Name: got %q want Foo", mb.Name) + } +} + +func TestParseRouteBlock(t *testing.T) { + src := `package p + +// swagger:route GET /pets tags listPets +func ListPets() {} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + rb, ok := b.(*RouteBlock) + if !ok { + t.Fatalf("want *RouteBlock, got %T", b) + } + if rb.Method != http.MethodGet { + t.Errorf("Method: got %q want GET", rb.Method) + } + if rb.Path != "/pets" { + t.Errorf("Path: got %q want /pets", rb.Path) + } + if rb.Tags != "tags" { + t.Errorf("Tags: got %q want tags", rb.Tags) + } + if rb.OpID != "listPets" { + t.Errorf("OpID: got %q want listPets", rb.OpID) + } +} + +func TestParseRouteMalformed(t *testing.T) { + src := `package p + +// swagger:route GET +func DoIt() {} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + if len(b.Diagnostics()) == 0 { + t.Error("expected at least one diagnostic for malformed route") + } + found := false + for _, d := range b.Diagnostics() { + if d.Code == CodeInvalidAnnotation { + found = true + break + } + } + if !found { + t.Errorf("want diagnostic with CodeInvalidAnnotation, got %+v", b.Diagnostics()) + } +} + +func TestParseParametersBlock(t *testing.T) { + src := `package p + +// swagger:parameters listPets getPet +type PetParams struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + pb, ok := b.(*ParametersBlock) + if !ok { + t.Fatalf("want *ParametersBlock, got %T", b) + } + if !slices.Equal(pb.TargetTypes, []string{"listPets", "getPet"}) { + t.Errorf("TargetTypes: got %v want [listPets getPet]", pb.TargetTypes) + } +} + +func TestParseUnbound(t *testing.T) { + src := `package p + +// A freeform description. +// maximum: 10 +type Foo int +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + if _, ok := b.(*UnboundBlock); !ok { + t.Fatalf("want *UnboundBlock, got %T", b) + } + if b.AnnotationKind() != AnnUnknown { + t.Errorf("kind: got %v want AnnUnknown", b.AnnotationKind()) + } +} + +func TestParseNilCommentGroup(t *testing.T) { + b := Parse(nil, token.NewFileSet()) + if _, ok := b.(*UnboundBlock); !ok { + t.Fatalf("want *UnboundBlock, got %T", b) + } + if b.Title() != "" || b.Description() != "" { + t.Errorf("empty block should have no title/description") + } +} + +// --- title / description --- + +func TestParseTitleAndDescription(t *testing.T) { + // Godoc-style: description first, annotation later. + src := `package p + +// Foo is the primary thing. +// +// It supports the following operations. +// More detail in the second paragraph. +// +// swagger:model Foo +// maximum: 100 +type Foo int +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + if b.Title() != "Foo is the primary thing." { + t.Errorf("Title: got %q", b.Title()) + } + wantDesc := "It supports the following operations. More detail in the second paragraph." + if b.Description() != wantDesc { + t.Errorf("Description:\n got: %q\nwant: %q", b.Description(), wantDesc) + } +} + +// --- properties --- + +func TestParseProperties(t *testing.T) { + src := `package p + +// swagger:model Foo +// +// maximum: 100 +// minimum: 0 +// pattern: ^[a-z]+$ +type Foo int +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + var names []string + var values []string + for p := range b.Properties() { + names = append(names, p.Keyword.Name) + values = append(values, p.Value) + } + if !slices.Equal(names, []string{"maximum", "minimum", "pattern"}) { + t.Errorf("property names: got %v", names) + } + if !slices.Equal(values, []string{"100", "0", "^[a-z]+$"}) { + t.Errorf("property values: got %v", values) + } +} + +func TestParseItemsNestedProperty(t *testing.T) { + src := `package p + +// swagger:model Foo +// +// items.maximum: 10 +// items.items.minLength: 1 +type Foo int +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + var depths []int + for p := range b.Properties() { + depths = append(depths, p.ItemsDepth) + } + if !slices.Equal(depths, []int{1, 2}) { + t.Errorf("items depths: got %v want [1 2]", depths) + } +} + +func TestParseBlockHeadProperty(t *testing.T) { + src := `package p + +// swagger:meta +// +// consumes: +type Root struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + count := 0 + for p := range b.Properties() { + count++ + if p.Keyword.Name != "consumes" { + t.Errorf("keyword: got %q", p.Keyword.Name) + } + if p.Value != "" { + t.Errorf("block-head Value must be empty, got %q", p.Value) + } + } + if count != 1 { + t.Errorf("property count: got %d want 1", count) + } +} + +// --- YAML fence --- + +func TestParseYAMLFenceBalanced(t *testing.T) { + src := `package p + +// swagger:operation GET /pets listPets +// +// --- +// responses: +// 200: successResponse +// --- +func ListPets() {} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + count := 0 + for y := range b.YAMLBlocks() { + count++ + if !slices.Contains(splitLines(y.Text), "responses:") { + t.Errorf("YAML body missing 'responses:' line:\n%s", y.Text) + } + } + if count != 1 { + t.Errorf("YAML blocks: got %d want 1", count) + } +} + +func TestParseYAMLFenceUnterminated(t *testing.T) { + src := `package p + +// swagger:operation GET /pets listPets +// +// --- +// responses: +func ListPets() {} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + foundUnterminated := false + for _, d := range b.Diagnostics() { + if d.Code == CodeUnterminatedYAML { + foundUnterminated = true + break + } + } + if !foundUnterminated { + t.Errorf("expected CodeUnterminatedYAML diagnostic, got %+v", b.Diagnostics()) + } + // Body should still be captured up to EOF. + count := 0 + for range b.YAMLBlocks() { + count++ + } + if count != 1 { + t.Errorf("YAML blocks captured: got %d want 1", count) + } +} + +// --- no-panic guarantee --- + +func TestParseDoesNotPanicOnAnythingWeird(t *testing.T) { + weirdInputs := []string{ + `package p +// swagger: +func Foo() {}`, + `package p +// swagger:unknownkind +func Foo() {}`, + `package p +// --- +// --- +// --- +func Foo() {}`, + `package p +// swagger:model +// swagger:route GET /x y +func Foo() {}`, + } + for i, src := range weirdInputs { + t.Run("", func(t *testing.T) { + cg, fset := parseCommentGroup(t, src) + defer func() { + if r := recover(); r != nil { + t.Errorf("weird input #%d panicked: %v", i, r) + } + }() + _ = Parse(cg, fset) // must not panic + }) + } +} + +func splitLines(s string) []string { + if s == "" { + return nil + } + var out []string + start := 0 + for i := range len(s) { + if s[i] == '\n' { + out = append(out, s[start:i]) + start = i + 1 + } + } + out = append(out, s[start:]) + return out +} diff --git a/internal/parsers/grammar/preprocess.go b/internal/parsers/grammar/preprocess.go index 382b280..f0b2dc1 100644 --- a/internal/parsers/grammar/preprocess.go +++ b/internal/parsers/grammar/preprocess.go @@ -117,14 +117,15 @@ func stripLine(s string, pos token.Position) Line { // precedes real content on a comment line: // - whitespace (space, tab) // - continuation slashes and asterisks (“//“, “ * “, “ * “) -// - dashes (“ -- “) // - an optional single markdown table pipe “|“ // -// The set mirrors the v1 parser's rxUncommentHeaders so migrated -// fixtures match byte-for-byte at the parse-output level (pre-P5 -// parity harness). +// Notably it does NOT strip leading `-`, because the YAML fence +// marker `---` must survive preprocessing intact for the lexer to +// recognize it. Bullet-list dashes in description text stay in Text +// as well — arguably more faithful to the author's intent than the +// v1 behavior of silently eating them. func trimContentPrefix(s string) string { - s = strings.TrimLeft(s, " \t*/-") + s = strings.TrimLeft(s, " \t*/") s = strings.TrimPrefix(s, "|") return strings.TrimLeft(s, " \t") } From ca1104305b3fa385b485c9587a0da1b5eab930ba Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 15:50:47 +0200 Subject: [PATCH 11/46] feat(grammar): P1.5 primitive value-typing in the parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Populate Property.Typed for the four parse-time-convertible ValueTypes (Number, Integer, Boolean, StringEnum); leave zero for the deferred ones (String verbatim, CommaList, RawValue, RawBlock). Conversion failures emit non-fatal diagnostics with the appropriate CodeInvalid* code; Typed.Type stays ValueNone so consumers can distinguish "no conversion performed" from "zero value successfully parsed". Per-type rules: - Number: strconv.ParseFloat; accepts v1's leading comparison operator (<, <=, >, >=, =) which is captured in TypedValue.Op for the analyzer to use for exclusiveMaximum / exclusiveMinimum. - Integer: strconv.ParseInt base 10, rejects fractions. - Boolean: strict "true"/"false" case-insensitive (stdlib ParseBool is too lenient — it accepts "1"/"t"/"T"/"TRUE" etc., which v1 rejects). - StringEnum: case-insensitive match against Keyword.Value.Values, canonicalised to the table spelling. Adds TypedValue.Op to ast.go for the operator prefix. Tests (typeconv_test.go) cover valid conversion for each type, each operator variant, case-insensitive boolean + enum, the stdlib-lenient "1" rejection, fraction-rejection for integers, and non-primitive value types staying at zero TypedValue. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/ast.go | 9 +- internal/parsers/grammar/parser.go | 91 +++++++++ internal/parsers/grammar/typeconv_test.go | 218 ++++++++++++++++++++++ 3 files changed, 317 insertions(+), 1 deletion(-) create mode 100644 internal/parsers/grammar/typeconv_test.go diff --git a/internal/parsers/grammar/ast.go b/internal/parsers/grammar/ast.go index 1ae6a80..9d04c07 100644 --- a/internal/parsers/grammar/ast.go +++ b/internal/parsers/grammar/ast.go @@ -194,12 +194,19 @@ type Property struct { // TypedValue carries the primitive-converted form of a keyword's // value when the keyword's ValueType is Number/Integer/Boolean/ // StringEnum. For other ValueTypes the fields are zero. +// +// Op is the leading comparison operator stripped from a Number value +// ("<", "<=", ">", ">=", "="); empty when no operator was present or +// for non-Number values. v1 accepts e.g. `maximum: <5` to mean +// exclusive-maximum; the analyzer interprets Op + Number to decide +// inclusive vs. exclusive semantics. type TypedValue struct { Type ValueType + Op string Number float64 Integer int64 Boolean bool - String string // for StringEnum: the matched enum value (canonical) + String string // for StringEnum: the canonical (table-spelled) value } // RawYAML is one captured YAML body (between --- fences). The parser diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index 2c5532f..f74b829 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -14,6 +14,7 @@ package grammar import ( "go/ast" "go/token" + "strconv" "strings" ) @@ -244,6 +245,7 @@ func (p *parseState) parseBody(base *baseBlock, post []Token) { Keyword: *t.Keyword, Pos: t.Pos, Value: t.Value, + Typed: p.typeConvert(*t.Keyword, t.Value, t.Pos), ItemsDepth: t.ItemsDepth, }) i++ @@ -305,6 +307,95 @@ func (p *parseState) collectYAMLBody(base *baseBlock, post []Token, i int) int { return i } +// typeConvert performs primitive value-typing per the keyword's +// declared ValueType (architecture §3.4). Primitives (Number, Integer, +// Boolean, StringEnum) are converted at parse time and populate the +// corresponding TypedValue field. Non-primitive ValueTypes (String, +// CommaList, RawValue, RawBlock, None) return a zero TypedValue — the +// analyzer consumes the raw Property.Value with knowledge of the +// target Go type. +// +// Conversion failures emit non-fatal diagnostics on p.diag; the +// returned TypedValue stays zero so downstream consumers can tell +// "no conversion performed" from "conversion succeeded with zero +// value" via Typed.Type. +func (p *parseState) typeConvert(kw Keyword, raw string, pos token.Position) TypedValue { + switch kw.Value.Type { + case ValueNumber: + op, rest := splitCmpOperator(raw) + n, err := strconv.ParseFloat(strings.TrimSpace(rest), 64) + if err != nil { + p.diag = append(p.diag, Errorf(pos, CodeInvalidNumber, + "%s: %q is not a valid number", kw.Name, raw)) + return TypedValue{} + } + return TypedValue{Type: ValueNumber, Op: op, Number: n} + + case ValueInteger: + i, err := strconv.ParseInt(strings.TrimSpace(raw), 10, 64) + if err != nil { + p.diag = append(p.diag, Errorf(pos, CodeInvalidInteger, + "%s: %q is not a valid integer", kw.Name, raw)) + return TypedValue{} + } + return TypedValue{Type: ValueInteger, Integer: i} + + case ValueBoolean: + b, ok := parseBool(raw) + if !ok { + p.diag = append(p.diag, Errorf(pos, CodeInvalidBoolean, + "%s: %q is not a valid boolean (expected true or false)", kw.Name, raw)) + return TypedValue{} + } + return TypedValue{Type: ValueBoolean, Boolean: b} + + case ValueStringEnum: + for _, allowed := range kw.Value.Values { + if strings.EqualFold(raw, allowed) { + return TypedValue{Type: ValueStringEnum, String: allowed} + } + } + p.diag = append(p.diag, Errorf(pos, CodeInvalidStringEnum, + "%s: %q is not one of {%s}", + kw.Name, raw, strings.Join(kw.Value.Values, ", "))) + return TypedValue{} + + case ValueNone, ValueString, ValueCommaList, ValueRawValue, ValueRawBlock: + return TypedValue{} + + default: + return TypedValue{} + } +} + +// splitCmpOperator strips a leading comparison operator ("<=", ">=", +// "<", ">", "=") from s, returning the operator (or "") and the rest. +// Supports the v1 `maximum: <5` / `minimum: >=0` forms. +func splitCmpOperator(s string) (op, rest string) { + s = strings.TrimLeft(s, " \t") + for _, candidate := range []string{"<=", ">=", "<", ">", "="} { + if strings.HasPrefix(s, candidate) { + return candidate, s[len(candidate):] + } + } + return "", s +} + +// parseBool accepts only "true" or "false" (case-insensitive). stdlib +// strconv.ParseBool is too lenient for the swagger grammar, accepting +// "1", "t", "T", etc. +func parseBool(s string) (bool, bool) { + s = strings.TrimSpace(s) + switch { + case strings.EqualFold(s, "true"): + return true, true + case strings.EqualFold(s, "false"): + return false, true + default: + return false, false + } +} + // reconstructLine returns a best-effort text rendering of a Token as // it appeared on the source line. Used inside YAML fence capture where // the classifier has already split `keyword:` into KEYWORD_BLOCK_HEAD. diff --git a/internal/parsers/grammar/typeconv_test.go b/internal/parsers/grammar/typeconv_test.go new file mode 100644 index 0000000..75147d1 --- /dev/null +++ b/internal/parsers/grammar/typeconv_test.go @@ -0,0 +1,218 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import ( + "testing" +) + +// firstPropertyTyped extracts the first Property of the block and +// returns its Typed value. Helper for compact test cases. +// +//nolint:ireturn // returns Block per the package's polymorphic API +func firstPropertyTyped(t *testing.T, src string) (Property, Block) { + t.Helper() + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + for p := range b.Properties() { + return p, b + } + t.Fatal("block has no properties") + return Property{}, nil +} + +// --- Number --- + +func TestTypeConvertNumber(t *testing.T) { + p, b := firstPropertyTyped(t, "package p\n\n// swagger:model Foo\n// maximum: 5.5\ntype Foo int\n") + if p.Typed.Type != ValueNumber { + t.Fatalf("Typed.Type: got %v want ValueNumber", p.Typed.Type) + } + if p.Typed.Number != 5.5 { + t.Errorf("Number: got %v want 5.5", p.Typed.Number) + } + if p.Typed.Op != "" { + t.Errorf("Op: got %q want empty", p.Typed.Op) + } + if len(b.Diagnostics()) != 0 { + t.Errorf("unexpected diagnostics: %+v", b.Diagnostics()) + } +} + +func TestTypeConvertNumberWithOperator(t *testing.T) { + cases := []struct { + raw string + wantOp string + wantN float64 + }{ + {"<5", "<", 5}, + {"<= 5.5", "<=", 5.5}, + {">=0", ">=", 0}, + {"> 3.14", ">", 3.14}, + {"= 42", "=", 42}, + } + for _, tc := range cases { + t.Run(tc.raw, func(t *testing.T) { + src := "package p\n\n// swagger:model Foo\n// maximum: " + tc.raw + "\ntype Foo int\n" + p, _ := firstPropertyTyped(t, src) + if p.Typed.Op != tc.wantOp { + t.Errorf("Op: got %q want %q", p.Typed.Op, tc.wantOp) + } + if p.Typed.Number != tc.wantN { + t.Errorf("Number: got %v want %v", p.Typed.Number, tc.wantN) + } + }) + } +} + +func TestTypeConvertNumberInvalid(t *testing.T) { + _, b := firstPropertyTyped(t, "package p\n\n// swagger:model Foo\n// maximum: notanumber\ntype Foo int\n") + foundInvalid := false + for _, d := range b.Diagnostics() { + if d.Code == CodeInvalidNumber { + foundInvalid = true + break + } + } + if !foundInvalid { + t.Errorf("want CodeInvalidNumber diagnostic, got %+v", b.Diagnostics()) + } +} + +// --- Integer --- + +func TestTypeConvertInteger(t *testing.T) { + p, _ := firstPropertyTyped(t, "package p\n\n// swagger:model Foo\n// maxLength: 42\ntype Foo int\n") + if p.Typed.Type != ValueInteger { + t.Fatalf("Typed.Type: got %v want ValueInteger", p.Typed.Type) + } + if p.Typed.Integer != 42 { + t.Errorf("Integer: got %v want 42", p.Typed.Integer) + } +} + +func TestTypeConvertIntegerInvalid(t *testing.T) { + _, b := firstPropertyTyped(t, "package p\n\n// swagger:model Foo\n// maxLength: 5.5\ntype Foo int\n") + foundInvalid := false + for _, d := range b.Diagnostics() { + if d.Code == CodeInvalidInteger { + foundInvalid = true + break + } + } + if !foundInvalid { + t.Errorf("want CodeInvalidInteger diagnostic (integer rejects fractions), got %+v", b.Diagnostics()) + } +} + +// --- Boolean --- + +func TestTypeConvertBoolean(t *testing.T) { + cases := []struct { + raw string + want bool + }{ + {"true", true}, + {"false", false}, + {"True", true}, // case-insensitive + {"FALSE", false}, // case-insensitive + } + for _, tc := range cases { + t.Run(tc.raw, func(t *testing.T) { + src := "package p\n\n// swagger:model Foo\n// readOnly: " + tc.raw + "\ntype Foo int\n" + p, _ := firstPropertyTyped(t, src) + if p.Typed.Type != ValueBoolean { + t.Fatalf("Typed.Type: got %v", p.Typed.Type) + } + if p.Typed.Boolean != tc.want { + t.Errorf("Boolean: got %v want %v", p.Typed.Boolean, tc.want) + } + }) + } +} + +func TestTypeConvertBooleanRejectsNumeric(t *testing.T) { + // stdlib strconv.ParseBool would accept "1" / "0" — we reject. + _, b := firstPropertyTyped(t, "package p\n\n// swagger:model Foo\n// readOnly: 1\ntype Foo int\n") + foundInvalid := false + for _, d := range b.Diagnostics() { + if d.Code == CodeInvalidBoolean { + foundInvalid = true + break + } + } + if !foundInvalid { + t.Errorf("want CodeInvalidBoolean for '1' (strict true/false only), got %+v", b.Diagnostics()) + } +} + +// --- StringEnum --- + +func TestTypeConvertStringEnum(t *testing.T) { + // "in" is a StringEnum restricted to {query, path, header, body, formData}. + src := `package p + +// swagger:parameters listPets +// +// in: query +type PetParams struct{} +` + p, _ := firstPropertyTyped(t, src) + if p.Typed.Type != ValueStringEnum { + t.Fatalf("Typed.Type: got %v want ValueStringEnum", p.Typed.Type) + } + if p.Typed.String != "query" { + t.Errorf("String: got %q want query", p.Typed.String) + } +} + +func TestTypeConvertStringEnumCanonicalizes(t *testing.T) { + // Enum lookup is case-insensitive but the canonical (table-spelled) + // value is returned. + src := `package p + +// swagger:parameters listPets +// +// in: QUERY +type PetParams struct{} +` + p, _ := firstPropertyTyped(t, src) + if p.Typed.String != "query" { + t.Errorf("canonicalized String: got %q want query (table casing)", p.Typed.String) + } +} + +func TestTypeConvertStringEnumInvalid(t *testing.T) { + src := `package p + +// swagger:parameters listPets +// +// in: bogus +type PetParams struct{} +` + _, b := firstPropertyTyped(t, src) + foundInvalid := false + for _, d := range b.Diagnostics() { + if d.Code == CodeInvalidStringEnum { + foundInvalid = true + break + } + } + if !foundInvalid { + t.Errorf("want CodeInvalidStringEnum, got %+v", b.Diagnostics()) + } +} + +// --- Non-primitive value types keep Typed zero --- + +func TestTypeConvertNonPrimitivesStayZero(t *testing.T) { + // pattern is ValueString (verbatim) → Typed should be zero. + p, _ := firstPropertyTyped(t, "package p\n\n// swagger:model Foo\n// pattern: ^[a-z]+$\ntype Foo int\n") + if p.Typed.Type != ValueNone { + t.Errorf("pattern: Typed.Type should be ValueNone (no parse-time conversion), got %v", p.Typed.Type) + } + if p.Value != "^[a-z]+$" { + t.Errorf("raw Value must survive verbatim: got %q", p.Value) + } +} From 7b12f7a01494fa252ec28c6e746c648022979bd8 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 15:55:09 +0200 Subject: [PATCH 12/46] feat(grammar): P1.6 godoc-ident-prefix for swagger:route MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The v1 rxRoutePrefix allows one leading godoc identifier before swagger:route, e.g.: // ListPets swagger:route GET /pets tags listPets matchGodocRoutePrefix scans a leading identifier (Unicode letter + letters/digits/'_'/'-'), whitespace, then the literal "swagger:route" terminated by whitespace or EOL. On match the lexer advances past the prefix and feeds the rest to lexAnnotation, producing a normal TokenAnnotation with Pos pointing at 's' of swagger:. The exception is narrow by design: - Only "swagger:route" — other annotations (model, operation, parameters, …) keep the "must start the line" rule. - "swagger:routex" does NOT match (guarded). - Multi-word prefixes ("Do Foo swagger:route") do NOT match. Positional Method/Path/Tags/OpID extraction was already in place from P1.4 (fillOperationArgs). This commit just feeds the godoc- prefixed form into the same path. Tests cover: lexer-level classification (route-only exception, 'swagger:routex' rejection, multi-word-prefix rejection, Pos advance past prefix), and end-to-end parser production of a proper RouteBlock with zero diagnostics. Also adds a nolint comment on Kind.String() for "route"/"operation"/ "meta"/"response" labels — goconst wants to share labelXxx from ast.go but Kind (keyword context) and AnnotationKind (Block dispatch) are intentionally separate concerns (architecture §4.6). Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/keywords.go | 7 +++ internal/parsers/grammar/lexer.go | 66 +++++++++++++++++++++++++ internal/parsers/grammar/lexer_test.go | 52 +++++++++++++++++++ internal/parsers/grammar/parser_test.go | 29 +++++++++++ 4 files changed, 154 insertions(+) diff --git a/internal/parsers/grammar/keywords.go b/internal/parsers/grammar/keywords.go index e00741a..8417687 100644 --- a/internal/parsers/grammar/keywords.go +++ b/internal/parsers/grammar/keywords.go @@ -76,6 +76,13 @@ func (k Kind) String() string { return "schema" case KindItems: return "items" + // The string literals below intentionally duplicate some of the + // labelXxx constants defined in ast.go. Kind (keyword context) and + // AnnotationKind (Block dispatch) are separate concerns that happen + // to share a handful of label spellings; see architecture §4.6 — + // coupling them through a shared const would hide that distinction. + // + //nolint:goconst // see note above case KindRoute: return "route" case KindOperation: diff --git a/internal/parsers/grammar/lexer.go b/internal/parsers/grammar/lexer.go index c6a112d..d50a04d 100644 --- a/internal/parsers/grammar/lexer.go +++ b/internal/parsers/grammar/lexer.go @@ -6,6 +6,8 @@ package grammar import ( "go/token" "strings" + "unicode" + "unicode/utf8" ) // TokenKind classifies a preprocessed line. The lexer assigns exactly @@ -91,12 +93,76 @@ func lexLine(line Line) Token { if strings.HasPrefix(text, "swagger:") { return lexAnnotation(text, line.Pos) } + // swagger:route is the one annotation allowed to follow a leading + // godoc-style identifier (e.g. `DoFoo swagger:route GET /pets ...`). + // See architecture §1.1 C2 / v1 rxRoutePrefix. + if prefixLen, ok := matchGodocRoutePrefix(text); ok { + pos := line.Pos + pos.Column += prefixLen + pos.Offset += prefixLen + return lexAnnotation(text[prefixLen:], pos) + } if tok, ok := lexKeyword(text, line.Pos); ok { return tok } return Token{Kind: TokenText, Text: text, Pos: line.Pos} } +// matchGodocRoutePrefix returns the byte offset of "swagger:route" +// in s if s has the form "swagger:route". +// Returns (0, false) otherwise. Only "route" gets this exception. +func matchGodocRoutePrefix(s string) (int, bool) { + identEnd := scanIdentifier(s) + if identEnd == 0 { + return 0, false + } + wsEnd := identEnd + for wsEnd < len(s) && (s[wsEnd] == ' ' || s[wsEnd] == '\t') { + wsEnd++ + } + if wsEnd == identEnd { + return 0, false + } + const prefix = "swagger:" + labelRoute + if !strings.HasPrefix(s[wsEnd:], prefix) { + return 0, false + } + // Guard against "swagger:routex" — the annotation name must end. + after := wsEnd + len(prefix) + if after < len(s) && s[after] != ' ' && s[after] != '\t' { + return 0, false + } + return wsEnd, true +} + +// scanIdentifier returns the byte length of a leading godoc identifier +// in s, or 0 if s does not start with one. Matches v1's +// `\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]*` — a Unicode letter followed by +// letters, digits, hyphens, or connector punctuation (underscore). +func scanIdentifier(s string) int { + if len(s) == 0 { + return 0 + } + r, size := utf8.DecodeRuneInString(s) + if !unicode.IsLetter(r) { + return 0 + } + i := size + for i < len(s) { + r, size = utf8.DecodeRuneInString(s[i:]) + if !isIdentCont(r) { + break + } + i += size + } + return i +} + +func isIdentCont(r rune) bool { + return unicode.IsLetter(r) || unicode.IsDigit(r) || + r == '_' || r == '-' +} + // lexAnnotation parses "swagger: [arg1 arg2 ...]". Malformed // (empty name) falls back to a TEXT token so the parser can emit a // diagnostic at the analyzer layer. diff --git a/internal/parsers/grammar/lexer_test.go b/internal/parsers/grammar/lexer_test.go index 3aed229..b7e7419 100644 --- a/internal/parsers/grammar/lexer_test.go +++ b/internal/parsers/grammar/lexer_test.go @@ -191,6 +191,58 @@ func TestLexItemsPrefixAdvancesPos(t *testing.T) { } } +func TestLexGodocIdentPrefixForRoute(t *testing.T) { + // "DoFoo swagger:route GET /pets tags listPets" — v1 rxRoutePrefix + // allows one leading godoc identifier before swagger:route. + toks := Lex(mkLines("DoFoo swagger:route GET /pets tags listPets")) + if toks[0].Kind != TokenAnnotation { + t.Fatalf("want ANNOTATION, got %s: %+v", toks[0].Kind, toks[0]) + } + if toks[0].Text != "route" { + t.Errorf("annotation name: got %q want route", toks[0].Text) + } + // Position should point past the "DoFoo " prefix (6 bytes). + if toks[0].Pos.Column != 7 { + t.Errorf("Pos.Column: got %d want 7 (after 'DoFoo ')", toks[0].Pos.Column) + } +} + +func TestLexGodocIdentPrefixRestrictedToRoute(t *testing.T) { + // Only "route" gets the ident-prefix exception — other annotations + // stay as TEXT when prefixed. + cases := []string{ + "DoFoo swagger:model Bar", + "DoFoo swagger:operation GET /pets listPets", + "DoFoo swagger:parameters Foo", + } + for _, in := range cases { + t.Run(in, func(t *testing.T) { + toks := Lex(mkLines(in)) + if toks[0].Kind != TokenText { + t.Errorf("%q: want TEXT (non-route annotation must start line), got %s", + in, toks[0].Kind) + } + }) + } +} + +func TestLexGodocIdentPrefixRejectsMultipleIdents(t *testing.T) { + // Two-word prefix is not allowed — must be exactly one identifier. + toks := Lex(mkLines("Do Foo swagger:route GET /pets listPets")) + if toks[0].Kind != TokenText { + t.Errorf("want TEXT (multi-word prefix not allowed), got %s", toks[0].Kind) + } +} + +func TestLexGodocIdentPrefixRejectsSwaggerRoutex(t *testing.T) { + // "swagger:routex" is not swagger:route — ident-prefix must not eat + // into a longer annotation name. + toks := Lex(mkLines("DoFoo swagger:routex GET /pets listPets")) + if toks[0].Kind != TokenText { + t.Errorf("want TEXT (swagger:routex is not swagger:route), got %s", toks[0].Kind) + } +} + func TestLexEOFIsAlwaysLast(t *testing.T) { toks := Lex(mkLines("swagger:model Foo", "", "maximum: 5")) if toks[len(toks)-1].Kind != TokenEOF { diff --git a/internal/parsers/grammar/parser_test.go b/internal/parsers/grammar/parser_test.go index b142a0d..35a0cc5 100644 --- a/internal/parsers/grammar/parser_test.go +++ b/internal/parsers/grammar/parser_test.go @@ -85,6 +85,35 @@ func ListPets() {} } } +func TestParseRouteWithGodocIdentPrefix(t *testing.T) { + // End-to-end: the leading "ListPets" identifier is stripped so the + // parser sees a normal swagger:route annotation. + src := `package p + +// ListPets swagger:route GET /pets tags listPets +func ListPets() {} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + rb, ok := b.(*RouteBlock) + if !ok { + t.Fatalf("want *RouteBlock, got %T", b) + } + if rb.Method != http.MethodGet { + t.Errorf("Method: got %q want GET", rb.Method) + } + if rb.Path != "/pets" { + t.Errorf("Path: got %q want /pets", rb.Path) + } + if rb.OpID != "listPets" { + t.Errorf("OpID: got %q want listPets", rb.OpID) + } + if len(b.Diagnostics()) != 0 { + t.Errorf("godoc-prefixed route must parse cleanly, got %+v", b.Diagnostics()) + } +} + func TestParseRouteMalformed(t *testing.T) { src := `package p From dac287283c0ad80d3d17efab61af7571fdde77a1 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 16:13:24 +0200 Subject: [PATCH 13/46] feat(grammar): P1.7 context-validity diagnostic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add checkContextValidity(base) to the parser's post-body pass. For each Property, check whether the Keyword's Contexts list intersects the allowed set for the block's AnnotationKind; if not, emit CodeContextInvalid as SeverityWarning (non-fatal). Mapping (AnnotationKind -> allowed Kind union): - AnnModel -> {Schema, Items} - AnnParameters -> {Param, Schema, Items} - AnnResponse -> {Response, Schema, Header, Items} - AnnOperation -> {Operation, Param, Schema, Header, Items, Response} - AnnRoute -> {Route, Param, Schema, Header, Items, Response} - AnnMeta -> {Meta, Schema} - everything else -> nil (skip check) The sets are deliberately permissive: an operation body can host schema properties, response headers, parameters, etc. Analyzers with more context (Go type, enclosing struct) can enforce tighter rules. UnboundBlock skips the check — its target context is determined by the scanner from the enclosing declaration. Diagnostic format: keyword "in" not valid under swagger:model (legal in: param) Tests (context_test.go) cover: - legal keyword -> no diagnostic - illegal keyword -> exactly one warning, message mentions the keyword and its legal contexts - multiple illegal keywords -> one diagnostic each - keywords legal under multiple annotations (consumes: under Meta / Route / Operation) -> zero diagnostics - UnboundBlock skips the check - severity is Warning, never Error; Block is still produced Cleanups shaken out by the new checks: - contextsOverlap uses slices.Contains - Kind.String() drops its now-unused nolint (goconst no longer triggers) - lexer_test.go references labelRoute instead of the literal "route" - context_test.go helper hardcodes CodeContextInvalid (unparam) Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/context_test.go | 111 +++++++++++++++++++++++ internal/parsers/grammar/keywords.go | 11 +-- internal/parsers/grammar/lexer_test.go | 4 +- internal/parsers/grammar/parser.go | 80 ++++++++++++++++ 4 files changed, 197 insertions(+), 9 deletions(-) create mode 100644 internal/parsers/grammar/context_test.go diff --git a/internal/parsers/grammar/context_test.go b/internal/parsers/grammar/context_test.go new file mode 100644 index 0000000..9867714 --- /dev/null +++ b/internal/parsers/grammar/context_test.go @@ -0,0 +1,111 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import ( + "strings" + "testing" +) + +// contextInvalidDiagnostics returns the subset of b's diagnostics +// with Code == CodeContextInvalid. Used by every test in this file. +func contextInvalidDiagnostics(b Block) []Diagnostic { + var out []Diagnostic + for _, d := range b.Diagnostics() { + if d.Code == CodeContextInvalid { + out = append(out, d) + } + } + return out +} + +func TestContextValidityLegalKeyword(t *testing.T) { + // `maximum` is legal under swagger:model (KindSchema) — no diagnostic. + cg, fset := parseCommentGroup(t, "package p\n\n// swagger:model Foo\n// maximum: 5\ntype Foo int\n") + b := Parse(cg, fset) + if got := contextInvalidDiagnostics(b); len(got) != 0 { + t.Errorf("unexpected context-invalid diagnostic: %+v", got) + } +} + +func TestContextValidityIllegalKeyword(t *testing.T) { + // `in:` is KindParam only — illegal under swagger:model. + cg, fset := parseCommentGroup(t, "package p\n\n// swagger:model Foo\n// in: query\ntype Foo int\n") + b := Parse(cg, fset) + diag := contextInvalidDiagnostics(b) + if len(diag) != 1 { + t.Fatalf("want 1 context-invalid diagnostic, got %d: %+v", len(diag), b.Diagnostics()) + } + if !strings.Contains(diag[0].Message, "in") { + t.Errorf("diagnostic message should mention the keyword: %q", diag[0].Message) + } + if !strings.Contains(diag[0].Message, "param") { + t.Errorf("diagnostic message should list legal contexts (param): %q", diag[0].Message) + } + if diag[0].Severity != SeverityWarning { + t.Errorf("severity: got %v want warning", diag[0].Severity) + } +} + +func TestContextValidityMultipleIllegalKeywords(t *testing.T) { + // Two illegal keywords under swagger:route: `version` (KindMeta) + // and `license` (KindMeta). + src := `package p + +// swagger:route GET /pets tags listPets +// +// version: 1.0 +// license: MIT +func ListPets() {} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + diag := contextInvalidDiagnostics(b) + if len(diag) != 2 { + t.Errorf("want 2 context-invalid diagnostics, got %d: %+v", len(diag), diag) + } +} + +func TestContextValidityLegalUnderMultipleAnnotations(t *testing.T) { + // `consumes:` is legal under Meta, Route, Operation — all three + // should produce zero context-invalid diagnostics. + srcs := []string{ + "package p\n\n// swagger:meta\n//\n// consumes:\ntype Root struct{}\n", + "package p\n\n// swagger:route GET /p tags o\n//\n// consumes:\nfunc F() {}\n", + "package p\n\n// swagger:operation GET /p o\n//\n// consumes:\nfunc F() {}\n", + } + for _, src := range srcs { + t.Run(src[:40], func(t *testing.T) { + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + if got := contextInvalidDiagnostics(b); len(got) != 0 { + t.Errorf("unexpected context-invalid diagnostic: %+v", got) + } + }) + } +} + +func TestContextValiditySkipsUnboundBlock(t *testing.T) { + // No annotation -> UnboundBlock -> no context check. + cg, fset := parseCommentGroup(t, "package p\n\n// maximum: 5\n// in: query\n// version: 1.0\ntype Foo int\n") + b := Parse(cg, fset) + if got := contextInvalidDiagnostics(b); len(got) != 0 { + t.Errorf("UnboundBlock must skip context check, got %+v", got) + } +} + +func TestContextValidityDiagnosticsAreWarnings(t *testing.T) { + // Context-invalid must never be fatal — the parser should still + // have produced a valid Block. + cg, fset := parseCommentGroup(t, "package p\n\n// swagger:model Foo\n// in: query\ntype Foo int\n") + b := Parse(cg, fset) + if _, ok := b.(*ModelBlock); !ok { + t.Fatalf("parser should still produce ModelBlock, got %T", b) + } + for _, d := range b.Diagnostics() { + if d.Code == CodeContextInvalid && d.Severity == SeverityError { + t.Errorf("context-invalid must be warning, not error: %+v", d) + } + } +} diff --git a/internal/parsers/grammar/keywords.go b/internal/parsers/grammar/keywords.go index 8417687..62166f5 100644 --- a/internal/parsers/grammar/keywords.go +++ b/internal/parsers/grammar/keywords.go @@ -76,13 +76,10 @@ func (k Kind) String() string { return "schema" case KindItems: return "items" - // The string literals below intentionally duplicate some of the - // labelXxx constants defined in ast.go. Kind (keyword context) and - // AnnotationKind (Block dispatch) are separate concerns that happen - // to share a handful of label spellings; see architecture §4.6 — - // coupling them through a shared const would hide that distinction. - // - //nolint:goconst // see note above + // Kind (keyword context) and AnnotationKind (Block dispatch) are + // separate concerns that happen to share a handful of label + // spellings; see architecture §4.6 — keep the literals independent + // of ast.go's labelXxx constants. case KindRoute: return "route" case KindOperation: diff --git a/internal/parsers/grammar/lexer_test.go b/internal/parsers/grammar/lexer_test.go index b7e7419..e193913 100644 --- a/internal/parsers/grammar/lexer_test.go +++ b/internal/parsers/grammar/lexer_test.go @@ -57,7 +57,7 @@ func TestLexAnnotationRoute(t *testing.T) { if toks[0].Kind != TokenAnnotation { t.Fatalf("want ANNOTATION, got %s", toks[0].Kind) } - if toks[0].Text != "route" { + if toks[0].Text != labelRoute { t.Errorf("name: got %q want route", toks[0].Text) } want := []string{"GET", "/pets", "tags", "listPets"} @@ -198,7 +198,7 @@ func TestLexGodocIdentPrefixForRoute(t *testing.T) { if toks[0].Kind != TokenAnnotation { t.Fatalf("want ANNOTATION, got %s: %+v", toks[0].Kind, toks[0]) } - if toks[0].Text != "route" { + if toks[0].Text != labelRoute { t.Errorf("annotation name: got %q want route", toks[0].Text) } // Position should point past the "DoFoo " prefix (6 bytes). diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index f74b829..912437c 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -14,6 +14,7 @@ package grammar import ( "go/ast" "go/token" + "slices" "strconv" "strings" ) @@ -74,11 +75,90 @@ func (p *parseState) parse() Block { p.parseTitleDesc(base, pre) p.parseBody(base, post) + p.checkContextValidity(base) base.diagnostics = append(base.diagnostics, p.diag...) return typed } +// checkContextValidity emits CodeContextInvalid warnings for every +// Property whose keyword is not legal under the block's +// AnnotationKind. Non-fatal (SeverityWarning); the analyzer decides +// policy. Skipped for UnboundBlock and non-dispatched annotations +// where context legality isn't meaningful at the parser layer. +func (p *parseState) checkContextValidity(base *baseBlock) { + allowed := allowedContexts(base.kind) + if allowed == nil { + return + } + for _, prop := range base.properties { + if contextsOverlap(prop.Keyword.Contexts, allowed) { + continue + } + p.diag = append(p.diag, Warnf(prop.Pos, CodeContextInvalid, + "keyword %q not valid under swagger:%s (legal in: %s)", + prop.Keyword.Name, base.kind, + formatKeywordContexts(prop.Keyword.Contexts))) + } +} + +// allowedContexts returns the set of Kind sub-contexts that may host +// keywords under the given AnnotationKind. Returns nil to mean "no +// parser-layer check" (UnboundBlock, strfmt, alias, etc., where the +// legality depends on external context the parser doesn't have). +// +// The sets are deliberately broad: an operation body can contain +// schema properties, response headers, parameters, and more, so +// allowedContexts(AnnOperation) lists all plausible sub-contexts. +// Analyzers may enforce tighter rules with more context (Go type, +// enclosing struct) but the parser uses the permissive union. +func allowedContexts(a AnnotationKind) []Kind { + switch a { + case AnnModel: + return []Kind{KindSchema, KindItems} + case AnnParameters: + return []Kind{KindParam, KindSchema, KindItems} + case AnnResponse: + return []Kind{KindResponse, KindSchema, KindHeader, KindItems} + case AnnOperation: + return []Kind{KindOperation, KindParam, KindSchema, KindHeader, KindItems, KindResponse} + case AnnRoute: + return []Kind{KindRoute, KindParam, KindSchema, KindHeader, KindItems, KindResponse} + case AnnMeta: + return []Kind{KindMeta, KindSchema} + case AnnUnknown, + AnnStrfmt, AnnAlias, AnnName, AnnAllOf, AnnEnumDecl, + AnnIgnore, AnnDefaultName, AnnType, AnnFile: + return nil + default: + return nil + } +} + +// contextsOverlap reports whether any Kind in the keyword's contexts +// list is in the allowed set. +func contextsOverlap(kwContexts []ContextDoc, allowed []Kind) bool { + for _, cd := range kwContexts { + if slices.Contains(allowed, cd.Kind) { + return true + } + } + return false +} + +// formatKeywordContexts renders a keyword's legal Kind list for +// diagnostics — "schema, param, items". +func formatKeywordContexts(ctxs []ContextDoc) string { + if len(ctxs) == 0 { + return "(none)" + } + out := make([]string, len(ctxs)) + for i, c := range ctxs { + out[i] = c.Kind.String() + } + return strings.Join(out, ", ") +} + // findAnnotation returns the index of the first TokenAnnotation in // tokens, or -1 if none. Annotations normally occupy the top of a // comment group, but godoc-style placement (e.g., annotation after a From 86795ca4bc0a7f82f4f4cb8077aed1767861d2f1 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 16:20:58 +0200 Subject: [PATCH 14/46] test(grammar): P1.8 production + coverage-fill unit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add productions_test.go with one focused test per §2.1 envelope production: - annotation-only (annotation w/o body) - title-only (single paragraph, no description) - multi-paragraph desc (verifies \n\n join) - properties-no-title (annotation followed by keywords) - properties-interleaved (TEXT between properties is dropped) - block-head property (consumes: value-less) - empty YAML body (--- immediately followed by ---) - multiple YAML blocks (two independent fenced sections) - full-envelope order (title → desc → props → yaml composed) Plus coverage-fill: - Exhaustive String() on Kind, ValueType, TokenKind (previously only spot-tested). - Remaining AnnotationKind dispatch (ResponseBlock, MetaBlock, UnboundBlock from strfmt / alias / allOf / enum / ignore / file). Coverage on internal/parsers/grammar/ rises from 85.1% to 93.3% — above the ≥90% exit criterion for P1. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/productions_test.go | 410 +++++++++++++++++++ 1 file changed, 410 insertions(+) create mode 100644 internal/parsers/grammar/productions_test.go diff --git a/internal/parsers/grammar/productions_test.go b/internal/parsers/grammar/productions_test.go new file mode 100644 index 0000000..ba5fa0e --- /dev/null +++ b/internal/parsers/grammar/productions_test.go @@ -0,0 +1,410 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import ( + "slices" + "strings" + "testing" +) + +// This file holds one focused test per grammar-envelope production +// (architecture §2.1). It complements the per-component suites in +// preprocess_test.go, lexer_test.go, parser_test.go, typeconv_test.go, +// and context_test.go — those cover mechanisms; this file covers the +// named productions as discrete units. + +const ( + fixtureModelName = "Foo" + fixtureBlockKw = "consumes" +) + +// --- annotation-line --- + +func TestProductionAnnotationOnly(t *testing.T) { + // Annotation with no surrounding text or body. Block created, + // zero properties, zero diagnostics. + src := `package p + +// swagger:model Foo +type Foo int +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + mb, ok := b.(*ModelBlock) + if !ok { + t.Fatalf("want *ModelBlock, got %T", b) + } + if mb.Name != fixtureModelName { + t.Errorf("Name: got %q want Foo", mb.Name) + } + if b.Title() != "" || b.Description() != "" { + t.Errorf("title/description must be empty: %q / %q", b.Title(), b.Description()) + } + propCount := 0 + for range b.Properties() { + propCount++ + } + if propCount != 0 { + t.Errorf("want 0 properties, got %d", propCount) + } + if len(b.Diagnostics()) != 0 { + t.Errorf("unexpected diagnostics: %+v", b.Diagnostics()) + } +} + +// --- title-paragraph --- + +func TestProductionTitleOnly(t *testing.T) { + // One paragraph of free text, no blank separator, no body. + // First paragraph becomes Title; Description stays empty. + src := `package p + +// Foo is a thing. +// More detail on the same paragraph. +// +// swagger:model Foo +type Foo int +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + if b.Title() != "Foo is a thing. More detail on the same paragraph." { + t.Errorf("Title: got %q", b.Title()) + } + if b.Description() != "" { + t.Errorf("Description should be empty, got %q", b.Description()) + } +} + +// --- description-paragraphs --- + +func TestProductionMultiParagraphDescription(t *testing.T) { + // Title + two description paragraphs; verify \n\n paragraph join. + src := `package p + +// Foo is a thing. +// +// First description paragraph. +// +// Second description paragraph, +// continued on a second line. +// +// swagger:model Foo +type Foo int +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + if b.Title() != "Foo is a thing." { + t.Errorf("Title: got %q", b.Title()) + } + wantDesc := "First description paragraph.\n\n" + + "Second description paragraph, continued on a second line." + if b.Description() != wantDesc { + t.Errorf("Description:\n got: %q\nwant: %q", b.Description(), wantDesc) + } +} + +// --- property-line --- + +func TestProductionPropertiesNoTitle(t *testing.T) { + // Properties follow the annotation immediately; no title/description. + src := `package p + +// swagger:model Foo +// maximum: 10 +// minimum: 0 +type Foo int +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + if b.Title() != "" || b.Description() != "" { + t.Errorf("title/description must be empty: %q / %q", b.Title(), b.Description()) + } + var names []string + for p := range b.Properties() { + names = append(names, p.Keyword.Name) + } + if !slices.Equal(names, []string{"maximum", "minimum"}) { + t.Errorf("property names: got %v", names) + } +} + +func TestProductionPropertiesInterleavedWithText(t *testing.T) { + // TEXT lines between properties are dropped by the parser body; + // only properties survive. + src := `package p + +// swagger:model Foo +// maximum: 10 +// some commentary between properties +// minimum: 0 +type Foo int +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + var names []string + for p := range b.Properties() { + names = append(names, p.Keyword.Name) + } + if !slices.Equal(names, []string{"maximum", "minimum"}) { + t.Errorf("interleaved text should drop, leaving properties: got %v", names) + } +} + +// --- multi-line block (block-head) --- + +func TestProductionBlockHeadProperty(t *testing.T) { + // `consumes:` as a value-less block head. P1 only records the + // head; body-line collection is P2.3. + src := `package p + +// swagger:meta +// +// consumes: +type Root struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + var saw Property + count := 0 + for p := range b.Properties() { + saw = p + count++ + } + if count != 1 { + t.Fatalf("want 1 property, got %d", count) + } + if saw.Keyword.Name != fixtureBlockKw { + t.Errorf("keyword: got %q want consumes", saw.Keyword.Name) + } + if saw.Value != "" { + t.Errorf("block-head Value must be empty: %q", saw.Value) + } +} + +// --- yaml-block --- + +func TestProductionEmptyYAMLBody(t *testing.T) { + // --- immediately followed by --- (empty body). One RawYAML with + // empty text. + src := `package p + +// swagger:operation GET /pets listPets +// +// --- +// --- +func ListPets() {} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + count := 0 + for y := range b.YAMLBlocks() { + count++ + if y.Text != "" { + t.Errorf("empty fence body: want empty text, got %q", y.Text) + } + } + if count != 1 { + t.Errorf("want 1 YAML block, got %d", count) + } + if len(b.Diagnostics()) != 0 { + t.Errorf("unexpected diagnostics for balanced empty fences: %+v", b.Diagnostics()) + } +} + +func TestProductionMultipleYAMLBlocks(t *testing.T) { + // Two independently fenced YAML sections — both captured. + src := `package p + +// swagger:operation GET /pets listPets +// +// --- +// first: block +// --- +// +// Prose between the blocks. +// +// --- +// second: block +// --- +func ListPets() {} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + var texts []string + for y := range b.YAMLBlocks() { + texts = append(texts, y.Text) + } + if len(texts) != 2 { + t.Fatalf("want 2 YAML blocks, got %d: %v", len(texts), texts) + } + if !strings.Contains(texts[0], "first") { + t.Errorf("block 0 missing 'first': %q", texts[0]) + } + if !strings.Contains(texts[1], "second") { + t.Errorf("block 1 missing 'second': %q", texts[1]) + } +} + +// --- enum String() exhaustiveness (coverage-fill) --- + +func TestEnumStringExhaustive(t *testing.T) { + // Kind + kindCases := []struct { + in Kind + want string + }{ + {KindUnknown, "unknown"}, + {KindParam, "param"}, + {KindHeader, "header"}, + {KindSchema, "schema"}, + {KindItems, "items"}, + {KindRoute, "route"}, + {KindOperation, "operation"}, + {KindMeta, "meta"}, + {KindResponse, "response"}, + {Kind(99), "unknown"}, + } + for _, tc := range kindCases { + if got := tc.in.String(); got != tc.want { + t.Errorf("Kind(%d).String() = %q want %q", int(tc.in), got, tc.want) + } + } + + // ValueType + vtCases := []struct { + in ValueType + want string + }{ + {ValueNone, "none"}, + {ValueNumber, "number"}, + {ValueInteger, "integer"}, + {ValueBoolean, "boolean"}, + {ValueString, "string"}, + {ValueCommaList, "comma-list"}, + {ValueStringEnum, "string-enum"}, + {ValueRawBlock, "raw-block"}, + {ValueRawValue, "raw-value"}, + {ValueType(99), "none"}, + } + for _, tc := range vtCases { + if got := tc.in.String(); got != tc.want { + t.Errorf("ValueType(%d).String() = %q want %q", int(tc.in), got, tc.want) + } + } + + // TokenKind + tkCases := []struct { + in TokenKind + want string + }{ + {TokenEOF, "EOF"}, + {TokenBlank, "BLANK"}, + {TokenText, "TEXT"}, + {TokenAnnotation, "ANNOTATION"}, + {TokenKeywordValue, "KEYWORD_VALUE"}, + {TokenKeywordBlockHead, "KEYWORD_BLOCK_HEAD"}, + {TokenYAMLFence, "YAML_FENCE"}, + {TokenKind(99), "?"}, + } + for _, tc := range tkCases { + if got := tc.in.String(); got != tc.want { + t.Errorf("TokenKind(%d).String() = %q want %q", int(tc.in), got, tc.want) + } + } +} + +// --- remaining AnnotationKind → Block dispatch paths --- + +func TestAnnotationDispatchRemaining(t *testing.T) { + cases := []struct { + src string + want any + }{ + {"package p\n\n// swagger:response okResp\ntype OK struct{}\n", (*ResponseBlock)(nil)}, + {"package p\n\n// swagger:meta\ntype Root struct{}\n", (*MetaBlock)(nil)}, + {"package p\n\n// swagger:strfmt mac\ntype MAC string\n", (*UnboundBlock)(nil)}, + {"package p\n\n// swagger:alias\ntype Alias string\n", (*UnboundBlock)(nil)}, + {"package p\n\n// swagger:allOf Base\ntype Derived struct{}\n", (*UnboundBlock)(nil)}, + {"package p\n\n// swagger:enum Colors\ntype Color int\n", (*UnboundBlock)(nil)}, + {"package p\n\n// swagger:ignore\ntype X struct{}\n", (*UnboundBlock)(nil)}, + {"package p\n\n// swagger:file\ntype F struct{}\n", (*UnboundBlock)(nil)}, + } + for _, tc := range cases { + cg, fset := parseCommentGroup(t, tc.src) + b := Parse(cg, fset) + // Compare concrete types reflectively via type switch. + switch tc.want.(type) { + case *ResponseBlock: + if _, ok := b.(*ResponseBlock); !ok { + t.Errorf("src=%q: want *ResponseBlock, got %T", tc.src, b) + } + case *MetaBlock: + if _, ok := b.(*MetaBlock); !ok { + t.Errorf("src=%q: want *MetaBlock, got %T", tc.src, b) + } + case *UnboundBlock: + if _, ok := b.(*UnboundBlock); !ok { + t.Errorf("src=%q: want *UnboundBlock, got %T", tc.src, b) + } + } + } +} + +// --- envelope order (all productions composed) --- + +func TestProductionEnvelopeFullOrder(t *testing.T) { + // A comment block exercising every production in its natural + // order: title → description → properties → yaml body. + src := `package p + +// A one-line title. +// +// A description paragraph, +// continued on the next line. +// +// swagger:operation GET /pets tags listPets +// +// maximum: 100 +// +// --- +// responses: +// 200: ok +// --- +func ListPets() {} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + if _, ok := b.(*OperationBlock); !ok { + t.Fatalf("want *OperationBlock, got %T", b) + } + if b.Title() != "A one-line title." { + t.Errorf("Title: got %q", b.Title()) + } + if !strings.HasPrefix(b.Description(), "A description paragraph") { + t.Errorf("Description: got %q", b.Description()) + } + propCount := 0 + for range b.Properties() { + propCount++ + } + if propCount == 0 { + t.Error("expected at least one property (maximum)") + } + yamlCount := 0 + for range b.YAMLBlocks() { + yamlCount++ + } + if yamlCount != 1 { + t.Errorf("want 1 YAML block, got %d", yamlCount) + } +} From 71bb5040920e632bd08e21541be2191a5abb0a91 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 16:36:21 +0200 Subject: [PATCH 15/46] feat(grammar): P1.10 verbatim YAML bodies + `-` strip parity lock-in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two catch-up items from the P1 flag queue. 1. Verbatim YAML body contract. - Add Line.Raw field — content with Go comment markers stripped and one layer of godoc decoration removed, but all YAML indentation preserved. - Per comment kind: `// foo` -> Raw = "foo" (one godoc space stripped) `// 200: ok` -> Raw = " 200: ok" (2-space indent kept) `/* * bar */` (cont.) -> Raw = "bar" (block continuation stripped) `/* ... */` no-cont. -> Raw = full line (indentation kept) - Preprocessor threads a per-comment-kind rawStrip strategy into stripLine (stripSingleGodocSpace vs stripBlockContinuation). - Lexer now tracks one bit of state (inFence) and emits a new TokenRawLine kind carrying Line.Raw verbatim for every line between matched `---` fences. - Parser's collectYAMLBody simplifies to `post[i].Text`; the best-effort reconstructLine() from P1.4 is deleted outright. - `TestParseYAMLFenceBalanced` updated (via new production tests) to assert indentation is preserved; body lines come through ready for internal/parsers/yaml/ to consume. 2. Preprocessor `-` stripping — v2 divergence lock-in. - Decision: keep leading `-` in Line.Text (don't restore v1's silent strip). This preserves bullet-list semantics ("- foo" stays "- foo", not "foo") and keeps the `---` YAML fence marker detectable without special-casing. - trimContentPrefix already dropped `-` from the strip set in P1.4; this commit adds explicit tests that lock in the behavior (TestP110DashNotStrippedInProse + preprocessor-level TestP110DashPreservedOnlySurvivesVerbatimInText). - The parity harness in P4 will verify no real fixture depended on the old behavior; if one does, it's an explicit migration decision then, not a silent deferral. Coverage on internal/parsers/grammar/ lands at 94.5%. P1.10 queue is empty — no pending flags roll over into P4. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/lexer.go | 35 ++++-- internal/parsers/grammar/p110_test.go | 155 +++++++++++++++++++++++++ internal/parsers/grammar/parser.go | 42 ++----- internal/parsers/grammar/preprocess.go | 62 ++++++++-- 4 files changed, 241 insertions(+), 53 deletions(-) create mode 100644 internal/parsers/grammar/p110_test.go diff --git a/internal/parsers/grammar/lexer.go b/internal/parsers/grammar/lexer.go index d50a04d..896fb9f 100644 --- a/internal/parsers/grammar/lexer.go +++ b/internal/parsers/grammar/lexer.go @@ -18,11 +18,12 @@ type TokenKind int const ( TokenEOF TokenKind = iota // end of stream TokenBlank // empty line (after trim) - TokenText // freeform content (title, description, block body) + TokenText // freeform content (title, description) TokenAnnotation // "swagger: [args...]" TokenKeywordValue // ": " TokenKeywordBlockHead // ":" (value-less; indicates a block follows) TokenYAMLFence // "---" delimiter + TokenRawLine // verbatim line inside a YAML fence (Text = Line.Raw) ) // String renders a TokenKind for debugging and diagnostics. @@ -42,6 +43,8 @@ func (k TokenKind) String() string { return "KEYWORD_BLOCK_HEAD" case TokenYAMLFence: return "YAML_FENCE" + case TokenRawLine: + return "RAW_LINE" default: return "?" } @@ -69,27 +72,41 @@ type Token struct { } // Lex turns a preprocessed line slice into a token stream terminated -// by TokenEOF. The lexer is context-free (no fence/state tracking); -// the parser decides whether a TokenText sits inside a YAML body. +// by TokenEOF. The lexer tracks one bit of state — whether the cursor +// is between a pair of `---` fences — so that YAML bodies survive as +// TokenRawLine tokens with their original indentation intact. func Lex(lines []Line) []Token { out := make([]Token, 0, len(lines)+1) + inFence := false for _, line := range lines { - out = append(out, lexLine(line)) + tok := lexLine(line, inFence) + out = append(out, tok) + if tok.Kind == TokenYAMLFence { + inFence = !inFence + } } out = append(out, Token{Kind: TokenEOF}) return out } -// lexLine classifies a single preprocessed line. -func lexLine(line Line) Token { +// lexLine classifies a single preprocessed line. inFence is true when +// the line sits between an opening `---` and its matching closer; in +// that case everything except the closing fence becomes TokenRawLine +// carrying Line.Raw verbatim. +func lexLine(line Line, inFence bool) Token { text := strings.TrimRight(line.Text, " \t") - if text == "" { - return Token{Kind: TokenBlank, Pos: line.Pos} - } + // Fence detection is always active — a closing `---` is recognised + // even mid-body. if strings.TrimSpace(text) == "---" { return Token{Kind: TokenYAMLFence, Pos: line.Pos} } + if inFence { + return Token{Kind: TokenRawLine, Pos: line.Pos, Text: line.Raw} + } + if text == "" { + return Token{Kind: TokenBlank, Pos: line.Pos} + } if strings.HasPrefix(text, "swagger:") { return lexAnnotation(text, line.Pos) } diff --git a/internal/parsers/grammar/p110_test.go b/internal/parsers/grammar/p110_test.go new file mode 100644 index 0000000..bea290a --- /dev/null +++ b/internal/parsers/grammar/p110_test.go @@ -0,0 +1,155 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import ( + "go/token" + "strings" + "testing" +) + +// Tests for P1.10 catch-up items: verbatim YAML body contract and the +// preprocessor's `-` stripping behavior. + +// --- verbatim YAML body --- + +func TestP110YAMLBodyPreservesIndentation(t *testing.T) { + // The 2-space indent on "200:" must survive into RawYAML.Text — + // this is what internal/parsers/yaml/ needs to parse YAML cleanly. + src := `package p + +// swagger:operation GET /pets listPets +// +// --- +// responses: +// 200: successResponse +// 404: notFound +// --- +func ListPets() {} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + count := 0 + for y := range b.YAMLBlocks() { + count++ + lines := splitLines(y.Text) + if len(lines) < 3 { + t.Fatalf("want at least 3 body lines, got %d: %q", len(lines), y.Text) + } + // Line 0: "responses:" — no leading whitespace expected. + if lines[0] != "responses:" { + t.Errorf("line 0: got %q want %q", lines[0], "responses:") + } + // Line 1: " 200: successResponse" — 2-space indent preserved. + if lines[1] != " 200: successResponse" { + t.Errorf("line 1: got %q want %q", lines[1], " 200: successResponse") + } + // Line 2: same. + if lines[2] != " 404: notFound" { + t.Errorf("line 2: got %q want %q", lines[2], " 404: notFound") + } + } + if count != 1 { + t.Fatalf("want 1 YAML block, got %d", count) + } +} + +func TestP110YAMLBodyRaw_TokenKind(t *testing.T) { + // Direct lexer check: interior content between fences becomes + // TokenRawLine, not TokenText or TokenKeywordValue. + mk := func(n int, text, raw string) Line { + return Line{Text: text, Raw: raw, Pos: token.Position{Line: n, Column: 1}} + } + lines := []Line{ + mk(1, "---", "---"), + mk(2, "responses:", "responses:"), + mk(3, "200: ok", " 200: ok"), + mk(4, "---", "---"), + } + toks := Lex(lines) + want := []TokenKind{TokenYAMLFence, TokenRawLine, TokenRawLine, TokenYAMLFence, TokenEOF} + if len(toks) != len(want) { + t.Fatalf("want %d tokens, got %d: %+v", len(want), len(toks), toks) + } + for i, w := range want { + if toks[i].Kind != w { + t.Errorf("tok %d: got %s want %s", i, toks[i].Kind, w) + } + } + // TokenRawLine.Text must be Line.Raw (indent preserved). + if toks[2].Text != " 200: ok" { + t.Errorf("TokenRawLine.Text: got %q want %q", toks[2].Text, " 200: ok") + } +} + +func TestP110BlockCommentYAMLBody(t *testing.T) { + // YAML inside a /* ... */ block: the block-continuation " * " + // stripping must still produce clean YAML with preserved indent. + src := "package p\n\n/*\nswagger:operation GET /pets listPets\n\n---\nresponses:\n 200: ok\n---\n*/\nfunc F(){}\n" + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + for y := range b.YAMLBlocks() { + lines := splitLines(y.Text) + if len(lines) < 2 { + t.Fatalf("expected at least 2 lines, got %d: %q", len(lines), y.Text) + } + if lines[0] != "responses:" { + t.Errorf("line 0: got %q want %q", lines[0], "responses:") + } + if lines[1] != " 200: ok" { + t.Errorf("line 1: got %q want %q (indent must survive block continuation strip)", lines[1], " 200: ok") + } + } +} + +// --- preprocessor `-` stripping parity lock-in --- + +func TestP110DashNotStrippedInProse(t *testing.T) { + // P1.10 decision: the preprocessor keeps leading `-` in content + // (v2 divergence from v1, which silently stripped them). This + // preserves bullet-list semantics and keeps the `---` YAML fence + // marker detectable without special-casing. + // + // Asserted at the Block level: a bullet-list paragraph feeds + // cleanly into Title/Description, with the `-` intact. (Bullets + // within a single paragraph are merged per godoc convention.) + src := `package p + +// Summary line. +// +// - first item +// - second item +// +// swagger:model Foo +type Foo int +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + if b.Title() != "Summary line." { + t.Errorf("Title: got %q want %q", b.Title(), "Summary line.") + } + if !strings.Contains(b.Description(), "- first item") { + t.Errorf("Description should preserve bullet `-`: got %q", b.Description()) + } + if !strings.Contains(b.Description(), "- second item") { + t.Errorf("Description should preserve bullet `-`: got %q", b.Description()) + } +} + +func TestP110DashPreservedOnlySurvivesVerbatimInText(t *testing.T) { + // Low-level confirmation at the preprocessor: a single `- foo` line + // produces Line.Text with the `-` intact. + src := "package p\n\n// - not stripped\ntype Foo int\n" + cg, fset := parseCommentGroup(t, src) + lines := Preprocess(cg, fset) + if len(lines) != 1 { + t.Fatalf("want 1 line, got %d", len(lines)) + } + if lines[0].Text != "- not stripped" { + t.Errorf("Text: got %q want %q", lines[0].Text, "- not stripped") + } +} diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index 912437c..fba04d8 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -285,7 +285,7 @@ func (p *parseState) parseTitleDesc(base *baseBlock, pre []Token) { case TokenEOF, TokenAnnotation, TokenKeywordValue, TokenKeywordBlockHead, - TokenYAMLFence: + TokenYAMLFence, TokenRawLine: // Ignored in the title/description slice. default: // Unreachable at v1; future kinds ignored defensively. @@ -317,7 +317,9 @@ func (p *parseState) parseBody(base *baseBlock, post []Token) { for i < len(post) { t := post[i] switch t.Kind { - case TokenEOF, TokenBlank, TokenText: + case TokenEOF, TokenBlank, TokenText, TokenRawLine: + // TokenRawLine outside a fence shouldn't happen (lexer + // tracks fence state) — ignore defensively. i++ case TokenKeywordValue: @@ -357,19 +359,16 @@ func (p *parseState) parseBody(base *baseBlock, post []Token) { // index i and its matching closer (or EOF). Emits an UnterminatedYAML // diagnostic if no closer is found. Returns the index past the closer. // -// NOTE: line reconstruction is best-effort at P1.4 — tokens have been -// classified (so `responses:` became a KEYWORD_BLOCK_HEAD), and -// indentation is lost because the preprocessor already trimmed it. -// P2.1 will add fence-state tracking so raw YAML bytes survive -// verbatim. Until then, YAML bodies captured here are suitable for -// Kind/content detection but not for full YAML re-parsing. +// Inside a fence the lexer emits TokenRawLine tokens carrying Line.Raw, +// so the body survives verbatim (indentation preserved) and can be +// handed directly to internal/parsers/yaml/ for further parsing. func (p *parseState) collectYAMLBody(base *baseBlock, post []Token, i int) int { openerPos := post[i].Pos i++ var body []string for i < len(post) && post[i].Kind != TokenYAMLFence && post[i].Kind != TokenEOF { - body = append(body, reconstructLine(post[i])) + body = append(body, post[i].Text) i++ } @@ -475,28 +474,3 @@ func parseBool(s string) (bool, bool) { return false, false } } - -// reconstructLine returns a best-effort text rendering of a Token as -// it appeared on the source line. Used inside YAML fence capture where -// the classifier has already split `keyword:` into KEYWORD_BLOCK_HEAD. -// Indentation is NOT preserved (preprocessor already stripped it); -// P2.1 will fix this. -func reconstructLine(t Token) string { - switch t.Kind { - case TokenBlank: - return "" - case TokenKeywordValue: - return t.Text + ": " + t.Value - case TokenKeywordBlockHead: - return t.Text + ":" - case TokenAnnotation: - if len(t.Args) > 0 { - return "swagger:" + t.Text + " " + strings.Join(t.Args, " ") - } - return "swagger:" + t.Text - case TokenEOF, TokenYAMLFence, TokenText: - return t.Text - default: - return t.Text - } -} diff --git a/internal/parsers/grammar/preprocess.go b/internal/parsers/grammar/preprocess.go index f0b2dc1..afde760 100644 --- a/internal/parsers/grammar/preprocess.go +++ b/internal/parsers/grammar/preprocess.go @@ -13,10 +13,14 @@ import ( // // Text has the Go comment markers (// /* */) stripped, along with // leading continuation decorations common in godoc comments (spaces, -// tabs, asterisks, slashes, dashes, optional markdown table pipe). -// Internal content and embedded whitespace are preserved — fence-body -// indentation handling lives at the lexer layer where fence state is -// tracked. +// tabs, asterisks, slashes, optional markdown table pipe). This is +// what the lexer uses for classification. +// +// Raw is the same source line with *only* the Go comment marker and +// the block-continuation `*` (if any) removed — content whitespace, +// including YAML indentation, is preserved. This is what the lexer +// emits as TokenRawLine inside a --- fence so YAML bodies survive +// verbatim for handoff to internal/parsers/yaml/. // // Pos is the position of Text's first character in the source file: // Line/Column are accurate on every line (including continuation @@ -24,6 +28,7 @@ import ( // the start of the file. type Line struct { Text string + Raw string Pos token.Position } @@ -50,6 +55,12 @@ func Preprocess(cg *ast.CommentGroup, fset *token.FileSet) []Line { // `/* … */` block form, including multi-line blocks. Each emitted // Line's Pos points precisely to the first character of Text in the // source file (Line, Column, and Offset all accurate). +// +// Line.Raw differs per comment kind: +// - `//` lines strip one leading space (the godoc `// ` convention); +// - `/* … */` continuation lines strip the `\s*\*\s?` pattern if +// present, otherwise preserve all leading whitespace so YAML +// indentation inside a fenced body survives. func stripComment(raw string, basePos token.Position) []Line { const markerLen = 2 // "//" and "/*" are both 2 bytes switch { @@ -57,7 +68,7 @@ func stripComment(raw string, basePos token.Position) []Line { pos := basePos pos.Column += markerLen pos.Offset += markerLen - return []Line{stripLine(raw[markerLen:], pos)} + return []Line{stripLine(raw[markerLen:], pos, stripSingleGodocSpace)} case strings.HasPrefix(raw, "/*"): body := strings.TrimSuffix(raw[markerLen:], "*/") @@ -85,7 +96,7 @@ func stripComment(raw string, basePos token.Position) []Line { pos.Column = 1 pos.Offset += markerLen + lineOffset } - out = append(out, stripLine(segment, pos)) + out = append(out, stripLine(segment, pos, stripBlockContinuation)) if nl < 0 { break @@ -98,19 +109,50 @@ func stripComment(raw string, basePos token.Position) []Line { // Not a valid Go comment; preserve input defensively so // downstream layers can surface a diagnostic rather than // silently lose data. - return []Line{{Text: raw, Pos: basePos}} + return []Line{{Text: raw, Raw: raw, Pos: basePos}} } } // stripLine trims the leading decoration of a single line and advances // pos by the number of bytes consumed. pos must already point to the -// first character of the (unstripped) line in the source. -func stripLine(s string, pos token.Position) Line { +// first character of the (unstripped) line in the source. rawStrip is +// the strategy used to compute Line.Raw — see stripSingleGodocSpace +// (for // lines) and stripBlockContinuation (for /* */ lines). +func stripLine(s string, pos token.Position, rawStrip func(string) string) Line { stripped := trimContentPrefix(s) consumed := len(s) - len(stripped) pos.Column += consumed pos.Offset += consumed - return Line{Text: stripped, Pos: pos} + return Line{Text: stripped, Raw: rawStrip(s), Pos: pos} +} + +// stripSingleGodocSpace strips one leading space or tab — the godoc +// `// ` convention — preserving all other content whitespace. Used +// for Line.Raw on `//` comment lines. +func stripSingleGodocSpace(s string) string { + if len(s) > 0 && (s[0] == ' ' || s[0] == '\t') { + return s[1:] + } + return s +} + +// stripBlockContinuation recognises the `\s*\*\s?` prefix that godoc +// `/* … */` continuation lines carry, stripping it when present and +// preserving all leading whitespace otherwise. Used for Line.Raw on +// `/* … */` lines so YAML indentation inside fenced bodies survives. +func stripBlockContinuation(s string) string { + i := 0 + for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + i++ + } + if i < len(s) && s[i] == '*' { + i++ + if i < len(s) && s[i] == ' ' { + i++ + } + return s[i:] + } + return s } // trimContentPrefix removes the leading godoc-style decoration that From 88a66d503bd072ac3806dd98b6a232dc21656dea Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 17:03:35 +0200 Subject: [PATCH 16/46] feat(grammar): P2.3 multi-line block-body capture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend Property with `Body []string`, populated for KEYWORD_BLOCK_HEAD tokens (consumes:, produces:, security:, responses:, parameters:, extensions:, infoExtensions:, tos:, securityDefinitions:, externalDocs:). Non-block-head properties keep Body nil. collectBlockBody(base, post, i): after emitting the block-head property, consume subsequent TEXT tokens into prop.Body. Collection stops at the next structured token (KEYWORD_*, ANNOTATION, YAML_FENCE, RAW_LINE, EOF) per legacy stop point S6 from the implied-stop appendix. Interior blank tokens are deferred and re-emitted as empty body lines only if more text follows — trailing blanks are dropped. Block.Properties() iteration order is unchanged; each block-head Property now carries its body inline alongside the keyword metadata. Analyzers (P5 bridge taggers) read prop.Body directly; per-keyword tokenization (MIME types for consumes/produces, security mappings, etc.) is their concern. Tests (blockbody_test.go): single-block capture, stop-at-next-keyword, stop-at-annotation, stop-at-YAML-fence, trailing-blanks-trimmed, non-block-keyword-unaffected. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/ast.go | 3 +- internal/parsers/grammar/blockbody_test.go | 169 +++++++++++++++++++ internal/parsers/grammar/parser.go | 56 +++++- internal/parsers/grammar/parser_test.go | 2 +- internal/parsers/grammar/productions_test.go | 5 +- 5 files changed, 225 insertions(+), 10 deletions(-) create mode 100644 internal/parsers/grammar/blockbody_test.go diff --git a/internal/parsers/grammar/ast.go b/internal/parsers/grammar/ast.go index 9d04c07..7de46e8 100644 --- a/internal/parsers/grammar/ast.go +++ b/internal/parsers/grammar/ast.go @@ -188,7 +188,8 @@ type Property struct { Pos token.Position Value string Typed TypedValue - ItemsDepth int // 0 = no "items." nesting + ItemsDepth int // 0 = no "items." nesting + Body []string // populated for KEYWORD_BLOCK_HEAD lines (consumes:, security:, responses:, …); nil otherwise } // TypedValue carries the primitive-converted form of a keyword's diff --git a/internal/parsers/grammar/blockbody_test.go b/internal/parsers/grammar/blockbody_test.go new file mode 100644 index 0000000..76f03f3 --- /dev/null +++ b/internal/parsers/grammar/blockbody_test.go @@ -0,0 +1,169 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import ( + "slices" + "testing" +) + +// P2.3: KEYWORD_BLOCK_HEAD tokens collect subsequent TEXT lines as +// Property.Body until the next structured token (per legacy stop S6). + +// firstPropertyOf returns the first Property of the parsed block, +// failing the test if the block has none. +func firstPropertyOf(t *testing.T, src string) Property { + t.Helper() + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + for p := range b.Properties() { + return p + } + t.Fatal("block has no properties") + return Property{} +} + +func TestBlockBodyConsumes(t *testing.T) { + // consumes: with a two-item body → Body = ["application/json", "application/xml"]. + src := `package p + +// swagger:meta +// +// consumes: +// application/json +// application/xml +type Root struct{} +` + prop := firstPropertyOf(t, src) + if prop.Keyword.Name != fixtureBlockKw { + t.Fatalf("keyword: got %q want consumes", prop.Keyword.Name) + } + if !slices.Equal(prop.Body, []string{"application/json", "application/xml"}) { + t.Errorf("Body: got %q", prop.Body) + } +} + +func TestBlockBodyStopsAtNextKeyword(t *testing.T) { + // Two block heads in sequence — each captures its own body only. + src := `package p + +// swagger:meta +// +// consumes: +// application/json +// produces: +// application/xml +type Root struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + var props []Property + for p := range b.Properties() { + props = append(props, p) + } + if len(props) != 2 { + t.Fatalf("want 2 properties, got %d", len(props)) + } + if props[0].Keyword.Name != fixtureBlockKw { + t.Errorf("prop 0 keyword: got %q", props[0].Keyword.Name) + } + if !slices.Equal(props[0].Body, []string{"application/json"}) { + t.Errorf("prop 0 Body: got %q want [application/json]", props[0].Body) + } + if props[1].Keyword.Name != "produces" { + t.Errorf("prop 1 keyword: got %q", props[1].Keyword.Name) + } + if !slices.Equal(props[1].Body, []string{"application/xml"}) { + t.Errorf("prop 1 Body: got %q want [application/xml]", props[1].Body) + } +} + +func TestBlockBodyStopsAtAnnotation(t *testing.T) { + // Body collection stops at the boundary — but any annotation + // would be a separate block anyway, so this mainly exercises + // the safety case where post-annotation tokens include a stray + // swagger:* line. + src := `package p + +// swagger:meta +// +// consumes: +// application/json +// application/xml +// swagger:ignore +type Root struct{} +` + prop := firstPropertyOf(t, src) + if !slices.Equal(prop.Body, []string{"application/json", "application/xml"}) { + t.Errorf("Body: got %q", prop.Body) + } +} + +func TestBlockBodyStopsAtYAMLFence(t *testing.T) { + // A fence terminates body collection; fence body is captured + // independently via YAMLBlocks(). + src := `package p + +// swagger:operation GET /pets listPets +// +// consumes: +// application/json +// +// --- +// responses: +// 200: ok +// --- +func ListPets() {} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + var props []Property + for p := range b.Properties() { + props = append(props, p) + } + if len(props) != 1 { + t.Fatalf("want 1 property (consumes), got %d", len(props)) + } + if !slices.Equal(props[0].Body, []string{"application/json"}) { + t.Errorf("consumes Body: got %q", props[0].Body) + } + + yamlCount := 0 + for range b.YAMLBlocks() { + yamlCount++ + } + if yamlCount != 1 { + t.Errorf("want 1 YAML block, got %d", yamlCount) + } +} + +func TestBlockBodyTrailingBlanksTrimmed(t *testing.T) { + // Blank lines at the end of the comment group are dropped from + // the body; internal blanks between body lines are preserved. + src := "package p\n\n// swagger:meta\n//\n// consumes:\n// application/json\n//\n// application/xml\n//\n//\ntype Root struct{}\n" + prop := firstPropertyOf(t, src) + want := []string{"application/json", "", "application/xml"} + if !slices.Equal(prop.Body, want) { + t.Errorf("Body: got %q want %q", prop.Body, want) + } +} + +func TestBlockBodyNonBlockKeywordsUnaffected(t *testing.T) { + // A non-block keyword (maximum:) still produces an empty Body. + src := `package p + +// swagger:model Foo +// maximum: 10 +type Foo int +` + prop := firstPropertyOf(t, src) + if prop.Keyword.Name != fixtureValidationKw { + t.Fatalf("keyword: got %q", prop.Keyword.Name) + } + if len(prop.Body) != 0 { + t.Errorf("non-block keyword must have nil/empty Body, got %q", prop.Body) + } +} diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index fba04d8..095acd2 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -333,12 +333,7 @@ func (p *parseState) parseBody(base *baseBlock, post []Token) { i++ case TokenKeywordBlockHead: - base.properties = append(base.properties, Property{ - Keyword: *t.Keyword, - Pos: t.Pos, - ItemsDepth: t.ItemsDepth, - }) - i++ + i = p.collectBlockBody(base, post, i) case TokenYAMLFence: i = p.collectYAMLBody(base, post, i) @@ -355,6 +350,55 @@ func (p *parseState) parseBody(base *baseBlock, post []Token) { } } +// collectBlockBody emits a Property for the KEYWORD_BLOCK_HEAD token +// at index i and consumes any subsequent TEXT tokens as the block's +// Body. Collection stops (per legacy S6 "multi-line tagger switch") +// at the next structured token — another keyword, annotation, YAML +// fence, or EOF. Blank tokens are treated as body-internal separators +// if followed by more text; a trailing run of blanks is trimmed. +// +// Returns the index past the last body token consumed. +func (p *parseState) collectBlockBody(base *baseBlock, post []Token, i int) int { + head := post[i] + prop := Property{ + Keyword: *head.Keyword, + Pos: head.Pos, + ItemsDepth: head.ItemsDepth, + } + i++ + + var pendingBlanks int + for i < len(post) { + next := post[i] + switch next.Kind { + case TokenEOF, + TokenAnnotation, + TokenKeywordValue, TokenKeywordBlockHead, + TokenYAMLFence, TokenRawLine: + base.properties = append(base.properties, prop) + return i + case TokenText: + for range pendingBlanks { + prop.Body = append(prop.Body, "") + } + pendingBlanks = 0 + prop.Body = append(prop.Body, next.Text) + case TokenBlank: + // Defer — include only if more text follows within the + // block. Trailing blanks are dropped. + pendingBlanks++ + default: + // Defensive: unknown future token kinds end the body. + base.properties = append(base.properties, prop) + return i + } + i++ + } + + base.properties = append(base.properties, prop) + return i +} + // collectYAMLBody captures everything between a YAML_FENCE opener at // index i and its matching closer (or EOF). Emits an UnterminatedYAML // diagnostic if no closer is found. Returns the index past the closer. diff --git a/internal/parsers/grammar/parser_test.go b/internal/parsers/grammar/parser_test.go index 35a0cc5..1025bbb 100644 --- a/internal/parsers/grammar/parser_test.go +++ b/internal/parsers/grammar/parser_test.go @@ -273,7 +273,7 @@ type Root struct{} count := 0 for p := range b.Properties() { count++ - if p.Keyword.Name != "consumes" { + if p.Keyword.Name != fixtureBlockKw { t.Errorf("keyword: got %q", p.Keyword.Name) } if p.Value != "" { diff --git a/internal/parsers/grammar/productions_test.go b/internal/parsers/grammar/productions_test.go index ba5fa0e..cfcf4ea 100644 --- a/internal/parsers/grammar/productions_test.go +++ b/internal/parsers/grammar/productions_test.go @@ -16,8 +16,9 @@ import ( // named productions as discrete units. const ( - fixtureModelName = "Foo" - fixtureBlockKw = "consumes" + fixtureModelName = "Foo" + fixtureBlockKw = "consumes" + fixtureValidationKw = "maximum" ) // --- annotation-line --- From fdca83fa71af56f66f0bbe4fd59b416ab0de39f1 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 17:05:39 +0200 Subject: [PATCH 17/46] feat(grammar): P2.2 inline extensions extraction When collectBlockBody encounters an `extensions:` or `infoExtensions:` block head, each body TEXT token is also parsed into an Extension {Name, Value, Pos} and appended to the Block's extensions slice. Property.Body is still populated with the raw lines for analyzers that prefer verbatim input. parseExtensionLine splits "name: value" per-line; whitespace is trimmed, blank-or-malformed lines are skipped. Name validation (the `x-*` requirement) is deferred to P2.4. Block.Extensions() returns the flat iterator; callers use it uniformly regardless of whether the source used `extensions:` or `infoExtensions:` (or, later, extension blocks that appear inline inside other contexts). Tests (extensions_test.go): basic extraction, infoExtensions path, per-line source positions, parallel Body+Extensions survival, scoping (consumes: body not scraped), malformed-line skipping. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/extensions_test.go | 174 ++++++++++++++++++++ internal/parsers/grammar/parser.go | 39 +++++ 2 files changed, 213 insertions(+) create mode 100644 internal/parsers/grammar/extensions_test.go diff --git a/internal/parsers/grammar/extensions_test.go b/internal/parsers/grammar/extensions_test.go new file mode 100644 index 0000000..ae38b05 --- /dev/null +++ b/internal/parsers/grammar/extensions_test.go @@ -0,0 +1,174 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import ( + "testing" +) + +// P2.2: `extensions:` and `infoExtensions:` block bodies are parsed +// into Extension{Name, Value, Pos} entries on the Block. + +func TestExtensionsBasic(t *testing.T) { + src := `package p + +// swagger:meta +// +// extensions: +// x-foo: bar +// x-baz: 42 +type Root struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + var exts []Extension + for e := range b.Extensions() { + exts = append(exts, e) + } + if len(exts) != 2 { + t.Fatalf("want 2 extensions, got %d: %+v", len(exts), exts) + } + if exts[0].Name != "x-foo" || exts[0].Value != "bar" { + t.Errorf("ext 0: got %+v", exts[0]) + } + if exts[1].Name != "x-baz" || exts[1].Value != "42" { + t.Errorf("ext 1: got %+v", exts[1]) + } +} + +func TestExtensionsInfoBlock(t *testing.T) { + // infoExtensions: uses the same collector path. + src := `package p + +// swagger:meta +// +// infoExtensions: +// x-logo: https://example.com/logo.png +type Root struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + count := 0 + for e := range b.Extensions() { + count++ + if e.Name != "x-logo" { + t.Errorf("name: got %q", e.Name) + } + if e.Value != "https://example.com/logo.png" { + t.Errorf("value: got %q", e.Value) + } + } + if count != 1 { + t.Errorf("want 1 extension, got %d", count) + } +} + +func TestExtensionsPositionsPerLine(t *testing.T) { + // Each extension's Pos points to its own source line. + src := `package p + +// swagger:meta +// +// extensions: +// x-first: one +// x-second: two +type Root struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + var lines []int + for e := range b.Extensions() { + lines = append(lines, e.Pos.Line) + } + if len(lines) != 2 { + t.Fatalf("want 2 extensions, got %d", len(lines)) + } + if lines[1] <= lines[0] { + t.Errorf("line positions must be monotonic: %v", lines) + } +} + +func TestExtensionsSurvivesAndBodyPreserved(t *testing.T) { + // Extensions are extracted to Block.Extensions(), but the raw + // Property.Body is still populated for any analyzer that wants it. + src := `package p + +// swagger:meta +// +// extensions: +// x-foo: bar +type Root struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + var prop Property + for p := range b.Properties() { + prop = p + } + if prop.Keyword.Name != "extensions" { + t.Fatalf("keyword: got %q", prop.Keyword.Name) + } + if len(prop.Body) != 1 || prop.Body[0] != "x-foo: bar" { + t.Errorf("Body: got %q want [x-foo: bar]", prop.Body) + } + + extCount := 0 + for range b.Extensions() { + extCount++ + } + if extCount != 1 { + t.Errorf("want 1 extension in parallel, got %d", extCount) + } +} + +func TestExtensionsOnlyForExtensionsKeyword(t *testing.T) { + // consumes: is a block-head keyword but NOT an extensions block — + // its body lines must not be scraped into Extensions. + src := `package p + +// swagger:meta +// +// consumes: +// application/json +type Root struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + for e := range b.Extensions() { + t.Errorf("consumes: body must not produce Extensions, got %+v", e) + } +} + +func TestExtensionsMalformedLineIgnored(t *testing.T) { + // A body line without a ':' can't form an extension; collected + // into Body but not emitted as an Extension. + src := `package p + +// swagger:meta +// +// extensions: +// x-good: one +// not an extension line +// x-also-good: two +type Root struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + var names []string + for e := range b.Extensions() { + names = append(names, e.Name) + } + if len(names) != 2 { + t.Fatalf("want 2 extensions, got %d: %v", len(names), names) + } + if names[0] != "x-good" || names[1] != "x-also-good" { + t.Errorf("extension names: got %v", names) + } +} diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index 095acd2..c61903a 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -357,6 +357,11 @@ func (p *parseState) parseBody(base *baseBlock, post []Token) { // fence, or EOF. Blank tokens are treated as body-internal separators // if followed by more text; a trailing run of blanks is trimmed. // +// When the block-head keyword is "extensions" or "infoExtensions", +// each body line of the form `name: value` is *also* emitted as a +// top-level Extension on the Block so `block.Extensions()` exposes +// them uniformly. The original Body is still populated. +// // Returns the index past the last body token consumed. func (p *parseState) collectBlockBody(base *baseBlock, post []Token, i int) int { head := post[i] @@ -367,6 +372,8 @@ func (p *parseState) collectBlockBody(base *baseBlock, post []Token, i int) int } i++ + isExtensions := isExtensionBlock(head.Keyword.Name) + var pendingBlanks int for i < len(post) { next := post[i] @@ -383,6 +390,11 @@ func (p *parseState) collectBlockBody(base *baseBlock, post []Token, i int) int } pendingBlanks = 0 prop.Body = append(prop.Body, next.Text) + if isExtensions { + if ext, ok := parseExtensionLine(next); ok { + base.extensions = append(base.extensions, ext) + } + } case TokenBlank: // Defer — include only if more text follows within the // block. Trailing blanks are dropped. @@ -399,6 +411,33 @@ func (p *parseState) collectBlockBody(base *baseBlock, post []Token, i int) int return i } +// isExtensionBlock reports whether the given keyword name declares an +// extensions block (i.e., `extensions:` or `infoExtensions:`). +func isExtensionBlock(name string) bool { + return name == "extensions" || name == "infoExtensions" +} + +// parseExtensionLine extracts `name: value` from a body TEXT token, +// returning an Extension with the token's Pos. Returns (zero, false) +// for lines that don't match the form. Name and Value are +// whitespace-trimmed; name-well-formedness (the `x-*` requirement) +// is a separate P2.4 check downstream. +func parseExtensionLine(t Token) (Extension, bool) { + before, after, found := strings.Cut(t.Text, ":") + if !found { + return Extension{}, false + } + name := strings.TrimSpace(before) + if name == "" { + return Extension{}, false + } + return Extension{ + Name: name, + Value: strings.TrimSpace(after), + Pos: t.Pos, + }, true +} + // collectYAMLBody captures everything between a YAML_FENCE opener at // index i and its matching closer (or EOF). Emits an UnterminatedYAML // diagnostic if no closer is found. Returns the index past the closer. From 4219d94bd74bd02a9e64736c9e735a9cb060ab9b Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 17:07:10 +0200 Subject: [PATCH 18/46] feat(grammar): P2.4 extension-name well-formedness diagnostic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add isExtensionName(s) — a well-formedness check mirroring the v1 rxAllowedExtensions pattern `^[Xx]-` (case-tolerant prefix, at least one suffix character required). When collectBlockBody is in an extensions block and extracts a name that fails the check, emit a SeverityWarning CodeInvalidExtension diagnostic at the offending line's Pos. Non-fatal by design: - The Extension is still appended to Block.Extensions() so analyzers / LSP can decide policy (surface to user, drop, etc.). - Pairs with the broader "diagnostics accumulate, don't throw" principle (architecture §4.3, tasks P1.4). Tests: `TestExtensionsInvalidNameDiagnostic` (invalid name emits warning; extension still collected) and `TestExtensionsAcceptsUppercaseX` (X- prefix accepted per v1 behavior). Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/extensions_test.go | 62 +++++++++++++++++++++ internal/parsers/grammar/parser.go | 19 +++++++ 2 files changed, 81 insertions(+) diff --git a/internal/parsers/grammar/extensions_test.go b/internal/parsers/grammar/extensions_test.go index ae38b05..ad26990 100644 --- a/internal/parsers/grammar/extensions_test.go +++ b/internal/parsers/grammar/extensions_test.go @@ -145,6 +145,68 @@ type Root struct{} } } +func TestExtensionsInvalidNameDiagnostic(t *testing.T) { + // A line like `not-an-extension: value` parses as an Extension + // (it has `name: value` form) but fails the x-* well-formedness + // check, emitting a CodeInvalidExtension warning. + src := `package p + +// swagger:meta +// +// extensions: +// x-good: one +// not-an-extension: two +type Root struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + var invalidDiags []Diagnostic + for _, d := range b.Diagnostics() { + if d.Code == CodeInvalidExtension { + invalidDiags = append(invalidDiags, d) + } + } + if len(invalidDiags) != 1 { + t.Fatalf("want 1 CodeInvalidExtension diagnostic, got %d: %+v", + len(invalidDiags), b.Diagnostics()) + } + if invalidDiags[0].Severity != SeverityWarning { + t.Errorf("severity: got %v want warning", invalidDiags[0].Severity) + } + + // The extension is still collected (so analyzers can decide how + // to respond) even though the name is invalid. + names := []string{} + for e := range b.Extensions() { + names = append(names, e.Name) + } + if len(names) != 2 { + t.Errorf("invalid extensions still survive in the list; want 2, got %d: %v", + len(names), names) + } +} + +func TestExtensionsAcceptsUppercaseX(t *testing.T) { + // Both `x-` and `X-` are accepted (matches v1 rxAllowedExtensions). + src := `package p + +// swagger:meta +// +// extensions: +// X-Uppercase: value +type Root struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + for _, d := range b.Diagnostics() { + if d.Code == CodeInvalidExtension { + t.Errorf("X- prefix must be accepted, got %+v", d) + } + } +} + func TestExtensionsMalformedLineIgnored(t *testing.T) { // A body line without a ':' can't form an extension; collected // into Body but not emitted as an Extension. diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index c61903a..b759e93 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -392,6 +392,10 @@ func (p *parseState) collectBlockBody(base *baseBlock, post []Token, i int) int prop.Body = append(prop.Body, next.Text) if isExtensions { if ext, ok := parseExtensionLine(next); ok { + if !isExtensionName(ext.Name) { + p.diag = append(p.diag, Warnf(ext.Pos, CodeInvalidExtension, + "extension name %q must begin with 'x-' or 'X-'", ext.Name)) + } base.extensions = append(base.extensions, ext) } } @@ -417,6 +421,21 @@ func isExtensionBlock(name string) bool { return name == "extensions" || name == "infoExtensions" } +// isExtensionName reports whether s is a well-formed OpenAPI vendor +// extension name: it must begin with "x-" or "X-" and have at least +// one character after the hyphen. Mirrors the v1 rxAllowedExtensions +// check (`^[Xx]-`). +func isExtensionName(s string) bool { + const minExtNameLen = 3 // "x-" + at least one suffix character + if len(s) < minExtNameLen { + return false + } + if (s[0] != 'x' && s[0] != 'X') || s[1] != '-' { + return false + } + return true +} + // parseExtensionLine extracts `name: value` from a body TEXT token, // returning an Extension with the token's Pos. Returns (zero, false) // for lines that don't match the form. Name and Value are From bebb2fb8d8f382a69ba828a34c1e32d784a79255 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 17:10:12 +0200 Subject: [PATCH 19/46] feat(grammar): P2.5 internal/parsers/yaml/ sub-parser subpackage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create internal/parsers/yaml/ — a thin wrapper around go.yaml.in/yaml/v3 for parsing the RawYAML bodies isolated by internal/parsers/grammar/. The grammar parser stays YAML-free (verified: `go list -f '{{.Imports}}' ./internal/parsers/grammar/` has no yaml entry) per architecture §3.3, §5.1. Exposed surface: - yaml.Parse(body) -> (any, error): Unmarshal into a generic value (map/slice/scalar). Returns (nil, nil) for an empty body so callers don't branch on error-vs-nil for the "fence with no content" case. - yaml.ParseInto(body, dst) -> error: Unmarshal into a caller-defined struct. Empty body is a no-op, leaving dst at its zero value. Both wrap the underlying error with a "yaml:" prefix so downstream diagnostics can distinguish YAML parsing failures from other errors without type-asserting. Tests: empty body, flat map, nested structure with non-string keys (map[any]any as YAML v3 returns), invalid YAML error wrapping, struct unmarshal, empty+struct no-op. Pattern: this subpackage establishes the seam for any future sub-language (enum variants per W2, richer example syntax per W3, private-comment bodies per W4). Each gets its own internal/parsers// package; the grammar parser never imports any of them. Completes P2. P1 and P2 are both fully green; ready for P3. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/yaml/yaml.go | 60 ++++++++++++++++ internal/parsers/yaml/yaml_test.go | 106 +++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) create mode 100644 internal/parsers/yaml/yaml.go create mode 100644 internal/parsers/yaml/yaml_test.go diff --git a/internal/parsers/yaml/yaml.go b/internal/parsers/yaml/yaml.go new file mode 100644 index 0000000..f1a48cc --- /dev/null +++ b/internal/parsers/yaml/yaml.go @@ -0,0 +1,60 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +// Package yaml is a thin wrapper around go.yaml.in/yaml/v3 for +// consuming the RawYAML bodies that internal/parsers/grammar/ +// isolates between `---` fences. +// +// The package exists so the main grammar parser stays YAML-free and +// stdlib-only (architecture §3.3, §5.1). It is imported only by the +// analyzer layer — bridge-taggers that decide when to parse a given +// RawYAML body — never by internal/parsers/grammar/. +// +// This subpackage also establishes the sibling-sub-parser pattern: +// any future sub-language (enum-variant forms per W2, richer +// example syntax per W3, private-comment bodies per W4, …) gets its +// own `internal/parsers//` subpackage following the same seam. +package yaml + +import ( + "fmt" + + "go.yaml.in/yaml/v3" +) + +// Parse unmarshals the given raw YAML body into a generic value +// (typically a map[string]interface{} or []interface{}). The pos +// parameter is passed through any wrapping error so downstream +// diagnostics can point at the original source location — YAML +// library errors carry their own line/column numbers relative to the +// body, not to the Go source. +// +// Returns (nil, nil) for an empty body so callers can handle +// "annotation had a fence but no content" without branching on +// error-vs-nil. +// +//nolint:nilnil // (nil, nil) is the documented "empty body" return — the caller distinguishes via len(body) if needed. +func Parse(body string) (any, error) { + if body == "" { + return nil, nil + } + var v any + if err := yaml.Unmarshal([]byte(body), &v); err != nil { + return nil, fmt.Errorf("yaml: %w", err) + } + return v, nil +} + +// ParseInto unmarshals body into the given destination, typically a +// pointer to a struct the caller defined to match an expected YAML +// shape (e.g., operation-body or extension-value). Wraps the +// underlying error for uniform error reporting. +func ParseInto(body string, dst any) error { + if body == "" { + return nil + } + if err := yaml.Unmarshal([]byte(body), dst); err != nil { + return fmt.Errorf("yaml: %w", err) + } + return nil +} diff --git a/internal/parsers/yaml/yaml_test.go b/internal/parsers/yaml/yaml_test.go new file mode 100644 index 0000000..2f419a9 --- /dev/null +++ b/internal/parsers/yaml/yaml_test.go @@ -0,0 +1,106 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package yaml_test + +import ( + "net/http" + "strings" + "testing" + + "github.com/go-openapi/codescan/internal/parsers/yaml" +) + +func TestParseEmpty(t *testing.T) { + v, err := yaml.Parse("") + if err != nil { + t.Fatalf("empty body: unexpected error: %v", err) + } + if v != nil { + t.Errorf("empty body: want nil, got %v", v) + } +} + +func TestParseFlatMap(t *testing.T) { + // Note: go.yaml.in/yaml/v3 returns map[string]any for + // string-keyed maps and auto-types scalars (unquoted "1.0" + // becomes float64). Quote the value to keep it as a string. + body := "name: Foo\nversion: \"1.0\"\n" + v, err := yaml.Parse(body) + if err != nil { + t.Fatalf("parse: %v", err) + } + m, ok := v.(map[string]any) + if !ok { + t.Fatalf("want map[string]any, got %T: %v", v, v) + } + if m["name"] != "Foo" { + t.Errorf("name: got %v want Foo", m["name"]) + } + if m["version"] != "1.0" { + t.Errorf("version: got %v", m["version"]) + } +} + +func TestParseNestedStructure(t *testing.T) { + // Representative of an operation body's responses mapping. + // Numeric keys like `200` arrive as int keys; the outer map + // becomes map[any]any because not all keys are strings. + body := "responses:\n 200:\n description: ok\n 404:\n description: not found\n" + v, err := yaml.Parse(body) + if err != nil { + t.Fatalf("parse: %v", err) + } + top, ok := v.(map[string]any) + if !ok { + t.Fatalf("want top-level map[string]any, got %T", v) + } + // The responses map has integer keys (200, 404), so the + // YAML library returns map[any]any (keys include non-strings). + resp, ok := top["responses"].(map[any]any) + if !ok { + t.Fatalf("responses: want map[any]any (int keys), got %T", top["responses"]) + } + if len(resp) != 2 { + t.Errorf("responses: want 2 entries, got %d", len(resp)) + } +} + +func TestParseInvalidYAML(t *testing.T) { + // Bad indentation / stray colon. + body := "key: [unclosed\n" + _, err := yaml.Parse(body) + if err == nil { + t.Fatal("expected error for invalid YAML") + } + if !strings.HasPrefix(err.Error(), "yaml:") { + t.Errorf("error should be wrapped with 'yaml:' prefix: got %q", err.Error()) + } +} + +func TestParseIntoStruct(t *testing.T) { + type operation struct { + Method string `yaml:"method"` + Path string `yaml:"path"` + } + body := "method: GET\npath: /pets\n" + var op operation + if err := yaml.ParseInto(body, &op); err != nil { + t.Fatalf("parse: %v", err) + } + if op.Method != http.MethodGet || op.Path != "/pets" { + t.Errorf("unmarshalled struct: %+v", op) + } +} + +func TestParseIntoEmpty(t *testing.T) { + // Empty body is a no-op (dst left at zero value). + type op struct{ Method string } + var v op + if err := yaml.ParseInto("", &v); err != nil { + t.Errorf("empty body: unexpected error: %v", err) + } + if v.Method != "" { + t.Errorf("dst should be untouched, got %+v", v) + } +} From 4888901829092c2020d105bab5edfe766de1ae20 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 17:31:19 +0200 Subject: [PATCH 20/46] feat(grammar): P3.1 Parser interface + NewParser constructor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose the parser as a three-method interface — Parse, ParseText, ParseAs — behind NewParser(fset). The interface is the injection seam the P5 bridge-taggers and property-based builder tests need (architecture §5.3): tests construct Block values directly and feed them through a mock Parser without re-lexing text. - Parse(cg) primary path — preprocess -> lex -> parse - ParseText(t,p) LSP / test path — raw text with position - ParseAs(k,t,p) LSP kind-hint path (§4.6) — prepends a synthetic swagger: so dispatch goes the right way even when the editor hasn't typed the annotation line yet. preprocessText is a small helper that turns raw lines into Line values carrying both Text and Raw (identical — no Go comment markers to strip) with monotonic positions from basePos. Backward compat: the existing package-level Parse(cg, fset) is now a convenience wrapper around NewParser(fset).Parse(cg). All existing tests pass unmodified. Tests: interface satisfaction (compile-time), each method exercised end-to-end, ParseAs forces dispatch on a property-only body, the package-level Parse wrapper still works. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/parser.go | 91 +++++++++++++++++- internal/parsers/grammar/parser_api_test.go | 101 ++++++++++++++++++++ 2 files changed, 188 insertions(+), 4 deletions(-) create mode 100644 internal/parsers/grammar/parser_api_test.go diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index b759e93..92a81dd 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -19,6 +19,71 @@ import ( "strings" ) +// Parser is the public interface the analyzer layer (bridge-taggers) +// and LSP code consume. The default implementation is returned by +// NewParser(); tests can substitute their own mock to drive builders +// with synthesized Blocks without running the grammar pipeline. +// +// This is the unlock for P5's property-based builder tests +// (architecture §5.3) — no test ever needs to string-format a +// comment and re-parse it; tests construct Block values directly and +// inject them via a mock Parser. +type Parser interface { + // Parse runs the full preprocess → lex → parse pipeline on a + // comment group and returns the typed Block that describes it. + // Never panics; diagnostics accumulate on the returned Block. + Parse(cg *ast.CommentGroup) Block + + // ParseText parses raw comment content (markers already stripped + // by the caller). Used by LSP — the editor provides the raw text + // at cursor position — and by tests synthesising input. + ParseText(text string, pos token.Position) Block + + // ParseAs forces the annotation kind and parses the body under + // it. Useful for LSP completion where the annotation line is + // missing or being typed: given "the user is editing a model + // block", parse the properties under the assumed kind. (See + // architecture §4.6.) + ParseAs(kind AnnotationKind, text string, pos token.Position) Block +} + +// NewParser constructs a Parser bound to a FileSet (needed to map +// ast.CommentGroup positions to absolute source positions). The +// returned Parser is safe for concurrent use across goroutines. +// +//nolint:ireturn // Parser is the intentional public interface; callers depend on the surface, not the concrete type. +func NewParser(fset *token.FileSet) Parser { + return &parserImpl{fset: fset} +} + +type parserImpl struct { + fset *token.FileSet +} + +//nolint:ireturn // see Parse godoc +func (p *parserImpl) Parse(cg *ast.CommentGroup) Block { + lines := Preprocess(cg, p.fset) + tokens := Lex(lines) + return ParseTokens(tokens) +} + +//nolint:ireturn // see Parse godoc +func (p *parserImpl) ParseText(text string, pos token.Position) Block { + lines := preprocessText(text, pos) + tokens := Lex(lines) + return ParseTokens(tokens) +} + +//nolint:ireturn // see Parse godoc +func (p *parserImpl) ParseAs(kind AnnotationKind, text string, pos token.Position) Block { + // Prepend a synthetic annotation line so the parser dispatches to + // the requested kind. If text already contains a swagger: + // annotation, the existing line wins (findAnnotation picks the + // first) — the injected line is effectively decorative. + injected := "swagger:" + kind.String() + "\n" + text + return p.ParseText(injected, pos) +} + // Parse runs the full preprocess → lex → parse pipeline on a comment // group and returns the typed Block that describes it. Never panics; // diagnostics accumulate on the returned Block. @@ -26,11 +91,12 @@ import ( // A nil CommentGroup produces an empty UnboundBlock — useful for code // paths that call Parse unconditionally. // -//nolint:ireturn // Block is a polymorphic family (ModelBlock, RouteBlock, …); concrete type depends on the annotation. +// Convenience wrapper around NewParser(fset).Parse(cg) — preferred +// for one-off uses; store the Parser and reuse it for batch work. +// +//nolint:ireturn // see Parse godoc func Parse(cg *ast.CommentGroup, fset *token.FileSet) Block { - lines := Preprocess(cg, fset) - tokens := Lex(lines) - return ParseTokens(tokens) + return NewParser(fset).Parse(cg) } // ParseTokens runs parser-only on a pre-lexed token stream. Useful @@ -43,6 +109,23 @@ func ParseTokens(tokens []Token) Block { return p.parse() } +// preprocessText converts raw text (already stripped of Go comment +// markers) into a []Line. Used by ParseText/ParseAs where no +// *ast.CommentGroup is available. +func preprocessText(text string, basePos token.Position) []Line { + rawLines := strings.Split(text, "\n") + out := make([]Line, 0, len(rawLines)) + for i, r := range rawLines { + pos := basePos + pos.Line += i + if i > 0 { + pos.Column = 1 + } + out = append(out, Line{Text: r, Raw: r, Pos: pos}) + } + return out +} + type parseState struct { tokens []Token diag []Diagnostic diff --git a/internal/parsers/grammar/parser_api_test.go b/internal/parsers/grammar/parser_api_test.go new file mode 100644 index 0000000..e78cf10 --- /dev/null +++ b/internal/parsers/grammar/parser_api_test.go @@ -0,0 +1,101 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import ( + "go/token" + "testing" +) + +// P3.1: NewParser returns a Parser interface bound to a FileSet; +// three methods (Parse, ParseText, ParseAs) cover builder and LSP +// usage. + +func TestParserInterfaceParse(t *testing.T) { + src := `package p + +// swagger:model Foo +// maximum: 10 +type Foo int +` + cg, fset := parseCommentGroup(t, src) + p := NewParser(fset) + + b := p.Parse(cg) + if _, ok := b.(*ModelBlock); !ok { + t.Fatalf("want *ModelBlock, got %T", b) + } + + found := false + for prop := range b.Properties() { + if prop.Keyword.Name == fixtureValidationKw { + found = true + } + } + if !found { + t.Error("expected maximum property on parsed block") + } +} + +func TestParserInterfaceParseText(t *testing.T) { + // Raw content — no Go comment markers. + text := "swagger:model Bar\nmaximum: 5\n" + p := NewParser(token.NewFileSet()) + + pos := token.Position{Filename: "x.go", Line: 10, Column: 1} + b := p.ParseText(text, pos) + + mb, ok := b.(*ModelBlock) + if !ok { + t.Fatalf("want *ModelBlock, got %T", b) + } + if mb.Name != "Bar" { + t.Errorf("Name: got %q want Bar", mb.Name) + } + // Pos of the annotation should reflect the passed-in base. + if mb.Pos().Line != 10 { + t.Errorf("Pos.Line: got %d want 10", mb.Pos().Line) + } +} + +func TestParserInterfaceParseAs(t *testing.T) { + // LSP scenario: the user is editing properties; no annotation + // line is present. ParseAs forces dispatch under the given kind. + text := "maximum: 10\nminimum: 0\n" + p := NewParser(token.NewFileSet()) + + b := p.ParseAs(AnnModel, text, token.Position{Line: 1}) + + if _, ok := b.(*ModelBlock); !ok { + t.Fatalf("want *ModelBlock (forced), got %T", b) + } + var names []string + for prop := range b.Properties() { + names = append(names, prop.Keyword.Name) + } + if len(names) != 2 { + t.Errorf("want 2 properties (maximum, minimum), got %d: %v", len(names), names) + } +} + +func TestParserInterfaceSatisfiedByImpl(t *testing.T) { + // Compile-time assertion that *parserImpl implements Parser. + var _ Parser = (*parserImpl)(nil) + _ = t +} + +func TestPackageLevelParseStillWorks(t *testing.T) { + // Backward-compat: the original top-level Parse(cg, fset) is a + // thin wrapper around NewParser(fset).Parse(cg). + src := `package p + +// swagger:model Foo +type Foo int +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + if _, ok := b.(*ModelBlock); !ok { + t.Fatalf("convenience wrapper broken: got %T", b) + } +} From 259ce3e1ee36ed242040ad003e1b00d713d50f59 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 17:34:38 +0200 Subject: [PATCH 21/46] feat(grammar): P3.2 Parser options + WithDiagnosticSink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the Option functional-options layer on NewParser: type Option func(*parserImpl) func NewParser(fset, opts ...Option) Parser Ship WithDiagnosticSink(cb) as the first concrete Option — invokes the callback for every Diagnostic the parser emits, in parallel with the block-local accumulation that already exists. The LSP seam (architecture §4.3): diagnostics must be surfaced as they're produced, not batched until parse completes. Under the hood, parseState gains a `sink func(Diagnostic)` field and an `emit(d)` helper that fans out to sink + local slice. All nine `p.diag = append(p.diag, …)` sites converted to `p.emit(…)`. The package-level `ParseTokens` path keeps a nil sink (no callback) — only parserImpl constructs a parseState with the Option's sink. Logger option intentionally not added yet — parsers with no trace output haven't needed one; when an LSP or CLI developer wants verbose output we'll add it then. Option type is variadic so the addition won't break callers. funcorder lint honored: ParseAs moved above runParser so exported methods cluster before unexported ones in parserImpl. Tests: sink receives every diagnostic (matches Block.Diagnostics() count), default behavior (no option) unchanged, codes in stream match expected (CodeContextInvalid + CodeInvalidNumber from a deliberately wonky source). Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/parser.go | 68 ++++++++++++++++----- internal/parsers/grammar/parser_api_test.go | 66 ++++++++++++++++++++ 2 files changed, 120 insertions(+), 14 deletions(-) diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index 92a81dd..5e61175 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -51,27 +51,48 @@ type Parser interface { // ast.CommentGroup positions to absolute source positions). The // returned Parser is safe for concurrent use across goroutines. // +// Variadic Options tune behavior — see WithDiagnosticSink. A +// zero-option call is the common case. +// //nolint:ireturn // Parser is the intentional public interface; callers depend on the surface, not the concrete type. -func NewParser(fset *token.FileSet) Parser { - return &parserImpl{fset: fset} +func NewParser(fset *token.FileSet, opts ...Option) Parser { + p := &parserImpl{fset: fset} + for _, opt := range opts { + opt(p) + } + return p +} + +// Option configures a Parser built with NewParser. +type Option func(*parserImpl) + +// WithDiagnosticSink sets an optional callback invoked for every +// Diagnostic the parser emits, in addition to accumulating it on +// the returned Block. Useful for LSP streaming where diagnostics +// must be surfaced as they are produced, not batched until parse +// completes. The sink runs on the parser's goroutine; callers +// needing async delivery should push into a channel. +func WithDiagnosticSink(sink func(Diagnostic)) Option { + return func(p *parserImpl) { p.diagnosticSink = sink } } type parserImpl struct { - fset *token.FileSet + fset *token.FileSet + diagnosticSink func(Diagnostic) } //nolint:ireturn // see Parse godoc func (p *parserImpl) Parse(cg *ast.CommentGroup) Block { lines := Preprocess(cg, p.fset) tokens := Lex(lines) - return ParseTokens(tokens) + return p.runParser(tokens) } //nolint:ireturn // see Parse godoc func (p *parserImpl) ParseText(text string, pos token.Position) Block { lines := preprocessText(text, pos) tokens := Lex(lines) - return ParseTokens(tokens) + return p.runParser(tokens) } //nolint:ireturn // see Parse godoc @@ -84,6 +105,15 @@ func (p *parserImpl) ParseAs(kind AnnotationKind, text string, pos token.Positio return p.ParseText(injected, pos) } +// runParser constructs a parseState wired with the parserImpl's +// options (diagnostic sink, future additions) and delegates. +// +//nolint:ireturn // see Parse godoc +func (p *parserImpl) runParser(tokens []Token) Block { + ps := &parseState{tokens: tokens, sink: p.diagnosticSink} + return ps.parse() +} + // Parse runs the full preprocess → lex → parse pipeline on a comment // group and returns the typed Block that describes it. Never panics; // diagnostics accumulate on the returned Block. @@ -129,6 +159,16 @@ func preprocessText(text string, basePos token.Position) []Line { type parseState struct { tokens []Token diag []Diagnostic + sink func(Diagnostic) +} + +// emit records a diagnostic: appends to the block-local slice AND +// (if configured) pushes to the optional sink for streaming. +func (p *parseState) emit(d Diagnostic) { + if p.sink != nil { + p.sink(d) + } + p.diag = append(p.diag, d) } //nolint:ireturn // see Parse godoc @@ -178,7 +218,7 @@ func (p *parseState) checkContextValidity(base *baseBlock) { if contextsOverlap(prop.Keyword.Contexts, allowed) { continue } - p.diag = append(p.diag, Warnf(prop.Pos, CodeContextInvalid, + p.emit(Warnf(prop.Pos, CodeContextInvalid, "keyword %q not valid under swagger:%s (legal in: %s)", prop.Keyword.Name, base.kind, formatKeywordContexts(prop.Keyword.Contexts))) @@ -321,7 +361,7 @@ func (p *parseState) fillOperationArgs(method, path, tags, opID *string, tok Tok args := tok.Args switch { case len(args) < minOpArgs: - p.diag = append(p.diag, Errorf(tok.Pos, CodeInvalidAnnotation, + p.emit(Errorf(tok.Pos, CodeInvalidAnnotation, "swagger:%s requires method, path, and operation id (got %d args)", tok.Text, len(args))) case len(args) == minOpArgs: @@ -422,7 +462,7 @@ func (p *parseState) parseBody(base *baseBlock, post []Token) { i = p.collectYAMLBody(base, post, i) case TokenAnnotation: - p.diag = append(p.diag, Warnf(t.Pos, CodeInvalidAnnotation, + p.emit(Warnf(t.Pos, CodeInvalidAnnotation, "additional swagger:%s annotation ignored (one per comment block)", t.Text)) i++ @@ -476,7 +516,7 @@ func (p *parseState) collectBlockBody(base *baseBlock, post []Token, i int) int if isExtensions { if ext, ok := parseExtensionLine(next); ok { if !isExtensionName(ext.Name) { - p.diag = append(p.diag, Warnf(ext.Pos, CodeInvalidExtension, + p.emit(Warnf(ext.Pos, CodeInvalidExtension, "extension name %q must begin with 'x-' or 'X-'", ext.Name)) } base.extensions = append(base.extensions, ext) @@ -560,7 +600,7 @@ func (p *parseState) collectYAMLBody(base *baseBlock, post []Token, i int) int { if i < len(post) && post[i].Kind == TokenYAMLFence { i++ // consume closer } else { - p.diag = append(p.diag, Errorf(openerPos, CodeUnterminatedYAML, + p.emit(Errorf(openerPos, CodeUnterminatedYAML, "YAML body opened with --- but never closed")) } @@ -589,7 +629,7 @@ func (p *parseState) typeConvert(kw Keyword, raw string, pos token.Position) Typ op, rest := splitCmpOperator(raw) n, err := strconv.ParseFloat(strings.TrimSpace(rest), 64) if err != nil { - p.diag = append(p.diag, Errorf(pos, CodeInvalidNumber, + p.emit(Errorf(pos, CodeInvalidNumber, "%s: %q is not a valid number", kw.Name, raw)) return TypedValue{} } @@ -598,7 +638,7 @@ func (p *parseState) typeConvert(kw Keyword, raw string, pos token.Position) Typ case ValueInteger: i, err := strconv.ParseInt(strings.TrimSpace(raw), 10, 64) if err != nil { - p.diag = append(p.diag, Errorf(pos, CodeInvalidInteger, + p.emit(Errorf(pos, CodeInvalidInteger, "%s: %q is not a valid integer", kw.Name, raw)) return TypedValue{} } @@ -607,7 +647,7 @@ func (p *parseState) typeConvert(kw Keyword, raw string, pos token.Position) Typ case ValueBoolean: b, ok := parseBool(raw) if !ok { - p.diag = append(p.diag, Errorf(pos, CodeInvalidBoolean, + p.emit(Errorf(pos, CodeInvalidBoolean, "%s: %q is not a valid boolean (expected true or false)", kw.Name, raw)) return TypedValue{} } @@ -619,7 +659,7 @@ func (p *parseState) typeConvert(kw Keyword, raw string, pos token.Position) Typ return TypedValue{Type: ValueStringEnum, String: allowed} } } - p.diag = append(p.diag, Errorf(pos, CodeInvalidStringEnum, + p.emit(Errorf(pos, CodeInvalidStringEnum, "%s: %q is not one of {%s}", kw.Name, raw, strings.Join(kw.Value.Values, ", "))) return TypedValue{} diff --git a/internal/parsers/grammar/parser_api_test.go b/internal/parsers/grammar/parser_api_test.go index e78cf10..60e86bc 100644 --- a/internal/parsers/grammar/parser_api_test.go +++ b/internal/parsers/grammar/parser_api_test.go @@ -85,6 +85,72 @@ func TestParserInterfaceSatisfiedByImpl(t *testing.T) { _ = t } +func TestWithDiagnosticSinkStreams(t *testing.T) { + // WithDiagnosticSink delivers each diagnostic to the callback in + // addition to accumulating it on the returned Block. + src := `package p + +// swagger:model Foo +// in: query +// maximum: notanumber +type Foo int +` + cg, fset := parseCommentGroup(t, src) + + var streamed []Diagnostic + p := NewParser(fset, WithDiagnosticSink(func(d Diagnostic) { + streamed = append(streamed, d) + })) + + b := p.Parse(cg) + + // Block accumulation still populated. + if len(b.Diagnostics()) == 0 { + t.Fatal("Block should still accumulate diagnostics") + } + // Sink received every diagnostic. + if len(streamed) != len(b.Diagnostics()) { + t.Errorf("sink got %d, block got %d — must match", + len(streamed), len(b.Diagnostics())) + } + // At least one diagnostic of each expected code (in: illegal + + // maximum not-a-number). + codes := map[Code]bool{} + for _, d := range streamed { + codes[d.Code] = true + } + if !codes[CodeContextInvalid] { + t.Errorf("expected CodeContextInvalid in stream, got %+v", codes) + } + if !codes[CodeInvalidNumber] { + t.Errorf("expected CodeInvalidNumber in stream, got %+v", codes) + } +} + +func TestWithDiagnosticSinkNilByDefault(t *testing.T) { + // No options → sink is nil → behavior matches pre-P3.2. + src := `package p + +// swagger:model Foo +// in: query +type Foo int +` + cg, fset := parseCommentGroup(t, src) + p := NewParser(fset) + + b := p.Parse(cg) + // Should still have the context-invalid diagnostic on the block. + found := false + for _, d := range b.Diagnostics() { + if d.Code == CodeContextInvalid { + found = true + } + } + if !found { + t.Error("no options path should still accumulate diagnostics") + } +} + func TestPackageLevelParseStillWorks(t *testing.T) { // Backward-compat: the original top-level Parse(cg, fset) is a // thin wrapper around NewParser(fset).Parse(cg). From a235fc99f62c718ee379402f9e51bd52bbd5b6b7 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 17:38:16 +0200 Subject: [PATCH 22/46] feat(grammar): P3.3 Block typed accessors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add six lookup methods to Block (and baseBlock): Has(name) bool GetFloat(name) (float64, bool) GetInt(name) (int64, bool) GetBool(name) (bool, bool) GetString(name) (string, bool) GetList(name) ([]string, bool) All share a findProperty helper that matches canonical-name OR alias, case-insensitively. `Max`, `max`, `MAXIMUM` all resolve to the same `maximum` Property. Typed getters return (zero, false) when the keyword is absent or its ValueType doesn't match — so callers write: if n, ok := block.GetFloat("maximum"); ok { schema.Maximum = n } GetString is the permissive fallback: StringEnum returns the canonical (table-spelled) value; everything else returns the raw Property.Value. GetList unifies the two shapes of "a list of things": a block-head Property's Body (consumes:, security:, …) is returned directly; a ValueCommaList value (enum, schemes, …) is split on commas with whitespace trimmed. GetList returns a defensive copy so mutating the returned slice can't corrupt Block state. Block interface grows from 9 to 14 methods. interfacebloat lint silenced with a rationale: Block is the single consumer contract for both builders and LSP; splitting into BlockInfo / BlockIterators / BlockAccessors would introduce friction at every call site and gain nothing — there's no implementation other than baseBlock- embedded typed kinds. Coverage on internal/parsers/grammar/ holds at 93.8%. Full repo test suite green. Tests (accessors_test.go): Has + absent, alias + case-insensitive lookup, GetFloat/Int/Bool/String happy paths, StringEnum canonicalization, GetList for CommaList and Body shapes, defensive- copy behavior, type-mismatch returning false. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/accessors_test.go | 186 +++++++++++++++++++ internal/parsers/grammar/ast.go | 115 ++++++++++++ internal/parsers/grammar/productions_test.go | 2 + internal/parsers/grammar/typeconv_test.go | 4 +- 4 files changed, 305 insertions(+), 2 deletions(-) create mode 100644 internal/parsers/grammar/accessors_test.go diff --git a/internal/parsers/grammar/accessors_test.go b/internal/parsers/grammar/accessors_test.go new file mode 100644 index 0000000..62ff1e0 --- /dev/null +++ b/internal/parsers/grammar/accessors_test.go @@ -0,0 +1,186 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammar + +import ( + "slices" + "testing" +) + +// P3.3: Block accessors return (value, ok) tuples; ok is false when +// the keyword is absent or its type doesn't match. + +func TestAccessorHasAndAbsent(t *testing.T) { + cg, fset := parseCommentGroup(t, "package p\n\n// swagger:model Foo\n// maximum: 5\ntype Foo int\n") + b := Parse(cg, fset) + + if !b.Has(fixtureValidationKw) { + t.Error("Has(maximum): want true") + } + if b.Has("nonexistent") { + t.Error("Has(nonexistent): want false") + } +} + +func TestAccessorAliasLookup(t *testing.T) { + // `maximum` has alias `max`; accessor should find via either spelling. + cg, fset := parseCommentGroup(t, "package p\n\n// swagger:model Foo\n// max: 5\ntype Foo int\n") + b := Parse(cg, fset) + + if !b.Has("max") { + t.Error("Has via alias: want true") + } + if !b.Has("MAX") { + t.Error("Has is case-insensitive: want true") + } + if !b.Has(fixtureValidationKw) { + t.Error("Has via canonical should also work: want true") + } + + v, ok := b.GetFloat(fixtureValidationKw) + if !ok || v != 5 { + t.Errorf("GetFloat(maximum): got (%v, %v) want (5, true)", v, ok) + } +} + +func TestAccessorGetFloat(t *testing.T) { + cg, fset := parseCommentGroup(t, "package p\n\n// swagger:model Foo\n// maximum: 5.5\ntype Foo int\n") + b := Parse(cg, fset) + + v, ok := b.GetFloat(fixtureValidationKw) + if !ok || v != 5.5 { + t.Errorf("GetFloat: got (%v, %v)", v, ok) + } + + // Wrong type (pattern is ValueString) → ok=false. + _, ok = b.GetFloat("pattern") + if ok { + t.Error("GetFloat on missing keyword: want ok=false") + } +} + +func TestAccessorGetInt(t *testing.T) { + cg, fset := parseCommentGroup(t, "package p\n\n// swagger:model Foo\n// maxLength: 42\ntype Foo int\n") + b := Parse(cg, fset) + + v, ok := b.GetInt("maxLength") + if !ok || v != 42 { + t.Errorf("GetInt: got (%v, %v)", v, ok) + } +} + +func TestAccessorGetBool(t *testing.T) { + cg, fset := parseCommentGroup(t, "package p\n\n// swagger:model Foo\n// readOnly: true\ntype Foo int\n") + b := Parse(cg, fset) + + v, ok := b.GetBool("readOnly") + if !ok || !v { + t.Errorf("GetBool: got (%v, %v)", v, ok) + } +} + +func TestAccessorGetStringRawValue(t *testing.T) { + // pattern is ValueString: accessor returns the raw Value. + cg, fset := parseCommentGroup(t, "package p\n\n// swagger:model Foo\n// pattern: ^[a-z]+$\ntype Foo int\n") + b := Parse(cg, fset) + + v, ok := b.GetString("pattern") + if !ok || v != "^[a-z]+$" { + t.Errorf("GetString(pattern): got (%q, %v)", v, ok) + } +} + +func TestAccessorGetStringEnum(t *testing.T) { + // StringEnum returns the canonical (table-spelled) value. + src := `package p + +// swagger:parameters listPets +// +// in: QUERY +type PetParams struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + v, ok := b.GetString("in") + if !ok || v != "query" { + t.Errorf("GetString(in) canonical: got (%q, %v) want (query, true)", v, ok) + } +} + +func TestAccessorGetListCommaList(t *testing.T) { + cg, fset := parseCommentGroup(t, "package p\n\n// swagger:model Foo\n// enum: a, b, c\ntype Foo int\n") + b := Parse(cg, fset) + + v, ok := b.GetList("enum") + if !ok { + t.Fatalf("GetList(enum): want ok=true, got %v", ok) + } + if !slices.Equal(v, []string{"a", "b", "c"}) { + t.Errorf("GetList(enum): got %v want [a b c]", v) + } +} + +func TestAccessorGetListBlockBody(t *testing.T) { + src := `package p + +// swagger:meta +// +// consumes: +// application/json +// application/xml +type Root struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + v, ok := b.GetList(fixtureBlockKw) + if !ok { + t.Fatalf("GetList(consumes): want ok=true, got %v", ok) + } + if !slices.Equal(v, []string{"application/json", "application/xml"}) { + t.Errorf("GetList(consumes): got %v", v) + } +} + +func TestAccessorGetListReturnsCopy(t *testing.T) { + // Mutating the returned slice must not affect Block state. + src := `package p + +// swagger:meta +// +// consumes: +// application/json +type Root struct{} +` + cg, fset := parseCommentGroup(t, src) + b := Parse(cg, fset) + + v1, ok := b.GetList(fixtureBlockKw) + if !ok { + t.Fatal("GetList: want ok=true") + } + v1[0] = fixtureMutatedMark + + v2, _ := b.GetList(fixtureBlockKw) + if v2[0] == fixtureMutatedMark { + t.Error("GetList must return a defensive copy") + } +} + +func TestAccessorTypeMismatchReturnsFalse(t *testing.T) { + cg, fset := parseCommentGroup(t, "package p\n\n// swagger:model Foo\n// maximum: 5\ntype Foo int\n") + b := Parse(cg, fset) + + // maximum is Number, not Integer/Boolean/List. + if _, ok := b.GetInt(fixtureValidationKw); ok { + t.Error("GetInt on Number-typed keyword: want ok=false") + } + if _, ok := b.GetBool(fixtureValidationKw); ok { + t.Error("GetBool on Number-typed keyword: want ok=false") + } + if _, ok := b.GetList(fixtureValidationKw); ok { + t.Error("GetList on scalar-typed keyword: want ok=false") + } +} diff --git a/internal/parsers/grammar/ast.go b/internal/parsers/grammar/ast.go index 7de46e8..158b841 100644 --- a/internal/parsers/grammar/ast.go +++ b/internal/parsers/grammar/ast.go @@ -6,6 +6,7 @@ package grammar import ( "go/token" "iter" + "strings" ) // Block is the interface implemented by every typed AST node the @@ -14,6 +15,8 @@ import ( // and add the fields specific to their annotation. // // See architecture §4.6. +// +//nolint:interfacebloat // 14 methods is deliberate — Block is the single consumer contract for both builders and LSP; splitting introduces friction at every call site. type Block interface { // Pos reports the position of the block's defining token — the // annotation line for annotated blocks, or the first comment line @@ -45,6 +48,26 @@ type Block interface { // dispatched from (UnboundBlock returns AnnUnknown). Used by // analyzers to type-switch-check without reflection. AnnotationKind() AnnotationKind + + // Has reports whether any property matches the given keyword + // name (canonical or alias, case-insensitive). + Has(name string) bool + // GetFloat returns the Number-typed value for the first matching + // keyword. ok is false if the keyword is absent or its ValueType + // isn't Number. + GetFloat(name string) (float64, bool) + // GetInt returns the Integer-typed value. + GetInt(name string) (int64, bool) + // GetBool returns the Boolean-typed value. + GetBool(name string) (bool, bool) + // GetString returns the string-form of the first matching + // keyword's value: canonical label for StringEnum, raw Value + // otherwise (pattern, version, host, …). + GetString(name string) (string, bool) + // GetList returns the value as a []string: Body lines for + // KEYWORD_BLOCK_HEAD properties; comma-split values for + // ValueCommaList; nil+false otherwise. + GetList(name string) ([]string, bool) } // AnnotationKind identifies the top-level "swagger:xxx" directive @@ -277,6 +300,98 @@ func (b *baseBlock) Extensions() iter.Seq[Extension] { } } +// --- typed accessors (P3.3) --- + +func (b *baseBlock) Has(name string) bool { + _, ok := b.findProperty(name) + return ok +} + +func (b *baseBlock) GetFloat(name string) (float64, bool) { + p, ok := b.findProperty(name) + if !ok || p.Typed.Type != ValueNumber { + return 0, false + } + return p.Typed.Number, true +} + +func (b *baseBlock) GetInt(name string) (int64, bool) { + p, ok := b.findProperty(name) + if !ok || p.Typed.Type != ValueInteger { + return 0, false + } + return p.Typed.Integer, true +} + +func (b *baseBlock) GetBool(name string) (bool, bool) { + p, ok := b.findProperty(name) + if !ok || p.Typed.Type != ValueBoolean { + return false, false + } + return p.Typed.Boolean, true +} + +func (b *baseBlock) GetString(name string) (string, bool) { + p, ok := b.findProperty(name) + if !ok { + return "", false + } + if p.Typed.Type == ValueStringEnum { + return p.Typed.String, true + } + return p.Value, true +} + +func (b *baseBlock) GetList(name string) ([]string, bool) { + p, ok := b.findProperty(name) + if !ok { + return nil, false + } + if len(p.Body) > 0 { + // Defensive copy — callers shouldn't be able to mutate the + // block's internal state. + out := make([]string, len(p.Body)) + copy(out, p.Body) + return out, true + } + if p.Keyword.Value.Type == ValueCommaList && p.Value != "" { + return splitCommaList(p.Value), true + } + return nil, false +} + +// findProperty is the shared lookup: first Property whose keyword +// name (or alias, case-insensitive) matches. +func (b *baseBlock) findProperty(name string) (Property, bool) { + for _, p := range b.properties { + if strings.EqualFold(p.Keyword.Name, name) { + return p, true + } + for _, alias := range p.Keyword.Aliases { + if strings.EqualFold(alias, name) { + return p, true + } + } + } + return Property{}, false +} + +// splitCommaList splits a "a, b, c" value on commas, trimming +// whitespace. Quoted strings are not recognised — a later refinement +// (per architecture §2.1 enum note) can add that; v1 parity is comma- +// and-trim. +func splitCommaList(s string) []string { + parts := strings.Split(s, ",") + out := make([]string, 0, len(parts)) + for _, part := range parts { + trimmed := strings.TrimSpace(part) + if trimmed != "" { + out = append(out, trimmed) + } + } + return out +} + // --- typed Block kinds --- // ModelBlock is produced by `swagger:model [Name]`. Name is the diff --git a/internal/parsers/grammar/productions_test.go b/internal/parsers/grammar/productions_test.go index cfcf4ea..74baad7 100644 --- a/internal/parsers/grammar/productions_test.go +++ b/internal/parsers/grammar/productions_test.go @@ -19,6 +19,8 @@ const ( fixtureModelName = "Foo" fixtureBlockKw = "consumes" fixtureValidationKw = "maximum" + fixtureMutatedMark = "mutated" + fixtureEnumInQuery = "query" ) // --- annotation-line --- diff --git a/internal/parsers/grammar/typeconv_test.go b/internal/parsers/grammar/typeconv_test.go index 75147d1..b5c544d 100644 --- a/internal/parsers/grammar/typeconv_test.go +++ b/internal/parsers/grammar/typeconv_test.go @@ -162,7 +162,7 @@ type PetParams struct{} if p.Typed.Type != ValueStringEnum { t.Fatalf("Typed.Type: got %v want ValueStringEnum", p.Typed.Type) } - if p.Typed.String != "query" { + if p.Typed.String != fixtureEnumInQuery { t.Errorf("String: got %q want query", p.Typed.String) } } @@ -178,7 +178,7 @@ func TestTypeConvertStringEnumCanonicalizes(t *testing.T) { type PetParams struct{} ` p, _ := firstPropertyTyped(t, src) - if p.Typed.String != "query" { + if p.Typed.String != fixtureEnumInQuery { t.Errorf("canonicalized String: got %q want query (table casing)", p.Typed.String) } } From e63d017d02679d2d46746cc52f81701ed790e2a0 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 17:50:16 +0200 Subject: [PATCH 23/46] refactor(grammar): NewParser returns *DefaultParser (concrete) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Fred's review: the Parser interface exists to enable mock implementations in tests, not to support multiple production variants. NewParser should return the concrete type so IDE discoverability and docs-on-hover work; the interface stays as the mock contract. Changes: - Rename parserImpl -> DefaultParser (exported). - NewParser(fset, opts...) now returns *DefaultParser instead of Parser. Callers who previously wrote `var p Parser = NewParser(...)` continue to work via implicit satisfaction. - Drop the ireturn nolint on NewParser (no longer applies). - Parser interface godoc reframed: "consumer contract / mock seam". - Compile-time assertion in parser_api_test.go updated to `(*DefaultParser)(nil)`. Per-method ireturn nolints on Parse/ParseText/ParseAs stay — those still return Block (the AST's polymorphic family). Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/grammar/parser.go | 62 ++++++++++++--------- internal/parsers/grammar/parser_api_test.go | 4 +- 2 files changed, 37 insertions(+), 29 deletions(-) diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index 5e61175..a2a6a45 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -19,15 +19,20 @@ import ( "strings" ) -// Parser is the public interface the analyzer layer (bridge-taggers) -// and LSP code consume. The default implementation is returned by -// NewParser(); tests can substitute their own mock to drive builders -// with synthesized Blocks without running the grammar pipeline. +// Parser is the consumer contract the analyzer layer (bridge-taggers) +// depends on. The grammar package ships *DefaultParser as the single +// concrete implementation; the interface exists so tests can +// substitute a mock that distributes fabricated Blocks without +// running the grammar pipeline. // // This is the unlock for P5's property-based builder tests -// (architecture §5.3) — no test ever needs to string-format a -// comment and re-parse it; tests construct Block values directly and -// inject them via a mock Parser. +// (architecture §5.3): tests construct Block values directly and +// feed them through a mock Parser — no string-formatting of comments +// and no re-parsing. +// +// Production code takes the concrete *DefaultParser returned by +// NewParser; only test code (and the rare case needing a mock) +// depends on the interface. type Parser interface { // Parse runs the full preprocess → lex → parse pipeline on a // comment group and returns the typed Block that describes it. @@ -47,24 +52,32 @@ type Parser interface { ParseAs(kind AnnotationKind, text string, pos token.Position) Block } -// NewParser constructs a Parser bound to a FileSet (needed to map -// ast.CommentGroup positions to absolute source positions). The -// returned Parser is safe for concurrent use across goroutines. +// DefaultParser is the grammar package's concrete Parser +// implementation. Safe for concurrent use across goroutines. +// Constructed via NewParser. +type DefaultParser struct { + fset *token.FileSet + diagnosticSink func(Diagnostic) +} + +// NewParser constructs a DefaultParser bound to a FileSet (needed to +// map ast.CommentGroup positions to absolute source positions). +// Returns the concrete *DefaultParser so callers get full IDE +// discoverability; the Parser interface is the seam tests use to +// inject a mock. // // Variadic Options tune behavior — see WithDiagnosticSink. A // zero-option call is the common case. -// -//nolint:ireturn // Parser is the intentional public interface; callers depend on the surface, not the concrete type. -func NewParser(fset *token.FileSet, opts ...Option) Parser { - p := &parserImpl{fset: fset} +func NewParser(fset *token.FileSet, opts ...Option) *DefaultParser { + p := &DefaultParser{fset: fset} for _, opt := range opts { opt(p) } return p } -// Option configures a Parser built with NewParser. -type Option func(*parserImpl) +// Option configures a DefaultParser built with NewParser. +type Option func(*DefaultParser) // WithDiagnosticSink sets an optional callback invoked for every // Diagnostic the parser emits, in addition to accumulating it on @@ -73,30 +86,25 @@ type Option func(*parserImpl) // completes. The sink runs on the parser's goroutine; callers // needing async delivery should push into a channel. func WithDiagnosticSink(sink func(Diagnostic)) Option { - return func(p *parserImpl) { p.diagnosticSink = sink } -} - -type parserImpl struct { - fset *token.FileSet - diagnosticSink func(Diagnostic) + return func(p *DefaultParser) { p.diagnosticSink = sink } } //nolint:ireturn // see Parse godoc -func (p *parserImpl) Parse(cg *ast.CommentGroup) Block { +func (p *DefaultParser) Parse(cg *ast.CommentGroup) Block { lines := Preprocess(cg, p.fset) tokens := Lex(lines) return p.runParser(tokens) } //nolint:ireturn // see Parse godoc -func (p *parserImpl) ParseText(text string, pos token.Position) Block { +func (p *DefaultParser) ParseText(text string, pos token.Position) Block { lines := preprocessText(text, pos) tokens := Lex(lines) return p.runParser(tokens) } //nolint:ireturn // see Parse godoc -func (p *parserImpl) ParseAs(kind AnnotationKind, text string, pos token.Position) Block { +func (p *DefaultParser) ParseAs(kind AnnotationKind, text string, pos token.Position) Block { // Prepend a synthetic annotation line so the parser dispatches to // the requested kind. If text already contains a swagger: // annotation, the existing line wins (findAnnotation picks the @@ -105,11 +113,11 @@ func (p *parserImpl) ParseAs(kind AnnotationKind, text string, pos token.Positio return p.ParseText(injected, pos) } -// runParser constructs a parseState wired with the parserImpl's +// runParser constructs a parseState wired with the DefaultParser's // options (diagnostic sink, future additions) and delegates. // //nolint:ireturn // see Parse godoc -func (p *parserImpl) runParser(tokens []Token) Block { +func (p *DefaultParser) runParser(tokens []Token) Block { ps := &parseState{tokens: tokens, sink: p.diagnosticSink} return ps.parse() } diff --git a/internal/parsers/grammar/parser_api_test.go b/internal/parsers/grammar/parser_api_test.go index 60e86bc..cb92e9c 100644 --- a/internal/parsers/grammar/parser_api_test.go +++ b/internal/parsers/grammar/parser_api_test.go @@ -80,8 +80,8 @@ func TestParserInterfaceParseAs(t *testing.T) { } func TestParserInterfaceSatisfiedByImpl(t *testing.T) { - // Compile-time assertion that *parserImpl implements Parser. - var _ Parser = (*parserImpl)(nil) + // Compile-time assertion that *DefaultParser implements Parser. + var _ Parser = (*DefaultParser)(nil) _ = t } From 78804c265107d44b3e943a71bb5b893deea38484 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 19:03:38 +0200 Subject: [PATCH 24/46] feat(grammar): P4.1 + P4.2 parity harness scaffolding + NormalizedCommentView MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create internal/parsers/grammar/grammar_test/ (package grammartest) with the parity-harness plumbing the P5 bridge-tagger migration depends on: - NormalizedCommentView — parser-agnostic, diff-friendly shape capturing AnnotationKind + kind-specific positional args (Name, Method/Path/Tags/OpID, TargetTypes), Title/Description, typed Properties (value + Typed subtree), YAML bodies, Extensions, and Diagnostics sorted by (code, severity) for determinism. - ViewFromBlock(b grammar.Block) NormalizedCommentView — the v2 adapter. Uses a type-switch over the block family that is explicit per kind (future AnnotationKind additions fail closed — they just don't populate args). - ParseSourceToViews(t, src) — test helper: parse a Go snippet, walk its declarations, normalize each attached comment group. - AssertGoldenView(t, path, views) — JSON-snapshot diff driver honoring UPDATE_GOLDEN=1 (matches the existing scantest convention). Seven committed golden fixtures cover the common comment-group shapes: simple_model, route_with_tags, operation_with_yaml, parameters_with_validations, meta_with_extensions, unbound_with_bullet, and context_invalid_diag. These lock in the v2 parser's output; P5 commits will extend the set per builder. Bug caught by the harness and fixed inline: UnboundBlock previously dropped all prose because the parser routed the entire token stream into parseBody. Added findBodyStart(tokens) — for an annotation-less comment group, tokens are split at the first keyword/YAML-fence so the prose prelude (e.g. a struct-field docstring) is recovered into Title/Description. This is the canonical use case for UnboundBlock and would have been a recurring bug in P5 without the harness catch. P4.3 (Options.UseGrammarParser feature flag + scanner wiring) is deferred to the first P5 bridge-tagger commit, where it has an actual consumer — pre-P5 the flag has no path to exit through. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- .../grammar/grammar_test/harness_test.go | 125 ++++++++ .../testdata/golden/context_invalid_diag.json | 24 ++ .../testdata/golden/meta_with_extensions.json | 32 ++ .../testdata/golden/operation_with_yaml.json | 15 + .../golden/parameters_with_validations.json | 43 +++ .../testdata/golden/route_with_tags.json | 25 ++ .../testdata/golden/simple_model.json | 29 ++ .../testdata/golden/unbound_with_bullet.json | 7 + internal/parsers/grammar/grammar_test/view.go | 293 ++++++++++++++++++ internal/parsers/grammar/parser.go | 33 +- 10 files changed, 625 insertions(+), 1 deletion(-) create mode 100644 internal/parsers/grammar/grammar_test/harness_test.go create mode 100644 internal/parsers/grammar/grammar_test/testdata/golden/context_invalid_diag.json create mode 100644 internal/parsers/grammar/grammar_test/testdata/golden/meta_with_extensions.json create mode 100644 internal/parsers/grammar/grammar_test/testdata/golden/operation_with_yaml.json create mode 100644 internal/parsers/grammar/grammar_test/testdata/golden/parameters_with_validations.json create mode 100644 internal/parsers/grammar/grammar_test/testdata/golden/route_with_tags.json create mode 100644 internal/parsers/grammar/grammar_test/testdata/golden/simple_model.json create mode 100644 internal/parsers/grammar/grammar_test/testdata/golden/unbound_with_bullet.json create mode 100644 internal/parsers/grammar/grammar_test/view.go diff --git a/internal/parsers/grammar/grammar_test/harness_test.go b/internal/parsers/grammar/grammar_test/harness_test.go new file mode 100644 index 0000000..3dcb505 --- /dev/null +++ b/internal/parsers/grammar/grammar_test/harness_test.go @@ -0,0 +1,125 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package grammartest + +import ( + "path/filepath" + "testing" +) + +// Harness smoke tests: pick a handful of representative comment-group +// shapes and lock their v2 parse-output down as golden JSON. P5 +// builder-migration commits extend the fixture set per builder. +// +// Fixture sources stay inline in these tests for the moment — they +// read like a readable "what the v2 parser produces for THIS comment" +// catalogue. Migration to external Go-package fixtures (the +// fixtures/ tree) happens when P5 needs to cover full-file scenarios. + +func TestHarnessSimpleModel(t *testing.T) { + src := `package p + +// swagger:model Foo +// +// Foo is a simple model. +// +// maximum: 100 +// minimum: 0 +// pattern: ^[a-z]+$ +type Foo int +` + views := ParseSourceToViews(t, src) + AssertGoldenView(t, filepath.Join("testdata", "golden", "simple_model.json"), views) +} + +func TestHarnessRouteWithTags(t *testing.T) { + src := `package p + +// swagger:route GET /pets tags listPets +// +// consumes: +// application/json +// produces: +// application/json +func ListPets() {} +` + views := ParseSourceToViews(t, src) + AssertGoldenView(t, filepath.Join("testdata", "golden", "route_with_tags.json"), views) +} + +func TestHarnessOperationWithYAML(t *testing.T) { + src := `package p + +// swagger:operation GET /pets listPets +// +// --- +// responses: +// 200: successResponse +// 404: notFound +// --- +func ListPets() {} +` + views := ParseSourceToViews(t, src) + AssertGoldenView(t, filepath.Join("testdata", "golden", "operation_with_yaml.json"), views) +} + +func TestHarnessParametersWithValidations(t *testing.T) { + src := `package p + +// swagger:parameters listPets +// +// in: query +// required: true +// maximum: 100 +// minimum: 0 +type PetParams struct{} +` + views := ParseSourceToViews(t, src) + AssertGoldenView(t, filepath.Join("testdata", "golden", "parameters_with_validations.json"), views) +} + +func TestHarnessMetaWithExtensions(t *testing.T) { + src := `package p + +// swagger:meta +// +// version: "1.0" +// host: api.example.com +// +// extensions: +// x-foo: bar +// x-baz: 42 +type Root struct{} +` + views := ParseSourceToViews(t, src) + AssertGoldenView(t, filepath.Join("testdata", "golden", "meta_with_extensions.json"), views) +} + +func TestHarnessUnboundWithBullet(t *testing.T) { + // Regression: bullet dashes survive (P1.10 lock-in) and no + // annotation → UnboundBlock path. + src := `package p + +// A summary line. +// +// - first bullet +// - second bullet +type Foo int +` + views := ParseSourceToViews(t, src) + AssertGoldenView(t, filepath.Join("testdata", "golden", "unbound_with_bullet.json"), views) +} + +func TestHarnessContextInvalidDiagnostic(t *testing.T) { + // Regression: context-validity warning surfaces in the view via + // the normalized diagnostics channel. + src := `package p + +// swagger:model Foo +// in: query +type Foo int +` + views := ParseSourceToViews(t, src) + AssertGoldenView(t, filepath.Join("testdata", "golden", "context_invalid_diag.json"), views) +} diff --git a/internal/parsers/grammar/grammar_test/testdata/golden/context_invalid_diag.json b/internal/parsers/grammar/grammar_test/testdata/golden/context_invalid_diag.json new file mode 100644 index 0000000..3e0aefd --- /dev/null +++ b/internal/parsers/grammar/grammar_test/testdata/golden/context_invalid_diag.json @@ -0,0 +1,24 @@ +[ + { + "annotationKind": "model", + "annotationArgs": { + "name": "Foo" + }, + "properties": [ + { + "keyword": "in", + "value": "query", + "typed": { + "type": "string-enum", + "string": "query" + } + } + ], + "diagnostics": [ + { + "code": "parse.context-invalid", + "severity": "warning" + } + ] + } +] diff --git a/internal/parsers/grammar/grammar_test/testdata/golden/meta_with_extensions.json b/internal/parsers/grammar/grammar_test/testdata/golden/meta_with_extensions.json new file mode 100644 index 0000000..20bfb6a --- /dev/null +++ b/internal/parsers/grammar/grammar_test/testdata/golden/meta_with_extensions.json @@ -0,0 +1,32 @@ +[ + { + "annotationKind": "meta", + "properties": [ + { + "keyword": "version", + "value": "\"1.0\"" + }, + { + "keyword": "host", + "value": "api.example.com" + }, + { + "keyword": "extensions", + "body": [ + "x-foo: bar", + "x-baz: 42" + ] + } + ], + "extensions": [ + { + "name": "x-foo", + "value": "bar" + }, + { + "name": "x-baz", + "value": "42" + } + ] + } +] diff --git a/internal/parsers/grammar/grammar_test/testdata/golden/operation_with_yaml.json b/internal/parsers/grammar/grammar_test/testdata/golden/operation_with_yaml.json new file mode 100644 index 0000000..dcf3942 --- /dev/null +++ b/internal/parsers/grammar/grammar_test/testdata/golden/operation_with_yaml.json @@ -0,0 +1,15 @@ +[ + { + "annotationKind": "operation", + "annotationArgs": { + "method": "GET", + "path": "/pets", + "opId": "listPets" + }, + "yamlBlocks": [ + { + "text": "responses:\n 200: successResponse\n 404: notFound" + } + ] + } +] diff --git a/internal/parsers/grammar/grammar_test/testdata/golden/parameters_with_validations.json b/internal/parsers/grammar/grammar_test/testdata/golden/parameters_with_validations.json new file mode 100644 index 0000000..11991ac --- /dev/null +++ b/internal/parsers/grammar/grammar_test/testdata/golden/parameters_with_validations.json @@ -0,0 +1,43 @@ +[ + { + "annotationKind": "parameters", + "annotationArgs": { + "targetTypes": [ + "listPets" + ] + }, + "properties": [ + { + "keyword": "in", + "value": "query", + "typed": { + "type": "string-enum", + "string": "query" + } + }, + { + "keyword": "required", + "value": "true", + "typed": { + "type": "boolean", + "boolean": true + } + }, + { + "keyword": "maximum", + "value": "100", + "typed": { + "type": "number", + "number": 100 + } + }, + { + "keyword": "minimum", + "value": "0", + "typed": { + "type": "number" + } + } + ] + } +] diff --git a/internal/parsers/grammar/grammar_test/testdata/golden/route_with_tags.json b/internal/parsers/grammar/grammar_test/testdata/golden/route_with_tags.json new file mode 100644 index 0000000..ce8aad2 --- /dev/null +++ b/internal/parsers/grammar/grammar_test/testdata/golden/route_with_tags.json @@ -0,0 +1,25 @@ +[ + { + "annotationKind": "route", + "annotationArgs": { + "method": "GET", + "path": "/pets", + "tags": "tags", + "opId": "listPets" + }, + "properties": [ + { + "keyword": "consumes", + "body": [ + "application/json" + ] + }, + { + "keyword": "produces", + "body": [ + "application/json" + ] + } + ] + } +] diff --git a/internal/parsers/grammar/grammar_test/testdata/golden/simple_model.json b/internal/parsers/grammar/grammar_test/testdata/golden/simple_model.json new file mode 100644 index 0000000..0ece65b --- /dev/null +++ b/internal/parsers/grammar/grammar_test/testdata/golden/simple_model.json @@ -0,0 +1,29 @@ +[ + { + "annotationKind": "model", + "annotationArgs": { + "name": "Foo" + }, + "properties": [ + { + "keyword": "maximum", + "value": "100", + "typed": { + "type": "number", + "number": 100 + } + }, + { + "keyword": "minimum", + "value": "0", + "typed": { + "type": "number" + } + }, + { + "keyword": "pattern", + "value": "^[a-z]+$" + } + ] + } +] diff --git a/internal/parsers/grammar/grammar_test/testdata/golden/unbound_with_bullet.json b/internal/parsers/grammar/grammar_test/testdata/golden/unbound_with_bullet.json new file mode 100644 index 0000000..fedeb21 --- /dev/null +++ b/internal/parsers/grammar/grammar_test/testdata/golden/unbound_with_bullet.json @@ -0,0 +1,7 @@ +[ + { + "annotationKind": "unknown", + "title": "A summary line.", + "description": "- first bullet - second bullet" + } +] diff --git a/internal/parsers/grammar/grammar_test/view.go b/internal/parsers/grammar/grammar_test/view.go new file mode 100644 index 0000000..7085043 --- /dev/null +++ b/internal/parsers/grammar/grammar_test/view.go @@ -0,0 +1,293 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +// Package grammartest holds the parity harness the P5 builder +// migration depends on. Two pieces live here: +// +// - NormalizedCommentView is a parser-agnostic description of what +// a single comment group says. Both the legacy regex parser (via +// the TypeIndex scan path) and the v2 grammar parser lift their +// interpretation into this view so the harness can diff at a +// common level instead of comparing interpreted spec.* output. +// +// - AssertGoldenView drives the harness: parse, normalise, and +// compare against a committed JSON snapshot under +// testdata/golden/. UPDATE_GOLDEN=1 rewrites snapshots when the +// v2 parser deliberately changes output. +// +// Pre-P5 the harness exercises only the v2 path (there is no v2 +// builder to exit through yet). The v1 adapter lands when P5 +// bridge-taggers do, giving us the old-vs-new diff the plan calls +// for. Treating the harness as "v2 regression suite first, full +// parity runner second" keeps it useful without blocking on P5. +package grammartest + +import ( + "encoding/json" + "fmt" + "go/ast" + "go/parser" + "go/token" + "os" + "path/filepath" + "sort" + "testing" + + "github.com/go-openapi/codescan/internal/parsers/grammar" +) + +// NormalizedCommentView is the diff-friendly representation of a +// single parsed comment group. Fields are JSON-serialisable and +// sorted where order is not meaningful (extensions, YAML bodies are +// kept in source order). +type NormalizedCommentView struct { + // AnnotationKind is the top-level swagger: (or "unknown" + // for unbound comments). + AnnotationKind string `json:"annotationKind"` + + // AnnotationArgs holds the kind-specific positional arguments + // resolved by the parser — empty for annotations with none. + AnnotationArgs AnnotationArgs `json:"annotationArgs,omitzero"` + + Title string `json:"title,omitempty"` + Description string `json:"description,omitempty"` + + Properties []NormalizedProperty `json:"properties,omitempty"` + YAMLBlocks []NormalizedYAMLBlock `json:"yamlBlocks,omitempty"` + Extensions []NormalizedExtension `json:"extensions,omitempty"` + + // Diagnostics are sorted by Code to make diffs deterministic + // (severity, message text may vary). + Diagnostics []NormalizedDiagnostic `json:"diagnostics,omitempty"` +} + +// AnnotationArgs captures the per-kind positional data the parser +// extracts. Only fields relevant to the emitted block's kind are +// populated; others stay zero and are omitted from JSON. +type AnnotationArgs struct { + Name string `json:"name,omitempty"` + Method string `json:"method,omitempty"` + Path string `json:"path,omitempty"` + Tags string `json:"tags,omitempty"` + OpID string `json:"opId,omitempty"` + TargetTypes []string `json:"targetTypes,omitempty"` +} + +// NormalizedProperty is a Block Property in a diff-stable form. Value +// and Typed are both captured — raw string for eye-readability in the +// golden file, typed forms for exact assertions. +type NormalizedProperty struct { + Keyword string `json:"keyword"` + Value string `json:"value,omitempty"` + ItemsDepth int `json:"itemsDepth,omitempty"` + Body []string `json:"body,omitempty"` + + // Typed is populated only for parse-time-convertible ValueTypes + // (Number, Integer, Boolean, StringEnum); otherwise nil. + Typed *NormalizedTyped `json:"typed,omitempty"` +} + +// NormalizedTyped is the typed-value subtree. Only one of the +// scalar fields is populated per the Type label. +type NormalizedTyped struct { + Type string `json:"type"` // "number" | "integer" | "boolean" | "string-enum" + + Op string `json:"op,omitempty"` // only for number + Number float64 `json:"number,omitempty"` + Integer int64 `json:"integer,omitempty"` + Boolean bool `json:"boolean,omitempty"` + String string `json:"string,omitempty"` +} + +// NormalizedYAMLBlock holds one captured --- fence body. +type NormalizedYAMLBlock struct { + Text string `json:"text"` +} + +// NormalizedExtension is one x-* / non-x- entry from an extensions +// block. +type NormalizedExtension struct { + Name string `json:"name"` + Value string `json:"value,omitempty"` +} + +// NormalizedDiagnostic captures the Code + Severity of a diagnostic. +// Position/Message are intentionally elided — message wording may +// change without a real regression; position noise can dominate the +// diff. The code is the stable contract. +type NormalizedDiagnostic struct { + Code string `json:"code"` + Severity string `json:"severity"` +} + +// ViewFromBlock converts a grammar.Block into its normalized view. +// Stable for diffing: the function is pure, produces deterministic +// output for identical inputs, and sorts Diagnostics by (code, +// severity) so transient ordering can't flake tests. +func ViewFromBlock(b grammar.Block) NormalizedCommentView { + v := NormalizedCommentView{ + AnnotationKind: b.AnnotationKind().String(), + Title: b.Title(), + Description: b.Description(), + } + + addAnnotationArgs(&v, b) + + for p := range b.Properties() { + v.Properties = append(v.Properties, normalizeProperty(p)) + } + for y := range b.YAMLBlocks() { + v.YAMLBlocks = append(v.YAMLBlocks, NormalizedYAMLBlock{Text: y.Text}) + } + for e := range b.Extensions() { + v.Extensions = append(v.Extensions, NormalizedExtension{Name: e.Name, Value: e.Value}) + } + v.Diagnostics = normalizeDiagnostics(b.Diagnostics()) + + return v +} + +// addAnnotationArgs fills AnnotationArgs from the concrete Block +// kind, via a type-switch that's exhaustive over the current family. +func addAnnotationArgs(v *NormalizedCommentView, b grammar.Block) { + switch tb := b.(type) { + case *grammar.ModelBlock: + v.AnnotationArgs.Name = tb.Name + case *grammar.ResponseBlock: + v.AnnotationArgs.Name = tb.Name + case *grammar.ParametersBlock: + v.AnnotationArgs.TargetTypes = append([]string(nil), tb.TargetTypes...) + case *grammar.RouteBlock: + v.AnnotationArgs.Method = tb.Method + v.AnnotationArgs.Path = tb.Path + v.AnnotationArgs.Tags = tb.Tags + v.AnnotationArgs.OpID = tb.OpID + case *grammar.OperationBlock: + v.AnnotationArgs.Method = tb.Method + v.AnnotationArgs.Path = tb.Path + v.AnnotationArgs.Tags = tb.Tags + v.AnnotationArgs.OpID = tb.OpID + case *grammar.MetaBlock, *grammar.UnboundBlock: + // No kind-specific positional args. + } +} + +func normalizeProperty(p grammar.Property) NormalizedProperty { + np := NormalizedProperty{ + Keyword: p.Keyword.Name, + Value: p.Value, + ItemsDepth: p.ItemsDepth, + } + if len(p.Body) > 0 { + np.Body = append([]string(nil), p.Body...) + } + if p.Typed.Type != grammar.ValueNone { + np.Typed = &NormalizedTyped{ + Type: p.Typed.Type.String(), + Op: p.Typed.Op, + Number: p.Typed.Number, + Integer: p.Typed.Integer, + Boolean: p.Typed.Boolean, + String: p.Typed.String, + } + } + return np +} + +func normalizeDiagnostics(diags []grammar.Diagnostic) []NormalizedDiagnostic { + if len(diags) == 0 { + return nil + } + out := make([]NormalizedDiagnostic, len(diags)) + for i, d := range diags { + out[i] = NormalizedDiagnostic{ + Code: string(d.Code), + Severity: d.Severity.String(), + } + } + sort.Slice(out, func(i, j int) bool { + if out[i].Code != out[j].Code { + return out[i].Code < out[j].Code + } + return out[i].Severity < out[j].Severity + }) + return out +} + +// ParseSourceToViews runs the v2 grammar parser over every comment +// group attached to a declaration in the given Go source snippet, +// returning one NormalizedCommentView per group. Deterministic +// ordering: source order of the declarations, which go/parser +// preserves. +func ParseSourceToViews(t *testing.T, src string) []NormalizedCommentView { + t.Helper() + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, "t.go", src, parser.ParseComments) + if err != nil { + t.Fatalf("parse source: %v", err) + } + p := grammar.NewParser(fset) + + var views []NormalizedCommentView + for _, decl := range f.Decls { + cg := docOf(decl) + if cg == nil { + continue + } + views = append(views, ViewFromBlock(p.Parse(cg))) + } + return views +} + +func docOf(decl ast.Decl) *ast.CommentGroup { + switch d := decl.(type) { + case *ast.GenDecl: + return d.Doc + case *ast.FuncDecl: + return d.Doc + } + return nil +} + +// AssertGoldenView compares the given views to the JSON snapshot at +// path. When the env var UPDATE_GOLDEN=1 is set, the snapshot is +// rewritten (matching the project's existing scantest convention). +// Otherwise an unexpected mismatch fails the test with a diff-ready +// error. +func AssertGoldenView(t *testing.T, path string, views []NormalizedCommentView) { + t.Helper() + got, err := json.MarshalIndent(views, "", " ") + if err != nil { + t.Fatalf("marshal views: %v", err) + } + // Trailing newline so editors don't complain. + got = append(got, '\n') + + if os.Getenv("UPDATE_GOLDEN") == "1" { + // Golden files are committed to git and read by humans/CI; + // standard perms, no secrets. + const ( + dirMode = 0o755 + fileMode = 0o644 + ) + if err := os.MkdirAll(filepath.Dir(path), dirMode); err != nil { + t.Fatalf("mkdir golden: %v", err) + } + if err := os.WriteFile(path, got, fileMode); err != nil { + t.Fatalf("write golden: %v", err) + } + return + } + + want, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read golden %s: %v\n(run with UPDATE_GOLDEN=1 to create)", path, err) + } + if string(got) != string(want) { + t.Fatalf( + "golden mismatch at %s\n--- want (%d bytes)\n%s\n--- got (%d bytes)\n%s\n(run with UPDATE_GOLDEN=1 to accept)", + path, len(want), want, len(got), got, + ) + } + _ = fmt.Sprintf // keep fmt used if trimmed +} diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index a2a6a45..66ef3ea 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -201,7 +201,17 @@ func (p *parseState) parse() Block { } else { base = newBaseBlock(AnnUnknown, firstMeaningfulPos(p.tokens)) typed = &UnboundBlock{baseBlock: base} - post = p.tokens + // For UnboundBlock (no annotation, e.g., a struct-field + // docstring), split at the first body token so the prose + // prelude still becomes Title/Description. Without this, + // `// Name of the user.\n// required: true` loses the + // docstring entirely. + if splitIdx := findBodyStart(p.tokens); splitIdx >= 0 { + pre = p.tokens[:splitIdx] + post = p.tokens[splitIdx:] + } else { + pre = p.tokens + } } p.parseTitleDesc(base, pre) @@ -303,6 +313,27 @@ func findAnnotation(tokens []Token) int { return -1 } +// findBodyStart returns the index of the first "body" token — a +// keyword, YAML fence, or raw YAML line — or -1 if the stream is +// entirely prose (TEXT + BLANK + EOF). Used to split UnboundBlock +// tokens into a Title/Description prelude and a property body. +func findBodyStart(tokens []Token) int { + for i, t := range tokens { + switch t.Kind { + case TokenKeywordValue, TokenKeywordBlockHead, + TokenYAMLFence, TokenRawLine: + return i + case TokenEOF, TokenBlank, TokenText, TokenAnnotation: + // Prose / control tokens — keep scanning. + default: + // Future kinds: err on the side of treating them as + // body so analyzers notice. + return i + } + } + return -1 +} + // firstMeaningfulPos returns the Pos of the first non-blank, non-EOF // token — i.e., the reasonable "position" of a comment group that has // no annotation. From 4cf0c41c1545412cc3939e9d390dcf3ee13f014f Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 21:03:37 +0200 Subject: [PATCH 25/46] =?UTF-8?q?test(integration):=20enum=20override=20se?= =?UTF-8?q?mantics=20reference=20(W2=20=C2=A72.6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add fixtures/enhancements/enum-overrides/ and TestCoverage_EnumOverrides to pin the v1 behavior for five enum cases that W2 needs to answer before P5.1: A. swagger:enum + matching consts -> const inference B. inline comma-list only, no consts -> inline C. inline JSON-array only, no consts -> inline D. swagger:enum with NO matching consts -> empty schema E. swagger:enum + matching consts + inline -> inline WINS The golden snapshot confirms Fred's proposed override semantics hold in v1 today: case E renders enum=["urgent","normal"] from the inline annotation even though PriorityE has three const values ("low", "medium", "high"). So the v2 parser migration inherits this rule rather than diverging. Things the golden also surfaces (non-parity items, captured here so they aren't re-discovered during P5): - comma-list splitter preserves leading whitespace: B renders ["low"," medium"," high"] with literal spaces. A v1 quirk; P5 bridge-tagger should strip per-value whitespace (or the new internal/parsers/enum/ sub-parser should). - case D emits a property with NO type and NO enum. The swagger:enum annotation is silently ignored when no consts match. P5 should surface a diagnostic ("swagger:enum TypeName resolved to zero values"). - case E retains x-go-enum-desc describing the const values even though the inline override wins. Stale vendor-extension; P5 should drop x-go-enum-desc when the inline override takes precedence. This golden is the factual v1 reference. Any v2 divergence during P5.1 will be explicit (new golden, documented rationale). Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- fixtures/enhancements/enum-overrides/types.go | 112 ++++++++++++++++++ .../golden/enhancements_enum_overrides.json | 94 +++++++++++++++ .../integration/coverage_enhancements_test.go | 27 +++++ 3 files changed, 233 insertions(+) create mode 100644 fixtures/enhancements/enum-overrides/types.go create mode 100644 fixtures/integration/golden/enhancements_enum_overrides.json diff --git a/fixtures/enhancements/enum-overrides/types.go b/fixtures/enhancements/enum-overrides/types.go new file mode 100644 index 0000000..a02e70b --- /dev/null +++ b/fixtures/enhancements/enum-overrides/types.go @@ -0,0 +1,112 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +// Package enum_overrides isolates the v1 behavior of `enum:` when it +// coexists with (or replaces) `swagger:enum TypeName` const-value +// inference. The golden output of TestCoverage_EnumOverrides is the +// factual reference for what the v2 parser migration must preserve +// — or consciously diverge from — under W2's override semantics +// (`.claude/plans/workshops/w2-enum.md` §2.6). +// +// Five cases, one per model in this file: +// +// A. swagger:enum + matching consts, no inline enum on field → consts +// B. inline comma-list on field, no swagger:enum, no consts → inline +// C. inline JSON array on field, no swagger:enum, no consts → inline +// D. swagger:enum but NO matching consts in package → ? +// E. swagger:enum + matching consts + inline enum on field → ? +package enum_overrides + +// --- Case A: swagger:enum + matching consts --- + +// PriorityA is a classic linked-const enum. +// +// swagger:enum PriorityA +type PriorityA string + +const ( + PriorityALow PriorityA = "low" + PriorityAMed PriorityA = "medium" + PriorityAHigh PriorityA = "high" +) + +// NotificationA exercises case A: field uses PriorityA, no inline +// enum override. +// +// swagger:model NotificationA +type NotificationA struct { + // required: true + ID int64 `json:"id"` + + // The priority level. Enum values come from PriorityA's consts. + Priority PriorityA `json:"priority"` +} + +// --- Case B: inline comma-list on field, no swagger:enum --- + +// NotificationB exercises case B: plain string field with inline +// comma-list enum. No swagger:enum on the type, no consts in code. +// +// swagger:model NotificationB +type NotificationB struct { + // The priority level. + // + // enum: low, medium, high + Priority string `json:"priority"` +} + +// --- Case C: inline JSON-array on field, no swagger:enum --- + +// NotificationC exercises case C: inline JSON-array enum. +// +// swagger:model NotificationC +type NotificationC struct { + // The priority level. + // + // enum: ["low","medium","high"] + Priority string `json:"priority"` +} + +// --- Case D: swagger:enum with no matching consts --- + +// PriorityD has a swagger:enum annotation but no corresponding +// const declarations in this package. The builder's FindEnumValues +// call returns an empty slice; the test captures how the spec +// renders in that case. +// +// swagger:enum PriorityD +type PriorityD string + +// NotificationD exercises case D. +// +// swagger:model NotificationD +type NotificationD struct { + // The priority level. + Priority PriorityD `json:"priority"` +} + +// --- Case E: swagger:enum + matching consts + inline override --- + +// PriorityE has both a linked-const set AND fields will provide an +// inline override. +// +// swagger:enum PriorityE +type PriorityE string + +const ( + PriorityELow PriorityE = "low" + PriorityEMed PriorityE = "medium" + PriorityEHigh PriorityE = "high" +) + +// NotificationE exercises case E: the inline enum on the field +// competes with the const-derived enum from PriorityE. The golden +// output captures which one wins in v1. +// +// swagger:model NotificationE +type NotificationE struct { + // Inline enum provides a narrower set than the const block. + // + // enum: urgent, normal + Priority PriorityE `json:"priority"` +} diff --git a/fixtures/integration/golden/enhancements_enum_overrides.json b/fixtures/integration/golden/enhancements_enum_overrides.json new file mode 100644 index 0000000..d185590 --- /dev/null +++ b/fixtures/integration/golden/enhancements_enum_overrides.json @@ -0,0 +1,94 @@ +{ + "swagger": "2.0", + "paths": {}, + "definitions": { + "NotificationA": { + "description": "NotificationA exercises case A: field uses PriorityA, no inline\nenum override.", + "type": "object", + "required": [ + "id" + ], + "properties": { + "id": { + "type": "integer", + "format": "int64", + "x-go-name": "ID" + }, + "priority": { + "description": "The priority level. Enum values come from PriorityA's consts.\nlow PriorityALow\nmedium PriorityAMed\nhigh PriorityAHigh", + "type": "string", + "enum": [ + "low", + "medium", + "high" + ], + "x-go-enum-desc": "low PriorityALow\nmedium PriorityAMed\nhigh PriorityAHigh", + "x-go-name": "Priority" + } + }, + "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/enum-overrides" + }, + "NotificationB": { + "description": "NotificationB exercises case B: plain string field with inline", + "type": "object", + "properties": { + "priority": { + "description": "The priority level.", + "type": "string", + "enum": [ + "low", + " medium", + " high" + ], + "x-go-name": "Priority" + } + }, + "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/enum-overrides" + }, + "NotificationC": { + "type": "object", + "title": "NotificationC exercises case C: inline JSON-array enum.", + "properties": { + "priority": { + "description": "The priority level.", + "type": "string", + "enum": [ + "low", + "medium", + "high" + ], + "x-go-name": "Priority" + } + }, + "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/enum-overrides" + }, + "NotificationD": { + "type": "object", + "title": "NotificationD exercises case D.", + "properties": { + "priority": { + "description": "The priority level.", + "x-go-name": "Priority" + } + }, + "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/enum-overrides" + }, + "NotificationE": { + "description": "NotificationE exercises case E: the inline enum on the field\ncompetes with the const-derived enum from PriorityE. The golden\noutput captures which one wins in v1.", + "type": "object", + "properties": { + "priority": { + "description": "Inline enum provides a narrower set than the const block.\nlow PriorityELow\nmedium PriorityEMed\nhigh PriorityEHigh", + "type": "string", + "enum": [ + "urgent", + " normal" + ], + "x-go-enum-desc": "low PriorityELow\nmedium PriorityEMed\nhigh PriorityEHigh", + "x-go-name": "Priority" + } + }, + "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/enum-overrides" + } + } +} \ No newline at end of file diff --git a/internal/integration/coverage_enhancements_test.go b/internal/integration/coverage_enhancements_test.go index 083ae09..d0bc625 100644 --- a/internal/integration/coverage_enhancements_test.go +++ b/internal/integration/coverage_enhancements_test.go @@ -206,6 +206,33 @@ func TestCoverage_EnumDocs(t *testing.T) { scantest.CompareOrDumpJSON(t, doc, "enhancements_enum_docs.json") } +// TestCoverage_EnumOverrides captures the v1 behavior for five +// enum-related cases that W2 needs to pin down before the P5.1 +// schema-builder migration: +// +// A. `swagger:enum` with matching consts — const inference +// B. inline `enum: a,b,c` only — inline only +// C. inline `enum: ["a","b","c"]` JSON form only — JSON inline only +// D. `swagger:enum` with NO matching consts — empty/??? case +// E. `swagger:enum` + matching consts + inline on — override question +// the field +// +// See `.claude/plans/workshops/w2-enum.md` §2.6 and +// `fixtures/enhancements/enum-overrides/types.go` for the fixture. +// The golden snapshot becomes the v1-behavior contract the v2 +// migration either preserves or consciously diverges from. +func TestCoverage_EnumOverrides(t *testing.T) { + doc, err := codescan.Run(&codescan.Options{ + Packages: []string{"./enhancements/enum-overrides/..."}, + WorkDir: scantest.FixturesDir(), + ScanModels: true, + }) + require.NoError(t, err) + require.NotNil(t, doc) + + scantest.CompareOrDumpJSON(t, doc, "enhancements_enum_overrides.json") +} + func TestCoverage_TextMarshal(t *testing.T) { doc, err := codescan.Run(&codescan.Options{ Packages: []string{"./enhancements/text-marshal/..."}, From 247378f728e4b70a3a886b23da8a02620cbb9a37 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 21:09:16 +0200 Subject: [PATCH 26/46] fix(grammar_test): CRLF-tolerant golden comparison MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Windows CI fails on the new grammar_test AssertGoldenView harness because git's default core.autocrlf=true converts line endings on checkout. The existing internal/scantest harness sidesteps this by comparing JSON semantically (assert.JSONEqT), but AssertGoldenView does a byte-equal compare — exact by design so it catches field-order regressions and trailing-newline changes. Byte-equal + platform-dependent checkout = broken on Windows. Two narrow fixes: - Normalise CRLF -> LF on the read side only (bytes.ReplaceAll). `got` is always freshly produced by json.MarshalIndent + a trailing '\n', so it's LF-only; normalising `want` after ReadFile makes the compare platform-independent without loosening the "exact bytes expected" invariant. - Add .gitattributes pinning *.json and both golden directories to `eol=lf`. Belt-and-suspenders: prevents autocrlf from corrupting goldens for any other tool that does byte-level inspection (e.g., external diffs, editor tooling). Both fixes together mean a fresh Windows checkout produces LF-only golden files AND the harness tolerates CRLF-infected older checkouts until they're refreshed. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- .gitattributes | 6 ++++++ internal/parsers/grammar/grammar_test/view.go | 9 ++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..edb53f7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +# Golden test fixtures must keep LF line endings on every platform +# so byte-level comparison doesn't trip on Windows checkouts that +# default to core.autocrlf=true. +*.json text eol=lf +internal/parsers/grammar/grammar_test/testdata/golden/* text eol=lf +fixtures/integration/golden/* text eol=lf diff --git a/internal/parsers/grammar/grammar_test/view.go b/internal/parsers/grammar/grammar_test/view.go index 7085043..c576be4 100644 --- a/internal/parsers/grammar/grammar_test/view.go +++ b/internal/parsers/grammar/grammar_test/view.go @@ -23,6 +23,7 @@ package grammartest import ( + "bytes" "encoding/json" "fmt" "go/ast" @@ -283,7 +284,13 @@ func AssertGoldenView(t *testing.T, path string, views []NormalizedCommentView) if err != nil { t.Fatalf("read golden %s: %v\n(run with UPDATE_GOLDEN=1 to create)", path, err) } - if string(got) != string(want) { + // Normalise line endings: git on Windows checks files out with + // CRLF by default (core.autocrlf=true), which would otherwise + // break byte-equal comparison against the LF-only output of + // json.MarshalIndent. Normalising on the read side only is + // correct because `got` is always freshly LF-produced. + want = bytes.ReplaceAll(want, []byte("\r\n"), []byte("\n")) + if !bytes.Equal(got, want) { t.Fatalf( "golden mismatch at %s\n--- want (%d bytes)\n%s\n--- got (%d bytes)\n%s\n(run with UPDATE_GOLDEN=1 to accept)", path, len(want), want, len(got), got, From b40c2836e7f3f9cdc47e045f3c07574315434c1a Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 21:34:58 +0200 Subject: [PATCH 27/46] =?UTF-8?q?feat(scanner):=20P5.1=20step=203a=20?= =?UTF-8?q?=E2=80=94=20Options.UseGrammarParser=20flag?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the feature flag the P5 migration uses to coexist the legacy regex-based tagger pipeline and the new grammar-parser + bridge-tagger pipeline. Default false: no behavior change yet; bridge-tagger consumers land in subsequent P5.x commits. The flag is plumbing only at this step. Its roles unlock across the upcoming P5 work: - Routing seam in the scanner/builders (step 4): when true, the comment-group dispatch goes through grammar.Parser; when false, the legacy taggers run. - Dual-path parity harness (step 3b): runs codescan.Run twice per fixture, once per flag value, diffs the results via the NormalizedCommentView v1/v2 adapters. This is how every P5.x migration verifies parity. - P6 cutover: flag removed, grammar parser becomes the only path. codescan.Options is a type alias for scanner.Options, so codescan.Run callers see the field immediately — no public-API shim needed. Planning: .claude/plans/p5-builder-migrations.md §5.1. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/scanner/options.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/internal/scanner/options.go b/internal/scanner/options.go index 987b18a..f70526b 100644 --- a/internal/scanner/options.go +++ b/internal/scanner/options.go @@ -22,4 +22,18 @@ type Options struct { DescWithRef bool // allow overloaded descriptions together with $ref, otherwise jsonschema draft4 $ref predates everything SkipExtensions bool // skip generating x-go-* vendor extensions in the spec Debug bool // enable verbose debug logging during scanning + + // UseGrammarParser routes comment-group parsing through the v2 + // hand-rolled grammar parser at internal/parsers/grammar/ (plus + // bridge-taggers that call the existing ValidationBuilder / + // SwaggerTypable / … interfaces) instead of the legacy + // regex-based taggers. + // + // Default false: the legacy path runs unchanged. The flag is + // the dual-path coexistence seam used by the parity harness + // during the P5 migration (one run per value of the flag, outputs + // diffed). At P6 cutover the flag is removed and grammar-parser + // becomes the only path. See .claude/plans/p5-builder-migrations.md + // and grammar-parser-tasks.md P4.3 / P5 cross-cutting. + UseGrammarParser bool } From 44c9c81e3088b2021eee1a08a35ca5200d2f5c54 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 21:50:27 +0200 Subject: [PATCH 28/46] =?UTF-8?q?test(integration):=20P5.1=20step=203b=20?= =?UTF-8?q?=E2=80=94=20TestParity=20harness?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add internal/integration/parity_test.go: runs every fixture twice, once per UseGrammarParser value, and asserts the resulting *spec.Swagger values are JSON-equal. Twenty-one fixtures covering enhancements/* and goparsing/petstore + goparsing/bookings. Design rationale — spec-level compare, not view-level: - Measures the user-observable contract (the spec). v1 and v2 producing identical specs through different internal paths is by definition not a user-observable difference. - Reuses the existing fixture corpus (21 TestCoverage_* fixtures become 21 parity cases) with zero reconstruction. - No lossy reverse-engineering from post-build spec to per-comment-group views — the alternative v1-adapter approach would have had to map Schema.Properties["foo"] back to its source *ast.CommentGroup, which is intrinsically fragile (multi-comment sites, synthesised fields). - Failure messages surface the exact diverging JSON path, which is what you need to debug. See .claude/plans/p5-builder-migrations.md §5 for the full discussion (rejecting the view-level adapter). With the flag currently a no-op, the suite passes trivially. Validates the harness plumbing (parallel t.Run, Options cloning, WorkDir injection, error handling) before bridge-taggers land in step 6. When the flag starts flipping pipelines, TestParity becomes the per-commit safety net. Excluded fixtures: - UnknownAnnotation — intentionally an error-expected path, no spec to compare. - malformed/* — same reason. These error paths get their own non-parity coverage in the existing TestCoverage_* suite. Tactical test — **P6 cutover deletes this file in the same commit that removes Options.UseGrammarParser** (grammar-parser-tasks.md P6.4 records the obligation; p5-builder-migrations.md §5.3 the rationale). With no flag, the test has no dual paths to diff and would become pure CI burden. Also cross-referenced from forthcoming-features.md §5.0. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/integration/parity_test.go | 123 ++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 internal/integration/parity_test.go diff --git a/internal/integration/parity_test.go b/internal/integration/parity_test.go new file mode 100644 index 0000000..74a3cd8 --- /dev/null +++ b/internal/integration/parity_test.go @@ -0,0 +1,123 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package integration_test + +import ( + "encoding/json" + "testing" + + "github.com/go-openapi/codescan" + "github.com/go-openapi/codescan/internal/scantest" + "github.com/go-openapi/testify/v2/require" + + oaispec "github.com/go-openapi/spec" +) + +// TestParity is the migration safety net for P5 builder migrations: +// every fixture in parityFixtures is scanned twice — once with the +// legacy regex-based pipeline, once with the v2 grammar-parser + +// bridge-taggers pipeline — and the resulting `*spec.Swagger` +// values are JSON-compared. +// +// Design rationale: spec-level compare measures the user-observable +// contract. See `.claude/plans/p5-builder-migrations.md` §5 for the +// full design discussion (why this over a view-level v1 adapter). +// +// Lifetime: **tactical**, not permanent. At P6 cutover the +// `Options.UseGrammarParser` flag is removed and this file is +// deleted in the same commit (P6.4). With only one path left, the +// test has no dual runs to diff — it becomes pure CI burden. +// +// Adding fixtures: append a new entry to parityFixtures below. +// Ordering doesn't matter — each case is independent and t.Run +// parallelises them. +func TestParity(t *testing.T) { + for _, tc := range parityFixtures { + t.Run(tc.Name, func(t *testing.T) { + t.Parallel() + docV1 := runFixture(t, tc.Opts, false) + docV2 := runFixture(t, tc.Opts, true) + assertSpecsEqual(t, docV1, docV2) + }) + } +} + +// parityFixture names one fixture scan that must produce the same +// spec under both pipeline values. Opts is cloned in runFixture +// before UseGrammarParser is set, so the template stays immutable. +type parityFixture struct { + Name string + Opts codescan.Options +} + +//nolint:gochecknoglobals // migration-scoped test table; removed at P6 cutover alongside the flag. +var parityFixtures = []parityFixture{ + // fixtures/enhancements/ — each entry mirrors a TestCoverage_* test in + // coverage_enhancements_test.go. Entries that intentionally exercise + // error paths (e.g. UnknownAnnotation, malformed/*) are NOT included — + // they don't produce comparable spec output. + {"EmbeddedTypes", codescan.Options{Packages: pkgs("./enhancements/embedded-types/..."), ScanModels: true}}, + {"AllOfEdges", codescan.Options{Packages: pkgs("./enhancements/allof-edges/..."), ScanModels: true}}, + {"StrfmtArrays", codescan.Options{Packages: pkgs("./enhancements/strfmt-arrays/..."), ScanModels: true}}, + {"DefaultsExamples", codescan.Options{Packages: pkgs("./enhancements/defaults-examples/..."), ScanModels: true}}, + {"InterfaceMethods", codescan.Options{Packages: pkgs("./enhancements/interface-methods/..."), ScanModels: true}}, + {"InterfaceMethodsXNullable", codescan.Options{Packages: pkgs("./enhancements/interface-methods/..."), ScanModels: true, SetXNullableForPointers: true}}, + {"AliasExpand", codescan.Options{Packages: pkgs("./enhancements/alias-expand/..."), ScanModels: true}}, + {"AliasRef", codescan.Options{Packages: pkgs("./enhancements/alias-expand/..."), ScanModels: true, RefAliases: true}}, + {"AliasResponseRef", codescan.Options{Packages: pkgs("./enhancements/alias-response/..."), ScanModels: true, RefAliases: true}}, + {"ResponseEdges", codescan.Options{Packages: pkgs("./enhancements/response-edges/..."), ScanModels: true}}, + {"NamedBasic", codescan.Options{Packages: pkgs("./enhancements/named-basic/..."), ScanModels: true}}, + {"SwaggerTypeArray", codescan.Options{Packages: pkgs("./enhancements/swagger-type-array/..."), ScanModels: true}}, + {"RefAliasChain", codescan.Options{Packages: pkgs("./enhancements/ref-alias-chain/..."), ScanModels: true, RefAliases: true}}, + {"EnumDocs", codescan.Options{Packages: pkgs("./enhancements/enum-docs/..."), ScanModels: true}}, + {"EnumOverrides", codescan.Options{Packages: pkgs("./enhancements/enum-overrides/..."), ScanModels: true}}, + {"TextMarshal", codescan.Options{Packages: pkgs("./enhancements/text-marshal/..."), ScanModels: true}}, + {"AllHTTPMethods", codescan.Options{Packages: pkgs("./enhancements/all-http-methods/...")}}, + {"NamedStructTags", codescan.Options{Packages: pkgs("./enhancements/named-struct-tags/..."), ScanModels: true}}, + {"NamedStructTagsRef", codescan.Options{Packages: pkgs("./enhancements/named-struct-tags-ref/..."), ScanModels: true}}, + {"TopLevelKinds", codescan.Options{Packages: pkgs("./enhancements/top-level-kinds/..."), ScanModels: true}}, + // fixtures/goparsing/ + {"Petstore", codescan.Options{Packages: pkgs("./goparsing/petstore/...")}}, + {"Bookings", codescan.Options{Packages: pkgs("./goparsing/bookings/..."), ScanModels: true}}, +} + +// pkgs is a tiny alias for []string — it makes the fixture table +// readable at a glance (the Packages field dominates the line +// otherwise). +func pkgs(p ...string) []string { return p } + +// runFixture scans tc.Opts with UseGrammarParser=useGrammar and +// returns the resulting spec. The template Options is cloned so +// the table stays unmodified; WorkDir is injected once here rather +// than duplicated in every table row. +func runFixture(t *testing.T, template codescan.Options, useGrammar bool) *oaispec.Swagger { + t.Helper() + opts := template // value copy + opts.WorkDir = scantest.FixturesDir() + opts.UseGrammarParser = useGrammar + doc, err := codescan.Run(&opts) + require.NoError(t, err) + require.NotNil(t, doc) + return doc +} + +// assertSpecsEqual marshals both specs to indented JSON and +// diffs as strings. This is stricter than reflect.DeepEqual (it +// catches field-order differences in slices) and produces a +// human-readable diff path on failure. +func assertSpecsEqual(t *testing.T, v1, v2 *oaispec.Swagger) { + t.Helper() + v1JSON, err := json.MarshalIndent(v1, "", " ") + require.NoError(t, err) + v2JSON, err := json.MarshalIndent(v2, "", " ") + require.NoError(t, err) + if string(v1JSON) != string(v2JSON) { + t.Fatalf( + "parity mismatch — v1 (legacy) vs v2 (grammar) differ:\n"+ + "--- v1 (%d bytes) ---\n%s\n"+ + "--- v2 (%d bytes) ---\n%s\n", + len(v1JSON), v1JSON, len(v2JSON), v2JSON, + ) + } +} From 5675a66832f2811b934d9159785983ba81783333 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Tue, 21 Apr 2026 21:57:10 +0200 Subject: [PATCH 29/46] =?UTF-8?q?feat(parsers):=20P5.1=20step=205=20?= =?UTF-8?q?=E2=80=94=20internal/parsers/enum/=20sub-parser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per W2 §3 decision, introduce the enum sub-parser that the schema / parameters / responses bridge-taggers will call at step 8+ of the P5 migration. Sibling of internal/parsers/yaml/ — imported only by the analyzer layer; the main grammar parser stays oblivious to enum value shape (verified: grammar package does not import enum). API — deliberately minimal: func Parse(raw string) (values []any, fallbackErr error) Shape detection: - Leading `[` (after TrimSpace) -> JSON-array path via encoding/json. Non-scalar values (objects, arrays, null) pass through natively, supporting OAI's any-JSON-type enum semantics (W2 §2.4 / forthcoming §1.3 audit). - Otherwise -> comma-list, each value TrimSpace'd. This **fixes v1's case-B whitespace quirk** (W2 §2.6): where v1 produced `["red", " green", " blue"]`, v2 produces `["red","green","blue"]`. Narrow JSON detection by design: only a leading `[` triggers the JSON path. Inputs like `{"k":"v"}`, bare `null`, `42` go through the comma-list path (matching v1 parity: users who want structured values wrap them in `[...]`). Documented and locked in via TestParseDetectionIsNarrow. Fallback behavior: when input LOOKS like JSON (leading `[`) but fails to parse, Parse falls back to comma-list AND returns a non-nil fallbackErr wrapped with an "enum:" prefix. Bridge-taggers surface this as a SeverityWarning diagnostic rather than aborting, matching v1's forgiving semantics. Empty/whitespace-only input -> (nil, nil). A fenced but empty enum is a no-op. Tests (enum_test.go) exercise: - empty/whitespace handling - comma-list: basic, whitespace-trimmed (case-B fix), tab-separated, single value, dropped-empty-entries - JSON array: strings, numbers (float64 per JSON rules), mixed types incl. objects / arrays / null, commas-inside-strings (which comma-list can't do), leading-whitespace tolerance - Fallback path: malformed JSON retains error, error prefix - Narrow-detection contract: {"k":"v"} / null / 42 stay comma-list single-values Exported sentinel ErrEmptyOrNullArray satisfies err113 for an ambiguous-shape JSON result the caller may want to log. Type coercion (e.g., "42" -> int64(42) when field type is int) is deliberately NOT in the sub-parser — that's bridge-tagger territory (W2 §3 / P5.1 plan doc §6). The sub-parser returns JSON-inferred types; the bridge-tagger coerces using go/types info at the call site. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/parsers/enum/enum.go | 113 +++++++++++++++ internal/parsers/enum/enum_test.go | 217 +++++++++++++++++++++++++++++ 2 files changed, 330 insertions(+) create mode 100644 internal/parsers/enum/enum.go create mode 100644 internal/parsers/enum/enum_test.go diff --git a/internal/parsers/enum/enum.go b/internal/parsers/enum/enum.go new file mode 100644 index 0000000..619a2c8 --- /dev/null +++ b/internal/parsers/enum/enum.go @@ -0,0 +1,113 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +// Package enum is a thin sub-parser for the two surface forms of the +// `enum:` keyword — a comma-separated list and a JSON array — used +// by the v2 grammar parser's schema/parameters/responses bridge- +// taggers at P5. +// +// Like `internal/parsers/yaml/`, this subpackage follows the +// sub-parser pattern from architecture §3.3: imported only by the +// analyzer layer (bridge-taggers), never by +// `internal/parsers/grammar/`. The main grammar parser captures the +// raw enum value as `Property.Value` verbatim; the bridge-tagger +// hands that raw string to enum.Parse. +// +// See `.claude/plans/workshops/w2-enum.md` §2 for the decisions +// this package implements, and `.claude/plans/p5-builder-migrations.md` +// §6 for the API rationale. +package enum + +import ( + "encoding/json" + "errors" + "fmt" + "strings" +) + +// ErrEmptyOrNullArray is returned by the JSON path when the input +// parses as null or an empty array — ambiguous input the caller +// may want to surface as a warning. +var ErrEmptyOrNullArray = errors.New("JSON array is empty or null") + +// Parse detects the surface form of raw and returns the parsed +// []any values. Shape detection: +// +// - Leading `[` (after leading whitespace) → JSON-array path +// (encoding/json). Values come out with JSON-inferred types: +// string stays string, numbers become float64, booleans stay +// bool, nulls become nil, objects become map[string]any, +// arrays become []any. +// +// - Otherwise → comma-list, each value TrimSpace'd so +// `"red, green, blue"` produces `["red","green","blue"]` +// (not `["red"," green"," blue"]` — this fixes v1's case-B +// whitespace quirk per W2 §2.6). +// +// Empty/whitespace-only input returns (nil, nil): a fenced but +// empty enum is a no-op, not an error. +// +// On malformed JSON input, Parse falls back to the comma-list +// path and returns a non-nil fallbackErr describing the JSON +// issue. The returned values slice is still populated from the +// fallback. Callers typically surface the incident as a +// SeverityWarning diagnostic via the bridge-tagger rather than +// aborting the parse. This matches v1's forgiving behavior. +// +// Non-scalar enum values (objects, arrays, null) are supported +// natively through the JSON path — the returned slice carries +// whatever JSON produced. The JSONSchema / OpenAPI `enum` +// property accepts any value type; emission policy is the +// bridge-tagger's concern. +func Parse(raw string) (values []any, fallbackErr error) { + trimmed := strings.TrimSpace(raw) + if trimmed == "" { + return nil, nil + } + if strings.HasPrefix(trimmed, "[") { + values, err := parseJSON(trimmed) + if err == nil { + return values, nil + } + // JSON-looking input that failed to parse — fall back to + // comma-list per v1 parity. Return the JSON error so the + // caller can surface it as a warning. + return parseCommaList(raw), fmt.Errorf("enum: %w", err) + } + return parseCommaList(raw), nil +} + +// parseJSON unmarshals a JSON array into []any. Rejects top-level +// non-array JSON (e.g., a bare scalar, object) with an explicit +// error so the fallback path doesn't silently eat structured +// input the caller meant as an array. +func parseJSON(s string) ([]any, error) { + var result []any + if err := json.Unmarshal([]byte(s), &result); err != nil { + return nil, err + } + if result == nil { + return nil, ErrEmptyOrNullArray + } + return result, nil +} + +// parseCommaList splits s on `,`, trims whitespace per-value, +// and drops empty entries. The trimming is intentional and +// deliberately diverges from v1's behavior of preserving leading +// whitespace in each split segment (W2 §2.6). +func parseCommaList(s string) []any { + parts := strings.Split(s, ",") + out := make([]any, 0, len(parts)) + for _, p := range parts { + trimmed := strings.TrimSpace(p) + if trimmed == "" { + continue + } + out = append(out, trimmed) + } + if len(out) == 0 { + return nil + } + return out +} diff --git a/internal/parsers/enum/enum_test.go b/internal/parsers/enum/enum_test.go new file mode 100644 index 0000000..32a1485 --- /dev/null +++ b/internal/parsers/enum/enum_test.go @@ -0,0 +1,217 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package enum_test + +import ( + "reflect" + "strings" + "testing" + + "github.com/go-openapi/codescan/internal/parsers/enum" +) + +func TestParseEmpty(t *testing.T) { + cases := []string{"", " ", "\t\n "} + for _, in := range cases { + v, err := enum.Parse(in) + if err != nil { + t.Errorf("Parse(%q) returned err: %v", in, err) + } + if v != nil { + t.Errorf("Parse(%q): want nil, got %v", in, v) + } + } +} + +// --- comma-list path --- + +func TestParseCommaListBasic(t *testing.T) { + v, err := enum.Parse("red,green,blue") + if err != nil { + t.Fatalf("err: %v", err) + } + want := []any{"red", "green", "blue"} + if !reflect.DeepEqual(v, want) { + t.Errorf("got %v want %v", v, want) + } +} + +// TestParseCommaListTrimsWhitespace is the case-B fix from W2 §2.6: +// v1 preserves literal leading whitespace in each split segment, +// producing `["red", " green", " blue"]`. v2 diverges and trims. +func TestParseCommaListTrimsWhitespace(t *testing.T) { + v, err := enum.Parse("red, green, blue") + if err != nil { + t.Fatalf("err: %v", err) + } + want := []any{"red", "green", "blue"} + if !reflect.DeepEqual(v, want) { + t.Errorf("got %v want %v (case-B whitespace fix)", v, want) + } +} + +func TestParseCommaListWithTabs(t *testing.T) { + v, err := enum.Parse("\tred\t,\tgreen\t,\tblue\t") + if err != nil { + t.Fatalf("err: %v", err) + } + want := []any{"red", "green", "blue"} + if !reflect.DeepEqual(v, want) { + t.Errorf("got %v want %v", v, want) + } +} + +func TestParseCommaListDropsEmptyEntries(t *testing.T) { + // Trailing comma or ",," shouldn't produce empty-string values. + v, err := enum.Parse("a, ,b,") + if err != nil { + t.Fatalf("err: %v", err) + } + want := []any{"a", "b"} + if !reflect.DeepEqual(v, want) { + t.Errorf("got %v want %v", v, want) + } +} + +func TestParseCommaListSingleValue(t *testing.T) { + v, err := enum.Parse("solo") + if err != nil { + t.Fatalf("err: %v", err) + } + want := []any{"solo"} + if !reflect.DeepEqual(v, want) { + t.Errorf("got %v want %v", v, want) + } +} + +// --- JSON-array path --- + +func TestParseJSONArrayStrings(t *testing.T) { + v, err := enum.Parse(`["red","green","blue"]`) + if err != nil { + t.Fatalf("err: %v", err) + } + want := []any{"red", "green", "blue"} + if !reflect.DeepEqual(v, want) { + t.Errorf("got %v want %v", v, want) + } +} + +func TestParseJSONArrayNumbers(t *testing.T) { + // JSON numbers unmarshal as float64 in Go's default json. + v, err := enum.Parse(`[1, 2, 3.5]`) + if err != nil { + t.Fatalf("err: %v", err) + } + want := []any{float64(1), float64(2), 3.5} + if !reflect.DeepEqual(v, want) { + t.Errorf("got %v want %v", v, want) + } +} + +func TestParseJSONArrayMixedTypes(t *testing.T) { + // Objects, arrays, null are all legal enum values per OpenAPI. + // Survive through the JSON path. + v, err := enum.Parse(`["s", 42, true, null, {"k":"v"}, [1,2]]`) + if err != nil { + t.Fatalf("err: %v", err) + } + if len(v) != 6 { + t.Fatalf("want 6 elements, got %d: %v", len(v), v) + } + if v[0] != "s" { + t.Errorf("v[0]: got %v want s", v[0]) + } + if v[1] != float64(42) { + t.Errorf("v[1]: got %v want 42", v[1]) + } + if v[2] != true { + t.Errorf("v[2]: got %v want true", v[2]) + } + if v[3] != nil { + t.Errorf("v[3]: got %v want nil", v[3]) + } + if _, ok := v[4].(map[string]any); !ok { + t.Errorf("v[4]: want map, got %T", v[4]) + } + if _, ok := v[5].([]any); !ok { + t.Errorf("v[5]: want []any, got %T", v[5]) + } +} + +func TestParseJSONArrayWithCommasInStrings(t *testing.T) { + // The JSON path handles commas inside string values correctly + // — something the comma-list path can never do. + v, err := enum.Parse(`["a,b","c,d","e"]`) + if err != nil { + t.Fatalf("err: %v", err) + } + want := []any{"a,b", "c,d", "e"} + if !reflect.DeepEqual(v, want) { + t.Errorf("got %v want %v", v, want) + } +} + +func TestParseJSONArrayLeadingWhitespace(t *testing.T) { + // Detection looks past leading whitespace. + v, err := enum.Parse(` ["a","b"]`) + if err != nil { + t.Fatalf("err: %v", err) + } + if len(v) != 2 { + t.Errorf("want 2 values, got %d", len(v)) + } +} + +// --- fallback: malformed JSON falls back to comma-list --- + +func TestParseMalformedJSONFallsBack(t *testing.T) { + // Input looks like JSON (starts with `[`) but is malformed. + // Parse must fall back to comma-list AND return a non-nil + // fallbackErr so the caller can surface a warning. + v, err := enum.Parse(`[unclosed`) + if err == nil { + t.Fatal("want non-nil fallback err for malformed JSON") + } + // Fallback produced something: the raw string as-is treated as + // a single comma-list value. + if len(v) == 0 { + t.Errorf("fallback values must be non-empty, got %v", v) + } + if !strings.HasPrefix(err.Error(), "enum:") { + t.Errorf("error should carry the 'enum:' prefix for wrapping: %v", err) + } +} + +// TestParseDetectionIsNarrow documents the deliberate design: only +// a leading `[` triggers the JSON path. Non-bracket input is always +// comma-list, even if it happens to be valid JSON of another shape +// (object, scalar, null). This matches v1's narrow detection. A +// user writing `enum: {"k":"v"}` gets a single-value comma-list +// whose value is literally `{"k":"v"}` — they should quote-wrap +// with a JSON array (`[{"k":"v"}]`) if they want structured values. +func TestParseDetectionIsNarrow(t *testing.T) { + cases := []struct { + in string + want []any + }{ + // JSON-object-looking input: comma-list single value. + {`{"k":"v"}`, []any{`{"k":"v"}`}}, + // JSON null-looking input: comma-list single value "null". + {`null`, []any{"null"}}, + // Bare number: comma-list single value "42". + {`42`, []any{"42"}}, + } + for _, tc := range cases { + t.Run(tc.in, func(t *testing.T) { + v, err := enum.Parse(tc.in) + if err != nil { + t.Errorf("Parse(%q) returned err: %v (should be nil)", tc.in, err) + } + if !reflect.DeepEqual(v, tc.want) { + t.Errorf("Parse(%q) = %v, want %v", tc.in, v, tc.want) + } + }) + } +} From 48ce7539c3ffdad539d8099d9c48ff5cdc2a35b5 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 09:18:07 +0200 Subject: [PATCH 30/46] fix(parsers): Q1 v1 enum comma-list whitespace trim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First of four pre-P5 v1 quirk fixes (W2 §2.6). Current v1 behavior splits `enum: low, medium, high` on comma and preserves the leading space on each subsequent value, emitting `["low", " medium", " high"]`. TestCoverage_EnumOverrides captured this in case B (plain string field with inline comma-list) and case E (inline override with leading space on "normal"). Fix: in parsers.parseEnumOld, TrimSpace each value after splitting on comma. The JSON-array path in ParseEnum (strconv.Unquote on json.RawMessage) already preserves values verbatim — intentional whitespace inside quoted JSON strings survives, whitespace outside quotes is stripped by the JSON parser itself. So the two paths diverge in the "right" way now: comma-list is friendly (trimmed), JSON is literal (whole-string fidelity). Golden update affects two fields in enhancements_enum_overrides.json: - NotificationB.properties.priority.enum: "medium","high" lose leading spaces. - NotificationE.properties.priority.enum: "normal" loses its leading space. NotificationA (const-derived), NotificationC (JSON array), and NotificationD (no matching consts, no enum rendered) are unaffected. Matches v2's internal/parsers/enum/ behavior from commit 5675a66. Once the P5 schema migration lands and the bridge-tagger routes through the sub-parser, this fix keeps both paths converged. Q2/Q3/Q4 to follow. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- .../integration/golden/enhancements_enum_overrides.json | 6 +++--- internal/parsers/enum.go | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fixtures/integration/golden/enhancements_enum_overrides.json b/fixtures/integration/golden/enhancements_enum_overrides.json index d185590..a075820 100644 --- a/fixtures/integration/golden/enhancements_enum_overrides.json +++ b/fixtures/integration/golden/enhancements_enum_overrides.json @@ -37,8 +37,8 @@ "type": "string", "enum": [ "low", - " medium", - " high" + "medium", + "high" ], "x-go-name": "Priority" } @@ -82,7 +82,7 @@ "type": "string", "enum": [ "urgent", - " normal" + "normal" ], "x-go-enum-desc": "low PriorityELow\nmedium PriorityEMed\nhigh PriorityEHigh", "x-go-name": "Priority" diff --git a/internal/parsers/enum.go b/internal/parsers/enum.go index 0b4e62f..e66ebcb 100644 --- a/internal/parsers/enum.go +++ b/internal/parsers/enum.go @@ -79,9 +79,17 @@ func parseValueFromSchema(s string, schema *spec.SimpleSchema) (any, error) { } func parseEnumOld(val string, s *spec.SimpleSchema) []any { + // Trim per-value whitespace so `enum: low, medium, high` produces + // ["low", "medium", "high"] rather than the v1-legacy + // ["low", " medium", " high"] — a long-standing quirk documented + // as W2 §2.6 quirk 1 and fixed here to converge with the v2 + // enum sub-parser's behavior before the P5 schema migration. + // JSON-array values go through ParseEnum's quoted path (below) + // which preserves intentional whitespace inside quotes. list := strings.Split(val, ",") interfaceSlice := make([]any, len(list)) for i, d := range list { + d = strings.TrimSpace(d) v, err := parseValueFromSchema(d, s) if err != nil { interfaceSlice[i] = d From c2c03f3394d821ad7ac78298a8ce1b8da2368393 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 09:23:17 +0200 Subject: [PATCH 31/46] fix(schema): Q2 warn on swagger:enum with no matching consts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second of four pre-P5 v1 quirk fixes (W2 §2.6). Previously, `swagger:enum TypeName` on a type with no matching const values in the package was silently dropped — schema.go hit the `if len(enumValues) > 0` branch guard, set neither Enum nor EnumDescription, and returned nil. The resulting schema had no type, no enum, effectively invisible in the spec (case D in the enum-overrides golden). Fix: when FindEnumValues returns zero values: - Emit a WARNING via log.Printf naming the unresolvable enum TypeName (consistent with other WARNINGs in this file). - Drop the enum semantics (no WithEnum, no WithEnumDescription). - Do NOT return nil — fall through so the rest of the type- resolution engine can decide what to do with the underlying Go type (model, alias, strfmt, …). Per Fred: "leave the engine [to] decide". Golden delta for enhancements_enum_overrides.json case D: - NotificationD.properties.priority: was a type-less blank schema with just a description; now correctly $refs "#/definitions/PriorityD". - New PriorityD definition emitted: {type: string, x-go-package: …}. The named string type is now properly resolved through the engine's model-lookup path. Cases A / B / C / E unchanged. Next: Q3 (clear stale x-go-enum-desc when inline override wins). Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- .../golden/enhancements_enum_overrides.json | 7 +++++-- internal/builders/schema/schema.go | 16 ++++++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/fixtures/integration/golden/enhancements_enum_overrides.json b/fixtures/integration/golden/enhancements_enum_overrides.json index a075820..3d643ca 100644 --- a/fixtures/integration/golden/enhancements_enum_overrides.json +++ b/fixtures/integration/golden/enhancements_enum_overrides.json @@ -67,8 +67,7 @@ "title": "NotificationD exercises case D.", "properties": { "priority": { - "description": "The priority level.", - "x-go-name": "Priority" + "$ref": "#/definitions/PriorityD" } }, "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/enum-overrides" @@ -89,6 +88,10 @@ } }, "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/enum-overrides" + }, + "PriorityD": { + "type": "string", + "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/enum-overrides" } } } \ No newline at end of file diff --git a/internal/builders/schema/schema.go b/internal/builders/schema/schema.go index 0a5002d..23c72ab 100644 --- a/internal/builders/schema/schema.go +++ b/internal/builders/schema/schema.go @@ -454,13 +454,21 @@ func (s *Builder) buildNamedBasic(tio *types.TypeName, pkg *packages.Package, cm tgt.WithEnum(enumValues...) enumTypeName := reflect.TypeOf(enumValues[0]).String() _ = resolvers.SwaggerSchemaForType(enumTypeName, tgt) - } - if len(enumDesces) > 0 { - tgt.WithEnumDescription(strings.Join(enumDesces, "\n")) + if len(enumDesces) > 0 { + tgt.WithEnumDescription(strings.Join(enumDesces, "\n")) + } + + return nil } - return nil + // Q2: swagger:enum with no matching const values. Previously + // we silently returned here and the resulting schema had no + // type or enum (a confusing invisible failure). Now we warn + // and fall through so the type-resolution engine can still + // decide what to do with the underlying Go type (it may be + // a model, an alias, a strfmt, …). + log.Printf("WARNING: swagger:enum %s: no matching const values found; dropping enum semantics", enumName) } if defaultName, ok := parsers.DefaultName(cmt); ok { From 7a00f996f9a67732765d202adac7a8f15697c5dd Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 09:25:45 +0200 Subject: [PATCH 32/46] fix(schema): Q3 clear stale x-go-enum-desc on inline enum override MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Third of four pre-P5 v1 quirk fixes (W2 §2.6). When both forms coexist — `swagger:enum TypeName` on the type (sets Enum from const values + x-go-enum-desc extension + appends the description) AND a field-level `enum: ...` override — the field-level inline values correctly win on the Enum field itself (case E golden), but the const-derived x-go-enum-desc extension and its appended copy in Description stay stale. The rendered spec advertises descriptions for values that aren't in the enum any more. Fix: in schemaValidations.SetEnum — the field-level override path — if x-go-enum-desc is present (set by the type-level WithEnumDescription call earlier in the pipeline), delete it from Extensions AND strip the now-stale \n suffix from Description. Golden delta for enhancements_enum_overrides.json case E: - NotificationE.properties.priority.description: loses the "\nlow PriorityELow\n..." const-desc suffix. - NotificationE.properties.priority.x-go-enum-desc: removed. - Enum itself unchanged (["urgent","normal"] — inline). Cases A (const only, no override) and D (no consts) unaffected — their SetEnum is never called, so the new cleanup never runs. Next: Q4 (multi-line block body as YAML list — largest of the four; first real consumer of internal/parsers/yaml/). Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- .../golden/enhancements_enum_overrides.json | 3 +-- internal/builders/schema/typable.go | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/fixtures/integration/golden/enhancements_enum_overrides.json b/fixtures/integration/golden/enhancements_enum_overrides.json index 3d643ca..56be2d4 100644 --- a/fixtures/integration/golden/enhancements_enum_overrides.json +++ b/fixtures/integration/golden/enhancements_enum_overrides.json @@ -77,13 +77,12 @@ "type": "object", "properties": { "priority": { - "description": "Inline enum provides a narrower set than the const block.\nlow PriorityELow\nmedium PriorityEMed\nhigh PriorityEHigh", + "description": "Inline enum provides a narrower set than the const block.", "type": "string", "enum": [ "urgent", "normal" ], - "x-go-enum-desc": "low PriorityELow\nmedium PriorityEMed\nhigh PriorityEHigh", "x-go-name": "Priority" } }, diff --git a/internal/builders/schema/typable.go b/internal/builders/schema/typable.go index ab49562..f7d4eaa 100644 --- a/internal/builders/schema/typable.go +++ b/internal/builders/schema/typable.go @@ -4,6 +4,8 @@ package schema import ( + "strings" + "github.com/go-openapi/codescan/internal/builders/resolvers" "github.com/go-openapi/codescan/internal/ifaces" "github.com/go-openapi/codescan/internal/parsers" @@ -129,4 +131,19 @@ func (sv schemaValidations) SetEnum(val string) { typ = sv.current.Type[0] } sv.current.Enum = parsers.ParseEnum(val, &oaispec.SimpleSchema{Format: sv.current.Format, Type: typ}) + + // Q3: a field-level `enum: ...` overrides const-derived values. + // When the enum is replaced, any x-go-enum-desc previously set by + // the type-level `swagger:enum TypeName` pass is now stale — it + // describes values that aren't in the enum any more. The + // description text may also have had the enum-desc appended to it + // (see schema.go's WithSetDescription callback); strip that + // suffix so the rendered description isn't misleading. + if enumDesc := parsers.GetEnumDesc(sv.current.Extensions); enumDesc != "" { + delete(sv.current.Extensions, parsers.EnumDescExtension()) + sv.current.Description = strings.TrimSuffix( + strings.TrimSuffix(sv.current.Description, enumDesc), + "\n", + ) + } } From e0078d603de6376afaf85324e147f8d16d89604b Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 09:48:11 +0200 Subject: [PATCH 33/46] fix(parsers): Q4 multi-line block bodies via YAML-list sub-parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fourth and last pre-P5 v1 quirk fix (W2 §2.6). v1 previously treated multi-line block bodies — `consumes:`, `produces:` under swagger:meta and swagger:route/operation — as "strip everything leading, one value per non-empty line". This worked by ACCIDENT because rxUncommentHeaders ate the `- ` list marker along with the `//` comment prefix. Users happily wrote YAML-list syntax and got YAML-list semantics for free. The v2 preprocessor preserves `-` (needed for `---` fence detection, P1.10), so the accidental behavior breaks. Per Fred's design call, the RIGHT behavior is to capture the body raw and interpret it as a YAML list explicitly — no accidents, strict contract. Implementation: - New multilineYAMLListParser (parsers.go). Its Parse method: 1. Strip the comment prefix via new rxUncommentNoDash regex (rxUncommentHeaders minus the dash) to remove `//`, `*`, whitespace while preserving `- `. 2. Drop empty lines. 3. Join with \n and hand to internal/parsers/yaml/ (the P2.5 sub-parser, finally getting its first consumer). 4. Strict list: non-list result emits a `parse.invalid-block-body` warning and produces NO values (setter not called). - ConsumesDropEmptyParser / ProducesDropEmptyParser now wrap *multilineYAMLListParser instead of *multilineDropEmptyParser. Public API unchanged. - Tagger registrations (meta.go, routes/taggers.go) flip skipCleanUp=true so the external cleanupScannerLines with rxUncommentHeaders doesn't pre-strip the `-` markers before our YAML-aware Parse runs. - TOS and Security remain on their existing parser types (TOS is a joined text block, not a list; Security has its own scheme-parsing logic). Fixture updates — bare-format → YAML-list form: - fixtures/goparsing/spec/api.go — operation Consumes/Produces - fixtures/goparsing/bookings/api.go — operation Consumes/Produces - fixtures/goparsing/classification/operations/todo_operation.go — 52 bare-format lines across multiple operations - fixtures/goparsing/classification/operations_body/todo_operation_body.go — same Unit tests (parsers_test.go): - TestConsumesDropEmptyParser and TestProducesDropEmptyParser switched input to YAML-list form (with `- ` markers). - New TestMultilineYAMLListParserNonListDropsValues verifying the strict-list contract — scalar input produces a warning and no setter call. Integration: all 21 parity fixtures green, all TestCoverage_* green, all TestGoSwagger_GenerateJSONSpec_* green, all TestAppScanner_* green, TestRoutesParser + TestRoutesParserBody green. Completes the four pre-P5 quirk fixes (Q1 48ce753, Q2 c2c03f3, Q3 7a00f99, Q4 this commit). v1 and v2 now converge on the same target spec. P5 migration (step 6+) may begin. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- fixtures/goparsing/bookings/api.go | 4 +- .../operations/todo_operation.go | 48 +++++++------- .../operations_body/todo_operation_body.go | 56 ++++++++-------- fixtures/goparsing/spec/api.go | 4 +- internal/builders/routes/taggers.go | 6 +- internal/parsers/meta.go | 7 +- internal/parsers/parsers.go | 65 +++++++++++++++++-- internal/parsers/parsers_test.go | 18 ++++- internal/parsers/regexprs.go | 10 ++- 9 files changed, 150 insertions(+), 68 deletions(-) diff --git a/fixtures/goparsing/bookings/api.go b/fixtures/goparsing/bookings/api.go index 8d1522e..0dec00a 100644 --- a/fixtures/goparsing/bookings/api.go +++ b/fixtures/goparsing/bookings/api.go @@ -64,12 +64,12 @@ type BookingResponse struct { // Bookings lists all the appointments that have been made on the site. // // Consumes: -// application/json +// - application/json // // Schemes: http, https // // Produces: -// application/json +// - application/json // // Responses: // 200: BookingResponse diff --git a/fixtures/goparsing/classification/operations/todo_operation.go b/fixtures/goparsing/classification/operations/todo_operation.go index a12c6b5..0b4af29 100644 --- a/fixtures/goparsing/classification/operations/todo_operation.go +++ b/fixtures/goparsing/classification/operations/todo_operation.go @@ -30,12 +30,12 @@ func ServeAPI(host, basePath string, schemes []string) error { // You can get the pets that are out of stock // // Consumes: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Produces: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Schemes: http, https, ws, wss // @@ -81,12 +81,12 @@ func ServeAPI(host, basePath string, schemes []string) error { // lists orders filtered by some parameters. // // Consumes: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Produces: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Schemes: http, https, ws, wss // @@ -119,12 +119,12 @@ func ServeAPI(host, basePath string, schemes []string) error { // create an order based on the parameters. // // Consumes: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Produces: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Schemes: http, https, ws, wss // @@ -143,12 +143,12 @@ func ServeAPI(host, basePath string, schemes []string) error { // gets the details for an order. // // Consumes: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Produces: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Schemes: http, https, ws, wss // @@ -170,12 +170,12 @@ func ServeAPI(host, basePath string, schemes []string) error { // When the order doesn't exist this will return an error. // // Consumes: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Produces: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Schemes: http, https, ws, wss // @@ -194,12 +194,12 @@ func ServeAPI(host, basePath string, schemes []string) error { // delete a particular order. // // Consumes: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Produces: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Schemes: http, https, ws, wss // diff --git a/fixtures/goparsing/classification/operations_body/todo_operation_body.go b/fixtures/goparsing/classification/operations_body/todo_operation_body.go index beb9139..a407201 100644 --- a/fixtures/goparsing/classification/operations_body/todo_operation_body.go +++ b/fixtures/goparsing/classification/operations_body/todo_operation_body.go @@ -30,12 +30,12 @@ func ServeAPI(host, basePath string, schemes []string) error { // You can get the pets that are out of stock // // Consumes: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Produces: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Schemes: http, https, ws, wss // @@ -93,12 +93,12 @@ func ServeAPI(host, basePath string, schemes []string) error { // lists orders filtered by some parameters. // // Consumes: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Produces: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Schemes: http, https, ws, wss // @@ -133,12 +133,12 @@ func ServeAPI(host, basePath string, schemes []string) error { // create an order based on the parameters. // // Consumes: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Produces: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Schemes: http, https, ws, wss // @@ -169,12 +169,12 @@ func ServeAPI(host, basePath string, schemes []string) error { // gets the details for an order. // // Consumes: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Produces: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Schemes: http, https, ws, wss // @@ -195,12 +195,12 @@ func ServeAPI(host, basePath string, schemes []string) error { // When the order doesn't exist this will return an error. // // Consumes: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Produces: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Schemes: http, https, ws, wss // @@ -219,12 +219,12 @@ func ServeAPI(host, basePath string, schemes []string) error { // delete a particular order. // // Consumes: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Produces: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Schemes: http, https, ws, wss // @@ -243,12 +243,12 @@ func ServeAPI(host, basePath string, schemes []string) error { // Allow some params with constraints. // // Consumes: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Produces: - // application/json - // application/x-protobuf + // - application/json + // - application/x-protobuf // // Schemes: http, https, ws, wss // diff --git a/fixtures/goparsing/spec/api.go b/fixtures/goparsing/spec/api.go index 5234744..abfe8ff 100644 --- a/fixtures/goparsing/spec/api.go +++ b/fixtures/goparsing/spec/api.go @@ -70,14 +70,14 @@ type BookingResponse struct { // Bookings lists all the appointments that have been made on the site. // // Consumes: -// application/json +// - application/json // // Deprecated: true // // Schemes: http, https // // Produces: -// application/json +// - application/json // // Responses: // 200: BookingResponse diff --git a/internal/builders/routes/taggers.go b/internal/builders/routes/taggers.go index 440f717..8dd57c4 100644 --- a/internal/builders/routes/taggers.go +++ b/internal/builders/routes/taggers.go @@ -10,8 +10,10 @@ import ( func (r *Builder) routeTaggers(op *oaispec.Operation) []parsers.TagParser { return []parsers.TagParser{ - parsers.NewMultiLineTagParser("Consumes", parsers.NewConsumesDropEmptyParser(opConsumesSetter(op)), false), - parsers.NewMultiLineTagParser("Produces", parsers.NewProducesDropEmptyParser(opProducesSetter(op)), false), + // Q4: YAML-list bodies; skip external rxUncommentHeaders strip so + // `-` markers reach the YAML sub-parser intact. + parsers.NewMultiLineTagParser("Consumes", parsers.NewConsumesDropEmptyParser(opConsumesSetter(op)), true), + parsers.NewMultiLineTagParser("Produces", parsers.NewProducesDropEmptyParser(opProducesSetter(op)), true), parsers.NewSingleLineTagParser("Schemes", parsers.NewSetSchemes(opSchemeSetter(op))), parsers.NewMultiLineTagParser("Security", parsers.NewSetSecurityScheme(opSecurityDefsSetter(op)), false), parsers.NewMultiLineTagParser("Parameters", parsers.NewSetParams(r.parameters, opParamSetter(op)), false), diff --git a/internal/parsers/meta.go b/internal/parsers/meta.go index 2d357ca..06d9dd8 100644 --- a/internal/parsers/meta.go +++ b/internal/parsers/meta.go @@ -102,8 +102,11 @@ func NewMetaParser(swspec *spec.Swagger) *SectionedParser { sp.setDescription = func(lines []string) { info.Description = JoinDropLast(lines) } sp.taggers = []TagParser{ NewMultiLineTagParser("TOS", newMultilineDropEmptyParser(rxTOS, metaTOSSetter(info)), false), - NewMultiLineTagParser("Consumes", newMultilineDropEmptyParser(rxConsumes, metaConsumesSetter(swspec)), false), - NewMultiLineTagParser("Produces", newMultilineDropEmptyParser(rxProduces, metaProducesSetter(swspec)), false), + // Q4: Consumes/Produces bodies are YAML lists; skipCleanUp=true + // so the external rxUncommentHeaders pass doesn't strip the + // `-` list markers before our YAML-aware Parse sees them. + NewMultiLineTagParser("Consumes", NewConsumesDropEmptyParser(metaConsumesSetter(swspec)), true), + NewMultiLineTagParser("Produces", NewProducesDropEmptyParser(metaProducesSetter(swspec)), true), NewSingleLineTagParser("Schemes", NewSetSchemes(metaSchemeSetter(swspec))), NewMultiLineTagParser("Security", newSetSecurity(rxSecuritySchemes, metaSecuritySetter(swspec)), false), NewMultiLineTagParser("SecurityDefinitions", NewYAMLParser(WithMatcher(rxSecurity), WithSetter(metaSecurityDefinitionsSetter(swspec))), true), diff --git a/internal/parsers/parsers.go b/internal/parsers/parsers.go index b7708a5..b4bd1f0 100644 --- a/internal/parsers/parsers.go +++ b/internal/parsers/parsers.go @@ -4,9 +4,13 @@ package parsers import ( + "fmt" + "log" "regexp" "strconv" + "strings" + "github.com/go-openapi/codescan/internal/parsers/yaml" oaispec "github.com/go-openapi/spec" ) @@ -94,12 +98,12 @@ func (su *SetDeprecatedOp) Parse(lines []string) error { } type ConsumesDropEmptyParser struct { - *multilineDropEmptyParser + *multilineYAMLListParser } func NewConsumesDropEmptyParser(set func([]string)) *ConsumesDropEmptyParser { return &ConsumesDropEmptyParser{ - multilineDropEmptyParser: &multilineDropEmptyParser{ + multilineYAMLListParser: &multilineYAMLListParser{ set: set, rx: rxConsumes, }, @@ -107,12 +111,12 @@ func NewConsumesDropEmptyParser(set func([]string)) *ConsumesDropEmptyParser { } type ProducesDropEmptyParser struct { - *multilineDropEmptyParser + *multilineYAMLListParser } func NewProducesDropEmptyParser(set func([]string)) *ProducesDropEmptyParser { return &ProducesDropEmptyParser{ - multilineDropEmptyParser: &multilineDropEmptyParser{ + multilineYAMLListParser: &multilineYAMLListParser{ set: set, rx: rxProduces, }, @@ -140,3 +144,56 @@ func (m *multilineDropEmptyParser) Parse(lines []string) error { return nil } + +// multilineYAMLListParser is the Q4 replacement for +// multilineDropEmptyParser on list-valued block bodies +// (`consumes:` / `produces:` in meta + operation scope). The +// body is captured raw — its list-item markers (`- value`) +// survive the preprocessor — and interpreted by +// internal/parsers/yaml/ as a YAML list. Strict list: non-list +// bodies emit a warning and produce no values. +// +// See `.claude/plans/workshops/w2-enum.md` §2.6 (quirk 4), +// `grammar-parser-architecture.md` §3.3 (sub-parser pattern), +// and `.claude/plans/forthcoming-features.md` §5.2 (P7.7 doc +// follow-up). +type multilineYAMLListParser struct { + set func([]string) + rx *regexp.Regexp +} + +func (m *multilineYAMLListParser) Matches(line string) bool { + return m.rx.MatchString(line) +} + +func (m *multilineYAMLListParser) Parse(lines []string) error { + // Strip comment noise but preserve `-` (the YAML list marker). + // Matches rxUncommentHeaders minus the dash. + cleaned := cleanupScannerLines(lines, rxUncommentNoDash) + + // Drop drop-empty to avoid blank spacer lines between items + // confusing the YAML parser. + cleaned = removeEmptyLines(cleaned) + if len(cleaned) == 0 { + return nil + } + + body := strings.Join(cleaned, "\n") + parsed, err := yaml.Parse(body) + if err != nil { + log.Printf("WARNING: parse.invalid-block-body: %v", err) + return nil + } + list, ok := parsed.([]any) + if !ok { + log.Printf("WARNING: parse.invalid-block-body: expected YAML list, got %T", parsed) + return nil + } + + out := make([]string, 0, len(list)) + for _, item := range list { + out = append(out, fmt.Sprintf("%v", item)) + } + m.set(out) + return nil +} diff --git a/internal/parsers/parsers_test.go b/internal/parsers/parsers_test.go index 02841d4..7c3baf8 100644 --- a/internal/parsers/parsers_test.go +++ b/internal/parsers/parsers_test.go @@ -82,7 +82,8 @@ func TestConsumesDropEmptyParser(t *testing.T) { assert.TrueT(t, cp.Matches("Consumes:")) assert.FalseT(t, cp.Matches("other")) - require.NoError(t, cp.Parse([]string{"application/json", "", "application/xml", " "})) + // Q4: body is YAML-list-strict. Input uses `- value` markers. + require.NoError(t, cp.Parse([]string{"- application/json", "", "- application/xml", " "})) assert.Equal(t, []string{"application/json", "application/xml"}, got) } @@ -94,6 +95,19 @@ func TestProducesDropEmptyParser(t *testing.T) { assert.TrueT(t, pp.Matches("produces:")) assert.TrueT(t, pp.Matches("Produces:")) - require.NoError(t, pp.Parse([]string{"text/plain", "", "text/html"})) + require.NoError(t, pp.Parse([]string{"- text/plain", "", "- text/html"})) assert.Equal(t, []string{"text/plain", "text/html"}, got) } + +func TestMultilineYAMLListParserNonListDropsValues(t *testing.T) { + // Q4 strict-list contract: a scalar body emits a warning and + // produces no values (setter called with nothing? no — setter + // is NOT called on the non-list path, so `got` stays at its + // zero value). + t.Parallel() + + var called bool + cp := NewConsumesDropEmptyParser(func(v []string) { called = true; _ = v }) + require.NoError(t, cp.Parse([]string{"application/json"})) // bare form, not a list + assert.FalseT(t, called) +} diff --git a/internal/parsers/regexprs.go b/internal/parsers/regexprs.go index 1efc912..62a0ac8 100644 --- a/internal/parsers/regexprs.go +++ b/internal/parsers/regexprs.go @@ -90,8 +90,14 @@ var ( rxOpID + "\\p{Zs}*$") rxBeginYAMLSpec = regexp.MustCompile(rxCommentPrefix + `---\p{Zs}*$`) rxUncommentHeaders = regexp.MustCompile(`^[\p{Zs}\t/\*-]*\|?`) - rxUncommentYAML = regexp.MustCompile(`^[\p{Zs}\t]*/*`) - rxOperation = regexp.MustCompile( + // rxUncommentNoDash mirrors rxUncommentHeaders but does NOT strip + // leading `-`. Used for multi-line list-bodied keywords + // (`consumes:` / `produces:`) where `-` is a YAML list marker + // the body's sub-parser needs to see. See Q4 fix + // (.claude/plans/workshops/w2-enum.md §2.6). + rxUncommentNoDash = regexp.MustCompile(`^[\p{Zs}\t/\*]*\|?`) + rxUncommentYAML = regexp.MustCompile(`^[\p{Zs}\t]*/*`) + rxOperation = regexp.MustCompile( rxCommentPrefix + "swagger:operation\\p{Zs}*" + rxMethod + From 25fb35107c96738da4c6b2a549c42cccf3d233f2 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 10:04:37 +0200 Subject: [PATCH 34/46] =?UTF-8?q?feat(builders):=20P5.1a=20step=206.items?= =?UTF-8?q?=20=E2=80=94=20items=20bridge-tagger?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the v2 bridge-tagger for the items builder: ApplyBlock consumes a grammar.Block, filters Properties by ItemsDepth (1-indexed, matching v1 rxItemsPrefixFmt semantics), and dispatches each Keyword.Name to the existing ifaces.ValidationBuilder target. Pure interface swap — no behavior change. The ValidationBuilder methods invoked here are the same targets v1's regex taggers already wrote through, so the schema-side caller (P5.1b) can route to either path under Options.UseGrammarParser with equivalent output. Enum / default / example still delegate to target.SetEnum / SetDefault / SetExample for parity with v1's ParseEnum and raw-value storage; the direct internal/parsers/enum.Parse path lights up in a post-migration cleanup commit. collectionFormat is routed only when the target satisfies OperationValidationBuilder; items-only targets silently drop it, mirroring v1's tagger table structure. Tests: 8 unit tests via mocks.MockValidationBuilder covering max/min with operators (Op='<'/'>' → excl=true, '>='/'<=' → false), integer keywords (minLength/maxLength/minItems/maxItems), boolean unique, pattern, enum raw-value delegation, default/example, ItemsDepth filtering (level 1 vs 2 vs 3 no-op), and type-mismatch silent skip (mirrors v1's early-return on regex-match failure). No flag routing yet — P5.1b (schema) will wire Options.UseGrammarParser and exercise this bridge through TestParity end-to-end. See: - .claude/plans/p5.1a-items-walkthrough.md (design trace) - .claude/plans/p5-builder-migrations.md §4.2 (items scope) Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/builders/items/bridge.go | 111 +++++++++++++++ internal/builders/items/bridge_test.go | 189 +++++++++++++++++++++++++ 2 files changed, 300 insertions(+) create mode 100644 internal/builders/items/bridge.go create mode 100644 internal/builders/items/bridge_test.go diff --git a/internal/builders/items/bridge.go b/internal/builders/items/bridge.go new file mode 100644 index 0000000..792cf32 --- /dev/null +++ b/internal/builders/items/bridge.go @@ -0,0 +1,111 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package items + +import ( + "github.com/go-openapi/codescan/internal/ifaces" + "github.com/go-openapi/codescan/internal/parsers/grammar" +) + +// ApplyBlock writes every items-level validation Property from b to +// target, filtered to the given nesting depth. This is the v2 +// bridge-tagger replacement for the regex-based itemsTaggers() / +// SectionedParser combo — a pure interface swap with no behavior +// change: the ValidationBuilder methods it calls are the same +// targets the v1 taggers wrote through. +// +// `level` is 1-indexed to match v1's rxItemsPrefixFmt semantics: +// level 1 consumes properties whose grammar-parser ItemsDepth is 1 +// (e.g., `items.maximum: 5`); level 2 consumes depth-2 properties +// (`items.items.maximum: 5`); and so on. Properties at other depths +// are ignored by this call — the schema-side caller recurses with +// `level+1` for each nested array layer. +// +// Enum / default / example are delegated to target.SetEnum / +// SetDefault / SetExample which route through v1's ParseEnum or +// raw-value storage; this preserves parity end-to-end. The +// eventual swap to internal/parsers/enum.Parse happens in a +// post-migration cleanup commit where v2-only semantics take +// over. +// +// See: +// - .claude/plans/p5.1a-items-walkthrough.md (design trace) +// - .claude/plans/p5-builder-migrations.md §4.2 (items scope) +// - legacy-stop-points.md (bridge-tagger obligations around +// implied stops; items has no block-head keywords, so S6 is +// not applicable here). +func ApplyBlock(b grammar.Block, target ifaces.ValidationBuilder, level int) { + for p := range b.Properties() { + if p.ItemsDepth != level { + continue + } + dispatchItemsKeyword(p, target) + } +} + +// dispatchItemsKeyword routes one Property to the matching +// ValidationBuilder method. Non-convertible Typed values (where +// the parser's primitive-typing failed and emitted a diagnostic +// upstream) are silently skipped — mirrors v1's tagger behavior +// of early-return on regex match failure. +func dispatchItemsKeyword(p grammar.Property, t ifaces.ValidationBuilder) { + switch p.Keyword.Name { + case "maximum": + if p.Typed.Type == grammar.ValueNumber { + t.SetMaximum(p.Typed.Number, p.Typed.Op == "<") + } + case "minimum": + if p.Typed.Type == grammar.ValueNumber { + t.SetMinimum(p.Typed.Number, p.Typed.Op == ">") + } + case "multipleOf": + if p.Typed.Type == grammar.ValueNumber { + t.SetMultipleOf(p.Typed.Number) + } + case "minLength": + if p.Typed.Type == grammar.ValueInteger { + t.SetMinLength(p.Typed.Integer) + } + case "maxLength": + if p.Typed.Type == grammar.ValueInteger { + t.SetMaxLength(p.Typed.Integer) + } + case "pattern": + t.SetPattern(p.Value) + case "minItems": + if p.Typed.Type == grammar.ValueInteger { + t.SetMinItems(p.Typed.Integer) + } + case "maxItems": + if p.Typed.Type == grammar.ValueInteger { + t.SetMaxItems(p.Typed.Integer) + } + case "unique": + if p.Typed.Type == grammar.ValueBoolean { + t.SetUnique(p.Typed.Boolean) + } + case "collectionFormat": + // Only OperationValidationBuilder knows SetCollectionFormat; + // items.Validations does not (per survey). Type-assertion + // guard silently drops the value for items-only targets, + // matching v1's tagger table structure. + if ov, ok := t.(ifaces.OperationValidationBuilder); ok { + if p.Typed.Type == grammar.ValueStringEnum { + ov.SetCollectionFormat(p.Typed.String) + } + } + case "enum": + // Delegated to the existing target.SetEnum, which routes + // through parsers.ParseEnum (post-Q1 fix: comma-list + // trimmed, JSON array verbatim). Direct use of + // internal/parsers/enum.Parse is deferred to the + // post-migration cleanup commit that takes the fully-typed + // values path; for now we preserve v1 parity. + t.SetEnum(p.Value) + case "default": + t.SetDefault(p.Value) + case "example": + t.SetExample(p.Value) + } +} diff --git a/internal/builders/items/bridge_test.go b/internal/builders/items/bridge_test.go new file mode 100644 index 0000000..c282592 --- /dev/null +++ b/internal/builders/items/bridge_test.go @@ -0,0 +1,189 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package items_test + +import ( + "go/token" + "testing" + + "github.com/go-openapi/codescan/internal/builders/items" + "github.com/go-openapi/codescan/internal/ifaces" + "github.com/go-openapi/codescan/internal/parsers/grammar" + "github.com/go-openapi/codescan/internal/scantest/mocks" +) + +// Each test synthesises a grammar.Block via ParseText on a raw +// comment body (no Go comment markers), constructs a recording +// MockValidationBuilder, runs ApplyBlock, and inspects the +// recorded calls. + +// parseBodyToBlock returns the package's polymorphic grammar.Block API; the +// ireturn lint is suppressed for the same reason as grammar.Parser.ParseAs. +// +//nolint:ireturn +func parseBodyToBlock(t *testing.T, body string) grammar.Block { + t.Helper() + p := grammar.NewParser(token.NewFileSet()) + return p.ParseAs(grammar.AnnModel, body, token.Position{Line: 1}) +} + +func TestApplyBlockMaximumMinimum(t *testing.T) { + body := "items.maximum: <10\nitems.minimum: >=0" + b := parseBodyToBlock(t, body) + + var maxCall struct { + val float64 + excl bool + } + var minCall struct { + val float64 + excl bool + } + mock := &mocks.MockValidationBuilder{ + SetMaximumFunc: func(v float64, excl bool) { maxCall.val, maxCall.excl = v, excl }, + SetMinimumFunc: func(v float64, excl bool) { minCall.val, minCall.excl = v, excl }, + } + + items.ApplyBlock(b, mock, 1) + + if maxCall.val != 10 || !maxCall.excl { + t.Errorf("SetMaximum: got (%v, %v), want (10, true)", maxCall.val, maxCall.excl) + } + if minCall.val != 0 || minCall.excl { + t.Errorf("SetMinimum: got (%v, %v), want (0, false) — `>=` is inclusive, Op=\">=\" should map excl=false", + minCall.val, minCall.excl) + } +} + +func TestApplyBlockIntegerKeywords(t *testing.T) { + body := "items.minLength: 3\nitems.maxLength: 10\nitems.minItems: 1\nitems.maxItems: 100" + b := parseBodyToBlock(t, body) + + var calls struct { + minLen, maxLen, minItems, maxItems int64 + } + mock := &mocks.MockValidationBuilder{ + SetMinLengthFunc: func(v int64) { calls.minLen = v }, + SetMaxLengthFunc: func(v int64) { calls.maxLen = v }, + SetMinItemsFunc: func(v int64) { calls.minItems = v }, + SetMaxItemsFunc: func(v int64) { calls.maxItems = v }, + } + + items.ApplyBlock(b, mock, 1) + + if calls.minLen != 3 || calls.maxLen != 10 || calls.minItems != 1 || calls.maxItems != 100 { + t.Errorf("integer calls: got %+v", calls) + } +} + +func TestApplyBlockBooleanUnique(t *testing.T) { + b := parseBodyToBlock(t, "items.unique: true") + + var got bool + mock := &mocks.MockValidationBuilder{ + SetUniqueFunc: func(v bool) { got = v }, + } + + items.ApplyBlock(b, mock, 1) + if !got { + t.Error("SetUnique should have been called with true") + } +} + +func TestApplyBlockPattern(t *testing.T) { + b := parseBodyToBlock(t, "items.pattern: ^[a-z]+$") + + var got string + mock := &mocks.MockValidationBuilder{ + SetPatternFunc: func(v string) { got = v }, + } + + items.ApplyBlock(b, mock, 1) + if got != "^[a-z]+$" { + t.Errorf("SetPattern: got %q want %q", got, "^[a-z]+$") + } +} + +func TestApplyBlockEnum(t *testing.T) { + b := parseBodyToBlock(t, "items.enum: red, green, blue") + + var raw string + mock := &mocks.MockValidationBuilder{ + SetEnumFunc: func(v string) { raw = v }, + } + + items.ApplyBlock(b, mock, 1) + // Bridge passes the raw Value; v1's ParseEnum handles splitting + // and the Q1 whitespace-trim fix applies downstream. + if raw != "red, green, blue" { + t.Errorf("SetEnum: got %q", raw) + } +} + +func TestApplyBlockDefaultExample(t *testing.T) { + b := parseBodyToBlock(t, "items.default: hello\nitems.example: world") + + var def, ex any + mock := &mocks.MockValidationBuilder{ + SetDefaultFunc: func(v any) { def = v }, + SetExampleFunc: func(v any) { ex = v }, + } + + items.ApplyBlock(b, mock, 1) + if def != "hello" || ex != "world" { + t.Errorf("default/example: got %v / %v", def, ex) + } +} + +func TestApplyBlockFiltersByItemsDepth(t *testing.T) { + // Properties at level 2 (items.items.X) should NOT fire when + // ApplyBlock is called with level 1. This is how the schema-side + // caller recurses into nested arrays — one ApplyBlock call per + // depth level. + body := "items.maximum: 5\nitems.items.maximum: 10" + b := parseBodyToBlock(t, body) + + var calls []float64 + mock := &mocks.MockValidationBuilder{ + SetMaximumFunc: func(v float64, _ bool) { calls = append(calls, v) }, + } + + items.ApplyBlock(b, mock, 1) + if len(calls) != 1 || calls[0] != 5 { + t.Errorf("level 1 pass: want [5], got %v", calls) + } + + calls = nil + items.ApplyBlock(b, mock, 2) + if len(calls) != 1 || calls[0] != 10 { + t.Errorf("level 2 pass: want [10], got %v", calls) + } + + calls = nil + items.ApplyBlock(b, mock, 3) + if len(calls) != 0 { + t.Errorf("level 3 pass: want no calls, got %v", calls) + } +} + +func TestApplyBlockSkipsTypeMismatchedValues(t *testing.T) { + // Notanumber can't parse as Number; the parser emits a + // diagnostic upstream and leaves Typed.Type == ValueNone. + // The bridge-tagger silently skips such properties — mirrors + // v1's early-return-on-regex-fail behavior. + b := parseBodyToBlock(t, "items.maximum: notanumber") + + var called bool + mock := &mocks.MockValidationBuilder{ + SetMaximumFunc: func(v float64, excl bool) { called = true; _, _ = v, excl }, + } + + items.ApplyBlock(b, mock, 1) + if called { + t.Error("SetMaximum must not be called when Typed.Type is not ValueNumber") + } +} + +// Interface satisfaction compile-time check. +var _ ifaces.ValidationBuilder = (*mocks.MockValidationBuilder)(nil) From 6f621865a97ac03c52b287cca3b8f7d887438e8d Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 10:26:44 +0200 Subject: [PATCH 35/46] =?UTF-8?q?feat(builders):=20P5.1b=20step=206.schema?= =?UTF-8?q?=20=E2=80=94=20flag=20routing=20to=20items=20bridge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire Options.UseGrammarParser into the schema builder. When the flag is set, after the legacy SectionedParser runs on a field's doc comment, the grammar parser re-parses the same comment group and dispatches every items-level validation Property through items.ApplyBlock into the corresponding schema target. Shadow-path semantics for this step: the legacy regex itemsTaggers stay registered alongside the grammar path, because they also claim `items.*:` prefixed lines away from the description section (dropping them leaks raw annotation text like `items pattern: \.(jpe?g|png)$` into the rendered description — seen on the petstore fixture during development). Both paths write to the same ValidationBuilder target with the same values, so the second write is idempotent. The grammar bridge exercises the end-to-end wiring without diverging from v1; subsequent migration steps (schema-level validations, description claims, YAML-body parsing) will progressively retire the legacy regex path. New: - internal/scanner/scan_context.go — ScanCtx.UseGrammarParser() accessor mirroring the existing option getters. - internal/builders/schema/bridge.go — collectItemsLevels walk (grammar-side counterpart of parseArrayTypes) and Builder.applyItemsBridge orchestrating the post-parse dispatch. - internal/builders/schema/bridge_test.go — unit tests for the AST walk across flat/nested/pointer/named/struct elements and early-return guards on applyItemsBridge. Schema.go call-site wiring: - processAnonInterfaceMethod, processInterfaceMethod, processStructField each call applyItemsBridge after sp.Parse so the shadow dispatch fires on every field-level comment that the legacy path already handled. TestParity (integration/parity_test.go) stays green across all 21 fixtures under flag on/off. See: - .claude/plans/p5.1a-items-walkthrough.md §3–4 (routing + parity) - .claude/plans/p5-builder-migrations.md §4.2 (items scope) Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/builders/schema/bridge.go | 104 ++++++++++++++++++ internal/builders/schema/bridge_test.go | 138 ++++++++++++++++++++++++ internal/builders/schema/schema.go | 4 +- internal/scanner/scan_context.go | 7 ++ 4 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 internal/builders/schema/bridge.go create mode 100644 internal/builders/schema/bridge_test.go diff --git a/internal/builders/schema/bridge.go b/internal/builders/schema/bridge.go new file mode 100644 index 0000000..e749a43 --- /dev/null +++ b/internal/builders/schema/bridge.go @@ -0,0 +1,104 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package schema + +import ( + "go/ast" + + "github.com/go-openapi/codescan/internal/builders/items" + "github.com/go-openapi/codescan/internal/parsers/grammar" + oaispec "github.com/go-openapi/spec" +) + +// itemsLevelTarget pairs a nesting depth (1-indexed, matching +// grammar.Property.ItemsDepth) with the schema to write items-level +// validations into. +type itemsLevelTarget struct { + level int + schema *oaispec.Schema +} + +// collectItemsLevels mirrors the walk performed by parseArrayTypes but +// collects (level, schema) targets instead of building regex-based +// TagParsers. It is the grammar-path counterpart of the legacy +// itemsTaggers recursion. +// +// The starting level is 1 — `items.maximum:` has ItemsDepth=1 under the +// grammar lexer. The legacy v1 level=0 convention is re-indexed here so +// the caller can pass the value directly to items.ApplyBlock. +func collectItemsLevels(expr ast.Expr, schemaItems *oaispec.SchemaOrArray, level int) []itemsLevelTarget { + if schemaItems == nil || schemaItems.Schema == nil { + return nil + } + + here := itemsLevelTarget{level: level, schema: schemaItems.Schema} + + switch e := expr.(type) { + case *ast.ArrayType: + rest := collectItemsLevels(e.Elt, schemaItems.Schema.Items, level+1) + out := make([]itemsLevelTarget, 0, 1+len(rest)) + return append(append(out, here), rest...) + + case *ast.Ident: + rest := collectItemsLevels(expr, schemaItems.Schema.Items, level+1) + if e.Obj == nil { + out := make([]itemsLevelTarget, 0, 1+len(rest)) + return append(append(out, here), rest...) + } + return rest + + case *ast.StarExpr: + return collectItemsLevels(e.X, schemaItems, level) + + case *ast.SelectorExpr: + return []itemsLevelTarget{here} + + case *ast.StructType, *ast.InterfaceType, *ast.MapType: + return nil + + default: + return nil + } +} + +// applyItemsBridge parses fld.Doc through the grammar parser and +// dispatches every items-level validation property into the matching +// schema level via items.ApplyBlock. Called only when the scan's +// UseGrammarParser flag is set. +// +// Shadow semantics for P5.1b: the legacy regex itemsTaggers still +// run (they also claim items-prefix lines away from the description, +// so dropping them would cause `items.pattern:` bodies to leak into +// prose). Both paths write to the same ValidationBuilder target with +// the same values, so the second write is a no-op; this exercises the +// grammar path end-to-end without diverging from v1. Subsequent +// migration steps will drop the legacy path as schema-level keywords +// and description-claim behavior move to the grammar side. +// +// No-op when fld is nil, the field type is not an array literal, or +// the schema has no items sub-tree — matching the legacy guard in +// createParser that only invokes parseArrayTypes for *ast.ArrayType +// fields. +func (s *Builder) applyItemsBridge(fld *ast.Field, ps *oaispec.Schema) { + if fld == nil || fld.Doc == nil || ps == nil { + return + } + arrayType, ok := fld.Type.(*ast.ArrayType) + if !ok { + return + } + if !s.ctx.UseGrammarParser() { + return + } + + targets := collectItemsLevels(arrayType.Elt, ps.Items, 1) + if len(targets) == 0 { + return + } + + block := grammar.NewParser(s.decl.Pkg.Fset).Parse(fld.Doc) + for _, tgt := range targets { + items.ApplyBlock(block, schemaValidations{tgt.schema}, tgt.level) + } +} diff --git a/internal/builders/schema/bridge_test.go b/internal/builders/schema/bridge_test.go new file mode 100644 index 0000000..c9b9366 --- /dev/null +++ b/internal/builders/schema/bridge_test.go @@ -0,0 +1,138 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package schema + +import ( + "go/ast" + "go/parser" + "testing" + + oaispec "github.com/go-openapi/spec" +) + +// fieldType parses a Go type expression like `[]string` or `[][]int` +// via go/parser and returns the parsed ast.Expr. Helper for the +// collectItemsLevels walk tests below. +func fieldType(t *testing.T, expr string) ast.Expr { + t.Helper() + e, err := parser.ParseExpr(expr) + if err != nil { + t.Fatalf("parseExpr %q: %v", expr, err) + } + return e +} + +// arrayTypeElt returns the Elt of an ArrayType — the same entry point +// createParser uses before invoking parseArrayTypes / collectItemsLevels. +func arrayTypeElt(t *testing.T, expr string) ast.Expr { + t.Helper() + at, ok := fieldType(t, expr).(*ast.ArrayType) + if !ok { + t.Fatalf("expected ArrayType for %q", expr) + } + return at.Elt +} + +// newItemsChain returns a SchemaOrArray chain deep enough to hold +// `depth` items levels, mirroring the shape buildFromType produces +// when it walks a slice-of-slice-of-... type via Typable.Items(). +func newItemsChain(depth int) *oaispec.SchemaOrArray { + if depth <= 0 { + return nil + } + root := &oaispec.SchemaOrArray{Schema: &oaispec.Schema{}} + cur := root + for range depth - 1 { + cur.Schema.Items = &oaispec.SchemaOrArray{Schema: &oaispec.Schema{}} + cur = cur.Schema.Items + } + return root +} + +func TestCollectItemsLevelsFlatSlice(t *testing.T) { + // []string — one level of items validations (grammar level 1). + items := newItemsChain(1) + got := collectItemsLevels(arrayTypeElt(t, "[]string"), items, 1) + if len(got) != 1 || got[0].level != 1 || got[0].schema != items.Schema { + t.Errorf("[]string: want [{1, items.Schema}], got %+v", got) + } +} + +func TestCollectItemsLevelsNestedSlice(t *testing.T) { + // [][]string — two levels: grammar 1 (outer element slice) and + // grammar 2 (inner scalar string registered via the *ast.Ident + // Obj==nil branch). + items := newItemsChain(2) + got := collectItemsLevels(arrayTypeElt(t, "[][]string"), items, 1) + if len(got) != 2 { + t.Fatalf("[][]string: want 2 levels, got %d (%+v)", len(got), got) + } + if got[0].level != 1 || got[0].schema != items.Schema { + t.Errorf("[][]string level 1: got %+v", got[0]) + } + if got[1].level != 2 || got[1].schema != items.Schema.Items.Schema { + t.Errorf("[][]string level 2: got %+v", got[1]) + } +} + +func TestCollectItemsLevelsPointerElt(t *testing.T) { + // []*string — StarExpr unwraps without advancing level. + items := newItemsChain(1) + got := collectItemsLevels(arrayTypeElt(t, "[]*string"), items, 1) + if len(got) != 1 || got[0].level != 1 { + t.Errorf("[]*string: want one level-1 entry, got %+v", got) + } +} + +func TestCollectItemsLevelsNamedElt(t *testing.T) { + // []Foo — Ident with Obj set (resolved by go/parser to a local + // scope binding). parser.ParseExpr does NOT resolve scope, so Obj + // is nil here; we simulate the "Obj != nil" case by synthesising. + items := newItemsChain(1) + ident := &ast.Ident{Name: "Foo", Obj: ast.NewObj(ast.Typ, "Foo")} + + got := collectItemsLevels(ident, items, 1) + // Obj != nil → skip registration at this level; recursion advances + // to items.Schema.Items which is nil → terminates → empty result. + if len(got) != 0 { + t.Errorf("named ident: want no levels, got %+v", got) + } +} + +func TestCollectItemsLevelsStructElt(t *testing.T) { + // []struct{X int} — StructType terminates without registering. + items := newItemsChain(1) + got := collectItemsLevels(arrayTypeElt(t, "[]struct{X int}"), items, 1) + if len(got) != 0 { + t.Errorf("[]struct{...}: want no levels, got %+v", got) + } +} + +func TestCollectItemsLevelsNilItems(t *testing.T) { + // nil items → no panic, empty result. + got := collectItemsLevels(arrayTypeElt(t, "[]string"), nil, 1) + if len(got) != 0 { + t.Errorf("nil items: want empty, got %+v", got) + } + var empty oaispec.SchemaOrArray + got = collectItemsLevels(arrayTypeElt(t, "[]string"), &empty, 1) + if len(got) != 0 { + t.Errorf("items with nil Schema: want empty, got %+v", got) + } +} + +// TestApplyItemsBridgeGuards verifies the applyItemsBridge guards +// short-circuit under expected conditions. It builds a minimal Builder +// with a nil ctx-dependent path and exercises the early returns. +func TestApplyItemsBridgeGuards(_ *testing.T) { + var b Builder + + b.applyItemsBridge(nil, &oaispec.Schema{}) // fld=nil + fld := &ast.Field{Type: &ast.ArrayType{Elt: &ast.Ident{Name: "string"}}} + b.applyItemsBridge(fld, &oaispec.Schema{}) // fld.Doc=nil + fld.Doc = &ast.CommentGroup{List: []*ast.Comment{{Text: "// items.maximum: 5"}}} + b.applyItemsBridge(fld, nil) // ps=nil + fld.Type = &ast.Ident{Name: "string"} + b.applyItemsBridge(fld, &oaispec.Schema{}) // non-array type +} diff --git a/internal/builders/schema/schema.go b/internal/builders/schema/schema.go index 23c72ab..53d4a5c 100644 --- a/internal/builders/schema/schema.go +++ b/internal/builders/schema/schema.go @@ -731,6 +731,7 @@ func (s *Builder) processAnonInterfaceMethod(fld *types.Func, it *types.Interfac if err := sp.Parse(afld.Doc); err != nil { return err } + s.applyItemsBridge(afld, &ps) if ps.Ref.String() == "" && name != fld.Name() { ps.AddExtension("x-go-name", fld.Name()) @@ -959,6 +960,7 @@ func (s *Builder) processInterfaceMethod(fld *types.Func, it *types.Interface, d if err := sp.Parse(afld.Doc); err != nil { return err } + s.applyItemsBridge(afld, &ps) if ps.Ref.String() == "" && name != fld.Name() { ps.AddExtension("x-go-name", fld.Name()) @@ -1215,6 +1217,7 @@ func (s *Builder) processStructField(fld *types.Var, decl *scanner.EntityDecl, t if err := sp.Parse(afld.Doc); err != nil { return err } + s.applyItemsBridge(afld, &ps) if ps.Ref.String() == "" && name != fld.Name() { resolvers.AddExtension(&ps.VendorExtensible, "x-go-name", fld.Name(), s.ctx.SkipExtensions()) @@ -1399,7 +1402,6 @@ func (s *Builder) createParser(nm string, schema, ps *oaispec.Schema, fld *ast.F // the parser may be called outside the context of struct field. // In that case, just return the outcome of the parsing now. - if fld != nil { // check if this is a primitive, if so parse the validations from the // doc comments of the slice declaration. diff --git a/internal/scanner/scan_context.go b/internal/scanner/scan_context.go index 24eec84..df55044 100644 --- a/internal/scanner/scan_context.go +++ b/internal/scanner/scan_context.go @@ -98,6 +98,13 @@ func (s *ScanCtx) RefAliases() bool { return s.opts.RefAliases } +// UseGrammarParser reports whether the scan is configured to route +// comment-group parsing through the v2 grammar parser. See +// Options.UseGrammarParser for the migration seam semantics. +func (s *ScanCtx) UseGrammarParser() bool { + return s.opts.UseGrammarParser +} + func (s *ScanCtx) Debug() bool { return s.debug } From d165f5094ac0a6f9399041cbbe49ae072f4fa216 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 15:24:58 +0200 Subject: [PATCH 36/46] =?UTF-8?q?fix(parsers):=20Q5=20finish=2009f6748=20?= =?UTF-8?q?=E2=80=94=20annotation=20terminator=20must=20be=20line-start?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 09f6748 ("fix(parsers): ignore annotations buried in prose.") stated that "All annotations should start their comment line." and tightened every per-annotation validator regex (rxStrFmt, rxIgnoreOverride, rxModelOverride, ...) to require rxCommentPrefix — fixing cases like `// ... swagger:strfmt so ...` incorrectly capturing "so" as the strfmt name. That commit missed the block-level terminator in SectionedParser.parseLine, which still used the loose rxSwaggerAnnotation (matches `swagger:` preceded by any whitespace or `/` anywhere on the line). Consequence: a docstring like // IgnoredModel is annotated as a model but also carries swagger:ignore, // so the sectionedParser flags it as ignored. was truncated at line 1 because "swagger:ignore," tripped the block-terminator regex — even though no validator and no actual annotation directive was involved. The prose leaked into neither title nor description; the entire docstring disappeared. This commit introduces rxSwaggerAnnotationStrict (rxCommentPrefix + swagger:) and rewires SectionedParser.parseLine to use it. The loose rxSwaggerAnnotation is kept for scanner classification (ExtractAnnotation / HasAnnotation) where the `swagger:route` exception legitimately permits a godoc-style identifier before the annotation (e.g., `// MyHandler swagger:route GET /path`). The SectionedParser does not build routes directly, so the strict terminator needs no such exception. Affected goldens regenerated to reflect the now-preserved prose: alias-expand, alias-ref, allof-edges, enum-overrides, interface-methods, interface-methods-xnullable, swagger-type-array, top-level-kinds. All fixtures' `swagger:*` directives still parse identically; only passing-mentions in description now survive to the spec output. Makes v1 (legacy) and v2 (grammar) paths converge on the intended "annotations start their comment line" rule — unblocking P5.1b's schema bridge migration by removing the parity-blocking divergence seen in the shadow-path work. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- .../golden/enhancements_alias_expand.json | 1 + .../golden/enhancements_alias_ref.json | 1 + .../golden/enhancements_allof_edges.json | 1 + .../golden/enhancements_enum_overrides.json | 3 +- .../enhancements_interface_methods.json | 1 + ...ancements_interface_methods_xnullable.json | 1 + .../enhancements_swagger_type_array.json | 3 + .../golden/enhancements_top_level_kinds.json | 2 + internal/parsers/regexprs.go | 55 ++++++++++++++----- internal/parsers/sectioned_parser.go | 9 ++- 10 files changed, 60 insertions(+), 17 deletions(-) diff --git a/fixtures/integration/golden/enhancements_alias_expand.json b/fixtures/integration/golden/enhancements_alias_expand.json index 2fe6aca..838b488 100644 --- a/fixtures/integration/golden/enhancements_alias_expand.json +++ b/fixtures/integration/golden/enhancements_alias_expand.json @@ -150,6 +150,7 @@ }, "exportedParams": { "type": "object", + "title": "exportedParams is the backing struct for an aliased swagger:parameters.", "required": [ "data" ], diff --git a/fixtures/integration/golden/enhancements_alias_ref.json b/fixtures/integration/golden/enhancements_alias_ref.json index e92e4ad..eaee663 100644 --- a/fixtures/integration/golden/enhancements_alias_ref.json +++ b/fixtures/integration/golden/enhancements_alias_ref.json @@ -66,6 +66,7 @@ }, "exportedParams": { "type": "object", + "title": "exportedParams is the backing struct for an aliased swagger:parameters.", "required": [ "data" ], diff --git a/fixtures/integration/golden/enhancements_allof_edges.json b/fixtures/integration/golden/enhancements_allof_edges.json index 8f512b0..5f763c6 100644 --- a/fixtures/integration/golden/enhancements_allof_edges.json +++ b/fixtures/integration/golden/enhancements_allof_edges.json @@ -59,6 +59,7 @@ "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/allof-edges" }, "AllOfStrfmt": { + "title": "AllOfStrfmt composes an allOf member that carries a swagger:strfmt tag.", "allOf": [ { "type": "string", diff --git a/fixtures/integration/golden/enhancements_enum_overrides.json b/fixtures/integration/golden/enhancements_enum_overrides.json index 56be2d4..ddd4a68 100644 --- a/fixtures/integration/golden/enhancements_enum_overrides.json +++ b/fixtures/integration/golden/enhancements_enum_overrides.json @@ -29,7 +29,7 @@ "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/enum-overrides" }, "NotificationB": { - "description": "NotificationB exercises case B: plain string field with inline", + "description": "NotificationB exercises case B: plain string field with inline\ncomma-list enum. No swagger:enum on the type, no consts in code.", "type": "object", "properties": { "priority": { @@ -89,6 +89,7 @@ "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/enum-overrides" }, "PriorityD": { + "description": "PriorityD has a swagger:enum annotation but no corresponding\nconst declarations in this package. The builder's FindEnumValues\ncall returns an empty slice; the test captures how the spec\nrenders in that case.", "type": "string", "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/enum-overrides" } diff --git a/fixtures/integration/golden/enhancements_interface_methods.json b/fixtures/integration/golden/enhancements_interface_methods.json index 6ef3996..fa45fb6 100644 --- a/fixtures/integration/golden/enhancements_interface_methods.json +++ b/fixtures/integration/golden/enhancements_interface_methods.json @@ -3,6 +3,7 @@ "paths": {}, "definitions": { "Audited": { + "description": "Audited is a small named interface that is embedded with swagger:allOf\ninto richer interfaces below.", "type": "object", "properties": { "createdAt": { diff --git a/fixtures/integration/golden/enhancements_interface_methods_xnullable.json b/fixtures/integration/golden/enhancements_interface_methods_xnullable.json index 3c677c4..6d6fe00 100644 --- a/fixtures/integration/golden/enhancements_interface_methods_xnullable.json +++ b/fixtures/integration/golden/enhancements_interface_methods_xnullable.json @@ -3,6 +3,7 @@ "paths": {}, "definitions": { "Audited": { + "description": "Audited is a small named interface that is embedded with swagger:allOf\ninto richer interfaces below.", "type": "object", "properties": { "createdAt": { diff --git a/fixtures/integration/golden/enhancements_swagger_type_array.json b/fixtures/integration/golden/enhancements_swagger_type_array.json index b9d971a..b9d75e2 100644 --- a/fixtures/integration/golden/enhancements_swagger_type_array.json +++ b/fixtures/integration/golden/enhancements_swagger_type_array.json @@ -3,6 +3,7 @@ "paths": {}, "definitions": { "objectStruct": { + "description": "ObjectStruct carries swagger:type object (unsupported by\nswaggerSchemaForType for structs). The fix inlines the struct as\ntype:object rather than producing an empty schema.", "type": "object", "x-go-name": "ObjectStruct", "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/swagger-type-array" @@ -25,6 +26,7 @@ "x-go-name": "Labels" }, "nested": { + "description": "The nested struct with an unsupported swagger:type.", "type": "object", "properties": { "name": { @@ -47,6 +49,7 @@ "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/swagger-type-array" }, "structWithBadType": { + "description": "StructWithBadType is a struct whose swagger:type is set to an\nunrecognised value. The fix ensures buildNamedStruct falls through to\nmakeRef so the property is still serialisable — the key assertion is\nthat the referenced schema is not empty.", "x-go-name": "StructWithBadType", "x-go-package": "github.com/go-openapi/codescan/fixtures/enhancements/swagger-type-array" } diff --git a/fixtures/integration/golden/enhancements_top_level_kinds.json b/fixtures/integration/golden/enhancements_top_level_kinds.json index 11d6792..c9dca4c 100644 --- a/fixtures/integration/golden/enhancements_top_level_kinds.json +++ b/fixtures/integration/golden/enhancements_top_level_kinds.json @@ -3,7 +3,9 @@ "paths": {}, "definitions": { "IgnoredModel": { + "description": "so the sectionedParser flags it as ignored and buildFromDecl returns\nearly via its `sp.ignored` branch.", "type": "object", + "title": "IgnoredModel is annotated as a model but also carries swagger:ignore,", "properties": { "value": { "type": "integer", diff --git a/internal/parsers/regexprs.go b/internal/parsers/regexprs.go index 62a0ac8..1e3fd36 100644 --- a/internal/parsers/regexprs.go +++ b/internal/parsers/regexprs.go @@ -65,20 +65,47 @@ const ( ) var ( - rxSwaggerAnnotation = regexp.MustCompile(`(?:^|[\s/])swagger:([\p{L}\p{N}\p{Pd}\p{Pc}]+)`) - rxFileUpload = regexp.MustCompile(rxCommentPrefix + `swagger:file`) - rxStrFmt = regexp.MustCompile(rxCommentPrefix + `swagger:strfmt\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) - rxAlias = regexp.MustCompile(rxCommentPrefix + `swagger:alias`) - rxName = regexp.MustCompile(rxCommentPrefix + `swagger:name\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}\.]+)(?:\.)?$`) - rxAllOf = regexp.MustCompile(rxCommentPrefix + `swagger:allOf\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}\.]+)?(?:\.)?$`) - rxModelOverride = regexp.MustCompile(rxCommentPrefix + `swagger:model\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)?(?:\.)?$`) - rxResponseOverride = regexp.MustCompile(rxCommentPrefix + `swagger:response\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)?(?:\.)?$`) - rxParametersOverride = regexp.MustCompile(rxCommentPrefix + `swagger:parameters\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}\p{Zs}]+)(?:\.)?$`) - rxEnum = regexp.MustCompile(rxCommentPrefix + `swagger:enum\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) - rxIgnoreOverride = regexp.MustCompile(rxCommentPrefix + `swagger:ignore\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)?(?:\.)?$`) - rxDefault = regexp.MustCompile(rxCommentPrefix + `swagger:default\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) - rxType = regexp.MustCompile(rxCommentPrefix + `swagger:type\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) - rxRoute = regexp.MustCompile( + // rxSwaggerAnnotation matches `swagger:` anywhere on a comment + // line where it is preceded by whitespace, `/`, or the start of the + // line. Kept loose because it is the classification regex consumed + // by scanner.index.ExtractAnnotation (and parsers.HasAnnotation), + // where `swagger:route` is allowed to follow a godoc-style + // identifier (e.g. `// MyHandler swagger:route GET /path`) per + // rxRoutePrefix. + // + // Do NOT use this regex as a block terminator — it triggers on + // mid-prose mentions like `// carries swagger:ignore, so ...` and + // truncates descriptions. Use rxSwaggerAnnotationStrict for that. + rxSwaggerAnnotation = regexp.MustCompile(`(?:^|[\s/])swagger:([\p{L}\p{N}\p{Pd}\p{Pc}]+)`) + + // rxSwaggerAnnotationStrict matches a swagger: annotation + // only at the start of a comment line (with comment-prefix noise + // tolerated per rxCommentPrefix). Used by SectionedParser as the + // block-body terminator so prose that mentions `swagger:*` in + // passing does not cut description accumulation short. Finishes + // the work of 09f6748 ("All annotations should start their + // comment line.") which tightened the per-annotation validator + // regexes but left the block-level terminator using the loose + // pattern. + // + // For `swagger:route` with a godoc-style prefix, the per-annotation + // rxRoutePrefix handles the legitimate identifier-before-route + // form; the SectionedParser does not build routes directly, so + // this strict pattern never needs that exception. + rxSwaggerAnnotationStrict = regexp.MustCompile(rxCommentPrefix + `swagger:[\p{L}\p{N}\p{Pd}\p{Pc}]+`) + rxFileUpload = regexp.MustCompile(rxCommentPrefix + `swagger:file`) + rxStrFmt = regexp.MustCompile(rxCommentPrefix + `swagger:strfmt\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) + rxAlias = regexp.MustCompile(rxCommentPrefix + `swagger:alias`) + rxName = regexp.MustCompile(rxCommentPrefix + `swagger:name\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}\.]+)(?:\.)?$`) + rxAllOf = regexp.MustCompile(rxCommentPrefix + `swagger:allOf\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}\.]+)?(?:\.)?$`) + rxModelOverride = regexp.MustCompile(rxCommentPrefix + `swagger:model\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)?(?:\.)?$`) + rxResponseOverride = regexp.MustCompile(rxCommentPrefix + `swagger:response\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)?(?:\.)?$`) + rxParametersOverride = regexp.MustCompile(rxCommentPrefix + `swagger:parameters\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}\p{Zs}]+)(?:\.)?$`) + rxEnum = regexp.MustCompile(rxCommentPrefix + `swagger:enum\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) + rxIgnoreOverride = regexp.MustCompile(rxCommentPrefix + `swagger:ignore\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)?(?:\.)?$`) + rxDefault = regexp.MustCompile(rxCommentPrefix + `swagger:default\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) + rxType = regexp.MustCompile(rxCommentPrefix + `swagger:type\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) + rxRoute = regexp.MustCompile( rxRoutePrefix + "swagger:route\\p{Zs}*" + rxMethod + diff --git a/internal/parsers/sectioned_parser.go b/internal/parsers/sectioned_parser.go index c87fb1d..63aecc0 100644 --- a/internal/parsers/sectioned_parser.go +++ b/internal/parsers/sectioned_parser.go @@ -208,8 +208,13 @@ COMMENTS: // caller should stop processing further comments (a swagger: annotation // that doesn't belong to this parser, or swagger:ignore). func (st *SectionedParser) parseLine(line string) (stop bool) { - // Step 1: check for swagger:* annotations. - if rxSwaggerAnnotation.MatchString(line) { + // Step 1: check for swagger:* annotations. Use the strict + // line-start pattern so prose mentioning `swagger:*` in passing + // (e.g. `// carries swagger:ignore, so ...`) does not terminate + // the block — see rxSwaggerAnnotationStrict godoc and commit + // 09f6748 (the finishing half of "All annotations should start + // their comment line."). + if rxSwaggerAnnotationStrict.MatchString(line) { if rxIgnoreOverride.MatchString(line) { st.ignored = true return true // an explicit ignore terminates this parser From 1701ae7cf84ccd55e672fd1c9b01e131d7df3396 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 15:28:44 +0200 Subject: [PATCH 37/46] =?UTF-8?q?feat(builders):=20P5.1b=20step=206.schema?= =?UTF-8?q?=20=E2=80=94=20grammar=20owns=20schema-field=20parsing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the shadow-path scaffolding with a real migration of the schema builder's comment parsing. When Options.UseGrammarParser is set, struct fields, interface methods, and top-level model declarations are parsed by the grammar parser directly; the result drives title, description, schema-level validations, items-level dispatch, and $ref-mode required writes. The legacy SectionedParser + schemaTaggers + itemsTaggers path stays available on the flag-off side for P6 parity gating. Scope redesign (see .claude/plans/p5.1b-schema-walkthrough.md): the original P5.1b commit (6f62186) treated the grammar bridge as a parity shadow because legacy regex taggers served a dual role — writing validation values AND claiming their own lines from the description accumulator. Dropping them leaked `items.pattern:` body text into prose. Rather than localize the parity break, this commit takes the full bullet: grammar becomes the single source of truth for description classification, and the legacy regex taggers retire (on the grammar path only — flag-off is unchanged). New in internal/builders/schema/bridge.go: - schemaBlockTargets — bundles ps, enclosing schema, and the property name so required:/discriminator: writes key on the right schema. - applySchemaBlock — level-0 Property dispatch covering every keyword that schemaTaggers used to register (maximum, minimum, multipleOf, minLength, maxLength, pattern, minItems, maxItems, unique, enum, default, example, required, readOnly, discriminator). Sub-dispatchers (dispatchNumericValidation / dispatchIntegerValidation / dispatchStringOrEnum / dispatchFlagValidation) keep cognitive complexity down. - Builder.applyBlockToField — invoked from the three field call sites (processAnonInterfaceMethod, processInterfaceMethod, processStructField). Parses afld.Doc once, writes description from block.ProseLines() via JoinDropLast (line-preserving "\n" join, matching v1's SectionedParser header semantics), then dispatches schema- and items-level properties. $ref-mode short- circuit mirrors refSchemaTaggers. - Builder.applyBlockToDecl — invoked from buildFromDecl. Uses CollectScannerTitleDescription on ProseLines to preserve v1's title-vs-description split heuristics (blank separator wins, else punctuation/markdown on first line, else all-description). Returns true when block.AnnotationKind() == AnnIgnore to short-circuit buildFromDecl the same way legacy sp.Ignored() did. Supporting grammar / parser extensions: - grammar.Block gains ProseLines() []string, exposing the raw prose lines parseTitleDesc sees before the first structured token. Enables line-preserving JoinDropLast reconstruction without bending the grammar's paragraph-joining semantics. - grammar.keywords_table.go adds `imum`-suffix aliases for maxLength/minLength/maxItems/minItems to match v1's regex tolerance (`maximum length: 3` / `minimum items: 1`). - parsers.ParseValueFromSchema becomes exported so the bridge can parse default: / example: values against the target schema. - parsers.CollectScannerTitleDescription becomes exported so the bridge can reuse the legacy split heuristic. Schema builder call-site wiring: - buildFromDecl: refactored to parseDeclDoc helper that branches on UseGrammarParser, returning (ignored, error). Keeps cognitive complexity below the gocognit threshold. - processAnonInterfaceMethod, processInterfaceMethod, processStructField: branch on UseGrammarParser to either applyBlockToField or sp.Parse. Tests: - internal/builders/schema/bridge_test.go — direct applySchemaBlock unit tests over synthesised grammar.Block values using a mock schemaValidations target: numeric with operators, integer keywords, pattern + enum, default/example scheme-aware parsing, required add/remove, discriminator set/clear, readOnly, top-level empty-name no-op, items-depth filtering. - ProseLines round-trip test confirms line breaks preserved. Quirk notes: - enum dispatch remains routed through v1's ParseEnum (via schemaValidations.SetEnum) for parity; swap to internal/parsers/enum.Parse is a post-migration cleanup commit (W2 §2.6). - default/example scheme intentionally omits ps.Format when building SimpleSchema — v1's MarshalJSON-derived scheme does the same, and ParseValueFromSchema's TypeName() would otherwise dispatch on format strings (e.g. "float32") that the switch doesn't recognize. Quirk preserved, documented. Unblocked by Q5 (commit d165f50) which fixed v1's block terminator to also require line-start — without it, fixtures whose prose mentions `swagger:*` directives diverged between v1 and v2. TestParity now green across all 22 fixtures. Full test suite green. See: - .claude/plans/p5.1b-schema-walkthrough.md (redesign rationale) - .claude/plans/p5.1a-items-walkthrough.md (items bridge, P5.1a) - .claude/plans/p5-builder-migrations.md (step 6 template) Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- docs/annotation-keywords.md | 16 +- internal/builders/items/bridge_test.go | 7 +- internal/builders/schema/bridge.go | 335 +++++++++++++++++++-- internal/builders/schema/bridge_test.go | 226 +++++++++++--- internal/builders/schema/schema.go | 60 ++-- internal/parsers/enum.go | 20 +- internal/parsers/enum_test.go | 6 +- internal/parsers/grammar/ast.go | 11 + internal/parsers/grammar/keywords_table.go | 12 +- internal/parsers/grammar/parser.go | 9 +- internal/parsers/parsers_helpers.go | 14 + internal/parsers/validations.go | 4 +- 12 files changed, 615 insertions(+), 105 deletions(-) diff --git a/docs/annotation-keywords.md b/docs/annotation-keywords.md index db5304a..4cc3ffe 100644 --- a/docs/annotation-keywords.md +++ b/docs/annotation-keywords.md @@ -14,11 +14,11 @@ the v2 grammar parser. | `maximum` | `max` | `number` | param, header, schema, items | | `minimum` | `min` | `number` | param, header, schema, items | | `multipleOf` | `multiple of`, `multiple-of` | `number` | param, header, schema, items | -| `maxLength` | `max length`, `max-length`, `maxLen`, `max len`, `max-len` | `integer` | param, header, schema, items | -| `minLength` | `min length`, `min-length`, `minLen`, `min len`, `min-len` | `integer` | param, header, schema, items | +| `maxLength` | `max length`, `max-length`, `maxLen`, `max len`, `max-len`, `maximum length`, `maximum-length`, `maximumLength`, `maximum len`, `maximum-len` | `integer` | param, header, schema, items | +| `minLength` | `min length`, `min-length`, `minLen`, `min len`, `min-len`, `minimum length`, `minimum-length`, `minimumLength`, `minimum len`, `minimum-len` | `integer` | param, header, schema, items | | `pattern` | — | `string` | param, header, schema, items | -| `maxItems` | `max items`, `max-items`, `max.items` | `integer` | param, header, schema, items | -| `minItems` | `min items`, `min-items`, `min.items` | `integer` | param, header, schema, items | +| `maxItems` | `max items`, `max-items`, `max.items`, `maximum items`, `maximum-items`, `maximumItems` | `integer` | param, header, schema, items | +| `minItems` | `min items`, `min-items`, `min.items`, `minimum items`, `minimum-items`, `minimumItems` | `integer` | param, header, schema, items | | `unique` | — | `boolean` | param, header, schema, items | | `collectionFormat` | `collection format`, `collection-format` | `string-enum` | param, header, items | | `enum` | — | `comma-list` | param, header, schema, items | @@ -80,7 +80,7 @@ the v2 grammar parser. ### `maxLength` -- **Aliases:** `max length`, `max-length`, `maxLen`, `max len`, `max-len` +- **Aliases:** `max length`, `max-length`, `maxLen`, `max len`, `max-len`, `maximum length`, `maximum-length`, `maximumLength`, `maximum len`, `maximum-len` - **Value type:** `integer` - **Legal contexts:** - `param` — Maximum length of the string parameter. @@ -90,7 +90,7 @@ the v2 grammar parser. ### `minLength` -- **Aliases:** `min length`, `min-length`, `minLen`, `min len`, `min-len` +- **Aliases:** `min length`, `min-length`, `minLen`, `min len`, `min-len`, `minimum length`, `minimum-length`, `minimumLength`, `minimum len`, `minimum-len` - **Value type:** `integer` - **Legal contexts:** - `param` — Minimum length of the string parameter. @@ -109,7 +109,7 @@ the v2 grammar parser. ### `maxItems` -- **Aliases:** `max items`, `max-items`, `max.items` +- **Aliases:** `max items`, `max-items`, `max.items`, `maximum items`, `maximum-items`, `maximumItems` - **Value type:** `integer` - **Legal contexts:** - `param` — Maximum number of items in the parameter array. @@ -119,7 +119,7 @@ the v2 grammar parser. ### `minItems` -- **Aliases:** `min items`, `min-items`, `min.items` +- **Aliases:** `min items`, `min-items`, `min.items`, `minimum items`, `minimum-items`, `minimumItems` - **Value type:** `integer` - **Legal contexts:** - `param` — Minimum number of items in the parameter array. diff --git a/internal/builders/items/bridge_test.go b/internal/builders/items/bridge_test.go index c282592..52ac75a 100644 --- a/internal/builders/items/bridge_test.go +++ b/internal/builders/items/bridge_test.go @@ -18,10 +18,11 @@ import ( // MockValidationBuilder, runs ApplyBlock, and inspects the // recorded calls. -// parseBodyToBlock returns the package's polymorphic grammar.Block API; the -// ireturn lint is suppressed for the same reason as grammar.Parser.ParseAs. +// parseBodyToBlock returns the package's polymorphic grammar.Block +// API. Used only in tests to synthesise Block values for dispatch +// verification. // -//nolint:ireturn +//nolint:ireturn // returning grammar.Block matches the package's polymorphic API by design func parseBodyToBlock(t *testing.T, body string) grammar.Block { t.Helper() p := grammar.NewParser(token.NewFileSet()) diff --git a/internal/builders/schema/bridge.go b/internal/builders/schema/bridge.go index e749a43..2b8f6ea 100644 --- a/internal/builders/schema/bridge.go +++ b/internal/builders/schema/bridge.go @@ -7,6 +7,8 @@ import ( "go/ast" "github.com/go-openapi/codescan/internal/builders/items" + "github.com/go-openapi/codescan/internal/ifaces" + "github.com/go-openapi/codescan/internal/parsers" "github.com/go-openapi/codescan/internal/parsers/grammar" oaispec "github.com/go-openapi/spec" ) @@ -62,43 +64,324 @@ func collectItemsLevels(expr ast.Expr, schemaItems *oaispec.SchemaOrArray, level } } -// applyItemsBridge parses fld.Doc through the grammar parser and -// dispatches every items-level validation property into the matching -// schema level via items.ApplyBlock. Called only when the scan's -// UseGrammarParser flag is set. +// schemaBlockTargets bundles the write surfaces used by +// applySchemaBlock. The same schema pointer may fill both fields when +// there is no distinct enclosing type (e.g., the top-level model +// declaration doc). +type schemaBlockTargets struct { + // enclosing is the schema that owns the current property — writes + // for `required:` / `discriminator:` target its Required slice / + // Discriminator field. + enclosing *oaispec.Schema + // ps is the schema describing the current property itself — writes + // for numeric/string validations, readOnly, extensions, etc. + ps *oaispec.Schema + // name is the property's JSON name inside enclosing — used to + // key required/discriminator. Empty when the bridge applies to a + // top-level declaration (no enclosing index). + name string +} + +// applySchemaBlock dispatches every level-0 Property in b into the +// appropriate write target. It is the grammar-side replacement for the +// union of schemaTaggers + enum/required/readOnly/discriminator/ +// YAMLExtensionsBlock parsers. // -// Shadow semantics for P5.1b: the legacy regex itemsTaggers still -// run (they also claim items-prefix lines away from the description, -// so dropping them would cause `items.pattern:` bodies to leak into -// prose). Both paths write to the same ValidationBuilder target with -// the same values, so the second write is a no-op; this exercises the -// grammar path end-to-end without diverging from v1. Subsequent -// migration steps will drop the legacy path as schema-level keywords -// and description-claim behavior move to the grammar side. +// Level-0 properties (ItemsDepth == 0) go through this dispatch; +// level-≥1 properties are handled by items.ApplyBlock invoked per +// nesting level by the caller. // -// No-op when fld is nil, the field type is not an array literal, or -// the schema has no items sub-tree — matching the legacy guard in -// createParser that only invokes parseArrayTypes for *ast.ArrayType -// fields. -func (s *Builder) applyItemsBridge(fld *ast.Field, ps *oaispec.Schema) { - if fld == nil || fld.Doc == nil || ps == nil { +// Keyword coverage and semantics mirror schemaTaggers in +// internal/builders/schema/taggers.go. See +// .claude/plans/p5.1b-schema-walkthrough.md §3 for the full mapping +// table. +func applySchemaBlock(b grammar.Block, t schemaBlockTargets) { + scheme := schemeFromPS(t.ps) + valid := schemaValidations{t.ps} + + for p := range b.Properties() { + if p.ItemsDepth != 0 { + continue + } + dispatchSchemaKeyword(p, t, valid, scheme) + } + + for ext := range b.Extensions() { + if !parsers.IsAllowedExtension(ext.Name) { + // Matches legacy schemaVendorExtensibleSetter: unknown + // x-* names were rejected with an error. At the grammar + // layer we preserve parity by silently skipping — + // the grammar parser already emitted a diagnostic. + continue + } + t.ps.AddExtension(ext.Name, ext.Value) + } +} + +func dispatchSchemaKeyword(p grammar.Property, t schemaBlockTargets, valid schemaValidations, scheme *oaispec.SimpleSchema) { + if dispatchNumericValidation(p, valid) { + return + } + if dispatchIntegerValidation(p, valid) { + return + } + if dispatchStringOrEnum(p, valid, scheme) { + return + } + dispatchFlagValidation(p, t, valid) + // Unrecognized keywords fall through silently. The grammar parser + // already emitted a context-validity diagnostic when the keyword + // was not legal here. `in:` is a match-only directive (see legacy + // NewMatchIn); grammar's line classification already excludes it + // from description prose, no write needed. +} + +func dispatchNumericValidation(p grammar.Property, valid schemaValidations) bool { + if p.Typed.Type != grammar.ValueNumber { + return false + } + switch p.Keyword.Name { + case "maximum": + valid.SetMaximum(p.Typed.Number, p.Typed.Op == "<") + case "minimum": + valid.SetMinimum(p.Typed.Number, p.Typed.Op == ">") + case "multipleOf": + valid.SetMultipleOf(p.Typed.Number) + default: + return false + } + return true +} + +func dispatchIntegerValidation(p grammar.Property, valid schemaValidations) bool { + if p.Typed.Type != grammar.ValueInteger { + return false + } + switch p.Keyword.Name { + case "minLength": + valid.SetMinLength(p.Typed.Integer) + case "maxLength": + valid.SetMaxLength(p.Typed.Integer) + case "minItems": + valid.SetMinItems(p.Typed.Integer) + case "maxItems": + valid.SetMaxItems(p.Typed.Integer) + default: + return false + } + return true +} + +// dispatchStringOrEnum handles pattern/enum/default/example — the +// four keywords whose value is consumed as a raw string or resolved +// against the target scheme rather than a pre-typed primitive. +func dispatchStringOrEnum(p grammar.Property, valid schemaValidations, scheme *oaispec.SimpleSchema) bool { + switch p.Keyword.Name { + case "pattern": + valid.SetPattern(p.Value) + case "enum": + // Parity-first: route through v1's ParseEnum. Switching to + // internal/parsers/enum.Parse is a post-migration quirk-fix + // (see .claude/plans/workshops/w2-enum.md §2.6). + valid.SetEnum(p.Value) + case "default": + if v, err := parsers.ParseValueFromSchema(p.Value, scheme); err == nil { + valid.SetDefault(v) + } + case "example": + if v, err := parsers.ParseValueFromSchema(p.Value, scheme); err == nil { + valid.SetExample(v) + } + default: + return false + } + return true +} + +// dispatchFlagValidation handles unique/required/readOnly/discriminator +// — boolean-typed keywords. required/discriminator key on the property +// name and write to the enclosing schema; unique/readOnly write to ps. +func dispatchFlagValidation(p grammar.Property, t schemaBlockTargets, valid schemaValidations) { + if p.Typed.Type != grammar.ValueBoolean { + return + } + switch p.Keyword.Name { + case "unique": + valid.SetUnique(p.Typed.Boolean) + case "readOnly": + t.ps.ReadOnly = p.Typed.Boolean + case "required": + if t.name != "" { + setRequired(t.enclosing, t.name, p.Typed.Boolean) + } + case "discriminator": + if t.name != "" { + setDiscriminator(t.enclosing, t.name, p.Typed.Boolean) + } + } +} + +// setRequired adds or removes name from the enclosing schema's +// Required slice. Mirrors parsers.SetRequiredSchema. +func setRequired(enclosing *oaispec.Schema, name string, required bool) { + if enclosing == nil { + return + } + midx := -1 + for i, nm := range enclosing.Required { + if nm == name { + midx = i + break + } + } + if required { + if midx < 0 { + enclosing.Required = append(enclosing.Required, name) + } return } - arrayType, ok := fld.Type.(*ast.ArrayType) - if !ok { + if midx >= 0 { + enclosing.Required = append(enclosing.Required[:midx], enclosing.Required[midx+1:]...) + } +} + +// setDiscriminator writes name to enclosing.Discriminator when +// required=true, or clears it when required=false and the current +// value matches. Mirrors parsers.SetDiscriminator. +func setDiscriminator(enclosing *oaispec.Schema, name string, required bool) { + if enclosing == nil { return } - if !s.ctx.UseGrammarParser() { + if required { + enclosing.Discriminator = name return } + if enclosing.Discriminator == name { + enclosing.Discriminator = "" + } +} + +// schemeFromPS builds the SimpleSchema that legacy NewSetDefault / +// NewSetExample take at construction time, derived from the already- +// populated ps.Type written by buildFromType before the comment +// dispatch. +// +// Quirk-preserving parity: v1 constructs the scheme with +// `Type: string(ps.Type.MarshalJSON())` and deliberately leaves +// Format empty. SimpleSchema.TypeName() returns Format when set, +// which would flip dispatch from the "number"/"integer" cases to +// format-specific strings ("float", "int32") that ParseValueFromSchema +// doesn't recognize. The pre-migration quirk is to ignore Format +// here; mirroring it keeps legacy fixtures (e.g., a float32 field +// with `default: 1.5`) parsing correctly. +// +// The MarshalJSON-derived Type contains JSON quote characters around +// the string, which ParseValueFromSchema strips via +// strings.Trim(..., `"`). Our version passes the unquoted token +// directly; the strip is a no-op in that path. +func schemeFromPS(ps *oaispec.Schema) *oaispec.SimpleSchema { + if ps == nil { + return nil + } + var typ string + if len(ps.Type) > 0 { + typ = ps.Type[0] + } + return &oaispec.SimpleSchema{Type: typ} +} + +// --- orchestrators invoked from schema.go call sites ---------------- - targets := collectItemsLevels(arrayType.Elt, ps.Items, 1) - if len(targets) == 0 { +// applyBlockToField is the grammar-path counterpart of +// `sp := s.createParser(...); sp.Parse(afld.Doc)` for a struct field +// or interface method. It parses the doc once, writes the description +// from the grammar's raw prose lines (v1 parity: line-preserving +// "\n" join, not paragraph-joined), dispatches schema-level +// properties, and recurses into items levels. +// +// When ps.Ref is set and DescWithRef is false, mirrors legacy +// refSchemaTaggers by only dispatching `required:`. +func (s *Builder) applyBlockToField(afld *ast.Field, enclosing *oaispec.Schema, ps *oaispec.Schema, name string) { + block := grammar.NewParser(s.decl.Pkg.Fset).Parse(afld.Doc) + + // $ref-mode: only `required:` applies, matching refSchemaTaggers. + if ps.Ref.String() != "" && !s.ctx.DescWithRef() { + for p := range block.Properties() { + if p.Keyword.Name == "required" && p.ItemsDepth == 0 && p.Typed.Type == grammar.ValueBoolean { + setRequired(enclosing, name, p.Typed.Boolean) + } + } return } - block := grammar.NewParser(s.decl.Pkg.Fset).Parse(fld.Doc) - for _, tgt := range targets { - items.ApplyBlock(block, schemaValidations{tgt.schema}, tgt.level) + // Field-level calls have no WithSetTitle callback in v1 — the + // entire prose header is the description. Legacy output is + // JoinDropLast("\n", header); enum-desc extension suffix is + // appended last. + ps.Description = parsers.JoinDropLast(block.ProseLines()) + if enumDesc := parsers.GetEnumDesc(ps.Extensions); enumDesc != "" { + if ps.Description != "" { + ps.Description += "\n" + } + ps.Description += enumDesc + } + + applySchemaBlock(block, schemaBlockTargets{ + enclosing: enclosing, + ps: ps, + name: name, + }) + + // items-level validation dispatch, mirroring parseArrayTypes' + // recursion. Only applies when the field type is written as an + // array literal — named/alias array types opt out (parity). + if arrayType, ok := afld.Type.(*ast.ArrayType); ok { + for _, tgt := range collectItemsLevels(arrayType.Elt, ps.Items, 1) { + items.ApplyBlock(block, schemaValidations{tgt.schema}, tgt.level) + } } } + +// applyBlockToDecl is the grammar-path counterpart of the buildFromDecl +// SectionedParser call. Drives title/description and schema-level +// dispatch for a top-level model declaration doc. Preserves v1's +// title-vs-description split heuristics (first blank line splits, or +// punctuation/markdown-heading on the first line, otherwise all prose +// is description). +// +// Returns true when the block's primary annotation is swagger:ignore +// — the caller short-circuits further building, matching legacy +// sp.Ignored() semantics (first-annotation-wins: swagger:ignore must +// appear before any other swagger:* line to take effect). +// +// required/discriminator writes are no-ops at the declaration level +// because applySchemaBlock requires a non-empty property name. +func (s *Builder) applyBlockToDecl(schema *oaispec.Schema) (ignored bool) { + block := grammar.NewParser(s.decl.Pkg.Fset).Parse(s.decl.Comments) + + if block.AnnotationKind() == grammar.AnnIgnore { + return true + } + + title, desc := parsers.CollectScannerTitleDescription(block.ProseLines()) + schema.Title = parsers.JoinDropLast(title) + schema.Description = parsers.JoinDropLast(desc) + if enumDesc := parsers.GetEnumDesc(schema.Extensions); enumDesc != "" { + if schema.Description != "" { + schema.Description += "\n" + } + schema.Description += enumDesc + } + + applySchemaBlock(block, schemaBlockTargets{ + enclosing: schema, + ps: schema, + name: "", // no property index at the declaration level + }) + return false +} + +// Compile-time assertion: schemaValidations satisfies +// ifaces.ValidationBuilder, the target type used by both the schema +// bridge and the items bridge. +var _ ifaces.ValidationBuilder = schemaValidations{} diff --git a/internal/builders/schema/bridge_test.go b/internal/builders/schema/bridge_test.go index c9b9366..cfcd622 100644 --- a/internal/builders/schema/bridge_test.go +++ b/internal/builders/schema/bridge_test.go @@ -6,14 +6,15 @@ package schema import ( "go/ast" "go/parser" + "go/token" "testing" + "github.com/go-openapi/codescan/internal/parsers/grammar" oaispec "github.com/go-openapi/spec" ) -// fieldType parses a Go type expression like `[]string` or `[][]int` -// via go/parser and returns the parsed ast.Expr. Helper for the -// collectItemsLevels walk tests below. +// ---------- collectItemsLevels --------------------------------------- + func fieldType(t *testing.T, expr string) ast.Expr { t.Helper() e, err := parser.ParseExpr(expr) @@ -23,8 +24,6 @@ func fieldType(t *testing.T, expr string) ast.Expr { return e } -// arrayTypeElt returns the Elt of an ArrayType — the same entry point -// createParser uses before invoking parseArrayTypes / collectItemsLevels. func arrayTypeElt(t *testing.T, expr string) ast.Expr { t.Helper() at, ok := fieldType(t, expr).(*ast.ArrayType) @@ -34,9 +33,6 @@ func arrayTypeElt(t *testing.T, expr string) ast.Expr { return at.Elt } -// newItemsChain returns a SchemaOrArray chain deep enough to hold -// `depth` items levels, mirroring the shape buildFromType produces -// when it walks a slice-of-slice-of-... type via Typable.Items(). func newItemsChain(depth int) *oaispec.SchemaOrArray { if depth <= 0 { return nil @@ -51,7 +47,6 @@ func newItemsChain(depth int) *oaispec.SchemaOrArray { } func TestCollectItemsLevelsFlatSlice(t *testing.T) { - // []string — one level of items validations (grammar level 1). items := newItemsChain(1) got := collectItemsLevels(arrayTypeElt(t, "[]string"), items, 1) if len(got) != 1 || got[0].level != 1 || got[0].schema != items.Schema { @@ -60,24 +55,20 @@ func TestCollectItemsLevelsFlatSlice(t *testing.T) { } func TestCollectItemsLevelsNestedSlice(t *testing.T) { - // [][]string — two levels: grammar 1 (outer element slice) and - // grammar 2 (inner scalar string registered via the *ast.Ident - // Obj==nil branch). items := newItemsChain(2) got := collectItemsLevels(arrayTypeElt(t, "[][]string"), items, 1) if len(got) != 2 { t.Fatalf("[][]string: want 2 levels, got %d (%+v)", len(got), got) } if got[0].level != 1 || got[0].schema != items.Schema { - t.Errorf("[][]string level 1: got %+v", got[0]) + t.Errorf("level 1: got %+v", got[0]) } if got[1].level != 2 || got[1].schema != items.Schema.Items.Schema { - t.Errorf("[][]string level 2: got %+v", got[1]) + t.Errorf("level 2: got %+v", got[1]) } } func TestCollectItemsLevelsPointerElt(t *testing.T) { - // []*string — StarExpr unwraps without advancing level. items := newItemsChain(1) got := collectItemsLevels(arrayTypeElt(t, "[]*string"), items, 1) if len(got) != 1 || got[0].level != 1 { @@ -86,22 +77,16 @@ func TestCollectItemsLevelsPointerElt(t *testing.T) { } func TestCollectItemsLevelsNamedElt(t *testing.T) { - // []Foo — Ident with Obj set (resolved by go/parser to a local - // scope binding). parser.ParseExpr does NOT resolve scope, so Obj - // is nil here; we simulate the "Obj != nil" case by synthesising. items := newItemsChain(1) ident := &ast.Ident{Name: "Foo", Obj: ast.NewObj(ast.Typ, "Foo")} got := collectItemsLevels(ident, items, 1) - // Obj != nil → skip registration at this level; recursion advances - // to items.Schema.Items which is nil → terminates → empty result. if len(got) != 0 { t.Errorf("named ident: want no levels, got %+v", got) } } func TestCollectItemsLevelsStructElt(t *testing.T) { - // []struct{X int} — StructType terminates without registering. items := newItemsChain(1) got := collectItemsLevels(arrayTypeElt(t, "[]struct{X int}"), items, 1) if len(got) != 0 { @@ -110,7 +95,6 @@ func TestCollectItemsLevelsStructElt(t *testing.T) { } func TestCollectItemsLevelsNilItems(t *testing.T) { - // nil items → no panic, empty result. got := collectItemsLevels(arrayTypeElt(t, "[]string"), nil, 1) if len(got) != 0 { t.Errorf("nil items: want empty, got %+v", got) @@ -118,21 +102,193 @@ func TestCollectItemsLevelsNilItems(t *testing.T) { var empty oaispec.SchemaOrArray got = collectItemsLevels(arrayTypeElt(t, "[]string"), &empty, 1) if len(got) != 0 { - t.Errorf("items with nil Schema: want empty, got %+v", got) + t.Errorf("empty SchemaOrArray: want empty, got %+v", got) + } +} + +// ---------- applySchemaBlock dispatch ---------------------------------- + +// parseSchemaBody synthesises a grammar.Block from a raw body (no +// swagger annotation required) so tests can exercise the keyword +// dispatch without string-formatting a full comment group. +// +//nolint:ireturn // same rationale as items/bridge_test.go +func parseSchemaBody(t *testing.T, body string) grammar.Block { + t.Helper() + p := grammar.NewParser(token.NewFileSet()) + return p.ParseAs(grammar.AnnModel, body, token.Position{Line: 1}) +} + +func TestApplySchemaBlockNumeric(t *testing.T) { + ps := &oaispec.Schema{} + ps.Type = oaispec.StringOrArray{"integer"} + b := parseSchemaBody(t, "maximum: <10\nminimum: >=0\nmultipleOf: 2") + + applySchemaBlock(b, schemaBlockTargets{enclosing: &oaispec.Schema{}, ps: ps, name: "x"}) + + if ps.Maximum == nil || *ps.Maximum != 10 || !ps.ExclusiveMaximum { + t.Errorf("maximum: got (%v, %v), want (10, true)", ps.Maximum, ps.ExclusiveMaximum) + } + if ps.Minimum == nil || *ps.Minimum != 0 || ps.ExclusiveMinimum { + t.Errorf("minimum: got (%v, %v), want (0, false)", ps.Minimum, ps.ExclusiveMinimum) + } + if ps.MultipleOf == nil || *ps.MultipleOf != 2 { + t.Errorf("multipleOf: got %v, want 2", ps.MultipleOf) + } +} + +func TestApplySchemaBlockIntegerAndBoolean(t *testing.T) { + ps := &oaispec.Schema{} + b := parseSchemaBody(t, "minLength: 3\nmaxLength: 10\nminItems: 1\nmaxItems: 100\nunique: true") + + applySchemaBlock(b, schemaBlockTargets{enclosing: &oaispec.Schema{}, ps: ps, name: "x"}) + + if ps.MinLength == nil || *ps.MinLength != 3 { + t.Errorf("minLength: %v", ps.MinLength) + } + if ps.MaxLength == nil || *ps.MaxLength != 10 { + t.Errorf("maxLength: %v", ps.MaxLength) + } + if ps.MinItems == nil || *ps.MinItems != 1 { + t.Errorf("minItems: %v", ps.MinItems) + } + if ps.MaxItems == nil || *ps.MaxItems != 100 { + t.Errorf("maxItems: %v", ps.MaxItems) + } + if !ps.UniqueItems { + t.Errorf("unique: want true") + } +} + +func TestApplySchemaBlockPatternAndEnum(t *testing.T) { + ps := &oaispec.Schema{} + ps.Type = oaispec.StringOrArray{"string"} + b := parseSchemaBody(t, "pattern: ^[a-z]+$\nenum: red, green, blue") + + applySchemaBlock(b, schemaBlockTargets{enclosing: &oaispec.Schema{}, ps: ps, name: "x"}) + + if ps.Pattern != "^[a-z]+$" { + t.Errorf("pattern: %q", ps.Pattern) + } + if len(ps.Enum) != 3 || ps.Enum[0] != "red" || ps.Enum[1] != "green" || ps.Enum[2] != "blue" { + t.Errorf("enum: %v", ps.Enum) + } +} + +func TestApplySchemaBlockDefaultAndExampleIntegerScheme(t *testing.T) { + ps := &oaispec.Schema{} + ps.Type = oaispec.StringOrArray{"integer"} + b := parseSchemaBody(t, "default: 42\nexample: 7") + + applySchemaBlock(b, schemaBlockTargets{enclosing: &oaispec.Schema{}, ps: ps, name: "x"}) + + if ps.Default != 42 { + t.Errorf("default: got %v (%T), want 42", ps.Default, ps.Default) + } + if ps.Example != 7 { + t.Errorf("example: got %v (%T), want 7", ps.Example, ps.Example) } } -// TestApplyItemsBridgeGuards verifies the applyItemsBridge guards -// short-circuit under expected conditions. It builds a minimal Builder -// with a nil ctx-dependent path and exercises the early returns. -func TestApplyItemsBridgeGuards(_ *testing.T) { - var b Builder +func TestApplySchemaBlockRequiredAndDiscriminator(t *testing.T) { + enclosing := &oaispec.Schema{} + ps := &oaispec.Schema{} + b := parseSchemaBody(t, "required: true\ndiscriminator: true") - b.applyItemsBridge(nil, &oaispec.Schema{}) // fld=nil - fld := &ast.Field{Type: &ast.ArrayType{Elt: &ast.Ident{Name: "string"}}} - b.applyItemsBridge(fld, &oaispec.Schema{}) // fld.Doc=nil - fld.Doc = &ast.CommentGroup{List: []*ast.Comment{{Text: "// items.maximum: 5"}}} - b.applyItemsBridge(fld, nil) // ps=nil - fld.Type = &ast.Ident{Name: "string"} - b.applyItemsBridge(fld, &oaispec.Schema{}) // non-array type + applySchemaBlock(b, schemaBlockTargets{enclosing: enclosing, ps: ps, name: "kind"}) + + if len(enclosing.Required) != 1 || enclosing.Required[0] != "kind" { + t.Errorf("required: %v", enclosing.Required) + } + if enclosing.Discriminator != "kind" { + t.Errorf("discriminator: %q", enclosing.Discriminator) + } +} + +func TestApplySchemaBlockRequiredFalseRemoves(t *testing.T) { + enclosing := &oaispec.Schema{} + enclosing.Required = []string{"kind", "other"} + ps := &oaispec.Schema{} + b := parseSchemaBody(t, "required: false") + + applySchemaBlock(b, schemaBlockTargets{enclosing: enclosing, ps: ps, name: "kind"}) + + if len(enclosing.Required) != 1 || enclosing.Required[0] != "other" { + t.Errorf("required false: %v", enclosing.Required) + } +} + +func TestApplySchemaBlockRequiredSkipsOnEmptyName(t *testing.T) { + // Top-level declaration case: name is "", required is a no-op. + enclosing := &oaispec.Schema{} + ps := &oaispec.Schema{} + b := parseSchemaBody(t, "required: true") + + applySchemaBlock(b, schemaBlockTargets{enclosing: enclosing, ps: ps, name: ""}) + + if len(enclosing.Required) != 0 { + t.Errorf("top-level required: want empty, got %v", enclosing.Required) + } +} + +func TestApplySchemaBlockReadOnly(t *testing.T) { + ps := &oaispec.Schema{} + b := parseSchemaBody(t, "readOnly: true") + + applySchemaBlock(b, schemaBlockTargets{enclosing: &oaispec.Schema{}, ps: ps, name: "x"}) + + if !ps.ReadOnly { + t.Errorf("readOnly: want true") + } +} + +func TestApplySchemaBlockItemsDepthSkipped(t *testing.T) { + // Level-0 dispatch must NOT fire on ItemsDepth>=1 properties; + // those belong to items.ApplyBlock. + ps := &oaispec.Schema{} + ps.Type = oaispec.StringOrArray{"integer"} + b := parseSchemaBody(t, "maximum: 5\nitems.maximum: 99") + + applySchemaBlock(b, schemaBlockTargets{enclosing: &oaispec.Schema{}, ps: ps, name: "x"}) + + if ps.Maximum == nil || *ps.Maximum != 5 { + t.Errorf("schema-level maximum: %v", ps.Maximum) + } + // items.maximum at level 1 is invisible to the schema dispatcher. + // The schema's Items isn't populated here (no array type), so the + // non-dispatch is the assertion. +} + +// parseProseBody produces a grammar.Block from prose-only text (no +// swagger annotation), matching the shape of struct-field docstrings +// where the grammar returns an UnboundBlock and populates ProseLines +// from the pre-body tokens. +// +//nolint:ireturn // grammar.Block is the package's polymorphic return. +func parseProseBody(t *testing.T, text string) grammar.Block { + t.Helper() + p := grammar.NewParser(token.NewFileSet()) + return p.ParseText(text, token.Position{Line: 1}) +} + +func TestProseLinesPreservesLineBreaks(t *testing.T) { + // Multi-line paragraph followed by blank and a second paragraph. + b := parseProseBody(t, "First line.\nsecond line.\n\nSecond para.") + got := b.ProseLines() + want := []string{"First line.", "second line.", "", "Second para."} + if !equalStrings(got, want) { + t.Errorf("ProseLines: got %#v, want %#v", got, want) + } +} + +func equalStrings(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true } diff --git a/internal/builders/schema/schema.go b/internal/builders/schema/schema.go index 53d4a5c..247ce64 100644 --- a/internal/builders/schema/schema.go +++ b/internal/builders/schema/schema.go @@ -117,9 +117,15 @@ func (s *Builder) interfaceJSONName(goName string) string { return s.interfaceMethodMangler.ToJSONName(goName) } -func (s *Builder) buildFromDecl(_ *scanner.EntityDecl, schema *oaispec.Schema) error { - // analyze doc comment for the model - // This includes parsing "example", "default" and other validation at the top-level declaration. +// parseDeclDoc runs the top-level declaration's comment through the +// configured parser path (grammar bridge when UseGrammarParser is set, +// legacy SectionedParser otherwise) and reports whether the type +// carries a swagger:ignore that should short-circuit the rest of +// buildFromDecl. +func (s *Builder) parseDeclDoc(schema *oaispec.Schema) (ignored bool, err error) { + if s.ctx.UseGrammarParser() { + return s.applyBlockToDecl(schema), nil + } sp := s.createParser("", schema, schema, nil, parsers.WithSetTitle(func(lines []string) { schema.Title = parsers.JoinDropLast(lines) }), parsers.WithSetDescription(func(lines []string) { @@ -130,13 +136,20 @@ func (s *Builder) buildFromDecl(_ *scanner.EntityDecl, schema *oaispec.Schema) e } }), ) - if err := sp.Parse(s.decl.Comments); err != nil { - return err + return false, err } + return sp.Ignored(), nil +} - // if the type is marked to ignore, just return - if sp.Ignored() { +func (s *Builder) buildFromDecl(_ *scanner.EntityDecl, schema *oaispec.Schema) error { + // analyze doc comment for the model + // This includes parsing "example", "default" and other validation at the top-level declaration. + ignored, err := s.parseDeclDoc(schema) + if err != nil { + return err + } + if ignored { return nil } @@ -727,11 +740,14 @@ func (s *Builder) processAnonInterfaceMethod(fld *types.Func, it *types.Interfac ps.Items = nil } - sp := s.createParser(name, schema, &ps, afld) - if err := sp.Parse(afld.Doc); err != nil { - return err + if s.ctx.UseGrammarParser() { + s.applyBlockToField(afld, schema, &ps, name) + } else { + sp := s.createParser(name, schema, &ps, afld) + if err := sp.Parse(afld.Doc); err != nil { + return err + } } - s.applyItemsBridge(afld, &ps) if ps.Ref.String() == "" && name != fld.Name() { ps.AddExtension("x-go-name", fld.Name()) @@ -956,11 +972,14 @@ func (s *Builder) processInterfaceMethod(fld *types.Func, it *types.Interface, d ps.Items = nil } - sp := s.createParser(name, tgt, &ps, afld) - if err := sp.Parse(afld.Doc); err != nil { - return err + if s.ctx.UseGrammarParser() { + s.applyBlockToField(afld, tgt, &ps, name) + } else { + sp := s.createParser(name, tgt, &ps, afld) + if err := sp.Parse(afld.Doc); err != nil { + return err + } } - s.applyItemsBridge(afld, &ps) if ps.Ref.String() == "" && name != fld.Name() { ps.AddExtension("x-go-name", fld.Name()) @@ -1213,11 +1232,14 @@ func (s *Builder) processStructField(fld *types.Var, decl *scanner.EntityDecl, t ps.Items = nil } - sp := s.createParser(name, tgt, &ps, afld) - if err := sp.Parse(afld.Doc); err != nil { - return err + if s.ctx.UseGrammarParser() { + s.applyBlockToField(afld, tgt, &ps, name) + } else { + sp := s.createParser(name, tgt, &ps, afld) + if err := sp.Parse(afld.Doc); err != nil { + return err + } } - s.applyItemsBridge(afld, &ps) if ps.Ref.String() == "" && name != fld.Name() { resolvers.AddExtension(&ps.VendorExtensible, "x-go-name", fld.Name(), s.ctx.SkipExtensions()) diff --git a/internal/parsers/enum.go b/internal/parsers/enum.go index e66ebcb..8372a45 100644 --- a/internal/parsers/enum.go +++ b/internal/parsers/enum.go @@ -49,7 +49,19 @@ func (se *SetEnum) Parse(lines []string) error { return nil } -func parseValueFromSchema(s string, schema *spec.SimpleSchema) (any, error) { +// ParseValueFromSchema converts a raw annotation value to the Go +// representation implied by the target schema's Type/Format. Used by +// default:/example: setters where the annotation body is a primitive +// literal whose meaning depends on the target: `default: 3` becomes +// int(3) against `Type: "integer"`, "3" against `Type: "string"`, and +// so on. JSON-typed targets (`object`, `array`) attempt unmarshal and +// fall back to the raw string on invalid JSON. +// +// A nil schema yields the raw string unchanged. Numeric/boolean +// parsing errors are surfaced to the caller; JSON-parse failures are +// absorbed (documented as a v1 quirk and currently preserved for +// parity). +func ParseValueFromSchema(s string, schema *spec.SimpleSchema) (any, error) { if schema == nil { return s, nil } @@ -90,7 +102,7 @@ func parseEnumOld(val string, s *spec.SimpleSchema) []any { interfaceSlice := make([]any, len(list)) for i, d := range list { d = strings.TrimSpace(d) - v, err := parseValueFromSchema(d, s) + v, err := ParseValueFromSchema(d, s) if err != nil { interfaceSlice[i] = d continue @@ -102,7 +114,7 @@ func parseEnumOld(val string, s *spec.SimpleSchema) []any { } func ParseEnum(val string, s *spec.SimpleSchema) []any { - // obtain the raw elements of the list to latter process them with the parseValueFromSchema + // obtain the raw elements of the list to latter process them with the ParseValueFromSchema var rawElements []json.RawMessage if err := json.Unmarshal([]byte(val), &rawElements); err != nil { log.Print("WARNING: item list for enum is not a valid JSON array, using the old deprecated format") @@ -117,7 +129,7 @@ func ParseEnum(val string, s *spec.SimpleSchema) []any { ds = string(d) } - v, err := parseValueFromSchema(ds, s) + v, err := ParseValueFromSchema(ds, s) if err != nil { interfaceSlice[i] = ds continue diff --git a/internal/parsers/enum_test.go b/internal/parsers/enum_test.go index 14f9288..f1a7692 100644 --- a/internal/parsers/enum_test.go +++ b/internal/parsers/enum_test.go @@ -67,19 +67,19 @@ func TestParseValueFromSchema(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - got, err := parseValueFromSchema(tc.input, tc.schema) + got, err := ParseValueFromSchema(tc.input, tc.schema) require.NoError(t, err) assert.Equal(t, tc.want, got) }) } t.Run("integer parse error", func(t *testing.T) { - _, err := parseValueFromSchema("not-a-number", &spec.SimpleSchema{Type: "integer"}) + _, err := ParseValueFromSchema("not-a-number", &spec.SimpleSchema{Type: "integer"}) require.Error(t, err) }) t.Run("bool parse error", func(t *testing.T) { - _, err := parseValueFromSchema("maybe", &spec.SimpleSchema{Type: "bool"}) + _, err := ParseValueFromSchema("maybe", &spec.SimpleSchema{Type: "bool"}) require.Error(t, err) }) } diff --git a/internal/parsers/grammar/ast.go b/internal/parsers/grammar/ast.go index 158b841..7de4312 100644 --- a/internal/parsers/grammar/ast.go +++ b/internal/parsers/grammar/ast.go @@ -44,6 +44,15 @@ type Block interface { // "extensions:" block inside this Block. Extensions() iter.Seq[Extension] + // ProseLines returns the raw prose lines that appeared before the + // first structured token (annotation, keyword, YAML fence, …), in + // source order. Blank lines appear as empty strings so consumers + // can reproduce v1's SectionedParser.header — which is + // line-preserving rather than paragraph-joined. Independent of + // Title()/Description(), which apply paragraph-joining for a + // cleaner rendered view. + ProseLines() []string + // Kind returns the top-level annotation kind this Block was // dispatched from (UnboundBlock returns AnnUnknown). Used by // analyzers to type-switch-check without reflection. @@ -256,6 +265,7 @@ type baseBlock struct { pos token.Position title string description string + proseLines []string kind AnnotationKind properties []Property @@ -267,6 +277,7 @@ type baseBlock struct { func (b *baseBlock) Pos() token.Position { return b.pos } func (b *baseBlock) Title() string { return b.title } func (b *baseBlock) Description() string { return b.description } +func (b *baseBlock) ProseLines() []string { return b.proseLines } func (b *baseBlock) Diagnostics() []Diagnostic { return b.diagnostics } func (b *baseBlock) AnnotationKind() AnnotationKind { return b.kind } diff --git a/internal/parsers/grammar/keywords_table.go b/internal/parsers/grammar/keywords_table.go index c3536ed..643fa53 100644 --- a/internal/parsers/grammar/keywords_table.go +++ b/internal/parsers/grammar/keywords_table.go @@ -50,7 +50,8 @@ var keywords = []Keyword{ // --- string-length validations --- keyword("maxLength", - aka("max length", "max-length", "maxLen", "max len", "max-len"), + aka("max length", "max-length", "maxLen", "max len", "max-len", + "maximum length", "maximum-length", "maximumLength", "maximum len", "maximum-len"), asInteger(), inParam("Maximum length of the string parameter."), inHeader("Maximum length of the header."), @@ -58,7 +59,8 @@ var keywords = []Keyword{ inItems("Maximum length of each string item."), ), keyword("minLength", - aka("min length", "min-length", "minLen", "min len", "min-len"), + aka("min length", "min-length", "minLen", "min len", "min-len", + "minimum length", "minimum-length", "minimumLength", "minimum len", "minimum-len"), asInteger(), inParam("Minimum length of the string parameter."), inHeader("Minimum length of the header."), @@ -76,7 +78,8 @@ var keywords = []Keyword{ // --- array validations --- keyword("maxItems", - aka("max items", "max-items", "max.items"), + aka("max items", "max-items", "max.items", + "maximum items", "maximum-items", "maximumItems"), asInteger(), inParam("Maximum number of items in the parameter array."), inHeader("Maximum number of items in the header array."), @@ -84,7 +87,8 @@ var keywords = []Keyword{ inItems("Maximum number of items at this nesting level."), ), keyword("minItems", - aka("min items", "min-items", "min.items"), + aka("min items", "min-items", "min.items", + "minimum items", "minimum-items", "minimumItems"), asInteger(), inParam("Minimum number of items in the parameter array."), inHeader("Minimum number of items in the header array."), diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index 66ef3ea..8e4e756 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -423,13 +423,16 @@ func firstArg(args []string) string { // parseTitleDesc extracts the title (first paragraph) and description // (remaining paragraphs, joined by blank lines) from the tokens that -// appear before the annotation. +// appear before the annotation. It also accumulates the raw prose +// lines (source-order, with blank separators preserved) so consumers +// can reproduce v1's SectionedParser.header — see baseBlock.ProseLines. // // Keyword/YAML/block-head tokens appearing pre-annotation are unusual // but not fatal — they are ignored with no diagnostic for v1 parity. func (p *parseState) parseTitleDesc(base *baseBlock, pre []Token) { var paragraphs []string var current []string + var proseLines []string flush := func() { if len(current) > 0 { @@ -442,8 +445,10 @@ func (p *parseState) parseTitleDesc(base *baseBlock, pre []Token) { switch t.Kind { case TokenBlank: flush() + proseLines = append(proseLines, "") case TokenText: current = append(current, t.Text) + proseLines = append(proseLines, t.Text) case TokenEOF, TokenAnnotation, TokenKeywordValue, TokenKeywordBlockHead, @@ -455,6 +460,8 @@ func (p *parseState) parseTitleDesc(base *baseBlock, pre []Token) { } flush() + base.proseLines = proseLines + if len(paragraphs) > 0 { base.title = paragraphs[0] } diff --git a/internal/parsers/parsers_helpers.go b/internal/parsers/parsers_helpers.go index 3295c41..7a9981c 100644 --- a/internal/parsers/parsers_helpers.go +++ b/internal/parsers/parsers_helpers.go @@ -7,6 +7,20 @@ import ( "strings" ) +// CollectScannerTitleDescription splits header lines (free-form prose +// appearing before the first recognized tag in a comment block) into +// title and description slices, following the legacy SectionedParser +// heuristics: a blank-line separator splits after cleanup; absent +// that, a first line ending in punctuation or matching a markdown +// heading prefix is promoted to title; otherwise everything is +// description. +// +// Exposed for grammar-side bridges that reuse the same split over +// grammar.Block.ProseLines(). +func CollectScannerTitleDescription(headers []string) (title, desc []string) { + return collectScannerTitleDescription(headers) +} + // a shared function that can be used to split given headers // into a title and description. func collectScannerTitleDescription(headers []string) (title, desc []string) { diff --git a/internal/parsers/validations.go b/internal/parsers/validations.go index 1625d9e..f2c5ae2 100644 --- a/internal/parsers/validations.go +++ b/internal/parsers/validations.go @@ -524,7 +524,7 @@ func (sd *SetDefault) Parse(lines []string) error { matches := sd.rx.FindStringSubmatch(lines[0]) if len(matches) > 1 && len(matches[1]) > 0 { - d, err := parseValueFromSchema(matches[1], sd.scheme) + d, err := ParseValueFromSchema(matches[1], sd.scheme) if err != nil { return err } @@ -564,7 +564,7 @@ func (se *SetExample) Parse(lines []string) error { matches := se.rx.FindStringSubmatch(lines[0]) if len(matches) > 1 && len(matches[1]) > 0 { - d, err := parseValueFromSchema(matches[1], se.scheme) + d, err := ParseValueFromSchema(matches[1], se.scheme) if err != nil { return err } From f5aebc164029d45d510c7a7484074a7aa6e11724 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 16:08:42 +0200 Subject: [PATCH 38/46] =?UTF-8?q?feat(builders):=20P5.2=20step=206.paramet?= =?UTF-8?q?ers=20=E2=80=94=20parameters=20grammar=20bridge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrate the parameters builder to the grammar parser under Options.UseGrammarParser. When the flag is set, processParamField routes comment parsing through ParameterBuilder.applyBlockToField instead of setupParamTaggers + SectionedParser. Legacy path stays intact on the flag-off side until P6 cutover. Keyword surface mirrors baseInlineParamTaggers: - numeric (maximum, minimum, multipleOf) - integer (minLength, maxLength, minItems, maxItems) - string / scheme-aware (pattern, enum, default, example — default/example pass through parsers.ParseValueFromSchema against param.SimpleSchema so `default: 42` on an integer parameter lands as int(42) not "42") - flags (unique, required → param.Required, collectionFormat) - Extensions via block.Extensions() for flat `x-foo: value` entries Differences from the schema bridge: - `required:` writes to param.Required directly (not an enclosing schema's Required slice). - No readOnly/discriminator (parameters don't support them). - collectionFormat IS in the base dispatch (parameters support arrays; schema level doesn't). - `in:` is not dispatched by the bridge — it's already resolved upstream by parsers.ParamLocation before the bridge runs, and the grammar lexer classifies it as TokenKeywordValue so it never reaches the prose accumulator. items.NewValidations exposed to let the bridge hand an items chain level to items.ApplyBlock without reaching into the unexported items.Validations{current} field. Items-level dispatch mirrors items.ParseArrayTypes via a new collectParamItemsLevels walk (*oaispec.Items chain, distinct from the schema side's *oaispec.SchemaOrArray chain). Not yet migrated: YAML-fenced (`--- ... ---`) extensions blocks. No parity fixture exercises them for parameters; they land alongside the YAML-body parser move in a later commit. Tests: - internal/builders/parameters/bridge_test.go — direct dispatch unit tests for numeric, integer, flags, pattern, enum, default/example scheme-aware parsing, collectionFormat, required true/false override, and the collectParamItemsLevels walk across flat/nested/nil shapes. TestParity stays green across all 22 fixtures. Full suite green. See: - .claude/plans/p5.1b-schema-walkthrough.md (redesign rationale, applies uniformly to every subsequent builder) - .claude/plans/p5-builder-migrations.md §4 (step template) Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/builders/items/validations.go | 8 + internal/builders/parameters/bridge.go | 216 ++++++++++++++++++++ internal/builders/parameters/bridge_test.go | 183 +++++++++++++++++ internal/builders/parameters/parameters.go | 38 ++-- 4 files changed, 428 insertions(+), 17 deletions(-) create mode 100644 internal/builders/parameters/bridge.go create mode 100644 internal/builders/parameters/bridge_test.go diff --git a/internal/builders/items/validations.go b/internal/builders/items/validations.go index 515ff34..c91f985 100644 --- a/internal/builders/items/validations.go +++ b/internal/builders/items/validations.go @@ -12,6 +12,14 @@ type Validations struct { current *oaispec.Items } +// NewValidations wraps an *oaispec.Items as a ValidationBuilder / +// OperationValidationBuilder target. Used by the grammar bridge +// (e.g. parameters.applyBlockToField) to hand an items chain level +// to items.ApplyBlock. +func NewValidations(it *oaispec.Items) Validations { + return Validations{current: it} +} + func (sv Validations) SetMaximum(val float64, exclusive bool) { sv.current.Maximum = &val sv.current.ExclusiveMaximum = exclusive diff --git a/internal/builders/parameters/bridge.go b/internal/builders/parameters/bridge.go new file mode 100644 index 0000000..bafa52b --- /dev/null +++ b/internal/builders/parameters/bridge.go @@ -0,0 +1,216 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package parameters + +import ( + "go/ast" + + "github.com/go-openapi/codescan/internal/builders/items" + "github.com/go-openapi/codescan/internal/parsers" + "github.com/go-openapi/codescan/internal/parsers/grammar" + oaispec "github.com/go-openapi/spec" +) + +// paramItemsLevelTarget pairs a nesting depth (1-indexed, matching +// grammar.Property.ItemsDepth) with the *oaispec.Items to write +// items-level validations into. Parameter items form a chain via +// item.Items rather than schema's Items.Schema.Items. +type paramItemsLevelTarget struct { + level int + items *oaispec.Items +} + +// collectParamItemsLevels mirrors items.ParseArrayTypes but collects +// (level, items) targets instead of building regex-based TagParsers. +// It is the grammar-path counterpart for parameter items dispatch. +// +// Starting level is 1 — `items.maximum:` has ItemsDepth=1 in the +// grammar lexer. Legacy v1 level=0 convention is re-indexed here. +func collectParamItemsLevels(expr ast.Expr, it *oaispec.Items, level int) []paramItemsLevelTarget { + if it == nil { + return nil + } + + here := paramItemsLevelTarget{level: level, items: it} + + switch e := expr.(type) { + case *ast.ArrayType: + rest := collectParamItemsLevels(e.Elt, it.Items, level+1) + out := make([]paramItemsLevelTarget, 0, 1+len(rest)) + return append(append(out, here), rest...) + + case *ast.Ident: + rest := collectParamItemsLevels(expr, it.Items, level+1) + if e.Obj == nil { + out := make([]paramItemsLevelTarget, 0, 1+len(rest)) + return append(append(out, here), rest...) + } + return rest + + case *ast.StarExpr: + return collectParamItemsLevels(e.X, it, level) + + case *ast.SelectorExpr: + return []paramItemsLevelTarget{here} + + case *ast.StructType, *ast.InterfaceType, *ast.MapType: + return nil + + default: + return nil + } +} + +// applyBlockToField parses afld.Doc through the grammar parser and +// dispatches description, validations, required flag, extensions, +// and items-level validations into param. Replaces +// setupParamTaggers + SectionedParser under UseGrammarParser. +// +// Notes: +// - `in:` is resolved upstream (via parsers.ParamLocation on the +// raw comment group) before this bridge runs; grammar's lexer +// also classifies it as TokenKeywordValue so it never reaches +// the description accumulator. No dispatch here. +// - Extension handling uses grammar's flat `block.Extensions()` +// iterator, which captures `x-foo: value` lines inside an +// `extensions:` block. YAML-fenced (`--- ... ---`) extension +// blocks are not yet supported on the grammar path — no parity +// fixture exercises them, and the follow-up commit that moves +// YAML-body parsing through internal/parsers/yaml will also +// plug this gap. +func (p *ParameterBuilder) applyBlockToField(afld *ast.Field, param *oaispec.Parameter) { + block := grammar.NewParser(p.decl.Pkg.Fset).Parse(afld.Doc) + + // Description: raw-line JoinDropLast for v1 parity (line-preserving + // `"\n"` join), enum-desc extension suffix appended. + param.Description = parsers.JoinDropLast(block.ProseLines()) + if enumDesc := parsers.GetEnumDesc(param.Extensions); enumDesc != "" { + if param.Description != "" { + param.Description += "\n" + } + param.Description += enumDesc + } + + scheme := ¶m.SimpleSchema + valid := paramValidations{param} + + for prop := range block.Properties() { + if prop.ItemsDepth != 0 { + continue + } + dispatchParamKeyword(prop, param, valid, scheme) + } + + for ext := range block.Extensions() { + if !parsers.IsAllowedExtension(ext.Name) { + continue + } + param.AddExtension(ext.Name, ext.Value) + } + + // items-level validation dispatch, mirroring items.ParseArrayTypes' + // recursion. Only applies when the field type is written as an + // array literal — named/alias array types opt out (parity). + if arrayType, ok := afld.Type.(*ast.ArrayType); ok { + for _, tgt := range collectParamItemsLevels(arrayType.Elt, param.Items, 1) { + items.ApplyBlock(block, items.NewValidations(tgt.items), tgt.level) + } + } +} + +// dispatchParamKeyword routes a level-0 Property into paramValidations +// or the raw param target. Covers the same keyword surface as v1's +// baseInlineParamTaggers minus `in:` (upstream-resolved) and the +// Extensions block (handled via block.Extensions() by the caller). +func dispatchParamKeyword(p grammar.Property, param *oaispec.Parameter, valid paramValidations, scheme *oaispec.SimpleSchema) { + if dispatchNumericValidation(p, valid) { + return + } + if dispatchIntegerValidation(p, valid) { + return + } + if dispatchStringOrEnum(p, valid, scheme) { + return + } + dispatchParamFlags(p, param, valid) +} + +func dispatchNumericValidation(p grammar.Property, valid paramValidations) bool { + if p.Typed.Type != grammar.ValueNumber { + return false + } + switch p.Keyword.Name { + case "maximum": + valid.SetMaximum(p.Typed.Number, p.Typed.Op == "<") + case "minimum": + valid.SetMinimum(p.Typed.Number, p.Typed.Op == ">") + case "multipleOf": + valid.SetMultipleOf(p.Typed.Number) + default: + return false + } + return true +} + +func dispatchIntegerValidation(p grammar.Property, valid paramValidations) bool { + if p.Typed.Type != grammar.ValueInteger { + return false + } + switch p.Keyword.Name { + case "minLength": + valid.SetMinLength(p.Typed.Integer) + case "maxLength": + valid.SetMaxLength(p.Typed.Integer) + case "minItems": + valid.SetMinItems(p.Typed.Integer) + case "maxItems": + valid.SetMaxItems(p.Typed.Integer) + default: + return false + } + return true +} + +// dispatchStringOrEnum handles pattern/enum/default/example — +// keywords whose value is consumed as a raw string or resolved +// against the target's SimpleSchema. +func dispatchStringOrEnum(p grammar.Property, valid paramValidations, scheme *oaispec.SimpleSchema) bool { + switch p.Keyword.Name { + case "pattern": + valid.SetPattern(p.Value) + case "enum": + valid.SetEnum(p.Value) + case "default": + if v, err := parsers.ParseValueFromSchema(p.Value, scheme); err == nil { + valid.SetDefault(v) + } + case "example": + if v, err := parsers.ParseValueFromSchema(p.Value, scheme); err == nil { + valid.SetExample(v) + } + default: + return false + } + return true +} + +// dispatchParamFlags handles unique/required/collectionFormat — the +// remaining boolean and string-enum keywords that write to the +// parameter target. +func dispatchParamFlags(p grammar.Property, param *oaispec.Parameter, valid paramValidations) { + switch p.Keyword.Name { + case "unique": + if p.Typed.Type == grammar.ValueBoolean { + valid.SetUnique(p.Typed.Boolean) + } + case "required": + if p.Typed.Type == grammar.ValueBoolean { + param.Required = p.Typed.Boolean + } + case "collectionFormat": + if p.Typed.Type == grammar.ValueStringEnum { + valid.SetCollectionFormat(p.Typed.String) + } + } +} diff --git a/internal/builders/parameters/bridge_test.go b/internal/builders/parameters/bridge_test.go new file mode 100644 index 0000000..8410f26 --- /dev/null +++ b/internal/builders/parameters/bridge_test.go @@ -0,0 +1,183 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package parameters + +import ( + "go/ast" + "go/parser" + "go/token" + "testing" + + "github.com/go-openapi/codescan/internal/parsers/grammar" + oaispec "github.com/go-openapi/spec" +) + +// ---------- collectParamItemsLevels ---------- + +func fieldType(t *testing.T, expr string) ast.Expr { + t.Helper() + e, err := parser.ParseExpr(expr) + if err != nil { + t.Fatalf("parseExpr %q: %v", expr, err) + } + return e +} + +func arrayTypeElt(t *testing.T, expr string) ast.Expr { + t.Helper() + at, ok := fieldType(t, expr).(*ast.ArrayType) + if !ok { + t.Fatalf("expected ArrayType for %q", expr) + } + return at.Elt +} + +func newItemsChain(depth int) *oaispec.Items { + if depth <= 0 { + return nil + } + root := new(oaispec.Items) + cur := root + for range depth - 1 { + cur.Items = new(oaispec.Items) + cur = cur.Items + } + return root +} + +func TestCollectParamItemsLevelsFlatSlice(t *testing.T) { + it := newItemsChain(1) + got := collectParamItemsLevels(arrayTypeElt(t, "[]string"), it, 1) + if len(got) != 1 || got[0].level != 1 || got[0].items != it { + t.Errorf("[]string: got %+v", got) + } +} + +func TestCollectParamItemsLevelsNestedSlice(t *testing.T) { + it := newItemsChain(2) + got := collectParamItemsLevels(arrayTypeElt(t, "[][]string"), it, 1) + if len(got) != 2 { + t.Fatalf("[][]string: got %d entries", len(got)) + } + if got[0].level != 1 || got[0].items != it { + t.Errorf("level 1: %+v", got[0]) + } + if got[1].level != 2 || got[1].items != it.Items { + t.Errorf("level 2: %+v", got[1]) + } +} + +func TestCollectParamItemsLevelsNilItems(t *testing.T) { + got := collectParamItemsLevels(arrayTypeElt(t, "[]string"), nil, 1) + if len(got) != 0 { + t.Errorf("nil items: got %+v", got) + } +} + +// ---------- dispatchParamKeyword ---------- + +//nolint:ireturn // grammar.Block is the package's polymorphic return. +func parseParamBody(t *testing.T, body string) grammar.Block { + t.Helper() + p := grammar.NewParser(token.NewFileSet()) + return p.ParseAs(grammar.AnnParameters, body, token.Position{Line: 1}) +} + +func runDispatch(t *testing.T, param *oaispec.Parameter, body string) { + t.Helper() + b := parseParamBody(t, body) + valid := paramValidations{param} + scheme := ¶m.SimpleSchema + for prop := range b.Properties() { + if prop.ItemsDepth != 0 { + continue + } + dispatchParamKeyword(prop, param, valid, scheme) + } +} + +func TestDispatchParamKeywordNumeric(t *testing.T) { + param := &oaispec.Parameter{} + param.Type = "integer" + runDispatch(t, param, "maximum: <10\nminimum: >=0\nmultipleOf: 2") + + if param.Maximum == nil || *param.Maximum != 10 || !param.ExclusiveMaximum { + t.Errorf("maximum: got (%v, %v), want (10, true)", param.Maximum, param.ExclusiveMaximum) + } + if param.Minimum == nil || *param.Minimum != 0 || param.ExclusiveMinimum { + t.Errorf("minimum: got (%v, %v), want (0, false)", param.Minimum, param.ExclusiveMinimum) + } + if param.MultipleOf == nil || *param.MultipleOf != 2 { + t.Errorf("multipleOf: got %v", param.MultipleOf) + } +} + +func TestDispatchParamKeywordIntegerAndFlags(t *testing.T) { + param := &oaispec.Parameter{} + runDispatch(t, param, "minLength: 3\nmaxLength: 10\nminItems: 1\nmaxItems: 100\nunique: true\nrequired: true") + + if param.MinLength == nil || *param.MinLength != 3 { + t.Errorf("minLength: %v", param.MinLength) + } + if param.MaxLength == nil || *param.MaxLength != 10 { + t.Errorf("maxLength: %v", param.MaxLength) + } + if param.MinItems == nil || *param.MinItems != 1 { + t.Errorf("minItems: %v", param.MinItems) + } + if param.MaxItems == nil || *param.MaxItems != 100 { + t.Errorf("maxItems: %v", param.MaxItems) + } + if !param.UniqueItems { + t.Errorf("unique: want true") + } + if !param.Required { + t.Errorf("required: want true") + } +} + +func TestDispatchParamKeywordPatternAndEnum(t *testing.T) { + param := &oaispec.Parameter{} + param.Type = "string" + runDispatch(t, param, "pattern: ^[a-z]+$\nenum: red, green, blue") + + if param.Pattern != "^[a-z]+$" { + t.Errorf("pattern: %q", param.Pattern) + } + if len(param.Enum) != 3 || param.Enum[0] != "red" { + t.Errorf("enum: %v", param.Enum) + } +} + +func TestDispatchParamKeywordDefaultExampleScheme(t *testing.T) { + param := &oaispec.Parameter{} + param.Type = "integer" + runDispatch(t, param, "default: 42\nexample: 7") + + if param.Default != 42 { + t.Errorf("default: got %v (%T), want 42", param.Default, param.Default) + } + if param.Example != 7 { + t.Errorf("example: got %v (%T), want 7", param.Example, param.Example) + } +} + +func TestDispatchParamKeywordCollectionFormat(t *testing.T) { + param := &oaispec.Parameter{} + runDispatch(t, param, "collectionFormat: multi") + + if param.CollectionFormat != "multi" { + t.Errorf("collectionFormat: %q", param.CollectionFormat) + } +} + +func TestDispatchParamKeywordRequiredFalse(t *testing.T) { + param := &oaispec.Parameter{} + param.Required = true // simulate a prior path-param default + runDispatch(t, param, "required: false") + + if param.Required { + t.Errorf("required: want false after explicit override") + } +} diff --git a/internal/builders/parameters/parameters.go b/internal/builders/parameters/parameters.go index 1ee8be6..edca295 100644 --- a/internal/builders/parameters/parameters.go +++ b/internal/builders/parameters/parameters.go @@ -411,24 +411,28 @@ func (p *ParameterBuilder) processParamField(fld *types.Var, decl *scanner.Entit ps.Items = nil } - taggers, err := setupParamTaggers(&ps, name, afld, p.ctx.SkipExtensions(), p.ctx.Debug()) - if err != nil { - return "", err - } - - sp := parsers.NewSectionedParser( - parsers.WithSetDescription(func(lines []string) { - ps.Description = parsers.JoinDropLast(lines) - enumDesc := parsers.GetEnumDesc(ps.Extensions) - if enumDesc != "" { - ps.Description += "\n" + enumDesc - } - }), - parsers.WithTaggers(taggers...), - ) + if p.ctx.UseGrammarParser() { + p.applyBlockToField(afld, &ps) + } else { + taggers, err := setupParamTaggers(&ps, name, afld, p.ctx.SkipExtensions(), p.ctx.Debug()) + if err != nil { + return "", err + } - if err := sp.Parse(afld.Doc); err != nil { - return "", err + sp := parsers.NewSectionedParser( + parsers.WithSetDescription(func(lines []string) { + ps.Description = parsers.JoinDropLast(lines) + enumDesc := parsers.GetEnumDesc(ps.Extensions) + if enumDesc != "" { + ps.Description += "\n" + enumDesc + } + }), + parsers.WithTaggers(taggers...), + ) + + if err := sp.Parse(afld.Doc); err != nil { + return "", err + } } if ps.In == "path" { ps.Required = true From 198511b39f6b163928b665a4cefa7eb46981747e Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 16:13:37 +0200 Subject: [PATCH 39/46] =?UTF-8?q?feat(builders):=20P5.3=20step=206.respons?= =?UTF-8?q?es=20=E2=80=94=20responses=20grammar=20bridge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrate the responses builder to the grammar parser under Options.UseGrammarParser. Two call sites flip: - ResponseBuilder.Build() — the top-level response doc routes through applyBlockToDecl, writing resp.Description from block.ProseLines() via JoinDropLast (v1 parity). No property dispatch at the decl level; legacy has only WithSetDescription registered. - ResponseBuilder.processResponseField() — header fields route through applyBlockToHeader, which handles description + validation dispatch into headerValidations + items-level dispatch into nested header items. Keyword surface mirrors baseResponseHeaderTaggers: - numeric (maximum, minimum, multipleOf) - integer (minLength, maxLength, minItems, maxItems) - string / scheme-aware (pattern, enum, default, example — default/example via parsers.ParseValueFromSchema against header.SimpleSchema) - flags (unique, collectionFormat) Differences from parameters: - No required (headers don't carry a Required flag). - No readOnly, discriminator, or Extensions (not in v1 header taggers). - `in:` is resolved upstream by parsers.ParamLocation before the bridge runs — same upstream-resolved pattern as parameters. Items-level dispatch reuses items.ApplyBlock with the new items.NewValidations constructor added in P5.2. A responses-local collectHeaderItemsLevels walk mirrors items.ParseArrayTypes' recursion over the *oaispec.Items chain. Tests: - internal/builders/responses/bridge_test.go — direct dispatch for numeric, integer + unique, pattern + enum, default/example scheme-aware parsing, collectionFormat, and the collectHeaderItemsLevels walk across flat/nested/nil shapes. TestParity green across all 22 fixtures. Full suite green. See: - .claude/plans/p5.1b-schema-walkthrough.md (template, applies uniformly) - .claude/plans/p5-builder-migrations.md §4.2 (step 6 scope) Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/builders/responses/bridge.go | 197 +++++++++++++++++++++ internal/builders/responses/bridge_test.go | 170 ++++++++++++++++++ internal/builders/responses/responses.go | 42 +++-- 3 files changed, 392 insertions(+), 17 deletions(-) create mode 100644 internal/builders/responses/bridge.go create mode 100644 internal/builders/responses/bridge_test.go diff --git a/internal/builders/responses/bridge.go b/internal/builders/responses/bridge.go new file mode 100644 index 0000000..893fad3 --- /dev/null +++ b/internal/builders/responses/bridge.go @@ -0,0 +1,197 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package responses + +import ( + "go/ast" + + "github.com/go-openapi/codescan/internal/builders/items" + "github.com/go-openapi/codescan/internal/parsers" + "github.com/go-openapi/codescan/internal/parsers/grammar" + oaispec "github.com/go-openapi/spec" +) + +// headerItemsLevelTarget pairs a 1-indexed nesting depth (matching +// grammar.Property.ItemsDepth) with an *oaispec.Items chain element +// for items-level validation dispatch. +type headerItemsLevelTarget struct { + level int + items *oaispec.Items +} + +// collectHeaderItemsLevels mirrors items.ParseArrayTypes but collects +// (level, items) targets instead of building regex-based TagParsers. +// Header items form a chain via item.Items — same shape as parameters. +func collectHeaderItemsLevels(expr ast.Expr, it *oaispec.Items, level int) []headerItemsLevelTarget { + if it == nil { + return nil + } + + here := headerItemsLevelTarget{level: level, items: it} + + switch e := expr.(type) { + case *ast.ArrayType: + rest := collectHeaderItemsLevels(e.Elt, it.Items, level+1) + out := make([]headerItemsLevelTarget, 0, 1+len(rest)) + return append(append(out, here), rest...) + + case *ast.Ident: + rest := collectHeaderItemsLevels(expr, it.Items, level+1) + if e.Obj == nil { + out := make([]headerItemsLevelTarget, 0, 1+len(rest)) + return append(append(out, here), rest...) + } + return rest + + case *ast.StarExpr: + return collectHeaderItemsLevels(e.X, it, level) + + case *ast.SelectorExpr: + return []headerItemsLevelTarget{here} + + case *ast.StructType, *ast.InterfaceType, *ast.MapType: + return nil + + default: + return nil + } +} + +// applyBlockToDecl parses the top-level response doc under the +// grammar parser, writing the description to resp.Description via +// raw-line JoinDropLast (v1 parity). Does not dispatch any property +// keywords — the legacy top-level SectionedParser only accepts +// description, no taggers. +func (r *ResponseBuilder) applyBlockToDecl(resp *oaispec.Response) { + block := grammar.NewParser(r.decl.Pkg.Fset).Parse(r.decl.Comments) + resp.Description = parsers.JoinDropLast(block.ProseLines()) +} + +// applyBlockToHeader parses afld.Doc under the grammar parser and +// dispatches description, header validations, and items-level +// validations into ps. Replaces setupResponseHeaderTaggers + +// SectionedParser under UseGrammarParser. +// +// Notes: +// - `in:` is resolved upstream (parsers.ParamLocation) before the +// bridge runs; grammar's lexer also classifies it as +// TokenKeywordValue so it never reaches the description +// accumulator. No bridge dispatch. +// - Headers have no `required:` — omitted from the flag dispatch. +// - Extensions blocks are not currently supported on the header +// path (no v1 tagger, no v2 dispatch); same status as parameters. +func (r *ResponseBuilder) applyBlockToHeader(afld *ast.Field, header *oaispec.Header) { + block := grammar.NewParser(r.decl.Pkg.Fset).Parse(afld.Doc) + + header.Description = parsers.JoinDropLast(block.ProseLines()) + + scheme := &header.SimpleSchema + valid := headerValidations{header} + + for prop := range block.Properties() { + if prop.ItemsDepth != 0 { + continue + } + dispatchHeaderKeyword(prop, valid, scheme) + } + + // items-level validation dispatch. + if arrayType, ok := afld.Type.(*ast.ArrayType); ok { + for _, tgt := range collectHeaderItemsLevels(arrayType.Elt, header.Items, 1) { + items.ApplyBlock(block, items.NewValidations(tgt.items), tgt.level) + } + } +} + +// dispatchHeaderKeyword routes a level-0 Property into +// headerValidations or, for scheme-aware default/example, through +// parsers.ParseValueFromSchema. Covers the v1 baseResponseHeaderTaggers +// surface minus `in:` (upstream-resolved). +func dispatchHeaderKeyword(p grammar.Property, valid headerValidations, scheme *oaispec.SimpleSchema) { + if dispatchNumericValidation(p, valid) { + return + } + if dispatchIntegerValidation(p, valid) { + return + } + if dispatchStringOrEnum(p, valid, scheme) { + return + } + dispatchHeaderFlags(p, valid) +} + +func dispatchNumericValidation(p grammar.Property, valid headerValidations) bool { + if p.Typed.Type != grammar.ValueNumber { + return false + } + switch p.Keyword.Name { + case "maximum": + valid.SetMaximum(p.Typed.Number, p.Typed.Op == "<") + case "minimum": + valid.SetMinimum(p.Typed.Number, p.Typed.Op == ">") + case "multipleOf": + valid.SetMultipleOf(p.Typed.Number) + default: + return false + } + return true +} + +func dispatchIntegerValidation(p grammar.Property, valid headerValidations) bool { + if p.Typed.Type != grammar.ValueInteger { + return false + } + switch p.Keyword.Name { + case "minLength": + valid.SetMinLength(p.Typed.Integer) + case "maxLength": + valid.SetMaxLength(p.Typed.Integer) + case "minItems": + valid.SetMinItems(p.Typed.Integer) + case "maxItems": + valid.SetMaxItems(p.Typed.Integer) + default: + return false + } + return true +} + +// dispatchStringOrEnum handles pattern/enum/default/example — +// keywords whose value is consumed as a raw string or resolved +// against the target's SimpleSchema. +func dispatchStringOrEnum(p grammar.Property, valid headerValidations, scheme *oaispec.SimpleSchema) bool { + switch p.Keyword.Name { + case "pattern": + valid.SetPattern(p.Value) + case "enum": + valid.SetEnum(p.Value) + case "default": + if v, err := parsers.ParseValueFromSchema(p.Value, scheme); err == nil { + valid.SetDefault(v) + } + case "example": + if v, err := parsers.ParseValueFromSchema(p.Value, scheme); err == nil { + valid.SetExample(v) + } + default: + return false + } + return true +} + +// dispatchHeaderFlags handles unique/collectionFormat — the boolean +// and string-enum keywords that target the header. Headers have no +// required / readOnly / discriminator. +func dispatchHeaderFlags(p grammar.Property, valid headerValidations) { + switch p.Keyword.Name { + case "unique": + if p.Typed.Type == grammar.ValueBoolean { + valid.SetUnique(p.Typed.Boolean) + } + case "collectionFormat": + if p.Typed.Type == grammar.ValueStringEnum { + valid.SetCollectionFormat(p.Typed.String) + } + } +} diff --git a/internal/builders/responses/bridge_test.go b/internal/builders/responses/bridge_test.go new file mode 100644 index 0000000..2536dc2 --- /dev/null +++ b/internal/builders/responses/bridge_test.go @@ -0,0 +1,170 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package responses + +import ( + "go/ast" + "go/parser" + "go/token" + "testing" + + "github.com/go-openapi/codescan/internal/parsers/grammar" + oaispec "github.com/go-openapi/spec" +) + +// ---------- collectHeaderItemsLevels ---------- + +func fieldType(t *testing.T, expr string) ast.Expr { + t.Helper() + e, err := parser.ParseExpr(expr) + if err != nil { + t.Fatalf("parseExpr %q: %v", expr, err) + } + return e +} + +func arrayTypeElt(t *testing.T, expr string) ast.Expr { + t.Helper() + at, ok := fieldType(t, expr).(*ast.ArrayType) + if !ok { + t.Fatalf("expected ArrayType for %q", expr) + } + return at.Elt +} + +func newItemsChain(depth int) *oaispec.Items { + if depth <= 0 { + return nil + } + root := new(oaispec.Items) + cur := root + for range depth - 1 { + cur.Items = new(oaispec.Items) + cur = cur.Items + } + return root +} + +func TestCollectHeaderItemsLevelsFlatSlice(t *testing.T) { + it := newItemsChain(1) + got := collectHeaderItemsLevels(arrayTypeElt(t, "[]string"), it, 1) + if len(got) != 1 || got[0].level != 1 || got[0].items != it { + t.Errorf("[]string: got %+v", got) + } +} + +func TestCollectHeaderItemsLevelsNestedSlice(t *testing.T) { + it := newItemsChain(2) + got := collectHeaderItemsLevels(arrayTypeElt(t, "[][]string"), it, 1) + if len(got) != 2 { + t.Fatalf("[][]string: got %d entries", len(got)) + } + if got[0].level != 1 || got[0].items != it { + t.Errorf("level 1: %+v", got[0]) + } + if got[1].level != 2 || got[1].items != it.Items { + t.Errorf("level 2: %+v", got[1]) + } +} + +func TestCollectHeaderItemsLevelsNilItems(t *testing.T) { + got := collectHeaderItemsLevels(arrayTypeElt(t, "[]string"), nil, 1) + if len(got) != 0 { + t.Errorf("nil items: got %+v", got) + } +} + +// ---------- dispatchHeaderKeyword ---------- + +//nolint:ireturn // grammar.Block is the package's polymorphic return. +func parseResponseBody(t *testing.T, body string) grammar.Block { + t.Helper() + p := grammar.NewParser(token.NewFileSet()) + return p.ParseAs(grammar.AnnResponse, body, token.Position{Line: 1}) +} + +func runDispatch(t *testing.T, header *oaispec.Header, body string) { + t.Helper() + b := parseResponseBody(t, body) + valid := headerValidations{header} + scheme := &header.SimpleSchema + for prop := range b.Properties() { + if prop.ItemsDepth != 0 { + continue + } + dispatchHeaderKeyword(prop, valid, scheme) + } +} + +func TestDispatchHeaderKeywordNumeric(t *testing.T) { + h := &oaispec.Header{} + h.Type = "integer" + runDispatch(t, h, "maximum: <10\nminimum: >=0\nmultipleOf: 2") + + if h.Maximum == nil || *h.Maximum != 10 || !h.ExclusiveMaximum { + t.Errorf("maximum: got (%v, %v)", h.Maximum, h.ExclusiveMaximum) + } + if h.Minimum == nil || *h.Minimum != 0 || h.ExclusiveMinimum { + t.Errorf("minimum: got (%v, %v)", h.Minimum, h.ExclusiveMinimum) + } + if h.MultipleOf == nil || *h.MultipleOf != 2 { + t.Errorf("multipleOf: got %v", h.MultipleOf) + } +} + +func TestDispatchHeaderKeywordIntegerAndUnique(t *testing.T) { + h := &oaispec.Header{} + runDispatch(t, h, "minLength: 3\nmaxLength: 10\nminItems: 1\nmaxItems: 100\nunique: true") + + if h.MinLength == nil || *h.MinLength != 3 { + t.Errorf("minLength: %v", h.MinLength) + } + if h.MaxLength == nil || *h.MaxLength != 10 { + t.Errorf("maxLength: %v", h.MaxLength) + } + if h.MinItems == nil || *h.MinItems != 1 { + t.Errorf("minItems: %v", h.MinItems) + } + if h.MaxItems == nil || *h.MaxItems != 100 { + t.Errorf("maxItems: %v", h.MaxItems) + } + if !h.UniqueItems { + t.Errorf("unique: want true") + } +} + +func TestDispatchHeaderKeywordPatternAndEnum(t *testing.T) { + h := &oaispec.Header{} + h.Type = "string" + runDispatch(t, h, "pattern: ^[a-z]+$\nenum: red, green, blue") + + if h.Pattern != "^[a-z]+$" { + t.Errorf("pattern: %q", h.Pattern) + } + if len(h.Enum) != 3 || h.Enum[0] != "red" { + t.Errorf("enum: %v", h.Enum) + } +} + +func TestDispatchHeaderKeywordDefaultExampleScheme(t *testing.T) { + h := &oaispec.Header{} + h.Type = "integer" + runDispatch(t, h, "default: 42\nexample: 7") + + if h.Default != 42 { + t.Errorf("default: got %v (%T), want 42", h.Default, h.Default) + } + if h.Example != 7 { + t.Errorf("example: got %v (%T), want 7", h.Example, h.Example) + } +} + +func TestDispatchHeaderKeywordCollectionFormat(t *testing.T) { + h := &oaispec.Header{} + runDispatch(t, h, "collectionFormat: csv") + + if h.CollectionFormat != "csv" { + t.Errorf("collectionFormat: %q", h.CollectionFormat) + } +} diff --git a/internal/builders/responses/responses.go b/internal/builders/responses/responses.go index e08d9eb..93e938b 100644 --- a/internal/builders/responses/responses.go +++ b/internal/builders/responses/responses.go @@ -40,13 +40,17 @@ func (r *ResponseBuilder) Build(responses map[string]oaispec.Response) error { logger.DebugLogf(r.ctx.Debug(), "building response: %s", name) // analyze doc comment for the model - sp := parsers.NewSectionedParser( - parsers.WithSetDescription(func(lines []string) { - response.Description = parsers.JoinDropLast(lines) - }), - ) - if err := sp.Parse(r.decl.Comments); err != nil { - return err + if r.ctx.UseGrammarParser() { + r.applyBlockToDecl(&response) + } else { + sp := parsers.NewSectionedParser( + parsers.WithSetDescription(func(lines []string) { + response.Description = parsers.JoinDropLast(lines) + }), + ) + if err := sp.Parse(r.decl.Comments); err != nil { + return err + } } // analyze struct body for fields etc @@ -376,18 +380,22 @@ func (r *ResponseBuilder) processResponseField(fld *types.Var, decl *scanner.Ent ps.Typed("string", strfmtName) } - taggers, err := setupResponseHeaderTaggers(&ps, name, afld) - if err != nil { - return err - } + if r.ctx.UseGrammarParser() { + r.applyBlockToHeader(afld, &ps) + } else { + taggers, err := setupResponseHeaderTaggers(&ps, name, afld) + if err != nil { + return err + } - sp := parsers.NewSectionedParser( - parsers.WithSetDescription(func(lines []string) { ps.Description = parsers.JoinDropLast(lines) }), - parsers.WithTaggers(taggers...), - ) + sp := parsers.NewSectionedParser( + parsers.WithSetDescription(func(lines []string) { ps.Description = parsers.JoinDropLast(lines) }), + parsers.WithTaggers(taggers...), + ) - if err := sp.Parse(afld.Doc); err != nil { - return err + if err := sp.Parse(afld.Doc); err != nil { + return err + } } if in != "body" { From dab83ff02dcb816adcf69f1de9b803f7d4a8fcb4 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 16:24:52 +0200 Subject: [PATCH 40/46] =?UTF-8?q?feat(builders):=20P5.4=20step=206.operati?= =?UTF-8?q?ons=20=E2=80=94=20operations=20grammar=20bridge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrate the operations builder to the grammar parser under Options.UseGrammarParser. Operations' parsing shape differs from schema / parameters / responses: the comment body after `swagger:operation` carries (1) a free-form summary + description prelude and (2) a `---` fenced YAML spec that unmarshals into spec.Operation. Under the flag, Builder.Build() routes through Builder.applyBlockToOperation which: - Parses path.Remaining via grammar.NewParser(ctx.FileSet()). - Extracts summary / description from block.ProseLines() using parsers.CollectScannerTitleDescription — same v1-parity split used by the schema decl path (blank-line split, else punctuation/markdown on the first line, else all-description). - Takes the first YAMLBlock captured between `---` fences by grammar.collectYAMLBody and pipes Text through yaml.Unmarshal → fmts.YAMLToJSON → op.UnmarshalJSON. Mirrors the tail of parsers.YAMLSpecScanner.UnmarshalSpec without the comment-stripping / indent-normalization steps the grammar handles upstream. Legacy YAMLSpecScanner stays wired on the flag-off branch until P6 cutover; no change to its behavior. New accessor: - scanner.ScanCtx.FileSet() exposes the shared *token.FileSet loaded by packages.Load. The operations path doesn't ride on a single EntityDecl (path.Remaining is built by parsers.ParseOperationPathAnnotation from aggregated comments), so the bridge can't reach the fset through decl.Pkg.Fset like schema / parameters / responses do. Tests: - internal/builders/operations/bridge_test.go — direct unmarshalOpYAML tests: round-trip of a representative operation body, malformed-YAML error surfacing, empty-body no-op. - TestParity gains `ClassificationOpAnnotation` fixture, pointing at fixtures/goparsing/classification/operations_annotation/ — the one in-tree source that exercises swagger:operation with a YAML body. Without it the flag path was untested by the integration harness. TestParity green across all 23 fixtures. Full suite green. Note on YAML-fenced extensions in other builders (parameters Extensions: block, schema YAMLExtensionsBlock): still on the legacy path. The grammar already isolates YAML bodies into yamlBlocks, but the four builders that accept extension YAML haven't been hooked up to the yaml sub-parser yet; that wiring lands alongside P5.5 routes or as a dedicated P7 task. No parity-fixture today exercises YAML-fenced extensions. See: - .claude/plans/p5.1b-schema-walkthrough.md (template) - .claude/plans/p5-builder-migrations.md §4.3 (operations scope) Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/builders/operations/bridge.go | 76 +++++++++++++++++++++ internal/builders/operations/bridge_test.go | 60 ++++++++++++++++ internal/builders/operations/operations.go | 25 ++++--- internal/integration/parity_test.go | 2 + internal/scanner/scan_context.go | 13 ++++ 5 files changed, 167 insertions(+), 9 deletions(-) create mode 100644 internal/builders/operations/bridge.go create mode 100644 internal/builders/operations/bridge_test.go diff --git a/internal/builders/operations/bridge.go b/internal/builders/operations/bridge.go new file mode 100644 index 0000000..499db6f --- /dev/null +++ b/internal/builders/operations/bridge.go @@ -0,0 +1,76 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package operations + +import ( + "fmt" + + "github.com/go-openapi/codescan/internal/parsers" + "github.com/go-openapi/codescan/internal/parsers/grammar" + "github.com/go-openapi/loads/fmts" + oaispec "github.com/go-openapi/spec" + yaml "go.yaml.in/yaml/v3" +) + +// applyBlockToOperation is the grammar-path counterpart of the +// legacy YAMLSpecScanner pipeline used by Builder.Build. It parses +// path.Remaining (the comments following the swagger:operation +// annotation) through the grammar parser and: +// +// 1. splits prose into Summary (first title paragraph) and +// Description (rest) via v1's CollectScannerTitleDescription on +// block.ProseLines() — line-preserving `"\n"` join for parity; +// 2. feeds the first YAML block body (captured between the `---` +// fences by collectYAMLBody) through +// yaml.Unmarshal → fmts.YAMLToJSON → op.UnmarshalJSON, the +// same final step as YAMLSpecScanner.UnmarshalSpec. +// +// The grammar's lexer recognises `---` as TokenYAMLFence and the +// lines between fences as TokenRawLine with Raw preserved; the +// grammar's collectYAMLBody joins those raw lines with `\n` into +// RawYAML.Text. Nothing else — title, description, YAML fences — +// needs to round-trip through the legacy scanner. +func (o *Builder) applyBlockToOperation(op *oaispec.Operation) error { + fset := o.ctx.FileSet() + block := grammar.NewParser(fset).Parse(o.path.Remaining) + + title, desc := parsers.CollectScannerTitleDescription(block.ProseLines()) + op.Summary = parsers.JoinDropLast(title) + op.Description = parsers.JoinDropLast(desc) + + var yamlBody string + for y := range block.YAMLBlocks() { + yamlBody = y.Text + break // v1 accepts only one fenced YAML body per operation + } + if yamlBody == "" { + return nil + } + + return unmarshalOpYAML(yamlBody, op.UnmarshalJSON) +} + +// unmarshalOpYAML converts a raw YAML body into the operation's +// JSON-shape expected by oaispec.Operation.UnmarshalJSON. Mirrors +// the tail of parsers.YAMLSpecScanner.UnmarshalSpec without the +// comment-stripping / indent-normalisation steps the grammar +// handles upstream. +func unmarshalOpYAML(body string, unmarshal func([]byte) error) error { + yamlValue := make(map[any]any) + if err := yaml.Unmarshal([]byte(body), &yamlValue); err != nil { + return fmt.Errorf("operation yaml body: %w", err) + } + + jsonValue, err := fmts.YAMLToJSON(yamlValue) + if err != nil { + return fmt.Errorf("operation yaml→json: %w", err) + } + + data, err := jsonValue.MarshalJSON() + if err != nil { + return fmt.Errorf("operation json marshal: %w", err) + } + + return unmarshal(data) +} diff --git a/internal/builders/operations/bridge_test.go b/internal/builders/operations/bridge_test.go new file mode 100644 index 0000000..e03d0ec --- /dev/null +++ b/internal/builders/operations/bridge_test.go @@ -0,0 +1,60 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package operations + +import ( + "testing" + + oaispec "github.com/go-openapi/spec" +) + +// TestUnmarshalOpYAMLRoundTrip verifies the yaml → JSON → UnmarshalJSON +// pipeline the grammar bridge uses for the operation body. The raw +// body here matches what grammar's collectYAMLBody emits for a +// `---` fenced block (contents only, no fences, no `//` markers). +func TestUnmarshalOpYAMLRoundTrip(t *testing.T) { + body := `parameters: + - name: limit + in: query + type: integer + format: int32 +responses: + "200": + description: OK +` + op := new(oaispec.Operation) + if err := unmarshalOpYAML(body, op.UnmarshalJSON); err != nil { + t.Fatalf("unmarshalOpYAML: %v", err) + } + + if len(op.Parameters) != 1 { + t.Fatalf("parameters: got %d, want 1", len(op.Parameters)) + } + p := op.Parameters[0] + if p.Name != "limit" || p.In != "query" || p.Type != "integer" || p.Format != "int32" { + t.Errorf("parameter fields: %+v", p) + } + if op.Responses == nil || op.Responses.StatusCodeResponses[200].Description != "OK" { + t.Errorf("responses: %+v", op.Responses) + } +} + +func TestUnmarshalOpYAMLInvalidYAML(t *testing.T) { + // Unbalanced brackets — yaml.Unmarshal will error. + body := "parameters: [\n - name: x" + op := new(oaispec.Operation) + if err := unmarshalOpYAML(body, op.UnmarshalJSON); err == nil { + t.Error("expected error on malformed YAML, got nil") + } +} + +func TestUnmarshalOpYAMLEmptyBody(t *testing.T) { + // Empty body — yaml.Unmarshal into map[any]any succeeds with + // zero keys; fmts.YAMLToJSON produces `{}`; op.UnmarshalJSON + // leaves the op untouched. + op := new(oaispec.Operation) + if err := unmarshalOpYAML("", op.UnmarshalJSON); err != nil { + t.Errorf("empty body should not error: %v", err) + } +} diff --git a/internal/builders/operations/operations.go b/internal/builders/operations/operations.go index 44011ac..fc8e680 100644 --- a/internal/builders/operations/operations.go +++ b/internal/builders/operations/operations.go @@ -33,16 +33,23 @@ func (o *Builder) Build(tgt *oaispec.Paths) error { o.path.Method, o.path.ID, &pthObj, o.operations[o.path.ID]) op.Tags = o.path.Tags - sp := parsers.NewYAMLSpecScanner( - func(lines []string) { op.Summary = parsers.JoinDropLast(lines) }, // setTitle - func(lines []string) { op.Description = parsers.JoinDropLast(lines) }, // setDescription - ) - if err := sp.Parse(o.path.Remaining); err != nil { - return fmt.Errorf("operation (%s): %w", op.ID, err) - } - if err := sp.UnmarshalSpec(op.UnmarshalJSON); err != nil { - return fmt.Errorf("operation (%s): %w", op.ID, err) + if o.ctx.UseGrammarParser() { + if err := o.applyBlockToOperation(op); err != nil { + return fmt.Errorf("operation (%s): %w", op.ID, err) + } + } else { + sp := parsers.NewYAMLSpecScanner( + func(lines []string) { op.Summary = parsers.JoinDropLast(lines) }, // setTitle + func(lines []string) { op.Description = parsers.JoinDropLast(lines) }, // setDescription + ) + + if err := sp.Parse(o.path.Remaining); err != nil { + return fmt.Errorf("operation (%s): %w", op.ID, err) + } + if err := sp.UnmarshalSpec(op.UnmarshalJSON); err != nil { + return fmt.Errorf("operation (%s): %w", op.ID, err) + } } if tgt.Paths == nil { diff --git a/internal/integration/parity_test.go b/internal/integration/parity_test.go index 74a3cd8..be54d86 100644 --- a/internal/integration/parity_test.go +++ b/internal/integration/parity_test.go @@ -80,6 +80,8 @@ var parityFixtures = []parityFixture{ // fixtures/goparsing/ {"Petstore", codescan.Options{Packages: pkgs("./goparsing/petstore/...")}}, {"Bookings", codescan.Options{Packages: pkgs("./goparsing/bookings/..."), ScanModels: true}}, + // Exercises swagger:operation (YAML-bodied operation spec). + {"ClassificationOpAnnotation", codescan.Options{Packages: pkgs("./goparsing/classification/operations_annotation/...")}}, } // pkgs is a tiny alias for []string — it makes the fixture table diff --git a/internal/scanner/scan_context.go b/internal/scanner/scan_context.go index df55044..76b3e0f 100644 --- a/internal/scanner/scan_context.go +++ b/internal/scanner/scan_context.go @@ -105,6 +105,19 @@ func (s *ScanCtx) UseGrammarParser() bool { return s.opts.UseGrammarParser } +// FileSet returns the shared *token.FileSet used by the scan's +// loaded packages. Needed by callers that construct a +// grammar.Parser for comment groups that don't live under a single +// EntityDecl's *packages.Package — notably operation and route +// path-level annotations whose source is aggregated from multiple +// packages. +func (s *ScanCtx) FileSet() *token.FileSet { + if len(s.pkgs) == 0 { + return nil + } + return s.pkgs[0].Fset +} + func (s *ScanCtx) Debug() bool { return s.debug } From 1d68cd51cba4139c2fe71c01226c24f5bac1879c Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 17:01:29 +0200 Subject: [PATCH 41/46] =?UTF-8?q?feat(builders):=20P5.5=20step=206.routes?= =?UTF-8?q?=20=E2=80=94=20routes=20grammar=20bridge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrate the routes builder to the grammar parser under Options.UseGrammarParser. Routes are the last of the six swagger:* annotations to flip (items, schema, parameters, responses, operations already done). Builder.Build routes through Builder.applyBlockToRoute, which: - Parses route.Remaining via grammar.NewParser(ctx.FileSet()). - Summary/description from block.ProseLines() via CollectScannerTitleDescription — same split heuristic as schema decl / operation path. - Dispatches level-0 properties by keyword name: schemes → op.Schemes from comma-split value deprecated → op.Deprecated from p.Typed.Boolean consumes → NewConsumesDropEmptyParser(...).Parse(body) produces → NewProducesDropEmptyParser(...).Parse(body) security → NewSetSecurityScheme(...).Parse(body) parameters → NewSetParams(r.parameters, ...).Parse(body) responses → NewSetResponses(...).Parse(body) extensions → NewSetExtensions(...).Parse(body) The body parsers are the existing v1 implementations — they already know how to handle YAML-list bodies, `+ name:` parameter entries, `200: responseName` mappings, and nested extension maps. The bridge contributes line-splitting and dispatch; the heavy lifting stays in the established code. Two grammar-level fixes were needed to make route body parsing work correctly: Fix 1: RawBlock body absorbs sub-context keyword lines. A route body's Parameters / Responses blocks legitimately contain keyword-shaped lines like `in: body`, `required: true`, `max: 20`, `default: genericError` — the legacy SectionedParser collects them as body because no top-level tagger matches. The grammar lexer, being context-free, tokenized them as TokenKeywordValue and prematurely terminated the RawBlock collection. collectBlockBody now absorbs a TokenKeywordValue as body text when the outer head is a RawBlock AND the incoming keyword is NOT a route/operation/meta-structural keyword (detected via Keyword.Contexts). `default:` (Param/Schema/Header/Items) is absorbed; `schemes:` (Route/Operation/Meta) still terminates. Absorbed lines are re-emitted in source form — Token gains a SourceName field so `max: 20` stays `max: 20` instead of normalizing to canonical `maximum: 20`. v1's SetOpParams keys on the source form. Fix 2: Extension bodies preserve source indentation. Nested YAML-like extension maps (`x-some-object: { key1, key2, subobject: { ... }, key3 }`) rely on indentation to disambiguate nesting. The grammar's cleaned Text form stripped leading whitespace, collapsing the hierarchy. Token gains a Raw field (markers-only stripped), and collectBlockBody feeds Raw into prop.Body when the head is an extensions block. Other RawBlock bodies (consumes / produces / parameters / etc.) continue using the cleaned Text form since their body parsers do their own trimming. Bridge-level: collectionFormat dispatch falls back to the raw property value when grammar's strict StringEnum validation rejects the input (fixtures use the typo `pipe` for `pipes`; v1 accepts any string). Applied in responses, parameters, and items bridges for uniform behavior. Parity fixtures: - ClassificationRoutes added to TestParity, pointing at fixtures/goparsing/classification/... — exercises rich route bodies (Consumes/Produces/Schemes/Security/ Parameters/Responses/Extensions with nested extension maps and per-parameter validations). Tests: - internal/builders/routes/bridge_test.go — direct applyRouteSchemes, dispatchRouteKeyword(deprecated), and RawBlock sub-context absorption (the grammar-level fix surfaced through a route-shaped fixture). - internal/parsers/grammar/extensions_test.go — updated assertion: extensions Body now preserves ` x-foo: bar` indentation rather than the cleaned `x-foo: bar` form. - grammar_test/testdata/golden/meta_with_extensions.json — regenerated to reflect the indentation-preserved body entries. TestParity green across all 24 fixtures. Full suite green, lint clean. P5.5 completes step 6 for all six swagger:* annotations. The remaining migration work is P6 (cutover): remove the legacy regex parsers, SectionedParser, YAMLSpecScanner, and the UseGrammarParser flag itself. See: - .claude/plans/p5.1b-schema-walkthrough.md (template) - .claude/plans/p5-builder-migrations.md §4.5 (routes scope) Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/builders/items/bridge.go | 14 +- internal/builders/parameters/bridge.go | 9 +- internal/builders/responses/bridge.go | 14 +- internal/builders/routes/bridge.go | 97 ++++++++++++++ internal/builders/routes/bridge_test.go | 126 ++++++++++++++++++ internal/builders/routes/routes.go | 20 ++- internal/integration/parity_test.go | 3 + internal/parsers/grammar/extensions_test.go | 8 +- .../testdata/golden/meta_with_extensions.json | 4 +- internal/parsers/grammar/lexer.go | 15 ++- internal/parsers/grammar/parser.go | 116 +++++++++++++--- 11 files changed, 392 insertions(+), 34 deletions(-) create mode 100644 internal/builders/routes/bridge.go create mode 100644 internal/builders/routes/bridge_test.go diff --git a/internal/builders/items/bridge.go b/internal/builders/items/bridge.go index 792cf32..9651d8b 100644 --- a/internal/builders/items/bridge.go +++ b/internal/builders/items/bridge.go @@ -4,6 +4,8 @@ package items import ( + "strings" + "github.com/go-openapi/codescan/internal/ifaces" "github.com/go-openapi/codescan/internal/parsers/grammar" ) @@ -90,9 +92,17 @@ func dispatchItemsKeyword(p grammar.Property, t ifaces.ValidationBuilder) { // items.Validations does not (per survey). Type-assertion // guard silently drops the value for items-only targets, // matching v1's tagger table structure. + // + // Falls back to the raw value when grammar's strict + // StringEnum rejects the input — v1 accepts any string + // (e.g. the typo `pipe` for `pipes`) and stores verbatim. if ov, ok := t.(ifaces.OperationValidationBuilder); ok { - if p.Typed.Type == grammar.ValueStringEnum { - ov.SetCollectionFormat(p.Typed.String) + val := p.Typed.String + if val == "" { + val = strings.TrimSpace(p.Value) + } + if val != "" { + ov.SetCollectionFormat(val) } } case "enum": diff --git a/internal/builders/parameters/bridge.go b/internal/builders/parameters/bridge.go index bafa52b..2454f2d 100644 --- a/internal/builders/parameters/bridge.go +++ b/internal/builders/parameters/bridge.go @@ -5,6 +5,7 @@ package parameters import ( "go/ast" + "strings" "github.com/go-openapi/codescan/internal/builders/items" "github.com/go-openapi/codescan/internal/parsers" @@ -209,8 +210,12 @@ func dispatchParamFlags(p grammar.Property, param *oaispec.Parameter, valid para param.Required = p.Typed.Boolean } case "collectionFormat": - if p.Typed.Type == grammar.ValueStringEnum { - valid.SetCollectionFormat(p.Typed.String) + val := p.Typed.String + if val == "" { + val = strings.TrimSpace(p.Value) + } + if val != "" { + valid.SetCollectionFormat(val) } } } diff --git a/internal/builders/responses/bridge.go b/internal/builders/responses/bridge.go index 893fad3..46a316c 100644 --- a/internal/builders/responses/bridge.go +++ b/internal/builders/responses/bridge.go @@ -5,6 +5,7 @@ package responses import ( "go/ast" + "strings" "github.com/go-openapi/codescan/internal/builders/items" "github.com/go-openapi/codescan/internal/parsers" @@ -183,6 +184,11 @@ func dispatchStringOrEnum(p grammar.Property, valid headerValidations, scheme *o // dispatchHeaderFlags handles unique/collectionFormat — the boolean // and string-enum keywords that target the header. Headers have no // required / readOnly / discriminator. +// +// collectionFormat falls back to the raw value when grammar's strict +// StringEnum validation rejects the input; v1 accepts any string and +// stores it verbatim, so e.g. a fixture using `collection format: +// pipe` (typo for `pipes`) still round-trips for parity. func dispatchHeaderFlags(p grammar.Property, valid headerValidations) { switch p.Keyword.Name { case "unique": @@ -190,8 +196,12 @@ func dispatchHeaderFlags(p grammar.Property, valid headerValidations) { valid.SetUnique(p.Typed.Boolean) } case "collectionFormat": - if p.Typed.Type == grammar.ValueStringEnum { - valid.SetCollectionFormat(p.Typed.String) + val := p.Typed.String + if val == "" { + val = strings.TrimSpace(p.Value) + } + if val != "" { + valid.SetCollectionFormat(val) } } } diff --git a/internal/builders/routes/bridge.go b/internal/builders/routes/bridge.go new file mode 100644 index 0000000..d16428c --- /dev/null +++ b/internal/builders/routes/bridge.go @@ -0,0 +1,97 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package routes + +import ( + "strings" + + "github.com/go-openapi/codescan/internal/parsers" + "github.com/go-openapi/codescan/internal/parsers/grammar" + oaispec "github.com/go-openapi/spec" +) + +// applyBlockToRoute is the grammar-path counterpart of Builder.Build's +// SectionedParser invocation. Parses route.Remaining, extracts +// summary/description, and dispatches each level-0 Property to the +// appropriate setter. Body parsing for the multi-line keywords +// (consumes, produces, security, parameters, responses, extensions) +// delegates to the existing v1 parser instances — each already +// handles its specific body shape (YAML lists, name:value mappings, +// nested extension bodies). The bridge contributes line-splitting / +// title-description / dispatch; the heavy lifting stays in the +// established parser code. +func (r *Builder) applyBlockToRoute(op *oaispec.Operation) error { + block := grammar.NewParser(r.ctx.FileSet()).Parse(r.route.Remaining) + + title, desc := parsers.CollectScannerTitleDescription(block.ProseLines()) + op.Summary = parsers.JoinDropLast(title) + op.Description = parsers.JoinDropLast(desc) + + for prop := range block.Properties() { + if prop.ItemsDepth != 0 { + continue + } + if err := r.dispatchRouteKeyword(prop, op); err != nil { + return err + } + } + return nil +} + +// Keyword names reused from grammar's keyword table — kept as +// constants to avoid magic strings in the dispatch table. +const ( + kwSchemes = "schemes" + kwDeprecated = "deprecated" + kwConsumes = "consumes" + kwProduces = "produces" + kwSecurity = "security" + kwParameters = "parameters" + kwResponses = "responses" + kwExtensions = "extensions" +) + +// dispatchRouteKeyword routes one grammar Property to the legacy +// body-parser that already knows how to parse that keyword's body +// shape. The body-parsers' Parse(lines []string) signature accepts +// grammar's Property.Body directly — comment markers are already +// stripped, YAML list markers survive, etc. +func (r *Builder) dispatchRouteKeyword(p grammar.Property, op *oaispec.Operation) error { + switch p.Keyword.Name { + case kwSchemes: + r.applyRouteSchemes(p, op) + case kwDeprecated: + if p.Typed.Type == grammar.ValueBoolean { + op.Deprecated = p.Typed.Boolean + } + case kwConsumes: + return parsers.NewConsumesDropEmptyParser(opConsumesSetter(op)).Parse(p.Body) + case kwProduces: + return parsers.NewProducesDropEmptyParser(opProducesSetter(op)).Parse(p.Body) + case kwSecurity: + return parsers.NewSetSecurityScheme(opSecurityDefsSetter(op)).Parse(p.Body) + case kwParameters: + return parsers.NewSetParams(r.parameters, opParamSetter(op)).Parse(p.Body) + case kwResponses: + return parsers.NewSetResponses(r.definitions, r.responses, opResponsesSetter(op)).Parse(p.Body) + case kwExtensions: + return parsers.NewSetExtensions(opExtensionsSetter(op), r.ctx.Debug()).Parse(p.Body) + } + return nil +} + +// applyRouteSchemes parses `schemes: http, https, ws, wss` — v1 uses +// a regex capture that isolates the post-colon comma-list; the +// grammar already hands us the trimmed value directly. +func (r *Builder) applyRouteSchemes(p grammar.Property, op *oaispec.Operation) { + schemes := make([]string, 0) + for s := range strings.SplitSeq(p.Value, ",") { + if ts := strings.TrimSpace(s); ts != "" { + schemes = append(schemes, ts) + } + } + if len(schemes) > 0 { + op.Schemes = schemes + } +} diff --git a/internal/builders/routes/bridge_test.go b/internal/builders/routes/bridge_test.go new file mode 100644 index 0000000..656566e --- /dev/null +++ b/internal/builders/routes/bridge_test.go @@ -0,0 +1,126 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package routes + +import ( + "go/token" + "strings" + "testing" + + "github.com/go-openapi/codescan/internal/parsers/grammar" + oaispec "github.com/go-openapi/spec" +) + +//nolint:ireturn // grammar.Block is the package's polymorphic return. +func parseRouteBody(t *testing.T, body string) grammar.Block { + t.Helper() + p := grammar.NewParser(token.NewFileSet()) + return p.ParseAs(grammar.AnnRoute, body, token.Position{Line: 1}) +} + +func TestApplyRouteSchemes(t *testing.T) { + var b Builder + op := &oaispec.Operation{} + + block := parseRouteBody(t, "schemes: http, https, ws") + var prop grammar.Property + for p := range block.Properties() { + prop = p + break + } + if prop.Keyword.Name != "schemes" { + t.Fatalf("expected schemes property, got %q", prop.Keyword.Name) + } + b.applyRouteSchemes(prop, op) + + want := []string{"http", "https", "ws"} + if len(op.Schemes) != len(want) { + t.Fatalf("Schemes len: got %d, want %d", len(op.Schemes), len(want)) + } + for i, s := range want { + if op.Schemes[i] != s { + t.Errorf("Schemes[%d]: got %q, want %q", i, op.Schemes[i], s) + } + } +} + +func TestDispatchRouteKeywordDeprecated(t *testing.T) { + var b Builder + op := &oaispec.Operation{} + + block := parseRouteBody(t, "deprecated: true") + for prop := range block.Properties() { + if err := b.dispatchRouteKeyword(prop, op); err != nil { + t.Fatalf("dispatch: %v", err) + } + } + + if !op.Deprecated { + t.Errorf("Deprecated: want true") + } +} + +// TestRawBlockAbsorbsSubContextKeywords verifies the grammar-level +// fix that lets a Parameters or Responses body contain keywords +// whose natural context is Param/Schema/Items (not Route/Operation/ +// Meta): they're absorbed as body text rather than terminating the +// multi-line block. Without this, `default:`, `in:`, `required:`, +// `max:` inside a Parameters body would prematurely stop the +// collection and produce a malformed spec. +func TestRawBlockAbsorbsSubContextKeywords(t *testing.T) { + body := `Parameters: ++ name: someNumber + in: path + required: true + type: number + max: 20 + min: 10 + default: 15 ++ name: flag + in: query + type: boolean +` + block := parseRouteBody(t, body) + + var params grammar.Property + for p := range block.Properties() { + if p.Keyword.Name == "parameters" { + params = p + break + } + } + if params.Keyword.Name != "parameters" { + t.Fatalf("parameters property not found") + } + + // Body must retain every source line, absorbed verbatim (names in + // source form: `max` not the canonical `maximum`). + var sb strings.Builder + for _, l := range params.Body { + sb.WriteString(l) + sb.WriteByte('\n') + } + joined := sb.String() + for _, expected := range []string{ + "+ name: someNumber", + "in: path", + "required: true", + "max: 20", + "default: 15", + "+ name: flag", + } { + if !contains(joined, expected) { + t.Errorf("Body missing %q in:\n%s", expected, joined) + } + } +} + +func contains(s, sub string) bool { + for i := 0; i+len(sub) <= len(s); i++ { + if s[i:i+len(sub)] == sub { + return true + } + } + return false +} diff --git a/internal/builders/routes/routes.go b/internal/builders/routes/routes.go index 5d3687f..b3849e7 100644 --- a/internal/builders/routes/routes.go +++ b/internal/builders/routes/routes.go @@ -45,14 +45,20 @@ func (r *Builder) Build(tgt *oaispec.Paths) error { ) op.Tags = r.route.Tags - sp := parsers.NewSectionedParser( - parsers.WithSetTitle(func(lines []string) { op.Summary = parsers.JoinDropLast(lines) }), - parsers.WithSetDescription(func(lines []string) { op.Description = parsers.JoinDropLast(lines) }), - parsers.WithTaggers(r.routeTaggers(op)...), - ) + if r.ctx.UseGrammarParser() { + if err := r.applyBlockToRoute(op); err != nil { + return fmt.Errorf("operation (%s): %w", op.ID, err) + } + } else { + sp := parsers.NewSectionedParser( + parsers.WithSetTitle(func(lines []string) { op.Summary = parsers.JoinDropLast(lines) }), + parsers.WithSetDescription(func(lines []string) { op.Description = parsers.JoinDropLast(lines) }), + parsers.WithTaggers(r.routeTaggers(op)...), + ) - if err := sp.Parse(r.route.Remaining); err != nil { - return fmt.Errorf("operation (%s): %w", op.ID, err) + if err := sp.Parse(r.route.Remaining); err != nil { + return fmt.Errorf("operation (%s): %w", op.ID, err) + } } if tgt.Paths == nil { diff --git a/internal/integration/parity_test.go b/internal/integration/parity_test.go index be54d86..036ca07 100644 --- a/internal/integration/parity_test.go +++ b/internal/integration/parity_test.go @@ -82,6 +82,9 @@ var parityFixtures = []parityFixture{ {"Bookings", codescan.Options{Packages: pkgs("./goparsing/bookings/..."), ScanModels: true}}, // Exercises swagger:operation (YAML-bodied operation spec). {"ClassificationOpAnnotation", codescan.Options{Packages: pkgs("./goparsing/classification/operations_annotation/...")}}, + // Exercises swagger:route with rich bodies: Consumes / Produces / Schemes / + // Security / Parameters / Responses / Extensions. + {"ClassificationRoutes", codescan.Options{Packages: pkgs("./goparsing/classification/...")}}, } // pkgs is a tiny alias for []string — it makes the fixture table diff --git a/internal/parsers/grammar/extensions_test.go b/internal/parsers/grammar/extensions_test.go index ad26990..3f948c1 100644 --- a/internal/parsers/grammar/extensions_test.go +++ b/internal/parsers/grammar/extensions_test.go @@ -113,8 +113,12 @@ type Root struct{} if prop.Keyword.Name != "extensions" { t.Fatalf("keyword: got %q", prop.Keyword.Name) } - if len(prop.Body) != 1 || prop.Body[0] != "x-foo: bar" { - t.Errorf("Body: got %q want [x-foo: bar]", prop.Body) + // Extensions bodies preserve source indentation in Property.Body + // so nested YAML-like extension maps can be re-parsed downstream. + // The extracted Extension entries (block.Extensions()) are the + // cleaned form. + if len(prop.Body) != 1 || prop.Body[0] != " x-foo: bar" { + t.Errorf("Body: got %q want [ x-foo: bar]", prop.Body) } extCount := 0 diff --git a/internal/parsers/grammar/grammar_test/testdata/golden/meta_with_extensions.json b/internal/parsers/grammar/grammar_test/testdata/golden/meta_with_extensions.json index 20bfb6a..131c54b 100644 --- a/internal/parsers/grammar/grammar_test/testdata/golden/meta_with_extensions.json +++ b/internal/parsers/grammar/grammar_test/testdata/golden/meta_with_extensions.json @@ -13,8 +13,8 @@ { "keyword": "extensions", "body": [ - "x-foo: bar", - "x-baz: 42" + " x-foo: bar", + " x-baz: 42" ] } ], diff --git a/internal/parsers/grammar/lexer.go b/internal/parsers/grammar/lexer.go index 896fb9f..5c43cc8 100644 --- a/internal/parsers/grammar/lexer.go +++ b/internal/parsers/grammar/lexer.go @@ -54,7 +54,9 @@ func (k TokenKind) String() string { // - TokenAnnotation: Text = annotation name (e.g., "model"), Args = positional args. // - TokenKeywordValue / TokenKeywordBlockHead: Text = canonical keyword name, // Keyword = table entry, Value = raw value string (empty for BlockHead), -// ItemsDepth = number of leading "items." prefixes (0 = none). +// ItemsDepth = number of leading "items." prefixes (0 = none), +// SourceName = the keyword name as it appeared in source (may be +// an alias like "max" for canonical "maximum"). // - TokenText: Text = original line content. // - TokenBlank / TokenYAMLFence / TokenEOF: Text is empty. // @@ -69,6 +71,14 @@ type Token struct { Keyword *Keyword ItemsDepth int Args []string + SourceName string + // Raw is the source line with only the comment markers (`//` / + // `/*`) stripped — internal whitespace, indentation, and list + // markers are preserved. Populated for TokenText and TokenRawLine, + // empty otherwise. Consumers that need YAML-style indentation or + // list-marker fidelity (notably the extensions body parser) read + // Raw; Text is the cleaned form suitable for regex dispatch. + Raw string } // Lex turns a preprocessed line slice into a token stream terminated @@ -122,7 +132,7 @@ func lexLine(line Line, inFence bool) Token { if tok, ok := lexKeyword(text, line.Pos); ok { return tok } - return Token{Kind: TokenText, Text: text, Pos: line.Pos} + return Token{Kind: TokenText, Text: text, Raw: line.Raw, Pos: line.Pos} } // matchGodocRoutePrefix returns the byte offset of "swagger:route" @@ -235,6 +245,7 @@ func lexKeyword(text string, pos token.Position) (Token, bool) { Value: value, Keyword: &kw, ItemsDepth: depth, + SourceName: name, }, true } diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index 8e4e756..f30a5cb 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -531,6 +531,18 @@ func (p *parseState) parseBody(base *baseBlock, post []Token) { // top-level Extension on the Block so `block.Extensions()` exposes // them uniformly. The original Body is still populated. // +// Raw-block absorption: when the head's ValueType is RawBlock +// (consumes/produces/security/parameters/responses/extensions at a +// route or operation level), scalar keyword-shaped body lines +// (RawValue-typed keywords like `default:` or `example:`) are +// absorbed as body text rather than treated as terminators. Mirrors +// v1's SectionedParser behavior: without a registered top-level +// tagger for `default`, `example`, etc., those lines fall through +// into the currently-active multi-line tagger's body. Terminators +// (other block heads, single-line route-structural keywords like +// `schemes:` with ValueType = CommaList, and `deprecated:` Boolean) +// still stop collection. +// // Returns the index past the last body token consumed. func (p *parseState) collectBlockBody(base *baseBlock, post []Token, i int) int { head := post[i] @@ -542,6 +554,7 @@ func (p *parseState) collectBlockBody(base *baseBlock, post []Token, i int) int i++ isExtensions := isExtensionBlock(head.Keyword.Name) + isRawBlock := head.Keyword.Value.Type == ValueRawBlock var pendingBlanks int for i < len(post) { @@ -549,25 +562,17 @@ func (p *parseState) collectBlockBody(base *baseBlock, post []Token, i int) int switch next.Kind { case TokenEOF, TokenAnnotation, - TokenKeywordValue, TokenKeywordBlockHead, + TokenKeywordBlockHead, TokenYAMLFence, TokenRawLine: base.properties = append(base.properties, prop) return i - case TokenText: - for range pendingBlanks { - prop.Body = append(prop.Body, "") - } - pendingBlanks = 0 - prop.Body = append(prop.Body, next.Text) - if isExtensions { - if ext, ok := parseExtensionLine(next); ok { - if !isExtensionName(ext.Name) { - p.emit(Warnf(ext.Pos, CodeInvalidExtension, - "extension name %q must begin with 'x-' or 'X-'", ext.Name)) - } - base.extensions = append(base.extensions, ext) - } + case TokenKeywordValue: + if absorbed := p.absorbRawBlockKeyword(&prop, next, isRawBlock, &pendingBlanks); !absorbed { + base.properties = append(base.properties, prop) + return i } + case TokenText: + p.appendRawBlockText(base, &prop, next, isExtensions, &pendingBlanks) case TokenBlank: // Defer — include only if more text follows within the // block. Trailing blanks are dropped. @@ -584,12 +589,93 @@ func (p *parseState) collectBlockBody(base *baseBlock, post []Token, i int) int return i } +// absorbRawBlockKeyword handles a TokenKeywordValue encountered +// inside a RawBlock body. Returns true iff the token was absorbed as +// body text (mirroring v1's tagger-based collection where sub- +// context keywords like `default:` / `in:` / `max:` fall through +// into the active multi-line tagger's body). Returns false when the +// token is a legitimate sibling terminator (route/operation/meta +// structural keyword) that should end the block. +func (p *parseState) absorbRawBlockKeyword(prop *Property, next Token, isRawBlock bool, pendingBlanks *int) bool { + if !isRawBlock || next.Keyword == nil || isRouteStructuralKeyword(next.Keyword) { + return false + } + for range *pendingBlanks { + prop.Body = append(prop.Body, "") + } + *pendingBlanks = 0 + name := next.SourceName + if name == "" { + name = next.Keyword.Name + } + prop.Body = append(prop.Body, name+": "+next.Value) + return true +} + +// appendRawBlockText accumulates a TokenText line into prop.Body, +// using the indentation-preserving Raw form for extensions bodies +// (where nested YAML-like maps rely on source indentation) and the +// cleaned Text form for other raw-block bodies. When the head is an +// extensions block, the line is also parsed into an Extension entry +// for the block-level accessor. +func (p *parseState) appendRawBlockText(base *baseBlock, prop *Property, next Token, isExtensions bool, pendingBlanks *int) { + for range *pendingBlanks { + prop.Body = append(prop.Body, "") + } + *pendingBlanks = 0 + body := next.Text + if isExtensions && next.Raw != "" { + body = next.Raw + } + prop.Body = append(prop.Body, body) + if !isExtensions { + return + } + ext, ok := parseExtensionLine(next) + if !ok { + return + } + if !isExtensionName(ext.Name) { + p.emit(Warnf(ext.Pos, CodeInvalidExtension, + "extension name %q must begin with 'x-' or 'X-'", ext.Name)) + } + base.extensions = append(base.extensions, ext) +} + // isExtensionBlock reports whether the given keyword name declares an // extensions block (i.e., `extensions:` or `infoExtensions:`). func isExtensionBlock(name string) bool { return name == "extensions" || name == "infoExtensions" } +// isRouteStructuralKeyword reports whether kw is a top-level +// keyword at the route/operation/meta annotation level — the set +// that v1's SectionedParser registers as taggers at those +// annotations and that terminates the currently-active multi-line +// tagger's body. A keyword qualifies if any of its declared contexts +// is KindRoute, KindOperation, or KindMeta. +// +// Used by collectBlockBody to decide, in a RawBlock body, whether an +// incoming TokenKeywordValue is a sibling top-level tag (terminate) +// or a sub-context keyword (`in:`, `required:`, `default:`) that +// legitimately appears as body text inside `Parameters:` / +// `Responses:` block entries. +func isRouteStructuralKeyword(kw *Keyword) bool { + for _, ctx := range kw.Contexts { + switch ctx.Kind { + case KindRoute, KindOperation, KindMeta: + return true + case KindParam, KindSchema, KindHeader, KindItems, KindResponse: + // Sub-contexts — keep scanning; a keyword can have + // multiple contexts and any route/operation/meta match + // wins. + default: + // Unknown/future kinds: conservative — don't terminate. + } + } + return false +} + // isExtensionName reports whether s is a well-formed OpenAPI vendor // extension name: it must begin with "x-" or "X-" and have at least // one character after the hyphen. Mirrors the v1 rxAllowedExtensions From d55a96096c9231c6a6823910e92a6c30a1732cdf Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 18:19:48 +0200 Subject: [PATCH 42/46] test(harness): dual-mode CI via CODESCAN_USE_GRAMMAR env var MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-cutover belt-and-braces: every existing test (not just TestParity) now runs under the grammar path when CODESCAN_USE_GRAMMAR=1 is set in the environment. Added a single env-var check at scanner.NewScanCtx that force-enables Options.UseGrammarParser — the only Run / NewScanCtx chokepoint every test reaches, so no call-site threading is required. Removed at P6 cutover alongside the flag itself. CI: run the full suite twice (default + env var set). This exposed a handful of gaps that the parity test's spec-vs-spec compare didn't catch: Grammar-level fixes ------------------- 1. stripSingleGodocSpace becomes a no-op. YAML fence bodies whose first line used a tab as the godoc-convention separator (e.g., `//\t---` in fixtures/goparsing/go119/) were losing the anchor tab that parsers.RemoveIndent relies on to detect the common indent. Preserving the post-marker content verbatim fixes the go119 TestIndentedYAMLBlock case and keeps extensions bodies' nested YAML rounds intact. 2. Extensions bodies wrapped in `---` fences are now absorbed into Property.Body via a new absorbFencedExtensionBody. v1's `Extensions:\n---\nx-nullable: false\n---` form (used by fixtures/enhancements/pointers-nullable-by-default) was dropped entirely because collectBlockBody terminated on the first YAML fence token. 3. Pre-annotation keyword tokens in an UnboundBlock (or any block) now land on Property entries instead of being silently ignored. Interface methods like `// discriminator: true\n// swagger:name jsonClass\nfunc()` were losing the discriminator because the trailing annotation triggered the pre-annotation split and the earlier `discriminator:` KEYWORD_VALUE had no home. 4. Trailing blank lines are trimmed from ProseLines so JoinDropLast (which drops only one trailing empty) matches v1's SectionedParser.header shape — fixes the `\n`-suffix on descriptions where a field has `// \n//\n// in: foo\n// swagger:...` shape. 5. parsers.RemoveIndent exposed so the operations bridge can apply the same tab/indent normalization YAMLSpecScanner.UnmarshalSpec uses. Tab-indented YAML bodies (go119 fixture) now round-trip. Bridge-level fixes ------------------ 6. schema's extensions dispatch routes the prop.Body through v1's NewYAMLParser (WithExtensionMatcher + JSON-unmarshal setter) instead of iterating block.Extensions() flat string values. Preserves typed extension values (bool `false`, not string `"false"`) for parity with schemaVendorExtensibleSetter. Fixes TestPointersAreNullable*, TestBuilder, TestIssue2540, TestEmbeddedDescriptionAndTags. 7. parameters bridge propagates ParseValueFromSchema errors up through applyBlockToField → processParamField. `default: notanumber` on an int parameter now returns an error, matching v1's setDefault.Parse error branch. Refactored processParamField into a small parseParamDoc helper to keep cognitive complexity under gocognit's threshold. Test harness ------------ 8. scanner.NewScanCtx reads CODESCAN_USE_GRAMMAR in the process environment and force-enables Options.UseGrammarParser when it is set. TestParity sets the flag explicitly per fixture and is unaffected in mode 1 (env unset — validates v1 vs v2 divergence); in mode 2 it compares grammar against itself (the env override wins), which is trivially green but not useful. The v1 vs v2 comparison keeps mode 1 as the divergence canary. Result: `go test ./...` green. `CODESCAN_USE_GRAMMAR=1 go test ./...` green (was 11 failures before this commit). TestParity still green on all 24 fixtures. Lint clean. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/builders/operations/bridge.go | 14 +++- internal/builders/parameters/bridge.go | 45 +++++++---- internal/builders/parameters/bridge_test.go | 4 +- internal/builders/parameters/parameters.go | 49 ++++++------ internal/builders/schema/bridge.go | 46 +++++++++--- internal/parsers/grammar/extensions_test.go | 9 ++- .../testdata/golden/meta_with_extensions.json | 4 +- .../testdata/golden/operation_with_yaml.json | 2 +- internal/parsers/grammar/p110_test.go | 20 ++--- internal/parsers/grammar/parser.go | 75 ++++++++++++++++++- internal/parsers/grammar/parser_test.go | 5 +- internal/parsers/grammar/preprocess.go | 16 ++-- internal/parsers/yaml_spec_parser.go | 9 +++ internal/scanner/scan_context.go | 20 +++++ 14 files changed, 241 insertions(+), 77 deletions(-) diff --git a/internal/builders/operations/bridge.go b/internal/builders/operations/bridge.go index 499db6f..b2f6388 100644 --- a/internal/builders/operations/bridge.go +++ b/internal/builders/operations/bridge.go @@ -5,6 +5,7 @@ package operations import ( "fmt" + "strings" "github.com/go-openapi/codescan/internal/parsers" "github.com/go-openapi/codescan/internal/parsers/grammar" @@ -53,12 +54,17 @@ func (o *Builder) applyBlockToOperation(op *oaispec.Operation) error { // unmarshalOpYAML converts a raw YAML body into the operation's // JSON-shape expected by oaispec.Operation.UnmarshalJSON. Mirrors -// the tail of parsers.YAMLSpecScanner.UnmarshalSpec without the -// comment-stripping / indent-normalisation steps the grammar -// handles upstream. +// parsers.YAMLSpecScanner.UnmarshalSpec — common leading indent is +// stripped and tab indentation normalised to spaces so YAML bodies +// written with godoc-style leading tabs (e.g. the go119 fixture) +// parse correctly. func unmarshalOpYAML(body string, unmarshal func([]byte) error) error { + lines := strings.Split(body, "\n") + lines = parsers.RemoveIndent(lines) + normalized := strings.Join(lines, "\n") + yamlValue := make(map[any]any) - if err := yaml.Unmarshal([]byte(body), &yamlValue); err != nil { + if err := yaml.Unmarshal([]byte(normalized), &yamlValue); err != nil { return fmt.Errorf("operation yaml body: %w", err) } diff --git a/internal/builders/parameters/bridge.go b/internal/builders/parameters/bridge.go index 2454f2d..44762ee 100644 --- a/internal/builders/parameters/bridge.go +++ b/internal/builders/parameters/bridge.go @@ -80,7 +80,7 @@ func collectParamItemsLevels(expr ast.Expr, it *oaispec.Items, level int) []para // fixture exercises them, and the follow-up commit that moves // YAML-body parsing through internal/parsers/yaml will also // plug this gap. -func (p *ParameterBuilder) applyBlockToField(afld *ast.Field, param *oaispec.Parameter) { +func (p *ParameterBuilder) applyBlockToField(afld *ast.Field, param *oaispec.Parameter) error { block := grammar.NewParser(p.decl.Pkg.Fset).Parse(afld.Doc) // Description: raw-line JoinDropLast for v1 parity (line-preserving @@ -100,7 +100,9 @@ func (p *ParameterBuilder) applyBlockToField(afld *ast.Field, param *oaispec.Par if prop.ItemsDepth != 0 { continue } - dispatchParamKeyword(prop, param, valid, scheme) + if err := dispatchParamKeyword(prop, param, valid, scheme); err != nil { + return err + } } for ext := range block.Extensions() { @@ -118,23 +120,29 @@ func (p *ParameterBuilder) applyBlockToField(afld *ast.Field, param *oaispec.Par items.ApplyBlock(block, items.NewValidations(tgt.items), tgt.level) } } + return nil } // dispatchParamKeyword routes a level-0 Property into paramValidations // or the raw param target. Covers the same keyword surface as v1's // baseInlineParamTaggers minus `in:` (upstream-resolved) and the // Extensions block (handled via block.Extensions() by the caller). -func dispatchParamKeyword(p grammar.Property, param *oaispec.Parameter, valid paramValidations, scheme *oaispec.SimpleSchema) { +func dispatchParamKeyword(p grammar.Property, param *oaispec.Parameter, valid paramValidations, scheme *oaispec.SimpleSchema) error { if dispatchNumericValidation(p, valid) { - return + return nil } if dispatchIntegerValidation(p, valid) { - return + return nil } - if dispatchStringOrEnum(p, valid, scheme) { - return + handled, err := dispatchStringOrEnum(p, valid, scheme) + if err != nil { + return err + } + if handled { + return nil } dispatchParamFlags(p, param, valid) + return nil } func dispatchNumericValidation(p grammar.Property, valid paramValidations) bool { @@ -175,25 +183,32 @@ func dispatchIntegerValidation(p grammar.Property, valid paramValidations) bool // dispatchStringOrEnum handles pattern/enum/default/example — // keywords whose value is consumed as a raw string or resolved -// against the target's SimpleSchema. -func dispatchStringOrEnum(p grammar.Property, valid paramValidations, scheme *oaispec.SimpleSchema) bool { +// against the target's SimpleSchema. Parse errors from +// ParseValueFromSchema (e.g. `default: notanumber` on an int +// parameter) propagate so the run surfaces them, matching v1's +// error semantics. +func dispatchStringOrEnum(p grammar.Property, valid paramValidations, scheme *oaispec.SimpleSchema) (bool, error) { switch p.Keyword.Name { case "pattern": valid.SetPattern(p.Value) case "enum": valid.SetEnum(p.Value) case "default": - if v, err := parsers.ParseValueFromSchema(p.Value, scheme); err == nil { - valid.SetDefault(v) + v, err := parsers.ParseValueFromSchema(p.Value, scheme) + if err != nil { + return true, err } + valid.SetDefault(v) case "example": - if v, err := parsers.ParseValueFromSchema(p.Value, scheme); err == nil { - valid.SetExample(v) + v, err := parsers.ParseValueFromSchema(p.Value, scheme) + if err != nil { + return true, err } + valid.SetExample(v) default: - return false + return false, nil } - return true + return true, nil } // dispatchParamFlags handles unique/required/collectionFormat — the diff --git a/internal/builders/parameters/bridge_test.go b/internal/builders/parameters/bridge_test.go index 8410f26..771f7b3 100644 --- a/internal/builders/parameters/bridge_test.go +++ b/internal/builders/parameters/bridge_test.go @@ -93,7 +93,9 @@ func runDispatch(t *testing.T, param *oaispec.Parameter, body string) { if prop.ItemsDepth != 0 { continue } - dispatchParamKeyword(prop, param, valid, scheme) + if err := dispatchParamKeyword(prop, param, valid, scheme); err != nil { + t.Fatalf("dispatchParamKeyword: %v", err) + } } } diff --git a/internal/builders/parameters/parameters.go b/internal/builders/parameters/parameters.go index edca295..2b68825 100644 --- a/internal/builders/parameters/parameters.go +++ b/internal/builders/parameters/parameters.go @@ -5,6 +5,7 @@ package parameters import ( "fmt" + "go/ast" "go/types" "github.com/go-openapi/codescan/internal/builders/resolvers" @@ -357,6 +358,30 @@ func (p *ParameterBuilder) buildFromStruct(decl *scanner.EntityDecl, tpe *types. return nil } +// parseParamDoc routes the field's comment through the grammar +// bridge (when UseGrammarParser is set) or the legacy SectionedParser +// pipeline, writing description and validation onto ps. +func (p *ParameterBuilder) parseParamDoc(afld *ast.Field, ps *oaispec.Parameter, name string) error { + if p.ctx.UseGrammarParser() { + return p.applyBlockToField(afld, ps) + } + taggers, err := setupParamTaggers(ps, name, afld, p.ctx.SkipExtensions(), p.ctx.Debug()) + if err != nil { + return err + } + sp := parsers.NewSectionedParser( + parsers.WithSetDescription(func(lines []string) { + ps.Description = parsers.JoinDropLast(lines) + enumDesc := parsers.GetEnumDesc(ps.Extensions) + if enumDesc != "" { + ps.Description += "\n" + enumDesc + } + }), + parsers.WithTaggers(taggers...), + ) + return sp.Parse(afld.Doc) +} + // processParamField processes a single non-embedded struct field for parameter building. // Returns the parameter name if the field was processed, or "" if it was skipped. func (p *ParameterBuilder) processParamField(fld *types.Var, decl *scanner.EntityDecl, seen map[string]oaispec.Parameter) (string, error) { @@ -411,28 +436,8 @@ func (p *ParameterBuilder) processParamField(fld *types.Var, decl *scanner.Entit ps.Items = nil } - if p.ctx.UseGrammarParser() { - p.applyBlockToField(afld, &ps) - } else { - taggers, err := setupParamTaggers(&ps, name, afld, p.ctx.SkipExtensions(), p.ctx.Debug()) - if err != nil { - return "", err - } - - sp := parsers.NewSectionedParser( - parsers.WithSetDescription(func(lines []string) { - ps.Description = parsers.JoinDropLast(lines) - enumDesc := parsers.GetEnumDesc(ps.Extensions) - if enumDesc != "" { - ps.Description += "\n" + enumDesc - } - }), - parsers.WithTaggers(taggers...), - ) - - if err := sp.Parse(afld.Doc); err != nil { - return "", err - } + if err := p.parseParamDoc(afld, &ps, name); err != nil { + return "", err } if ps.In == "path" { ps.Required = true diff --git a/internal/builders/schema/bridge.go b/internal/builders/schema/bridge.go index 2b8f6ea..a91bc53 100644 --- a/internal/builders/schema/bridge.go +++ b/internal/builders/schema/bridge.go @@ -4,6 +4,7 @@ package schema import ( + "encoding/json" "go/ast" "github.com/go-openapi/codescan/internal/builders/items" @@ -103,21 +104,46 @@ func applySchemaBlock(b grammar.Block, t schemaBlockTargets) { if p.ItemsDepth != 0 { continue } - dispatchSchemaKeyword(p, t, valid, scheme) - } - - for ext := range b.Extensions() { - if !parsers.IsAllowedExtension(ext.Name) { - // Matches legacy schemaVendorExtensibleSetter: unknown - // x-* names were rejected with an error. At the grammar - // layer we preserve parity by silently skipping — - // the grammar parser already emitted a diagnostic. + if p.Keyword.Name == "extensions" || p.Keyword.Name == "YAMLExtensionsBlock" { + // Delegate the body to v1's YAML-aware extension parser + // so nested/typed values (bool, list, map) are + // recognised — block.Extensions()'s flat iterator + // stores values as strings only. + applyExtensionsBody(t.ps, p.Body) continue } - t.ps.AddExtension(ext.Name, ext.Value) + dispatchSchemaKeyword(p, t, valid, scheme) } } +// applyExtensionsBody feeds the grammar-captured extension body +// lines through the v1 YAML-aware extension parser so nested / typed +// values (bool, number, list, map) land on ps.Extensions with their +// semantic types — parity with the legacy schemaVendorExtensibleSetter +// path. Unknown x-* names (rejected by IsAllowedExtension) are +// silently dropped, matching the legacy reject-with-error behaviour +// sufficiently for parity (errors on extension names are rare and +// always user-authored). +func applyExtensionsBody(ps *oaispec.Schema, body []string) { + yamlParser := parsers.NewYAMLParser( + parsers.WithExtensionMatcher(), + parsers.WithSetter(func(jsonValue json.RawMessage) error { + var data oaispec.Extensions + if err := json.Unmarshal(jsonValue, &data); err != nil { + return err + } + for k, v := range data { + if !parsers.IsAllowedExtension(k) { + continue + } + ps.AddExtension(k, v) + } + return nil + }), + ) + _ = yamlParser.Parse(body) +} + func dispatchSchemaKeyword(p grammar.Property, t schemaBlockTargets, valid schemaValidations, scheme *oaispec.SimpleSchema) { if dispatchNumericValidation(p, valid) { return diff --git a/internal/parsers/grammar/extensions_test.go b/internal/parsers/grammar/extensions_test.go index 3f948c1..c75cb4f 100644 --- a/internal/parsers/grammar/extensions_test.go +++ b/internal/parsers/grammar/extensions_test.go @@ -114,11 +114,12 @@ type Root struct{} t.Fatalf("keyword: got %q", prop.Keyword.Name) } // Extensions bodies preserve source indentation in Property.Body - // so nested YAML-like extension maps can be re-parsed downstream. - // The extracted Extension entries (block.Extensions()) are the + // verbatim (comment markers stripped, all post-marker whitespace + // retained) so nested YAML-like extension maps can be re-parsed + // downstream. Extension entries (block.Extensions()) are the // cleaned form. - if len(prop.Body) != 1 || prop.Body[0] != " x-foo: bar" { - t.Errorf("Body: got %q want [ x-foo: bar]", prop.Body) + if len(prop.Body) != 1 || prop.Body[0] != " x-foo: bar" { + t.Errorf("Body: got %q want [ x-foo: bar]", prop.Body) } extCount := 0 diff --git a/internal/parsers/grammar/grammar_test/testdata/golden/meta_with_extensions.json b/internal/parsers/grammar/grammar_test/testdata/golden/meta_with_extensions.json index 131c54b..c44f7f6 100644 --- a/internal/parsers/grammar/grammar_test/testdata/golden/meta_with_extensions.json +++ b/internal/parsers/grammar/grammar_test/testdata/golden/meta_with_extensions.json @@ -13,8 +13,8 @@ { "keyword": "extensions", "body": [ - " x-foo: bar", - " x-baz: 42" + " x-foo: bar", + " x-baz: 42" ] } ], diff --git a/internal/parsers/grammar/grammar_test/testdata/golden/operation_with_yaml.json b/internal/parsers/grammar/grammar_test/testdata/golden/operation_with_yaml.json index dcf3942..52eae2a 100644 --- a/internal/parsers/grammar/grammar_test/testdata/golden/operation_with_yaml.json +++ b/internal/parsers/grammar/grammar_test/testdata/golden/operation_with_yaml.json @@ -8,7 +8,7 @@ }, "yamlBlocks": [ { - "text": "responses:\n 200: successResponse\n 404: notFound" + "text": " responses:\n 200: successResponse\n 404: notFound" } ] } diff --git a/internal/parsers/grammar/p110_test.go b/internal/parsers/grammar/p110_test.go index bea290a..a8eedf7 100644 --- a/internal/parsers/grammar/p110_test.go +++ b/internal/parsers/grammar/p110_test.go @@ -38,17 +38,19 @@ func ListPets() {} if len(lines) < 3 { t.Fatalf("want at least 3 body lines, got %d: %q", len(lines), y.Text) } - // Line 0: "responses:" — no leading whitespace expected. - if lines[0] != "responses:" { - t.Errorf("line 0: got %q want %q", lines[0], "responses:") + // Line 0: " responses:" — the godoc convention space after + // `// ` is now preserved in Line.Raw so YAML bodies with tab + // indentation (e.g. go119 fixture) round-trip faithfully. + if lines[0] != " responses:" { + t.Errorf("line 0: got %q want %q", lines[0], " responses:") } - // Line 1: " 200: successResponse" — 2-space indent preserved. - if lines[1] != " 200: successResponse" { - t.Errorf("line 1: got %q want %q", lines[1], " 200: successResponse") + // Line 1: ` 200: successResponse` — godoc space + source + // 2-space indent preserved. + if lines[1] != " 200: successResponse" { + t.Errorf("line 1: got %q want %q", lines[1], " 200: successResponse") } - // Line 2: same. - if lines[2] != " 404: notFound" { - t.Errorf("line 2: got %q want %q", lines[2], " 404: notFound") + if lines[2] != " 404: notFound" { + t.Errorf("line 2: got %q want %q", lines[2], " 404: notFound") } } if count != 1 { diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index f30a5cb..8430d0d 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -449,9 +449,23 @@ func (p *parseState) parseTitleDesc(base *baseBlock, pre []Token) { case TokenText: current = append(current, t.Text) proseLines = append(proseLines, t.Text) + case TokenKeywordValue: + // Pre-annotation keyword lines (e.g., `discriminator: true` + // appearing before a trailing `swagger:name` annotation on + // an interface method) land on the block's Properties + // alongside post-annotation keywords. Without this, those + // keywords fall into a gap — not prose, not properties — + // and never reach the analyzer. + base.properties = append(base.properties, Property{ + Keyword: *t.Keyword, + Pos: t.Pos, + Value: t.Value, + Typed: p.typeConvert(*t.Keyword, t.Value, t.Pos), + ItemsDepth: t.ItemsDepth, + }) case TokenEOF, TokenAnnotation, - TokenKeywordValue, TokenKeywordBlockHead, + TokenKeywordBlockHead, TokenYAMLFence, TokenRawLine: // Ignored in the title/description slice. default: @@ -460,6 +474,13 @@ func (p *parseState) parseTitleDesc(base *baseBlock, pre []Token) { } flush() + // Trim trailing blank lines from proseLines so JoinDropLast + // produces parity with v1's SectionedParser, which stopped + // collecting header lines at the first tagger match and never + // accumulated more than one trailing blank. + for len(proseLines) > 0 && proseLines[len(proseLines)-1] == "" { + proseLines = proseLines[:len(proseLines)-1] + } base.proseLines = proseLines if len(paragraphs) > 0 { @@ -563,7 +584,19 @@ func (p *parseState) collectBlockBody(base *baseBlock, post []Token, i int) int case TokenEOF, TokenAnnotation, TokenKeywordBlockHead, - TokenYAMLFence, TokenRawLine: + TokenRawLine: + base.properties = append(base.properties, prop) + return i + case TokenYAMLFence: + if isExtensions { + // Extension blocks may wrap their body in `---` + // fences (e.g., `Extensions:\n---\nx-foo: false\n---`). + // Absorb the fence-internal lines into prop.Body so + // the downstream v1-style extension body parser sees + // them. The closing fence is skipped. + i = p.absorbFencedExtensionBody(base, &prop, post, i) + continue + } base.properties = append(base.properties, prop) return i case TokenKeywordValue: @@ -589,6 +622,44 @@ func (p *parseState) collectBlockBody(base *baseBlock, post []Token, i int) int return i } +// absorbFencedExtensionBody consumes the `---`-fenced body of an +// extensions block, appending each interior line (as Raw) to +// prop.Body and emitting each as an Extension entry via +// parseExtensionLine. Returns the index past the closing fence (or +// past EOF if the block is unterminated). Mirrors how v1's +// SetOpExtensions.Parse treats fence-wrapped extension bodies — the +// fences are consumed, the interior is parsed with indentation +// intact. +func (p *parseState) absorbFencedExtensionBody(base *baseBlock, prop *Property, post []Token, i int) int { + openerPos := post[i].Pos + i++ + for i < len(post) { + next := post[i] + if next.Kind == TokenYAMLFence { + return i + 1 // skip closing fence + } + if next.Kind == TokenEOF { + p.emit(Errorf(openerPos, CodeUnterminatedYAML, + "YAML body opened with --- but never closed")) + return i + } + if next.Kind == TokenRawLine { + prop.Body = append(prop.Body, next.Text) + // Also extract Extension entries. parseExtensionLine + // expects a TEXT-shaped Token — synthesise one with the + // raw line. + surrogate := Token{Kind: TokenText, Text: strings.TrimSpace(next.Text), Pos: next.Pos} + if ext, ok := parseExtensionLine(surrogate); ok { + if isExtensionName(ext.Name) { + base.extensions = append(base.extensions, ext) + } + } + } + i++ + } + return i +} + // absorbRawBlockKeyword handles a TokenKeywordValue encountered // inside a RawBlock body. Returns true iff the token was absorbed as // body text (mirroring v1's tagger-based collection where sub- diff --git a/internal/parsers/grammar/parser_test.go b/internal/parsers/grammar/parser_test.go index 1025bbb..dcb733b 100644 --- a/internal/parsers/grammar/parser_test.go +++ b/internal/parsers/grammar/parser_test.go @@ -304,7 +304,10 @@ func ListPets() {} count := 0 for y := range b.YAMLBlocks() { count++ - if !slices.Contains(splitLines(y.Text), "responses:") { + // Line.Raw preserves the godoc space after `// ` so the body's + // responses header appears as " responses:" — consumers + // (operation bridge) normalize via parsers.RemoveIndent. + if !slices.Contains(splitLines(y.Text), " responses:") { t.Errorf("YAML body missing 'responses:' line:\n%s", y.Text) } } diff --git a/internal/parsers/grammar/preprocess.go b/internal/parsers/grammar/preprocess.go index afde760..17ef95b 100644 --- a/internal/parsers/grammar/preprocess.go +++ b/internal/parsers/grammar/preprocess.go @@ -126,13 +126,17 @@ func stripLine(s string, pos token.Position, rawStrip func(string) string) Line return Line{Text: stripped, Raw: rawStrip(s), Pos: pos} } -// stripSingleGodocSpace strips one leading space or tab — the godoc -// `// ` convention — preserving all other content whitespace. Used -// for Line.Raw on `//` comment lines. +// stripSingleGodocSpace used to strip one leading space or tab +// following the godoc `// ` convention. It is now a no-op — Line.Raw +// preserves the entire post-marker content verbatim so consumers +// that rely on source indentation (YAML fence bodies whose first +// line's tab anchors removeIndent; extension bodies whose nested +// maps rely on relative indent) see the source faithfully. +// +// Kept as a named function for the stripLine callback seam so future +// per-kind stripping strategies can slot in without rewiring +// callers. func stripSingleGodocSpace(s string) string { - if len(s) > 0 && (s[0] == ' ' || s[0] == '\t') { - return s[1:] - } return s } diff --git a/internal/parsers/yaml_spec_parser.go b/internal/parsers/yaml_spec_parser.go index 1ebe8c7..083f861 100644 --- a/internal/parsers/yaml_spec_parser.go +++ b/internal/parsers/yaml_spec_parser.go @@ -141,6 +141,15 @@ func (sp *YAMLSpecScanner) collectTitleDescription() { sp.title, sp.header = collectScannerTitleDescription(sp.header) } +// RemoveIndent normalises the common leading indentation on a YAML +// body: it strips the first line's indent from every line and +// converts remaining tab indentation to two-space equivalents. Used +// by YAMLSpecScanner's UnmarshalSpec pipeline and by the grammar +// operations bridge for the same v1-parity normalisation. +func RemoveIndent(spec []string) []string { + return removeIndent(spec) +} + // removes indent based on the first line. func removeIndent(spec []string) []string { if len(spec) == 0 { diff --git a/internal/scanner/scan_context.go b/internal/scanner/scan_context.go index 76b3e0f..d8e7eaa 100644 --- a/internal/scanner/scan_context.go +++ b/internal/scanner/scan_context.go @@ -11,6 +11,7 @@ import ( "iter" "log" "maps" + "os" "slices" "strings" @@ -19,6 +20,17 @@ import ( "golang.org/x/tools/go/packages" ) +// envForceGrammarParser, when set to "1" / "true" in the process +// environment, forces Options.UseGrammarParser to true at context +// construction time regardless of the value the caller passed. +// Used by CI to run the full test suite under the grammar path +// without threading the flag through every test call site — +// `CODESCAN_USE_GRAMMAR=1 go test ./...`. +// +// Removed at P6 cutover alongside the legacy regex path and the +// flag itself. +const envForceGrammarParser = "CODESCAN_USE_GRAMMAR" + const pkgLoadMode = packages.NeedName | packages.NeedFiles | packages.NeedImports | packages.NeedDeps | packages.NeedTypes | packages.NeedSyntax | packages.NeedTypesInfo type node uint32 @@ -41,6 +53,14 @@ type ScanCtx struct { } func NewScanCtx(opts *Options) (*ScanCtx, error) { + // Env-var override: CODESCAN_USE_GRAMMAR=1 forces the grammar + // parser on for all builders regardless of the caller's flag. + // Test-only migration aid; removed at P6 cutover. + switch os.Getenv(envForceGrammarParser) { + case "1", "true", "TRUE", "True": + opts.UseGrammarParser = true + } + cfg := &packages.Config{ Dir: opts.WorkDir, Mode: pkgLoadMode, From 641cb4a77376b77050be34e2b6e106294717da54 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 18:36:22 +0200 Subject: [PATCH 43/46] =?UTF-8?q?refactor(parsers):=20P6=20cutover=20?= =?UTF-8?q?=E2=80=94=20remove=20UseGrammarParser=20flag=20and=20dead=20val?= =?UTF-8?q?idation=20taggers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Grammar path is now the only comment-parsing path for the six swagger:* annotations migrated in P5. This commit removes the dual-path scaffolding and the legacy tagger types that existed solely to serve that scaffolding. Flag + harness removal: - Options.UseGrammarParser deleted. - ScanCtx.UseGrammarParser() accessor deleted. - CODESCAN_USE_GRAMMAR env-var override deleted (internal/scanner/scan_context.go). - TestParity deleted (internal/integration/parity_test.go) — the single-flag harness has no dual runs to diff with the flag gone. - All `if ctx.UseGrammarParser() { ... } else { ... legacy ... }` branches in the six builders (schema×4, parameters, responses×2, operations, routes) collapsed to the grammar arm. Dead builder code removed: - internal/builders/{items,parameters,responses,routes,schema}/taggers.go - schema.Builder.createParser (SectionedParser factory). - Orphaned setters: opSchemeSetter, schemaVendorExtensibleSetter, spExtensionsSetter. Dead parser code removed: - internal/parsers/validations.go + validations_test.go — all the Set{Maximum,Minimum,Multi,MinLength,MaxLength,Pattern,Unique, MinItems,MaxItems,CollectionFormat,Default,Example,ReadOnlySchema, RequiredSchema,RequiredParam,Discriminator} tagger types and helpers that wrote through ValidationBuilder/ OperationValidationBuilder. None are referenced now. - parsers.SetEnum type + NewSetEnum (enum.go) — superseded by the grammar bridges' inline dispatch. ParseEnum / ParseValueFromSchema stay as they're called from bridges and headerValidations. - parsers.MatchParamIn / NewMatchParamIn / NewMatchIn / MatchParamRequired / NewMatchParamRequired — match-only taggers whose only purpose was to claim lines away from the SectionedParser's description; no longer needed. - parsers.SetDeprecatedOp / NewSetDeprecatedOp — routes bridge handles deprecated inline via typed grammar value. - internal/parsers/sectioned_parser_test.go + *_go119_test.go + parsers_test.go — tests for code that just moved away. - rxDiscriminator, rxReadOnly, rxDeprecated constants from regexprs.go (no longer referenced). What's kept alive for meta: - SectionedParser, tag_parsers.go (NewSingleLineTagParser, NewMultiLineTagParser), NewMetaParser, setMetaSingle, newMultilineDropEmptyParser, newSetSecurity. - NewSetSchemes (security.go) — meta still registers Schemes as a tagger. - rxSchemes, rxRequired, rxIn (still used by matchers / meta). The meta builder (internal/builders/spec/spec.go) still invokes NewMetaParser(...).Parse and therefore still transitively needs SectionedParser and its tag infrastructure. Meta migration is a separate follow-up (P6.1 or P7), after which the entire SectionedParser + TagParser stack and remaining legacy plumbing can retire too. What's kept alive for the bridges: - NewConsumesDropEmptyParser / NewProducesDropEmptyParser — routes bridge dispatches `consumes:` / `produces:` bodies through their multilineYAMLListParser path. - NewSetSecurityScheme, NewSetParams, NewSetResponses, NewSetExtensions — routes bridge body-parsers. - NewYAMLParser + WithExtensionMatcher + WithSetter — schema bridge's YAML-fenced extensions path. - RemoveIndent (yaml_spec_parser.go) — operations bridge YAML body normalization. YAMLSpecScanner is still present because it's in the same file as RemoveIndent; a split + YAMLSpecScanner deletion lands with meta migration. All tests green. Lint clean. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/builders/items/taggers.go | 77 -- internal/builders/operations/operations.go | 18 +- internal/builders/parameters/parameters.go | 35 +- internal/builders/parameters/taggers.go | 58 -- internal/builders/responses/responses.go | 31 +- internal/builders/responses/taggers.go | 48 -- internal/builders/routes/routes.go | 16 +- internal/builders/routes/setters.go | 4 - internal/builders/routes/taggers.go | 24 - internal/builders/schema/schema.go | 127 +-- internal/builders/schema/taggers.go | 125 --- internal/integration/parity_test.go | 128 --- internal/parsers/enum.go | 36 - internal/parsers/parsers.go | 77 -- internal/parsers/parsers_test.go | 113 --- internal/parsers/regexprs.go | 3 - internal/parsers/regexprs_test.go | 1 - .../parsers/sectioned_parser_go119_test.go | 47 -- internal/parsers/sectioned_parser_test.go | 382 --------- internal/parsers/validations.go | 610 -------------- internal/parsers/validations_test.go | 750 ------------------ internal/scanner/options.go | 14 - internal/scanner/scan_context.go | 27 - 23 files changed, 14 insertions(+), 2737 deletions(-) delete mode 100644 internal/builders/items/taggers.go delete mode 100644 internal/builders/parameters/taggers.go delete mode 100644 internal/builders/responses/taggers.go delete mode 100644 internal/builders/routes/taggers.go delete mode 100644 internal/builders/schema/taggers.go delete mode 100644 internal/integration/parity_test.go delete mode 100644 internal/parsers/parsers_test.go delete mode 100644 internal/parsers/sectioned_parser_go119_test.go delete mode 100644 internal/parsers/sectioned_parser_test.go delete mode 100644 internal/parsers/validations.go delete mode 100644 internal/parsers/validations_test.go diff --git a/internal/builders/items/taggers.go b/internal/builders/items/taggers.go deleted file mode 100644 index bfb5a1e..0000000 --- a/internal/builders/items/taggers.go +++ /dev/null @@ -1,77 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package items - -import ( - "fmt" - "go/ast" - "slices" - - "github.com/go-openapi/codescan/internal/parsers" - "github.com/go-openapi/spec" -) - -// Taggers builds tag parsers for array items at a given nesting level. -func Taggers(items *spec.Items, level int) []parsers.TagParser { - return itemsTaggers(items, level) -} - -func itemsTaggers(items *spec.Items, level int) []parsers.TagParser { - opts := []parsers.PrefixRxOption{parsers.WithItemsPrefixLevel(level)} - - return []parsers.TagParser{ - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dMaximum", level), parsers.NewSetMaximum(Validations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dMinimum", level), parsers.NewSetMinimum(Validations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dMultipleOf", level), parsers.NewSetMultipleOf(Validations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dMinLength", level), parsers.NewSetMinLength(Validations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dMaxLength", level), parsers.NewSetMaxLength(Validations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dPattern", level), parsers.NewSetPattern(Validations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dCollectionFormat", level), parsers.NewSetCollectionFormat(Validations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dMinItems", level), parsers.NewSetMinItems(Validations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dMaxItems", level), parsers.NewSetMaxItems(Validations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dUnique", level), parsers.NewSetUnique(Validations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dEnum", level), parsers.NewSetEnum(Validations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dDefault", level), parsers.NewSetDefault(&items.SimpleSchema, Validations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dExample", level), parsers.NewSetExample(&items.SimpleSchema, Validations{items}, opts...)), - } -} - -// ParseArrayTypes recursively builds tag parsers for nested array types. -func ParseArrayTypes(taggers []parsers.TagParser, name string, expr ast.Expr, items *spec.Items, level int) ([]parsers.TagParser, error) { - return parseArrayTypes(taggers, name, expr, items, level) -} - -func parseArrayTypes(taggers []parsers.TagParser, name string, expr ast.Expr, items *spec.Items, level int) ([]parsers.TagParser, error) { - if items == nil { - return taggers, nil - } - - switch iftpe := expr.(type) { - case *ast.ArrayType: - eleTaggers := itemsTaggers(items, level) - return parseArrayTypes(slices.Concat(eleTaggers, taggers), name, iftpe.Elt, items.Items, level+1) - - case *ast.SelectorExpr: - return parseArrayTypes(taggers, name, iftpe.Sel, items.Items, level+1) - - case *ast.Ident: - var identTaggers []parsers.TagParser - if iftpe.Obj == nil { - identTaggers = itemsTaggers(items, level) - } - - otherTaggers, err := parseArrayTypes(taggers, name, expr, items.Items, level+1) - if err != nil { - return nil, err - } - - return slices.Concat(identTaggers, otherTaggers), nil - - case *ast.StarExpr: - return parseArrayTypes(taggers, name, iftpe.X, items, level) - - default: - return nil, fmt.Errorf("unknown field type element for %q: %w", name, ErrItems) - } -} diff --git a/internal/builders/operations/operations.go b/internal/builders/operations/operations.go index fc8e680..74489d4 100644 --- a/internal/builders/operations/operations.go +++ b/internal/builders/operations/operations.go @@ -34,22 +34,8 @@ func (o *Builder) Build(tgt *oaispec.Paths) error { &pthObj, o.operations[o.path.ID]) op.Tags = o.path.Tags - if o.ctx.UseGrammarParser() { - if err := o.applyBlockToOperation(op); err != nil { - return fmt.Errorf("operation (%s): %w", op.ID, err) - } - } else { - sp := parsers.NewYAMLSpecScanner( - func(lines []string) { op.Summary = parsers.JoinDropLast(lines) }, // setTitle - func(lines []string) { op.Description = parsers.JoinDropLast(lines) }, // setDescription - ) - - if err := sp.Parse(o.path.Remaining); err != nil { - return fmt.Errorf("operation (%s): %w", op.ID, err) - } - if err := sp.UnmarshalSpec(op.UnmarshalJSON); err != nil { - return fmt.Errorf("operation (%s): %w", op.ID, err) - } + if err := o.applyBlockToOperation(op); err != nil { + return fmt.Errorf("operation (%s): %w", op.ID, err) } if tgt.Paths == nil { diff --git a/internal/builders/parameters/parameters.go b/internal/builders/parameters/parameters.go index 2b68825..06f6c9f 100644 --- a/internal/builders/parameters/parameters.go +++ b/internal/builders/parameters/parameters.go @@ -5,7 +5,6 @@ package parameters import ( "fmt" - "go/ast" "go/types" "github.com/go-openapi/codescan/internal/builders/resolvers" @@ -358,30 +357,6 @@ func (p *ParameterBuilder) buildFromStruct(decl *scanner.EntityDecl, tpe *types. return nil } -// parseParamDoc routes the field's comment through the grammar -// bridge (when UseGrammarParser is set) or the legacy SectionedParser -// pipeline, writing description and validation onto ps. -func (p *ParameterBuilder) parseParamDoc(afld *ast.Field, ps *oaispec.Parameter, name string) error { - if p.ctx.UseGrammarParser() { - return p.applyBlockToField(afld, ps) - } - taggers, err := setupParamTaggers(ps, name, afld, p.ctx.SkipExtensions(), p.ctx.Debug()) - if err != nil { - return err - } - sp := parsers.NewSectionedParser( - parsers.WithSetDescription(func(lines []string) { - ps.Description = parsers.JoinDropLast(lines) - enumDesc := parsers.GetEnumDesc(ps.Extensions) - if enumDesc != "" { - ps.Description += "\n" + enumDesc - } - }), - parsers.WithTaggers(taggers...), - ) - return sp.Parse(afld.Doc) -} - // processParamField processes a single non-embedded struct field for parameter building. // Returns the parameter name if the field was processed, or "" if it was skipped. func (p *ParameterBuilder) processParamField(fld *types.Var, decl *scanner.EntityDecl, seen map[string]oaispec.Parameter) (string, error) { @@ -436,7 +411,7 @@ func (p *ParameterBuilder) processParamField(fld *types.Var, decl *scanner.Entit ps.Items = nil } - if err := p.parseParamDoc(afld, &ps, name); err != nil { + if err := p.applyBlockToField(afld, &ps); err != nil { return "", err } if ps.In == "path" { @@ -467,11 +442,3 @@ func (p *ParameterBuilder) makeRef(decl *scanner.EntityDecl, prop ifaces.Swagger return nil } - -func spExtensionsSetter(ps *oaispec.Parameter, skipExt bool) func(*oaispec.Extensions) { - return func(exts *oaispec.Extensions) { - for name, value := range *exts { - resolvers.AddExtension(&ps.VendorExtensible, name, value, skipExt) - } - } -} diff --git a/internal/builders/parameters/taggers.go b/internal/builders/parameters/taggers.go deleted file mode 100644 index 273daa7..0000000 --- a/internal/builders/parameters/taggers.go +++ /dev/null @@ -1,58 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parameters - -import ( - "go/ast" - "slices" - - "github.com/go-openapi/codescan/internal/builders/items" - "github.com/go-openapi/codescan/internal/parsers" - oaispec "github.com/go-openapi/spec" -) - -func setupParamTaggers(param *oaispec.Parameter, name string, afld *ast.Field, skipExt, debug bool) ([]parsers.TagParser, error) { - // Parameter-level $ref (e.g. {$ref: "#/parameters/X"}) is not emitted by - // the scanner today — named struct fields become body params with a - // schema-level ref (ps.Schema.Ref), never ps.Ref. To support - // operation-level parameter refs, branch here on - // `param.Ref.String() != ""` and dispatch to a narrower tagger set - // (in, required, extensions only). - return setupInlineParamTaggers(param, name, afld, skipExt, debug) -} - -// baseInlineParamTaggers configures taggers for a fully-defined inline parameter. -func baseInlineParamTaggers(param *oaispec.Parameter, skipExt, debug bool) []parsers.TagParser { - return []parsers.TagParser{ - parsers.NewSingleLineTagParser("in", parsers.NewMatchParamIn(param)), - parsers.NewSingleLineTagParser("maximum", parsers.NewSetMaximum(paramValidations{param})), - parsers.NewSingleLineTagParser("minimum", parsers.NewSetMinimum(paramValidations{param})), - parsers.NewSingleLineTagParser("multipleOf", parsers.NewSetMultipleOf(paramValidations{param})), - parsers.NewSingleLineTagParser("minLength", parsers.NewSetMinLength(paramValidations{param})), - parsers.NewSingleLineTagParser("maxLength", parsers.NewSetMaxLength(paramValidations{param})), - parsers.NewSingleLineTagParser("pattern", parsers.NewSetPattern(paramValidations{param})), - parsers.NewSingleLineTagParser("collectionFormat", parsers.NewSetCollectionFormat(paramValidations{param})), - parsers.NewSingleLineTagParser("minItems", parsers.NewSetMinItems(paramValidations{param})), - parsers.NewSingleLineTagParser("maxItems", parsers.NewSetMaxItems(paramValidations{param})), - parsers.NewSingleLineTagParser("unique", parsers.NewSetUnique(paramValidations{param})), - parsers.NewSingleLineTagParser("enum", parsers.NewSetEnum(paramValidations{param})), - parsers.NewSingleLineTagParser("default", parsers.NewSetDefault(¶m.SimpleSchema, paramValidations{param})), - parsers.NewSingleLineTagParser("example", parsers.NewSetExample(¶m.SimpleSchema, paramValidations{param})), - parsers.NewSingleLineTagParser("required", parsers.NewSetRequiredParam(param)), - parsers.NewMultiLineTagParser("Extensions", parsers.NewSetExtensions(spExtensionsSetter(param, skipExt), debug), true), - } -} - -func setupInlineParamTaggers(param *oaispec.Parameter, name string, afld *ast.Field, skipExt, debug bool) ([]parsers.TagParser, error) { - // TODO(claude): don't understand why we need this step. Isn't it handled by the recursion already? - if ftped, ok := afld.Type.(*ast.ArrayType); ok { - taggers, err := items.ParseArrayTypes([]parsers.TagParser{}, name, ftped.Elt, param.Items, 0) - if err != nil { - return nil, err - } - return slices.Concat(taggers, baseInlineParamTaggers(param, skipExt, debug)), nil - } - - return baseInlineParamTaggers(param, skipExt, debug), nil -} diff --git a/internal/builders/responses/responses.go b/internal/builders/responses/responses.go index 93e938b..d9459a0 100644 --- a/internal/builders/responses/responses.go +++ b/internal/builders/responses/responses.go @@ -40,18 +40,7 @@ func (r *ResponseBuilder) Build(responses map[string]oaispec.Response) error { logger.DebugLogf(r.ctx.Debug(), "building response: %s", name) // analyze doc comment for the model - if r.ctx.UseGrammarParser() { - r.applyBlockToDecl(&response) - } else { - sp := parsers.NewSectionedParser( - parsers.WithSetDescription(func(lines []string) { - response.Description = parsers.JoinDropLast(lines) - }), - ) - if err := sp.Parse(r.decl.Comments); err != nil { - return err - } - } + r.applyBlockToDecl(&response) // analyze struct body for fields etc // each exported struct field: @@ -380,23 +369,7 @@ func (r *ResponseBuilder) processResponseField(fld *types.Var, decl *scanner.Ent ps.Typed("string", strfmtName) } - if r.ctx.UseGrammarParser() { - r.applyBlockToHeader(afld, &ps) - } else { - taggers, err := setupResponseHeaderTaggers(&ps, name, afld) - if err != nil { - return err - } - - sp := parsers.NewSectionedParser( - parsers.WithSetDescription(func(lines []string) { ps.Description = parsers.JoinDropLast(lines) }), - parsers.WithTaggers(taggers...), - ) - - if err := sp.Parse(afld.Doc); err != nil { - return err - } - } + r.applyBlockToHeader(afld, &ps) if in != "body" { seen[name] = true diff --git a/internal/builders/responses/taggers.go b/internal/builders/responses/taggers.go deleted file mode 100644 index 79e9b77..0000000 --- a/internal/builders/responses/taggers.go +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package responses - -import ( - "go/ast" - "slices" - - "github.com/go-openapi/codescan/internal/builders/items" - "github.com/go-openapi/codescan/internal/parsers" - oaispec "github.com/go-openapi/spec" -) - -// baseResponseHeaderTaggers configures taggers for a response header field. -func baseResponseHeaderTaggers(header *oaispec.Header) []parsers.TagParser { - return []parsers.TagParser{ - // Match-only: claim `in: header` so it does not leak into the header's description. - parsers.NewSingleLineTagParser("in", parsers.NewMatchIn()), - parsers.NewSingleLineTagParser("maximum", parsers.NewSetMaximum(headerValidations{header})), - parsers.NewSingleLineTagParser("minimum", parsers.NewSetMinimum(headerValidations{header})), - parsers.NewSingleLineTagParser("multipleOf", parsers.NewSetMultipleOf(headerValidations{header})), - parsers.NewSingleLineTagParser("minLength", parsers.NewSetMinLength(headerValidations{header})), - parsers.NewSingleLineTagParser("maxLength", parsers.NewSetMaxLength(headerValidations{header})), - parsers.NewSingleLineTagParser("pattern", parsers.NewSetPattern(headerValidations{header})), - parsers.NewSingleLineTagParser("collectionFormat", parsers.NewSetCollectionFormat(headerValidations{header})), - parsers.NewSingleLineTagParser("minItems", parsers.NewSetMinItems(headerValidations{header})), - parsers.NewSingleLineTagParser("maxItems", parsers.NewSetMaxItems(headerValidations{header})), - parsers.NewSingleLineTagParser("unique", parsers.NewSetUnique(headerValidations{header})), - parsers.NewSingleLineTagParser("enum", parsers.NewSetEnum(headerValidations{header})), - parsers.NewSingleLineTagParser("default", parsers.NewSetDefault(&header.SimpleSchema, headerValidations{header})), - parsers.NewSingleLineTagParser("example", parsers.NewSetExample(&header.SimpleSchema, headerValidations{header})), - } -} - -func setupResponseHeaderTaggers(header *oaispec.Header, name string, afld *ast.Field) ([]parsers.TagParser, error) { - // TODO(claude): don't understand why we need this step. Isn't it handled by the recursion already? - if ftped, ok := afld.Type.(*ast.ArrayType); ok { - taggers, err := items.ParseArrayTypes([]parsers.TagParser{}, name, ftped.Elt, header.Items, 0) - if err != nil { - return nil, err - } - - return slices.Concat(taggers, baseResponseHeaderTaggers(header)), nil - } - - return baseResponseHeaderTaggers(header), nil -} diff --git a/internal/builders/routes/routes.go b/internal/builders/routes/routes.go index b3849e7..a15931d 100644 --- a/internal/builders/routes/routes.go +++ b/internal/builders/routes/routes.go @@ -45,20 +45,8 @@ func (r *Builder) Build(tgt *oaispec.Paths) error { ) op.Tags = r.route.Tags - if r.ctx.UseGrammarParser() { - if err := r.applyBlockToRoute(op); err != nil { - return fmt.Errorf("operation (%s): %w", op.ID, err) - } - } else { - sp := parsers.NewSectionedParser( - parsers.WithSetTitle(func(lines []string) { op.Summary = parsers.JoinDropLast(lines) }), - parsers.WithSetDescription(func(lines []string) { op.Description = parsers.JoinDropLast(lines) }), - parsers.WithTaggers(r.routeTaggers(op)...), - ) - - if err := sp.Parse(r.route.Remaining); err != nil { - return fmt.Errorf("operation (%s): %w", op.ID, err) - } + if err := r.applyBlockToRoute(op); err != nil { + return fmt.Errorf("operation (%s): %w", op.ID, err) } if tgt.Paths == nil { diff --git a/internal/builders/routes/setters.go b/internal/builders/routes/setters.go index ab6b191..46be465 100644 --- a/internal/builders/routes/setters.go +++ b/internal/builders/routes/setters.go @@ -13,10 +13,6 @@ func opProducesSetter(op *spec.Operation) func([]string) { return func(produces []string) { op.Produces = produces } } -func opSchemeSetter(op *spec.Operation) func([]string) { - return func(schemes []string) { op.Schemes = schemes } -} - func opSecurityDefsSetter(op *spec.Operation) func([]map[string][]string) { return func(securityDefs []map[string][]string) { op.Security = securityDefs } } diff --git a/internal/builders/routes/taggers.go b/internal/builders/routes/taggers.go deleted file mode 100644 index 8dd57c4..0000000 --- a/internal/builders/routes/taggers.go +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package routes - -import ( - "github.com/go-openapi/codescan/internal/parsers" - oaispec "github.com/go-openapi/spec" -) - -func (r *Builder) routeTaggers(op *oaispec.Operation) []parsers.TagParser { - return []parsers.TagParser{ - // Q4: YAML-list bodies; skip external rxUncommentHeaders strip so - // `-` markers reach the YAML sub-parser intact. - parsers.NewMultiLineTagParser("Consumes", parsers.NewConsumesDropEmptyParser(opConsumesSetter(op)), true), - parsers.NewMultiLineTagParser("Produces", parsers.NewProducesDropEmptyParser(opProducesSetter(op)), true), - parsers.NewSingleLineTagParser("Schemes", parsers.NewSetSchemes(opSchemeSetter(op))), - parsers.NewMultiLineTagParser("Security", parsers.NewSetSecurityScheme(opSecurityDefsSetter(op)), false), - parsers.NewMultiLineTagParser("Parameters", parsers.NewSetParams(r.parameters, opParamSetter(op)), false), - parsers.NewMultiLineTagParser("Responses", parsers.NewSetResponses(r.definitions, r.responses, opResponsesSetter(op)), false), - parsers.NewSingleLineTagParser("Deprecated", parsers.NewSetDeprecatedOp(op)), - parsers.NewMultiLineTagParser("Extensions", parsers.NewSetExtensions(opExtensionsSetter(op), r.ctx.Debug()), true), - } -} diff --git a/internal/builders/schema/schema.go b/internal/builders/schema/schema.go index 247ce64..61b796d 100644 --- a/internal/builders/schema/schema.go +++ b/internal/builders/schema/schema.go @@ -4,7 +4,6 @@ package schema import ( - "encoding/json" "fmt" "go/ast" "go/types" @@ -117,39 +116,11 @@ func (s *Builder) interfaceJSONName(goName string) string { return s.interfaceMethodMangler.ToJSONName(goName) } -// parseDeclDoc runs the top-level declaration's comment through the -// configured parser path (grammar bridge when UseGrammarParser is set, -// legacy SectionedParser otherwise) and reports whether the type -// carries a swagger:ignore that should short-circuit the rest of -// buildFromDecl. -func (s *Builder) parseDeclDoc(schema *oaispec.Schema) (ignored bool, err error) { - if s.ctx.UseGrammarParser() { - return s.applyBlockToDecl(schema), nil - } - sp := s.createParser("", schema, schema, nil, - parsers.WithSetTitle(func(lines []string) { schema.Title = parsers.JoinDropLast(lines) }), - parsers.WithSetDescription(func(lines []string) { - schema.Description = parsers.JoinDropLast(lines) - enumDesc := parsers.GetEnumDesc(schema.Extensions) - if enumDesc != "" { - schema.Description += "\n" + enumDesc - } - }), - ) - if err := sp.Parse(s.decl.Comments); err != nil { - return false, err - } - return sp.Ignored(), nil -} - func (s *Builder) buildFromDecl(_ *scanner.EntityDecl, schema *oaispec.Schema) error { - // analyze doc comment for the model - // This includes parsing "example", "default" and other validation at the top-level declaration. - ignored, err := s.parseDeclDoc(schema) - if err != nil { - return err - } - if ignored { + // analyze doc comment for the model. + // applyBlockToDecl returns true when a swagger:ignore annotation + // short-circuits further building. + if s.applyBlockToDecl(schema) { return nil } @@ -740,14 +711,7 @@ func (s *Builder) processAnonInterfaceMethod(fld *types.Func, it *types.Interfac ps.Items = nil } - if s.ctx.UseGrammarParser() { - s.applyBlockToField(afld, schema, &ps, name) - } else { - sp := s.createParser(name, schema, &ps, afld) - if err := sp.Parse(afld.Doc); err != nil { - return err - } - } + s.applyBlockToField(afld, schema, &ps, name) if ps.Ref.String() == "" && name != fld.Name() { ps.AddExtension("x-go-name", fld.Name()) @@ -972,14 +936,7 @@ func (s *Builder) processInterfaceMethod(fld *types.Func, it *types.Interface, d ps.Items = nil } - if s.ctx.UseGrammarParser() { - s.applyBlockToField(afld, tgt, &ps, name) - } else { - sp := s.createParser(name, tgt, &ps, afld) - if err := sp.Parse(afld.Doc); err != nil { - return err - } - } + s.applyBlockToField(afld, tgt, &ps, name) if ps.Ref.String() == "" && name != fld.Name() { ps.AddExtension("x-go-name", fld.Name()) @@ -1232,14 +1189,7 @@ func (s *Builder) processStructField(fld *types.Var, decl *scanner.EntityDecl, t ps.Items = nil } - if s.ctx.UseGrammarParser() { - s.applyBlockToField(afld, tgt, &ps, name) - } else { - sp := s.createParser(name, tgt, &ps, afld) - if err := sp.Parse(afld.Doc); err != nil { - return err - } - } + s.applyBlockToField(afld, tgt, &ps, name) if ps.Ref.String() == "" && name != fld.Name() { resolvers.AddExtension(&ps.VendorExtensible, "x-go-name", fld.Name(), s.ctx.SkipExtensions()) @@ -1408,69 +1358,6 @@ func (s *Builder) makeRef(decl *scanner.EntityDecl, prop ifaces.SwaggerTypable) return nil } -func (s *Builder) createParser(nm string, schema, ps *oaispec.Schema, fld *ast.Field, opts ...parsers.SectionedParserOption) *parsers.SectionedParser { - if ps.Ref.String() != "" && !s.ctx.DescWithRef() { - // if DescWithRef option is enabled, allow the tagged documentation to flow alongside the $ref - // otherwise behave as expected by jsonschema draft4: $ref predates all sibling keys. - opts = append( - opts, - parsers.WithTaggers(refSchemaTaggers(schema, nm)...), - ) - - return parsers.NewSectionedParser(opts...) - } - - taggers := schemaTaggers(schema, ps, nm) - - // the parser may be called outside the context of struct field. - // In that case, just return the outcome of the parsing now. - if fld != nil { - // check if this is a primitive, if so parse the validations from the - // doc comments of the slice declaration. - if ftped, ok := fld.Type.(*ast.ArrayType); ok { - var err error - arrayTaggers, err := parseArrayTypes(taggers, ftped.Elt, ps.Items, 0) // NOTE: swallows error silently - if err == nil { - taggers = arrayTaggers - } - } - } - - opts = append( - opts, - parsers.WithSetDescription(func(lines []string) { - ps.Description = parsers.JoinDropLast(lines) - enumDesc := parsers.GetEnumDesc(ps.Extensions) - if enumDesc != "" { - ps.Description += "\n" + enumDesc - } - }), - parsers.WithTaggers(taggers...), - ) - - return parsers.NewSectionedParser(opts...) -} - -func schemaVendorExtensibleSetter(meta *oaispec.Schema) func(json.RawMessage) error { - return func(jsonValue json.RawMessage) error { - var jsonData oaispec.Extensions - err := json.Unmarshal(jsonValue, &jsonData) - if err != nil { - return err - } - - for k := range jsonData { - if !parsers.IsAllowedExtension(k) { - return fmt.Errorf("invalid schema extension name, should start from `x-`: %s: %w", k, ErrSchema) - } - } - - meta.Extensions = jsonData - - return nil - } -} - func extractAllOfClass(doc *ast.CommentGroup, schema *oaispec.Schema) { allOfClass, ok := parsers.AllOfName(doc) if !ok { diff --git a/internal/builders/schema/taggers.go b/internal/builders/schema/taggers.go deleted file mode 100644 index 60178d8..0000000 --- a/internal/builders/schema/taggers.go +++ /dev/null @@ -1,125 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package schema - -import ( - "fmt" - "go/ast" - "slices" - - "github.com/go-openapi/codescan/internal/parsers" - oaispec "github.com/go-openapi/spec" -) - -func schemaTaggers(schema, ps *oaispec.Schema, nm string) []parsers.TagParser { - schemeType, err := ps.Type.MarshalJSON() - if err != nil { - return nil - } - scheme := &oaispec.SimpleSchema{Type: string(schemeType)} - - return []parsers.TagParser{ - // Match-only: claim `in: ` lines so they do not leak into the - // schema description. `in:` only matters for parameter/response dispatch; - // if it reaches a schema field (e.g. via the alias-expand path), it is - // still metadata, not prose. - parsers.NewSingleLineTagParser("in", parsers.NewMatchIn()), - parsers.NewSingleLineTagParser("maximum", parsers.NewSetMaximum(schemaValidations{ps})), - parsers.NewSingleLineTagParser("minimum", parsers.NewSetMinimum(schemaValidations{ps})), - parsers.NewSingleLineTagParser("multipleOf", parsers.NewSetMultipleOf(schemaValidations{ps})), - parsers.NewSingleLineTagParser("minLength", parsers.NewSetMinLength(schemaValidations{ps})), - parsers.NewSingleLineTagParser("maxLength", parsers.NewSetMaxLength(schemaValidations{ps})), - parsers.NewSingleLineTagParser("pattern", parsers.NewSetPattern(schemaValidations{ps})), - parsers.NewSingleLineTagParser("minItems", parsers.NewSetMinItems(schemaValidations{ps})), - parsers.NewSingleLineTagParser("maxItems", parsers.NewSetMaxItems(schemaValidations{ps})), - parsers.NewSingleLineTagParser("unique", parsers.NewSetUnique(schemaValidations{ps})), - parsers.NewSingleLineTagParser("enum", parsers.NewSetEnum(schemaValidations{ps})), - parsers.NewSingleLineTagParser("default", parsers.NewSetDefault(scheme, schemaValidations{ps})), - parsers.NewSingleLineTagParser("type", parsers.NewSetDefault(scheme, schemaValidations{ps})), - parsers.NewSingleLineTagParser("example", parsers.NewSetExample(scheme, schemaValidations{ps})), - parsers.NewSingleLineTagParser("required", parsers.NewSetRequiredSchema(schema, nm)), - parsers.NewSingleLineTagParser("readOnly", parsers.NewSetReadOnlySchema(ps)), - parsers.NewSingleLineTagParser("discriminator", parsers.NewSetDiscriminator(schema, nm)), - parsers.NewMultiLineTagParser("YAMLExtensionsBlock", parsers.NewYAMLParser( - parsers.WithExtensionMatcher(), - parsers.WithSetter(schemaVendorExtensibleSetter(ps)), - ), true), - } -} - -func refSchemaTaggers(schema *oaispec.Schema, name string) []parsers.TagParser { - return []parsers.TagParser{ - parsers.NewSingleLineTagParser("required", parsers.NewSetRequiredSchema(schema, name)), - } -} - -func itemsTaggers(items *oaispec.Schema, level int) []parsers.TagParser { - schemeType, err := items.Type.MarshalJSON() - if err != nil { - return nil - } - - scheme := &oaispec.SimpleSchema{Type: string(schemeType)} - opts := []parsers.PrefixRxOption{parsers.WithItemsPrefixLevel(level)} - - return []parsers.TagParser{ - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dMaximum", level), parsers.NewSetMaximum(schemaValidations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dMinimum", level), parsers.NewSetMinimum(schemaValidations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dMultipleOf", level), parsers.NewSetMultipleOf(schemaValidations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dMinLength", level), parsers.NewSetMinLength(schemaValidations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dMaxLength", level), parsers.NewSetMaxLength(schemaValidations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dPattern", level), parsers.NewSetPattern(schemaValidations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dMinItems", level), parsers.NewSetMinItems(schemaValidations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dMaxItems", level), parsers.NewSetMaxItems(schemaValidations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dUnique", level), parsers.NewSetUnique(schemaValidations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dEnum", level), parsers.NewSetEnum(schemaValidations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dDefault", level), parsers.NewSetDefault(scheme, schemaValidations{items}, opts...)), - parsers.NewSingleLineTagParser(fmt.Sprintf("items%dExample", level), parsers.NewSetExample(scheme, schemaValidations{items}, opts...)), - } -} - -func parseArrayTypes(taggers []parsers.TagParser, expr ast.Expr, items *oaispec.SchemaOrArray, level int) ([]parsers.TagParser, error) { - if items == nil || items.Schema == nil { - return taggers, nil - } - - switch iftpe := expr.(type) { - case *ast.ArrayType: - eleTaggers := itemsTaggers(items.Schema, level) - otherTaggers, err := parseArrayTypes(slices.Concat(eleTaggers, taggers), iftpe.Elt, items.Schema.Items, level+1) - if err != nil { - return nil, err - } - - return otherTaggers, nil - - case *ast.Ident: - var identTaggers []parsers.TagParser - if iftpe.Obj == nil { - identTaggers = itemsTaggers(items.Schema, level) - } - - otherTaggers, err := parseArrayTypes(taggers, expr, items.Schema.Items, level+1) - if err != nil { - return nil, err - } - - return slices.Concat(identTaggers, otherTaggers), nil - - case *ast.StarExpr: - return parseArrayTypes(taggers, iftpe.X, items, level) - - case *ast.SelectorExpr: - // qualified name (e.g. time.Time): terminal leaf, register items-level validations. - return slices.Concat(itemsTaggers(items.Schema, level), taggers), nil - - case *ast.StructType, *ast.InterfaceType, *ast.MapType: - // anonymous struct / interface / map element: no further items-level - // validations apply; the element type itself carries its schema. - return taggers, nil - - default: - return nil, fmt.Errorf("unknown field type element: %w", ErrSchema) - } -} diff --git a/internal/integration/parity_test.go b/internal/integration/parity_test.go deleted file mode 100644 index 036ca07..0000000 --- a/internal/integration/parity_test.go +++ /dev/null @@ -1,128 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package integration_test - -import ( - "encoding/json" - "testing" - - "github.com/go-openapi/codescan" - "github.com/go-openapi/codescan/internal/scantest" - "github.com/go-openapi/testify/v2/require" - - oaispec "github.com/go-openapi/spec" -) - -// TestParity is the migration safety net for P5 builder migrations: -// every fixture in parityFixtures is scanned twice — once with the -// legacy regex-based pipeline, once with the v2 grammar-parser + -// bridge-taggers pipeline — and the resulting `*spec.Swagger` -// values are JSON-compared. -// -// Design rationale: spec-level compare measures the user-observable -// contract. See `.claude/plans/p5-builder-migrations.md` §5 for the -// full design discussion (why this over a view-level v1 adapter). -// -// Lifetime: **tactical**, not permanent. At P6 cutover the -// `Options.UseGrammarParser` flag is removed and this file is -// deleted in the same commit (P6.4). With only one path left, the -// test has no dual runs to diff — it becomes pure CI burden. -// -// Adding fixtures: append a new entry to parityFixtures below. -// Ordering doesn't matter — each case is independent and t.Run -// parallelises them. -func TestParity(t *testing.T) { - for _, tc := range parityFixtures { - t.Run(tc.Name, func(t *testing.T) { - t.Parallel() - docV1 := runFixture(t, tc.Opts, false) - docV2 := runFixture(t, tc.Opts, true) - assertSpecsEqual(t, docV1, docV2) - }) - } -} - -// parityFixture names one fixture scan that must produce the same -// spec under both pipeline values. Opts is cloned in runFixture -// before UseGrammarParser is set, so the template stays immutable. -type parityFixture struct { - Name string - Opts codescan.Options -} - -//nolint:gochecknoglobals // migration-scoped test table; removed at P6 cutover alongside the flag. -var parityFixtures = []parityFixture{ - // fixtures/enhancements/ — each entry mirrors a TestCoverage_* test in - // coverage_enhancements_test.go. Entries that intentionally exercise - // error paths (e.g. UnknownAnnotation, malformed/*) are NOT included — - // they don't produce comparable spec output. - {"EmbeddedTypes", codescan.Options{Packages: pkgs("./enhancements/embedded-types/..."), ScanModels: true}}, - {"AllOfEdges", codescan.Options{Packages: pkgs("./enhancements/allof-edges/..."), ScanModels: true}}, - {"StrfmtArrays", codescan.Options{Packages: pkgs("./enhancements/strfmt-arrays/..."), ScanModels: true}}, - {"DefaultsExamples", codescan.Options{Packages: pkgs("./enhancements/defaults-examples/..."), ScanModels: true}}, - {"InterfaceMethods", codescan.Options{Packages: pkgs("./enhancements/interface-methods/..."), ScanModels: true}}, - {"InterfaceMethodsXNullable", codescan.Options{Packages: pkgs("./enhancements/interface-methods/..."), ScanModels: true, SetXNullableForPointers: true}}, - {"AliasExpand", codescan.Options{Packages: pkgs("./enhancements/alias-expand/..."), ScanModels: true}}, - {"AliasRef", codescan.Options{Packages: pkgs("./enhancements/alias-expand/..."), ScanModels: true, RefAliases: true}}, - {"AliasResponseRef", codescan.Options{Packages: pkgs("./enhancements/alias-response/..."), ScanModels: true, RefAliases: true}}, - {"ResponseEdges", codescan.Options{Packages: pkgs("./enhancements/response-edges/..."), ScanModels: true}}, - {"NamedBasic", codescan.Options{Packages: pkgs("./enhancements/named-basic/..."), ScanModels: true}}, - {"SwaggerTypeArray", codescan.Options{Packages: pkgs("./enhancements/swagger-type-array/..."), ScanModels: true}}, - {"RefAliasChain", codescan.Options{Packages: pkgs("./enhancements/ref-alias-chain/..."), ScanModels: true, RefAliases: true}}, - {"EnumDocs", codescan.Options{Packages: pkgs("./enhancements/enum-docs/..."), ScanModels: true}}, - {"EnumOverrides", codescan.Options{Packages: pkgs("./enhancements/enum-overrides/..."), ScanModels: true}}, - {"TextMarshal", codescan.Options{Packages: pkgs("./enhancements/text-marshal/..."), ScanModels: true}}, - {"AllHTTPMethods", codescan.Options{Packages: pkgs("./enhancements/all-http-methods/...")}}, - {"NamedStructTags", codescan.Options{Packages: pkgs("./enhancements/named-struct-tags/..."), ScanModels: true}}, - {"NamedStructTagsRef", codescan.Options{Packages: pkgs("./enhancements/named-struct-tags-ref/..."), ScanModels: true}}, - {"TopLevelKinds", codescan.Options{Packages: pkgs("./enhancements/top-level-kinds/..."), ScanModels: true}}, - // fixtures/goparsing/ - {"Petstore", codescan.Options{Packages: pkgs("./goparsing/petstore/...")}}, - {"Bookings", codescan.Options{Packages: pkgs("./goparsing/bookings/..."), ScanModels: true}}, - // Exercises swagger:operation (YAML-bodied operation spec). - {"ClassificationOpAnnotation", codescan.Options{Packages: pkgs("./goparsing/classification/operations_annotation/...")}}, - // Exercises swagger:route with rich bodies: Consumes / Produces / Schemes / - // Security / Parameters / Responses / Extensions. - {"ClassificationRoutes", codescan.Options{Packages: pkgs("./goparsing/classification/...")}}, -} - -// pkgs is a tiny alias for []string — it makes the fixture table -// readable at a glance (the Packages field dominates the line -// otherwise). -func pkgs(p ...string) []string { return p } - -// runFixture scans tc.Opts with UseGrammarParser=useGrammar and -// returns the resulting spec. The template Options is cloned so -// the table stays unmodified; WorkDir is injected once here rather -// than duplicated in every table row. -func runFixture(t *testing.T, template codescan.Options, useGrammar bool) *oaispec.Swagger { - t.Helper() - opts := template // value copy - opts.WorkDir = scantest.FixturesDir() - opts.UseGrammarParser = useGrammar - doc, err := codescan.Run(&opts) - require.NoError(t, err) - require.NotNil(t, doc) - return doc -} - -// assertSpecsEqual marshals both specs to indented JSON and -// diffs as strings. This is stricter than reflect.DeepEqual (it -// catches field-order differences in slices) and produces a -// human-readable diff path on failure. -func assertSpecsEqual(t *testing.T, v1, v2 *oaispec.Swagger) { - t.Helper() - v1JSON, err := json.MarshalIndent(v1, "", " ") - require.NoError(t, err) - v2JSON, err := json.MarshalIndent(v2, "", " ") - require.NoError(t, err) - if string(v1JSON) != string(v2JSON) { - t.Fatalf( - "parity mismatch — v1 (legacy) vs v2 (grammar) differ:\n"+ - "--- v1 (%d bytes) ---\n%s\n"+ - "--- v2 (%d bytes) ---\n%s\n", - len(v1JSON), v1JSON, len(v2JSON), v2JSON, - ) - } -} diff --git a/internal/parsers/enum.go b/internal/parsers/enum.go index 8372a45..1351853 100644 --- a/internal/parsers/enum.go +++ b/internal/parsers/enum.go @@ -7,48 +7,12 @@ import ( "encoding/json" "go/ast" "log" - "regexp" "strconv" "strings" - "github.com/go-openapi/codescan/internal/ifaces" "github.com/go-openapi/spec" ) -type SetEnum struct { - builder ifaces.ValidationBuilder - rx *regexp.Regexp -} - -func NewSetEnum(builder ifaces.ValidationBuilder, opts ...PrefixRxOption) *SetEnum { - rx := rxEnumValidation - for _, apply := range opts { - rx = apply(rxEnumFmt) - } - - return &SetEnum{ - builder: builder, - rx: rx, - } -} - -func (se *SetEnum) Matches(line string) bool { - return se.rx.MatchString(line) -} - -func (se *SetEnum) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - - matches := se.rx.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - se.builder.SetEnum(matches[1]) - } - - return nil -} - // ParseValueFromSchema converts a raw annotation value to the Go // representation implied by the target schema's Type/Format. Used by // default:/example: setters where the annotation body is a primitive diff --git a/internal/parsers/parsers.go b/internal/parsers/parsers.go index b4bd1f0..d159e9c 100644 --- a/internal/parsers/parsers.go +++ b/internal/parsers/parsers.go @@ -7,11 +7,9 @@ import ( "fmt" "log" "regexp" - "strconv" "strings" "github.com/go-openapi/codescan/internal/parsers/yaml" - oaispec "github.com/go-openapi/spec" ) const ( @@ -22,81 +20,6 @@ const ( // Many thanks go to https://github.com/yvasiyarov/swagger // this is loosely based on that implementation but for swagger 2.0 -type matchOnlyParam struct { - rx *regexp.Regexp -} - -func (mo *matchOnlyParam) Matches(line string) bool { - return mo.rx.MatchString(line) -} - -func (mo *matchOnlyParam) Parse(_ []string) error { - return nil -} - -type MatchParamIn struct { - *matchOnlyParam -} - -func NewMatchParamIn(_ *oaispec.Parameter) *MatchParamIn { - return NewMatchIn() -} - -// NewMatchIn returns a match-only tagger that claims `in: ` -// lines. The `in:` directive is extracted separately via -// parsers.ParamLocation; this tagger only prevents the line from -// being absorbed into the surrounding description by a SectionedParser. -func NewMatchIn() *MatchParamIn { - return &MatchParamIn{ - matchOnlyParam: &matchOnlyParam{ - rx: rxIn, - }, - } -} - -type MatchParamRequired struct { - *matchOnlyParam -} - -func NewMatchParamRequired(_ *oaispec.Parameter) *MatchParamRequired { - return &MatchParamRequired{ - matchOnlyParam: &matchOnlyParam{ - rx: rxRequired, - }, - } -} - -type SetDeprecatedOp struct { - tgt *oaispec.Operation -} - -func NewSetDeprecatedOp(operation *oaispec.Operation) *SetDeprecatedOp { - return &SetDeprecatedOp{ - tgt: operation, - } -} - -func (su *SetDeprecatedOp) Matches(line string) bool { - return rxDeprecated.MatchString(line) -} - -func (su *SetDeprecatedOp) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - - matches := rxDeprecated.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - req, err := strconv.ParseBool(matches[1]) - if err != nil { - return err - } - su.tgt.Deprecated = req - } - - return nil -} - type ConsumesDropEmptyParser struct { *multilineYAMLListParser } diff --git a/internal/parsers/parsers_test.go b/internal/parsers/parsers_test.go deleted file mode 100644 index 7c3baf8..0000000 --- a/internal/parsers/parsers_test.go +++ /dev/null @@ -1,113 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "testing" - - "github.com/go-openapi/testify/v2/assert" - "github.com/go-openapi/testify/v2/require" - - oaispec "github.com/go-openapi/spec" -) - -func TestMatchParamIn(t *testing.T) { - t.Parallel() - - mp := NewMatchParamIn(nil) - assert.TrueT(t, mp.Matches("In: query")) - assert.TrueT(t, mp.Matches("in: body")) - assert.TrueT(t, mp.Matches("in: path")) - assert.TrueT(t, mp.Matches("in: header")) - assert.TrueT(t, mp.Matches("in: formData")) - assert.FalseT(t, mp.Matches("in: cookie")) // not a valid swagger 2.0 location - assert.FalseT(t, mp.Matches("something else")) - - // Parse is a no-op - require.NoError(t, mp.Parse(nil)) -} - -func TestMatchParamRequired(t *testing.T) { - t.Parallel() - - mp := NewMatchParamRequired(nil) - assert.TrueT(t, mp.Matches("required: true")) - assert.TrueT(t, mp.Matches("Required: false")) - assert.FalseT(t, mp.Matches("something else")) - - // Parse is a no-op - require.NoError(t, mp.Parse(nil)) -} - -func TestSetDeprecatedOp(t *testing.T) { - t.Parallel() - - t.Run("true", func(t *testing.T) { - op := new(oaispec.Operation) - sd := NewSetDeprecatedOp(op) - assert.TrueT(t, sd.Matches("deprecated: true")) - require.NoError(t, sd.Parse([]string{"deprecated: true"})) - assert.TrueT(t, op.Deprecated) - }) - - t.Run("false", func(t *testing.T) { - op := new(oaispec.Operation) - sd := NewSetDeprecatedOp(op) - require.NoError(t, sd.Parse([]string{"deprecated: false"})) - assert.FalseT(t, op.Deprecated) - }) - - t.Run("empty", func(t *testing.T) { - op := new(oaispec.Operation) - sd := NewSetDeprecatedOp(op) - require.NoError(t, sd.Parse(nil)) - require.NoError(t, sd.Parse([]string{})) - require.NoError(t, sd.Parse([]string{""})) - assert.FalseT(t, op.Deprecated) - }) - - t.Run("no match", func(t *testing.T) { - sd := NewSetDeprecatedOp(new(oaispec.Operation)) - assert.FalseT(t, sd.Matches("something else")) - }) -} - -func TestConsumesDropEmptyParser(t *testing.T) { - t.Parallel() - - var got []string - cp := NewConsumesDropEmptyParser(func(v []string) { got = v }) - assert.TrueT(t, cp.Matches("consumes:")) - assert.TrueT(t, cp.Matches("Consumes:")) - assert.FalseT(t, cp.Matches("other")) - - // Q4: body is YAML-list-strict. Input uses `- value` markers. - require.NoError(t, cp.Parse([]string{"- application/json", "", "- application/xml", " "})) - assert.Equal(t, []string{"application/json", "application/xml"}, got) -} - -func TestProducesDropEmptyParser(t *testing.T) { - t.Parallel() - - var got []string - pp := NewProducesDropEmptyParser(func(v []string) { got = v }) - assert.TrueT(t, pp.Matches("produces:")) - assert.TrueT(t, pp.Matches("Produces:")) - - require.NoError(t, pp.Parse([]string{"- text/plain", "", "- text/html"})) - assert.Equal(t, []string{"text/plain", "text/html"}, got) -} - -func TestMultilineYAMLListParserNonListDropsValues(t *testing.T) { - // Q4 strict-list contract: a scalar body emits a warning and - // produces no values (setter called with nothing? no — setter - // is NOT called on the non-list path, so `got` stays at its - // zero value). - t.Parallel() - - var called bool - cp := NewConsumesDropEmptyParser(func(v []string) { called = true; _ = v }) - require.NoError(t, cp.Parse([]string{"application/json"})) // bare form, not a list - assert.FalseT(t, called) -} diff --git a/internal/parsers/regexprs.go b/internal/parsers/regexprs.go index 1e3fd36..495b152 100644 --- a/internal/parsers/regexprs.go +++ b/internal/parsers/regexprs.go @@ -144,8 +144,6 @@ var ( rxIn = regexp.MustCompile(rxCommentPrefix + `[Ii]n\p{Zs}*:\p{Zs}*(query|path|header|body|formData)(?:\.)?$`) rxRequired = regexp.MustCompile(rxCommentPrefix + `[Rr]equired\p{Zs}*:\p{Zs}*(true|false)(?:\.)?$`) - rxDiscriminator = regexp.MustCompile(rxCommentPrefix + `[Dd]iscriminator\p{Zs}*:\p{Zs}*(true|false)(?:\.)?$`) - rxReadOnly = regexp.MustCompile(rxCommentPrefix + `[Rr]ead(?:\p{Zs}*|[\p{Pd}\p{Pc}])?[Oo]nly\p{Zs}*:\p{Zs}*(true|false)(?:\.)?$`) rxConsumes = regexp.MustCompile(rxCommentPrefix + `[Cc]onsumes\p{Zs}*:`) rxProduces = regexp.MustCompile(rxCommentPrefix + `[Pp]roduces\p{Zs}*:`) rxSecuritySchemes = regexp.MustCompile(rxCommentPrefix + `[Ss]ecurity\p{Zs}*:`) @@ -161,7 +159,6 @@ var ( rxTOS = regexp.MustCompile(rxCommentPrefix + `[Tt](:?erms)?\p{Zs}*-?[Oo]f?\p{Zs}*-?[Ss](?:ervice)?\p{Zs}*:`) rxExtensions = regexp.MustCompile(rxCommentPrefix + `[Ee]xtensions\p{Zs}*:`) rxInfoExtensions = regexp.MustCompile(rxCommentPrefix + `[In]nfo\p{Zs}*[Ee]xtensions:`) - rxDeprecated = regexp.MustCompile(rxCommentPrefix + `[Dd]eprecated\p{Zs}*:\p{Zs}*(true|false)(?:\.)?$`) // currently unused: rxExample = regexp.MustCompile(`[Ex]ample\p{Zs}*:\p{Zs}*(.*)$`). ) diff --git a/internal/parsers/regexprs_test.go b/internal/parsers/regexprs_test.go index cb5a71c..18d774b 100644 --- a/internal/parsers/regexprs_test.go +++ b/internal/parsers/regexprs_test.go @@ -119,7 +119,6 @@ func TestSchemaValueExtractors(t *testing.T) { verifyIntegerMinMaxManyWords(t, Rxf(rxMinItemsFmt, ""), "min", []string{"items"}) verifyBoolean(t, Rxf(rxUniqueFmt, ""), []string{"unique"}, nil) - verifyBoolean(t, rxReadOnly, []string{"read"}, []string{"only"}) verifyBoolean(t, rxRequired, []string{"required"}, nil) } diff --git a/internal/parsers/sectioned_parser_go119_test.go b/internal/parsers/sectioned_parser_go119_test.go deleted file mode 100644 index 729e047..0000000 --- a/internal/parsers/sectioned_parser_go119_test.go +++ /dev/null @@ -1,47 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "testing" - - "github.com/go-openapi/testify/v2/assert" - "github.com/go-openapi/testify/v2/require" -) - -func TestSectionedParser_TitleDescriptionGo119(t *testing.T) { - text := `# This has a title that starts with a hash tag - -The punctuation here does indeed matter. But it won't for go. -` - - text2 := `This has a title without whitespace. - -The punctuation here does indeed matter. But it won't for go. - -# There is an inline header here that doesn't count for finding a title - -` - - var err error - - st := &SectionedParser{} - st.setTitle = func(_ []string) {} - err = st.Parse(ascg(text)) - require.NoError(t, err) - - assert.Equal(t, []string{"This has a title that starts with a hash tag"}, st.Title()) - assert.Equal(t, []string{"The punctuation here does indeed matter. But it won't for go."}, st.Description()) - - st = &SectionedParser{} - st.setTitle = func(_ []string) {} - err = st.Parse(ascg(text2)) - require.NoError(t, err) - - assert.Equal(t, []string{"This has a title without whitespace."}, st.Title()) - assert.Equal(t, []string{ - "The punctuation here does indeed matter. But it won't for go.", "", - "# There is an inline header here that doesn't count for finding a title", - }, st.Description()) -} diff --git a/internal/parsers/sectioned_parser_test.go b/internal/parsers/sectioned_parser_test.go deleted file mode 100644 index d28d82c..0000000 --- a/internal/parsers/sectioned_parser_test.go +++ /dev/null @@ -1,382 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "errors" - "fmt" - "go/ast" - "regexp" - "strings" - "testing" - - "github.com/go-openapi/testify/v2/assert" - "github.com/go-openapi/testify/v2/require" - - "github.com/go-openapi/spec" -) - -// only used within this group of tests but never used within actual code base. -func newSchemaAnnotationParser(goName string) *schemaAnnotationParser { - return &schemaAnnotationParser{GoName: goName, rx: rxModelOverride} -} - -type schemaAnnotationParser struct { - GoName string - Name string - rx *regexp.Regexp -} - -func (sap *schemaAnnotationParser) Matches(line string) bool { - return sap.rx.MatchString(line) -} - -func (sap *schemaAnnotationParser) Parse(lines []string) error { - if sap.Name != "" { - return nil - } - - if len(lines) > 0 { - for _, line := range lines { - matches := sap.rx.FindStringSubmatch(line) - if len(matches) > 1 && len(matches[1]) > 0 { - sap.Name = matches[1] - return nil - } - } - } - return nil -} - -func TestSectionedParser_TitleDescription(t *testing.T) { - const ( - text = `This has a title, separated by a whitespace line - -In this example the punctuation for the title should not matter for swagger. -For go it will still make a difference though. -` - text2 = `This has a title without whitespace. -The punctuation here does indeed matter. But it won't for go. -` - - text3 = `This has a title, and markdown in the description - -See how markdown works now, we can have lists: - -+ first item -+ second item -+ third item - -[Links works too](http://localhost) -` - - text4 = `This has whitespace sensitive markdown in the description - -|+ first item -| + nested item -| + also nested item - -Sample code block: - -| fmt.Println("Hello World!") - -` - ) - - var err error - - st := &SectionedParser{} - st.setTitle = func(_ []string) {} - err = st.Parse(ascg(text)) - require.NoError(t, err) - - assert.Equal(t, []string{"This has a title, separated by a whitespace line"}, st.Title()) - assert.Equal(t, []string{"In this example the punctuation for the title should not matter for swagger.", "For go it will still make a difference though."}, st.Description()) - - st = &SectionedParser{} - st.setTitle = func(_ []string) {} - err = st.Parse(ascg(text2)) - require.NoError(t, err) - - assert.Equal(t, []string{"This has a title without whitespace."}, st.Title()) - assert.Equal(t, []string{"The punctuation here does indeed matter. But it won't for go."}, st.Description()) - - st = &SectionedParser{} - st.setTitle = func(_ []string) {} - err = st.Parse(ascg(text3)) - require.NoError(t, err) - - assert.Equal(t, []string{"This has a title, and markdown in the description"}, st.Title()) - assert.Equal(t, []string{ - "See how markdown works now, we can have lists:", "", - "+ first item", "+ second item", "+ third item", "", - "[Links works too](http://localhost)", - }, st.Description()) - - st = &SectionedParser{} - st.setTitle = func(_ []string) {} - err = st.Parse(ascg(text4)) - require.NoError(t, err) - - assert.Equal(t, []string{"This has whitespace sensitive markdown in the description"}, st.Title()) - assert.Equal(t, []string{"+ first item", " + nested item", " + also nested item", "", "Sample code block:", "", " fmt.Println(\"Hello World!\")"}, st.Description()) -} - -type schemaValidations struct { - current *spec.Schema -} - -func (sv schemaValidations) SetMaximum(val float64, exclusive bool) { - sv.current.Maximum = &val - sv.current.ExclusiveMaximum = exclusive -} - -func (sv schemaValidations) SetMinimum(val float64, exclusive bool) { - sv.current.Minimum = &val - sv.current.ExclusiveMinimum = exclusive -} -func (sv schemaValidations) SetMultipleOf(val float64) { sv.current.MultipleOf = &val } -func (sv schemaValidations) SetMinItems(val int64) { sv.current.MinItems = &val } -func (sv schemaValidations) SetMaxItems(val int64) { sv.current.MaxItems = &val } -func (sv schemaValidations) SetMinLength(val int64) { sv.current.MinLength = &val } -func (sv schemaValidations) SetMaxLength(val int64) { sv.current.MaxLength = &val } -func (sv schemaValidations) SetPattern(val string) { sv.current.Pattern = val } -func (sv schemaValidations) SetUnique(val bool) { sv.current.UniqueItems = val } -func (sv schemaValidations) SetDefault(val any) { sv.current.Default = val } -func (sv schemaValidations) SetExample(val any) { sv.current.Example = val } -func (sv schemaValidations) SetEnum(val string) { - var typ string - if len(sv.current.Type) > 0 { - typ = sv.current.Type[0] - } - sv.current.Enum = ParseEnum(val, &spec.SimpleSchema{Format: sv.current.Format, Type: typ}) -} - -func dummybuilder() schemaValidations { - return schemaValidations{new(spec.Schema)} -} - -func TestSectionedParser_TagsDescription(t *testing.T) { - const ( - block = `This has a title without whitespace. -The punctuation here does indeed matter. But it won't for go. -minimum: 10 -maximum: 20 -` - block2 = `This has a title without whitespace. -The punctuation here does indeed matter. But it won't for go. - -minimum: 10 -maximum: 20 -` - ) - - var err error - - st := &SectionedParser{} - st.setTitle = func(_ []string) {} - st.taggers = []TagParser{ - {"Maximum", false, false, nil, &SetMaximum{builder: dummybuilder(), rx: regexp.MustCompile(fmt.Sprintf(rxMaximumFmt, ""))}}, - {"Minimum", false, false, nil, &SetMinimum{builder: dummybuilder(), rx: regexp.MustCompile(fmt.Sprintf(rxMinimumFmt, ""))}}, - {"MultipleOf", false, false, nil, &SetMultipleOf{builder: dummybuilder(), rx: regexp.MustCompile(fmt.Sprintf(rxMultipleOfFmt, ""))}}, - } - - err = st.Parse(ascg(block)) - require.NoError(t, err) - assert.Equal(t, []string{"This has a title without whitespace."}, st.Title()) - assert.Equal(t, []string{"The punctuation here does indeed matter. But it won't for go."}, st.Description()) - assert.Len(t, st.matched, 2) - _, ok := st.matched["Maximum"] - assert.TrueT(t, ok) - _, ok = st.matched["Minimum"] - assert.TrueT(t, ok) - - st = &SectionedParser{} - st.setTitle = func(_ []string) {} - st.taggers = []TagParser{ - {"Maximum", false, false, nil, &SetMaximum{builder: dummybuilder(), rx: regexp.MustCompile(fmt.Sprintf(rxMaximumFmt, ""))}}, - {"Minimum", false, false, nil, &SetMinimum{builder: dummybuilder(), rx: regexp.MustCompile(fmt.Sprintf(rxMinimumFmt, ""))}}, - {"MultipleOf", false, false, nil, &SetMultipleOf{builder: dummybuilder(), rx: regexp.MustCompile(fmt.Sprintf(rxMultipleOfFmt, ""))}}, - } - - err = st.Parse(ascg(block2)) - require.NoError(t, err) - assert.Equal(t, []string{"This has a title without whitespace."}, st.Title()) - assert.Equal(t, []string{"The punctuation here does indeed matter. But it won't for go."}, st.Description()) - assert.Len(t, st.matched, 2) - _, ok = st.matched["Maximum"] - assert.TrueT(t, ok) - _, ok = st.matched["Minimum"] - assert.TrueT(t, ok) -} - -func TestSectionedParser_Empty(t *testing.T) { - const block = `swagger:response someResponse` - - var err error - - st := &SectionedParser{} - st.setTitle = func(_ []string) {} - ap := newSchemaAnnotationParser("SomeResponse") - ap.rx = rxResponseOverride - st.annotation = ap - - err = st.Parse(ascg(block)) - require.NoError(t, err) - assert.Empty(t, st.Title()) - assert.Empty(t, st.Description()) - assert.Empty(t, st.taggers) - assert.EqualT(t, "SomeResponse", ap.GoName) - assert.EqualT(t, "someResponse", ap.Name) -} - -func testSectionedParserWithBlock( - t *testing.T, - block string, - expectedMatchedCount int, - maximumExpected bool, -) { - t.Helper() - - st := &SectionedParser{} - st.setTitle = func(_ []string) {} - ap := newSchemaAnnotationParser("SomeModel") - st.annotation = ap - st.taggers = []TagParser{ - {"Maximum", false, false, nil, &SetMaximum{builder: dummybuilder(), rx: regexp.MustCompile(fmt.Sprintf(rxMaximumFmt, ""))}}, - {"Minimum", false, false, nil, &SetMinimum{builder: dummybuilder(), rx: regexp.MustCompile(fmt.Sprintf(rxMinimumFmt, ""))}}, - {"MultipleOf", false, false, nil, &SetMultipleOf{builder: dummybuilder(), rx: regexp.MustCompile(fmt.Sprintf(rxMultipleOfFmt, ""))}}, - } - - err := st.Parse(ascg(block)) - require.NoError(t, err) - assert.Equal(t, []string{"This has a title without whitespace."}, st.Title()) - assert.Equal(t, []string{"The punctuation here does indeed matter. But it won't for go."}, st.Description()) - assert.Len(t, st.matched, expectedMatchedCount) - _, ok := st.matched["Maximum"] - assert.EqualT(t, maximumExpected, ok) - _, ok = st.matched["Minimum"] - assert.TrueT(t, ok) - assert.EqualT(t, "SomeModel", ap.GoName) - assert.EqualT(t, "someModel", ap.Name) -} - -func TestSectionedParser_SkipSectionAnnotation(t *testing.T) { - const block = `swagger:model someModel - -This has a title without whitespace. -The punctuation here does indeed matter. But it won't for go. - -minimum: 10 -maximum: 20 -` - testSectionedParserWithBlock(t, block, 2, true) -} - -func TestSectionedParser_TerminateOnNewAnnotation(t *testing.T) { - const block = `swagger:model someModel - -This has a title without whitespace. -The punctuation here does indeed matter. But it won't for go. - -minimum: 10 -swagger:meta -maximum: 20 -` - testSectionedParserWithBlock(t, block, 1, false) -} - -func TestSectionedParser_NilDoc(t *testing.T) { - st := NewSectionedParser( - WithSetTitle(func(_ []string) {}), - WithSetDescription(func(_ []string) {}), - ) - require.NoError(t, st.Parse(nil)) - assert.Empty(t, st.Title()) - assert.Empty(t, st.Description()) - assert.FalseT(t, st.Ignored()) -} - -func TestSectionedParser_IgnoredAnnotation(t *testing.T) { - const block = `swagger:ignore SomeType - -This should not matter. -` - st := NewSectionedParser( - WithSetTitle(func(_ []string) {}), - ) - err := st.Parse(ascg(block)) - require.NoError(t, err) - assert.TrueT(t, st.Ignored()) -} - -func TestSectionedParser_WithoutSetTitle(t *testing.T) { - // When setTitle is nil, collectTitleDescription cleans up headers - // but does not split title from description. - const block = `Just a description line. -Another line. -` - st := &SectionedParser{} - err := st.Parse(ascg(block)) - require.NoError(t, err) - assert.Nil(t, st.Title()) - assert.Equal(t, []string{"Just a description line.", "Another line."}, st.Description()) -} - -func TestSectionedParser_TagParseError(t *testing.T) { - // When a matched tagger's Parse returns an error, SectionedParser.Parse propagates it. - errParser := &failingParser{} - st := NewSectionedParser( - WithSetTitle(func(_ []string) {}), - WithTaggers( - NewSingleLineTagParser("Failing", errParser), - ), - ) - - const block = `Title. - -minimum: 10 -` - err := st.Parse(ascg(block)) - require.Error(t, err) - assert.ErrorIs(t, err, errForced) -} - -type failingParser struct{} - -var errForced = errors.New("forced error") - -func (f *failingParser) Matches(line string) bool { return rxMinimum.MatchString(line) } -func (f *failingParser) Parse(_ []string) error { return errForced } - -func TestSectionedParser_AnnotationMatchWithHeader(t *testing.T) { - // When the annotation matches and headers have been collected, - // seenTag is set to true — further non-tag lines are skipped. - const block = `swagger:model someModel - -Title. -Description. - -swagger:model anotherModel -This line after a re-match should still be part of the description. -` - ap := newSchemaAnnotationParser("SomeModel") - st := &SectionedParser{} - st.setTitle = func(_ []string) {} - st.annotation = ap - - err := st.Parse(ascg(block)) - require.NoError(t, err) - assert.EqualT(t, "someModel", ap.Name) -} - -func ascg(txt string) *ast.CommentGroup { - var cg ast.CommentGroup - for line := range strings.SplitSeq(txt, "\n") { - var cmt ast.Comment - cmt.Text = "// " + line - cg.List = append(cg.List, &cmt) - } - return &cg -} diff --git a/internal/parsers/validations.go b/internal/parsers/validations.go deleted file mode 100644 index f2c5ae2..0000000 --- a/internal/parsers/validations.go +++ /dev/null @@ -1,610 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "fmt" - "regexp" - "strconv" - - "github.com/go-openapi/codescan/internal/ifaces" - oaispec "github.com/go-openapi/spec" -) - -var ( - rxMaximum = regexp.MustCompile(fmt.Sprintf(rxMaximumFmt, "")) - rxMinimum = regexp.MustCompile(fmt.Sprintf(rxMinimumFmt, "")) - rxMultipleOf = regexp.MustCompile(fmt.Sprintf(rxMultipleOfFmt, "")) - rxMinItems = regexp.MustCompile(fmt.Sprintf(rxMinItemsFmt, "")) - rxMaxItems = regexp.MustCompile(fmt.Sprintf(rxMaxItemsFmt, "")) - rxMaxLength = regexp.MustCompile(fmt.Sprintf(rxMaxLengthFmt, "")) - rxMinLength = regexp.MustCompile(fmt.Sprintf(rxMinLengthFmt, "")) - rxPattern = regexp.MustCompile(fmt.Sprintf(rxPatternFmt, "")) - rxCollectionFormat = regexp.MustCompile(fmt.Sprintf(rxCollectionFormatFmt, "")) - rxUnique = regexp.MustCompile(fmt.Sprintf(rxUniqueFmt, "")) - rxEnumValidation = regexp.MustCompile(fmt.Sprintf(rxEnumFmt, "")) - rxDefaultValidation = regexp.MustCompile(fmt.Sprintf(rxDefaultFmt, "")) - rxExample = regexp.MustCompile(fmt.Sprintf(rxExampleFmt, "")) -) - -type PrefixRxOption func(string) *regexp.Regexp - -func WithItemsPrefixLevel(level int) PrefixRxOption { - // the expression is 1-index based not 0-index - itemsPrefix := fmt.Sprintf(rxItemsPrefixFmt, level+1) - return func(expr string) *regexp.Regexp { - return Rxf(expr, itemsPrefix) // Proposal for enhancement(fred): cache - } -} - -type SetMaximum struct { - builder ifaces.ValidationBuilder - rx *regexp.Regexp -} - -func NewSetMaximum(builder ifaces.ValidationBuilder, opts ...PrefixRxOption) *SetMaximum { - rx := rxMaximum - for _, apply := range opts { - rx = apply(rxMaximumFmt) - } - - return &SetMaximum{ - builder: builder, - rx: rx, - } -} - -func (sm *SetMaximum) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := sm.rx.FindStringSubmatch(lines[0]) - if len(matches) > 2 && len(matches[2]) > 0 { - maximum, err := strconv.ParseFloat(matches[2], 64) - if err != nil { - return err - } - sm.builder.SetMaximum(maximum, matches[1] == "<") - } - return nil -} - -func (sm *SetMaximum) Matches(line string) bool { - return sm.rx.MatchString(line) -} - -type SetMinimum struct { - builder ifaces.ValidationBuilder - rx *regexp.Regexp -} - -func NewSetMinimum(builder ifaces.ValidationBuilder, opts ...PrefixRxOption) *SetMinimum { - rx := rxMinimum - for _, apply := range opts { - rx = apply(rxMinimumFmt) - } - - return &SetMinimum{ - builder: builder, - rx: rx, - } -} - -func (sm *SetMinimum) Matches(line string) bool { - return sm.rx.MatchString(line) -} - -func (sm *SetMinimum) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := sm.rx.FindStringSubmatch(lines[0]) - if len(matches) > 2 && len(matches[2]) > 0 { - minimum, err := strconv.ParseFloat(matches[2], 64) - if err != nil { - return err - } - sm.builder.SetMinimum(minimum, matches[1] == ">") - } - return nil -} - -type SetMultipleOf struct { - builder ifaces.ValidationBuilder - rx *regexp.Regexp -} - -func NewSetMultipleOf(builder ifaces.ValidationBuilder, opts ...PrefixRxOption) *SetMultipleOf { - rx := rxMultipleOf - for _, apply := range opts { - rx = apply(rxMultipleOfFmt) - } - - return &SetMultipleOf{ - builder: builder, - rx: rx, - } -} - -func (sm *SetMultipleOf) Matches(line string) bool { - return sm.rx.MatchString(line) -} - -func (sm *SetMultipleOf) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := sm.rx.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - multipleOf, err := strconv.ParseFloat(matches[1], 64) - if err != nil { - return err - } - sm.builder.SetMultipleOf(multipleOf) - } - return nil -} - -type SetMaxItems struct { - builder ifaces.ValidationBuilder - rx *regexp.Regexp -} - -func NewSetMaxItems(builder ifaces.ValidationBuilder, opts ...PrefixRxOption) *SetMaxItems { - rx := rxMaxItems - for _, apply := range opts { - rx = apply(rxMaxItemsFmt) - } - - return &SetMaxItems{ - builder: builder, - rx: rx, - } -} - -func (sm *SetMaxItems) Matches(line string) bool { - return sm.rx.MatchString(line) -} - -func (sm *SetMaxItems) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := sm.rx.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - maxItems, err := strconv.ParseInt(matches[1], 10, 64) - if err != nil { - return err - } - sm.builder.SetMaxItems(maxItems) - } - return nil -} - -type SetMinItems struct { - builder ifaces.ValidationBuilder - rx *regexp.Regexp -} - -func NewSetMinItems(builder ifaces.ValidationBuilder, opts ...PrefixRxOption) *SetMinItems { - rx := rxMinItems - for _, apply := range opts { - rx = apply(rxMinItemsFmt) - } - - return &SetMinItems{ - builder: builder, - rx: rx, - } -} - -func (sm *SetMinItems) Matches(line string) bool { - return sm.rx.MatchString(line) -} - -func (sm *SetMinItems) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := sm.rx.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - minItems, err := strconv.ParseInt(matches[1], 10, 64) - if err != nil { - return err - } - sm.builder.SetMinItems(minItems) - } - return nil -} - -type SetMaxLength struct { - builder ifaces.ValidationBuilder - rx *regexp.Regexp -} - -func NewSetMaxLength(builder ifaces.ValidationBuilder, opts ...PrefixRxOption) *SetMaxLength { - rx := rxMaxLength - for _, apply := range opts { - rx = apply(rxMaxLengthFmt) - } - - return &SetMaxLength{ - builder: builder, - rx: rx, - } -} - -func (sm *SetMaxLength) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := sm.rx.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - maxLength, err := strconv.ParseInt(matches[1], 10, 64) - if err != nil { - return err - } - sm.builder.SetMaxLength(maxLength) - } - return nil -} - -func (sm *SetMaxLength) Matches(line string) bool { - return sm.rx.MatchString(line) -} - -type SetMinLength struct { - builder ifaces.ValidationBuilder - rx *regexp.Regexp -} - -func NewSetMinLength(builder ifaces.ValidationBuilder, opts ...PrefixRxOption) *SetMinLength { - rx := rxMinLength - for _, apply := range opts { - rx = apply(rxMinLengthFmt) - } - - return &SetMinLength{ - builder: builder, - rx: rx, - } -} - -func (sm *SetMinLength) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := sm.rx.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - minLength, err := strconv.ParseInt(matches[1], 10, 64) - if err != nil { - return err - } - sm.builder.SetMinLength(minLength) - } - return nil -} - -func (sm *SetMinLength) Matches(line string) bool { - return sm.rx.MatchString(line) -} - -type SetPattern struct { - builder ifaces.ValidationBuilder - rx *regexp.Regexp -} - -func NewSetPattern(builder ifaces.ValidationBuilder, opts ...PrefixRxOption) *SetPattern { - rx := rxPattern - for _, apply := range opts { - rx = apply(rxPatternFmt) - } - - return &SetPattern{ - builder: builder, - rx: rx, - } -} - -func (sm *SetPattern) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := sm.rx.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - sm.builder.SetPattern(matches[1]) - } - return nil -} - -func (sm *SetPattern) Matches(line string) bool { - return sm.rx.MatchString(line) -} - -type SetCollectionFormat struct { - builder ifaces.OperationValidationBuilder - rx *regexp.Regexp -} - -func NewSetCollectionFormat(builder ifaces.OperationValidationBuilder, opts ...PrefixRxOption) *SetCollectionFormat { - rx := rxCollectionFormat - for _, apply := range opts { - rx = apply(rxCollectionFormatFmt) - } - - return &SetCollectionFormat{ - builder: builder, - rx: rx, - } -} - -func (sm *SetCollectionFormat) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := sm.rx.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - sm.builder.SetCollectionFormat(matches[1]) - } - return nil -} - -func (sm *SetCollectionFormat) Matches(line string) bool { - return sm.rx.MatchString(line) -} - -type SetUnique struct { - builder ifaces.ValidationBuilder - rx *regexp.Regexp -} - -func NewSetUnique(builder ifaces.ValidationBuilder, opts ...PrefixRxOption) *SetUnique { - rx := rxUnique - for _, apply := range opts { - rx = apply(rxUniqueFmt) - } - - return &SetUnique{ - builder: builder, - rx: rx, - } -} - -func (su *SetUnique) Matches(line string) bool { - return su.rx.MatchString(line) -} - -func (su *SetUnique) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := su.rx.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - req, err := strconv.ParseBool(matches[1]) - if err != nil { - return err - } - su.builder.SetUnique(req) - } - return nil -} - -type SetRequiredParam struct { - tgt *oaispec.Parameter -} - -func NewSetRequiredParam(param *oaispec.Parameter) *SetRequiredParam { - return &SetRequiredParam{ - tgt: param, - } -} - -func (su *SetRequiredParam) Matches(line string) bool { - return rxRequired.MatchString(line) -} - -func (su *SetRequiredParam) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := rxRequired.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - req, err := strconv.ParseBool(matches[1]) - if err != nil { - return err - } - su.tgt.Required = req - } - return nil -} - -type SetReadOnlySchema struct { - tgt *oaispec.Schema -} - -func NewSetReadOnlySchema(schema *oaispec.Schema) *SetReadOnlySchema { - return &SetReadOnlySchema{ - tgt: schema, - } -} - -func (su *SetReadOnlySchema) Matches(line string) bool { - return rxReadOnly.MatchString(line) -} - -func (su *SetReadOnlySchema) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := rxReadOnly.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - req, err := strconv.ParseBool(matches[1]) - if err != nil { - return err - } - su.tgt.ReadOnly = req - } - return nil -} - -type SetRequiredSchema struct { - Schema *oaispec.Schema - Field string -} - -func NewSetRequiredSchema(schema *oaispec.Schema, field string) *SetRequiredSchema { - return &SetRequiredSchema{ - Schema: schema, - Field: field, - } -} - -func (su *SetRequiredSchema) Matches(line string) bool { - return rxRequired.MatchString(line) -} - -func (su *SetRequiredSchema) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := rxRequired.FindStringSubmatch(lines[0]) - if len(matches) <= 1 || len(matches[1]) == 0 { - return nil - } - - req, err := strconv.ParseBool(matches[1]) - if err != nil { - return err - } - midx := -1 - for i, nm := range su.Schema.Required { - if nm == su.Field { - midx = i - break - } - } - if req { - if midx < 0 { - su.Schema.Required = append(su.Schema.Required, su.Field) - } - } else if midx >= 0 { - su.Schema.Required = append(su.Schema.Required[:midx], su.Schema.Required[midx+1:]...) - } - return nil -} - -type SetDefault struct { - scheme *oaispec.SimpleSchema - builder ifaces.ValidationBuilder - rx *regexp.Regexp -} - -func NewSetDefault(scheme *oaispec.SimpleSchema, builder ifaces.ValidationBuilder, opts ...PrefixRxOption) *SetDefault { - rx := rxDefaultValidation - for _, apply := range opts { - rx = apply(rxDefaultFmt) - } - - return &SetDefault{ - scheme: scheme, - builder: builder, - rx: rx, - } -} - -func (sd *SetDefault) Matches(line string) bool { - return sd.rx.MatchString(line) -} - -func (sd *SetDefault) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - - matches := sd.rx.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - d, err := ParseValueFromSchema(matches[1], sd.scheme) - if err != nil { - return err - } - sd.builder.SetDefault(d) - } - - return nil -} - -type SetExample struct { - scheme *oaispec.SimpleSchema - builder ifaces.ValidationBuilder - rx *regexp.Regexp -} - -func NewSetExample(scheme *oaispec.SimpleSchema, builder ifaces.ValidationBuilder, opts ...PrefixRxOption) *SetExample { - rx := rxExample - for _, apply := range opts { - rx = apply(rxExampleFmt) - } - - return &SetExample{ - scheme: scheme, - builder: builder, - rx: rx, - } -} - -func (se *SetExample) Matches(line string) bool { - return se.rx.MatchString(line) -} - -func (se *SetExample) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - - matches := se.rx.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - d, err := ParseValueFromSchema(matches[1], se.scheme) - if err != nil { - return err - } - se.builder.SetExample(d) - } - - return nil -} - -type SetDiscriminator struct { - Schema *oaispec.Schema - Field string -} - -func NewSetDiscriminator(schema *oaispec.Schema, field string) *SetDiscriminator { - return &SetDiscriminator{ - Schema: schema, - Field: field, - } -} - -func (su *SetDiscriminator) Matches(line string) bool { - return rxDiscriminator.MatchString(line) -} - -func (su *SetDiscriminator) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := rxDiscriminator.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - req, err := strconv.ParseBool(matches[1]) - if err != nil { - return err - } - if req { - su.Schema.Discriminator = su.Field - } else if su.Schema.Discriminator == su.Field { - su.Schema.Discriminator = "" - } - } - return nil -} diff --git a/internal/parsers/validations_test.go b/internal/parsers/validations_test.go deleted file mode 100644 index ed09c9d..0000000 --- a/internal/parsers/validations_test.go +++ /dev/null @@ -1,750 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "strings" - "testing" - - "github.com/go-openapi/codescan/internal/ifaces" - "github.com/go-openapi/codescan/internal/scantest/mocks" - "github.com/go-openapi/testify/v2/assert" - "github.com/go-openapi/testify/v2/require" - - oaispec "github.com/go-openapi/spec" -) - -// validationRecorder captures all calls made to a ValidationBuilder. -type validationRecorder struct { - maximum *float64 - exclusiveMaximum bool - minimum *float64 - exclusiveMinimum bool - multipleOf *float64 - minItems *int64 - maxItems *int64 - minLength *int64 - maxLength *int64 - pattern string - unique *bool - enum string - defaultVal any - exampleVal any - collectionFormat string -} - -func (r *validationRecorder) SetMaximum(v float64, exclusive bool) { - r.maximum = &v - r.exclusiveMaximum = exclusive -} - -func (r *validationRecorder) SetMinimum(v float64, exclusive bool) { - r.minimum = &v - r.exclusiveMinimum = exclusive -} -func (r *validationRecorder) SetMultipleOf(v float64) { r.multipleOf = &v } -func (r *validationRecorder) SetMinItems(v int64) { r.minItems = &v } -func (r *validationRecorder) SetMaxItems(v int64) { r.maxItems = &v } -func (r *validationRecorder) SetMinLength(v int64) { r.minLength = &v } -func (r *validationRecorder) SetMaxLength(v int64) { r.maxLength = &v } -func (r *validationRecorder) SetPattern(v string) { r.pattern = v } -func (r *validationRecorder) SetUnique(v bool) { r.unique = &v } -func (r *validationRecorder) SetEnum(v string) { r.enum = v } -func (r *validationRecorder) SetDefault(v any) { r.defaultVal = v } -func (r *validationRecorder) SetExample(v any) { r.exampleVal = v } -func (r *validationRecorder) SetCollectionFormat(v string) { r.collectionFormat = v } - -func TestSetMaximum(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - line string - wantMatch bool - wantVal float64 - exclusive bool - }{ - {"inclusive", "maximum: 100", true, 100, false}, - {"exclusive", "maximum: < 100", true, 100, true}, - {"decimal", "maximum: 99.5", true, 99.5, false}, - {"negative", "maximum: -10", true, -10, false}, - {"no match", "something else", false, 0, false}, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - rec := &validationRecorder{} - sm := NewSetMaximum(rec) - assert.EqualT(t, tc.wantMatch, sm.Matches(tc.line)) - if tc.wantMatch { - require.NoError(t, sm.Parse([]string{tc.line})) - require.NotNil(t, rec.maximum) - assert.EqualT(t, tc.wantVal, *rec.maximum) - assert.EqualT(t, tc.exclusive, rec.exclusiveMaximum) - } - }) - } - - t.Run("empty lines", func(t *testing.T) { - rec := &validationRecorder{} - sm := NewSetMaximum(rec) - require.NoError(t, sm.Parse(nil)) - require.NoError(t, sm.Parse([]string{})) - require.NoError(t, sm.Parse([]string{""})) - assert.Nil(t, rec.maximum) - }) - - t.Run("parse error", func(t *testing.T) { - rec := &validationRecorder{} - sm := NewSetMaximum(rec) - // Force a match with a non-numeric value via raw regex - require.NoError(t, sm.Parse([]string{"maximum: not-a-number"})) - assert.Nil(t, rec.maximum) // no match because regex won't capture non-numeric - }) -} - -func TestSetMinimum(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - line string - wantMatch bool - wantVal float64 - exclusive bool - }{ - {"inclusive", "minimum: 0", true, 0, false}, - {"exclusive", "minimum: > 0", true, 0, true}, - {"decimal", "min: 1.5", true, 1.5, false}, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - rec := &validationRecorder{} - sm := NewSetMinimum(rec) - assert.EqualT(t, tc.wantMatch, sm.Matches(tc.line)) - if tc.wantMatch { - require.NoError(t, sm.Parse([]string{tc.line})) - require.NotNil(t, rec.minimum) - assert.EqualT(t, tc.wantVal, *rec.minimum) - assert.EqualT(t, tc.exclusive, rec.exclusiveMinimum) - } - }) - } -} - -func TestSetMultipleOf(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - line string - wantMatch bool - wantVal float64 - }{ - {"integer", "multiple of: 5", true, 5}, - {"decimal", "Multiple Of: 0.5", true, 0.5}, - {"no match", "something else", false, 0}, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - rec := &validationRecorder{} - sm := NewSetMultipleOf(rec) - assert.EqualT(t, tc.wantMatch, sm.Matches(tc.line)) - if tc.wantMatch { - require.NoError(t, sm.Parse([]string{tc.line})) - require.NotNil(t, rec.multipleOf) - assert.EqualT(t, tc.wantVal, *rec.multipleOf) - } - }) - } -} - -func TestSetMaxItems(t *testing.T) { - t.Parallel() - - rec := &validationRecorder{} - sm := NewSetMaxItems(rec) - assert.TrueT(t, sm.Matches("max items: 10")) - require.NoError(t, sm.Parse([]string{"max items: 10"})) - require.NotNil(t, rec.maxItems) - assert.EqualT(t, int64(10), *rec.maxItems) -} - -func TestSetMinItems(t *testing.T) { - t.Parallel() - - rec := &validationRecorder{} - sm := NewSetMinItems(rec) - assert.TrueT(t, sm.Matches("min items: 1")) - require.NoError(t, sm.Parse([]string{"min items: 1"})) - require.NotNil(t, rec.minItems) - assert.EqualT(t, int64(1), *rec.minItems) -} - -func TestSetMaxLength(t *testing.T) { - t.Parallel() - - rec := &validationRecorder{} - sm := NewSetMaxLength(rec) - assert.TrueT(t, sm.Matches("max length: 255")) - require.NoError(t, sm.Parse([]string{"max length: 255"})) - require.NotNil(t, rec.maxLength) - assert.EqualT(t, int64(255), *rec.maxLength) -} - -func TestSetMinLength(t *testing.T) { - t.Parallel() - - rec := &validationRecorder{} - sm := NewSetMinLength(rec) - assert.TrueT(t, sm.Matches("min length: 1")) - require.NoError(t, sm.Parse([]string{"min length: 1"})) - require.NotNil(t, rec.minLength) - assert.EqualT(t, int64(1), *rec.minLength) -} - -func TestSetPattern(t *testing.T) { - t.Parallel() - - rec := &validationRecorder{} - sm := NewSetPattern(rec) - assert.TrueT(t, sm.Matches("pattern: ^\\w+$")) - require.NoError(t, sm.Parse([]string{"pattern: ^\\w+$"})) - assert.EqualT(t, "^\\w+$", rec.pattern) -} - -func TestSetCollectionFormat(t *testing.T) { - t.Parallel() - - rec := &validationRecorder{} - sm := NewSetCollectionFormat(rec) - assert.TrueT(t, sm.Matches("collection format: csv")) - require.NoError(t, sm.Parse([]string{"collection format: csv"})) - assert.EqualT(t, "csv", rec.collectionFormat) -} - -func TestSetUnique(t *testing.T) { - t.Parallel() - - tests := []struct { - line string - want bool - }{ - {"unique: true", true}, - {"unique: false", false}, - } - - for _, tc := range tests { - t.Run(tc.line, func(t *testing.T) { - rec := &validationRecorder{} - su := NewSetUnique(rec) - assert.TrueT(t, su.Matches(tc.line)) - require.NoError(t, su.Parse([]string{tc.line})) - require.NotNil(t, rec.unique) - assert.EqualT(t, tc.want, *rec.unique) - }) - } - - t.Run("parse error", func(t *testing.T) { - rec := &validationRecorder{} - su := NewSetUnique(rec) - // unique: accepts only true/false so non-bool won't match - assert.FalseT(t, su.Matches("unique: maybe")) - }) -} - -func TestSetRequiredParam(t *testing.T) { - t.Parallel() - - tests := []struct { - line string - want bool - }{ - {"required: true", true}, - {"required: false", false}, - } - - for _, tc := range tests { - t.Run(tc.line, func(t *testing.T) { - param := new(oaispec.Parameter) - su := NewSetRequiredParam(param) - assert.TrueT(t, su.Matches(tc.line)) - require.NoError(t, su.Parse([]string{tc.line})) - assert.EqualT(t, tc.want, param.Required) - }) - } - - t.Run("empty", func(t *testing.T) { - param := new(oaispec.Parameter) - su := NewSetRequiredParam(param) - require.NoError(t, su.Parse(nil)) - assert.FalseT(t, param.Required) - }) -} - -func TestSetReadOnlySchema(t *testing.T) { - t.Parallel() - - tests := []struct { - line string - want bool - }{ - {"read only: true", true}, - {"readOnly: true", true}, - {"read-only: false", false}, - } - - for _, tc := range tests { - t.Run(tc.line, func(t *testing.T) { - schema := new(oaispec.Schema) - su := NewSetReadOnlySchema(schema) - assert.TrueT(t, su.Matches(tc.line)) - require.NoError(t, su.Parse([]string{tc.line})) - assert.EqualT(t, tc.want, schema.ReadOnly) - }) - } -} - -func TestSetRequiredSchema(t *testing.T) { - t.Parallel() - - t.Run("set required true", func(t *testing.T) { - schema := new(oaispec.Schema) - su := NewSetRequiredSchema(schema, "name") - require.NoError(t, su.Parse([]string{"required: true"})) - assert.Equal(t, []string{"name"}, schema.Required) - }) - - t.Run("set required false removes", func(t *testing.T) { - schema := &oaispec.Schema{} - schema.Required = []string{"name", "age"} - su := NewSetRequiredSchema(schema, "name") - require.NoError(t, su.Parse([]string{"required: false"})) - assert.Equal(t, []string{"age"}, schema.Required) - }) - - t.Run("set required true idempotent", func(t *testing.T) { - schema := &oaispec.Schema{} - schema.Required = []string{"name"} - su := NewSetRequiredSchema(schema, "name") - require.NoError(t, su.Parse([]string{"required: true"})) - assert.Equal(t, []string{"name"}, schema.Required) - }) - - t.Run("set required false not present", func(t *testing.T) { - schema := new(oaispec.Schema) - su := NewSetRequiredSchema(schema, "name") - require.NoError(t, su.Parse([]string{"required: false"})) - assert.Empty(t, schema.Required) - }) - - t.Run("empty lines", func(t *testing.T) { - schema := new(oaispec.Schema) - su := NewSetRequiredSchema(schema, "name") - require.NoError(t, su.Parse(nil)) - require.NoError(t, su.Parse([]string{""})) - }) - - t.Run("no match in line", func(t *testing.T) { - schema := new(oaispec.Schema) - su := NewSetRequiredSchema(schema, "name") - require.NoError(t, su.Parse([]string{"something else"})) - assert.Empty(t, schema.Required) - }) -} - -func TestSetDefault(t *testing.T) { - t.Parallel() - - t.Run("string type", func(t *testing.T) { - rec := &validationRecorder{} - scheme := &oaispec.SimpleSchema{Type: "string"} - sd := NewSetDefault(scheme, rec) - assert.TrueT(t, sd.Matches("default: hello")) - require.NoError(t, sd.Parse([]string{"default: hello"})) - assert.EqualT(t, "hello", rec.defaultVal) - }) - - t.Run("integer type", func(t *testing.T) { - rec := &validationRecorder{} - scheme := &oaispec.SimpleSchema{Type: "integer"} - sd := NewSetDefault(scheme, rec) - require.NoError(t, sd.Parse([]string{"default: 42"})) - assert.EqualT(t, 42, rec.defaultVal) - }) - - t.Run("empty", func(t *testing.T) { - rec := &validationRecorder{} - scheme := &oaispec.SimpleSchema{Type: "string"} - sd := NewSetDefault(scheme, rec) - require.NoError(t, sd.Parse(nil)) - assert.Nil(t, rec.defaultVal) - }) -} - -func TestSetExample(t *testing.T) { - t.Parallel() - - rec := &validationRecorder{} - scheme := &oaispec.SimpleSchema{Type: "string"} - se := NewSetExample(scheme, rec) - assert.TrueT(t, se.Matches("example: foobar")) - require.NoError(t, se.Parse([]string{"example: foobar"})) - assert.EqualT(t, "foobar", rec.exampleVal) -} - -func TestSetDiscriminator(t *testing.T) { - t.Parallel() - - t.Run("set true", func(t *testing.T) { - schema := new(oaispec.Schema) - sd := NewSetDiscriminator(schema, "kind") - assert.TrueT(t, sd.Matches("discriminator: true")) - require.NoError(t, sd.Parse([]string{"discriminator: true"})) - assert.EqualT(t, "kind", schema.Discriminator) - }) - - t.Run("set false clears", func(t *testing.T) { - schema := &oaispec.Schema{} - schema.Discriminator = "kind" - sd := NewSetDiscriminator(schema, "kind") - require.NoError(t, sd.Parse([]string{"discriminator: false"})) - assert.EqualT(t, "", schema.Discriminator) - }) - - t.Run("set false different field", func(t *testing.T) { - schema := &oaispec.Schema{} - schema.Discriminator = "type" - sd := NewSetDiscriminator(schema, "kind") - require.NoError(t, sd.Parse([]string{"discriminator: false"})) - assert.EqualT(t, "type", schema.Discriminator) // unchanged - }) -} - -func TestWithItemsPrefixLevel(t *testing.T) { - t.Parallel() - - rec := &validationRecorder{} - sm := NewSetMaximum(rec, WithItemsPrefixLevel(0)) - line := "items.maximum: 100" - assert.TrueT(t, sm.Matches(line)) - require.NoError(t, sm.Parse([]string{line})) - require.NotNil(t, rec.maximum) - assert.EqualT(t, float64(100), *rec.maximum) - - // Level 1 requires "items.items." - rec2 := &validationRecorder{} - sm2 := NewSetMinimum(rec2, WithItemsPrefixLevel(1)) - line2 := "items.items.minimum: 5" - assert.TrueT(t, sm2.Matches(line2)) - require.NoError(t, sm2.Parse([]string{line2})) - require.NotNil(t, rec2.minimum) - assert.EqualT(t, float64(5), *rec2.minimum) -} - -func TestSetEnum(t *testing.T) { - t.Parallel() - - rec := &validationRecorder{} - se := NewSetEnum(rec) - line := "enum: " + `["a","b","c"]` - assert.TrueT(t, se.Matches(line)) - require.NoError(t, se.Parse([]string{line})) - assert.EqualT(t, `["a","b","c"]`, rec.enum) - - t.Run("empty", func(t *testing.T) { - rec := &validationRecorder{} - se := NewSetEnum(rec) - require.NoError(t, se.Parse(nil)) - require.NoError(t, se.Parse([]string{""})) - assert.EqualT(t, "", rec.enum) - }) -} - -// TestPrefixRxOption_AllConstructors covers the WithItemsPrefixLevel loop body -// in every validation constructor that accepts PrefixRxOption. -func TestPrefixRxOption_AllConstructors(t *testing.T) { - t.Parallel() - - prefix := WithItemsPrefixLevel(0) - - t.Run("SetMultipleOf", func(t *testing.T) { - rec := &validationRecorder{} - sm := NewSetMultipleOf(rec, prefix) - line := "items.multiple of: 3" - assert.TrueT(t, sm.Matches(line)) - require.NoError(t, sm.Parse([]string{line})) - require.NotNil(t, rec.multipleOf) - assert.EqualT(t, float64(3), *rec.multipleOf) - }) - - t.Run("SetMaxItems", func(t *testing.T) { - rec := &validationRecorder{} - sm := NewSetMaxItems(rec, prefix) - line := "items.max items: 10" - assert.TrueT(t, sm.Matches(line)) - require.NoError(t, sm.Parse([]string{line})) - require.NotNil(t, rec.maxItems) - assert.EqualT(t, int64(10), *rec.maxItems) - }) - - t.Run("SetMinItems", func(t *testing.T) { - rec := &validationRecorder{} - sm := NewSetMinItems(rec, prefix) - line := "items.min items: 1" - assert.TrueT(t, sm.Matches(line)) - require.NoError(t, sm.Parse([]string{line})) - require.NotNil(t, rec.minItems) - assert.EqualT(t, int64(1), *rec.minItems) - }) - - t.Run("SetMaxLength", func(t *testing.T) { - rec := &validationRecorder{} - sm := NewSetMaxLength(rec, prefix) - line := "items.max length: 100" - assert.TrueT(t, sm.Matches(line)) - require.NoError(t, sm.Parse([]string{line})) - require.NotNil(t, rec.maxLength) - assert.EqualT(t, int64(100), *rec.maxLength) - }) - - t.Run("SetMinLength", func(t *testing.T) { - rec := &validationRecorder{} - sm := NewSetMinLength(rec, prefix) - line := "items.min length: 1" - assert.TrueT(t, sm.Matches(line)) - require.NoError(t, sm.Parse([]string{line})) - require.NotNil(t, rec.minLength) - assert.EqualT(t, int64(1), *rec.minLength) - }) - - t.Run("SetPattern", func(t *testing.T) { - rec := &validationRecorder{} - sm := NewSetPattern(rec, prefix) - line := "items.pattern: ^[a-z]+$" - assert.TrueT(t, sm.Matches(line)) - require.NoError(t, sm.Parse([]string{line})) - assert.EqualT(t, "^[a-z]+$", rec.pattern) - }) - - t.Run("SetCollectionFormat", func(t *testing.T) { - rec := &validationRecorder{} - sm := NewSetCollectionFormat(rec, prefix) - line := "items.collection format: pipes" - assert.TrueT(t, sm.Matches(line)) - require.NoError(t, sm.Parse([]string{line})) - assert.EqualT(t, "pipes", rec.collectionFormat) - }) - - t.Run("SetUnique", func(t *testing.T) { - rec := &validationRecorder{} - sm := NewSetUnique(rec, prefix) - line := "items.unique: true" - assert.TrueT(t, sm.Matches(line)) - require.NoError(t, sm.Parse([]string{line})) - require.NotNil(t, rec.unique) - assert.TrueT(t, *rec.unique) - }) - - t.Run("SetDefault", func(t *testing.T) { - rec := &validationRecorder{} - scheme := &oaispec.SimpleSchema{Type: "string"} - sm := NewSetDefault(scheme, rec, prefix) - line := "items.default: hello" - assert.TrueT(t, sm.Matches(line)) - require.NoError(t, sm.Parse([]string{line})) - assert.EqualT(t, "hello", rec.defaultVal) - }) - - t.Run("SetExample", func(t *testing.T) { - rec := &validationRecorder{} - scheme := &oaispec.SimpleSchema{Type: "string"} - sm := NewSetExample(scheme, rec, prefix) - line := "items.example: world" - assert.TrueT(t, sm.Matches(line)) - require.NoError(t, sm.Parse([]string{line})) - assert.EqualT(t, "world", rec.exampleVal) - }) - - t.Run("SetEnum", func(t *testing.T) { - rec := &validationRecorder{} - sm := NewSetEnum(rec, prefix) - line := `items.enum: ["x","y"]` - assert.TrueT(t, sm.Matches(line)) - require.NoError(t, sm.Parse([]string{line})) - assert.EqualT(t, `["x","y"]`, rec.enum) - }) -} - -func TestSetDefault_ParseError(t *testing.T) { - t.Parallel() - - rec := &validationRecorder{} - scheme := &oaispec.SimpleSchema{Type: "integer"} - sd := NewSetDefault(scheme, rec) - err := sd.Parse([]string{"default: not-a-number"}) - require.Error(t, err) - assert.Nil(t, rec.defaultVal) -} - -func TestSetExample_ParseError(t *testing.T) { - t.Parallel() - - rec := &validationRecorder{} - scheme := &oaispec.SimpleSchema{Type: "integer"} - se := NewSetExample(scheme, rec) - err := se.Parse([]string{"example: not-a-number"}) - require.Error(t, err) - assert.Nil(t, rec.exampleVal) -} - -func TestSetRequiredSchema_Matches(t *testing.T) { - t.Parallel() - - su := NewSetRequiredSchema(new(oaispec.Schema), "name") - assert.TrueT(t, su.Matches("required: true")) - assert.TrueT(t, su.Matches("Required: false")) - assert.FalseT(t, su.Matches("something else")) -} - -// strictMockValidationBuilder returns a MockValidationBuilder whose Set* methods -// fail the test if called. Use this in tests that assert no mutation happened -// (empty-input tolerance, overflow errors, etc.). -func strictMockValidationBuilder(t *testing.T) *mocks.MockValidationBuilder { - t.Helper() - fail := func(name string) func(...any) { - return func(args ...any) { t.Fatalf("%s should not be called (args: %v)", name, args) } - } - m := &mocks.MockValidationBuilder{} - m.SetMaximumFunc = func(v float64, exclusive bool) { fail("SetMaximum")(v, exclusive) } - m.SetMinimumFunc = func(v float64, exclusive bool) { fail("SetMinimum")(v, exclusive) } - m.SetMultipleOfFunc = func(v float64) { fail("SetMultipleOf")(v) } - m.SetMaxItemsFunc = func(v int64) { fail("SetMaxItems")(v) } - m.SetMinItemsFunc = func(v int64) { fail("SetMinItems")(v) } - m.SetMaxLengthFunc = func(v int64) { fail("SetMaxLength")(v) } - m.SetMinLengthFunc = func(v int64) { fail("SetMinLength")(v) } - m.SetPatternFunc = func(v string) { fail("SetPattern")(v) } - m.SetUniqueFunc = func(v bool) { fail("SetUnique")(v) } - m.SetEnumFunc = func(v string) { fail("SetEnum")(v) } - m.SetDefaultFunc = func(v any) { fail("SetDefault")(v) } - m.SetExampleFunc = func(v any) { fail("SetExample")(v) } - return m -} - -// TestValidationParsers_EmptyInputTolerance pins the defensive-guard -// contract documented in the D.5 post-mortem: every Parse(lines) tolerates -// nil / empty-slice / single-empty-string input without panic and without -// mutating its target. Uses MockValidationBuilder (Set* funcs fail on call) -// to prove no side effect. -func TestValidationParsers_EmptyInputTolerance(t *testing.T) { - t.Parallel() - - emptyInputs := [][]string{nil, {}, {""}} - - cases := []struct { - name string - factory func(*testing.T) ifaces.ValueParser - }{ - {"SetMaximum", func(t *testing.T) ifaces.ValueParser { return NewSetMaximum(strictMockValidationBuilder(t)) }}, - {"SetMinimum", func(t *testing.T) ifaces.ValueParser { return NewSetMinimum(strictMockValidationBuilder(t)) }}, - {"SetMultipleOf", func(t *testing.T) ifaces.ValueParser { return NewSetMultipleOf(strictMockValidationBuilder(t)) }}, - {"SetMaxItems", func(t *testing.T) ifaces.ValueParser { return NewSetMaxItems(strictMockValidationBuilder(t)) }}, - {"SetMinItems", func(t *testing.T) ifaces.ValueParser { return NewSetMinItems(strictMockValidationBuilder(t)) }}, - {"SetMaxLength", func(t *testing.T) ifaces.ValueParser { return NewSetMaxLength(strictMockValidationBuilder(t)) }}, - {"SetMinLength", func(t *testing.T) ifaces.ValueParser { return NewSetMinLength(strictMockValidationBuilder(t)) }}, - {"SetPattern", func(t *testing.T) ifaces.ValueParser { return NewSetPattern(strictMockValidationBuilder(t)) }}, - {"SetUnique", func(t *testing.T) ifaces.ValueParser { return NewSetUnique(strictMockValidationBuilder(t)) }}, - {"SetExample", func(t *testing.T) ifaces.ValueParser { - scheme := &oaispec.SimpleSchema{Type: "string"} - return NewSetExample(scheme, strictMockValidationBuilder(t)) - }}, - {"SetCollectionFormat", func(t *testing.T) ifaces.ValueParser { - // OperationValidationBuilder — use the op-variant mock, fail-all. - m := &mocks.MockOperationValidationBuilder{ - SetCollectionFormatFunc: func(v string) { t.Fatalf("SetCollectionFormat should not be called (arg: %s)", v) }, - } - return NewSetCollectionFormat(m) - }}, - {"SetReadOnlySchema", func(_ *testing.T) ifaces.ValueParser { return NewSetReadOnlySchema(new(oaispec.Schema)) }}, - {"SetDiscriminator", func(_ *testing.T) ifaces.ValueParser { return NewSetDiscriminator(new(oaispec.Schema), "kind") }}, - } - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - p := tc.factory(t) - for _, in := range emptyInputs { - require.NoError(t, p.Parse(in)) - } - }) - } -} - -// TestValidationParsers_NumericOverflow pins the overflow defence we kept -// in D.5: the regex captures \p{N}+ (any-length digit string), which matches -// values beyond int64 / float64 range. strconv.ParseInt / ParseFloat returns -// ErrRange in those cases, and the parser must propagate the error without -// invoking the target builder. See .claude/plans/dead-code-cleanup.md D.5 -// post-mortem for the rationale. -func TestValidationParsers_NumericOverflow(t *testing.T) { - t.Parallel() - - // int64 max is 9223372036854775807 (19 digits); 20+ 9's overflows. - intOverflow := strings.Repeat("9", 25) - // float64 max is ~1.8e308 in magnitude; 400 9's in decimal notation - // overflows ParseFloat (returns +Inf, ErrRange). - floatOverflow := strings.Repeat("9", 400) - - cases := []struct { - name string - line string - newP func(*testing.T) ifaces.ValueParser - }{ - { - name: "SetMaximum float overflow", - line: "maximum: " + floatOverflow, - newP: func(t *testing.T) ifaces.ValueParser { return NewSetMaximum(strictMockValidationBuilder(t)) }, - }, - { - name: "SetMinimum float overflow", - line: "minimum: " + floatOverflow, - newP: func(t *testing.T) ifaces.ValueParser { return NewSetMinimum(strictMockValidationBuilder(t)) }, - }, - { - name: "SetMultipleOf float overflow", - line: "multiple of: " + floatOverflow, - newP: func(t *testing.T) ifaces.ValueParser { return NewSetMultipleOf(strictMockValidationBuilder(t)) }, - }, - { - name: "SetMaxItems int overflow", - line: "max items: " + intOverflow, - newP: func(t *testing.T) ifaces.ValueParser { return NewSetMaxItems(strictMockValidationBuilder(t)) }, - }, - { - name: "SetMinItems int overflow", - line: "min items: " + intOverflow, - newP: func(t *testing.T) ifaces.ValueParser { return NewSetMinItems(strictMockValidationBuilder(t)) }, - }, - { - name: "SetMaxLength int overflow", - line: "max length: " + intOverflow, - newP: func(t *testing.T) ifaces.ValueParser { return NewSetMaxLength(strictMockValidationBuilder(t)) }, - }, - { - name: "SetMinLength int overflow", - line: "min length: " + intOverflow, - newP: func(t *testing.T) ifaces.ValueParser { return NewSetMinLength(strictMockValidationBuilder(t)) }, - }, - } - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - p := tc.newP(t) - require.TrueT(t, p.Matches(tc.line), "regex must match overflow input; otherwise the guard we're testing is dead") - err := p.Parse([]string{tc.line}) - require.Error(t, err, "expected ParseInt/ParseFloat ErrRange") - }) - } -} diff --git a/internal/scanner/options.go b/internal/scanner/options.go index f70526b..987b18a 100644 --- a/internal/scanner/options.go +++ b/internal/scanner/options.go @@ -22,18 +22,4 @@ type Options struct { DescWithRef bool // allow overloaded descriptions together with $ref, otherwise jsonschema draft4 $ref predates everything SkipExtensions bool // skip generating x-go-* vendor extensions in the spec Debug bool // enable verbose debug logging during scanning - - // UseGrammarParser routes comment-group parsing through the v2 - // hand-rolled grammar parser at internal/parsers/grammar/ (plus - // bridge-taggers that call the existing ValidationBuilder / - // SwaggerTypable / … interfaces) instead of the legacy - // regex-based taggers. - // - // Default false: the legacy path runs unchanged. The flag is - // the dual-path coexistence seam used by the parity harness - // during the P5 migration (one run per value of the flag, outputs - // diffed). At P6 cutover the flag is removed and grammar-parser - // becomes the only path. See .claude/plans/p5-builder-migrations.md - // and grammar-parser-tasks.md P4.3 / P5 cross-cutting. - UseGrammarParser bool } diff --git a/internal/scanner/scan_context.go b/internal/scanner/scan_context.go index d8e7eaa..1bc32d6 100644 --- a/internal/scanner/scan_context.go +++ b/internal/scanner/scan_context.go @@ -11,7 +11,6 @@ import ( "iter" "log" "maps" - "os" "slices" "strings" @@ -20,17 +19,6 @@ import ( "golang.org/x/tools/go/packages" ) -// envForceGrammarParser, when set to "1" / "true" in the process -// environment, forces Options.UseGrammarParser to true at context -// construction time regardless of the value the caller passed. -// Used by CI to run the full test suite under the grammar path -// without threading the flag through every test call site — -// `CODESCAN_USE_GRAMMAR=1 go test ./...`. -// -// Removed at P6 cutover alongside the legacy regex path and the -// flag itself. -const envForceGrammarParser = "CODESCAN_USE_GRAMMAR" - const pkgLoadMode = packages.NeedName | packages.NeedFiles | packages.NeedImports | packages.NeedDeps | packages.NeedTypes | packages.NeedSyntax | packages.NeedTypesInfo type node uint32 @@ -53,14 +41,6 @@ type ScanCtx struct { } func NewScanCtx(opts *Options) (*ScanCtx, error) { - // Env-var override: CODESCAN_USE_GRAMMAR=1 forces the grammar - // parser on for all builders regardless of the caller's flag. - // Test-only migration aid; removed at P6 cutover. - switch os.Getenv(envForceGrammarParser) { - case "1", "true", "TRUE", "True": - opts.UseGrammarParser = true - } - cfg := &packages.Config{ Dir: opts.WorkDir, Mode: pkgLoadMode, @@ -118,13 +98,6 @@ func (s *ScanCtx) RefAliases() bool { return s.opts.RefAliases } -// UseGrammarParser reports whether the scan is configured to route -// comment-group parsing through the v2 grammar parser. See -// Options.UseGrammarParser for the migration seam semantics. -func (s *ScanCtx) UseGrammarParser() bool { - return s.opts.UseGrammarParser -} - // FileSet returns the shared *token.FileSet used by the scan's // loaded packages. Needed by callers that construct a // grammar.Parser for comment groups that don't live under a single From 761c439575d875f27560c4b0c858596e935384cb Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 18:54:07 +0200 Subject: [PATCH 44/46] =?UTF-8?q?feat(builders):=20P6.1=20=E2=80=94=20migr?= =?UTF-8?q?ate=20meta=20builder=20to=20grammar=20bridge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the last consumer of SectionedParser — the swagger:meta builder — with a grammar bridge mirroring the P5 template. Meta is a little different from the six P5 builders because there is no "meta target" struct at the field level: the whole comment group describes the top-level spec.Swagger (Info + schemes/consumes/ produces/security/securityDefinitions/host/basePath/extensions/ infoExtensions/tos/contact/license). New: internal/builders/spec/meta_bridge.go. applyMetaBlock() parses the meta comment group via grammar.Parser and dispatches each level-0 Property to the matching *spec.Swagger field. Title comes from parsers.CollectScannerTitleDescription on block.ProseLines() (same split heuristic as schema decl / operations) with the legacy `Package ` prefix stripper applied for parity. Body-parsing helpers (v1-parity, all inlined): - yamlListBody: consumes/produces strict YAML-list body. - parseSchemesLine: schemes comma-list. - parseSecurityRequirements: `name: scope1, scope2` per line. - unmarshalYAMLBody: YAML→JSON pipeline for securityDefinitions/extensions/infoExtensions. - parseContactInfo / parseLicense / splitURL: `Name URL` / `Name URL` splits. - validateExtensionNames: reject non-x-* keys with ErrBadExtensionName (mirrors legacy metaVendorExtensibleSetter). dispatchMetaKeyword split into two helpers (dispatchMetaSimple for synchronous fields, dispatchMetaYAMLBlock for error-returning ones) to keep cognitive complexity below the gocognit threshold. Grammar fix: when the annotation token is at the tail of the comment group (swagger:meta in a package doc), the pre-annotation slice contained structural tokens (KEYWORD_VALUE / KEYWORD_BLOCK_HEAD) that parseTitleDesc ignored. parse() now splits `pre` at the first body-start and routes the tail through parseBody alongside post-annotation tokens. Removes the earlier parseTitleDesc branch that captured pre-annotation TokenKeywordValue as a Property — the new split handles it structurally, which also covers TokenKeywordBlockHead (needed for meta's Extensions: / Security: / TOS: blocks). Cleanup — now-dead legacy code deleted: - internal/parsers/meta.go + meta_test.go (NewMetaParser, setMetaSingle, metaTOSSetter, metaConsumesSetter, etc.). - internal/parsers/sectioned_parser.go (the whole SectionedParser type, only consumer was NewMetaParser). - internal/parsers/tag_parsers.go (NewSingleLineTagParser / NewMultiLineTagParser — only consumed by SectionedParser). - internal/parsers/security.go: SetSchemes + newSetSecurity (only consumed by meta). - internal/parsers/security_test.go (tests for deleted NewSetSchemes). - internal/parsers/parsers.go: multilineDropEmptyParser (only meta consumed it). - internal/parsers/regexprs.go: rxSchemes, rxVersion, rxHost, rxBasePath, rxLicense, rxContact, rxTOS, rxInfoExtensions, rxStripTitleComments (meta-specific regexes), plus rxSwaggerAnnotationStrict (Q5 introduced it for SectionedParser.parseLine; with SectionedParser gone the strict pattern has no consumer). Scanner: parsers.MetaSection removed — TypeIndex.Meta is now `[]*ast.CommentGroup` directly (the indirection had only one field). ScanCtx.Meta() updated accordingly. All tests green (both go test ./... and dual-mode runs if re-introduced). Lint clean. What still survives in internal/parsers (from a 19-file max) after two cleanup passes: enum (ParseEnum, ParseValueFromSchema, GetEnumBasicLitValue, GetEnumDesc), extensions (SetOpExtensions for routes bridge), matchers (scanner classification helpers), lines (JoinDropLast), parsers_helpers (CollectScannerTitleDescription + cleanupScannerLines), parsers (ConsumesDropEmptyParser / ProducesDropEmptyParser / multilineYAMLListParser for routes bridge), regexprs (matchers + routes/operations annotation lines), responses (NewSetResponses for routes bridge), route_params (NewSetParams for routes bridge), security (NewSetSecurityScheme for routes bridge), yaml_parser (NewYAMLParser for schema bridge), yaml_spec_parser (RemoveIndent for operations bridge — the rest of YAMLSpecScanner is unused and a further cleanup target). Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/builders/spec/meta_bridge.go | 351 ++++++++++++++++++++++++++ internal/builders/spec/spec.go | 8 +- internal/parsers/grammar/parser.go | 31 ++- internal/parsers/meta.go | 245 ------------------ internal/parsers/meta_test.go | 270 -------------------- internal/parsers/parsers.go | 22 -- internal/parsers/regexprs.go | 61 ++--- internal/parsers/sectioned_parser.go | 294 --------------------- internal/parsers/security.go | 45 ---- internal/parsers/security_test.go | 84 ------ internal/parsers/tag_parsers.go | 86 ------- internal/scanner/index.go | 4 +- internal/scanner/scan_context.go | 2 +- 13 files changed, 392 insertions(+), 1111 deletions(-) create mode 100644 internal/builders/spec/meta_bridge.go delete mode 100644 internal/parsers/meta.go delete mode 100644 internal/parsers/meta_test.go delete mode 100644 internal/parsers/sectioned_parser.go delete mode 100644 internal/parsers/security_test.go delete mode 100644 internal/parsers/tag_parsers.go diff --git a/internal/builders/spec/meta_bridge.go b/internal/builders/spec/meta_bridge.go new file mode 100644 index 0000000..74b934e --- /dev/null +++ b/internal/builders/spec/meta_bridge.go @@ -0,0 +1,351 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package spec + +import ( + "encoding/json" + "errors" + "fmt" + "net/mail" + "regexp" + "strings" + + "github.com/go-openapi/codescan/internal/parsers" + "github.com/go-openapi/codescan/internal/parsers/grammar" + yamlparser "github.com/go-openapi/codescan/internal/parsers/yaml" + "github.com/go-openapi/loads/fmts" + "github.com/go-openapi/spec" + yaml "go.yaml.in/yaml/v3" +) + +// rxStripTitleComments mirrors the legacy regex used in NewMetaParser's +// setTitle callback. A meta title of the form +// `Package ` has the leading Go package marker +// stripped so the emitted Info.Title is just ``. +var rxStripTitleComments = regexp.MustCompile(`^[^\p{L}]*[Pp]ackage\p{Zs}+[^\p{Zs}]+\p{Zs}*`) + +// applyMetaBlock parses the meta comment group via the grammar +// parser and dispatches each level-0 property into the matching +// *spec.Swagger field. Replaces parsers.NewMetaParser + SectionedParser +// with no behavior change: title/description come from the grammar's +// prose split (blank-line or punctuation/markdown heuristic, same as +// the legacy CollectScannerTitleDescription helper), and each +// top-level keyword's body is handed to the setter that v1 wired +// behind the scenes. +// +// swspec may have a nil Info field on entry; the helper allocates +// one before writing the first Info.* value. +func applyMetaBlock(swspec *spec.Swagger, block grammar.Block) error { + if swspec.Info == nil { + swspec.Info = new(spec.Info) + } + title, desc := parsers.CollectScannerTitleDescription(block.ProseLines()) + joinedTitle := parsers.JoinDropLast(title) + if joinedTitle != "" { + joinedTitle = rxStripTitleComments.ReplaceAllString(joinedTitle, "") + } + swspec.Info.Title = joinedTitle + swspec.Info.Description = parsers.JoinDropLast(desc) + + for p := range block.Properties() { + if p.ItemsDepth != 0 { + continue + } + if err := dispatchMetaKeyword(p, swspec); err != nil { + return err + } + } + return nil +} + +func dispatchMetaKeyword(p grammar.Property, swspec *spec.Swagger) error { + if dispatchMetaSimple(p, swspec) { + return nil + } + return dispatchMetaYAMLBlock(p, swspec) +} + +// dispatchMetaSimple handles the synchronous, non-YAML keywords +// whose body dispatch cannot fail. +func dispatchMetaSimple(p grammar.Property, swspec *spec.Swagger) bool { + switch p.Keyword.Name { + case "tos": + swspec.Info.TermsOfService = parsers.JoinDropLast(dropEmpty(p.Body)) + case "consumes": + swspec.Consumes = yamlListBody(p.Body) + case "produces": + swspec.Produces = yamlListBody(p.Body) + case "schemes": + swspec.Schemes = parseSchemesLine(p.Value) + case "security": + swspec.Security = parseSecurityRequirements(p.Body) + case "version": + swspec.Info.Version = strings.TrimSpace(p.Value) + case "host": + host := strings.TrimSpace(p.Value) + if host == "" { + host = "localhost" + } + swspec.Host = host + case "basePath": + swspec.BasePath = strings.TrimSpace(p.Value) + case "license": + swspec.Info.License = parseLicense(strings.TrimSpace(p.Value)) + default: + return false + } + return true +} + +// dispatchMetaYAMLBlock handles the keywords that can fail: +// securityDefinitions, infoExtensions, extensions, contact. +func dispatchMetaYAMLBlock(p grammar.Property, swspec *spec.Swagger) error { + switch p.Keyword.Name { + case "contact": + contact, err := parseContactInfo(strings.TrimSpace(p.Value)) + if err != nil { + return err + } + swspec.Info.Contact = contact + case "securityDefinitions": + return unmarshalYAMLBody(p.Body, func(data []byte) error { + var d spec.SecurityDefinitions + if err := json.Unmarshal(data, &d); err != nil { + return err + } + swspec.SecurityDefinitions = d + return nil + }) + case "infoExtensions": + return unmarshalYAMLBody(p.Body, func(data []byte) error { + return applyInfoExtensions(data, swspec) + }) + case "extensions": + return unmarshalYAMLBody(p.Body, func(data []byte) error { + return applyMetaExtensions(data, swspec) + }) + } + return nil +} + +func applyInfoExtensions(data []byte, swspec *spec.Swagger) error { + var d spec.Extensions + if err := json.Unmarshal(data, &d); err != nil { + return err + } + if err := validateExtensionNames(d); err != nil { + return err + } + swspec.Info.Extensions = d + return nil +} + +func applyMetaExtensions(data []byte, swspec *spec.Swagger) error { + var d spec.Extensions + if err := json.Unmarshal(data, &d); err != nil { + return err + } + if err := validateExtensionNames(d); err != nil { + return err + } + swspec.Extensions = d + return nil +} + +// yamlListBody parses a block body as a YAML list and returns its +// stringified items. Mirrors parsers.multilineYAMLListParser (the +// Q4 strict-list contract for consumes / produces): a non-list body +// is silently dropped — legacy emits a WARNING log but does not +// error. Empty bodies return nil. +func yamlListBody(body []string) []string { + cleaned := dropEmpty(body) + if len(cleaned) == 0 { + return nil + } + parsed, err := yamlparser.Parse(strings.Join(cleaned, "\n")) + if err != nil { + return nil + } + list, ok := parsed.([]any) + if !ok { + return nil + } + out := make([]string, 0, len(list)) + for _, item := range list { + out = append(out, fmt.Sprintf("%v", item)) + } + return out +} + +// dropEmpty removes whitespace-only entries from a body slice. +func dropEmpty(lines []string) []string { + out := make([]string, 0, len(lines)) + for _, l := range lines { + if strings.TrimSpace(l) != "" { + out = append(out, l) + } + } + return out +} + +// parseSchemesLine mirrors parsers.SetSchemes.Parse — comma-split +// the value, trim each entry, drop empties. +func parseSchemesLine(value string) []string { + out := make([]string, 0) + for s := range strings.SplitSeq(value, ",") { + if ts := strings.TrimSpace(s); ts != "" { + out = append(out, ts) + } + } + if len(out) == 0 { + return nil + } + return out +} + +// parseSecurityRequirements handles a Security block body. Legacy +// (newSetSecurity) parses lines of the form `name: scope1, scope2` +// (with `name:` as an empty-scope entry) into []map[string][]string. +func parseSecurityRequirements(body []string) []map[string][]string { + cleaned := dropEmpty(body) + if len(cleaned) == 0 { + return nil + } + out := make([]map[string][]string, 0, len(cleaned)) + for _, raw := range cleaned { + line := strings.TrimSpace(raw) + name, rest, found := strings.Cut(line, ":") + if !found { + continue + } + name = strings.TrimSpace(name) + rest = strings.TrimSpace(rest) + scopes := []string{} + if rest != "" { + for s := range strings.SplitSeq(rest, ",") { + if ts := strings.TrimSpace(s); ts != "" { + scopes = append(scopes, ts) + } + } + } + out = append(out, map[string][]string{name: scopes}) + } + return out +} + +// unmarshalYAMLBody mirrors parsers.YAMLParser.Parse: the block +// body (`---` fence contents, preserving indent) is yaml-unmarshal'd, +// converted to JSON via fmts.YAMLToJSON, and handed to the setter. +func unmarshalYAMLBody(body []string, setter func([]byte) error) error { + cleaned := removeYAMLIndent(body) + if len(cleaned) == 0 { + return nil + } + yamlContent := strings.Join(cleaned, "\n") + var v any + if err := yaml.Unmarshal([]byte(yamlContent), &v); err != nil { + return err + } + raw, err := fmts.YAMLToJSON(v) + if err != nil { + return err + } + data, err := raw.MarshalJSON() + if err != nil { + return err + } + return setter(data) +} + +// removeYAMLIndent mirrors parsers.removeYamlIndent — strip the +// common leading-indent detected on the first non-empty line. +func removeYAMLIndent(body []string) []string { + cleaned := dropEmpty(body) + if len(cleaned) == 0 { + return nil + } + indent := leadingWhitespaceLen(cleaned[0]) + if indent == 0 { + return cleaned + } + out := make([]string, 0, len(cleaned)) + for _, line := range cleaned { + if len(line) >= indent { + out = append(out, line[indent:]) + } else { + out = append(out, line) + } + } + return out +} + +func leadingWhitespaceLen(s string) int { + i := 0 + for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + i++ + } + return i +} + +// ErrBadExtensionName is the sentinel used when a meta extension key +// does not start with `x-` or `X-`. Mirrors the legacy behavior of +// metaVendorExtensibleSetter's reject-with-error path. +var ErrBadExtensionName = errors.New("invalid schema extension name, should start from `x-`") + +// validateExtensionNames mirrors the legacy rxAllowedExtensions +// check — every vendor extension key must begin with `x-` or `X-`. +func validateExtensionNames(ext spec.Extensions) error { + for k := range ext { + if !parsers.IsAllowedExtension(k) { + return fmt.Errorf("%w: %s", ErrBadExtensionName, k) + } + } + return nil +} + +// parseContactInfo parses a `Name URL` shaped contact line. +func parseContactInfo(line string) (*spec.ContactInfo, error) { + nameEmail, url := splitURL(line) + var name, email string + if nameEmail != "" { + addr, err := mail.ParseAddress(nameEmail) + if err != nil { + return nil, err + } + name, email = addr.Name, addr.Address + } + return &spec.ContactInfo{ + ContactInfoProps: spec.ContactInfoProps{ + URL: url, + Name: name, + Email: email, + }, + }, nil +} + +func parseLicense(line string) *spec.License { + name, url := splitURL(line) + return &spec.License{ + LicenseProps: spec.LicenseProps{ + Name: name, + URL: url, + }, + } +} + +var httpFTPScheme = regexp.MustCompile(`(?:(?:ht|f)tp|ws)s?://`) + +func splitURL(line string) (notURL, url string) { + str := strings.TrimSpace(line) + parts := httpFTPScheme.FindStringIndex(str) + if len(parts) == 0 { + if str != "" { + notURL = str + } + return notURL, "" + } + notURL = strings.TrimSpace(str[:parts[0]]) + url = strings.TrimSpace(str[parts[0]:]) + return notURL, url +} diff --git a/internal/builders/spec/spec.go b/internal/builders/spec/spec.go index 1e49ee8..4f0d8ab 100644 --- a/internal/builders/spec/spec.go +++ b/internal/builders/spec/spec.go @@ -11,7 +11,7 @@ import ( "github.com/go-openapi/codescan/internal/builders/responses" "github.com/go-openapi/codescan/internal/builders/routes" "github.com/go-openapi/codescan/internal/builders/schema" - "github.com/go-openapi/codescan/internal/parsers" + "github.com/go-openapi/codescan/internal/parsers/grammar" "github.com/go-openapi/codescan/internal/scanner" oaispec "github.com/go-openapi/spec" ) @@ -131,8 +131,10 @@ func (s *Builder) buildDiscoveredSchema(decl *scanner.EntityDecl) error { func (s *Builder) buildMeta() error { // build swagger object - for decl := range s.ctx.Meta() { - if err := parsers.NewMetaParser(s.input).Parse(decl.Comments); err != nil { + parser := grammar.NewParser(s.ctx.FileSet()) + for cg := range s.ctx.Meta() { + block := parser.Parse(cg) + if err := applyMetaBlock(s.input, block); err != nil { return err } } diff --git a/internal/parsers/grammar/parser.go b/internal/parsers/grammar/parser.go index 8430d0d..b7a9bd8 100644 --- a/internal/parsers/grammar/parser.go +++ b/internal/parsers/grammar/parser.go @@ -198,6 +198,17 @@ func (p *parseState) parse() Block { typed = p.buildTypedBlock(kind, annTok, base) pre = p.tokens[:annIdx] post = p.tokens[annIdx+1:] + // When the annotation trails the block (e.g., a `swagger:meta` + // at the tail of a package doc), body tokens live in the + // pre-annotation slice. Split pre at the first body-start so + // Title/Description collection stops before structural + // tokens, and those tokens reach parseBody alongside any + // post-annotation ones. + if splitIdx := findBodyStart(pre); splitIdx >= 0 { + tail := pre[splitIdx:] + pre = pre[:splitIdx] + post = append(append([]Token{}, tail...), post...) + } } else { base = newBaseBlock(AnnUnknown, firstMeaningfulPos(p.tokens)) typed = &UnboundBlock{baseBlock: base} @@ -449,25 +460,13 @@ func (p *parseState) parseTitleDesc(base *baseBlock, pre []Token) { case TokenText: current = append(current, t.Text) proseLines = append(proseLines, t.Text) - case TokenKeywordValue: - // Pre-annotation keyword lines (e.g., `discriminator: true` - // appearing before a trailing `swagger:name` annotation on - // an interface method) land on the block's Properties - // alongside post-annotation keywords. Without this, those - // keywords fall into a gap — not prose, not properties — - // and never reach the analyzer. - base.properties = append(base.properties, Property{ - Keyword: *t.Keyword, - Pos: t.Pos, - Value: t.Value, - Typed: p.typeConvert(*t.Keyword, t.Value, t.Pos), - ItemsDepth: t.ItemsDepth, - }) case TokenEOF, TokenAnnotation, - TokenKeywordBlockHead, + TokenKeywordValue, TokenKeywordBlockHead, TokenYAMLFence, TokenRawLine: - // Ignored in the title/description slice. + // Ignored in the title/description slice. Structural + // tokens are routed through parseBody via parse()'s + // body-split even when the annotation is trailing. default: // Unreachable at v1; future kinds ignored defensively. } diff --git a/internal/parsers/meta.go b/internal/parsers/meta.go deleted file mode 100644 index 06d9dd8..0000000 --- a/internal/parsers/meta.go +++ /dev/null @@ -1,245 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "encoding/json" - "fmt" - "go/ast" - "net/mail" - "regexp" - "strings" - - "github.com/go-openapi/spec" -) - -type MetaSection struct { - Comments *ast.CommentGroup -} - -func metaTOSSetter(meta *spec.Info) func([]string) { - return func(lines []string) { - meta.TermsOfService = JoinDropLast(lines) - } -} - -func metaConsumesSetter(meta *spec.Swagger) func([]string) { - return func(consumes []string) { meta.Consumes = consumes } -} - -func metaProducesSetter(meta *spec.Swagger) func([]string) { - return func(produces []string) { meta.Produces = produces } -} - -func metaSchemeSetter(meta *spec.Swagger) func([]string) { - return func(schemes []string) { meta.Schemes = schemes } -} - -func metaSecuritySetter(meta *spec.Swagger) func([]map[string][]string) { - return func(secDefs []map[string][]string) { meta.Security = secDefs } -} - -func metaSecurityDefinitionsSetter(meta *spec.Swagger) func(json.RawMessage) error { - return func(jsonValue json.RawMessage) error { - var jsonData spec.SecurityDefinitions - err := json.Unmarshal(jsonValue, &jsonData) - if err != nil { - return err - } - meta.SecurityDefinitions = jsonData - return nil - } -} - -func metaVendorExtensibleSetter(meta *spec.Swagger) func(json.RawMessage) error { - return func(jsonValue json.RawMessage) error { - var jsonData spec.Extensions - err := json.Unmarshal(jsonValue, &jsonData) - if err != nil { - return err - } - for k := range jsonData { - if !rxAllowedExtensions.MatchString(k) { - return fmt.Errorf("invalid schema extension name, should start from `x-`: %s: %w", k, ErrParser) - } - } - meta.Extensions = jsonData - return nil - } -} - -func infoVendorExtensibleSetter(meta *spec.Swagger) func(json.RawMessage) error { - return func(jsonValue json.RawMessage) error { - var jsonData spec.Extensions - err := json.Unmarshal(jsonValue, &jsonData) - if err != nil { - return err - } - for k := range jsonData { - if !rxAllowedExtensions.MatchString(k) { - return fmt.Errorf("invalid schema extension name, should start from `x-`: %s: %w", k, ErrParser) - } - } - meta.Info.Extensions = jsonData - return nil - } -} - -func NewMetaParser(swspec *spec.Swagger) *SectionedParser { - sp := new(SectionedParser) - if swspec.Info == nil { - swspec.Info = new(spec.Info) - } - info := swspec.Info - sp.setTitle = func(lines []string) { - tosave := JoinDropLast(lines) - if len(tosave) > 0 { - tosave = rxStripTitleComments.ReplaceAllString(tosave, "") - } - info.Title = tosave - } - sp.setDescription = func(lines []string) { info.Description = JoinDropLast(lines) } - sp.taggers = []TagParser{ - NewMultiLineTagParser("TOS", newMultilineDropEmptyParser(rxTOS, metaTOSSetter(info)), false), - // Q4: Consumes/Produces bodies are YAML lists; skipCleanUp=true - // so the external rxUncommentHeaders pass doesn't strip the - // `-` list markers before our YAML-aware Parse sees them. - NewMultiLineTagParser("Consumes", NewConsumesDropEmptyParser(metaConsumesSetter(swspec)), true), - NewMultiLineTagParser("Produces", NewProducesDropEmptyParser(metaProducesSetter(swspec)), true), - NewSingleLineTagParser("Schemes", NewSetSchemes(metaSchemeSetter(swspec))), - NewMultiLineTagParser("Security", newSetSecurity(rxSecuritySchemes, metaSecuritySetter(swspec)), false), - NewMultiLineTagParser("SecurityDefinitions", NewYAMLParser(WithMatcher(rxSecurity), WithSetter(metaSecurityDefinitionsSetter(swspec))), true), - NewSingleLineTagParser("Version", &setMetaSingle{Spec: swspec, Rx: rxVersion, Set: setInfoVersion}), - NewSingleLineTagParser("Host", &setMetaSingle{Spec: swspec, Rx: rxHost, Set: setSwaggerHost}), - NewSingleLineTagParser("BasePath", &setMetaSingle{swspec, rxBasePath, setSwaggerBasePath}), - NewSingleLineTagParser("Contact", &setMetaSingle{Spec: swspec, Rx: rxContact, Set: setInfoContact}), - NewSingleLineTagParser("License", &setMetaSingle{Spec: swspec, Rx: rxLicense, Set: setInfoLicense}), - NewMultiLineTagParser("YAMLInfoExtensionsBlock", NewYAMLParser(WithMatcher(rxInfoExtensions), WithSetter(infoVendorExtensibleSetter(swspec))), true), - NewMultiLineTagParser("YAMLExtensionsBlock", NewYAMLParser(WithExtensionMatcher(), WithSetter(metaVendorExtensibleSetter(swspec))), true), - } - - return sp -} - -type setMetaSingle struct { - Spec *spec.Swagger - Rx *regexp.Regexp - Set func(spec *spec.Swagger, lines []string) error -} - -func (s *setMetaSingle) Matches(line string) bool { - return s.Rx.MatchString(line) -} - -func (s *setMetaSingle) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - matches := s.Rx.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - return s.Set(s.Spec, []string{matches[1]}) - } - return nil -} - -func setSwaggerHost(swspec *spec.Swagger, lines []string) error { - lns := lines - if len(lns) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - lns = []string{"localhost"} - } - swspec.Host = lns[0] - return nil -} - -func setSwaggerBasePath(swspec *spec.Swagger, lines []string) error { - var ln string - if len(lines) > 0 { - ln = lines[0] - } - swspec.BasePath = ln - return nil -} - -func setInfoVersion(swspec *spec.Swagger, lines []string) error { - if len(lines) == 0 { - return nil - } - info := safeInfo(swspec) - info.Version = strings.TrimSpace(lines[0]) - return nil -} - -func setInfoContact(swspec *spec.Swagger, lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - contact, err := parseContactInfo(lines[0]) - if err != nil { - return err - } - info := safeInfo(swspec) - info.Contact = contact - return nil -} - -func parseContactInfo(line string) (*spec.ContactInfo, error) { - nameEmail, url := splitURL(line) - var name, email string - if len(nameEmail) > 0 { - addr, err := mail.ParseAddress(nameEmail) - if err != nil { - return nil, err - } - name, email = addr.Name, addr.Address - } - return &spec.ContactInfo{ - ContactInfoProps: spec.ContactInfoProps{ - URL: url, - Name: name, - Email: email, - }, - }, nil -} - -func setInfoLicense(swspec *spec.Swagger, lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - info := safeInfo(swspec) - line := lines[0] - name, url := splitURL(line) - info.License = &spec.License{ - LicenseProps: spec.LicenseProps{ - Name: name, - URL: url, - }, - } - return nil -} - -func safeInfo(swspec *spec.Swagger) *spec.Info { - if swspec.Info == nil { - swspec.Info = new(spec.Info) - } - return swspec.Info -} - -// httpFTPScheme matches http://, https://, ws://, wss://. -var httpFTPScheme = regexp.MustCompile("(?:(?:ht|f)tp|ws)s?://") - -func splitURL(line string) (notURL, url string) { - str := strings.TrimSpace(line) - parts := httpFTPScheme.FindStringIndex(str) - if len(parts) == 0 { - if len(str) > 0 { - notURL = str - } - return notURL, "" - } - if len(parts) > 0 { - notURL = strings.TrimSpace(str[:parts[0]]) - url = strings.TrimSpace(str[parts[0]:]) - } - return notURL, url -} diff --git a/internal/parsers/meta_test.go b/internal/parsers/meta_test.go deleted file mode 100644 index 8b8d391..0000000 --- a/internal/parsers/meta_test.go +++ /dev/null @@ -1,270 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - goparser "go/parser" - "go/token" - "testing" - - "github.com/go-openapi/codescan/internal/scantest/classification" - "github.com/go-openapi/testify/v2/assert" - "github.com/go-openapi/testify/v2/require" - - oaispec "github.com/go-openapi/spec" -) - -func TestSetInfoVersion(t *testing.T) { - info := new(oaispec.Swagger) - err := setInfoVersion(info, []string{"0.0.1"}) - require.NoError(t, err) - assert.EqualT(t, "0.0.1", info.Info.Version) -} - -func TestSetInfoLicense(t *testing.T) { - info := new(oaispec.Swagger) - err := setInfoLicense(info, []string{"MIT http://license.org/MIT"}) - require.NoError(t, err) - assert.EqualT(t, "MIT", info.Info.License.Name) - assert.EqualT(t, "http://license.org/MIT", info.Info.License.URL) -} - -func TestSetInfoContact(t *testing.T) { - info := new(oaispec.Swagger) - err := setInfoContact(info, []string{"Homer J. Simpson http://simpsons.com"}) - require.NoError(t, err) - assert.EqualT(t, "Homer J. Simpson", info.Info.Contact.Name) - assert.EqualT(t, "homer@simpsons.com", info.Info.Contact.Email) - assert.EqualT(t, "http://simpsons.com", info.Info.Contact.URL) -} - -func TestParseInfo(t *testing.T) { - swspec := new(oaispec.Swagger) - parser := NewMetaParser(swspec) - docFile := "../../fixtures/goparsing/classification/doc.go" - fileSet := token.NewFileSet() - fileTree, err := goparser.ParseFile(fileSet, docFile, nil, goparser.ParseComments) - if err != nil { - t.FailNow() - } - - err = parser.Parse(fileTree.Doc) - - require.NoError(t, err) - classification.VerifyInfo(t, swspec.Info) -} - -func TestParseSwagger(t *testing.T) { - swspec := new(oaispec.Swagger) - parser := NewMetaParser(swspec) - docFile := "../../fixtures/goparsing/classification/doc.go" - fileSet := token.NewFileSet() - fileTree, err := goparser.ParseFile(fileSet, docFile, nil, goparser.ParseComments) - if err != nil { - t.FailNow() - } - - err = parser.Parse(fileTree.Doc) - verifyMeta(t, swspec) - - require.NoError(t, err) -} - -func verifyMeta(t *testing.T, doc *oaispec.Swagger) { - assert.NotNil(t, doc) - classification.VerifyInfo(t, doc.Info) - assert.Equal(t, []string{"application/json", "application/xml"}, doc.Consumes) - assert.Equal(t, []string{"application/json", "application/xml"}, doc.Produces) - assert.Equal(t, []string{"http", "https"}, doc.Schemes) - assert.Equal(t, []map[string][]string{{"api_key": {}}}, doc.Security) - expectedSecuritySchemaKey := oaispec.SecurityScheme{ - SecuritySchemeProps: oaispec.SecuritySchemeProps{ - Type: "apiKey", - In: "header", - Name: "KEY", - }, - } - expectedSecuritySchemaOAuth := oaispec.SecurityScheme{ - SecuritySchemeProps: oaispec.SecuritySchemeProps{ //nolint:gosec // G101: false positive, test fixture not real credentials - Type: "oauth2", - In: "header", - AuthorizationURL: "/oauth2/auth", - TokenURL: "/oauth2/token", - Flow: "accessCode", - Scopes: map[string]string{ - "bla1": "foo1", - "bla2": "foo2", - }, - }, - } - expectedExtensions := oaispec.Extensions{ - "x-meta-array": []any{ - "value1", - "value2", - }, - "x-meta-array-obj": []any{ - map[string]any{ - "name": "obj", - "value": "field", - }, - }, - "x-meta-value": "value", - } - expectedInfoExtensions := oaispec.Extensions{ - "x-info-array": []any{ - "value1", - "value2", - }, - "x-info-array-obj": []any{ - map[string]any{ - "name": "obj", - "value": "field", - }, - }, - "x-info-value": "value", - } - assert.NotNil(t, doc.SecurityDefinitions["api_key"]) - assert.NotNil(t, doc.SecurityDefinitions["oauth2"]) - assert.Equal(t, oaispec.SecurityDefinitions{"api_key": &expectedSecuritySchemaKey, "oauth2": &expectedSecuritySchemaOAuth}, doc.SecurityDefinitions) - assert.Equal(t, expectedExtensions, doc.Extensions) - assert.Equal(t, expectedInfoExtensions, doc.Info.Extensions) - assert.EqualT(t, "localhost", doc.Host) - assert.EqualT(t, "/v2", doc.BasePath) -} - -func TestMoreParseMeta(t *testing.T) { - for _, docFile := range []string{ - "../../fixtures/goparsing/meta/v1/doc.go", - "../../fixtures/goparsing/meta/v2/doc.go", - "../../fixtures/goparsing/meta/v3/doc.go", - "../../fixtures/goparsing/meta/v4/doc.go", - } { - swspec := new(oaispec.Swagger) - parser := NewMetaParser(swspec) - fileSet := token.NewFileSet() - fileTree, err := goparser.ParseFile(fileSet, docFile, nil, goparser.ParseComments) - if err != nil { - t.FailNow() - } - - err = parser.Parse(fileTree.Doc) - require.NoError(t, err) - assert.EqualT(t, "there are no TOS at this moment, use at your own risk we take no responsibility", swspec.Info.TermsOfService) - /* - jazon, err := json.MarshalIndent(swoaispec.Info, "", " ") - require.NoError(t, err) - t.Logf("%v", string(jazon)) - */ - } -} - -func TestSetInfoVersion_Empty(t *testing.T) { - swspec := new(oaispec.Swagger) - require.NoError(t, setInfoVersion(swspec, nil)) - assert.Nil(t, swspec.Info) -} - -func TestSetSwaggerHost_Empty(t *testing.T) { - swspec := new(oaispec.Swagger) - require.NoError(t, setSwaggerHost(swspec, nil)) - assert.EqualT(t, "localhost", swspec.Host) // fallback - swspec2 := new(oaispec.Swagger) - require.NoError(t, setSwaggerHost(swspec2, []string{""})) - assert.EqualT(t, "localhost", swspec2.Host) // fallback -} - -func TestSetInfoContact_Empty(t *testing.T) { - swspec := new(oaispec.Swagger) - require.NoError(t, setInfoContact(swspec, nil)) - assert.Nil(t, swspec.Info) - require.NoError(t, setInfoContact(swspec, []string{""})) -} - -func TestSetInfoContact_BadEmail(t *testing.T) { - swspec := new(oaispec.Swagger) - err := setInfoContact(swspec, []string{"not-a-valid-email-address <<<"}) - require.Error(t, err) -} - -func TestSetInfoLicense_Empty(t *testing.T) { - swspec := new(oaispec.Swagger) - require.NoError(t, setInfoLicense(swspec, nil)) - assert.Nil(t, swspec.Info) - require.NoError(t, setInfoLicense(swspec, []string{""})) -} - -func TestSetMetaSingle_Parse_Empty(t *testing.T) { - swspec := new(oaispec.Swagger) - s := &setMetaSingle{Spec: swspec, Rx: rxVersion, Set: setInfoVersion} - require.NoError(t, s.Parse(nil)) - require.NoError(t, s.Parse([]string{""})) - // Line that doesn't match the regex - require.NoError(t, s.Parse([]string{"no match here"})) -} - -func TestSplitURL(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - line string - wantNot string - wantURL string - }{ - {"with http url", "MIT http://example.com", "MIT", "http://example.com"}, - {"with https url", "MIT https://example.com", "MIT", "https://example.com"}, - {"url only", "http://example.com", "", "http://example.com"}, - {"no url", "just text", "just text", ""}, - {"empty", "", "", ""}, - {"ws url", "live ws://example.com/ws", "live", "ws://example.com/ws"}, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - notURL, url := splitURL(tc.line) - assert.EqualT(t, tc.wantNot, notURL) - assert.EqualT(t, tc.wantURL, url) - }) - } -} - -func TestMetaVendorExtensibleSetter_InvalidKey(t *testing.T) { - swspec := new(oaispec.Swagger) - setter := metaVendorExtensibleSetter(swspec) - // Extension key that doesn't start with x- - err := setter([]byte(`{"not-x-key": "value"}`)) - require.Error(t, err) - require.ErrorIs(t, err, ErrParser) -} - -func TestMetaVendorExtensibleSetter_BadJSON(t *testing.T) { - swspec := new(oaispec.Swagger) - setter := metaVendorExtensibleSetter(swspec) - err := setter([]byte(`{bad json`)) - require.Error(t, err) -} - -func TestInfoVendorExtensibleSetter_InvalidKey(t *testing.T) { - swspec := &oaispec.Swagger{} - swspec.Info = new(oaispec.Info) - setter := infoVendorExtensibleSetter(swspec) - err := setter([]byte(`{"invalid-key": "value"}`)) - require.Error(t, err) - require.ErrorIs(t, err, ErrParser) -} - -func TestInfoVendorExtensibleSetter_BadJSON(t *testing.T) { - swspec := &oaispec.Swagger{} - swspec.Info = new(oaispec.Info) - setter := infoVendorExtensibleSetter(swspec) - err := setter([]byte(`{bad json`)) - require.Error(t, err) -} - -func TestMetaSecurityDefinitionsSetter_BadJSON(t *testing.T) { - swspec := new(oaispec.Swagger) - setter := metaSecurityDefinitionsSetter(swspec) - err := setter([]byte(`{bad json`)) - require.Error(t, err) -} diff --git a/internal/parsers/parsers.go b/internal/parsers/parsers.go index d159e9c..ff08710 100644 --- a/internal/parsers/parsers.go +++ b/internal/parsers/parsers.go @@ -46,28 +46,6 @@ func NewProducesDropEmptyParser(set func([]string)) *ProducesDropEmptyParser { } } -type multilineDropEmptyParser struct { - set func([]string) - rx *regexp.Regexp -} - -func newMultilineDropEmptyParser(rx *regexp.Regexp, set func([]string)) *multilineDropEmptyParser { - return &multilineDropEmptyParser{ - set: set, - rx: rx, - } -} - -func (m *multilineDropEmptyParser) Matches(line string) bool { - return m.rx.MatchString(line) -} - -func (m *multilineDropEmptyParser) Parse(lines []string) error { - m.set(removeEmptyLines(lines)) - - return nil -} - // multilineYAMLListParser is the Q4 replacement for // multilineDropEmptyParser on list-valued block bodies // (`consumes:` / `produces:` in meta + operation scope). The diff --git a/internal/parsers/regexprs.go b/internal/parsers/regexprs.go index 495b152..b7565af 100644 --- a/internal/parsers/regexprs.go +++ b/internal/parsers/regexprs.go @@ -78,34 +78,19 @@ var ( // truncates descriptions. Use rxSwaggerAnnotationStrict for that. rxSwaggerAnnotation = regexp.MustCompile(`(?:^|[\s/])swagger:([\p{L}\p{N}\p{Pd}\p{Pc}]+)`) - // rxSwaggerAnnotationStrict matches a swagger: annotation - // only at the start of a comment line (with comment-prefix noise - // tolerated per rxCommentPrefix). Used by SectionedParser as the - // block-body terminator so prose that mentions `swagger:*` in - // passing does not cut description accumulation short. Finishes - // the work of 09f6748 ("All annotations should start their - // comment line.") which tightened the per-annotation validator - // regexes but left the block-level terminator using the loose - // pattern. - // - // For `swagger:route` with a godoc-style prefix, the per-annotation - // rxRoutePrefix handles the legitimate identifier-before-route - // form; the SectionedParser does not build routes directly, so - // this strict pattern never needs that exception. - rxSwaggerAnnotationStrict = regexp.MustCompile(rxCommentPrefix + `swagger:[\p{L}\p{N}\p{Pd}\p{Pc}]+`) - rxFileUpload = regexp.MustCompile(rxCommentPrefix + `swagger:file`) - rxStrFmt = regexp.MustCompile(rxCommentPrefix + `swagger:strfmt\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) - rxAlias = regexp.MustCompile(rxCommentPrefix + `swagger:alias`) - rxName = regexp.MustCompile(rxCommentPrefix + `swagger:name\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}\.]+)(?:\.)?$`) - rxAllOf = regexp.MustCompile(rxCommentPrefix + `swagger:allOf\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}\.]+)?(?:\.)?$`) - rxModelOverride = regexp.MustCompile(rxCommentPrefix + `swagger:model\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)?(?:\.)?$`) - rxResponseOverride = regexp.MustCompile(rxCommentPrefix + `swagger:response\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)?(?:\.)?$`) - rxParametersOverride = regexp.MustCompile(rxCommentPrefix + `swagger:parameters\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}\p{Zs}]+)(?:\.)?$`) - rxEnum = regexp.MustCompile(rxCommentPrefix + `swagger:enum\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) - rxIgnoreOverride = regexp.MustCompile(rxCommentPrefix + `swagger:ignore\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)?(?:\.)?$`) - rxDefault = regexp.MustCompile(rxCommentPrefix + `swagger:default\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) - rxType = regexp.MustCompile(rxCommentPrefix + `swagger:type\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) - rxRoute = regexp.MustCompile( + rxFileUpload = regexp.MustCompile(rxCommentPrefix + `swagger:file`) + rxStrFmt = regexp.MustCompile(rxCommentPrefix + `swagger:strfmt\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) + rxAlias = regexp.MustCompile(rxCommentPrefix + `swagger:alias`) + rxName = regexp.MustCompile(rxCommentPrefix + `swagger:name\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}\.]+)(?:\.)?$`) + rxAllOf = regexp.MustCompile(rxCommentPrefix + `swagger:allOf\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}\.]+)?(?:\.)?$`) + rxModelOverride = regexp.MustCompile(rxCommentPrefix + `swagger:model\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)?(?:\.)?$`) + rxResponseOverride = regexp.MustCompile(rxCommentPrefix + `swagger:response\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)?(?:\.)?$`) + rxParametersOverride = regexp.MustCompile(rxCommentPrefix + `swagger:parameters\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}\p{Zs}]+)(?:\.)?$`) + rxEnum = regexp.MustCompile(rxCommentPrefix + `swagger:enum\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) + rxIgnoreOverride = regexp.MustCompile(rxCommentPrefix + `swagger:ignore\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)?(?:\.)?$`) + rxDefault = regexp.MustCompile(rxCommentPrefix + `swagger:default\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) + rxType = regexp.MustCompile(rxCommentPrefix + `swagger:type\p{Zs}*(\p{L}[\p{L}\p{N}\p{Pd}\p{Pc}]+)(?:\.)?$`) + rxRoute = regexp.MustCompile( rxRoutePrefix + "swagger:route\\p{Zs}*" + rxMethod + @@ -135,12 +120,11 @@ var ( ")?\\p{Zs}+" + rxOpID + "\\p{Zs}*$") - rxIndent = regexp.MustCompile(`[\p{Zs}\t]*/*[\p{Zs}\t]*[^\p{Zs}\t]`) - rxNotIndent = regexp.MustCompile(`[^\p{Zs}\t]`) - rxPunctuationEnd = regexp.MustCompile(`\p{Po}$`) - rxTitleStart = regexp.MustCompile(`^[#]+\p{Zs}+`) - rxStripTitleComments = regexp.MustCompile(`^[^\p{L}]*[Pp]ackage\p{Zs}+[^\p{Zs}]+\p{Zs}*`) - rxAllowedExtensions = regexp.MustCompile(`^[Xx]-`) + rxIndent = regexp.MustCompile(`[\p{Zs}\t]*/*[\p{Zs}\t]*[^\p{Zs}\t]`) + rxNotIndent = regexp.MustCompile(`[^\p{Zs}\t]`) + rxPunctuationEnd = regexp.MustCompile(`\p{Po}$`) + rxTitleStart = regexp.MustCompile(`^[#]+\p{Zs}+`) + rxAllowedExtensions = regexp.MustCompile(`^[Xx]-`) rxIn = regexp.MustCompile(rxCommentPrefix + `[Ii]n\p{Zs}*:\p{Zs}*(query|path|header|body|formData)(?:\.)?$`) rxRequired = regexp.MustCompile(rxCommentPrefix + `[Rr]equired\p{Zs}*:\p{Zs}*(true|false)(?:\.)?$`) @@ -150,16 +134,7 @@ var ( rxSecurity = regexp.MustCompile(rxCommentPrefix + `[Ss]ecurity\p{Zs}*[Dd]efinitions:`) rxResponses = regexp.MustCompile(rxCommentPrefix + `[Rr]esponses\p{Zs}*:`) rxParameters = regexp.MustCompile(rxCommentPrefix + `[Pp]arameters\p{Zs}*:`) - rxSchemes = regexp.MustCompile(rxCommentPrefix + `[Ss]chemes\p{Zs}*:\p{Zs}*((?:(?:https?|HTTPS?|wss?|WSS?)[\p{Zs},]*)+)(?:\.)?$`) - rxVersion = regexp.MustCompile(rxCommentPrefix + `[Vv]ersion\p{Zs}*:\p{Zs}*(.+)$`) - rxHost = regexp.MustCompile(rxCommentPrefix + `[Hh]ost\p{Zs}*:\p{Zs}*(.+)$`) - rxBasePath = regexp.MustCompile(rxCommentPrefix + `[Bb]ase\p{Zs}*-*[Pp]ath\p{Zs}*:\p{Zs}*` + rxPath + "(?:\\.)?$") - rxLicense = regexp.MustCompile(rxCommentPrefix + `[Ll]icense\p{Zs}*:\p{Zs}*(.+)$`) - rxContact = regexp.MustCompile(rxCommentPrefix + `[Cc]ontact\p{Zs}*-?(?:[Ii]info\p{Zs}*)?:\p{Zs}*(.+)$`) - rxTOS = regexp.MustCompile(rxCommentPrefix + `[Tt](:?erms)?\p{Zs}*-?[Oo]f?\p{Zs}*-?[Ss](?:ervice)?\p{Zs}*:`) rxExtensions = regexp.MustCompile(rxCommentPrefix + `[Ee]xtensions\p{Zs}*:`) - rxInfoExtensions = regexp.MustCompile(rxCommentPrefix + `[In]nfo\p{Zs}*[Ee]xtensions:`) - // currently unused: rxExample = regexp.MustCompile(`[Ex]ample\p{Zs}*:\p{Zs}*(.*)$`). ) func Rxf(rxp, ar string) *regexp.Regexp { diff --git a/internal/parsers/sectioned_parser.go b/internal/parsers/sectioned_parser.go deleted file mode 100644 index 63aecc0..0000000 --- a/internal/parsers/sectioned_parser.go +++ /dev/null @@ -1,294 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "go/ast" - "strings" - - "github.com/go-openapi/codescan/internal/ifaces" -) - -// SectionedParserOption configures a [SectionedParser] via [NewSectionedParser]. -type SectionedParserOption func(*SectionedParser) - -// WithSetTitle provides a callback that receives the extracted title lines -// after parsing completes. If no title callback is set, the parser does not -// attempt to separate the title from the description. -func WithSetTitle(setTitle func([]string)) SectionedParserOption { - return func(p *SectionedParser) { - p.setTitle = setTitle - } -} - -// WithSetDescription provides a callback that receives the extracted -// description lines after parsing completes. -func WithSetDescription(setDescription func([]string)) SectionedParserOption { - return func(p *SectionedParser) { - p.setDescription = setDescription - } -} - -// WithTaggers registers the [TagParser] instances that this SectionedParser -// will try to match against each line after the header section ends. -func WithTaggers(taggers ...TagParser) SectionedParserOption { - return func(p *SectionedParser) { - p.taggers = taggers - } -} - -// SectionedParser is the core comment-block parser for go-swagger annotations. -// It processes an [ast.CommentGroup] and splits its content into three sections: -// -// 1. Header — free-form text at the top of the comment block, later split -// into a title and description. -// 2. Tags — structured key:value lines (e.g. "minimum: 10", "consumes:", -// "schemes: http, https") recognized by registered [TagParser] instances. -// 3. Annotation — an optional swagger:* annotation line (e.g. "swagger:model -// Foo") handled by a dedicated [ifaces.ValueParser]. -// -// # Parsing algorithm -// -// Parse walks each line of the comment block in order. For every line: -// -// 1. If the line contains a swagger:* annotation: -// - "swagger:ignore" → mark as ignored, stop parsing. -// - If an annotation parser is registered and matches → delegate to it. -// - Otherwise → stop parsing (the annotation belongs to a different parser). -// -// 2. If any registered [TagParser] matches the line: -// - For a single-line tagger: collect the line, then reset the current -// tagger so the next line can match a different tag. -// - For a multi-line tagger: the matching (header) line is consumed but NOT -// collected; all subsequent lines are collected into that tagger until a -// different tagger matches or the block ends. -// -// 3. Otherwise, if no tag has been seen yet, the line is appended to the -// header (free-form text). -// -// After the line walk completes, three things happen: -// -// 1. The header is split into title + description (see [collectScannerTitleDescription]). -// 2. For each matched tagger, its collected lines are cleaned up (comment -// prefixes stripped, unless SkipCleanUp is set) and passed to the -// tagger's Parse method, which writes the extracted value into the target -// spec object. -// 3. Title and description callbacks are invoked. -// -// # Example: Swagger meta block -// -// Given the comment block on a package doc.go: -// -// // Petstore API. -// // -// // The purpose of this application is to provide an API for pets. -// // -// // Schemes: http, https -// // Host: petstore.example.com -// // BasePath: /v2 -// // Version: 1.0.0 -// // License: MIT http://opensource.org/licenses/MIT -// // Contact: John Doe http://john.example.com -// // -// // Consumes: -// // - application/json -// // - application/xml -// // -// // swagger:meta -// -// The SectionedParser (configured by [NewMetaParser]) will: -// -// - Collect "Petstore API." as the title, and the next paragraph as the -// description (header section, lines 1-3). -// - Match "Schemes: http, https" via the single-line "Schemes" tagger. -// - Match "Host: ...", "BasePath: ...", etc. via their respective single-line taggers. -// - Match "Consumes:" via the multi-line "Consumes" tagger, collecting -// "- application/json" and "- application/xml" as its body. -// - Stop at "swagger:meta" (an annotation that doesn't match any registered -// annotation parser, so it terminates the block). -type SectionedParser struct { - header []string - matched map[string]TagParser - annotation ifaces.ValueParser - - seenTag bool - skipHeader bool - setTitle func([]string) - setDescription func([]string) - workedOutTitle bool - taggers []TagParser - currentTagger *TagParser - title []string - ignored bool -} - -// NewSectionedParser creates a SectionedParser configured by the given options. -// -// At minimum, callers should provide [WithSetTitle] and [WithTaggers]: -// -// sp := NewSectionedParser( -// WithSetTitle(func(lines []string) { op.Summary = JoinDropLast(lines) }), -// WithSetDescription(func(lines []string) { op.Description = JoinDropLast(lines) }), -// WithTaggers( -// NewSingleLineTagParser("maximum", NewSetMaximum(builder)), -// NewMultiLineTagParser("consumes", NewConsumesDropEmptyParser(setter), false), -// ), -// ) -func NewSectionedParser(opts ...SectionedParserOption) *SectionedParser { - var p SectionedParser - - for _, apply := range opts { - apply(&p) - } - - return &p -} - -// Title returns the title lines extracted from the header. The title is -// separated from the description by the first blank line, or inferred from -// punctuation and markdown heading prefixes when there is no blank line. -// -// Title triggers lazy title/description splitting on first call. -func (st *SectionedParser) Title() []string { - st.collectTitleDescription() - return st.title -} - -// Description returns the description lines extracted from the header (everything -// after the title). Like [SectionedParser.Title], it triggers lazy splitting on first call. -func (st *SectionedParser) Description() []string { - st.collectTitleDescription() - return st.header -} - -// Ignored reports whether a "swagger:ignore" annotation was encountered. -func (st *SectionedParser) Ignored() bool { - return st.ignored -} - -// Parse processes an [ast.CommentGroup] through the sectioned parsing algorithm -// described in the type documentation. Returns an error if any matched tagger's -// Parse method fails. -func (st *SectionedParser) Parse(doc *ast.CommentGroup) error { - if doc == nil { - return nil - } - -COMMENTS: - for _, c := range doc.List { - for line := range strings.SplitSeq(c.Text, "\n") { - if st.parseLine(line) { - break COMMENTS - } - } - } - - if st.setTitle != nil { - st.setTitle(st.Title()) - } - - if st.setDescription != nil { - st.setDescription(st.Description()) - } - - for _, mt := range st.matched { - if !mt.SkipCleanUp { - mt.Lines = cleanupScannerLines(mt.Lines, rxUncommentHeaders) - } - if err := mt.Parse(mt.Lines); err != nil { - return err - } - } - - return nil -} - -// parseLine processes a single comment line. It returns true when the -// caller should stop processing further comments (a swagger: annotation -// that doesn't belong to this parser, or swagger:ignore). -func (st *SectionedParser) parseLine(line string) (stop bool) { - // Step 1: check for swagger:* annotations. Use the strict - // line-start pattern so prose mentioning `swagger:*` in passing - // (e.g. `// carries swagger:ignore, so ...`) does not terminate - // the block — see rxSwaggerAnnotationStrict godoc and commit - // 09f6748 (the finishing half of "All annotations should start - // their comment line."). - if rxSwaggerAnnotationStrict.MatchString(line) { - if rxIgnoreOverride.MatchString(line) { - st.ignored = true - return true // an explicit ignore terminates this parser - } - if st.annotation == nil || !st.annotation.Matches(line) { - return true // a new swagger: annotation terminates this parser - } - - _ = st.annotation.Parse([]string{line}) - if len(st.header) > 0 { - st.seenTag = true - } - return false - } - - // Step 2: try to match a registered tagger. - var matched bool - for _, tg := range st.taggers { - tagger := tg - if tagger.Matches(line) { - st.seenTag = true - st.currentTagger = &tagger - matched = true - break - } - } - - // Step 3: no tagger active → accumulate as header (free-form text). - if st.currentTagger == nil { - if !st.skipHeader && !st.seenTag { - st.header = append(st.header, line) - } - return false - } - - // For multi-line taggers, the header line (the one that matched) is - // consumed but not collected — only subsequent lines are body. - if st.currentTagger.MultiLine && matched { - return false - } - - // Collect the line into the matched tagger's line buffer. - ts, ok := st.matched[st.currentTagger.Name] - if !ok { - ts = *st.currentTagger - } - ts.Lines = append(ts.Lines, line) - if st.matched == nil { - st.matched = make(map[string]TagParser) - } - st.matched[st.currentTagger.Name] = ts - - // Single-line taggers reset immediately; multi-line taggers stay active. - if !st.currentTagger.MultiLine { - st.currentTagger = nil - } - return false -} - -// collectTitleDescription lazily splits the accumulated header lines into -// title and description. The split is performed at most once. -// -// When setTitle is nil (no title callback registered), the header is only -// cleaned up (comment prefixes removed) but not split — everything stays -// in the description. -func (st *SectionedParser) collectTitleDescription() { - if st.workedOutTitle { - return - } - if st.setTitle == nil { - st.header = cleanupScannerLines(st.header, rxUncommentHeaders) - return - } - - st.workedOutTitle = true - st.title, st.header = collectScannerTitleDescription(st.header) -} diff --git a/internal/parsers/security.go b/internal/parsers/security.go index 7d3f434..3c95d50 100644 --- a/internal/parsers/security.go +++ b/internal/parsers/security.go @@ -8,56 +8,11 @@ import ( "strings" ) -type SetSchemes struct { - set func([]string) - rx *regexp.Regexp -} - -func NewSetSchemes(set func([]string)) *SetSchemes { - return &SetSchemes{ - set: set, - rx: rxSchemes, - } -} - -func (ss *SetSchemes) Matches(line string) bool { - return ss.rx.MatchString(line) -} - -func (ss *SetSchemes) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - - matches := ss.rx.FindStringSubmatch(lines[0]) - if len(matches) > 1 && len(matches[1]) > 0 { - sch := strings.Split(matches[1], ", ") - - schemes := []string{} - for _, s := range sch { - ts := strings.TrimSpace(s) - if ts != "" { - schemes = append(schemes, ts) - } - } - ss.set(schemes) - } - - return nil -} - type SetSecurity struct { set func([]map[string][]string) rx *regexp.Regexp } -func newSetSecurity(rx *regexp.Regexp, setter func([]map[string][]string)) *SetSecurity { - return &SetSecurity{ - set: setter, - rx: rx, - } -} - func NewSetSecurityScheme(setter func([]map[string][]string)) *SetSecurity { return &SetSecurity{ set: setter, diff --git a/internal/parsers/security_test.go b/internal/parsers/security_test.go deleted file mode 100644 index 6d88115..0000000 --- a/internal/parsers/security_test.go +++ /dev/null @@ -1,84 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "testing" - - "github.com/go-openapi/testify/v2/assert" - "github.com/go-openapi/testify/v2/require" -) - -func TestSetSchemes(t *testing.T) { - t.Parallel() - - t.Run("single scheme", func(t *testing.T) { - var got []string - ss := NewSetSchemes(func(v []string) { got = v }) - assert.TrueT(t, ss.Matches("schemes: http")) - require.NoError(t, ss.Parse([]string{"schemes: http"})) - assert.Equal(t, []string{"http"}, got) - }) - - t.Run("multiple schemes", func(t *testing.T) { - var got []string - ss := NewSetSchemes(func(v []string) { got = v }) - require.NoError(t, ss.Parse([]string{"schemes: http, https"})) - assert.Equal(t, []string{"http", "https"}, got) - }) - - t.Run("wss", func(t *testing.T) { - var got []string - ss := NewSetSchemes(func(v []string) { got = v }) - require.NoError(t, ss.Parse([]string{"Schemes: ws, wss"})) - assert.Equal(t, []string{"ws", "wss"}, got) - }) - - t.Run("empty", func(t *testing.T) { - var got []string - ss := NewSetSchemes(func(v []string) { got = v }) - require.NoError(t, ss.Parse(nil)) - require.NoError(t, ss.Parse([]string{})) - require.NoError(t, ss.Parse([]string{""})) - assert.Nil(t, got) - }) - - t.Run("no match", func(t *testing.T) { - ss := NewSetSchemes(nil) - assert.FalseT(t, ss.Matches("something else")) - }) -} - -func TestSetSecurity(t *testing.T) { - t.Parallel() - - t.Run("with scopes", func(t *testing.T) { - var got []map[string][]string - ss := NewSetSecurityScheme(func(v []map[string][]string) { got = v }) - assert.TrueT(t, ss.Matches("security:")) - require.NoError(t, ss.Parse([]string{ - "api_key:", - "oauth2: read:pets, write:pets", - })) - require.Len(t, got, 2) - assert.Equal(t, map[string][]string{"api_key": {}}, got[0]) - assert.Equal(t, map[string][]string{"oauth2": {"read:pets", "write:pets"}}, got[1]) - }) - - t.Run("empty", func(t *testing.T) { - var got []map[string][]string - ss := NewSetSecurityScheme(func(v []map[string][]string) { got = v }) - require.NoError(t, ss.Parse(nil)) - require.NoError(t, ss.Parse([]string{})) - require.NoError(t, ss.Parse([]string{""})) - assert.Nil(t, got) - }) - - t.Run("no colon in line", func(t *testing.T) { - var got []map[string][]string - ss := NewSetSecurityScheme(func(v []map[string][]string) { got = v }) - require.NoError(t, ss.Parse([]string{"no-colon-here"})) - assert.Nil(t, got) // line without colon is skipped - }) -} diff --git a/internal/parsers/tag_parsers.go b/internal/parsers/tag_parsers.go deleted file mode 100644 index 16d8ad7..0000000 --- a/internal/parsers/tag_parsers.go +++ /dev/null @@ -1,86 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import "github.com/go-openapi/codescan/internal/ifaces" - -// TagParser pairs a named tag with a [ifaces.ValueParser] that recognizes and -// extracts its value from comment lines. -// -// A TagParser operates in one of two modes: -// -// - Single-line: the tag matches exactly one line (e.g. "maximum: 10"). -// The [SectionedParser] resets its current tagger after every single-line -// match, so the next line is free to match a different tagger. -// -// - Multi-line: the tag's first matching line is a header (e.g. "consumes:") -// and all subsequent lines are collected as its body until a different -// tagger matches or the comment block ends. The header line itself is NOT -// included in Lines — only the body lines that follow it. -// -// SkipCleanUp controls whether the [SectionedParser] strips comment prefixes -// (// , *, etc.) from the collected Lines before calling Parse. YAML-based -// taggers set this to true because they need the original indentation intact. -// -// Lines is populated by the [SectionedParser] during its scan; after the scan -// completes, Parse is called with those lines to extract the value. -type TagParser struct { - Name string - MultiLine bool - SkipCleanUp bool - Lines []string - Parser ifaces.ValueParser -} - -// NewMultiLineTagParser creates a TagParser that collects all lines following -// the matching header until a different tag or annotation is encountered. -// -// Example usage (from [NewMetaParser]): -// -// NewMultiLineTagParser("TOS", -// newMultilineDropEmptyParser(rxTOS, metaTOSSetter(info)), -// false, // clean up comment prefixes before parsing -// ) -// -// This creates a tagger that recognizes "Terms of Service:" and collects every -// subsequent line into the TOS field, stripping comment prefixes. -func NewMultiLineTagParser(name string, parser ifaces.ValueParser, skipCleanUp bool) TagParser { - return TagParser{ - Name: name, - MultiLine: true, - SkipCleanUp: skipCleanUp, - Parser: parser, - } -} - -// NewSingleLineTagParser creates a TagParser that matches and parses exactly -// one line. After the match, the [SectionedParser] resets its current tagger -// so subsequent lines can match other taggers. -// -// Example usage (from [NewMetaParser]): -// -// NewSingleLineTagParser("Version", -// &setMetaSingle{Spec: swspec, Rx: rxVersion, Set: setInfoVersion}, -// ) -// -// This creates a tagger that recognizes "Version: 1.0.0" and writes the -// captured value into swspec.Info.Version. -func NewSingleLineTagParser(name string, parser ifaces.ValueParser) TagParser { - return TagParser{ - Name: name, - MultiLine: false, - SkipCleanUp: false, - Parser: parser, - } -} - -// Matches delegates to the underlying Parser. -func (st *TagParser) Matches(line string) bool { - return st.Parser.Matches(line) -} - -// Parse delegates to the underlying Parser. -func (st *TagParser) Parse(lines []string) error { - return st.Parser.Parse(lines) -} diff --git a/internal/scanner/index.go b/internal/scanner/index.go index 4a68828..05f03a8 100644 --- a/internal/scanner/index.go +++ b/internal/scanner/index.go @@ -74,7 +74,7 @@ type TypeIndex struct { AllPackages map[string]*packages.Package Models map[*ast.Ident]*EntityDecl ExtraModels map[*ast.Ident]*EntityDecl - Meta []parsers.MetaSection + Meta []*ast.CommentGroup Routes []parsers.ParsedPathContent Operations []parsers.ParsedPathContent Parameters []*EntityDecl @@ -145,7 +145,7 @@ func (a *TypeIndex) processFile(pkg *packages.Package, file *ast.File) error { } if n&metaNode != 0 { - a.Meta = append(a.Meta, parsers.MetaSection{Comments: file.Doc}) + a.Meta = append(a.Meta, file.Doc) } if n&operationNode != 0 { diff --git a/internal/scanner/scan_context.go b/internal/scanner/scan_context.go index 1bc32d6..925cbc8 100644 --- a/internal/scanner/scan_context.go +++ b/internal/scanner/scan_context.go @@ -115,7 +115,7 @@ func (s *ScanCtx) Debug() bool { return s.debug } -func (s *ScanCtx) Meta() iter.Seq[parsers.MetaSection] { +func (s *ScanCtx) Meta() iter.Seq[*ast.CommentGroup] { if s.app == nil { return nil } From 3a980600b42110baa511d886a91172bef87e4cff Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 19:42:32 +0200 Subject: [PATCH 45/46] =?UTF-8?q?refactor(parsers):=20P6.2=20=E2=80=94=20c?= =?UTF-8?q?onsolidate=20shared=20bridge=20helpers,=20delete=20legacy=20sca?= =?UTF-8?q?ffolding?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Post-migration tidy pass. Replaces three legacy parsers with a narrow shared-helpers package and deletes the now-dead legacy scaffolding. New: internal/parsers/helpers/body.go YAMLListBody, SecurityRequirements, SchemesList, DropEmpty. These are the small body-parsers the meta and routes bridges both needed (consumes/produces YAML list, security `name: scope` lines, comma-split schemes, blank-line filtering). Before this commit each bridge either reached into internal/parsers/ for SetSecurity / NewConsumesDropEmptyParser / NewProducesDropEmptyParser, or duplicated the logic inline in meta_bridge.go. Inlining + dedup: - Routes bridge now calls helpers.YAMLListBody / helpers.SecurityRequirements / helpers.SchemesList instead of parsers.NewConsumesDropEmptyParser / parsers.NewProducesDropEmptyParser / parsers.NewSetSecurityScheme. - applyRouteSchemes removed — dispatch inline through helpers.SchemesList. - Meta bridge's duplicated yamlListBody / parseSecurityRequirements / parseSchemesLine / dropEmpty deleted; calls land on the helpers package. Deleted from internal/parsers/: - security.go (NewSetSecurityScheme + SetSecurity type — 58 L) - parsers.go (ConsumesDropEmptyParser, ProducesDropEmptyParser, multilineYAMLListParser — 100 L) - yaml_spec_parser.go (YAMLSpecScanner + Parse/UnmarshalSpec, the dead bulk left after P6.1 — 211 L) - yaml_spec_parser_test.go (~250 L of tests for the above) - Regex survivors no longer referenced: rxBeginYAMLSpec, rxUncommentNoDash, rxUncommentYAML, rxConsumes, rxProduces, rxSecuritySchemes. - Routes setters no longer called: opConsumesSetter, opProducesSetter, opSecurityDefsSetter. Split out: - RemoveIndent (operations bridge YAML indent normaliser) moved to a new indent.go alongside the now-renamed regex consts. - cleanupScannerLines moved to parsers_helpers.go (it's still used by extensions.go's SetOpExtensions and by CollectScannerTitleDescription). What survives in internal/parsers/ is now strictly functional: - scanner classification (matchers.go, regexprs.go, parsed_path_content.go) — runs before grammar parsing. - route body parsers (extensions.go, responses.go, route_params.go) — domain-heavy, still worth keeping isolated rather than inlining into the routes bridge. - yaml_parser.go (NewYAMLParser) — schema bridge's YAML-fenced extensions path. - indent.go (RemoveIndent) — operations bridge YAML body. - enum.go (ParseEnum, ParseValueFromSchema, GetEnumBasicLitValue, GetEnumDesc) — scheme-aware value helpers consumed by every validation bridge. - lines.go (JoinDropLast, Setter) — string utilities. - parsers_helpers.go (CollectScannerTitleDescription, cleanupScannerLines) — shared prose-split + line cleaner. - errors.go (ErrParser sentinel). Post-P6.2 size: 3650 non-test lines in internal/parsers/ (+125 in internal/parsers/helpers/), down from 4229 after P6.1 — ~580 lines deleted with zero behavior change. Tests and TestParity fixtures all pass. Lint clean. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- internal/builders/routes/bridge.go | 42 +-- internal/builders/routes/bridge_test.go | 14 +- internal/builders/routes/setters.go | 12 - internal/builders/spec/meta_bridge.go | 95 +---- internal/parsers/helpers/body.go | 125 +++++++ internal/parsers/indent.go | 42 +++ internal/parsers/parsers.go | 100 ------ internal/parsers/parsers_helpers.go | 33 ++ internal/parsers/regexprs.go | 25 +- internal/parsers/route_params.go | 3 + internal/parsers/security.go | 58 ---- internal/parsers/yaml_spec_parser.go | 211 ------------ internal/parsers/yaml_spec_parser_test.go | 402 ---------------------- 13 files changed, 238 insertions(+), 924 deletions(-) create mode 100644 internal/parsers/helpers/body.go create mode 100644 internal/parsers/indent.go delete mode 100644 internal/parsers/parsers.go delete mode 100644 internal/parsers/security.go delete mode 100644 internal/parsers/yaml_spec_parser.go delete mode 100644 internal/parsers/yaml_spec_parser_test.go diff --git a/internal/builders/routes/bridge.go b/internal/builders/routes/bridge.go index d16428c..b7fd0d6 100644 --- a/internal/builders/routes/bridge.go +++ b/internal/builders/routes/bridge.go @@ -4,10 +4,9 @@ package routes import ( - "strings" - "github.com/go-openapi/codescan/internal/parsers" "github.com/go-openapi/codescan/internal/parsers/grammar" + "github.com/go-openapi/codescan/internal/parsers/helpers" oaispec "github.com/go-openapi/spec" ) @@ -52,25 +51,31 @@ const ( kwExtensions = "extensions" ) -// dispatchRouteKeyword routes one grammar Property to the legacy -// body-parser that already knows how to parse that keyword's body -// shape. The body-parsers' Parse(lines []string) signature accepts -// grammar's Property.Body directly — comment markers are already -// stripped, YAML list markers survive, etc. +// dispatchRouteKeyword routes one grammar Property to the matching +// body parser. Simple body shapes (schemes comma-list, consumes / +// produces YAML-list, security name:scope lines) use shared +// helpers in internal/parsers/helpers. The three domain-heavy +// body parsers (parameters, responses, extensions) still live in +// internal/parsers/ — their v1-parity logic (e.g. `+ name:` param +// blocks, `200: someResponse` response mapping, nested YAML +// extension maps) is substantial enough to warrant dedicated +// files. func (r *Builder) dispatchRouteKeyword(p grammar.Property, op *oaispec.Operation) error { switch p.Keyword.Name { case kwSchemes: - r.applyRouteSchemes(p, op) + if v := helpers.SchemesList(p.Value); v != nil { + op.Schemes = v + } case kwDeprecated: if p.Typed.Type == grammar.ValueBoolean { op.Deprecated = p.Typed.Boolean } case kwConsumes: - return parsers.NewConsumesDropEmptyParser(opConsumesSetter(op)).Parse(p.Body) + op.Consumes = helpers.YAMLListBody(p.Body) case kwProduces: - return parsers.NewProducesDropEmptyParser(opProducesSetter(op)).Parse(p.Body) + op.Produces = helpers.YAMLListBody(p.Body) case kwSecurity: - return parsers.NewSetSecurityScheme(opSecurityDefsSetter(op)).Parse(p.Body) + op.Security = helpers.SecurityRequirements(p.Body) case kwParameters: return parsers.NewSetParams(r.parameters, opParamSetter(op)).Parse(p.Body) case kwResponses: @@ -80,18 +85,3 @@ func (r *Builder) dispatchRouteKeyword(p grammar.Property, op *oaispec.Operation } return nil } - -// applyRouteSchemes parses `schemes: http, https, ws, wss` — v1 uses -// a regex capture that isolates the post-colon comma-list; the -// grammar already hands us the trimmed value directly. -func (r *Builder) applyRouteSchemes(p grammar.Property, op *oaispec.Operation) { - schemes := make([]string, 0) - for s := range strings.SplitSeq(p.Value, ",") { - if ts := strings.TrimSpace(s); ts != "" { - schemes = append(schemes, ts) - } - } - if len(schemes) > 0 { - op.Schemes = schemes - } -} diff --git a/internal/builders/routes/bridge_test.go b/internal/builders/routes/bridge_test.go index 656566e..ddaef15 100644 --- a/internal/builders/routes/bridge_test.go +++ b/internal/builders/routes/bridge_test.go @@ -19,20 +19,16 @@ func parseRouteBody(t *testing.T, body string) grammar.Block { return p.ParseAs(grammar.AnnRoute, body, token.Position{Line: 1}) } -func TestApplyRouteSchemes(t *testing.T) { +func TestDispatchRouteSchemes(t *testing.T) { var b Builder op := &oaispec.Operation{} block := parseRouteBody(t, "schemes: http, https, ws") - var prop grammar.Property - for p := range block.Properties() { - prop = p - break - } - if prop.Keyword.Name != "schemes" { - t.Fatalf("expected schemes property, got %q", prop.Keyword.Name) + for prop := range block.Properties() { + if err := b.dispatchRouteKeyword(prop, op); err != nil { + t.Fatalf("dispatch: %v", err) + } } - b.applyRouteSchemes(prop, op) want := []string{"http", "https", "ws"} if len(op.Schemes) != len(want) { diff --git a/internal/builders/routes/setters.go b/internal/builders/routes/setters.go index 46be465..9609b01 100644 --- a/internal/builders/routes/setters.go +++ b/internal/builders/routes/setters.go @@ -5,18 +5,6 @@ package routes import "github.com/go-openapi/spec" -func opConsumesSetter(op *spec.Operation) func([]string) { - return func(consumes []string) { op.Consumes = consumes } -} - -func opProducesSetter(op *spec.Operation) func([]string) { - return func(produces []string) { op.Produces = produces } -} - -func opSecurityDefsSetter(op *spec.Operation) func([]map[string][]string) { - return func(securityDefs []map[string][]string) { op.Security = securityDefs } -} - func opResponsesSetter(op *spec.Operation) func(*spec.Response, map[int]spec.Response) { return func(def *spec.Response, scr map[int]spec.Response) { if op.Responses == nil { diff --git a/internal/builders/spec/meta_bridge.go b/internal/builders/spec/meta_bridge.go index 74b934e..9f3845b 100644 --- a/internal/builders/spec/meta_bridge.go +++ b/internal/builders/spec/meta_bridge.go @@ -13,7 +13,7 @@ import ( "github.com/go-openapi/codescan/internal/parsers" "github.com/go-openapi/codescan/internal/parsers/grammar" - yamlparser "github.com/go-openapi/codescan/internal/parsers/yaml" + "github.com/go-openapi/codescan/internal/parsers/helpers" "github.com/go-openapi/loads/fmts" "github.com/go-openapi/spec" yaml "go.yaml.in/yaml/v3" @@ -71,15 +71,15 @@ func dispatchMetaKeyword(p grammar.Property, swspec *spec.Swagger) error { func dispatchMetaSimple(p grammar.Property, swspec *spec.Swagger) bool { switch p.Keyword.Name { case "tos": - swspec.Info.TermsOfService = parsers.JoinDropLast(dropEmpty(p.Body)) + swspec.Info.TermsOfService = parsers.JoinDropLast(helpers.DropEmpty(p.Body)) case "consumes": - swspec.Consumes = yamlListBody(p.Body) + swspec.Consumes = helpers.YAMLListBody(p.Body) case "produces": - swspec.Produces = yamlListBody(p.Body) + swspec.Produces = helpers.YAMLListBody(p.Body) case "schemes": - swspec.Schemes = parseSchemesLine(p.Value) + swspec.Schemes = helpers.SchemesList(p.Value) case "security": - swspec.Security = parseSecurityRequirements(p.Body) + swspec.Security = helpers.SecurityRequirements(p.Body) case "version": swspec.Info.Version = strings.TrimSpace(p.Value) case "host": @@ -153,87 +153,6 @@ func applyMetaExtensions(data []byte, swspec *spec.Swagger) error { return nil } -// yamlListBody parses a block body as a YAML list and returns its -// stringified items. Mirrors parsers.multilineYAMLListParser (the -// Q4 strict-list contract for consumes / produces): a non-list body -// is silently dropped — legacy emits a WARNING log but does not -// error. Empty bodies return nil. -func yamlListBody(body []string) []string { - cleaned := dropEmpty(body) - if len(cleaned) == 0 { - return nil - } - parsed, err := yamlparser.Parse(strings.Join(cleaned, "\n")) - if err != nil { - return nil - } - list, ok := parsed.([]any) - if !ok { - return nil - } - out := make([]string, 0, len(list)) - for _, item := range list { - out = append(out, fmt.Sprintf("%v", item)) - } - return out -} - -// dropEmpty removes whitespace-only entries from a body slice. -func dropEmpty(lines []string) []string { - out := make([]string, 0, len(lines)) - for _, l := range lines { - if strings.TrimSpace(l) != "" { - out = append(out, l) - } - } - return out -} - -// parseSchemesLine mirrors parsers.SetSchemes.Parse — comma-split -// the value, trim each entry, drop empties. -func parseSchemesLine(value string) []string { - out := make([]string, 0) - for s := range strings.SplitSeq(value, ",") { - if ts := strings.TrimSpace(s); ts != "" { - out = append(out, ts) - } - } - if len(out) == 0 { - return nil - } - return out -} - -// parseSecurityRequirements handles a Security block body. Legacy -// (newSetSecurity) parses lines of the form `name: scope1, scope2` -// (with `name:` as an empty-scope entry) into []map[string][]string. -func parseSecurityRequirements(body []string) []map[string][]string { - cleaned := dropEmpty(body) - if len(cleaned) == 0 { - return nil - } - out := make([]map[string][]string, 0, len(cleaned)) - for _, raw := range cleaned { - line := strings.TrimSpace(raw) - name, rest, found := strings.Cut(line, ":") - if !found { - continue - } - name = strings.TrimSpace(name) - rest = strings.TrimSpace(rest) - scopes := []string{} - if rest != "" { - for s := range strings.SplitSeq(rest, ",") { - if ts := strings.TrimSpace(s); ts != "" { - scopes = append(scopes, ts) - } - } - } - out = append(out, map[string][]string{name: scopes}) - } - return out -} - // unmarshalYAMLBody mirrors parsers.YAMLParser.Parse: the block // body (`---` fence contents, preserving indent) is yaml-unmarshal'd, // converted to JSON via fmts.YAMLToJSON, and handed to the setter. @@ -261,7 +180,7 @@ func unmarshalYAMLBody(body []string, setter func([]byte) error) error { // removeYAMLIndent mirrors parsers.removeYamlIndent — strip the // common leading-indent detected on the first non-empty line. func removeYAMLIndent(body []string) []string { - cleaned := dropEmpty(body) + cleaned := helpers.DropEmpty(body) if len(cleaned) == 0 { return nil } diff --git a/internal/parsers/helpers/body.go b/internal/parsers/helpers/body.go new file mode 100644 index 0000000..9d009d8 --- /dev/null +++ b/internal/parsers/helpers/body.go @@ -0,0 +1,125 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +// Package helpers is a grab-bag of small utilities shared by the +// grammar bridge builders (meta / operations / routes / schema / +// parameters / responses). Nothing in this package parses +// comments directly — it operates on pre-extracted line slices, +// raw values, or grammar.Block.ProseLines() output. Anything that +// reads comment groups lives in the scanner's classification +// layer or in internal/parsers/grammar/. +// +// Sub-parsers for richer body shapes (internal/parsers/yaml/, +// internal/parsers/enum/) are siblings to this package, not +// children — their size justifies a dedicated package per the +// sub-parser pattern. +package helpers + +import ( + "fmt" + "regexp" + "strings" + + yamlparser "github.com/go-openapi/codescan/internal/parsers/yaml" +) + +// rxLineLeader matches the leading comment noise the legacy +// multilineYAMLListParser stripped (rxUncommentNoDash equivalent): +// whitespace / tabs / slashes / asterisks, then an optional pipe. +// Preserves `-` so YAML list markers survive. +var rxLineLeader = regexp.MustCompile(`^[\p{Zs}\t/\*]*\|?`) + +// YAMLListBody parses a meta/route block body as a strict YAML list +// and returns its stringified items. Mirrors the Q4 contract for +// `consumes:` / `produces:` bodies: leading comment/indent noise is +// stripped from each line (preserving `-` list markers), blank +// lines are dropped; a non-list body (scalar, map, parse error) is +// silently dropped — legacy code emits a WARNING log. Empty bodies +// return nil. +func YAMLListBody(body []string) []string { + cleaned := make([]string, 0, len(body)) + for _, line := range body { + stripped := rxLineLeader.ReplaceAllString(line, "") + if strings.TrimSpace(stripped) == "" { + continue + } + cleaned = append(cleaned, stripped) + } + if len(cleaned) == 0 { + return nil + } + parsed, err := yamlparser.Parse(strings.Join(cleaned, "\n")) + if err != nil { + return nil + } + list, ok := parsed.([]any) + if !ok { + return nil + } + out := make([]string, 0, len(list)) + for _, item := range list { + out = append(out, fmt.Sprintf("%v", item)) + } + return out +} + +// SecurityRequirements parses a Security: block body. Each line is +// `name: scope1, scope2` (empty scope list permitted when the colon +// has no suffix). Values are comma-split, trimmed, and any +// secondary whitespace inside a scope truncates to the first word +// — matching v1's SetSecurity.Parse / NewSetSecurityScheme +// semantics (via strings.SplitAfter(" ")[0]). +func SecurityRequirements(body []string) []map[string][]string { + const kvParts = 2 + var result []map[string][]string + for _, raw := range body { + kv := strings.SplitN(raw, ":", kvParts) + if len(kv) < kvParts { + continue + } + key := strings.TrimSpace(kv[0]) + scopes := []string{} + for scope := range strings.SplitSeq(kv[1], ",") { + tr := strings.TrimSpace(scope) + if tr == "" { + continue + } + // V1 quirk: a scope containing whitespace is truncated + // at the first word. Preserved for parity; safe on the + // single-word scopes every fixture uses today. + tr = strings.SplitAfter(tr, " ")[0] + scopes = append(scopes, strings.TrimSpace(tr)) + } + result = append(result, map[string][]string{key: scopes}) + } + return result +} + +// SchemesList parses a `Schemes:` value — comma-split, trim each +// entry, drop empties. Returns nil when the input parses to zero +// entries. +func SchemesList(value string) []string { + out := make([]string, 0) + for s := range strings.SplitSeq(value, ",") { + if ts := strings.TrimSpace(s); ts != "" { + out = append(out, ts) + } + } + if len(out) == 0 { + return nil + } + return out +} + +// DropEmpty filters out whitespace-only entries from a line slice. +// Used by the meta and routes bridges before handing body lines to +// YAML or extension parsers that choke on blank separators. +func DropEmpty(lines []string) []string { + out := make([]string, 0, len(lines)) + for _, l := range lines { + if strings.TrimSpace(l) != "" { + out = append(out, l) + } + } + return out +} diff --git a/internal/parsers/indent.go b/internal/parsers/indent.go new file mode 100644 index 0000000..d620483 --- /dev/null +++ b/internal/parsers/indent.go @@ -0,0 +1,42 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package parsers + +import "strings" + +// RemoveIndent normalises the common leading indentation on a YAML +// body: it strips the first line's indent from every line and +// converts remaining tab indentation to two-space equivalents. The +// operations bridge calls this on grammar-isolated YAML fence +// bodies so tab-indented godoc-style YAML (e.g., the go119 fixture) +// parses correctly. +func RemoveIndent(spec []string) []string { + if len(spec) == 0 { + return spec + } + + loc := rxIndent.FindStringIndex(spec[0]) + if len(loc) < 2 || loc[1] <= 1 { + return spec + } + + s := make([]string, len(spec)) + copy(s, spec) + + for i := range s { + if len(s[i]) < loc[1] { + continue + } + + s[i] = spec[i][loc[1]-1:] + start := rxNotIndent.FindStringIndex(s[i]) + if len(start) < 2 || start[1] == 0 { + continue + } + + s[i] = strings.Replace(s[i], "\t", " ", start[1]) + } + + return s +} diff --git a/internal/parsers/parsers.go b/internal/parsers/parsers.go deleted file mode 100644 index ff08710..0000000 --- a/internal/parsers/parsers.go +++ /dev/null @@ -1,100 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "fmt" - "log" - "regexp" - "strings" - - "github.com/go-openapi/codescan/internal/parsers/yaml" -) - -const ( - // kvParts is the number of parts when splitting key:value pairs. - kvParts = 2 -) - -// Many thanks go to https://github.com/yvasiyarov/swagger -// this is loosely based on that implementation but for swagger 2.0 - -type ConsumesDropEmptyParser struct { - *multilineYAMLListParser -} - -func NewConsumesDropEmptyParser(set func([]string)) *ConsumesDropEmptyParser { - return &ConsumesDropEmptyParser{ - multilineYAMLListParser: &multilineYAMLListParser{ - set: set, - rx: rxConsumes, - }, - } -} - -type ProducesDropEmptyParser struct { - *multilineYAMLListParser -} - -func NewProducesDropEmptyParser(set func([]string)) *ProducesDropEmptyParser { - return &ProducesDropEmptyParser{ - multilineYAMLListParser: &multilineYAMLListParser{ - set: set, - rx: rxProduces, - }, - } -} - -// multilineYAMLListParser is the Q4 replacement for -// multilineDropEmptyParser on list-valued block bodies -// (`consumes:` / `produces:` in meta + operation scope). The -// body is captured raw — its list-item markers (`- value`) -// survive the preprocessor — and interpreted by -// internal/parsers/yaml/ as a YAML list. Strict list: non-list -// bodies emit a warning and produce no values. -// -// See `.claude/plans/workshops/w2-enum.md` §2.6 (quirk 4), -// `grammar-parser-architecture.md` §3.3 (sub-parser pattern), -// and `.claude/plans/forthcoming-features.md` §5.2 (P7.7 doc -// follow-up). -type multilineYAMLListParser struct { - set func([]string) - rx *regexp.Regexp -} - -func (m *multilineYAMLListParser) Matches(line string) bool { - return m.rx.MatchString(line) -} - -func (m *multilineYAMLListParser) Parse(lines []string) error { - // Strip comment noise but preserve `-` (the YAML list marker). - // Matches rxUncommentHeaders minus the dash. - cleaned := cleanupScannerLines(lines, rxUncommentNoDash) - - // Drop drop-empty to avoid blank spacer lines between items - // confusing the YAML parser. - cleaned = removeEmptyLines(cleaned) - if len(cleaned) == 0 { - return nil - } - - body := strings.Join(cleaned, "\n") - parsed, err := yaml.Parse(body) - if err != nil { - log.Printf("WARNING: parse.invalid-block-body: %v", err) - return nil - } - list, ok := parsed.([]any) - if !ok { - log.Printf("WARNING: parse.invalid-block-body: expected YAML list, got %T", parsed) - return nil - } - - out := make([]string, 0, len(list)) - for _, item := range list { - out = append(out, fmt.Sprintf("%v", item)) - } - m.set(out) - return nil -} diff --git a/internal/parsers/parsers_helpers.go b/internal/parsers/parsers_helpers.go index 7a9981c..1949b39 100644 --- a/internal/parsers/parsers_helpers.go +++ b/internal/parsers/parsers_helpers.go @@ -4,9 +4,42 @@ package parsers import ( + "regexp" "strings" ) +// cleanupScannerLines strips comment-marker noise (matching ur) from +// each line and trims leading/trailing all-empty runs. Used by the +// legacy body parsers for consumes/produces (rxUncommentNoDash) and +// extensions (rxUncommentHeaders), plus the grammar-side prose +// splitter in CollectScannerTitleDescription. +func cleanupScannerLines(lines []string, ur *regexp.Regexp) []string { + if len(lines) == 0 { + return lines + } + + seenLine := -1 + var lastContent int + + uncommented := make([]string, 0, len(lines)) + for i, v := range lines { + str := ur.ReplaceAllString(v, "") + uncommented = append(uncommented, str) + if str != "" { + if seenLine < 0 { + seenLine = i + } + lastContent = i + } + } + + if seenLine == -1 { + return nil + } + + return uncommented[seenLine : lastContent+1] +} + // CollectScannerTitleDescription splits header lines (free-form prose // appearing before the first recognized tag in a comment block) into // title and description slices, following the legacy SectionedParser diff --git a/internal/parsers/regexprs.go b/internal/parsers/regexprs.go index b7565af..e4cbefb 100644 --- a/internal/parsers/regexprs.go +++ b/internal/parsers/regexprs.go @@ -100,16 +100,8 @@ var ( rxOpTags + ")?\\p{Zs}+" + rxOpID + "\\p{Zs}*$") - rxBeginYAMLSpec = regexp.MustCompile(rxCommentPrefix + `---\p{Zs}*$`) rxUncommentHeaders = regexp.MustCompile(`^[\p{Zs}\t/\*-]*\|?`) - // rxUncommentNoDash mirrors rxUncommentHeaders but does NOT strip - // leading `-`. Used for multi-line list-bodied keywords - // (`consumes:` / `produces:`) where `-` is a YAML list marker - // the body's sub-parser needs to see. See Q4 fix - // (.claude/plans/workshops/w2-enum.md §2.6). - rxUncommentNoDash = regexp.MustCompile(`^[\p{Zs}\t/\*]*\|?`) - rxUncommentYAML = regexp.MustCompile(`^[\p{Zs}\t]*/*`) - rxOperation = regexp.MustCompile( + rxOperation = regexp.MustCompile( rxCommentPrefix + "swagger:operation\\p{Zs}*" + rxMethod + @@ -126,15 +118,12 @@ var ( rxTitleStart = regexp.MustCompile(`^[#]+\p{Zs}+`) rxAllowedExtensions = regexp.MustCompile(`^[Xx]-`) - rxIn = regexp.MustCompile(rxCommentPrefix + `[Ii]n\p{Zs}*:\p{Zs}*(query|path|header|body|formData)(?:\.)?$`) - rxRequired = regexp.MustCompile(rxCommentPrefix + `[Rr]equired\p{Zs}*:\p{Zs}*(true|false)(?:\.)?$`) - rxConsumes = regexp.MustCompile(rxCommentPrefix + `[Cc]onsumes\p{Zs}*:`) - rxProduces = regexp.MustCompile(rxCommentPrefix + `[Pp]roduces\p{Zs}*:`) - rxSecuritySchemes = regexp.MustCompile(rxCommentPrefix + `[Ss]ecurity\p{Zs}*:`) - rxSecurity = regexp.MustCompile(rxCommentPrefix + `[Ss]ecurity\p{Zs}*[Dd]efinitions:`) - rxResponses = regexp.MustCompile(rxCommentPrefix + `[Rr]esponses\p{Zs}*:`) - rxParameters = regexp.MustCompile(rxCommentPrefix + `[Pp]arameters\p{Zs}*:`) - rxExtensions = regexp.MustCompile(rxCommentPrefix + `[Ee]xtensions\p{Zs}*:`) + rxIn = regexp.MustCompile(rxCommentPrefix + `[Ii]n\p{Zs}*:\p{Zs}*(query|path|header|body|formData)(?:\.)?$`) + rxRequired = regexp.MustCompile(rxCommentPrefix + `[Rr]equired\p{Zs}*:\p{Zs}*(true|false)(?:\.)?$`) + rxSecurity = regexp.MustCompile(rxCommentPrefix + `[Ss]ecurity\p{Zs}*[Dd]efinitions:`) + rxResponses = regexp.MustCompile(rxCommentPrefix + `[Rr]esponses\p{Zs}*:`) + rxParameters = regexp.MustCompile(rxCommentPrefix + `[Pp]arameters\p{Zs}*:`) + rxExtensions = regexp.MustCompile(rxCommentPrefix + `[Ee]xtensions\p{Zs}*:`) ) func Rxf(rxp, ar string) *regexp.Regexp { diff --git a/internal/parsers/route_params.go b/internal/parsers/route_params.go index 4354ab8..2460d2f 100644 --- a/internal/parsers/route_params.go +++ b/internal/parsers/route_params.go @@ -13,6 +13,9 @@ import ( ) const ( + // kvParts is the number of parts when splitting key:value pairs. + kvParts = 2 + // paramDescriptionKey indicates the tag used to define a parameter description in swagger:route. paramDescriptionKey = "description" // paramNameKey indicates the tag used to define a parameter name in swagger:route. diff --git a/internal/parsers/security.go b/internal/parsers/security.go deleted file mode 100644 index 3c95d50..0000000 --- a/internal/parsers/security.go +++ /dev/null @@ -1,58 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "regexp" - "strings" -) - -type SetSecurity struct { - set func([]map[string][]string) - rx *regexp.Regexp -} - -func NewSetSecurityScheme(setter func([]map[string][]string)) *SetSecurity { - return &SetSecurity{ - set: setter, - rx: rxSecuritySchemes, - } -} - -func (ss *SetSecurity) Matches(line string) bool { - return ss.rx.MatchString(line) -} - -func (ss *SetSecurity) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - - var result []map[string][]string - const kvParts = 2 - for _, line := range lines { - kv := strings.SplitN(line, ":", kvParts) - scopes := []string{} - var key string - - if len(kv) > 1 { - scs := strings.SplitSeq(kv[1], ",") - for scope := range scs { - tr := strings.TrimSpace(scope) - if tr != "" { - tr = strings.SplitAfter(tr, " ")[0] - scopes = append(scopes, strings.TrimSpace(tr)) - } - } - - key = strings.TrimSpace(kv[0]) - - result = append(result, map[string][]string{key: scopes}) - } - } - - ss.set(result) - - return nil -} diff --git a/internal/parsers/yaml_spec_parser.go b/internal/parsers/yaml_spec_parser.go deleted file mode 100644 index 083f861..0000000 --- a/internal/parsers/yaml_spec_parser.go +++ /dev/null @@ -1,211 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "encoding/json" - "fmt" - "go/ast" - "regexp" - "strings" - - "github.com/go-openapi/loads/fmts" - "go.yaml.in/yaml/v3" -) - -// YAMLSpecScanner aggregates lines in header until it sees `---`, -// the beginning of a YAML spec. -type YAMLSpecScanner struct { - header []string - yamlSpec []string - setTitle func([]string) - setDescription func([]string) - workedOutTitle bool - title []string - skipHeader bool -} - -func NewYAMLSpecScanner(setTitle func([]string), setDescription func([]string)) *YAMLSpecScanner { - return &YAMLSpecScanner{ - setTitle: setTitle, - setDescription: setDescription, - } -} - -func (sp *YAMLSpecScanner) Title() []string { - sp.collectTitleDescription() - return sp.title -} - -func (sp *YAMLSpecScanner) Description() []string { - sp.collectTitleDescription() - return sp.header -} - -func (sp *YAMLSpecScanner) Parse(doc *ast.CommentGroup) error { - if doc == nil { - return nil - } - var startedYAMLSpec bool -COMMENTS: - for _, c := range doc.List { - for line := range strings.SplitSeq(c.Text, "\n") { - if HasAnnotation(line) { - break COMMENTS // a new swagger: annotation terminates this parser - } - - if !startedYAMLSpec { - if rxBeginYAMLSpec.MatchString(line) { - startedYAMLSpec = true - sp.yamlSpec = append(sp.yamlSpec, line) - continue - } - - if !sp.skipHeader { - sp.header = append(sp.header, line) - } - - // no YAML spec yet, moving on - continue - } - - sp.yamlSpec = append(sp.yamlSpec, line) - } - } - if sp.setTitle != nil { - sp.setTitle(sp.Title()) - } - if sp.setDescription != nil { - sp.setDescription(sp.Description()) - } - return nil -} - -func (sp *YAMLSpecScanner) UnmarshalSpec(u func([]byte) error) (err error) { - specYaml := cleanupScannerLines(sp.yamlSpec, rxUncommentYAML) - if len(specYaml) == 0 { - return fmt.Errorf("no spec available to unmarshal: %w", ErrParser) - } - - if !strings.Contains(specYaml[0], "---") { - return fmt.Errorf("yaml spec has to start with `---`: %w", ErrParser) - } - - // remove indentation - specYaml = removeIndent(specYaml) - - // 1. parse yaml lines - yamlValue := make(map[any]any) - - yamlContent := strings.Join(specYaml, "\n") - err = yaml.Unmarshal([]byte(yamlContent), &yamlValue) - if err != nil { - return err - } - - // 2. convert to json - var jsonValue json.RawMessage - jsonValue, err = fmts.YAMLToJSON(yamlValue) - if err != nil { - return err - } - - // 3. unmarshal the json into an interface - var data []byte - data, err = jsonValue.MarshalJSON() - if err != nil { - return err - } - err = u(data) - if err != nil { - return err - } - - // all parsed, returning... - sp.yamlSpec = nil // spec is now consumed, so let's erase the parsed lines - - return nil -} - -func (sp *YAMLSpecScanner) collectTitleDescription() { - if sp.workedOutTitle { - return - } - if sp.setTitle == nil { - sp.header = cleanupScannerLines(sp.header, rxUncommentHeaders) - return - } - - sp.workedOutTitle = true - sp.title, sp.header = collectScannerTitleDescription(sp.header) -} - -// RemoveIndent normalises the common leading indentation on a YAML -// body: it strips the first line's indent from every line and -// converts remaining tab indentation to two-space equivalents. Used -// by YAMLSpecScanner's UnmarshalSpec pipeline and by the grammar -// operations bridge for the same v1-parity normalisation. -func RemoveIndent(spec []string) []string { - return removeIndent(spec) -} - -// removes indent based on the first line. -func removeIndent(spec []string) []string { - if len(spec) == 0 { - return spec - } - - loc := rxIndent.FindStringIndex(spec[0]) - if len(loc) < 2 || loc[1] <= 1 { - return spec - } - - s := make([]string, len(spec)) - copy(s, spec) - - for i := range s { - if len(s[i]) < loc[1] { - continue - } - - s[i] = spec[i][loc[1]-1:] //nolint:gosec // G602: bounds already checked on line 445 - start := rxNotIndent.FindStringIndex(s[i]) - if len(start) < 2 || start[1] == 0 { - continue - } - - s[i] = strings.Replace(s[i], "\t", " ", start[1]) - } - - return s -} - -func cleanupScannerLines(lines []string, ur *regexp.Regexp) []string { - // bail early when there is nothing to parse - if len(lines) == 0 { - return lines - } - - seenLine := -1 - var lastContent int - - uncommented := make([]string, 0, len(lines)) - for i, v := range lines { - str := ur.ReplaceAllString(v, "") - uncommented = append(uncommented, str) - if str != "" { - if seenLine < 0 { - seenLine = i - } - lastContent = i - } - } - - // fixes issue #50 - if seenLine == -1 { - return nil - } - - return uncommented[seenLine : lastContent+1] -} diff --git a/internal/parsers/yaml_spec_parser_test.go b/internal/parsers/yaml_spec_parser_test.go deleted file mode 100644 index d870bd8..0000000 --- a/internal/parsers/yaml_spec_parser_test.go +++ /dev/null @@ -1,402 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "errors" - "go/ast" - "testing" - - "github.com/go-openapi/testify/v2/require" -) - -var errCallback = errors.New("callback error") - -func TestYamlSpecScanner(t *testing.T) { - t.Parallel() - - t.Run("with happy path", func(t *testing.T) { - t.Run("should parse operation definition object as YAML", func(t *testing.T) { - parser := new(YAMLSpecScanner) - var title, description []string - parser.setTitle = func(lines []string) { title = lines } - parser.setDescription = func(lines []string) { description = lines } - - lines := []string{ - // from issue #3225, reindented - // `swagger:operation POST /v1/example-endpoint addExampleConfig`, - `title for this operation`, - ``, // blank line elided - `description of this operation`, - ``, // blank line preserved - `continuation of the description`, - `---`, // YAML block - `summary: Adds a new configuration entry`, - `description: |-`, - ` Creates and validates a new configuration request.`, - ``, - `security:`, - `- AuthToken: []`, - `consumes:`, - `- application/json`, - `tags:`, - `- Example|Configuration`, - `responses:`, - ` 201:`, - ` $ref: "#/responses/createdResponse"`, - ` 400:`, - ` $ref: "#/responses/badRequestResponse"`, - ` 412:`, - ` $ref: "#/responses/preconditionFailedResponse"`, - ` 500:`, - ` $ref: "#/responses/internalServerErrorResponse"`, - } - - doc := buildRawTestComments(lines) - require.NoError(t, parser.Parse(doc)) - require.Equal(t, title, parser.Title()) - require.Equal(t, []string{"title for this operation"}, parser.Title()) - require.Equal(t, description, parser.Description()) - require.Equal(t, []string{"description of this operation", "", "continuation of the description"}, parser.Description()) - - var receivedJSON string - yamlReceiver := func(b []byte) error { - receivedJSON = string(b) - return nil - } - - require.NoError(t, parser.UnmarshalSpec(yamlReceiver)) - - const expectedJSON = `{ - "summary":"Adds a new configuration entry", - "description":"Creates and validates a new configuration request.", - "security":[ - {"AuthToken":[]} - ], - "consumes":["application/json"], - "tags":["Example|Configuration"], - "responses":{ - "201":{"$ref":"#/responses/createdResponse"}, - "400":{"$ref":"#/responses/badRequestResponse"}, - "412":{"$ref":"#/responses/preconditionFailedResponse"}, - "500":{"$ref":"#/responses/internalServerErrorResponse"} - } - }` - - require.JSONEqT(t, expectedJSON, receivedJSON) - }) - - t.Run("should stop yaml operation block when new tag is found", func(t *testing.T) { - parser := new(YAMLSpecScanner) - var title, description []string - parser.setTitle = func(lines []string) { title = lines } - parser.setDescription = func(lines []string) { description = lines } - - lines := []string{ - `title for this operation`, - ``, // blank line elided - `description of this operation`, - `---`, // YAML block - `summary: Adds a new configuration entry`, - ``, - `swagger:enum`, // yaml block ended at this tag. Rest is ignored - `security:`, - `- AuthToken: []`, - } - - doc := buildRawTestComments(lines) - require.NoError(t, parser.Parse(doc)) - require.Equal(t, title, parser.Title()) - require.Equal(t, []string{"title for this operation"}, parser.Title()) - require.Equal(t, description, parser.Description()) - require.Equal(t, []string{"description of this operation"}, parser.Description()) - - var receivedJSON string - yamlReceiver := func(b []byte) error { - receivedJSON = string(b) - return nil - } - - require.NoError(t, parser.UnmarshalSpec(yamlReceiver)) - - const expectedJSON = `{ - "summary":"Adds a new configuration entry" - }` - - require.JSONEqT(t, expectedJSON, receivedJSON) - }) - - t.Run("should stop yaml operation block when new yaml document separator is found", func(t *testing.T) { - parser := new(YAMLSpecScanner) - var title, description []string - parser.setTitle = func(lines []string) { title = lines } - parser.setDescription = func(lines []string) { description = lines } - - lines := []string{ - `title for this operation`, - ``, // blank line elided - `description of this operation`, - `---`, // YAML block - `summary: Adds a new configuration entry`, - ``, - `---`, // yaml block ended at mark. Rest is ignored - `security:`, - `- AuthToken: []`, - } - - doc := buildRawTestComments(lines) - require.NoError(t, parser.Parse(doc)) - require.Equal(t, title, parser.Title()) - require.Equal(t, []string{"title for this operation"}, parser.Title()) - require.Equal(t, description, parser.Description()) - require.Equal(t, []string{"description of this operation"}, parser.Description()) - - var receivedJSON string - yamlReceiver := func(b []byte) error { - receivedJSON = string(b) - return nil - } - - require.NoError(t, parser.UnmarshalSpec(yamlReceiver)) - - const expectedJSON = `{ - "summary":"Adds a new configuration entry" - }` - - require.JSONEqT(t, expectedJSON, receivedJSON) - }) - }) - - t.Run("with edge cases", func(t *testing.T) { - t.Run("with empty comment block", func(t *testing.T) { - parser := new(YAMLSpecScanner) - var title, description []string - parser.setTitle = func(lines []string) { title = lines } - parser.setDescription = func(lines []string) { description = lines } - doc := buildRawTestComments(nil) - require.NoError(t, parser.Parse(doc)) - require.Empty(t, title) - require.Empty(t, description) - }) - - t.Run("with nil comment block", func(t *testing.T) { - parser := new(YAMLSpecScanner) - var title, description []string - parser.setTitle = func(lines []string) { title = lines } - parser.setDescription = func(lines []string) { description = lines } - require.NoError(t, parser.Parse(nil)) - require.Empty(t, title) - require.Empty(t, description) - }) - - t.Run("without setTitle", func(t *testing.T) { - parser := new(YAMLSpecScanner) - var description []string - parser.setDescription = func(lines []string) { description = lines } - - lines := []string{ - `title for this operation`, - ``, // blank line preserved - `description of this operation`, - `---`, // YAML block - } - - doc := buildRawTestComments(lines) - require.NoError(t, parser.Parse(doc)) - require.Nil(t, parser.Title()) - require.Equal(t, description, parser.Description()) - require.Equal(t, []string{"title for this operation", "", "description of this operation"}, parser.Description()) - - var receivedJSON string - yamlReceiver := func(b []byte) error { - receivedJSON = string(b) - return nil - } - require.NoError(t, parser.UnmarshalSpec(yamlReceiver)) - require.JSONEqT(t, `{}`, receivedJSON) - }) - }) -} - -func TestYAMLSpecScanner_UnmarshalSpec_Errors(t *testing.T) { - t.Parallel() - - t.Run("no spec available", func(t *testing.T) { - parser := new(YAMLSpecScanner) - parser.setTitle = func(_ []string) {} - parser.setDescription = func(_ []string) {} - // Parse with no --- marker → no yamlSpec collected - doc := buildRawTestComments([]string{"just text, no yaml"}) - require.NoError(t, parser.Parse(doc)) - - err := parser.UnmarshalSpec(func(_ []byte) error { return nil }) - require.Error(t, err) - require.ErrorIs(t, err, ErrParser) - }) - - t.Run("spec doesnt start with ---", func(t *testing.T) { - parser := new(YAMLSpecScanner) - // Manually inject yamlSpec without the --- marker - parser.yamlSpec = []string{"summary: test"} - - err := parser.UnmarshalSpec(func(_ []byte) error { return nil }) - require.Error(t, err) - require.ErrorIs(t, err, ErrParser) - }) - - t.Run("invalid yaml", func(t *testing.T) { - parser := new(YAMLSpecScanner) - parser.yamlSpec = []string{"// ---", "// \tbad:", "// yaml"} - - err := parser.UnmarshalSpec(func(_ []byte) error { return nil }) - require.Error(t, err) - }) - - t.Run("unmarshal callback error", func(t *testing.T) { - parser := new(YAMLSpecScanner) - parser.setTitle = func(_ []string) {} - parser.setDescription = func(_ []string) {} - - lines := []string{ - "title", - "---", - "summary: test", - } - doc := buildRawTestComments(lines) - require.NoError(t, parser.Parse(doc)) - - err := parser.UnmarshalSpec(func(_ []byte) error { return errCallback }) - require.Error(t, err) - require.ErrorIs(t, err, errCallback) - }) -} - -func TestNewYAMLSpecScanner(t *testing.T) { - t.Parallel() - - var title, desc []string - scanner := NewYAMLSpecScanner( - func(lines []string) { title = lines }, - func(lines []string) { desc = lines }, - ) - - lines := []string{ - "My Title.", - "", - "My description.", - "---", - "summary: test", - } - doc := buildRawTestComments(lines) - require.NoError(t, scanner.Parse(doc)) - require.Equal(t, []string{"My Title."}, title) - require.Equal(t, []string{"My description."}, desc) -} - -func TestRemoveIndent(t *testing.T) { - t.Parallel() - - t.Run("with removeIndent", func(t *testing.T) { - t.Run("should tolerate empty input", func(t *testing.T) { - res := removeIndent([]string{}) - require.Empty(t, res) - require.NotNil(t, res) - }) - - t.Run("should tolerate nil input", func(t *testing.T) { - res := removeIndent(nil) - require.Empty(t, res) - require.Nil(t, res) - }) - - t.Run("should support headline without indentation", func(t *testing.T) { - lines := []string{ - "xyz", - " abc", - } - res := removeIndent(lines) - require.Equal(t, lines, res) - }) - - t.Run("should tolerate lines with only indents", func(t *testing.T) { - lines := []string{ - " xyz", - "", - " ", - " ", - } - res := removeIndent(lines) - - expected := []string{ - "xyz", - "", // empty line preserved - " ", // blank lines unindented - " ", - } - require.Equal(t, expected, res) - }) - - t.Run("should replace tabs with spaces in indentation", func(t *testing.T) { - lines := []string{ - "\t\txyz", - "", - " ", - "\t \t", - } - res := removeIndent(lines) - - expected := []string{ - "xyz", - "", // empty line preserved - " ", // blank lines unindented - " \t", - } - require.Equal(t, expected, res) - }) - }) - - t.Run("with removeYamlIndent", func(t *testing.T) { - t.Run("should tolerate empty input", func(t *testing.T) { - res := removeYamlIndent([]string{}) - require.Empty(t, res) - require.NotNil(t, res) - }) - - t.Run("should tolerate nil input", func(t *testing.T) { - res := removeYamlIndent(nil) - require.Empty(t, res) - require.Nil(t, res) - }) - - t.Run("should support headline without indentation", func(t *testing.T) { - lines := []string{ - "xyz", - " abc", - } - res := removeYamlIndent(lines) - require.Equal(t, lines, res) - }) - - t.Run("should support headline without indentation", func(t *testing.T) { - lines := []string{ - "xyz", - " abc", - } - res := removeYamlIndent(lines) - require.Equal(t, lines, res) - }) - }) -} - -func buildRawTestComments(lines []string) *ast.CommentGroup { - // build raw doc comments like ast provides - doc := &ast.CommentGroup{ - List: make([]*ast.Comment, 0, len(lines)), - } - for _, line := range lines { - doc.List = append(doc.List, &ast.Comment{Text: "// " + line}) - } - - return doc -} From 440696b4e732d0cafc1d57ee90b8e3b8fefce164 Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 22 Apr 2026 21:31:24 +0200 Subject: [PATCH 46/46] =?UTF-8?q?refactor(parsers):=20P6.3=20=E2=80=94=20q?= =?UTF-8?q?uarantine=20routebody,=20retire=20yaml=5Fparser,=20move=20class?= =?UTF-8?q?ification=20beneath=20scanner?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continues the post-migration tidy from P6.2. Regroups what's left of the pre-grammar parsing layer into four clean zones so the remaining v1 code is identifiable at a glance and can be retired surgically. New: internal/parsers/routebody/ The three legacy body parsers (SetOpParams / SetOpResponses / SetOpExtensions) and their tests moved out of internal/parsers/ into a dedicated subpackage. These are the last citadel of the regex-era pipeline, consumed exclusively by routes/bridge.go. The subpackage is self-contained: its own errors.go sentinel, its own regexprs.go (rxResponses, rxParameters, rxExtensions, rxAllowedExtensions, rxUncommentHeaders, rxCommentPrefix). When routes grows a grammar-native body pipeline, the whole package can be deleted in one pass. Retired: internal/parsers/yaml_parser.go (95 L) The NewYAMLParser / WithSetter / WithExtensionMatcher trio was a wrapper around yaml.Unmarshal + fmts.YAMLToJSON + a setter callback. WithExtensionMatcher had been reduced to a doc-only no-op in P6.1. The single caller (schema/bridge.go's applyExtensionsBody) now inlines the 10-line pipeline directly. Moved: classification beneath scanner/ IsAllowedExtension (+ rxAllowedExtensions) relocated to internal/scanner/classify/. Three bridges (schema, parameters, spec/meta) now import from there instead of reaching into internal/parsers/. Matches the "classification is a scanner concern" invariant: scanner-owned predicates live under internal/scanner/, not in the old parsers/ tree. Deleted: - internal/parsers/errors.go — ErrParser sentinel had no remaining consumers (routebody introduced its own). - internal/parsers/yaml_parser_test.go — tests for NewYAMLParser with the parser itself. Trim of internal/parsers/regexprs.go: - rxResponses / rxParameters / rxExtensions / rxUncommentHeaders / rxIndent moved to routebody (last callers live there now). - rxAllowedExtensions moved to scanner/classify/. Final shape of internal/parsers/ root (364 lines, three files): - matchers.go scanner classification helpers - regexprs.go swagger:* annotation regexes + in/required - parsed_path_content.go route/operation path annotation splitter Subpackages: grammar/, grammar/gen/, grammar/grammar_test/, helpers/, yaml/, enum/, routebody/. CLAUDE.md refreshed to describe the new layout. Tests and golden fixtures all pass. Lint clean against master (a single pre-existing nolintlint flicker on items/bridge_test.go is unrelated). Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Frederic BIDON --- .claude/CLAUDE.md | 35 +++-- internal/builders/items/bridge.go | 2 +- internal/builders/items/validations.go | 4 +- internal/builders/operations/bridge.go | 10 +- internal/builders/parameters/bridge.go | 13 +- internal/builders/parameters/typable.go | 6 +- internal/builders/responses/bridge.go | 12 +- internal/builders/responses/typable.go | 4 +- internal/builders/routes/bridge.go | 24 +-- internal/builders/schema/bridge.go | 66 ++++---- internal/builders/schema/typable.go | 10 +- internal/builders/spec/meta_bridge.go | 12 +- internal/parsers/enum_test.go | 145 ------------------ internal/parsers/errors.go | 9 -- internal/parsers/{ => helpers}/enum.go | 100 ++++++------ internal/parsers/{ => helpers}/indent.go | 21 ++- internal/parsers/helpers/lines.go | 62 ++++++++ internal/parsers/{ => helpers}/lines_test.go | 23 +-- internal/parsers/helpers/title_desc.go | 75 +++++++++ .../title_desc_test.go} | 18 +-- internal/parsers/lines.go | 43 ------ internal/parsers/matchers.go | 4 - internal/parsers/matchers_test.go | 22 --- internal/parsers/parsers_helpers.go | 98 ------------ internal/parsers/regexprs.go | 17 +- internal/parsers/routebody/errors.go | 10 ++ .../parsers/{ => routebody}/extensions.go | 5 +- .../{ => routebody}/extensions_test.go | 2 +- internal/parsers/routebody/regexprs.go | 33 ++++ internal/parsers/{ => routebody}/responses.go | 2 +- .../parsers/{ => routebody}/responses_test.go | 2 +- .../parsers/{ => routebody}/route_params.go | 2 +- .../{ => routebody}/route_params_test.go | 2 +- internal/parsers/yaml_parser.go | 106 ------------- internal/parsers/yaml_parser_test.go | 141 ----------------- internal/scanner/classify/extension.go | 26 ++++ internal/scanner/classify/extension_test.go | 32 ++++ internal/scanner/scan_context.go | 3 +- 38 files changed, 438 insertions(+), 763 deletions(-) delete mode 100644 internal/parsers/enum_test.go delete mode 100644 internal/parsers/errors.go rename internal/parsers/{ => helpers}/enum.go (53%) rename internal/parsers/{ => helpers}/indent.go (57%) create mode 100644 internal/parsers/helpers/lines.go rename internal/parsers/{ => helpers}/lines_test.go (68%) create mode 100644 internal/parsers/helpers/title_desc.go rename internal/parsers/{parsers_helpers_test.go => helpers/title_desc_test.go} (73%) delete mode 100644 internal/parsers/lines.go delete mode 100644 internal/parsers/parsers_helpers.go create mode 100644 internal/parsers/routebody/errors.go rename internal/parsers/{ => routebody}/extensions.go (97%) rename internal/parsers/{ => routebody}/extensions_test.go (99%) create mode 100644 internal/parsers/routebody/regexprs.go rename internal/parsers/{ => routebody}/responses.go (99%) rename internal/parsers/{ => routebody}/responses_test.go (99%) rename internal/parsers/{ => routebody}/route_params.go (99%) rename internal/parsers/{ => routebody}/route_params_test.go (99%) delete mode 100644 internal/parsers/yaml_parser.go delete mode 100644 internal/parsers/yaml_parser_test.go create mode 100644 internal/scanner/classify/extension.go create mode 100644 internal/scanner/classify/extension_test.go diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 1bd7e09..10fd580 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -36,22 +36,33 @@ to builders without direct coupling. | `scan_context.go` | `ScanCtx` / `NewScanCtx` — loads Go packages via `golang.org/x/tools/go/packages` | | `index.go` | `TypeIndex` — node classification (meta/route/operation/model/parameters/response) | | `declaration.go` | `EntityDecl` — wraps a type/value declaration with its enclosing file/package | +| `classify/` | Classification predicates usable from both scanner and builders (e.g. `IsAllowedExtension`) | -### `internal/parsers/` — comment-block parsing engine +### `internal/parsers/` — scanner classification + helpers + +Post grammar-migration (P6.3), `parsers/` is intentionally scanner-only. The +old regex-based comment-block parsing engine is gone; what remains are +classification helpers used by the scanner and builders, plus subpackages +for the grammar parser and its satellite helpers. + +**Root — scanner classification** | File | Contents | |------|----------| -| `sectioned_parser.go` | The section-driven parser that walks title/description/annotation blocks | -| `parsers.go`, `parsers_helpers.go` | Dispatch + helpers for tag/package filtering, value extraction | -| `tag_parsers.go`, `matchers.go` | Tag recognisers (`TypeName`, `Model`, etc.) | -| `regexprs.go` | Shared regular expressions for annotation parsing | -| `meta.go` | Swagger info-block parsing (title, version, license, contact) | -| `responses.go`, `route_params.go` | Response / route-parameter annotation parsing | -| `validations.go`, `extensions.go` | Validation directives, `x-*` extensions | -| `enum.go`, `security.go` | Enum extraction from Go constants, security-definition blocks | -| `yaml_parser.go`, `yaml_spec_parser.go` | Embedded-YAML parsing for `swagger:operation` bodies | -| `lines.go`, `parsed_path_content.go` | Comment-line and path-content helpers | -| `errors.go` | Sentinel errors | +| `matchers.go` | Classification helpers: `HasAnnotation`, `ExtractAnnotation`, `ModelOverride`, `StrfmtName`, `ParamLocation`, etc. | +| `regexprs.go` | Regex definitions for the `swagger:` annotations + `in:` / `required:` classification | +| `parsed_path_content.go` | `ParsedPathContent` + `ParseOperationPathAnnotation` / `ParseRoutePathAnnotation` | + +**Subpackages** + +| Package | Role | +|---------|------| +| `grammar/` | The new grammar-based comment parser — `NewParser`, `Block`, `Property`, keyword tables | +| `grammar/gen/`, `grammar/grammar_test/` | Generator + external grammar tests | +| `helpers/` | Bridge-consumed helpers: `JoinDropLast`, `CollectScannerTitleDescription`, `RemoveIndent`, `ParseEnum`, `ParseValueFromSchema`, `YAMLListBody`, `SecurityRequirements`, `SchemesList`, enum-desc extension handling | +| `yaml/` | Grammar's companion YAML sub-parser — reads `---`-fenced bodies into generic values | +| `enum/` | Experimental enum body sub-parser (not yet activated — see `.claude/plans/workshops/w2-enum.md`) | +| `routebody/` | **The last v1 regex-era body parsers.** `SetOpParams` / `SetOpResponses` / `SetOpExtensions` consume the indented `parameters:` / `responses:` / `extensions:` blocks inside `swagger:route` docs. Consumed exclusively by `internal/builders/routes/bridge.go` — deleteable as a unit once routes grows a grammar-native body pipeline | ### `internal/builders/` — Swagger object construction diff --git a/internal/builders/items/bridge.go b/internal/builders/items/bridge.go index 9651d8b..0298940 100644 --- a/internal/builders/items/bridge.go +++ b/internal/builders/items/bridge.go @@ -107,7 +107,7 @@ func dispatchItemsKeyword(p grammar.Property, t ifaces.ValidationBuilder) { } case "enum": // Delegated to the existing target.SetEnum, which routes - // through parsers.ParseEnum (post-Q1 fix: comma-list + // through helpers.ParseEnum (post-Q1 fix: comma-list // trimmed, JSON array verbatim). Direct use of // internal/parsers/enum.Parse is deferred to the // post-migration cleanup commit that takes the fully-typed diff --git a/internal/builders/items/validations.go b/internal/builders/items/validations.go index c91f985..e694716 100644 --- a/internal/builders/items/validations.go +++ b/internal/builders/items/validations.go @@ -4,7 +4,7 @@ package items import ( - "github.com/go-openapi/codescan/internal/parsers" + "github.com/go-openapi/codescan/internal/parsers/helpers" oaispec "github.com/go-openapi/spec" ) @@ -38,7 +38,7 @@ func (sv Validations) SetPattern(val string) { sv.current.Pattern = val func (sv Validations) SetUnique(val bool) { sv.current.UniqueItems = val } func (sv Validations) SetCollectionFormat(val string) { sv.current.CollectionFormat = val } func (sv Validations) SetEnum(val string) { - sv.current.Enum = parsers.ParseEnum(val, &oaispec.SimpleSchema{Type: sv.current.Type, Format: sv.current.Format}) + sv.current.Enum = helpers.ParseEnum(val, &oaispec.SimpleSchema{Type: sv.current.Type, Format: sv.current.Format}) } func (sv Validations) SetDefault(val any) { sv.current.Default = val } func (sv Validations) SetExample(val any) { sv.current.Example = val } diff --git a/internal/builders/operations/bridge.go b/internal/builders/operations/bridge.go index b2f6388..86608d8 100644 --- a/internal/builders/operations/bridge.go +++ b/internal/builders/operations/bridge.go @@ -7,8 +7,8 @@ import ( "fmt" "strings" - "github.com/go-openapi/codescan/internal/parsers" "github.com/go-openapi/codescan/internal/parsers/grammar" + "github.com/go-openapi/codescan/internal/parsers/helpers" "github.com/go-openapi/loads/fmts" oaispec "github.com/go-openapi/spec" yaml "go.yaml.in/yaml/v3" @@ -36,9 +36,9 @@ func (o *Builder) applyBlockToOperation(op *oaispec.Operation) error { fset := o.ctx.FileSet() block := grammar.NewParser(fset).Parse(o.path.Remaining) - title, desc := parsers.CollectScannerTitleDescription(block.ProseLines()) - op.Summary = parsers.JoinDropLast(title) - op.Description = parsers.JoinDropLast(desc) + title, desc := helpers.CollectScannerTitleDescription(block.ProseLines()) + op.Summary = helpers.JoinDropLast(title) + op.Description = helpers.JoinDropLast(desc) var yamlBody string for y := range block.YAMLBlocks() { @@ -60,7 +60,7 @@ func (o *Builder) applyBlockToOperation(op *oaispec.Operation) error { // parse correctly. func unmarshalOpYAML(body string, unmarshal func([]byte) error) error { lines := strings.Split(body, "\n") - lines = parsers.RemoveIndent(lines) + lines = helpers.RemoveIndent(lines) normalized := strings.Join(lines, "\n") yamlValue := make(map[any]any) diff --git a/internal/builders/parameters/bridge.go b/internal/builders/parameters/bridge.go index 44762ee..562fb69 100644 --- a/internal/builders/parameters/bridge.go +++ b/internal/builders/parameters/bridge.go @@ -8,8 +8,9 @@ import ( "strings" "github.com/go-openapi/codescan/internal/builders/items" - "github.com/go-openapi/codescan/internal/parsers" "github.com/go-openapi/codescan/internal/parsers/grammar" + "github.com/go-openapi/codescan/internal/parsers/helpers" + "github.com/go-openapi/codescan/internal/scanner/classify" oaispec "github.com/go-openapi/spec" ) @@ -85,8 +86,8 @@ func (p *ParameterBuilder) applyBlockToField(afld *ast.Field, param *oaispec.Par // Description: raw-line JoinDropLast for v1 parity (line-preserving // `"\n"` join), enum-desc extension suffix appended. - param.Description = parsers.JoinDropLast(block.ProseLines()) - if enumDesc := parsers.GetEnumDesc(param.Extensions); enumDesc != "" { + param.Description = helpers.JoinDropLast(block.ProseLines()) + if enumDesc := helpers.GetEnumDesc(param.Extensions); enumDesc != "" { if param.Description != "" { param.Description += "\n" } @@ -106,7 +107,7 @@ func (p *ParameterBuilder) applyBlockToField(afld *ast.Field, param *oaispec.Par } for ext := range block.Extensions() { - if !parsers.IsAllowedExtension(ext.Name) { + if !classify.IsAllowedExtension(ext.Name) { continue } param.AddExtension(ext.Name, ext.Value) @@ -194,13 +195,13 @@ func dispatchStringOrEnum(p grammar.Property, valid paramValidations, scheme *oa case "enum": valid.SetEnum(p.Value) case "default": - v, err := parsers.ParseValueFromSchema(p.Value, scheme) + v, err := helpers.ParseValueFromSchema(p.Value, scheme) if err != nil { return true, err } valid.SetDefault(v) case "example": - v, err := parsers.ParseValueFromSchema(p.Value, scheme) + v, err := helpers.ParseValueFromSchema(p.Value, scheme) if err != nil { return true, err } diff --git a/internal/builders/parameters/typable.go b/internal/builders/parameters/typable.go index cb27df5..cfbf393 100644 --- a/internal/builders/parameters/typable.go +++ b/internal/builders/parameters/typable.go @@ -7,7 +7,7 @@ import ( "github.com/go-openapi/codescan/internal/builders/items" "github.com/go-openapi/codescan/internal/builders/schema" "github.com/go-openapi/codescan/internal/ifaces" - "github.com/go-openapi/codescan/internal/parsers" + "github.com/go-openapi/codescan/internal/parsers/helpers" oaispec "github.com/go-openapi/spec" ) @@ -70,7 +70,7 @@ func (pt paramTypable) WithEnumDescription(desc string) { if desc == "" { return } - pt.param.AddExtension(parsers.EnumDescExtension(), desc) + pt.param.AddExtension(helpers.EnumDescExtension(), desc) } type paramValidations struct { @@ -95,7 +95,7 @@ func (sv paramValidations) SetPattern(val string) { sv.current.Pattern func (sv paramValidations) SetUnique(val bool) { sv.current.UniqueItems = val } func (sv paramValidations) SetCollectionFormat(val string) { sv.current.CollectionFormat = val } func (sv paramValidations) SetEnum(val string) { - sv.current.Enum = parsers.ParseEnum(val, &oaispec.SimpleSchema{Type: sv.current.Type, Format: sv.current.Format}) + sv.current.Enum = helpers.ParseEnum(val, &oaispec.SimpleSchema{Type: sv.current.Type, Format: sv.current.Format}) } func (sv paramValidations) SetDefault(val any) { sv.current.Default = val } func (sv paramValidations) SetExample(val any) { sv.current.Example = val } diff --git a/internal/builders/responses/bridge.go b/internal/builders/responses/bridge.go index 46a316c..67d71bb 100644 --- a/internal/builders/responses/bridge.go +++ b/internal/builders/responses/bridge.go @@ -8,8 +8,8 @@ import ( "strings" "github.com/go-openapi/codescan/internal/builders/items" - "github.com/go-openapi/codescan/internal/parsers" "github.com/go-openapi/codescan/internal/parsers/grammar" + "github.com/go-openapi/codescan/internal/parsers/helpers" oaispec "github.com/go-openapi/spec" ) @@ -66,7 +66,7 @@ func collectHeaderItemsLevels(expr ast.Expr, it *oaispec.Items, level int) []hea // description, no taggers. func (r *ResponseBuilder) applyBlockToDecl(resp *oaispec.Response) { block := grammar.NewParser(r.decl.Pkg.Fset).Parse(r.decl.Comments) - resp.Description = parsers.JoinDropLast(block.ProseLines()) + resp.Description = helpers.JoinDropLast(block.ProseLines()) } // applyBlockToHeader parses afld.Doc under the grammar parser and @@ -85,7 +85,7 @@ func (r *ResponseBuilder) applyBlockToDecl(resp *oaispec.Response) { func (r *ResponseBuilder) applyBlockToHeader(afld *ast.Field, header *oaispec.Header) { block := grammar.NewParser(r.decl.Pkg.Fset).Parse(afld.Doc) - header.Description = parsers.JoinDropLast(block.ProseLines()) + header.Description = helpers.JoinDropLast(block.ProseLines()) scheme := &header.SimpleSchema valid := headerValidations{header} @@ -107,7 +107,7 @@ func (r *ResponseBuilder) applyBlockToHeader(afld *ast.Field, header *oaispec.He // dispatchHeaderKeyword routes a level-0 Property into // headerValidations or, for scheme-aware default/example, through -// parsers.ParseValueFromSchema. Covers the v1 baseResponseHeaderTaggers +// helpers.ParseValueFromSchema. Covers the v1 baseResponseHeaderTaggers // surface minus `in:` (upstream-resolved). func dispatchHeaderKeyword(p grammar.Property, valid headerValidations, scheme *oaispec.SimpleSchema) { if dispatchNumericValidation(p, valid) { @@ -168,11 +168,11 @@ func dispatchStringOrEnum(p grammar.Property, valid headerValidations, scheme *o case "enum": valid.SetEnum(p.Value) case "default": - if v, err := parsers.ParseValueFromSchema(p.Value, scheme); err == nil { + if v, err := helpers.ParseValueFromSchema(p.Value, scheme); err == nil { valid.SetDefault(v) } case "example": - if v, err := parsers.ParseValueFromSchema(p.Value, scheme); err == nil { + if v, err := helpers.ParseValueFromSchema(p.Value, scheme); err == nil { valid.SetExample(v) } default: diff --git a/internal/builders/responses/typable.go b/internal/builders/responses/typable.go index f3feae3..c7fee36 100644 --- a/internal/builders/responses/typable.go +++ b/internal/builders/responses/typable.go @@ -7,7 +7,7 @@ import ( "github.com/go-openapi/codescan/internal/builders/items" "github.com/go-openapi/codescan/internal/builders/schema" "github.com/go-openapi/codescan/internal/ifaces" - "github.com/go-openapi/codescan/internal/parsers" + "github.com/go-openapi/codescan/internal/parsers/helpers" oaispec "github.com/go-openapi/spec" ) @@ -116,7 +116,7 @@ func (sv headerValidations) SetCollectionFormat(val string) { } func (sv headerValidations) SetEnum(val string) { - sv.current.Enum = parsers.ParseEnum(val, &oaispec.SimpleSchema{Type: sv.current.Type, Format: sv.current.Format}) + sv.current.Enum = helpers.ParseEnum(val, &oaispec.SimpleSchema{Type: sv.current.Type, Format: sv.current.Format}) } func (sv headerValidations) SetDefault(val any) { sv.current.Default = val } diff --git a/internal/builders/routes/bridge.go b/internal/builders/routes/bridge.go index b7fd0d6..32eaf92 100644 --- a/internal/builders/routes/bridge.go +++ b/internal/builders/routes/bridge.go @@ -4,9 +4,9 @@ package routes import ( - "github.com/go-openapi/codescan/internal/parsers" "github.com/go-openapi/codescan/internal/parsers/grammar" "github.com/go-openapi/codescan/internal/parsers/helpers" + "github.com/go-openapi/codescan/internal/parsers/routebody" oaispec "github.com/go-openapi/spec" ) @@ -23,9 +23,9 @@ import ( func (r *Builder) applyBlockToRoute(op *oaispec.Operation) error { block := grammar.NewParser(r.ctx.FileSet()).Parse(r.route.Remaining) - title, desc := parsers.CollectScannerTitleDescription(block.ProseLines()) - op.Summary = parsers.JoinDropLast(title) - op.Description = parsers.JoinDropLast(desc) + title, desc := helpers.CollectScannerTitleDescription(block.ProseLines()) + op.Summary = helpers.JoinDropLast(title) + op.Description = helpers.JoinDropLast(desc) for prop := range block.Properties() { if prop.ItemsDepth != 0 { @@ -55,11 +55,11 @@ const ( // body parser. Simple body shapes (schemes comma-list, consumes / // produces YAML-list, security name:scope lines) use shared // helpers in internal/parsers/helpers. The three domain-heavy -// body parsers (parameters, responses, extensions) still live in -// internal/parsers/ — their v1-parity logic (e.g. `+ name:` param -// blocks, `200: someResponse` response mapping, nested YAML -// extension maps) is substantial enough to warrant dedicated -// files. +// body parsers (parameters, responses, extensions) live in +// internal/parsers/routebody — their v1-parity logic (e.g. +// `+ name:` param blocks, `200: someResponse` response mapping, +// nested YAML extension maps) is the last citadel of the +// pre-grammar pipeline. func (r *Builder) dispatchRouteKeyword(p grammar.Property, op *oaispec.Operation) error { switch p.Keyword.Name { case kwSchemes: @@ -77,11 +77,11 @@ func (r *Builder) dispatchRouteKeyword(p grammar.Property, op *oaispec.Operation case kwSecurity: op.Security = helpers.SecurityRequirements(p.Body) case kwParameters: - return parsers.NewSetParams(r.parameters, opParamSetter(op)).Parse(p.Body) + return routebody.NewSetParams(r.parameters, opParamSetter(op)).Parse(p.Body) case kwResponses: - return parsers.NewSetResponses(r.definitions, r.responses, opResponsesSetter(op)).Parse(p.Body) + return routebody.NewSetResponses(r.definitions, r.responses, opResponsesSetter(op)).Parse(p.Body) case kwExtensions: - return parsers.NewSetExtensions(opExtensionsSetter(op), r.ctx.Debug()).Parse(p.Body) + return routebody.NewSetExtensions(opExtensionsSetter(op), r.ctx.Debug()).Parse(p.Body) } return nil } diff --git a/internal/builders/schema/bridge.go b/internal/builders/schema/bridge.go index a91bc53..76a4f2c 100644 --- a/internal/builders/schema/bridge.go +++ b/internal/builders/schema/bridge.go @@ -6,11 +6,16 @@ package schema import ( "encoding/json" "go/ast" + "strings" + + "github.com/go-openapi/loads/fmts" + yaml "go.yaml.in/yaml/v3" "github.com/go-openapi/codescan/internal/builders/items" "github.com/go-openapi/codescan/internal/ifaces" - "github.com/go-openapi/codescan/internal/parsers" "github.com/go-openapi/codescan/internal/parsers/grammar" + "github.com/go-openapi/codescan/internal/parsers/helpers" + "github.com/go-openapi/codescan/internal/scanner/classify" oaispec "github.com/go-openapi/spec" ) @@ -117,31 +122,36 @@ func applySchemaBlock(b grammar.Block, t schemaBlockTargets) { } // applyExtensionsBody feeds the grammar-captured extension body -// lines through the v1 YAML-aware extension parser so nested / typed -// values (bool, number, list, map) land on ps.Extensions with their +// lines through a YAML → JSON pipeline so nested / typed values +// (bool, number, list, map) land on ps.Extensions with their // semantic types — parity with the legacy schemaVendorExtensibleSetter // path. Unknown x-* names (rejected by IsAllowedExtension) are // silently dropped, matching the legacy reject-with-error behaviour // sufficiently for parity (errors on extension names are rare and // always user-authored). func applyExtensionsBody(ps *oaispec.Schema, body []string) { - yamlParser := parsers.NewYAMLParser( - parsers.WithExtensionMatcher(), - parsers.WithSetter(func(jsonValue json.RawMessage) error { - var data oaispec.Extensions - if err := json.Unmarshal(jsonValue, &data); err != nil { - return err - } - for k, v := range data { - if !parsers.IsAllowedExtension(k) { - continue - } - ps.AddExtension(k, v) - } - return nil - }), - ) - _ = yamlParser.Parse(body) + if len(body) == 0 || (len(body) == 1 && body[0] == "") { + return + } + yamlContent := strings.Join(body, "\n") + var yamlValue any + if err := yaml.Unmarshal([]byte(yamlContent), &yamlValue); err != nil { + return + } + jsonValue, err := fmts.YAMLToJSON(yamlValue) + if err != nil { + return + } + var data oaispec.Extensions + if err := json.Unmarshal(jsonValue, &data); err != nil { + return + } + for k, v := range data { + if !classify.IsAllowedExtension(k) { + continue + } + ps.AddExtension(k, v) + } } func dispatchSchemaKeyword(p grammar.Property, t schemaBlockTargets, valid schemaValidations, scheme *oaispec.SimpleSchema) { @@ -211,11 +221,11 @@ func dispatchStringOrEnum(p grammar.Property, valid schemaValidations, scheme *o // (see .claude/plans/workshops/w2-enum.md §2.6). valid.SetEnum(p.Value) case "default": - if v, err := parsers.ParseValueFromSchema(p.Value, scheme); err == nil { + if v, err := helpers.ParseValueFromSchema(p.Value, scheme); err == nil { valid.SetDefault(v) } case "example": - if v, err := parsers.ParseValueFromSchema(p.Value, scheme); err == nil { + if v, err := helpers.ParseValueFromSchema(p.Value, scheme); err == nil { valid.SetExample(v) } default: @@ -344,8 +354,8 @@ func (s *Builder) applyBlockToField(afld *ast.Field, enclosing *oaispec.Schema, // entire prose header is the description. Legacy output is // JoinDropLast("\n", header); enum-desc extension suffix is // appended last. - ps.Description = parsers.JoinDropLast(block.ProseLines()) - if enumDesc := parsers.GetEnumDesc(ps.Extensions); enumDesc != "" { + ps.Description = helpers.JoinDropLast(block.ProseLines()) + if enumDesc := helpers.GetEnumDesc(ps.Extensions); enumDesc != "" { if ps.Description != "" { ps.Description += "\n" } @@ -389,10 +399,10 @@ func (s *Builder) applyBlockToDecl(schema *oaispec.Schema) (ignored bool) { return true } - title, desc := parsers.CollectScannerTitleDescription(block.ProseLines()) - schema.Title = parsers.JoinDropLast(title) - schema.Description = parsers.JoinDropLast(desc) - if enumDesc := parsers.GetEnumDesc(schema.Extensions); enumDesc != "" { + title, desc := helpers.CollectScannerTitleDescription(block.ProseLines()) + schema.Title = helpers.JoinDropLast(title) + schema.Description = helpers.JoinDropLast(desc) + if enumDesc := helpers.GetEnumDesc(schema.Extensions); enumDesc != "" { if schema.Description != "" { schema.Description += "\n" } diff --git a/internal/builders/schema/typable.go b/internal/builders/schema/typable.go index f7d4eaa..bbf360f 100644 --- a/internal/builders/schema/typable.go +++ b/internal/builders/schema/typable.go @@ -8,7 +8,7 @@ import ( "github.com/go-openapi/codescan/internal/builders/resolvers" "github.com/go-openapi/codescan/internal/ifaces" - "github.com/go-openapi/codescan/internal/parsers" + "github.com/go-openapi/codescan/internal/parsers/helpers" oaispec "github.com/go-openapi/spec" ) @@ -81,7 +81,7 @@ func (st Typable) WithEnumDescription(desc string) { if desc == "" { return } - st.AddExtension(parsers.EnumDescExtension(), desc) + st.AddExtension(helpers.EnumDescExtension(), desc) } func BodyTypable(in string, schema *oaispec.Schema, skipExt bool) (ifaces.SwaggerTypable, *oaispec.Schema) { //nolint:ireturn // polymorphic by design @@ -130,7 +130,7 @@ func (sv schemaValidations) SetEnum(val string) { if len(sv.current.Type) > 0 { typ = sv.current.Type[0] } - sv.current.Enum = parsers.ParseEnum(val, &oaispec.SimpleSchema{Format: sv.current.Format, Type: typ}) + sv.current.Enum = helpers.ParseEnum(val, &oaispec.SimpleSchema{Format: sv.current.Format, Type: typ}) // Q3: a field-level `enum: ...` overrides const-derived values. // When the enum is replaced, any x-go-enum-desc previously set by @@ -139,8 +139,8 @@ func (sv schemaValidations) SetEnum(val string) { // description text may also have had the enum-desc appended to it // (see schema.go's WithSetDescription callback); strip that // suffix so the rendered description isn't misleading. - if enumDesc := parsers.GetEnumDesc(sv.current.Extensions); enumDesc != "" { - delete(sv.current.Extensions, parsers.EnumDescExtension()) + if enumDesc := helpers.GetEnumDesc(sv.current.Extensions); enumDesc != "" { + delete(sv.current.Extensions, helpers.EnumDescExtension()) sv.current.Description = strings.TrimSuffix( strings.TrimSuffix(sv.current.Description, enumDesc), "\n", diff --git a/internal/builders/spec/meta_bridge.go b/internal/builders/spec/meta_bridge.go index 9f3845b..505dc64 100644 --- a/internal/builders/spec/meta_bridge.go +++ b/internal/builders/spec/meta_bridge.go @@ -11,9 +11,9 @@ import ( "regexp" "strings" - "github.com/go-openapi/codescan/internal/parsers" "github.com/go-openapi/codescan/internal/parsers/grammar" "github.com/go-openapi/codescan/internal/parsers/helpers" + "github.com/go-openapi/codescan/internal/scanner/classify" "github.com/go-openapi/loads/fmts" "github.com/go-openapi/spec" yaml "go.yaml.in/yaml/v3" @@ -40,13 +40,13 @@ func applyMetaBlock(swspec *spec.Swagger, block grammar.Block) error { if swspec.Info == nil { swspec.Info = new(spec.Info) } - title, desc := parsers.CollectScannerTitleDescription(block.ProseLines()) - joinedTitle := parsers.JoinDropLast(title) + title, desc := helpers.CollectScannerTitleDescription(block.ProseLines()) + joinedTitle := helpers.JoinDropLast(title) if joinedTitle != "" { joinedTitle = rxStripTitleComments.ReplaceAllString(joinedTitle, "") } swspec.Info.Title = joinedTitle - swspec.Info.Description = parsers.JoinDropLast(desc) + swspec.Info.Description = helpers.JoinDropLast(desc) for p := range block.Properties() { if p.ItemsDepth != 0 { @@ -71,7 +71,7 @@ func dispatchMetaKeyword(p grammar.Property, swspec *spec.Swagger) error { func dispatchMetaSimple(p grammar.Property, swspec *spec.Swagger) bool { switch p.Keyword.Name { case "tos": - swspec.Info.TermsOfService = parsers.JoinDropLast(helpers.DropEmpty(p.Body)) + swspec.Info.TermsOfService = helpers.JoinDropLast(helpers.DropEmpty(p.Body)) case "consumes": swspec.Consumes = helpers.YAMLListBody(p.Body) case "produces": @@ -216,7 +216,7 @@ var ErrBadExtensionName = errors.New("invalid schema extension name, should star // check — every vendor extension key must begin with `x-` or `X-`. func validateExtensionNames(ext spec.Extensions) error { for k := range ext { - if !parsers.IsAllowedExtension(k) { + if !classify.IsAllowedExtension(k) { return fmt.Errorf("%w: %s", ErrBadExtensionName, k) } } diff --git a/internal/parsers/enum_test.go b/internal/parsers/enum_test.go deleted file mode 100644 index f1a7692..0000000 --- a/internal/parsers/enum_test.go +++ /dev/null @@ -1,145 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "go/ast" - "go/token" - "testing" - - "github.com/go-openapi/testify/v2/assert" - "github.com/go-openapi/testify/v2/require" - - "github.com/go-openapi/spec" -) - -func Test_getEnumBasicLitValue(t *testing.T) { - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.INT, Value: "0"}, int64(0)) - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.INT, Value: "-1"}, int64(-1)) - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.INT, Value: "42"}, int64(42)) - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.INT, Value: ""}, nil) - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.INT, Value: "word"}, nil) - - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.FLOAT, Value: "0"}, float64(0)) - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.FLOAT, Value: "-1"}, float64(-1)) - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.FLOAT, Value: "42"}, float64(42)) - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.FLOAT, Value: "1.1234"}, float64(1.1234)) - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.FLOAT, Value: "1.9876"}, float64(1.9876)) - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.FLOAT, Value: ""}, nil) - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.FLOAT, Value: "word"}, nil) - - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.STRING, Value: "Foo"}, "Foo") - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.STRING, Value: ""}, "") - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.STRING, Value: "0"}, "0") - verifyGetEnumBasicLitValue(t, ast.BasicLit{Kind: token.STRING, Value: "1.1"}, "1.1") -} - -func verifyGetEnumBasicLitValue(t *testing.T, basicLit ast.BasicLit, expected any) { - actual := GetEnumBasicLitValue(&basicLit) - - assert.Equal(t, expected, actual) -} - -func TestParseValueFromSchema(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - input string - schema *spec.SimpleSchema - want any - }{ - {"nil schema", "hello", nil, "hello"}, - {"string", "hello", &spec.SimpleSchema{Type: "string"}, "hello"}, - {"integer", "42", &spec.SimpleSchema{Type: "integer"}, 42}, - {"int64", "100", &spec.SimpleSchema{Type: "int64"}, 100}, - {"bool true", "true", &spec.SimpleSchema{Type: "bool"}, true}, - {"boolean false", "false", &spec.SimpleSchema{Type: "boolean"}, false}, - {"float64", "3.14", &spec.SimpleSchema{Type: "float64"}, float64(3.14)}, - {"number", "2.5", &spec.SimpleSchema{Type: "number"}, float64(2.5)}, - {"object valid", `{"a":"b"}`, &spec.SimpleSchema{Type: "object"}, map[string]any{"a": "b"}}, - {"object invalid json", `not-json`, &spec.SimpleSchema{Type: "object"}, "not-json"}, - {"array valid", `[1,2,3]`, &spec.SimpleSchema{Type: "array"}, []any{float64(1), float64(2), float64(3)}}, - {"array invalid json", `not-json`, &spec.SimpleSchema{Type: "array"}, "not-json"}, - {"unknown type", "raw", &spec.SimpleSchema{Type: "custom"}, "raw"}, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - got, err := ParseValueFromSchema(tc.input, tc.schema) - require.NoError(t, err) - assert.Equal(t, tc.want, got) - }) - } - - t.Run("integer parse error", func(t *testing.T) { - _, err := ParseValueFromSchema("not-a-number", &spec.SimpleSchema{Type: "integer"}) - require.Error(t, err) - }) - - t.Run("bool parse error", func(t *testing.T) { - _, err := ParseValueFromSchema("maybe", &spec.SimpleSchema{Type: "bool"}) - require.Error(t, err) - }) -} - -func TestParseEnum(t *testing.T) { - t.Parallel() - - t.Run("JSON format strings", func(t *testing.T) { - result := ParseEnum(`["a","b","c"]`, &spec.SimpleSchema{Type: "string"}) - assert.Equal(t, []any{"a", "b", "c"}, result) - }) - - t.Run("JSON format integers", func(t *testing.T) { - result := ParseEnum(`[1,2,3]`, &spec.SimpleSchema{Type: "integer"}) - assert.Equal(t, []any{1, 2, 3}, result) - }) - - t.Run("old comma-separated format", func(t *testing.T) { - result := ParseEnum("a,b,c", &spec.SimpleSchema{Type: "string"}) - assert.Equal(t, []any{"a", "b", "c"}, result) - }) - - t.Run("old format integers", func(t *testing.T) { - result := ParseEnum("1,2,3", &spec.SimpleSchema{Type: "integer"}) - assert.Equal(t, []any{1, 2, 3}, result) - }) - - t.Run("old format with parse error fallback", func(t *testing.T) { - // "abc" cannot be parsed as integer → fallback to raw string - result := ParseEnum("abc,2,xyz", &spec.SimpleSchema{Type: "integer"}) - assert.Equal(t, []any{"abc", 2, "xyz"}, result) - }) - - t.Run("JSON format with parse error fallback", func(t *testing.T) { - // JSON array of integers, but "abc" can't parse as integer → fallback - result := ParseEnum(`["abc",2,"xyz"]`, &spec.SimpleSchema{Type: "integer"}) - assert.Equal(t, []any{"abc", 2, "xyz"}, result) - }) -} - -func TestGetEnumDesc(t *testing.T) { - t.Parallel() - - t.Run("with extension", func(t *testing.T) { - ext := spec.Extensions{"x-go-enum-desc": "Active - active state\nInactive - inactive state"} - assert.EqualT(t, "Active - active state\nInactive - inactive state", GetEnumDesc(ext)) - }) - - t.Run("without extension", func(t *testing.T) { - ext := spec.Extensions{} - assert.EqualT(t, "", GetEnumDesc(ext)) - }) - - t.Run("nil extensions", func(t *testing.T) { - assert.EqualT(t, "", GetEnumDesc(nil)) - }) -} - -func TestEnumDescExtension(t *testing.T) { - t.Parallel() - - assert.EqualT(t, "x-go-enum-desc", EnumDescExtension()) -} diff --git a/internal/parsers/errors.go b/internal/parsers/errors.go deleted file mode 100644 index 0fb4e79..0000000 --- a/internal/parsers/errors.go +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import "errors" - -// ErrParser is the sentinel error for all errors originating from the parsers package. -var ErrParser = errors.New("codescan:parsers") diff --git a/internal/parsers/enum.go b/internal/parsers/helpers/enum.go similarity index 53% rename from internal/parsers/enum.go rename to internal/parsers/helpers/enum.go index 1351853..950be1d 100644 --- a/internal/parsers/enum.go +++ b/internal/parsers/helpers/enum.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers // SPDX-License-Identifier: Apache-2.0 -package parsers +package helpers import ( "encoding/json" @@ -14,17 +14,17 @@ import ( ) // ParseValueFromSchema converts a raw annotation value to the Go -// representation implied by the target schema's Type/Format. Used by -// default:/example: setters where the annotation body is a primitive -// literal whose meaning depends on the target: `default: 3` becomes -// int(3) against `Type: "integer"`, "3" against `Type: "string"`, and -// so on. JSON-typed targets (`object`, `array`) attempt unmarshal and -// fall back to the raw string on invalid JSON. +// representation implied by the target schema's Type/Format. Used +// by default:/example: setters where the annotation body is a +// primitive literal whose meaning depends on the target: +// `default: 3` becomes int(3) against `Type: "integer"`, "3" +// against `Type: "string"`, and so on. JSON-typed targets +// (`object`, `array`) attempt unmarshal and fall back to the raw +// string on invalid JSON. // // A nil schema yields the raw string unchanged. Numeric/boolean -// parsing errors are surfaced to the caller; JSON-parse failures are -// absorbed (documented as a v1 quirk and currently preserved for -// parity). +// parsing errors are surfaced to the caller; JSON-parse failures +// are absorbed (v1 quirk; preserved for parity). func ParseValueFromSchema(s string, schema *spec.SimpleSchema) (any, error) { if schema == nil { return s, nil @@ -54,57 +54,55 @@ func ParseValueFromSchema(s string, schema *spec.SimpleSchema) (any, error) { } } -func parseEnumOld(val string, s *spec.SimpleSchema) []any { - // Trim per-value whitespace so `enum: low, medium, high` produces - // ["low", "medium", "high"] rather than the v1-legacy - // ["low", " medium", " high"] — a long-standing quirk documented - // as W2 §2.6 quirk 1 and fixed here to converge with the v2 - // enum sub-parser's behavior before the P5 schema migration. - // JSON-array values go through ParseEnum's quoted path (below) - // which preserves intentional whitespace inside quotes. - list := strings.Split(val, ",") - interfaceSlice := make([]any, len(list)) - for i, d := range list { - d = strings.TrimSpace(d) - v, err := ParseValueFromSchema(d, s) - if err != nil { - interfaceSlice[i] = d - continue - } - - interfaceSlice[i] = v - } - return interfaceSlice -} - +// ParseEnum turns an `enum: …` annotation value into a typed []any. +// Accepts the JSON-array form (`enum: ["a","b"]`) and the +// comma-list form (`enum: a, b`). Per-value typing is applied via +// ParseValueFromSchema against the target's scheme. func ParseEnum(val string, s *spec.SimpleSchema) []any { - // obtain the raw elements of the list to latter process them with the ParseValueFromSchema var rawElements []json.RawMessage if err := json.Unmarshal([]byte(val), &rawElements); err != nil { log.Print("WARNING: item list for enum is not a valid JSON array, using the old deprecated format") - return parseEnumOld(val, s) + return parseEnumCommaList(val, s) } - interfaceSlice := make([]any, len(rawElements)) - + out := make([]any, len(rawElements)) for i, d := range rawElements { ds, err := strconv.Unquote(string(d)) if err != nil { ds = string(d) } - v, err := ParseValueFromSchema(ds, s) if err != nil { - interfaceSlice[i] = ds + out[i] = ds continue } - - interfaceSlice[i] = v + out[i] = v } + return out +} - return interfaceSlice +// parseEnumCommaList handles the legacy `enum: a, b, c` form. Per- +// value whitespace is trimmed (W2 §2.6 quirk 1 fix). Parse errors +// on individual values fall back to the raw string. +func parseEnumCommaList(val string, s *spec.SimpleSchema) []any { + list := strings.Split(val, ",") + out := make([]any, len(list)) + for i, d := range list { + d = strings.TrimSpace(d) + v, err := ParseValueFromSchema(d, s) + if err != nil { + out[i] = d + continue + } + out[i] = v + } + return out } +// GetEnumBasicLitValue converts a Go AST basic literal (the RHS of +// a `const Foo Kind = "bar"` declaration) into its runtime value — +// the representation the scanner's enum-discovery passes emit as +// enum entries on the corresponding Swagger schema. func GetEnumBasicLitValue(basicLit *ast.BasicLit) any { switch basicLit.Kind.String() { case "INT": @@ -121,13 +119,19 @@ func GetEnumBasicLitValue(basicLit *ast.BasicLit) any { return nil } -const extEnumDesc = "x-go-enum-desc" +// ExtEnumDesc is the vendor-extension key used to expose the +// per-enum-value documentation line the scanner builds from +// `swagger:enum` + const comments. +const ExtEnumDesc = "x-go-enum-desc" -func GetEnumDesc(extensions spec.Extensions) (desc string) { - desc, _ = extensions.GetString(extEnumDesc) +// GetEnumDesc reads the x-go-enum-desc extension off a Swagger +// extensions map, if present. Empty string when absent. +func GetEnumDesc(extensions spec.Extensions) string { + desc, _ := extensions.GetString(ExtEnumDesc) return desc } -func EnumDescExtension() string { - return extEnumDesc -} +// EnumDescExtension returns the vendor-extension key. Call-sites +// use it to AddExtension / delete stale entries without hard- +// coding the string. +func EnumDescExtension() string { return ExtEnumDesc } diff --git a/internal/parsers/indent.go b/internal/parsers/helpers/indent.go similarity index 57% rename from internal/parsers/indent.go rename to internal/parsers/helpers/indent.go index d620483..0405960 100644 --- a/internal/parsers/indent.go +++ b/internal/parsers/helpers/indent.go @@ -1,16 +1,29 @@ // SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers // SPDX-License-Identifier: Apache-2.0 -package parsers +package helpers -import "strings" +import ( + "regexp" + "strings" +) + +// rxIndent matches leading whitespace/comment noise up to (and +// including) the first non-whitespace character, used to detect +// the common indent on the first line of a YAML body. +var rxIndent = regexp.MustCompile(`[\p{Zs}\t]*/*[\p{Zs}\t]*[^\p{Zs}\t]`) + +// rxNotIndent matches the first non-whitespace character — used to +// cap the tab→space conversion so we only rewrite leading-indent +// tabs, not tabs embedded inside content. +var rxNotIndent = regexp.MustCompile(`[^\p{Zs}\t]`) // RemoveIndent normalises the common leading indentation on a YAML // body: it strips the first line's indent from every line and // converts remaining tab indentation to two-space equivalents. The // operations bridge calls this on grammar-isolated YAML fence -// bodies so tab-indented godoc-style YAML (e.g., the go119 fixture) -// parses correctly. +// bodies so tab-indented godoc-style YAML (e.g., the go119 +// fixture) parses correctly. func RemoveIndent(spec []string) []string { if len(spec) == 0 { return spec diff --git a/internal/parsers/helpers/lines.go b/internal/parsers/helpers/lines.go new file mode 100644 index 0000000..4ad48a8 --- /dev/null +++ b/internal/parsers/helpers/lines.go @@ -0,0 +1,62 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package helpers + +import ( + "regexp" + "strings" +) + +// JoinDropLast joins lines with "\n" and, if the trailing line is +// whitespace-only, drops it first. Mirrors the legacy +// SectionedParser's description-accumulator shape so bridge +// outputs match v1 parity on field/method descriptions. +func JoinDropLast(lines []string) string { + l := len(lines) + lns := lines + if l > 0 && len(strings.TrimSpace(lines[l-1])) == 0 { + lns = lines[:l-1] + } + return strings.Join(lns, "\n") +} + +// Setter returns a closure that joins lines and writes to target — +// the shape the SectionedParser title/description callbacks +// expected. +func Setter(target *string) func([]string) { + return func(lines []string) { + *target = JoinDropLast(lines) + } +} + +// CleanupScannerLines strips the regex's match from each line and +// trims leading / trailing runs of now-empty lines. Used by the +// legacy-body parsers (extensions) and by +// CollectScannerTitleDescription. +func CleanupScannerLines(lines []string, ur *regexp.Regexp) []string { + if len(lines) == 0 { + return lines + } + + seenLine := -1 + var lastContent int + + uncommented := make([]string, 0, len(lines)) + for i, v := range lines { + str := ur.ReplaceAllString(v, "") + uncommented = append(uncommented, str) + if str != "" { + if seenLine < 0 { + seenLine = i + } + lastContent = i + } + } + + if seenLine == -1 { + return nil + } + + return uncommented[seenLine : lastContent+1] +} diff --git a/internal/parsers/lines_test.go b/internal/parsers/helpers/lines_test.go similarity index 68% rename from internal/parsers/lines_test.go rename to internal/parsers/helpers/lines_test.go index ead2970..2e19ec2 100644 --- a/internal/parsers/lines_test.go +++ b/internal/parsers/helpers/lines_test.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers // SPDX-License-Identifier: Apache-2.0 -package parsers +package helpers import ( "testing" @@ -43,24 +43,3 @@ func TestSetter(t *testing.T) { set([]string{"line1", "line2", ""}) assert.EqualT(t, "line1\nline2", target) } - -func TestRemoveEmptyLines(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - input []string - want []string - }{ - {"nil", nil, []string{}}, - {"all empty", []string{"", " ", "\t"}, []string{}}, - {"mixed", []string{"hello", "", "world", " "}, []string{"hello", "world"}}, - {"no empty", []string{"a", "b"}, []string{"a", "b"}}, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - assert.Equal(t, tc.want, removeEmptyLines(tc.input)) - }) - } -} diff --git a/internal/parsers/helpers/title_desc.go b/internal/parsers/helpers/title_desc.go new file mode 100644 index 0000000..5351fda --- /dev/null +++ b/internal/parsers/helpers/title_desc.go @@ -0,0 +1,75 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package helpers + +import ( + "regexp" + "strings" +) + +// rxUncommentHeaders strips the leading `[whitespace/tabs/slashes/ +// asterisks/dashes]*|?` prefix from a header line — used by +// CollectScannerTitleDescription to normalise comment-marker noise +// before the title/description split. +var rxUncommentHeaders = regexp.MustCompile(`^[\p{Zs}\t/\*-]*\|?`) + +// rxPunctuationEnd matches a unicode punctuation character at +// end-of-line; a prose first line that ends with one is promoted +// to title when no blank separates title from description. +var rxPunctuationEnd = regexp.MustCompile(`\p{Po}$`) + +// rxTitleStart matches a leading `# ` / `## ` markdown heading +// prefix — another trigger for the first-line-is-title heuristic. +var rxTitleStart = regexp.MustCompile(`^[#]+\p{Zs}+`) + +// CollectScannerTitleDescription splits header lines (free-form +// prose appearing before the first recognized tag in a comment +// block) into title and description slices, following the legacy +// SectionedParser heuristics: +// +// - A blank-line separator splits after cleanup. +// - Absent that, a first line ending in punctuation or matching +// a markdown heading prefix is promoted to title. +// - Otherwise everything is description. +// +// Used by the grammar-side bridges (schema decl / operations / +// routes / meta) to reconstruct v1's title/description shapes. +func CollectScannerTitleDescription(headers []string) (title, desc []string) { + hdrs := CleanupScannerLines(headers, rxUncommentHeaders) + + idx := -1 + for i, line := range hdrs { + if strings.TrimSpace(line) == "" { + idx = i + break + } + } + + if idx > -1 { + title = hdrs[:idx] + if len(title) > 0 { + title[0] = rxTitleStart.ReplaceAllString(title[0], "") + } + if len(hdrs) > idx+1 { + desc = hdrs[idx+1:] + } + return title, desc + } + + if len(hdrs) > 0 { + line := hdrs[0] + switch { + case rxPunctuationEnd.MatchString(line): + title = []string{line} + desc = hdrs[1:] + case rxTitleStart.MatchString(line): + title = []string{rxTitleStart.ReplaceAllString(line, "")} + desc = hdrs[1:] + default: + desc = hdrs + } + } + + return title, desc +} diff --git a/internal/parsers/parsers_helpers_test.go b/internal/parsers/helpers/title_desc_test.go similarity index 73% rename from internal/parsers/parsers_helpers_test.go rename to internal/parsers/helpers/title_desc_test.go index bc5f5c7..cd9836a 100644 --- a/internal/parsers/parsers_helpers_test.go +++ b/internal/parsers/helpers/title_desc_test.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers // SPDX-License-Identifier: Apache-2.0 -package parsers +package helpers import ( "testing" @@ -19,7 +19,7 @@ func TestCollectScannerTitleDescription(t *testing.T) { "// This is the description.", "// More description.", } - title, desc := collectScannerTitleDescription(headers) + title, desc := CollectScannerTitleDescription(headers) assert.Equal(t, []string{"This is the title."}, title) assert.Equal(t, []string{"This is the description.", "More description."}, desc) }) @@ -29,7 +29,7 @@ func TestCollectScannerTitleDescription(t *testing.T) { "// A single title line.", "// And some description.", } - title, desc := collectScannerTitleDescription(headers) + title, desc := CollectScannerTitleDescription(headers) assert.Equal(t, []string{"A single title line."}, title) assert.Equal(t, []string{"And some description."}, desc) }) @@ -39,7 +39,7 @@ func TestCollectScannerTitleDescription(t *testing.T) { "// # My Title", "// Description here.", } - title, desc := collectScannerTitleDescription(headers) + title, desc := CollectScannerTitleDescription(headers) assert.Equal(t, []string{"My Title"}, title) assert.Equal(t, []string{"Description here."}, desc) }) @@ -49,25 +49,21 @@ func TestCollectScannerTitleDescription(t *testing.T) { "// no punctuation at end means no title", "// more text", } - title, desc := collectScannerTitleDescription(headers) + title, desc := CollectScannerTitleDescription(headers) assert.Empty(t, title) assert.Equal(t, []string{"no punctuation at end means no title", "more text"}, desc) }) t.Run("empty", func(t *testing.T) { - title, desc := collectScannerTitleDescription(nil) + title, desc := CollectScannerTitleDescription(nil) assert.Empty(t, title) assert.Empty(t, desc) }) t.Run("blank line only", func(t *testing.T) { headers := []string{"//"} - title, desc := collectScannerTitleDescription(headers) + title, desc := CollectScannerTitleDescription(headers) assert.Empty(t, title) assert.Nil(t, desc) }) - - // Note: the branch at line 31-32 (desc = nil when blank is last line) - // is unreachable because cleanupScannerLines always trims trailing blanks - // before collectScannerTitleDescription processes the slice. } diff --git a/internal/parsers/lines.go b/internal/parsers/lines.go deleted file mode 100644 index e79c041..0000000 --- a/internal/parsers/lines.go +++ /dev/null @@ -1,43 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import "strings" - -func JoinDropLast(lines []string) string { - l := len(lines) - lns := lines - if l > 0 && len(strings.TrimSpace(lines[l-1])) == 0 { - lns = lines[:l-1] - } - return strings.Join(lns, "\n") -} - -// Setter sets a string field from a multi lines comment. -// -// Usage: -// -// Setter(&op.Description) -// Setter(&op.Summary) -// -// Replaces this idiom: -// -// parsers.WithSetDescription(func(lines []string) { op.Description = parsers.JoinDropLast(lines) }), -func Setter(target *string) func([]string) { - return func(lines []string) { - *target = JoinDropLast(lines) - } -} - -func removeEmptyLines(lines []string) []string { - notEmpty := make([]string, 0, len(lines)) - - for _, l := range lines { - if len(strings.TrimSpace(l)) > 0 { - notEmpty = append(notEmpty, l) - } - } - - return notEmpty -} diff --git a/internal/parsers/matchers.go b/internal/parsers/matchers.go index 79dfc85..453d1e0 100644 --- a/internal/parsers/matchers.go +++ b/internal/parsers/matchers.go @@ -23,10 +23,6 @@ func IsAliasParam(prop ifaces.SwaggerTypable) bool { return in == "query" || in == "path" || in == "formData" } -func IsAllowedExtension(ext string) bool { - return rxAllowedExtensions.MatchString(ext) -} - func ExtractAnnotation(line string) (string, bool) { matches := rxSwaggerAnnotation.FindStringSubmatch(line) if len(matches) < minMatchCount { diff --git a/internal/parsers/matchers_test.go b/internal/parsers/matchers_test.go index 630daf8..7c6a68a 100644 --- a/internal/parsers/matchers_test.go +++ b/internal/parsers/matchers_test.go @@ -79,28 +79,6 @@ func TestIsAliasParam(t *testing.T) { } } -func TestIsAllowedExtension(t *testing.T) { - t.Parallel() - - tests := []struct { - ext string - want bool - }{ - {"x-foo", true}, - {"X-bar", true}, - {"x-", true}, - {"y-foo", false}, - {"foo", false}, - {"", false}, - } - - for _, tc := range tests { - t.Run(tc.ext, func(t *testing.T) { - assert.EqualT(t, tc.want, IsAllowedExtension(tc.ext)) - }) - } -} - func TestExtractAnnotation(t *testing.T) { t.Parallel() diff --git a/internal/parsers/parsers_helpers.go b/internal/parsers/parsers_helpers.go deleted file mode 100644 index 1949b39..0000000 --- a/internal/parsers/parsers_helpers.go +++ /dev/null @@ -1,98 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "regexp" - "strings" -) - -// cleanupScannerLines strips comment-marker noise (matching ur) from -// each line and trims leading/trailing all-empty runs. Used by the -// legacy body parsers for consumes/produces (rxUncommentNoDash) and -// extensions (rxUncommentHeaders), plus the grammar-side prose -// splitter in CollectScannerTitleDescription. -func cleanupScannerLines(lines []string, ur *regexp.Regexp) []string { - if len(lines) == 0 { - return lines - } - - seenLine := -1 - var lastContent int - - uncommented := make([]string, 0, len(lines)) - for i, v := range lines { - str := ur.ReplaceAllString(v, "") - uncommented = append(uncommented, str) - if str != "" { - if seenLine < 0 { - seenLine = i - } - lastContent = i - } - } - - if seenLine == -1 { - return nil - } - - return uncommented[seenLine : lastContent+1] -} - -// CollectScannerTitleDescription splits header lines (free-form prose -// appearing before the first recognized tag in a comment block) into -// title and description slices, following the legacy SectionedParser -// heuristics: a blank-line separator splits after cleanup; absent -// that, a first line ending in punctuation or matching a markdown -// heading prefix is promoted to title; otherwise everything is -// description. -// -// Exposed for grammar-side bridges that reuse the same split over -// grammar.Block.ProseLines(). -func CollectScannerTitleDescription(headers []string) (title, desc []string) { - return collectScannerTitleDescription(headers) -} - -// a shared function that can be used to split given headers -// into a title and description. -func collectScannerTitleDescription(headers []string) (title, desc []string) { - hdrs := cleanupScannerLines(headers, rxUncommentHeaders) - - idx := -1 - for i, line := range hdrs { - if strings.TrimSpace(line) == "" { - idx = i - break - } - } - - if idx > -1 { - title = hdrs[:idx] - if len(title) > 0 { - title[0] = rxTitleStart.ReplaceAllString(title[0], "") - } - if len(hdrs) > idx+1 { - desc = hdrs[idx+1:] - } else { - desc = nil - } - return title, desc - } - - if len(hdrs) > 0 { - line := hdrs[0] - switch { - case rxPunctuationEnd.MatchString(line): - title = []string{line} - desc = hdrs[1:] - case rxTitleStart.MatchString(line): - title = []string{rxTitleStart.ReplaceAllString(line, "")} - desc = hdrs[1:] - default: - desc = hdrs - } - } - - return title, desc -} diff --git a/internal/parsers/regexprs.go b/internal/parsers/regexprs.go index e4cbefb..e313618 100644 --- a/internal/parsers/regexprs.go +++ b/internal/parsers/regexprs.go @@ -100,8 +100,7 @@ var ( rxOpTags + ")?\\p{Zs}+" + rxOpID + "\\p{Zs}*$") - rxUncommentHeaders = regexp.MustCompile(`^[\p{Zs}\t/\*-]*\|?`) - rxOperation = regexp.MustCompile( + rxOperation = regexp.MustCompile( rxCommentPrefix + "swagger:operation\\p{Zs}*" + rxMethod + @@ -112,18 +111,8 @@ var ( ")?\\p{Zs}+" + rxOpID + "\\p{Zs}*$") - rxIndent = regexp.MustCompile(`[\p{Zs}\t]*/*[\p{Zs}\t]*[^\p{Zs}\t]`) - rxNotIndent = regexp.MustCompile(`[^\p{Zs}\t]`) - rxPunctuationEnd = regexp.MustCompile(`\p{Po}$`) - rxTitleStart = regexp.MustCompile(`^[#]+\p{Zs}+`) - rxAllowedExtensions = regexp.MustCompile(`^[Xx]-`) - - rxIn = regexp.MustCompile(rxCommentPrefix + `[Ii]n\p{Zs}*:\p{Zs}*(query|path|header|body|formData)(?:\.)?$`) - rxRequired = regexp.MustCompile(rxCommentPrefix + `[Rr]equired\p{Zs}*:\p{Zs}*(true|false)(?:\.)?$`) - rxSecurity = regexp.MustCompile(rxCommentPrefix + `[Ss]ecurity\p{Zs}*[Dd]efinitions:`) - rxResponses = regexp.MustCompile(rxCommentPrefix + `[Rr]esponses\p{Zs}*:`) - rxParameters = regexp.MustCompile(rxCommentPrefix + `[Pp]arameters\p{Zs}*:`) - rxExtensions = regexp.MustCompile(rxCommentPrefix + `[Ee]xtensions\p{Zs}*:`) + rxIn = regexp.MustCompile(rxCommentPrefix + `[Ii]n\p{Zs}*:\p{Zs}*(query|path|header|body|formData)(?:\.)?$`) + rxRequired = regexp.MustCompile(rxCommentPrefix + `[Rr]equired\p{Zs}*:\p{Zs}*(true|false)(?:\.)?$`) ) func Rxf(rxp, ar string) *regexp.Regexp { diff --git a/internal/parsers/routebody/errors.go b/internal/parsers/routebody/errors.go new file mode 100644 index 0000000..97c6390 --- /dev/null +++ b/internal/parsers/routebody/errors.go @@ -0,0 +1,10 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package routebody + +import "errors" + +// ErrParser is the sentinel error for failures originating in the +// swagger:route body parsers (parameters / responses / extensions). +var ErrParser = errors.New("codescan:parsers/routebody") diff --git a/internal/parsers/extensions.go b/internal/parsers/routebody/extensions.go similarity index 97% rename from internal/parsers/extensions.go rename to internal/parsers/routebody/extensions.go index d95445f..df4f724 100644 --- a/internal/parsers/extensions.go +++ b/internal/parsers/routebody/extensions.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers // SPDX-License-Identifier: Apache-2.0 -package parsers +package routebody import ( "fmt" @@ -10,6 +10,7 @@ import ( "strings" "github.com/go-openapi/codescan/internal/logger" + "github.com/go-openapi/codescan/internal/parsers/helpers" oaispec "github.com/go-openapi/spec" ) @@ -39,7 +40,7 @@ func (ss *SetOpExtensions) Parse(lines []string) error { return nil } - cleanLines := cleanupScannerLines(lines, rxUncommentHeaders) + cleanLines := helpers.CleanupScannerLines(lines, rxUncommentHeaders) exts := new(oaispec.VendorExtensible) extList := make([]extensionObject, 0) diff --git a/internal/parsers/extensions_test.go b/internal/parsers/routebody/extensions_test.go similarity index 99% rename from internal/parsers/extensions_test.go rename to internal/parsers/routebody/extensions_test.go index c55bee4..7158646 100644 --- a/internal/parsers/extensions_test.go +++ b/internal/parsers/routebody/extensions_test.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers // SPDX-License-Identifier: Apache-2.0 -package parsers +package routebody import ( "testing" diff --git a/internal/parsers/routebody/regexprs.go b/internal/parsers/routebody/regexprs.go new file mode 100644 index 0000000..d4f07ca --- /dev/null +++ b/internal/parsers/routebody/regexprs.go @@ -0,0 +1,33 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +// Package routebody hosts the legacy regex-era body parsers that +// consume the indented "parameters:", "responses:", and "extensions:" +// blocks inside `swagger:route` comment docs. +// +// These parsers are the last citadel of the pre-grammar pipeline. +// They are consumed exclusively by internal/builders/routes/bridge.go +// — no other builder touches them. When routes/bridge.go grows a +// grammar-native body pipeline, this whole package can be deleted. +package routebody + +import "regexp" + +// rxCommentPrefix matches leading comment noise (whitespace, tabs, +// slashes, asterisks, dashes, optional markdown table pipe) before +// a keyword. Mirrors parsers.rxCommentPrefix — duplicated here so +// this package is self-contained and doesn't re-import parsers/. +const rxCommentPrefix = `^[\p{Zs}\t/\*-]*\|?\p{Zs}*` + +var ( + rxResponses = regexp.MustCompile(rxCommentPrefix + `[Rr]esponses\p{Zs}*:`) + rxParameters = regexp.MustCompile(rxCommentPrefix + `[Pp]arameters\p{Zs}*:`) + rxExtensions = regexp.MustCompile(rxCommentPrefix + `[Ee]xtensions\p{Zs}*:`) + + rxAllowedExtensions = regexp.MustCompile(`^[Xx]-`) + + // rxUncommentHeaders strips leading comment-marker noise from a + // raw line. Consumed by helpers.CleanupScannerLines in the + // extensions body parser. + rxUncommentHeaders = regexp.MustCompile(`^[\p{Zs}\t/\*-]*\|?`) +) diff --git a/internal/parsers/responses.go b/internal/parsers/routebody/responses.go similarity index 99% rename from internal/parsers/responses.go rename to internal/parsers/routebody/responses.go index 53373e4..460d39a 100644 --- a/internal/parsers/responses.go +++ b/internal/parsers/routebody/responses.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers // SPDX-License-Identifier: Apache-2.0 -package parsers +package routebody import ( "fmt" diff --git a/internal/parsers/responses_test.go b/internal/parsers/routebody/responses_test.go similarity index 99% rename from internal/parsers/responses_test.go rename to internal/parsers/routebody/responses_test.go index 7fca1f7..f88c74a 100644 --- a/internal/parsers/responses_test.go +++ b/internal/parsers/routebody/responses_test.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers // SPDX-License-Identifier: Apache-2.0 -package parsers +package routebody import ( "testing" diff --git a/internal/parsers/route_params.go b/internal/parsers/routebody/route_params.go similarity index 99% rename from internal/parsers/route_params.go rename to internal/parsers/routebody/route_params.go index 2460d2f..19d8a17 100644 --- a/internal/parsers/route_params.go +++ b/internal/parsers/routebody/route_params.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers // SPDX-License-Identifier: Apache-2.0 -package parsers +package routebody import ( "fmt" diff --git a/internal/parsers/route_params_test.go b/internal/parsers/routebody/route_params_test.go similarity index 99% rename from internal/parsers/route_params_test.go rename to internal/parsers/routebody/route_params_test.go index da68209..93d19db 100644 --- a/internal/parsers/route_params_test.go +++ b/internal/parsers/routebody/route_params_test.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers // SPDX-License-Identifier: Apache-2.0 -package parsers +package routebody import ( "testing" diff --git a/internal/parsers/yaml_parser.go b/internal/parsers/yaml_parser.go deleted file mode 100644 index 8643921..0000000 --- a/internal/parsers/yaml_parser.go +++ /dev/null @@ -1,106 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "encoding/json" - "regexp" - "strings" - - "github.com/go-openapi/loads/fmts" - "go.yaml.in/yaml/v3" -) - -type YAMLParserOption func(*YAMLParser) - -func WithSetter(set func(json.RawMessage) error) YAMLParserOption { - return func(p *YAMLParser) { - p.set = set - } -} - -func WithMatcher(rx *regexp.Regexp) YAMLParserOption { - return func(p *YAMLParser) { - p.rx = rx - } -} - -func WithExtensionMatcher() YAMLParserOption { - return func(p *YAMLParser) { - p.rx = rxExtensions - } -} - -type YAMLParser struct { - set func(json.RawMessage) error - rx *regexp.Regexp -} - -func NewYAMLParser(opts ...YAMLParserOption) *YAMLParser { - var y YAMLParser - for _, apply := range opts { - apply(&y) - } - - return &y -} - -func (y *YAMLParser) Parse(lines []string) error { - if len(lines) == 0 || (len(lines) == 1 && len(lines[0]) == 0) { - return nil - } - - uncommented := make([]string, 0, len(lines)) - uncommented = append(uncommented, removeYamlIndent(lines)...) - - yamlContent := strings.Join(uncommented, "\n") - var yamlValue any - err := yaml.Unmarshal([]byte(yamlContent), &yamlValue) - if err != nil { - return err - } - - var jsonValue json.RawMessage - jsonValue, err = fmts.YAMLToJSON(yamlValue) - if err != nil { - return err - } - - if y.set == nil { - return nil - } - - return y.set(jsonValue) -} - -func (y *YAMLParser) Matches(line string) bool { - if y.rx == nil { - return false - } - - return y.rx.MatchString(line) -} - -// removes indent base on the first line. -// -// The difference with removeIndent is that lines shorter than the indentation are elided. -func removeYamlIndent(spec []string) []string { - if len(spec) == 0 { - return spec - } - - loc := rxIndent.FindStringIndex(spec[0]) - if len(loc) < 2 || loc[1] <= 1 { - return spec - } - - s := make([]string, 0, len(spec)) - for i := range spec { - if len(spec[i]) >= loc[1] { - s = append(s, spec[i][loc[1]-1:]) - } - } - - return s -} diff --git a/internal/parsers/yaml_parser_test.go b/internal/parsers/yaml_parser_test.go deleted file mode 100644 index d4cb200..0000000 --- a/internal/parsers/yaml_parser_test.go +++ /dev/null @@ -1,141 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers -// SPDX-License-Identifier: Apache-2.0 - -package parsers - -import ( - "encoding/json" - "errors" - "testing" - - "github.com/go-openapi/testify/v2/require" -) - -var errSetterFailed = errors.New("setter failed") - -func TestYamlParser(t *testing.T) { - t.Parallel() - - setter := func(out *string, called *int) func(json.RawMessage) error { - return func(in json.RawMessage) error { - *called++ - *out = string(in) - - return nil - } - } - - t.Run("with happy path", func(t *testing.T) { - t.Run("should parse security definitions object as YAML", func(t *testing.T) { - setterCalled := 0 - var actualJSON string - parser := NewYAMLParser(WithMatcher(rxSecurity), WithSetter(setter(&actualJSON, &setterCalled))) - - lines := []string{ - "SecurityDefinitions:", - " api_key:", - " type: apiKey", - " name: X-API-KEY", - " petstore_auth:", - " type: oauth2", - " scopes:", - " 'write:pets': modify pets in your account", - " 'read:pets': read your pets", - } - - require.TrueT(t, parser.Matches(lines[0])) - require.NoError(t, parser.Parse(lines)) - require.EqualT(t, 1, setterCalled) - - const expectedJSON = `{"SecurityDefinitions":{"api_key":{"name":"X-API-KEY","type":"apiKey"},` + - `"petstore_auth":{"scopes":{"read:pets":"read your pets","write:pets":"modify pets in your account"},"type":"oauth2"}}}` - - require.JSONEqT(t, expectedJSON, actualJSON) - }) - }) - - t.Run("with edge cases", func(t *testing.T) { - t.Run("should handle empty input", func(t *testing.T) { - setterCalled := 0 - var actualJSON string - parser := NewYAMLParser(WithMatcher(rxSecurity), WithSetter(setter(&actualJSON, &setterCalled))) - - require.FalseT(t, parser.Matches("")) - require.NoError(t, parser.Parse([]string{})) - require.Zero(t, setterCalled) - }) - - t.Run("should handle nil input", func(t *testing.T) { - setterCalled := 0 - var actualJSON string - parser := NewYAMLParser(WithMatcher(rxSecurity), WithSetter(setter(&actualJSON, &setterCalled))) - - require.NoError(t, parser.Parse(nil)) - require.Zero(t, setterCalled) - }) - - t.Run("should handle bad indentation", func(t *testing.T) { - setterCalled := 0 - var actualJSON string - parser := NewYAMLParser(WithMatcher(rxSecurity), WithSetter(setter(&actualJSON, &setterCalled))) - lines := []string{ - "SecurityDefinitions:", - "\t\tapi_key:", - " type: apiKey", - } - - require.TrueT(t, parser.Matches(lines[0])) - err := parser.Parse(lines) - require.Error(t, err) - require.StringContainsT(t, err.Error(), "yaml: line 2:") - require.Zero(t, setterCalled) - }) - - t.Run("should catch YAML errors", func(t *testing.T) { - setterCalled := 0 - var actualJSON string - parser := NewYAMLParser(WithMatcher(rxSecurity), WithSetter(setter(&actualJSON, &setterCalled))) - lines := []string{ - "SecurityDefinitions:", - " api_key", - " type: apiKey", - } - - require.TrueT(t, parser.Matches(lines[0])) - err := parser.Parse(lines) - require.Error(t, err) - require.StringContainsT(t, err.Error(), "yaml: line 3: mapping value") - require.Zero(t, setterCalled) - }) - - t.Run("should handle nil rx in Matches", func(t *testing.T) { - parser := NewYAMLParser(WithSetter(func(_ json.RawMessage) error { return nil })) - require.FalseT(t, parser.Matches("anything")) - }) - - t.Run("should handle nil setter", func(t *testing.T) { - parser := NewYAMLParser(WithMatcher(rxSecurity)) - lines := []string{ - "SecurityDefinitions:", - " api_key:", - " type: apiKey", - } - require.NoError(t, parser.Parse(lines)) - }) - - t.Run("should propagate setter error", func(t *testing.T) { - parser := NewYAMLParser( - WithMatcher(rxSecurity), - WithSetter(func(_ json.RawMessage) error { return errSetterFailed }), - ) - lines := []string{ - "SecurityDefinitions:", - " api_key:", - " type: apiKey", - } - err := parser.Parse(lines) - require.Error(t, err) - require.ErrorIs(t, err, errSetterFailed) - }) - }) -} diff --git a/internal/scanner/classify/extension.go b/internal/scanner/classify/extension.go new file mode 100644 index 0000000..a2c801e --- /dev/null +++ b/internal/scanner/classify/extension.go @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +// Package classify provides small classification predicates used by +// the scanner and by builders to decide whether a given name or +// comment line belongs to a particular Swagger-annotation family. +// +// The package lives beneath internal/scanner/ because classification +// is fundamentally a scanner concern: "does this string denote a +// swagger:xxx construct?" is the same kind of question the scanner +// asks when indexing packages. Builders that need the same predicate +// (vendor-extension key filtering, for instance) import from here +// rather than reaching back into internal/parsers/. +package classify + +import "regexp" + +// rxAllowedExtension matches a Swagger vendor-extension key: +// a leading `x-` or `X-` followed by at least one character. +var rxAllowedExtension = regexp.MustCompile(`^[Xx]-`) + +// IsAllowedExtension reports whether key is a valid Swagger +// vendor-extension key ("x-..." / "X-..."). +func IsAllowedExtension(key string) bool { + return rxAllowedExtension.MatchString(key) +} diff --git a/internal/scanner/classify/extension_test.go b/internal/scanner/classify/extension_test.go new file mode 100644 index 0000000..b669b84 --- /dev/null +++ b/internal/scanner/classify/extension_test.go @@ -0,0 +1,32 @@ +// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers +// SPDX-License-Identifier: Apache-2.0 + +package classify + +import ( + "testing" + + "github.com/go-openapi/testify/v2/assert" +) + +func TestIsAllowedExtension(t *testing.T) { + t.Parallel() + + tests := []struct { + ext string + want bool + }{ + {"x-foo", true}, + {"X-bar", true}, + {"x-", true}, + {"y-foo", false}, + {"foo", false}, + {"", false}, + } + + for _, tc := range tests { + t.Run(tc.ext, func(t *testing.T) { + assert.EqualT(t, tc.want, IsAllowedExtension(tc.ext)) + }) + } +} diff --git a/internal/scanner/scan_context.go b/internal/scanner/scan_context.go index 925cbc8..1f21b80 100644 --- a/internal/scanner/scan_context.go +++ b/internal/scanner/scan_context.go @@ -16,6 +16,7 @@ import ( "github.com/go-openapi/codescan/internal/logger" "github.com/go-openapi/codescan/internal/parsers" + "github.com/go-openapi/codescan/internal/parsers/helpers" "golang.org/x/tools/go/packages" ) @@ -374,7 +375,7 @@ func (s *ScanCtx) findEnumValue(spec ast.Spec, enumName string) (values []any, d continue } - literalValue := parsers.GetEnumBasicLitValue(bl) + literalValue := helpers.GetEnumBasicLitValue(bl) var desc strings.Builder fmt.Fprintf(&desc, "%v %s", literalValue, nameIdent.Name)