From 9ef46dd76b4a39d9046472861df06c7756ef7247 Mon Sep 17 00:00:00 2001 From: kerobbi Date: Mon, 16 Feb 2026 18:16:48 +0000 Subject: [PATCH 01/11] add response optimisation pkg --- pkg/github/pullrequests.go | 3 +- pkg/response/optimize.go | 285 +++++++++++++++++++++++++++++++++++++ 2 files changed, 287 insertions(+), 1 deletion(-) create mode 100644 pkg/response/optimize.go diff --git a/pkg/github/pullrequests.go b/pkg/github/pullrequests.go index 1043870f1..397ec8e9f 100644 --- a/pkg/github/pullrequests.go +++ b/pkg/github/pullrequests.go @@ -16,6 +16,7 @@ import ( ghErrors "github.com/github/github-mcp-server/pkg/errors" "github.com/github/github-mcp-server/pkg/inventory" "github.com/github/github-mcp-server/pkg/octicons" + "github.com/github/github-mcp-server/pkg/response" "github.com/github/github-mcp-server/pkg/sanitize" "github.com/github/github-mcp-server/pkg/scopes" "github.com/github/github-mcp-server/pkg/translations" @@ -1171,7 +1172,7 @@ func ListPullRequests(t translations.TranslationHelperFunc) inventory.ServerTool } } - r, err := json.Marshal(prs) + r, err := response.MarshalItems(prs) if err != nil { return utils.NewToolResultErrorFromErr("failed to marshal response", err), nil, nil } diff --git a/pkg/response/optimize.go b/pkg/response/optimize.go new file mode 100644 index 000000000..479a41840 --- /dev/null +++ b/pkg/response/optimize.go @@ -0,0 +1,285 @@ +package response + +import ( + "encoding/json" + "fmt" + "strings" +) + +// defaultFillRateThreshold is the default proportion of items that must have a key for it to survive +const defaultFillRateThreshold = 0.1 + +// minFillRateRows is the minimum number of items required to apply fill-rate filtering +const minFillRateRows = 3 + +// preservedFields is a set of keys that are exempt from all destructive strategies except whitespace normalization. +// Keys are matched against post-flatten map keys, so for nested fields like "user.html_url", the dotted key must be +// added explicitly. Empty collections are still dropped. Wins over collectionFieldExtractors. +var preservedFields = map[string]bool{ + "html_url": true, +} + +// collectionFieldExtractors controls how array fields are handled instead of being summarized as "[N items]". +// - 1 sub-field: comma-joined into a flat string ("bug, enhancement"). +// - Multiple sub-fields: keep the array, but trim each element to only those fields. +// +// These are explicitly exempt from fill-rate filtering; if we asked for the extraction, it's likely important +// to preserve the data even if only one item has it. +var collectionFieldExtractors = map[string][]string{ + "labels": {"name"}, + "requested_reviewers": {"login"}, +} + +// MarshalItems is the single entry point for response optimization. +// Handles two shapes: plain JSON arrays and wrapped objects with metadata. +func MarshalItems(data any) ([]byte, error) { + raw, err := json.Marshal(data) + if err != nil { + return nil, fmt.Errorf("failed to marshal data: %w", err) + } + + switch raw[0] { + case '[': + return optimizeArray(raw) + case '{': + return optimizeObject(raw) + default: + return raw, nil + } +} + +// OptimizeItems runs the full optimization pipeline on a slice of items: +// flatten, remove URLs, remove zero-values, normalize whitespace, +// summarize collections, and fill-rate filtering. +func OptimizeItems(items []map[string]any) []map[string]any { + if len(items) == 0 { + return items + } + + for i, item := range items { + flattenedItem := flatten(item) + items[i] = optimizeItem(flattenedItem) + } + + if len(items) >= minFillRateRows { + items = filterByFillRate(items, defaultFillRateThreshold) + } + + return items +} + +// flatten promotes values from one-level-deep nested maps into the parent +// using dot-notation keys ("user.login", "user.id"). Nested maps and arrays +// within those nested maps are dropped. +func flatten(item map[string]any) map[string]any { + result := make(map[string]any, len(item)) + for key, value := range item { + nested, ok := value.(map[string]any) + if !ok { + result[key] = value + continue + } + + for nk, nv := range nested { + switch nv.(type) { + case map[string]any, []any: + // skip complex nested values + default: + result[key+"."+nk] = nv + } + } + } + + return result +} + +// filterByFillRate drops keys that appear on less than the threshold proportion of items. +// Preserved fields and extractor keys always survive. +func filterByFillRate(items []map[string]any, threshold float64) []map[string]any { + keyCounts := make(map[string]int) + for _, item := range items { + for key := range item { + keyCounts[key]++ + } + } + + minCount := int(threshold * float64(len(items))) + keepKeys := make(map[string]bool, len(keyCounts)) + for key, count := range keyCounts { + _, hasExtractor := collectionFieldExtractors[key] + if count > minCount || preservedFields[key] || hasExtractor { + keepKeys[key] = true + } + } + + for i, item := range items { + filtered := make(map[string]any, len(keepKeys)) + for key, value := range item { + if keepKeys[key] { + filtered[key] = value + } + } + items[i] = filtered + } + + return items +} + +// optimizeArray is the entry point for optimizing a raw JSON array. +func optimizeArray(raw []byte) ([]byte, error) { + var items []map[string]any + if err := json.Unmarshal(raw, &items); err != nil { + return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) + } + return json.Marshal(OptimizeItems(items)) +} + +// optimizeObject is the entry point for optimizing a raw JSON object. +func optimizeObject(raw []byte) ([]byte, error) { + var wrapper map[string]any + if err := json.Unmarshal(raw, &wrapper); err != nil { + return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) + } + + // find the actual data array within the wrapper; rest is metadata to be preserved as is + var dataKey string + for key, value := range wrapper { + if _, ok := value.([]any); ok { + dataKey = key + break + } + } + // if no data array found, just return the original response + if dataKey == "" { + return raw, nil + } + + rawItems := wrapper[dataKey].([]any) + items := make([]map[string]any, 0, len(rawItems)) + for _, rawItem := range rawItems { + if m, ok := rawItem.(map[string]any); ok { + items = append(items, m) + } + } + wrapper[dataKey] = OptimizeItems(items) + + return json.Marshal(wrapper) +} + +// optimizeItem applies per-item strategies in a single pass: remove URLs, +// remove zero-values, normalize whitespace, summarize collections. +// Preserved fields skip everything except whitespace normalization. +func optimizeItem(item map[string]any) map[string]any { + result := make(map[string]any, len(item)) + for key, value := range item { + preserved := preservedFields[key] + if !preserved && isURLKey(key) { + continue + } + if !preserved && isZeroValue(value) { + continue + } + + switch v := value.(type) { + case string: + result[key] = strings.Join(strings.Fields(v), " ") + case []any: + if len(v) == 0 { + continue + } + + if preserved { + result[key] = value + } else if fields, ok := collectionFieldExtractors[key]; ok { + if len(fields) == 1 { + result[key] = extractSubField(v, fields[0]) + } else { + result[key] = trimArrayFields(v, fields) + } + } else { + result[key] = fmt.Sprintf("[%d items]", len(v)) + } + default: + result[key] = value + } + } + + return result +} + +// extractSubField pulls a named sub-field from each slice element and joins +// them with ", ". Elements missing the field are silently skipped. +func extractSubField(items []any, field string) string { + var vals []string + for _, item := range items { + m, ok := item.(map[string]any) + if !ok { + continue + } + + v, ok := m[field] + if !ok || v == nil { + continue + } + + switch s := v.(type) { + case string: + if s != "" { + vals = append(vals, s) + } + default: + vals = append(vals, fmt.Sprintf("%v", v)) + } + } + + return strings.Join(vals, ", ") +} + +// trimArrayFields keeps only the specified fields from each object in a slice. +// The trimmed objects are returned as is, no further strategies are applied. +func trimArrayFields(items []any, fields []string) []any { + result := make([]any, 0, len(items)) + for _, item := range items { + m, ok := item.(map[string]any) + if !ok { + continue + } + + trimmed := make(map[string]any, len(fields)) + for _, f := range fields { + if v, exists := m[f]; exists { + trimmed[f] = v + } + } + + if len(trimmed) > 0 { + result = append(result, trimmed) + } + } + + return result +} + +// isURLKey matches "url", "*_url", and their dot-prefixed variants. +func isURLKey(key string) bool { + base := key + if idx := strings.LastIndex(base, "."); idx >= 0 { + base = base[idx+1:] + } + return base == "url" || strings.HasSuffix(base, "_url") +} + +func isZeroValue(v any) bool { + switch val := v.(type) { + case nil: + return true + case string: + return val == "" + case bool: + return !val + case float64: + return val == 0 + default: + return false + } +} From f5291c44edd06bcf5fe57b19625f1150e6d8fd5a Mon Sep 17 00:00:00 2001 From: kerobbi Date: Mon, 16 Feb 2026 18:39:36 +0000 Subject: [PATCH 02/11] add tests --- pkg/response/optimize_test.go | 331 ++++++++++++++++++++++++++++++++++ 1 file changed, 331 insertions(+) create mode 100644 pkg/response/optimize_test.go diff --git a/pkg/response/optimize_test.go b/pkg/response/optimize_test.go new file mode 100644 index 000000000..d33912bc2 --- /dev/null +++ b/pkg/response/optimize_test.go @@ -0,0 +1,331 @@ +package response + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFlatten(t *testing.T) { + tests := []struct { + name string + input map[string]any + expected map[string]any + }{ + { + name: "promotes primitive fields from nested map", + input: map[string]any{ + "title": "fix bug", + "user": map[string]any{ + "login": "user", + "id": float64(1), + }, + }, + expected: map[string]any{ + "title": "fix bug", + "user.login": "user", + "user.id": float64(1), + }, + }, + { + name: "discards non-primitive nested values", + input: map[string]any{ + "user": map[string]any{ + "login": "user", + "repos": []any{"repo1"}, + "org": map[string]any{"name": "org"}, + }, + }, + expected: map[string]any{ + "user.login": "user", + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + result := flatten(tc.input) + assert.Equal(t, tc.expected, result) + }) + } +} + +func TestTrimArrayFields(t *testing.T) { + original := collectionFieldExtractors + defer func() { collectionFieldExtractors = original }() + + collectionFieldExtractors = map[string][]string{ + "reviewers": {"login", "state"}, + } + + result := optimizeItem(map[string]any{ + "reviewers": []any{ + map[string]any{"login": "alice", "state": "approved", "id": float64(1)}, + map[string]any{"login": "bob", "state": "changes_requested", "id": float64(2)}, + }, + "title": "Fix bug", + }) + + expected := []any{ + map[string]any{"login": "alice", "state": "approved"}, + map[string]any{"login": "bob", "state": "changes_requested"}, + } + assert.Equal(t, expected, result["reviewers"]) + assert.Equal(t, "Fix bug", result["title"]) +} + +func TestFilterByFillRate(t *testing.T) { + items := []map[string]any{ + {"title": "a", "body": "text", "milestone": "v1"}, + {"title": "b", "body": "text"}, + {"title": "c", "body": "text"}, + {"title": "d", "body": "text"}, + {"title": "e", "body": "text"}, + {"title": "f", "body": "text"}, + {"title": "g", "body": "text"}, + {"title": "h", "body": "text"}, + {"title": "i", "body": "text"}, + {"title": "j", "body": "text"}, + } + + result := filterByFillRate(items, 0.1) + + for _, item := range result { + assert.Contains(t, item, "title") + assert.Contains(t, item, "body") + assert.NotContains(t, item, "milestone") + } +} + +func TestFilterByFillRate_PreservesFields(t *testing.T) { + original := preservedFields + defer func() { preservedFields = original }() + + preservedFields = map[string]bool{"html_url": true} + + items := make([]map[string]any, 10) + for i := range items { + items[i] = map[string]any{"title": "x"} + } + items[0]["html_url"] = "https://github.com/repo/1" + + result := filterByFillRate(items, 0.1) + assert.Contains(t, result[0], "html_url") +} + +func TestOptimizeItems(t *testing.T) { + tests := []struct { + name string + items []map[string]any + expected []map[string]any + }{ + { + name: "applies all strategies in sequence", + items: []map[string]any{ + { + "title": "Fix bug", + "body": "line1\n\nline2", + "url": "https://api.github.com/repos/1", + "html_url": "https://github.com/repo/1", + "avatar_url": "https://avatars.githubusercontent.com/1", + "draft": false, + "merged_at": nil, + "labels": []any{map[string]any{"name": "bug"}}, + "user": map[string]any{ + "login": "user", + "avatar_url": "https://avatars.githubusercontent.com/1", + }, + }, + }, + expected: []map[string]any{ + { + "title": "Fix bug", + "body": "line1 line2", + "html_url": "https://github.com/repo/1", + "labels": "bug", + "user.login": "user", + }, + }, + }, + { + name: "nil input", + items: nil, + expected: nil, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + result := OptimizeItems(tc.items) + assert.Equal(t, tc.expected, result) + }) + } +} + +func TestOptimizeItems_SkipsFillRateBelowMinRows(t *testing.T) { + items := []map[string]any{ + {"title": "a", "rare": "x"}, + {"title": "b"}, + } + + result := OptimizeItems(items) + assert.Equal(t, "x", result[0]["rare"]) +} + +func TestPreservedFields(t *testing.T) { + original := preservedFields + defer func() { preservedFields = original }() + + preservedFields = map[string]bool{ + "html_url": true, + "clone_url": true, + } + + result := optimizeItem(map[string]any{ + "html_url": "https://github.com/repo/1", + "clone_url": "https://github.com/repo/1.git", + "avatar_url": "https://avatars.githubusercontent.com/1", + "user.html_url": "https://github.com/user", + "user.clone_url": "https://github.com/user.git", + }) + + assert.Contains(t, result, "html_url") + assert.Contains(t, result, "clone_url") + assert.NotContains(t, result, "avatar_url") + assert.NotContains(t, result, "user.html_url") + assert.NotContains(t, result, "user.clone_url") +} + +func TestPreservedFields_ProtectsZeroValues(t *testing.T) { + original := preservedFields + defer func() { preservedFields = original }() + + preservedFields = map[string]bool{"draft": true} + + result := optimizeItem(map[string]any{ + "draft": false, + "body": "", + }) + + assert.Contains(t, result, "draft") + assert.NotContains(t, result, "body") +} + +func TestPreservedFields_ProtectsFromCollectionSummarization(t *testing.T) { + original := preservedFields + defer func() { preservedFields = original }() + + preservedFields = map[string]bool{"assignees": true} + + assignees := []any{ + map[string]any{"login": "alice", "id": float64(1)}, + map[string]any{"login": "bob", "id": float64(2)}, + } + + result := optimizeItem(map[string]any{ + "assignees": assignees, + "comments": []any{map[string]any{"id": "1"}, map[string]any{"id": "2"}}, + }) + + assert.Equal(t, assignees, result["assignees"]) + assert.Equal(t, "[2 items]", result["comments"]) +} + +func TestCollectionFieldExtractors_SurviveFillRate(t *testing.T) { + original := collectionFieldExtractors + defer func() { collectionFieldExtractors = original }() + + collectionFieldExtractors = map[string][]string{"labels": {"name"}} + + items := []map[string]any{ + {"title": "PR 1", "labels": "bug"}, + {"title": "PR 2"}, + {"title": "PR 3"}, + {"title": "PR 4"}, + } + + result := filterByFillRate(items, defaultFillRateThreshold) + + assert.Contains(t, result[0], "labels") +} + +func TestMarshalItems_PlainSlice(t *testing.T) { + type commit struct { + SHA string `json:"sha"` + Message string `json:"message"` + URL string `json:"url"` + } + + data := []commit{ + {SHA: "abc123", Message: "fix bug", URL: "https://api.github.com/commits/abc123"}, + {SHA: "def456", Message: "add feature", URL: "https://api.github.com/commits/def456"}, + {SHA: "ghi789", Message: "update docs", URL: "https://api.github.com/commits/ghi789"}, + } + + raw, err := MarshalItems(data) + require.NoError(t, err) + + var result []map[string]any + err = json.Unmarshal(raw, &result) + require.NoError(t, err) + + for _, item := range result { + assert.NotEmpty(t, item["sha"]) + assert.NotEmpty(t, item["message"]) + assert.Nil(t, item["url"]) + } +} + +func TestMarshalItems_WrappedObject(t *testing.T) { + data := map[string]any{ + "issues": []any{ + map[string]any{ + "title": "bug report", + "url": "https://api.github.com/issues/1", + "html_url": "https://github.com/issues/1", + "draft": false, + }, + map[string]any{ + "title": "feature request", + "url": "https://api.github.com/issues/2", + "html_url": "https://github.com/issues/2", + "draft": false, + }, + map[string]any{ + "title": "docs update", + "url": "https://api.github.com/issues/3", + "html_url": "https://github.com/issues/3", + "draft": false, + }, + }, + "totalCount": 100, + "pageInfo": map[string]any{ + "hasNextPage": true, + "endCursor": "abc123", + }, + } + + raw, err := MarshalItems(data) + require.NoError(t, err) + + var result map[string]any + err = json.Unmarshal(raw, &result) + require.NoError(t, err) + + assert.NotNil(t, result["totalCount"]) + assert.NotNil(t, result["pageInfo"]) + + issues, ok := result["issues"].([]any) + require.True(t, ok) + require.Len(t, issues, 3) + + for _, issue := range issues { + m := issue.(map[string]any) + assert.NotEmpty(t, m["title"]) + assert.NotEmpty(t, m["html_url"]) + assert.Nil(t, m["url"]) + assert.Nil(t, m["draft"]) + } +} From 5d28623af64146379f96032296451e4eeaab5fa6 Mon Sep 17 00:00:00 2001 From: kerobbi Date: Tue, 17 Feb 2026 00:57:05 +0000 Subject: [PATCH 03/11] write remaining list tools, make flatten recursive with per-tool depth available --- pkg/github/issues.go | 7 ++-- pkg/github/issues_test.go | 38 +++++++++---------- pkg/github/repositories.go | 7 ++-- pkg/github/repositories_test.go | 27 +++++++++----- pkg/response/optimize.go | 65 +++++++++++++++++++-------------- pkg/response/optimize_test.go | 30 ++++++++++++--- 6 files changed, 104 insertions(+), 70 deletions(-) diff --git a/pkg/github/issues.go b/pkg/github/issues.go index 83fd46c3c..591d2ebb4 100644 --- a/pkg/github/issues.go +++ b/pkg/github/issues.go @@ -13,6 +13,7 @@ import ( ghErrors "github.com/github/github-mcp-server/pkg/errors" "github.com/github/github-mcp-server/pkg/inventory" "github.com/github/github-mcp-server/pkg/octicons" + "github.com/github/github-mcp-server/pkg/response" "github.com/github/github-mcp-server/pkg/sanitize" "github.com/github/github-mcp-server/pkg/scopes" "github.com/github/github-mcp-server/pkg/translations" @@ -610,7 +611,7 @@ func ListIssueTypes(t translations.TranslationHelperFunc) inventory.ServerTool { return ghErrors.NewGitHubAPIStatusErrorResponse(ctx, "failed to list issue types", resp, body), nil, nil } - r, err := json.Marshal(issueTypes) + r, err := response.MarshalItems(issueTypes) if err != nil { return utils.NewToolResultErrorFromErr("failed to marshal issue types", err), nil, nil } @@ -1605,7 +1606,7 @@ func ListIssues(t translations.TranslationHelperFunc) inventory.ServerTool { } // Create response with issues - response := map[string]any{ + issueResponse := map[string]any{ "issues": issues, "pageInfo": map[string]any{ "hasNextPage": pageInfo.HasNextPage, @@ -1615,7 +1616,7 @@ func ListIssues(t translations.TranslationHelperFunc) inventory.ServerTool { }, "totalCount": totalCount, } - out, err := json.Marshal(response) + out, err := response.MarshalItems(issueResponse) if err != nil { return nil, nil, fmt.Errorf("failed to marshal issues: %w", err) } diff --git a/pkg/github/issues_test.go b/pkg/github/issues_test.go index 1eeec2246..301329d74 100644 --- a/pkg/github/issues_test.go +++ b/pkg/github/issues_test.go @@ -1188,7 +1188,6 @@ func Test_ListIssues(t *testing.T) { expectError bool errContains string expectedCount int - verifyOrder func(t *testing.T, issues []*github.Issue) }{ { name: "list all issues", @@ -1296,32 +1295,29 @@ func Test_ListIssues(t *testing.T) { } require.NoError(t, err) - // Parse the structured response with pagination info - var response struct { - Issues []*github.Issue `json:"issues"` - PageInfo struct { - HasNextPage bool `json:"hasNextPage"` - HasPreviousPage bool `json:"hasPreviousPage"` - StartCursor string `json:"startCursor"` - EndCursor string `json:"endCursor"` - } `json:"pageInfo"` - TotalCount int `json:"totalCount"` - } + // Parse the response + var response map[string]any err = json.Unmarshal([]byte(text), &response) require.NoError(t, err) - assert.Len(t, response.Issues, tc.expectedCount, "Expected %d issues, got %d", tc.expectedCount, len(response.Issues)) + // Metadata should be preserved + assert.NotNil(t, response["totalCount"]) + pageInfo, ok := response["pageInfo"].(map[string]any) + require.True(t, ok, "pageInfo should be a map") + assert.Contains(t, pageInfo, "hasNextPage") + assert.Contains(t, pageInfo, "endCursor") - // Verify order if verifyOrder function is provided - if tc.verifyOrder != nil { - tc.verifyOrder(t, response.Issues) - } + issues, ok := response["issues"].([]any) + require.True(t, ok) + assert.Len(t, issues, tc.expectedCount, "Expected %d issues, got %d", tc.expectedCount, len(issues)) // Verify that returned issues have expected structure - for _, issue := range response.Issues { - assert.NotNil(t, issue.Number, "Issue should have number") - assert.NotNil(t, issue.Title, "Issue should have title") - assert.NotNil(t, issue.State, "Issue should have state") + for _, issue := range issues { + m, ok := issue.(map[string]any) + require.True(t, ok) + assert.NotNil(t, m["number"], "Issue should have number") + assert.NotNil(t, m["title"], "Issue should have title") + assert.NotNil(t, m["state"], "Issue should have state") } }) } diff --git a/pkg/github/repositories.go b/pkg/github/repositories.go index 1af296882..80dd3c7a4 100644 --- a/pkg/github/repositories.go +++ b/pkg/github/repositories.go @@ -13,6 +13,7 @@ import ( ghErrors "github.com/github/github-mcp-server/pkg/errors" "github.com/github/github-mcp-server/pkg/inventory" "github.com/github/github-mcp-server/pkg/octicons" + "github.com/github/github-mcp-server/pkg/response" "github.com/github/github-mcp-server/pkg/scopes" "github.com/github/github-mcp-server/pkg/translations" "github.com/github/github-mcp-server/pkg/utils" @@ -216,7 +217,7 @@ func ListCommits(t translations.TranslationHelperFunc) inventory.ServerTool { minimalCommits[i] = convertToMinimalCommit(commit, false) } - r, err := json.Marshal(minimalCommits) + r, err := response.MarshalItems(minimalCommits, 3) if err != nil { return nil, nil, fmt.Errorf("failed to marshal response: %w", err) } @@ -1496,7 +1497,7 @@ func ListTags(t translations.TranslationHelperFunc) inventory.ServerTool { return ghErrors.NewGitHubAPIStatusErrorResponse(ctx, "failed to list tags", resp, body), nil, nil } - r, err := json.Marshal(tags) + r, err := response.MarshalItems(tags) if err != nil { return nil, nil, fmt.Errorf("failed to marshal response: %w", err) } @@ -1669,7 +1670,7 @@ func ListReleases(t translations.TranslationHelperFunc) inventory.ServerTool { return ghErrors.NewGitHubAPIStatusErrorResponse(ctx, "failed to list releases", resp, body), nil, nil } - r, err := json.Marshal(releases) + r, err := response.MarshalItems(releases) if err != nil { return nil, nil, fmt.Errorf("failed to marshal response: %w", err) } diff --git a/pkg/github/repositories_test.go b/pkg/github/repositories_test.go index 38e8f8938..d9b16fa0e 100644 --- a/pkg/github/repositories_test.go +++ b/pkg/github/repositories_test.go @@ -1053,23 +1053,30 @@ func Test_ListCommits(t *testing.T) { textContent := getTextResult(t, result) // Unmarshal and verify the result - var returnedCommits []MinimalCommit + var returnedCommits []map[string]any err = json.Unmarshal([]byte(textContent.Text), &returnedCommits) require.NoError(t, err) assert.Len(t, returnedCommits, len(tc.expectedCommits)) for i, commit := range returnedCommits { - assert.Equal(t, tc.expectedCommits[i].GetSHA(), commit.SHA) - assert.Equal(t, tc.expectedCommits[i].GetHTMLURL(), commit.HTMLURL) + assert.Equal(t, tc.expectedCommits[i].GetSHA(), commit["sha"]) + assert.Equal(t, tc.expectedCommits[i].GetHTMLURL(), commit["html_url"]) if tc.expectedCommits[i].Commit != nil { - assert.Equal(t, tc.expectedCommits[i].Commit.GetMessage(), commit.Commit.Message) + assert.Equal(t, tc.expectedCommits[i].Commit.GetMessage(), commit["commit.message"]) + if tc.expectedCommits[i].Commit.Author != nil { + assert.Equal(t, tc.expectedCommits[i].Commit.Author.GetName(), commit["commit.author.name"]) + assert.Equal(t, tc.expectedCommits[i].Commit.Author.GetEmail(), commit["commit.author.email"]) + if tc.expectedCommits[i].Commit.Author.Date != nil { + assert.NotEmpty(t, commit["commit.author.date"], "commit.author.date should be present") + } + } } if tc.expectedCommits[i].Author != nil { - assert.Equal(t, tc.expectedCommits[i].Author.GetLogin(), commit.Author.Login) + assert.Equal(t, tc.expectedCommits[i].Author.GetLogin(), commit["author.login"]) } // Files and stats are never included in list_commits - assert.Nil(t, commit.Files) - assert.Nil(t, commit.Stats) + assert.Nil(t, commit["files"]) + assert.Nil(t, commit["stats"]) } }) } @@ -2782,15 +2789,15 @@ func Test_ListTags(t *testing.T) { textContent := getTextResult(t, result) // Parse and verify the result - var returnedTags []*github.RepositoryTag + var returnedTags []map[string]any err = json.Unmarshal([]byte(textContent.Text), &returnedTags) require.NoError(t, err) // Verify each tag require.Equal(t, len(tc.expectedTags), len(returnedTags)) for i, expectedTag := range tc.expectedTags { - assert.Equal(t, *expectedTag.Name, *returnedTags[i].Name) - assert.Equal(t, *expectedTag.Commit.SHA, *returnedTags[i].Commit.SHA) + assert.Equal(t, *expectedTag.Name, returnedTags[i]["name"]) + assert.Equal(t, *expectedTag.Commit.SHA, returnedTags[i]["commit.sha"]) } }) } diff --git a/pkg/response/optimize.go b/pkg/response/optimize.go index 479a41840..c438df997 100644 --- a/pkg/response/optimize.go +++ b/pkg/response/optimize.go @@ -12,11 +12,16 @@ const defaultFillRateThreshold = 0.1 // minFillRateRows is the minimum number of items required to apply fill-rate filtering const minFillRateRows = 3 +// maxFlattenDepth is the maximum nesting depth that flatten will recurse into. +// Deeper nested maps are silently dropped. +const maxFlattenDepth = 2 + // preservedFields is a set of keys that are exempt from all destructive strategies except whitespace normalization. // Keys are matched against post-flatten map keys, so for nested fields like "user.html_url", the dotted key must be // added explicitly. Empty collections are still dropped. Wins over collectionFieldExtractors. var preservedFields = map[string]bool{ "html_url": true, + "draft": true, } // collectionFieldExtractors controls how array fields are handled instead of being summarized as "[N items]". @@ -32,7 +37,14 @@ var collectionFieldExtractors = map[string][]string{ // MarshalItems is the single entry point for response optimization. // Handles two shapes: plain JSON arrays and wrapped objects with metadata. -func MarshalItems(data any) ([]byte, error) { +// An optional maxDepth controls how many nesting levels flatten will recurse +// into; it defaults to maxFlattenDepth when omitted. +func MarshalItems(data any, maxDepth ...int) ([]byte, error) { + depth := maxFlattenDepth + if len(maxDepth) > 0 { + depth = maxDepth[0] + } + raw, err := json.Marshal(data) if err != nil { return nil, fmt.Errorf("failed to marshal data: %w", err) @@ -40,9 +52,9 @@ func MarshalItems(data any) ([]byte, error) { switch raw[0] { case '[': - return optimizeArray(raw) + return optimizeArray(raw, depth) case '{': - return optimizeObject(raw) + return optimizeObject(raw, depth) default: return raw, nil } @@ -51,13 +63,13 @@ func MarshalItems(data any) ([]byte, error) { // OptimizeItems runs the full optimization pipeline on a slice of items: // flatten, remove URLs, remove zero-values, normalize whitespace, // summarize collections, and fill-rate filtering. -func OptimizeItems(items []map[string]any) []map[string]any { +func OptimizeItems(items []map[string]any, depth int) []map[string]any { if len(items) == 0 { return items } for i, item := range items { - flattenedItem := flatten(item) + flattenedItem := flattenTo(item, depth) items[i] = optimizeItem(flattenedItem) } @@ -68,29 +80,26 @@ func OptimizeItems(items []map[string]any) []map[string]any { return items } -// flatten promotes values from one-level-deep nested maps into the parent -// using dot-notation keys ("user.login", "user.id"). Nested maps and arrays -// within those nested maps are dropped. -func flatten(item map[string]any) map[string]any { +// flattenTo recursively promotes values from nested maps into the parent +// using dot-notation keys ("user.login", "commit.author.date"). Arrays +// within nested maps are preserved at their dotted key position. +// Recursion stops at the given maxDepth; deeper nested maps are dropped. +func flattenTo(item map[string]any, maxDepth int) map[string]any { result := make(map[string]any, len(item)) - for key, value := range item { - nested, ok := value.(map[string]any) - if !ok { - result[key] = value - continue - } + flattenInto(item, "", result, 1, maxDepth) + return result +} - for nk, nv := range nested { - switch nv.(type) { - case map[string]any, []any: - // skip complex nested values - default: - result[key+"."+nk] = nv - } +// flattenInto is the recursive worker for flattenTo. +func flattenInto(item map[string]any, prefix string, result map[string]any, depth int, maxDepth int) { + for key, value := range item { + fullKey := prefix + key + if nested, ok := value.(map[string]any); ok && depth < maxDepth { + flattenInto(nested, fullKey+".", result, depth+1, maxDepth) + } else if !ok { + result[fullKey] = value } } - - return result } // filterByFillRate drops keys that appear on less than the threshold proportion of items. @@ -126,16 +135,16 @@ func filterByFillRate(items []map[string]any, threshold float64) []map[string]an } // optimizeArray is the entry point for optimizing a raw JSON array. -func optimizeArray(raw []byte) ([]byte, error) { +func optimizeArray(raw []byte, depth int) ([]byte, error) { var items []map[string]any if err := json.Unmarshal(raw, &items); err != nil { return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) } - return json.Marshal(OptimizeItems(items)) + return json.Marshal(OptimizeItems(items, depth)) } // optimizeObject is the entry point for optimizing a raw JSON object. -func optimizeObject(raw []byte) ([]byte, error) { +func optimizeObject(raw []byte, depth int) ([]byte, error) { var wrapper map[string]any if err := json.Unmarshal(raw, &wrapper); err != nil { return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) @@ -161,7 +170,7 @@ func optimizeObject(raw []byte) ([]byte, error) { items = append(items, m) } } - wrapper[dataKey] = OptimizeItems(items) + wrapper[dataKey] = OptimizeItems(items, depth) return json.Marshal(wrapper) } diff --git a/pkg/response/optimize_test.go b/pkg/response/optimize_test.go index d33912bc2..dfc59091d 100644 --- a/pkg/response/optimize_test.go +++ b/pkg/response/optimize_test.go @@ -30,7 +30,7 @@ func TestFlatten(t *testing.T) { }, }, { - name: "discards non-primitive nested values", + name: "drops nested maps at default depth", input: map[string]any{ "user": map[string]any{ "login": "user", @@ -40,16 +40,35 @@ func TestFlatten(t *testing.T) { }, expected: map[string]any{ "user.login": "user", + "user.repos": []any{"repo1"}, }, }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - result := flatten(tc.input) + result := flattenTo(tc.input, maxFlattenDepth) assert.Equal(t, tc.expected, result) }) } + + t.Run("recurses deeper with custom depth", func(t *testing.T) { + input := map[string]any{ + "commit": map[string]any{ + "message": "fix bug", + "author": map[string]any{ + "name": "user", + "date": "2026-01-01", + }, + }, + } + result := flattenTo(input, 3) + assert.Equal(t, map[string]any{ + "commit.message": "fix bug", + "commit.author.name": "user", + "commit.author.date": "2026-01-01", + }, result) + }) } func TestTrimArrayFields(t *testing.T) { @@ -144,6 +163,7 @@ func TestOptimizeItems(t *testing.T) { "title": "Fix bug", "body": "line1 line2", "html_url": "https://github.com/repo/1", + "draft": false, "labels": "bug", "user.login": "user", }, @@ -158,7 +178,7 @@ func TestOptimizeItems(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - result := OptimizeItems(tc.items) + result := OptimizeItems(tc.items, maxFlattenDepth) assert.Equal(t, tc.expected, result) }) } @@ -170,7 +190,7 @@ func TestOptimizeItems_SkipsFillRateBelowMinRows(t *testing.T) { {"title": "b"}, } - result := OptimizeItems(items) + result := OptimizeItems(items, maxFlattenDepth) assert.Equal(t, "x", result[0]["rare"]) } @@ -326,6 +346,6 @@ func TestMarshalItems_WrappedObject(t *testing.T) { assert.NotEmpty(t, m["title"]) assert.NotEmpty(t, m["html_url"]) assert.Nil(t, m["url"]) - assert.Nil(t, m["draft"]) + assert.Equal(t, false, m["draft"]) } } From 78591117c11c3b28e66c0899315891b760847765 Mon Sep 17 00:00:00 2001 From: kerobbi Date: Tue, 17 Feb 2026 10:39:25 +0000 Subject: [PATCH 04/11] wire list_branches --- pkg/github/repositories.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/github/repositories.go b/pkg/github/repositories.go index 80dd3c7a4..991299ca6 100644 --- a/pkg/github/repositories.go +++ b/pkg/github/repositories.go @@ -304,7 +304,7 @@ func ListBranches(t translations.TranslationHelperFunc) inventory.ServerTool { minimalBranches = append(minimalBranches, convertToMinimalBranch(branch)) } - r, err := json.Marshal(minimalBranches) + r, err := response.MarshalItems(minimalBranches) if err != nil { return nil, nil, fmt.Errorf("failed to marshal response: %w", err) } From 70deafb9ed28be360bf522b338ea087ce03a2d0f Mon Sep 17 00:00:00 2001 From: kerobbi Date: Wed, 18 Feb 2026 08:56:28 +0000 Subject: [PATCH 05/11] unwire list_branches and list_issue_types --- pkg/github/issues.go | 2 +- pkg/github/repositories.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/github/issues.go b/pkg/github/issues.go index d668a724f..ce3af4ea7 100644 --- a/pkg/github/issues.go +++ b/pkg/github/issues.go @@ -608,7 +608,7 @@ func ListIssueTypes(t translations.TranslationHelperFunc) inventory.ServerTool { return ghErrors.NewGitHubAPIStatusErrorResponse(ctx, "failed to list issue types", resp, body), nil, nil } - r, err := response.MarshalItems(issueTypes) + r, err := json.Marshal(issueTypes) if err != nil { return utils.NewToolResultErrorFromErr("failed to marshal issue types", err), nil, nil } diff --git a/pkg/github/repositories.go b/pkg/github/repositories.go index 991299ca6..80dd3c7a4 100644 --- a/pkg/github/repositories.go +++ b/pkg/github/repositories.go @@ -304,7 +304,7 @@ func ListBranches(t translations.TranslationHelperFunc) inventory.ServerTool { minimalBranches = append(minimalBranches, convertToMinimalBranch(branch)) } - r, err := response.MarshalItems(minimalBranches) + r, err := json.Marshal(minimalBranches) if err != nil { return nil, nil, fmt.Errorf("failed to marshal response: %w", err) } From 77695c7911c899c40a691a1d3cba2c14558c47fe Mon Sep 17 00:00:00 2001 From: kerobbi Date: Wed, 18 Feb 2026 08:56:54 +0000 Subject: [PATCH 06/11] add prerelease and requested_teams to configs --- pkg/response/optimize.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/response/optimize.go b/pkg/response/optimize.go index c438df997..996b17d9a 100644 --- a/pkg/response/optimize.go +++ b/pkg/response/optimize.go @@ -22,6 +22,7 @@ const maxFlattenDepth = 2 var preservedFields = map[string]bool{ "html_url": true, "draft": true, + "prerelease": true, } // collectionFieldExtractors controls how array fields are handled instead of being summarized as "[N items]". @@ -33,6 +34,7 @@ var preservedFields = map[string]bool{ var collectionFieldExtractors = map[string][]string{ "labels": {"name"}, "requested_reviewers": {"login"}, + "requested_teams": {"name"}, } // MarshalItems is the single entry point for response optimization. From 92e6f447ea19fab6af294b0a5f4d417d6483e544 Mon Sep 17 00:00:00 2001 From: kerobbi Date: Wed, 18 Feb 2026 10:06:47 +0000 Subject: [PATCH 07/11] rewire list_branches --- pkg/github/repositories.go | 2 +- pkg/response/optimize.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/github/repositories.go b/pkg/github/repositories.go index 80dd3c7a4..991299ca6 100644 --- a/pkg/github/repositories.go +++ b/pkg/github/repositories.go @@ -304,7 +304,7 @@ func ListBranches(t translations.TranslationHelperFunc) inventory.ServerTool { minimalBranches = append(minimalBranches, convertToMinimalBranch(branch)) } - r, err := json.Marshal(minimalBranches) + r, err := response.MarshalItems(minimalBranches) if err != nil { return nil, nil, fmt.Errorf("failed to marshal response: %w", err) } diff --git a/pkg/response/optimize.go b/pkg/response/optimize.go index 996b17d9a..b3846e2f1 100644 --- a/pkg/response/optimize.go +++ b/pkg/response/optimize.go @@ -20,8 +20,8 @@ const maxFlattenDepth = 2 // Keys are matched against post-flatten map keys, so for nested fields like "user.html_url", the dotted key must be // added explicitly. Empty collections are still dropped. Wins over collectionFieldExtractors. var preservedFields = map[string]bool{ - "html_url": true, - "draft": true, + "html_url": true, + "draft": true, "prerelease": true, } From 3a1d18d7cc3da1ccba1ad3be7380847e869a19cf Mon Sep 17 00:00:00 2001 From: kerobbi Date: Wed, 18 Feb 2026 11:53:50 +0000 Subject: [PATCH 08/11] improve comment --- pkg/response/optimize.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/response/optimize.go b/pkg/response/optimize.go index b3846e2f1..35a9fc891 100644 --- a/pkg/response/optimize.go +++ b/pkg/response/optimize.go @@ -152,7 +152,8 @@ func optimizeObject(raw []byte, depth int) ([]byte, error) { return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) } - // find the actual data array within the wrapper; rest is metadata to be preserved as is + // find the first array field in the wrapper (data); rest is metadata to be preserved as is + // assumes exactly one array field exists; if multiple are present, only the first will be optimized var dataKey string for key, value := range wrapper { if _, ok := value.([]any); ok { From c960f498da70f9e2c2dc3056b42e634c678561f0 Mon Sep 17 00:00:00 2001 From: kerobbi Date: Wed, 18 Feb 2026 17:04:45 +0000 Subject: [PATCH 09/11] simplify isUrlKey --- pkg/response/optimize.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pkg/response/optimize.go b/pkg/response/optimize.go index 35a9fc891..824a1d4b2 100644 --- a/pkg/response/optimize.go +++ b/pkg/response/optimize.go @@ -274,11 +274,10 @@ func trimArrayFields(items []any, fields []string) []any { // isURLKey matches "url", "*_url", and their dot-prefixed variants. func isURLKey(key string) bool { - base := key - if idx := strings.LastIndex(base, "."); idx >= 0 { - base = base[idx+1:] + if idx := strings.LastIndex(key, "."); idx >= 0 { + key = key[idx+1:] } - return base == "url" || strings.HasSuffix(base, "_url") + return key == "url" || strings.HasSuffix(key, "_url") } func isZeroValue(v any) bool { From b6518873821cf4df4ae6b64dba06d1faa9b03c80 Mon Sep 17 00:00:00 2001 From: kerobbi Date: Thu, 19 Feb 2026 14:51:39 +0000 Subject: [PATCH 10/11] refactor MarshalItems with generics and per-tool config --- pkg/github/issues.go | 13 +++- pkg/github/pullrequests.go | 9 ++- pkg/github/repositories.go | 13 +++- pkg/response/optimize.go | 148 +++++++++++++------------------------ 4 files changed, 79 insertions(+), 104 deletions(-) diff --git a/pkg/github/issues.go b/pkg/github/issues.go index ce3af4ea7..aa8a9b775 100644 --- a/pkg/github/issues.go +++ b/pkg/github/issues.go @@ -1602,9 +1602,16 @@ func ListIssues(t translations.TranslationHelperFunc) inventory.ServerTool { totalCount = fragment.TotalCount } - // Create response with issues + optimizedIssues, err := response.OptimizeList(issues, + response.WithCollectionExtractors(map[string][]string{"labels": {"name"}}), + ) + if err != nil { + return nil, nil, fmt.Errorf("failed to optimize issues: %w", err) + } + + // Wrap optimized issues with pagination metadata issueResponse := map[string]any{ - "issues": issues, + "issues": json.RawMessage(optimizedIssues), "pageInfo": map[string]any{ "hasNextPage": pageInfo.HasNextPage, "hasPreviousPage": pageInfo.HasPreviousPage, @@ -1613,7 +1620,7 @@ func ListIssues(t translations.TranslationHelperFunc) inventory.ServerTool { }, "totalCount": totalCount, } - out, err := response.MarshalItems(issueResponse) + out, err := json.Marshal(issueResponse) if err != nil { return nil, nil, fmt.Errorf("failed to marshal issues: %w", err) } diff --git a/pkg/github/pullrequests.go b/pkg/github/pullrequests.go index 2abba072b..08518633d 100644 --- a/pkg/github/pullrequests.go +++ b/pkg/github/pullrequests.go @@ -1169,7 +1169,14 @@ func ListPullRequests(t translations.TranslationHelperFunc) inventory.ServerTool } } - r, err := response.MarshalItems(prs) + r, err := response.OptimizeList(prs, + response.WithPreservedFields(map[string]bool{"html_url": true, "draft": true}), + response.WithCollectionExtractors(map[string][]string{ + "labels": {"name"}, + "requested_reviewers": {"login"}, + "requested_teams": {"name"}, + }), + ) if err != nil { return utils.NewToolResultErrorFromErr("failed to marshal response", err), nil, nil } diff --git a/pkg/github/repositories.go b/pkg/github/repositories.go index 786da8963..5a3f33aab 100644 --- a/pkg/github/repositories.go +++ b/pkg/github/repositories.go @@ -217,7 +217,10 @@ func ListCommits(t translations.TranslationHelperFunc) inventory.ServerTool { minimalCommits[i] = convertToMinimalCommit(commit, false) } - r, err := response.MarshalItems(minimalCommits, 3) + r, err := response.OptimizeList(minimalCommits, + response.WithMaxDepth(3), + response.WithPreservedFields(map[string]bool{"html_url": true}), + ) if err != nil { return nil, nil, fmt.Errorf("failed to marshal response: %w", err) } @@ -304,7 +307,7 @@ func ListBranches(t translations.TranslationHelperFunc) inventory.ServerTool { minimalBranches = append(minimalBranches, convertToMinimalBranch(branch)) } - r, err := response.MarshalItems(minimalBranches) + r, err := response.OptimizeList(minimalBranches) if err != nil { return nil, nil, fmt.Errorf("failed to marshal response: %w", err) } @@ -1498,7 +1501,7 @@ func ListTags(t translations.TranslationHelperFunc) inventory.ServerTool { return ghErrors.NewGitHubAPIStatusErrorResponse(ctx, "failed to list tags", resp, body), nil, nil } - r, err := response.MarshalItems(tags) + r, err := response.OptimizeList(tags) if err != nil { return nil, nil, fmt.Errorf("failed to marshal response: %w", err) } @@ -1671,7 +1674,9 @@ func ListReleases(t translations.TranslationHelperFunc) inventory.ServerTool { return ghErrors.NewGitHubAPIStatusErrorResponse(ctx, "failed to list releases", resp, body), nil, nil } - r, err := response.MarshalItems(releases) + r, err := response.OptimizeList(releases, + response.WithPreservedFields(map[string]bool{"html_url": true, "prerelease": true}), + ) if err != nil { return nil, nil, fmt.Errorf("failed to marshal response: %w", err) } diff --git a/pkg/response/optimize.go b/pkg/response/optimize.go index 824a1d4b2..b66f4bc55 100644 --- a/pkg/response/optimize.go +++ b/pkg/response/optimize.go @@ -6,80 +6,78 @@ import ( "strings" ) -// defaultFillRateThreshold is the default proportion of items that must have a key for it to survive -const defaultFillRateThreshold = 0.1 +const ( + defaultFillRateThreshold = 0.1 // default proportion of items that must have a key for it to survive + minFillRateRows = 3 // minimum number of items required to apply fill-rate filtering + defaultMaxDepth = 2 // default nesting depth that flatten will recurse into +) + +// OptimizeListConfig controls the optimization pipeline behavior. +type OptimizeListConfig struct { + maxDepth int + preservedFields map[string]bool + collectionExtractors map[string][]string +} -// minFillRateRows is the minimum number of items required to apply fill-rate filtering -const minFillRateRows = 3 +type OptimizeListOption func(*OptimizeListConfig) -// maxFlattenDepth is the maximum nesting depth that flatten will recurse into. +// WithMaxDepth sets the maximum nesting depth for flattening. // Deeper nested maps are silently dropped. -const maxFlattenDepth = 2 +func WithMaxDepth(d int) OptimizeListOption { + return func(c *OptimizeListConfig) { + c.maxDepth = d + } +} -// preservedFields is a set of keys that are exempt from all destructive strategies except whitespace normalization. +// WithPreservedFields sets keys that are exempt from all destructive strategies except whitespace normalization. // Keys are matched against post-flatten map keys, so for nested fields like "user.html_url", the dotted key must be -// added explicitly. Empty collections are still dropped. Wins over collectionFieldExtractors. -var preservedFields = map[string]bool{ - "html_url": true, - "draft": true, - "prerelease": true, +// added explicitly. Empty collections are still dropped. Wins over collectionExtractors. +func WithPreservedFields(fields map[string]bool) OptimizeListOption { + return func(c *OptimizeListConfig) { + c.preservedFields = fields + } } -// collectionFieldExtractors controls how array fields are handled instead of being summarized as "[N items]". +// WithCollectionExtractors controls how array fields are handled instead of being summarized as "[N items]". // - 1 sub-field: comma-joined into a flat string ("bug, enhancement"). // - Multiple sub-fields: keep the array, but trim each element to only those fields. // // These are explicitly exempt from fill-rate filtering; if we asked for the extraction, it's likely important // to preserve the data even if only one item has it. -var collectionFieldExtractors = map[string][]string{ - "labels": {"name"}, - "requested_reviewers": {"login"}, - "requested_teams": {"name"}, +func WithCollectionExtractors(extractors map[string][]string) OptimizeListOption { + return func(c *OptimizeListConfig) { + c.collectionExtractors = extractors + } } -// MarshalItems is the single entry point for response optimization. -// Handles two shapes: plain JSON arrays and wrapped objects with metadata. -// An optional maxDepth controls how many nesting levels flatten will recurse -// into; it defaults to maxFlattenDepth when omitted. -func MarshalItems(data any, maxDepth ...int) ([]byte, error) { - depth := maxFlattenDepth - if len(maxDepth) > 0 { - depth = maxDepth[0] +// OptimizeList optimizes a list of items by applying flattening, URL removal, zero-value removal, +// whitespace normalization, collection summarization, and fill-rate filtering. +func OptimizeList[T any](items []T, opts ...OptimizeListOption) ([]byte, error) { + cfg := OptimizeListConfig{maxDepth: defaultMaxDepth} + for _, opt := range opts { + opt(&cfg) } - raw, err := json.Marshal(data) + raw, err := json.Marshal(items) if err != nil { return nil, fmt.Errorf("failed to marshal data: %w", err) } - switch raw[0] { - case '[': - return optimizeArray(raw, depth) - case '{': - return optimizeObject(raw, depth) - default: - return raw, nil + var maps []map[string]any + if err := json.Unmarshal(raw, &maps); err != nil { + return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) } -} -// OptimizeItems runs the full optimization pipeline on a slice of items: -// flatten, remove URLs, remove zero-values, normalize whitespace, -// summarize collections, and fill-rate filtering. -func OptimizeItems(items []map[string]any, depth int) []map[string]any { - if len(items) == 0 { - return items + for i, item := range maps { + flattenedItem := flattenTo(item, cfg.maxDepth) + maps[i] = optimizeItem(flattenedItem, cfg) } - for i, item := range items { - flattenedItem := flattenTo(item, depth) - items[i] = optimizeItem(flattenedItem) + if len(maps) >= minFillRateRows { + maps = filterByFillRate(maps, defaultFillRateThreshold, cfg) } - if len(items) >= minFillRateRows { - items = filterByFillRate(items, defaultFillRateThreshold) - } - - return items + return json.Marshal(maps) } // flattenTo recursively promotes values from nested maps into the parent @@ -106,7 +104,7 @@ func flattenInto(item map[string]any, prefix string, result map[string]any, dept // filterByFillRate drops keys that appear on less than the threshold proportion of items. // Preserved fields and extractor keys always survive. -func filterByFillRate(items []map[string]any, threshold float64) []map[string]any { +func filterByFillRate(items []map[string]any, threshold float64, cfg OptimizeListConfig) []map[string]any { keyCounts := make(map[string]int) for _, item := range items { for key := range item { @@ -117,8 +115,8 @@ func filterByFillRate(items []map[string]any, threshold float64) []map[string]an minCount := int(threshold * float64(len(items))) keepKeys := make(map[string]bool, len(keyCounts)) for key, count := range keyCounts { - _, hasExtractor := collectionFieldExtractors[key] - if count > minCount || preservedFields[key] || hasExtractor { + _, hasExtractor := cfg.collectionExtractors[key] + if count > minCount || cfg.preservedFields[key] || hasExtractor { keepKeys[key] = true } } @@ -136,55 +134,13 @@ func filterByFillRate(items []map[string]any, threshold float64) []map[string]an return items } -// optimizeArray is the entry point for optimizing a raw JSON array. -func optimizeArray(raw []byte, depth int) ([]byte, error) { - var items []map[string]any - if err := json.Unmarshal(raw, &items); err != nil { - return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) - } - return json.Marshal(OptimizeItems(items, depth)) -} - -// optimizeObject is the entry point for optimizing a raw JSON object. -func optimizeObject(raw []byte, depth int) ([]byte, error) { - var wrapper map[string]any - if err := json.Unmarshal(raw, &wrapper); err != nil { - return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) - } - - // find the first array field in the wrapper (data); rest is metadata to be preserved as is - // assumes exactly one array field exists; if multiple are present, only the first will be optimized - var dataKey string - for key, value := range wrapper { - if _, ok := value.([]any); ok { - dataKey = key - break - } - } - // if no data array found, just return the original response - if dataKey == "" { - return raw, nil - } - - rawItems := wrapper[dataKey].([]any) - items := make([]map[string]any, 0, len(rawItems)) - for _, rawItem := range rawItems { - if m, ok := rawItem.(map[string]any); ok { - items = append(items, m) - } - } - wrapper[dataKey] = OptimizeItems(items, depth) - - return json.Marshal(wrapper) -} - // optimizeItem applies per-item strategies in a single pass: remove URLs, // remove zero-values, normalize whitespace, summarize collections. // Preserved fields skip everything except whitespace normalization. -func optimizeItem(item map[string]any) map[string]any { +func optimizeItem(item map[string]any, cfg OptimizeListConfig) map[string]any { result := make(map[string]any, len(item)) for key, value := range item { - preserved := preservedFields[key] + preserved := cfg.preservedFields[key] if !preserved && isURLKey(key) { continue } @@ -202,7 +158,7 @@ func optimizeItem(item map[string]any) map[string]any { if preserved { result[key] = value - } else if fields, ok := collectionFieldExtractors[key]; ok { + } else if fields, ok := cfg.collectionExtractors[key]; ok { if len(fields) == 1 { result[key] = extractSubField(v, fields[0]) } else { From c75ab7d50d400ac5049b4bf1f0cf82ca89ecd3c9 Mon Sep 17 00:00:00 2001 From: kerobbi Date: Thu, 19 Feb 2026 15:27:27 +0000 Subject: [PATCH 11/11] fix tests --- pkg/response/optimize.go | 2 +- pkg/response/optimize_test.go | 295 +++++++++++++--------------------- 2 files changed, 109 insertions(+), 188 deletions(-) diff --git a/pkg/response/optimize.go b/pkg/response/optimize.go index b66f4bc55..c962422aa 100644 --- a/pkg/response/optimize.go +++ b/pkg/response/optimize.go @@ -50,7 +50,7 @@ func WithCollectionExtractors(extractors map[string][]string) OptimizeListOption } } -// OptimizeList optimizes a list of items by applying flattening, URL removal, zero-value removal, +// OptimizeList optimizes a list of items by applying flattening, URL removal, zero-value removal, // whitespace normalization, collection summarization, and fill-rate filtering. func OptimizeList[T any](items []T, opts ...OptimizeListOption) ([]byte, error) { cfg := OptimizeListConfig{maxDepth: defaultMaxDepth} diff --git a/pkg/response/optimize_test.go b/pkg/response/optimize_test.go index dfc59091d..4412ef424 100644 --- a/pkg/response/optimize_test.go +++ b/pkg/response/optimize_test.go @@ -47,7 +47,7 @@ func TestFlatten(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - result := flattenTo(tc.input, maxFlattenDepth) + result := flattenTo(tc.input, defaultMaxDepth) assert.Equal(t, tc.expected, result) }) } @@ -72,11 +72,10 @@ func TestFlatten(t *testing.T) { } func TestTrimArrayFields(t *testing.T) { - original := collectionFieldExtractors - defer func() { collectionFieldExtractors = original }() - - collectionFieldExtractors = map[string][]string{ - "reviewers": {"login", "state"}, + cfg := OptimizeListConfig{ + collectionExtractors: map[string][]string{ + "reviewers": {"login", "state"}, + }, } result := optimizeItem(map[string]any{ @@ -85,7 +84,7 @@ func TestTrimArrayFields(t *testing.T) { map[string]any{"login": "bob", "state": "changes_requested", "id": float64(2)}, }, "title": "Fix bug", - }) + }, cfg) expected := []any{ map[string]any{"login": "alice", "state": "approved"}, @@ -96,6 +95,8 @@ func TestTrimArrayFields(t *testing.T) { } func TestFilterByFillRate(t *testing.T) { + cfg := OptimizeListConfig{} + items := []map[string]any{ {"title": "a", "body": "text", "milestone": "v1"}, {"title": "b", "body": "text"}, @@ -109,7 +110,7 @@ func TestFilterByFillRate(t *testing.T) { {"title": "j", "body": "text"}, } - result := filterByFillRate(items, 0.1) + result := filterByFillRate(items, 0.1, cfg) for _, item := range result { assert.Contains(t, item, "title") @@ -119,10 +120,9 @@ func TestFilterByFillRate(t *testing.T) { } func TestFilterByFillRate_PreservesFields(t *testing.T) { - original := preservedFields - defer func() { preservedFields = original }() - - preservedFields = map[string]bool{"html_url": true} + cfg := OptimizeListConfig{ + preservedFields: map[string]bool{"html_url": true}, + } items := make([]map[string]any, 10) for i := range items { @@ -130,134 +130,134 @@ func TestFilterByFillRate_PreservesFields(t *testing.T) { } items[0]["html_url"] = "https://github.com/repo/1" - result := filterByFillRate(items, 0.1) + result := filterByFillRate(items, 0.1, cfg) assert.Contains(t, result[0], "html_url") } -func TestOptimizeItems(t *testing.T) { - tests := []struct { - name string - items []map[string]any - expected []map[string]any - }{ +func TestOptimizeList_AllStrategies(t *testing.T) { + items := []map[string]any{ { - name: "applies all strategies in sequence", - items: []map[string]any{ - { - "title": "Fix bug", - "body": "line1\n\nline2", - "url": "https://api.github.com/repos/1", - "html_url": "https://github.com/repo/1", - "avatar_url": "https://avatars.githubusercontent.com/1", - "draft": false, - "merged_at": nil, - "labels": []any{map[string]any{"name": "bug"}}, - "user": map[string]any{ - "login": "user", - "avatar_url": "https://avatars.githubusercontent.com/1", - }, - }, - }, - expected: []map[string]any{ - { - "title": "Fix bug", - "body": "line1 line2", - "html_url": "https://github.com/repo/1", - "draft": false, - "labels": "bug", - "user.login": "user", - }, + "title": "Fix bug", + "body": "line1\n\nline2", + "url": "https://api.github.com/repos/1", + "html_url": "https://github.com/repo/1", + "avatar_url": "https://avatars.githubusercontent.com/1", + "draft": false, + "merged_at": nil, + "labels": []any{map[string]any{"name": "bug"}}, + "user": map[string]any{ + "login": "user", + "avatar_url": "https://avatars.githubusercontent.com/1", }, }, - { - name: "nil input", - items: nil, - expected: nil, - }, } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - result := OptimizeItems(tc.items, maxFlattenDepth) - assert.Equal(t, tc.expected, result) - }) - } + raw, err := OptimizeList(items, + WithPreservedFields(map[string]bool{"html_url": true, "draft": true}), + WithCollectionExtractors(map[string][]string{"labels": {"name"}}), + ) + require.NoError(t, err) + + var result []map[string]any + err = json.Unmarshal(raw, &result) + require.NoError(t, err) + require.Len(t, result, 1) + + assert.Equal(t, "Fix bug", result[0]["title"]) + assert.Equal(t, "line1 line2", result[0]["body"]) + assert.Equal(t, "https://github.com/repo/1", result[0]["html_url"]) + assert.Equal(t, false, result[0]["draft"]) + assert.Equal(t, "bug", result[0]["labels"]) + assert.Equal(t, "user", result[0]["user.login"]) + assert.Nil(t, result[0]["url"]) + assert.Nil(t, result[0]["avatar_url"]) + assert.Nil(t, result[0]["merged_at"]) } -func TestOptimizeItems_SkipsFillRateBelowMinRows(t *testing.T) { +func TestOptimizeList_NilInput(t *testing.T) { + raw, err := OptimizeList[map[string]any](nil) + require.NoError(t, err) + assert.Equal(t, "null", string(raw)) +} + +func TestOptimizeList_SkipsFillRateBelowMinRows(t *testing.T) { items := []map[string]any{ {"title": "a", "rare": "x"}, {"title": "b"}, } - result := OptimizeItems(items, maxFlattenDepth) + raw, err := OptimizeList(items) + require.NoError(t, err) + + var result []map[string]any + err = json.Unmarshal(raw, &result) + require.NoError(t, err) + assert.Equal(t, "x", result[0]["rare"]) } func TestPreservedFields(t *testing.T) { - original := preservedFields - defer func() { preservedFields = original }() - - preservedFields = map[string]bool{ - "html_url": true, - "clone_url": true, - } + t.Run("keeps preserved URL keys, strips non-preserved", func(t *testing.T) { + cfg := OptimizeListConfig{ + preservedFields: map[string]bool{ + "html_url": true, + "clone_url": true, + }, + } - result := optimizeItem(map[string]any{ - "html_url": "https://github.com/repo/1", - "clone_url": "https://github.com/repo/1.git", - "avatar_url": "https://avatars.githubusercontent.com/1", - "user.html_url": "https://github.com/user", - "user.clone_url": "https://github.com/user.git", + result := optimizeItem(map[string]any{ + "html_url": "https://github.com/repo/1", + "clone_url": "https://github.com/repo/1.git", + "avatar_url": "https://avatars.githubusercontent.com/1", + "user.html_url": "https://github.com/user", + "user.clone_url": "https://github.com/user.git", + }, cfg) + + assert.Contains(t, result, "html_url") + assert.Contains(t, result, "clone_url") + assert.NotContains(t, result, "avatar_url") + assert.NotContains(t, result, "user.html_url") + assert.NotContains(t, result, "user.clone_url") }) - assert.Contains(t, result, "html_url") - assert.Contains(t, result, "clone_url") - assert.NotContains(t, result, "avatar_url") - assert.NotContains(t, result, "user.html_url") - assert.NotContains(t, result, "user.clone_url") -} - -func TestPreservedFields_ProtectsZeroValues(t *testing.T) { - original := preservedFields - defer func() { preservedFields = original }() + t.Run("protects zero values", func(t *testing.T) { + cfg := OptimizeListConfig{ + preservedFields: map[string]bool{"draft": true}, + } - preservedFields = map[string]bool{"draft": true} + result := optimizeItem(map[string]any{ + "draft": false, + "body": "", + }, cfg) - result := optimizeItem(map[string]any{ - "draft": false, - "body": "", + assert.Contains(t, result, "draft") + assert.NotContains(t, result, "body") }) - assert.Contains(t, result, "draft") - assert.NotContains(t, result, "body") -} - -func TestPreservedFields_ProtectsFromCollectionSummarization(t *testing.T) { - original := preservedFields - defer func() { preservedFields = original }() + t.Run("protects from collection summarization", func(t *testing.T) { + cfg := OptimizeListConfig{ + preservedFields: map[string]bool{"assignees": true}, + } - preservedFields = map[string]bool{"assignees": true} + assignees := []any{ + map[string]any{"login": "alice", "id": float64(1)}, + map[string]any{"login": "bob", "id": float64(2)}, + } - assignees := []any{ - map[string]any{"login": "alice", "id": float64(1)}, - map[string]any{"login": "bob", "id": float64(2)}, - } + result := optimizeItem(map[string]any{ + "assignees": assignees, + "comments": []any{map[string]any{"id": "1"}, map[string]any{"id": "2"}}, + }, cfg) - result := optimizeItem(map[string]any{ - "assignees": assignees, - "comments": []any{map[string]any{"id": "1"}, map[string]any{"id": "2"}}, + assert.Equal(t, assignees, result["assignees"]) + assert.Equal(t, "[2 items]", result["comments"]) }) - - assert.Equal(t, assignees, result["assignees"]) - assert.Equal(t, "[2 items]", result["comments"]) } func TestCollectionFieldExtractors_SurviveFillRate(t *testing.T) { - original := collectionFieldExtractors - defer func() { collectionFieldExtractors = original }() - - collectionFieldExtractors = map[string][]string{"labels": {"name"}} + cfg := OptimizeListConfig{ + collectionExtractors: map[string][]string{"labels": {"name"}}, + } items := []map[string]any{ {"title": "PR 1", "labels": "bug"}, @@ -266,86 +266,7 @@ func TestCollectionFieldExtractors_SurviveFillRate(t *testing.T) { {"title": "PR 4"}, } - result := filterByFillRate(items, defaultFillRateThreshold) + result := filterByFillRate(items, defaultFillRateThreshold, cfg) assert.Contains(t, result[0], "labels") } - -func TestMarshalItems_PlainSlice(t *testing.T) { - type commit struct { - SHA string `json:"sha"` - Message string `json:"message"` - URL string `json:"url"` - } - - data := []commit{ - {SHA: "abc123", Message: "fix bug", URL: "https://api.github.com/commits/abc123"}, - {SHA: "def456", Message: "add feature", URL: "https://api.github.com/commits/def456"}, - {SHA: "ghi789", Message: "update docs", URL: "https://api.github.com/commits/ghi789"}, - } - - raw, err := MarshalItems(data) - require.NoError(t, err) - - var result []map[string]any - err = json.Unmarshal(raw, &result) - require.NoError(t, err) - - for _, item := range result { - assert.NotEmpty(t, item["sha"]) - assert.NotEmpty(t, item["message"]) - assert.Nil(t, item["url"]) - } -} - -func TestMarshalItems_WrappedObject(t *testing.T) { - data := map[string]any{ - "issues": []any{ - map[string]any{ - "title": "bug report", - "url": "https://api.github.com/issues/1", - "html_url": "https://github.com/issues/1", - "draft": false, - }, - map[string]any{ - "title": "feature request", - "url": "https://api.github.com/issues/2", - "html_url": "https://github.com/issues/2", - "draft": false, - }, - map[string]any{ - "title": "docs update", - "url": "https://api.github.com/issues/3", - "html_url": "https://github.com/issues/3", - "draft": false, - }, - }, - "totalCount": 100, - "pageInfo": map[string]any{ - "hasNextPage": true, - "endCursor": "abc123", - }, - } - - raw, err := MarshalItems(data) - require.NoError(t, err) - - var result map[string]any - err = json.Unmarshal(raw, &result) - require.NoError(t, err) - - assert.NotNil(t, result["totalCount"]) - assert.NotNil(t, result["pageInfo"]) - - issues, ok := result["issues"].([]any) - require.True(t, ok) - require.Len(t, issues, 3) - - for _, issue := range issues { - m := issue.(map[string]any) - assert.NotEmpty(t, m["title"]) - assert.NotEmpty(t, m["html_url"]) - assert.Nil(t, m["url"]) - assert.Equal(t, false, m["draft"]) - } -}