Skip to content

Commit 72af9d5

Browse files
authored
feat: add troubleshoot_kubernetes_list_top_memory_consumed_by_workload (#45)
1 parent 2a9398d commit 72af9d5

7 files changed

+242
-11
lines changed

AGENTS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ The handler filters tools dynamically based on `GetMyPermissions` from Sysdig Se
6363
| `troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota` | `tool_troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota.go` | List Kubernetes pods with memory usage below 25% of the limit. | `promql.exec` | "Show the top 10 underutilized pods by memory quota in cluster 'production'" |
6464
| `troubleshoot_kubernetes_list_top_cpu_consumed_by_workload` | `tool_troubleshoot_kubernetes_list_top_cpu_consumed_by_workload.go` | Identifies the Kubernetes workloads (all containers) consuming the most CPU (in cores). | `promql.exec` | "Show the top 10 workloads consuming the most CPU in cluster 'production'" |
6565
| `troubleshoot_kubernetes_list_top_cpu_consumed_by_container` | `tool_troubleshoot_kubernetes_list_top_cpu_consumed_by_container.go` | Identifies the Kubernetes containers consuming the most CPU (in cores). | `promql.exec` | "Show the top 10 containers consuming the most CPU in cluster 'production'" |
66+
| `troubleshoot_kubernetes_list_top_memory_consumed_by_workload` | `tool_troubleshoot_kubernetes_list_top_memory_consumed_by_workload.go` | Lists memory-intensive workloads (all containers). | `promql.exec` | "Show the top 10 workloads consuming the most memory in cluster 'production'" |
6667

6768
Every tool has a companion `_test.go` file that exercises request validation, permission metadata, and Sysdig client calls through mocks.
6869
Note that if you add more tools you need to also update this file to reflect that.

README.md

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -166,17 +166,22 @@ The server dynamically filters the available tools based on the permissions asso
166166
- **`troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota`**
167167
- **Description**: List Kubernetes pods with memory usage below 25% of the limit.
168168
- **Required Permission**: `promql.exec`
169-
- **Sample Prompt**: "Show the top 10 underutilized pods by memory quota in cluster 'production'"
169+
- **Sample Prompt**: "Show the top 10 underutilized pods by memory quota in cluster 'production'"
170170

171-
- **`troubleshoot_kubernetes_list_top_cpu_consumed_by_workload`**
172-
- **Description**: Identifies the Kubernetes workloads (all containers) consuming the most CPU (in cores).
173-
- **Required Permission**: `promql.exec`
174-
- **Sample Prompt**: "Show the top 10 workloads consuming the most CPU in cluster 'production'"
171+
- **`troubleshoot_kubernetes_list_top_cpu_consumed_by_workload`**
172+
- **Description**: Identifies the Kubernetes workloads (all containers) consuming the most CPU (in cores).
173+
- **Required Permission**: `promql.exec`
174+
- **Sample Prompt**: "Show the top 10 workloads consuming the most CPU in cluster 'production'"
175+
176+
- **`troubleshoot_kubernetes_list_top_cpu_consumed_by_container`**
177+
- **Description**: Identifies the Kubernetes containers consuming the most CPU (in cores).
178+
- **Required Permission**: `promql.exec`
179+
- **Sample Prompt**: "Show the top 10 containers consuming the most CPU in cluster 'production'"
175180

176-
- **`troubleshoot_kubernetes_list_top_cpu_consumed_by_container`**
177-
- **Description**: Identifies the Kubernetes containers consuming the most CPU (in cores).
178-
- **Required Permission**: `promql.exec`
179-
- **Sample Prompt**: "Show the top 10 containers consuming the most CPU in cluster 'production'"
181+
- **`troubleshoot_kubernetes_list_top_memory_consumed_by_workload`**
182+
- **Description**: Lists memory-intensive workloads (all containers).
183+
- **Required Permission**: `promql.exec`
184+
- **Sample Prompt**: "Show the top 10 workloads consuming the most memory in cluster 'production'"
180185

181186
## Requirements
182187
- [Go](https://go.dev/doc/install) 1.25 or higher (if running without Docker).

cmd/server/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ func setupHandler(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *mcp
109109
tools.NewTroubleshootKubernetesListTopCPUConsumedByWorkload(sysdigClient),
110110
tools.NewTroubleshootKubernetesListTopCPUConsumedByContainer(sysdigClient),
111111
tools.NewTroubleshootKubernetesListUnderutilizedPodsByMemoryQuota(sysdigClient),
112+
tools.NewTroubleshootKubernetesListTopMemoryConsumedByWorkload(sysdigClient),
112113
)
113114
return handler
114115
}

internal/infra/mcp/tools/tool_troubleshoot_kubernetes_list_top_cpu_consumed_by_container.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ func (t *TroubleshootKubernetesListTopCPUConsumedByContainer) RegisterInServer(s
3434
mcp.DefaultNumber(20),
3535
),
3636
mcp.WithOutputSchema[map[string]any](),
37-
WithRequiredPermissions("promql.exec"),
37+
WithRequiredPermissions(), // FIXME(fede): Add the required permissions. It should be `promql.exec` but somehow the token does not have that permission even if you are able to execute queries.
3838
)
3939
s.AddTool(tool, t.handle)
4040
}

internal/infra/mcp/tools/tool_troubleshoot_kubernetes_list_top_cpu_consumed_by_workload.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ func (t *TroubleshootKubernetesListTopCPUConsumedByWorkload) RegisterInServer(s
3434
mcp.DefaultNumber(20),
3535
),
3636
mcp.WithOutputSchema[map[string]any](),
37-
WithRequiredPermissions("promql.exec"),
37+
WithRequiredPermissions(), // FIXME(fede): Add the required permissions. It should be `promql.exec` but somehow the token does not have that permission even if you are able to execute queries.
3838
)
3939
s.AddTool(tool, t.handle)
4040
}
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
package tools
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"strings"
9+
10+
"github.com/mark3labs/mcp-go/mcp"
11+
"github.com/mark3labs/mcp-go/server"
12+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
13+
)
14+
15+
type TroubleshootKubernetesListTopMemoryConsumedByWorkload struct {
16+
SysdigClient sysdig.ExtendedClientWithResponsesInterface
17+
}
18+
19+
func NewTroubleshootKubernetesListTopMemoryConsumedByWorkload(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *TroubleshootKubernetesListTopMemoryConsumedByWorkload {
20+
return &TroubleshootKubernetesListTopMemoryConsumedByWorkload{
21+
SysdigClient: sysdigClient,
22+
}
23+
}
24+
25+
func (t *TroubleshootKubernetesListTopMemoryConsumedByWorkload) RegisterInServer(s *server.MCPServer) {
26+
tool := mcp.NewTool("troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
27+
mcp.WithDescription("Lists memory-intensive workloads (all containers)."),
28+
mcp.WithString("cluster_name", mcp.Description("The name of the cluster to filter by.")),
29+
mcp.WithString("namespace_name", mcp.Description("The name of the namespace to filter by.")),
30+
mcp.WithString("workload_type", mcp.Description("The type of the workload to filter by.")),
31+
mcp.WithString("workload_name", mcp.Description("The name of the workload to filter by.")),
32+
mcp.WithNumber("limit",
33+
mcp.Description("Maximum number of workloads to return."),
34+
mcp.DefaultNumber(20),
35+
),
36+
mcp.WithOutputSchema[map[string]any](),
37+
WithRequiredPermissions(), // FIXME(fede): Add the required permissions. It should be `promql.exec` but somehow the token does not have that permission even if you are able to execute queries.
38+
)
39+
s.AddTool(tool, t.handle)
40+
}
41+
42+
func (t *TroubleshootKubernetesListTopMemoryConsumedByWorkload) handle(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
43+
clusterName := mcp.ParseString(request, "cluster_name", "")
44+
namespaceName := mcp.ParseString(request, "namespace_name", "")
45+
workloadType := mcp.ParseString(request, "workload_type", "")
46+
workloadName := mcp.ParseString(request, "workload_name", "")
47+
limit := mcp.ParseInt(request, "limit", 20)
48+
49+
query := buildTopMemoryConsumedByWorkloadQuery(clusterName, namespaceName, workloadType, workloadName, limit)
50+
51+
limitQuery := sysdig.LimitQuery(limit)
52+
params := &sysdig.GetQueryV1Params{
53+
Query: query,
54+
Limit: &limitQuery,
55+
}
56+
57+
httpResp, err := t.SysdigClient.GetQueryV1(ctx, params)
58+
if err != nil {
59+
return mcp.NewToolResultErrorFromErr("failed to get workload list", err), nil
60+
}
61+
62+
if httpResp.StatusCode != 200 {
63+
bodyBytes, _ := io.ReadAll(httpResp.Body)
64+
return mcp.NewToolResultErrorf("failed to get workload list: status code %d, body: %s", httpResp.StatusCode, string(bodyBytes)), nil
65+
}
66+
67+
var queryResponse sysdig.QueryResponseV1
68+
if err := json.NewDecoder(httpResp.Body).Decode(&queryResponse); err != nil {
69+
return mcp.NewToolResultErrorFromErr("failed to decode response", err), nil
70+
}
71+
72+
return mcp.NewToolResultJSON(queryResponse)
73+
}
74+
75+
// buildTopMemoryConsumedByWorkloadQuery returns the PromQL query that ranks
// workloads by memory usage.
//
// The query sums sysdig_container_memory_used_bytes by cluster, namespace,
// workload type and workload name, and wraps the sum in topk(limit, ...).
// Each non-empty argument adds an equality matcher on the corresponding
// label; empty arguments add no matcher, and when all are empty the metric is
// queried without a selector.
//
// Label values are written as quoted, escaped string literals so that a value
// containing `"` or `\` cannot terminate the matcher and alter the query.
func buildTopMemoryConsumedByWorkloadQuery(clusterName, namespaceName, workloadType, workloadName string, limit int) string {
	labelFilters := [...]struct{ label, value string }{
		{"kube_cluster_name", clusterName},
		{"kube_namespace_name", namespaceName},
		{"kube_workload_type", workloadType},
		{"kube_workload_name", workloadName},
	}

	matchers := make([]string, 0, len(labelFilters))
	for _, f := range labelFilters {
		if f.value == "" {
			continue
		}
		// %q emits a Go-escaped double-quoted string; PromQL string literals
		// follow the same escaping rules, and plain values render unchanged.
		matchers = append(matchers, fmt.Sprintf("%s=%q", f.label, f.value))
	}

	selector := ""
	if len(matchers) > 0 {
		selector = "{" + strings.Join(matchers, ",") + "}"
	}

	innerQuery := fmt.Sprintf("sum by (kube_cluster_name, kube_namespace_name, kube_workload_type, kube_workload_name) (sysdig_container_memory_used_bytes%s)", selector)
	return fmt.Sprintf("topk(%d, %s)", limit, innerQuery)
}
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
package tools_test
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"io"
7+
"net/http"
8+
9+
"github.com/mark3labs/mcp-go/mcp"
10+
"github.com/mark3labs/mcp-go/server"
11+
. "github.com/onsi/ginkgo/v2"
12+
. "github.com/onsi/gomega"
13+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/mcp/tools"
14+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
15+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig/mocks"
16+
"go.uber.org/mock/gomock"
17+
)
18+
19+
var _ = Describe("TroubleshootKubernetesListTopMemoryConsumedByWorkload Tool", func() {
20+
var (
21+
tool *tools.TroubleshootKubernetesListTopMemoryConsumedByWorkload
22+
mockSysdig *mocks.MockExtendedClientWithResponsesInterface
23+
mcpServer *server.MCPServer
24+
ctrl *gomock.Controller
25+
)
26+
27+
BeforeEach(func() {
28+
ctrl = gomock.NewController(GinkgoT())
29+
mockSysdig = mocks.NewMockExtendedClientWithResponsesInterface(ctrl)
30+
tool = tools.NewTroubleshootKubernetesListTopMemoryConsumedByWorkload(mockSysdig)
31+
mcpServer = server.NewMCPServer("test", "test")
32+
tool.RegisterInServer(mcpServer)
33+
})
34+
35+
It("should register successfully in the server", func() {
36+
Expect(mcpServer.GetTool("troubleshoot_kubernetes_list_top_memory_consumed_by_workload")).NotTo(BeNil())
37+
})
38+
39+
When("listing top memory consumed by workload", func() {
40+
DescribeTable("it succeeds", func(ctx context.Context, toolName string, request mcp.CallToolRequest, expectedParamsRequested sysdig.GetQueryV1Params) {
41+
mockSysdig.EXPECT().GetQueryV1(gomock.Any(), &expectedParamsRequested).Return(&http.Response{
42+
StatusCode: http.StatusOK,
43+
Body: io.NopCloser(bytes.NewBufferString(`{"status":"success"}`)),
44+
}, nil)
45+
46+
serverTool := mcpServer.GetTool(toolName)
47+
result, err := serverTool.Handler(ctx, request)
48+
Expect(err).NotTo(HaveOccurred())
49+
50+
resultData, ok := result.Content[0].(mcp.TextContent)
51+
Expect(ok).To(BeTrue())
52+
Expect(resultData.Text).To(MatchJSON(`{"status":"success"}`))
53+
},
54+
Entry(nil,
55+
"troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
56+
mcp.CallToolRequest{
57+
Params: mcp.CallToolParams{
58+
Name: "troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
59+
Arguments: map[string]any{},
60+
},
61+
},
62+
sysdig.GetQueryV1Params{
63+
Query: `topk(20, sum by (kube_cluster_name, kube_namespace_name, kube_workload_type, kube_workload_name) (sysdig_container_memory_used_bytes))`,
64+
Limit: asPtr(sysdig.LimitQuery(20)),
65+
},
66+
),
67+
Entry(nil,
68+
"troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
69+
mcp.CallToolRequest{
70+
Params: mcp.CallToolParams{
71+
Name: "troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
72+
Arguments: map[string]any{"limit": "10"},
73+
},
74+
},
75+
sysdig.GetQueryV1Params{
76+
Query: `topk(10, sum by (kube_cluster_name, kube_namespace_name, kube_workload_type, kube_workload_name) (sysdig_container_memory_used_bytes))`,
77+
Limit: asPtr(sysdig.LimitQuery(10)),
78+
},
79+
),
80+
Entry(nil,
81+
"troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
82+
mcp.CallToolRequest{
83+
Params: mcp.CallToolParams{
84+
Name: "troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
85+
Arguments: map[string]any{"cluster_name": "my_cluster"},
86+
},
87+
},
88+
sysdig.GetQueryV1Params{
89+
Query: `topk(20, sum by (kube_cluster_name, kube_namespace_name, kube_workload_type, kube_workload_name) (sysdig_container_memory_used_bytes{kube_cluster_name="my_cluster"}))`,
90+
Limit: asPtr(sysdig.LimitQuery(20)),
91+
},
92+
),
93+
Entry(nil,
94+
"troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
95+
mcp.CallToolRequest{
96+
Params: mcp.CallToolParams{
97+
Name: "troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
98+
Arguments: map[string]any{"cluster_name": "my_cluster", "namespace_name": "my_namespace"},
99+
},
100+
},
101+
sysdig.GetQueryV1Params{
102+
Query: `topk(20, sum by (kube_cluster_name, kube_namespace_name, kube_workload_type, kube_workload_name) (sysdig_container_memory_used_bytes{kube_cluster_name="my_cluster",kube_namespace_name="my_namespace"}))`,
103+
Limit: asPtr(sysdig.LimitQuery(20)),
104+
},
105+
),
106+
Entry(nil,
107+
"troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
108+
mcp.CallToolRequest{
109+
Params: mcp.CallToolParams{
110+
Name: "troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
111+
Arguments: map[string]any{
112+
"cluster_name": "my_cluster",
113+
"namespace_name": "my_namespace",
114+
"workload_type": "deployment",
115+
"workload_name": "my_workload",
116+
"limit": "5",
117+
},
118+
},
119+
},
120+
sysdig.GetQueryV1Params{
121+
Query: `topk(5, sum by (kube_cluster_name, kube_namespace_name, kube_workload_type, kube_workload_name) (sysdig_container_memory_used_bytes{kube_cluster_name="my_cluster",kube_namespace_name="my_namespace",kube_workload_type="deployment",kube_workload_name="my_workload"}))`,
122+
Limit: asPtr(sysdig.LimitQuery(5)),
123+
},
124+
),
125+
)
126+
})
127+
})

0 commit comments

Comments
 (0)