Skip to content

Commit f599b5f

Browse files
DA-1203 Modified Add Slow Running Queries tools (#74)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 5aa2c1a commit f599b5f

File tree

4 files changed

+293
-1
lines changed

4 files changed

+293
-1
lines changed

DOCKER.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,21 @@ Dockerfile: https://github.com/Couchbase-Ecosystem/mcp-server-couchbase/blob/mai
1919
- Upsert a document by ID to a specified scope and collection
2020
- Delete a document by ID from a specified scope and collection
2121
- Run a [SQL++ query](https://www.couchbase.com/sqlplusplus/) on a specified scope
22+
- Queries are automatically scoped to the specified bucket and scope, so use collection names directly (e.g., use `SELECT * FROM users` instead of `SELECT * FROM bucket.scope.users`)
2223
- There is an option in the MCP server, `CB_MCP_READ_ONLY_QUERY_MODE`, that is set to true by default to disable running SQL++ queries that change the data or the underlying collection structure. Note that the documents can still be updated by ID.
2324
- Get the status of the MCP server
2425
- Check the cluster credentials by connecting to the cluster
2526
- List all indexes in the cluster with their definitions, with optional filtering by bucket, scope, collection and index name.
2627
- Get index recommendations from Couchbase Index Advisor for a given SQL++ query to optimize query performance
2728
- Get cluster health status and list of all running services
29+
- Query performance analysis tools:
30+
- Get longest running queries by average service time
31+
- Get most frequently executed queries
32+
- Get queries with the largest response sizes
33+
- Get queries with the largest result counts
34+
- Get queries that use a primary index (potential performance concern)
35+
- Get queries that don't use a covering index
36+
- Get queries that are not selective (index scans return many more documents than final result)
2837

2938
## Usage
3039

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ An [MCP](https://modelcontextprotocol.io/) server implementation of Couchbase th
2828
- List all indexes in the cluster with their definitions, with optional filtering by bucket, scope, collection and index name.
2929
- Get index recommendations from Couchbase Index Advisor for a given SQL++ query to optimize query performance
3030
- Get cluster health status and list of all running services
31+
- Query performance analysis tools:
32+
- Get longest running queries by average service time
33+
- Get most frequently executed queries
34+
- Get queries with the largest response sizes
35+
- Get queries with the largest result counts
36+
- Get queries that use a primary index (potential performance concern)
37+
- Get queries that don't use a covering index
38+
- Get queries that are not selective (index scans return many more documents than final result)
3139

3240
## Prerequisites
3341

src/tools/__init__.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,13 @@
1616

1717
# Query tools
1818
from .query import (
19+
get_longest_running_queries,
20+
get_most_frequent_queries,
21+
get_queries_not_selective,
22+
get_queries_not_using_covering_index,
23+
get_queries_using_primary_index,
24+
get_queries_with_large_result_count,
25+
get_queries_with_largest_response_sizes,
1926
get_schema_for_collection,
2027
run_sql_plus_plus_query,
2128
)
@@ -47,6 +54,13 @@
4754
get_index_advisor_recommendations,
4855
list_indexes,
4956
get_cluster_health_and_services,
57+
get_queries_not_selective,
58+
get_queries_not_using_covering_index,
59+
get_queries_using_primary_index,
60+
get_queries_with_large_result_count,
61+
get_queries_with_largest_response_sizes,
62+
get_longest_running_queries,
63+
get_most_frequent_queries,
5064
]
5165

5266
__all__ = [
@@ -65,6 +79,13 @@
6579
"get_index_advisor_recommendations",
6680
"list_indexes",
6781
"get_cluster_health_and_services",
82+
"get_queries_not_selective",
83+
"get_queries_not_using_covering_index",
84+
"get_queries_using_primary_index",
85+
"get_queries_with_large_result_count",
86+
"get_queries_with_largest_response_sizes",
87+
"get_longest_running_queries",
88+
"get_most_frequent_queries",
6889
# Convenience
6990
"ALL_TOOLS",
7091
]

src/tools/query.py

Lines changed: 255 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ def run_sql_plus_plus_query(
8989
raise
9090

9191

92-
# Don't expose this function to the MCP server until we have a use case
9392
def run_cluster_query(ctx: Context, query: str, **kwargs: Any) -> list[dict[str, Any]]:
9493
"""Run a query on the cluster object and return the results as a list of JSON objects."""
9594

@@ -104,3 +103,258 @@ def run_cluster_query(ctx: Context, query: str, **kwargs: Any) -> list[dict[str,
104103
except Exception as e:
105104
logger.error(f"Error running query: {e}")
106105
raise
106+
107+
108+
def _run_query_tool_with_empty_message(
    ctx: Context,
    query: str,
    *,
    limit: int,
    empty_message: str,
    extra_payload: dict[str, Any] | None = None,
    **query_kwargs: Any,
) -> list[dict[str, Any]]:
    """Execute a cluster query with a consistent empty-result response.

    Args:
        ctx: Request context, forwarded unchanged to ``run_cluster_query``.
        query: SQL++ statement to execute. The tools in this module reference
            the limit inside the statement as ``$limit``.
        limit: Maximum number of rows requested; forwarded to
            ``run_cluster_query`` as the ``limit`` keyword argument.
            Must be at least 1.
        empty_message: Human-readable text returned under ``"message"`` when
            the query yields no rows.
        extra_payload: Optional additional keys merged into the empty-result
            entry (they override ``"message"``/``"results"`` on collision).
        **query_kwargs: Any further keyword arguments, forwarded unchanged to
            ``run_cluster_query``.

    Returns:
        The query rows when there are any; otherwise a one-element list
        holding ``{"message": empty_message, "results": []}`` plus any
        ``extra_payload`` entries.

    Raises:
        ValueError: If ``limit`` is smaller than 1.
    """
    # A non-positive limit cannot produce a meaningful top-N list. Without
    # this guard the empty-result message below would wrongly suggest that no
    # matching queries exist, hiding the caller's invalid argument.
    if limit < 1:
        raise ValueError(f"limit must be a positive integer, got {limit!r}")

    results = run_cluster_query(ctx, query, limit=limit, **query_kwargs)

    if results:
        return results

    payload: dict[str, Any] = {"message": empty_message, "results": []}
    if extra_payload:
        payload.update(extra_payload)
    return [payload]
127+
128+
129+
def get_longest_running_queries(ctx: Context, limit: int = 10) -> list[dict[str, Any]]:
    """Get the N statements with the highest average service time.

    Reads the system:completed_requests catalog, groups entries by statement
    text and ranks the groups by average service time, longest first.
    INFER, CREATE INDEX and CREATE PRIMARY INDEX statements, as well as
    statements whose text contains " system:", are left out.

    Args:
        limit: Number of queries to return (default: 10)

    Returns:
        List of queries with their average service time and count, or a
        single message entry when the query produces no rows
    """
    no_rows_message = (
        "No completed queries were available to calculate longest running queries."
    )
    statement_sql = """
    SELECT statement,
        DURATION_TO_STR(avgServiceTime) AS avgServiceTime,
        COUNT(1) AS queries
    FROM system:completed_requests
    WHERE UPPER(statement) NOT LIKE 'INFER %'
        AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
        AND UPPER(statement) NOT LIKE 'CREATE PRIMARY INDEX%'
        AND UPPER(statement) NOT LIKE '% SYSTEM:%'
    GROUP BY statement
    LETTING avgServiceTime = AVG(STR_TO_DURATION(serviceTime))
    ORDER BY avgServiceTime DESC
    LIMIT $limit
    """

    return _run_query_tool_with_empty_message(
        ctx, statement_sql, limit=limit, empty_message=no_rows_message
    )
161+
162+
163+
def get_most_frequent_queries(ctx: Context, limit: int = 10) -> list[dict[str, Any]]:
    """Get the N most frequently executed statements.

    Reads the system:completed_requests catalog, groups entries by statement
    text and ranks the groups by how often they occur, most frequent first.
    INFER, CREATE INDEX, CREATE PRIMARY INDEX, EXPLAIN and ADVISE statements,
    as well as statements whose text contains " system:", are left out.

    Args:
        limit: Number of queries to return (default: 10)

    Returns:
        List of queries with their frequency count, or a single message
        entry when the query produces no rows
    """
    no_rows_message = (
        "No completed queries were available to calculate most frequent queries."
    )
    statement_sql = """
    SELECT statement,
        COUNT(1) AS queries
    FROM system:completed_requests
    WHERE UPPER(statement) NOT LIKE 'INFER %'
        AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
        AND UPPER(statement) NOT LIKE 'CREATE PRIMARY INDEX%'
        AND UPPER(statement) NOT LIKE 'EXPLAIN %'
        AND UPPER(statement) NOT LIKE 'ADVISE %'
        AND UPPER(statement) NOT LIKE '% SYSTEM:%'
    GROUP BY statement
    LETTING queries = COUNT(1)
    ORDER BY queries DESC
    LIMIT $limit
    """

    return _run_query_tool_with_empty_message(
        ctx, statement_sql, limit=limit, empty_message=no_rows_message
    )
196+
197+
198+
def get_queries_with_largest_response_sizes(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Get the N statements with the largest average response size.

    Reads the system:completed_requests catalog, groups entries by statement
    text and ranks the groups by average resultSize, largest first. The size
    is reported as-is and additionally divided by 1000 and by 1000000.
    INFER, CREATE INDEX and CREATE PRIMARY INDEX statements, as well as
    statements whose text contains " system:", are left out.

    Args:
        limit: Number of queries to return (default: 10)

    Returns:
        List of queries with their average result size in bytes, KB, and MB,
        or a single message entry when the query produces no rows
    """
    no_rows_message = (
        "No completed queries were available to calculate response sizes."
    )
    statement_sql = """
    SELECT statement,
        avgResultSize AS avgResultSizeBytes,
        (avgResultSize / 1000) AS avgResultSizeKB,
        (avgResultSize / 1000000) AS avgResultSizeMB,
        COUNT(1) AS queries
    FROM system:completed_requests
    WHERE UPPER(statement) NOT LIKE 'INFER %'
        AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
        AND UPPER(statement) NOT LIKE 'CREATE PRIMARY INDEX%'
        AND UPPER(statement) NOT LIKE '% SYSTEM:%'
    GROUP BY statement
    LETTING avgResultSize = AVG(resultSize)
    ORDER BY avgResultSize DESC
    LIMIT $limit
    """

    return _run_query_tool_with_empty_message(
        ctx, statement_sql, limit=limit, empty_message=no_rows_message
    )
234+
235+
236+
def get_queries_with_large_result_count(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Get the N statements with the largest average result count.

    Reads the system:completed_requests catalog, groups entries by statement
    text and ranks the groups by average resultCount, largest first.
    INFER, CREATE INDEX and CREATE PRIMARY INDEX statements, as well as
    statements whose text contains " system:", are left out.

    Args:
        limit: Number of queries to return (default: 10)

    Returns:
        List of queries with their average result count, or a single message
        entry when the query produces no rows
    """
    no_rows_message = (
        "No completed queries were available to calculate result counts."
    )
    statement_sql = """
    SELECT statement,
        avgResultCount,
        COUNT(1) AS queries
    FROM system:completed_requests
    WHERE UPPER(statement) NOT LIKE 'INFER %' AND
        UPPER(statement) NOT LIKE 'CREATE INDEX%' AND
        UPPER(statement) NOT LIKE 'CREATE PRIMARY INDEX%' AND
        UPPER(statement) NOT LIKE '% SYSTEM:%'
    GROUP BY statement
    LETTING avgResultCount = AVG(resultCount)
    ORDER BY avgResultCount DESC
    LIMIT $limit
    """

    return _run_query_tool_with_empty_message(
        ctx, statement_sql, limit=limit, empty_message=no_rows_message
    )
270+
271+
272+
def get_queries_using_primary_index(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Get completed requests that performed a primary index scan.

    Selects entries of the system:completed_requests catalog whose
    phaseCounts contain a `primaryScan` field, skipping statements whose
    text contains " system:", and orders them by resultCount, largest first.

    Args:
        limit: Number of queries to return (default: 10)

    Returns:
        List of queries that use primary indexes, ordered by result count,
        or a single message entry when the query produces no rows
    """
    no_rows_message = (
        "No queries using the primary index were found in system:completed_requests."
    )
    statement_sql = """
    SELECT *
    FROM system:completed_requests
    WHERE phaseCounts.`primaryScan` IS NOT MISSING
        AND UPPER(statement) NOT LIKE '% SYSTEM:%'
    ORDER BY resultCount DESC
    LIMIT $limit
    """

    return _run_query_tool_with_empty_message(
        ctx, statement_sql, limit=limit, empty_message=no_rows_message
    )
300+
301+
302+
def get_queries_not_using_covering_index(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Get completed requests that scanned an index and also fetched documents.

    Selects entries of the system:completed_requests catalog whose
    phaseCounts contain both an `indexScan` and a `fetch` field, skipping
    statements whose text contains " system:", and orders them by
    resultCount, largest first.

    Args:
        limit: Number of queries to return (default: 10)

    Returns:
        List of queries that perform index scans but also require fetches
        (not covering), or a single message entry when the query produces
        no rows
    """
    no_rows_message = (
        "No queries that require fetches after index scans were found "
        "in system:completed_requests."
    )
    statement_sql = """
    SELECT *
    FROM system:completed_requests
    WHERE phaseCounts.`indexScan` IS NOT MISSING
        AND phaseCounts.`fetch` IS NOT MISSING
        AND UPPER(statement) NOT LIKE '% SYSTEM:%'
    ORDER BY resultCount DESC
    LIMIT $limit
    """

    return _run_query_tool_with_empty_message(
        ctx, statement_sql, limit=limit, empty_message=no_rows_message
    )
332+
333+
334+
def get_queries_not_selective(ctx: Context, limit: int = 10) -> list[dict[str, Any]]:
    """Get the N statements whose index scans are least selective.

    Selects entries of the system:completed_requests catalog where the
    `indexScan` phase count exceeds resultCount, groups them by statement
    text and ranks the groups by the average difference between the two,
    largest first.

    Args:
        limit: Number of queries to return (default: 10)

    Returns:
        List of queries where index scans return significantly more documents
        than the final result, or a single message entry when the query
        produces no rows
    """
    no_rows_message = (
        "No non-selective queries were found in system:completed_requests."
    )
    statement_sql = """
    SELECT statement,
        AVG(phaseCounts.`indexScan` - resultCount) AS diff
    FROM system:completed_requests
    WHERE phaseCounts.`indexScan` > resultCount
    GROUP BY statement
    ORDER BY diff DESC
    LIMIT $limit
    """

    return _run_query_tool_with_empty_message(
        ctx, statement_sql, limit=limit, empty_message=no_rows_message
    )

0 commit comments

Comments
 (0)