diff --git a/package-lock.json b/package-lock.json index 86738bf..a25f469 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,7 +16,7 @@ "@libpg-query/parser": "^17.6.3", "@opentelemetry/api": "^1.9.0", "@pgsql/types": "^17.6.2", - "@query-doctor/core": "^0.10.4", + "@query-doctor/core": "^0.10.5", "async-sema": "^3.1.1", "capnweb": "^0.7.0", "dedent": "^1.7.1", @@ -1187,9 +1187,9 @@ "license": "BSD-3-Clause" }, "node_modules/@query-doctor/core": { - "version": "0.10.4", - "resolved": "https://registry.npmjs.org/@query-doctor/core/-/core-0.10.4.tgz", - "integrity": "sha512-n9a1hriF75ezcuMPW8ob8sYlGTlTftCzFfS4Na4mLkF4dlZqIrt+29tRRWd2mHEZoSL87H47sCsyg/XGqZpezQ==", + "version": "0.10.6", + "resolved": "https://registry.npmjs.org/@query-doctor/core/-/core-0.10.6.tgz", + "integrity": "sha512-Fr91NFDHUCfGrtEzg8sn6Tq0NNaZcX2cz5FZWFtFFQcdx0yjokRHrATbfAn6D9j8+yhckniOKHqtuKOAb0KO8Q==", "dependencies": { "@pgsql/types": "^17.6.2", "capnweb": "^0.7.0", diff --git a/package.json b/package.json index 0621f3e..0f70883 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,7 @@ "@libpg-query/parser": "^17.6.3", "@opentelemetry/api": "^1.9.0", "@pgsql/types": "^17.6.2", - "@query-doctor/core": "^0.10.4", + "@query-doctor/core": "^0.10.5", "async-sema": "^3.1.1", "capnweb": "^0.7.0", "dedent": "^1.7.1", diff --git a/src/remote/query-loader.test.ts b/src/remote/query-loader.test.ts index 33edca1..048efc1 100644 --- a/src/remote/query-loader.test.ts +++ b/src/remote/query-loader.test.ts @@ -34,6 +34,7 @@ function createMockRecentQuery(query: string): RecentQuery { tags: [], nudges: [], hash: "test_hash" as QueryHash, + normalizedHash: "test_normalized_hash" as QueryHash, seenAt: Date.now(), optimization: { state: "waiting" }, withOptimization: function () { diff --git a/src/remote/query-optimizer.test.ts b/src/remote/query-optimizer.test.ts index a71d862..0edc079 100644 --- a/src/remote/query-optimizer.test.ts +++ b/src/remote/query-optimizer.test.ts @@ -143,6 +143,7 @@ test("controller syncs correctly", async () => { [], [], 0 as any, + 0 as any, 1, ), ]); diff --git a/src/sql/recent-query.test.ts b/src/sql/recent-query.test.ts index 2c87787..b24c656 100644 --- a/src/sql/recent-query.test.ts +++ b/src/sql/recent-query.test.ts @@ -20,6 +20,7 @@ function makeRawQuery(overrides?: Partial): RawRecentQuery { } const testHash = QueryHash.parse("test-hash"); +const testNormalizedHash = QueryHash.parse("test-normalized-hash"); // --- isSelectQuery --- @@ -137,7 +138,7 @@ test("isTargetlessSelectQuery returns false when table references exist", () => test("constructor sets derived boolean properties correctly for a SELECT on user tables", () => { const refs: TableReference[] = [{ table: "users", schema: "public" }]; - const rq = new RecentQuery(makeRawQuery(), refs, [], [], [], testHash, 1000); + const rq = new RecentQuery(makeRawQuery(), refs, [], [], [], testHash, testNormalizedHash, 1000); expect(rq.isSelectQuery).toBe(true); expect(rq.isSystemQuery).toBe(false); expect(rq.isIntrospection).toBe(false); @@ -145,7 +146,7 @@ test("constructor sets derived boolean properties correctly for a SELECT on user }); test("constructor sets isTargetlessSelectQuery=true for SELECT with no table refs", () => { - const rq = new RecentQuery(makeRawQuery(), [], [], [], [], testHash, 1000); + const rq = new RecentQuery(makeRawQuery(), [], [], [], [], testHash, testNormalizedHash, 1000); expect(rq.isSelectQuery).toBe(true); expect(rq.isTargetlessSelectQuery).toBe(true); }); @@ -158,6 +159,7 @@ test("constructor sets isTargetlessSelectQuery=false for non-SELECT even with em [], [], testHash, + testNormalizedHash, 1000, ); expect(rq.isSelectQuery).toBe(false); @@ -174,7 +176,7 @@ test("constructor copies all data fields from RawRecentQuery", () => { rows: "0", topLevel: false, }); - const rq = new RecentQuery(data, [], [], [], [], testHash, 1000); + const rq = new RecentQuery(data, [], [], [], [], testHash, testNormalizedHash, 1000); expect(rq.username).toBe("admin"); expect(rq.query).toBe("SELECT 1"); expect(rq.formattedQuery).toBe("SELECT\n 1"); @@ -189,7 +191,7 @@ test("constructor copies all data fields from RawRecentQuery", () => { // --- withOptimization --- test("withOptimization attaches optimization to the instance", () => { - const rq = new RecentQuery(makeRawQuery(), [], [], [], [], testHash, 1000); + const rq = new RecentQuery(makeRawQuery(), [], [], [], [], testHash, testNormalizedHash, 1000); const optimization = { plan: "mock plan" } as any; const optimized = rq.withOptimization(optimization); expect(optimized.optimization).toBe(optimization); @@ -201,7 +203,7 @@ test("withOptimization attaches optimization to the instance", () => { test("analyze produces a RecentQuery with formatted query and analysis", async () => { const data = makeRawQuery({ query: "SELECT id FROM users WHERE id = $1" }); - const rq = await RecentQuery.analyze(data, testHash, 2000); + const rq = await RecentQuery.analyze(data, testHash, testNormalizedHash, 2000); expect(rq).toBeInstanceOf(RecentQuery); expect(rq.hash).toBe(testHash); expect(rq.seenAt).toBe(2000); @@ -214,7 +216,7 @@ test("analyze produces a RecentQuery with formatted query and analysis", async ( test("analyze throws on unparseable SQL", async () => { const data = makeRawQuery({ query: "THIS IS NOT VALID SQL AT ALL !!!" }); await expect( - RecentQuery.analyze(data, testHash, 3000), + RecentQuery.analyze(data, testHash, testNormalizedHash, 3000), ).rejects.toThrow(); }); @@ -222,7 +224,7 @@ test("analyze throws on unparseable SQL", async () => { test("analyze sets isSelectQuery=true for SELECT", async () => { const data = makeRawQuery({ query: "SELECT * FROM users" }); - const rq = await RecentQuery.analyze(data, testHash, 1000); + const rq = await RecentQuery.analyze(data, testHash, testNormalizedHash, 1000); expect(rq.isSelectQuery).toBe(true); }); @@ -230,7 +232,7 @@ test("analyze sets isSelectQuery=true for CTE with SELECT", async () => { const data = makeRawQuery({ query: "WITH cte AS (SELECT id FROM users) SELECT * FROM cte", }); - const rq = await RecentQuery.analyze(data, testHash, 1000); + const rq = await RecentQuery.analyze(data, testHash, testNormalizedHash, 1000); expect(rq.isSelectQuery).toBe(true); }); @@ -239,7 +241,7 @@ test("analyze sets isSelectQuery=false for UPDATE even with SELECT subquery", as query: 'UPDATE "public"."jobs" SET "state" = $1 FROM (SELECT id FROM "public"."jobs" WHERE state = $2 LIMIT 10) AS s1 WHERE "jobs".id = s1.id', }); - const rq = await RecentQuery.analyze(data, testHash, 1000); + const rq = await RecentQuery.analyze(data, testHash, testNormalizedHash, 1000); expect(rq.isSelectQuery).toBe(false); }); @@ -247,7 +249,7 @@ test("analyze sets isSelectQuery=false for INSERT ... SELECT", async () => { const data = makeRawQuery({ query: "INSERT INTO archive SELECT * FROM users WHERE active = false", }); - const rq = await RecentQuery.analyze(data, testHash, 1000); + const rq = await RecentQuery.analyze(data, testHash, testNormalizedHash, 1000); expect(rq.isSelectQuery).toBe(false); }); @@ -256,7 +258,7 @@ test("analyze sets isSelectQuery=false for DELETE with EXISTS subquery", async ( query: "DELETE FROM users WHERE EXISTS (SELECT 1 FROM banned WHERE banned.user_id = users.id)", }); - const rq = await RecentQuery.analyze(data, testHash, 1000); + const rq = await RecentQuery.analyze(data, testHash, testNormalizedHash, 1000); expect(rq.isSelectQuery).toBe(false); }); @@ -267,7 +269,7 @@ test("analyze populates displayQuery for wide SELECTs", async () => { query: 'SELECT "u"."id", "u"."email", "u"."first_name", "u"."last_name", "u"."created_at", "u"."updated_at", "u"."stripe_customer_id" FROM "users" "u" WHERE "u"."id" = $1', }); - const rq = await RecentQuery.analyze(data, testHash, 1000); + const rq = await RecentQuery.analyze(data, testHash, testNormalizedHash, 1000); // Normalize whitespace because the analyzer prettier-formats the query // before compacting; the site applies the same normalization on render. const normalized = rq.displayQuery?.replace(/\s+/g, " ").trim(); @@ -278,7 +280,7 @@ test("analyze populates displayQuery for wide SELECTs", async () => { test("analyze leaves displayQuery undefined for narrow SELECTs", async () => { const data = makeRawQuery({ query: "SELECT id FROM users WHERE id = $1" }); - const rq = await RecentQuery.analyze(data, testHash, 1000); + const rq = await RecentQuery.analyze(data, testHash, testNormalizedHash, 1000); expect(rq.displayQuery).toBeUndefined(); }); @@ -287,7 +289,7 @@ test("analyze leaves displayQuery undefined for non-SELECTs", async () => { query: "INSERT INTO archive SELECT a, b, c, d, e, f, g, h FROM users WHERE active = false", }); - const rq = await RecentQuery.analyze(data, testHash, 1000); + const rq = await RecentQuery.analyze(data, testHash, testNormalizedHash, 1000); expect(rq.displayQuery).toBeUndefined(); }); @@ -296,14 +298,14 @@ test("analyze leaves displayQuery undefined for UNION", async () => { query: "SELECT a, b, c, d, e, f, g FROM t UNION SELECT a, b, c, d, e, f, g FROM u", }); - const rq = await RecentQuery.analyze(data, testHash, 1000); + const rq = await RecentQuery.analyze(data, testHash, testNormalizedHash, 1000); expect(rq.displayQuery).toBeUndefined(); }); test("analyze strips sqlcommenter tags from formattedQuery", async () => { const data = makeRawQuery({ query: "select 1 /*a='1',b='2'*/" }); - const rq = await RecentQuery.analyze(data, testHash, 1000); + const rq = await RecentQuery.analyze(data, testHash, testNormalizedHash, 1000); expect(rq.tags).toEqual([ { key: "a", value: "1" }, { key: "b", value: "2" }, diff --git a/src/sql/recent-query.ts b/src/sql/recent-query.ts index 66f8304..12d6681 100644 --- a/src/sql/recent-query.ts +++ b/src/sql/recent-query.ts @@ -49,6 +49,7 @@ export class RecentQuery { readonly tags: SQLCommenterTag[], readonly nudges: Nudge[], readonly hash: QueryHash, + readonly normalizedHash: QueryHash, readonly seenAt: number, analysisSkipped = false, statementType?: StatementType, @@ -99,6 +100,7 @@ export class RecentQuery { tags: this.tags, nudges: this.nudges, hash: this.hash, + normalizedHash: this.normalizedHash, seenAt: this.seenAt, optimization: this.optimization, })); @@ -114,6 +116,7 @@ export class RecentQuery { static async analyze( data: RawRecentQuery, hash: QueryHash, + normalizedHash: QueryHash, seenAt: number, ) { if (data.query.length > RecentQuery.MAX_ANALYZABLE_QUERY_SIZE) { @@ -124,6 +127,7 @@ export class RecentQuery { [], [], hash, + normalizedHash, seenAt, true, ); @@ -144,6 +148,7 @@ export class RecentQuery { analysis.tags, analysis.nudges, hash, + normalizedHash, seenAt, false, analysis.statementType, diff --git a/src/sync/seen-cache.ts b/src/sync/seen-cache.ts index 9be282d..c92adc0 100644 --- a/src/sync/seen-cache.ts +++ b/src/sync/seen-cache.ts @@ -1,6 +1,10 @@ -import type { Postgres } from "@query-doctor/core"; -import { QueryHash, RawRecentQuery, RecentQuery } from "../sql/recent-query.ts"; -import { fingerprint } from "@libpg-query/parser"; +import { normalizedFingerprint, type Postgres } from "@query-doctor/core"; +import { + QueryHash, + RawRecentQuery, + RecentQuery, +} from "../sql/recent-query.ts"; +import { fingerprint, parse } from "@libpg-query/parser"; import { Sema } from "async-sema"; import { log } from "../log.ts"; @@ -56,7 +60,15 @@ export class QueryCache { await sema.acquire(); try { const key = await this.store(rawQuery); - return await RecentQuery.analyze(rawQuery, key, this.getFirstSeen(key)); + const normalizedHash = QueryHash.parse( + await normalizedFingerprint(await parse(rawQuery.query), fingerprint), + ); + return await RecentQuery.analyze( + rawQuery, + key, + normalizedHash, + this.getFirstSeen(key), + ); } catch (error) { log.error(`Failed to analyze query ${rawQuery.query}`, "query-cache") console.error(error)