From 0ab159affab7f2c2dd300cf0c41776702cf154d7 Mon Sep 17 00:00:00 2001
From: kagura-agent
Date: Thu, 23 Apr 2026 17:33:19 +0800
Subject: [PATCH] fix(memory): add quality gates to heuristic fact extractor (#84)

- Add word count filter (5-150 words) to reject short Slack fragments
  and long 'thinking out loud' messages
- Add truncation detection: reject messages that do not end in
  sentence-ending punctuation (. ! ? ; :), which are likely cut off
- Add text-based deduplication within same session (case- and
  whitespace-insensitive)
- Lower heuristic confidence from 0.8/0.9 to 0.4 (appropriate for
  pattern-based extraction without LLM validation)
- Add 18 tests covering all quality gates and edge cases
---
 .../__tests__/consolidation-facts.test.ts     | 254 ++++++++++++++++++
 src/memory/__tests__/consolidation.test.ts    |   4 +-
 src/memory/consolidation.ts                   |  48 ++-
 3 files changed, 302 insertions(+), 4 deletions(-)
 create mode 100644 src/memory/__tests__/consolidation-facts.test.ts

diff --git a/src/memory/__tests__/consolidation-facts.test.ts b/src/memory/__tests__/consolidation-facts.test.ts
new file mode 100644
index 00000000..ce8e7199
--- /dev/null
+++ b/src/memory/__tests__/consolidation-facts.test.ts
@@ -0,0 +1,254 @@
+import { describe, expect, mock, test } from "bun:test";
+import { type SessionData, consolidateSession } from "../consolidation.ts";
+import type { MemorySystem } from "../system.ts";
+
+function makeTestSessionData(overrides?: Partial<SessionData>): SessionData {
+  return {
+    sessionId: "sdk-session-1",
+    sessionKey: "cli:local",
+    userId: "user-1",
+    userMessages: [],
+    assistantMessages: [],
+    toolsUsed: [],
+    filesTracked: [],
+    startedAt: new Date(Date.now() - 300000).toISOString(),
+    endedAt: new Date().toISOString(),
+    costUsd: 0.01,
+    outcome: "success",
+    ...overrides,
+  };
+}
+
+function createMockMemory(): {
+  memory: MemorySystem;
+  storedFacts: Array<Record<string, unknown>>;
+} {
+  const storedFacts: Array<Record<string, unknown>> = [];
+
+  const memory = {
+    storeEpisode: mock(() => Promise.resolve("episode-id")),
+    storeFact: mock((fact: Record<string, unknown>) => {
+      storedFacts.push(fact);
+      return Promise.resolve(fact.id as string);
+    }),
+  } as unknown as MemorySystem;
+
+  return { memory, storedFacts };
+}
+
+describe("extractFactsFromSession quality gates", () => {
+  test("rejects messages with fewer than 5 words", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["No way", "Actually no", "Wrong thing here", "No that is wrong"],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(0);
+  });
+
+  test("accepts messages with exactly 5 words", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["Actually that is wrong here."],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(1);
+    expect(storedFacts[0].confidence).toBe(0.4);
+  });
+
+  test("rejects messages with more than 150 words", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const words = Array(151).fill("word").join(" ");
+    const data = makeTestSessionData({
+      userMessages: [`Actually ${words}.`],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(0);
+  });
+
+  test("accepts messages with exactly 150 words", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const words = Array(149).fill("word").join(" ");
+    const data = makeTestSessionData({
+      userMessages: [`Actually ${words}.`],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(1);
+    expect(storedFacts[0].confidence).toBe(0.4);
+  });
+
+  test("rejects messages that appear truncated (no sentence-ending punctuation)", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["Actually I prefer using tabs instead of", "No that is wrong about the configuration"],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(0);
+  });
+
+  test("accepts messages ending with period", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["Actually I prefer using tabs instead of spaces."],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(2);
+  });
+
+  test("accepts messages ending with exclamation mark", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["No that is completely wrong!"],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(1);
+  });
+
+  test("accepts messages ending with question mark", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["Actually should we use tabs instead?"],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(1);
+  });
+
+  test("accepts messages ending with semicolon", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["I prefer using tabs for indentation;"],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(1);
+  });
+
+  test("accepts messages ending with colon", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["Always use these tools for development:"],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(1);
+  });
+
+  test("correction facts have confidence 0.4", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["Actually the port is five thousand."],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(1);
+    expect(storedFacts[0].tags).toContain("correction");
+    expect(storedFacts[0].confidence).toBe(0.4);
+  });
+
+  test("preference facts have confidence 0.4", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["I prefer using tabs over spaces."],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(1);
+    expect(storedFacts[0].tags).toContain("preference");
+    expect(storedFacts[0].confidence).toBe(0.4);
+  });
+
+  test("deduplicates identical messages within same session", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["I prefer using tabs over spaces.", "I prefer using tabs over spaces."],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(1);
+  });
+
+  test("deduplicates messages differing only in whitespace", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["I prefer using tabs over spaces.", "I prefer  using   tabs over spaces."],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(1);
+  });
+
+  test("deduplicates messages differing only in case", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["I prefer using tabs over spaces.", "I PREFER USING TABS OVER SPACES."],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(1);
+  });
+
+  test("does not deduplicate genuinely different messages", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["I prefer using tabs over spaces.", "I prefer using semicolons at line ends."],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(2);
+  });
+
+  test("applies all quality gates together", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: [
+        "No way",
+        "Actually this is the correct approach.",
+        "I prefer tabs",
+        "Wrong that is incorrect configuration here",
+        Array(151).fill("word").join(" "),
+        "I prefer using semicolons at line ends.",
+        "I prefer using semicolons at line ends.",
+      ],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(2);
+    expect(storedFacts.every((f) => f.confidence === 0.4)).toBe(true);
+  });
+
+  test("issue #84 regression: short Slack fragments are rejected", async () => {
+    const { memory, storedFacts } = createMockMemory();
+    const data = makeTestSessionData({
+      userMessages: ["No thanks."],
+    });
+
+    await consolidateSession(memory, data);
+
+    expect(storedFacts.length).toBe(0);
+  });
+});
diff --git a/src/memory/__tests__/consolidation.test.ts b/src/memory/__tests__/consolidation.test.ts
index ed088ef7..10d7acaf 100644
--- a/src/memory/__tests__/consolidation.test.ts
+++ b/src/memory/__tests__/consolidation.test.ts
@@ -99,7 +99,7 @@ describe("consolidateSession", () => {
   test("extracts correction facts from user messages", async () => {
     const { memory, storedFacts } = createMockMemory();
     const data = makeTestSessionData({
-      userMessages: ["Actually, the staging server is on port 3001 not 3000", "Deploy it now"],
+      userMessages: ["Actually, the staging server is on port 3001 not 3000.", "Deploy it now"],
     });
 
     const result = await consolidateSession(memory, data);
@@ -113,7 +113,7 @@ describe("consolidateSession", () => {
   test("extracts preference facts from user messages", async () => {
     const { memory, storedFacts } = createMockMemory();
     const data = makeTestSessionData({
-      userMessages: ["I prefer PRs over direct pushes", "Please always use feature branches"],
+      userMessages: ["I prefer PRs over direct pushes.", "Please always use feature branches."],
     });
 
     const result = await consolidateSession(memory, data);
diff --git a/src/memory/consolidation.ts b/src/memory/consolidation.ts
index a6e4cc1e..a4d4d4a0 100644
--- a/src/memory/consolidation.ts
+++ b/src/memory/consolidation.ts
@@ -105,11 +105,27 @@ function calculateImportance(data: SessionData): number {
 function extractFactsFromSession(data: SessionData, episodeId: string): SemanticFact[] {
   const facts: SemanticFact[] = [];
   const now = new Date().toISOString();
+  const seenNormalizedTexts = new Set<string>();
 
   for (const message of data.userMessages) {
+    const wordCount = countWords(message);
+    if (wordCount < 5 || wordCount > 150) {
+      continue;
+    }
+
+    if (isTruncated(message)) {
+      continue;
+    }
+
+    const normalizedText = normalizeFactText(message);
+    if (seenNormalizedTexts.has(normalizedText)) {
+      continue;
+    }
+
     const lower = message.toLowerCase();
 
     if (matchesCorrectionPattern(lower)) {
+      seenNormalizedTexts.add(normalizedText);
       facts.push({
         id: crypto.randomUUID(),
         subject: "user_correction",
@@ -117,7 +133,7 @@ function extractFactsFromSession(data: SessionData, episodeId: string): Semantic
         object: message.slice(0, 200),
         natural_language: message.slice(0, 300),
         source_episode_ids: [episodeId],
-        confidence: 0.8,
+        confidence: 0.4,
         valid_from: now,
         valid_until: null,
         version: 1,
@@ -128,6 +144,7 @@ function extractFactsFromSession(data: SessionData, episodeId: string): Semantic
     }
 
     if (matchesPreferencePattern(lower)) {
+      seenNormalizedTexts.add(normalizedText);
       facts.push({
         id: crypto.randomUUID(),
         subject: "user",
@@ -135,7 +152,7 @@ function extractFactsFromSession(data: SessionData, episodeId: string): Semantic
         object: message.slice(0, 200),
         natural_language: message.slice(0, 300),
         source_episode_ids: [episodeId],
-        confidence: 0.9,
+        confidence: 0.4,
         valid_from: now,
         valid_until: null,
         version: 1,
@@ -148,3 +165,30 @@ function extractFactsFromSession(data: SessionData, episodeId: string): Semantic
 
   return facts;
 }
+
+/** Counts whitespace-separated tokens in `text`; empty or whitespace-only input yields 0. */
+function countWords(text: string): number {
+  return text
+    .trim()
+    .split(/\s+/)
+    .filter((word) => word.length > 0).length;
+}
+
+/**
+ * Heuristic truncation check: a non-empty message counts as truncated unless
+ * its last character (after trimming) is one of `. ! ? ; :`.
+ */
+function isTruncated(text: string): boolean {
+  const trimmed = text.trim();
+  if (trimmed.length === 0) {
+    return false;
+  }
+  // Anchored regex instead of `trimmed[trimmed.length - 1]`: indexed string access
+  // is typed `string | undefined` when `noUncheckedIndexedAccess` is enabled.
+  return !/[.!?;:]$/.test(trimmed);
+}
+
+/** Builds the dedup key: lowercases, collapses whitespace runs to one space, trims. */
+function normalizeFactText(text: string): string {
+  return text.toLowerCase().replace(/\s+/g, " ").trim();
+}