diff --git a/bun.lock b/bun.lock index 66c4ab2..2ae43b1 100644 --- a/bun.lock +++ b/bun.lock @@ -1,6 +1,5 @@ { "lockfileVersion": 1, - "configVersion": 1, "workspaces": { "": { "name": "memorybench", @@ -8,9 +7,11 @@ "@ai-sdk/anthropic": "^2.0.56", "@ai-sdk/google": "^2.0.49", "@ai-sdk/openai": "^2.0.88", + "@anthropic-ai/tokenizer": "^0.0.4", "@getzep/zep-cloud": "^3.13.0", "ai": "^5.0.115", "drizzle-orm": "^0.45.1", + "js-tiktoken": "^1.0.21", "mem0ai": "^2.1.38", "supermemory": "^4.0.0", "zod": "^3.24.4", @@ -37,6 +38,8 @@ "@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.40.1", "", { "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", "abort-controller": "^3.0.0", "agentkeepalive": "^4.2.1", "form-data-encoder": "1.7.2", "formdata-node": "^4.3.2", "node-fetch": "^2.6.7" } }, "sha512-DJMWm8lTEM9Lk/MSFL+V+ugF7jKOn0M2Ujvb5fN8r2nY14aHbGPZ1k6sgjL+tpJ3VuOGJNG+4R83jEpOuYPv8w=="], + "@anthropic-ai/tokenizer": ["@anthropic-ai/tokenizer@0.0.4", "", { "dependencies": { "@types/node": "^18.11.18", "tiktoken": "^1.0.10" } }, "sha512-EHRKbxlxlc8W4KCBEseByJ7YwyYCmgu9OyN59H9+IYIGPoKv8tXyQXinkeGDI+cI8Tiuz9wk2jZb/kK7AyvL7g=="], + "@babel/code-frame": ["@babel/code-frame@7.27.1", "", { "dependencies": { "@babel/helper-validator-identifier": "^7.27.1", "js-tokens": "^4.0.0", "picocolors": "^1.1.1" } }, "sha512-cjQ7ZlQ0Mv3b47hABuTevyTuYN4i+loJKGeV9flcCgIK37cCXRh+L1bd3iBHlynerhQ7BhCkn2BPbQUL+rGqFg=="], "@babel/helper-validator-identifier": ["@babel/helper-validator-identifier@7.28.5", "", {}, "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q=="], @@ -121,7 +124,7 @@ "@types/jest": ["@types/jest@29.5.14", "", { "dependencies": { "expect": "^29.0.0", "pretty-format": "^29.0.0" } }, "sha512-ZN+4sdnLUbo8EVvVc2ao0GFW6oVrQRPn4K2lglySj7APvSrgzxHiNNK99us4WDMi57xxA2yggblIAMNhXOotLQ=="], - "@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="], + "@types/node": ["@types/node@18.19.130", "", { "dependencies": { "undici-types": "~5.26.4" } }, "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg=="], "@types/node-fetch": ["@types/node-fetch@2.6.13", "", { "dependencies": { "@types/node": "*", "form-data": "^4.0.4" } }, "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw=="], @@ -635,6 +638,8 @@ "tar-stream": ["tar-stream@2.2.0", "", { "dependencies": { "bl": "^4.0.3", "end-of-stream": "^1.4.1", "fs-constants": "^1.0.0", "inherits": "^2.0.3", "readable-stream": "^3.1.1" } }, "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ=="], + "tiktoken": ["tiktoken@1.0.22", "", {}, "sha512-PKvy1rVF1RibfF3JlXBSP0Jrcw2uq3yXdgcEXtKTYn3QJ/cBRBHDnrJ5jHky+MENZ6DIPwNUGWpkVx+7joCpNA=="], + "to-regex-range": ["to-regex-range@5.0.1", "", { "dependencies": { "is-number": "^7.0.0" } }, "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ=="], "tr46": ["tr46@0.0.3", "", {}, "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="], @@ -647,7 +652,7 @@ "undici": ["undici@5.28.5", "", { "dependencies": { "@fastify/busboy": "^2.0.0" } }, "sha512-zICwjrDrcrUE0pyyJc1I2QzBkLM8FINsgOrt6WjA+BgajVq9Nxu2PbFFXUrAggLfDXlZGZBVZYw7WNV5KiBiBA=="], - "undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], + "undici-types": ["undici-types@5.26.5", "", {}, "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="], "unique-filename": ["unique-filename@1.1.1", "", { "dependencies": { "unique-slug": "^2.0.0" } }, "sha512-Vmp0jIp2ln35UTXuryvjzkjGdRyf9b2lTXuSYUiPmzRcl3FDtYqAwOnTJkAngD9SWhnoJzDbTKwaOrZ+STtxNQ=="], @@ -685,22 +690,32 @@ "zod-to-json-schema": ["zod-to-json-schema@3.25.0", "", { "peerDependencies": { "zod": "^3.25 || ^4" } }, "sha512-HvWtU2UG41LALjajJrML6uQejQhNJx+JBO9IflpSja4R03iNWfKXrj6W2h7ljuLyc1nKS+9yDyL/9tD1U/yBnQ=="], - "@anthropic-ai/sdk/@types/node": ["@types/node@18.19.130", "", { "dependencies": { "undici-types": "~5.26.4" } }, "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg=="], - "@isaacs/cliui/string-width": ["string-width@5.1.2", "", { "dependencies": { "eastasianwidth": "^0.2.0", "emoji-regex": "^9.2.2", "strip-ansi": "^7.0.1" } }, "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA=="], "@isaacs/cliui/strip-ansi": ["strip-ansi@7.1.2", "", { "dependencies": { "ansi-regex": "^6.0.1" } }, "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA=="], + "@jest/types/@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="], + "@langchain/core/uuid": ["uuid@10.0.0", "", { "bin": { "uuid": "dist/bin/uuid" } }, "sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ=="], + "@types/better-sqlite3/@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="], + + "@types/node-fetch/@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="], + + "@types/pg/@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="], + + "@types/sqlite3/@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="], + + "@types/ws/@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="], + "bl/buffer": ["buffer@5.7.1", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.1.13" } }, "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ=="], + "bun-types/@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="], + "cacache/minipass": ["minipass@3.3.6", "", { "dependencies": { "yallist": "^4.0.0" } }, "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw=="], "chalk/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], - "cloudflare/@types/node": ["@types/node@18.19.130", "", { "dependencies": { "undici-types": "~5.26.4" } }, "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg=="], - "foreground-child/signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="], "formdata-node/web-streams-polyfill": ["web-streams-polyfill@4.0.0-beta.3", "", {}, "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug=="], @@ -713,7 +728,7 @@ "gaxios/rimraf": ["rimraf@5.0.10", "", { "dependencies": { "glob": "^10.3.7" }, "bin": { "rimraf": "dist/esm/bin.mjs" } }, "sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ=="], - "groq-sdk/@types/node": ["@types/node@18.19.130", "", { "dependencies": { "undici-types": "~5.26.4" } }, "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg=="], + "jest-util/@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="], "langsmith/uuid": ["uuid@10.0.0", "", { "bin": { "uuid": "dist/bin/uuid" } }, "sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ=="], @@ -731,8 +746,6 @@ "minizlib/minipass": ["minipass@3.3.6", "", { "dependencies": { "yallist": "^4.0.0" } }, "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw=="], - "openai/@types/node": ["@types/node@18.19.130", "", { "dependencies": { "undici-types": "~5.26.4" } }, "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg=="], - "path-scurry/lru-cache": ["lru-cache@10.4.3", "", {}, "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="], "path-scurry/minipass": ["minipass@7.1.2", "", {}, "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw=="], @@ -753,21 +766,29 @@ "wrap-ansi-cjs/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], - "@anthropic-ai/sdk/@types/node/undici-types": ["undici-types@5.26.5", "", {}, "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="], - "@isaacs/cliui/string-width/emoji-regex": ["emoji-regex@9.2.2", "", {}, "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg=="], "@isaacs/cliui/strip-ansi/ansi-regex": ["ansi-regex@6.2.2", "", {}, "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg=="], - "cloudflare/@types/node/undici-types": ["undici-types@5.26.5", "", {}, "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="], + "@jest/types/@types/node/undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], + + "@types/better-sqlite3/@types/node/undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], + + "@types/node-fetch/@types/node/undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], + + "@types/pg/@types/node/undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], + + "@types/sqlite3/@types/node/undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], + + "@types/ws/@types/node/undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], + + "bun-types/@types/node/undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], "gaxios/https-proxy-agent/agent-base": ["agent-base@7.1.4", "", {}, "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ=="], "gaxios/rimraf/glob": ["glob@10.5.0", "", { "dependencies": { "foreground-child": "^3.1.0", "jackspeak": "^3.1.2", "minimatch": "^9.0.4", "minipass": "^7.1.2", "package-json-from-dist": "^1.0.0", "path-scurry": "^1.11.1" }, "bin": { "glob": "dist/esm/bin.mjs" } }, "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg=="], - "groq-sdk/@types/node/undici-types": ["undici-types@5.26.5", "", {}, "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="], - - "openai/@types/node/undici-types": ["undici-types@5.26.5", "", {}, "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="], + "jest-util/@types/node/undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], "pg/pg-types/postgres-array": ["postgres-array@2.0.0", "", {}, "sha512-VpZrUqU5A69eQyW2c5CA1jtLecCsN2U/bD6VilrFDWq5+5UIEVO7nazS3TEcHf1zuPYO/sqGvUvW62g86RXZuA=="], diff --git a/package.json b/package.json index 7337b01..7c3d10d 100644 --- a/package.json +++ b/package.json @@ -12,9 +12,11 @@ "@ai-sdk/anthropic": "^2.0.56", "@ai-sdk/google": "^2.0.49", "@ai-sdk/openai": "^2.0.88", + "@anthropic-ai/tokenizer": "^0.0.4", "@getzep/zep-cloud": "^3.13.0", "ai": "^5.0.115", "drizzle-orm": "^0.45.1", + "js-tiktoken": "^1.0.21", "mem0ai": "^2.1.38", "supermemory": "^4.0.0", "zod": "^3.24.4" diff --git a/src/orchestrator/phases/answer.ts b/src/orchestrator/phases/answer.ts index c8f7d77..8c1b540 100644 --- a/src/orchestrator/phases/answer.ts +++ b/src/orchestrator/phases/answer.ts @@ -14,6 +14,7 @@ import { buildDefaultAnswerPrompt } from "../../prompts/defaults" import { buildContextString } from "../../types/prompts" import { ConcurrentExecutor } from "../concurrent" import { resolveConcurrency } from "../../types/concurrency" +import { countTokens } from "../../utils/tokens" type LanguageModel = | ReturnType @@ -118,8 +119,16 @@ export async function runAnswerPhase( const context: unknown[] = searchData.results || [] const questionDate = checkpoint.questions[question.questionId]?.questionDate + // Build prompts to count tokens separately + const basePrompt = buildAnswerPrompt(question.question, [], questionDate, provider) + const contextStr = buildContextString(context) const prompt = buildAnswerPrompt(question.question, context, questionDate, provider) + // Count tokens separately for better analytics + const basePromptTokens = countTokens(basePrompt, modelConfig) + const contextTokens = countTokens(contextStr, modelConfig) + const promptTokens = countTokens(prompt, modelConfig) + const params: Record = { model: client(modelConfig.id), prompt, @@ -136,11 +145,18 @@ export async function runAnswerPhase( checkpointManager.updatePhase(checkpoint, question.questionId, "answer", { status: "completed", hypothesis: text.trim(), + promptTokens, + basePromptTokens, + contextTokens, completedAt: new Date().toISOString(), durationMs, }) - logger.progress(index + 1, total, `Answered ${question.questionId} (${durationMs}ms)`) + logger.progress( + index + 1, + total, + `Answered ${question.questionId} (${durationMs}ms, ${promptTokens} tokens: ${basePromptTokens} base + ${contextTokens} context)` + ) return { questionId: question.questionId, durationMs } } catch (e) { const error = e instanceof Error ? e.message : String(e) diff --git a/src/orchestrator/phases/report.ts b/src/orchestrator/phases/report.ts index f6cd15c..f27c3c8 100644 --- a/src/orchestrator/phases/report.ts +++ b/src/orchestrator/phases/report.ts @@ -9,6 +9,7 @@ import type { QuestionTypeStats, RetrievalMetrics, RetrievalAggregates, + TokenMetrics, } from "../../types/unified" import { logger } from "../../utils/logger" @@ -185,6 +186,43 @@ export function generateReport(benchmark: Benchmark, checkpoint: RunCheckpoint): const overallRetrieval = aggregateRetrievalMetrics(allRetrievalMetrics) + // Aggregate token metrics + let tokenMetrics: TokenMetrics | undefined + const allPromptTokens: number[] = [] + const allBasePromptTokens: number[] = [] + const allContextTokens: number[] = [] + + for (const question of questions) { + const qCheckpoint = checkpoint.questions[question.questionId] + if (!qCheckpoint) continue + + const answerPhase = qCheckpoint.phases.answer + if (answerPhase.status === "completed") { + if (answerPhase.promptTokens) allPromptTokens.push(answerPhase.promptTokens) + if (answerPhase.basePromptTokens) allBasePromptTokens.push(answerPhase.basePromptTokens) + if (answerPhase.contextTokens) allContextTokens.push(answerPhase.contextTokens) + } + } + + if (allPromptTokens.length > 0) { + const totalTokens = allPromptTokens.reduce((a, b) => a + b, 0) + const totalBasePromptTokens = allBasePromptTokens.reduce((a, b) => a + b, 0) + const totalContextTokens = allContextTokens.reduce((a, b) => a + b, 0) + + tokenMetrics = { + totalTokens, + basePromptTokens: totalBasePromptTokens, + contextTokens: totalContextTokens, + avgTokensPerQuestion: Math.round(totalTokens / allPromptTokens.length), + avgBasePromptTokens: allBasePromptTokens.length > 0 + ? Math.round(totalBasePromptTokens / allBasePromptTokens.length) + : 0, + avgContextTokens: allContextTokens.length > 0 + ? Math.round(totalContextTokens / allContextTokens.length) + : 0, + } + } + const totalQuestions = evaluations.length const correctCount = evaluations.filter((e) => e.score === 1).length const accuracy = totalQuestions > 0 ? correctCount / totalQuestions : 0 @@ -210,6 +248,7 @@ export function generateReport(benchmark: Benchmark, checkpoint: RunCheckpoint): evaluate: calculateLatencyStats(evaluateDurations), total: calculateLatencyStats(totalDurations), }, + tokens: tokenMetrics, retrieval: overallRetrieval, byQuestionType, questionTypeRegistry: benchmark.getQuestionTypes(), diff --git a/src/types/checkpoint.ts b/src/types/checkpoint.ts index 48c89e0..f8f1180 100644 --- a/src/types/checkpoint.ts +++ b/src/types/checkpoint.ts @@ -55,6 +55,9 @@ export interface SearchPhaseCheckpoint { export interface AnswerPhaseCheckpoint { status: PhaseStatus hypothesis?: string + promptTokens?: number + basePromptTokens?: number + contextTokens?: number startedAt?: string completedAt?: string durationMs?: number diff --git a/src/types/unified.ts b/src/types/unified.ts index a67defe..06e205b 100644 --- a/src/types/unified.ts +++ b/src/types/unified.ts @@ -91,6 +91,15 @@ export interface QuestionTypeStats { retrieval?: RetrievalAggregates } +export interface TokenMetrics { + totalTokens: number + basePromptTokens: number + contextTokens: number + avgTokensPerQuestion: number + avgBasePromptTokens: number + avgContextTokens: number +} + export interface BenchmarkResult { provider: string benchmark: string @@ -112,6 +121,7 @@ export interface BenchmarkResult { evaluate: LatencyStats total: LatencyStats } + tokens?: TokenMetrics retrieval?: RetrievalAggregates byQuestionType: Record questionTypeRegistry?: QuestionTypeRegistry diff --git a/src/utils/tokens.ts b/src/utils/tokens.ts new file mode 100644 index 0000000..383014d --- /dev/null +++ b/src/utils/tokens.ts @@ -0,0 +1,54 @@ +import { Tiktoken } from "js-tiktoken" +import cl100k_base from "js-tiktoken/ranks/cl100k_base" +import o200k_base from "js-tiktoken/ranks/o200k_base" +import { countTokens as countAnthropicTokens } from "@anthropic-ai/tokenizer" +import type { ModelConfig } from "./models" + +/** + * Count tokens in a text string based on the model being used + */ +export function countTokens(text: string, modelConfig: ModelConfig): number { + const provider = modelConfig.provider + + if (provider === "openai") { + return countOpenAITokens(text, modelConfig.id) + } else if (provider === "anthropic") { + return countAnthropicTokens(text) + } else if (provider === "google") { + // Google doesn't have a standard tokenizer for JS + // Use approximation: ~4 characters per token + return Math.ceil(text.length / 4) + } + + // Fallback approximation + return Math.ceil(text.length / 4) +} + +/** + * Count tokens for OpenAI models using tiktoken + */ +function countOpenAITokens(text: string, modelId: string): number { + // Determine which encoding to use based on model + // o200k_base is used for GPT-4o and newer models + // cl100k_base is used for GPT-4, GPT-3.5-turbo + try { + let encoding: Tiktoken + + if ( + modelId.includes("gpt-4o") || + modelId.includes("gpt-4.1") || + modelId.includes("gpt-5") + ) { + encoding = new Tiktoken(o200k_base) + } else { + // Default to cl100k_base for other GPT-4 models + encoding = new Tiktoken(cl100k_base) + } + + const tokens = encoding.encode(text) + return tokens.length + } catch (error) { + // Fallback to approximation if encoding fails + return Math.ceil(text.length / 4) + } +} diff --git a/ui/app/runs/[runId]/questions/[questionId]/page.tsx b/ui/app/runs/[runId]/questions/[questionId]/page.tsx index dae1dea..d3f7327 100644 --- a/ui/app/runs/[runId]/questions/[questionId]/page.tsx +++ b/ui/app/runs/[runId]/questions/[questionId]/page.tsx @@ -162,7 +162,14 @@ export default function QuestionDetailPage() { : "border-status-error/30 bg-status-error/5" )} > -

Model Answer

+
+

Model Answer

+ {question.phases?.answer?.promptTokens && ( + + {question.phases.answer.promptTokens.toLocaleString()} tokens + + )} +

-
- Model Answer +
+ Model Answer + {q.phases.answer.promptTokens && ( + + {q.phases.answer.promptTokens.toLocaleString()} tokens + + )}
{q.phases.answer.hypothesis || "—"}