Skip to content

Commit 24a3253

Browse files
Claudeclaude
andcommitted
Add local inference service with summarize command
Adds local GGUF model inference using llama.cpp via yzma for task summarization and branch name generation.

Key components:
- InferenceService: Handles model loading and text generation
- ModelDownloader: Downloads and caches GGUF models from HuggingFace
- LibraryDownloader: Auto-downloads llama.cpp libraries for current platform
- summarize command: CLI interface for generating summaries
- download command: Pre-download model and libraries
- REST API endpoint: POST /v1/inference/summarize

Critical fix: Must use addSpecial=true when tokenizing prompts for Gemma models to include BOS token - without this, the model produces incorrect outputs (was outputting examples from the prompt instead of actual summaries).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent b8fb5b6 commit 24a3253

File tree

13 files changed

+1663
-4
lines changed

13 files changed

+1663
-4
lines changed

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,9 @@ container/internal/assets/dist/*
6464
# Xcode user-specific files
6565
**/xcuserdata/
6666
xcode/build/
67+
68+
# Inference: llama.cpp libraries and GGUF models
69+
# Libraries are downloaded at build time, not committed
70+
container/models/lib/
71+
models/*.gguf
72+
*.gguf

container/.goreleaser.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,17 @@ archives:
111111
# Copy the entire signed app bundle - keep the Catnip.app directory name
112112
- src: "dist/catnip-macos_darwin_amd64_v1/Catnip.app"
113113
dst: "Catnip.app"
114+
# Include llama.cpp libraries for local inference
115+
- src: "models/lib/darwin/amd64/build/bin/libllama.dylib"
116+
dst: "lib/libllama.dylib"
117+
- src: "models/lib/darwin/amd64/build/bin/libggml.dylib"
118+
dst: "lib/libggml.dylib"
119+
- src: "models/lib/darwin/amd64/build/bin/libggml-metal.dylib"
120+
dst: "lib/libggml-metal.dylib"
121+
- src: "models/lib/darwin/amd64/build/bin/libggml-base.dylib"
122+
dst: "lib/libggml-base.dylib"
123+
- src: "models/lib/darwin/amd64/build/bin/libggml-cpu.dylib"
124+
dst: "lib/libggml-cpu.dylib"
114125
# Documentation files
115126
- README.md
116127
- LICENSE
@@ -128,6 +139,17 @@ archives:
128139
# Copy the entire signed app bundle - keep the Catnip.app directory name
129140
- src: "dist/catnip-macos_darwin_arm64_v8.0/Catnip.app"
130141
dst: "Catnip.app"
142+
# Include llama.cpp libraries for local inference
143+
- src: "models/lib/darwin/arm64/build/bin/libllama.dylib"
144+
dst: "lib/libllama.dylib"
145+
- src: "models/lib/darwin/arm64/build/bin/libggml.dylib"
146+
dst: "lib/libggml.dylib"
147+
- src: "models/lib/darwin/arm64/build/bin/libggml-metal.dylib"
148+
dst: "lib/libggml-metal.dylib"
149+
- src: "models/lib/darwin/arm64/build/bin/libggml-base.dylib"
150+
dst: "lib/libggml-base.dylib"
151+
- src: "models/lib/darwin/arm64/build/bin/libggml-cpu.dylib"
152+
dst: "lib/libggml-cpu.dylib"
131153
# Documentation files
132154
- README.md
133155
- LICENSE

container/docs/docs.go

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1315,6 +1315,78 @@ const docTemplate = `{
13151315
}
13161316
}
13171317
},
1318+
"/v1/inference/status": {
1319+
"get": {
1320+
"description": "Check if local inference is available and get service information",
1321+
"produces": [
1322+
"application/json"
1323+
],
1324+
"tags": [
1325+
"inference"
1326+
],
1327+
"summary": "Get inference service status",
1328+
"responses": {
1329+
"200": {
1330+
"description": "Inference service status",
1331+
"schema": {
1332+
"$ref": "#/definitions/internal_handlers.InferenceStatusResponse"
1333+
}
1334+
}
1335+
}
1336+
}
1337+
},
1338+
"/v1/inference/summarize": {
1339+
"post": {
1340+
"description": "Generate a short task summary and git branch name using local GGUF model",
1341+
"consumes": [
1342+
"application/json"
1343+
],
1344+
"produces": [
1345+
"application/json"
1346+
],
1347+
"tags": [
1348+
"inference"
1349+
],
1350+
"summary": "Summarize task and generate branch name",
1351+
"parameters": [
1352+
{
1353+
"description": "Summarization request",
1354+
"name": "request",
1355+
"in": "body",
1356+
"required": true,
1357+
"schema": {
1358+
"$ref": "#/definitions/internal_handlers.SummarizeRequest"
1359+
}
1360+
}
1361+
],
1362+
"responses": {
1363+
"200": {
1364+
"description": "Successfully generated summary and branch name",
1365+
"schema": {
1366+
"$ref": "#/definitions/internal_handlers.SummarizeResponse"
1367+
}
1368+
},
1369+
"400": {
1370+
"description": "Invalid request",
1371+
"schema": {
1372+
"$ref": "#/definitions/fiber.Map"
1373+
}
1374+
},
1375+
"500": {
1376+
"description": "Inference error",
1377+
"schema": {
1378+
"$ref": "#/definitions/fiber.Map"
1379+
}
1380+
},
1381+
"503": {
1382+
"description": "Inference not available on this platform",
1383+
"schema": {
1384+
"$ref": "#/definitions/fiber.Map"
1385+
}
1386+
}
1387+
}
1388+
}
1389+
},
13181390
"/v1/notifications": {
13191391
"post": {
13201392
"description": "Sends a notification event to all connected SSE clients, including the TUI app which can display native macOS notifications",
@@ -1846,6 +1918,10 @@ const docTemplate = `{
18461918
}
18471919
},
18481920
"definitions": {
1921+
"fiber.Map": {
1922+
"type": "object",
1923+
"additionalProperties": true
1924+
},
18491925
"github_com_vanpelt_catnip_internal_models.ClaudeActivityState": {
18501926
"type": "string",
18511927
"enum": [
@@ -3148,6 +3224,37 @@ const docTemplate = `{
31483224
}
31493225
}
31503226
},
3227+
"internal_handlers.InferenceStatusResponse": {
3228+
"description": "Status of the local inference service",
3229+
"type": "object",
3230+
"properties": {
3231+
"architecture": {
3232+
"description": "Architecture (amd64, arm64)",
3233+
"type": "string",
3234+
"example": "arm64"
3235+
},
3236+
"available": {
3237+
"description": "Whether inference is available on this platform",
3238+
"type": "boolean",
3239+
"example": true
3240+
},
3241+
"error": {
3242+
"description": "Error message if initialization failed",
3243+
"type": "string",
3244+
"example": "model not found"
3245+
},
3246+
"modelPath": {
3247+
"description": "Model path if loaded",
3248+
"type": "string",
3249+
"example": "/Users/user/.catnip/models/gemma3-270m-summarizer-Q4_K_M.gguf"
3250+
},
3251+
"platform": {
3252+
"description": "Platform name (darwin, linux, windows)",
3253+
"type": "string",
3254+
"example": "darwin"
3255+
}
3256+
}
3257+
},
31513258
"internal_handlers.NotificationPayload": {
31523259
"type": "object",
31533260
"properties": {
@@ -3186,6 +3293,33 @@ const docTemplate = `{
31863293
"$ref": "#/definitions/internal_handlers.ActiveSessionInfo"
31873294
}
31883295
},
3296+
"internal_handlers.SummarizeRequest": {
3297+
"description": "Request to summarize a task and generate a branch name",
3298+
"type": "object",
3299+
"properties": {
3300+
"prompt": {
3301+
"description": "Task description or code changes to summarize",
3302+
"type": "string",
3303+
"example": "Add user authentication with OAuth2"
3304+
}
3305+
}
3306+
},
3307+
"internal_handlers.SummarizeResponse": {
3308+
"description": "Response containing task summary and suggested branch name",
3309+
"type": "object",
3310+
"properties": {
3311+
"branchName": {
3312+
"description": "Git branch name in kebab-case with category prefix",
3313+
"type": "string",
3314+
"example": "feat/add-user-auth"
3315+
},
3316+
"summary": {
3317+
"description": "2-4 word summary in Title Case",
3318+
"type": "string",
3319+
"example": "Add User Auth"
3320+
}
3321+
}
3322+
},
31893323
"internal_handlers.UploadResponse": {
31903324
"description": "Response containing upload status and file location",
31913325
"type": "object",

container/docs/swagger.json

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,6 +1312,78 @@
13121312
}
13131313
}
13141314
},
1315+
"/v1/inference/status": {
1316+
"get": {
1317+
"description": "Check if local inference is available and get service information",
1318+
"produces": [
1319+
"application/json"
1320+
],
1321+
"tags": [
1322+
"inference"
1323+
],
1324+
"summary": "Get inference service status",
1325+
"responses": {
1326+
"200": {
1327+
"description": "Inference service status",
1328+
"schema": {
1329+
"$ref": "#/definitions/internal_handlers.InferenceStatusResponse"
1330+
}
1331+
}
1332+
}
1333+
}
1334+
},
1335+
"/v1/inference/summarize": {
1336+
"post": {
1337+
"description": "Generate a short task summary and git branch name using local GGUF model",
1338+
"consumes": [
1339+
"application/json"
1340+
],
1341+
"produces": [
1342+
"application/json"
1343+
],
1344+
"tags": [
1345+
"inference"
1346+
],
1347+
"summary": "Summarize task and generate branch name",
1348+
"parameters": [
1349+
{
1350+
"description": "Summarization request",
1351+
"name": "request",
1352+
"in": "body",
1353+
"required": true,
1354+
"schema": {
1355+
"$ref": "#/definitions/internal_handlers.SummarizeRequest"
1356+
}
1357+
}
1358+
],
1359+
"responses": {
1360+
"200": {
1361+
"description": "Successfully generated summary and branch name",
1362+
"schema": {
1363+
"$ref": "#/definitions/internal_handlers.SummarizeResponse"
1364+
}
1365+
},
1366+
"400": {
1367+
"description": "Invalid request",
1368+
"schema": {
1369+
"$ref": "#/definitions/fiber.Map"
1370+
}
1371+
},
1372+
"500": {
1373+
"description": "Inference error",
1374+
"schema": {
1375+
"$ref": "#/definitions/fiber.Map"
1376+
}
1377+
},
1378+
"503": {
1379+
"description": "Inference not available on this platform",
1380+
"schema": {
1381+
"$ref": "#/definitions/fiber.Map"
1382+
}
1383+
}
1384+
}
1385+
}
1386+
},
13151387
"/v1/notifications": {
13161388
"post": {
13171389
"description": "Sends a notification event to all connected SSE clients, including the TUI app which can display native macOS notifications",
@@ -1843,6 +1915,10 @@
18431915
}
18441916
},
18451917
"definitions": {
1918+
"fiber.Map": {
1919+
"type": "object",
1920+
"additionalProperties": true
1921+
},
18461922
"github_com_vanpelt_catnip_internal_models.ClaudeActivityState": {
18471923
"type": "string",
18481924
"enum": [
@@ -3145,6 +3221,37 @@
31453221
}
31463222
}
31473223
},
3224+
"internal_handlers.InferenceStatusResponse": {
3225+
"description": "Status of the local inference service",
3226+
"type": "object",
3227+
"properties": {
3228+
"architecture": {
3229+
"description": "Architecture (amd64, arm64)",
3230+
"type": "string",
3231+
"example": "arm64"
3232+
},
3233+
"available": {
3234+
"description": "Whether inference is available on this platform",
3235+
"type": "boolean",
3236+
"example": true
3237+
},
3238+
"error": {
3239+
"description": "Error message if initialization failed",
3240+
"type": "string",
3241+
"example": "model not found"
3242+
},
3243+
"modelPath": {
3244+
"description": "Model path if loaded",
3245+
"type": "string",
3246+
"example": "/Users/user/.catnip/models/gemma3-270m-summarizer-Q4_K_M.gguf"
3247+
},
3248+
"platform": {
3249+
"description": "Platform name (darwin, linux, windows)",
3250+
"type": "string",
3251+
"example": "darwin"
3252+
}
3253+
}
3254+
},
31483255
"internal_handlers.NotificationPayload": {
31493256
"type": "object",
31503257
"properties": {
@@ -3183,6 +3290,33 @@
31833290
"$ref": "#/definitions/internal_handlers.ActiveSessionInfo"
31843291
}
31853292
},
3293+
"internal_handlers.SummarizeRequest": {
3294+
"description": "Request to summarize a task and generate a branch name",
3295+
"type": "object",
3296+
"properties": {
3297+
"prompt": {
3298+
"description": "Task description or code changes to summarize",
3299+
"type": "string",
3300+
"example": "Add user authentication with OAuth2"
3301+
}
3302+
}
3303+
},
3304+
"internal_handlers.SummarizeResponse": {
3305+
"description": "Response containing task summary and suggested branch name",
3306+
"type": "object",
3307+
"properties": {
3308+
"branchName": {
3309+
"description": "Git branch name in kebab-case with category prefix",
3310+
"type": "string",
3311+
"example": "feat/add-user-auth"
3312+
},
3313+
"summary": {
3314+
"description": "2-4 word summary in Title Case",
3315+
"type": "string",
3316+
"example": "Add User Auth"
3317+
}
3318+
}
3319+
},
31863320
"internal_handlers.UploadResponse": {
31873321
"description": "Response containing upload status and file location",
31883322
"type": "object",

0 commit comments

Comments
 (0)