Commit c578df0

Claude (claude) authored and committed
Add async inference loading with CATNIP_INFERENCE env flag
- Implement non-blocking background initialization for the inference service
- Add state management (initializing/ready/failed/disabled) with progress tracking
- Return 503 with status info while the model downloads in the background
- Add retry logic with exponential backoff (3 attempts)
- Use golang.org/x/sys/unix for cross-platform stderr suppression
- Clean up .gitignore (remove models/) and .goreleaser.yml (remove bundled libs)

The inference service now starts immediately and downloads libraries and the model in the background. Enable with the CATNIP_INFERENCE=1 environment variable.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
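The service implementation itself is not part of this diff; only its callers change below. As a rough sketch of the state machine those callers imply — `InitializeAsync`, `IsReady`, `GetStatus`, `InferenceStateInitializing`, and `DownloadProgress` all appear in the diff, while the struct internals, the `DownloadProgress` field names, and the `downloadAndLoad` helper are assumptions for illustration:

```go
package services

import (
	"fmt"
	"sync"
	"time"
)

// InferenceState enumerates the states named in the commit message.
type InferenceState string

const (
	InferenceStateInitializing InferenceState = "initializing"
	InferenceStateReady        InferenceState = "ready"
	InferenceStateFailed       InferenceState = "failed"
	InferenceStateDisabled     InferenceState = "disabled"
)

// DownloadProgress reports background download progress.
// Field names are assumptions; only the type name appears in the diff.
type DownloadProgress struct {
	BytesDownloaded int64  `json:"bytesDownloaded"`
	TotalBytes      int64  `json:"totalBytes"`
	CurrentFile     string `json:"currentFile"`
}

// InferenceService internals are assumed; the exported methods match the diff.
type InferenceService struct {
	mu       sync.RWMutex
	state    InferenceState
	message  string
	progress DownloadProgress
}

// InitializeAsync downloads libraries and the model with up to three
// attempts and exponential backoff. serve.go runs it via `go` for a
// non-blocking start; summarize.go calls it directly to block until done.
func (s *InferenceService) InitializeAsync() {
	s.setState(InferenceStateInitializing, "Downloading model...")
	var err error
	for attempt := 0; attempt < 3; attempt++ {
		if attempt > 0 {
			// Back off between attempts: 1s, then 2s.
			time.Sleep(time.Duration(1<<(attempt-1)) * time.Second)
		}
		if err = s.downloadAndLoad(); err == nil {
			s.setState(InferenceStateReady, "Inference service ready")
			return
		}
	}
	s.setState(InferenceStateFailed, fmt.Sprintf("initialization failed: %v", err))
}

// IsReady reports whether the model is loaded and requests can be served.
func (s *InferenceService) IsReady() bool {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.state == InferenceStateReady
}

// GetStatus returns the current state, a human-readable message, and progress.
func (s *InferenceService) GetStatus() (InferenceState, string, DownloadProgress) {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.state, s.message, s.progress
}

func (s *InferenceService) setState(state InferenceState, msg string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.state, s.message = state, msg
}

// downloadAndLoad stands in for the real download + llama.cpp FFI load step.
func (s *InferenceService) downloadAndLoad() error { return nil }
```

The exact backoff constants and progress bookkeeping in the real service may differ; this only fixes the shape the rest of the commit depends on.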
1 parent ec38067 commit c578df0

File tree: 7 files changed, +293 −188

.gitignore

Lines changed: 0 additions & 6 deletions

```diff
@@ -64,9 +64,3 @@ container/internal/assets/dist/*
 # Xcode user-specific files
 **/xcuserdata/
 xcode/build/
-
-# Inference: llama.cpp libraries and GGUF models
-# Libraries are downloaded at build time, not committed
-container/models/lib/
-models/*.gguf
-*.gguf
```

container/.goreleaser.yml

Lines changed: 0 additions & 22 deletions

```diff
@@ -111,17 +111,6 @@ archives:
       # Copy the entire signed app bundle - keep the Catnip.app directory name
       - src: "dist/catnip-macos_darwin_amd64_v1/Catnip.app"
         dst: "Catnip.app"
-      # Include llama.cpp libraries for local inference
-      - src: "models/lib/darwin/amd64/build/bin/libllama.dylib"
-        dst: "lib/libllama.dylib"
-      - src: "models/lib/darwin/amd64/build/bin/libggml.dylib"
-        dst: "lib/libggml.dylib"
-      - src: "models/lib/darwin/amd64/build/bin/libggml-metal.dylib"
-        dst: "lib/libggml-metal.dylib"
-      - src: "models/lib/darwin/amd64/build/bin/libggml-base.dylib"
-        dst: "lib/libggml-base.dylib"
-      - src: "models/lib/darwin/amd64/build/bin/libggml-cpu.dylib"
-        dst: "lib/libggml-cpu.dylib"
       # Documentation files
       - README.md
       - LICENSE
@@ -139,17 +128,6 @@ archives:
       # Copy the entire signed app bundle - keep the Catnip.app directory name
       - src: "dist/catnip-macos_darwin_arm64_v8.0/Catnip.app"
         dst: "Catnip.app"
-      # Include llama.cpp libraries for local inference
-      - src: "models/lib/darwin/arm64/build/bin/libllama.dylib"
-        dst: "lib/libllama.dylib"
-      - src: "models/lib/darwin/arm64/build/bin/libggml.dylib"
-        dst: "lib/libggml.dylib"
-      - src: "models/lib/darwin/arm64/build/bin/libggml-metal.dylib"
-        dst: "lib/libggml-metal.dylib"
-      - src: "models/lib/darwin/arm64/build/bin/libggml-base.dylib"
-        dst: "lib/libggml-base.dylib"
-      - src: "models/lib/darwin/arm64/build/bin/libggml-cpu.dylib"
-        dst: "lib/libggml-cpu.dylib"
       # Documentation files
       - README.md
       - LICENSE
```
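With these entries gone, the macOS release archives no longer bundle the llama.cpp dylibs; per the commit message, the libraries (and the GGUF model) are instead fetched at runtime during background initialization.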

container/internal/cmd/serve.go

Lines changed: 23 additions & 16 deletions

```diff
@@ -154,20 +154,21 @@ func startServer(cmd *cobra.Command) {
 	claudeService := services.NewClaudeService()
 	sessionService := services.NewSessionService()
 
-	// Initialize inference service (cross-platform support via yzma FFI)
+	// Initialize inference service if enabled via CATNIP_INFERENCE=1
 	var inferenceService *services.InferenceService
-	inferenceConfig := services.InferenceConfig{
-		ModelURL: "https://huggingface.co/vanpelt/catnip-summarizer/resolve/main/gemma3-270m-summarizer-Q4_K_M.gguf",
-		Checksum: "", // Optional checksum for verification
-	}
-	var err error
-	inferenceService, err = services.NewInferenceService(inferenceConfig)
-	if err != nil {
-		logger.Warnf("⚠️ Failed to initialize inference service: %v", err)
-		logger.Warnf("   Run 'catnip download' to pre-download dependencies")
-		inferenceService = nil
+	if os.Getenv("CATNIP_INFERENCE") == "1" {
+		inferenceConfig := services.InferenceConfig{
+			ModelURL: "https://huggingface.co/vanpelt/catnip-summarizer/resolve/main/gemma3-270m-summarizer-Q4_K_M.gguf",
+			Checksum: "", // Optional checksum for verification
+		}
+		inferenceService = services.NewInferenceService(inferenceConfig)
+
+		// Start background initialization (non-blocking)
+		go inferenceService.InitializeAsync()
+
+		logger.Infof("🧠 Inference service enabled, downloading in background... (%s/%s)", goruntime.GOOS, goruntime.GOARCH)
 	} else {
-		logger.Infof("✅ Inference service initialized (%s/%s)", goruntime.GOOS, goruntime.GOARCH)
+		logger.Debugf("🧠 Inference service disabled (set CATNIP_INFERENCE=1 to enable)")
 	}
 
 	// Wire up SessionService to ClaudeService for best session file selection
@@ -224,7 +225,11 @@ func startServer(cmd *cobra.Command) {
 	defer eventsHandler.Stop()
 	portsHandler := handlers.NewPortsHandler(portMonitor).WithEvents(eventsHandler)
 	proxyHandler := handlers.NewProxyHandler(portMonitor)
-	inferenceHandler := handlers.NewInferenceHandler(inferenceService)
+	// Only create inference handler if service is enabled
+	var inferenceHandler *handlers.InferenceHandler
+	if inferenceService != nil {
+		inferenceHandler = handlers.NewInferenceHandler(inferenceService)
+	}
 
 	// Connect events handler to GitService for worktree status events
 	gitService.SetEventsHandler(eventsHandler)
@@ -309,9 +314,11 @@ func startServer(cmd *cobra.Command) {
 	v1.Post("/ports/mappings", portsHandler.SetPortMapping)
 	v1.Delete("/ports/mappings/:port", portsHandler.DeletePortMapping)
 
-	// Inference routes (cross-platform local inference)
-	v1.Post("/inference/summarize", inferenceHandler.HandleSummarize)
-	v1.Get("/inference/status", inferenceHandler.HandleInferenceStatus)
+	// Inference routes (only if enabled via CATNIP_INFERENCE=1)
+	if inferenceHandler != nil {
+		v1.Post("/inference/summarize", inferenceHandler.HandleSummarize)
+		v1.Get("/inference/status", inferenceHandler.HandleInferenceStatus)
+	}
 
 	// Server info route
 	v1.Get("/info", func(c *fiber.Ctx) error {
```
container/internal/cmd/summarize.go

Lines changed: 9 additions & 3 deletions

```diff
@@ -61,9 +61,15 @@ func runSummarize(cmd *cobra.Command, args []string) error {
 		Checksum: "",
 	}
 
-	inferenceService, err := services.NewInferenceService(inferenceConfig)
-	if err != nil {
-		return fmt.Errorf("failed to initialize inference service: %w\n\nTry running: catnip download", err)
+	inferenceService := services.NewInferenceService(inferenceConfig)
+
+	// Run initialization synchronously for CLI usage
+	inferenceService.InitializeAsync()
+
+	// Check if initialization succeeded
+	if !inferenceService.IsReady() {
+		state, message, _ := inferenceService.GetStatus()
+		return fmt.Errorf("failed to initialize inference service: %s (%s)\n\nTry running: catnip download", message, state)
 	}
 
 	// Run inference
```
container/internal/handlers/inference.go

Lines changed: 36 additions & 15 deletions

```diff
@@ -40,16 +40,18 @@ type SummarizeResponse struct {
 // InferenceStatusResponse represents the inference service status
 // @Description Status of the local inference service
 type InferenceStatusResponse struct {
-	// Whether inference is available on this platform
+	// Whether inference is ready for requests
 	Available bool `json:"available" example:"true"`
+	// Current status: initializing, ready, failed
+	Status string `json:"status" example:"ready"`
+	// Human-readable status message
+	Message string `json:"message,omitempty" example:"Inference service ready"`
+	// Download progress (when initializing)
+	Progress *services.DownloadProgress `json:"progress,omitempty"`
 	// Platform name (darwin, linux, windows)
 	Platform string `json:"platform" example:"darwin"`
 	// Architecture (amd64, arm64)
 	Architecture string `json:"architecture" example:"arm64"`
-	// Model path if loaded
-	ModelPath string `json:"modelPath,omitempty" example:"/Users/user/.catnip/models/gemma3-270m-summarizer-Q4_K_M.gguf"`
-	// Error message if initialization failed
-	Error string `json:"error,omitempty" example:"model not found"`
 }
 
 // HandleSummarize godoc
@@ -65,10 +67,20 @@ type InferenceStatusResponse struct {
 // @Failure 503 {object} fiber.Map "Inference not available on this platform"
 // @Router /v1/inference/summarize [post]
 func (h *InferenceHandler) HandleSummarize(c *fiber.Ctx) error {
-	// Check if service is available
+	// Check if service is available and ready
 	if h.service == nil {
 		return c.Status(fiber.StatusServiceUnavailable).JSON(fiber.Map{
-			"error": "Inference service not available on this platform",
+			"error": "Inference service not configured",
+		})
+	}
+
+	// Check if service is ready
+	if !h.service.IsReady() {
+		state, message, progress := h.service.GetStatus()
+		return c.Status(fiber.StatusServiceUnavailable).JSON(fiber.Map{
+			"error":    fmt.Sprintf("Inference service not ready: %s", message),
+			"status":   string(state),
+			"progress": progress,
 		})
 	}
 
@@ -114,18 +126,27 @@ func (h *InferenceHandler) HandleSummarize(c *fiber.Ctx) error {
 // @Success 200 {object} InferenceStatusResponse "Inference service status"
 // @Router /v1/inference/status [get]
 func (h *InferenceHandler) HandleInferenceStatus(c *fiber.Ctx) error {
-	status := InferenceStatusResponse{
-		Available: h.service != nil,
+	resp := InferenceStatusResponse{
 		Platform:     runtime.GOOS,
 		Architecture: runtime.GOARCH,
 	}
 
-	if h.service != nil {
-		// Try to get model path (implementation would need to expose this)
-		status.ModelPath = "~/.catnip/models/gemma3-270m-summarizer-Q4_K_M.gguf"
-	} else {
-		status.Error = "Inference only available on macOS currently"
+	if h.service == nil {
+		resp.Available = false
+		resp.Status = "disabled"
+		resp.Message = "Inference service not configured"
+		return c.JSON(resp)
+	}
+
+	state, message, progress := h.service.GetStatus()
+	resp.Available = h.service.IsReady()
+	resp.Status = string(state)
+	resp.Message = message
+
+	// Include progress if still initializing
+	if state == services.InferenceStateInitializing {
+		resp.Progress = &progress
 	}
 
-	return c.JSON(status)
+	return c.JSON(resp)
 }
```
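While the model downloads, clients are expected to poll the status route. A minimal poller sketch (not part of the commit; the localhost address is a placeholder assumption — point it at wherever `catnip serve` is listening):

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// Mirrors the fields of InferenceStatusResponse from the handler above.
type inferenceStatus struct {
	Available    bool   `json:"available"`
	Status       string `json:"status"`
	Message      string `json:"message,omitempty"`
	Platform     string `json:"platform"`
	Architecture string `json:"architecture"`
}

func main() {
	// Assumed address; substitute the port your catnip server listens on.
	const statusURL = "http://localhost:8080/v1/inference/status"

	for {
		resp, err := http.Get(statusURL)
		if err != nil {
			fmt.Println("server unreachable:", err)
			return
		}
		var s inferenceStatus
		err = json.NewDecoder(resp.Body).Decode(&s)
		resp.Body.Close()
		if err != nil {
			fmt.Println("decode error:", err)
			return
		}
		fmt.Printf("status=%s available=%v (%s/%s) %s\n",
			s.Status, s.Available, s.Platform, s.Architecture, s.Message)
		// "ready", "failed", and "disabled" are terminal;
		// "initializing" means the download is still in flight.
		if s.Status != "initializing" {
			return
		}
		time.Sleep(2 * time.Second)
	}
}
```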
