Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 35 additions & 3 deletions core/config/backend_capabilities.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ const (
UsecaseRerank = "rerank"
UsecaseDetection = "detection"
UsecaseVAD = "vad"
UsecaseAudioTransform = "audio_transform"
UsecaseDiarization = "diarization"
UsecaseRealtimeAudio = "realtime_audio"
UsecaseAudioTransform = "audio_transform"
UsecaseDiarization = "diarization"
UsecaseRealtimeAudio = "realtime_audio"
UsecaseFaceRecognition = "face_recognition"
UsecaseSpeakerRecognition = "speaker_recognition"
)

// GRPCMethod identifies a Backend service RPC from backend.proto.
Expand All @@ -47,6 +49,11 @@ const (
MethodAudioTransform GRPCMethod = "AudioTransform"
MethodDiarize GRPCMethod = "Diarize"
MethodAudioToAudioStream GRPCMethod = "AudioToAudioStream"
MethodFaceVerify GRPCMethod = "FaceVerify"
MethodFaceAnalyze GRPCMethod = "FaceAnalyze"
MethodVoiceVerify GRPCMethod = "VoiceVerify"
MethodVoiceEmbed GRPCMethod = "VoiceEmbed"
MethodVoiceAnalyze GRPCMethod = "VoiceAnalyze"
)

// UsecaseInfo describes a single known_usecase value and how it maps
Expand Down Expand Up @@ -154,6 +161,16 @@ var UsecaseInfoMap = map[string]UsecaseInfo{
GRPCMethod: MethodAudioToAudioStream,
Description: "Self-contained any-to-any audio model for the Realtime API — accepts microphone audio and emits speech + transcript (+ optional function calls) from a single backend via the AudioToAudioStream RPC.",
},
UsecaseFaceRecognition: {
Flag: FLAG_FACE_RECOGNITION,
GRPCMethod: MethodFaceVerify,
Description: "Face recognition — verify identity, analyze attributes (age/gender/emotion) via FaceVerify and FaceAnalyze RPCs.",
},
UsecaseSpeakerRecognition: {
Flag: FLAG_SPEAKER_RECOGNITION,
GRPCMethod: MethodVoiceVerify,
Description: "Speaker recognition — verify identity, embed and analyze voice via VoiceVerify, VoiceEmbed and VoiceAnalyze RPCs.",
},
}

// BackendCapability describes which gRPC methods and usecases a backend supports.
Expand Down Expand Up @@ -434,6 +451,21 @@ var BackendCapabilities = map[string]BackendCapability{
DefaultUsecases: []string{UsecaseDetection},
Description: "RF-DETR object detection",
},

// --- Face and speaker recognition backends ---
"insightface": {
GRPCMethods: []GRPCMethod{MethodEmbedding, MethodDetect, MethodFaceVerify, MethodFaceAnalyze},
PossibleUsecases: []string{UsecaseEmbeddings, UsecaseDetection, UsecaseFaceRecognition},
DefaultUsecases: []string{UsecaseFaceRecognition},
AcceptsImages: true,
Description: "InsightFace — face detection, embedding, verification and attribute analysis",
},
"speaker-recognition": {
GRPCMethods: []GRPCMethod{MethodVoiceVerify, MethodVoiceEmbed, MethodVoiceAnalyze},
PossibleUsecases: []string{UsecaseSpeakerRecognition},
DefaultUsecases: []string{UsecaseSpeakerRecognition},
Description: "Speaker recognition — voice identity verification and analysis",
},
"silero-vad": {
GRPCMethods: []GRPCMethod{MethodVAD},
PossibleUsecases: []string{UsecaseVAD},
Expand Down