From af17bf9c571ceb2f0dca2cae869f1021f332538e Mon Sep 17 00:00:00 2001
From: Adira Denis Muhando
Date: Sun, 31 May 2026 12:21:06 +0300
Subject: [PATCH] fix(config): add face/speaker recognition constants and
 register insightface + speaker-recognition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FLAG_FACE_RECOGNITION and FLAG_SPEAKER_RECOGNITION already existed as
ModelConfigUsecase bitmask flags, and GuessUsecases already gate-checks
both backends by name — but BackendCapabilities had no entries for
either, so the UI could not classify them. Also missing were the Method*
constants for the five proto-defined RPCs these backends implement
(FaceVerify, FaceAnalyze, VoiceVerify, VoiceEmbed, VoiceAnalyze) and the
corresponding Usecase* strings and UsecaseInfoMap entries needed to wire
them into the rest of the capability system.

Changes:
- Add MethodFaceVerify, MethodFaceAnalyze, MethodVoiceVerify,
  MethodVoiceEmbed, MethodVoiceAnalyze GRPCMethod constants
- Add UsecaseFaceRecognition ("face_recognition") and
  UsecaseSpeakerRecognition ("speaker_recognition") Usecase constants
- Add UsecaseInfoMap entries for both new usecases, referencing the
  existing FLAG_FACE_RECOGNITION and FLAG_SPEAKER_RECOGNITION flags
- Register insightface: Embedding + Detect + FaceVerify + FaceAnalyze
- Register speaker-recognition: VoiceVerify + VoiceEmbed + VoiceAnalyze

Follows up on #10107, which left these two out because they needed new
constants first.

Assisted-by: Claude Sonnet 4.6 --- core/config/backend_capabilities.go | 38 ++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/core/config/backend_capabilities.go b/core/config/backend_capabilities.go index 4d66dc1079a6..fcd2aec23fa4 100644 --- a/core/config/backend_capabilities.go +++ b/core/config/backend_capabilities.go @@ -22,9 +22,11 @@ const ( UsecaseRerank = "rerank" UsecaseDetection = "detection" UsecaseVAD = "vad" - UsecaseAudioTransform = "audio_transform" - UsecaseDiarization = "diarization" - UsecaseRealtimeAudio = "realtime_audio" + UsecaseAudioTransform = "audio_transform" + UsecaseDiarization = "diarization" + UsecaseRealtimeAudio = "realtime_audio" + UsecaseFaceRecognition = "face_recognition" + UsecaseSpeakerRecognition = "speaker_recognition" ) // GRPCMethod identifies a Backend service RPC from backend.proto. @@ -47,6 +49,11 @@ const ( MethodAudioTransform GRPCMethod = "AudioTransform" MethodDiarize GRPCMethod = "Diarize" MethodAudioToAudioStream GRPCMethod = "AudioToAudioStream" + MethodFaceVerify GRPCMethod = "FaceVerify" + MethodFaceAnalyze GRPCMethod = "FaceAnalyze" + MethodVoiceVerify GRPCMethod = "VoiceVerify" + MethodVoiceEmbed GRPCMethod = "VoiceEmbed" + MethodVoiceAnalyze GRPCMethod = "VoiceAnalyze" ) // UsecaseInfo describes a single known_usecase value and how it maps @@ -154,6 +161,16 @@ var UsecaseInfoMap = map[string]UsecaseInfo{ GRPCMethod: MethodAudioToAudioStream, Description: "Self-contained any-to-any audio model for the Realtime API — accepts microphone audio and emits speech + transcript (+ optional function calls) from a single backend via the AudioToAudioStream RPC.", }, + UsecaseFaceRecognition: { + Flag: FLAG_FACE_RECOGNITION, + GRPCMethod: MethodFaceVerify, + Description: "Face recognition — verify identity, analyze attributes (age/gender/emotion) via FaceVerify and FaceAnalyze RPCs.", + }, + UsecaseSpeakerRecognition: { + Flag: FLAG_SPEAKER_RECOGNITION, + GRPCMethod: 
MethodVoiceVerify, + Description: "Speaker recognition — verify identity, embed and analyze voice via VoiceVerify, VoiceEmbed and VoiceAnalyze RPCs.", + }, } // BackendCapability describes which gRPC methods and usecases a backend supports. @@ -434,6 +451,21 @@ var BackendCapabilities = map[string]BackendCapability{ DefaultUsecases: []string{UsecaseDetection}, Description: "RF-DETR object detection", }, + + // --- Face and speaker recognition backends --- + "insightface": { + GRPCMethods: []GRPCMethod{MethodEmbedding, MethodDetect, MethodFaceVerify, MethodFaceAnalyze}, + PossibleUsecases: []string{UsecaseEmbeddings, UsecaseDetection, UsecaseFaceRecognition}, + DefaultUsecases: []string{UsecaseFaceRecognition}, + AcceptsImages: true, + Description: "InsightFace — face detection, embedding, verification and attribute analysis", + }, + "speaker-recognition": { + GRPCMethods: []GRPCMethod{MethodVoiceVerify, MethodVoiceEmbed, MethodVoiceAnalyze}, + PossibleUsecases: []string{UsecaseSpeakerRecognition}, + DefaultUsecases: []string{UsecaseSpeakerRecognition}, + Description: "Speaker recognition — voice identity verification and analysis", + }, "silero-vad": { GRPCMethods: []GRPCMethod{MethodVAD}, PossibleUsecases: []string{UsecaseVAD},