From af17bf9c571ceb2f0dca2cae869f1021f332538e Mon Sep 17 00:00:00 2001
From: Adira Denis Muhando <dennisadira@gmail.com>
Date: Sun, 31 May 2026 12:21:06 +0300
Subject: [PATCH] fix(config): add face/speaker recognition constants and
 register insightface + speaker-recognition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FLAG_FACE_RECOGNITION and FLAG_SPEAKER_RECOGNITION already exist as
ModelConfigUsecase bitmask flags, and GuessUsecases already gate-checks
both backends by name — but BackendCapabilities had no entries for
either, so the UI could not classify them.

Also missing were the Method* constants for the five proto-defined RPCs
these backends implement (FaceVerify, FaceAnalyze, VoiceVerify,
VoiceEmbed, VoiceAnalyze) and the corresponding Usecase* strings
and UsecaseInfoMap entries needed to wire them into the rest of the
capability system.

Changes:
- Add MethodFaceVerify, MethodFaceAnalyze, MethodVoiceVerify,
  MethodVoiceEmbed, MethodVoiceAnalyze GRPCMethod constants
- Add UsecaseFaceRecognition ("face_recognition") and
  UsecaseSpeakerRecognition ("speaker_recognition") Usecase constants
- Add UsecaseInfoMap entries for both new usecases, referencing the
  existing FLAG_FACE_RECOGNITION and FLAG_SPEAKER_RECOGNITION flags
- Register insightface: Embedding + Detect + FaceVerify + FaceAnalyze
- Register speaker-recognition: VoiceVerify + VoiceEmbed + VoiceAnalyze

Follows up on #10107, which left these two backends out because they
needed new constants first.

Assisted-by: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 core/config/backend_capabilities.go | 38 ++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/core/config/backend_capabilities.go b/core/config/backend_capabilities.go
index 4d66dc1079a6..fcd2aec23fa4 100644
--- a/core/config/backend_capabilities.go
+++ b/core/config/backend_capabilities.go
@@ -22,9 +22,11 @@ const (
 	UsecaseRerank          = "rerank"
 	UsecaseDetection       = "detection"
 	UsecaseVAD             = "vad"
-	UsecaseAudioTransform  = "audio_transform"
-	UsecaseDiarization     = "diarization"
-	UsecaseRealtimeAudio   = "realtime_audio"
+	UsecaseAudioTransform      = "audio_transform"
+	UsecaseDiarization         = "diarization"
+	UsecaseRealtimeAudio       = "realtime_audio"
+	UsecaseFaceRecognition     = "face_recognition"
+	UsecaseSpeakerRecognition  = "speaker_recognition"
 )
 
 // GRPCMethod identifies a Backend service RPC from backend.proto.
@@ -47,6 +49,11 @@ const (
 	MethodAudioTransform     GRPCMethod = "AudioTransform"
 	MethodDiarize            GRPCMethod = "Diarize"
 	MethodAudioToAudioStream GRPCMethod = "AudioToAudioStream"
+	MethodFaceVerify         GRPCMethod = "FaceVerify"
+	MethodFaceAnalyze        GRPCMethod = "FaceAnalyze"
+	MethodVoiceVerify        GRPCMethod = "VoiceVerify"
+	MethodVoiceEmbed         GRPCMethod = "VoiceEmbed"
+	MethodVoiceAnalyze       GRPCMethod = "VoiceAnalyze"
 )
 
 // UsecaseInfo describes a single known_usecase value and how it maps
@@ -154,6 +161,16 @@ var UsecaseInfoMap = map[string]UsecaseInfo{
 		GRPCMethod:  MethodAudioToAudioStream,
 		Description: "Self-contained any-to-any audio model for the Realtime API — accepts microphone audio and emits speech + transcript (+ optional function calls) from a single backend via the AudioToAudioStream RPC.",
 	},
+	UsecaseFaceRecognition: {
+		Flag:        FLAG_FACE_RECOGNITION,
+		GRPCMethod:  MethodFaceVerify,
+		Description: "Face recognition — verify identity, analyze attributes (age/gender/emotion) via FaceVerify and FaceAnalyze RPCs.",
+	},
+	UsecaseSpeakerRecognition: {
+		Flag:        FLAG_SPEAKER_RECOGNITION,
+		GRPCMethod:  MethodVoiceVerify,
+		Description: "Speaker recognition — verify identity, embed and analyze voice via VoiceVerify, VoiceEmbed and VoiceAnalyze RPCs.",
+	},
 }
 
 // BackendCapability describes which gRPC methods and usecases a backend supports.
@@ -434,6 +451,21 @@ var BackendCapabilities = map[string]BackendCapability{
 		DefaultUsecases:  []string{UsecaseDetection},
 		Description:      "RF-DETR object detection",
 	},
+
+	// --- Face and speaker recognition backends ---
+	"insightface": {
+		GRPCMethods:      []GRPCMethod{MethodEmbedding, MethodDetect, MethodFaceVerify, MethodFaceAnalyze},
+		PossibleUsecases: []string{UsecaseEmbeddings, UsecaseDetection, UsecaseFaceRecognition},
+		DefaultUsecases:  []string{UsecaseFaceRecognition},
+		AcceptsImages:    true,
+		Description:      "InsightFace — face detection, embedding, verification and attribute analysis",
+	},
+	"speaker-recognition": {
+		GRPCMethods:      []GRPCMethod{MethodVoiceVerify, MethodVoiceEmbed, MethodVoiceAnalyze},
+		PossibleUsecases: []string{UsecaseSpeakerRecognition},
+		DefaultUsecases:  []string{UsecaseSpeakerRecognition},
+		Description:      "Speaker recognition — voice identity verification and analysis",
+	},
 	"silero-vad": {
 		GRPCMethods:      []GRPCMethod{MethodVAD},
 		PossibleUsecases: []string{UsecaseVAD},