Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 108 additions & 0 deletions backend/index.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,31 @@
nvidia-l4t-cuda-12: "nvidia-l4t-faster-qwen3-tts"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-faster-qwen3-tts"
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png
# FunASR backend: shared metadata anchor (&funasr). The per-platform funasr
# entries further down merge this anchor and override name/uri/mirrors.
- &funasr
urls:
- https://github.com/modelscope/FunASR
description: |
FunASR is an industrial-grade speech recognition toolkit supporting 50+ languages.
Includes SenseVoice (170x realtime, emotion detection), Paraformer (highest Chinese
accuracy), and built-in VAD, punctuation restoration, and speaker diarization.
tags:
- speech-recognition
- ASR
- multilingual
license: mit
name: "funasr"
alias: "funasr"
# Maps a capability key to the name of the concrete backend entry to use for it;
# "default" points at the CPU build. The bare "nvidia" key and "nvidia-cuda-12"
# both resolve to the CUDA 12 build, and "nvidia-l4t" / "nvidia-l4t-cuda-12"
# both resolve to "nvidia-l4t-funasr".
capabilities:
nvidia: "cuda12-funasr"
amd: "rocm-funasr"
metal: "metal-funasr"
default: "cpu-funasr"
nvidia-cuda-13: "cuda13-funasr"
nvidia-cuda-12: "cuda12-funasr"
nvidia-l4t: "nvidia-l4t-funasr"
nvidia-l4t-cuda-12: "nvidia-l4t-funasr"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-funasr"
icon: https://avatars.githubusercontent.com/u/109454077
- &qwen-asr
urls:
- https://github.com/QwenLM/Qwen3-ASR
Expand Down Expand Up @@ -4586,3 +4611,86 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-sherpa-onnx"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-sherpa-onnx
## funasr
# Development meta entry: merges the &funasr metadata, renames it to
# "funasr-development" and replaces the capability map so that every key
# resolves to the corresponding "-development" backend entry.
- !!merge <<: *funasr
name: "funasr-development"
capabilities:
nvidia: "cuda12-funasr-development"
amd: "rocm-funasr-development"
nvidia-l4t: "nvidia-l4t-funasr-development"
metal: "metal-funasr-development"
default: "cpu-funasr-development"
nvidia-cuda-13: "cuda13-funasr-development"
nvidia-cuda-12: "cuda12-funasr-development"
nvidia-l4t-cuda-12: "nvidia-l4t-funasr-development"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-funasr-development"
# Concrete funasr images, one release/development pair per platform.
# Release names use the "latest-*" image tag, "-development" names use the
# "master-*" tag. Each entry has a quay.io/go-skynet/local-ai-backends uri and
# a localai/localai-backends mirror with the same tag.
# CPU
- !!merge <<: *funasr
name: "cpu-funasr"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-funasr"
mirrors:
- localai/localai-backends:latest-cpu-funasr
- !!merge <<: *funasr
name: "cpu-funasr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-funasr"
mirrors:
- localai/localai-backends:master-cpu-funasr
# NVIDIA CUDA 12
- !!merge <<: *funasr
name: "cuda12-funasr"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-funasr"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-funasr
- !!merge <<: *funasr
name: "cuda12-funasr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-funasr"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-funasr
# NVIDIA CUDA 13
- !!merge <<: *funasr
name: "cuda13-funasr"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-funasr"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-13-funasr
- !!merge <<: *funasr
name: "cuda13-funasr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-funasr"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-funasr
# AMD ROCm (hipblas)
- !!merge <<: *funasr
name: "rocm-funasr"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-funasr"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-funasr
- !!merge <<: *funasr
name: "rocm-funasr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-funasr"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-funasr
# NVIDIA L4T
- !!merge <<: *funasr
name: "nvidia-l4t-funasr"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-funasr"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-funasr
- !!merge <<: *funasr
name: "nvidia-l4t-funasr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-funasr"
mirrors:
- localai/localai-backends:master-nvidia-l4t-funasr
# NVIDIA L4T, CUDA 13, arm64
- !!merge <<: *funasr
name: "cuda13-nvidia-l4t-arm64-funasr"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-funasr"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-funasr
- !!merge <<: *funasr
name: "cuda13-nvidia-l4t-arm64-funasr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-funasr"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-funasr
# Metal (darwin arm64)
- !!merge <<: *funasr
name: "metal-funasr"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-funasr"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-funasr
- !!merge <<: *funasr
name: "metal-funasr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-funasr"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-funasr
23 changes: 23 additions & 0 deletions backend/python/funasr/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Task runner for the funasr Python backend. None of the targets below names a
# file that gets produced, so all of them are declared phony in one place.
.PHONY: funasr run test protogen-clean clean

# Default goal (first real target): run the backend's install script.
funasr:
	bash install.sh

# Install first, then start the backend through run.sh.
run: funasr
	@echo "Running funasr..."
	bash run.sh
	@echo "funasr run."

# Install first, then run the test script.
test: funasr
	@echo "Testing funasr..."
	bash test.sh
	@echo "funasr tested."

# Remove backend_pb2_grpc.py and backend_pb2.py from this directory.
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

# Remove the protobuf stubs (via protogen-clean), the venv and the bytecode cache.
clean: protogen-clean
	$(RM) -r venv __pycache__
133 changes: 133 additions & 0 deletions backend/python/funasr/backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#!/usr/bin/env python3
"""
gRPC backend for LocalAI wrapping FunASR (SenseVoice / Paraformer).
"""
from concurrent import futures
import time
import argparse
import signal
import sys
import os
import backend_pb2
import backend_pb2_grpc
import torch

import grpc
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'common'))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'common'))
from grpc_auth import get_auth_interceptors


# Sleep interval, in seconds, used by the keep-alive loop at the end of serve().
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# Thread-pool size for the gRPC server; read from the PYTHON_GRPC_MAX_WORKERS
# environment variable and defaulting to 1 when it is unset.
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))


class BackendServicer(backend_pb2_grpc.BackendServicer):
    """gRPC servicer that exposes a FunASR model to LocalAI.

    ``LoadModel`` stores the model on the instance as ``self.model``;
    ``AudioTranscription`` uses it to transcribe an audio file on disk.
    """

    # Class-level default so AudioTranscription can tell "no model loaded yet"
    # apart from a genuine inference failure.
    model = None

    def Health(self, request, context):
        """Liveness probe: always replies with the UTF-8 bytes of ``"OK"``."""
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """Load a FunASR model and keep it on ``self.model``.

        The model id is ``request.Model``, falling back to ``"iic/SenseVoiceSmall"``
        when that field is empty.

        Returns:
            ``backend_pb2.Result`` with ``success=True`` on success, or
            ``success=False`` and the exception text in ``message`` on failure.
        """
        # Imported lazily so the module can be imported (e.g. for Health checks)
        # without paying for the funasr import up front.
        from funasr import AutoModel

        device = self._select_device(request)
        model_id = request.Model or "iic/SenseVoiceSmall"

        try:
            print(f"Loading FunASR model: {model_id} on {device}", file=sys.stderr)
            self.model = AutoModel(
                model=model_id,
                vad_model="fsmn-vad",
                device=device,
                disable_update=True,
            )
            print("FunASR model loaded successfully", file=sys.stderr)
        except Exception as err:
            print(f"[ERROR] LoadModel failed: {err}", file=sys.stderr)
            import traceback
            traceback.print_exc(file=sys.stderr)
            return backend_pb2.Result(success=False, message=str(err))

        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def AudioTranscription(self, request, context):
        """Transcribe the audio file at ``request.dst``.

        A non-blank ``request.language`` is forwarded to ``model.generate`` as the
        ``language`` keyword. One ``TranscriptSegment`` is produced per item returned
        by ``model.generate``; segment ``start``/``end`` are always 0 and the overall
        ``text`` is the concatenation of the segment texts.

        Failures (no model loaded, audio file missing, exception during inference)
        are logged to stderr and reported through the gRPC ``context`` status so the
        caller can tell them apart from an empty transcription. An empty
        ``TranscriptResult`` is still returned in those cases. A model that simply
        returns no results yields an empty transcript with no error status.
        """
        # NOTE(review): the text is returned exactly as the model emits it. FunASR's
        # SenseVoice examples post-process the text with
        # rich_transcription_postprocess - confirm whether that is wanted here.
        try:
            if self.model is None:
                return self._fail(
                    context,
                    grpc.StatusCode.FAILED_PRECONDITION,
                    "Model not loaded; call LoadModel first",
                )

            audio_path = request.dst
            if not audio_path or not os.path.exists(audio_path):
                return self._fail(
                    context,
                    grpc.StatusCode.NOT_FOUND,
                    f"Audio file not found: {audio_path}",
                )

            kwargs = {}
            language = (request.language or "").strip()
            if language:
                kwargs["language"] = language

            results = self.model.generate(input=audio_path, **kwargs)
            if not results:
                return backend_pb2.TranscriptResult(segments=[], text="")

            result_segments = []
            pieces = []
            for idx, item in enumerate(results):
                # Dict results carry the transcript under "text"; anything else is
                # stringified. A None text is treated as empty.
                seg_text = (item.get("text") or "") if isinstance(item, dict) else str(item)
                pieces.append(seg_text)
                result_segments.append(backend_pb2.TranscriptSegment(
                    id=idx,
                    start=0,
                    end=0,
                    text=seg_text,
                ))
        except Exception as err:
            import traceback
            traceback.print_exc(file=sys.stderr)
            return self._fail(
                context,
                grpc.StatusCode.INTERNAL,
                f"Error in AudioTranscription: {err}",
            )

        return backend_pb2.TranscriptResult(segments=result_segments, text="".join(pieces))

    @staticmethod
    def _select_device(request):
        """Return the torch device string ("cpu", "cuda" or "mps") for this request."""
        device = "cpu"
        # CUDA is only used when the request asks for it and torch can see a device.
        if request.CUDA and torch.cuda.is_available():
            device = "cuda"
        # MPS is checked last, so it wins whenever torch reports it as available.
        if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            device = "mps"
        return device

    @staticmethod
    def _fail(context, code, details):
        """Log ``details``, set it as the RPC status and return an empty transcript."""
        print(f"Error: {details}", file=sys.stderr)
        if context is not None:
            context.set_code(code)
            context.set_details(details)
        return backend_pb2.TranscriptResult(segments=[], text="")


def serve(address):
    """Run the gRPC backend on ``address`` until the process is interrupted.

    Builds a server backed by a thread pool of ``MAX_WORKERS`` workers with 50 MiB
    message-size options and the auth interceptors, registers ``BackendServicer``,
    binds an insecure port and starts serving. SIGINT and SIGTERM stop the server
    immediately and exit the process with status 0.
    """
    message_limit = 50 * 1024 * 1024
    worker_pool = futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
    channel_options = [
        ('grpc.max_message_length', message_limit),
        ('grpc.max_send_message_length', message_limit),
        ('grpc.max_receive_message_length', message_limit),
    ]
    server = grpc.server(
        worker_pool,
        options=channel_options,
        interceptors=get_auth_interceptors(),
    )
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    def shutdown(sig, frame):
        # Stop without a grace period, then end the process.
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, shutdown)

    # Keep the main thread alive; the server does its work on the pool threads.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)


if __name__ == "__main__":
    # Command-line entry point: read the bind address and hand it to serve().
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument(
        "--addr",
        default="localhost:50051",
        help="The address to bind the server to.",
    )
    serve(cli.parse_args().addr)
21 changes: 21 additions & 0 deletions backend/python/funasr/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
# Installs the funasr backend's requirements using the shared libbackend.sh helpers.
set -e

# NOTE(review): presumably consumed by installRequirements in libbackend.sh - confirm.
EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"

# libbackend.sh sits in ./common or in ../common next to this script. Paths are
# quoted so a checkout path containing spaces or glob characters still works.
backend_dir=$(dirname "$0")
if [ -d "$backend_dir/common" ]; then
    source "$backend_dir/common/libbackend.sh"
else
    source "$backend_dir/../common/libbackend.sh"
fi

# The intel build profile gets extra resolver flags.
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

# Python interpreter selection, set after sourcing libbackend.sh and before
# installRequirements runs.
PYTHON_VERSION="3.12"
PYTHON_PATCH="12"
PY_STANDALONE_TAG="20251120"

installRequirements
3 changes: 3 additions & 0 deletions backend/python/funasr/requirements-cpu.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# CPU profile: adds the PyTorch CPU wheel index as an extra package index.
--extra-index-url https://download.pytorch.org/whl/cpu
torch
funasr
3 changes: 3 additions & 0 deletions backend/python/funasr/requirements-cublas12.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# CUDA 12 profile: adds the PyTorch cu121 wheel index as an extra package index.
--extra-index-url https://download.pytorch.org/whl/cu121
torch
funasr
3 changes: 3 additions & 0 deletions backend/python/funasr/requirements-cublas13.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# CUDA 13 profile: PyTorch publishes its CUDA 13 wheels under the "cu130" index;
# there is no "cu131" index, so that URL would not supply a CUDA 13 torch wheel.
--extra-index-url https://download.pytorch.org/whl/cu130
torch
funasr
3 changes: 3 additions & 0 deletions backend/python/funasr/requirements-hipblas.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# ROCm (hipblas) profile: adds the PyTorch rocm6.1 wheel index as an extra package index.
--extra-index-url https://download.pytorch.org/whl/rocm6.1
torch
funasr
2 changes: 2 additions & 0 deletions backend/python/funasr/requirements-intel.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Intel profile. install.sh adds "--upgrade --index-strategy=unsafe-first-match"
# for this profile.
# NOTE(review): no extra index is declared here, unlike the cpu/cublas/hipblas
# files - confirm which torch build this is expected to resolve to.
torch
funasr
1 change: 1 addition & 0 deletions backend/python/funasr/requirements-l4t12.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# L4T (CUDA 12) profile.
# NOTE(review): torch is not listed here although backend.py imports it, and no
# extra index is declared - confirm torch is provided some other way.
funasr
1 change: 1 addition & 0 deletions backend/python/funasr/requirements-l4t13.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# L4T (CUDA 13) profile.
# NOTE(review): torch is not listed here although backend.py imports it, and no
# extra index is declared - confirm torch is provided some other way.
funasr
2 changes: 2 additions & 0 deletions backend/python/funasr/requirements-mps.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# MPS (Apple Metal) profile: the only profile that pins an exact torch version.
torch==2.7.1
funasr
5 changes: 5 additions & 0 deletions backend/python/funasr/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Base requirements for the gRPC server; grpcio and packaging are pinned, the
# rest float.
# NOTE(review): presumably installed for every build profile alongside the
# profile-specific file - confirm in libbackend.sh.
grpcio==1.71.0
protobuf
certifi
packaging==24.1
setuptools
9 changes: 9 additions & 0 deletions backend/python/funasr/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Starts the funasr backend through the shared libbackend.sh helpers.

# libbackend.sh sits in ./common or in ../common next to this script. Paths are
# quoted so a checkout path containing spaces or glob characters still works.
backend_dir=$(dirname "$0")
if [ -d "$backend_dir/common" ]; then
    source "$backend_dir/common/libbackend.sh"
else
    source "$backend_dir/../common/libbackend.sh"
fi

# Forward the caller's arguments unchanged; "$@" keeps each argument intact
# instead of re-splitting it on whitespace.
startBackend "$@"
13 changes: 13 additions & 0 deletions backend/python/funasr/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Runs the funasr backend's pytest suite inside the backend's virtualenv.
set -e

# libbackend.sh sits in ./common or in ../common next to this script. Paths are
# quoted so a checkout path containing spaces or glob characters still works.
backend_dir=$(dirname "$0")
if [ -d "$backend_dir/common" ]; then
    source "$backend_dir/common/libbackend.sh"
else
    source "$backend_dir/../common/libbackend.sh"
fi

ensureVenv

# -x stops at the first failure, -s disables output capture; extra arguments are
# passed through to pytest. test.py is resolved relative to the current directory.
python -m pytest -x -s test.py "$@"