Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ jobs:
if: ${{ matrix.skip_pytests != 'true' }}
env:
ML_BACKEND: ${{ matrix.backend_dir_name }}
TEST_ENV: "true"
run: |
docker compose -f label_studio_ml/examples/${{ matrix.backend_dir_name }}/docker-compose.yml exec -T ${{ matrix.backend_dir_name }} pytest -vvv --cov --cov-report=xml:/tmp/coverage.xml

Expand Down
48 changes: 48 additions & 0 deletions label_studio_ml/examples/deepgram/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# syntax=docker/dockerfile:1
ARG PYTHON_VERSION=3.13

FROM python:${PYTHON_VERSION}-slim AS python-base
# TEST_ENV="true" bakes test requirements into the image (set via compose build args).
ARG TEST_ENV

WORKDIR /app

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PORT=${PORT:-9090} \
    PIP_CACHE_DIR=/.cache \
    WORKERS=1 \
    THREADS=8

# Update the base OS.
# NOTE: use apt-get (not apt) in scripts — apt's CLI is not guaranteed stable
# for non-interactive use and warns when scripted; this also matches the
# surrounding apt-get calls.
RUN --mount=type=cache,target="/var/cache/apt",sharing=locked \
    --mount=type=cache,target="/var/lib/apt/lists",sharing=locked \
    set -eux; \
    apt-get update; \
    apt-get upgrade -y; \
    apt-get install --no-install-recommends -y \
        git; \
    apt-get autoremove -y

# install base requirements
COPY requirements-base.txt .
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
    pip install -r requirements-base.txt

# install custom requirements
COPY requirements.txt .
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
    pip install -r requirements.txt

# install test requirements if needed
COPY requirements-test.txt .
# build only when TEST_ENV="true"
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
    if [ "$TEST_ENV" = "true" ]; then \
      pip install -r requirements-test.txt; \
    fi

COPY . .

EXPOSE 9090
# Shell form so $PORT/$WORKERS/$THREADS expand at container start.
CMD gunicorn --preload --bind :$PORT --workers $WORKERS --threads $THREADS --timeout 0 _wsgi:app

35 changes: 35 additions & 0 deletions label_studio_ml/examples/deepgram/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@

# Using Deepgram with Label Studio for Text to Speech annotation

This backend uses the Deepgram API to take the input text from the user, do text to speech, and return the output audio for annotation in Label Studio.

https://github.com/user-attachments/assets/9569a955-0baf-4a95-9e8a-d08250a0a298


IMPORTANT NOTE: YOU MUST REFRESH THE PAGE AFTER SUBMITTING THE TEXT TO SEE THE AUDIO APPEAR.

## Prerequisites
1. [Deepgram API Key](https://deepgram.com/) -- create an account and follow the instructions to get an API key with default permissions. Store this key as `DEEPGRAM_API_KEY` in `docker-compose.yml`
2. AWS Storage -- make sure you configure the following parameters in `docker-compose.yml`:
- `AWS_ACCESS_KEY_ID` -- your AWS access key id
- `AWS_SECRET_ACCESS_KEY` -- your AWS secret access key
- `AWS_SESSION_TOKEN` -- your AWS session token
- `AWS_DEFAULT_REGION` - the region you want to use for S3
- `S3_BUCKET` -- the name of the bucket where you'd like to store the created audio files
- `S3_FOLDER` -- the name of the folder within the specified bucket where you'd like to store the audio files.
3. Label Studio -- make sure you set your `LABEL_STUDIO_URL` and your `LABEL_STUDIO_API_KEY` in `docker-compose.yml`. As of 11/12/25, you must use the LEGACY TOKEN.

## Labeling Config
This is the base labeling config to be used with this backend. Note that you may add additional annotations to the document after the audio without breaking anything!
```
<View>
<Header value="What would you like to TTS?"/>
  <TextArea name="text" toName="audio" placeholder="What do you want to tts?" value="$text" rows="4" maxSubmissions="1"/>
<Audio name="audio" value="$audio" zoom="true" hotkey="ctrl+enter"/>
</View>
```
## A Data Note
Note that in order for this to work, you need to upload dummy data (i.e. empty text and audio) so that the tasks populate. You can use `dummy_data.json` as this data.

## Configuring the backend
When you attach the model to Label Studio in your model settings, make sure to toggle ON interactive preannotations!
122 changes: 122 additions & 0 deletions label_studio_ml/examples/deepgram/_wsgi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import os
import argparse
import json
import logging
import logging.config

# Configure logging before importing the model so library loggers inherit it.
# NOTE: dictConfig raises if a handler/root "level" is None, so fall back to
# "INFO" when the LOG_LEVEL environment variable is unset.
logging.config.dictConfig({
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "standard": {
            "format": "[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s"
        }
    },
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "level": os.getenv('LOG_LEVEL', 'INFO'),
            "stream": "ext://sys.stdout",
            "formatter": "standard"
        }
    },
    "root": {
        "level": os.getenv('LOG_LEVEL', 'INFO'),
        "handlers": [
            "console"
        ],
        "propagate": True
    }
})

from label_studio_ml.api import init_app
from model import DeepgramModel


# Optional JSON config file shipped next to this module.
_DEFAULT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'config.json')


def get_kwargs_from_config(config_path=_DEFAULT_CONFIG_PATH):
    """Load model initialization kwargs from a JSON file.

    Returns an empty dict when the file does not exist; asserts the file
    holds a JSON object (not a list/scalar) when it does.
    """
    if os.path.exists(config_path):
        with open(config_path) as handle:
            loaded = json.load(handle)
        assert isinstance(loaded, dict)
        return loaded
    return {}


if __name__ == "__main__":
    # CLI entry point: parse server options, optionally validate the model,
    # then run the Flask development server.
    parser = argparse.ArgumentParser(description='Label studio')
    parser.add_argument(
        '-p', '--port', dest='port', type=int, default=9090,
        help='Server port')
    parser.add_argument(
        '--host', dest='host', type=str, default='0.0.0.0',
        help='Server host')
    parser.add_argument(
        '--kwargs', '--with', dest='kwargs', metavar='KEY=VAL', nargs='+', type=lambda kv: kv.split('='),
        help='Additional LabelStudioMLBase model initialization kwargs')
    parser.add_argument(
        '-d', '--debug', dest='debug', action='store_true',
        help='Switch debug mode')
    parser.add_argument(
        '--log-level', dest='log_level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default=None,
        help='Logging level')
    parser.add_argument(
        '--model-dir', dest='model_dir', default=os.path.dirname(__file__),
        help='Directory where models are stored (relative to the project directory)')
    parser.add_argument(
        '--check', dest='check', action='store_true',
        help='Validate model instance before launching server')
    parser.add_argument('--basic-auth-user',
                        default=os.environ.get('ML_SERVER_BASIC_AUTH_USER', None),
                        help='Basic auth user')
    parser.add_argument('--basic-auth-pass',
                        default=os.environ.get('ML_SERVER_BASIC_AUTH_PASS', None),
                        help='Basic auth pass')

    args = parser.parse_args()

    # setup logging level
    if args.log_level:
        logging.root.setLevel(args.log_level)

    def coerce(raw):
        """Best-effort conversion of a CLI string to int/bool/float.

        Order matters and mirrors the documented contract: all-digit strings
        become ints, 'True'/'true' and 'False'/'false' become booleans,
        float-parsable strings become floats, everything else stays a string.
        """
        if raw.isdigit():
            return int(raw)
        if raw in ('True', 'true'):
            return True
        if raw in ('False', 'false'):
            return False
        try:
            return float(raw)
        except ValueError:
            return raw

    # Config-file kwargs first; --kwargs KEY=VAL pairs override them.
    kwargs = get_kwargs_from_config()
    if args.kwargs:
        kwargs.update({key: coerce(value) for key, value in args.kwargs})

    if args.check:
        print('Check "' + DeepgramModel.__name__ + '" instance creation..')
        model = DeepgramModel(**kwargs)

    app = init_app(model_class=DeepgramModel, basic_auth_user=args.basic_auth_user, basic_auth_pass=args.basic_auth_pass)

    app.run(host=args.host, port=args.port, debug=args.debug)

else:
    # for uWSGI use: expose `app` at module level for the WSGI server.
    app = init_app(model_class=DeepgramModel)
53 changes: 53 additions & 0 deletions label_studio_ml/examples/deepgram/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Docker Compose service for the Deepgram text-to-speech ML backend.
services:
  deepgram:
    container_name: ml-backend
    image: humansignal/ml-backend:v0
    build:
      context: .
      args:
        # Set TEST_ENV=true to bake test requirements into the image (see Dockerfile).
        TEST_ENV: ${TEST_ENV}

    # Uncomment to reserve a GPU for this container. Not required here:
    # speech synthesis is delegated to the Deepgram API.
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [ gpu ]


    environment:
      # specify these parameters if you want to use basic auth for the model server
      - BASIC_AUTH_USER=
      - BASIC_AUTH_PASS=
      # set the log level for the model server
      - LOG_LEVEL=DEBUG
      # any other parameters that you want to pass to the model server
      - ANY=PARAMETER
      # specify the number of workers and threads for the model server
      - WORKERS=1
      - THREADS=8
      # specify the model directory (likely you don't need to change this)
      - MODEL_DIR=/data/models
      # Deepgram API key used for text-to-speech requests (see README)
      - DEEPGRAM_API_KEY=

      # AWS credentials and S3 destination for uploading generated audio
      - AWS_ACCESS_KEY_ID=
      - AWS_SECRET_ACCESS_KEY=
      - AWS_SESSION_TOKEN=
      - AWS_DEFAULT_REGION=us-east-1
      - S3_BUCKET=
      - S3_FOLDER=

      # Specify the Label Studio URL and API key to access
      # uploaded, local storage and cloud storage files.
      # Do not use 'localhost' as it does not work within Docker containers.
      # Use prefix 'http://' or 'https://' for the URL always.
      # Determine the actual IP using 'ifconfig' (Linux/Mac) or 'ipconfig' (Windows).
      - LABEL_STUDIO_URL=
      - LABEL_STUDIO_API_KEY=
    ports:
      - "9090:9090"
    volumes:
      - "./data/server:/data"
3 changes: 3 additions & 0 deletions label_studio_ml/examples/deepgram/dummy_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"data": {"audio": "", "text": ""}
}
Loading
Loading