Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions fastapi_startkit/src/fastapi_startkit/ai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,28 @@

Provides a LangGraph-powered declarative API for building AI agents backed
by Anthropic, OpenAI, or Google provider SDKs.

Also exposes a Laravel-style fluent API for image generation and text-to-speech::

from fastapi_startkit.ai import Image, Audio, Document

image = await Image.of("A donut on a counter").generate()

# With a photo attachment
doc = await Document.from_url("https://example.com/photo.jpg")
image = await Image.of("Make impressionist").attachments([doc]).generate()

audio = await Audio.of("Hello world").female().generate()
"""

from .agent import Agent
from .audio import Audio, AudioResponse
from .audio_providers import AudioSynthesisProvider, ElevenLabsAudioProvider, OpenAIAudioProvider
from .config import AIConfig, AnthropicConfig, GoogleConfig, OpenAIConfig
from .decorators import max_steps, max_tokens, memory, model, provider, timeout, top_p
from .document import Document
from .image import Image, ImageResponse
from .image_providers import ImageGenerationProvider, OpenAIImageProvider, StabilityImageProvider
from .providers.ai_provider import AIProvider
from .response import AgentResponse, AgentSnapshot

Expand All @@ -18,9 +34,19 @@
"AIConfig",
"AIProvider",
"AnthropicConfig",
"Audio",
"AudioResponse",
"AudioSynthesisProvider",
"Document",
"ElevenLabsAudioProvider",
"GoogleConfig",
"Image",
"ImageGenerationProvider",
"ImageResponse",
"OpenAIAudioProvider",
"OpenAIConfig",
"OpenAIImageProvider",
"StabilityImageProvider",
"max_steps",
"max_tokens",
"memory",
Expand Down
195 changes: 195 additions & 0 deletions fastapi_startkit/src/fastapi_startkit/ai/audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
"""Audio generation API — text-to-speech via a pluggable provider."""

from __future__ import annotations

import asyncio
import uuid
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
from .audio_providers import AudioSynthesisProvider

try:
from fastapi_startkit.storage.storage import Storage
except Exception: # pragma: no cover
Storage = None # type: ignore[assignment,misc]


class AudioResponse:
    """Returned by :meth:`Audio.generate`.

    Holds raw MP3 (or other format) bytes and provides async helpers to
    persist the audio to any configured storage disk::

        audio = await Audio.of("Hello world").generate()

        path = await audio.store()                    # auto-named, private disk
        path = await audio.storeAs("greeting.mp3")    # named, private disk
        path = await audio.storePublicly()            # auto-named, public disk
        path = await audio.storePubliclyAs("greeting.mp3")

    Each helper returns the storage name when the ``Storage`` facade accepted
    the write, or the absolute temp-file path when the fallback was used.
    """

    def __init__(self, data: bytes, fmt: str = "mp3"):
        """Wrap already-synthesized audio.

        Args:
            data: Raw audio bytes as returned by the provider.
            fmt: File extension (without the dot) used for auto-generated names.
        """
        self._data = data
        self._fmt = fmt

    @property
    def data(self) -> bytes:
        """Raw audio bytes."""
        return self._data

    def _auto_filename(self) -> str:
        """Return a random ``<uuid4>.<fmt>`` filename."""
        return f"{uuid.uuid4()}.{self._fmt}"

    # ── Storage helpers ────────────────────────────────────────────────────────

    async def store(self) -> str:
        """Save to the default private disk with an auto-generated filename."""
        return await self._save(self._auto_filename(), disk="local")

    async def storeAs(self, name: str) -> str:
        """Save to the default private disk with a custom filename."""
        return await self._save(name, disk="local")

    async def storePublicly(self) -> str:
        """Save to the public disk with an auto-generated filename."""
        return await self._save(self._auto_filename(), disk="public")

    async def storePubliclyAs(self, name: str) -> str:
        """Save to the public disk with a custom filename."""
        return await self._save(name, disk="public")

    # ── Internal ───────────────────────────────────────────────────────────────

    async def _save(self, name: str, disk: str = "local") -> str:
        """Run the blocking write in a worker thread so the event loop stays free."""
        return await asyncio.to_thread(self._save_sync, name, disk)

    def _save_sync(self, name: str, disk: str) -> str:
        """Try the Storage facade first; fall back to a temp file.

        Args:
            name: Target filename; may contain sub-directories.
            disk: Storage disk name passed to ``Storage.disk``.

        Returns:
            ``name`` when the storage disk accepted the write, otherwise the
            path of the file written under the system temp directory.

        Raises:
            OSError: If the temp-file fallback itself cannot be written.
        """
        import logging
        import os
        import tempfile

        if Storage is not None:
            try:
                Storage.disk(disk).put(name, self._data)
            except Exception:
                # Still best-effort, but leave a trace: the caller receives a
                # temp path instead of a storage key, which is otherwise
                # impossible to diagnose.
                logging.getLogger(__name__).warning(
                    "Storing %r on disk %r failed; falling back to a temp file",
                    name,
                    disk,
                    exc_info=True,
                )
            else:
                return name

        path = os.path.join(tempfile.gettempdir(), name)
        # Names such as "clips/greeting.mp3" need their parent directory to exist.
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(path, "wb") as f:
            f.write(self._data)
        return path


class Audio:
    """Fluent builder for text-to-speech generation.

    The active backend is selected from :attr:`~fastapi_startkit.ai.AIConfig.audio_provider`
    (env: ``AI_AUDIO_PROVIDER``). Defaults to OpenAI TTS.

    Usage::

        audio = await Audio.of("Hello world").generate()
        audio = await Audio.of("Hello world").female().generate()
        audio = await Audio.of("Hello world").male().generate()
        audio = await Audio.of("Hello world").voice("nova").generate()

    Available OpenAI TTS voices: alloy, echo, fable, onyx, nova, shimmer.
    """

    # OpenAI TTS voice presets
    _DEFAULT_VOICE = "alloy"
    _DEFAULT_FEMALE_VOICE = "nova"
    _DEFAULT_MALE_VOICE = "onyx"

    def __init__(self, text: str):
        """Start a builder for *text* with the default voice, model, speed and format."""
        self._text = text
        self._voice: str = self._DEFAULT_VOICE
        self._model: str = "tts-1"
        self._speed: float = 1.0
        self._response_format: str = "mp3"

    @classmethod
    def of(cls, text: str) -> "Audio":
        """Create an :class:`Audio` builder with the given input text."""
        return cls(text)

    # ── Modifier methods (chainable) ───────────────────────────────────────────

    def female(self) -> "Audio":
        """Use a female voice (``nova``)."""
        self._voice = self._DEFAULT_FEMALE_VOICE
        return self

    def male(self) -> "Audio":
        """Use a male voice (``onyx``)."""
        self._voice = self._DEFAULT_MALE_VOICE
        return self

    def voice(self, name: str) -> "Audio":
        """Set an explicit TTS voice name.

        OpenAI voices: ``alloy``, ``echo``, ``fable``, ``onyx``, ``nova``,
        ``shimmer``.
        """
        self._voice = name
        return self

    def model(self, name: str) -> "Audio":
        """Override the TTS model (default: ``tts-1``).

        Use ``tts-1-hd`` for higher quality at the cost of latency.
        """
        self._model = name
        return self

    def speed(self, value: float) -> "Audio":
        """Set speech speed (0.25 – 4.0, default: 1.0).

        The value is forwarded to the provider as-is; it is not range-checked here.
        """
        self._speed = value
        return self

    def format(self, fmt: str) -> "Audio":
        """Set output format: ``mp3``, ``opus``, ``aac``, or ``flac``."""
        self._response_format = fmt
        return self

    # ── Generation ─────────────────────────────────────────────────────────────

    async def generate(self) -> AudioResponse:
        """Call the configured TTS provider and return an :class:`AudioResponse`.

        Raises:
            ValueError: If the configured audio provider name is not recognised.
        """
        provider = self._resolve_provider()
        data = await provider.synthesize(
            text=self._text,
            voice=self._voice,
            model=self._model,
            speed=self._speed,
            fmt=self._response_format,
        )
        return AudioResponse(data=data, fmt=self._response_format)

    # ── Internal ───────────────────────────────────────────────────────────────

    @staticmethod
    def _normalize_provider_name(name):
        """Lower-case and trim a provider name; non-strings pass through unchanged."""
        if isinstance(name, str):
            return name.strip().lower()
        return name

    def _resolve_provider(self) -> "AudioSynthesisProvider":
        """Build the provider selected by the ``ai`` config.

        Falls back to OpenAI with SDK-default credentials when the config
        facade cannot be loaded or read.

        Raises:
            ValueError: If the configured provider is neither ``openai`` nor
                ``elevenlabs`` (compared case-insensitively).
        """
        import logging  # noqa: PLC0415

        from .audio_providers import ElevenLabsAudioProvider, OpenAIAudioProvider  # noqa: PLC0415

        configured = "openai"
        api_key: Optional[str] = None
        base_url: Optional[str] = None

        try:
            from fastapi_startkit.facades.Config import Config  # noqa: PLC0415

            ai_config = Config.get("ai")
            configured = ai_config.audio_provider
            openai_cfg = ai_config.providers.get("openai")
            if openai_cfg:
                api_key = openai_cfg.key or None
                base_url = openai_cfg.url or None
        except Exception:
            # Config is optional (e.g. the builder is used outside a booted app),
            # so keep going with whatever was resolved — but leave a trace.
            logging.getLogger(__name__).debug(
                "Could not read AI config; using default audio provider settings",
                exc_info=True,
            )

        # Env values like "OpenAI" or " openai " should select the same backend.
        provider_name = self._normalize_provider_name(configured)

        if provider_name == "openai":
            return OpenAIAudioProvider(api_key=api_key, base_url=base_url)
        if provider_name == "elevenlabs":
            return ElevenLabsAudioProvider()
        raise ValueError(f"Unknown audio provider: {configured!r}. Use 'openai' or 'elevenlabs'.")
77 changes: 77 additions & 0 deletions fastapi_startkit/src/fastapi_startkit/ai/audio_providers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""Audio synthesis provider abstractions.

Providers implement the :class:`AudioSynthesisProvider` ABC so that the
:class:`~fastapi_startkit.ai.Audio` builder is not hard-wired to a single
vendor. Select the active provider via ``AI_AUDIO_PROVIDER`` in your
``.env`` (or ``AIConfig.audio_provider``).

Supported providers
-------------------
* ``openai`` — OpenAI TTS (tts-1 / tts-1-hd) (default)
* ``elevenlabs`` — ElevenLabs (stub, raises :exc:`NotImplementedError`)
"""

from __future__ import annotations

from abc import ABC, abstractmethod


class AudioSynthesisProvider(ABC):
    """Interface that every text-to-speech backend implements."""

    @abstractmethod
    async def synthesize(self, text: str, voice: str, model: str, speed: float, fmt: str) -> bytes:
        """Render *text* as speech and hand back the raw audio bytes.

        Args:
            text: The text to speak.
            voice: Voice name to use.
            model: Model identifier to use.
            speed: Speech speed multiplier.
            fmt: Requested output audio format.

        Returns:
            The synthesized audio as bytes.
        """


class OpenAIAudioProvider(AudioSynthesisProvider):
    """OpenAI TTS provider using :class:`openai.AsyncOpenAI`.

    Supported voices: ``alloy``, ``echo``, ``fable``, ``onyx``, ``nova``,
    ``shimmer``. Supported formats: ``mp3``, ``opus``, ``aac``, ``flac``.
    """

    def __init__(self, api_key: str | None = None, base_url: str | None = None):
        """Remember the credentials; the SDK client is created per request.

        Args:
            api_key: Passed to ``AsyncOpenAI`` as ``api_key``; ``None`` is forwarded unchanged.
            base_url: Passed to ``AsyncOpenAI`` as ``base_url``; ``None`` is forwarded unchanged.
        """
        self._api_key = api_key
        self._base_url = base_url

    async def synthesize(
        self,
        text: str,
        voice: str,
        model: str,
        speed: float,
        fmt: str,
    ) -> bytes:
        """Convert *text* to speech via the OpenAI speech endpoint.

        Args:
            text: The text to speak.
            voice: OpenAI voice name.
            model: OpenAI TTS model name.
            speed: Speech speed multiplier.
            fmt: Value sent as ``response_format``.

        Returns:
            The audio bytes read from the SDK response.
        """
        # Imported lazily so the package works without the optional openai dependency.
        from openai import AsyncOpenAI  # noqa: PLC0415

        # A client is created per call, so close it (and its HTTP connection
        # pool) when done instead of leaking it until garbage collection.
        async with AsyncOpenAI(api_key=self._api_key, base_url=self._base_url) as client:
            response = await client.audio.speech.create(
                model=model,
                voice=voice,
                input=text,
                speed=speed,
                response_format=fmt,
            )
            return response.read()


class ElevenLabsAudioProvider(AudioSynthesisProvider):
    """Placeholder for an ElevenLabs backend; every synthesis call raises :exc:`NotImplementedError`."""

    async def synthesize(self, text: str, voice: str, model: str, speed: float, fmt: str) -> bytes:
        """Always raise, because this backend has not been written yet.

        Raises:
            NotImplementedError: On every call.
        """
        message = "ElevenLabsAudioProvider is not yet implemented"
        raise NotImplementedError(message)
4 changes: 4 additions & 0 deletions fastapi_startkit/src/fastapi_startkit/ai/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,7 @@ class AIConfig:
"google": GoogleConfig(),
}
)

# Media-generation provider selection
image_provider: str = field(default_factory=lambda: env("AI_IMAGE_PROVIDER", "openai"))
audio_provider: str = field(default_factory=lambda: env("AI_AUDIO_PROVIDER", "openai"))
Loading
Loading