diff --git a/fastapi_startkit/src/fastapi_startkit/ai/__init__.py b/fastapi_startkit/src/fastapi_startkit/ai/__init__.py index ff4cde47..1c47a483 100644 --- a/fastapi_startkit/src/fastapi_startkit/ai/__init__.py +++ b/fastapi_startkit/src/fastapi_startkit/ai/__init__.py @@ -2,12 +2,28 @@ Provides a LangGraph-powered declarative API for building AI agents backed by Anthropic, OpenAI, or Google provider SDKs. + +Also exposes a Laravel-style fluent API for image generation and text-to-speech:: + + from fastapi_startkit.ai import Image, Audio, Document + + image = await Image.of("A donut on a counter").generate() + + # With a photo attachment + doc = await Document.from_url("https://example.com/photo.jpg") + image = await Image.of("Make impressionist").attachments([doc]).generate() + + audio = await Audio.of("Hello world").female().generate() """ from .agent import Agent +from .audio import Audio, AudioResponse +from .audio_providers import AudioSynthesisProvider, ElevenLabsAudioProvider, OpenAIAudioProvider from .config import AIConfig, AnthropicConfig, GoogleConfig, OpenAIConfig from .decorators import max_steps, max_tokens, memory, model, provider, timeout, top_p from .document import Document +from .image import Image, ImageResponse +from .image_providers import ImageGenerationProvider, OpenAIImageProvider, StabilityImageProvider from .providers.ai_provider import AIProvider from .response import AgentResponse, AgentSnapshot @@ -18,9 +34,19 @@ "AIConfig", "AIProvider", "AnthropicConfig", + "Audio", + "AudioResponse", + "AudioSynthesisProvider", "Document", + "ElevenLabsAudioProvider", "GoogleConfig", + "Image", + "ImageGenerationProvider", + "ImageResponse", + "OpenAIAudioProvider", "OpenAIConfig", + "OpenAIImageProvider", + "StabilityImageProvider", "max_steps", "max_tokens", "memory", diff --git a/fastapi_startkit/src/fastapi_startkit/ai/audio.py b/fastapi_startkit/src/fastapi_startkit/ai/audio.py new file mode 100644 index 00000000..b16e54f1 --- /dev/null +++ b/fastapi_startkit/src/fastapi_startkit/ai/audio.py @@ -0,0 +1,195 @@ +"""Audio generation API — text-to-speech via a pluggable provider.""" + +from __future__ import annotations + +import asyncio +import uuid +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from .audio_providers import AudioSynthesisProvider + +try: + from fastapi_startkit.storage.storage import Storage +except Exception: # pragma: no cover + Storage = None # type: ignore[assignment,misc] + + +class AudioResponse: + """Returned by :meth:`Audio.generate`. + + Holds raw MP3 (or other format) bytes and provides async helpers to + persist the audio to any configured storage disk:: + + audio = await Audio.of("Hello world").generate() + + path = await audio.store() # auto-named, private disk + path = await audio.storeAs("greeting.mp3") # named, private disk + path = await audio.storePublicly() # auto-named, public disk + path = await audio.storePubliclyAs("greeting.mp3") + """ + + def __init__(self, data: bytes, fmt: str = "mp3"): + self._data = data + self._fmt = fmt + + @property + def data(self) -> bytes: + """Raw audio bytes.""" + return self._data + + def _auto_filename(self) -> str: + return f"{uuid.uuid4()}.{self._fmt}" + + # ── Storage helpers ──────────────────────────────────────────────────────── + + async def store(self) -> str: + """Save to the default private disk with an auto-generated filename.""" + return await self._save(self._auto_filename(), disk="local") + + async def storeAs(self, name: str) -> str: + """Save to the default private disk with a custom filename.""" + return await self._save(name, disk="local") + + async def storePublicly(self) -> str: + """Save to the public disk with an auto-generated filename.""" + return await self._save(self._auto_filename(), disk="public") + + async def storePubliclyAs(self, name: str) -> str: + """Save to the public disk with a custom filename.""" + return await self._save(name, disk="public") + + # ── Internal ─────────────────────────────────────────────────────────────── + + async def _save(self, name: str, disk: str = "local") -> str: + return await asyncio.to_thread(self._save_sync, name, disk) + + def _save_sync(self, name: str, disk: str) -> str: + """Try the Storage facade first; fall back to a temp file.""" + if Storage is not None: + try: + Storage.disk(disk).put(name, self._data) + return name + except Exception: + pass + import os + import tempfile + + path = os.path.join(tempfile.gettempdir(), name) + with open(path, "wb") as f: + f.write(self._data) + return path + + +class Audio: + """Fluent builder for text-to-speech generation. + + The active backend is selected from :attr:`~fastapi_startkit.ai.AIConfig.audio_provider` + (env: ``AI_AUDIO_PROVIDER``). Defaults to OpenAI TTS. + + Usage:: + + audio = await Audio.of("Hello world").generate() + audio = await Audio.of("Hello world").female().generate() + audio = await Audio.of("Hello world").male().generate() + audio = await Audio.of("Hello world").voice("nova").generate() + + Available OpenAI TTS voices: alloy, echo, fable, onyx, nova, shimmer. + """ + + # OpenAI TTS voice presets + _DEFAULT_VOICE = "alloy" + _DEFAULT_FEMALE_VOICE = "nova" + _DEFAULT_MALE_VOICE = "onyx" + + def __init__(self, text: str): + self._text = text + self._voice: str = self._DEFAULT_VOICE + self._model: str = "tts-1" + self._speed: float = 1.0 + self._response_format: str = "mp3" + + @classmethod + def of(cls, text: str) -> "Audio": + """Create an :class:`Audio` builder with the given input text.""" + return cls(text) + + # ── Modifier methods (chainable) ─────────────────────────────────────────── + + def female(self) -> "Audio": + """Use a female voice (``nova``).""" + self._voice = self._DEFAULT_FEMALE_VOICE + return self + + def male(self) -> "Audio": + """Use a male voice (``onyx``).""" + self._voice = self._DEFAULT_MALE_VOICE + return self + + def voice(self, name: str) -> "Audio": + """Set an explicit TTS voice name. + + OpenAI voices: ``alloy``, ``echo``, ``fable``, ``onyx``, ``nova``, + ``shimmer``. + """ + self._voice = name + return self + + def model(self, name: str) -> "Audio": + """Override the TTS model (default: ``tts-1``). + + Use ``tts-1-hd`` for higher quality at the cost of latency. + """ + self._model = name + return self + + def speed(self, value: float) -> "Audio": + """Set speech speed (0.25 – 4.0, default: 1.0).""" + self._speed = value + return self + + def format(self, fmt: str) -> "Audio": + """Set output format: ``mp3``, ``opus``, ``aac``, or ``flac``.""" + self._response_format = fmt + return self + + # ── Generation ───────────────────────────────────────────────────────────── + + async def generate(self) -> AudioResponse: + """Call the configured TTS provider and return an :class:`AudioResponse`.""" + provider = self._resolve_provider() + data = await provider.synthesize( + text=self._text, + voice=self._voice, + model=self._model, + speed=self._speed, + fmt=self._response_format, + ) + return AudioResponse(data=data, fmt=self._response_format) + + # ── Internal ─────────────────────────────────────────────────────────────── + + def _resolve_provider(self) -> "AudioSynthesisProvider": + from .audio_providers import ElevenLabsAudioProvider, OpenAIAudioProvider # noqa: PLC0415 + + provider_name = "openai" + api_key: Optional[str] = None + base_url: Optional[str] = None + + try: + from fastapi_startkit.facades.Config import Config # noqa: PLC0415 + + ai_config = Config.get("ai") + provider_name = ai_config.audio_provider + openai_cfg = ai_config.providers.get("openai") + if openai_cfg: + api_key = openai_cfg.key or None + base_url = openai_cfg.url or None + except Exception: + pass + + if provider_name == "openai": + return OpenAIAudioProvider(api_key=api_key, base_url=base_url) + if provider_name == "elevenlabs": + return ElevenLabsAudioProvider() + raise ValueError(f"Unknown audio provider: {provider_name!r}. Use 'openai' or 'elevenlabs'.") diff --git a/fastapi_startkit/src/fastapi_startkit/ai/audio_providers.py b/fastapi_startkit/src/fastapi_startkit/ai/audio_providers.py new file mode 100644 index 00000000..cb81c198 --- /dev/null +++ b/fastapi_startkit/src/fastapi_startkit/ai/audio_providers.py @@ -0,0 +1,77 @@ +"""Audio synthesis provider abstractions. + +Providers implement the :class:`AudioSynthesisProvider` ABC so that the +:class:`~fastapi_startkit.ai.Audio` builder is not hard-wired to a single +vendor. Select the active provider via ``AI_AUDIO_PROVIDER`` in your +``.env`` (or ``AIConfig.audio_provider``). + +Supported providers +------------------- +* ``openai`` — OpenAI TTS (tts-1 / tts-1-hd) (default) +* ``elevenlabs`` — ElevenLabs (stub, raises :exc:`NotImplementedError`) +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod + + +class AudioSynthesisProvider(ABC): + """Abstract base for text-to-speech backends.""" + + @abstractmethod + async def synthesize( + self, + text: str, + voice: str, + model: str, + speed: float, + fmt: str, + ) -> bytes: + """Convert *text* to speech and return raw audio bytes.""" + + +class OpenAIAudioProvider(AudioSynthesisProvider): + """OpenAI TTS provider using :class:`openai.AsyncOpenAI`. + + Supported voices: ``alloy``, ``echo``, ``fable``, ``onyx``, ``nova``, + ``shimmer``. Supported formats: ``mp3``, ``opus``, ``aac``, ``flac``. + """ + + def __init__(self, api_key: str | None = None, base_url: str | None = None): + self._api_key = api_key + self._base_url = base_url + + async def synthesize( + self, + text: str, + voice: str, + model: str, + speed: float, + fmt: str, + ) -> bytes: + from openai import AsyncOpenAI # noqa: PLC0415 + + client = AsyncOpenAI(api_key=self._api_key, base_url=self._base_url) + response = await client.audio.speech.create( + model=model, + voice=voice, + input=text, + speed=speed, + response_format=fmt, + ) + return response.read() + + +class ElevenLabsAudioProvider(AudioSynthesisProvider): + """ElevenLabs provider stub — raises :exc:`NotImplementedError` until implemented.""" + + async def synthesize( + self, + text: str, + voice: str, + model: str, + speed: float, + fmt: str, + ) -> bytes: + raise NotImplementedError("ElevenLabsAudioProvider is not yet implemented") diff --git a/fastapi_startkit/src/fastapi_startkit/ai/config.py b/fastapi_startkit/src/fastapi_startkit/ai/config.py index af1a1acf..2a7b1587 100644 --- a/fastapi_startkit/src/fastapi_startkit/ai/config.py +++ b/fastapi_startkit/src/fastapi_startkit/ai/config.py @@ -46,3 +46,7 @@ class AIConfig: "google": GoogleConfig(), } ) + + # Media-generation provider selection + image_provider: str = field(default_factory=lambda: env("AI_IMAGE_PROVIDER", "openai")) + audio_provider: str = field(default_factory=lambda: env("AI_AUDIO_PROVIDER", "openai")) diff --git a/fastapi_startkit/src/fastapi_startkit/ai/document.py b/fastapi_startkit/src/fastapi_startkit/ai/document.py index f6dffee9..862ec686 100644 --- a/fastapi_startkit/src/fastapi_startkit/ai/document.py +++ b/fastapi_startkit/src/fastapi_startkit/ai/document.py @@ -1,27 +1,112 @@ -"""Document helper — attach files or text to agent prompts.""" +"""Document helper — attach files, images, or text to agent prompts.""" from __future__ import annotations +import asyncio + +# Optional runtime dependency — imported at module level so tests can patch it. +try: + from fastapi_startkit.storage.storage import Storage +except Exception: # pragma: no cover + Storage = None # type: ignore[assignment,misc] + class Document: - """Attach documents to agent.prompt() calls.""" + """Attach text or binary content to :meth:`~fastapi_startkit.ai.Agent.prompt` calls. + + Supports both text (for LLM context documents) and binary (for image + attachments sent to :class:`~fastapi_startkit.ai.Image`). + + Text:: + + doc = Document.from_path("report.txt") + agent.prompt("Summarise this", attachments=[doc]) + + Binary image:: - def __init__(self, content: str, name: str = "", media_type: str = "text/plain"): + doc = await Document.from_url("https://example.com/photo.jpg") + image = await Image.of("Make this impressionist").attachments([doc]).generate() + """ + + def __init__(self, content: str | bytes, name: str = "", media_type: str = "text/plain"): self.content = content self.name = name self.media_type = media_type + # ── Sync constructors (text) ─────────────────────────────────────────────── + @classmethod def from_path(cls, path: str) -> "Document": - """Load a document from a local file path.""" - with open(path) as f: - content = f.read() + """Load a document from a local file path. + + Text files are returned with ``str`` content; binary files + (e.g. images) fall back to ``bytes`` automatically. + """ + try: + with open(path) as f: + content: str | bytes = f.read() + except UnicodeDecodeError: + with open(path, "rb") as f: + content = f.read() return cls(content=content, name=path) + # ── Async constructors (binary) ──────────────────────────────────────────── + @classmethod - def from_storage(cls, key: str) -> "Document": - """Load a document from application storage (storage/).""" - return cls.from_path(f"storage/{key}") + async def from_storage(cls, key: str) -> "Document": + """Load a binary file from application storage (``storage/``) asynchronously. + + Falls back to reading directly from the ``storage/`` directory relative + to the current working directory if the Storage facade is not configured. + """ + + def _read() -> bytes: + if Storage is not None: + try: + disk = Storage.disk("local") + # Resolve the full path and read as binary + resolved_path = disk.get_path(key) + with open(resolved_path, "rb") as f: + return f.read() + except Exception: + pass + import os # noqa: PLC0415 + + with open(os.path.join("storage", key), "rb") as f: + return f.read() + + data = await asyncio.to_thread(_read) + return cls(content=data, name=key) + + @classmethod + async def from_url(cls, url: str) -> "Document": + """Download bytes from a URL asynchronously using *httpx*. + + Example:: + + doc = await Document.from_url("https://example.com/photo.jpg") + """ + import httpx # noqa: PLC0415 + + async with httpx.AsyncClient() as client: + response = await client.get(url) + response.raise_for_status() + name = url.rstrip("/").split("/")[-1] + return cls(content=response.content, name=name) + + # ── Binary accessor ──────────────────────────────────────────────────────── + + def to_bytes(self) -> bytes: + """Return the document content as raw bytes. + + If the content was loaded as text (e.g. via :meth:`from_path`), + it is UTF-8 encoded. Binary content is returned as-is. + """ + if isinstance(self.content, bytes): + return self.content + return self.content.encode("utf-8") + + # ── LLM content blocks ───────────────────────────────────────────────────── def to_anthropic_block(self) -> dict: """Return an Anthropic-compatible content block for this document.""" diff --git a/fastapi_startkit/src/fastapi_startkit/ai/files.py b/fastapi_startkit/src/fastapi_startkit/ai/files.py new file mode 100644 index 00000000..1db8fcc0 --- /dev/null +++ b/fastapi_startkit/src/fastapi_startkit/ai/files.py @@ -0,0 +1,99 @@ +"""Files helpers — image attachment factories for use with Image editing requests.""" + +from __future__ import annotations + +import base64 +import os + + +class ImageAttachment: + """Represents an image file to attach to an Image editing request. + + Instances are created via the :class:`Files.Image` factory, not directly:: + + attachment = Files.Image.fromPath("/tmp/photo.jpg") + attachment = Files.Image.fromStorage("photo.jpg") + attachment = Files.Image.fromUrl("https://example.com/photo.jpg") + """ + + def __init__( + self, + data: bytes, + name: str = "", + media_type: str = "image/jpeg", + ): + self._data = data + self._name = name + self._media_type = media_type + + @property + def data(self) -> bytes: + """Raw bytes of the image.""" + return self._data + + @property + def name(self) -> str: + """Filename hint (basename of the source path or URL).""" + return self._name + + @property + def media_type(self) -> str: + """MIME type of the image (e.g. ``image/jpeg``).""" + return self._media_type + + def to_base64(self) -> str: + """Return the image data base64-encoded as a plain string.""" + return base64.b64encode(self._data).decode("utf-8") + + +class Files: + """Namespace for file attachment helpers. + + Usage:: + + from fastapi_startkit.ai import Files, Image + + image = await ( + Image.of("Make this impressionist") + .attachments([ + Files.Image.fromStorage("photo.jpg"), + Files.Image.fromPath("/tmp/photo.jpg"), + Files.Image.fromUrl("https://example.com/photo.jpg"), + ]) + .generate() + ) + """ + + class Image: + """Factory for :class:`ImageAttachment` objects. + + All methods are static — no need to instantiate ``Files.Image``. + """ + + @staticmethod + def fromStorage(key: str) -> ImageAttachment: + """Load an image from application storage (``storage/``).""" + path = os.path.join("storage", key) + with open(path, "rb") as f: + data = f.read() + return ImageAttachment(data=data, name=key) + + @staticmethod + def fromPath(path: str) -> ImageAttachment: + """Load an image from a local filesystem path.""" + with open(path, "rb") as f: + data = f.read() + return ImageAttachment(data=data, name=os.path.basename(path)) + + @staticmethod + def fromUrl(url: str) -> ImageAttachment: + """Download an image from a URL and return an :class:`ImageAttachment`. + + Uses :mod:`urllib.request` — no extra dependencies required. + """ + import urllib.request + + with urllib.request.urlopen(url) as response: # noqa: S310 + data = response.read() + name = url.rstrip("/").split("/")[-1] + return ImageAttachment(data=data, name=name) diff --git a/fastapi_startkit/src/fastapi_startkit/ai/image.py b/fastapi_startkit/src/fastapi_startkit/ai/image.py new file mode 100644 index 00000000..9e8f7902 --- /dev/null +++ b/fastapi_startkit/src/fastapi_startkit/ai/image.py @@ -0,0 +1,203 @@ +"""Image generation API — text-to-image and image editing via a pluggable provider.""" + +from __future__ import annotations + +import asyncio +import uuid +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from .document import Document + from .image_providers import ImageGenerationProvider + +try: + from fastapi_startkit.storage.storage import Storage +except Exception: # pragma: no cover + Storage = None # type: ignore[assignment,misc] + + +class ImageResponse: + """Returned by :meth:`Image.generate`. + + Holds raw PNG bytes and provides async helpers to persist the image to + any configured storage disk:: + + image = await Image.of("A donut on a counter").generate() + + path = await image.store() # auto-named, private disk + path = await image.storeAs("result.png") # named, private disk + path = await image.storePublicly() # auto-named, public disk + path = await image.storePubliclyAs("result.png") + """ + + def __init__(self, data: bytes, fmt: str = "png"): + self._data = data + self._fmt = fmt + + @property + def data(self) -> bytes: + """Raw image bytes.""" + return self._data + + def _auto_filename(self) -> str: + return f"{uuid.uuid4()}.{self._fmt}" + + # ── Storage helpers ──────────────────────────────────────────────────────── + + async def store(self) -> str: + """Save to the default private disk with an auto-generated filename.""" + return await self._save(self._auto_filename(), disk="local") + + async def storeAs(self, name: str) -> str: + """Save to the default private disk with a custom filename.""" + return await self._save(name, disk="local") + + async def storePublicly(self) -> str: + """Save to the public disk with an auto-generated filename.""" + return await self._save(self._auto_filename(), disk="public") + + async def storePubliclyAs(self, name: str) -> str: + """Save to the public disk with a custom filename.""" + return await self._save(name, disk="public") + + # ── Internal ─────────────────────────────────────────────────────────────── + + async def _save(self, name: str, disk: str = "local") -> str: + return await asyncio.to_thread(self._save_sync, name, disk) + + def _save_sync(self, name: str, disk: str) -> str: + """Try the Storage facade first; fall back to a temp file.""" + if Storage is not None: + try: + Storage.disk(disk).put(name, self._data) + return name + except Exception: + pass + import os + import tempfile + + path = os.path.join(tempfile.gettempdir(), name) + with open(path, "wb") as f: + f.write(self._data) + return path + + +class Image: + """Fluent builder for image generation and editing. + + The active backend is selected from :attr:`~fastapi_startkit.ai.AIConfig.image_provider` + (env: ``AI_IMAGE_PROVIDER``). Defaults to OpenAI DALL-E. + + Usage — text to image:: + + image = await Image.of("A donut on a counter").generate() + + Usage — edit with :class:`~fastapi_startkit.ai.Document` attachments:: + + from fastapi_startkit.ai import Document + + image = await ( + Image.of("Make this impressionist") + .attachments([await Document.from_url("https://example.com/photo.jpg")]) + .landscape() + .generate() + ) + """ + + # DALL-E 3 size presets + _LANDSCAPE_SIZE = "1792x1024" + _PORTRAIT_SIZE = "1024x1792" + _SQUARE_SIZE = "1024x1024" + + def __init__(self, prompt: str): + self._prompt = prompt + self._attachments: list[Document] = [] + self._size: str = self._SQUARE_SIZE + self._model: str = "dall-e-3" + self._quality: str = "standard" + + @classmethod + def of(cls, prompt: str) -> "Image": + """Create an :class:`Image` builder with the given prompt.""" + return cls(prompt) + + # ── Modifier methods (chainable) ─────────────────────────────────────────── + + def attachments(self, docs: list) -> "Image": + """Attach :class:`~fastapi_startkit.ai.Document` objects for an editing request.""" + self._attachments = list(docs) + return self + + def landscape(self) -> "Image": + """Use landscape size (1792×1024). DALL-E 3 only.""" + self._size = self._LANDSCAPE_SIZE + return self + + def portrait(self) -> "Image": + """Use portrait size (1024×1792). DALL-E 3 only.""" + self._size = self._PORTRAIT_SIZE + return self + + def square(self) -> "Image": + """Use square size (1024×1024).""" + self._size = self._SQUARE_SIZE + return self + + def model(self, name: str) -> "Image": + """Override the model (default: ``dall-e-3``).""" + self._model = name + return self + + def quality(self, q: str) -> "Image": + """Set quality — ``'standard'`` or ``'hd'`` (DALL-E 3 only).""" + self._quality = q + return self + + # ── Generation ───────────────────────────────────────────────────────────── + + async def generate(self) -> ImageResponse: + """Call the configured image provider and return an :class:`ImageResponse`.""" + provider = self._resolve_provider() + + if self._attachments: + image_bytes = await provider.edit( + prompt=self._prompt, + image_bytes=self._attachments[0].to_bytes(), + size=self._size, + ) + else: + image_bytes = await provider.generate( + prompt=self._prompt, + size=self._size, + model=self._model, + quality=self._quality, + ) + + return ImageResponse(data=image_bytes, fmt="png") + + # ── Internal ─────────────────────────────────────────────────────────────── + + def _resolve_provider(self) -> "ImageGenerationProvider": + from .image_providers import OpenAIImageProvider, StabilityImageProvider # noqa: PLC0415 + + provider_name = "openai" + api_key: Optional[str] = None + base_url: Optional[str] = None + + try: + from fastapi_startkit.facades.Config import Config # noqa: PLC0415 + + ai_config = Config.get("ai") + provider_name = ai_config.image_provider + openai_cfg = ai_config.providers.get("openai") + if openai_cfg: + api_key = openai_cfg.key or None + base_url = openai_cfg.url or None + except Exception: + pass + + if provider_name == "openai": + return OpenAIImageProvider(api_key=api_key, base_url=base_url) + if provider_name == "stability": + return StabilityImageProvider() + raise ValueError(f"Unknown image provider: {provider_name!r}. Use 'openai' or 'stability'.") diff --git a/fastapi_startkit/src/fastapi_startkit/ai/image_providers.py b/fastapi_startkit/src/fastapi_startkit/ai/image_providers.py new file mode 100644 index 00000000..a1b8d7ec --- /dev/null +++ b/fastapi_startkit/src/fastapi_startkit/ai/image_providers.py @@ -0,0 +1,87 @@ +"""Image generation provider abstractions. + +Providers implement the :class:`ImageGenerationProvider` ABC so that the +:class:`~fastapi_startkit.ai.Image` builder is not hard-wired to a single +vendor. Select the active provider via ``AI_IMAGE_PROVIDER`` in your +``.env`` (or ``AIConfig.image_provider``). + +Supported providers +------------------- +* ``openai`` — OpenAI DALL-E 3 / DALL-E 2 (default) +* ``stability`` — Stability AI (stub, raises :exc:`NotImplementedError`) +""" + +from __future__ import annotations + +import base64 +from abc import ABC, abstractmethod + + +class ImageGenerationProvider(ABC): + """Abstract base for image generation backends.""" + + @abstractmethod + async def generate(self, prompt: str, size: str, model: str, quality: str) -> bytes: + """Generate a new image from a text prompt and return raw PNG bytes.""" + + @abstractmethod + async def edit(self, prompt: str, image_bytes: bytes, size: str) -> bytes: + """Edit an existing image (described by *image_bytes*) and return raw PNG bytes.""" + + +class OpenAIImageProvider(ImageGenerationProvider): + """OpenAI DALL-E provider using :class:`openai.AsyncOpenAI`. + + Uses DALL-E 3 for generation and DALL-E 2 for editing (the only model + that supports inpainting as of mid-2025). + """ + + def __init__(self, api_key: str | None = None, base_url: str | None = None): + self._api_key = api_key + self._base_url = base_url + + async def generate(self, prompt: str, size: str, model: str, quality: str) -> bytes: + from openai import AsyncOpenAI # noqa: PLC0415 + + client = AsyncOpenAI(api_key=self._api_key, base_url=self._base_url) + params: dict = { + "model": model, + "prompt": prompt, + "size": size, + "n": 1, + "response_format": "b64_json", + } + if model == "dall-e-3": + params["quality"] = quality + + response = await client.images.generate(**params) + return base64.b64decode(response.data[0].b64_json) + + async def edit(self, prompt: str, image_bytes: bytes, size: str) -> bytes: + import io # noqa: PLC0415 + + from openai import AsyncOpenAI # noqa: PLC0415 + + client = AsyncOpenAI(api_key=self._api_key, base_url=self._base_url) + image_file = io.BytesIO(image_bytes) + image_file.name = "image.png" + + response = await client.images.edit( + model="dall-e-2", + image=image_file, + prompt=prompt, + size="1024x1024", + n=1, + response_format="b64_json", + ) + return base64.b64decode(response.data[0].b64_json) + + +class StabilityImageProvider(ImageGenerationProvider): + """Stability AI provider stub — raises :exc:`NotImplementedError` until implemented.""" + + async def generate(self, prompt: str, size: str, model: str, quality: str) -> bytes: + raise NotImplementedError("StabilityImageProvider is not yet implemented") + + async def edit(self, prompt: str, image_bytes: bytes, size: str) -> bytes: + raise NotImplementedError("StabilityImageProvider is not yet implemented") diff --git a/fastapi_startkit/tests/ai/test_audio.py b/fastapi_startkit/tests/ai/test_audio.py new file mode 100644 index 00000000..e67182bd --- /dev/null +++ b/fastapi_startkit/tests/ai/test_audio.py @@ -0,0 +1,227 @@ +"""Tests for the Audio generation API (Audio, AudioResponse).""" + +from __future__ import annotations + +import os +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from fastapi_startkit.ai.audio import Audio, AudioResponse + + +# ─── Shared fixtures ────────────────────────────────────────────────────────── + +def _fake_audio_bytes() -> bytes: + return b"ID3\x03\x00" # minimal MP3 magic + + +def _mock_provider(result: bytes | None = None) -> MagicMock: + """Return a mock AudioSynthesisProvider.""" + p = MagicMock() + p.synthesize = AsyncMock(return_value=result if result is not None else _fake_audio_bytes()) + return p + + +# ─── Audio builder — chainable API ──────────────────────────────────────────── + +class TestAudioBuilder: + def test_of_returns_audio_instance(self): + audio = Audio.of("Hello world") + assert isinstance(audio, Audio) + assert audio._text == "Hello world" + + def test_default_voice_is_alloy(self): + audio = Audio.of("Hello") + assert audio._voice == "alloy" + + def test_female_sets_nova_voice(self): + audio = Audio.of("Hello").female() + assert audio._voice == "nova" + + def test_male_sets_onyx_voice(self): + audio = Audio.of("Hello").male() + assert audio._voice == "onyx" + + def test_voice_sets_explicit_voice(self): + audio = Audio.of("Hello").voice("shimmer") + assert audio._voice == "shimmer" + + def test_voice_overrides_previous_setting(self): + audio = Audio.of("Hello").female().voice("echo") + assert audio._voice == "echo" + + def test_model_override(self): + audio = Audio.of("Hello").model("tts-1-hd") + assert audio._model == "tts-1-hd" + + def test_speed_override(self): + audio = Audio.of("Hello").speed(1.5) + assert audio._speed == 1.5 + + def test_format_override(self): + audio = Audio.of("Hello").format("opus") + assert audio._response_format == "opus" + + def test_chainable_methods_return_self(self): + audio = Audio.of("Hello") + assert audio.female() is audio + assert audio.male() is audio + assert audio.voice("alloy") is audio + assert audio.model("tts-1") is audio + assert audio.speed(1.0) is audio + assert audio.format("mp3") is audio + + +# ─── Audio.generate() ───────────────────────────────────────────────────────── + +class TestAudioGeneration: + @pytest.mark.asyncio + async def test_generate_calls_provider_and_returns_response(self): + provider = _mock_provider() + + with patch.object(Audio, "_resolve_provider", return_value=provider): + result = await Audio.of("Hello world").generate() + + assert isinstance(result, AudioResponse) + assert result.data == _fake_audio_bytes() + provider.synthesize.assert_called_once() + + @pytest.mark.asyncio + async def test_generate_passes_text_to_provider(self): + provider = _mock_provider() + + with patch.object(Audio, "_resolve_provider", return_value=provider): + await Audio.of("Hello world").generate() + + call_kwargs = provider.synthesize.call_args[1] + assert call_kwargs["text"] == "Hello world" + + @pytest.mark.asyncio + async def test_generate_female_passes_nova_voice(self): + provider = _mock_provider() + + with patch.object(Audio, "_resolve_provider", return_value=provider): + await Audio.of("Hi").female().generate() + + call_kwargs = provider.synthesize.call_args[1] + assert call_kwargs["voice"] == "nova" + + @pytest.mark.asyncio + async def test_generate_male_passes_onyx_voice(self): + provider = _mock_provider() + + with patch.object(Audio, "_resolve_provider", return_value=provider): + await Audio.of("Hi").male().generate() + + call_kwargs = provider.synthesize.call_args[1] + assert call_kwargs["voice"] == "onyx" + + @pytest.mark.asyncio + async def test_generate_explicit_voice(self): + provider = _mock_provider() + + with patch.object(Audio, "_resolve_provider", return_value=provider): + await Audio.of("Hi").voice("shimmer").generate() + + call_kwargs = provider.synthesize.call_args[1] + assert call_kwargs["voice"] == "shimmer" + + @pytest.mark.asyncio + async def test_generate_passes_speed(self): + provider = _mock_provider() + + with patch.object(Audio, "_resolve_provider", return_value=provider): + await Audio.of("Hi").speed(1.25).generate() + + call_kwargs = provider.synthesize.call_args[1] + assert call_kwargs["speed"] == 1.25 + + @pytest.mark.asyncio + async def test_generate_passes_format(self): + provider = _mock_provider() + + with patch.object(Audio, "_resolve_provider", return_value=provider): + await Audio.of("Hi").format("opus").generate() + + call_kwargs = provider.synthesize.call_args[1] + assert call_kwargs["fmt"] == "opus" + + @pytest.mark.asyncio + async def test_generate_hd_model(self): + provider = _mock_provider() + + with patch.object(Audio, "_resolve_provider", return_value=provider): + await Audio.of("Hi").model("tts-1-hd").generate() + + call_kwargs = provider.synthesize.call_args[1] + assert call_kwargs["model"] == "tts-1-hd" + + +# ─── AudioResponse storage methods ──────────────────────────────────────────── + +class TestAudioResult: + @pytest.mark.asyncio + async def test_store_writes_to_temp_when_no_storage(self): + resp = AudioResponse(data=_fake_audio_bytes()) + + path = await resp.store() + + assert os.path.exists(path) + with open(path, "rb") as f: + assert f.read() == _fake_audio_bytes() + os.remove(path) + + @pytest.mark.asyncio + async def test_store_as_uses_given_name(self): + resp = AudioResponse(data=_fake_audio_bytes()) + + with patch.object(resp, "_save_sync") as mock_save: + mock_save.return_value = "/tmp/greeting.mp3" + await resp.storeAs("greeting.mp3") + + mock_save.assert_called_once_with("greeting.mp3", "local") + + @pytest.mark.asyncio + async def test_store_publicly_as_uses_public_disk(self): + resp = AudioResponse(data=_fake_audio_bytes()) + + with patch.object(resp, "_save_sync") as mock_save: + mock_save.return_value = "/tmp/greeting.mp3" + await resp.storePubliclyAs("greeting.mp3") + + mock_save.assert_called_once_with("greeting.mp3", "public") + + @pytest.mark.asyncio + async def test_store_publicly_uses_public_disk(self): + resp = AudioResponse(data=_fake_audio_bytes()) + + with patch.object(resp, "_save_sync") as mock_save: + mock_save.return_value = "/tmp/auto.mp3" + await resp.storePublicly() + + _, disk = mock_save.call_args[0] + assert disk == "public" + + @pytest.mark.asyncio + async def test_store_auto_filename_has_mp3_ext(self): + resp = AudioResponse(data=_fake_audio_bytes(), fmt="mp3") + + with patch.object(resp, "_save_sync") as mock_save: + mock_save.return_value = "/tmp/auto.mp3" + await resp.store() + + name, _ = mock_save.call_args[0] + assert name.endswith(".mp3") + + @pytest.mark.asyncio + async def test_store_uses_storage_facade_when_available(self): + resp = AudioResponse(data=_fake_audio_bytes()) + mock_disk = MagicMock() + + with patch("fastapi_startkit.ai.audio.Storage") as mock_storage_cls: + mock_storage_cls.disk.return_value = mock_disk + await resp.storeAs("hello.mp3") + + mock_storage_cls.disk.assert_called_once_with("local") + mock_disk.put.assert_called_once_with("hello.mp3", _fake_audio_bytes()) diff --git a/fastapi_startkit/tests/ai/test_image.py b/fastapi_startkit/tests/ai/test_image.py new file mode 100644 index 00000000..d38c0f4b --- /dev/null +++ b/fastapi_startkit/tests/ai/test_image.py @@ -0,0 +1,236 @@ +"""Tests for the Image generation API (Image, ImageResponse, Document attachments).""" + +from __future__ import annotations + +import os +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from fastapi_startkit.ai.document import Document +from fastapi_startkit.ai.image import Image, ImageResponse + + +# ─── Shared fixtures ────────────────────────────────────────────────────────── + +def _fake_image_bytes() -> bytes: + return b"\x89PNG\r\n\x1a\n" # minimal PNG magic + + +def _mock_provider(generate_result: bytes | None = None, edit_result: bytes | None = None) -> MagicMock: + """Return a mock ImageGenerationProvider.""" + p = MagicMock() + p.generate = AsyncMock(return_value=generate_result if generate_result is not None else _fake_image_bytes()) + p.edit = AsyncMock(return_value=edit_result if edit_result is not None else _fake_image_bytes()) + return p + + +# ─── Document used as image attachment ──────────────────────────────────────── + +class TestDocumentImageAttachment: + def test_document_from_path_reads_binary_via_to_bytes(self, tmp_path): + """from_path auto-detects binary files and stores bytes content.""" + img = tmp_path / "photo.jpg" + img.write_bytes(b"\xff\xd8\xff") + doc = Document.from_path(str(img)) + # Binary files: content is bytes, to_bytes() returns them directly + assert doc.to_bytes() == b"\xff\xd8\xff" + + def test_document_content_bytes_stored_directly(self): + doc = Document(content=b"\x89PNG", name="photo.png") + assert doc.to_bytes() == b"\x89PNG" + + def test_document_content_str_encoded_to_bytes(self): + doc = Document(content="hello", name="text.txt") + assert doc.to_bytes() == b"hello" + + @pytest.mark.asyncio + async def test_document_from_url_downloads_bytes(self): + fake_data = b"fake-image-bytes" + + with patch("httpx.AsyncClient") as MockClient: + mock_response = MagicMock() + mock_response.content = fake_data + mock_response.raise_for_status = MagicMock() + MockClient.return_value.__aenter__ = AsyncMock(return_value=MockClient.return_value) + MockClient.return_value.__aexit__ = AsyncMock(return_value=False) + MockClient.return_value.get = AsyncMock(return_value=mock_response) + + doc = await Document.from_url("https://example.com/photo.jpg") + + assert doc.to_bytes() == fake_data + assert doc.name == "photo.jpg" + + @pytest.mark.asyncio + async def test_document_from_storage_reads_bytes(self, tmp_path, monkeypatch): + storage_dir = tmp_path / "storage" + storage_dir.mkdir() + (storage_dir / "photo.jpg").write_bytes(b"\x89PNG") + monkeypatch.chdir(tmp_path) + + # Patch Storage so it falls back to the direct path read + with patch("fastapi_startkit.ai.document.Storage", None): + doc = await Document.from_storage("photo.jpg") + + assert doc.to_bytes() == b"\x89PNG" + + +# ─── Image builder — chainable API ──────────────────────────────────────────── + +class TestImageBuilder: + def test_of_returns_image_instance(self): + img = Image.of("A donut on a counter") + assert isinstance(img, Image) + assert img._prompt == "A donut on a counter" + + def test_landscape_sets_size(self): + img = Image.of("test").landscape() + assert img._size == "1792x1024" + + def test_portrait_sets_size(self): + img = Image.of("test").portrait() + assert img._size == "1024x1792" + + def test_square_sets_size(self): + img = Image.of("test").landscape().square() + assert img._size == "1024x1024" + + def test_model_override(self): + img = Image.of("test").model("dall-e-2") + assert img._model == "dall-e-2" + + def test_quality_override(self): + img = Image.of("test").quality("hd") + assert img._quality == "hd" + + def test_attachments_sets_list(self): + doc = Document(content=b"img", name="x.png") + img = Image.of("test").attachments([doc]) + assert img._attachments == [doc] + + +# ─── Image.generate() ───────────────────────────────────────────────────────── + +class TestImageGeneration: + @pytest.mark.asyncio + async def test_generate_calls_provider_and_returns_response(self): + provider = _mock_provider() + + with patch.object(Image, "_resolve_provider", return_value=provider): + result = await Image.of("A donut on a counter").generate() + + assert isinstance(result, ImageResponse) + assert result.data == _fake_image_bytes() + provider.generate.assert_called_once() + + @pytest.mark.asyncio + async def test_generate_passes_landscape_size_to_provider(self): + provider = _mock_provider() + + with patch.object(Image, "_resolve_provider", return_value=provider): + await Image.of("test").landscape().generate() + + call_kwargs = provider.generate.call_args[1] + assert call_kwargs["size"] == "1792x1024" + + @pytest.mark.asyncio + async def test_generate_passes_quality_to_provider(self): + provider = _mock_provider() + + with patch.object(Image, "_resolve_provider", return_value=provider): + await Image.of("test").quality("hd").generate() + + call_kwargs = provider.generate.call_args[1] + assert call_kwargs["quality"] == "hd" + + @pytest.mark.asyncio + async def test_generate_uses_edit_when_attachments_present(self): + provider = _mock_provider() + doc = Document(content=b"img-bytes", name="photo.png") + + with patch.object(Image, "_resolve_provider", return_value=provider): + result = await Image.of("Make impressionist").attachments([doc]).generate() + + assert isinstance(result, ImageResponse) + provider.edit.assert_called_once() + provider.generate.assert_not_called() + + @pytest.mark.asyncio + async def test_generate_passes_attachment_bytes_to_edit(self): + provider = _mock_provider() + doc = Document(content=b"raw-image-bytes", name="photo.png") + + with patch.object(Image, "_resolve_provider", return_value=provider): + await Image.of("Make impressionist").attachments([doc]).generate() + + call_kwargs = provider.edit.call_args[1] + assert call_kwargs["image_bytes"] == b"raw-image-bytes" + + +# ─── ImageResponse storage methods ──────────────────────────────────────────── + +class TestImageResult: + @pytest.mark.asyncio + async def test_store_writes_to_temp_when_no_storage(self): + """Falls back to tempfile when Storage facade is unavailable.""" + resp = ImageResponse(data=_fake_image_bytes()) + + path = await resp.store() + + assert os.path.exists(path) + with open(path, "rb") as f: + assert f.read() == _fake_image_bytes() + os.remove(path) + + @pytest.mark.asyncio + async def test_store_as_uses_given_name(self, tmp_path): + resp = ImageResponse(data=_fake_image_bytes()) + + with patch.object(resp, "_save_sync", wraps=lambda name, disk: str(tmp_path / name)) as mock_save: + path = await resp.storeAs("result.png") + + mock_save.assert_called_once_with("result.png", "local") + assert path.endswith("result.png") + + @pytest.mark.asyncio + async def test_store_publicly_as_uses_public_disk(self, tmp_path): + resp = ImageResponse(data=_fake_image_bytes()) + + with patch.object(resp, "_save_sync", wraps=lambda name, disk: str(tmp_path / name)) as mock_save: + await resp.storePubliclyAs("result.png") + + mock_save.assert_called_once_with("result.png", "public") + + @pytest.mark.asyncio + async def test_store_publicly_uses_public_disk(self): + resp = ImageResponse(data=_fake_image_bytes()) + + with patch.object(resp, "_save_sync") as mock_save: + mock_save.return_value = "/tmp/auto.png" + await resp.storePublicly() + + _, disk = mock_save.call_args[0] + assert disk == "public" + + @pytest.mark.asyncio + async def test_store_auto_filename_has_png_ext(self): + resp = ImageResponse(data=_fake_image_bytes(), fmt="png") + + with patch.object(resp, "_save_sync") as mock_save: + mock_save.return_value = "/tmp/auto.png" + await resp.store() + + name, _ = mock_save.call_args[0] + assert name.endswith(".png") + + @pytest.mark.asyncio + async def test_store_uses_storage_facade_when_available(self): + resp = ImageResponse(data=_fake_image_bytes()) + mock_disk = MagicMock() + + with patch("fastapi_startkit.ai.image.Storage") as mock_storage_cls: + mock_storage_cls.disk.return_value = mock_disk + await resp.storeAs("photo.png") + + mock_storage_cls.disk.assert_called_once_with("local") + mock_disk.put.assert_called_once_with("photo.png", _fake_image_bytes())