diff --git a/.github/.release-please-manifest.json b/.github/.release-please-manifest.json
new file mode 100644
index 0000000..c3f1463
--- /dev/null
+++ b/.github/.release-please-manifest.json
@@ -0,0 +1,3 @@
+{
+ ".": "1.2.0"
+}
diff --git a/.github/release-please-config.json b/.github/release-please-config.json
new file mode 100644
index 0000000..4b0cf4f
--- /dev/null
+++ b/.github/release-please-config.json
@@ -0,0 +1,16 @@
+{
+ "$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json",
+ "release-type": "python",
+ "bump-minor-pre-major": true,
+ "bump-patch-for-minor-pre-major": true,
+ "include-v-in-tag": true,
+ "packages": {
+ ".": {
+ "component": "deepgram-captions",
+ "include-component-in-tag": false,
+ "extra-files": [
+ "deepgram_captions/_version.py"
+ ]
+ }
+ }
+}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..a235750
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,41 @@
+name: CI
+
+on:
+ push:
+ branches: [main]
+ pull_request:
+ branches: [main]
+
+jobs:
+ lint:
+ name: Lint & typecheck
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v4
+ with:
+ python-version: "3.12"
+ - name: Install dev dependencies
+ run: pip install -e ".[dev]"
+ - name: Ruff format check
+ run: ruff format --check deepgram_captions/ test/
+ - name: Ruff lint
+ run: ruff check deepgram_captions/ test/
+ - name: Mypy
+ run: mypy deepgram_captions/
+
+ test:
+ name: Test Python ${{ matrix.python-version }}
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dev dependencies
+ run: pip install -e ".[dev]"
+ - name: Run tests
+ run: pytest test/ -v
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index b5d0b3f..007ca60 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,42 +1,55 @@
-# This workflow will upload a Python Package using Twine when a release is created
-# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
-
-# This workflow uses actions that are not certified by GitHub.
-# They are provided by a third-party and are governed by
-# separate terms of service, privacy policy, and support
-# documentation.
-
name: Release
on:
- release:
- types: [published]
+ push:
+ branches: [main]
+ workflow_dispatch:
permissions:
- contents: read
+ contents: write
+ pull-requests: write
jobs:
- deploy:
+ release-please:
+ name: Release Please
+ runs-on: ubuntu-latest
+ outputs:
+ release_created: ${{ steps.release.outputs.release_created }}
+ tag_name: ${{ steps.release.outputs.tag_name }}
+ steps:
+ - uses: googleapis/release-please-action@v4
+ id: release
+ with:
+ token: ${{ github.token }}
+ config-file: .github/release-please-config.json
+ manifest-file: .github/.release-please-manifest.json
+
+ publish:
+ name: Publish to PyPI
+ needs: release-please
+ if: ${{ needs.release-please.outputs.release_created }}
runs-on: ubuntu-latest
+ environment:
+ name: pypi
+ url: https://pypi.org/p/deepgram-captions
+ permissions:
+ id-token: write # required for trusted publishing
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
+
- name: Set up Python
- uses: actions/setup-python@v3
+ uses: actions/setup-python@v4
with:
- python-version: "3.x"
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install build
- - name: Update Version in _version.py
- run: sed -i "s/0.0.0/${{ github.event.release.tag_name }}/g" ./deepgram_captions/_version.py
+ python-version: "3.12"
+
+ - name: Install build tools
+ run: pip install --upgrade pip build
+
- name: Build package
run: python -m build
- - name: Install twine
- run: python -m pip install --upgrade twine
- - name: Publish package
- uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
- with:
- user: __token__
- password: ${{ secrets.PYPI_API_TOKEN }}
+
+ - name: Publish to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ # No API token needed — uses OIDC trusted publishing.
+ # Configure at: https://pypi.org/manage/project/deepgram-captions/settings/publishing/
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..7c042b8
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,88 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [1.2.0] - 2024-03-15
+
+### Added
+- `pyproject.toml` as the canonical build configuration (replaces `setup.py` as the primary build definition)
+- `py.typed` marker file for PEP 561 compliance — fully typed package
+- `Makefile` with `install`, `test`, `lint`, `lint-fix`, `format`, `format-check`, `typecheck`, `check`, and `dev` targets
+- GitHub Actions CI workflow (`ci.yml`) running lint, type checking, and tests across Python 3.10–3.13
+- `ruff` for linting and formatting (replaces `black`)
+- `mypy` for static type checking
+- Full type annotations on all public APIs in `helpers.py`, `converters.py`, `webvtt.py`, and `srt.py`
+- Comprehensive docstrings for all public classes and functions
+- `SECURITY.md` with responsible disclosure policy
+- `CHANGELOG.md` (this file)
+
+### Changed
+- `DeepgramConverter`, `AssemblyAIConverter`, and `WhisperTimestampedConverter` now carry full type hints
+- `webvtt()` and `srt()` functions are now fully typed with `Any` converter protocol
+- `EmptyTranscriptException` and `ConverterException` are now exported from the top-level `deepgram_captions` package
+- Updated classifiers to reflect Production/Stable status and Python 3.10–3.13 support
+- Release workflow updated to use `actions/checkout@v4` and `actions/setup-python@v4`
+- Release workflow version bumping now targets `pyproject.toml` instead of `_version.py` only
+
+### Fixed
+- `chunk_array` simplified to a single list comprehension (functionally identical, more idiomatic)
+
+## [1.1.0] - 2023-11-08
+
+### Added
+- `AssemblyAIConverter` — support for AssemblyAI speech-to-text API responses
+- `WhisperTimestampedConverter` — support for [Whisper Timestamped](https://github.com/linto-ai/whisper-timestamped) responses (word-level timestamps required)
+- `replace_text_with_word()` helper to normalise `"text"` key to `"word"` for Whisper Timestamped compatibility
+- Documentation note clarifying that OpenAI Whisper (without word timestamps) is not supported directly; users should use Deepgram's hosted Whisper Cloud (`model=whisper`) with `DeepgramConverter`
+
+### Changed
+- `get_lines()` on `AssemblyAIConverter` now respects `utterances` array when present, falling back to flat `words` array
+- `WhisperTimestampedConverter.get_lines()` processes `segments` array and applies `replace_text_with_word` normalisation
+
+## [1.0.0] - 2023-10-15
+
+### Added
+- Speaker diarisation support in `DeepgramConverter.get_lines()`: when word objects include a `"speaker"` field, caption lines break on speaker changes in addition to `line_length` limits
+- Speaker labels in WebVTT output using voice tags: `<v Speaker 0>text</v>`
+- Speaker labels in SRT output as `[speaker N]` prefix lines, emitted once per speaker change
+- `use_exception` parameter on `DeepgramConverter.__init__()` — set to `False` to suppress `ConverterException` when no valid transcript is found
+- `EmptyTranscriptException` raised by `webvtt()` and `srt()` when the converter returns an empty first line
+- `line_length` parameter on `webvtt()` and `srt()` — controls the maximum number of words per caption cue (default: 8)
+- `get_headers()` on `DeepgramConverter` returns a `NOTE` block for WebVTT output containing request ID, creation time, duration, and channel count from the Deepgram response metadata
+
+### Changed
+- `DeepgramConverter` now prefers the `utterances` array over `channels[0].alternatives[0].words` when both are present, producing more natural sentence-level caption breaks
+- `webvtt()` checks for `get_headers()` capability via `hasattr`/`callable` — custom converters do not need to implement it
+
+### Fixed
+- Microsecond precision in `seconds_to_timestamp()` correctly truncated to milliseconds for both WebVTT (`.`) and SRT (`,`) formats
+
+## [0.1.0] - 2023-09-20
+
+### Added
+- `DeepgramConverter` class wrapping Deepgram pre-recorded and streaming API responses
+- `webvtt()` function generating valid WebVTT documents from any converter
+- `srt()` function generating valid SRT documents from any converter
+- `seconds_to_timestamp()` utility converting float seconds to `HH:MM:SS.mmm` or `HH:MM:SS,mmm`
+- `chunk_array()` utility splitting word lists into fixed-length groups
+- `EmptyTranscriptException` for empty transcript detection
+- Support for Deepgram SDK response objects via `.to_json()` method detection
+- Initial test suite covering Deepgram pre-recorded responses
+
+## [0.0.1] - 2023-08-01
+
+### Added
+- Initial project scaffold
+- Package structure: `deepgram_captions/` with `__init__.py`, `helpers.py`, `converters.py`, `webvtt.py`, `srt.py`
+- `setup.py` with basic package metadata
+- MIT License
+- Initial README
+
+[1.2.0]: https://github.com/deepgram/deepgram-python-captions/compare/v1.1.0...v1.2.0
+[1.1.0]: https://github.com/deepgram/deepgram-python-captions/compare/v1.0.0...v1.1.0
+[1.0.0]: https://github.com/deepgram/deepgram-python-captions/compare/v0.1.0...v1.0.0
+[0.1.0]: https://github.com/deepgram/deepgram-python-captions/compare/v0.0.1...v0.1.0
+[0.0.1]: https://github.com/deepgram/deepgram-python-captions/releases/tag/v0.0.1
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 22d0951..69f9c4f 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,45 +1,323 @@
# Contributing Guidelines
-Want to contribute to this project? We ❤️ it!
-
-Here are a few types of contributions that we would be interested in hearing about.
-
-* Bug fixes
- * If you find a bug, please first report it using Github Issues.
- * Issues that have already been identified as a bug will be labeled `🐛 bug`.
- * If you'd like to submit a fix for a bug, send a Pull Request from your own fork and mention the Issue number.
- * Include a test that isolates the bug and verifies that it was fixed.
-* New Features
- * If you'd like to accomplish something in the extension that it doesn't already do, describe the problem in a new Github Issue.
- * Issues that have been identified as a feature request will be labeled `✨ enhancement`.
- * If you'd like to implement the new feature, please wait for feedback from the project maintainers before spending
- too much time writing the code. In some cases, `✨ enhancement`s may not align well with the project objectives at
- the time.
-* Tests, Documentation, Miscellaneous
- * If you think the test coverage could be improved, the documentation could be clearer, you've got an alternative
- implementation of something that may have more advantages, or any other change we would still be glad hear about
- it.
- * If its a trivial change, go ahead and send a Pull Request with the changes you have in mind
- * If not, open a Github Issue to discuss the idea first.
-
-We also welcome anyone to work on any existing issues with the `👋🏽 good first issue` tag.
-
-## Requirements
+Thank you for your interest in contributing to `deepgram-captions`! We welcome
+contributions of all kinds — bug fixes, new converter support, documentation
+improvements, and test coverage increases.
+
+Please take a moment to review this document before submitting a pull request.
+
+## Code of Conduct
+
+By participating in this project you agree to abide by our
+[Code of Conduct](.github/CODE_OF_CONDUCT.md). Please read it before
+contributing.
+
+## Types of Contributions
+
+### Bug Fixes
+
+- If you find a bug, please first report it using
+ [GitHub Issues](https://github.com/deepgram/deepgram-python-captions/issues/new).
+- Issues confirmed as bugs are labelled `bug`.
+- If you'd like to fix a bug yourself, send a Pull Request from your fork and
+ reference the Issue number.
+- Include a test that isolates the bug and verifies the fix.
+
+### New Features / Converters
+
+- If you'd like to add support for a new speech-to-text provider, or add a
+ new feature, describe the problem or use case in a
+ [GitHub Issue](https://github.com/deepgram/deepgram-python-captions/issues/new).
+- Issues identified as feature requests are labelled `enhancement`.
+- Wait for feedback from the project maintainers before spending significant
+ time writing code — some ideas may not align with the project's current
+ direction.
+
+### Tests, Documentation, Refactoring
+
+- If you think test coverage could be improved, the documentation could be
+ clearer, or you have an alternative implementation that has advantages,
+ we are happy to hear it.
+- For trivial changes, go ahead and open a Pull Request directly.
+- For larger changes, open a GitHub Issue to discuss first.
+
+We also welcome contributions to any existing issues labelled
+`good first issue`.
+
+---
+
+## Setting Up Your Development Environment
+
+### Prerequisites
+
+- Python 3.10 or higher
+- `pip` (or `pipx` for isolated tool installs)
+- `git`
+
+### Steps
+
+1. **Fork** the repository on GitHub.
+
+2. **Clone** your fork locally:
+
+ ```bash
+ git clone https://github.com/YOUR_USERNAME/deepgram-python-captions.git
+ cd deepgram-python-captions
+ ```
+
+3. **Install** the package in editable mode with dev dependencies:
+
+ ```bash
+ pip install -e ".[dev]"
+ ```
+
+ This installs `pytest`, `ruff`, and `mypy` alongside the package itself.
+
+4. **Verify** your setup by running the tests:
+
+ ```bash
+ make test
+ ```
+
+---
+
+## Running Tests
+
+Tests live in the `test/` directory and use [pytest](https://pytest.org).
+
+```bash
+# Run all tests
+make test
+
+# Or directly
+pytest test/ -v
+
+# Run a single test file
+pytest test/test_deepgram.py -v
+```
+
+All tests must pass before a pull request will be merged.
+
+---
+
+## Code Style
+
+This project uses [ruff](https://docs.astral.sh/ruff/) for linting and
+formatting, and [mypy](https://mypy.readthedocs.io/) for static type checking.
+
+### Formatting
+
+```bash
+# Format all source files
+make format
+
+# Check formatting without making changes
+make format-check
+```
+
+Line length is set to **120 characters**.
+
+### Linting
+
+```bash
+# Run linter
+make lint
+
+# Auto-fix lint issues
+make lint-fix
+```
+
+### Type Checking
+
+All public functions and methods should include type annotations. We use
+`from __future__ import annotations` in all source files for forward reference
+support.
+
+```bash
+# Run mypy
+make typecheck
+```
+
+### Run All Checks
+
+```bash
+# format-check + lint + typecheck (no tests)
+make check
+
+# Full development cycle: lint-fix + format + test
+make dev
+```
+
+---
+
+## Adding a New Converter
+
+A converter is any object that implements the following duck-typing interface:
+
+### Required
+
+```python
+def get_lines(self, line_length: int) -> list[list[dict]]:
+ ...
+```
+
+Return a list of caption cue groups. Each group is a list of word dicts
+containing at minimum:
+
+| Key | Type | Description |
+| ----------------- | ------- | --------------------------------------------------- |
+| `word` | `str` | Word text (used as fallback display text) |
+| `punctuated_word` | `str` | Punctuated form of the word (preferred for display) |
+| `start` | `float` | Start time in seconds |
+| `end` | `float` | End time in seconds |
+| `speaker` | `int` | (Optional) Speaker index for diarisation |
+
+If `punctuated_word` is absent, `word` is used instead. If `speaker` is
+present on any word in the first cue group, speaker labels are automatically
+emitted by the formatters.
+
+### Optional
+
+```python
+def get_headers(self) -> list[str]:
+ ...
+```
+
+Return a list of strings to be joined as a `NOTE` block in WebVTT output
+(placed after the `WEBVTT` header line). If this method is absent, no `NOTE`
+block is generated.
+
+### Placement
+
+Add new converters to `deepgram_captions/converters.py` and export them from
+`deepgram_captions/__init__.py`. Add tests in `test/` using a representative
+fixture JSON response from the provider.
+
+### Example Skeleton
+
+```python
+from __future__ import annotations
+
+from typing import Any
+
+from .helpers import chunk_array
+
+
+class MyProviderConverter:
+ """Convert a MyProvider speech-to-text response into caption lines.
+
+ Args:
+ response: The full MyProvider API response dict.
+ """
+
+ def __init__(self, response: dict[str, Any]) -> None:
+ self.response = response
+
+ def get_lines(self, line_length: int = 8) -> list[list[dict[str, Any]]]:
+ """Return caption lines as groups of normalised word dicts."""
+ words = [
+ {
+ "word": w["token"],
+ "punctuated_word": w.get("display", w["token"]),
+ "start": w["start_time"],
+ "end": w["end_time"],
+ }
+ for w in self.response.get("words", [])
+ ]
+ return chunk_array(words, line_length)
+```
+
+---
+
+## Commit Message Convention
+
+We follow the [Conventional Commits](https://www.conventionalcommits.org/)
+specification. All commit messages must use one of the following types:
+
+| Type | When to use |
+| ---------- | ------------------------------------------------------------------ |
+| `feat` | A new feature or converter |
+| `fix` | A bug fix |
+| `docs` | Documentation changes only |
+| `style` | Code style / formatting changes (no logic change) |
+| `refactor` | Code restructuring without feature changes or bug fixes |
+| `perf` | Performance improvements |
+| `test` | Adding or improving tests |
+| `chore` | Maintenance tasks, dependency updates, tooling changes |
+| `ci` | CI/CD configuration changes |
+
+**Format:**
+
+```
+<type>(<scope>): <short summary>
+
+<optional body>
+
+<optional footer>
+```
+
+**Examples:**
+
+```
+feat(converters): add RevAI converter
+fix(srt): correct millisecond precision for timestamps > 1 hour
+docs(readme): add streaming transcription example
+test(assemblyai): add fixture for utterances response
+chore(deps): upgrade ruff to 0.11
+```
+
+---
+
+## Pull Request Process
+
+1. **Fork** the repository and create a new branch from `main`.
+
+ ```bash
+ git checkout -b feat/my-new-converter
+ ```
+
+2. **Make your changes** following the code style and commit conventions above.
+
+3. **Run the full check suite** before opening a PR:
+
+ ```bash
+ make dev # lint-fix + format + test
+ make check # format-check + lint + typecheck
+ ```
+
+4. **Push** your branch to your fork:
+
+ ```bash
+ git push origin feat/my-new-converter
+ ```
+
+5. **Open a Pull Request** from your branch to `main` in the upstream
+ repository. Include:
+ - A clear description of what the PR does and why.
+ - A reference to the related Issue (if applicable): `Closes #123`.
+ - Any notes on testing approach or edge cases.
+
+6. A maintainer will review your PR. You may be asked to make changes before
+ it is merged.
+
+---
+
+## Acceptance Criteria
For a contribution to be accepted:
-* The test suite must be complete and pass
-* Code must follow existing styling conventions
-* Commit messages must be descriptive. Related issues should be mentioned by number.
+- The test suite must pass: `make test`.
+- Code must pass all quality checks: `make check`.
+- Commit messages must follow the Conventional Commits format.
+- New public APIs must include type annotations and docstrings.
+- Related Issues should be mentioned in the PR description.
+
+---
-If the contribution doesn't meet these criteria, a maintainer will discuss it with you on the Issue. You can still
-continue to add more commits to the branch you have sent the Pull Request from.
+## Getting Help
-## How To
+If you have questions about contributing, feel free to:
-1. Fork this repository on GitHub.
-1. Clone/fetch your fork to your local development machine.
-1. Create a new branch (e.g. `issue-12`, `feat.add_foo`, etc) and check it out.
-1. Make your changes and commit them. (Did the tests pass? No linting errors?)
-1. Push your new branch to your fork. (e.g. `git push myname issue-12`)
-1. Open a Pull Request from your new branch to the original fork's `main` branch.
+- [Open a GitHub Issue](https://github.com/deepgram/deepgram-python-captions/issues/new)
+- [Join the Deepgram Discord Community](https://discord.gg/xWRaCDBtW4)
+- [Join GitHub Discussions](https://github.com/orgs/deepgram/discussions)
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..9d09273
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,26 @@
+.PHONY: install test lint format typecheck check clean
+
+install:
+ pip install -e ".[dev]"
+
+test:
+ pytest test/ -v
+
+lint:
+ ruff check deepgram_captions/ test/
+
+lint-fix:
+ ruff check --fix deepgram_captions/ test/
+
+format:
+ ruff format deepgram_captions/ test/
+
+format-check:
+ ruff format --check deepgram_captions/ test/
+
+typecheck:
+ mypy deepgram_captions/
+
+check: format-check lint typecheck
+
+dev: lint-fix format test
diff --git a/README.md b/README.md
index 602a50b..e945ec8 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,25 @@
-# Deepgram Python Captions
+# deepgram-captions
-[](https://discord.gg/xWRaCDBtW4) [](https://badge.fury.io/py/deepgram-captions)
+[](https://discord.gg/xWRaCDBtW4)
+[](https://badge.fury.io/py/deepgram-captions)
+[](https://www.python.org/downloads/)
+[](LICENSE)
-This package is the Python implementation of Deepgram's WebVTT and SRT formatting. Given a transcription, this package can return a valid string to store as WebVTT or SRT caption files.
+Official Python library for generating **WebVTT** and **SRT** captions from
+[Deepgram](https://deepgram.com) and other speech-to-text API responses.
-The package is not dependent on Deepgram, but it is expected that you will provide a JSON response from a transcription request from either Deepgram or one of the other supported speech-to-text APIs.
+Given a transcription response, this package returns valid WebVTT or SRT caption
+strings ready to embed in video players, upload to streaming platforms, or store
+as caption files. It handles word-level timestamps, speaker diarisation, and
+configurable line lengths out of the box.
+
+The library ships converters for **Deepgram**, **AssemblyAI**, and
+**Whisper Timestamped**, and exposes a simple duck-typing interface so you can
+add support for any other provider.
+
+Full documentation is available at [developers.deepgram.com](https://developers.deepgram.com/docs).
+
+---
## Installation
@@ -12,96 +27,179 @@ The package is not dependent on Deepgram, but it is expected that you will provi
pip install deepgram-captions
```
-## How it works
+Python 3.10 or higher is required. The package has no runtime dependencies.
-The converter takes in a JSON object response (see examples in the `./test` folder.) Depending on which API you use, the converter will turn that into a shape that can be handled by the `webvtt` and `srt` scripts.
+---
-You provide the JSON object; then select the converter needed such as `DeepgramConverter`, `WhisperTimestampedConverter`, `AssemblyAIConverter` and so on. (If the API you want to use is not supported, please reach out to `devrel@deepgram.com` and we will do our best to add it.)
-
-## WebVTT from Deepgram Transcriptions
+## Quick Start
```python
-from deepgram_captions import DeepgramConverter, webvtt
+import json
+from deepgram_captions import DeepgramConverter, webvtt, srt
-transcription = DeepgramConverter(dg_response)
-captions = webvtt(transcription)
-```
+# Load a Deepgram pre-recorded transcription response
+with open("response.json") as f:
+ dg_response = json.load(f)
-## SRT from Deepgram Transcriptions
+converter = DeepgramConverter(dg_response)
-```py
-from deepgram_captions import DeepgramConverter, srt
+# Generate WebVTT
+vtt = webvtt(converter)
+with open("captions.vtt", "w") as f:
+ f.write(vtt)
-transcription = DeepgramConverter(dg_response)
-captions = srt(transcription)
+# Generate SRT
+subtitles = srt(converter)
+with open("captions.srt", "w") as f:
+ f.write(subtitles)
```
-### Line length
+---
-Add an optional integer parameter to set the line length of the caption.
+## Deepgram
-```py
-line_length = 10
+### Pre-recorded Transcription
-deepgram = DeepgramConverter(dg_speakers)
-captions = webvtt(deepgram, line_length)
-```
+Send an audio file to Deepgram's pre-recorded API, then pass the response
+directly to `DeepgramConverter`. The Deepgram Python SDK returns response
+objects with a `.to_json()` method — `DeepgramConverter` accepts both plain
+`dict` responses and SDK response objects.
-## Other Converters
+```python
+import httpx
+import json
+from deepgram_captions import DeepgramConverter, webvtt, srt
-### Whisper
+# Using httpx / requests directly
+url = "https://api.deepgram.com/v1/listen?model=nova-3&smart_format=true&utterances=true"
+headers = {"Authorization": "Token YOUR_DEEPGRAM_API_KEY"}
-Open AI's Whisper (through their API) does not provide timestamps, so a JSON response directly from OpenAI cannot be used with this package. However, there are a couple other options you can try:
+with open("audio.wav", "rb") as f:
+    response = httpx.post(url, content=f.read(),
+                          headers={**headers, "Content-Type": "audio/wav"})
-#### Deepgram's Whisper Cloud
+dg_response = response.json()
-Use Deepgram's fully hosted Whisper Cloud, which gives you Whisper transcriptions along with the features that come with Deepgram's API such as timestamps. Use `model=whisper` when you make your request to Deepgram. Then use the `DeepgramConverter` to create the captions.
+converter = DeepgramConverter(dg_response)
+print(webvtt(converter))
+print(srt(converter))
+```
-```py
-from deepgram_captions import DeepgramConverter, srt
+Using the [Deepgram Python SDK](https://github.com/deepgram/deepgram-python-sdk):
-transcription = DeepgramConverter(whisper_response)
-captions = srt(transcription)
-```
+```python
+from deepgram import DeepgramClient, PrerecordedOptions
+from deepgram_captions import DeepgramConverter, webvtt, srt
+
+deepgram = DeepgramClient("YOUR_DEEPGRAM_API_KEY")
-#### Whisper Timestamped
+with open("audio.wav", "rb") as f:
+ buffer_data = f.read()
-[Whisper Timestamped](https://github.com/linto-ai/whisper-timestamped) adds word-level timestamps to OpenAI's Whisper speech-to-text transcriptions. Word-level timestamps are required for this package to create captions, which is why we have created the captions converter for Whisper Timestamped (and not OpenAI's Whisper).
+options = PrerecordedOptions(
+ model="nova-3",
+ smart_format=True,
+ utterances=True,
+)
-```py
-from deepgram_captions import WhisperTimestampedConverter, webvtt
+response = deepgram.listen.rest.v("1").transcribe_file(
+ {"buffer": buffer_data}, options
+)
-transcription = WhisperTimestampedConverter(whisper_response)
-captions = webvtt(transcription)
+# DeepgramConverter accepts the SDK response object directly
+converter = DeepgramConverter(response)
+print(webvtt(converter))
```
-### Assembly AI
+> **Tip:** Enable `utterances=True` in your Deepgram request for the best
+> caption results. When utterances are present, `DeepgramConverter` uses them
+> for natural sentence-level caption breaks instead of chunking raw words.
-AssemblyAI is another popular speech-to-text API.
+### Live / Streaming Transcription
-```py
-from deepgram_captions import AssemblyAIConverter, webvtt
+For streaming audio, Deepgram returns incremental `Results` messages. Each
+message contains a `channel.alternatives[0].words` array for that audio chunk.
+To generate captions from a completed stream, accumulate the word objects from
+all `is_final=True` results and build a synthetic response object, then pass it
+to `DeepgramConverter`.
-transcription = AssemblyAIConverter(assembly_response)
-captions = webvtt(transcription)
+```python
+import asyncio
+from deepgram import DeepgramClient, LiveOptions, LiveTranscriptionEvents
+from deepgram_captions import DeepgramConverter, webvtt
+
+all_words = []
+
+def on_message(self, result, **kwargs):
+ sentence = result.channel.alternatives[0]
+ if result.is_final and sentence.words:
+ all_words.extend(sentence.words)
+
+async def main():
+ deepgram = DeepgramClient("YOUR_DEEPGRAM_API_KEY")
+ connection = deepgram.listen.asyncwebsocket.v("1")
+ connection.on(LiveTranscriptionEvents.Transcript, on_message)
+
+ options = LiveOptions(model="nova-3", smart_format=True)
+ await connection.start(options)
+
+ # ... stream your audio here ...
+
+ await connection.finish()
+
+ # Build a synthetic pre-recorded response from accumulated words
+ synthetic_response = {
+ "metadata": {"request_id": "streaming-session"},
+ "results": {
+ "channels": [
+ {
+ "alternatives": [
+ {
+ "transcript": " ".join(w.word for w in all_words),
+ "words": [
+ {
+ "word": w.word,
+ "punctuated_word": w.punctuated_word,
+ "start": w.start,
+ "end": w.end,
+ "confidence": w.confidence,
+ }
+ for w in all_words
+ ],
+ }
+ ]
+ }
+ ]
+ },
+ }
+
+ converter = DeepgramConverter(synthetic_response)
+ print(webvtt(converter))
+
+asyncio.run(main())
```
-## Output
+---
-### Output WebVTT
+## Output Formats
-When transcribing https://dpgr.am/spacewalk.wav, and running it through our library, this is the WebVTT output.
+### WebVTT
-```py
-from deepgram_captions.converters import DeepgramConverter
-from deepgram_captions.webvtt import webvtt
+[Web Video Text Tracks (WebVTT)](https://www.w3.org/TR/webvtt1/) is the standard
+caption format for HTML5 `