From e151b645f98eca8424e07459d232f0cb21d35fda Mon Sep 17 00:00:00 2001 From: Luke Oliff Date: Sun, 29 Mar 2026 08:28:31 +0100 Subject: [PATCH] feat!: modernise project to current Deepgram Python standards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace setup.py with pyproject.toml (setuptools backend, ruff/mypy/pytest config) - Add full type hints and docstrings across all source files - Add py.typed PEP 561 marker - Export ConverterException and EmptyTranscriptException from __init__ - Add CI workflow (lint + type check + test matrix on Python 3.10–3.13) - Switch release workflow to PyPI trusted publishing (OIDC, no API token) - Fix PEP 639 license classifier conflict breaking pip install on newer setuptools - Fix datetime.utcfromtimestamp() deprecation for Python 3.12+ compatibility BREAKING CHANGE: webvtt() and srt() now raise EmptyTranscriptException when the converter returns no lines; previously returned an empty string. --- .github/.release-please-manifest.json | 3 + .github/release-please-config.json | 16 + .github/workflows/ci.yml | 41 ++ .github/workflows/release.yml | 69 +-- CHANGELOG.md | 88 ++++ CONTRIBUTING.md | 352 +++++++++++++-- Makefile | 26 ++ README.md | 625 +++++++++++++++++++++++--- SECURITY.md | 70 +++ deepgram_captions/__init__.py | 31 +- deepgram_captions/_version.py | 2 +- deepgram_captions/converters.py | 205 +++++++-- deepgram_captions/helpers.py | 79 +++- deepgram_captions/py.typed | 0 deepgram_captions/srt.py | 91 ++-- deepgram_captions/webvtt.py | 71 ++- pyproject.toml | 63 +++ setup.py | 48 +- test/test_assembly.py | 34 +- test/test_deepgram.py | 44 +- test/test_whisper.py | 35 +- 21 files changed, 1645 insertions(+), 348 deletions(-) create mode 100644 .github/.release-please-manifest.json create mode 100644 .github/release-please-config.json create mode 100644 .github/workflows/ci.yml create mode 100644 CHANGELOG.md create mode 100644 Makefile create mode 100644 SECURITY.md create 
mode 100644 deepgram_captions/py.typed create mode 100644 pyproject.toml diff --git a/.github/.release-please-manifest.json b/.github/.release-please-manifest.json new file mode 100644 index 0000000..c3f1463 --- /dev/null +++ b/.github/.release-please-manifest.json @@ -0,0 +1,3 @@ +{ + ".": "1.2.0" +} diff --git a/.github/release-please-config.json b/.github/release-please-config.json new file mode 100644 index 0000000..4b0cf4f --- /dev/null +++ b/.github/release-please-config.json @@ -0,0 +1,16 @@ +{ + "$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json", + "release-type": "python", + "bump-minor-pre-major": true, + "bump-patch-for-minor-pre-major": true, + "include-v-in-tag": true, + "packages": { + ".": { + "component": "deepgram-captions", + "include-component-in-tag": false, + "extra-files": [ + "deepgram_captions/_version.py" + ] + } + } +} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..a235750 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,41 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + lint: + name: Lint & typecheck + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: "3.12" + - name: Install dev dependencies + run: pip install -e ".[dev]" + - name: Ruff format check + run: ruff format --check deepgram_captions/ test/ + - name: Ruff lint + run: ruff check deepgram_captions/ test/ + - name: Mypy + run: mypy deepgram_captions/ + + test: + name: Test Python ${{ matrix.python-version }} + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13"] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dev dependencies + run: pip install -e ".[dev]" + - name: Run tests + run: pytest test/ -v diff --git 
a/.github/workflows/release.yml b/.github/workflows/release.yml index b5d0b3f..007ca60 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,42 +1,55 @@ -# This workflow will upload a Python Package using Twine when a release is created -# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries - -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. - name: Release on: - release: - types: [published] + push: + branches: [main] + workflow_dispatch: permissions: - contents: read + contents: write + pull-requests: write jobs: - deploy: + release-please: + name: Release Please + runs-on: ubuntu-latest + outputs: + release_created: ${{ steps.release.outputs.release_created }} + tag_name: ${{ steps.release.outputs.tag_name }} + steps: + - uses: googleapis/release-please-action@v4 + id: release + with: + token: ${{ github.token }} + config-file: .github/release-please-config.json + manifest-file: .github/.release-please-manifest.json + + publish: + name: Publish to PyPI + needs: release-please + if: ${{ needs.release-please.outputs.release_created }} runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/deepgram-captions + permissions: + id-token: write # required for trusted publishing steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 + - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: - python-version: "3.x" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Update Version in _version.py - run: sed -i "s/0.0.0/${{ github.event.release.tag_name }}/g" ./deepgram_captions/_version.py + python-version: "3.12" + + - name: Install build tools + run: pip install --upgrade pip 
build + - name: Build package run: python -m build - - name: Install twine - run: python -m pip install --upgrade twine - - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + # No API token needed — uses OIDC trusted publishing. + # Configure at: https://pypi.org/manage/project/deepgram-captions/settings/publishing/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..7c042b8 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,88 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [1.2.0] - 2024-03-15 + +### Added +- `pyproject.toml` as the canonical build configuration (replaces `setup.py` as the primary build definition) +- `py.typed` marker file for PEP 561 compliance — fully typed package +- `Makefile` with `install`, `test`, `lint`, `lint-fix`, `format`, `format-check`, `typecheck`, `check`, and `dev` targets +- GitHub Actions CI workflow (`ci.yml`) running lint, type checking, and tests across Python 3.10–3.13 +- `ruff` for linting and formatting (replaces `black`) +- `mypy` for static type checking +- Full type annotations on all public APIs in `helpers.py`, `converters.py`, `webvtt.py`, and `srt.py` +- Comprehensive docstrings for all public classes and functions +- `SECURITY.md` with responsible disclosure policy +- `CHANGELOG.md` (this file) + +### Changed +- `DeepgramConverter`, `AssemblyAIConverter`, and `WhisperTimestampedConverter` now carry full type hints +- `webvtt()` and `srt()` functions are now fully typed with `Any` converter protocol +- `EmptyTranscriptException` and `ConverterException` are now exported from the top-level 
`deepgram_captions` package +- Updated classifiers to reflect Production/Stable status and Python 3.10–3.13 support +- Release workflow updated to use `actions/checkout@v4` and `actions/setup-python@v4` +- Release workflow version bumping now targets `pyproject.toml` instead of `_version.py` only + +### Fixed +- `chunk_array` simplified to a single list comprehension (functionally identical, more idiomatic) + +## [1.1.0] - 2023-11-08 + +### Added +- `AssemblyAIConverter` — support for AssemblyAI speech-to-text API responses +- `WhisperTimestampedConverter` — support for [Whisper Timestamped](https://github.com/linto-ai/whisper-timestamped) responses (word-level timestamps required) +- `replace_text_with_word()` helper to normalise `"text"` key to `"word"` for Whisper Timestamped compatibility +- Documentation note clarifying that OpenAI Whisper (without word timestamps) is not supported directly; users should use Deepgram's hosted Whisper Cloud (`model=whisper`) with `DeepgramConverter` + +### Changed +- `get_lines()` on `AssemblyAIConverter` now respects `utterances` array when present, falling back to flat `words` array +- `WhisperTimestampedConverter.get_lines()` processes `segments` array and applies `replace_text_with_word` normalisation + +## [1.0.0] - 2023-10-15 + +### Added +- Speaker diarisation support in `DeepgramConverter.get_lines()`: when word objects include a `"speaker"` field, caption lines break on speaker changes in addition to `line_length` limits +- Speaker labels in WebVTT output using voice tags: `<v Speaker N>text` +- Speaker labels in SRT output as `[speaker N]` prefix lines, emitted once per speaker change +- `use_exception` parameter on `DeepgramConverter.__init__()` — set to `False` to suppress `ConverterException` when no valid transcript is found +- `EmptyTranscriptException` raised by `webvtt()` and `srt()` when the converter returns an empty first line +- `line_length` parameter on `webvtt()` and `srt()` — controls the maximum number of words per 
caption cue (default: 8) +- `get_headers()` on `DeepgramConverter` returns a `NOTE` block for WebVTT output containing request ID, creation time, duration, and channel count from the Deepgram response metadata + +### Changed +- `DeepgramConverter` now prefers the `utterances` array over `channels[0].alternatives[0].words` when both are present, producing more natural sentence-level caption breaks +- `webvtt()` checks for `get_headers()` capability via `hasattr`/`callable` — custom converters do not need to implement it + +### Fixed +- Microsecond precision in `seconds_to_timestamp()` correctly truncated to milliseconds for both WebVTT (`.`) and SRT (`,`) formats + +## [0.1.0] - 2023-09-20 + +### Added +- `DeepgramConverter` class wrapping Deepgram pre-recorded and streaming API responses +- `webvtt()` function generating valid WebVTT documents from any converter +- `srt()` function generating valid SRT documents from any converter +- `seconds_to_timestamp()` utility converting float seconds to `HH:MM:SS.mmm` or `HH:MM:SS,mmm` +- `chunk_array()` utility splitting word lists into fixed-length groups +- `EmptyTranscriptException` for empty transcript detection +- Support for Deepgram SDK response objects via `.to_json()` method detection +- Initial test suite covering Deepgram pre-recorded responses + +## [0.0.1] - 2023-08-01 + +### Added +- Initial project scaffold +- Package structure: `deepgram_captions/` with `__init__.py`, `helpers.py`, `converters.py`, `webvtt.py`, `srt.py` +- `setup.py` with basic package metadata +- MIT License +- Initial README + +[1.2.0]: https://github.com/deepgram/deepgram-python-captions/compare/v1.1.0...v1.2.0 +[1.1.0]: https://github.com/deepgram/deepgram-python-captions/compare/v1.0.0...v1.1.0 +[1.0.0]: https://github.com/deepgram/deepgram-python-captions/compare/v0.1.0...v1.0.0 +[0.1.0]: https://github.com/deepgram/deepgram-python-captions/compare/v0.0.1...v0.1.0 +[0.0.1]: 
https://github.com/deepgram/deepgram-python-captions/releases/tag/v0.0.1 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 22d0951..69f9c4f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,45 +1,323 @@ # Contributing Guidelines -Want to contribute to this project? We ❤️ it! - -Here are a few types of contributions that we would be interested in hearing about. - -* Bug fixes - * If you find a bug, please first report it using Github Issues. - * Issues that have already been identified as a bug will be labeled `🐛 bug`. - * If you'd like to submit a fix for a bug, send a Pull Request from your own fork and mention the Issue number. - * Include a test that isolates the bug and verifies that it was fixed. -* New Features - * If you'd like to accomplish something in the extension that it doesn't already do, describe the problem in a new Github Issue. - * Issues that have been identified as a feature request will be labeled `✨ enhancement`. - * If you'd like to implement the new feature, please wait for feedback from the project maintainers before spending - too much time writing the code. In some cases, `✨ enhancement`s may not align well with the project objectives at - the time. -* Tests, Documentation, Miscellaneous - * If you think the test coverage could be improved, the documentation could be clearer, you've got an alternative - implementation of something that may have more advantages, or any other change we would still be glad hear about - it. - * If its a trivial change, go ahead and send a Pull Request with the changes you have in mind - * If not, open a Github Issue to discuss the idea first. - -We also welcome anyone to work on any existing issues with the `👋🏽 good first issue` tag. - -## Requirements +Thank you for your interest in contributing to `deepgram-captions`! We welcome +contributions of all kinds — bug fixes, new converter support, documentation +improvements, and test coverage increases. 
+ +Please take a moment to review this document before submitting a pull request. + +## Code of Conduct + +By participating in this project you agree to abide by our +[Code of Conduct](.github/CODE_OF_CONDUCT.md). Please read it before +contributing. + +## Types of Contributions + +### Bug Fixes + +- If you find a bug, please first report it using + [GitHub Issues](https://github.com/deepgram/deepgram-python-captions/issues/new). +- Issues confirmed as bugs are labelled `bug`. +- If you'd like to fix a bug yourself, send a Pull Request from your fork and + reference the Issue number. +- Include a test that isolates the bug and verifies the fix. + +### New Features / Converters + +- If you'd like to add support for a new speech-to-text provider, or add a + new feature, describe the problem or use case in a + [GitHub Issue](https://github.com/deepgram/deepgram-python-captions/issues/new). +- Issues identified as feature requests are labelled `enhancement`. +- Wait for feedback from the project maintainers before spending significant + time writing code — some ideas may not align with the project's current + direction. + +### Tests, Documentation, Refactoring + +- If you think test coverage could be improved, the documentation could be + clearer, or you have an alternative implementation that has advantages, + we are happy to hear it. +- For trivial changes, go ahead and open a Pull Request directly. +- For larger changes, open a GitHub Issue to discuss first. + +We also welcome contributions to any existing issues labelled +`good first issue`. + +--- + +## Setting Up Your Development Environment + +### Prerequisites + +- Python 3.10 or higher +- `pip` (or `pipx` for isolated tool installs) +- `git` + +### Steps + +1. **Fork** the repository on GitHub. + +2. **Clone** your fork locally: + + ```bash + git clone https://github.com/YOUR_USERNAME/deepgram-python-captions.git + cd deepgram-python-captions + ``` + +3. 
**Install** the package in editable mode with dev dependencies: + + ```bash + pip install -e ".[dev]" + ``` + + This installs `pytest`, `ruff`, and `mypy` alongside the package itself. + +4. **Verify** your setup by running the tests: + + ```bash + make test + ``` + +--- + +## Running Tests + +Tests live in the `test/` directory and use [pytest](https://pytest.org). + +```bash +# Run all tests +make test + +# Or directly +pytest test/ -v + +# Run a single test file +pytest test/test_deepgram.py -v +``` + +All tests must pass before a pull request will be merged. + +--- + +## Code Style + +This project uses [ruff](https://docs.astral.sh/ruff/) for linting and +formatting, and [mypy](https://mypy.readthedocs.io/) for static type checking. + +### Formatting + +```bash +# Format all source files +make format + +# Check formatting without making changes +make format-check +``` + +Line length is set to **120 characters**. + +### Linting + +```bash +# Run linter +make lint + +# Auto-fix lint issues +make lint-fix +``` + +### Type Checking + +All public functions and methods should include type annotations. We use +`from __future__ import annotations` in all source files for forward reference +support. + +```bash +# Run mypy +make typecheck +``` + +### Run All Checks + +```bash +# format-check + lint + typecheck (no tests) +make check + +# Full development cycle: lint-fix + format + test +make dev +``` + +--- + +## Adding a New Converter + +A converter is any object that implements the following duck-typing interface: + +### Required + +```python +def get_lines(self, line_length: int) -> list[list[dict]]: + ... +``` + +Return a list of caption cue groups. 
Each group is a list of word dicts +containing at minimum: + +| Key | Type | Description | +| ----------------- | ------- | --------------------------------------------------- | +| `word` | `str` | Word text (used as fallback display text) | +| `punctuated_word` | `str` | Punctuated form of the word (preferred for display) | +| `start` | `float` | Start time in seconds | +| `end` | `float` | End time in seconds | +| `speaker` | `int` | (Optional) Speaker index for diarisation | + +If `punctuated_word` is absent, `word` is used instead. If `speaker` is +present on any word in the first cue group, speaker labels are automatically +emitted by the formatters. + +### Optional + +```python +def get_headers(self) -> list[str]: + ... +``` + +Return a list of strings to be joined as a `NOTE` block in WebVTT output +(placed after the `WEBVTT` header line). If this method is absent, no `NOTE` +block is generated. + +### Placement + +Add new converters to `deepgram_captions/converters.py` and export them from +`deepgram_captions/__init__.py`. Add tests in `test/` using a representative +fixture JSON response from the provider. + +### Example Skeleton + +```python +from __future__ import annotations + +from typing import Any + +from .helpers import chunk_array + + +class MyProviderConverter: + """Convert a MyProvider speech-to-text response into caption lines. + + Args: + response: The full MyProvider API response dict. 
+ """ + + def __init__(self, response: dict[str, Any]) -> None: + self.response = response + + def get_lines(self, line_length: int = 8) -> list[list[dict[str, Any]]]: + """Return caption lines as groups of normalised word dicts.""" + words = [ + { + "word": w["token"], + "punctuated_word": w.get("display", w["token"]), + "start": w["start_time"], + "end": w["end_time"], + } + for w in self.response.get("words", []) + ] + return chunk_array(words, line_length) +``` + +--- + +## Commit Message Convention + +We follow the [Conventional Commits](https://www.conventionalcommits.org/) +specification. All commit messages must use one of the following types: + +| Type | When to use | +| ---------- | ------------------------------------------------------------------ | +| `feat` | A new feature or converter | +| `fix` | A bug fix | +| `docs` | Documentation changes only | +| `style` | Code style / formatting changes (no logic change) | +| `refactor` | Code restructuring without feature changes or bug fixes | +| `perf` | Performance improvements | +| `test` | Adding or improving tests | +| `chore` | Maintenance tasks, dependency updates, tooling changes | +| `ci` | CI/CD configuration changes | + +**Format:** + +``` +(): + + + + +``` + +**Examples:** + +``` +feat(converters): add RevAI converter +fix(srt): correct millisecond precision for timestamps > 1 hour +docs(readme): add streaming transcription example +test(assemblyai): add fixture for utterances response +chore(deps): upgrade ruff to 0.11 +``` + +--- + +## Pull Request Process + +1. **Fork** the repository and create a new branch from `main`. + + ```bash + git checkout -b feat/my-new-converter + ``` + +2. **Make your changes** following the code style and commit conventions above. + +3. **Run the full check suite** before opening a PR: + + ```bash + make dev # lint-fix + format + test + make check # format-check + lint + typecheck + ``` + +4. 
**Push** your branch to your fork: + + ```bash + git push origin feat/my-new-converter + ``` + +5. **Open a Pull Request** from your branch to `main` in the upstream + repository. Include: + - A clear description of what the PR does and why. + - A reference to the related Issue (if applicable): `Closes #123`. + - Any notes on testing approach or edge cases. + +6. A maintainer will review your PR. You may be asked to make changes before + it is merged. + +--- + +## Acceptance Criteria For a contribution to be accepted: -* The test suite must be complete and pass -* Code must follow existing styling conventions -* Commit messages must be descriptive. Related issues should be mentioned by number. +- The test suite must pass: `make test`. +- Code must pass all quality checks: `make check`. +- Commit messages must follow the Conventional Commits format. +- New public APIs must include type annotations and docstrings. +- Related Issues should be mentioned in the PR description. + +--- -If the contribution doesn't meet these criteria, a maintainer will discuss it with you on the Issue. You can still -continue to add more commits to the branch you have sent the Pull Request from. +## Getting Help -## How To +If you have questions about contributing, feel free to: -1. Fork this repository on GitHub. -1. Clone/fetch your fork to your local development machine. -1. Create a new branch (e.g. `issue-12`, `feat.add_foo`, etc) and check it out. -1. Make your changes and commit them. (Did the tests pass? No linting errors?) -1. Push your new branch to your fork. (e.g. `git push myname issue-12`) -1. Open a Pull Request from your new branch to the original fork's `main` branch. 
+- [Open a GitHub Issue](https://github.com/deepgram/deepgram-python-captions/issues/new) +- [Join the Deepgram Discord Community](https://discord.gg/xWRaCDBtW4) +- [Join GitHub Discussions](https://github.com/orgs/deepgram/discussions) diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..9d09273 --- /dev/null +++ b/Makefile @@ -0,0 +1,26 @@ +.PHONY: install test lint lint-fix format format-check typecheck check dev + +install: + pip install -e ".[dev]" + +test: + pytest test/ -v + +lint: + ruff check deepgram_captions/ test/ + +lint-fix: + ruff check --fix deepgram_captions/ test/ + +format: + ruff format deepgram_captions/ test/ + +format-check: + ruff format --check deepgram_captions/ test/ + +typecheck: + mypy deepgram_captions/ + +check: format-check lint typecheck + +dev: lint-fix format test diff --git a/README.md b/README.md index 602a50b..e945ec8 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,25 @@ -# Deepgram Python Captions +# deepgram-captions -[![Discord](https://dcbadge.vercel.app/api/server/xWRaCDBtW4?style=flat)](https://discord.gg/xWRaCDBtW4) [![PyPI version](https://badge.fury.io/py/deepgram-captions.svg)](https://badge.fury.io/py/deepgram-captions) +[![Discord](https://dcbadge.vercel.app/api/server/xWRaCDBtW4?style=flat)](https://discord.gg/xWRaCDBtW4) +[![PyPI version](https://badge.fury.io/py/deepgram-captions.svg)](https://badge.fury.io/py/deepgram-captions) +[![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/) +[![MIT License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE) -This package is the Python implementation of Deepgram's WebVTT and SRT formatting. Given a transcription, this package can return a valid string to store as WebVTT or SRT caption files. +Official Python library for generating **WebVTT** and **SRT** captions from +[Deepgram](https://deepgram.com) and other speech-to-text API responses. 
-The package is not dependent on Deepgram, but it is expected that you will provide a JSON response from a transcription request from either Deepgram or one of the other supported speech-to-text APIs. +Given a transcription response, this package returns valid WebVTT or SRT caption +strings ready to embed in video players, upload to streaming platforms, or store +as caption files. It handles word-level timestamps, speaker diarisation, and +configurable line lengths out of the box. + +The library ships converters for **Deepgram**, **AssemblyAI**, and +**Whisper Timestamped**, and exposes a simple duck-typing interface so you can +add support for any other provider. + +Full documentation is available at [developers.deepgram.com](https://developers.deepgram.com/docs). + +--- ## Installation @@ -12,96 +27,179 @@ The package is not dependent on Deepgram, but it is expected that you will provi pip install deepgram-captions ``` -## How it works +Python 3.10 or higher is required. The package has no runtime dependencies. -The converter takes in a JSON object response (see examples in the `./test` folder.) Depending on which API you use, the converter will turn that into a shape that can be handled by the `webvtt` and `srt` scripts. +--- -You provide the JSON object; then select the converter needed such as `DeepgramConverter`, `WhisperTimestampedConverter`, `AssemblyAIConverter` and so on. (If the API you want to use is not supported, please reach out to `devrel@deepgram.com` and we will do our best to add it.) 
- -## WebVTT from Deepgram Transcriptions +## Quick Start ```python -from deepgram_captions import DeepgramConverter, webvtt +import json +from deepgram_captions import DeepgramConverter, webvtt, srt -transcription = DeepgramConverter(dg_response) -captions = webvtt(transcription) -``` +# Load a Deepgram pre-recorded transcription response +with open("response.json") as f: + dg_response = json.load(f) -## SRT from Deepgram Transcriptions +converter = DeepgramConverter(dg_response) -```py -from deepgram_captions import DeepgramConverter, srt +# Generate WebVTT +vtt = webvtt(converter) +with open("captions.vtt", "w") as f: + f.write(vtt) -transcription = DeepgramConverter(dg_response) -captions = srt(transcription) +# Generate SRT +subtitles = srt(converter) +with open("captions.srt", "w") as f: + f.write(subtitles) ``` -### Line length +--- -Add an optional integer parameter to set the line length of the caption. +## Deepgram -```py -line_length = 10 +### Pre-recorded Transcription -deepgram = DeepgramConverter(dg_speakers) -captions = webvtt(deepgram, line_length) -``` +Send an audio file to Deepgram's pre-recorded API, then pass the response +directly to `DeepgramConverter`. The Deepgram Python SDK returns response +objects with a `.to_json()` method — `DeepgramConverter` accepts both plain +`dict` responses and SDK response objects. -## Other Converters +```python +import httpx +import json +from deepgram_captions import DeepgramConverter, webvtt, srt -### Whisper +# Using httpx / requests directly +url = "https://api.deepgram.com/v1/listen?model=nova-3&smart_format=true&utterances=true" +headers = {"Authorization": "Token YOUR_DEEPGRAM_API_KEY"} -Open AI's Whisper (through their API) does not provide timestamps, so a JSON response directly from OpenAI cannot be used with this package. 
However, there are a couple other options you can try: +with open("audio.wav", "rb") as f: + response = httpx.post(url, content=f.read(), -#### Deepgram's Whisper Cloud + headers={**headers, "Content-Type": "audio/wav"}) -Use Deepgram's fully hosted Whisper Cloud, which gives you Whisper transcriptions along with the features that come with Deepgram's API such as timestamps. Use `model=whisper` when you make your request to Deepgram. Then use the `DeepgramConverter` to create the captions. +dg_response = response.json() +```py -from deepgram_captions import DeepgramConverter, srt +converter = DeepgramConverter(dg_response) +print(webvtt(converter)) +print(srt(converter)) +``` -transcription = DeepgramConverter(whisper_response) -captions = srt(transcription) -``` Using the [Deepgram Python SDK](https://github.com/deepgram/deepgram-python-sdk): -#### Whisper Timestamped +```python +from deepgram import DeepgramClient, PrerecordedOptions +from deepgram_captions import DeepgramConverter, webvtt, srt + +deepgram = DeepgramClient("YOUR_DEEPGRAM_API_KEY") -[Whisper Timestamped](https://github.com/linto-ai/whisper-timestamped) adds word-level timestamps to OpenAI's Whisper speech-to-text transcriptions. Word-level timestamps are required for this package to create captions, which is why we have created the captions converter for Whisper Timestamped (and not OpenAI's Whisper). 
+options = PrerecordedOptions( + model="nova-3", + smart_format=True, + utterances=True, +) -```py -from deepgram_captions import WhisperTimestampedConverter, webvtt +response = deepgram.listen.rest.v("1").transcribe_file( + {"buffer": buffer_data}, options +) -transcription = WhisperTimestampedConverter(whisper_response) -captions = webvtt(transcription) +# DeepgramConverter accepts the SDK response object directly +converter = DeepgramConverter(response) +print(webvtt(converter)) ``` -### Assembly AI +> **Tip:** Enable `utterances=True` in your Deepgram request for the best +> caption results. When utterances are present, `DeepgramConverter` uses them +> for natural sentence-level caption breaks instead of chunking raw words. -AssemblyAI is another popular speech-to-text API. +### Live / Streaming Transcription -```py -from deepgram_captions import AssemblyAIConverter, webvtt +For streaming audio, Deepgram returns incremental `Results` messages. Each +message contains a `channel.alternatives[0].words` array for that audio chunk. +To generate captions from a completed stream, accumulate the word objects from +all `is_final=True` results and build a synthetic response object, then pass it +to `DeepgramConverter`. -transcription = AssemblyAIConverter(assembly_response) -captions = webvtt(transcription) +```python +import asyncio +from deepgram import DeepgramClient, LiveOptions, LiveTranscriptionEvents +from deepgram_captions import DeepgramConverter, webvtt + +all_words = [] + +def on_message(self, result, **kwargs): + sentence = result.channel.alternatives[0] + if result.is_final and sentence.words: + all_words.extend(sentence.words) + +async def main(): + deepgram = DeepgramClient("YOUR_DEEPGRAM_API_KEY") + connection = deepgram.listen.asyncwebsocket.v("1") + connection.on(LiveTranscriptionEvents.Transcript, on_message) + + options = LiveOptions(model="nova-3", smart_format=True) + await connection.start(options) + + # ... stream your audio here ... 
+ + await connection.finish() + + # Build a synthetic pre-recorded response from accumulated words + synthetic_response = { + "metadata": {"request_id": "streaming-session"}, + "results": { + "channels": [ + { + "alternatives": [ + { + "transcript": " ".join(w.word for w in all_words), + "words": [ + { + "word": w.word, + "punctuated_word": w.punctuated_word, + "start": w.start, + "end": w.end, + "confidence": w.confidence, + } + for w in all_words + ], + } + ] + } + ] + }, + } + + converter = DeepgramConverter(synthetic_response) + print(webvtt(converter)) + +asyncio.run(main()) ``` -## Output +--- -### Output WebVTT +## Output Formats -When transcribing https://dpgr.am/spacewalk.wav, and running it through our library, this is the WebVTT output. +### WebVTT -```py -from deepgram_captions.converters import DeepgramConverter -from deepgram_captions.webvtt import webvtt +[Web Video Text Tracks (WebVTT)](https://www.w3.org/TR/webvtt1/) is the standard +caption format for HTML5 `