diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..b14e81e
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,4 @@
+data/text_responses/*.jsonl filter=lfs diff=lfs merge=lfs -text
+data/images_responses/*.jsonl filter=lfs diff=lfs merge=lfs -text
+data/audio_responses/*.jsonl filter=lfs diff=lfs merge=lfs -text
+data/evaluation/**/eval_records.jsonl filter=lfs diff=lfs merge=lfs -text
diff --git a/.github/workflows/publish-leaderboard.yml b/.github/workflows/publish-leaderboard.yml
new file mode 100644
index 0000000..466e569
--- /dev/null
+++ b/.github/workflows/publish-leaderboard.yml
@@ -0,0 +1,109 @@
+name: Publish leaderboard
+
+# Builds leaderboard.json with scripts/build_leaderboard.py, validates it,
+# uploads it to the Hugging Face dataset repo, then syncs sob-leaderboard/
+# to the Space. Runs on pushes to main touching the paths below, or manually.
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'data/evaluation/**'
+      - 'scripts/build_leaderboard.py'
+      - 'sob-leaderboard/**'
+      - '.github/workflows/publish-leaderboard.yml'
+  workflow_dispatch:
+
+# Single concurrency group: a newer run cancels one that is still in progress.
+concurrency:
+  group: publish-leaderboard
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # Download Git LFS objects rather than leaving pointer files.
+          lfs: true
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install huggingface_hub
+        run: pip install --upgrade "huggingface_hub>=0.24"
+
+      - name: Build leaderboard JSON
+        run: python scripts/build_leaderboard.py --output leaderboard.json
+
+      # Gate the upload: every row must carry all metric keys, each either
+      # null or within [0, 1].
+      - name: Validate JSON
+        run: |
+          python - <<'PY'
+          import json
+
+          metrics = ("overall", "value_accuracy", "faithfulness", "json_pass_rate",
+                     "path_recall", "structure_coverage", "type_safety")
+          required = {"model", *metrics}
+
+          with open("leaderboard.json") as fh:
+              d = json.load(fh)
+          rows = d.get("rows") or []
+          assert rows, "no rows in leaderboard.json"
+          # Check every row, not only rows[0], so a malformed later entry fails here.
+          for i, row in enumerate(rows):
+              missing = required - set(row)
+              assert not missing, f"row {i} ({row.get('model')}) missing: {missing}"
+              for k in metrics:
+                  v = row[k]
+                  assert v is None or 0.0 <= v <= 1.0, f"{row.get('model')} {k}={v} out of [0,1]"
+          print(f"ok: {len(rows)} rows, generated_at={d.get('generated_at')}")
+          PY
+
+      - name: Upload leaderboard.json to dataset
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          GITHUB_SHA: ${{ github.sha }}
+        run: |
+          python <<'PY'
+          import os
+          from huggingface_hub import HfApi
+
+          api = HfApi(token=os.environ["HF_TOKEN"])
+          api.upload_file(
+              path_or_fileobj="leaderboard.json",
+              path_in_repo="leaderboard.json",
+              repo_id="interfaze-ai/sob-leaderboard",
+              repo_type="dataset",
+              # the short commit SHA ties the dataset commit to the source commit
+              commit_message=f"Publish leaderboard ({os.environ['GITHUB_SHA'][:7]})",
+          )
+          print("Uploaded leaderboard.json -> dataset interfaze-ai/sob-leaderboard")
+          PY
+
+      - name: Sync Space app from sob-leaderboard/
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          GITHUB_SHA: ${{ github.sha }}
+        run: |
+          python <<'PY'
+          import os
+          from huggingface_hub import HfApi
+
+          api = HfApi(token=os.environ["HF_TOKEN"])
+          api.upload_folder(
+              folder_path="sob-leaderboard",
+              repo_id="interfaze-ai/sob-leaderboard",
+              repo_type="space",
+              commit_message=f"Sync app from {os.environ['GITHUB_SHA'][:7]}",
+              # exclude bytecode caches, .DS_Store and leaderboard.json from the upload
+              ignore_patterns=["__pycache__/**", "*.pyc", ".DS_Store", "leaderboard.json"],
+          )
+          print("Synced sob-leaderboard/ -> space interfaze-ai/sob-leaderboard")
+          PY
diff --git a/.github/workflows/validate-leaderboard.yml b/.github/workflows/validate-leaderboard.yml
new file mode 100644
index 0000000..12da0e6
--- /dev/null
+++ b/.github/workflows/validate-leaderboard.yml
@@ -0,0 +1,107 @@
+name: Validate leaderboard
+
+# Runs on every PR that touches an eval result, the build script, or the
+# Space app. Builds the leaderboard JSON and asserts it is well-formed,
+# without uploading anything. Publish happens separately on push to main.
+
+on:
+  pull_request:
+    paths:
+      - 'data/evaluation/**'
+      - 'scripts/build_leaderboard.py'
+      - 'sob-leaderboard/**'
+      - '.github/workflows/validate-leaderboard.yml'
+      - '.github/workflows/publish-leaderboard.yml'
+
+permissions:
+  contents: read
+  # lets the preview step create or update a comment on the PR
+  pull-requests: write
+
+jobs:
+  validate:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          lfs: true
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Build leaderboard JSON
+        # Without an explicit `shell:`, run steps use `bash -e` with no pipefail,
+        # so `tee` would mask a failing build; enable pipefail explicitly.
+        run: |
+          set -o pipefail
+          python scripts/build_leaderboard.py --output leaderboard.json | tee build.log
+
+      - name: Validate JSON shape
+        run: |
+          python - <<'PY'
+          import json
+          d = json.load(open("leaderboard.json"))
+          rows = d.get("rows") or []
+          assert rows, "no rows in leaderboard.json"
+          required = {"model", "overall", "value_accuracy", "faithfulness",
+                      "json_pass_rate", "path_recall", "structure_coverage", "type_safety"}
+          for i, row in enumerate(rows):
+              missing = required - set(row)
+              assert not missing, f"row {i} ({row.get('model')}) missing: {missing}"
+              # every metric is either null or a fraction in [0, 1]
+              for k in ("overall", "value_accuracy", "faithfulness",
+                        "json_pass_rate", "path_recall", "structure_coverage", "type_safety"):
+                  v = row[k]
+                  assert v is None or 0.0 <= v <= 1.0, f"{row.get('model')} {k}={v} out of [0,1]"
+          print(f"ok: {len(rows)} rows valid")
+          PY
+
+      - name: Upload leaderboard.json artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: leaderboard-json
+          path: leaderboard.json
+
+      - name: Comment leaderboard preview on PR
+        # Fork PRs only get a read-only GITHUB_TOKEN, so the comment step
+        # will 403; the build/validate above still gates merge correctly.
+        continue-on-error: true
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            const d = JSON.parse(fs.readFileSync('leaderboard.json', 'utf8'));
+            const rows = d.rows;
+            // The heading doubles as the marker used to find a previous preview comment.
+            const marker = '### 🏆 Leaderboard preview';
+            // null/undefined metrics render as a dash
+            const fmt = (v) => v == null ? '—' : v.toFixed(3);
+            const top = rows.slice(0, 10).map((r, i) =>
+              `| ${i+1} | ${r.model} | ${fmt(r.overall)} | ${fmt(r.value_accuracy)} | ${fmt(r.json_pass_rate)} | ${fmt(r.perfect_response)} |`
+            ).join('\n');
+            const body = [
+              marker,
+              '',
+              `Built **${rows.length} models**, top 10 by Overall:`,
+              '',
+              '| Rank | Model | Overall | Val. Acc. | JSON Pass | Perfect |',
+              '| :--- | :---- | :-----: | :-------: | :-------: | :-----: |',
+              top,
+              '',
+              `_Generated at ${d.generated_at} • full JSON in workflow artifacts_`,
+            ].join('\n');
+
+            const { owner, repo } = context.repo;
+            const issue_number = context.issue.number;
+            // listComments returns a single page (30 by default); paginate so an existing
+            // preview on a busy PR is found and updated instead of duplicated.
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner, repo, issue_number, per_page: 100,
+            });
+            const existing = comments.find(c => c.user?.type === 'Bot' && c.body?.startsWith(marker));
+            if (existing) {
+              await github.rest.issues.updateComment({ owner, repo, comment_id: existing.id, body });
+            } else {
+              await github.rest.issues.createComment({ owner, repo, issue_number, body });
+            }
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6549eec
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,214 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+#uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+
+# pixi
+# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+# in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+
+# Visual Studio Code
+# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+# and can be added to the global gitignore or merged into this file. However, if you prefer,
+# you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# Cursor
+# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+# refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+
+# Project-specific
+.codex
+logs/
+data/checkpoints/
+data/dataset/
+leaderboard.json
diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..e4fba21
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.12
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..74aa255
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 JigsawStack, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0bb2c5a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,24 @@
+.PHONY: install format lint # these targets name commands, not files
+
+PYTHON := $(shell command -v python3 2>/dev/null || command -v python 2>/dev/null || command -v py 2>/dev/null)
+
+install: # `uv sync` when uv is on PATH, otherwise pip install -r requirements.txt
+ @if command -v uv >/dev/null 2>&1; then \
+ uv sync; \
+ else \
+ $(PYTHON) -m pip install -r requirements.txt; \
+ fi
+
+format: install # ruff format, via `uv run` when uv is on PATH, else `$(PYTHON) -m`
+ @if command -v uv >/dev/null 2>&1; then \
+ uv run ruff format .; \
+ else \
+ $(PYTHON) -m ruff format .; \
+ fi
+
+lint: install # ruff check, via `uv run` when uv is on PATH, else `$(PYTHON) -m`
+ @if command -v uv >/dev/null 2>&1; then \
+ uv run ruff check .; \
+ else \
+ $(PYTHON) -m ruff check .; \
+ fi
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..53ae2a3
--- /dev/null
+++ b/README.md
@@ -0,0 +1,213 @@
+
+
The Structured Output Benchmark
+ SOB · A multi-source benchmark for evaluating structured-output quality in LLMs
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Leaderboard ·
+ Quickstart ·
+ Installation ·
+ Inference ·
+ Evaluation ·
+ Submit a model ·
+ Citation
+
+
+---
+
+**SOB** measures **value-level correctness** of LLM-generated JSON, not just *whether the JSON is valid*. We evaluate models across **three source modalities** — text, images, and audio — under a single unified evaluation framework.
+
+## 🏆 Leaderboard
+
+Top 5 by **Overall** (coverage-adjusted aggregate across text + image + audio). The full live leaderboard is on the [SOB Leaderboard Space](https://huggingface.co/spaces/interfaze-ai/sob-leaderboard) — it auto-updates whenever a model PR lands.
+
+| Rank | Model | Overall | Val. Acc. | Faithful. | JSON Pass | Path Rec. | Str. Cov. | Type Saf. | Perfect |
+| :--- | :----------------- | :-------: | :-------: | :-------: | :-------: | :-------: | :-------: | :-------: | :-----: |
+| 1 | **GPT-5.4** | **0.870** | 0.798 | **0.869** | **0.993** | **0.988** | **0.981** | **0.993** | 0.469 |
+| 2 | GLM-4.7 | 0.861 | **0.804** | 0.868 | 0.965 | 0.959 | 0.957 | 0.965 | **0.508** |
+| 3 | Qwen3.5-35B | 0.861 | 0.801 | 0.863 | 0.969 | 0.962 | 0.960 | 0.969 | 0.500 |
+| 4 | Gemini-2.5-Flash | 0.860 | 0.796 | 0.856 | 0.972 | 0.967 | 0.961 | 0.972 | 0.498 |
+| 5 | Qwen3-235B | 0.857 | 0.786 | 0.854 | 0.978 | 0.970 | 0.968 | 0.978 | 0.463 |
+
+Per-modality bests: **text 0.830 (GLM-4.7) · image 0.672 (Gemma-4-31B) · audio 0.237 (Gemini-2.5-Flash)** — see paper Tables 2–4. Perfect Response is aggregated over text + image only.
+
+**All 21 rows + per-modality leaderboards →** [interfaze-ai/sob-leaderboard](https://huggingface.co/spaces/interfaze-ai/sob-leaderboard)
+
+## Quickstart
+
+Load the dataset directly:
+
+```python
+from datasets import load_dataset
+text = load_dataset("interfaze-ai/sob", split="test") # 5,000 records
+image = load_dataset("interfaze-ai/sob", "image", split="train") # 209 records
+audio = load_dataset("interfaze-ai/sob", "audio", split="train") # 115 records
+```
+
+Or run a 5-record smoke test end-to-end:
+
+```bash
+git clone https://github.com/JigsawStack/sob && cd sob
+make install
+export OPENROUTER_API_KEY=...
+python -m sob.run --provider openrouter --modality text \
+ --model-id google/gemma-4-31b-it --sample-size 5
+python evaluate.py data/text_responses/response_google_gemma-4-31b-it.jsonl
+```
+
+## Installation
+
+Python 3.12, clean virtualenv:
+
+```bash
+git clone https://github.com/JigsawStack/sob && cd sob
+uv venv && source .venv/bin/activate
+make install
+```
+
+`make install` uses `uv sync` if available, otherwise falls back to `pip install -r requirements.txt`. Other targets:
+
+```bash
+make format # ruff format .
+make lint # ruff check .
+```
+
+For local vLLM inference (NVIDIA GPU, CUDA 12.8, ≥ 24 GB VRAM):
+
+```bash
+uv pip install vllm --extra-index-url https://download.pytorch.org/whl/cu128
+```
+
+### API keys
+
+```bash
+export OPENROUTER_API_KEY=...
+export OPENAI_API_KEY=...
+export ANTHROPIC_API_KEY=...
+export GEMINI_API_KEY=...
+export HF_TOKEN=... # only if the dataset is private
+```
+
+### Git LFS
+
+Response files and per-model evaluations under `data/` are LFS-tracked:
+
+```bash
+git lfs install
+```
+
+## Running inference
+
+`--modality text` runs the **test** split (5,000 records); `image` and `audio` use the single `train` split for those configs (209 / 115). Omit `--sample-size` for the full run.
+
+**OpenRouter:**
+
+```bash
+python -m sob.run --provider openrouter --modality text \
+ --model-id google/gemma-4-31b-it --sample-size 100
+```
+
+**OpenAI:**
+
+```bash
+python -m sob.run --provider openai --modality image --model-id gpt-5
+```
+
+**Anthropic:**
+
+```bash
+python -m sob.run --provider anthropic --modality audio --model-id claude-sonnet-4-6
+```
+
+**Gemini:**
+
+```bash
+python -m sob.run --provider gemini --modality text --model-id gemini-2.5-flash
+```
+
+**vLLM** (open-weight, your GPU):
+
+```bash
+python -m sob.run --provider vllm --modality text \
+ --model-id Qwen/Qwen3.5-35B-A3B --use-structured-decoding
+```
+
+`--use-structured-decoding` is the schema-constrained ablation from paper §6.2; the headline leaderboard runs without it.
+
+Outputs:
+
+- `data/text_responses/response_<model>.jsonl`
+- `data/images_responses/response_<model>_image.jsonl`
+- `data/audio_responses/response_<model>_audio.jsonl`
+
+## Evaluation
+
+Score a single response file:
+
+```bash
+python evaluate.py data/text_responses/response_google_gemma-4-31b-it.jsonl
+```
+
+Produces `data/evaluation/<modality>/<model>/{eval_records.jsonl, eval_summary.json}` — every paper number is reproducible from these summaries. Or score a whole directory:
+
+```bash
+python evaluate.py data/text_responses/ # all response_*.jsonl
+python evaluate.py data/audio_responses/ --modality audio
+```
+
+## Submitting a new model
+
+The leaderboard is rebuilt from `data/evaluation/` by [`scripts/build_leaderboard.py`](scripts/build_leaderboard.py) on every push to `main`, published to the [`interfaze-ai/sob-leaderboard`](https://huggingface.co/datasets/interfaze-ai/sob-leaderboard) dataset, and rendered by the [Space](https://huggingface.co/spaces/interfaze-ai/sob-leaderboard).
+
+1. Fork, run inference + `evaluate.py` for one or more modalities, and drop the resulting `eval_summary.json` files into `data/evaluation/{text,image,audio}/<model>/`.
+2. Add an entry for `<model>` in [`data/evaluation/display_names.json`](data/evaluation/display_names.json). The `_comment` key is ignored — paste your `"<model>": "<Display Name>"` alongside the others.
+3. Open a PR — CI builds the leaderboard JSON and posts a top-10 preview comment to verify the row before merge.
+4. On merge to `main`, the publish workflow uploads a fresh `leaderboard.json` to the dataset and the Space picks it up.
+
+Preview locally before opening a PR:
+
+```bash
+python scripts/build_leaderboard.py --output leaderboard.json
+```
+
+## Citation
+
+```bibtex
+@inproceedings{singh2026sob,
+ title = {The Structured Output Benchmark: A Multi-Source Benchmark for Evaluating Structured Output Quality in Large Language Models},
+ author = {Singh, Abhinav Kumar and Khurdula, Harsha Vardhan and Khemlani, Yoeven D and Agarwal, Vineet},
+ booktitle = {NeurIPS 2026 Evaluations and Datasets Track},
+ year = {2026},
+ publisher = {JigsawStack, Inc.}
+}
+```
+
+## License
+
+[MIT License](LICENSE). Source datasets retain their original licenses: HotpotQA (CC-BY-SA-4.0), AMI Meeting Corpus (CC-BY-4.0), olmOCR-bench / olmOCR (ODC-BY / Apache-2.0).
+
+## Acknowledgments
+
+The HotpotQA team, the AMI Meeting Corpus team, and the Allen AI olmOCR team for releasing their datasets.
+
+## Contact
+
+Open an [issue](https://github.com/JigsawStack/sob/issues) or reach the authors at `{abhinav, harsha, yoeven, vineet}@interfaze.ai`.
diff --git a/data/audio_responses/response_DeepSeek-R1-Distill-Qwen-32B_audio.jsonl b/data/audio_responses/response_DeepSeek-R1-Distill-Qwen-32B_audio.jsonl
new file mode 100644
index 0000000..e2f6cd9
--- /dev/null
+++ b/data/audio_responses/response_DeepSeek-R1-Distill-Qwen-32B_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9aee02363dbad69dee3ffa5b3fc0974281f135eca9325600957a92151861e80c
+size 5598000
diff --git a/data/audio_responses/response_Ministral-3-14B-Instruct-2512_audio.jsonl b/data/audio_responses/response_Ministral-3-14B-Instruct-2512_audio.jsonl
new file mode 100644
index 0000000..57189a6
--- /dev/null
+++ b/data/audio_responses/response_Ministral-3-14B-Instruct-2512_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7661f938794e15e8d28778183affda06f8e63d8413c0121d771356d29e7092c4
+size 5697549
diff --git a/data/audio_responses/response_NVIDIA-Nemotron-3-Nano-30B-A3B-BF16_audio.jsonl b/data/audio_responses/response_NVIDIA-Nemotron-3-Nano-30B-A3B-BF16_audio.jsonl
new file mode 100644
index 0000000..4e863c3
--- /dev/null
+++ b/data/audio_responses/response_NVIDIA-Nemotron-3-Nano-30B-A3B-BF16_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca4bfdd6c91c23f24ed334c8b1a29143cbc5c6f716dc3e11ed8bb68268a50708
+size 5685914
diff --git a/data/audio_responses/response_Qwen3-235B-A22B-Instruct-2507_audio.jsonl b/data/audio_responses/response_Qwen3-235B-A22B-Instruct-2507_audio.jsonl
new file mode 100644
index 0000000..a669d9a
--- /dev/null
+++ b/data/audio_responses/response_Qwen3-235B-A22B-Instruct-2507_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc9143161967706fb1339791f2e28962260d5c8a0783213c3f69d9ae3ab3c918
+size 5692736
diff --git a/data/audio_responses/response_Qwen3-30B-A3B-Instruct-2507_audio.jsonl b/data/audio_responses/response_Qwen3-30B-A3B-Instruct-2507_audio.jsonl
new file mode 100644
index 0000000..8dc65de
--- /dev/null
+++ b/data/audio_responses/response_Qwen3-30B-A3B-Instruct-2507_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ee032193fe78cb22e6dd8a9851cf936efa2c8be20801a5ecf0a34396f6eaf9d
+size 5709349
diff --git a/data/audio_responses/response_Qwen3.5-35B-A3B_audio.jsonl b/data/audio_responses/response_Qwen3.5-35B-A3B_audio.jsonl
new file mode 100644
index 0000000..4e55f0b
--- /dev/null
+++ b/data/audio_responses/response_Qwen3.5-35B-A3B_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5bc069149642c75b3871ee4b2dcd637d48ff4bb77d1efe8f2a1accf8900aeb0
+size 5678218
diff --git a/data/audio_responses/response_claude-sonnet-4-6_audio.jsonl b/data/audio_responses/response_claude-sonnet-4-6_audio.jsonl
new file mode 100644
index 0000000..b15a7c2
--- /dev/null
+++ b/data/audio_responses/response_claude-sonnet-4-6_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e25073697ccf01a74dbfd90fb2ce9b375b8cc24cb5b69dbc7b45fd228080cea8
+size 5843741
diff --git a/data/audio_responses/response_gemini-2.5-flash_audio.jsonl b/data/audio_responses/response_gemini-2.5-flash_audio.jsonl
new file mode 100644
index 0000000..55115a0
--- /dev/null
+++ b/data/audio_responses/response_gemini-2.5-flash_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca9a42e62a4154745d204b49bafb2969cfe110a2d480cf7c3f66e6e30c404dfe
+size 5736853
diff --git a/data/audio_responses/response_gemini-3-flash_audio.jsonl b/data/audio_responses/response_gemini-3-flash_audio.jsonl
new file mode 100644
index 0000000..cf28e28
--- /dev/null
+++ b/data/audio_responses/response_gemini-3-flash_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25d8cd8648c54c2a52224df392e63ca589bcdefcbcbfe650a893496845eb821b
+size 5642424
diff --git a/data/audio_responses/response_gemma-3-27b-it_audio.jsonl b/data/audio_responses/response_gemma-3-27b-it_audio.jsonl
new file mode 100644
index 0000000..7946a6f
--- /dev/null
+++ b/data/audio_responses/response_gemma-3-27b-it_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:900ce51096571de08cd52cda6a8c3839103a6fb3a5002ba9e0ff94937c84ce03
+size 5629543
diff --git a/data/audio_responses/response_gemma-4-31b-it_audio.jsonl b/data/audio_responses/response_gemma-4-31b-it_audio.jsonl
new file mode 100644
index 0000000..b5a0cd6
--- /dev/null
+++ b/data/audio_responses/response_gemma-4-31b-it_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f59c74cfcecff2fee414a809a8f00b17f825bce8fb93ad913eab0d39a32722de
+size 5641169
diff --git a/data/audio_responses/response_gpt-oss_audio.jsonl b/data/audio_responses/response_gpt-oss_audio.jsonl
new file mode 100644
index 0000000..36374af
--- /dev/null
+++ b/data/audio_responses/response_gpt-oss_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e4e709c051106bf8abf1bdab2f29c74946acf3a59eb00123745558592acfb1b
+size 5992038
diff --git a/data/audio_responses/response_ibm-granite-4.0-h-small_audio.jsonl b/data/audio_responses/response_ibm-granite-4.0-h-small_audio.jsonl
new file mode 100644
index 0000000..6368cb4
--- /dev/null
+++ b/data/audio_responses/response_ibm-granite-4.0-h-small_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:531737e24ad23024716a9060240e2bfe2536eeae88555938f22ab9dad8603746
+size 5646025
diff --git a/data/audio_responses/response_inference-net-Schematron-8B_audio.jsonl b/data/audio_responses/response_inference-net-Schematron-8B_audio.jsonl
new file mode 100644
index 0000000..0046512
--- /dev/null
+++ b/data/audio_responses/response_inference-net-Schematron-8B_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1000a61338adee2e40c720067c49d1d767d1bf073d6db918caa5e4b31c68041
+size 5728300
diff --git a/data/audio_responses/response_interfaze-beta_audio.jsonl b/data/audio_responses/response_interfaze-beta_audio.jsonl
new file mode 100644
index 0000000..49284b0
--- /dev/null
+++ b/data/audio_responses/response_interfaze-beta_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecc87093ae620f38d0e978fde406947f7a81b5a95485397aedb481cb49dd4c19
+size 5648300
diff --git a/data/audio_responses/response_openai-gpt-4.1_audio.jsonl b/data/audio_responses/response_openai-gpt-4.1_audio.jsonl
new file mode 100644
index 0000000..de7001e
--- /dev/null
+++ b/data/audio_responses/response_openai-gpt-4.1_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d3f5644190c5c1a06ec0e3f06c4865c4980dc31a2a45e38ed66790f7f6fc7e0
+size 5748444
diff --git a/data/audio_responses/response_openai-gpt-5-4_audio.jsonl b/data/audio_responses/response_openai-gpt-5-4_audio.jsonl
new file mode 100644
index 0000000..2f7c21a
--- /dev/null
+++ b/data/audio_responses/response_openai-gpt-5-4_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d73eeaee03c284d2f4100834f871aedc7f12506614496293ecc835b8f32cb04a
+size 5815981
diff --git a/data/audio_responses/response_openai-gpt-5-mini_audio.jsonl b/data/audio_responses/response_openai-gpt-5-mini_audio.jsonl
new file mode 100644
index 0000000..4258c9b
--- /dev/null
+++ b/data/audio_responses/response_openai-gpt-5-mini_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9ab33e02bd2ba216387b5df2e6061ce20479184afaee0a871ef2582ac6283ca
+size 5799830
diff --git a/data/audio_responses/response_openai-gpt-5_audio.jsonl b/data/audio_responses/response_openai-gpt-5_audio.jsonl
new file mode 100644
index 0000000..9f00f6f
--- /dev/null
+++ b/data/audio_responses/response_openai-gpt-5_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9aac006c3979ccb38e3178b3ea57d075e65a46ddc43c39cf4e4b0f5905e1de6b
+size 5766626
diff --git a/data/audio_responses/response_zai-org-GLM-4.7_audio.jsonl b/data/audio_responses/response_zai-org-GLM-4.7_audio.jsonl
new file mode 100644
index 0000000..ab35da8
--- /dev/null
+++ b/data/audio_responses/response_zai-org-GLM-4.7_audio.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e1d8a650334bcc34e4382b70f275c9a27a170dd32897ce500a7b49fde09cb0a
+size 5684156
diff --git a/data/evaluation/audio/DeepSeek-R1-Distill-Qwen-32B/eval_records.jsonl b/data/evaluation/audio/DeepSeek-R1-Distill-Qwen-32B/eval_records.jsonl
new file mode 100644
index 0000000..c2c07d6
--- /dev/null
+++ b/data/evaluation/audio/DeepSeek-R1-Distill-Qwen-32B/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1d718a7b415b2d293f2c2e36e241a071755933801f3835596a6e48dc09c3817
+size 73378
diff --git a/data/evaluation/audio/DeepSeek-R1-Distill-Qwen-32B/eval_summary.json b/data/evaluation/audio/DeepSeek-R1-Distill-Qwen-32B/eval_summary.json
new file mode 100644
index 0000000..c18b8d3
--- /dev/null
+++ b/data/evaluation/audio/DeepSeek-R1-Distill-Qwen-32B/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_DeepSeek-R1-Distill-Qwen-32B_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9652173913043478,
+ "ci95_low": 0.9304347826086956,
+ "ci95_high": 0.991304347826087,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.17898986079995355,
+ "ci95_low": 0.14738968959875198,
+ "ci95_high": 0.2071164357682992,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.3329117853360891,
+ "ci95_low": 0.2934893568810859,
+ "ci95_high": 0.379643057876666,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.7600316089103358,
+ "ci95_low": 0.7175945476564607,
+ "ci95_high": 0.7971862634045108,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8186346051099261,
+ "ci95_low": 0.7802114076034392,
+ "ci95_high": 0.8521880644619918,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9652173913043478,
+ "ci95_low": 0.9304347826086956,
+ "ci95_high": 0.991304347826087,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4239777516821262,
+ "ci95_low": 0.39027083762391784,
+ "ci95_high": 0.4550665295891178,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.916356462572874,
+ "ci95_low": 0.8794707935016464,
+ "ci95_high": 0.9441683106312027,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.25595082306802136,
+ "ci95_low": 0.22260147602552507,
+ "ci95_high": 0.2928590255996577,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9768115942028985,
+ "ci95_low": 0.9536231884057972,
+ "ci95_high": 0.9942028985507246,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9650145772594753,
+ "ci95_low": 0.9298245614035088,
+ "ci95_high": 0.9912790697674418,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.18003353345767925,
+ "ci95_low": 0.1502274490958075,
+ "ci95_high": 0.21068211263083153,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.3328006398593711,
+ "ci95_low": 0.29260753566565834,
+ "ci95_high": 0.3744962123054128,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.759246155655555,
+ "ci95_low": 0.7198954959600902,
+ "ci95_high": 0.7987898803751113,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8180629895906448,
+ "ci95_low": 0.7821204624780069,
+ "ci95_high": 0.850153756988263,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9650145772594753,
+ "ci95_low": 0.9298245614035088,
+ "ci95_high": 0.9912790697674418,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4240267763242017,
+ "ci95_low": 0.3919682722706874,
+ "ci95_high": 0.45954600703343723,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9160307147031985,
+ "ci95_low": 0.8762967928840606,
+ "ci95_high": 0.9430876234418136,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.2564170866585252,
+ "ci95_low": 0.2218621607063843,
+ "ci95_high": 0.29191699747812266,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9766763848396501,
+ "ci95_low": 0.9532163742690059,
+ "ci95_high": 0.9941860465116279,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/Ministral-3-14B-Instruct-2512/eval_records.jsonl b/data/evaluation/audio/Ministral-3-14B-Instruct-2512/eval_records.jsonl
new file mode 100644
index 0000000..0857d25
--- /dev/null
+++ b/data/evaluation/audio/Ministral-3-14B-Instruct-2512/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bed9abaad0024200e20817b93aa2629fb3c23e7701cdb15c3ba741088be9416c
+size 72650
diff --git a/data/evaluation/audio/Ministral-3-14B-Instruct-2512/eval_summary.json b/data/evaluation/audio/Ministral-3-14B-Instruct-2512/eval_summary.json
new file mode 100644
index 0000000..ee3882f
--- /dev/null
+++ b/data/evaluation/audio/Ministral-3-14B-Instruct-2512/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_Ministral-3-14B-Instruct-2512_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "mistralai/Ministral-3-14B-Instruct-2512"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9043478260869565,
+ "ci95_low": 0.8521739130434782,
+ "ci95_high": 0.9565217391304348,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.1894342180802461,
+ "ci95_low": 0.16031354258671127,
+ "ci95_high": 0.218482406106874,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.3868308115693064,
+ "ci95_low": 0.3486874314805658,
+ "ci95_high": 0.4315632906562883,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8223048454836833,
+ "ci95_low": 0.7705501613002181,
+ "ci95_high": 0.8707789532204558,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8209346214892864,
+ "ci95_low": 0.7701276233260085,
+ "ci95_high": 0.8686345652012101,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9043478260869565,
+ "ci95_low": 0.8521739130434782,
+ "ci95_high": 0.9565217391304348,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4661899583777453,
+ "ci95_low": 0.42768249107518697,
+ "ci95_high": 0.5001771132539948,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8765434245543998,
+ "ci95_low": 0.8206476211455304,
+ "ci95_high": 0.9217550055439274,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.28813251482477625,
+ "ci95_low": 0.2550822939187982,
+ "ci95_high": 0.3227849178758579,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9362318840579711,
+ "ci95_low": 0.8956521739130435,
+ "ci95_high": 0.9710144927536233,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9037900874635568,
+ "ci95_low": 0.8425655976676385,
+ "ci95_high": 0.956268221574344,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.19053879078042246,
+ "ci95_low": 0.16285539963756604,
+ "ci95_high": 0.21962479659127457,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.38643614009113264,
+ "ci95_low": 0.34561591921066465,
+ "ci95_high": 0.4276565532681572,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8215756118068965,
+ "ci95_low": 0.7708642566898538,
+ "ci95_high": 0.8663184709540567,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8201973981682174,
+ "ci95_low": 0.7678117450572333,
+ "ci95_high": 0.8657153728419635,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9037900874635568,
+ "ci95_low": 0.8430232558139535,
+ "ci95_high": 0.956268221574344,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.46618351422615056,
+ "ci95_low": 0.4285247703666592,
+ "ci95_high": 0.502527192812768,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8759258576984437,
+ "ci95_low": 0.8186779297140533,
+ "ci95_high": 0.9264339786302783,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.2884874654357775,
+ "ci95_low": 0.25542721621883935,
+ "ci95_high": 0.32071033335067733,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9358600583090378,
+ "ci95_low": 0.8997050147492626,
+ "ci95_high": 0.9708454810495627,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_records.jsonl b/data/evaluation/audio/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_records.jsonl
new file mode 100644
index 0000000..d6769c3
--- /dev/null
+++ b/data/evaluation/audio/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b18e1bdaae5d9eed3ce788f19b811cf3b0070aff0f1be5a687f71200994dbc5b
+size 73473
diff --git a/data/evaluation/audio/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_summary.json b/data/evaluation/audio/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_summary.json
new file mode 100644
index 0000000..79658ee
--- /dev/null
+++ b/data/evaluation/audio/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_NVIDIA-Nemotron-3-Nano-30B-A3B-BF16_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 1,
+ "json_non_structured_root_count": 1,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.991304347826087,
+ "ci95_low": 0.9739130434782609,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.991304347826087,
+ "ci95_low": 0.9739130434782609,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.991304347826087,
+ "ci95_low": 0.9739130434782609,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.151577860934308,
+ "ci95_low": 0.12670875384637348,
+ "ci95_high": 0.17586151557197793,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.31961724817011555,
+ "ci95_low": 0.2860862684653814,
+ "ci95_high": 0.35673469795262547,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8745097602239374,
+ "ci95_low": 0.844108760628685,
+ "ci95_high": 0.8997852664616466,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8525252466745608,
+ "ci95_low": 0.827731205190022,
+ "ci95_high": 0.8741804578570854,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.991304347826087,
+ "ci95_low": 0.9739130434782609,
+ "ci95_high": 1.0,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.44856828977612034,
+ "ci95_low": 0.42298156838107903,
+ "ci95_high": 0.47159542249413,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9450446474422449,
+ "ci95_low": 0.9255259242774997,
+ "ci95_high": 0.9574291970687064,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.2355975545522118,
+ "ci95_low": 0.20980671781943605,
+ "ci95_high": 0.2666779367897788,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.991304347826087,
+ "ci95_low": 0.9739130434782609,
+ "ci95_high": 1.0,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9912536443148688,
+ "ci95_low": 0.9735294117647059,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9912536443148688,
+ "ci95_low": 0.9735294117647059,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9912536443148688,
+ "ci95_low": 0.9652173913043478,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.15215480718496072,
+ "ci95_low": 0.12949931944743262,
+ "ci95_high": 0.17630603485265273,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.3190259626498831,
+ "ci95_low": 0.2856287256893974,
+ "ci95_high": 0.3552974098898152,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8737780387092083,
+ "ci95_low": 0.8409784039446543,
+ "ci95_high": 0.9001532180410742,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8520697564397408,
+ "ci95_low": 0.8284072738496586,
+ "ci95_high": 0.8721182857634245,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9912536443148688,
+ "ci95_low": 0.9736842105263158,
+ "ci95_high": 1.0,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.44831960284801736,
+ "ci95_low": 0.42419646524860194,
+ "ci95_high": 0.4751728371799867,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9448590150231594,
+ "ci95_low": 0.924560775253555,
+ "ci95_high": 0.9575107249262862,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.23559038491742187,
+ "ci95_low": 0.20612900116137603,
+ "ci95_high": 0.26375937046258463,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9912536443148688,
+ "ci95_low": 0.9734513274336283,
+ "ci95_high": 1.0,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/Qwen3-235B-A22B-Instruct-2507/eval_records.jsonl b/data/evaluation/audio/Qwen3-235B-A22B-Instruct-2507/eval_records.jsonl
new file mode 100644
index 0000000..a38f139
--- /dev/null
+++ b/data/evaluation/audio/Qwen3-235B-A22B-Instruct-2507/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89d3127f700ed785c5862a6e9b696750a55945fdb9ecd76a9b9fddbf2df20180
+size 72491
diff --git a/data/evaluation/audio/Qwen3-235B-A22B-Instruct-2507/eval_summary.json b/data/evaluation/audio/Qwen3-235B-A22B-Instruct-2507/eval_summary.json
new file mode 100644
index 0000000..09eed30
--- /dev/null
+++ b/data/evaluation/audio/Qwen3-235B-A22B-Instruct-2507/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_Qwen3-235B-A22B-Instruct-2507_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "Qwen/Qwen3-235B-A22B-Instruct-2507"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9826086956521739,
+ "ci95_low": 0.9565217391304348,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.2173330483141894,
+ "ci95_low": 0.1849070569591469,
+ "ci95_high": 0.25291168229995886,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.4335420209640275,
+ "ci95_low": 0.39671761722154736,
+ "ci95_high": 0.4777794917917517,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8664646156184306,
+ "ci95_low": 0.8343664613820998,
+ "ci95_high": 0.8943779096525558,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8899864538544172,
+ "ci95_low": 0.8604004840026385,
+ "ci95_high": 0.9149752223692527,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9826086956521739,
+ "ci95_low": 0.9565217391304348,
+ "ci95_high": 1.0,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.5057798949655492,
+ "ci95_low": 0.47569344955056614,
+ "ci95_high": 0.5357612396403303,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9517346150529217,
+ "ci95_low": 0.9253268887776264,
+ "ci95_high": 0.9710070627270684,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.32543753463910846,
+ "ci95_low": 0.2932708528641611,
+ "ci95_high": 0.3603876734033322,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9884057971014493,
+ "ci95_low": 0.9710144927536233,
+ "ci95_high": 1.0,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9825072886297376,
+ "ci95_low": 0.9563953488372093,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.2170100499211102,
+ "ci95_low": 0.18754505813984265,
+ "ci95_high": 0.2502573462555236,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.4330504892668873,
+ "ci95_low": 0.3961661362789984,
+ "ci95_high": 0.4727742227237597,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8658394284761107,
+ "ci95_low": 0.8353157361590775,
+ "ci95_high": 0.8948913933163393,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8896251790658627,
+ "ci95_low": 0.8606081349517137,
+ "ci95_high": 0.9124666996069264,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9825072886297376,
+ "ci95_low": 0.956140350877193,
+ "ci95_high": 1.0,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.5052999892213693,
+ "ci95_low": 0.47572834134424663,
+ "ci95_high": 0.5346651401184934,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9515465854417793,
+ "ci95_low": 0.9246273296338423,
+ "ci95_high": 0.9708058978545069,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.3250302695939987,
+ "ci95_low": 0.2952605070982098,
+ "ci95_high": 0.3575089629475446,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9883381924198251,
+ "ci95_low": 0.9707602339181286,
+ "ci95_high": 1.0,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/Qwen3-30B-A3B-Instruct-2507/eval_records.jsonl b/data/evaluation/audio/Qwen3-30B-A3B-Instruct-2507/eval_records.jsonl
new file mode 100644
index 0000000..76d522b
--- /dev/null
+++ b/data/evaluation/audio/Qwen3-30B-A3B-Instruct-2507/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20ddfb0c5926c6a8ab861e33cd8b6d9e8285a3eb6694261b9403bfa6a4f23c78
+size 72382
diff --git a/data/evaluation/audio/Qwen3-30B-A3B-Instruct-2507/eval_summary.json b/data/evaluation/audio/Qwen3-30B-A3B-Instruct-2507/eval_summary.json
new file mode 100644
index 0000000..290615c
--- /dev/null
+++ b/data/evaluation/audio/Qwen3-30B-A3B-Instruct-2507/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_Qwen3-30B-A3B-Instruct-2507_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "Qwen/Qwen3-30B-A3B-Instruct-2507"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.991304347826087,
+ "ci95_low": 0.9739130434782609,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.19684189232606458,
+ "ci95_low": 0.16625172723838663,
+ "ci95_high": 0.22910395652568327,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.39450184216905454,
+ "ci95_low": 0.3591788535601328,
+ "ci95_high": 0.4330124833604759,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8784115376415182,
+ "ci95_low": 0.8505344141002975,
+ "ci95_high": 0.903028725140316,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8862593882121056,
+ "ci95_low": 0.8615307509816879,
+ "ci95_high": 0.9067183821064985,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.991304347826087,
+ "ci95_low": 0.9739130434782609,
+ "ci95_high": 1.0,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.48991842404554575,
+ "ci95_low": 0.4626504800733364,
+ "ci95_high": 0.5140304760879899,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9562893612880932,
+ "ci95_low": 0.9325442187892024,
+ "ci95_high": 0.9686383018582763,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.2956718672475595,
+ "ci95_low": 0.26454669094181665,
+ "ci95_high": 0.3300849155671355,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9942028985507246,
+ "ci95_low": 0.9826086956521739,
+ "ci95_high": 1.0,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9912536443148688,
+ "ci95_low": 0.9736070381231672,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.19692949413660615,
+ "ci95_low": 0.16715251119120092,
+ "ci95_high": 0.22753561172224926,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.3940608241817464,
+ "ci95_low": 0.35829987372750594,
+ "ci95_high": 0.4299848786093709,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8778560118521025,
+ "ci95_low": 0.8495839664477937,
+ "ci95_high": 0.9035468570885135,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8860633975027196,
+ "ci95_low": 0.8618174023303097,
+ "ci95_high": 0.9053864704022218,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9912536443148688,
+ "ci95_low": 0.9736070381231672,
+ "ci95_high": 1.0,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4896154433901518,
+ "ci95_low": 0.4626583630393019,
+ "ci95_high": 0.5187933981904828,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.956190228710819,
+ "ci95_low": 0.9360750060590338,
+ "ci95_high": 0.9684787222304081,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.29549515915917635,
+ "ci95_low": 0.2636338581774627,
+ "ci95_high": 0.3278267638384661,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9941690962099126,
+ "ci95_low": 0.9824046920821115,
+ "ci95_high": 1.0,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/Qwen3.5-35B-A3B/eval_records.jsonl b/data/evaluation/audio/Qwen3.5-35B-A3B/eval_records.jsonl
new file mode 100644
index 0000000..13a94fa
--- /dev/null
+++ b/data/evaluation/audio/Qwen3.5-35B-A3B/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb20c598a2eb95ece4d279df94e839d5b9a01781e979dc7a2ce2037671bce250
+size 70761
diff --git a/data/evaluation/audio/Qwen3.5-35B-A3B/eval_summary.json b/data/evaluation/audio/Qwen3.5-35B-A3B/eval_summary.json
new file mode 100644
index 0000000..310c962
--- /dev/null
+++ b/data/evaluation/audio/Qwen3.5-35B-A3B/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_Qwen3.5-35B-A3B_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "Qwen/Qwen3.5-35B-A3B"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9652173913043478,
+ "ci95_low": 0.9304347826086956,
+ "ci95_high": 0.991304347826087,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.21520958936056125,
+ "ci95_low": 0.18413675546228014,
+ "ci95_high": 0.24700422882620335,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.4409981491748027,
+ "ci95_low": 0.4031597753446333,
+ "ci95_high": 0.48379487295088275,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8684769955516382,
+ "ci95_low": 0.8315903030711295,
+ "ci95_high": 0.902314829885168,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8721890684371824,
+ "ci95_low": 0.8351595353907607,
+ "ci95_high": 0.9026444562872105,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9652173913043478,
+ "ci95_low": 0.9304347826086956,
+ "ci95_high": 0.991304347826087,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.5082282446956674,
+ "ci95_low": 0.47715176208373883,
+ "ci95_high": 0.5378975020909359,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.934207950348626,
+ "ci95_low": 0.8995590516074293,
+ "ci95_high": 0.9625937689147661,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.328103869267682,
+ "ci95_low": 0.29608465888483215,
+ "ci95_high": 0.36429607398236513,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9768115942028985,
+ "ci95_low": 0.9536231884057972,
+ "ci95_high": 0.9942028985507246,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9650145772594753,
+ "ci95_low": 0.9296187683284457,
+ "ci95_high": 0.991304347826087,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.21540429144535644,
+ "ci95_low": 0.18809527966292172,
+ "ci95_high": 0.24787486518319565,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.44028484522749434,
+ "ci95_low": 0.3985409155588139,
+ "ci95_high": 0.48117974866137053,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8677100975665166,
+ "ci95_low": 0.8276990172013519,
+ "ci95_high": 0.8999637577512384,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8717828212262254,
+ "ci95_low": 0.8364382312240174,
+ "ci95_high": 0.9009618923505894,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9650145772594753,
+ "ci95_low": 0.9217391304347826,
+ "ci95_high": 0.991304347826087,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.5077997447464557,
+ "ci95_low": 0.4752733395379202,
+ "ci95_high": 0.5394260892181673,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.933937325248392,
+ "ci95_low": 0.8986341223292782,
+ "ci95_high": 0.9626918753206848,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.3278445683364254,
+ "ci95_low": 0.2931951068366996,
+ "ci95_high": 0.36273009855268296,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9766763848396501,
+ "ci95_low": 0.9534883720930233,
+ "ci95_high": 0.9942028985507246,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/claude-sonnet-4-6/eval_records.jsonl b/data/evaluation/audio/claude-sonnet-4-6/eval_records.jsonl
new file mode 100644
index 0000000..9b410ba
--- /dev/null
+++ b/data/evaluation/audio/claude-sonnet-4-6/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e8d4a51cefa626aba4423d9bc6eaa8a25687dde8c1db8cc133e5db04b8e7bda
+size 70009
diff --git a/data/evaluation/audio/claude-sonnet-4-6/eval_summary.json b/data/evaluation/audio/claude-sonnet-4-6/eval_summary.json
new file mode 100644
index 0000000..769da9c
--- /dev/null
+++ b/data/evaluation/audio/claude-sonnet-4-6/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_claude-sonnet-4-6_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "claude-sonnet-4-6"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9565217391304348,
+ "ci95_low": 0.9217391304347826,
+ "ci95_high": 0.991304347826087,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.20016078837795664,
+ "ci95_low": 0.16974025208949503,
+ "ci95_high": 0.23302572473336608,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.420246223758364,
+ "ci95_low": 0.38517597372371826,
+ "ci95_high": 0.4585651541608854,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.923056143457486,
+ "ci95_low": 0.8852768484420277,
+ "ci95_high": 0.9569683377143458,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8614706174503426,
+ "ci95_low": 0.8202871165461174,
+ "ci95_high": 0.8949979625732924,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9565217391304348,
+ "ci95_low": 0.9130434782608695,
+ "ci95_high": 0.991304347826087,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.514487718531269,
+ "ci95_low": 0.4829910597319649,
+ "ci95_high": 0.5434698043421542,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9248380319037375,
+ "ci95_low": 0.8839075165267541,
+ "ci95_high": 0.9575596932195619,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.31020350606816033,
+ "ci95_low": 0.27552627660418666,
+ "ci95_high": 0.3448886890448519,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9710144927536233,
+ "ci95_low": 0.9478260869565217,
+ "ci95_high": 0.9942028985507246,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.956268221574344,
+ "ci95_low": 0.9127906976744186,
+ "ci95_high": 0.9912280701754386,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.2005327834334337,
+ "ci95_low": 0.17058529051758092,
+ "ci95_high": 0.2342717737893348,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.42006910025683963,
+ "ci95_low": 0.3834315400918656,
+ "ci95_high": 0.4590192545366525,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9226074912327484,
+ "ci95_low": 0.8821600287912809,
+ "ci95_high": 0.9565658171831976,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8613033818952337,
+ "ci95_low": 0.8229630006060684,
+ "ci95_high": 0.8940429097145464,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.956268221574344,
+ "ci95_low": 0.9125364431486881,
+ "ci95_high": 0.9912023460410557,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.5144031249743406,
+ "ci95_low": 0.4821578578384746,
+ "ci95_high": 0.5435775177767942,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9246132750146405,
+ "ci95_low": 0.8877916993096939,
+ "ci95_high": 0.9586982234487992,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.3103009418451366,
+ "ci95_low": 0.27777939335510404,
+ "ci95_high": 0.34067392451866924,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9708454810495627,
+ "ci95_low": 0.9416909620991254,
+ "ci95_high": 0.9941690962099126,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/gemini-2.5-flash/eval_records.jsonl b/data/evaluation/audio/gemini-2.5-flash/eval_records.jsonl
new file mode 100644
index 0000000..365b4fd
--- /dev/null
+++ b/data/evaluation/audio/gemini-2.5-flash/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c065661506c77b78824ffe610a65271ec1037b4dbd17b7d28b2437ca9238cd61
+size 69468
diff --git a/data/evaluation/audio/gemini-2.5-flash/eval_summary.json b/data/evaluation/audio/gemini-2.5-flash/eval_summary.json
new file mode 100644
index 0000000..59a7224
--- /dev/null
+++ b/data/evaluation/audio/gemini-2.5-flash/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_gemini-2.5-flash_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "gemini-2.5-flash"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 6,
+ "json_non_structured_root_count": 6,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9478260869565217,
+ "ci95_low": 0.9043478260869565,
+ "ci95_high": 0.9826086956521739,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9478260869565217,
+ "ci95_low": 0.9043478260869565,
+ "ci95_high": 0.9826086956521739,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8608695652173913,
+ "ci95_low": 0.7913043478260869,
+ "ci95_high": 0.9217391304347826,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.23661732371321395,
+ "ci95_low": 0.20117200571655602,
+ "ci95_high": 0.27552994808947867,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.46450724566902873,
+ "ci95_low": 0.41355207603221794,
+ "ci95_high": 0.5113132721305818,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8167104269149895,
+ "ci95_low": 0.7503453697506918,
+ "ci95_high": 0.8793060448423199,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7920682194547527,
+ "ci95_low": 0.7217651508574049,
+ "ci95_high": 0.8484803651524777,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8608695652173913,
+ "ci95_low": 0.7913043478260869,
+ "ci95_high": 0.9217391304347826,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.5059449987657442,
+ "ci95_low": 0.46119458554509035,
+ "ci95_high": 0.5476419755929257,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8379357832965119,
+ "ci95_low": 0.7704589243715659,
+ "ci95_high": 0.8987731482604431,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.35056228469112133,
+ "ci95_low": 0.3122109995929442,
+ "ci95_high": 0.39194721758098,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.889855072463768,
+ "ci95_low": 0.8318840579710145,
+ "ci95_high": 0.9391304347826087,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9475218658892128,
+ "ci95_low": 0.9037900874635568,
+ "ci95_high": 0.9825581395348837,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9475218658892128,
+ "ci95_low": 0.9029411764705882,
+ "ci95_high": 0.9825581395348837,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8600583090379009,
+ "ci95_low": 0.7906976744186046,
+ "ci95_high": 0.9212827988338192,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.23693685223738328,
+ "ci95_low": 0.2022894689784624,
+ "ci95_high": 0.2732332188897701,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.4637811536642826,
+ "ci95_low": 0.4149778293144418,
+ "ci95_high": 0.5140852516596801,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8156416830486046,
+ "ci95_low": 0.7484519121011579,
+ "ci95_high": 0.8716024790234644,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7910573032023333,
+ "ci95_low": 0.733009280035517,
+ "ci95_high": 0.8449132083645086,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8600583090379009,
+ "ci95_low": 0.7894736842105263,
+ "ci95_high": 0.9130434782608695,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.5054532296500902,
+ "ci95_low": 0.45928163505473457,
+ "ci95_high": 0.5488039311607088,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8370579737593783,
+ "ci95_low": 0.7747768655475792,
+ "ci95_high": 0.8975238886596524,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.35035900295083294,
+ "ci95_low": 0.3102747384732858,
+ "ci95_high": 0.39151782160346615,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8892128279883382,
+ "ci95_low": 0.8352941176470589,
+ "ci95_high": 0.9384164222873901,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/gemini-3-flash/eval_records.jsonl b/data/evaluation/audio/gemini-3-flash/eval_records.jsonl
new file mode 100644
index 0000000..4fef767
--- /dev/null
+++ b/data/evaluation/audio/gemini-3-flash/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5133399f25ec5a611a017aabda1eae7612c1437bf8c2734b552cf9df713bcf1
+size 69793
diff --git a/data/evaluation/audio/gemini-3-flash/eval_summary.json b/data/evaluation/audio/gemini-3-flash/eval_summary.json
new file mode 100644
index 0000000..6742ec6
--- /dev/null
+++ b/data/evaluation/audio/gemini-3-flash/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_gemini-3-flash_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "gemini-3-flash-preview"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 15,
+ "json_non_structured_root_count": 15,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.8695652173913043,
+ "ci95_low": 0.808695652173913,
+ "ci95_high": 0.9304347826086956,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.8695652173913043,
+ "ci95_low": 0.8,
+ "ci95_high": 0.9304347826086956,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.7739130434782608,
+ "ci95_low": 0.6956521739130435,
+ "ci95_high": 0.8434782608695652,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.19092496253791677,
+ "ci95_low": 0.15621880626208295,
+ "ci95_high": 0.22749822903603178,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.36821845674205417,
+ "ci95_low": 0.31751208924597785,
+ "ci95_high": 0.41735237085876215,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.6990822881238199,
+ "ci95_low": 0.6217685708661496,
+ "ci95_high": 0.7686919268026687,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7012698101618149,
+ "ci95_low": 0.6308013740031556,
+ "ci95_high": 0.7664699547112257,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.7739130434782608,
+ "ci95_low": 0.6956521739130435,
+ "ci95_high": 0.8434782608695652,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4194085691345969,
+ "ci95_low": 0.3721419967839604,
+ "ci95_high": 0.46380151209537906,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.7496986323727788,
+ "ci95_low": 0.6746138903524292,
+ "ci95_high": 0.8188813319544843,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.2795717096399855,
+ "ci95_low": 0.2396290661392365,
+ "ci95_high": 0.32136822495920675,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8057971014492754,
+ "ci95_low": 0.736231884057971,
+ "ci95_high": 0.8666666666666667,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.8688046647230321,
+ "ci95_low": 0.8075801749271136,
+ "ci95_high": 0.9302325581395349,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.8688046647230321,
+ "ci95_low": 0.8,
+ "ci95_high": 0.9298245614035088,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.7725947521865889,
+ "ci95_low": 0.6938775510204082,
+ "ci95_high": 0.8513119533527697,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.19044798113739575,
+ "ci95_low": 0.15821936157988672,
+ "ci95_high": 0.2254199946255887,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.36723009522999384,
+ "ci95_low": 0.31821918133185206,
+ "ci95_high": 0.41859689596649446,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.6974811107337179,
+ "ci95_low": 0.6247875855785714,
+ "ci95_high": 0.7672126287934371,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7000624426797653,
+ "ci95_low": 0.6255562700004903,
+ "ci95_high": 0.7681480562995643,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.7725947521865889,
+ "ci95_low": 0.6938775510204082,
+ "ci95_high": 0.8504398826979472,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4183863957003692,
+ "ci95_low": 0.37023834193086674,
+ "ci95_high": 0.46590967593718413,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.7484173156843142,
+ "ci95_low": 0.6746286216994775,
+ "ci95_high": 0.8172896525360409,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.2788390381836948,
+ "ci95_low": 0.23862933568942593,
+ "ci95_high": 0.3181737674595218,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8046647230320699,
+ "ci95_low": 0.7333333333333333,
+ "ci95_high": 0.8753623188405797,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/gemma-3-27b-it/eval_records.jsonl b/data/evaluation/audio/gemma-3-27b-it/eval_records.jsonl
new file mode 100644
index 0000000..5e2b535
--- /dev/null
+++ b/data/evaluation/audio/gemma-3-27b-it/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe095a24a65521abe72b0e723bbb3a245e57c01d7901bca5d4e7b7c044879d7f
+size 70567
diff --git a/data/evaluation/audio/gemma-3-27b-it/eval_summary.json b/data/evaluation/audio/gemma-3-27b-it/eval_summary.json
new file mode 100644
index 0000000..b416b4c
--- /dev/null
+++ b/data/evaluation/audio/gemma-3-27b-it/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_gemma-3-27b-it_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "google/gemma-3-27b-it"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8869565217391304,
+ "ci95_low": 0.8260869565217391,
+ "ci95_high": 0.9391304347826087,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.1717653514349151,
+ "ci95_low": 0.14145615261791927,
+ "ci95_high": 0.20136059430902212,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.34951376394542105,
+ "ci95_low": 0.30649581152938093,
+ "ci95_high": 0.39437754811535725,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.7493667653480155,
+ "ci95_low": 0.6913592172265975,
+ "ci95_high": 0.7998772700276529,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7858121851065563,
+ "ci95_low": 0.7309408116653888,
+ "ci95_high": 0.8381098324846292,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8869565217391304,
+ "ci95_low": 0.8260869565217391,
+ "ci95_high": 0.9391304347826087,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4235486269094506,
+ "ci95_low": 0.3862172989218324,
+ "ci95_high": 0.458569510230797,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8532417428616058,
+ "ci95_low": 0.7959824782270689,
+ "ci95_high": 0.9047129931008376,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.26063955769016806,
+ "ci95_low": 0.22708561588299195,
+ "ci95_high": 0.2972238839686948,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9246376811594202,
+ "ci95_low": 0.8840579710144928,
+ "ci95_high": 0.9594202898550724,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8862973760932945,
+ "ci95_low": 0.8245614035087719,
+ "ci95_high": 0.938953488372093,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.17276689867360265,
+ "ci95_low": 0.14342125233699807,
+ "ci95_high": 0.2027242626428895,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.3493877758276355,
+ "ci95_low": 0.30552831084799387,
+ "ci95_high": 0.39218622821564847,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.7482122367433239,
+ "ci95_low": 0.6914773465207806,
+ "ci95_high": 0.8030042597222274,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7848701662380662,
+ "ci95_low": 0.7303242942964129,
+ "ci95_high": 0.8357333275406418,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8862973760932945,
+ "ci95_low": 0.8255813953488372,
+ "ci95_high": 0.9391304347826087,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4234556370815207,
+ "ci95_low": 0.388048919093289,
+ "ci95_high": 0.46182869829152884,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8524883061415516,
+ "ci95_low": 0.7952949587567733,
+ "ci95_high": 0.9040194918814656,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.2610773372506191,
+ "ci95_low": 0.2268204717846509,
+ "ci95_high": 0.29674939513878085,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.924198250728863,
+ "ci95_low": 0.8782608695652174,
+ "ci95_high": 0.9594202898550724,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/gemma-4-31b-it/eval_records.jsonl b/data/evaluation/audio/gemma-4-31b-it/eval_records.jsonl
new file mode 100644
index 0000000..7263a6a
--- /dev/null
+++ b/data/evaluation/audio/gemma-4-31b-it/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d0a71d3fb3b54c7343040cef22f532157d9cd5b315677395cf6581d3f2edb69
+size 68590
diff --git a/data/evaluation/audio/gemma-4-31b-it/eval_summary.json b/data/evaluation/audio/gemma-4-31b-it/eval_summary.json
new file mode 100644
index 0000000..ce21a9e
--- /dev/null
+++ b/data/evaluation/audio/gemma-4-31b-it/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_gemma-4-31b-it_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "gemma-4-31b-it"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 9,
+ "json_non_structured_root_count": 9,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9217391304347826,
+ "ci95_low": 0.8695652173913043,
+ "ci95_high": 0.9652173913043478,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9217391304347826,
+ "ci95_low": 0.8695652173913043,
+ "ci95_high": 0.9652173913043478,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.7739130434782608,
+ "ci95_low": 0.7043478260869566,
+ "ci95_high": 0.8434782608695652,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.17734685749049478,
+ "ci95_low": 0.1406798846482258,
+ "ci95_high": 0.2157053625934928,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.37656279416795235,
+ "ci95_low": 0.3265896176885625,
+ "ci95_high": 0.4258484736630576,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.6794316268349846,
+ "ci95_low": 0.607422088524736,
+ "ci95_high": 0.7457605958808317,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7010951045023723,
+ "ci95_low": 0.6280103800612882,
+ "ci95_high": 0.7752135310810917,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.7739130434782608,
+ "ci95_low": 0.6956521739130435,
+ "ci95_high": 0.8434782608695652,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4111137594978106,
+ "ci95_low": 0.3660352002159086,
+ "ci95_high": 0.4580533909837083,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.7496403971529646,
+ "ci95_low": 0.6727891802488667,
+ "ci95_high": 0.816571974138853,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.27695482582922354,
+ "ci95_low": 0.23704546740523288,
+ "ci95_high": 0.318339810222142,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8231884057971015,
+ "ci95_low": 0.7623188405797102,
+ "ci95_high": 0.8840579710144928,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9212827988338192,
+ "ci95_low": 0.868421052631579,
+ "ci95_high": 0.9651162790697675,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9212827988338192,
+ "ci95_low": 0.868421052631579,
+ "ci95_high": 0.9650145772594753,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.7725947521865889,
+ "ci95_low": 0.6938775510204082,
+ "ci95_high": 0.8473053892215568,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.17838094995399617,
+ "ci95_low": 0.14544097186845628,
+ "ci95_high": 0.21226502603108224,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.37506969245066707,
+ "ci95_low": 0.3256912721197849,
+ "ci95_high": 0.4231368957967361,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.6781343542474447,
+ "ci95_low": 0.6066697052471903,
+ "ci95_high": 0.741310912605868,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.6996530194066852,
+ "ci95_low": 0.6275246980099639,
+ "ci95_high": 0.7696546940342126,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.7725947521865889,
+ "ci95_low": 0.6929824561403509,
+ "ci95_high": 0.8430232558139535,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.41052833221736934,
+ "ci95_low": 0.3640397333821325,
+ "ci95_high": 0.4583481730972259,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.7482808412599543,
+ "ci95_low": 0.6692601640133323,
+ "ci95_high": 0.818361660953208,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.2767253212023317,
+ "ci95_low": 0.23613385523760125,
+ "ci95_high": 0.3172397443728542,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8221574344023324,
+ "ci95_low": 0.7616279069767442,
+ "ci95_high": 0.8866279069767442,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/gpt-oss/eval_records.jsonl b/data/evaluation/audio/gpt-oss/eval_records.jsonl
new file mode 100644
index 0000000..140d8ee
--- /dev/null
+++ b/data/evaluation/audio/gpt-oss/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d241c871e044b25206a07801884d8fa67ef1ae848903f16b46937fc8208ce7f8
+size 67969
diff --git a/data/evaluation/audio/gpt-oss/eval_summary.json b/data/evaluation/audio/gpt-oss/eval_summary.json
new file mode 100644
index 0000000..ac07d6b
--- /dev/null
+++ b/data/evaluation/audio/gpt-oss/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_gpt-oss_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "openai/gpt-oss-20b"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 53,
+ "json_non_structured_root_count": 53,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.5391304347826087,
+ "ci95_low": 0.4434782608695652,
+ "ci95_high": 0.6260869565217392,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.5391304347826087,
+ "ci95_low": 0.4434782608695652,
+ "ci95_high": 0.6260869565217392,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.5391304347826087,
+ "ci95_low": 0.4434782608695652,
+ "ci95_high": 0.6260869565217392,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.087780999683714,
+ "ci95_low": 0.06403489647331755,
+ "ci95_high": 0.11184872878121542,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.18552367876615575,
+ "ci95_low": 0.14888042397729367,
+ "ci95_high": 0.22654958997573085,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.4992791946294463,
+ "ci95_low": 0.4135495366687181,
+ "ci95_high": 0.5790084702209605,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.4682520421896049,
+ "ci95_low": 0.39013429069496314,
+ "ci95_high": 0.5482398338966474,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.5391304347826087,
+ "ci95_low": 0.4434782608695652,
+ "ci95_high": 0.6260869565217392,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.2575279576931054,
+ "ci95_low": 0.21009699443404564,
+ "ci95_high": 0.3047131345878343,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.5155043039182741,
+ "ci95_low": 0.4311309715813897,
+ "ci95_high": 0.597713418729902,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.1366523392249349,
+ "ci95_low": 0.10823844631698255,
+ "ci95_high": 0.16769995479742728,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.5391304347826087,
+ "ci95_low": 0.45217391304347826,
+ "ci95_high": 0.6347826086956522,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.5393586005830904,
+ "ci95_low": 0.45058139534883723,
+ "ci95_high": 0.6366279069767442,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.5393586005830904,
+ "ci95_low": 0.4473684210526316,
+ "ci95_high": 0.6239067055393586,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.5393586005830904,
+ "ci95_low": 0.4489795918367347,
+ "ci95_high": 0.6355685131195336,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.08829284224746745,
+ "ci95_low": 0.06607089765025743,
+ "ci95_high": 0.11023481536990189,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.18489656556319695,
+ "ci95_low": 0.14873646029420928,
+ "ci95_high": 0.22715381425822342,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.4992749916826792,
+ "ci95_low": 0.40813814123139663,
+ "ci95_high": 0.5828520222494427,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.4683098772266677,
+ "ci95_low": 0.3921492433124941,
+ "ci95_high": 0.5415416923026193,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.5393586005830904,
+ "ci95_low": 0.45321637426900585,
+ "ci95_high": 0.631578947368421,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.25748813316444785,
+ "ci95_low": 0.21095366116607744,
+ "ci95_high": 0.304395730134038,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.5156756927976162,
+ "ci95_low": 0.4305953257922563,
+ "ci95_high": 0.6078419439811253,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.13659470390533218,
+ "ci95_low": 0.10663955326822294,
+ "ci95_high": 0.1647452998740339,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.5393586005830904,
+ "ci95_low": 0.4434782608695652,
+ "ci95_high": 0.6297376093294461,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/ibm-granite-4.0-h-small/eval_records.jsonl b/data/evaluation/audio/ibm-granite-4.0-h-small/eval_records.jsonl
new file mode 100644
index 0000000..6136148
--- /dev/null
+++ b/data/evaluation/audio/ibm-granite-4.0-h-small/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:198b3aaa6bdf045d1a48a51065e7afe3ce0485e18e710e4aaff414a77434802a
+size 72326
diff --git a/data/evaluation/audio/ibm-granite-4.0-h-small/eval_summary.json b/data/evaluation/audio/ibm-granite-4.0-h-small/eval_summary.json
new file mode 100644
index 0000000..41e1f9e
--- /dev/null
+++ b/data/evaluation/audio/ibm-granite-4.0-h-small/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_ibm-granite-4.0-h-small_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "ibm-granite/granite-4.0-h-small"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 2,
+ "json_non_structured_root_count": 2,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9826086956521739,
+ "ci95_low": 0.9565217391304348,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9826086956521739,
+ "ci95_low": 0.9565217391304348,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9565217391304348,
+ "ci95_low": 0.9130434782608695,
+ "ci95_high": 0.991304347826087,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.15959750723916355,
+ "ci95_low": 0.13294018312608144,
+ "ci95_high": 0.18548604734436647,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.32507069644261244,
+ "ci95_low": 0.29112922647754225,
+ "ci95_high": 0.3656670672434472,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.7798625238737148,
+ "ci95_low": 0.7357585372125726,
+ "ci95_high": 0.8191680712437149,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8255478017928248,
+ "ci95_low": 0.785468262942776,
+ "ci95_high": 0.8573187865012646,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9565217391304348,
+ "ci95_low": 0.9130434782608695,
+ "ci95_high": 0.991304347826087,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4215102425184969,
+ "ci95_low": 0.39087218442747934,
+ "ci95_high": 0.45089272758682214,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9128637600178982,
+ "ci95_low": 0.8720674007374946,
+ "ci95_high": 0.9431244689863129,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.242334101840888,
+ "ci95_low": 0.2138222964364514,
+ "ci95_high": 0.2727319522115769,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9652173913043478,
+ "ci95_low": 0.9333333333333332,
+ "ci95_high": 0.991304347826087,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9825072886297376,
+ "ci95_low": 0.956140350877193,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9825072886297376,
+ "ci95_low": 0.9558823529411765,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.956268221574344,
+ "ci95_low": 0.9201183431952663,
+ "ci95_high": 0.9912790697674418,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.16052810494901287,
+ "ci95_low": 0.13382140599078812,
+ "ci95_high": 0.18650882386799922,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.32442907573892116,
+ "ci95_low": 0.28724754915273665,
+ "ci95_high": 0.3631448067188117,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.779304299020941,
+ "ci95_low": 0.7404081561782935,
+ "ci95_high": 0.8196012683605033,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8249933579511799,
+ "ci95_low": 0.7837276557326035,
+ "ci95_high": 0.8577527885992285,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.956268221574344,
+ "ci95_low": 0.9130434782608695,
+ "ci95_high": 0.9912536443148688,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4214204932362917,
+ "ci95_low": 0.39126395022466676,
+ "ci95_high": 0.4535840020211524,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.912509933699956,
+ "ci95_low": 0.8727157850127626,
+ "ci95_high": 0.9446079911748322,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.24247859034396702,
+ "ci95_low": 0.21199266262017524,
+ "ci95_high": 0.27394144284942373,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9650145772594753,
+ "ci95_low": 0.9302325581395349,
+ "ci95_high": 0.9941176470588236,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/inference-net-Schematron-8B/eval_records.jsonl b/data/evaluation/audio/inference-net-Schematron-8B/eval_records.jsonl
new file mode 100644
index 0000000..4bce444
--- /dev/null
+++ b/data/evaluation/audio/inference-net-Schematron-8B/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:505f14c832890de158a721c7194ddb7cbc6a592826e1f31dd6cb540910b604f9
+size 71466
diff --git a/data/evaluation/audio/inference-net-Schematron-8B/eval_summary.json b/data/evaluation/audio/inference-net-Schematron-8B/eval_summary.json
new file mode 100644
index 0000000..21c1eb0
--- /dev/null
+++ b/data/evaluation/audio/inference-net-Schematron-8B/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_inference-net-Schematron-8B_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "inference-net/Schematron-8B"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 1,
+ "json_non_structured_root_count": 1,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.991304347826087,
+ "ci95_low": 0.9739130434782609,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.991304347826087,
+ "ci95_low": 0.9739130434782609,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9652173913043478,
+ "ci95_low": 0.9304347826086956,
+ "ci95_high": 0.991304347826087,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.1553911740613997,
+ "ci95_low": 0.1320029894899545,
+ "ci95_high": 0.18016190005013116,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.3220076966940882,
+ "ci95_low": 0.2902274354971941,
+ "ci95_high": 0.35735263602546713,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8922674613889068,
+ "ci95_low": 0.8557805779516507,
+ "ci95_high": 0.9242919680580224,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8266846007038038,
+ "ci95_low": 0.7888370279609189,
+ "ci95_high": 0.857294443756498,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9652173913043478,
+ "ci95_low": 0.9304347826086956,
+ "ci95_high": 0.991304347826087,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4565554440481316,
+ "ci95_low": 0.4317603602626809,
+ "ci95_high": 0.4807628669543965,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9190397944374997,
+ "ci95_low": 0.8819838340422165,
+ "ci95_high": 0.9464866615135176,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.23869943537774396,
+ "ci95_low": 0.20953234770885412,
+ "ci95_high": 0.2669433853600214,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9739130434782609,
+ "ci95_low": 0.9478260869565217,
+ "ci95_high": 0.9942028985507246,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9912536443148688,
+ "ci95_low": 0.9736070381231672,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9912536443148688,
+ "ci95_low": 0.9735294117647059,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9650145772594753,
+ "ci95_low": 0.9298245614035088,
+ "ci95_high": 0.991304347826087,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.15629724504718046,
+ "ci95_low": 0.1327274435582804,
+ "ci95_high": 0.179951591810721,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.32292701639735444,
+ "ci95_low": 0.2923462296177095,
+ "ci95_high": 0.3547654006201007,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8917927281577849,
+ "ci95_low": 0.8535384296282597,
+ "ci95_high": 0.9275291389267839,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8267056367249518,
+ "ci95_low": 0.7911870254288785,
+ "ci95_high": 0.8570216421064648,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9650145772594753,
+ "ci95_low": 0.9215116279069767,
+ "ci95_high": 0.991304347826087,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.45700566320077324,
+ "ci95_low": 0.43197767334012077,
+ "ci95_high": 0.48241223877526634,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9189115970813007,
+ "ci95_low": 0.8824537321793952,
+ "ci95_high": 0.9475233612230993,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.23961213072226742,
+ "ci95_low": 0.21300428328120372,
+ "ci95_high": 0.268032518309985,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9737609329446064,
+ "ci95_low": 0.9441176470588235,
+ "ci95_high": 0.9942028985507246,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/interfaze-beta/eval_records.jsonl b/data/evaluation/audio/interfaze-beta/eval_records.jsonl
new file mode 100644
index 0000000..77b525e
--- /dev/null
+++ b/data/evaluation/audio/interfaze-beta/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1f3883d7ada414ab958a7baf3a8535bdbdfb1b7906dc5bac0f0beefc6ff5bb0
+size 69324
diff --git a/data/evaluation/audio/interfaze-beta/eval_summary.json b/data/evaluation/audio/interfaze-beta/eval_summary.json
new file mode 100644
index 0000000..8b6b766
--- /dev/null
+++ b/data/evaluation/audio/interfaze-beta/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_interfaze-beta_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "interfaze-beta"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8434782608695652,
+ "ci95_low": 0.7739130434782608,
+ "ci95_high": 0.9043478260869565,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.2035126943660157,
+ "ci95_low": 0.17103765793432008,
+ "ci95_high": 0.23741275870060108,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.40979085174131163,
+ "ci95_low": 0.3676184394443381,
+ "ci95_high": 0.45917501182305825,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.7525844250377696,
+ "ci95_low": 0.6821627487702453,
+ "ci95_high": 0.8164828651631216,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.760510818069284,
+ "ci95_low": 0.6982341816918611,
+ "ci95_high": 0.8200800041652657,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8434782608695652,
+ "ci95_low": 0.7739130434782608,
+ "ci95_high": 0.9130434782608695,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.455295990381699,
+ "ci95_low": 0.4151706968317004,
+ "ci95_high": 0.49527492797097805,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8158224466028048,
+ "ci95_low": 0.7481439044300071,
+ "ci95_high": 0.8759841683396631,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.30665177305366365,
+ "ci95_low": 0.2679415384215724,
+ "ci95_high": 0.344213751681674,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8956521739130435,
+ "ci95_low": 0.8434782608695652,
+ "ci95_high": 0.9362318840579711,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8425655976676385,
+ "ci95_low": 0.7732558139534884,
+ "ci95_high": 0.911504424778761,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.20469935730692537,
+ "ci95_low": 0.1704665631059015,
+ "ci95_high": 0.23875318937515516,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.40870736953514497,
+ "ci95_low": 0.3627944278559024,
+ "ci95_high": 0.45182892624577414,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.7511417686239956,
+ "ci95_low": 0.6884415972103335,
+ "ci95_high": 0.8087760153990141,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7594544642171425,
+ "ci95_low": 0.6913442023524712,
+ "ci95_high": 0.8199826794306149,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8425655976676385,
+ "ci95_low": 0.7725947521865889,
+ "ci95_high": 0.9040697674418605,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.45484949848868866,
+ "ci95_low": 0.4100811636782316,
+ "ci95_high": 0.5013095103464918,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8148618865174733,
+ "ci95_low": 0.7445595678806685,
+ "ci95_high": 0.872890555427766,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.3067033634210352,
+ "ci95_low": 0.26983533749301075,
+ "ci95_high": 0.3447669368005712,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8950437317784257,
+ "ci95_low": 0.847953216374269,
+ "ci95_high": 0.936046511627907,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/openai-gpt-4.1/eval_records.jsonl b/data/evaluation/audio/openai-gpt-4.1/eval_records.jsonl
new file mode 100644
index 0000000..b076ceb
--- /dev/null
+++ b/data/evaluation/audio/openai-gpt-4.1/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57a6f674ffd71235c750cea3c62a417e58faa1b278089689af191543df2cd6c7
+size 68876
diff --git a/data/evaluation/audio/openai-gpt-4.1/eval_summary.json b/data/evaluation/audio/openai-gpt-4.1/eval_summary.json
new file mode 100644
index 0000000..d968f96
--- /dev/null
+++ b/data/evaluation/audio/openai-gpt-4.1/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_openai-gpt-4.1_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "gpt-4.1"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9304347826086956,
+ "ci95_low": 0.8869565217391304,
+ "ci95_high": 0.9739130434782609,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.18958947413551921,
+ "ci95_low": 0.159772558124553,
+ "ci95_high": 0.2206176154889207,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.38002522533684346,
+ "ci95_low": 0.3448747350717143,
+ "ci95_high": 0.4207172682367718,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8768212586294121,
+ "ci95_low": 0.82830368444961,
+ "ci95_high": 0.9183136513881435,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8363433916046665,
+ "ci95_low": 0.7886834198668883,
+ "ci95_high": 0.8772305020034089,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9304347826086956,
+ "ci95_low": 0.8782608695652174,
+ "ci95_high": 0.9739130434782609,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.48214531936725824,
+ "ci95_low": 0.4489320444671767,
+ "ci95_high": 0.5125448384704976,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8990709856073525,
+ "ci95_low": 0.8546952191601953,
+ "ci95_high": 0.9401937779968003,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.2848073497361813,
+ "ci95_low": 0.2514458444140591,
+ "ci95_high": 0.3209409266936525,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9536231884057972,
+ "ci95_low": 0.918840579710145,
+ "ci95_high": 0.9826086956521739,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9300291545189504,
+ "ci95_low": 0.877906976744186,
+ "ci95_high": 0.9737609329446064,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.1906949521188167,
+ "ci95_low": 0.16072650102906985,
+ "ci95_high": 0.22140786200654658,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.3797292716061742,
+ "ci95_low": 0.3455910848680907,
+ "ci95_high": 0.4173186777520272,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8761030152394961,
+ "ci95_low": 0.8271788404677927,
+ "ci95_high": 0.9163066202254914,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8360166419791487,
+ "ci95_low": 0.7889756502885275,
+ "ci95_high": 0.8763537994787879,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9300291545189504,
+ "ci95_low": 0.8775510204081632,
+ "ci95_high": 0.9737609329446064,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.48217574632149574,
+ "ci95_low": 0.45295980040191225,
+ "ci95_high": 0.515130863094975,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8986916503390165,
+ "ci95_low": 0.8496767261341824,
+ "ci95_high": 0.941656358529026,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.28521211186249545,
+ "ci95_low": 0.25201301919306296,
+ "ci95_high": 0.31620130043864864,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9533527696793003,
+ "ci95_low": 0.9183673469387755,
+ "ci95_high": 0.9825581395348837,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/openai-gpt-5-4/eval_records.jsonl b/data/evaluation/audio/openai-gpt-5-4/eval_records.jsonl
new file mode 100644
index 0000000..86dc6fc
--- /dev/null
+++ b/data/evaluation/audio/openai-gpt-5-4/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee1a41969696263bb1f90262b1b9ca6d6dacf2fd2d1ec04202f6ef86b9fe24d8
+size 68255
diff --git a/data/evaluation/audio/openai-gpt-5-4/eval_summary.json b/data/evaluation/audio/openai-gpt-5-4/eval_summary.json
new file mode 100644
index 0000000..b45c3aa
--- /dev/null
+++ b/data/evaluation/audio/openai-gpt-5-4/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_openai-gpt-5-4_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "gpt-5.4"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8695652173913043,
+ "ci95_low": 0.808695652173913,
+ "ci95_high": 0.9304347826086956,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.178753282322858,
+ "ci95_low": 0.14538265849466941,
+ "ci95_high": 0.21131793544435185,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.37038847422840715,
+ "ci95_low": 0.33134941384738215,
+ "ci95_high": 0.41588346803631976,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8503262372114581,
+ "ci95_low": 0.7873911486264039,
+ "ci95_high": 0.9060360854036947,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7574437206774722,
+ "ci95_low": 0.6997590543957498,
+ "ci95_high": 0.8143502158791877,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8695652173913043,
+ "ci95_low": 0.8,
+ "ci95_high": 0.9304347826086956,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.46648933125424114,
+ "ci95_low": 0.4234805623977596,
+ "ci95_high": 0.5024174731120753,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8321913851533603,
+ "ci95_low": 0.7720744783623196,
+ "ci95_high": 0.8889766981392194,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.2745708782756326,
+ "ci95_low": 0.23670671136691113,
+ "ci95_high": 0.31523679530746257,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9130434782608695,
+ "ci95_low": 0.8666666666666667,
+ "ci95_high": 0.9478260869565217,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8688046647230321,
+ "ci95_low": 0.8,
+ "ci95_high": 0.9294117647058824,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.17979557551424494,
+ "ci95_low": 0.14686783044111773,
+ "ci95_high": 0.21376380454558733,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.3702515118872446,
+ "ci95_low": 0.3269642806263735,
+ "ci95_high": 0.4150649917304779,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8494535039007377,
+ "ci95_low": 0.7826081450974193,
+ "ci95_high": 0.9100773186094581,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7567768141985177,
+ "ci95_low": 0.6925726300886754,
+ "ci95_high": 0.8113563724523583,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8688046647230321,
+ "ci95_low": 0.8064516129032258,
+ "ci95_high": 0.9294117647058824,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4665001971007424,
+ "ci95_low": 0.4256995121878762,
+ "ci95_high": 0.5078270674644423,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8314620478815273,
+ "ci95_low": 0.7696787903313166,
+ "ci95_high": 0.8906410425317538,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.2750235437007448,
+ "ci95_low": 0.23818381798326538,
+ "ci95_high": 0.3115220761737194,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9125364431486881,
+ "ci95_low": 0.8662790697674418,
+ "ci95_high": 0.9533527696793003,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/openai-gpt-5-mini/eval_records.jsonl b/data/evaluation/audio/openai-gpt-5-mini/eval_records.jsonl
new file mode 100644
index 0000000..496d108
--- /dev/null
+++ b/data/evaluation/audio/openai-gpt-5-mini/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60f7d3cddd3e2bc9986d4a1fe5190701080de7d1656a1f955c948a371527224b
+size 68933
diff --git a/data/evaluation/audio/openai-gpt-5-mini/eval_summary.json b/data/evaluation/audio/openai-gpt-5-mini/eval_summary.json
new file mode 100644
index 0000000..fd0e2db
--- /dev/null
+++ b/data/evaluation/audio/openai-gpt-5-mini/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_openai-gpt-5-mini_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "gpt-5-mini"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8869565217391304,
+ "ci95_low": 0.8260869565217391,
+ "ci95_high": 0.9391304347826087,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.14984868932531292,
+ "ci95_low": 0.12460358409224974,
+ "ci95_high": 0.17736389926544074,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.33004480767477434,
+ "ci95_low": 0.29320572428065145,
+ "ci95_high": 0.36745362259647435,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8342081436030478,
+ "ci95_low": 0.7727331616055219,
+ "ci95_high": 0.8902042578624381,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7882932906166985,
+ "ci95_low": 0.7301402645957245,
+ "ci95_high": 0.837629782495769,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8869565217391304,
+ "ci95_low": 0.8260869565217391,
+ "ci95_high": 0.9391304347826087,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.43803388020104506,
+ "ci95_low": 0.403692565481851,
+ "ci95_high": 0.4713744673389034,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.854068778031653,
+ "ci95_low": 0.7954616803896698,
+ "ci95_high": 0.9051028629123954,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.2399467485000437,
+ "ci95_low": 0.20867682990862976,
+ "ci95_high": 0.27129725422953327,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9246376811594202,
+ "ci95_low": 0.8840579710144928,
+ "ci95_high": 0.9594202898550724,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8862973760932945,
+ "ci95_low": 0.8240469208211144,
+ "ci95_high": 0.9473684210526315,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.15046905025360363,
+ "ci95_low": 0.12419931256411967,
+ "ci95_high": 0.17755300674086869,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.33032541070109706,
+ "ci95_low": 0.2928473244197168,
+ "ci95_high": 0.37125206821899537,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8335483169123796,
+ "ci95_low": 0.7780040240324673,
+ "ci95_high": 0.8888350170253181,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7879064634696153,
+ "ci95_low": 0.734563298211581,
+ "ci95_high": 0.8406384014219156,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8862973760932945,
+ "ci95_low": 0.8250728862973761,
+ "ci95_high": 0.938953488372093,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4381142592890268,
+ "ci95_low": 0.40415513710921935,
+ "ci95_high": 0.47548616228087365,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8535004052187347,
+ "ci95_low": 0.7913437183873487,
+ "ci95_high": 0.906641854412905,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.24039723047735034,
+ "ci95_low": 0.21035043951658064,
+ "ci95_high": 0.2719906593020342,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.924198250728863,
+ "ci95_low": 0.8833819241982507,
+ "ci95_high": 0.9590643274853801,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/openai-gpt-5/eval_records.jsonl b/data/evaluation/audio/openai-gpt-5/eval_records.jsonl
new file mode 100644
index 0000000..fa70a2a
--- /dev/null
+++ b/data/evaluation/audio/openai-gpt-5/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:828eaac3939e0e63012d5b74591ba6a868a2e1db8009ccbf787c21a1beff6287
+size 68410
diff --git a/data/evaluation/audio/openai-gpt-5/eval_summary.json b/data/evaluation/audio/openai-gpt-5/eval_summary.json
new file mode 100644
index 0000000..bf0a6ab
--- /dev/null
+++ b/data/evaluation/audio/openai-gpt-5/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_openai-gpt-5_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "gpt-5"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9130434782608695,
+ "ci95_low": 0.8608695652173913,
+ "ci95_high": 0.9565217391304348,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.18764825147324535,
+ "ci95_low": 0.15607753589881523,
+ "ci95_high": 0.22470178520072995,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.36356520839031264,
+ "ci95_low": 0.323274218717594,
+ "ci95_high": 0.4045207933731644,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.870452446927442,
+ "ci95_low": 0.8238599953672845,
+ "ci95_high": 0.9186351671511073,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8039853750280364,
+ "ci95_low": 0.7531235610487667,
+ "ci95_high": 0.84872567263727,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9130434782608695,
+ "ci95_low": 0.8608695652173913,
+ "ci95_high": 0.9652173913043478,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.473888635597,
+ "ci95_low": 0.4401159616720605,
+ "ci95_high": 0.5085687628598874,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8766907771832585,
+ "ci95_low": 0.8239021953414114,
+ "ci95_high": 0.921152415219854,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.27560672993177904,
+ "ci95_low": 0.23821828307473922,
+ "ci95_high": 0.30897310211105705,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9420289855072463,
+ "ci95_low": 0.9072463768115941,
+ "ci95_high": 0.9710144927536233,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9125364431486881,
+ "ci95_low": 0.8600583090379009,
+ "ci95_high": 0.9563953488372093,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.1882356256494922,
+ "ci95_low": 0.15847147668072956,
+ "ci95_high": 0.22349252310694379,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.36352713730205827,
+ "ci95_low": 0.32208685152100913,
+ "ci95_high": 0.4033570916236783,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8696970676092345,
+ "ci95_low": 0.8188532472190849,
+ "ci95_high": 0.916047699987058,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8034056125841978,
+ "ci95_low": 0.7523663778420959,
+ "ci95_high": 0.8497443810227716,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9125364431486881,
+ "ci95_low": 0.8517441860465116,
+ "ci95_high": 0.9565217391304348,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.4738199435202617,
+ "ci95_low": 0.43916032667101806,
+ "ci95_high": 0.5079022494181294,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8761594996271913,
+ "ci95_low": 0.8225365126627978,
+ "ci95_high": 0.9239694271532586,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.27588138147577523,
+ "ci95_low": 0.24148421587200156,
+ "ci95_high": 0.3116812050197701,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9416909620991254,
+ "ci95_low": 0.9067055393586005,
+ "ci95_high": 0.9709302325581395,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/audio/zai-org-GLM-4.7/eval_records.jsonl b/data/evaluation/audio/zai-org-GLM-4.7/eval_records.jsonl
new file mode 100644
index 0000000..cf4887f
--- /dev/null
+++ b/data/evaluation/audio/zai-org-GLM-4.7/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a4210551980b67e592deb4919568638de992bfe4f8dd1c639bfdfde0cef9320
+size 69694
diff --git a/data/evaluation/audio/zai-org-GLM-4.7/eval_summary.json b/data/evaluation/audio/zai-org-GLM-4.7/eval_summary.json
new file mode 100644
index 0000000..226b005
--- /dev/null
+++ b/data/evaluation/audio/zai-org-GLM-4.7/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/audio_responses/response_zai-org-GLM-4.7_audio.jsonl",
+ "num_records": 115,
+ "model_ids": [
+ "zai-org/GLM-4.7"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 1,
+ "json_non_structured_root_count": 1,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.991304347826087,
+ "ci95_low": 0.9739130434782609,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.991304347826087,
+ "ci95_low": 0.9739130434782609,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9043478260869565,
+ "ci95_low": 0.8521739130434782,
+ "ci95_high": 0.9565217391304348,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.219993416647882,
+ "ci95_low": 0.1864874473500824,
+ "ci95_high": 0.2542458982263171,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.45484885514479173,
+ "ci95_low": 0.4110348600973023,
+ "ci95_high": 0.5046794569799717,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8450639470904567,
+ "ci95_low": 0.7920862912640724,
+ "ci95_high": 0.892972394007315,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8311956753774273,
+ "ci95_low": 0.7798010644883785,
+ "ci95_high": 0.8788487595279411,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9043478260869565,
+ "ci95_low": 0.8521739130434782,
+ "ci95_high": 0.9565217391304348,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.5066354062943769,
+ "ci95_low": 0.46723826752135744,
+ "ci95_high": 0.5430513160005054,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8799637758504468,
+ "ci95_low": 0.8253074871457949,
+ "ci95_high": 0.9283942320061578,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.3374211358963369,
+ "ci95_low": 0.30055302248592686,
+ "ci95_high": 0.3766722635272676,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9333333333333332,
+ "ci95_low": 0.8927536231884058,
+ "ci95_high": 0.9681159420289854,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 115,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9912536443148688,
+ "ci95_low": 0.9736842105263158,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9912536443148688,
+ "ci95_low": 0.9736842105263158,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9037900874635568,
+ "ci95_low": 0.8513119533527697,
+ "ci95_high": 0.956268221574344,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.21942088952523514,
+ "ci95_low": 0.187576999016537,
+ "ci95_high": 0.25146697805116097,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.45420689773643125,
+ "ci95_low": 0.4101338316625595,
+ "ci95_high": 0.49850096638917807,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8441605298723254,
+ "ci95_low": 0.7873666545029532,
+ "ci95_high": 0.8914517556447745,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8304890556864447,
+ "ci95_low": 0.7699401438661703,
+ "ci95_high": 0.8753678505300467,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9037900874635568,
+ "ci95_low": 0.8434782608695652,
+ "ci95_high": 0.956140350877193,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.5059294390446639,
+ "ci95_low": 0.4678514811271154,
+ "ci95_high": 0.5429658057032338,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8793564102045195,
+ "ci95_low": 0.8218975240113924,
+ "ci95_high": 0.9281838258783739,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.3368138936308332,
+ "ci95_low": 0.30226662779579244,
+ "ci95_high": 0.3731445521080851,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9329446064139941,
+ "ci95_low": 0.8898550724637682,
+ "ci95_high": 0.9680232558139535,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.0,
+ "ci95_low": 0.0,
+ "ci95_high": 0.0,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/display_names.json b/data/evaluation/display_names.json
new file mode 100644
index 0000000..a2ef96f
--- /dev/null
+++ b/data/evaluation/display_names.json
@@ -0,0 +1,24 @@
+{
+ "_comment": "Maps model directory name (under data/evaluation/{text,image,audio}/) to the display name shown on the leaderboard. When a new model lands via PR, add an entry here. If a directory has no entry, the directory name itself is used.",
+ "openai-gpt-5-4": "GPT-5.4",
+ "openai-gpt-5": "GPT-5",
+ "openai-gpt-5-mini": "GPT-5-Mini",
+ "openai-gpt-4.1": "GPT-4.1",
+ "gemini-2.5-flash": "Gemini-2.5-Flash",
+ "gemini-3-flash": "Gemini-3-Flash-Preview",
+ "gemma-3-27b-it": "Gemma-3-27B",
+ "gemma-4-31b-it": "Gemma-4-31B",
+ "claude-sonnet-4-6": "Claude-Sonnet-4.6",
+ "zai-org-GLM-4.7": "GLM-4.7",
+ "Qwen3-235B-A22B-Instruct-2507": "Qwen3-235B",
+ "Qwen3-30B-A3B-Instruct-2507": "Qwen3-30B",
+ "Qwen3.5-35B-A3B": "Qwen3.5-35B",
+ "phi-4": "Phi-4",
+ "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "Nemotron-3-Nano-30B",
+ "DeepSeek-R1-Distill-Qwen-32B": "DS-R1-Distill-32B",
+ "Ministral-3-14B-Instruct-2512": "Ministral-3-14B",
+ "gpt-oss": "GPT-OSS-20B",
+ "inference-net-Schematron-8B": "Schematron-8B",
+ "ibm-granite-4.0-h-small": "IBM-Granite-4.0",
+ "interfaze-beta": "Interfaze-Beta"
+}
diff --git a/data/evaluation/image/DeepSeek-R1-Distill-Qwen-32B/eval_records.jsonl b/data/evaluation/image/DeepSeek-R1-Distill-Qwen-32B/eval_records.jsonl
new file mode 100644
index 0000000..881a5ea
--- /dev/null
+++ b/data/evaluation/image/DeepSeek-R1-Distill-Qwen-32B/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16272f7d07c843d7617e173b2e1de6f8c2703a69b7c968b599f7087d14e1eb51
+size 196284
diff --git a/data/evaluation/image/DeepSeek-R1-Distill-Qwen-32B/eval_summary.json b/data/evaluation/image/DeepSeek-R1-Distill-Qwen-32B/eval_summary.json
new file mode 100644
index 0000000..2287e83
--- /dev/null
+++ b/data/evaluation/image/DeepSeek-R1-Distill-Qwen-32B/eval_summary.json
@@ -0,0 +1,430 @@
+{
+ "response_file": "data/images_responses/response_DeepSeek-R1-Distill-Qwen-32B_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 13,
+ "json_non_structured_root_count": 13,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.937799043062201,
+ "ci95_low": 0.9043062200956937,
+ "ci95_high": 0.9665071770334929,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.937799043062201,
+ "ci95_low": 0.9043062200956937,
+ "ci95_high": 0.9712918660287081,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8803827751196173,
+ "ci95_low": 0.8325358851674641,
+ "ci95_high": 0.9234449760765551,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5097983981852483,
+ "ci95_low": 0.46559119479443567,
+ "ci95_high": 0.5551969194536229,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.6853164950929338,
+ "ci95_low": 0.6405300051523511,
+ "ci95_high": 0.7318028073636902,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.836839985101644,
+ "ci95_low": 0.7881943358205417,
+ "ci95_high": 0.8769507996887084,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8455488226719633,
+ "ci95_low": 0.8007698514165322,
+ "ci95_high": 0.8847692302051551,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8803827751196173,
+ "ci95_low": 0.8373205741626795,
+ "ci95_high": 0.9234449760765551,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.06698564593301436,
+ "ci95_low": 0.03827751196172249,
+ "ci95_high": 0.10047846889952153,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6773182927932754,
+ "ci95_low": 0.637405480528679,
+ "ci95_high": 0.7222549617410012,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8687714576370659,
+ "ci95_low": 0.8245136830278865,
+ "ci95_high": 0.9118720998220539,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.597557446639091,
+ "ci95_low": 0.5523337486298424,
+ "ci95_high": 0.6415346097959044,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8995215311004785,
+ "ci95_low": 0.8628389154704945,
+ "ci95_high": 0.9362041467304625,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.06698564593301436,
+ "ci95_low": 0.03349282296650718,
+ "ci95_high": 0.10047846889952153,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9368770764119602,
+ "ci95_low": 0.8991735537190083,
+ "ci95_high": 0.97,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9368770764119602,
+ "ci95_low": 0.9013377926421404,
+ "ci95_high": 0.9679595278246206,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8787375415282392,
+ "ci95_low": 0.8289036544850499,
+ "ci95_high": 0.9225589225589226,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5138763433266931,
+ "ci95_low": 0.46246256694290566,
+ "ci95_high": 0.5627520932816459,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.6869474274699517,
+ "ci95_low": 0.6404346457384374,
+ "ci95_high": 0.7303769664318212,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8351122860093958,
+ "ci95_low": 0.7914848459162734,
+ "ci95_high": 0.8799761949715916,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8437332974338013,
+ "ci95_low": 0.7934562110740424,
+ "ci95_high": 0.8847969354894487,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8787375415282392,
+ "ci95_low": 0.835,
+ "ci95_high": 0.9207920792079208,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.06478405315614617,
+ "ci95_low": 0.033277870216306155,
+ "ci95_high": 0.09966777408637874,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6786453522686803,
+ "ci95_low": 0.6328641249785265,
+ "ci95_high": 0.7200468859655313,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8670694601634267,
+ "ci95_low": 0.8214123221110936,
+ "ci95_high": 0.909389875841314,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6004118853983225,
+ "ci95_low": 0.5593624007398154,
+ "ci95_high": 0.6449258493697881,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8981173864894795,
+ "ci95_low": 0.8565193671576651,
+ "ci95_high": 0.9402568397543273,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.06478405315614617,
+ "ci95_low": 0.03442622950819672,
+ "ci95_high": 0.09833333333333333,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "table_title",
+ "count": 2
+ },
+ {
+ "path": "problem_addressed",
+ "count": 2
+ },
+ {
+ "path": "market_dynamics_differences[0].characteristics[3]",
+ "count": 1
+ },
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].name",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "table_title",
+ "count": 2
+ },
+ {
+ "path": "problem_addressed",
+ "count": 2
+ },
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "year",
+ "count": 1
+ },
+ {
+ "path": "schedule",
+ "count": 1
+ },
+ {
+ "path": "schedule[].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "distances",
+ "count": 1
+ },
+ {
+ "path": "distances.value",
+ "count": 1
+ },
+ {
+ "path": "distances.equation_number",
+ "count": 1
+ },
+ {
+ "path": "forces",
+ "count": 1
+ },
+ {
+ "path": "forces.electrical_force_value",
+ "count": 1
+ },
+ {
+ "path": "forces.magnetic_force_value",
+ "count": 1
+ },
+ {
+ "path": "forces.equation_number",
+ "count": 1
+ },
+ {
+ "path": "charge_relationship",
+ "count": 1
+ },
+ {
+ "path": "charge_relationship.equation",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/Ministral-3-14B-Instruct-2512/eval_records.jsonl b/data/evaluation/image/Ministral-3-14B-Instruct-2512/eval_records.jsonl
new file mode 100644
index 0000000..1f0c624
--- /dev/null
+++ b/data/evaluation/image/Ministral-3-14B-Instruct-2512/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76a4403b186fa196e7eeea1d80815255327e683726e9c5b8f75a58295bbafa09
+size 181020
diff --git a/data/evaluation/image/Ministral-3-14B-Instruct-2512/eval_summary.json b/data/evaluation/image/Ministral-3-14B-Instruct-2512/eval_summary.json
new file mode 100644
index 0000000..146aea3
--- /dev/null
+++ b/data/evaluation/image/Ministral-3-14B-Instruct-2512/eval_summary.json
@@ -0,0 +1,430 @@
+{
+ "response_file": "data/images_responses/response_Ministral-3-14B-Instruct-2512_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "mistralai/Ministral-3-14B-Instruct-2512"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 8,
+ "json_non_structured_root_count": 8,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9617224880382775,
+ "ci95_low": 0.9330143540669856,
+ "ci95_high": 0.9856459330143541,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9617224880382775,
+ "ci95_low": 0.9330143540669856,
+ "ci95_high": 0.9856459330143541,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8277511961722488,
+ "ci95_low": 0.7703349282296651,
+ "ci95_high": 0.8755980861244019,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.4499051587530121,
+ "ci95_low": 0.40251256023675724,
+ "ci95_high": 0.49676732465702766,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.6536280355163878,
+ "ci95_low": 0.6051622850663879,
+ "ci95_high": 0.7057241369722681,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.7986247463755467,
+ "ci95_low": 0.7420706173032029,
+ "ci95_high": 0.8502093826583359,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8003756144921973,
+ "ci95_low": 0.7441617744766166,
+ "ci95_high": 0.8467805156496399,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8277511961722488,
+ "ci95_low": 0.7703349282296651,
+ "ci95_high": 0.8755980861244019,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.06220095693779904,
+ "ci95_low": 0.03349282296650718,
+ "ci95_high": 0.09569377990430622,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6340526468816489,
+ "ci95_low": 0.5894993371881777,
+ "ci95_high": 0.6801399354584893,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8186260022788983,
+ "ci95_low": 0.7625530386201076,
+ "ci95_high": 0.8675865434113189,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.5517665971347,
+ "ci95_low": 0.5050956437780899,
+ "ci95_high": 0.5971232604129031,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8724082934609251,
+ "ci95_low": 0.8325358851674641,
+ "ci95_high": 0.9090909090909091,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.06220095693779904,
+ "ci95_low": 0.028708133971291867,
+ "ci95_high": 0.10047846889952153,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9617940199335548,
+ "ci95_low": 0.9352159468438538,
+ "ci95_high": 0.9866666666666667,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9617940199335548,
+ "ci95_low": 0.934453781512605,
+ "ci95_high": 0.9867109634551495,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8239202657807309,
+ "ci95_low": 0.768595041322314,
+ "ci95_high": 0.8768971332209107,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.4537987414457645,
+ "ci95_low": 0.40768027860793504,
+ "ci95_high": 0.5032478964596976,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.6523615921405755,
+ "ci95_low": 0.6040680366919295,
+ "ci95_high": 0.7049964133118529,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.7945701074932833,
+ "ci95_low": 0.7423847524781415,
+ "ci95_high": 0.8458618637485605,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7966504347390257,
+ "ci95_low": 0.7394714905143133,
+ "ci95_high": 0.8487872771621229,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8239202657807309,
+ "ci95_low": 0.7698675496688742,
+ "ci95_high": 0.8791390728476821,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.061461794019933555,
+ "ci95_low": 0.03166666666666667,
+ "ci95_high": 0.09656301145662848,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6335768136932077,
+ "ci95_low": 0.5859964701055657,
+ "ci95_high": 0.6804053907342742,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8148303221001625,
+ "ci95_low": 0.7580031719815242,
+ "ci95_high": 0.868182482744087,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.55308016679317,
+ "ci95_low": 0.5067946593831067,
+ "ci95_high": 0.5976120923380849,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8698781838316721,
+ "ci95_low": 0.828116419549698,
+ "ci95_high": 0.9112227805695142,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.061461794019933555,
+ "ci95_low": 0.030303030303030304,
+ "ci95_high": 0.0978441127694859,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "table_title",
+ "count": 2
+ },
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].is_meeting",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "table_title",
+ "count": 2
+ },
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "year",
+ "count": 1
+ },
+ {
+ "path": "schedule",
+ "count": 1
+ },
+ {
+ "path": "schedule[].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "distances",
+ "count": 1
+ },
+ {
+ "path": "distances.value",
+ "count": 1
+ },
+ {
+ "path": "distances.equation_number",
+ "count": 1
+ },
+ {
+ "path": "forces",
+ "count": 1
+ },
+ {
+ "path": "forces.electrical_force_value",
+ "count": 1
+ },
+ {
+ "path": "forces.magnetic_force_value",
+ "count": 1
+ },
+ {
+ "path": "forces.equation_number",
+ "count": 1
+ },
+ {
+ "path": "charge_relationship",
+ "count": 1
+ },
+ {
+ "path": "charge_relationship.equation",
+ "count": 1
+ },
+ {
+ "path": "charge_relationship.equation_number",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_records.jsonl b/data/evaluation/image/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_records.jsonl
new file mode 100644
index 0000000..5fd5938
--- /dev/null
+++ b/data/evaluation/image/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5ac70e672487b7d668237adcd3a4e777facfc04f7c200db7c14885edbccc61a
+size 178780
diff --git a/data/evaluation/image/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_summary.json b/data/evaluation/image/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_summary.json
new file mode 100644
index 0000000..ea6d2ae
--- /dev/null
+++ b/data/evaluation/image/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_summary.json
@@ -0,0 +1,430 @@
+{
+ "response_file": "data/images_responses/response_NVIDIA-Nemotron-3-Nano-30B-A3B-BF16_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 4,
+ "json_non_structured_root_count": 4,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9808612440191388,
+ "ci95_low": 0.9617224880382775,
+ "ci95_high": 0.9952153110047847,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9808612440191388,
+ "ci95_low": 0.9617224880382775,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9043062200956937,
+ "ci95_low": 0.8660287081339713,
+ "ci95_high": 0.9425837320574163,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.4986360960668832,
+ "ci95_low": 0.4551925771151853,
+ "ci95_high": 0.543497172731699,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.6902427380456573,
+ "ci95_low": 0.6470529318481971,
+ "ci95_high": 0.7356808156265139,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.875241720991044,
+ "ci95_low": 0.8338715476467754,
+ "ci95_high": 0.9128799118926574,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8628857788475326,
+ "ci95_low": 0.8212923600326185,
+ "ci95_high": 0.899496163289154,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9043062200956937,
+ "ci95_low": 0.861244019138756,
+ "ci95_high": 0.9425837320574163,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.05263157894736842,
+ "ci95_low": 0.023923444976076555,
+ "ci95_high": 0.0861244019138756,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6880401850345281,
+ "ci95_low": 0.6500949053421738,
+ "ci95_high": 0.726071422836662,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8904994063463068,
+ "ci95_low": 0.8516981033256984,
+ "ci95_high": 0.926746241973707,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.5944394170562702,
+ "ci95_low": 0.5537763655961999,
+ "ci95_high": 0.6362279596587371,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9298245614035088,
+ "ci95_low": 0.89792663476874,
+ "ci95_high": 0.9585326953748007,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.05263157894736842,
+ "ci95_low": 0.023923444976076555,
+ "ci95_high": 0.0861244019138756,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9817275747508306,
+ "ci95_low": 0.9616666666666667,
+ "ci95_high": 0.9966777408637874,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9817275747508306,
+ "ci95_low": 0.9618573797678275,
+ "ci95_high": 0.9966887417218543,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9036544850498339,
+ "ci95_low": 0.8592964824120602,
+ "ci95_high": 0.9421487603305785,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5009502484986046,
+ "ci95_low": 0.45329234677384267,
+ "ci95_high": 0.5474382950374237,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.6912722491782539,
+ "ci95_low": 0.64696977095067,
+ "ci95_high": 0.7366229582287556,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8741836864840612,
+ "ci95_low": 0.8365925712657636,
+ "ci95_high": 0.9135834439544048,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8618906142427721,
+ "ci95_low": 0.8172137426704003,
+ "ci95_high": 0.8993370552492892,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9036544850498339,
+ "ci95_low": 0.8653198653198653,
+ "ci95_high": 0.9417637271214643,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.053156146179401995,
+ "ci95_low": 0.024958402662229616,
+ "ci95_high": 0.08710217755443886,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6888020613869732,
+ "ci95_low": 0.650570493151225,
+ "ci95_high": 0.7280025855845489,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8897331947808134,
+ "ci95_low": 0.8469243715208753,
+ "ci95_high": 0.9273612458349629,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.5961112488384291,
+ "ci95_low": 0.5540527530668902,
+ "ci95_high": 0.635539846096054,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9296788482834993,
+ "ci95_low": 0.8976071229827489,
+ "ci95_high": 0.9601990049751243,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.053156146179401995,
+ "ci95_low": 0.023333333333333334,
+ "ci95_high": 0.08745874587458746,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].name",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "year",
+ "count": 1
+ },
+ {
+ "path": "schedule",
+ "count": 1
+ },
+ {
+ "path": "schedule[].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "distances",
+ "count": 1
+ },
+ {
+ "path": "distances.value",
+ "count": 1
+ },
+ {
+ "path": "distances.equation_number",
+ "count": 1
+ },
+ {
+ "path": "forces",
+ "count": 1
+ },
+ {
+ "path": "forces.electrical_force_value",
+ "count": 1
+ },
+ {
+ "path": "forces.magnetic_force_value",
+ "count": 1
+ },
+ {
+ "path": "forces.equation_number",
+ "count": 1
+ },
+ {
+ "path": "charge_relationship",
+ "count": 1
+ },
+ {
+ "path": "charge_relationship.equation",
+ "count": 1
+ },
+ {
+ "path": "charge_relationship.equation_number",
+ "count": 1
+ },
+ {
+ "path": "solution_condition_for_forces",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/Qwen3-235B-A22B-Instruct-2507/eval_records.jsonl b/data/evaluation/image/Qwen3-235B-A22B-Instruct-2507/eval_records.jsonl
new file mode 100644
index 0000000..12fc860
--- /dev/null
+++ b/data/evaluation/image/Qwen3-235B-A22B-Instruct-2507/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1365d29c5103b71524599adf0f3284d2f6547c53255d095337f6857fad54a132
+size 176950
diff --git a/data/evaluation/image/Qwen3-235B-A22B-Instruct-2507/eval_summary.json b/data/evaluation/image/Qwen3-235B-A22B-Instruct-2507/eval_summary.json
new file mode 100644
index 0000000..d29c92e
--- /dev/null
+++ b/data/evaluation/image/Qwen3-235B-A22B-Instruct-2507/eval_summary.json
@@ -0,0 +1,430 @@
+{
+ "response_file": "data/images_responses/response_Qwen3-235B-A22B-Instruct-2507_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "Qwen/Qwen3-235B-A22B-Instruct-2507"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 3,
+ "json_non_structured_root_count": 3,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9856459330143541,
+ "ci95_low": 0.9665071770334929,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9856459330143541,
+ "ci95_low": 0.9665071770334929,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9090909090909091,
+ "ci95_low": 0.8660287081339713,
+ "ci95_high": 0.9473684210526315,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5475977620392481,
+ "ci95_low": 0.5002169896822971,
+ "ci95_high": 0.5922620803707379,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7410753077466709,
+ "ci95_low": 0.6965480816704321,
+ "ci95_high": 0.7826775226809843,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8866781011108038,
+ "ci95_low": 0.8437065706038455,
+ "ci95_high": 0.9234270257188399,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8827035298740187,
+ "ci95_low": 0.8440330478982239,
+ "ci95_high": 0.9171474901237849,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9090909090909091,
+ "ci95_low": 0.8660287081339713,
+ "ci95_high": 0.9473684210526315,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.08133971291866028,
+ "ci95_low": 0.04784688995215311,
+ "ci95_high": 0.11961722488038277,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7251170569655743,
+ "ci95_low": 0.6882470166671854,
+ "ci95_high": 0.7622700177027153,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9002951160186122,
+ "ci95_low": 0.8582124261370087,
+ "ci95_high": 0.9369878554212718,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6443365348929595,
+ "ci95_low": 0.6033688496395414,
+ "ci95_high": 0.683709321756958,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9346092503987241,
+ "ci95_low": 0.9059011164274322,
+ "ci95_high": 0.9617224880382775,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.08133971291866028,
+ "ci95_low": 0.04784688995215311,
+ "ci95_high": 0.12440191387559808,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9850498338870431,
+ "ci95_low": 0.9654036243822076,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9850498338870431,
+ "ci95_low": 0.9654036243822076,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9069767441860465,
+ "ci95_low": 0.8632619439868204,
+ "ci95_high": 0.945,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.552426406983434,
+ "ci95_low": 0.5056068849200928,
+ "ci95_high": 0.5989406405821298,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7417654805807454,
+ "ci95_low": 0.6949960144108521,
+ "ci95_high": 0.7846087486054573,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8845287619528559,
+ "ci95_low": 0.8427290870552795,
+ "ci95_high": 0.922741461841828,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8808113560793797,
+ "ci95_low": 0.8405362683947608,
+ "ci95_high": 0.9200523377929823,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9069767441860465,
+ "ci95_low": 0.87,
+ "ci95_high": 0.9467554076539102,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.07807308970099668,
+ "ci95_low": 0.04576271186440678,
+ "ci95_high": 0.11647254575707154,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7262402165056785,
+ "ci95_low": 0.6862687290881735,
+ "ci95_high": 0.7628186581548497,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8982549481504909,
+ "ci95_low": 0.8594774831715853,
+ "ci95_high": 0.9360116636617524,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6470959437820898,
+ "ci95_low": 0.6039216938355516,
+ "ci95_high": 0.686910252633198,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.933001107419712,
+ "ci95_low": 0.9024793388429752,
+ "ci95_high": 0.962032384142937,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.07807308970099668,
+ "ci95_low": 0.04304635761589404,
+ "ci95_high": 0.11774461028192372,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].name",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "year",
+ "count": 1
+ },
+ {
+ "path": "schedule",
+ "count": 1
+ },
+ {
+ "path": "schedule[].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types[].name",
+ "count": 1
+ },
+ {
+ "path": "conclusions",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].id",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].authors",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].title",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].journal_info",
+ "count": 1
+ },
+ {
+ "path": "table_title",
+ "count": 1
+ },
+ {
+ "path": "base_case_inputs",
+ "count": 1
+ },
+ {
+ "path": "base_case_inputs[].parameter_name",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/Qwen3-30B-A3B-Instruct-2507/eval_records.jsonl b/data/evaluation/image/Qwen3-30B-A3B-Instruct-2507/eval_records.jsonl
new file mode 100644
index 0000000..fc82f5c
--- /dev/null
+++ b/data/evaluation/image/Qwen3-30B-A3B-Instruct-2507/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00172789c5001b0c153c098a4bced89ddffd33e868d6065f1176c9d5ecee0dbc
+size 182378
diff --git a/data/evaluation/image/Qwen3-30B-A3B-Instruct-2507/eval_summary.json b/data/evaluation/image/Qwen3-30B-A3B-Instruct-2507/eval_summary.json
new file mode 100644
index 0000000..276886f
--- /dev/null
+++ b/data/evaluation/image/Qwen3-30B-A3B-Instruct-2507/eval_summary.json
@@ -0,0 +1,430 @@
+{
+ "response_file": "data/images_responses/response_Qwen3-30B-A3B-Instruct-2507_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "Qwen/Qwen3-30B-A3B-Instruct-2507"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 5,
+ "json_non_structured_root_count": 5,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9760765550239234,
+ "ci95_low": 0.9521531100478469,
+ "ci95_high": 0.9952153110047847,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9760765550239234,
+ "ci95_low": 0.9521531100478469,
+ "ci95_high": 0.9952153110047847,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8851674641148325,
+ "ci95_low": 0.8421052631578947,
+ "ci95_high": 0.9282296650717703,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5170256821070236,
+ "ci95_low": 0.47193153031069507,
+ "ci95_high": 0.5639553069482756,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7085530506936046,
+ "ci95_low": 0.663177591683751,
+ "ci95_high": 0.7554693705910928,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8562322340004953,
+ "ci95_low": 0.8120568051926284,
+ "ci95_high": 0.8985226990501056,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8506217888473663,
+ "ci95_low": 0.8047028472474136,
+ "ci95_high": 0.8878544051765538,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8851674641148325,
+ "ci95_low": 0.8373205741626795,
+ "ci95_high": 0.9234449760765551,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.10526315789473684,
+ "ci95_low": 0.06698564593301436,
+ "ci95_high": 0.14832535885167464,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6939369889337079,
+ "ci95_low": 0.6540415197173666,
+ "ci95_high": 0.736949493450778,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8736522390256771,
+ "ci95_low": 0.830049299137168,
+ "ci95_high": 0.9131291007955166,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6127893664003141,
+ "ci95_low": 0.5695001751592066,
+ "ci95_high": 0.6555688298066883,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9154704944178629,
+ "ci95_low": 0.8803827751196173,
+ "ci95_high": 0.9441786283891548,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.10526315789473684,
+ "ci95_low": 0.06698564593301436,
+ "ci95_high": 0.14832535885167464,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9767441860465116,
+ "ci95_low": 0.9548494983277592,
+ "ci95_high": 0.995049504950495,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9767441860465116,
+ "ci95_low": 0.9534883720930233,
+ "ci95_high": 0.9950413223140496,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8837209302325582,
+ "ci95_low": 0.8366666666666667,
+ "ci95_high": 0.9238410596026491,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5214395541148268,
+ "ci95_low": 0.4720740307578211,
+ "ci95_high": 0.5704794300179254,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7102705619940225,
+ "ci95_low": 0.6602562341862664,
+ "ci95_high": 0.7560421089734237,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8542859641210138,
+ "ci95_low": 0.8079237758294121,
+ "ci95_high": 0.8969924021309268,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8491071199701832,
+ "ci95_low": 0.8034489346076047,
+ "ci95_high": 0.8897433093200564,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8837209302325582,
+ "ci95_low": 0.8380165289256198,
+ "ci95_high": 0.9248747913188647,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.10299003322259136,
+ "ci95_low": 0.06700167504187604,
+ "ci95_high": 0.14735099337748345,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6953320267432876,
+ "ci95_low": 0.6541252612871202,
+ "ci95_high": 0.7379661959260425,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8721829934784331,
+ "ci95_low": 0.8283448484501655,
+ "ci95_high": 0.9172818990270705,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6158550580544246,
+ "ci95_low": 0.5742887998920191,
+ "ci95_high": 0.6571210293752923,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9147286821705426,
+ "ci95_low": 0.8782894736842105,
+ "ci95_high": 0.9466666666666667,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.10299003322259136,
+ "ci95_low": 0.06239737274220033,
+ "ci95_high": 0.14691151919866444,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].name",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "year",
+ "count": 1
+ },
+ {
+ "path": "schedule",
+ "count": 1
+ },
+ {
+ "path": "schedule[].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types[].name",
+ "count": 1
+ },
+ {
+ "path": "conclusions",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].id",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].authors",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].title",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].journal_info",
+ "count": 1
+ },
+ {
+ "path": "table_title",
+ "count": 1
+ },
+ {
+ "path": "base_case_inputs",
+ "count": 1
+ },
+ {
+ "path": "base_case_inputs[].parameter_name",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/Qwen3.5-35B-A3B/eval_records.jsonl b/data/evaluation/image/Qwen3.5-35B-A3B/eval_records.jsonl
new file mode 100644
index 0000000..c129a60
--- /dev/null
+++ b/data/evaluation/image/Qwen3.5-35B-A3B/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca26738a18ed259b2c2b09c2e8f236be73b6f9c2e88b28803c70f5cda92591af
+size 174114
diff --git a/data/evaluation/image/Qwen3.5-35B-A3B/eval_summary.json b/data/evaluation/image/Qwen3.5-35B-A3B/eval_summary.json
new file mode 100644
index 0000000..3670129
--- /dev/null
+++ b/data/evaluation/image/Qwen3.5-35B-A3B/eval_summary.json
@@ -0,0 +1,430 @@
+{
+ "response_file": "data/images_responses/response_Qwen3.5-35B-A3B_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "Qwen/Qwen3.5-35B-A3B"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 5,
+ "json_non_structured_root_count": 5,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9760765550239234,
+ "ci95_low": 0.9521531100478469,
+ "ci95_high": 0.9952153110047847,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9760765550239234,
+ "ci95_low": 0.9521531100478469,
+ "ci95_high": 0.9952153110047847,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8564593301435407,
+ "ci95_low": 0.8038277511961722,
+ "ci95_high": 0.8995215311004785,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5457764656471178,
+ "ci95_low": 0.4968260673485736,
+ "ci95_high": 0.5949782532176893,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7157037780619488,
+ "ci95_low": 0.6678018134417617,
+ "ci95_high": 0.7616999065292073,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8368859206672693,
+ "ci95_low": 0.7882694890755966,
+ "ci95_high": 0.8850011841685638,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8318474569340153,
+ "ci95_low": 0.7844882742833287,
+ "ci95_high": 0.8749621710279943,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8564593301435407,
+ "ci95_low": 0.8038277511961722,
+ "ci95_high": 0.9043062200956937,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.10047846889952153,
+ "ci95_low": 0.06220095693779904,
+ "ci95_high": 0.14354066985645933,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6994553881254453,
+ "ci95_low": 0.6588080259454268,
+ "ci95_high": 0.7433657230222565,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8482553724070323,
+ "ci95_low": 0.7960026863763052,
+ "ci95_high": 0.8918157832176392,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6307401218545334,
+ "ci95_low": 0.5803459345693692,
+ "ci95_high": 0.6719575947325611,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8963317384370016,
+ "ci95_low": 0.8628389154704945,
+ "ci95_high": 0.9282296650717703,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.10047846889952153,
+ "ci95_low": 0.06220095693779904,
+ "ci95_high": 0.14354066985645933,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9767441860465116,
+ "ci95_low": 0.9533333333333334,
+ "ci95_high": 0.995049504950495,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9767441860465116,
+ "ci95_low": 0.9543230016313213,
+ "ci95_high": 0.995049504950495,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8538205980066446,
+ "ci95_low": 0.8003355704697986,
+ "ci95_high": 0.8990066225165563,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5497369959773195,
+ "ci95_low": 0.49846824755725333,
+ "ci95_high": 0.6016381966452143,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7174848193518577,
+ "ci95_low": 0.6681320389746879,
+ "ci95_high": 0.7652224286376529,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8342673674748519,
+ "ci95_low": 0.7855170967509938,
+ "ci95_high": 0.8793450001276872,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8294552262044126,
+ "ci95_low": 0.7775311727630003,
+ "ci95_high": 0.8768093568366971,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8538205980066446,
+ "ci95_low": 0.8059701492537313,
+ "ci95_high": 0.900990099009901,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.09800664451827243,
+ "ci95_low": 0.057947019867549666,
+ "ci95_high": 0.14144736842105263,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7004963942680097,
+ "ci95_low": 0.6542447512828945,
+ "ci95_high": 0.7460862637005548,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8456988074059006,
+ "ci95_low": 0.7962728248080696,
+ "ci95_high": 0.8929524859313152,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6336109076645885,
+ "ci95_low": 0.5909922205637855,
+ "ci95_high": 0.676093371149852,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8947951273532668,
+ "ci95_low": 0.8582502768549279,
+ "ci95_high": 0.9316804407713498,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.09800664451827243,
+ "ci95_low": 0.06271186440677966,
+ "ci95_high": 0.13856427378964942,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].name",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "year",
+ "count": 1
+ },
+ {
+ "path": "schedule",
+ "count": 1
+ },
+ {
+ "path": "schedule[].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types[].name",
+ "count": 1
+ },
+ {
+ "path": "conclusions",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].id",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].authors",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].title",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].journal_info",
+ "count": 1
+ },
+ {
+ "path": "table_title",
+ "count": 1
+ },
+ {
+ "path": "base_case_inputs",
+ "count": 1
+ },
+ {
+ "path": "base_case_inputs[].parameter_name",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/claude-sonnet-4-6/eval_records.jsonl b/data/evaluation/image/claude-sonnet-4-6/eval_records.jsonl
new file mode 100644
index 0000000..2d25a0d
--- /dev/null
+++ b/data/evaluation/image/claude-sonnet-4-6/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42ec2549b4100fed89bb62e83d7e925a06e2a70d8b14abb95757bc6c20e3613b
+size 140534
diff --git a/data/evaluation/image/claude-sonnet-4-6/eval_summary.json b/data/evaluation/image/claude-sonnet-4-6/eval_summary.json
new file mode 100644
index 0000000..a242098
--- /dev/null
+++ b/data/evaluation/image/claude-sonnet-4-6/eval_summary.json
@@ -0,0 +1,402 @@
+{
+ "response_file": "data/images_responses/response_claude-sonnet-4-6_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "claude-sonnet-4-6"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 1,
+ "json_non_structured_root_count": 1,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9952153110047847,
+ "ci95_low": 0.9856459330143541,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9952153110047847,
+ "ci95_low": 0.9856459330143541,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8899521531100478,
+ "ci95_low": 0.84688995215311,
+ "ci95_high": 0.9330143540669856,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.46322408724942715,
+ "ci95_low": 0.4171161439663302,
+ "ci95_high": 0.5113184088361302,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7127956535262018,
+ "ci95_low": 0.666699005317852,
+ "ci95_high": 0.7560257092312607,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8796547277497109,
+ "ci95_low": 0.8360024347600025,
+ "ci95_high": 0.9206907141988876,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8576988269883881,
+ "ci95_low": 0.8149811794607668,
+ "ci95_high": 0.8952029303290155,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8899521531100478,
+ "ci95_low": 0.8421052631578947,
+ "ci95_high": 0.9282296650717703,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.07655502392344497,
+ "ci95_low": 0.0430622009569378,
+ "ci95_high": 0.11483253588516747,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6852248228417799,
+ "ci95_low": 0.6487369353328891,
+ "ci95_high": 0.7224483027476718,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.879201044402828,
+ "ci95_low": 0.8366193714904446,
+ "ci95_high": 0.9175212137719068,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.5880098703878145,
+ "ci95_low": 0.5471670036696672,
+ "ci95_high": 0.6302831162506575,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9250398724082936,
+ "ci95_low": 0.8931419457735247,
+ "ci95_high": 0.9521531100478469,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.07655502392344497,
+ "ci95_low": 0.0430622009569378,
+ "ci95_high": 0.11483253588516747,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9950166112956811,
+ "ci95_low": 0.9849246231155779,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9950166112956811,
+ "ci95_low": 0.9849749582637729,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8870431893687708,
+ "ci95_low": 0.8430717863105175,
+ "ci95_high": 0.9271523178807947,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.4661018625555611,
+ "ci95_low": 0.41724579744514123,
+ "ci95_high": 0.51049626165023,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7140311166650616,
+ "ci95_low": 0.664476360314895,
+ "ci95_high": 0.7596140437978912,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.876725861867979,
+ "ci95_low": 0.831202678254868,
+ "ci95_high": 0.9195006115869611,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8553356827093228,
+ "ci95_low": 0.8097726580188132,
+ "ci95_high": 0.8967390957827666,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8870431893687708,
+ "ci95_low": 0.841845140032949,
+ "ci95_high": 0.9283333333333333,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.07142857142857142,
+ "ci95_low": 0.040983606557377046,
+ "ci95_high": 0.10720268006700168,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6856196136962005,
+ "ci95_low": 0.6402084142374221,
+ "ci95_high": 0.7268032657230978,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8764740204822881,
+ "ci95_low": 0.8349663262192057,
+ "ci95_high": 0.9166069810497288,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.5900664896103114,
+ "ci95_low": 0.5489942566045877,
+ "ci95_high": 0.6321123651910036,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9230343300110742,
+ "ci95_low": 0.8922222222222222,
+ "ci95_high": 0.9529346622369878,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.07142857142857142,
+ "ci95_low": 0.03966942148760331,
+ "ci95_high": 0.10942760942760943,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "key_advantages[10]",
+ "count": 1
+ },
+ {
+ "path": "key_advantages[11]",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[3].benefit",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[3].configuration",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[3].type",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[4].benefit",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[4].type",
+ "count": 1
+ },
+ {
+ "path": "inner_model_assessment.r_squared_interpretation[4].context",
+ "count": 1
+ },
+ {
+ "path": "inner_model_assessment.r_squared_interpretation[4].description",
+ "count": 1
+ },
+ {
+ "path": "inner_model_assessment.r_squared_interpretation[4].r_squared_value",
+ "count": 1
+ },
+ {
+ "path": "application_examples[0]",
+ "count": 1
+ },
+ {
+ "path": "application_examples[1]",
+ "count": 1
+ },
+ {
+ "path": "application_examples[2]",
+ "count": 1
+ },
+ {
+ "path": "application_examples[3]",
+ "count": 1
+ },
+ {
+ "path": "application_examples[4]",
+ "count": 1
+ },
+ {
+ "path": "application_programs[0]",
+ "count": 1
+ },
+ {
+ "path": "application_programs[1]",
+ "count": 1
+ },
+ {
+ "path": "application_programs[2]",
+ "count": 1
+ },
+ {
+ "path": "application_programs[3]",
+ "count": 1
+ },
+ {
+ "path": "available_languages[0]",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "software_name",
+ "count": 1
+ },
+ {
+ "path": "programming_modes",
+ "count": 1
+ },
+ {
+ "path": "programming_modes[].name",
+ "count": 1
+ },
+ {
+ "path": "programming_modes[].description",
+ "count": 1
+ },
+ {
+ "path": "available_languages",
+ "count": 1
+ },
+ {
+ "path": "supported_operating_systems",
+ "count": 1
+ },
+ {
+ "path": "application_programs",
+ "count": 1
+ },
+ {
+ "path": "expert_programs",
+ "count": 1
+ },
+ {
+ "path": "key_performance_features",
+ "count": 1
+ },
+ {
+ "path": "download_information",
+ "count": 1
+ },
+ {
+ "path": "download_information.website",
+ "count": 1
+ },
+ {
+ "path": "download_information.other_option",
+ "count": 1
+ },
+ {
+ "path": "application_examples",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/gemini-2.5-flash/eval_records.jsonl b/data/evaluation/image/gemini-2.5-flash/eval_records.jsonl
new file mode 100644
index 0000000..f78958c
--- /dev/null
+++ b/data/evaluation/image/gemini-2.5-flash/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d8e1cb30fbb024f06cc1e010f2d2e0d4c0784018a7607248b65c1403c1bfc8d
+size 269576
diff --git a/data/evaluation/image/gemini-2.5-flash/eval_summary.json b/data/evaluation/image/gemini-2.5-flash/eval_summary.json
new file mode 100644
index 0000000..a66dcfb
--- /dev/null
+++ b/data/evaluation/image/gemini-2.5-flash/eval_summary.json
@@ -0,0 +1,430 @@
+{
+ "response_file": "data/images_responses/response_gemini-2.5-flash_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "gemini-2.5-flash"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 43,
+ "json_non_structured_root_count": 43,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.7942583732057417,
+ "ci95_low": 0.7368421052631579,
+ "ci95_high": 0.84688995215311,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.7942583732057417,
+ "ci95_low": 0.7368421052631579,
+ "ci95_high": 0.84688995215311,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.7942583732057417,
+ "ci95_low": 0.7320574162679426,
+ "ci95_high": 0.8516746411483254,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5581504918558131,
+ "ci95_low": 0.5069925412207336,
+ "ci95_high": 0.6055466958326258,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.6942705493708262,
+ "ci95_low": 0.6409802852844902,
+ "ci95_high": 0.7449018604901195,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.7839198789649342,
+ "ci95_low": 0.7302441350752867,
+ "ci95_high": 0.8339612780748692,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7735826719741372,
+ "ci95_low": 0.7232873799549806,
+ "ci95_high": 0.8271017251164885,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.7942583732057417,
+ "ci95_low": 0.7320574162679426,
+ "ci95_high": 0.8516746411483254,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.1339712918660287,
+ "ci95_low": 0.09090909090909091,
+ "ci95_high": 0.18181818181818182,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6787803067305246,
+ "ci95_low": 0.6294875896696748,
+ "ci95_high": 0.7298248912254501,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.7873664727952068,
+ "ci95_low": 0.7340260377266811,
+ "ci95_high": 0.8403236685373507,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6262105206133196,
+ "ci95_low": 0.5767872063536326,
+ "ci95_high": 0.6753797994655193,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.7942583732057417,
+ "ci95_low": 0.7368421052631579,
+ "ci95_high": 0.84688995215311,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.1339712918660287,
+ "ci95_low": 0.0861244019138756,
+ "ci95_high": 0.17703349282296652,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.7857142857142857,
+ "ci95_low": 0.7286184210526315,
+ "ci95_high": 0.8447412353923205,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.7857142857142857,
+ "ci95_low": 0.728171334431631,
+ "ci95_high": 0.8410596026490066,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.7857142857142857,
+ "ci95_low": 0.7286432160804021,
+ "ci95_high": 0.8344481605351171,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5588880686568083,
+ "ci95_low": 0.5061474769678147,
+ "ci95_high": 0.6146928770085549,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.6910651659956816,
+ "ci95_low": 0.6320942498753812,
+ "ci95_high": 0.7391344916952974,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.7760184623195415,
+ "ci95_low": 0.7214062725044672,
+ "ci95_high": 0.8324139585339299,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.765658131440789,
+ "ci95_low": 0.7080083441431956,
+ "ci95_high": 0.8197933006746391,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.7857142857142857,
+ "ci95_low": 0.7299509001636661,
+ "ci95_high": 0.841845140032949,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.12956810631229235,
+ "ci95_low": 0.0858085808580858,
+ "ci95_high": 0.1812191103789127,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6753238989906771,
+ "ci95_low": 0.6209115911272619,
+ "ci95_high": 0.724523646525512,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.7790289009564535,
+ "ci95_low": 0.7272977595471399,
+ "ci95_high": 0.8327368464428543,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6249766173262449,
+ "ci95_low": 0.5766569869247319,
+ "ci95_high": 0.676129354504673,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.7857142857142857,
+ "ci95_low": 0.7312703583061889,
+ "ci95_high": 0.8360927152317881,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.12956810631229235,
+ "ci95_low": 0.08609271523178808,
+ "ci95_high": 0.17637271214642264,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "associated_problems[5]",
+ "count": 1
+ },
+ {
+ "path": "postdoc_details.development_areas[1]",
+ "count": 1
+ },
+ {
+ "path": "chemical_formula",
+ "count": 1
+ },
+ {
+ "path": "etymology.color_allusion",
+ "count": 1
+ },
+ {
+ "path": "etymology.meaning",
+ "count": 1
+ },
+ {
+ "path": "etymology.origin_language",
+ "count": 1
+ },
+ {
+ "path": "etymology.root_word",
+ "count": 1
+ },
+ {
+ "path": "formation_process_summary",
+ "count": 1
+ },
+ {
+ "path": "general_description",
+ "count": 1
+ },
+ {
+ "path": "geographic_sources[0].country",
+ "count": 1
+ },
+ {
+ "path": "geographic_sources[0].location_details",
+ "count": 1
+ },
+ {
+ "path": "geographic_sources[0].mine_name",
+ "count": 1
+ },
+ {
+ "path": "geographic_sources[0].status",
+ "count": 1
+ },
+ {
+ "path": "geographic_sources[1].country",
+ "count": 1
+ },
+ {
+ "path": "geographic_sources[1].location_details",
+ "count": 1
+ },
+ {
+ "path": "geographic_sources[1].mine_name",
+ "count": 1
+ },
+ {
+ "path": "geographic_sources[1].status",
+ "count": 1
+ },
+ {
+ "path": "geographic_sources[2].country",
+ "count": 1
+ },
+ {
+ "path": "geographic_sources[2].location_details",
+ "count": 1
+ },
+ {
+ "path": "geographic_sources[2].mine_name",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "mineral_name",
+ "count": 1
+ },
+ {
+ "path": "general_description",
+ "count": 1
+ },
+ {
+ "path": "etymology",
+ "count": 1
+ },
+ {
+ "path": "etymology.origin_language",
+ "count": 1
+ },
+ {
+ "path": "etymology.root_word",
+ "count": 1
+ },
+ {
+ "path": "etymology.meaning",
+ "count": 1
+ },
+ {
+ "path": "etymology.color_allusion",
+ "count": 1
+ },
+ {
+ "path": "chemical_formula",
+ "count": 1
+ },
+ {
+ "path": "formation_process_summary",
+ "count": 1
+ },
+ {
+ "path": "physical_properties",
+ "count": 1
+ },
+ {
+ "path": "physical_properties.usual_occurrence_modes",
+ "count": 1
+ },
+ {
+ "path": "physical_properties.fracture_appearance",
+ "count": 1
+ },
+ {
+ "path": "physical_properties.color_varieties",
+ "count": 1
+ },
+ {
+ "path": "historical_identifications",
+ "count": 1
+ },
+ {
+ "path": "historical_identifications[].source",
+ "count": 1
+ },
+ {
+ "path": "historical_identifications[].identified_name",
+ "count": 1
+ },
+ {
+ "path": "geographic_sources",
+ "count": 1
+ },
+ {
+ "path": "geographic_sources[].country",
+ "count": 1
+ },
+ {
+ "path": "geographic_sources[].status",
+ "count": 1
+ },
+ {
+ "path": "notable_specimens",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/gemini-3-flash/eval_records.jsonl b/data/evaluation/image/gemini-3-flash/eval_records.jsonl
new file mode 100644
index 0000000..8572369
--- /dev/null
+++ b/data/evaluation/image/gemini-3-flash/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5366eff95fcebd23297023e4df6c3fd88cd841cbea2346923a3d3ca387fc6413
+size 263990
diff --git a/data/evaluation/image/gemini-3-flash/eval_summary.json b/data/evaluation/image/gemini-3-flash/eval_summary.json
new file mode 100644
index 0000000..230c0bb
--- /dev/null
+++ b/data/evaluation/image/gemini-3-flash/eval_summary.json
@@ -0,0 +1,430 @@
+{
+ "response_file": "data/images_responses/response_gemini-3-flash_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "gemini-3-flash-preview"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 48,
+ "json_non_structured_root_count": 48,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.7703349282296651,
+ "ci95_low": 0.7081339712918661,
+ "ci95_high": 0.8325358851674641,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.7703349282296651,
+ "ci95_low": 0.7081339712918661,
+ "ci95_high": 0.8229665071770335,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.7703349282296651,
+ "ci95_low": 0.7129186602870813,
+ "ci95_high": 0.8229665071770335,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5241731465596917,
+ "ci95_low": 0.47327316703732203,
+ "ci95_high": 0.5767290497794737,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.6552737829624004,
+ "ci95_low": 0.601076429695972,
+ "ci95_high": 0.7053859569834393,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.7592970635301592,
+ "ci95_low": 0.6997169195502421,
+ "ci95_high": 0.8156938651550499,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7457369289287286,
+ "ci95_low": 0.6890064704967206,
+ "ci95_high": 0.8038153437756722,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.7703349282296651,
+ "ci95_low": 0.7081339712918661,
+ "ci95_high": 0.8277511961722488,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.12440191387559808,
+ "ci95_low": 0.08133971291866028,
+ "ci95_high": 0.1674641148325359,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6462479976840838,
+ "ci95_low": 0.5928770756325159,
+ "ci95_high": 0.6991945094310109,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.7621355951293529,
+ "ci95_low": 0.7056910089279259,
+ "ci95_high": 0.8145612686067176,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.589723464761046,
+ "ci95_low": 0.5373442809253007,
+ "ci95_high": 0.6412819698237937,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.7703349282296651,
+ "ci95_low": 0.7129186602870813,
+ "ci95_high": 0.8277511961722488,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.12440191387559808,
+ "ci95_low": 0.0861244019138756,
+ "ci95_high": 0.1722488038277512,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.7641196013289037,
+ "ci95_low": 0.7090909090909091,
+ "ci95_high": 0.8174204355108877,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.7641196013289037,
+ "ci95_low": 0.7036423841059603,
+ "ci95_high": 0.8256578947368421,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.7641196013289037,
+ "ci95_low": 0.7019867549668874,
+ "ci95_high": 0.8219633943427621,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5248226278176488,
+ "ci95_low": 0.4710848697211861,
+ "ci95_high": 0.5788175760614576,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.6534524123718818,
+ "ci95_low": 0.6022324642848425,
+ "ci95_high": 0.7080773686135151,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.7530469720073557,
+ "ci95_low": 0.6945227538748344,
+ "ci95_high": 0.8061623117054213,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7399907251293643,
+ "ci95_low": 0.6762338877868826,
+ "ci95_high": 0.7976507911652665,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.7641196013289037,
+ "ci95_low": 0.7086092715231788,
+ "ci95_high": 0.8222591362126246,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.12458471760797342,
+ "ci95_low": 0.08166666666666667,
+ "ci95_high": 0.17081260364842454,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6437740040656287,
+ "ci95_low": 0.5900312279325025,
+ "ci95_high": 0.6967100781667901,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.7560766425957238,
+ "ci95_low": 0.69539909946387,
+ "ci95_high": 0.8117511114271591,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.5891375200947653,
+ "ci95_low": 0.5413764001320697,
+ "ci95_high": 0.6393743623716094,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.7641196013289037,
+ "ci95_low": 0.7036423841059603,
+ "ci95_high": 0.8211920529801324,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.12458471760797342,
+ "ci95_low": 0.08196721311475409,
+ "ci95_high": 0.17224080267558528,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "letter_date",
+ "count": 2
+ },
+ {
+ "path": "authors[0].name",
+ "count": 2
+ },
+ {
+ "path": "authors[1].name",
+ "count": 2
+ },
+ {
+ "path": "year",
+ "count": 2
+ },
+ {
+ "path": "accident_type_investigated",
+ "count": 1
+ },
+ {
+ "path": "cited_previous_research[0].authors",
+ "count": 1
+ },
+ {
+ "path": "cited_previous_research[0].key_finding",
+ "count": 1
+ },
+ {
+ "path": "cited_previous_research[0].year",
+ "count": 1
+ },
+ {
+ "path": "cited_previous_research[1].authors",
+ "count": 1
+ },
+ {
+ "path": "cited_previous_research[1].country_of_research",
+ "count": 1
+ },
+ {
+ "path": "cited_previous_research[1].key_finding",
+ "count": 1
+ },
+ {
+ "path": "cited_previous_research[1].risk_factors_identified[0]",
+ "count": 1
+ },
+ {
+ "path": "cited_previous_research[1].risk_factors_identified[1]",
+ "count": 1
+ },
+ {
+ "path": "cited_previous_research[1].risk_factors_identified[2]",
+ "count": 1
+ },
+ {
+ "path": "cited_previous_research[1].year",
+ "count": 1
+ },
+ {
+ "path": "study_main_focus",
+ "count": 1
+ },
+ {
+ "path": "study_methodology_steps[0]",
+ "count": 1
+ },
+ {
+ "path": "study_methodology_steps[1]",
+ "count": 1
+ },
+ {
+ "path": "study_methodology_steps[2]",
+ "count": 1
+ },
+ {
+ "path": "study_objectives[0]",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "letter_date",
+ "count": 2
+ },
+ {
+ "path": "authors",
+ "count": 2
+ },
+ {
+ "path": "authors[].name",
+ "count": 2
+ },
+ {
+ "path": "year",
+ "count": 2
+ },
+ {
+ "path": "study_main_focus",
+ "count": 1
+ },
+ {
+ "path": "accident_type_investigated",
+ "count": 1
+ },
+ {
+ "path": "study_objectives",
+ "count": 1
+ },
+ {
+ "path": "study_methodology_steps",
+ "count": 1
+ },
+ {
+ "path": "cited_previous_research",
+ "count": 1
+ },
+ {
+ "path": "cited_previous_research[].authors",
+ "count": 1
+ },
+ {
+ "path": "cited_previous_research[].year",
+ "count": 1
+ },
+ {
+ "path": "cited_previous_research[].key_finding",
+ "count": 1
+ },
+ {
+ "path": "determinant_property_489",
+ "count": 1
+ },
+ {
+ "path": "determinant_property_489.section_number",
+ "count": 1
+ },
+ {
+ "path": "determinant_property_489.rule_description",
+ "count": 1
+ },
+ {
+ "path": "determinant_property_489.mathematical_illustration_step_1",
+ "count": 1
+ },
+ {
+ "path": "determinant_property_489.mathematical_illustration_step_2",
+ "count": 1
+ },
+ {
+ "path": "determinant_property_489.consequence_of_proof",
+ "count": 1
+ },
+ {
+ "path": "example_determinant_evaluation",
+ "count": 1
+ },
+ {
+ "path": "example_determinant_evaluation.initial_determinant_expression",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/gemma-3-27b-it/eval_records.jsonl b/data/evaluation/image/gemma-3-27b-it/eval_records.jsonl
new file mode 100644
index 0000000..6153445
--- /dev/null
+++ b/data/evaluation/image/gemma-3-27b-it/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:227cbbfaf9fe9a59fac8529b87f4566bdf550a6f30cae6b118ba3f09c9fd5dc6
+size 175797
diff --git a/data/evaluation/image/gemma-3-27b-it/eval_summary.json b/data/evaluation/image/gemma-3-27b-it/eval_summary.json
new file mode 100644
index 0000000..035b930
--- /dev/null
+++ b/data/evaluation/image/gemma-3-27b-it/eval_summary.json
@@ -0,0 +1,430 @@
+{
+ "response_file": "data/images_responses/response_gemma-3-27b-it_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "google/gemma-3-27b-it"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 6,
+ "json_non_structured_root_count": 6,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9712918660287081,
+ "ci95_low": 0.9473684210526315,
+ "ci95_high": 0.9904306220095693,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9712918660287081,
+ "ci95_low": 0.9473684210526315,
+ "ci95_high": 0.9904306220095693,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8803827751196173,
+ "ci95_low": 0.8325358851674641,
+ "ci95_high": 0.9234449760765551,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5467280923059646,
+ "ci95_low": 0.4976612683100865,
+ "ci95_high": 0.59393831419269,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7149564466259734,
+ "ci95_low": 0.6708155162381643,
+ "ci95_high": 0.7573040784475592,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8566470873712374,
+ "ci95_low": 0.8086190426896136,
+ "ci95_high": 0.8955281405228324,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8534662639453409,
+ "ci95_low": 0.808879780104232,
+ "ci95_high": 0.8932890848063835,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8803827751196173,
+ "ci95_low": 0.8325358851674641,
+ "ci95_high": 0.9186602870813397,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.09090909090909091,
+ "ci95_low": 0.05741626794258373,
+ "ci95_high": 0.1291866028708134,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7061105421010584,
+ "ci95_low": 0.6636156034213323,
+ "ci95_high": 0.7469680455147353,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8714106047281918,
+ "ci95_low": 0.82532374853866,
+ "ci95_high": 0.9126295936956825,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.630842269465969,
+ "ci95_low": 0.5868141927532468,
+ "ci95_high": 0.6729462137576792,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9106858054226475,
+ "ci95_low": 0.8771929824561404,
+ "ci95_high": 0.9425837320574163,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.09090909090909091,
+ "ci95_low": 0.05741626794258373,
+ "ci95_high": 0.1291866028708134,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.973421926910299,
+ "ci95_low": 0.9517470881863561,
+ "ci95_high": 0.9917491749174917,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.973421926910299,
+ "ci95_low": 0.9503311258278145,
+ "ci95_high": 0.9933665008291874,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8803986710963455,
+ "ci95_low": 0.8336079077429983,
+ "ci95_high": 0.9221854304635762,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5523997142675148,
+ "ci95_low": 0.5047108578681738,
+ "ci95_high": 0.604321104429813,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7198560498691636,
+ "ci95_low": 0.6726764373288237,
+ "ci95_high": 0.7635102117056065,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8565119881133869,
+ "ci95_low": 0.8138708483918958,
+ "ci95_high": 0.9003054854398281,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8536632193539122,
+ "ci95_low": 0.807590680863603,
+ "ci95_high": 0.8966797699698758,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8803986710963455,
+ "ci95_low": 0.8380165289256198,
+ "ci95_high": 0.9246231155778895,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.08803986710963455,
+ "ci95_low": 0.05073649754500818,
+ "ci95_high": 0.1281198003327787,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7095892507500218,
+ "ci95_low": 0.6684033852398518,
+ "ci95_high": 0.7522010722992085,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8714868538488677,
+ "ci95_low": 0.8263669255877143,
+ "ci95_high": 0.9136300934408405,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6361278820683391,
+ "ci95_low": 0.5930111310915782,
+ "ci95_high": 0.6777794671914322,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.91140642303433,
+ "ci95_low": 0.8759689922480621,
+ "ci95_high": 0.9448160535117057,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.08803986710963455,
+ "ci95_low": 0.05132450331125828,
+ "ci95_high": 0.1281198003327787,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].name",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "year",
+ "count": 1
+ },
+ {
+ "path": "schedule",
+ "count": 1
+ },
+ {
+ "path": "schedule[].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "shared_values_importance",
+ "count": 1
+ },
+ {
+ "path": "formalization_methods",
+ "count": 1
+ },
+ {
+ "path": "manager_key_message",
+ "count": 1
+ },
+ {
+ "path": "figure_4_concept",
+ "count": 1
+ },
+ {
+ "path": "van_vught_illustration_summary",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types[].name",
+ "count": 1
+ },
+ {
+ "path": "conclusions",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].id",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].authors",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/gemma-4-31b-it/eval_records.jsonl b/data/evaluation/image/gemma-4-31b-it/eval_records.jsonl
new file mode 100644
index 0000000..dee2d76
--- /dev/null
+++ b/data/evaluation/image/gemma-4-31b-it/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fa277b07af724cd6140c0497642fa7b2a7572ba9122f7af42e3a86ead978d88
+size 145726
diff --git a/data/evaluation/image/gemma-4-31b-it/eval_summary.json b/data/evaluation/image/gemma-4-31b-it/eval_summary.json
new file mode 100644
index 0000000..49dbefe
--- /dev/null
+++ b/data/evaluation/image/gemma-4-31b-it/eval_summary.json
@@ -0,0 +1,382 @@
+{
+ "response_file": "data/images_responses/response_gemma-4-31b-it_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "gemma-4-31b-it"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 1,
+ "json_non_structured_root_count": 1,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9952153110047847,
+ "ci95_low": 0.9856459330143541,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9952153110047847,
+ "ci95_low": 0.9808612440191388,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9952153110047847,
+ "ci95_low": 0.9856459330143541,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.6685544502149684,
+ "ci95_low": 0.6314410073005832,
+ "ci95_high": 0.7073419394508985,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8353664185587351,
+ "ci95_low": 0.8036112258916691,
+ "ci95_high": 0.8624620066331863,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9631271415034764,
+ "ci95_low": 0.9490825518309134,
+ "ci95_high": 0.9747970993618379,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.956706411683843,
+ "ci95_low": 0.9425678005994358,
+ "ci95_high": 0.9670522010654257,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9952153110047847,
+ "ci95_low": 0.9856459330143541,
+ "ci95_high": 1.0,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.11483253588516747,
+ "ci95_low": 0.07177033492822966,
+ "ci95_high": 0.15789473684210525,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.82234933675906,
+ "ci95_low": 0.7977515451565406,
+ "ci95_high": 0.8456470796644405,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9823790112311375,
+ "ci95_low": 0.9702061909174606,
+ "ci95_high": 0.9892651661656205,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.7519604343868517,
+ "ci95_low": 0.71975532541575,
+ "ci95_high": 0.7825804750315775,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9952153110047847,
+ "ci95_low": 0.9856459330143541,
+ "ci95_high": 1.0,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.11483253588516747,
+ "ci95_low": 0.07655502392344497,
+ "ci95_high": 0.15789473684210525,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9950166112956811,
+ "ci95_low": 0.9849246231155779,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9950166112956811,
+ "ci95_low": 0.984822934232715,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9950166112956811,
+ "ci95_low": 0.9848993288590604,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.6722096352960484,
+ "ci95_low": 0.630751309583317,
+ "ci95_high": 0.7105934485551747,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8374308725360823,
+ "ci95_low": 0.8081834927887596,
+ "ci95_high": 0.8652010396320697,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9629166968069991,
+ "ci95_low": 0.9483912879655313,
+ "ci95_high": 0.9745823845801737,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9565283256230704,
+ "ci95_low": 0.941144724626549,
+ "ci95_high": 0.9685717228689568,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9950166112956811,
+ "ci95_low": 0.9849498327759197,
+ "ci95_high": 1.0,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.11295681063122924,
+ "ci95_low": 0.07345575959933222,
+ "ci95_high": 0.15763546798029557,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8241857348797099,
+ "ci95_low": 0.8008926450261039,
+ "ci95_high": 0.8472484794649664,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9821871827381443,
+ "ci95_low": 0.9696356374750892,
+ "ci95_high": 0.9890312371079665,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.7548202539160653,
+ "ci95_low": 0.7233753898807461,
+ "ci95_high": 0.7834019503448648,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9950166112956811,
+ "ci95_low": 0.9849246231155779,
+ "ci95_high": 1.0,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.11295681063122924,
+ "ci95_low": 0.06988352745424292,
+ "ci95_high": 0.16193656093489148,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "benefits_of_previews[4]",
+ "count": 1
+ },
+ {
+ "path": "benefits_of_previews[5]",
+ "count": 1
+ },
+ {
+ "path": "drawbacks_of_previews[3]",
+ "count": 1
+ },
+ {
+ "path": "specific_challenges[2].challenge",
+ "count": 1
+ },
+ {
+ "path": "specific_challenges[2].supporting_quotes[0]",
+ "count": 1
+ },
+ {
+ "path": "analysis_sections[0].methods[4]",
+ "count": 1
+ },
+ {
+ "path": "analysis_sections[0].tools_software[2]",
+ "count": 1
+ },
+ {
+ "path": "analysis_sections[1].methods[1]",
+ "count": 1
+ },
+ {
+ "path": "analysis_sections[1].tools_software[0]",
+ "count": 1
+ },
+ {
+ "path": "analysis_sections[1].tools_software[1]",
+ "count": 1
+ },
+ {
+ "path": "analysis_sections[2].methods[1]",
+ "count": 1
+ },
+ {
+ "path": "soft_tissue_calcification_results[0].treatments[3].calcium_umol",
+ "count": 1
+ },
+ {
+ "path": "soft_tissue_calcification_results[0].treatments[3].treatment_group",
+ "count": 1
+ },
+ {
+ "path": "soft_tissue_calcification_results[1].treatments[3].calcium_umol",
+ "count": 1
+ },
+ {
+ "path": "soft_tissue_calcification_results[1].treatments[3].treatment_group",
+ "count": 1
+ },
+ {
+ "path": "soft_tissue_calcification_results[2].treatments[3].calcium_umol",
+ "count": 1
+ },
+ {
+ "path": "soft_tissue_calcification_results[2].treatments[3].treatment_group",
+ "count": 1
+ },
+ {
+ "path": "soft_tissue_calcification_results[3].treatments[3].calcium_umol",
+ "count": 1
+ },
+ {
+ "path": "soft_tissue_calcification_results[3].treatments[3].treatment_group",
+ "count": 1
+ },
+ {
+ "path": "influential_approaches[1].derived_elements[2]",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "figure_8_details",
+ "count": 1
+ },
+ {
+ "path": "figure_8_details.main_topic",
+ "count": 1
+ },
+ {
+ "path": "figure_8_details.schemes",
+ "count": 1
+ },
+ {
+ "path": "figure_8_details.alpha_values",
+ "count": 1
+ },
+ {
+ "path": "figure_9_details",
+ "count": 1
+ },
+ {
+ "path": "figure_9_details.left_panel_content",
+ "count": 1
+ },
+ {
+ "path": "figure_9_details.right_panel_content",
+ "count": 1
+ },
+ {
+ "path": "context_section_title",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/gpt-oss/eval_records.jsonl b/data/evaluation/image/gpt-oss/eval_records.jsonl
new file mode 100644
index 0000000..025e385
--- /dev/null
+++ b/data/evaluation/image/gpt-oss/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a267420fa4b8dec006351e710a88736664fc6ab6699b020e4ec3d6cbd47ba560
+size 253591
diff --git a/data/evaluation/image/gpt-oss/eval_summary.json b/data/evaluation/image/gpt-oss/eval_summary.json
new file mode 100644
index 0000000..c0c45e8
--- /dev/null
+++ b/data/evaluation/image/gpt-oss/eval_summary.json
@@ -0,0 +1,430 @@
+{
+ "response_file": "data/images_responses/response_gpt-oss_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "openai/gpt-oss-20b"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 51,
+ "json_non_structured_root_count": 51,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.7559808612440191,
+ "ci95_low": 0.6985645933014354,
+ "ci95_high": 0.8133971291866029,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.7559808612440191,
+ "ci95_low": 0.6985645933014354,
+ "ci95_high": 0.8181818181818182,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.7464114832535885,
+ "ci95_low": 0.6889952153110048,
+ "ci95_high": 0.8086124401913876,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.42487584239856024,
+ "ci95_low": 0.3738096113140317,
+ "ci95_high": 0.47570783266855343,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.5633972440125468,
+ "ci95_low": 0.5099773593946927,
+ "ci95_high": 0.6172130720385245,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.7222698039381288,
+ "ci95_low": 0.6586970724217468,
+ "ci95_high": 0.7781679024044743,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.7096363518081903,
+ "ci95_low": 0.650574017752842,
+ "ci95_high": 0.7657533885190596,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.7464114832535885,
+ "ci95_low": 0.6889952153110048,
+ "ci95_high": 0.8086124401913876,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.07655502392344497,
+ "ci95_low": 0.0430622009569378,
+ "ci95_high": 0.11483253588516747,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.5701809634497453,
+ "ci95_low": 0.523492155697328,
+ "ci95_high": 0.6202226415565092,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.7341531061051224,
+ "ci95_low": 0.6734946000961883,
+ "ci95_high": 0.7898341248628169,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.4941365432055535,
+ "ci95_low": 0.4455305749948997,
+ "ci95_high": 0.543975655208376,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.7496012759170654,
+ "ci95_low": 0.6874003189792663,
+ "ci95_high": 0.8070175438596491,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.07655502392344497,
+ "ci95_low": 0.0430622009569378,
+ "ci95_high": 0.11483253588516747,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.7558139534883721,
+ "ci95_low": 0.697324414715719,
+ "ci95_high": 0.8109452736318408,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.7558139534883721,
+ "ci95_low": 0.6933333333333334,
+ "ci95_high": 0.8138385502471169,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.7458471760797342,
+ "ci95_low": 0.6857142857142857,
+ "ci95_high": 0.8023255813953488,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.42748820999246856,
+ "ci95_low": 0.3752869082794269,
+ "ci95_high": 0.4812409276845706,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.5654817874796494,
+ "ci95_low": 0.5096344770195435,
+ "ci95_high": 0.6172179160085915,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.7214024683560226,
+ "ci95_low": 0.6619965278452405,
+ "ci95_high": 0.7746893893220655,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.708945306947063,
+ "ci95_low": 0.652607878469208,
+ "ci95_high": 0.76457954371324,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.7458471760797342,
+ "ci95_low": 0.6850921273031826,
+ "ci95_high": 0.8076285240464345,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.07308970099667775,
+ "ci95_low": 0.039538714991762765,
+ "ci95_high": 0.11036789297658862,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.5714574886093802,
+ "ci95_low": 0.51380959610144,
+ "ci95_high": 0.621443835509521,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.7335465530355105,
+ "ci95_low": 0.6766587533622818,
+ "ci95_high": 0.7896368483906779,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.49648499873605895,
+ "ci95_low": 0.4468484568343588,
+ "ci95_high": 0.5455651939031697,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.7491694352159468,
+ "ci95_low": 0.6905158069883528,
+ "ci95_high": 0.8076923076923077,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.07308970099667775,
+ "ci95_low": 0.03940886699507389,
+ "ci95_high": 0.10945273631840796,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "letter_date",
+ "count": 5
+ },
+ {
+ "path": "recipient_name",
+ "count": 3
+ },
+ {
+ "path": "authors[0].affiliation",
+ "count": 2
+ },
+ {
+ "path": "authors[0].name",
+ "count": 2
+ },
+ {
+ "path": "authors[1].affiliation",
+ "count": 2
+ },
+ {
+ "path": "authors[1].name",
+ "count": 2
+ },
+ {
+ "path": "authors[2].affiliation",
+ "count": 2
+ },
+ {
+ "path": "authors[2].name",
+ "count": 2
+ },
+ {
+ "path": "table_title",
+ "count": 2
+ },
+ {
+ "path": "event_name",
+ "count": 2
+ },
+ {
+ "path": "recipient_address",
+ "count": 2
+ },
+ {
+ "path": "sender_name",
+ "count": 2
+ },
+ {
+ "path": "recipient_details.address",
+ "count": 2
+ },
+ {
+ "path": "recipient_details.name",
+ "count": 2
+ },
+ {
+ "path": "problem_addressed",
+ "count": 2
+ },
+ {
+ "path": "commonalities_across_firms.implications[1]",
+ "count": 1
+ },
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].is_meeting",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "letter_date",
+ "count": 5
+ },
+ {
+ "path": "recipient_name",
+ "count": 3
+ },
+ {
+ "path": "conclusions",
+ "count": 2
+ },
+ {
+ "path": "authors",
+ "count": 2
+ },
+ {
+ "path": "authors[].name",
+ "count": 2
+ },
+ {
+ "path": "authors[].affiliation",
+ "count": 2
+ },
+ {
+ "path": "table_title",
+ "count": 2
+ },
+ {
+ "path": "event_name",
+ "count": 2
+ },
+ {
+ "path": "sender_name",
+ "count": 2
+ },
+ {
+ "path": "recipient_address",
+ "count": 2
+ },
+ {
+ "path": "sender_details",
+ "count": 2
+ },
+ {
+ "path": "recipient_details",
+ "count": 2
+ },
+ {
+ "path": "recipient_details.name",
+ "count": 2
+ },
+ {
+ "path": "recipient_details.address",
+ "count": 2
+ },
+ {
+ "path": "problem_addressed",
+ "count": 2
+ },
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "year",
+ "count": 1
+ },
+ {
+ "path": "schedule",
+ "count": 1
+ },
+ {
+ "path": "schedule[].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[].date",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/ibm-granite-4.0-h-small/eval_records.jsonl b/data/evaluation/image/ibm-granite-4.0-h-small/eval_records.jsonl
new file mode 100644
index 0000000..6ba845c
--- /dev/null
+++ b/data/evaluation/image/ibm-granite-4.0-h-small/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77d39332d29dcc653809246e0bbab37e12a3c3af88bd01a8e415c6d18d2f6d53
+size 181769
diff --git a/data/evaluation/image/ibm-granite-4.0-h-small/eval_summary.json b/data/evaluation/image/ibm-granite-4.0-h-small/eval_summary.json
new file mode 100644
index 0000000..ab190f6
--- /dev/null
+++ b/data/evaluation/image/ibm-granite-4.0-h-small/eval_summary.json
@@ -0,0 +1,430 @@
+{
+ "response_file": "data/images_responses/response_ibm-granite-4.0-h-small_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "ibm-granite/granite-4.0-h-small"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 3,
+ "json_non_structured_root_count": 3,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9856459330143541,
+ "ci95_low": 0.9665071770334929,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9856459330143541,
+ "ci95_low": 0.9665071770334929,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9425837320574163,
+ "ci95_low": 0.9090909090909091,
+ "ci95_high": 0.9712918660287081,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.512589690863236,
+ "ci95_low": 0.468771159517032,
+ "ci95_high": 0.5573137458426992,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.708170173933668,
+ "ci95_low": 0.6681755952554318,
+ "ci95_high": 0.7486102665943825,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8857115669217293,
+ "ci95_low": 0.8501360434241946,
+ "ci95_high": 0.9179335522161671,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8970618834752123,
+ "ci95_low": 0.8652315060255643,
+ "ci95_high": 0.9272628230557944,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9425837320574163,
+ "ci95_low": 0.9090909090909091,
+ "ci95_high": 0.9712918660287081,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.04784688995215311,
+ "ci95_low": 0.019138755980861243,
+ "ci95_high": 0.08133971291866028,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7021571439062111,
+ "ci95_low": 0.6664492661651513,
+ "ci95_high": 0.7374828394319375,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9274097825300149,
+ "ci95_low": 0.8931058485683837,
+ "ci95_high": 0.9551624729156082,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.610379932398452,
+ "ci95_low": 0.5657522080806223,
+ "ci95_high": 0.6464448651493385,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9569377990430622,
+ "ci95_low": 0.9314194577352471,
+ "ci95_high": 0.9792663476874003,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.04784688995215311,
+ "ci95_low": 0.019138755980861243,
+ "ci95_high": 0.08133971291866028,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9850498338870431,
+ "ci95_low": 0.9696969696969697,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9850498338870431,
+ "ci95_low": 0.9654036243822076,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9418604651162791,
+ "ci95_low": 0.9065573770491804,
+ "ci95_high": 0.971900826446281,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5167955061606273,
+ "ci95_low": 0.4691790053659684,
+ "ci95_high": 0.5636031874928136,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7110341111960856,
+ "ci95_low": 0.6712822794140823,
+ "ci95_high": 0.7516013597281442,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8855617919384893,
+ "ci95_low": 0.8514996818093364,
+ "ci95_high": 0.9161629647148207,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8965030068194604,
+ "ci95_low": 0.8611285790885485,
+ "ci95_high": 0.9286469593879743,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9418604651162791,
+ "ci95_low": 0.9053156146179402,
+ "ci95_high": 0.9701492537313433,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.044850498338870434,
+ "ci95_low": 0.021311475409836064,
+ "ci95_high": 0.07308970099667775,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7044638030984007,
+ "ci95_low": 0.6697431004622167,
+ "ci95_high": 0.7388578862687755,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9267413123506728,
+ "ci95_low": 0.8942180640883515,
+ "ci95_high": 0.9578899798677016,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6139148086783565,
+ "ci95_low": 0.5744924396002377,
+ "ci95_high": 0.6554984232821865,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9562569213732004,
+ "ci95_low": 0.9266106442577031,
+ "ci95_high": 0.9812981298129813,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.044850498338870434,
+ "ci95_low": 0.01990049751243781,
+ "ci95_high": 0.075,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "market_dynamics_differences[0].characteristics[3]",
+ "count": 1
+ },
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].is_meeting",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "year",
+ "count": 1
+ },
+ {
+ "path": "schedule",
+ "count": 1
+ },
+ {
+ "path": "schedule[].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types[].name",
+ "count": 1
+ },
+ {
+ "path": "conclusions",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].id",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].authors",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].title",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].journal_info",
+ "count": 1
+ },
+ {
+ "path": "map_f_properties",
+ "count": 1
+ },
+ {
+ "path": "map_f_properties.is_compact",
+ "count": 1
+ },
+ {
+ "path": "map_f_properties.compactness_proof_method",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/inference-net-Schematron-8B/eval_records.jsonl b/data/evaluation/image/inference-net-Schematron-8B/eval_records.jsonl
new file mode 100644
index 0000000..5aeead6
--- /dev/null
+++ b/data/evaluation/image/inference-net-Schematron-8B/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d38859cdcfeecd4d4b42a327debec82d40a69eb4b9612a2cfaf74ce316a7bb1c
+size 179696
diff --git a/data/evaluation/image/inference-net-Schematron-8B/eval_summary.json b/data/evaluation/image/inference-net-Schematron-8B/eval_summary.json
new file mode 100644
index 0000000..3964e59
--- /dev/null
+++ b/data/evaluation/image/inference-net-Schematron-8B/eval_summary.json
@@ -0,0 +1,430 @@
+{
+ "response_file": "data/images_responses/response_inference-net-Schematron-8B_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "inference-net/Schematron-8B"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 7,
+ "json_non_structured_root_count": 7,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9665071770334929,
+ "ci95_low": 0.937799043062201,
+ "ci95_high": 0.9904306220095693,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9665071770334929,
+ "ci95_low": 0.9425837320574163,
+ "ci95_high": 0.9904306220095693,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9186602870813397,
+ "ci95_low": 0.8803827751196173,
+ "ci95_high": 0.9569377990430622,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5381700703950235,
+ "ci95_low": 0.4926123738137938,
+ "ci95_high": 0.585046386677166,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7280431793683613,
+ "ci95_low": 0.6830651088857935,
+ "ci95_high": 0.769160858562266,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8904379376907052,
+ "ci95_low": 0.8504982786710716,
+ "ci95_high": 0.927778278294978,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.879845053147794,
+ "ci95_low": 0.8410550387764462,
+ "ci95_high": 0.9157830241058116,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9186602870813397,
+ "ci95_low": 0.8803827751196173,
+ "ci95_high": 0.9521531100478469,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.08133971291866028,
+ "ci95_low": 0.0430622009569378,
+ "ci95_high": 0.11961722488038277,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7188837291513632,
+ "ci95_low": 0.6858321274408613,
+ "ci95_high": 0.7563329418434813,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9057218757701578,
+ "ci95_low": 0.8654192947268704,
+ "ci95_high": 0.9403498834858558,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6331066248816923,
+ "ci95_low": 0.5934864950930939,
+ "ci95_high": 0.6746600322788856,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9346092503987241,
+ "ci95_low": 0.901116427432217,
+ "ci95_high": 0.963317384370016,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.08133971291866028,
+ "ci95_low": 0.04784688995215311,
+ "ci95_high": 0.11961722488038277,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9667774086378738,
+ "ci95_low": 0.9404958677685951,
+ "ci95_high": 0.988313856427379,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9667774086378738,
+ "ci95_low": 0.9391891891891891,
+ "ci95_high": 0.9899665551839465,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9169435215946844,
+ "ci95_low": 0.8756218905472637,
+ "ci95_high": 0.9545454545454546,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5409154466475828,
+ "ci95_low": 0.49389686099749336,
+ "ci95_high": 0.586780924886349,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7287902581029784,
+ "ci95_low": 0.6835867680092285,
+ "ci95_high": 0.7711626949758776,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8881880179014486,
+ "ci95_low": 0.8503833492099402,
+ "ci95_high": 0.9235454468741847,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.878237152786448,
+ "ci95_low": 0.8399453758738881,
+ "ci95_high": 0.9153102456661908,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9169435215946844,
+ "ci95_low": 0.8772802653399668,
+ "ci95_high": 0.9522240527182867,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.07807308970099668,
+ "ci95_low": 0.046052631578947366,
+ "ci95_high": 0.11589403973509933,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7192979075506699,
+ "ci95_low": 0.6821963774839597,
+ "ci95_high": 0.7566282658957475,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9040413986586056,
+ "ci95_low": 0.8639758045458851,
+ "ci95_high": 0.9403181369898477,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6348528523752807,
+ "ci95_low": 0.5910671740263119,
+ "ci95_high": 0.6736595975816225,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9335548172757475,
+ "ci95_low": 0.9003322259136213,
+ "ci95_high": 0.9646464646464646,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.07807308970099668,
+ "ci95_low": 0.04340567612687813,
+ "ci95_high": 0.11627906976744186,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].name",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "year",
+ "count": 1
+ },
+ {
+ "path": "schedule",
+ "count": 1
+ },
+ {
+ "path": "schedule[].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types[].name",
+ "count": 1
+ },
+ {
+ "path": "conclusions",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].id",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].authors",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].title",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].journal_info",
+ "count": 1
+ },
+ {
+ "path": "experimental_method",
+ "count": 1
+ },
+ {
+ "path": "undamped_system_modes",
+ "count": 1
+ },
+ {
+ "path": "undamped_system_modes[].frequency_hz",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/interfaze-beta/eval_records.jsonl b/data/evaluation/image/interfaze-beta/eval_records.jsonl
new file mode 100644
index 0000000..e6460b1
--- /dev/null
+++ b/data/evaluation/image/interfaze-beta/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d297db0ade9574efd9242ee7863ff2fb98a1ed8a62d259093234b24c294063d
+size 143490
diff --git a/data/evaluation/image/interfaze-beta/eval_summary.json b/data/evaluation/image/interfaze-beta/eval_summary.json
new file mode 100644
index 0000000..f712131
--- /dev/null
+++ b/data/evaluation/image/interfaze-beta/eval_summary.json
@@ -0,0 +1,410 @@
+{
+ "response_file": "data/images_responses/response_interfaze-beta_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "interfaze-beta"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 1,
+ "json_non_structured_root_count": 1,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9952153110047847,
+ "ci95_low": 0.9856459330143541,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9952153110047847,
+ "ci95_low": 0.9856459330143541,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8564593301435407,
+ "ci95_low": 0.8038277511961722,
+ "ci95_high": 0.9043062200956937,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5562403873141119,
+ "ci95_low": 0.5082435620996949,
+ "ci95_high": 0.6062044129737745,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7220265986732678,
+ "ci95_low": 0.6720435594360519,
+ "ci95_high": 0.7693604768000328,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8415768796521425,
+ "ci95_low": 0.7921302615526636,
+ "ci95_high": 0.8882774337590906,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8307800970280014,
+ "ci95_low": 0.7798704257107005,
+ "ci95_high": 0.8727449341903178,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8564593301435407,
+ "ci95_low": 0.8086124401913876,
+ "ci95_high": 0.9043062200956937,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.11483253588516747,
+ "ci95_low": 0.07177033492822966,
+ "ci95_high": 0.16267942583732056,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7066146218798407,
+ "ci95_low": 0.6649899049093924,
+ "ci95_high": 0.7486455353937841,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8478995857716942,
+ "ci95_low": 0.8000954107960992,
+ "ci95_high": 0.8912936941457287,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6391334929936899,
+ "ci95_low": 0.5926307984824499,
+ "ci95_high": 0.684001902374487,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9027113237639552,
+ "ci95_low": 0.8676236044657097,
+ "ci95_high": 0.9346092503987241,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.11483253588516747,
+ "ci95_low": 0.07177033492822966,
+ "ci95_high": 0.15789473684210525,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9950166112956811,
+ "ci95_low": 0.9847715736040609,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9950166112956811,
+ "ci95_low": 0.9849498327759197,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8521594684385382,
+ "ci95_low": 0.8019966722129783,
+ "ci95_high": 0.8994974874371859,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5587225814752367,
+ "ci95_low": 0.5084633809422346,
+ "ci95_high": 0.6078186662341794,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7209196871933863,
+ "ci95_low": 0.6697617057998083,
+ "ci95_high": 0.768747725006925,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.837082067103977,
+ "ci95_low": 0.7858431630808539,
+ "ci95_high": 0.8831020131536742,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8268572377929342,
+ "ci95_low": 0.7725013671213219,
+ "ci95_high": 0.8725383801315261,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8521594684385382,
+ "ci95_low": 0.8053244592346089,
+ "ci95_high": 0.9011725293132329,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.11461794019933555,
+ "ci95_low": 0.07236842105263158,
+ "ci95_high": 0.16026711185308848,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7055747785908667,
+ "ci95_low": 0.658278168400453,
+ "ci95_high": 0.7491840552340282,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8437253915566703,
+ "ci95_low": 0.7931132457496363,
+ "ci95_high": 0.891327096321407,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6398211343343115,
+ "ci95_low": 0.5937525802780164,
+ "ci95_high": 0.6846232411790564,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8997785160575857,
+ "ci95_low": 0.8655555555555556,
+ "ci95_high": 0.9344444444444444,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.11461794019933555,
+ "ci95_low": 0.07370184254606366,
+ "ci95_high": 0.15851602023608768,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "key_advantages[10]",
+ "count": 1
+ },
+ {
+ "path": "key_advantages[11]",
+ "count": 1
+ },
+ {
+ "path": "key_advantages[8]",
+ "count": 1
+ },
+ {
+ "path": "key_advantages[9]",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[2].benefit",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[2].configuration",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[2].type",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[3].benefit",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[3].configuration",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[3].type",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[4].benefit",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[4].type",
+ "count": 1
+ },
+ {
+ "path": "total_sound_description.weakened_by_factors[5]",
+ "count": 1
+ },
+ {
+ "path": "postdoc_details.development_areas[1]",
+ "count": 1
+ },
+ {
+ "path": "proposed_test_characteristics[5]",
+ "count": 1
+ },
+ {
+ "path": "ibn_khaldun_ideal_sufi_practice.key_characteristics[4]",
+ "count": 1
+ },
+ {
+ "path": "inner_model_assessment.r_squared_interpretation[4].context",
+ "count": 1
+ },
+ {
+ "path": "inner_model_assessment.r_squared_interpretation[4].description",
+ "count": 1
+ },
+ {
+ "path": "inner_model_assessment.r_squared_interpretation[4].r_squared_value",
+ "count": 1
+ },
+ {
+ "path": "overall_hydrogen_absorption_mechanism",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "overall_hydrogen_absorption_mechanism",
+ "count": 1
+ },
+ {
+ "path": "pd_cluster_role",
+ "count": 1
+ },
+ {
+ "path": "pd_cluster_role.catalytic_activity_at_low_coverage",
+ "count": 1
+ },
+ {
+ "path": "pd_cluster_role.catalytic_properties_of_small_clusters",
+ "count": 1
+ },
+ {
+ "path": "pd_cluster_role.function_in_hydrogen_dissociation",
+ "count": 1
+ },
+ {
+ "path": "yttrium_oxide_role",
+ "count": 1
+ },
+ {
+ "path": "yttrium_oxide_role.intrinsic_reactivity_with_hydrogen",
+ "count": 1
+ },
+ {
+ "path": "yttrium_oxide_role.catalytic_activity_state",
+ "count": 1
+ },
+ {
+ "path": "yttrium_oxide_role.interaction_effect_on_mobility",
+ "count": 1
+ },
+ {
+ "path": "smsi_state_and_cluster_size_impact",
+ "count": 1
+ },
+ {
+ "path": "smsi_state_and_cluster_size_impact.description",
+ "count": 1
+ },
+ {
+ "path": "smsi_state_and_cluster_size_impact.cluster_size_effects",
+ "count": 1
+ },
+ {
+ "path": "smsi_state_and_cluster_size_impact.cluster_size_effects[].cluster_size_category",
+ "count": 1
+ },
+ {
+ "path": "smsi_state_and_cluster_size_impact.cluster_size_effects[].encapsulation_status",
+ "count": 1
+ },
+ {
+ "path": "smsi_state_and_cluster_size_impact.cluster_size_effects[].hydrogen_uptake_consequence",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/openai-gpt-4.1/eval_records.jsonl b/data/evaluation/image/openai-gpt-4.1/eval_records.jsonl
new file mode 100644
index 0000000..4d9b94c
--- /dev/null
+++ b/data/evaluation/image/openai-gpt-4.1/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00257a1ae22824bbb46a0f35d6b3b73864faca804a324466ee5191ec509b4fd5
+size 142359
diff --git a/data/evaluation/image/openai-gpt-4.1/eval_summary.json b/data/evaluation/image/openai-gpt-4.1/eval_summary.json
new file mode 100644
index 0000000..be77ba2
--- /dev/null
+++ b/data/evaluation/image/openai-gpt-4.1/eval_summary.json
@@ -0,0 +1,348 @@
+{
+ "response_file": "data/images_responses/response_openai-gpt-4.1_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "gpt-4.1"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8708133971291866,
+ "ci95_low": 0.8229665071770335,
+ "ci95_high": 0.9138755980861244,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5224496595674526,
+ "ci95_low": 0.47413498525285086,
+ "ci95_high": 0.5719389946844815,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.71260010859554,
+ "ci95_low": 0.6647615538494634,
+ "ci95_high": 0.7582434961618306,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8550167234277541,
+ "ci95_low": 0.8068063191709628,
+ "ci95_high": 0.900220243090464,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8393371226399404,
+ "ci95_low": 0.7961543150299925,
+ "ci95_high": 0.881274638363946,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8708133971291866,
+ "ci95_low": 0.8181818181818182,
+ "ci95_high": 0.9138755980861244,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.10526315789473684,
+ "ci95_low": 0.06698564593301436,
+ "ci95_high": 0.14832535885167464,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6966888305302489,
+ "ci95_low": 0.654660338865258,
+ "ci95_high": 0.7402249202474532,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8603213056327713,
+ "ci95_low": 0.8136593153770264,
+ "ci95_high": 0.9037072823635998,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6175248840814963,
+ "ci95_low": 0.570698588021626,
+ "ci95_high": 0.6599316821939192,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9138755980861244,
+ "ci95_low": 0.8819776714513557,
+ "ci95_high": 0.9425837320574163,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.10526315789473684,
+ "ci95_low": 0.06698564593301436,
+ "ci95_high": 0.14832535885167464,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.867109634551495,
+ "ci95_low": 0.8196994991652755,
+ "ci95_high": 0.9109243697478991,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5262878269941652,
+ "ci95_low": 0.4750068261079102,
+ "ci95_high": 0.5784887572866998,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7136786535508322,
+ "ci95_low": 0.6679457451855519,
+ "ci95_high": 0.7616278335812934,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8516649628837606,
+ "ci95_low": 0.8017181834355829,
+ "ci95_high": 0.8963096984461303,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8358789565086054,
+ "ci95_low": 0.7863130153518036,
+ "ci95_high": 0.8829712756334329,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.867109634551495,
+ "ci95_low": 0.8225538971807629,
+ "ci95_high": 0.911620294599018,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.10299003322259136,
+ "ci95_low": 0.06197654941373534,
+ "ci95_high": 0.14427860696517414,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6972104811429193,
+ "ci95_low": 0.6529795131919842,
+ "ci95_high": 0.7417616136055537,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8566994085371985,
+ "ci95_low": 0.8091761738424871,
+ "ci95_high": 0.905497671554946,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6199832402724986,
+ "ci95_low": 0.5753910452818527,
+ "ci95_high": 0.6648314184852895,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.91140642303433,
+ "ci95_low": 0.8800880088008801,
+ "ci95_high": 0.9433333333333334,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.10299003322259136,
+ "ci95_low": 0.06176961602671119,
+ "ci95_high": 0.14403973509933773,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "key_advantages[10]",
+ "count": 1
+ },
+ {
+ "path": "key_advantages[11]",
+ "count": 1
+ },
+ {
+ "path": "key_advantages[8]",
+ "count": 1
+ },
+ {
+ "path": "key_advantages[9]",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[2].benefit",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[2].configuration",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[2].type",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[3].benefit",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[3].configuration",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[3].type",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[4].benefit",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[4].type",
+ "count": 1
+ },
+ {
+ "path": "proposed_test_characteristics[5]",
+ "count": 1
+ },
+ {
+ "path": "inner_model_assessment.r_squared_interpretation[4].context",
+ "count": 1
+ },
+ {
+ "path": "inner_model_assessment.r_squared_interpretation[4].description",
+ "count": 1
+ },
+ {
+ "path": "inner_model_assessment.r_squared_interpretation[4].r_squared_value",
+ "count": 1
+ },
+ {
+ "path": "study_objectives[3]",
+ "count": 1
+ },
+ {
+ "path": "influential_approaches[1].derived_elements[2]",
+ "count": 1
+ },
+ {
+ "path": "prayer_support_entities[2]",
+ "count": 1
+ },
+ {
+ "path": "experimental_techniques.equilibrium_criteria[2]",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/openai-gpt-5-4/eval_records.jsonl b/data/evaluation/image/openai-gpt-5-4/eval_records.jsonl
new file mode 100644
index 0000000..2790ef6
--- /dev/null
+++ b/data/evaluation/image/openai-gpt-5-4/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af574f815b11e6282c77ff0df18cbb164086e6a78234ec2a05f042818e3e7bdb
+size 136768
diff --git a/data/evaluation/image/openai-gpt-5-4/eval_summary.json b/data/evaluation/image/openai-gpt-5-4/eval_summary.json
new file mode 100644
index 0000000..6f27e95
--- /dev/null
+++ b/data/evaluation/image/openai-gpt-5-4/eval_summary.json
@@ -0,0 +1,348 @@
+{
+ "response_file": "data/images_responses/response_openai-gpt-5-4_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "gpt-5.4"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9234449760765551,
+ "ci95_low": 0.8803827751196173,
+ "ci95_high": 0.9521531100478469,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5446318254636978,
+ "ci95_low": 0.49994060739371243,
+ "ci95_high": 0.593292241236835,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7446473213179331,
+ "ci95_low": 0.7005316072418912,
+ "ci95_high": 0.7857770240188632,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9155481317963652,
+ "ci95_low": 0.8762546866394325,
+ "ci95_high": 0.9511418181446042,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8807958527083013,
+ "ci95_low": 0.8423560373214941,
+ "ci95_high": 0.9139117461419686,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9234449760765551,
+ "ci95_low": 0.8851674641148325,
+ "ci95_high": 0.9569377990430622,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.10526315789473684,
+ "ci95_low": 0.06220095693779904,
+ "ci95_high": 0.15311004784688995,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7349424261926654,
+ "ci95_low": 0.6996606164712107,
+ "ci95_high": 0.7720254624780748,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9092286016204704,
+ "ci95_low": 0.8730191131439395,
+ "ci95_high": 0.9431962394498407,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6446395733908156,
+ "ci95_low": 0.6023051805693552,
+ "ci95_high": 0.6857330394781699,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.94896331738437,
+ "ci95_low": 0.9266347687400318,
+ "ci95_high": 0.9712918660287081,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.10526315789473684,
+ "ci95_low": 0.06698564593301436,
+ "ci95_high": 0.14832535885167464,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.920265780730897,
+ "ci95_low": 0.8801996672212978,
+ "ci95_high": 0.9551495016611296,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5460867913522505,
+ "ci95_low": 0.49975159564759486,
+ "ci95_high": 0.589915386533654,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7451610305644731,
+ "ci95_low": 0.7025854680006847,
+ "ci95_high": 0.7865695693517897,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9120409944124932,
+ "ci95_low": 0.873561471220254,
+ "ci95_high": 0.9483787080956946,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8783715662203411,
+ "ci95_low": 0.8380555437109533,
+ "ci95_high": 0.9145776340401556,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.920265780730897,
+ "ci95_low": 0.8830508474576271,
+ "ci95_high": 0.9595959595959596,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.10132890365448505,
+ "ci95_low": 0.065,
+ "ci95_high": 0.14309210526315788,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7344296054430723,
+ "ci95_low": 0.6945564703957607,
+ "ci95_high": 0.7692236735725048,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9063010425607116,
+ "ci95_low": 0.8679581710511158,
+ "ci95_high": 0.9398557410947163,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6456239109583619,
+ "ci95_low": 0.6046441623565496,
+ "ci95_high": 0.6865203124242407,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.946843853820598,
+ "ci95_low": 0.9205298013245033,
+ "ci95_high": 0.9701492537313433,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.10132890365448505,
+ "ci95_low": 0.0651085141903172,
+ "ci95_high": 0.1423841059602649,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "spring_mechanisms[4].benefit",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[4].type",
+ "count": 1
+ },
+ {
+ "path": "influential_approaches[1].derived_elements[1]",
+ "count": 1
+ },
+ {
+ "path": "influential_approaches[1].derived_elements[2]",
+ "count": 1
+ },
+ {
+ "path": "potential_outsourcing_risks[8].type",
+ "count": 1
+ },
+ {
+ "path": "nf_kb_downstream_effects[5].effect_type",
+ "count": 1
+ },
+ {
+ "path": "nf_kb_downstream_effects[5].role_in_cancer",
+ "count": 1
+ },
+ {
+ "path": "nf_kb_downstream_effects[5].specific_molecules_or_processes[0]",
+ "count": 1
+ },
+ {
+ "path": "nf_kb_downstream_effects[5].specific_molecules_or_processes[1]",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[5].component_involved",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[5].observed_effects",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[5].specific_action",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[5].target_process",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[6].component_involved",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[6].observed_effects",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[6].specific_action",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[6].target_process",
+ "count": 1
+ },
+ {
+ "path": "impacts_of_adverse_events[2].examples[3]",
+ "count": 1
+ },
+ {
+ "path": "impacts_of_adverse_events[2].examples[4]",
+ "count": 1
+ },
+ {
+ "path": "impacts_of_adverse_events[3].examples[2]",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/openai-gpt-5-mini/eval_records.jsonl b/data/evaluation/image/openai-gpt-5-mini/eval_records.jsonl
new file mode 100644
index 0000000..c18836b
--- /dev/null
+++ b/data/evaluation/image/openai-gpt-5-mini/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0915a24f84bf172f7360f77eb438e702685f52e4cebb8830c8a7d97453446a4c
+size 143114
diff --git a/data/evaluation/image/openai-gpt-5-mini/eval_summary.json b/data/evaluation/image/openai-gpt-5-mini/eval_summary.json
new file mode 100644
index 0000000..48964b5
--- /dev/null
+++ b/data/evaluation/image/openai-gpt-5-mini/eval_summary.json
@@ -0,0 +1,348 @@
+{
+ "response_file": "data/images_responses/response_openai-gpt-5-mini_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "gpt-5-mini"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9138755980861244,
+ "ci95_low": 0.8755980861244019,
+ "ci95_high": 0.9473684210526315,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.4815052290667386,
+ "ci95_low": 0.4364950164726322,
+ "ci95_high": 0.5311877054872216,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.6994960258968403,
+ "ci95_low": 0.6562480177616844,
+ "ci95_high": 0.7432764842001498,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8914716687662445,
+ "ci95_low": 0.8494781411462056,
+ "ci95_high": 0.9294833259910203,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8682125745758791,
+ "ci95_low": 0.8297042311191375,
+ "ci95_high": 0.9027833971075764,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9138755980861244,
+ "ci95_low": 0.8755980861244019,
+ "ci95_high": 0.9473684210526315,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.09090909090909091,
+ "ci95_low": 0.05741626794258373,
+ "ci95_high": 0.1339712918660287,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6908243079099411,
+ "ci95_low": 0.6527956881321332,
+ "ci95_high": 0.730667442527559,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8986545902493761,
+ "ci95_low": 0.8565949999154023,
+ "ci95_high": 0.9324651909823733,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.5905006274817894,
+ "ci95_low": 0.5471619665115219,
+ "ci95_high": 0.634215483414591,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9425837320574163,
+ "ci95_low": 0.9170653907496013,
+ "ci95_high": 0.9649122807017544,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.09090909090909091,
+ "ci95_low": 0.05741626794258373,
+ "ci95_high": 0.1291866028708134,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9102990033222591,
+ "ci95_low": 0.8697829716193656,
+ "ci95_high": 0.945364238410596,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.48354577186462566,
+ "ci95_low": 0.4346516171623667,
+ "ci95_high": 0.5316574477233446,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.6994513482643615,
+ "ci95_low": 0.6534688342674022,
+ "ci95_high": 0.7450193134485213,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8877327474066661,
+ "ci95_low": 0.8455701162736732,
+ "ci95_high": 0.9253341060433717,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8648873468163036,
+ "ci95_low": 0.8227429710350077,
+ "ci95_high": 0.9011952418596643,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9102990033222591,
+ "ci95_low": 0.8712871287128713,
+ "ci95_high": 0.95,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.08637873754152824,
+ "ci95_low": 0.05263157894736842,
+ "ci95_high": 0.12333333333333334,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.6902432891785512,
+ "ci95_low": 0.6505572217488498,
+ "ci95_high": 0.7275144345913239,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8951617844869407,
+ "ci95_low": 0.853049748289605,
+ "ci95_high": 0.9315987904993648,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.5914985600644935,
+ "ci95_low": 0.5510313120398233,
+ "ci95_high": 0.633734265771322,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9401993355481728,
+ "ci95_low": 0.9113300492610837,
+ "ci95_high": 0.9666110183639399,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.08637873754152824,
+ "ci95_low": 0.05,
+ "ci95_high": 0.12331081081081081,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "key_advantages[10]",
+ "count": 1
+ },
+ {
+ "path": "key_advantages[11]",
+ "count": 1
+ },
+ {
+ "path": "key_advantages[7]",
+ "count": 1
+ },
+ {
+ "path": "key_advantages[8]",
+ "count": 1
+ },
+ {
+ "path": "key_advantages[9]",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[3].benefit",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[3].configuration",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[3].type",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[4].benefit",
+ "count": 1
+ },
+ {
+ "path": "spring_mechanisms[4].type",
+ "count": 1
+ },
+ {
+ "path": "inner_model_assessment.r_squared_interpretation[4].context",
+ "count": 1
+ },
+ {
+ "path": "inner_model_assessment.r_squared_interpretation[4].description",
+ "count": 1
+ },
+ {
+ "path": "inner_model_assessment.r_squared_interpretation[4].r_squared_value",
+ "count": 1
+ },
+ {
+ "path": "study_objectives[3]",
+ "count": 1
+ },
+ {
+ "path": "experimental_techniques.equilibrium_criteria[2]",
+ "count": 1
+ },
+ {
+ "path": "potential_outsourcing_risks[6].type",
+ "count": 1
+ },
+ {
+ "path": "potential_outsourcing_risks[7].description",
+ "count": 1
+ },
+ {
+ "path": "potential_outsourcing_risks[7].type",
+ "count": 1
+ },
+ {
+ "path": "potential_outsourcing_risks[8].description",
+ "count": 1
+ },
+ {
+ "path": "potential_outsourcing_risks[8].type",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/openai-gpt-5/eval_records.jsonl b/data/evaluation/image/openai-gpt-5/eval_records.jsonl
new file mode 100644
index 0000000..4210ce2
--- /dev/null
+++ b/data/evaluation/image/openai-gpt-5/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23e690728069b7293058f81d15e1f3711c81685aa54b5c185a67bbe4b1208f01
+size 137526
diff --git a/data/evaluation/image/openai-gpt-5/eval_summary.json b/data/evaluation/image/openai-gpt-5/eval_summary.json
new file mode 100644
index 0000000..943aedb
--- /dev/null
+++ b/data/evaluation/image/openai-gpt-5/eval_summary.json
@@ -0,0 +1,348 @@
+{
+ "response_file": "data/images_responses/response_openai-gpt-5_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "gpt-5"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9330143540669856,
+ "ci95_low": 0.8947368421052632,
+ "ci95_high": 0.9617224880382775,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5216239125475759,
+ "ci95_low": 0.476915086145799,
+ "ci95_high": 0.5663111559879663,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7354240481461227,
+ "ci95_low": 0.6973616397283108,
+ "ci95_high": 0.77428336310157,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9189518221967927,
+ "ci95_low": 0.8831428301182511,
+ "ci95_high": 0.9516681333318107,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8940576996858097,
+ "ci95_low": 0.8580767113092831,
+ "ci95_high": 0.9257937535467254,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9330143540669856,
+ "ci95_low": 0.8995215311004785,
+ "ci95_high": 0.9665071770334929,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.09090909090909091,
+ "ci95_low": 0.05263157894736842,
+ "ci95_high": 0.1339712918660287,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7253332609634972,
+ "ci95_low": 0.6872332455787248,
+ "ci95_high": 0.7618225515977544,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9200288026065937,
+ "ci95_low": 0.8839784517967013,
+ "ci95_high": 0.9498931133317327,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6285239803468493,
+ "ci95_low": 0.5898790355828799,
+ "ci95_high": 0.665510143565641,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9553429027113237,
+ "ci95_low": 0.9298245614035088,
+ "ci95_high": 0.974481658692185,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.09090909090909091,
+ "ci95_low": 0.05263157894736842,
+ "ci95_high": 0.1291866028708134,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9318936877076412,
+ "ci95_low": 0.8923327895595432,
+ "ci95_high": 0.9663865546218487,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5259430249707161,
+ "ci95_low": 0.4749760049057011,
+ "ci95_high": 0.5695485162774524,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7367163715307037,
+ "ci95_low": 0.6959693212997398,
+ "ci95_high": 0.7771745800655014,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.917468647592562,
+ "ci95_low": 0.8791491781400154,
+ "ci95_high": 0.9496147833764957,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8930140233415385,
+ "ci95_low": 0.8575682912438268,
+ "ci95_high": 0.9250444563248371,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9318936877076412,
+ "ci95_low": 0.8945634266886326,
+ "ci95_high": 0.9651741293532339,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.08970099667774087,
+ "ci95_low": 0.05271828665568369,
+ "ci95_high": 0.1282051282051282,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7267093480313273,
+ "ci95_low": 0.6933402382893399,
+ "ci95_high": 0.7634712122131851,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.918933799585607,
+ "ci95_low": 0.8841607762593543,
+ "ci95_high": 0.9499857699021379,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.63132969825071,
+ "ci95_low": 0.5895427984292296,
+ "ci95_high": 0.6689722013095222,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.954595791805094,
+ "ci95_low": 0.931111111111111,
+ "ci95_high": 0.9751412429378532,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.08970099667774087,
+ "ci95_low": 0.05306799336650083,
+ "ci95_high": 0.13179571663920922,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "key_advantages[11]",
+ "count": 1
+ },
+ {
+ "path": "study_objectives[3]",
+ "count": 1
+ },
+ {
+ "path": "prayer_support_entities[2]",
+ "count": 1
+ },
+ {
+ "path": "contractor_responsibilities.indemnification_details[2]",
+ "count": 1
+ },
+ {
+ "path": "key_partners_for_initiative[4]",
+ "count": 1
+ },
+ {
+ "path": "experimental_techniques.equilibrium_criteria[2]",
+ "count": 1
+ },
+ {
+ "path": "potential_outsourcing_risks[8].description",
+ "count": 1
+ },
+ {
+ "path": "potential_outsourcing_risks[8].type",
+ "count": 1
+ },
+ {
+ "path": "benefits_of_previews[4]",
+ "count": 1
+ },
+ {
+ "path": "benefits_of_previews[5]",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[5].component_involved",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[5].observed_effects",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[5].specific_action",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[5].target_process",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[6].component_involved",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[6].observed_effects",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[6].specific_action",
+ "count": 1
+ },
+ {
+ "path": "red_fruit_inhibition_mechanisms[6].target_process",
+ "count": 1
+ },
+ {
+ "path": "impacts_of_adverse_events[0].examples[10]",
+ "count": 1
+ },
+ {
+ "path": "impacts_of_adverse_events[0].examples[11]",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/phi-4/eval_records.jsonl b/data/evaluation/image/phi-4/eval_records.jsonl
new file mode 100644
index 0000000..07bd05d
--- /dev/null
+++ b/data/evaluation/image/phi-4/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe170bc162ccdb574a12560d8f5012e1ab04064603529bbc5dd72e83e0a1acd7
+size 171428
diff --git a/data/evaluation/image/phi-4/eval_summary.json b/data/evaluation/image/phi-4/eval_summary.json
new file mode 100644
index 0000000..b00aa5a
--- /dev/null
+++ b/data/evaluation/image/phi-4/eval_summary.json
@@ -0,0 +1,430 @@
+{
+ "response_file": "data/images_responses/response_phi-4_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "microsoft/phi-4"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 3,
+ "json_non_structured_root_count": 3,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9856459330143541,
+ "ci95_low": 0.9665071770334929,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9856459330143541,
+ "ci95_low": 0.9665071770334929,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8755980861244019,
+ "ci95_low": 0.8277511961722488,
+ "ci95_high": 0.9186602870813397,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5522529861963452,
+ "ci95_low": 0.5053363480904247,
+ "ci95_high": 0.600266807385062,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7240553754491865,
+ "ci95_low": 0.6795515312056803,
+ "ci95_high": 0.7704401926985424,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8439477773569353,
+ "ci95_low": 0.7965063504775524,
+ "ci95_high": 0.8845542929906615,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8450196062744465,
+ "ci95_low": 0.8001801910581385,
+ "ci95_high": 0.8831167319225168,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8755980861244019,
+ "ci95_low": 0.8229665071770335,
+ "ci95_high": 0.9138755980861244,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.10047846889952153,
+ "ci95_low": 0.06220095693779904,
+ "ci95_high": 0.13875598086124402,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7067520463341557,
+ "ci95_low": 0.665451303762021,
+ "ci95_high": 0.7475978139991514,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.86540525950775,
+ "ci95_low": 0.8182309750925548,
+ "ci95_high": 0.9037087485152654,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6381541808227659,
+ "ci95_low": 0.5968156585095029,
+ "ci95_high": 0.6797972208700523,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9122807017543859,
+ "ci95_low": 0.8819776714513557,
+ "ci95_high": 0.9409888357256778,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.10047846889952153,
+ "ci95_low": 0.06220095693779904,
+ "ci95_high": 0.14354066985645933,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9883720930232558,
+ "ci95_low": 0.9734660033167496,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9883720930232558,
+ "ci95_low": 0.9736408566721582,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8754152823920266,
+ "ci95_low": 0.8281505728314239,
+ "ci95_high": 0.9165275459098498,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5570805122905684,
+ "ci95_low": 0.5076459589648217,
+ "ci95_high": 0.604594136430062,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7254523364498268,
+ "ci95_low": 0.6773269548659177,
+ "ci95_high": 0.7728502802626721,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8434758421594668,
+ "ci95_low": 0.7968598067712696,
+ "ci95_high": 0.8862954126115055,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8449409521242762,
+ "ci95_low": 0.7966866528836362,
+ "ci95_high": 0.8901906989734327,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8754152823920266,
+ "ci95_low": 0.8319327731092437,
+ "ci95_high": 0.9216666666666666,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.09966777408637874,
+ "ci95_low": 0.06218487394957983,
+ "ci95_high": 0.14262023217247097,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7086695636332874,
+ "ci95_low": 0.664979632294662,
+ "ci95_high": 0.749797120346074,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8652571723027764,
+ "ci95_low": 0.8198131217635102,
+ "ci95_high": 0.9091745606005324,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6412664243701977,
+ "ci95_low": 0.6006148023401112,
+ "ci95_high": 0.6831505598728156,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9130675526024362,
+ "ci95_low": 0.8788888888888889,
+ "ci95_high": 0.9457494407158836,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.09966777408637874,
+ "ci95_low": 0.0628099173553719,
+ "ci95_high": 0.1423785594639866,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "market_dynamics_differences[0].characteristics[3]",
+ "count": 1
+ },
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].is_meeting",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "schedule",
+ "count": 1
+ },
+ {
+ "path": "schedule[].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].time",
+ "count": 1
+ },
+ {
+ "path": "year",
+ "count": 1
+ },
+ {
+ "path": "shared_values_importance",
+ "count": 1
+ },
+ {
+ "path": "formalization_methods",
+ "count": 1
+ },
+ {
+ "path": "manager_key_message",
+ "count": 1
+ },
+ {
+ "path": "figure_4_concept",
+ "count": 1
+ },
+ {
+ "path": "van_vught_illustration_summary",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types[].name",
+ "count": 1
+ },
+ {
+ "path": "conclusions",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].id",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].authors",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/image/zai-org-GLM-4.7/eval_records.jsonl b/data/evaluation/image/zai-org-GLM-4.7/eval_records.jsonl
new file mode 100644
index 0000000..4ef20c2
--- /dev/null
+++ b/data/evaluation/image/zai-org-GLM-4.7/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08faf17eb9d99c26f50463610794872c1aad73e4636313951aae44096fcc3b0c
+size 164409
diff --git a/data/evaluation/image/zai-org-GLM-4.7/eval_summary.json b/data/evaluation/image/zai-org-GLM-4.7/eval_summary.json
new file mode 100644
index 0000000..dec22b8
--- /dev/null
+++ b/data/evaluation/image/zai-org-GLM-4.7/eval_summary.json
@@ -0,0 +1,430 @@
+{
+ "response_file": "data/images_responses/response_zai-org-GLM-4.7_image.jsonl",
+ "num_records": 209,
+ "model_ids": [
+ "zai-org/GLM-4.7"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 3,
+ "json_non_structured_root_count": 3,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9856459330143541,
+ "ci95_low": 0.9665071770334929,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9856459330143541,
+ "ci95_low": 0.9665071770334929,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8516746411483254,
+ "ci95_low": 0.8038277511961722,
+ "ci95_high": 0.8995215311004785,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.571782919167694,
+ "ci95_low": 0.5230908809300819,
+ "ci95_high": 0.6205750419651556,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.746606251463296,
+ "ci95_low": 0.6971869950540396,
+ "ci95_high": 0.7922944546197637,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.839602707202921,
+ "ci95_low": 0.7904060039066545,
+ "ci95_high": 0.8884442746546901,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8326528006575471,
+ "ci95_low": 0.7849430959863487,
+ "ci95_high": 0.8776174466981828,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8516746411483254,
+ "ci95_low": 0.8038277511961722,
+ "ci95_high": 0.8947368421052632,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.11483253588516747,
+ "ci95_low": 0.07655502392344497,
+ "ci95_high": 0.16267942583732056,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7193306259446369,
+ "ci95_low": 0.6755510516532879,
+ "ci95_high": 0.7620684586441848,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8453340276513993,
+ "ci95_low": 0.793094730288873,
+ "ci95_high": 0.8920861860003584,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6591945853154949,
+ "ci95_low": 0.6146008707622788,
+ "ci95_high": 0.7003271630729728,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8963317384370016,
+ "ci95_low": 0.8596491228070176,
+ "ci95_high": 0.9298245614035088,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.11483253588516747,
+ "ci95_low": 0.07177033492822966,
+ "ci95_high": 0.15789473684210525,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 209,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9867109634551495,
+ "ci95_low": 0.9700996677740864,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9867109634551495,
+ "ci95_low": 0.97,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8488372093023255,
+ "ci95_low": 0.795417348608838,
+ "ci95_high": 0.8973509933774835,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.5746838875833915,
+ "ci95_low": 0.5273747768316273,
+ "ci95_high": 0.6268064290478785,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7467098102247003,
+ "ci95_low": 0.6965390459155029,
+ "ci95_high": 0.796588169442986,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8371031432249605,
+ "ci95_low": 0.7907920345695837,
+ "ci95_high": 0.8849941580611089,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8304585872494332,
+ "ci95_low": 0.779333027903205,
+ "ci95_high": 0.8803152848856406,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8488372093023255,
+ "ci95_low": 0.7996688741721855,
+ "ci95_high": 0.8983333333333333,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.11295681063122924,
+ "ci95_low": 0.07107438016528926,
+ "ci95_high": 0.15625,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7194989470110175,
+ "ci95_low": 0.6747629657208503,
+ "ci95_high": 0.7621338744872148,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8427110019513615,
+ "ci95_low": 0.7932360262543804,
+ "ci95_high": 0.8883674740342937,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.6606968489040459,
+ "ci95_low": 0.6142564354253067,
+ "ci95_high": 0.7042131342254616,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8947951273532668,
+ "ci95_low": 0.8610188261351053,
+ "ci95_high": 0.9297385620915033,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.11295681063122924,
+ "ci95_low": 0.07166666666666667,
+ "ci95_high": 0.15551839464882944,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ },
+ "error_analysis": {
+ "top_missing_gt_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[0].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[1].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[2].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[3].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[4].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "schedule[0].events[5].name",
+ "count": 1
+ }
+ ],
+ "top_missing_required_paths": [
+ {
+ "path": "month",
+ "count": 1
+ },
+ {
+ "path": "year",
+ "count": 1
+ },
+ {
+ "path": "schedule",
+ "count": 1
+ },
+ {
+ "path": "schedule[].day_of_week",
+ "count": 1
+ },
+ {
+ "path": "schedule[].date",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].time",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].name",
+ "count": 1
+ },
+ {
+ "path": "schedule[].events[].is_meeting",
+ "count": 1
+ },
+ {
+ "path": "judicial_idiosyncrasy_definition",
+ "count": 1
+ },
+ {
+ "path": "judicial_idiosyncrasy_definition.meaning",
+ "count": 1
+ },
+ {
+ "path": "judicial_idiosyncrasy_definition.term",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types",
+ "count": 1
+ },
+ {
+ "path": "other_laser_types[].name",
+ "count": 1
+ },
+ {
+ "path": "conclusions",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].id",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].authors",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].title",
+ "count": 1
+ },
+ {
+ "path": "recommended_bibliography[].journal_info",
+ "count": 1
+ }
+ ]
+ }
+}
diff --git a/data/evaluation/text/DeepSeek-R1-Distill-Qwen-32B/eval_records.jsonl b/data/evaluation/text/DeepSeek-R1-Distill-Qwen-32B/eval_records.jsonl
new file mode 100644
index 0000000..6dba2a6
--- /dev/null
+++ b/data/evaluation/text/DeepSeek-R1-Distill-Qwen-32B/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54e5d2b653d5b330e12b94a1e18a059d0791c9fd5fe39011f513f7aadab08116
+size 2982358
diff --git a/data/evaluation/text/DeepSeek-R1-Distill-Qwen-32B/eval_summary.json b/data/evaluation/text/DeepSeek-R1-Distill-Qwen-32B/eval_summary.json
new file mode 100644
index 0000000..f0307fa
--- /dev/null
+++ b/data/evaluation/text/DeepSeek-R1-Distill-Qwen-32B/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_DeepSeek-R1-Distill-Qwen-32B.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 56,
+ "json_non_structured_root_count": 56,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9888,
+ "ci95_low": 0.9852,
+ "ci95_high": 0.9916,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9888,
+ "ci95_low": 0.9856,
+ "ci95_high": 0.9918,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9654,
+ "ci95_low": 0.9586,
+ "ci95_high": 0.9702,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7789097770019126,
+ "ci95_low": 0.7708740639056879,
+ "ci95_high": 0.7873516399776694,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8428248226149974,
+ "ci95_low": 0.8329000326686493,
+ "ci95_high": 0.8501665760867145,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9567801862873617,
+ "ci95_low": 0.9505978030973247,
+ "ci95_high": 0.9613903671620777,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9569885800283917,
+ "ci95_low": 0.9520179166374101,
+ "ci95_high": 0.9612071000607146,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9654,
+ "ci95_low": 0.9596,
+ "ci95_high": 0.9702,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.442,
+ "ci95_low": 0.427,
+ "ci95_high": 0.4582,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8595049286347572,
+ "ci95_low": 0.8523502634795632,
+ "ci95_high": 0.8661268619736676,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9625961933427972,
+ "ci95_low": 0.9581027151522951,
+ "ci95_high": 0.9668802157666471,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.810867299808455,
+ "ci95_low": 0.8022676814216981,
+ "ci95_high": 0.8181828006689652,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9732,
+ "ci95_low": 0.9693333333333334,
+ "ci95_high": 0.9766666666666666,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.442,
+ "ci95_low": 0.4278,
+ "ci95_high": 0.4572,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9881262448291711,
+ "ci95_low": 0.9848891616169364,
+ "ci95_high": 0.9912133251833741,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9881262448291711,
+ "ci95_low": 0.9847824424562208,
+ "ci95_high": 0.990780577750461,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9638425003830243,
+ "ci95_low": 0.9577215773695135,
+ "ci95_high": 0.9688671307274536,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7731816599595219,
+ "ci95_low": 0.7637179798152969,
+ "ci95_high": 0.782934710079144,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8375972927247738,
+ "ci95_low": 0.828365153372369,
+ "ci95_high": 0.8469303043725173,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9546238123327622,
+ "ci95_low": 0.9491313220117539,
+ "ci95_high": 0.959691023455533,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9549976796768813,
+ "ci95_low": 0.9486488662172382,
+ "ci95_high": 0.9600929025258829,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9638425003830243,
+ "ci95_low": 0.9582409177820268,
+ "ci95_high": 0.9684080752041917,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4270721617894898,
+ "ci95_low": 0.4142615904206325,
+ "ci95_high": 0.44002457379818766,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.855134255005686,
+ "ci95_low": 0.8477746668378506,
+ "ci95_high": 0.8617005453823272,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.96089422681431,
+ "ci95_low": 0.9546749536005656,
+ "ci95_high": 0.9662230860379908,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8053894763421479,
+ "ci95_low": 0.7950442555021097,
+ "ci95_high": 0.8142992028349926,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9719370818650732,
+ "ci95_low": 0.9665187678498572,
+ "ci95_high": 0.9769140704540826,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4270721617894898,
+ "ci95_low": 0.4120316947457497,
+ "ci95_high": 0.4416839199447895,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/Ministral-3-14B-Instruct-2512/eval_records.jsonl b/data/evaluation/text/Ministral-3-14B-Instruct-2512/eval_records.jsonl
new file mode 100644
index 0000000..e1b22c1
--- /dev/null
+++ b/data/evaluation/text/Ministral-3-14B-Instruct-2512/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fc267b5d36e715e122bf89fb44036381e72622608857362bca72bd646574f52
+size 2977182
diff --git a/data/evaluation/text/Ministral-3-14B-Instruct-2512/eval_summary.json b/data/evaluation/text/Ministral-3-14B-Instruct-2512/eval_summary.json
new file mode 100644
index 0000000..05c4ed3
--- /dev/null
+++ b/data/evaluation/text/Ministral-3-14B-Instruct-2512/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_Ministral-3-14B-Instruct-2512.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "mistralai/Ministral-3-14B-Instruct-2512"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 12,
+ "json_non_structured_root_count": 12,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9976,
+ "ci95_low": 0.996,
+ "ci95_high": 0.9988,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9976,
+ "ci95_low": 0.996,
+ "ci95_high": 0.999,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9098,
+ "ci95_low": 0.9016,
+ "ci95_high": 0.917,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7280334607272523,
+ "ci95_low": 0.7186029878014852,
+ "ci95_high": 0.737814494066604,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7918623772250899,
+ "ci95_low": 0.7800706043932953,
+ "ci95_high": 0.8016751402607329,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9051096398263954,
+ "ci95_low": 0.8975102703233493,
+ "ci95_high": 0.912615768008776,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9030556479650446,
+ "ci95_low": 0.8949806493541234,
+ "ci95_high": 0.9106238971221957,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9098,
+ "ci95_low": 0.902,
+ "ci95_high": 0.9186,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.394,
+ "ci95_low": 0.3812,
+ "ci95_high": 0.4062,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8083351592595792,
+ "ci95_low": 0.7995626081793498,
+ "ci95_high": 0.8166834224022959,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.907551882655015,
+ "ci95_low": 0.9002923941330644,
+ "ci95_high": 0.9150875218844496,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.7599479189761711,
+ "ci95_low": 0.7506849947324589,
+ "ci95_high": 0.7701025904217768,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9390666666666666,
+ "ci95_low": 0.9339333333333334,
+ "ci95_high": 0.9441333333333334,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.394,
+ "ci95_low": 0.3796,
+ "ci95_high": 0.4088,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.997778458709974,
+ "ci95_low": 0.9964031529807913,
+ "ci95_high": 0.9990013827008757,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.997778458709974,
+ "ci95_low": 0.996542451018056,
+ "ci95_high": 0.9990045941807044,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9093764363413513,
+ "ci95_low": 0.9010963735336962,
+ "ci95_high": 0.916724124722286,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7242820506745328,
+ "ci95_low": 0.7141841789164477,
+ "ci95_high": 0.7341203292279455,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7884206995318576,
+ "ci95_low": 0.7773116219378398,
+ "ci95_high": 0.7994495749637022,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9044894247687516,
+ "ci95_low": 0.8969168598036076,
+ "ci95_high": 0.91208122653089,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9023217462714745,
+ "ci95_low": 0.8929788244858239,
+ "ci95_high": 0.9109920629804668,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9093764363413513,
+ "ci95_low": 0.9010356731875719,
+ "ci95_high": 0.9187743562313746,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.38164547265206067,
+ "ci95_low": 0.366579914268218,
+ "ci95_high": 0.3990185554362828,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8057307249917139,
+ "ci95_low": 0.7978906920975878,
+ "ci95_high": 0.8135664706438271,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9070248729847258,
+ "ci95_low": 0.8997061593532892,
+ "ci95_high": 0.9144609613616174,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.7563513751031951,
+ "ci95_low": 0.7469076682151845,
+ "ci95_high": 0.7652440806753683,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9388437771308922,
+ "ci95_low": 0.9334099616858238,
+ "ci95_high": 0.9439362492633304,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.38164547265206067,
+ "ci95_low": 0.3686430927360589,
+ "ci95_high": 0.3957949662369552,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_records.jsonl b/data/evaluation/text/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_records.jsonl
new file mode 100644
index 0000000..864c071
--- /dev/null
+++ b/data/evaluation/text/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc004dc08db7c4f046a9f106604614a1c3ff804ef0f8fbdd3966516402471f86
+size 2995864
diff --git a/data/evaluation/text/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_summary.json b/data/evaluation/text/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_summary.json
new file mode 100644
index 0000000..cc5cae2
--- /dev/null
+++ b/data/evaluation/text/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 3,
+ "json_non_structured_root_count": 3,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9994,
+ "ci95_low": 0.9984,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9994,
+ "ci95_low": 0.9988,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.992,
+ "ci95_low": 0.9896,
+ "ci95_high": 0.9942,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7788045284617344,
+ "ci95_low": 0.7696879760762113,
+ "ci95_high": 0.7883228449818156,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8402661722061348,
+ "ci95_low": 0.8310216826044392,
+ "ci95_high": 0.848721106422434,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9834369055722347,
+ "ci95_low": 0.9805798707746737,
+ "ci95_high": 0.9855861185814477,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9811063205907203,
+ "ci95_low": 0.9785048499305949,
+ "ci95_high": 0.9838461438525801,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.992,
+ "ci95_low": 0.989,
+ "ci95_high": 0.9946,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4296,
+ "ci95_low": 0.4172,
+ "ci95_high": 0.442,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8675025354133679,
+ "ci95_low": 0.862139109626256,
+ "ci95_high": 0.8741686751559812,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9883687735302401,
+ "ci95_low": 0.9856498658776778,
+ "ci95_high": 0.9908161914321266,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8095353503339345,
+ "ci95_low": 0.8009682366012519,
+ "ci95_high": 0.8179575186923151,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9944666666666666,
+ "ci95_low": 0.9926,
+ "ci95_high": 0.9962,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4296,
+ "ci95_low": 0.4144,
+ "ci95_high": 0.4462,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9993105561513712,
+ "ci95_low": 0.9983935128518971,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9993105561513712,
+ "ci95_low": 0.9983858570330515,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.991037229967826,
+ "ci95_low": 0.9876760563380281,
+ "ci95_high": 0.9939482151064808,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7737410120536432,
+ "ci95_low": 0.7652948205992259,
+ "ci95_high": 0.7826748443965592,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8354167260352635,
+ "ci95_low": 0.8271333930424157,
+ "ci95_high": 0.8434138169784906,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9821284097352688,
+ "ci95_low": 0.979189112909612,
+ "ci95_high": 0.9854049258697027,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9796529487822787,
+ "ci95_low": 0.9764163116290755,
+ "ci95_high": 0.9826592922689757,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.991037229967826,
+ "ci95_low": 0.9880027685918634,
+ "ci95_high": 0.9938814531548757,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.41642408457177876,
+ "ci95_low": 0.40221062327295054,
+ "ci95_high": 0.4309124480689337,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8637620492747252,
+ "ci95_low": 0.8573150551144955,
+ "ci95_high": 0.8697082833320191,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9872424695726436,
+ "ci95_low": 0.9839003832555725,
+ "ci95_high": 0.9899556700183766,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8045788690444534,
+ "ci95_low": 0.7940895089125163,
+ "ci95_high": 0.8129294440382563,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.993795005362341,
+ "ci95_low": 0.9920307566962012,
+ "ci95_high": 0.9958680847807789,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.41642408457177876,
+ "ci95_low": 0.39822670641290225,
+ "ci95_high": 0.43093415007656966,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/Qwen3-235B-A22B-Instruct-2507/eval_records.jsonl b/data/evaluation/text/Qwen3-235B-A22B-Instruct-2507/eval_records.jsonl
new file mode 100644
index 0000000..bc87fcf
--- /dev/null
+++ b/data/evaluation/text/Qwen3-235B-A22B-Instruct-2507/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78dc6eec2b3d8badb48f6601895c0f81f55ecd4705026e919ed2c0e6426ca848
+size 2948724
diff --git a/data/evaluation/text/Qwen3-235B-A22B-Instruct-2507/eval_summary.json b/data/evaluation/text/Qwen3-235B-A22B-Instruct-2507/eval_summary.json
new file mode 100644
index 0000000..4c6104d
--- /dev/null
+++ b/data/evaluation/text/Qwen3-235B-A22B-Instruct-2507/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_Qwen3-235B-A22B-Instruct-2507.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "Qwen/Qwen3-235B-A22B-Instruct-2507"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9828,
+ "ci95_low": 0.9796,
+ "ci95_high": 0.9856,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8153542208935604,
+ "ci95_low": 0.8082487915851666,
+ "ci95_high": 0.8226297624193832,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.873668158600983,
+ "ci95_low": 0.8661186074137278,
+ "ci95_high": 0.8813897972358502,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9781475399373976,
+ "ci95_low": 0.9742959184316633,
+ "ci95_high": 0.9820071938455809,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9754685248082522,
+ "ci95_low": 0.9720806263450787,
+ "ci95_high": 0.9795799271519723,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9828,
+ "ci95_low": 0.9786,
+ "ci95_high": 0.987,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.494,
+ "ci95_low": 0.4806,
+ "ci95_high": 0.5074,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.889056639810647,
+ "ci95_low": 0.8834098676767453,
+ "ci95_high": 0.8943423591147767,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9803561749360841,
+ "ci95_low": 0.9762868523587358,
+ "ci95_high": 0.9838871641629852,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8445111897472718,
+ "ci95_low": 0.8364293474629138,
+ "ci95_high": 0.8519205974223282,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9885333333333334,
+ "ci95_low": 0.9861333333333334,
+ "ci95_high": 0.9913333333333334,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.494,
+ "ci95_low": 0.4774,
+ "ci95_high": 0.5078,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9815382258311629,
+ "ci95_low": 0.9768348623853211,
+ "ci95_high": 0.985227360160037,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8113247243081531,
+ "ci95_low": 0.8025039866340027,
+ "ci95_high": 0.8201432904826422,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8699028328689945,
+ "ci95_low": 0.8620076657357748,
+ "ci95_high": 0.876662801533545,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9766568429735173,
+ "ci95_low": 0.9723703087424423,
+ "ci95_high": 0.980349363900011,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.973934038341358,
+ "ci95_low": 0.9702255529411328,
+ "ci95_high": 0.9770498163718003,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9815382258311629,
+ "ci95_low": 0.9779967911987165,
+ "ci95_high": 0.9857841638642617,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4810785965987437,
+ "ci95_low": 0.4680964738207907,
+ "ci95_high": 0.49573699976956753,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8859614667168884,
+ "ci95_low": 0.8800167156440121,
+ "ci95_high": 0.8919368238203124,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9790034966678945,
+ "ci95_low": 0.9747614808414238,
+ "ci95_high": 0.9828112304531248,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8406137785885739,
+ "ci95_low": 0.8326521332801577,
+ "ci95_high": 0.8489778404060612,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9876921505541086,
+ "ci95_low": 0.9850788073230973,
+ "ci95_high": 0.9904659936779852,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4810785965987437,
+ "ci95_low": 0.4670750382848392,
+ "ci95_high": 0.4975403535741737,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/Qwen3-30B-A3B-Instruct-2507/eval_records.jsonl b/data/evaluation/text/Qwen3-30B-A3B-Instruct-2507/eval_records.jsonl
new file mode 100644
index 0000000..3428d07
--- /dev/null
+++ b/data/evaluation/text/Qwen3-30B-A3B-Instruct-2507/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4192fb5d4664ae7a45e3fc0eadfc8eb79db8f8b4a15f16a761bc6354d9ff8e44
+size 2943975
diff --git a/data/evaluation/text/Qwen3-30B-A3B-Instruct-2507/eval_summary.json b/data/evaluation/text/Qwen3-30B-A3B-Instruct-2507/eval_summary.json
new file mode 100644
index 0000000..9a178d4
--- /dev/null
+++ b/data/evaluation/text/Qwen3-30B-A3B-Instruct-2507/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_Qwen3-30B-A3B-Instruct-2507.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "Qwen/Qwen3-30B-A3B-Instruct-2507"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 1,
+ "json_non_structured_root_count": 1,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9998,
+ "ci95_low": 0.9994,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9998,
+ "ci95_low": 0.9994,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9886,
+ "ci95_low": 0.9854,
+ "ci95_high": 0.9912,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7818832414446952,
+ "ci95_low": 0.7744013240981958,
+ "ci95_high": 0.7898343304571246,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8529258251801773,
+ "ci95_low": 0.8448245691024322,
+ "ci95_high": 0.8616684840386534,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9837111729998164,
+ "ci95_low": 0.9806704255874843,
+ "ci95_high": 0.9863332022450808,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9790853883965547,
+ "ci95_low": 0.976018664889496,
+ "ci95_high": 0.9820716776616895,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9886,
+ "ci95_low": 0.9856,
+ "ci95_high": 0.9912,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.427,
+ "ci95_low": 0.4114,
+ "ci95_high": 0.4402,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8728400798748962,
+ "ci95_low": 0.8665946107584159,
+ "ci95_high": 0.8787753142362937,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9854284627988517,
+ "ci95_low": 0.9823219391764837,
+ "ci95_high": 0.988072375203332,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8174045333124362,
+ "ci95_low": 0.8095203149199518,
+ "ci95_high": 0.8245297910592037,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9923333333333334,
+ "ci95_low": 0.9905333333333334,
+ "ci95_high": 0.9941333333333334,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.427,
+ "ci95_low": 0.41,
+ "ci95_high": 0.4404,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9997701853837904,
+ "ci95_low": 0.999306625577812,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9997701853837904,
+ "ci95_low": 0.9993081180811808,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.987666615596752,
+ "ci95_low": 0.9849531705819131,
+ "ci95_high": 0.9906549214860206,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.777995261967025,
+ "ci95_low": 0.7697888495229487,
+ "ci95_high": 0.7858888486408432,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.849481552534382,
+ "ci95_low": 0.8418364837842254,
+ "ci95_high": 0.8565274092204764,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9825148504809642,
+ "ci95_low": 0.9783984051927783,
+ "ci95_high": 0.986035749709599,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9777384412562642,
+ "ci95_low": 0.9744342996542816,
+ "ci95_high": 0.9807345423115558,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.987666615596752,
+ "ci95_low": 0.9848543092181133,
+ "ci95_high": 0.9910172744721689,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4144323578979623,
+ "ci95_low": 0.40248009797917944,
+ "ci95_high": 0.4278271529267545,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8699972216607903,
+ "ci95_low": 0.8638861298326501,
+ "ci95_high": 0.8769571964632036,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9843572241499228,
+ "ci95_low": 0.9810973388578391,
+ "ci95_high": 0.9876822114037797,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8137384072507033,
+ "ci95_low": 0.8050570244319913,
+ "ci95_high": 0.8223038423503096,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.991701138859098,
+ "ci95_low": 0.9895263765883712,
+ "ci95_high": 0.9936079363861828,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4144323578979623,
+ "ci95_low": 0.4009680393362016,
+ "ci95_high": 0.4317692307692308,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/Qwen3.5-35B-A3B/eval_records.jsonl b/data/evaluation/text/Qwen3.5-35B-A3B/eval_records.jsonl
new file mode 100644
index 0000000..0b19f64
--- /dev/null
+++ b/data/evaluation/text/Qwen3.5-35B-A3B/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76edf54928817afb8eb5f9f9158436c6f419e481e6b95787562b0d4cf991bc2b
+size 2875090
diff --git a/data/evaluation/text/Qwen3.5-35B-A3B/eval_summary.json b/data/evaluation/text/Qwen3.5-35B-A3B/eval_summary.json
new file mode 100644
index 0000000..cd269e1
--- /dev/null
+++ b/data/evaluation/text/Qwen3.5-35B-A3B/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_Qwen3.5-35B-A3B.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "Qwen/Qwen3.5-35B-A3B"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 3,
+ "json_non_structured_root_count": 3,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9994,
+ "ci95_low": 0.9986,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9994,
+ "ci95_low": 0.9988,
+ "ci95_high": 0.9998,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9762,
+ "ci95_low": 0.972,
+ "ci95_high": 0.98,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8318731591044721,
+ "ci95_low": 0.8243719087139211,
+ "ci95_high": 0.8402211307809837,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8838380794193091,
+ "ci95_low": 0.8756014694885184,
+ "ci95_high": 0.8914705731788531,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9723620063176113,
+ "ci95_low": 0.9678914170470052,
+ "ci95_high": 0.9762183024363532,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.970406493807297,
+ "ci95_low": 0.9664750870608722,
+ "ci95_high": 0.9744228625141803,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9760666666666666,
+ "ci95_low": 0.9716,
+ "ci95_high": 0.9797333333333333,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.5306,
+ "ci95_low": 0.5162,
+ "ci95_high": 0.5448,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8960244149471309,
+ "ci95_low": 0.8905268757007041,
+ "ci95_high": 0.9016566769229736,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9742243868246545,
+ "ci95_low": 0.9692887373359678,
+ "ci95_high": 0.9787385317577726,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8578556192618906,
+ "ci95_low": 0.8495985244671499,
+ "ci95_high": 0.8646628573818894,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9838888888888889,
+ "ci95_low": 0.9807333333333333,
+ "ci95_high": 0.9868,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.5306,
+ "ci95_low": 0.5142,
+ "ci95_high": 0.5468,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.999387161023441,
+ "ci95_low": 0.998546511627907,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.999387161023441,
+ "ci95_low": 0.9984697781178271,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9742607629845258,
+ "ci95_low": 0.9696320660904154,
+ "ci95_high": 0.9791252485089463,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8282370864139122,
+ "ci95_low": 0.8211127544714676,
+ "ci95_high": 0.8352230385617313,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.880547221114934,
+ "ci95_low": 0.8727276573008009,
+ "ci95_high": 0.8868211523943648,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9703153826896798,
+ "ci95_low": 0.9644358501007393,
+ "ci95_high": 0.9761398725336671,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9682938163417013,
+ "ci95_low": 0.9640702314924784,
+ "ci95_high": 0.9729771610885852,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9741586231550993,
+ "ci95_low": 0.9700763358778626,
+ "ci95_high": 0.9786383542112231,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.518538379040907,
+ "ci95_low": 0.5041468284441714,
+ "ci95_high": 0.5337475072863936,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8930332300728421,
+ "ci95_low": 0.886030555786149,
+ "ci95_high": 0.8985045541624118,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9722377341604421,
+ "ci95_low": 0.9664645450420271,
+ "ci95_high": 0.976823467082748,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8543921537644231,
+ "ci95_low": 0.8468045842595211,
+ "ci95_high": 0.8622252490848776,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9826021823876887,
+ "ci95_low": 0.9790134803921569,
+ "ci95_high": 0.9857433634373289,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.518538379040907,
+ "ci95_low": 0.5028235653235653,
+ "ci95_high": 0.533,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/claude-sonnet-4-6/eval_records.jsonl b/data/evaluation/text/claude-sonnet-4-6/eval_records.jsonl
new file mode 100644
index 0000000..44733d1
--- /dev/null
+++ b/data/evaluation/text/claude-sonnet-4-6/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a94d640da959a8472cdfc5348e0ddd0984ecd2a51706f71f2689d678a019426
+size 2865534
diff --git a/data/evaluation/text/claude-sonnet-4-6/eval_summary.json b/data/evaluation/text/claude-sonnet-4-6/eval_summary.json
new file mode 100644
index 0000000..df80119
--- /dev/null
+++ b/data/evaluation/text/claude-sonnet-4-6/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_claude-sonnet-4-6.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "claude-sonnet-4-6"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 2,
+ "json_non_structured_root_count": 2,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9996,
+ "ci95_low": 0.999,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9996,
+ "ci95_low": 0.999,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9858,
+ "ci95_low": 0.9822,
+ "ci95_high": 0.9886,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8132007010210591,
+ "ci95_low": 0.8058686414729062,
+ "ci95_high": 0.8209939331583449,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8798868300916088,
+ "ci95_low": 0.8731523690428674,
+ "ci95_high": 0.8882022787543666,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9832333094604941,
+ "ci95_low": 0.9798109442362791,
+ "ci95_high": 0.9861780371957791,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9785922689969139,
+ "ci95_low": 0.9753156727917359,
+ "ci95_high": 0.982159899348117,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9855777777777778,
+ "ci95_low": 0.9823555555555554,
+ "ci95_high": 0.9886888888888888,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.472,
+ "ci95_low": 0.4582,
+ "ci95_high": 0.487,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8921069468577207,
+ "ci95_low": 0.886607002927696,
+ "ci95_high": 0.8968125924066653,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9833233489248973,
+ "ci95_low": 0.9795230592244987,
+ "ci95_high": 0.9863423474065017,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8465437655563339,
+ "ci95_low": 0.8397474685316396,
+ "ci95_high": 0.8532064992112753,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9903259259259259,
+ "ci95_low": 0.9880592592592593,
+ "ci95_high": 0.9925925925925925,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.472,
+ "ci95_low": 0.4602,
+ "ci95_high": 0.4874,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9996169756396507,
+ "ci95_low": 0.9989299908284928,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9996169756396507,
+ "ci95_low": 0.9989256388611772,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9840661866094683,
+ "ci95_low": 0.979996934396076,
+ "ci95_high": 0.9875239188672025,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8086856179472204,
+ "ci95_low": 0.7988055013480095,
+ "ci95_high": 0.8156836267111631,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8756200154130082,
+ "ci95_low": 0.8680989255836106,
+ "ci95_high": 0.8840160113777844,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9813837656899378,
+ "ci95_low": 0.9774048538673318,
+ "ci95_high": 0.9849739949643359,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9766964516218097,
+ "ci95_low": 0.9718651178788984,
+ "ci95_high": 0.9801152932903437,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9838959535604241,
+ "ci95_low": 0.9803277017557271,
+ "ci95_high": 0.9880688336520076,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4588631836984832,
+ "ci95_low": 0.44364106857447294,
+ "ci95_high": 0.47247000922792987,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8885631330167221,
+ "ci95_low": 0.8820003548260822,
+ "ci95_high": 0.8944215173017921,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9815528639305675,
+ "ci95_low": 0.977891704744462,
+ "ci95_high": 0.9859287374408039,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8421528166801142,
+ "ci95_low": 0.8333254010630856,
+ "ci95_high": 0.8502474428433665,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9891930386031811,
+ "ci95_low": 0.986426365156281,
+ "ci95_high": 0.9917805733468292,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4588631836984832,
+ "ci95_low": 0.4435830518882407,
+ "ci95_high": 0.47572294239472274,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/gemini-2.5-flash/eval_records.jsonl b/data/evaluation/text/gemini-2.5-flash/eval_records.jsonl
new file mode 100644
index 0000000..9bb918b
--- /dev/null
+++ b/data/evaluation/text/gemini-2.5-flash/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dd6310f097e759e0a8398484170c7d757124e78b740d9262073d6b02c47cac7
+size 2854952
diff --git a/data/evaluation/text/gemini-2.5-flash/eval_summary.json b/data/evaluation/text/gemini-2.5-flash/eval_summary.json
new file mode 100644
index 0000000..1858f85
--- /dev/null
+++ b/data/evaluation/text/gemini-2.5-flash/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_gemini-2.5-flash.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "gemini-2.5-flash"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 82,
+ "json_non_structured_root_count": 82,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9836,
+ "ci95_low": 0.9794,
+ "ci95_high": 0.9868,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9836,
+ "ci95_low": 0.9802,
+ "ci95_high": 0.9868,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9836,
+ "ci95_low": 0.98,
+ "ci95_high": 0.987,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8258574581230455,
+ "ci95_low": 0.8173914373584642,
+ "ci95_high": 0.8335148920034214,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8776325173756863,
+ "ci95_low": 0.8705534831546682,
+ "ci95_high": 0.8860522941794344,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9805109079703767,
+ "ci95_low": 0.976742606484281,
+ "ci95_high": 0.9842522810743969,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9753541669686673,
+ "ci95_low": 0.9719374013880271,
+ "ci95_high": 0.9790826845443243,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9836,
+ "ci95_low": 0.98,
+ "ci95_high": 0.987,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.527,
+ "ci95_low": 0.5122,
+ "ci95_high": 0.5416,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8946669611563695,
+ "ci95_low": 0.8892938281396573,
+ "ci95_high": 0.899852497461637,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9808513889895557,
+ "ci95_low": 0.9772510263860681,
+ "ci95_high": 0.983985222882468,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8517449877493658,
+ "ci95_low": 0.8438109684609908,
+ "ci95_high": 0.8587752008871601,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9836,
+ "ci95_low": 0.9802,
+ "ci95_high": 0.9872,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.527,
+ "ci95_low": 0.511,
+ "ci95_high": 0.5424,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9833001378887697,
+ "ci95_low": 0.9791953495487227,
+ "ci95_high": 0.9870248816974507,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9833001378887697,
+ "ci95_low": 0.978646869738252,
+ "ci95_high": 0.9870977651486061,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9833001378887697,
+ "ci95_low": 0.9791794243723209,
+ "ci95_high": 0.9868108273905375,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8216547075367664,
+ "ci95_low": 0.8133449676502447,
+ "ci95_high": 0.8306723915972982,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8737130235289601,
+ "ci95_low": 0.8667742174566301,
+ "ci95_high": 0.8819136041332714,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9800702536026599,
+ "ci95_low": 0.9762865504655998,
+ "ci95_high": 0.9844845390980739,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9746879170539741,
+ "ci95_low": 0.9713851743940858,
+ "ci95_high": 0.9789196735447064,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9833001378887697,
+ "ci95_low": 0.9787867226249523,
+ "ci95_high": 0.9870666564628453,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.515167764669833,
+ "ci95_low": 0.5012236157846436,
+ "ci95_high": 0.5299486156913874,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8918126615561287,
+ "ci95_low": 0.8859151450776273,
+ "ci95_high": 0.8984407225442808,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9804293976105045,
+ "ci95_low": 0.9771859423201696,
+ "ci95_high": 0.9843164002888372,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8476838655328633,
+ "ci95_low": 0.8399253551570163,
+ "ci95_high": 0.8561281722918568,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9833001378887697,
+ "ci95_low": 0.9802864623440628,
+ "ci95_high": 0.9867817848410758,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.515167764669833,
+ "ci95_low": 0.5027200980767758,
+ "ci95_high": 0.528762926081961,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/gemini-3-flash/eval_records.jsonl b/data/evaluation/text/gemini-3-flash/eval_records.jsonl
new file mode 100644
index 0000000..916ee7a
--- /dev/null
+++ b/data/evaluation/text/gemini-3-flash/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71919837c73f7aa19985ddc3d7bf4d87490c76068e8fdb7f3835cbb83f986d8a
+size 2883625
diff --git a/data/evaluation/text/gemini-3-flash/eval_summary.json b/data/evaluation/text/gemini-3-flash/eval_summary.json
new file mode 100644
index 0000000..3ba0466
--- /dev/null
+++ b/data/evaluation/text/gemini-3-flash/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_gemini-3-flash.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "gemini-3-flash-preview"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 224,
+ "json_non_structured_root_count": 224,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9552,
+ "ci95_low": 0.9484,
+ "ci95_high": 0.9606,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9552,
+ "ci95_low": 0.9482,
+ "ci95_high": 0.9604,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9552,
+ "ci95_low": 0.9492,
+ "ci95_high": 0.9604,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8068084652261635,
+ "ci95_low": 0.7975741480415011,
+ "ci95_high": 0.8157309969101657,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8576533409378744,
+ "ci95_low": 0.8489289973174109,
+ "ci95_high": 0.8682152974372856,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9537786703741734,
+ "ci95_low": 0.9474286671670263,
+ "ci95_high": 0.9590456216332687,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9481770660700518,
+ "ci95_low": 0.9420269400467239,
+ "ci95_high": 0.953373182699083,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9552,
+ "ci95_low": 0.9472,
+ "ci95_high": 0.9614,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.5138,
+ "ci95_low": 0.499,
+ "ci95_high": 0.53,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8727468255127371,
+ "ci95_low": 0.8658662968840661,
+ "ci95_high": 0.8785574286142335,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9528590220233506,
+ "ci95_low": 0.9470068795319112,
+ "ci95_high": 0.9599261180259507,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.832230903082019,
+ "ci95_low": 0.8247024675434023,
+ "ci95_high": 0.8402886419431123,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9552,
+ "ci95_low": 0.949,
+ "ci95_high": 0.961,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.5138,
+ "ci95_low": 0.4982,
+ "ci95_high": 0.5284,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9509728818752873,
+ "ci95_low": 0.9437222816944487,
+ "ci95_high": 0.9564383771761639,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9509728818752873,
+ "ci95_low": 0.9435403774161509,
+ "ci95_high": 0.9579754601226994,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9509728818752873,
+ "ci95_low": 0.9438580388557443,
+ "ci95_high": 0.9571395652507874,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8001122435323944,
+ "ci95_low": 0.789594407023626,
+ "ci95_high": 0.8088548241694334,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.850856832482827,
+ "ci95_low": 0.8430810423169467,
+ "ci95_high": 0.8596040822010496,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9494898732935003,
+ "ci95_low": 0.944023020461934,
+ "ci95_high": 0.9563477345549503,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9437867390734168,
+ "ci95_low": 0.9372591093538164,
+ "ci95_high": 0.9498529790247171,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9509728818752873,
+ "ci95_low": 0.9435645086882977,
+ "ci95_high": 0.9566316757336603,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.500612838976559,
+ "ci95_low": 0.4875307881773399,
+ "ci95_high": 0.5143840856924254,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8668196497695738,
+ "ci95_low": 0.8582730459308714,
+ "ci95_high": 0.874481389831887,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9485775009413304,
+ "ci95_low": 0.9426688892813444,
+ "ci95_high": 0.9548839783697712,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8254845380076107,
+ "ci95_low": 0.8172813188278311,
+ "ci95_high": 0.8334900897730663,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9509728818752873,
+ "ci95_low": 0.9443889699669713,
+ "ci95_high": 0.957814291173099,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.500612838976559,
+ "ci95_low": 0.48604080380426445,
+ "ci95_high": 0.5174373943770164,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/gemma-3-27b-it/eval_records.jsonl b/data/evaluation/text/gemma-3-27b-it/eval_records.jsonl
new file mode 100644
index 0000000..ca73ac9
--- /dev/null
+++ b/data/evaluation/text/gemma-3-27b-it/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12757aec220fcade7a34ab20c324f7e8dce698c5e408919e36316799488091f2
+size 2884201
diff --git a/data/evaluation/text/gemma-3-27b-it/eval_summary.json b/data/evaluation/text/gemma-3-27b-it/eval_summary.json
new file mode 100644
index 0000000..25fa139
--- /dev/null
+++ b/data/evaluation/text/gemma-3-27b-it/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_gemma-3-27b-it.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "google/gemma-3-27b-it"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9762,
+ "ci95_low": 0.9716,
+ "ci95_high": 0.9806,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.805649177505501,
+ "ci95_low": 0.7982796317408082,
+ "ci95_high": 0.8139227445756857,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8634667334758236,
+ "ci95_low": 0.855025651756477,
+ "ci95_high": 0.8716088425096726,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9718386541850318,
+ "ci95_low": 0.9668994390764551,
+ "ci95_high": 0.975663828147867,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9682184227837235,
+ "ci95_low": 0.9643077348722243,
+ "ci95_high": 0.9721292366117641,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9762,
+ "ci95_low": 0.972,
+ "ci95_high": 0.9804,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4832,
+ "ci95_low": 0.472,
+ "ci95_high": 0.4962,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8803181883887855,
+ "ci95_low": 0.8739640888932431,
+ "ci95_high": 0.8865568589889404,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9735394742612412,
+ "ci95_low": 0.9690669507049755,
+ "ci95_high": 0.9777306703496524,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8345579554906625,
+ "ci95_low": 0.8260177802794385,
+ "ci95_high": 0.8425723317250425,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9841333333333334,
+ "ci95_low": 0.9808,
+ "ci95_high": 0.9866666666666666,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4832,
+ "ci95_low": 0.4694,
+ "ci95_high": 0.4976,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9754864409376436,
+ "ci95_low": 0.9711346537693843,
+ "ci95_high": 0.9800689919509391,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8027859855989503,
+ "ci95_low": 0.7945651306798193,
+ "ci95_high": 0.8118620337239548,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8610802712653672,
+ "ci95_low": 0.8522185969471855,
+ "ci95_high": 0.8695905931237349,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9709763076946059,
+ "ci95_low": 0.9662962806230768,
+ "ci95_high": 0.9753074242357025,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9672824966812846,
+ "ci95_low": 0.9625413980286172,
+ "ci95_high": 0.9715423201705471,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9754864409376436,
+ "ci95_low": 0.9710477941176471,
+ "ci95_high": 0.9797848806163705,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4707369388693121,
+ "ci95_low": 0.4564984709480122,
+ "ci95_high": 0.48537966932026944,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8782808548529745,
+ "ci95_low": 0.8718802256234315,
+ "ci95_high": 0.8845450857101813,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9727517928521906,
+ "ci95_low": 0.9675689237458427,
+ "ci95_high": 0.976735047855554,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8319331284321587,
+ "ci95_low": 0.824502505558532,
+ "ci95_high": 0.8397637952660403,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9836576272917624,
+ "ci95_low": 0.9805800434393764,
+ "ci95_high": 0.9863192683114288,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4707369388693121,
+ "ci95_low": 0.4574052812858783,
+ "ci95_high": 0.48486482323037816,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/gemma-4-31b-it/eval_records.jsonl b/data/evaluation/text/gemma-4-31b-it/eval_records.jsonl
new file mode 100644
index 0000000..44477eb
--- /dev/null
+++ b/data/evaluation/text/gemma-4-31b-it/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04e5edec484c71fd276a76f84f05ef26192692f4db07717ce7554e953e2b8158
+size 2846997
diff --git a/data/evaluation/text/gemma-4-31b-it/eval_summary.json b/data/evaluation/text/gemma-4-31b-it/eval_summary.json
new file mode 100644
index 0000000..31ce26e
--- /dev/null
+++ b/data/evaluation/text/gemma-4-31b-it/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_gemma-4-31b-it.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "gemma-4-31b-it"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.948,
+ "ci95_low": 0.9418,
+ "ci95_high": 0.954,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8029379378883408,
+ "ci95_low": 0.7953025422872098,
+ "ci95_high": 0.8119704200559316,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8598204930689873,
+ "ci95_low": 0.8504051514945206,
+ "ci95_high": 0.8680318371768293,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9421769890089993,
+ "ci95_low": 0.9362380201076872,
+ "ci95_high": 0.9482461298346337,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9421975954496298,
+ "ci95_low": 0.936206402629733,
+ "ci95_high": 0.9471918571158525,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.948,
+ "ci95_low": 0.9416,
+ "ci95_high": 0.9536,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4898,
+ "ci95_low": 0.475,
+ "ci95_high": 0.5052,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8683118066554424,
+ "ci95_low": 0.8614695037759451,
+ "ci95_high": 0.8757899394574215,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9460658651498767,
+ "ci95_low": 0.9404652751996333,
+ "ci95_high": 0.9514361321631122,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8313792154786641,
+ "ci95_low": 0.8233109577934224,
+ "ci95_high": 0.8397046105832798,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9653333333333334,
+ "ci95_low": 0.9610666666666666,
+ "ci95_high": 0.9693333333333334,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4898,
+ "ci95_low": 0.475,
+ "ci95_high": 0.5058,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9448444921096982,
+ "ci95_low": 0.9376683339746056,
+ "ci95_high": 0.9513409961685824,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.798138951373379,
+ "ci95_low": 0.789943595465639,
+ "ci95_high": 0.8061250652448904,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8552065474357955,
+ "ci95_low": 0.8452390319958857,
+ "ci95_high": 0.8629381987818978,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9390295403395403,
+ "ci95_low": 0.9315605553840647,
+ "ci95_high": 0.945802205557241,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.938902476929242,
+ "ci95_low": 0.932085777739889,
+ "ci95_high": 0.9460074929992619,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9448444921096982,
+ "ci95_low": 0.9386709489944177,
+ "ci95_high": 0.9519098021168891,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4767121188907614,
+ "ci95_low": 0.46261897451642614,
+ "ci95_high": 0.49115928659286595,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8641250130495717,
+ "ci95_low": 0.8560534817709381,
+ "ci95_high": 0.8715691285799395,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9428638203828795,
+ "ci95_low": 0.9356519196760142,
+ "ci95_high": 0.9495179789933527,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8266727494045872,
+ "ci95_low": 0.8177919639708618,
+ "ci95_high": 0.8350363710766631,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9632296614064655,
+ "ci95_low": 0.9589625654784719,
+ "ci95_high": 0.9672548570264559,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4767121188907614,
+ "ci95_low": 0.46281499692685923,
+ "ci95_high": 0.4893027551177467,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/gpt-oss/eval_records.jsonl b/data/evaluation/text/gpt-oss/eval_records.jsonl
new file mode 100644
index 0000000..8d0bc11
--- /dev/null
+++ b/data/evaluation/text/gpt-oss/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3493f3504dec3bc94659a926149453beee4563c21647c1fb64e87f08e52ac8f9
+size 2866602
diff --git a/data/evaluation/text/gpt-oss/eval_summary.json b/data/evaluation/text/gpt-oss/eval_summary.json
new file mode 100644
index 0000000..1159538
--- /dev/null
+++ b/data/evaluation/text/gpt-oss/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_gpt-oss.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "openai/gpt-oss-20b"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 659,
+ "json_non_structured_root_count": 659,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.8682,
+ "ci95_low": 0.8588,
+ "ci95_high": 0.8762,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.8682,
+ "ci95_low": 0.86,
+ "ci95_high": 0.8772,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.86,
+ "ci95_low": 0.8498,
+ "ci95_high": 0.8714,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.6972286454803367,
+ "ci95_low": 0.6867727494727495,
+ "ci95_high": 0.7082146797646798,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7559379306951166,
+ "ci95_low": 0.742761681717413,
+ "ci95_high": 0.7673899017177938,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8553476282386344,
+ "ci95_low": 0.8445969123197065,
+ "ci95_high": 0.8652081478325595,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8537481612006137,
+ "ci95_low": 0.8427460791292118,
+ "ci95_high": 0.8637351643044007,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.86,
+ "ci95_low": 0.8504,
+ "ci95_high": 0.8706,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.3878,
+ "ci95_low": 0.3744,
+ "ci95_high": 0.4024,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.769504734804696,
+ "ci95_low": 0.7613321994324591,
+ "ci95_high": 0.7801695979024444,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.857916053733538,
+ "ci95_low": 0.8490208972239074,
+ "ci95_high": 0.8691523724878358,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.7265832880877267,
+ "ci95_low": 0.7169961968427353,
+ "ci95_high": 0.7361154430780252,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8627333333333334,
+ "ci95_low": 0.8532666666666666,
+ "ci95_high": 0.8714666666666666,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.3878,
+ "ci95_low": 0.3732,
+ "ci95_high": 0.4028,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.8658648690056687,
+ "ci95_low": 0.8560733384262796,
+ "ci95_high": 0.8755944163215217,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.8658648690056687,
+ "ci95_low": 0.8563139410597038,
+ "ci95_high": 0.8764268750478817,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.8575149379500536,
+ "ci95_low": 0.8464384828862165,
+ "ci95_high": 0.8665743305632503,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.6927152614930588,
+ "ci95_low": 0.6813780278916982,
+ "ci95_high": 0.7049088132811601,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.7515410115692235,
+ "ci95_low": 0.7403109867002431,
+ "ci95_high": 0.7604063779981854,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.8526023634794964,
+ "ci95_low": 0.8426536593696119,
+ "ci95_high": 0.8624898701692927,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.8510859483322526,
+ "ci95_low": 0.8406480916063486,
+ "ci95_high": 0.8623074056353366,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.8575149379500536,
+ "ci95_low": 0.8470326637018862,
+ "ci95_high": 0.8684290953545232,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.37559368775854146,
+ "ci95_low": 0.3619571025112587,
+ "ci95_high": 0.3917557486733831,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.7656195455139262,
+ "ci95_low": 0.7557947411694138,
+ "ci95_high": 0.7726194647198673,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.8553719414107867,
+ "ci95_low": 0.8464794257118011,
+ "ci95_high": 0.8669279583929782,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.7221281365311413,
+ "ci95_low": 0.7109876527701949,
+ "ci95_high": 0.7331606673168081,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.8602982483019254,
+ "ci95_low": 0.8507539103499475,
+ "ci95_high": 0.8691983122362869,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.37559368775854146,
+ "ci95_low": 0.3608625831612755,
+ "ci95_high": 0.38937375593324147,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/ibm-granite-4.0-h-small/eval_records.jsonl b/data/evaluation/text/ibm-granite-4.0-h-small/eval_records.jsonl
new file mode 100644
index 0000000..7b94418
--- /dev/null
+++ b/data/evaluation/text/ibm-granite-4.0-h-small/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4563db625ed2bfda4b364651c5395bc78bbcb66be9ee13014656881530d3f89f
+size 2941471
diff --git a/data/evaluation/text/ibm-granite-4.0-h-small/eval_summary.json b/data/evaluation/text/ibm-granite-4.0-h-small/eval_summary.json
new file mode 100644
index 0000000..744f43f
--- /dev/null
+++ b/data/evaluation/text/ibm-granite-4.0-h-small/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_ibm-granite-4.0-h-small.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "ibm-granite/granite-4.0-h-small"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 1,
+ "json_non_structured_root_count": 1,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9998,
+ "ci95_low": 0.9994,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9998,
+ "ci95_low": 0.9992,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9856,
+ "ci95_low": 0.9824,
+ "ci95_high": 0.9888,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7644308473879062,
+ "ci95_low": 0.7566369332954627,
+ "ci95_high": 0.7724784853708383,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8320582036687447,
+ "ci95_low": 0.8228215690363613,
+ "ci95_high": 0.8406139048812683,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9741667077447268,
+ "ci95_low": 0.9702461807714877,
+ "ci95_high": 0.9779791306073817,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9743385705723382,
+ "ci95_low": 0.9704366891112968,
+ "ci95_high": 0.9780589191520208,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9856,
+ "ci95_low": 0.9824,
+ "ci95_high": 0.9888,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4066,
+ "ci95_low": 0.3936,
+ "ci95_high": 0.4218,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8568852529337926,
+ "ci95_low": 0.850239363566517,
+ "ci95_high": 0.8630831998493068,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9818461901907795,
+ "ci95_low": 0.978591608615191,
+ "ci95_high": 0.984918282318626,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.7982445255283255,
+ "ci95_low": 0.7902485794012394,
+ "ci95_high": 0.8062329634731407,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9903333333333334,
+ "ci95_low": 0.9879333333333334,
+ "ci95_high": 0.9924666666666666,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4066,
+ "ci95_low": 0.3902,
+ "ci95_high": 0.4214,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9998467902558603,
+ "ci95_low": 0.999540018399264,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9998467902558603,
+ "ci95_low": 0.9993871140733931,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9852918645625862,
+ "ci95_low": 0.9812926473970712,
+ "ci95_high": 0.9887882045768699,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7614464885245084,
+ "ci95_low": 0.7504124614286459,
+ "ci95_high": 0.7698701743324545,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8291197854742709,
+ "ci95_low": 0.818743041255978,
+ "ci95_high": 0.8354453317915204,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9735168388297916,
+ "ci95_low": 0.9690699914463984,
+ "ci95_high": 0.9765391529706331,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.973618028306024,
+ "ci95_low": 0.9696509257683736,
+ "ci95_high": 0.9767528709665854,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9852918645625862,
+ "ci95_low": 0.9822852760736196,
+ "ci95_high": 0.989027852374741,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.39650681783361424,
+ "ci95_low": 0.38465652938030104,
+ "ci95_high": 0.4108331420702318,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8546943709428569,
+ "ci95_low": 0.8461390588912252,
+ "ci95_high": 0.8617010097400788,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9814005858103988,
+ "ci95_low": 0.9779890451245419,
+ "ci95_high": 0.9850350988080817,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.7952831369993896,
+ "ci95_low": 0.7868717140450571,
+ "ci95_high": 0.804994250672669,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9901435064603442,
+ "ci95_low": 0.9881298079372357,
+ "ci95_high": 0.9924932975871313,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.39650681783361424,
+ "ci95_low": 0.3835438193647149,
+ "ci95_high": 0.4092646384344901,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/inference-net-Schematron-8B/eval_records.jsonl b/data/evaluation/text/inference-net-Schematron-8B/eval_records.jsonl
new file mode 100644
index 0000000..3560974
--- /dev/null
+++ b/data/evaluation/text/inference-net-Schematron-8B/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef6614f029f975637957b3c74023c5cfd41ca721582104e7c96bb33dbd21e946
+size 2923300
diff --git a/data/evaluation/text/inference-net-Schematron-8B/eval_summary.json b/data/evaluation/text/inference-net-Schematron-8B/eval_summary.json
new file mode 100644
index 0000000..b60f6c7
--- /dev/null
+++ b/data/evaluation/text/inference-net-Schematron-8B/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_inference-net-Schematron-8B.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "inference-net/Schematron-8B"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 4,
+ "json_non_structured_root_count": 4,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9992,
+ "ci95_low": 0.9982,
+ "ci95_high": 0.9998,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9992,
+ "ci95_low": 0.9984,
+ "ci95_high": 0.9998,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9908,
+ "ci95_low": 0.988,
+ "ci95_high": 0.9934,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7587824604059233,
+ "ci95_low": 0.7500721383681678,
+ "ci95_high": 0.7672797061082498,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8280625069232254,
+ "ci95_low": 0.8201567665897843,
+ "ci95_high": 0.8367827125006104,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9831044559805727,
+ "ci95_low": 0.9803180940625144,
+ "ci95_high": 0.9865076806356364,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9774988526649137,
+ "ci95_low": 0.973893706069906,
+ "ci95_high": 0.9804505622844024,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9906666666666666,
+ "ci95_low": 0.9875333333333334,
+ "ci95_high": 0.9931333333333334,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.395,
+ "ci95_low": 0.3818,
+ "ci95_high": 0.408,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8566498077699073,
+ "ci95_low": 0.8497687873634677,
+ "ci95_high": 0.8627863401200925,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9863218397771935,
+ "ci95_low": 0.9836070608464575,
+ "ci95_high": 0.9890272552784856,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.7934224836645745,
+ "ci95_low": 0.7849866458054774,
+ "ci95_high": 0.8014582818114133,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9935555555555555,
+ "ci95_low": 0.9919555555555555,
+ "ci95_high": 0.9954,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.395,
+ "ci95_low": 0.3776,
+ "ci95_high": 0.4106,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9991573464072315,
+ "ci95_low": 0.9982329440688383,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9991573464072315,
+ "ci95_low": 0.9980854648491346,
+ "ci95_high": 0.9998472232831717,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.990424390991267,
+ "ci95_low": 0.9877253548139624,
+ "ci95_high": 0.9931150550795593,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7544035365850543,
+ "ci95_low": 0.7445887204027056,
+ "ci95_high": 0.7632938852978487,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.823693461726025,
+ "ci95_low": 0.8135380547102325,
+ "ci95_high": 0.832400993989273,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9823532606332323,
+ "ci95_low": 0.9791418160463096,
+ "ci95_high": 0.9852177503219519,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9766076125616068,
+ "ci95_low": 0.9723980098643755,
+ "ci95_high": 0.9797785215332612,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9903222511618405,
+ "ci95_low": 0.9875460668445799,
+ "ci95_high": 0.9930682510664229,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.383713804197947,
+ "ci95_low": 0.37064925086438727,
+ "ci95_high": 0.39791538933169834,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8534834196481039,
+ "ci95_low": 0.8467910969477469,
+ "ci95_high": 0.8599724848986486,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9857847515715715,
+ "ci95_low": 0.9826777720482269,
+ "ci95_high": 0.9885237219010118,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.7890484991555397,
+ "ci95_low": 0.7800413403356035,
+ "ci95_high": 0.7977206599542156,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.993301329520113,
+ "ci95_low": 0.9910244268687024,
+ "ci95_high": 0.9951132867608316,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.383713804197947,
+ "ci95_low": 0.36854999234420455,
+ "ci95_high": 0.3978667894413751,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/interfaze-beta/eval_records.jsonl b/data/evaluation/text/interfaze-beta/eval_records.jsonl
new file mode 100644
index 0000000..f0f73cf
--- /dev/null
+++ b/data/evaluation/text/interfaze-beta/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc28d2249b9bb5e1e51cd73a924195777a152eadb75b48d7d5e969cacba1287f
+size 2847407
diff --git a/data/evaluation/text/interfaze-beta/eval_summary.json b/data/evaluation/text/interfaze-beta/eval_summary.json
new file mode 100644
index 0000000..eef67a2
--- /dev/null
+++ b/data/evaluation/text/interfaze-beta/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_interfaze-beta_false.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "interfaze-beta"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.978,
+ "ci95_low": 0.9736,
+ "ci95_high": 0.9818,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8260597218874962,
+ "ci95_low": 0.8185544919892555,
+ "ci95_high": 0.8332752214778686,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8807928336271681,
+ "ci95_low": 0.872954611159785,
+ "ci95_high": 0.8879772297154225,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9760101850094876,
+ "ci95_low": 0.9721352971622517,
+ "ci95_high": 0.979985332229967,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9709326016768515,
+ "ci95_low": 0.9666094284319579,
+ "ci95_high": 0.974968898915265,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9779333333333334,
+ "ci95_low": 0.9739333333333334,
+ "ci95_high": 0.982,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.5094,
+ "ci95_low": 0.4964,
+ "ci95_high": 0.5228,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8942875801747173,
+ "ci95_low": 0.8883438345605182,
+ "ci95_high": 0.8995746115635035,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9756219783367284,
+ "ci95_low": 0.9712443988611272,
+ "ci95_high": 0.9795444065638088,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8534262777573322,
+ "ci95_low": 0.8455336320455219,
+ "ci95_high": 0.8613775688787059,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9853111111111111,
+ "ci95_low": 0.9825333333333334,
+ "ci95_high": 0.9878222222222223,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.5094,
+ "ci95_low": 0.4956,
+ "ci95_high": 0.524,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9753332311935039,
+ "ci95_low": 0.9709657701711492,
+ "ci95_high": 0.9796294872780383,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8214554680245045,
+ "ci95_low": 0.8135472656093765,
+ "ci95_high": 0.829723601364549,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8764661058549273,
+ "ci95_low": 0.8689968438364172,
+ "ci95_high": 0.8846006486107064,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9732792013176983,
+ "ci95_low": 0.9683916195050154,
+ "ci95_high": 0.9777311071954731,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9680789297618998,
+ "ci95_low": 0.9631913653389397,
+ "ci95_high": 0.9726548812248902,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9752821612787907,
+ "ci95_low": 0.9704641350210971,
+ "ci95_high": 0.9798481606032814,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4968592002451356,
+ "ci95_low": 0.48337028824833705,
+ "ci95_high": 0.5105779549287138,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8904002583990435,
+ "ci95_low": 0.8840433753240811,
+ "ci95_high": 0.8962262801898538,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9728981074113981,
+ "ci95_low": 0.9682368987994624,
+ "ci95_high": 0.9771954960907373,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.848960786939716,
+ "ci95_low": 0.8407361264625063,
+ "ci95_high": 0.8566700029918322,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9835384641574315,
+ "ci95_low": 0.980579555011851,
+ "ci95_high": 0.9866270937809163,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4968592002451356,
+ "ci95_low": 0.4823538437931564,
+ "ci95_high": 0.5101618222256308,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/openai-gpt-4.1/eval_records.jsonl b/data/evaluation/text/openai-gpt-4.1/eval_records.jsonl
new file mode 100644
index 0000000..95a15e0
--- /dev/null
+++ b/data/evaluation/text/openai-gpt-4.1/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0c327808521b941c0d7432f2daa3f7b96b38dff56377762b407c718b132c5a2
+size 2824664
diff --git a/data/evaluation/text/openai-gpt-4.1/eval_summary.json b/data/evaluation/text/openai-gpt-4.1/eval_summary.json
new file mode 100644
index 0000000..3bf3902
--- /dev/null
+++ b/data/evaluation/text/openai-gpt-4.1/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_openai-gpt-4.1.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "gpt-4.1"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 2,
+ "json_non_structured_root_count": 2,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9996,
+ "ci95_low": 0.999,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9996,
+ "ci95_low": 0.9988,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9766,
+ "ci95_low": 0.9724,
+ "ci95_high": 0.9804,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8145314366617066,
+ "ci95_low": 0.8063063888072712,
+ "ci95_high": 0.8227203436535945,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8752961736688194,
+ "ci95_low": 0.8678540020170187,
+ "ci95_high": 0.8823392420054994,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9730869796612611,
+ "ci95_low": 0.9686249075597605,
+ "ci95_high": 0.9774067929645729,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9703122682686866,
+ "ci95_low": 0.9660207008404795,
+ "ci95_high": 0.9741026164906443,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9766,
+ "ci95_low": 0.972,
+ "ci95_high": 0.9804,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.481,
+ "ci95_low": 0.467,
+ "ci95_high": 0.4942,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.887638196663929,
+ "ci95_low": 0.880736519320747,
+ "ci95_high": 0.8937371582838929,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9745040894228956,
+ "ci95_low": 0.9699217170325665,
+ "ci95_high": 0.9790980632431536,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8449138051652629,
+ "ci95_low": 0.8368293900069932,
+ "ci95_high": 0.8512532887703994,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9842666666666666,
+ "ci95_low": 0.9812666666666666,
+ "ci95_high": 0.9872,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.481,
+ "ci95_low": 0.4652,
+ "ci95_high": 0.496,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9995403707675808,
+ "ci95_low": 0.9988475722188076,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9995403707675808,
+ "ci95_low": 0.9988511029411765,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9744139727286656,
+ "ci95_low": 0.9690265486725663,
+ "ci95_high": 0.9795355254081398,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8105212558023679,
+ "ci95_low": 0.8020809147940402,
+ "ci95_high": 0.8181828127375929,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.871738328861089,
+ "ci95_low": 0.8630642760604325,
+ "ci95_high": 0.879211924202928,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.970755102842143,
+ "ci95_low": 0.9654731827235593,
+ "ci95_high": 0.9754569868058507,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9679872585528496,
+ "ci95_low": 0.9634597118838487,
+ "ci95_high": 0.971888177763426,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9744139727286656,
+ "ci95_low": 0.9693642506142506,
+ "ci95_high": 0.9799892662730967,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.46974107553240385,
+ "ci95_low": 0.45578963555282176,
+ "ci95_high": 0.48424531201967413,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8843382291685333,
+ "ci95_low": 0.8785516379384064,
+ "ci95_high": 0.8911929374947002,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9722717346700602,
+ "ci95_low": 0.96711033139344,
+ "ci95_high": 0.9764030514592313,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8411297923317285,
+ "ci95_low": 0.8314490631383327,
+ "ci95_high": 0.8488600657754581,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9827894387416374,
+ "ci95_low": 0.9791443030210091,
+ "ci95_high": 0.986294364324435,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.46974107553240385,
+ "ci95_low": 0.4547055670418868,
+ "ci95_high": 0.48427095292766936,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/openai-gpt-5-4/eval_records.jsonl b/data/evaluation/text/openai-gpt-5-4/eval_records.jsonl
new file mode 100644
index 0000000..18e3ec3
--- /dev/null
+++ b/data/evaluation/text/openai-gpt-5-4/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0c8557466ce108a1bb687bf370ca29134ea0d2aa99bbafce158fac27377a308
+size 2814666
diff --git a/data/evaluation/text/openai-gpt-5-4/eval_summary.json b/data/evaluation/text/openai-gpt-5-4/eval_summary.json
new file mode 100644
index 0000000..3029b19
--- /dev/null
+++ b/data/evaluation/text/openai-gpt-5-4/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_openai-gpt-5-4.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "gpt-5.4"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9996,
+ "ci95_low": 0.999,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8299521979759483,
+ "ci95_low": 0.823109853137059,
+ "ci95_high": 0.837446674446125,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8917536013714636,
+ "ci95_low": 0.8839590836732375,
+ "ci95_high": 0.8992931714106264,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9958425437319153,
+ "ci95_low": 0.9947872249104248,
+ "ci95_high": 0.9968929199243503,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9920355019644512,
+ "ci95_low": 0.991048057249492,
+ "ci95_high": 0.9930642127482333,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9996,
+ "ci95_low": 0.9988,
+ "ci95_high": 1.0,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4998,
+ "ci95_low": 0.4846,
+ "ci95_high": 0.513,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.9058494476931092,
+ "ci95_low": 0.9013892064651156,
+ "ci95_high": 0.9110274739225961,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9970785006548171,
+ "ci95_low": 0.9964755000644856,
+ "ci95_high": 0.9977209186212945,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.860852899673706,
+ "ci95_low": 0.8533564114477545,
+ "ci95_high": 0.8682632615368686,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9997333333333334,
+ "ci95_low": 0.9993333333333334,
+ "ci95_high": 1.0,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4998,
+ "ci95_low": 0.4846,
+ "ci95_high": 0.5122,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9996935805117205,
+ "ci95_low": 0.9992318918503725,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8254885165434618,
+ "ci95_low": 0.8178665831528139,
+ "ci95_high": 0.8322032851365856,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8874496664233668,
+ "ci95_low": 0.8796333171741821,
+ "ci95_high": 0.8946728354165774,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.995640691698649,
+ "ci95_low": 0.9944074450221091,
+ "ci95_high": 0.9968476425332284,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9915926652811123,
+ "ci95_low": 0.9907220297091723,
+ "ci95_high": 0.9925287925634305,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9996935805117205,
+ "ci95_low": 0.9992336577515518,
+ "ci95_high": 1.0,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4862877278994944,
+ "ci95_low": 0.4725299915947123,
+ "ci95_high": 0.5011874664828009,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.9028596248884926,
+ "ci95_low": 0.8985056925347905,
+ "ci95_high": 0.9075159920671536,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9969932754348512,
+ "ci95_low": 0.9964767355036892,
+ "ci95_high": 0.9975642638137019,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8564690914834143,
+ "ci95_low": 0.8488426389662566,
+ "ci95_high": 0.8640352639656631,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.999795720341147,
+ "ci95_low": 0.9995903737839221,
+ "ci95_high": 1.0,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4862877278994944,
+ "ci95_low": 0.47006515906477575,
+ "ci95_high": 0.5001921155767309,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/openai-gpt-5-mini/eval_records.jsonl b/data/evaluation/text/openai-gpt-5-mini/eval_records.jsonl
new file mode 100644
index 0000000..df7e00e
--- /dev/null
+++ b/data/evaluation/text/openai-gpt-5-mini/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d09f4a8d1e29366f2a67cded5bf7d4c9c68cf8532dcddb9becf12f5ee497ebf
+size 2844691
diff --git a/data/evaluation/text/openai-gpt-5-mini/eval_summary.json b/data/evaluation/text/openai-gpt-5-mini/eval_summary.json
new file mode 100644
index 0000000..4ecde39
--- /dev/null
+++ b/data/evaluation/text/openai-gpt-5-mini/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_openai-gpt-5-mini.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "gpt-5-mini"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 1,
+ "json_non_structured_root_count": 1,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9998,
+ "ci95_low": 0.9994,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9998,
+ "ci95_low": 0.9992,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9792,
+ "ci95_low": 0.9756,
+ "ci95_high": 0.9828,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7830350577179285,
+ "ci95_low": 0.7747013185180832,
+ "ci95_high": 0.7913411532095356,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8609229560275445,
+ "ci95_low": 0.8526809870715634,
+ "ci95_high": 0.8690019854304885,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9755862693348796,
+ "ci95_low": 0.9714703280290234,
+ "ci95_high": 0.9796461959124301,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9712224671467522,
+ "ci95_low": 0.9668948902904431,
+ "ci95_high": 0.97479218020205,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9791333333333334,
+ "ci95_low": 0.9754,
+ "ci95_high": 0.9831333333333334,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4126,
+ "ci95_low": 0.3994,
+ "ci95_high": 0.4258,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8731814276934509,
+ "ci95_low": 0.867949102418724,
+ "ci95_high": 0.8799012811649117,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9765186001600284,
+ "ci95_low": 0.9728139825013552,
+ "ci95_high": 0.9802045858540532,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8219790068727365,
+ "ci95_low": 0.8145825789814541,
+ "ci95_high": 0.8298735085611033,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9860444444444445,
+ "ci95_low": 0.9831777777777777,
+ "ci95_high": 0.9884222222222223,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4126,
+ "ci95_low": 0.3978,
+ "ci95_high": 0.426,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9997701853837904,
+ "ci95_low": 0.9993093392678997,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9997701853837904,
+ "ci95_low": 0.9993086495621447,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9772483529952505,
+ "ci95_low": 0.9726918075422627,
+ "ci95_high": 0.9818390804597701,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7791805311116817,
+ "ci95_low": 0.7701998946916461,
+ "ci95_high": 0.7879108433613091,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8570615365133253,
+ "ci95_low": 0.8483362261228251,
+ "ci95_high": 0.8636370797360349,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9733727795838378,
+ "ci95_low": 0.9689384804590839,
+ "ci95_high": 0.9779345944486536,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9690694898551958,
+ "ci95_low": 0.9645753264365913,
+ "ci95_high": 0.9735374437712937,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9771972830805373,
+ "ci95_low": 0.9733854885718668,
+ "ci95_high": 0.9814234385750324,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.40148613451815535,
+ "ci95_low": 0.3881407804131599,
+ "ci95_high": 0.41657085920134895,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8698716157362816,
+ "ci95_low": 0.8639329375022281,
+ "ci95_high": 0.875084446804089,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9745050419769945,
+ "ci95_low": 0.9703852700824315,
+ "ci95_high": 0.9789127007702095,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8181210338125035,
+ "ci95_low": 0.8077725201612157,
+ "ci95_high": 0.8263991777984909,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9847386071531927,
+ "ci95_low": 0.9816785781616879,
+ "ci95_high": 0.9874497225192199,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.40148613451815535,
+ "ci95_low": 0.3876767055931162,
+ "ci95_high": 0.4142550911039657,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/openai-gpt-5/eval_records.jsonl b/data/evaluation/text/openai-gpt-5/eval_records.jsonl
new file mode 100644
index 0000000..65a8e1f
--- /dev/null
+++ b/data/evaluation/text/openai-gpt-5/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00067a64860637cbe6ccafcbc2e665f709de771ebacda988daf9850f591780a1
+size 2818732
diff --git a/data/evaluation/text/openai-gpt-5/eval_summary.json b/data/evaluation/text/openai-gpt-5/eval_summary.json
new file mode 100644
index 0000000..9310026
--- /dev/null
+++ b/data/evaluation/text/openai-gpt-5/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_openai-gpt-5.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "gpt-5"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 1,
+ "json_non_structured_root_count": 1,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9998,
+ "ci95_low": 0.9994,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9998,
+ "ci95_low": 0.9992,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9882,
+ "ci95_low": 0.9852,
+ "ci95_high": 0.991,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7990647920650834,
+ "ci95_low": 0.7915018271028028,
+ "ci95_high": 0.8067828098951438,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8810696782706454,
+ "ci95_low": 0.8731594960798275,
+ "ci95_high": 0.8889237517931662,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.984299828057506,
+ "ci95_low": 0.9813841812871223,
+ "ci95_high": 0.9873000039829453,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.981272738843963,
+ "ci95_low": 0.9779543344044735,
+ "ci95_high": 0.9838506795785009,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9882,
+ "ci95_low": 0.9848,
+ "ci95_high": 0.9912,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.425,
+ "ci95_low": 0.4122,
+ "ci95_high": 0.4398,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8881447661310784,
+ "ci95_low": 0.8828044883795321,
+ "ci95_high": 0.8932863804810806,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9858909129479876,
+ "ci95_low": 0.9822088194331066,
+ "ci95_high": 0.988632852900441,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8400672351678644,
+ "ci95_low": 0.8327795164591278,
+ "ci95_high": 0.847185925069126,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9920666666666667,
+ "ci95_low": 0.9897333333333334,
+ "ci95_high": 0.9942666666666666,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.425,
+ "ci95_low": 0.4096,
+ "ci95_high": 0.4406,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9997701853837904,
+ "ci95_low": 0.9993093392678997,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9997701853837904,
+ "ci95_low": 0.9993086495621447,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9875900107246821,
+ "ci95_low": 0.9842416788377277,
+ "ci95_high": 0.9907591263173973,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7954125928976968,
+ "ci95_low": 0.7882890795383692,
+ "ci95_high": 0.8029216020599295,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8777185046987411,
+ "ci95_low": 0.8712755547444532,
+ "ci95_high": 0.8842435980320612,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9834356263431774,
+ "ci95_low": 0.980192432474489,
+ "ci95_high": 0.9870715076229416,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9804214966643283,
+ "ci95_low": 0.9766613280849844,
+ "ci95_high": 0.9842470826634804,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9875900107246821,
+ "ci95_low": 0.984259828009828,
+ "ci95_high": 0.9908531898539585,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4118277922475869,
+ "ci95_low": 0.3990635554190973,
+ "ci95_high": 0.4282857802045682,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8855222413132051,
+ "ci95_low": 0.8799592864900818,
+ "ci95_high": 0.8902067223352581,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9852005060378975,
+ "ci95_low": 0.9821021780087494,
+ "ci95_high": 0.9880541770606603,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8365655487982191,
+ "ci95_low": 0.8288723873181977,
+ "ci95_high": 0.8441730875094452,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9916500689443849,
+ "ci95_low": 0.9896391568417291,
+ "ci95_high": 0.9933690385105841,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4118277922475869,
+ "ci95_low": 0.3990189315551468,
+ "ci95_high": 0.4257941063911213,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/phi-4/eval_records.jsonl b/data/evaluation/text/phi-4/eval_records.jsonl
new file mode 100644
index 0000000..6c60f43
--- /dev/null
+++ b/data/evaluation/text/phi-4/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8072945458b71f5f98cd8d0f5f86ccb41249811c80d4c3c1c3402c6f3f7ef6b9
+size 2854857
diff --git a/data/evaluation/text/phi-4/eval_summary.json b/data/evaluation/text/phi-4/eval_summary.json
new file mode 100644
index 0000000..7459873
--- /dev/null
+++ b/data/evaluation/text/phi-4/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_phi-4.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "microsoft/phi-4"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 2,
+ "json_non_structured_root_count": 2,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9996,
+ "ci95_low": 0.999,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9996,
+ "ci95_low": 0.999,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.975,
+ "ci95_low": 0.9714,
+ "ci95_high": 0.979,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8016818961090061,
+ "ci95_low": 0.7936938693985753,
+ "ci95_high": 0.8106050603971192,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8587462403736643,
+ "ci95_low": 0.8494489309056756,
+ "ci95_high": 0.8668029312589118,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.967698787833226,
+ "ci95_low": 0.9634118742136776,
+ "ci95_high": 0.9720739374977753,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9675088551676261,
+ "ci95_low": 0.9635511269971505,
+ "ci95_high": 0.9719438008122468,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.975,
+ "ci95_low": 0.9706,
+ "ci95_high": 0.9794,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4804,
+ "ci95_low": 0.4668,
+ "ci95_high": 0.4942,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8760423081052988,
+ "ci95_low": 0.8697072239903421,
+ "ci95_high": 0.8824652440127898,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9725029517225421,
+ "ci95_low": 0.9675596338199559,
+ "ci95_high": 0.9771666910992007,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8302140682413351,
+ "ci95_low": 0.8223448624248518,
+ "ci95_high": 0.8371278645336042,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9832,
+ "ci95_low": 0.9791333333333334,
+ "ci95_high": 0.9860666666666666,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4804,
+ "ci95_low": 0.464,
+ "ci95_high": 0.4962,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 0.9995403707675808,
+ "ci95_low": 0.9986276303751144,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 0.9995403707675808,
+ "ci95_low": 0.9988488987798327,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9738011337521066,
+ "ci95_low": 0.9690184049079754,
+ "ci95_high": 0.9777624415305575,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.7977389519887786,
+ "ci95_low": 0.7904985360678354,
+ "ci95_high": 0.8059278448140381,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8551512116448519,
+ "ci95_low": 0.8473192072397855,
+ "ci95_high": 0.862746942471794,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9663680464271527,
+ "ci95_low": 0.9623364695397824,
+ "ci95_high": 0.9711466634341238,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9661159495226056,
+ "ci95_low": 0.9620304922652142,
+ "ci95_high": 0.9706159198717029,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9738011337521066,
+ "ci95_low": 0.9692331241389867,
+ "ci95_high": 0.9784023895228613,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.4679025586027271,
+ "ci95_low": 0.4532230898790384,
+ "ci95_high": 0.4802636621445543,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8730860700202611,
+ "ci95_low": 0.866598474279447,
+ "ci95_high": 0.8802378771278462,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9712394056756063,
+ "ci95_low": 0.9663854881705986,
+ "ci95_high": 0.9757642972396622,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8264450818168153,
+ "ci95_low": 0.8168962612250262,
+ "ci95_high": 0.8352546553383846,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9823808794239314,
+ "ci95_low": 0.9790521278761962,
+ "ci95_high": 0.9857387920219167,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.4679025586027271,
+ "ci95_low": 0.45424611379125507,
+ "ci95_high": 0.48372735646300274,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/evaluation/text/zai-org-GLM-4.7/eval_records.jsonl b/data/evaluation/text/zai-org-GLM-4.7/eval_records.jsonl
new file mode 100644
index 0000000..27eb7bb
--- /dev/null
+++ b/data/evaluation/text/zai-org-GLM-4.7/eval_records.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:036193d456370b18db5f066552a257d1c97ec35cacaf8cf187baf5b3b9505226
+size 2849292
diff --git a/data/evaluation/text/zai-org-GLM-4.7/eval_summary.json b/data/evaluation/text/zai-org-GLM-4.7/eval_summary.json
new file mode 100644
index 0000000..b82cf9e
--- /dev/null
+++ b/data/evaluation/text/zai-org-GLM-4.7/eval_summary.json
@@ -0,0 +1,264 @@
+{
+ "response_file": "data/text_responses/response_zai-org-GLM-4.7.jsonl",
+ "num_records": 5000,
+ "model_ids": [
+ "zai-org/GLM-4.7"
+ ],
+ "data_quality": {
+ "json_parse_fail_count": 0,
+ "json_non_structured_root_count": 0,
+ "invalid_schema_input_count": 0,
+ "unknown_difficulty_count": 0,
+ "malformed_jsonl_line_count": 0
+ },
+ "summary": {
+ "overall": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9738,
+ "ci95_low": 0.9694,
+ "ci95_high": 0.9776,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8341244624407987,
+ "ci95_low": 0.8257830243037725,
+ "ci95_high": 0.841789014023771,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8878451405740959,
+ "ci95_low": 0.8799809052293466,
+ "ci95_high": 0.8954780741974613,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9690919696903738,
+ "ci95_low": 0.9640015228725246,
+ "ci95_high": 0.9733609751293792,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9684208300658109,
+ "ci95_low": 0.9635283409529447,
+ "ci95_high": 0.9728209246123531,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9738,
+ "ci95_low": 0.9692,
+ "ci95_high": 0.9786,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.539,
+ "ci95_low": 0.5254,
+ "ci95_high": 0.5538,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8970205242350894,
+ "ci95_low": 0.8908077661512696,
+ "ci95_high": 0.9028299124264795,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9720069433552703,
+ "ci95_low": 0.9679456983646507,
+ "ci95_high": 0.9769606733639168,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8609848015074473,
+ "ci95_low": 0.8537603004370072,
+ "ci95_high": 0.8682865877463981,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.9825333333333334,
+ "ci95_low": 0.9794666666666666,
+ "ci95_high": 0.9852,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.539,
+ "ci95_low": 0.5234,
+ "ci95_high": 0.5558,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ }
+ },
+ "overall_weighted": {
+ "n": 5000,
+ "metrics": {
+ "json_parse_success": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "JSON Parse Success"
+ },
+ "json_root_structured": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Structured JSON Root"
+ },
+ "schema_valid_input": {
+ "mean": 1.0,
+ "ci95_low": 1.0,
+ "ci95_high": 1.0,
+ "metric_name": "Schema Valid Input"
+ },
+ "schema_compliance": {
+ "mean": 0.9721924314386395,
+ "ci95_low": 0.9664599127038824,
+ "ci95_high": 0.9765642950141686,
+ "metric_name": "JSON Pass Rate"
+ },
+ "leaf_value_em": {
+ "mean": 0.8298141736046313,
+ "ci95_low": 0.8213072480208289,
+ "ci95_high": 0.838255447291089,
+ "metric_name": "Truth Score"
+ },
+ "value_token_f1": {
+ "mean": 0.8840342407303813,
+ "ci95_low": 0.8770093460029859,
+ "ci95_high": 0.8905913638079861,
+ "metric_name": "Faithfulness Score"
+ },
+ "hier_path_recall": {
+ "mean": 0.9673022441197761,
+ "ci95_low": 0.9627925863946535,
+ "ci95_high": 0.9715869323426665,
+ "metric_name": "Path Recall"
+ },
+ "path_set_f1": {
+ "mean": 0.9666393138813097,
+ "ci95_low": 0.9616012824172505,
+ "ci95_high": 0.9717058877349548,
+ "metric_name": "Structure Coverage"
+ },
+ "type_precision": {
+ "mean": 0.9721924314386395,
+ "ci95_low": 0.9672206128983076,
+ "ci95_high": 0.9773388455060481,
+ "metric_name": "Type Safety"
+ },
+ "strict_json_em": {
+ "mean": 0.5261988662478934,
+ "ci95_low": 0.5101041028781383,
+ "ci95_high": 0.5413286392162866,
+ "metric_name": "Perfect Response Rate"
+ }
+ },
+ "category_scores": {
+ "Long Context Extraction": {
+ "mean": 0.8937168861515962,
+ "ci95_low": 0.8875974870564204,
+ "ci95_high": 0.8998720834657834,
+ "category_name": "Long Context Extraction",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1",
+ "hier_path_recall"
+ ]
+ },
+ "Complex Schema Handling": {
+ "mean": 0.9703413922528629,
+ "ci95_low": 0.9652191961239522,
+ "ci95_high": 0.9756915598454603,
+ "category_name": "Complex Schema Handling",
+ "components": [
+ "schema_compliance",
+ "path_set_f1",
+ "type_precision"
+ ]
+ },
+ "Multi-Context Linking": {
+ "mean": 0.8569242071675062,
+ "ci95_low": 0.8491178479228392,
+ "ci95_high": 0.8640017427679654,
+ "category_name": "Multi-Context Linking",
+ "components": [
+ "leaf_value_em",
+ "value_token_f1"
+ ]
+ },
+ "Output Contract Reliability": {
+ "mean": 0.981461620959093,
+ "ci95_low": 0.9778797145769622,
+ "ci95_high": 0.9848938730573266,
+ "category_name": "Output Contract Reliability",
+ "components": [
+ "json_parse_success",
+ "schema_compliance",
+ "type_precision"
+ ]
+ },
+ "Strict Precision": {
+ "mean": 0.5261988662478934,
+ "ci95_low": 0.5119950946577757,
+ "ci95_high": 0.539609644087256,
+ "category_name": "Strict Precision",
+ "components": [
+ "strict_json_em"
+ ]
+ }
+ },
+ "weighting": "schema_complexity",
+ "weight_field_priority": [
+ "schema_complexity",
+ "difficulty"
+ ],
+ "difficulty_weights": {
+ "easy": 1.0,
+ "medium": 2.0,
+ "hard": 3.0
+ }
+ }
+ }
+}
diff --git a/data/images_responses/response_DeepSeek-R1-Distill-Qwen-32B_image.jsonl b/data/images_responses/response_DeepSeek-R1-Distill-Qwen-32B_image.jsonl
new file mode 100644
index 0000000..3775101
--- /dev/null
+++ b/data/images_responses/response_DeepSeek-R1-Distill-Qwen-32B_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d99eb7985e129942b70a575f05b22e4a069826cee84058f4d693177d4a8b3bc6
+size 1872346
diff --git a/data/images_responses/response_Ministral-3-14B-Instruct-2512_image.jsonl b/data/images_responses/response_Ministral-3-14B-Instruct-2512_image.jsonl
new file mode 100644
index 0000000..0560b1b
--- /dev/null
+++ b/data/images_responses/response_Ministral-3-14B-Instruct-2512_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:119ac48801590f462c4ce1e8520781a914ea0970c54955f5ab9a3db35bed64e5
+size 1897111
diff --git a/data/images_responses/response_NVIDIA-Nemotron-3-Nano-30B-A3B-BF16_image.jsonl b/data/images_responses/response_NVIDIA-Nemotron-3-Nano-30B-A3B-BF16_image.jsonl
new file mode 100644
index 0000000..2b328d0
--- /dev/null
+++ b/data/images_responses/response_NVIDIA-Nemotron-3-Nano-30B-A3B-BF16_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b300d4d120eeaa0b56d9e61af9ac6a74e6f1a361fec16e05993869e0c7cd4743
+size 1881895
diff --git a/data/images_responses/response_Qwen3-235B-A22B-Instruct-2507_image.jsonl b/data/images_responses/response_Qwen3-235B-A22B-Instruct-2507_image.jsonl
new file mode 100644
index 0000000..2bdf4c3
--- /dev/null
+++ b/data/images_responses/response_Qwen3-235B-A22B-Instruct-2507_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c97d2d4949036d17aec4f44960129c5996d8901e34c1f37d765838233d65cd5
+size 1897406
diff --git a/data/images_responses/response_Qwen3-30B-A3B-Instruct-2507_image.jsonl b/data/images_responses/response_Qwen3-30B-A3B-Instruct-2507_image.jsonl
new file mode 100644
index 0000000..8cfd4a4
--- /dev/null
+++ b/data/images_responses/response_Qwen3-30B-A3B-Instruct-2507_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54b25d614ed504e5efd60c05d7d808c184680f69421644f365b58e82dd06a6e2
+size 1900072
diff --git a/data/images_responses/response_Qwen3.5-35B-A3B_image.jsonl b/data/images_responses/response_Qwen3.5-35B-A3B_image.jsonl
new file mode 100644
index 0000000..19039fe
--- /dev/null
+++ b/data/images_responses/response_Qwen3.5-35B-A3B_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8c6d56c7d7fb3e15ab32d960a209eef2fe6ced3131fa3f6a0c1600636bcaba5
+size 1874737
diff --git a/data/images_responses/response_claude-sonnet-4-6_image.jsonl b/data/images_responses/response_claude-sonnet-4-6_image.jsonl
new file mode 100644
index 0000000..02ef59d
--- /dev/null
+++ b/data/images_responses/response_claude-sonnet-4-6_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af8da6e1084495160670a0f6e4fb6aad7e3d60a56a559aeeeb7f8e8a99e1963e
+size 1950928
diff --git a/data/images_responses/response_gemini-2.5-flash_image.jsonl b/data/images_responses/response_gemini-2.5-flash_image.jsonl
new file mode 100644
index 0000000..e8cb47b
--- /dev/null
+++ b/data/images_responses/response_gemini-2.5-flash_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:806a95b853360a84ee40175cb059a4754e016e28760fe0b62077bd15a0d2b01a
+size 1853009
diff --git a/data/images_responses/response_gemini-3-flash_image.jsonl b/data/images_responses/response_gemini-3-flash_image.jsonl
new file mode 100644
index 0000000..5f17be0
--- /dev/null
+++ b/data/images_responses/response_gemini-3-flash_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f52d2bbd41d518c0e917a6192864159e7d220213aa7b6aec29dd333d8c840af3
+size 1818748
diff --git a/data/images_responses/response_gemma-3-27b-it_image.jsonl b/data/images_responses/response_gemma-3-27b-it_image.jsonl
new file mode 100644
index 0000000..0b61fd5
--- /dev/null
+++ b/data/images_responses/response_gemma-3-27b-it_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:445582b3fddb5a4823644d9086819e0ab93be7e2863914e7e71ba1250b14a58d
+size 1863911
diff --git a/data/images_responses/response_gemma-4-31b-it_image.jsonl b/data/images_responses/response_gemma-4-31b-it_image.jsonl
new file mode 100644
index 0000000..e917a92
--- /dev/null
+++ b/data/images_responses/response_gemma-4-31b-it_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01f7d1c63b62367f93b497b462dca5397ffbaa8da4aedb6347adaece67c01873
+size 1938537
diff --git a/data/images_responses/response_gpt-oss_image.jsonl b/data/images_responses/response_gpt-oss_image.jsonl
new file mode 100644
index 0000000..3499230
--- /dev/null
+++ b/data/images_responses/response_gpt-oss_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8926681a0df78de9e2cbaae8fe033aa2c5c8e5a956216151c403c25a1638188f
+size 2059641
diff --git a/data/images_responses/response_ibm-granite-4.0-h-small_image.jsonl b/data/images_responses/response_ibm-granite-4.0-h-small_image.jsonl
new file mode 100644
index 0000000..755bc58
--- /dev/null
+++ b/data/images_responses/response_ibm-granite-4.0-h-small_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11be13d1b3d3bdc47177ef673725b929323d9786f4302a4e6bac1412fe66449d
+size 1858683
diff --git a/data/images_responses/response_inference-net-Schematron-8B_image.jsonl b/data/images_responses/response_inference-net-Schematron-8B_image.jsonl
new file mode 100644
index 0000000..8a4adaa
--- /dev/null
+++ b/data/images_responses/response_inference-net-Schematron-8B_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21252e3d61b6fa4c1860cc35be46cd482ed92bf530aff3cf3f8007a61553554e
+size 1908351
diff --git a/data/images_responses/response_interfaze-beta_image.jsonl b/data/images_responses/response_interfaze-beta_image.jsonl
new file mode 100644
index 0000000..1f45013
--- /dev/null
+++ b/data/images_responses/response_interfaze-beta_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2adaca395ed6d38823c4b91bf97da42d9fa5e951866adc167d9f9361989e22f0
+size 1876138
diff --git a/data/images_responses/response_openai-gpt-4.1_image.jsonl b/data/images_responses/response_openai-gpt-4.1_image.jsonl
new file mode 100644
index 0000000..dcfb2bf
--- /dev/null
+++ b/data/images_responses/response_openai-gpt-4.1_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8108600740ce566f1b9dbd1d61903719bf6f47cace5d5810df55b6c1aae47a5
+size 1893721
diff --git a/data/images_responses/response_openai-gpt-5-4_image.jsonl b/data/images_responses/response_openai-gpt-5-4_image.jsonl
new file mode 100644
index 0000000..d91153d
--- /dev/null
+++ b/data/images_responses/response_openai-gpt-5-4_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff7234cefa6ba84f3efc1dffb2a927f18dd6a168ba4a3305c3f61414f1782185
+size 1936114
diff --git a/data/images_responses/response_openai-gpt-5-mini_image.jsonl b/data/images_responses/response_openai-gpt-5-mini_image.jsonl
new file mode 100644
index 0000000..8ea4b50
--- /dev/null
+++ b/data/images_responses/response_openai-gpt-5-mini_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bafefcfc9d7813ca0cfa2f5041b53ec0daf1600fda4c9d40e41ed93bc62e569
+size 1920734
diff --git a/data/images_responses/response_openai-gpt-5_image.jsonl b/data/images_responses/response_openai-gpt-5_image.jsonl
new file mode 100644
index 0000000..685ef2f
--- /dev/null
+++ b/data/images_responses/response_openai-gpt-5_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07086484eb98798b2bb4800efd2865d712a5ba7abb803aa44743cda88f23ebdb
+size 1904459
diff --git a/data/images_responses/response_phi-4_image.jsonl b/data/images_responses/response_phi-4_image.jsonl
new file mode 100644
index 0000000..3a62bf7
--- /dev/null
+++ b/data/images_responses/response_phi-4_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:424ebeb76e9d7e4d22dd3d170b6d24cd61280507bb2772acb4d110562a352d7d
+size 1857491
diff --git a/data/images_responses/response_zai-org-GLM-4.7_image.jsonl b/data/images_responses/response_zai-org-GLM-4.7_image.jsonl
new file mode 100644
index 0000000..5143114
--- /dev/null
+++ b/data/images_responses/response_zai-org-GLM-4.7_image.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0835283e8a98a3af4fd3c6713575421237673422e90809169d0bcf6a99da518
+size 1886141
diff --git a/data/text_responses/response_DeepSeek-R1-Distill-Qwen-32B.jsonl b/data/text_responses/response_DeepSeek-R1-Distill-Qwen-32B.jsonl
new file mode 100644
index 0000000..acf6d76
--- /dev/null
+++ b/data/text_responses/response_DeepSeek-R1-Distill-Qwen-32B.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c0279f741d396f6766177626b38705d687cdf48752d9c2a0e5a0e3751946e93
+size 38498582
diff --git a/data/text_responses/response_Ministral-3-14B-Instruct-2512.jsonl b/data/text_responses/response_Ministral-3-14B-Instruct-2512.jsonl
new file mode 100644
index 0000000..ee84327
--- /dev/null
+++ b/data/text_responses/response_Ministral-3-14B-Instruct-2512.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62978ace1d4e8f542cd9afa06571a22b3ece3ad369e7e6ed27ad4f1f06402d2f
+size 38352259
diff --git a/data/text_responses/response_NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jsonl b/data/text_responses/response_NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jsonl
new file mode 100644
index 0000000..9941206
--- /dev/null
+++ b/data/text_responses/response_NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39d1ea0a0f62af52233979ceb9caa55dabb55532565a410fae7139671fb30252
+size 38360841
diff --git a/data/text_responses/response_Qwen3-235B-A22B-Instruct-2507.jsonl b/data/text_responses/response_Qwen3-235B-A22B-Instruct-2507.jsonl
new file mode 100644
index 0000000..9aa9acd
--- /dev/null
+++ b/data/text_responses/response_Qwen3-235B-A22B-Instruct-2507.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e74b073b7994ef2c7888a92dbcb3e466528e6ca73b742608248e551d27fd2f7
+size 38358933
diff --git a/data/text_responses/response_Qwen3-30B-A3B-Instruct-2507.jsonl b/data/text_responses/response_Qwen3-30B-A3B-Instruct-2507.jsonl
new file mode 100644
index 0000000..8982e54
--- /dev/null
+++ b/data/text_responses/response_Qwen3-30B-A3B-Instruct-2507.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:697a3af9ab9660a9f9e1da8bf5f5559cdba1ff9cbbbaac9bd1ffdf2046cbba2b
+size 38371316
diff --git a/data/text_responses/response_Qwen3.5-35B-A3B.jsonl b/data/text_responses/response_Qwen3.5-35B-A3B.jsonl
new file mode 100644
index 0000000..8e545f0
--- /dev/null
+++ b/data/text_responses/response_Qwen3.5-35B-A3B.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bafaf1e0ad863bb4c68a845856119d48c5fac6cf72493922838cd78288b0afd5
+size 38257471
diff --git a/data/text_responses/response_claude-sonnet-4-6.jsonl b/data/text_responses/response_claude-sonnet-4-6.jsonl
new file mode 100644
index 0000000..2033775
--- /dev/null
+++ b/data/text_responses/response_claude-sonnet-4-6.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b2396bab1d19e26512a1393c87113af2ced65b0e8cefea68a1da4ce84734303
+size 38305736
diff --git a/data/text_responses/response_gemini-2.5-flash.jsonl b/data/text_responses/response_gemini-2.5-flash.jsonl
new file mode 100644
index 0000000..2e43efc
--- /dev/null
+++ b/data/text_responses/response_gemini-2.5-flash.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11006a79e4d4bc1cb99016f1b0b09e6ea06e29114d9411da63341590b484c660
+size 38323976
diff --git a/data/text_responses/response_gemini-3-flash.jsonl b/data/text_responses/response_gemini-3-flash.jsonl
new file mode 100644
index 0000000..f837afc
--- /dev/null
+++ b/data/text_responses/response_gemini-3-flash.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c80b2183ca4022f4d4cf6a55a32d4bb508903a78306c8d98c7eed96ce087511b
+size 38213033
diff --git a/data/text_responses/response_gemma-3-27b-it.jsonl b/data/text_responses/response_gemma-3-27b-it.jsonl
new file mode 100644
index 0000000..2a5fd61
--- /dev/null
+++ b/data/text_responses/response_gemma-3-27b-it.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c882ece83b44d6f803b007e8c3637dee1e1229c96589635f512747407896b738
+size 38239317
diff --git a/data/text_responses/response_gemma-4-31b-it.jsonl b/data/text_responses/response_gemma-4-31b-it.jsonl
new file mode 100644
index 0000000..591794d
--- /dev/null
+++ b/data/text_responses/response_gemma-4-31b-it.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f6a4d119662b806e7deefab30dd825afe5ba49dc719d8dd233eb1f982c17e9d
+size 38157775
diff --git a/data/text_responses/response_gpt-oss.jsonl b/data/text_responses/response_gpt-oss.jsonl
new file mode 100644
index 0000000..1db823b
--- /dev/null
+++ b/data/text_responses/response_gpt-oss.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df4cc7bb1cccd65520c4896cc94d86821e38cef51e642c698294468e63d932be
+size 39546162
diff --git a/data/text_responses/response_ibm-granite-4.0-h-small.jsonl b/data/text_responses/response_ibm-granite-4.0-h-small.jsonl
new file mode 100644
index 0000000..f8c35f1
--- /dev/null
+++ b/data/text_responses/response_ibm-granite-4.0-h-small.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbd545b606358c10a81cf1de49eec9a257369991a1a8dc6f13300f6ba4a5a328
+size 38281922
diff --git a/data/text_responses/response_inference-net-Schematron-8B.jsonl b/data/text_responses/response_inference-net-Schematron-8B.jsonl
new file mode 100644
index 0000000..41fc2fc
--- /dev/null
+++ b/data/text_responses/response_inference-net-Schematron-8B.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24f4f4dd1be479da0d585e91f282156a2527e00be5d8e063c15e88a648deed52
+size 38336718
diff --git a/data/text_responses/response_interfaze-beta.jsonl b/data/text_responses/response_interfaze-beta.jsonl
new file mode 100644
index 0000000..b8bd72a
--- /dev/null
+++ b/data/text_responses/response_interfaze-beta.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be9071125c0218dd34074fb656089cdca66af334b70d42a147bceb665c73f7ea
+size 38246003
diff --git a/data/text_responses/response_openai-gpt-4.1.jsonl b/data/text_responses/response_openai-gpt-4.1.jsonl
new file mode 100644
index 0000000..8c5836d
--- /dev/null
+++ b/data/text_responses/response_openai-gpt-4.1.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bc5ee2e2f1fc124078e8027db6f92f12982b68bb1b552e3d74a6532437a0242
+size 38313277
diff --git a/data/text_responses/response_openai-gpt-5-4.jsonl b/data/text_responses/response_openai-gpt-5-4.jsonl
new file mode 100644
index 0000000..0bbb867
--- /dev/null
+++ b/data/text_responses/response_openai-gpt-5-4.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f11a4a02149e65b86f4a028fe2fadad6f0304fa1f804dbba8f7d0fb2466229fd
+size 38208224
diff --git a/data/text_responses/response_openai-gpt-5-mini.jsonl b/data/text_responses/response_openai-gpt-5-mini.jsonl
new file mode 100644
index 0000000..6b1fd24
--- /dev/null
+++ b/data/text_responses/response_openai-gpt-5-mini.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98f4631194c1af57adfc815991cce7fb0d8ebcc9418799a2742f81df7a850b00
+size 38349054
diff --git a/data/text_responses/response_openai-gpt-5.jsonl b/data/text_responses/response_openai-gpt-5.jsonl
new file mode 100644
index 0000000..37d58d0
--- /dev/null
+++ b/data/text_responses/response_openai-gpt-5.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56d863482757217409d25b3ed28db4b927c92730db52cc48be26a29106b2e21b
+size 38276659
diff --git a/data/text_responses/response_phi-4.jsonl b/data/text_responses/response_phi-4.jsonl
new file mode 100644
index 0000000..3dc2dcc
--- /dev/null
+++ b/data/text_responses/response_phi-4.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d848adf4b61ebc1afdf5c963640f54c6a6ae5634952911faf418880e3f2247a7
+size 38206207
diff --git a/data/text_responses/response_zai-org-GLM-4.7.jsonl b/data/text_responses/response_zai-org-GLM-4.7.jsonl
new file mode 100644
index 0000000..527ce3b
--- /dev/null
+++ b/data/text_responses/response_zai-org-GLM-4.7.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4b3a78b64977b7abcbe4ba51373517b16467c36b118d4a37fdd89d86d89b5fd
+size 38206125
diff --git a/evaluate.py b/evaluate.py
new file mode 100644
index 0000000..f34e07f
--- /dev/null
+++ b/evaluate.py
@@ -0,0 +1,771 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import math
+import random
+import re
+from collections import Counter
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+from jsonschema import Draft7Validator, ValidationError
+
+
# Weight applied to each record in the weighted aggregates, keyed by level.
DIFFICULTY_WEIGHTS = {"easy": 1.0, "medium": 2.0, "hard": 3.0}

# Metadata fields consulted, in this order, to find a record's weighting level.
WEIGHT_FIELD_PRIORITY = ("schema_complexity", "difficulty")

# Internal metric key -> display name written into eval_summary.json.
METRIC_DISPLAY_NAMES = {
    "json_parse_success": "JSON Parse Success",
    "json_root_structured": "Structured JSON Root",
    "schema_valid_input": "Schema Valid Input",
    "schema_compliance": "JSON Pass Rate",
    "leaf_value_em": "Truth Score",
    "value_token_f1": "Faithfulness Score",
    "hier_path_recall": "Path Recall",
    "path_set_f1": "Structure Coverage",
    "type_precision": "Type Safety",
    "required_key_recall": "Required Key Recall",
    "strict_json_em": "Perfect Response Rate",
}

# Category name -> metric keys averaged (per record) into that category score.
# Insertion order is significant: it fixes the order of the summary output and
# the order in which the shared bootstrap RNG is consumed.
CATEGORIES = {
    "Long Context Extraction": ["leaf_value_em", "value_token_f1", "hier_path_recall"],
    "Complex Schema Handling": ["schema_compliance", "path_set_f1", "type_precision"],
    "Multi-Context Linking": ["leaf_value_em", "value_token_f1"],
    "Output Contract Reliability": ["json_parse_success", "schema_compliance", "type_precision"],
    "Strict Precision": ["strict_json_em"],
}

# Fenced code block, optionally tagged "json" (case-insensitive, spans lines).
JSON_BLOCK_RE = re.compile(r"```(?:json)?\s*(.*?)```", re.IGNORECASE | re.DOTALL)
# English articles dropped during token normalisation.
ARTICLES_RE = re.compile(r"\b(a|an|the)\b")
# Any character that is neither a word character nor whitespace.
PUNCT_RE = re.compile(r"[^\w\s]")
# Concrete list index inside a flattened path, e.g. "[12]".
INDEX_RE = re.compile(r"\[\d+\]")
+
+
@dataclass
class RecordMetrics:
    """Evaluation result for a single response record."""

    # Flat mapping of identifiers, weighting info and metric scores for the record.
    row: dict[str, Any]
    # Ground-truth leaf paths that are absent from the prediction.
    missing_gt_paths: list[str]
    # Schema-required paths that are absent from the prediction.
    missing_required_paths: list[str]
+
+
def parse_args() -> argparse.Namespace:
    """Parse the command line for the evaluator.

    Returns:
        Namespace with ``input_path``, ``output_root``, ``modality``,
        ``bootstrap_samples``, ``seed`` and ``top_k_errors``.
    """
    cli = argparse.ArgumentParser(
        description="Evaluate SOB response JSONL files and recreate eval_records/eval_summary outputs."
    )
    cli.add_argument(
        "input_path",
        help="A response JSONL file or a directory containing response JSONL files.",
    )

    # Optional flags, registered in the order they appear in --help.
    option_specs = [
        (
            "--output-root",
            {
                "default": "data/evaluation",
                "help": "Root directory for evaluation outputs. Default: data/evaluation",
            },
        ),
        (
            "--modality",
            {
                "choices": ["auto", "text", "image", "audio"],
                "default": "auto",
                "help": "Response modality. Default: auto",
            },
        ),
        (
            "--bootstrap-samples",
            {
                "type": int,
                "default": 1000,
                "help": "Number of bootstrap samples for confidence intervals. Default: 1000",
            },
        ),
        (
            "--seed",
            {
                "type": int,
                "default": 42,
                "help": "Random seed for bootstrap sampling. Default: 42",
            },
        ),
        (
            "--top-k-errors",
            {
                "type": int,
                "default": 20,
                "help": "Number of most common missing paths to include in error_analysis. Default: 20",
            },
        ),
    ]
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
+
+
def main() -> None:
    """Evaluate every response file named on the command line.

    For each file, writes ``eval_records.jsonl`` and ``eval_summary.json``
    into the directory chosen by ``resolve_output_dir``.

    Raises:
        SystemExit: if no JSONL file is found, or a file yields no records.
    """
    args = parse_args()
    input_path = Path(args.input_path)
    # A directory means "every *.jsonl directly inside it", in sorted order.
    response_files = sorted(input_path.glob("*.jsonl")) if input_path.is_dir() else [input_path]
    if not response_files:
        raise SystemExit(f"No JSONL response files found under {input_path}")

    output_root = Path(args.output_root)
    for response_file in response_files:
        modality = infer_modality(response_file, args.modality)
        records, data_quality = evaluate_file(response_file, modality)
        if not records:
            raise SystemExit(f"No valid records found in {response_file}")

        model_ids = sorted({record.row["model_id"] for record in records})
        output_dir = resolve_output_dir(output_root, modality, response_file, model_ids)
        output_dir.mkdir(parents=True, exist_ok=True)
        records_path = output_dir / "eval_records.jsonl"
        summary_path = output_dir / "eval_summary.json"

        write_records(records_path, records, modality)
        summary = build_summary(
            response_file=response_file,
            records=records,
            data_quality=data_quality,
            bootstrap_samples=args.bootstrap_samples,
            seed=args.seed,
            top_k_errors=args.top_k_errors,
            modality=modality,
        )
        summary_path.write_text(json.dumps(summary, indent=2) + "\n")

        for written in (records_path, summary_path):
            print(f"Wrote {written}")
+
+
def infer_modality(response_file: Path, requested: str) -> str:
    """Return the modality for a response file.

    An explicit ``requested`` value (anything other than ``"auto"``) is
    returned as-is. Otherwise the lower-cased full path is checked for
    directory-style markers, then the lower-cased file name for suffix-style
    markers; ``"text"`` is the fallback.
    """
    if requested != "auto":
        return requested

    full_path = response_file.as_posix().lower()
    # Checked in order; the first marker found anywhere in the path wins.
    path_markers = (
        ("text_response", "text"),
        ("images_response", "image"),
        ("image_response", "image"),
        ("audio_response", "audio"),
    )
    for marker, modality in path_markers:
        if marker in full_path:
            return modality

    file_name = response_file.name.lower()
    for marker, modality in (("_image", "image"), ("_audio", "audio")):
        if marker in file_name:
            return modality
    return "text"
+
+
def resolve_output_dir(
    output_root: Path,
    modality: str,
    response_file: Path,
    model_ids: list[str],
) -> Path:
    """Return ``output_root / modality / <model name>`` for a response file.

    The model name is the file stem without the ``response_`` prefix and,
    for audio/image runs, without the matching ``_audio`` / ``_image``
    suffix. ``model_ids`` is accepted but not consulted.
    """
    model_name = response_file.stem.removeprefix("response_")
    # Only the suffix that matches the active modality is stripped.
    suffix_by_modality = {"audio": "_audio", "image": "_image"}
    suffix = suffix_by_modality.get(modality)
    if suffix is not None and model_name.endswith(suffix):
        model_name = model_name[: -len(suffix)]
    # Keep only the part after the last "/" (same rule as sanitize_model_id).
    model_name = model_name.split("/")[-1]
    return output_root / modality / model_name
+
+
def sanitize_model_id(model_id: str) -> str:
    """Return the part of ``model_id`` after its last ``/`` (all of it if none)."""
    _, _, tail = model_id.rpartition("/")
    return tail
+
+
def write_records(records_path: Path, records: list[RecordMetrics], modality: str) -> None:
    """Write one JSON object per record to ``records_path`` (UTF-8, JSONL).

    Each line is the record's metric row extended with its
    ``missing_gt_paths`` and ``missing_required_paths`` lists.

    Args:
        records_path: Destination file; overwritten if it exists.
        records: Evaluated records to serialise, in order.
        modality: Unused; kept so existing callers keep working. The previous
            image-only branch re-assigned ``required_key_recall`` to itself
            and so had no effect on the output.
    """
    with records_path.open("w", encoding="utf-8") as handle:
        for record in records:
            # Copy so the in-memory row is not polluted with the path lists.
            payload = dict(record.row)
            payload["missing_gt_paths"] = record.missing_gt_paths
            payload["missing_required_paths"] = record.missing_required_paths
            handle.write(json.dumps(payload) + "\n")
+
+
def evaluate_file(response_file: Path, modality: str) -> tuple[list[RecordMetrics], dict[str, int]]:
    """Evaluate every record in a response JSONL file.

    Blank lines are ignored. Lines that are not valid JSON, or whose JSON
    value is not an object, are counted under ``malformed_jsonl_line_count``
    and skipped instead of aborting the run.

    Args:
        response_file: JSONL file with one response record per line.
        modality: Modality passed through to ``evaluate_record``.

    Returns:
        ``(records, data_quality)`` where ``data_quality`` maps counter names
        to the number of affected lines/records.
    """
    records: list[RecordMetrics] = []
    data_quality = {
        "json_parse_fail_count": 0,
        "json_non_structured_root_count": 0,
        "invalid_schema_input_count": 0,
        "unknown_difficulty_count": 0,
        "malformed_jsonl_line_count": 0,
    }
    # Row flag -> counter incremented when that flag is 0.0 for a record.
    flag_counters = (
        ("json_parse_success", "json_parse_fail_count"),
        ("json_root_structured", "json_non_structured_root_count"),
        ("schema_valid_input", "invalid_schema_input_count"),
        ("known_difficulty", "unknown_difficulty_count"),
    )

    with response_file.open("r", encoding="utf-8") as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                data_quality["malformed_jsonl_line_count"] += 1
                continue
            if not isinstance(record, dict):
                # Valid JSON but not a record object (e.g. a list or a bare
                # string); evaluate_record expects a mapping.
                data_quality["malformed_jsonl_line_count"] += 1
                continue

            evaluated = evaluate_record(record, modality)
            for flag, counter in flag_counters:
                if evaluated.row[flag] == 0.0:
                    data_quality[counter] += 1
            records.append(evaluated)

    return records, data_quality
+
+
def evaluate_record(record: dict[str, Any], modality: str) -> RecordMetrics:
    """Score one response record against its ground truth and JSON schema.

    Args:
        record: Parsed JSONL record; the ``metadata``, ``input`` and
            ``output`` sub-objects are read. A sub-object that is missing,
            null or not a mapping is treated as empty.
        modality: Selects the coverage gate (see ``compute_coverage_gate``).

    Returns:
        ``RecordMetrics`` holding the metric row and the missing
        ground-truth / schema-required paths.
    """
    if not isinstance(record, dict):
        record = {}

    def block(name: str) -> dict[str, Any]:
        # ``record.get(name, {})`` would hand back None or a list unchanged
        # and crash on the ``.get`` calls below.
        value = record.get(name)
        return value if isinstance(value, dict) else {}

    metadata = block("metadata")
    input_block = block("input")
    output_block = block("output")

    record_id = metadata.get("record_id", "")
    model_id = metadata.get("model_id", "unknown")
    schema_complexity = metadata.get("schema_complexity") or "unknown"
    question_difficulty = metadata.get("difficulty") or "unknown"

    # The first priority field holding a recognised level decides the weight.
    weighting_basis = "unknown"
    difficulty = "unknown"
    for field_name in WEIGHT_FIELD_PRIORITY:
        level = metadata.get(field_name)
        # The str guard keeps unhashable values (lists, dicts) from raising
        # TypeError in the dict membership test.
        if isinstance(level, str) and level in DIFFICULTY_WEIGHTS:
            difficulty = level
            weighting_basis = field_name
            break
    difficulty_weight = DIFFICULTY_WEIGHTS.get(difficulty, 0.0)
    known_difficulty = 1.0 if difficulty in DIFFICULTY_WEIGHTS else 0.0

    schema = input_block.get("json_schema")
    candidate_raw = output_block.get("candidate_response")
    ground_truth = output_block.get("ground_truth")

    schema_valid_input = 1.0 if is_valid_schema(schema) else 0.0
    parsed_ok, candidate = parse_candidate(candidate_raw)
    root_structured = 1.0 if parsed_ok and isinstance(candidate, (dict, list)) else 0.0
    schema_compliance = (
        1.0
        if parsed_ok and root_structured and schema_valid_input and validates(candidate, schema)
        else 0.0
    )

    gt_leafs = flatten_leaf_paths(ground_truth)
    pred_leafs = flatten_leaf_paths(candidate) if root_structured else {}
    shared_paths = set(gt_leafs) & set(pred_leafs)

    raw_path_recall = ratio(len(shared_paths), len(gt_leafs))
    raw_path_set_f1 = f1_from_counts(len(shared_paths), len(pred_leafs), len(gt_leafs))
    raw_leaf_em = exact_match_ratio(gt_leafs, pred_leafs)
    raw_value_token_f1 = mean_token_f1(gt_leafs, pred_leafs)

    if root_structured and schema_valid_input:
        raw_type_precision = compute_type_precision(candidate, schema)
        required_key_recall, missing_required_paths = compute_required_key_recall(candidate, schema)
    else:
        # Nothing usable to inspect: every required path counts as missing.
        raw_type_precision = 0.0
        required_key_recall, missing_required_paths = 0.0, required_paths(schema)

    # Content metrics only count for responses that pass schema validation.
    hardening = 1.0 if parsed_ok and root_structured and schema_compliance else 0.0
    coverage_gate = compute_coverage_gate(modality, raw_path_set_f1)

    leaf_value_em = raw_leaf_em * hardening * coverage_gate
    value_token_f1 = raw_value_token_f1 * hardening * coverage_gate
    hier_path_recall = raw_path_recall * hardening
    path_set_f1 = raw_path_set_f1 * hardening
    type_precision = raw_type_precision * hardening
    # Require a parsed candidate: an unparsable response is represented as
    # None and would otherwise "match" a null ground truth.
    strict_json_em = (
        1.0 if parsed_ok and canonical_json(candidate) == canonical_json(ground_truth) else 0.0
    )

    row = {
        "record_id": record_id,
        "model_id": model_id,
        "schema_complexity": schema_complexity,
        "question_difficulty": question_difficulty,
        "difficulty": difficulty,
        "weighting_basis": weighting_basis,
        "difficulty_weight": difficulty_weight,
        "known_difficulty": known_difficulty,
        "json_parse_success": 1.0 if parsed_ok else 0.0,
        "json_root_structured": root_structured,
        "schema_valid_input": schema_valid_input,
        "schema_compliance": schema_compliance,
        "leaf_value_em": leaf_value_em,
        "value_token_f1": value_token_f1,
        "hier_path_recall": hier_path_recall,
        "path_set_f1": path_set_f1,
        "type_precision": type_precision,
        "required_key_recall": required_key_recall,
        "strict_json_em": strict_json_em,
    }

    missing_gt_paths = sorted(set(gt_leafs) - set(pred_leafs))
    return RecordMetrics(
        row=row,
        missing_gt_paths=missing_gt_paths,
        missing_required_paths=missing_required_paths,
    )
+
+
def parse_candidate(candidate_raw: Any) -> tuple[bool, Any]:
    """Accept a candidate response only if it is already a dict or list.

    Returns:
        ``(True, candidate_raw)`` for a dict or list, ``(False, None)`` for
        anything else; strings are not parsed here.
    """
    already_structured = isinstance(candidate_raw, (dict, list))
    return (True, candidate_raw) if already_structured else (False, None)
+
+
def extract_json_candidates(text: str) -> list[str]:
    """List substrings of ``text`` that might be a JSON document.

    Candidates, in order and without duplicates:
      1. the whole text;
      2. the stripped body of every fenced code block;
      3. for every ``{`` or ``[``, the span from that character to the last
         ``}`` or ``]`` in the text.

    Every opening bracket resolves to the same end position (the last
    closing bracket), so it is located once rather than re-scanning and
    re-slicing the text for each start. The result is unchanged.
    """
    candidates = [text]
    candidates.extend(match.strip() for match in JSON_BLOCK_RE.findall(text))

    last_close = max(text.rfind("}"), text.rfind("]"))
    for start, char in enumerate(text):
        # An opener located after the last closer yields no span.
        if char in "[{" and start < last_close:
            candidates.append(text[start : last_close + 1])

    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(candidates))
+
+
def is_valid_schema(schema: Any) -> bool:
    """Return True when ``schema`` is a dict that passes the Draft 7 meta-schema check.

    Any exception raised by the check is treated as "not valid".
    """
    if isinstance(schema, dict):
        try:
            Draft7Validator.check_schema(schema)
        except Exception:
            return False
        else:
            return True
    return False
+
+
def validates(candidate: Any, schema: dict[str, Any]) -> bool:
    """Return True when ``candidate`` has no Draft 7 validation errors against ``schema``."""
    validator = Draft7Validator(schema)
    # is_valid reports "no validation error found", which is the condition
    # under which validate() returns without raising ValidationError.
    return validator.is_valid(candidate)
+
+
def flatten_leaf_paths(value: Any, prefix: str = "") -> dict[str, Any]:
    """Map every scalar leaf of a JSON value to its dotted/indexed path.

    Dict keys are joined with ``.`` and list positions appended as ``[i]``.
    Empty containers contribute nothing, and a scalar with an empty
    ``prefix`` (a bare scalar root) is dropped.
    """
    if isinstance(value, dict):
        children = [(f"{prefix}.{key}" if prefix else key, child) for key, child in value.items()]
    elif isinstance(value, list):
        children = [(f"{prefix}[{idx}]", child) for idx, child in enumerate(value)]
    else:
        return {prefix: value} if prefix else {}

    collected: dict[str, Any] = {}
    for child_prefix, child in children:
        collected.update(flatten_leaf_paths(child, child_prefix))
    return collected
+
+
def flatten_present_paths(value: Any, prefix: str = "") -> set[str]:
    """Collect the path of every node of a JSON value, containers included.

    Uses the same path syntax as ``flatten_leaf_paths``; the root itself
    (empty ``prefix``) is not reported.
    """
    found: set[str] = {prefix} if prefix else set()
    if isinstance(value, dict):
        for key, child in value.items():
            child_prefix = f"{prefix}.{key}" if prefix else key
            found |= flatten_present_paths(child, child_prefix)
    elif isinstance(value, list):
        for idx, child in enumerate(value):
            found |= flatten_present_paths(child, f"{prefix}[{idx}]")
    return found
+
+
def wildcard_path(path: str) -> str:
    """Replace every concrete list index (``[3]``) in ``path`` with ``[]``."""
    generalised = INDEX_RE.sub("[]", path)
    return generalised
+
+
def required_paths(schema: Any, prefix: str = "") -> list[str]:
    """List the paths a schema marks as required, in schema order.

    Objects contribute each ``required`` key followed by that key's own
    nested required paths. Arrays contribute their item schema's required
    paths under a ``[]`` segment. Any other schema contributes nothing.
    """
    if not isinstance(schema, dict):
        return []

    kind = schema.get("type")
    if kind == "array":
        item_prefix = f"{prefix}[]" if prefix else "[]"
        return list(required_paths(schema.get("items", {}), item_prefix))
    if kind != "object":
        return []

    properties = schema.get("properties", {})
    collected: list[str] = []
    for name in schema.get("required", []):
        path = f"{prefix}.{name}" if prefix else name
        collected.append(path)
        collected += required_paths(properties.get(name, {}), path)
    return collected
+
+
def compute_required_key_recall(candidate: Any, schema: Any) -> tuple[float, list[str]]:
    """Return the share of schema-required paths present in ``candidate``.

    Returns:
        ``(recall, missing)``, where ``missing`` is the sorted list of
        required paths not found. With no required paths the result is
        ``(1.0, [])``.
    """
    wanted = sorted(set(required_paths(schema)))
    if not wanted:
        return 1.0, []

    # Compare on index-free paths so that any list element can satisfy "[]".
    seen = set()
    for concrete in flatten_present_paths(candidate):
        seen.add(wildcard_path(concrete))

    missing = [path for path in wanted if path not in seen]
    found_count = len(wanted) - len(missing)
    return ratio(found_count, len(wanted)), missing
+
+
def compute_type_precision(candidate: Any, schema: Any) -> float:
    """Return the share of predicted leaves whose value matches the schema type.

    A leaf counts only when the schema declares a type for its path and the
    value matches that type. A candidate with no leaves scores 0.0.
    """
    predicted = flatten_leaf_paths(candidate)
    if not predicted:
        return 0.0

    def conforms(path: str, value: Any) -> bool:
        declared = schema_type_for_path(schema, path)
        return declared is not None and is_type_match(value, declared)

    hits = sum(1 for path, value in predicted.items() if conforms(path, value))
    return ratio(hits, len(predicted))
+
+
def schema_type_for_path(schema: Any, path: str) -> str | None:
    """Look up the declared JSON type of the schema node addressed by ``path``.

    The path is walked segment by segment: a key segment descends through
    ``properties`` of a node typed ``object``, an index segment through
    ``items`` of a node typed ``array``. A node whose ``type`` is a list
    (e.g. ``["object", "null"]``) is descended when the list contains the
    needed container type.

    Returns:
        The normalised type name of the addressed node, or ``None`` when the
        path cannot be resolved. This includes schema nodes that are not
        mappings, such as tuple-form ``items`` or boolean subschemas, which
        previously raised ``AttributeError``.
    """
    if not isinstance(schema, dict):
        return None

    def allows(node: dict[str, Any], container: str) -> bool:
        declared = node.get("type")
        if isinstance(declared, list):
            return container in declared
        return declared == container

    current: Any = schema
    for part in split_path(path):
        if not isinstance(current, dict):
            # Reached a list- or bool-valued subschema; cannot descend.
            return None
        if isinstance(part, str):
            if not allows(current, "object"):
                return None
            properties = current.get("properties")
            current = properties.get(part) if isinstance(properties, dict) else None
        else:
            if not allows(current, "array"):
                return None
            current = current.get("items")
        if current is None:
            return None
    return normalize_schema_type(current.get("type")) if isinstance(current, dict) else None
+
+
def split_path(path: str) -> list[str | int]:
    """Split a flattened path such as ``a.b[0].c`` into key and index parts.

    ``.`` separates keys and ``[<digits>]`` yields an integer index. A ``[``
    that does not start a well-formed numeric index (for example in a key
    like ``price [USD]``, or an unmatched ``[``) is kept as an ordinary
    character of the current key instead of raising ``ValueError``.

    Returns:
        Keys as ``str`` and list indexes as ``int``, in path order.
    """
    parts: list[str | int] = []
    token = ""
    idx = 0
    while idx < len(path):
        char = path[idx]
        if char == ".":
            if token:
                parts.append(token)
                token = ""
            idx += 1
            continue
        if char == "[":
            end = path.find("]", idx)
            digits = path[idx + 1 : end] if end != -1 else ""
            # isascii() excludes characters such as superscript digits that
            # satisfy isdigit() but are rejected by int().
            if digits.isascii() and digits.isdigit():
                if token:
                    parts.append(token)
                    token = ""
                parts.append(int(digits))
                idx = end + 1
                continue
        token += char
        idx += 1
    if token:
        parts.append(token)
    return parts
+
+
def normalize_schema_type(schema_type: Any) -> str | None:
    """Reduce a schema ``type`` value to a single JSON type name.

    A plain type name is returned as-is. A list is reduced to its only
    non-null member (``["string", "null"]`` -> ``"string"``); a list holding
    only ``"null"`` gives ``"null"``.

    Returns:
        The type name, or ``None`` for unions of several non-null types,
        empty lists and unrecognised values. Such lists previously raised
        ``TypeError`` (unhashable list used in a set membership test).
    """
    known = {"string", "integer", "number", "boolean", "object", "array", "null"}
    if isinstance(schema_type, list):
        non_null = [item for item in schema_type if item != "null"]
        if len(non_null) == 1:
            schema_type = non_null[0]
        elif schema_type and not non_null:
            schema_type = "null"
        else:
            return None
    # The str guard keeps unhashable values out of the set lookup.
    if isinstance(schema_type, str) and schema_type in known:
        return schema_type
    return None
+
+
def json_type_name(value: Any) -> str:
    """Return the JSON type name of a Python value, or ``"unknown"``."""
    if value is None:
        return "null"
    # bool must be tested before int: bool is a subclass of int in Python.
    if isinstance(value, bool):
        return "boolean"
    if isinstance(value, int):
        return "integer"
    if isinstance(value, float):
        return "number"
    if isinstance(value, str):
        return "string"
    if isinstance(value, list):
        return "array"
    if isinstance(value, dict):
        return "object"
    return "unknown"


def is_type_match(value: Any, expected_type: str) -> bool:
    """Tell whether ``value`` satisfies the JSON Schema type ``expected_type``.

    Besides an exact type-name match, this follows the Draft 7 numeric
    rules: integers are valid numbers, and a float with a zero fractional
    part (``3.0``) is a valid integer. This keeps the result in line with
    the ``Draft7Validator`` used for schema compliance.
    """
    actual = json_type_name(value)
    if actual == expected_type:
        return True
    # JSON Schema treats integers as valid numbers.
    if expected_type == "number" and actual == "integer":
        return True
    # Draft 6+ counts any number without a fractional part as an integer.
    if expected_type == "integer" and isinstance(value, float) and value.is_integer():
        return True
    return False
+
+
def exact_match_ratio(gt_leafs: dict[str, Any], pred_leafs: dict[str, Any]) -> float:
    """Return the fraction of ground-truth leaves reproduced exactly.

    A leaf matches when the prediction has the same path and an equal value.
    Booleans only match booleans: Python evaluates ``True == 1`` and
    ``False == 0`` as true, but JSON ``true`` and ``1`` are different values.

    Returns:
        Matches divided by the number of ground-truth leaves, or ``1.0``
        when there are no ground-truth leaves.
    """
    if not gt_leafs:
        return 1.0

    def same_value(expected: Any, predicted: Any) -> bool:
        if isinstance(expected, bool) != isinstance(predicted, bool):
            return False
        return expected == predicted

    matches = sum(
        1
        for path, gt_val in gt_leafs.items()
        if path in pred_leafs and same_value(gt_val, pred_leafs[path])
    )
    return matches / len(gt_leafs)
+
+
def mean_token_f1(gt_leafs: dict[str, Any], pred_leafs: dict[str, Any]) -> float:
    """Return the token-level F1 averaged over all ground-truth leaves.

    A ground-truth path missing from the prediction scores 0.0. Looking it
    up with ``.get`` would yield ``None``, which tokenises to ``"null"`` and
    would give a missing field full credit against a null ground truth.

    Returns:
        Mean F1 over ground-truth leaves, or ``1.0`` when there are none.
    """
    if not gt_leafs:
        return 1.0
    total = 0.0
    for path, gt_val in gt_leafs.items():
        if path in pred_leafs:
            total += token_f1(gt_val, pred_leafs[path])
    return total / len(gt_leafs)
+
+
def token_f1(gt_value: Any, pred_value: Any) -> float:
    """Return the bag-of-tokens F1 between two leaf values.

    Both values are tokenised with ``normalize_tokens``. Two empty token
    lists score 1.0; exactly one empty list, or no shared token, scores 0.0.
    """
    expected = Counter(normalize_tokens(gt_value))
    predicted = Counter(normalize_tokens(pred_value))

    if not expected and not predicted:
        return 1.0
    if not (expected and predicted):
        return 0.0

    # Multiset intersection: each token counts min(expected, predicted) times.
    shared = sum((expected & predicted).values())
    if shared == 0:
        return 0.0
    precision = shared / sum(predicted.values())
    recall = shared / sum(expected.values())
    return (2 * precision * recall) / (precision + recall)
+
+
def normalize_tokens(value: Any) -> list[str]:
    """Turn a leaf value into lower-cased, whitespace-separated tokens.

    ``None`` becomes ``"null"`` and booleans ``"true"`` / ``"false"``;
    everything else goes through ``str()``. Punctuation is replaced by
    spaces first, then the articles a/an/the are removed.
    """
    if value is None:
        rendered = "null"
    elif isinstance(value, bool):
        rendered = "true" if value else "false"
    else:
        rendered = str(value)

    without_punct = PUNCT_RE.sub(" ", rendered.lower())
    without_articles = ARTICLES_RE.sub(" ", without_punct)
    # split() with no argument collapses whitespace runs and returns [] for
    # an empty or all-blank string.
    return without_articles.split()
+
+
def compute_coverage_gate(modality: str, structure_coverage: float) -> float:
    """Return the multiplier applied to value metrics, based on structure coverage.

    Text: a hard gate, 1.0 at coverage >= 0.95 and 0.0 below.
    Other modalities: ``(coverage / 0.90) ** 2`` capped at 1.0, and 0.0 when
    coverage is not positive.
    """
    if modality == "text":
        if structure_coverage >= 0.95:
            return 1.0
        return 0.0
    if structure_coverage > 0:
        return min(1.0, (structure_coverage / 0.90) ** 2)
    return 0.0
+
+
def canonical_json(value: Any) -> str:
    """Serialise ``value`` to compact JSON with sorted keys and non-ASCII characters kept as-is."""
    compact_separators = (",", ":")
    return json.dumps(
        value,
        ensure_ascii=False,
        separators=compact_separators,
        sort_keys=True,
    )
+
+
def ratio(numerator: int | float, denominator: int | float) -> float:
    """Return ``numerator / denominator`` as a float, or 0.0 for a zero denominator."""
    return 0.0 if denominator == 0 else float(numerator) / float(denominator)
+
+
def f1_from_counts(overlap: int, pred_total: int, gt_total: int) -> float:
    """Return F1 from an overlap count and the predicted / ground-truth totals.

    Returns 0.0 when either total is zero or there is no overlap.
    """
    if 0 in (pred_total, gt_total):
        return 0.0
    precision, recall = overlap / pred_total, overlap / gt_total
    combined = precision + recall
    if combined == 0:
        return 0.0
    return (2 * precision * recall) / combined
+
+
def build_summary(
    response_file: Path,
    records: list[RecordMetrics],
    data_quality: dict[str, int],
    bootstrap_samples: int,
    seed: int,
    top_k_errors: int,
    modality: str,
) -> dict[str, Any]:
    """Assemble the ``eval_summary.json`` payload for one response file.

    The payload has the source file, record count, model ids, data-quality
    counters, and unweighted plus weighted aggregates. For the image
    modality a non-empty error analysis of missing paths is attached too.
    """
    # Metrics reported in the summary, in output order.
    reported_metrics = [
        "json_parse_success",
        "json_root_structured",
        "schema_valid_input",
        "schema_compliance",
        "leaf_value_em",
        "value_token_f1",
        "hier_path_recall",
        "path_set_f1",
        "type_precision",
        "strict_json_em",
    ]
    rows = [record.row for record in records]

    aggregates = {
        "overall": summarize_rows(rows, reported_metrics, bootstrap_samples, seed, weighted=False),
        "overall_weighted": summarize_rows(rows, reported_metrics, bootstrap_samples, seed, weighted=True),
    }
    payload: dict[str, Any] = {
        "response_file": response_file.as_posix(),
        "num_records": len(rows),
        "model_ids": sorted({row["model_id"] for row in rows}),
        "data_quality": data_quality,
        "summary": aggregates,
    }

    if modality == "image":
        error_analysis = summarize_error_paths(records, top_k_errors)
        if error_analysis:
            payload["error_analysis"] = error_analysis
    return payload
+
+
def summarize_rows(
    rows: list[dict[str, Any]],
    metrics: list[str],
    bootstrap_samples: int,
    seed: int,
    weighted: bool,
) -> dict[str, Any]:
    """Aggregate metric rows into means with bootstrap 95% intervals.

    One ``random.Random`` is shared by all bootstrap calls, metrics first
    and categories second, so results are reproducible for a given ``seed``.
    The weighted variant seeds with ``seed + 1`` and adds weighting details.
    """
    rng = random.Random(seed + (1 if weighted else 0))
    point_estimate = weighted_mean if weighted else arithmetic_mean

    summary_metrics: dict[str, Any] = {}
    for metric in metrics:
        mean = point_estimate(rows, metric)
        low, high = bootstrap_ci(rows, metric, bootstrap_samples, rng, weighted)
        summary_metrics[metric] = dict(
            mean=mean,
            ci95_low=low,
            ci95_high=high,
            metric_name=METRIC_DISPLAY_NAMES[metric],
        )

    category_scores: dict[str, Any] = {}
    for category_name, components in CATEGORIES.items():
        mean = category_mean(rows, components, weighted)
        low, high = bootstrap_category_ci(rows, components, bootstrap_samples, rng, weighted)
        category_scores[category_name] = dict(
            mean=mean,
            ci95_low=low,
            ci95_high=high,
            category_name=category_name,
            components=components,
        )

    payload: dict[str, Any] = dict(
        n=len(rows),
        metrics=summary_metrics,
        category_scores=category_scores,
    )
    if weighted:
        payload.update(
            weighting="schema_complexity",
            weight_field_priority=list(WEIGHT_FIELD_PRIORITY),
            difficulty_weights=DIFFICULTY_WEIGHTS,
        )
    return payload
+
+
def arithmetic_mean(rows: list[dict[str, Any]], metric: str) -> float:
    """Return the unweighted mean of ``metric`` over ``rows`` (0.0 for no rows)."""
    count = len(rows)
    if count == 0:
        return 0.0
    total = sum(entry[metric] for entry in rows)
    return total / count
+
+
def weighted_mean(rows: list[dict[str, Any]], metric: str) -> float:
    """Return the mean of ``metric`` weighted by each row's ``difficulty_weight``.

    Returns 0.0 when the weights sum to zero, which includes no rows.
    """
    weight_sum = sum(entry["difficulty_weight"] for entry in rows)
    if weight_sum == 0:
        return 0.0
    weighted_sum = sum(entry[metric] * entry["difficulty_weight"] for entry in rows)
    return weighted_sum / weight_sum
+
+
def category_mean(rows: list[dict[str, Any]], components: list[str], weighted: bool) -> float:
    """Return the mean category score over ``rows``.

    A row's category score is the plain average of its ``components``
    metrics; rows are then combined with the weighted or unweighted mean.
    """
    if not rows:
        return 0.0

    scored_rows = []
    for row in rows:
        category_score = sum(row[name] for name in components) / len(components)
        # Copy the row so the caller's dict never gains the helper key.
        scored_rows.append({**row, "_category": category_score})

    combine = weighted_mean if weighted else arithmetic_mean
    return combine(scored_rows, "_category")
+
+
def bootstrap_ci(
    rows: list[dict[str, Any]],
    metric: str,
    samples: int,
    rng: random.Random,
    weighted: bool,
) -> tuple[float, float]:
    """Return a percentile-bootstrap 95% interval for the mean of ``metric``.

    Each of the ``samples`` resamples draws ``len(rows)`` rows with
    replacement from ``rng``. Returns ``(0.0, 0.0)`` when ``rows`` is empty.
    """
    if not rows:
        return 0.0, 0.0

    population = len(rows)
    estimate = weighted_mean if weighted else arithmetic_mean
    estimates = []
    for _ in range(samples):
        resample = [rows[rng.randrange(population)] for _ in range(population)]
        estimates.append(estimate(resample, metric))
    return percentile_bounds(sorted(estimates))
+
+
def bootstrap_category_ci(
    rows: list[dict[str, Any]],
    components: list[str],
    samples: int,
    rng: random.Random,
    weighted: bool,
) -> tuple[float, float]:
    """Return a percentile-bootstrap 95% interval for a category score.

    Resampling matches ``bootstrap_ci``; each resample is scored with
    ``category_mean``. Returns ``(0.0, 0.0)`` when ``rows`` is empty.
    """
    if not rows:
        return 0.0, 0.0

    population = len(rows)
    estimates = []
    for _ in range(samples):
        resample = [rows[rng.randrange(population)] for _ in range(population)]
        estimates.append(category_mean(resample, components, weighted))
    return percentile_bounds(sorted(estimates))
+
+
def percentile_bounds(values: list[float]) -> tuple[float, float]:
    """Pick the 2.5th and 97.5th percentile entries from a list.

    The list is indexed as given (callers pass it sorted). The lower index
    is rounded down and the upper one up, so the interval is never narrower
    than the nominal one. Returns ``(0.0, 0.0)`` for an empty list.
    """
    if not values:
        return 0.0, 0.0
    last = len(values) - 1
    lower = max(0, math.floor(0.025 * last))
    upper = min(last, math.ceil(0.975 * last))
    return values[lower], values[upper]
+
+
def summarize_error_paths(records: list[RecordMetrics], top_k_errors: int) -> dict[str, Any]:
    """Count the most frequently missing paths across all records.

    Returns:
        A dict with ``top_missing_gt_paths`` and/or
        ``top_missing_required_paths``, each a list of
        ``{"path", "count"}`` entries for the ``top_k_errors`` most common
        paths. A key is left out when no path of that kind was missing.
    """
    # (output key, record attribute holding the missing paths)
    sections = (
        ("top_missing_gt_paths", "missing_gt_paths"),
        ("top_missing_required_paths", "missing_required_paths"),
    )
    payload: dict[str, Any] = {}
    for output_key, attribute in sections:
        tally: Counter = Counter()
        for record in records:
            tally.update(getattr(record, attribute))
        if tally:
            payload[output_key] = [
                {"path": path, "count": count}
                for path, count in tally.most_common(top_k_errors)
            ]
    return payload
+
+
+if __name__ == "__main__":
+ main()
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..8113e31
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,31 @@
+[project]
+name = "sob"
+version = "0.1.3"
+description = "The Structured Output Benchmark"
+readme = "README.md"
+requires-python = ">=3.12,<3.13"
+dependencies = [
+ "datasets>=4.5.0",
+ "matplotlib>=3.10.8",
+ "numpy>=1.24,<2.3",
+ "pandas>=3.0.1",
+ "ruff>=0.15.2",
+ "seaborn>=0.13.2",
+ "pillow>=10.0.0",
+ "python-dotenv>=1.2.1",
+ "pydantic>=2.0.0",
+ "jsonschema>=4.0.0",
+ "huggingface-hub>=0.22.0",
+ "tqdm>=4.67.3",
+ "transformers>=4.40.0",
+ "sentence-transformers>=5.2.3",
+ "torch>=2.10.0",
+ "vllm==0.17.0",
+ "openai>=2.24.0",
+ "anthropic>=0.75.0",
+ "google-genai>=1.0.0",
+ "pyarrow>=16.0.0",
+]
+
+[tool.pytest.ini_options]
+pythonpath = ["."]
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..96f5b74
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,18 @@
+datasets>=4.5.0
+matplotlib>=3.10.8
+numpy>=1.24,<2.3
+pandas>=3.0.1
+seaborn>=0.13.2
+pillow>=10.0.0
+python-dotenv>=1.2.1
+pydantic>=2.0.0
+jsonschema>=4.0.0
+huggingface-hub>=0.22.0
+tqdm>=4.67.3
+transformers>=4.40.0
+sentence-transformers>=5.2.3
+torch>=2.10.0
+openai>=2.24.0
+anthropic>=0.75.0
+google-genai>=1.0.0
+
diff --git a/scripts/build_leaderboard.py b/scripts/build_leaderboard.py
new file mode 100644
index 0000000..18ee8e3
--- /dev/null
+++ b/scripts/build_leaderboard.py
@@ -0,0 +1,235 @@
+"""Build the unified leaderboard (paper Table 1) from per-source eval_summary.json files.
+
+Discovery
+---------
+Model directories are auto-discovered by scanning
+``data/evaluation/{text,image,audio}/<model_dir>/eval_summary.json``. Any
+directory that contains an ``eval_summary.json`` is included; a model can be
+present in any subset of the three modalities.
+
+Display names are read from ``data/evaluation/display_names.json`` (a flat
+``{dir_name: pretty_name}`` map). Directories without an entry fall back to
+the directory name itself, so adding a new model is "drop the eval dir +
+add one line to ``display_names.json``."
+
+Aggregation
+-----------
+Uses the formula stated in the paper:
+
+ bar_m_k = sum_u W_u * m^(w)_{k,u} / sum_u W_u
+
+where m^(w)_{k,u} is the schema-complexity-weighted within-source mean
+(read from ``summary.overall_weighted.metrics``) and W_u is the total
+schema-complexity weight for source u: (W_t, W_i, W_a) = (13054, 602, 343).
+
+Perfect Response Rate is aggregated over text + image only (audio omitted).
+
+Overall (Adj.) = Overall (Raw) * coverage, where coverage = n_eval / 5324.
+"""
+import argparse
+import json
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+
+# Repository root: two directory levels above this script.
+REPO = Path(__file__).resolve().parent.parent
+# Root of the per-modality evaluation outputs scanned by this script.
+EVAL_DIR = REPO / "data" / "evaluation"
+# Optional {dir_name: pretty_name} map used for the "model" column.
+DISPLAY_NAMES_FILE = EVAL_DIR / "display_names.json"
+
+# Per-source weights (text, image, audio) used when pooling a metric across
+# modalities in compute_row().
+W_T, W_I, W_A = 13054, 602, 343
+# Per-source record counts (text, image, audio); compute_row() adds up the
+# counts of the modalities a model has to derive its coverage.
+N_T, N_I, N_A = 5000, 209, 115
+TOTAL_N = N_T + N_I + N_A # 5324
+
+# Modality sub-directories under EVAL_DIR, in the order used throughout.
+MODALITIES = ("text", "image", "audio")
+
+# (key inside summary.overall_weighted.metrics, short column label) pairs.
+# Only the key is read by compute_row(); the label is informational.
+METRIC_KEYS = [
+ ("leaf_value_em", "Val.Acc."),
+ ("value_token_f1", "Faithful."),
+ ("schema_compliance", "JSON Pass"),
+ ("hier_path_recall", "Path Rec."),
+ ("path_set_f1", "Str.Cov."),
+ ("type_precision", "Type Saf."),
+]
+
+
+def load_display_names() -> dict[str, str]:
+    """Read the dir_name -> display_name map from ``DISPLAY_NAMES_FILE``.
+
+    Keys starting with ``_`` (e.g. ``_comment``) are dropped. A missing file
+    yields an empty map.
+    """
+    if DISPLAY_NAMES_FILE.exists():
+        mapping = json.loads(DISPLAY_NAMES_FILE.read_text())
+        return {
+            dir_name: pretty
+            for dir_name, pretty in mapping.items()
+            if not dir_name.startswith("_")
+        }
+    return {}
+
+
+def discover_model_dirs() -> list[str]:
+    """List every model directory name found under any modality, sorted.
+
+    A directory qualifies when ``<dir>/eval_summary.json`` exists inside at
+    least one ``EVAL_DIR/<modality>/`` folder. Names starting with ``.`` and
+    names ending in ``_audio_true`` (leftover ablation dirs) are ignored.
+    Modality folders that do not exist are skipped.
+    """
+
+    def _qualifies(entry: Path) -> bool:
+        if not entry.is_dir():
+            return False
+        if entry.name.startswith(".") or entry.name.endswith("_audio_true"):
+            return False
+        return (entry / "eval_summary.json").exists()
+
+    modality_roots = [EVAL_DIR / modality for modality in MODALITIES]
+    names = {
+        entry.name
+        for root in modality_roots
+        if root.is_dir()
+        for entry in root.iterdir()
+        if _qualifies(entry)
+    }
+    return sorted(names)
+
+
+def load_weighted_metrics(modality: str, model_dir: str):
+    """Load the ``overall_weighted`` metric means for one model and modality.
+
+    Reads ``EVAL_DIR/<modality>/<model_dir>/eval_summary.json`` and returns
+    ``{metric_key: mean}`` taken from ``summary.overall_weighted.metrics``.
+    Returns ``None`` when the summary file does not exist.
+    """
+    summary_path = EVAL_DIR / modality / model_dir / "eval_summary.json"
+    if not summary_path.exists():
+        return None
+    with summary_path.open() as handle:
+        summary = json.load(handle)
+    weighted = summary["summary"]["overall_weighted"]["metrics"]
+    return {name: weighted[name]["mean"] for name in weighted}
+
+
+def aggregate(values_with_weights):
+    """Weighted mean of ``(value, weight)`` pairs.
+
+    Returns ``0.0`` when the weights do not sum to a positive number (which
+    includes the empty-input case).
+    """
+    weight_sum = sum(weight for _, weight in values_with_weights)
+    weighted_sum = sum(value * weight for value, weight in values_with_weights)
+    if weight_sum > 0:
+        return weighted_sum / weight_sum
+    return 0.0
+
+
+def compute_row(model_dir: str):
+    """Build one leaderboard row for ``model_dir``.
+
+    Each metric in ``METRIC_KEYS`` is pooled across the modalities that
+    report it, weighted by ``W_T`` / ``W_I`` / ``W_A``. The "perfect" score
+    pools ``strict_json_em`` over text and image only. ``overall_raw`` is the
+    plain mean of whichever pooled scores exist, and ``overall_adj`` scales it
+    by coverage (records in the modalities present, divided by ``TOTAL_N``).
+
+    Returns ``None`` when no metric could be pooled at all.
+    """
+    text = load_weighted_metrics("text", model_dir)
+    image = load_weighted_metrics("image", model_dir)
+    audio = load_weighted_metrics("audio", model_dir)
+    loaded = [text, image, audio]
+
+    # A modality counts towards coverage only if it produced a non-empty dict.
+    n_eval = sum(n for n, metrics in zip((N_T, N_I, N_A), loaded) if metrics)
+    coverage = n_eval / TOTAL_N
+
+    def _pooled(key, sources):
+        pairs = [
+            (metrics[key], weight)
+            for metrics, weight in sources
+            if metrics is not None and key in metrics
+        ]
+        return aggregate(pairs) if pairs else None
+
+    all_sources = [(text, W_T), (image, W_I), (audio, W_A)]
+    aggregates = {}
+    for key, _label in METRIC_KEYS:
+        pooled = _pooled(key, all_sources)
+        if pooled is not None:
+            aggregates[key] = pooled
+
+    # Audio is deliberately left out of the perfect-response score.
+    perfect = _pooled("strict_json_em", all_sources[:2])
+    if perfect is not None:
+        aggregates["perfect"] = perfect
+
+    score_keys = [key for key, _label in METRIC_KEYS] + ["perfect"]
+    avail = [aggregates[key] for key in score_keys if key in aggregates]
+    if not avail:
+        return None
+    raw = sum(avail) / len(avail)
+
+    return {
+        "overall_adj": raw * coverage,
+        "overall_raw": raw,
+        "coverage": coverage,
+        "modalities": [name for name, metrics in zip(MODALITIES, loaded) if metrics],
+        "value_accuracy": aggregates.get("leaf_value_em"),
+        "faithfulness": aggregates.get("value_token_f1"),
+        "json_pass": aggregates.get("schema_compliance"),
+        "path_recall": aggregates.get("hier_path_recall"),
+        "structure_cov": aggregates.get("path_set_f1"),
+        "type_safety": aggregates.get("type_precision"),
+        "perfect": aggregates.get("perfect"),
+    }
+
+
+# Internal row key -> key used in the emitted JSON. Keys not listed here keep
+# their internal name (see _to_json_row).
+JSON_KEY_MAP = {
+ "overall_adj": "overall",
+ "json_pass": "json_pass_rate",
+ "structure_cov": "structure_coverage",
+ "perfect": "perfect_response",
+}
+# The exact keys, in order, written for every row of the JSON output. Row
+# fields that are not listed here are dropped by _to_json_row.
+JSON_ROW_KEYS = [
+ "model", "overall", "value_accuracy", "faithfulness",
+ "json_pass_rate", "path_recall", "structure_coverage",
+ "type_safety", "perfect_response", "modalities",
+]
+
+
+def _to_json_row(row: dict) -> dict:
+    """Project an internal row onto the public JSON row shape.
+
+    Keys are renamed through ``JSON_KEY_MAP`` and the result is restricted to
+    ``JSON_ROW_KEYS`` (in that order); a key with no value becomes ``None``.
+    """
+    renamed = {JSON_KEY_MAP.get(key, key): value for key, value in row.items()}
+    return {column: renamed.get(column) for column in JSON_ROW_KEYS}
+
+
+def main():
+    """CLI entry point: build the leaderboard JSON and print a summary table.
+
+    Steps:
+      1. Discover model directories and compute one row per model; models for
+         which no metric could be pooled are skipped and reported.
+      2. Sort rows by ``overall_adj`` (descending) and write the JSON payload
+         to ``--output``.
+      3. Print a fixed-width table to stdout, and LaTeX rows as well when
+         ``--print-latex`` is given.
+
+    Exits with status 1 when no model directory is found.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--output", default="leaderboard.json",
+                        help="Path to write the leaderboard JSON (default: leaderboard.json)")
+    parser.add_argument("--print-latex", action="store_true",
+                        help="Also print LaTeX rows for paper Table 1.")
+    args = parser.parse_args()
+
+    display_names = load_display_names()
+    model_dirs = discover_model_dirs()
+    if not model_dirs:
+        print("error: no model directories with eval_summary.json found under data/evaluation/", file=sys.stderr)
+        sys.exit(1)
+
+    rows = []
+    skipped: list[str] = []
+    for model_dir in model_dirs:
+        row = compute_row(model_dir)
+        if row is None:
+            skipped.append(model_dir)
+            continue
+        row["model"] = display_names.get(model_dir, model_dir)
+        row["model_dir"] = model_dir
+        rows.append(row)
+
+    rows.sort(key=lambda r: -r["overall_adj"])
+
+    payload = {
+        "generated_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
+        "schema_version": 1,
+        "rows": [_to_json_row(r) for r in rows],
+    }
+    out_path = Path(args.output)
+    out_path.write_text(json.dumps(payload, indent=2) + "\n")
+    print(f"Wrote {len(rows)} rows -> {out_path}")
+    if skipped:
+        print(f" (skipped {len(skipped)} dirs with no metrics: {', '.join(skipped)})")
+    print()
+
+    cols = ["model", "overall_adj", "value_accuracy", "faithfulness",
+            "json_pass", "path_recall", "structure_cov", "type_safety", "perfect"]
+    widths = [25, 12, 10, 10, 10, 10, 10, 10, 10]
+    print("".join(f"{c:<{w}}" for c, w in zip(cols, widths)))
+    for r in rows:
+        cells = []
+        for c, w in zip(cols, widths):
+            v = r.get(c)
+            if isinstance(v, str):
+                cells.append(f"{v:<{w}}")
+            elif v is None:
+                cells.append(f"{'-':<{w}}")
+            else:
+                cells.append(f"{v:.4f}".ljust(w))
+        print("".join(cells))
+
+    if args.print_latex:
+        def _latex_cell(value) -> str:
+            # A metric can be None when no modality reported it; render it as
+            # "--" instead of crashing on the float format spec.
+            return f"{value:.3f}" if value is not None else "--"
+
+        latex_cols = ["overall_adj", "value_accuracy", "faithfulness", "json_pass",
+                      "path_recall", "structure_cov", "type_safety", "perfect"]
+        print("\n% --- LaTeX rows for Table 1 (overall_leaderboard) ---")
+        for r in rows:
+            cells = " & ".join(_latex_cell(r[c]) for c in latex_cols)
+            print(f"{r['model']:<22} & {cells} \\\\")
+
+
+# Build the leaderboard only when run as a script (e.g. from the CI workflow),
+# not when this module is imported.
+if __name__ == "__main__":
+ main()
diff --git a/sob/__init__.py b/sob/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/sob/common/__init__.py b/sob/common/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/sob/common/checkpoint.py b/sob/common/checkpoint.py
new file mode 100644
index 0000000..4cf7a34
--- /dev/null
+++ b/sob/common/checkpoint.py
@@ -0,0 +1,72 @@
+import json
+import os
+import time
+from pathlib import Path
+from typing import Any
+
+
+def checkpoint_path_for(model_id: str) -> Path:
+    """Return a fresh, timestamped checkpoint path for ``model_id``.
+
+    The directory is ``/mnt/hf-cache/checkpoints`` when the ``MODAL_TASK_ID``
+    environment variable is set and ``data/checkpoints`` otherwise. Slashes in
+    the model id are replaced by underscores, and the current local time
+    (second resolution) is embedded so separate runs do not share a file.
+    """
+    on_modal = bool(os.getenv("MODAL_TASK_ID"))
+    directory = Path("/mnt/hf-cache/checkpoints") if on_modal else Path("data/checkpoints")
+    model_slug = model_id.replace("/", "_")
+    stamp = time.strftime("%Y%m%d_%H%M%S")
+    return directory / f"results_{model_slug}_{stamp}.jsonl"
+
+
+class JsonlCheckpoint:
+    """Append-only JSONL checkpoint with periodic flush.
+
+    Used by the Anthropic provider for long runs where the full retry shape
+    makes mid-run failures expensive. Other providers use line-count-based
+    resumption instead.
+
+    Records are keyed by `metadata.record_id` in `load()` so a resume call
+    can skip already-completed records.
+
+    Usage: call `load()` to read what is already on disk, then use the
+    instance as a context manager and call `append()` for each new record.
+    """
+
+    def __init__(self, path: Path, every: int = 50):
+        """Remember `path`, clamp `every` to at least 1, and create the parent dir."""
+        self.path = path
+        self.every = max(1, every)
+        self.path.parent.mkdir(parents=True, exist_ok=True)
+        self._file = None
+        self._since_flush = 0
+
+    def load(self) -> dict[str, dict]:
+        """Return `{str(record_id): record}` for every usable line on disk.
+
+        Lines that are blank, are not valid JSON, are not JSON objects, or
+        carry no `metadata.record_id` are skipped, so a file damaged by an
+        aborted run never prevents a resume. When a record id appears more
+        than once, the last occurrence wins.
+        """
+        done: dict[str, dict] = {}
+        if not self.path.exists():
+            return done
+        with open(self.path, "r", encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    rec = json.loads(line)
+                except json.JSONDecodeError:
+                    continue  # skip corrupt lines from a prior aborted run
+                # Valid JSON is not necessarily a record: a truncated write
+                # can leave a bare number, string or list on a line.
+                if not isinstance(rec, dict):
+                    continue
+                metadata = rec.get("metadata")
+                if not isinstance(metadata, dict):
+                    continue
+                rid = metadata.get("record_id")
+                if rid is not None:
+                    done[str(rid)] = rec
+        return done
+
+    def __enter__(self):
+        """Open the checkpoint file for appending."""
+        self._file = open(self.path, "a", encoding="utf-8")
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Flush and close the file; exceptions from the body are not suppressed."""
+        if self._file is not None:
+            try:
+                self._file.flush()
+            finally:
+                # Release the handle even if the final flush fails.
+                self._file.close()
+                self._file = None
+
+    def append(self, result: dict[str, Any]) -> None:
+        """Write one record as a JSON line; flush after every `every` records.
+
+        Raises RuntimeError when called outside a `with` block.
+        """
+        if self._file is None:
+            raise RuntimeError("JsonlCheckpoint.append called outside `with` block")
+        self._file.write(json.dumps(result) + "\n")
+        self._since_flush += 1
+        if self._since_flush >= self.every:
+            self._file.flush()
+            self._since_flush = 0
diff --git a/sob/common/prompts.py b/sob/common/prompts.py
new file mode 100644
index 0000000..a9b9c2c
--- /dev/null
+++ b/sob/common/prompts.py
@@ -0,0 +1,34 @@
+import json
+from typing import Any
+
+from utils.utils import parse_string
+
+# System prompt text exported for the provider modules. It is runtime text:
+# any edit changes what the models are asked to do.
+SYSTEM_PROMPT = (
+ "Answer the question using only the provided context. "
+ "Return a valid JSON object that strictly follows the given JSON schema. "
+ "Do not output anything except the JSON object.\n\n"
+ "Rules:\n"
+ "- No explanations: Do not include reasoning, analysis, or any sentences outside the JSON.\n"
+ "- No markdown: Do not wrap the JSON in code blocks or add formatting like ``` or 'json'.\n"
+ "- No extra text: Do not add prefixes or suffixes such as 'Answer:' or 'Output:'.\n"
+ "- Follow the schema exactly: Use only the keys defined in the schema and ensure correct data types.\n"
+ "- Include all required fields: Every field listed as required in the schema must appear in the JSON.\n"
+ "- If unknown, return null: If the context does not contain the answer, set the field value to null instead of guessing."
+)
+
+
+def build_user_message(record: dict[str, Any], schema: dict | None = None) -> str:
+    """Assemble the user turn from a record's context, question and schema.
+
+    When `schema` is given it is used as-is (e.g. a provider passes a schema
+    it has already adapted); otherwise the record's `json_schema` field is
+    parsed. The schema is rendered as JSON with a two-space indent.
+    """
+    effective_schema = parse_string(record["json_schema"]) if schema is None else schema
+    rendered_schema = json.dumps(effective_schema, indent=2)
+    sections = [
+        f"Context:\n{record['context']}\n\n",
+        f"Question: {record['question']}\n\n",
+        f"Respond with JSON matching this schema:\n{rendered_schema}",
+    ]
+    return "".join(sections)
diff --git a/sob/common/schema_utils.py b/sob/common/schema_utils.py
new file mode 100644
index 0000000..ec97404
--- /dev/null
+++ b/sob/common/schema_utils.py
@@ -0,0 +1,106 @@
+import copy
+from typing import Any
+
+from utils.utils import parse_string, extract_json
+
+# Alias for utils.parse_string under the name the provider modules import.
+parse_if_string = parse_string
+# Public names of this module; parse_if_string and extract_json are
+# re-exported here from utils.utils.
+__all__ = [
+ "parse_if_string",
+ "extract_json",
+ "normalize_schema_strict",
+ "sanitize_schema_for_gemini",
+ "ALLOWED_SCHEMA_KEYS",
+]
+
+# Schema keywords kept by the Gemini sanitizer (_sanitize); every other
+# keyword is dropped. Property names inside `properties` are not filtered
+# against this set.
+ALLOWED_SCHEMA_KEYS = {
+ "type",
+ "properties",
+ "items",
+ "required",
+ "enum",
+ "description",
+ "nullable",
+ "format",
+ "minimum",
+ "maximum",
+ "minItems",
+ "maxItems",
+ "minLength",
+ "maxLength",
+}
+
+
+def normalize_schema_strict(schema: Any) -> Any:
+    """Return a copy of `schema` adjusted for OpenAI strict mode.
+
+    On the copy, every object schema with a `properties` map gets:
+
+    - `required` set to the full list of its property keys, and
+    - `additionalProperties` set to False,
+
+    applied recursively through properties and array items.
+
+    The input is deep-copied first and is never mutated.
+    """
+    strict_copy = copy.deepcopy(schema)
+    _normalize_inplace(strict_copy)
+    return strict_copy
+
+
+def _normalize_inplace(schema: Any) -> None:
+    """Apply the strict-mode invariants to `schema` in place, recursively.
+
+    For every object schema that has a `properties` dict, `required` becomes
+    the list of all property keys and `additionalProperties` becomes False.
+
+    Besides `properties` and `items`, the walk also descends into `anyOf`
+    branches and `$defs` / `definitions` entries, and it recognises a list
+    `type` such as `["object", "null"]`. Without this, nested objects reached
+    through those keywords would be left un-normalized.
+
+    Non-dict inputs are ignored. Returns None; the caller is expected to pass
+    a copy if the original must be preserved.
+    """
+    if not isinstance(schema, dict):
+        return
+
+    declared = schema.get("type")
+    # `type` may be a single name or a list of names (nullable unions).
+    type_names = declared if isinstance(declared, list) else [declared]
+
+    props = schema.get("properties")
+    if "object" in type_names and isinstance(props, dict):
+        schema["required"] = list(props.keys())
+        schema["additionalProperties"] = False
+        for sub_schema in props.values():
+            _normalize_inplace(sub_schema)
+
+    if "array" in type_names and "items" in schema:
+        items = schema["items"]
+        # `items` is normally one schema, but may be a list of schemas.
+        for sub_schema in items if isinstance(items, list) else [items]:
+            _normalize_inplace(sub_schema)
+
+    branches = schema.get("anyOf")
+    if isinstance(branches, list):
+        for sub_schema in branches:
+            _normalize_inplace(sub_schema)
+
+    for keyword in ("$defs", "definitions"):
+        definitions = schema.get(keyword)
+        if isinstance(definitions, dict):
+            for sub_schema in definitions.values():
+                _normalize_inplace(sub_schema)
+
+
+def sanitize_schema_for_gemini(schema: Any) -> Any:
+    """Return a copy of `schema` reduced to what google-genai `response_schema` accepts.
+
+    On the copy:
+
+    - only keywords in ALLOWED_SCHEMA_KEYS are kept (recursively);
+    - string `type` values are lowercased (Gemini rejects "STRING");
+    - `required` is trimmed to names still present in `properties`.
+
+    The input is deep-copied first and is never mutated.
+    """
+    working_copy = copy.deepcopy(schema)
+    return _sanitize(working_copy, is_properties=False)
+
+
+def _sanitize(schema: Any, is_properties: bool) -> Any:
+    """Recursive worker behind the Gemini schema sanitizer.
+
+    `is_properties` is True only when `schema` is the value of a `properties`
+    keyword, i.e. a map from property names to sub-schemas. Lists are
+    sanitized element-wise; any other non-dict value is returned unchanged.
+    """
+    if isinstance(schema, list):
+        return [_sanitize(item, is_properties=False) for item in schema]
+
+    if not isinstance(schema, dict):
+        return schema
+
+    if is_properties:
+        # This dict is the "properties" map itself: its keys are property
+        # names, not schema keywords, so they must not be filtered.
+        return {name: _sanitize(sub, is_properties=False) for name, sub in schema.items()}
+
+    cleaned: dict[str, Any] = {}
+    for keyword, value in schema.items():
+        if keyword not in ALLOWED_SCHEMA_KEYS:
+            continue
+        if keyword == "type" and isinstance(value, str):
+            cleaned[keyword] = value.lower()
+        else:
+            cleaned[keyword] = _sanitize(value, is_properties=(keyword == "properties"))
+
+    # Keep `required` consistent with the properties that survived.
+    required = cleaned.get("required")
+    surviving = cleaned.get("properties")
+    if isinstance(required, list) and isinstance(surviving, dict):
+        cleaned["required"] = [name for name in required if name in surviving]
+
+    return cleaned
diff --git a/sob/common/serialization.py b/sob/common/serialization.py
new file mode 100644
index 0000000..c13188c
--- /dev/null
+++ b/sob/common/serialization.py
@@ -0,0 +1,122 @@
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+from utils.utils import parse_string
+
+# Top-level fields that are already consumed by the input/output/eval_info
+# sections of the eval record. Excluded from the metadata passthrough so we
+# don't duplicate them.
+KNOWN_FIELDS: set[str] = {
+ # input
+ "context",
+ "question",
+ "json_schema",
+ # output
+ "ground_truth",
+ "validated_output",
+ "candidate_response",
+ # eval_info / computed upstream
+ "input_context_length",
+ "difficulty_weight",
+}
+
+# Maps our modality enum to the on-disk directory name. Note the plural for
+# `image`: the existing response dir is `data/images_responses/`.
+# The keys of this map are also the set of modalities accepted by
+# resolve_output_path().
+MODALITY_DIR = {
+ "text": "text_responses",
+ "image": "images_responses",
+ "audio": "audio_responses",
+}
+
+
+def build_eval_record(
+    record: dict[str, Any],
+    candidate: Any,
+    model_id: str,
+    modality: str,
+    input_tokens: int,
+    output_tokens: int,
+    avg_time: float,
+    schema: dict | None = None,
+) -> dict[str, Any]:
+    """Assemble the nested {metadata, input, output, eval_info} eval record.
+
+    `metadata` starts with `record_id`, a resolved `difficulty` and
+    `model_id`, followed by every other top-level field of `record` that is
+    neither in KNOWN_FIELDS nor `record_id` / `question_difficulty`. That
+    passthrough is what keeps modality-specific fields (`meeting_id`,
+    `num_speakers`, `source_pdf`, `source_category`, `source_id`, etc.)
+    without listing them here.
+
+    `difficulty` is `question_difficulty` if truthy, else `schema_complexity`
+    if truthy, else "unknown". When `schema` is None the record's
+    `json_schema` is parsed instead. `modality` is accepted but not read by
+    this function.
+    """
+    difficulty = "unknown"
+    for field in ("question_difficulty", "schema_complexity"):
+        value = record.get(field)
+        if value:
+            difficulty = value
+            break
+
+    # Everything not consumed by another section travels along as metadata.
+    reserved = KNOWN_FIELDS | {"record_id", "question_difficulty"}
+    metadata: dict[str, Any] = {
+        "record_id": record.get("record_id"),
+        "difficulty": difficulty,
+        "model_id": model_id,
+        **{key: value for key, value in record.items() if key not in reserved},
+    }
+
+    if schema is None:
+        resolved_schema = parse_string(record.get("json_schema"))
+    else:
+        resolved_schema = schema
+
+    ground_truth = record.get("ground_truth")
+    if ground_truth is not None:
+        ground_truth = parse_string(ground_truth)
+
+    input_section = {
+        "context": record.get("context", ""),
+        "question": record.get("question", ""),
+        "json_schema": resolved_schema,
+    }
+    output_section = {
+        "candidate_response": candidate,
+        "ground_truth": ground_truth,
+    }
+    eval_section = {
+        "input_tokens": input_tokens,
+        "output_tokens": output_tokens,
+        "avg_time_per_record": avg_time,
+    }
+    return {
+        "metadata": metadata,
+        "input": input_section,
+        "output": output_section,
+        "eval_info": eval_section,
+    }
+
+
+def resolve_output_path(model_id: str, modality: str) -> Path:
+    """Return the response file path for a `(model, modality)` pair.
+
+    - Directory: `data/<MODALITY_DIR[modality]>/` locally, or
+      `/mnt/hf-cache/<MODALITY_DIR[modality]>/` when `MODAL_TASK_ID` is set.
+    - File name: `response_{model_slug}.jsonl` for text and
+      `response_{model_slug}_{modality}.jsonl` for image/audio, where the
+      slug is `model_id` with `/` replaced by `_`.
+
+    Raises ValueError for a modality that is not a key of MODALITY_DIR.
+    """
+    if modality not in MODALITY_DIR:
+        raise ValueError(f"Unknown modality {modality!r}")
+
+    model_slug = model_id.replace("/", "_")
+    if modality == "text":
+        filename = f"response_{model_slug}.jsonl"
+    else:
+        filename = f"response_{model_slug}_{modality}.jsonl"
+
+    root = Path("/mnt/hf-cache") if os.getenv("MODAL_TASK_ID") else Path("data")
+    return root / MODALITY_DIR[modality] / filename
+
+
+def write_jsonl(path: Path, records: list[dict]) -> None:
+    """Write `records` to `path`, one JSON document per line (UTF-8).
+
+    Missing parent directories are created and an existing file is replaced.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with open(path, "w", encoding="utf-8") as sink:
+        sink.writelines(json.dumps(entry) + "\n" for entry in records)
diff --git a/sob/common/tokenize.py b/sob/common/tokenize.py
new file mode 100644
index 0000000..af485c5
--- /dev/null
+++ b/sob/common/tokenize.py
@@ -0,0 +1,46 @@
+from datasets import Dataset
+from transformers import AutoTokenizer
+
+from utils.logger import logger
+
+# Weight assigned per difficulty label by add_difficulty_weight(); labels not
+# listed here fall back to 1 there.
+DIFFICULTY_WEIGHTS = {
+ "hard": 3,
+ "medium": 2,
+ "easy": 1,
+}
+
+
+def add_token_count(dataset: Dataset, tokenizer_name: str) -> Dataset:
+    """Return `dataset` with an added `input_context_length` column.
+
+    The column holds the number of token ids the HF tokenizer
+    `tokenizer_name` produces for each record's `context`, without truncation
+    and without special tokens. Records are processed in batches of 64.
+    """
+    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+    logger.info(f"Tokenizer '{tokenizer_name}' loaded for token counting.")
+
+    def _context_lengths(batch):
+        encoded = tokenizer(
+            batch["context"],
+            truncation=False,
+            add_special_tokens=False,
+        )
+        lengths = [len(token_ids) for token_ids in encoded["input_ids"]]
+        return {"input_context_length": lengths}
+
+    counted = dataset.map(_context_lengths, batched=True, batch_size=64)
+    logger.info("Token counts added to dataset.")
+    return counted
+
+
+def add_difficulty_weight(dataset: Dataset) -> Dataset:
+    """Return `dataset` with an added `difficulty_weight` column.
+
+    The label is `question_difficulty` when truthy, otherwise
+    `schema_complexity` (image/audio records) when truthy, otherwise "easy".
+    It is mapped through DIFFICULTY_WEIGHTS; unrecognised labels get 1.
+    """
+
+    def _weight_for(example):
+        label = "easy"
+        for field in ("question_difficulty", "schema_complexity"):
+            value = example.get(field)
+            if value:
+                label = value
+                break
+        return {"difficulty_weight": DIFFICULTY_WEIGHTS.get(label, 1)}
+
+    weighted = dataset.map(_weight_for)
+    logger.info("Difficulty weights added to dataset.")
+    return weighted
diff --git a/sob/data_loader.py b/sob/data_loader.py
new file mode 100644
index 0000000..f3b7ab1
--- /dev/null
+++ b/sob/data_loader.py
@@ -0,0 +1,44 @@
+import os
+
+from datasets import Dataset, load_dataset
+
+from sob.common.tokenize import add_difficulty_weight, add_token_count
+from utils.config import InferenceConfig
+from utils.logger import logger
+
+# Maps each modality to its HF dataset config + split. Text records live in
+# the default config's "test" split; image and audio live under dedicated
+# configs with a single "train" split each.
+# A `config_name` of None means load_data() passes no `name` argument to
+# load_dataset(), i.e. the default config is used.
+MODALITY_DATASET = {
+ "text": {"config_name": None, "split": "test"},
+ "image": {"config_name": "image", "split": "train"},
+ "audio": {"config_name": "audio", "split": "train"},
+}
+
+
+def load_data(config: InferenceConfig | None = None) -> Dataset:
+    """Load the benchmark split for `config.modality` and add derived columns.
+
+    Uses a default `InferenceConfig()` when `config` is falsy. The dataset
+    config and split come from MODALITY_DATASET; a token from `HF_TOKEN` (or
+    `hf_token`) is forwarded to `load_dataset` when set. After loading, the
+    `input_context_length` and `difficulty_weight` columns are added.
+
+    Raises ValueError for a modality that is not in MODALITY_DATASET.
+    """
+    if not config:
+        config = InferenceConfig()
+    modality = config.modality
+    if modality not in MODALITY_DATASET:
+        raise ValueError(
+            f"Unknown modality {modality!r}. Expected one of {list(MODALITY_DATASET)}"
+        )
+
+    spec = MODALITY_DATASET[modality]
+    config_name, split = spec["config_name"], spec["split"]
+
+    load_kwargs = {"split": split}
+    if config_name:
+        load_kwargs["name"] = config_name
+    hf_token = os.getenv("HF_TOKEN") or os.getenv("hf_token")
+    if hf_token:
+        load_kwargs["token"] = hf_token
+
+    dataset = load_dataset("interfaze-ai/sob", **load_kwargs)
+    logger.info(
+        f"Dataset loaded for modality={modality!r} "
+        f"(config={spec['config_name']}, split={spec['split']})"
+    )
+    logger.info(f"Dataset size: {len(dataset)}")
+
+    with_tokens = add_token_count(dataset, config.sentence_transformer_model)
+    return add_difficulty_weight(with_tokens)
diff --git a/sob/providers/__init__.py b/sob/providers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/sob/providers/anthropic_native.py b/sob/providers/anthropic_native.py
new file mode 100644
index 0000000..881c943
--- /dev/null
+++ b/sob/providers/anthropic_native.py
@@ -0,0 +1,169 @@
+import json
+import os
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import anthropic
+from tqdm import tqdm
+
+from sob.common.checkpoint import JsonlCheckpoint, checkpoint_path_for
+from sob.common.prompts import SYSTEM_PROMPT, build_user_message
+from sob.common.schema_utils import extract_json, parse_if_string
+from sob.common.serialization import build_eval_record
+from utils.config import InferenceConfig
+from utils.logger import logger
+
+
+def _backoff_seconds(error: Exception, attempt: int) -> int:
+    """Seconds to wait before the next retry, growing exponentially.
+
+    - `anthropic.RateLimitError`: 2^(attempt+2), i.e. 4, 8, 16, 32, ...
+    - any other error: 2^(attempt+1), i.e. 2, 4, 8, ...
+
+    `attempt` is the zero-based index of the attempt that just failed.
+    """
+    rate_limited = isinstance(error, anthropic.RateLimitError)
+    exponent = attempt + 2 if rate_limited else attempt + 1
+    return 2 ** exponent
+
+
+def _infer_one(
+    client: anthropic.Anthropic,
+    record: dict,
+    config: InferenceConfig,
+) -> tuple[dict, object | None, int, int]:
+    """Query Anthropic for one record, retrying up to `config.max_retries` times.
+
+    Returns `(record, candidate, input_tokens, output_tokens)`. `candidate`
+    is whatever `extract_json` returns for the first text block of the reply,
+    or None when every attempt raised. Any exception inside an attempt
+    (request or JSON extraction) triggers a retry after a backoff sleep; the
+    last failure is only logged.
+    """
+    schema = parse_if_string(record.get("json_schema"))
+    prompt = build_user_message(record, schema=schema)
+
+    candidate = None
+    input_tokens = 0
+    output_tokens = 0
+    final_attempt = config.max_retries - 1
+
+    for attempt in range(config.max_retries):
+        try:
+            response = client.messages.create(
+                model=config.model_id,
+                max_tokens=config.max_tokens,
+                system=SYSTEM_PROMPT,
+                messages=[{"role": "user", "content": prompt}],
+                temperature=config.temperature,
+            )
+            # The reply is a list of content blocks; use the text of the
+            # first block whose type is "text", or "" if there is none.
+            reply_text = next(
+                (
+                    block.text
+                    for block in response.content
+                    if getattr(block, "type", None) == "text"
+                ),
+                "",
+            )
+            if response.usage:
+                input_tokens = response.usage.input_tokens
+                output_tokens = response.usage.output_tokens
+
+            candidate = extract_json(reply_text)
+        except Exception as e:
+            if attempt >= final_attempt:
+                logger.error(f"Failed: {str(record.get('record_id', ''))[:12]}... ({e})")
+                continue
+            wait = _backoff_seconds(e, attempt)
+            logger.warning(
+                f"Retry {attempt + 1}/{config.max_retries} for "
+                f"{str(record.get('record_id', ''))[:12]}... waiting {wait}s "
+                f"({type(e).__name__})"
+            )
+            time.sleep(wait)
+        else:
+            # The attempt succeeded; stop retrying.
+            break
+
+    return record, candidate, input_tokens, output_tokens
+
+
+def run(records: list[dict], config: InferenceConfig) -> list[tuple[dict, object, int, int, float]]:
+    """Run Anthropic native inference with a JsonlCheckpoint.
+
+    On resume, records already in the checkpoint are skipped and their stored
+    candidate and token counts are reused. The checkpoint file is
+    timestamped; if you want to resume a specific prior run, pass its path
+    via `config.anthropic_checkpoint_path` (not yet implemented; a feature
+    request flag).
+
+    Requires the `ANTHROPIC_API_KEY` environment variable.
+
+    Returns one `(record, candidate, input_tokens, output_tokens, avg_time)`
+    tuple per completed record, in input order. `avg_time` is the wall-clock
+    time of this call divided by the number of newly inferred records
+    (0.0 when nothing was inferred) and is the same for every tuple.
+    """
+    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
+    ckpt_path = checkpoint_path_for(config.model_id)
+    logger.info(
+        f"Anthropic provider ready: model={config.model_id} "
+        f"checkpoint={ckpt_path} every={config.anthropic_checkpoint_every}"
+    )
+
+    ckpt = JsonlCheckpoint(ckpt_path, every=config.anthropic_checkpoint_every)
+    done = ckpt.load()
+    if done:
+        logger.info(f"Loaded {len(done)} completed records from checkpoint.")
+
+    remaining_indexed = [
+        (i, r) for i, r in enumerate(records)
+        if str(r.get("record_id")) not in done
+    ]
+    logger.info(
+        f"{len(records)} total records; {len(remaining_indexed)} to run, "
+        f"{len(records) - len(remaining_indexed)} already done."
+    )
+
+    results: list = [None] * len(records)
+
+    # Fill in already-completed entries (re-project back to the tuple format).
+    for i, r in enumerate(records):
+        prev = done.get(str(r.get("record_id")))
+        if prev is None:
+            continue
+        out_meta = prev.get("output", {})
+        ei = prev.get("eval_info", {})
+        results[i] = (
+            r,
+            out_meta.get("candidate_response"),
+            ei.get("input_tokens", 0),
+            ei.get("output_tokens", 0),
+        )
+
+    n_valid = sum(1 for x in results if x is not None and isinstance(x[1], dict))
+    n_failed = 0
+    n_invalid = 0
+
+    start = time.time()
+    with ckpt:
+        with ThreadPoolExecutor(max_workers=config.max_workers) as ex:
+            futures = {
+                ex.submit(_infer_one, client, r, config): i
+                for i, r in remaining_indexed
+            }
+            pbar = tqdm(as_completed(futures), total=len(remaining_indexed), desc="Anthropic")
+            for fut in pbar:
+                idx = futures[fut]
+                record, candidate, in_tok, out_tok = fut.result()
+                results[idx] = (record, candidate, in_tok, out_tok)
+
+                # dict -> parsed JSON object; None -> every attempt failed;
+                # anything else -> a reply that did not parse to an object.
+                if isinstance(candidate, dict):
+                    n_valid += 1
+                elif candidate is None:
+                    n_failed += 1
+                else:
+                    n_invalid += 1
+
+                # Checkpoint the intermediate record. We use a zero avg_time
+                # here because per-record wall-clock isn't meaningful under
+                # ThreadPool contention; the caller patches in the true
+                # `avg_time` after the whole run finishes.
+                ckpt_record = build_eval_record(
+                    record=record,
+                    candidate=candidate,
+                    model_id=config.model_id,
+                    modality=config.modality,
+                    input_tokens=in_tok,
+                    output_tokens=out_tok,
+                    avg_time=0.0,
+                )
+                ckpt.append(ckpt_record)
+                pbar.set_postfix(valid=n_valid, bad_json=n_invalid, failed=n_failed)
+
+    total_time = time.time() - start
+    avg_time = round(total_time / max(1, len(remaining_indexed)), 4) if remaining_indexed else 0.0
+    logger.info(
+        f"Anthropic done. {len(records)} records total "
+        f"(newly inferred: {len(remaining_indexed)} in {total_time:.1f}s, "
+        f"{avg_time}s/record). valid={n_valid} invalid={n_invalid} failed={n_failed}"
+    )
+
+    # Filter on the slot itself before unpacking: unpacking an unfilled
+    # (None) slot would raise TypeError instead of skipping it.
+    return [
+        (entry[0], entry[1], entry[2], entry[3], avg_time)
+        for entry in results
+        if entry is not None
+    ]
diff --git a/sob/providers/gemini_native.py b/sob/providers/gemini_native.py
new file mode 100644
index 0000000..e82a9c5
--- /dev/null
+++ b/sob/providers/gemini_native.py
@@ -0,0 +1,141 @@
+"""Gemini provider.
+
+Reasoning / thinking notes
+--------------------------
+The default `thinking_budget=0` below fully disables thinking on **Gemini 2.5
+Flash** (the model used in the published leaderboard). It will NOT work for
+every other Gemini variant — each enforces a different floor and a different
+parameter:
+
+ - 2.5 Flash → thinking_budget = 0 (full disable supported)
+ - 2.5 Flash-Lite → thinking_budget ≥ 512 (no full disable)
+ - 2.5 Pro → thinking_budget ≥ 128 (no full disable)
+ - 3 Flash → thinking_level = "minimal" (different param entirely)
+ - 3 Pro / 3.1 Pro → thinking_level = "low" ("minimal" not exposed)
+
+If you run a model from any of the other rows above, edit the
+`ThinkingConfig` call below to match — passing `thinking_budget=0` to a Pro or
+Flash-Lite model will error, and 2.5-style budgets are silently ignored on
+Gemini 3.
+"""
+import asyncio
+import os
+import time
+
+from google import genai
+from google.genai import types
+from tqdm.auto import tqdm
+
+from sob.common.prompts import SYSTEM_PROMPT, build_user_message
+from sob.common.schema_utils import (
+ extract_json,
+ parse_if_string,
+ sanitize_schema_for_gemini,
+)
+from utils.config import InferenceConfig
+from utils.logger import logger
+
+
+async def _infer_one(
+    aclient,
+    record: dict,
+    config: InferenceConfig,
+    sem: asyncio.Semaphore,
+) -> tuple[dict, object | None, dict, int, int]:
+    """Send one record to Gemini and parse the reply.
+
+    Returns `(record, candidate, effective_schema, input_tokens,
+    output_tokens)`. `effective_schema` is the sanitized schema when
+    structured decoding is on and the raw parsed schema otherwise. The
+    request runs while holding `sem`. A failed request is logged and yields
+    `candidate=None`; there is no retry here.
+    """
+    raw_schema = parse_if_string(record.get("json_schema"))
+    sanitized = sanitize_schema_for_gemini(raw_schema)
+    structured = config.use_structured_decoding
+
+    # The prompt always shows the raw schema; only `response_schema` uses the
+    # sanitized one.
+    prompt = build_user_message(record, schema=raw_schema)
+    generation_options = dict(
+        system_instruction=SYSTEM_PROMPT,
+        temperature=config.temperature,
+        max_output_tokens=config.max_tokens,
+        response_mime_type="application/json",
+    )
+    if config.disable_thinking:
+        generation_options["thinking_config"] = types.ThinkingConfig(thinking_budget=0)
+    if structured:
+        generation_options["response_schema"] = sanitized
+
+    response = None
+    reply_text = ""
+    async with sem:
+        try:
+            response = await aclient.models.generate_content(
+                model=config.model_id,
+                contents=prompt,
+                config=types.GenerateContentConfig(**generation_options),
+            )
+            reply_text = response.text or ""
+        except Exception as e:
+            logger.error(f"Failed: {str(record.get('record_id', ''))[:12]}... ({e})")
+            reply_text = ""
+
+    # Token usage is read whenever a response object came back, even if
+    # reading its text failed afterwards.
+    usage = getattr(response, "usage_metadata", None) if response is not None else None
+    if usage is None:
+        input_tokens = output_tokens = 0
+    else:
+        input_tokens = getattr(usage, "prompt_token_count", 0) or 0
+        output_tokens = getattr(usage, "candidates_token_count", 0) or 0
+
+    candidate = extract_json(reply_text) if reply_text else None
+    effective_schema = sanitized if structured else raw_schema
+    return record, candidate, effective_schema, input_tokens, output_tokens
+
+
+async def _run_async(records: list[dict], config: InferenceConfig):
+    """Run every record through Gemini concurrently and keep input order.
+
+    At most `config.gemini_max_concurrency` requests are in flight at once.
+    Requires the `GEMINI_API_KEY` environment variable.
+
+    Returns a list of `(record, candidate, input_tokens, output_tokens,
+    avg_time)` tuples in the order of `records`. Each returned record is a
+    shallow copy whose `json_schema` is replaced by the schema reported by
+    `_infer_one` for that request. `avg_time` is total wall-clock time
+    divided by the number of records and is the same for every tuple.
+    """
+    client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+    aclient = client.aio
+    sem = asyncio.Semaphore(config.gemini_max_concurrency)
+
+    logger.info(
+        f"Gemini native provider ready: model={config.model_id} "
+        f"concurrency={config.gemini_max_concurrency} "
+        f"structured={config.use_structured_decoding}"
+    )
+
+    async def _indexed(position: int, record: dict):
+        # Tag each result with its input position. `as_completed` yields in
+        # finish order, and matching by record_id afterwards is quadratic and
+        # ambiguous when ids are duplicated or missing.
+        return position, await _infer_one(aclient, record, config, sem)
+
+    tasks = [_indexed(i, r) for i, r in enumerate(records)]
+    results_by_idx: list = [None] * len(records)
+
+    start = time.time()
+    # The context manager closes the bar even if a task raises.
+    with tqdm(total=len(tasks), desc="Gemini") as pbar:
+        for fut in asyncio.as_completed(tasks):
+            idx, entry = await fut
+            results_by_idx[idx] = entry
+            pbar.update(1)
+
+    total_time = time.time() - start
+    avg_time = round(total_time / max(1, len(records)), 4)
+    logger.info(
+        f"Gemini done. {len(records)} records in {total_time:.1f}s "
+        f"({avg_time}s/record)."
+    )
+
+    out = []
+    for entry in results_by_idx:
+        if entry is None:
+            continue
+        rec, candidate, effective_schema, in_tok, out_tok = entry
+        # Copy before overwriting so the caller's record is left untouched.
+        rec = dict(rec)
+        rec["json_schema"] = effective_schema
+        out.append((rec, candidate, in_tok, out_tok, avg_time))
+    return out
+
+
+def run(records: list[dict], config: InferenceConfig) -> list[tuple[dict, object, int, int, float]]:
+    """Synchronous entry point for Gemini inference.
+
+    Drives the async implementation with `asyncio.run`, so it cannot be
+    called from inside an already running event loop (e.g. a Jupyter
+    notebook without nest_asyncio). From a notebook, await the async
+    implementation directly instead.
+    """
+    pipeline = _run_async(records, config)
+    return asyncio.run(pipeline)
diff --git a/sob/providers/openai_native.py b/sob/providers/openai_native.py
new file mode 100644
index 0000000..2b69fa3
--- /dev/null
+++ b/sob/providers/openai_native.py
@@ -0,0 +1,158 @@
+"""OpenAI provider.
+
+Reasoning / thinking notes
+--------------------------
+The chat completions endpoint accepts different knobs for different families,
+and passing the wrong one returns 400. Current behaviour matches the gates
+below:
+
+  - GPT-5 / GPT-5-Mini / GPT-5-Nano → reasoning_effort = "minimal".
+    Temperature is NOT accepted; we omit it.
+  - o-series (o1, o3, o4-mini, ...) → reasoning_effort = "minimal".
+    Most also refuse temperature (o1/o3 reject it), so confirm that the
+    temperature gate below excludes the model before adding an o-series
+    model.
+  - GPT-4.1 / GPT-4o / GPT-4o-mini → standard chat. Reasoning effort is not
+    exposed and would error if passed; temperature is required.
+
+If you add a new model from any family, double-check both gates before
+running — an unnoticed 400 on every request wastes the whole sweep.
+"""
+import json
+import os
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+from openai import OpenAI
+from tqdm import tqdm
+
+from sob.common.prompts import SYSTEM_PROMPT, build_user_message
+from sob.common.schema_utils import parse_if_string, normalize_schema_strict
+from utils.config import InferenceConfig
+from utils.logger import logger
+
+
+def _infer_one(
+    client: OpenAI,
+    record: dict,
+    config: InferenceConfig,
+) -> tuple[dict, object | None, dict, int, int]:
+    """Send one record to the chat completions endpoint and parse the reply.
+
+    Returns `(record, candidate, effective_schema, input_tokens, output_tokens)`.
+    `candidate` is the decoded JSON value, the raw text when the reply is not
+    valid JSON, or None when every attempt raised. Token counts stay 0 unless a
+    response reports usage. Exceptions from the API call are logged and retried
+    with exponential backoff; they are never re-raised.
+    """
+    schema = parse_if_string(record.get("json_schema"))
+    strict_schema = normalize_schema_strict(schema)
+    record_tag = str(record.get("record_id", ""))[:12]
+
+    # Reasoning families (gpt-5, o-series) share one prefix test so the
+    # temperature and reasoning_effort gates cannot drift apart.
+    is_reasoning_model = config.model_id.startswith(("gpt-5", "o"))
+
+    request_kwargs: dict = dict(
+        model=config.model_id,
+        messages=[
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": build_user_message(record, schema=schema)},
+        ],
+        max_completion_tokens=config.max_tokens,
+        response_format={"type": "json_object"},
+    )
+    if config.use_structured_decoding:
+        request_kwargs["response_format"] = {
+            "type": "json_schema",
+            "json_schema": {"name": "answer", "schema": strict_schema, "strict": True},
+        }
+
+    # Reasoning models reject `temperature`; only standard chat models get it.
+    if not is_reasoning_model:
+        request_kwargs["temperature"] = config.temperature
+
+    # Non-reasoning models would error on `reasoning_effort`, hence the gate.
+    # NOTE(review): confirm every targeted o-series model accepts "minimal".
+    if config.disable_thinking and is_reasoning_model:
+        request_kwargs["reasoning_effort"] = "minimal"
+
+    candidate = None
+    input_tokens = output_tokens = 0
+
+    for attempt in range(config.max_retries):
+        try:
+            response = client.chat.completions.create(**request_kwargs)
+            raw = response.choices[0].message.content or ""
+            if response.usage:
+                input_tokens = response.usage.prompt_tokens
+                output_tokens = response.usage.completion_tokens
+            try:
+                candidate = json.loads(raw)
+            except json.JSONDecodeError:
+                candidate = raw  # keep the raw text so the caller can count it as bad JSON
+            break
+        except Exception as e:
+            if attempt < config.max_retries - 1:
+                wait = 2 ** (attempt + 1)  # exponential backoff: 2s, 4s, 8s, ...
+                logger.warning(
+                    f"Retry {attempt + 1}/{config.max_retries} for "
+                    f"{record_tag}... waiting {wait}s"
+                )
+                time.sleep(wait)
+            else:
+                logger.error(f"Failed: {record_tag}... ({e})")
+
+    # The strict variant is the effective schema only when it was actually sent.
+    effective_schema = strict_schema if config.use_structured_decoding else schema
+    return record, candidate, effective_schema, input_tokens, output_tokens
+
+
+def run(records: list[dict], config: InferenceConfig) -> list[tuple[dict, object, int, int, float]]:
+    """Run OpenAI inference over `records` on a thread pool.
+
+    Returns one `(record, candidate, input_tokens, output_tokens, avg_time)`
+    tuple per record, in input order; `avg_time` is wall-clock seconds divided
+    by the record count. Each returned record is a shallow copy whose
+    "json_schema" is the effective schema reported by `_infer_one`.
+    """
+    client = OpenAI(
+        api_key=os.environ["OPENAI_API_KEY"],
+        base_url="https://api.openai.com/v1",
+    )
+    logger.info(f"OpenAI native provider ready: model={config.model_id}")
+
+    # One slot per input record, filled as futures complete, keeps input order.
+    slots: list = [None] * len(records)
+    n_valid = n_failed = n_invalid = 0
+
+    start = time.time()
+    with ThreadPoolExecutor(max_workers=config.max_workers) as ex:
+        futures = {
+            ex.submit(_infer_one, client, r, config): i
+            for i, r in enumerate(records)
+        }
+        pbar = tqdm(as_completed(futures), total=len(records), desc="OpenAI")
+        for fut in pbar:
+            record, candidate, effective_schema, in_tok, out_tok = fut.result()
+            slots[futures[fut]] = (record, candidate, effective_schema, in_tok, out_tok)
+            if isinstance(candidate, dict):
+                n_valid += 1
+            elif candidate is None:
+                n_failed += 1
+            else:
+                n_invalid += 1
+            pbar.set_postfix(valid=n_valid, bad_json=n_invalid, failed=n_failed)
+
+    total_time = time.time() - start
+    avg_time = round(total_time / max(1, len(records)), 4)
+    logger.info(
+        f"OpenAI done. {len(records)} records in {total_time:.1f}s "
+        f"({avg_time}s/record). valid={n_valid} invalid={n_invalid} failed={n_failed}"
+    )
+
+    out = []
+    for slot in slots:
+        if slot is None:  # test the slot itself; unpacking None would raise TypeError
+            continue
+        record, candidate, effective_schema, in_tok, out_tok = slot
+        out.append(({**record, "json_schema": effective_schema}, candidate, in_tok, out_tok, avg_time))
+    return out
diff --git a/sob/providers/openrouter.py b/sob/providers/openrouter.py
new file mode 100644
index 0000000..18d3ffd
--- /dev/null
+++ b/sob/providers/openrouter.py
@@ -0,0 +1,118 @@
+import json
+import os
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+from openai import OpenAI
+from tqdm import tqdm
+
+from sob.common.prompts import SYSTEM_PROMPT, build_user_message
+from sob.common.schema_utils import parse_if_string
+from utils.config import InferenceConfig
+from utils.logger import logger
+
+
+def _infer_one(
+    client: OpenAI,
+    record: dict,
+    config: InferenceConfig,
+) -> tuple[dict, object | None, int, int]:
+    """Send one record through OpenRouter and parse the reply.
+
+    Returns `(record, candidate, input_tokens, output_tokens)`. `candidate` is
+    the decoded JSON value, the raw text when the reply is not valid JSON, or
+    None when no attempt succeeded. Exceptions raised inside an attempt are
+    logged and retried with exponential backoff; they are not re-raised.
+    """
+    schema = parse_if_string(record.get("json_schema"))
+    record_tag = str(record.get("record_id", ""))[:12]
+
+    # User-supplied extra_body is copied first; `setdefault` then adds the
+    # reasoning toggle only when the user did not provide a "reasoning" key.
+    extra_body: dict = dict(config.openrouter_extra_body or {})
+    if config.disable_thinking:
+        # OpenRouter documents lowercase effort levels; "None" is not one of them.
+        extra_body.setdefault("reasoning", {"effort": "none", "exclude": True})
+
+    # The request is identical across retries, so build it once.
+    kwargs = dict(
+        model=config.model_id,
+        messages=[
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": build_user_message(record, schema=schema)},
+        ],
+        max_tokens=config.max_tokens,
+        temperature=config.temperature,
+        response_format={"type": "json_object"},
+    )
+    if extra_body:
+        kwargs["extra_body"] = extra_body
+
+    candidate = None
+    input_tokens = output_tokens = 0
+
+    for attempt in range(config.max_retries):
+        try:
+            response = client.chat.completions.create(**kwargs)
+            raw = response.choices[0].message.content
+            if response.usage:
+                input_tokens = response.usage.prompt_tokens
+                output_tokens = response.usage.completion_tokens
+            try:
+                # A None `raw` raises TypeError here, which the outer handler retries.
+                candidate = json.loads(raw)
+            except json.JSONDecodeError:
+                candidate = raw
+            break
+        except Exception as e:
+            if attempt < config.max_retries - 1:
+                wait = 2 ** (attempt + 1)
+                logger.warning(
+                    f"Retry {attempt + 1}/{config.max_retries} for "
+                    f"{record_tag}... waiting {wait}s"
+                )
+                time.sleep(wait)
+            else:
+                logger.error(f"Failed: {record_tag}... ({e})")
+
+    return record, candidate, input_tokens, output_tokens
+
+
+def run(records: list[dict], config: InferenceConfig) -> list[tuple[dict, object, int, int, float]]:
+    """Fan `records` out to OpenRouter on a thread pool and collect the results.
+
+    Each returned tuple is (record, candidate_response, input_tokens,
+    output_tokens, avg_time_per_record), ordered like `records`; the driver
+    hands every tuple to `build_eval_record` for final serialization.
+    """
+    client = OpenAI(
+        base_url="https://openrouter.ai/api/v1",
+        api_key=os.environ["OPENROUTER_API_KEY"],
+    )
+    logger.info(f"OpenRouter provider ready: model={config.model_id}")
+
+    n_records = len(records)
+    slots: list = [None] * n_records
+    tally = {"valid": 0, "bad_json": 0, "failed": 0}  # dict / other non-None / None
+
+    started = time.time()
+    with ThreadPoolExecutor(max_workers=config.max_workers) as pool:
+        index_of = {}
+        for position, item in enumerate(records):
+            index_of[pool.submit(_infer_one, client, item, config)] = position
+        progress = tqdm(as_completed(index_of), total=n_records, desc="OpenRouter")
+        for done in progress:
+            record, candidate, in_tok, out_tok = done.result()
+            slots[index_of[done]] = (record, candidate, in_tok, out_tok)
+            if isinstance(candidate, dict):
+                tally["valid"] += 1
+            elif candidate is None:
+                tally["failed"] += 1
+            else:
+                tally["bad_json"] += 1
+            progress.set_postfix(**tally)
+
+    total_time = time.time() - started
+    avg_time = round(total_time / max(1, n_records), 4)
+    logger.info(
+        f"OpenRouter done. {n_records} records in {total_time:.1f}s "
+        f"({avg_time}s/record). "
+        f"valid={tally['valid']} invalid={tally['bad_json']} failed={tally['failed']}"
+    )
+
+    return [(rec, cand, n_in, n_out, avg_time) for (rec, cand, n_in, n_out) in slots if rec is not None]
diff --git a/sob/providers/vllm.py b/sob/providers/vllm.py
new file mode 100644
index 0000000..b86c7b2
--- /dev/null
+++ b/sob/providers/vllm.py
@@ -0,0 +1,93 @@
+import json
+import time
+
+from tqdm import tqdm
+from vllm import LLM, SamplingParams
+from vllm.sampling_params import StructuredOutputsParams
+
+from sob.common.prompts import SYSTEM_PROMPT
+from sob.common.schema_utils import parse_if_string, extract_json
+from utils.config import InferenceConfig
+from utils.logger import logger
+
+
+def run(records: list[dict], config: InferenceConfig) -> list[tuple[dict, object, int, int, float]]:
+    """Generate responses for `records` in one batched call to a local vLLM engine.
+
+    Returns one (record, candidate, input_tokens, output_tokens, avg_time)
+    tuple per record. `candidate` is the parsed JSON when decoding succeeds;
+    `avg_time` is the batch wall-clock time divided by the record count.
+    """
+    engine = LLM(
+        model=config.model_id,
+        tensor_parallel_size=config.tensor_parallel_size,
+        max_model_len=config.max_model_len,
+        trust_remote_code=True,
+    )
+    logger.info(
+        f"vLLM provider ready: model={config.model_id} "
+        f"tp={config.tensor_parallel_size} structured={config.use_structured_decoding}"
+    )
+
+    tokenizer = engine.get_tokenizer()
+    # Chat-template options are the same for every record, so build them once.
+    template_kwargs = {"tokenize": False, "add_generation_prompt": True}
+    if config.disable_thinking:
+        template_kwargs["enable_thinking"] = False
+
+    prompts: list[str] = []
+    params_list: list = []
+    for record in tqdm(records, desc="Building prompts"):
+        schema = parse_if_string(record["json_schema"])
+        schema_str = json.dumps(schema, indent=2)
+        user_msg = "".join([
+            f"Context:\n{record['context']}\n\n",
+            f"Question: {record['question']}\n\n",
+            f"Respond with JSON matching this schema:\n{schema_str}\n\n",
+            "Return ONLY the JSON object.\n\n",
+        ])
+        chat = [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": user_msg},
+        ]
+        prompts.append(tokenizer.apply_chat_template(chat, **template_kwargs))
+
+        # Sampling params are per record because the schema constraint differs.
+        sampling = {"max_tokens": config.max_tokens, "temperature": config.temperature}
+        if config.use_structured_decoding:
+            sampling["structured_outputs"] = StructuredOutputsParams(json=schema)
+        params_list.append(SamplingParams(**sampling))
+
+    logger.info(f"Built {len(prompts)} prompts. Running batch inference...")
+    start = time.time()
+    outputs = engine.generate(prompts, sampling_params=params_list)
+    total_time = time.time() - start
+    avg_time = round(total_time / max(1, len(records)), 4)
+
+    results: list[tuple[dict, object, int, int, float]] = []
+    n_valid = n_invalid = 0
+    total_input = total_output = 0
+
+    for record, output in zip(records, outputs):
+        completion = output.outputs[0]
+        if not config.use_structured_decoding:
+            candidate = extract_json(completion.text)
+        else:
+            try:
+                candidate = json.loads(completion.text)
+            except json.JSONDecodeError:
+                candidate = completion.text
+
+        input_toks = len(output.prompt_token_ids)
+        output_toks = len(completion.token_ids)
+        total_input += input_toks
+        total_output += output_toks
+
+        if isinstance(candidate, dict):
+            n_valid += 1
+        else:
+            n_invalid += 1
+        results.append((record, candidate, input_toks, output_toks, avg_time))
+
+    logger.info(
+        f"vLLM done. {len(records)} records in {total_time:.1f}s ({avg_time}s/record). "
+        f"valid={n_valid} invalid={n_invalid} input_tokens={total_input:,} output_tokens={total_output:,}"
+    )
+    return results
diff --git a/sob/run.py b/sob/run.py
new file mode 100644
index 0000000..a7a834b
--- /dev/null
+++ b/sob/run.py
@@ -0,0 +1,109 @@
+"""Unified inference entry point.
+
+Usage
+-----
+ python -m sob.run --provider openrouter --modality text \
+ --model-id google/gemma-4-31b-it --sample-size 5
+
+Each provider module exposes a `run(records, config)` callable that returns
+a list of (record, candidate, input_tokens, output_tokens, avg_time) tuples.
+This driver dispatches on `config.provider`, then serializes each tuple
+through `build_eval_record` and writes to the right response directory via
+`resolve_output_path`.
+"""
+
+import argparse
+from importlib import import_module
+
+from sob.common.serialization import (
+ build_eval_record,
+ resolve_output_path,
+ write_jsonl,
+)
+from sob.data_loader import load_data
+from utils.config import InferenceConfig
+from utils.logger import logger
+
+# Maps each provider name (`--provider` / `config.provider`) to the dotted path
+# of the module whose `run` attribute `_provider_runner` returns.
+PROVIDER_REGISTRY = {
+    "openrouter": "sob.providers.openrouter",
+    "vllm": "sob.providers.vllm",
+    "openai": "sob.providers.openai_native",
+    "anthropic": "sob.providers.anthropic_native",
+    "gemini": "sob.providers.gemini_native",
+}
+
+
+def _provider_runner(name: str):
+    """Return the `run` callable of the provider module registered under `name`.
+
+    The module is imported at call time via `import_module`. Raises ValueError
+    when `name` is not a key of PROVIDER_REGISTRY.
+    """
+    module_path = PROVIDER_REGISTRY.get(name)
+    if module_path is None:
+        raise ValueError(f"Unknown provider {name!r}")
+    return import_module(module_path).run
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line flags that override `InferenceConfig` defaults.
+
+    Every value option defaults to None so that "not given" can be told apart
+    from an explicit value; `--use-structured-decoding` is a plain on-switch.
+    """
+    parser = argparse.ArgumentParser(description="Run model inference on the SOB benchmark.")
+    # (flag, extra add_argument keywords), in the order they are registered.
+    option_specs = [
+        ("--provider", {"help": "openrouter|vllm|openai|anthropic|gemini"}),
+        ("--modality", {"help": "text|image|audio"}),
+        ("--model-id", {}),
+        ("--sample-size", {"type": int}),
+        ("--temperature", {"type": float}),
+        ("--max-tokens", {"type": int}),
+    ]
+    for flag, extra in option_specs:
+        parser.add_argument(flag, default=None, **extra)
+    parser.add_argument("--use-structured-decoding", action="store_true")
+    return parser.parse_args()
+
+
+def _override_config(config: InferenceConfig, args: argparse.Namespace) -> InferenceConfig:
+    """Copy the CLI overrides in `args` onto `config` (mutated in place) and return it.
+
+    String options replace the config value only when non-empty, numeric options
+    whenever they are not None (so an explicit 0 counts), and the
+    structured-decoding switch can only turn the feature on.
+    """
+    for field in ("provider", "modality", "model_id"):
+        text = getattr(args, field)
+        if text:
+            setattr(config, field, text)
+    for field in ("sample_size", "temperature", "max_tokens"):
+        number = getattr(args, field)
+        if number is not None:
+            setattr(config, field, number)
+    if args.use_structured_decoding:
+        config.use_structured_decoding = True
+    return config
+
+
+def main() -> None:
+    """CLI entry point: load data, run the selected provider, write the JSONL output."""
+    cli_args = parse_args()
+    config = _override_config(InferenceConfig(), cli_args)
+
+    logger.info(
+        f"Run: provider={config.provider} modality={config.modality} "
+        f"model_id={config.model_id} sample_size={config.sample_size}"
+    )
+
+    records = list(load_data(config))
+    limit = config.sample_size
+    if limit is not None:
+        records = records[:limit]
+        logger.info(f"Sliced to first {limit} records.")
+
+    run_provider = _provider_runner(config.provider)
+    eval_records = []
+    # Each provider yields (record, candidate, input_tokens, output_tokens, avg_time).
+    for record, candidate, in_tok, out_tok, avg_time in run_provider(records, config):
+        eval_records.append(
+            build_eval_record(
+                record=record,
+                candidate=candidate,
+                model_id=config.model_id,
+                modality=config.modality,
+                input_tokens=in_tok,
+                output_tokens=out_tok,
+                avg_time=avg_time,
+            )
+        )
+
+    output_path = resolve_output_path(config.model_id, config.modality)
+    write_jsonl(output_path, eval_records)
+    logger.info(f"Saved {len(eval_records)} records to {output_path.resolve()}")
+
+if __name__ == "__main__":
+ main()
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/utils/config.py b/utils/config.py
new file mode 100644
index 0000000..d9f7b27
--- /dev/null
+++ b/utils/config.py
@@ -0,0 +1,23 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class InferenceConfig:
+    """Settings shared by the inference driver and every provider module.
+
+    All fields have defaults; `sob.run` can override provider, modality,
+    model_id, sample_size, temperature, max_tokens and use_structured_decoding
+    from the command line.
+    """
+    # NOTE(review): not read by the driver or providers in this change; presumably used by evaluation — confirm.
+    sentence_transformer_model: str = "all-MiniLM-L6-v2"
+    model_id: str = "openai/gpt-oss-20b"  # Change to the model you want to run inference with
+    modality: str = "text"  # "text" | "image" | "audio"
+    provider: str = "openrouter"  # "openrouter" | "vllm" | "openai" | "anthropic" | "gemini"
+    sample_size: int | None = None  # Set to None to run on the full dataset
+    # Ask for schema-constrained output (OpenAI json_schema response format, vLLM structured outputs).
+    use_structured_decoding: bool = False
+    # Ask thinking-capable models for minimal / no reasoning where the provider exposes a toggle.
+    disable_thinking: bool = True
+    tensor_parallel_size: int = 1  # Adjust based on your GPU setup -> 1 for CPU or single GPU, >1 for multiple GPUs
+    max_model_len: int = 8192  # passed to the vLLM engine as max_model_len
+    max_tokens: int = 2048  # completion-token cap sent with each request
+    temperature: float = 0.0
+    max_workers: int = 20  # thread-pool size of the OpenAI / OpenRouter providers
+    max_retries: int = 5  # attempts per record in the OpenAI / OpenRouter providers
+    # Provider-specific knobs
+    openai_reasoning_effort: str | None = None  # reserved for future Responses API support
+    anthropic_checkpoint_every: int = 50  # NOTE(review): consumer not visible here; presumably a checkpoint interval in records
+    gemini_max_concurrency: int = 20  # size of the asyncio.Semaphore in the Gemini provider
+    openrouter_extra_body: dict | None = None  # e.g. {"provider": {"only": [...]}, "chat_template_kwargs": {...}}
diff --git a/utils/logger.py b/utils/logger.py
new file mode 100644
index 0000000..d9f3ff6
--- /dev/null
+++ b/utils/logger.py
@@ -0,0 +1,22 @@
+import sys
+import logging
+from datetime import datetime
+from pathlib import Path
+
+# The log file name is fixed once, at import time, from the current local timestamp.
+LOG_FILE = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.log"
+
+# Logs live in ./logs under the current working directory; the directory is
+# created as a side effect of importing this module.
+log_dir = Path.cwd() / "logs"
+log_dir.mkdir(parents=True, exist_ok=True)
+
+LOG_FILE_PATH = log_dir / LOG_FILE
+
+# Configure the root logger: INFO and above goes to both the log file and stdout.
+logging.basicConfig(
+    level=logging.INFO,
+    format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
+    handlers=[
+        logging.FileHandler(LOG_FILE_PATH),
+        logging.StreamHandler(sys.stdout)
+    ]
+)
+
+# Project-wide logger; no handlers are attached to it here, so its records are
+# emitted through the root handlers configured above.
+logger = logging.getLogger("sob")
\ No newline at end of file
diff --git a/utils/utils.py b/utils/utils.py
new file mode 100644
index 0000000..29acc30
--- /dev/null
+++ b/utils/utils.py
@@ -0,0 +1,22 @@
+import re
+import json
+
+def parse_string(val):
+    """Decode `val` with `json.loads` when it is a str; return anything else unchanged.
+
+    A str that is not valid JSON raises `json.JSONDecodeError`.
+    """
+    return json.loads(val) if isinstance(val, str) else val
+
+def extract_json(text):
+    """Best-effort JSON extraction from a model reply.
+
+    Tries to decode `text` as a whole; failing that, decodes the span from the
+    first "{" to the last "}" (greedy match, newlines included). Returns the
+    decoded value, or `text` unchanged when neither attempt succeeds. Input
+    that cannot be decoded, including non-str values such as None, is returned
+    as-is, never raised on.
+    """
+    try:
+        return json.loads(text)
+    except Exception:
+        pass
+
+    # re.search with a str pattern raises TypeError on non-str input (None,
+    # bytes, ...), so such values skip the regex fallback and are returned as-is.
+    if not isinstance(text, str):
+        return text
+
+    match = re.search(r"\{.*\}", text, re.DOTALL)
+    if match:
+        try:
+            return json.loads(match.group())
+        except Exception:
+            pass
+
+    return text
\ No newline at end of file
diff --git a/uv.lock b/uv.lock
new file mode 100644
index 0000000..22da3ac
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,3281 @@
+version = 1
+revision = 3
+requires-python = "==3.12.*"
+resolution-markers = [
+ "sys_platform == 'win32'",
+ "sys_platform == 'emscripten'",
+ "sys_platform != 'emscripten' and sys_platform != 'win32'",
+]
+
+[[package]]
+name = "aiohappyeyeballs"
+version = "2.6.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/26/30/f84a107a9c4331c14b2b586036f40965c128aa4fee4dda5d3d51cb14ad54/aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558", size = 22760, upload-time = "2025-03-12T01:42:48.764Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0f/15/5bf3b99495fb160b63f95972b81750f18f7f4e02ad051373b669d17d44f2/aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8", size = 15265, upload-time = "2025-03-12T01:42:47.083Z" },
+]
+
+[[package]]
+name = "aiohttp"
+version = "3.13.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "aiohappyeyeballs" },
+ { name = "aiosignal" },
+ { name = "attrs" },
+ { name = "frozenlist" },
+ { name = "multidict" },
+ { name = "propcache" },
+ { name = "yarl" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/50/42/32cf8e7704ceb4481406eb87161349abb46a57fee3f008ba9cb610968646/aiohttp-3.13.3.tar.gz", hash = "sha256:a949eee43d3782f2daae4f4a2819b2cb9b0c5d3b7f7a927067cc84dafdbb9f88", size = 7844556, upload-time = "2026-01-03T17:33:05.204Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a0/be/4fc11f202955a69e0db803a12a062b8379c970c7c84f4882b6da17337cc1/aiohttp-3.13.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b903a4dfee7d347e2d87697d0713be59e0b87925be030c9178c5faa58ea58d5c", size = 739732, upload-time = "2026-01-03T17:30:14.23Z" },
+ { url = "https://files.pythonhosted.org/packages/97/2c/621d5b851f94fa0bb7430d6089b3aa970a9d9b75196bc93bb624b0db237a/aiohttp-3.13.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a45530014d7a1e09f4a55f4f43097ba0fd155089372e105e4bff4ca76cb1b168", size = 494293, upload-time = "2026-01-03T17:30:15.96Z" },
+ { url = "https://files.pythonhosted.org/packages/5d/43/4be01406b78e1be8320bb8316dc9c42dbab553d281c40364e0f862d5661c/aiohttp-3.13.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27234ef6d85c914f9efeb77ff616dbf4ad2380be0cda40b4db086ffc7ddd1b7d", size = 493533, upload-time = "2026-01-03T17:30:17.431Z" },
+ { url = "https://files.pythonhosted.org/packages/8d/a8/5a35dc56a06a2c90d4742cbf35294396907027f80eea696637945a106f25/aiohttp-3.13.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d32764c6c9aafb7fb55366a224756387cd50bfa720f32b88e0e6fa45b27dcf29", size = 1737839, upload-time = "2026-01-03T17:30:19.422Z" },
+ { url = "https://files.pythonhosted.org/packages/bf/62/4b9eeb331da56530bf2e198a297e5303e1c1ebdceeb00fe9b568a65c5a0c/aiohttp-3.13.3-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b1a6102b4d3ebc07dad44fbf07b45bb600300f15b552ddf1851b5390202ea2e3", size = 1703932, upload-time = "2026-01-03T17:30:21.756Z" },
+ { url = "https://files.pythonhosted.org/packages/7c/f6/af16887b5d419e6a367095994c0b1332d154f647e7dc2bd50e61876e8e3d/aiohttp-3.13.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c014c7ea7fb775dd015b2d3137378b7be0249a448a1612268b5a90c2d81de04d", size = 1771906, upload-time = "2026-01-03T17:30:23.932Z" },
+ { url = "https://files.pythonhosted.org/packages/ce/83/397c634b1bcc24292fa1e0c7822800f9f6569e32934bdeef09dae7992dfb/aiohttp-3.13.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2b8d8ddba8f95ba17582226f80e2de99c7a7948e66490ef8d947e272a93e9463", size = 1871020, upload-time = "2026-01-03T17:30:26Z" },
+ { url = "https://files.pythonhosted.org/packages/86/f6/a62cbbf13f0ac80a70f71b1672feba90fdb21fd7abd8dbf25c0105fb6fa3/aiohttp-3.13.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ae8dd55c8e6c4257eae3a20fd2c8f41edaea5992ed67156642493b8daf3cecc", size = 1755181, upload-time = "2026-01-03T17:30:27.554Z" },
+ { url = "https://files.pythonhosted.org/packages/0a/87/20a35ad487efdd3fba93d5843efdfaa62d2f1479eaafa7453398a44faf13/aiohttp-3.13.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:01ad2529d4b5035578f5081606a465f3b814c542882804e2e8cda61adf5c71bf", size = 1561794, upload-time = "2026-01-03T17:30:29.254Z" },
+ { url = "https://files.pythonhosted.org/packages/de/95/8fd69a66682012f6716e1bc09ef8a1a2a91922c5725cb904689f112309c4/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bb4f7475e359992b580559e008c598091c45b5088f28614e855e42d39c2f1033", size = 1697900, upload-time = "2026-01-03T17:30:31.033Z" },
+ { url = "https://files.pythonhosted.org/packages/e5/66/7b94b3b5ba70e955ff597672dad1691333080e37f50280178967aff68657/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c19b90316ad3b24c69cd78d5c9b4f3aa4497643685901185b65166293d36a00f", size = 1728239, upload-time = "2026-01-03T17:30:32.703Z" },
+ { url = "https://files.pythonhosted.org/packages/47/71/6f72f77f9f7d74719692ab65a2a0252584bf8d5f301e2ecb4c0da734530a/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:96d604498a7c782cb15a51c406acaea70d8c027ee6b90c569baa6e7b93073679", size = 1740527, upload-time = "2026-01-03T17:30:34.695Z" },
+ { url = "https://files.pythonhosted.org/packages/fa/b4/75ec16cbbd5c01bdaf4a05b19e103e78d7ce1ef7c80867eb0ace42ff4488/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:084911a532763e9d3dd95adf78a78f4096cd5f58cdc18e6fdbc1b58417a45423", size = 1554489, upload-time = "2026-01-03T17:30:36.864Z" },
+ { url = "https://files.pythonhosted.org/packages/52/8f/bc518c0eea29f8406dcf7ed1f96c9b48e3bc3995a96159b3fc11f9e08321/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7a4a94eb787e606d0a09404b9c38c113d3b099d508021faa615d70a0131907ce", size = 1767852, upload-time = "2026-01-03T17:30:39.433Z" },
+ { url = "https://files.pythonhosted.org/packages/9d/f2/a07a75173124f31f11ea6f863dc44e6f09afe2bca45dd4e64979490deab1/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:87797e645d9d8e222e04160ee32aa06bc5c163e8499f24db719e7852ec23093a", size = 1722379, upload-time = "2026-01-03T17:30:41.081Z" },
+ { url = "https://files.pythonhosted.org/packages/3c/4a/1a3fee7c21350cac78e5c5cef711bac1b94feca07399f3d406972e2d8fcd/aiohttp-3.13.3-cp312-cp312-win32.whl", hash = "sha256:b04be762396457bef43f3597c991e192ee7da460a4953d7e647ee4b1c28e7046", size = 428253, upload-time = "2026-01-03T17:30:42.644Z" },
+ { url = "https://files.pythonhosted.org/packages/d9/b7/76175c7cb4eb73d91ad63c34e29fc4f77c9386bba4a65b53ba8e05ee3c39/aiohttp-3.13.3-cp312-cp312-win_amd64.whl", hash = "sha256:e3531d63d3bdfa7e3ac5e9b27b2dd7ec9df3206a98e0b3445fa906f233264c57", size = 455407, upload-time = "2026-01-03T17:30:44.195Z" },
+]
+
+[[package]]
+name = "aiosignal"
+version = "1.4.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "frozenlist" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" },
+]
+
+[[package]]
+name = "annotated-doc"
+version = "0.0.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" },
+]
+
+[[package]]
+name = "annotated-types"
+version = "0.7.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
+]
+
+[[package]]
+name = "anthropic"
+version = "0.84.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "anyio" },
+ { name = "distro" },
+ { name = "docstring-parser" },
+ { name = "httpx" },
+ { name = "jiter" },
+ { name = "pydantic" },
+ { name = "sniffio" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/04/ea/0869d6df9ef83dcf393aeefc12dd81677d091c6ffc86f783e51cf44062f2/anthropic-0.84.0.tar.gz", hash = "sha256:72f5f90e5aebe62dca316cb013629cfa24996b0f5a4593b8c3d712bc03c43c37", size = 539457, upload-time = "2026-02-25T05:22:38.54Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/64/ca/218fa25002a332c0aa149ba18ffc0543175998b1f65de63f6d106689a345/anthropic-0.84.0-py3-none-any.whl", hash = "sha256:861c4c50f91ca45f942e091d83b60530ad6d4f98733bfe648065364da05d29e7", size = 455156, upload-time = "2026-02-25T05:22:40.468Z" },
+]
+
+[[package]]
+name = "anyio"
+version = "4.12.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "idna" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
+]
+
+[[package]]
+name = "apache-tvm-ffi"
+version = "0.1.9"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/6f/60/1e787a0b5ebf318483235be2a689ee367173983067e441b8379564f667c0/apache_tvm_ffi-0.1.9.tar.gz", hash = "sha256:d2d402587e8906de0a07f4746aa78f3d452c7efe3625d4bb39ac2ad693bce530", size = 2513731, upload-time = "2026-02-27T19:28:06.602Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/df/f2/b8c4b151169f6d7ba8773c8af68b2e0c1013d7fb3f1bdf87573f47157ce9/apache_tvm_ffi-0.1.9-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:49e52350b0470654847de752e65603b604a4d3323e7e9f5e8a982f44acc4c143", size = 2041756, upload-time = "2026-02-27T19:27:23.931Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/c0/6d3d54f50012255b41bc3e24944c086f63c4707c8686c7c6780e9283eb96/apache_tvm_ffi-0.1.9-cp312-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d503029e66c43b1a1cb1a42a1e9bb428c8a28dcbdec31c28e705472ca648a3a", size = 2203712, upload-time = "2026-02-27T19:27:25.867Z" },
+ { url = "https://files.pythonhosted.org/packages/c6/dd/2bab4c6cd86257dbf99e93452a1af833113f8dc3e25a25579f6e4e4c8a94/apache_tvm_ffi-0.1.9-cp312-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28241371934ea8af10d5067087ba1229ebddded7b2c02d33a258ec2a96df8c46", size = 2299704, upload-time = "2026-02-27T19:27:27.477Z" },
+ { url = "https://files.pythonhosted.org/packages/7a/4a/b469bcb2e1014cb84d336d2a59f42958a058251c577a4c2680cacad346e2/apache_tvm_ffi-0.1.9-cp312-abi3-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:87cacce81df55685fc6a76e1e3c5db1200e85e87bf5974b692c59d131b7bc622", size = 2130865, upload-time = "2026-02-27T19:27:29.092Z" },
+ { url = "https://files.pythonhosted.org/packages/70/ef/5402da5d37f5270fd88ea0348acca78dba9be8bdbf6c2bcae0935eb03ef1/apache_tvm_ffi-0.1.9-cp312-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f45eb43499acac45ff6c93564f0ff2d3ca27b69656d540fd56ce59d51c0b4c65", size = 2278991, upload-time = "2026-02-27T19:27:30.729Z" },
+ { url = "https://files.pythonhosted.org/packages/b5/23/1b7dc5f0807f83098183a57db6ee85b2c93b646d74a6e03781c9208aaeb0/apache_tvm_ffi-0.1.9-cp312-abi3-win_amd64.whl", hash = "sha256:d1dcf4c041d5ec05e3da1d545800c33cdbb95c113baa7705085ff79fa262752b", size = 1973200, upload-time = "2026-02-27T19:27:32.367Z" },
+]
+
+[[package]]
+name = "astor"
+version = "0.8.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5a/21/75b771132fee241dfe601d39ade629548a9626d1d39f333fde31bc46febe/astor-0.8.1.tar.gz", hash = "sha256:6a6effda93f4e1ce9f618779b2dd1d9d84f1e32812c23a29b3fff6fd7f63fa5e", size = 35090, upload-time = "2019-12-10T01:50:35.51Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c3/88/97eef84f48fa04fbd6750e62dcceafba6c63c81b7ac1420856c8dcc0a3f9/astor-0.8.1-py2.py3-none-any.whl", hash = "sha256:070a54e890cefb5b3739d19f30f5a5ec840ffc9c50ffa7d23cc9fc1a38ebbfc5", size = 27488, upload-time = "2019-12-10T01:50:33.628Z" },
+]
+
+[[package]]
+name = "attrs"
+version = "25.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6b/5c/685e6633917e101e5dcb62b9dd76946cbb57c26e133bae9e0cd36033c0a9/attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", size = 934251, upload-time = "2025-10-06T13:54:44.725Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" },
+]
+
+[[package]]
+name = "blake3"
+version = "1.0.8"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/75/aa/abcd75e9600987a0bc6cfe9b6b2ff3f0e2cb08c170addc6e76035b5c4cb3/blake3-1.0.8.tar.gz", hash = "sha256:513cc7f0f5a7c035812604c2c852a0c1468311345573de647e310aca4ab165ba", size = 117308, upload-time = "2025-10-14T06:47:48.83Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ed/a0/b7b6dff04012cfd6e665c09ee446f749bd8ea161b00f730fe1bdecd0f033/blake3-1.0.8-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:d8da4233984d51471bd4e4366feda1d90d781e712e0a504ea54b1f2b3577557b", size = 347983, upload-time = "2025-10-14T06:45:47.214Z" },
+ { url = "https://files.pythonhosted.org/packages/5b/a2/264091cac31d7ae913f1f296abc20b8da578b958ffb86100a7ce80e8bf5c/blake3-1.0.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1257be19f2d381c868a34cc822fc7f12f817ddc49681b6d1a2790bfbda1a9865", size = 325415, upload-time = "2025-10-14T06:45:48.482Z" },
+ { url = "https://files.pythonhosted.org/packages/ee/7d/85a4c0782f613de23d114a7a78fcce270f75b193b3ff3493a0de24ba104a/blake3-1.0.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:269f255b110840e52b6ce9db02217e39660ebad3e34ddd5bca8b8d378a77e4e1", size = 371296, upload-time = "2025-10-14T06:45:49.674Z" },
+ { url = "https://files.pythonhosted.org/packages/e3/20/488475254976ed93fab57c67aa80d3b40df77f7d9db6528c9274bff53e08/blake3-1.0.8-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:66ca28a673025c40db3eba21a9cac52f559f83637efa675b3f6bd8683f0415f3", size = 374516, upload-time = "2025-10-14T06:45:51.23Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/21/2a1c47fedb77fb396512677ec6d46caf42ac6e9a897db77edd0a2a46f7bb/blake3-1.0.8-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcb04966537777af56c1f399b35525aa70a1225816e121ff95071c33c0f7abca", size = 447911, upload-time = "2025-10-14T06:45:52.637Z" },
+ { url = "https://files.pythonhosted.org/packages/cb/7d/db0626df16029713e7e61b67314c4835e85c296d82bd907c21c6ea271da2/blake3-1.0.8-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e5b5da177d62cc4b7edf0cea08fe4dec960c9ac27f916131efa890a01f747b93", size = 505420, upload-time = "2025-10-14T06:45:54.445Z" },
+ { url = "https://files.pythonhosted.org/packages/5b/55/6e737850c2d58a6d9de8a76dad2ae0f75b852a23eb4ecb07a0b165e6e436/blake3-1.0.8-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:38209b10482c97e151681ea3e91cc7141f56adbbf4820a7d701a923124b41e6a", size = 394189, upload-time = "2025-10-14T06:45:55.719Z" },
+ { url = "https://files.pythonhosted.org/packages/5b/94/eafaa5cdddadc0c9c603a6a6d8339433475e1a9f60c8bb9c2eed2d8736b6/blake3-1.0.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:504d1399b7fb91dfe5c25722d2807990493185faa1917456455480c36867adb5", size = 388001, upload-time = "2025-10-14T06:45:57.067Z" },
+ { url = "https://files.pythonhosted.org/packages/17/81/735fa00d13de7f68b25e1b9cb36ff08c6f165e688d85d8ec2cbfcdedccc5/blake3-1.0.8-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c84af132aa09abeadf9a0118c8fb26f4528f3f42c10ef8be0fcf31c478774ec4", size = 550302, upload-time = "2025-10-14T06:45:58.657Z" },
+ { url = "https://files.pythonhosted.org/packages/0e/c6/d1fe8bdea4a6088bd54b5a58bc40aed89a4e784cd796af7722a06f74bae7/blake3-1.0.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a25db3d36b55f5ed6a86470155cc749fc9c5b91c949b8d14f48658f9d960d9ec", size = 554211, upload-time = "2025-10-14T06:46:00.269Z" },
+ { url = "https://files.pythonhosted.org/packages/55/d1/ca74aa450cbe10e396e061f26f7a043891ffa1485537d6b30d3757e20995/blake3-1.0.8-cp312-cp312-win32.whl", hash = "sha256:e0fee93d5adcd44378b008c147e84f181f23715307a64f7b3db432394bbfce8b", size = 228343, upload-time = "2025-10-14T06:46:01.533Z" },
+ { url = "https://files.pythonhosted.org/packages/4d/42/bbd02647169e3fbed27558555653ac2578c6f17ccacf7d1956c58ef1d214/blake3-1.0.8-cp312-cp312-win_amd64.whl", hash = "sha256:6a6eafc29e4f478d365a87d2f25782a521870c8514bb43734ac85ae9be71caf7", size = 215704, upload-time = "2025-10-14T06:46:02.79Z" },
+]
+
+[[package]]
+name = "cachetools"
+version = "7.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/48/5c/3b882b82e9af737906539a2eafb62f96a229f1fa80255bede0c7b554cbc4/cachetools-7.0.3.tar.gz", hash = "sha256:8c246313b95849964e54a909c03b327a87ab0428b068fac10da7b105ca275ef6", size = 37187, upload-time = "2026-03-05T21:00:57.918Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/05/4a/573185481c50a8841331f54ddae44e4a3469c46aa0b397731c53a004369a/cachetools-7.0.3-py3-none-any.whl", hash = "sha256:c128ffca156eef344c25fcd08a96a5952803786fa33097f5f2d49edf76f79d53", size = 13907, upload-time = "2026-03-05T21:00:56.486Z" },
+]
+
+[[package]]
+name = "cbor2"
+version = "5.8.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d9/8e/8b4fdde28e42ffcd741a37f4ffa9fb59cd4fe01625b544dfcfd9ccb54f01/cbor2-5.8.0.tar.gz", hash = "sha256:b19c35fcae9688ac01ef75bad5db27300c2537eb4ee00ed07e05d8456a0d4931", size = 107825, upload-time = "2025-12-30T18:44:22.455Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2f/4f/3a16e3e8fd7e5fd86751a4f1aad218a8d19a96e75ec3989c3e95a8fe1d8f/cbor2-5.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4b3f91fa699a5ce22470e973601c62dd9d55dc3ca20ee446516ac075fcab27c9", size = 70270, upload-time = "2025-12-30T18:43:46.005Z" },
+ { url = "https://files.pythonhosted.org/packages/38/81/0d0cf0796fe8081492a61c45278f03def21a929535a492dd97c8438f5dbe/cbor2-5.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:518c118a5e00001854adb51f3164e647aa99b6a9877d2a733a28cb5c0a4d6857", size = 286242, upload-time = "2025-12-30T18:43:47.026Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/a9/fdab6c10190cfb8d639e01f2b168f2406fc847a2a6bc00e7de78c3381d0a/cbor2-5.8.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cff2a1999e49cd51c23d1b6786a012127fd8f722c5946e82bd7ab3eb307443f3", size = 285412, upload-time = "2025-12-30T18:43:48.563Z" },
+ { url = "https://files.pythonhosted.org/packages/31/59/746a8e630996217a3afd523f583fcf7e3d16640d63f9a03f0f4e4f74b5b1/cbor2-5.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c4492160212374973cdc14e46f0565f2462721ef922b40f7ea11e7d613dfb2a", size = 278041, upload-time = "2025-12-30T18:43:49.92Z" },
+ { url = "https://files.pythonhosted.org/packages/0f/a3/f3bbeb6dedd45c6e0cddd627ea790dea295eaf82c83f0e2159b733365ebd/cbor2-5.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:546c7c7c4c6bcdc54a59242e0e82cea8f332b17b4465ae628718fef1fce401ca", size = 278185, upload-time = "2025-12-30T18:43:51.192Z" },
+ { url = "https://files.pythonhosted.org/packages/67/e5/9013d6b857ceb6cdb2851ffb5a887f53f2bab934a528c9d6fa73d9989d84/cbor2-5.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:074f0fa7535dd7fdee247c2c99f679d94f3aa058ccb1ccf4126cc72d6d89cbae", size = 69817, upload-time = "2025-12-30T18:43:52.352Z" },
+ { url = "https://files.pythonhosted.org/packages/a8/ab/7aa94ba3d44ecbc3a97bdb2fb6a8298063fe2e0b611e539a6fe41e36da20/cbor2-5.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:f95fed480b2a0d843f294d2a1ef4cc0f6a83c7922927f9f558e1f5a8dc54b7ca", size = 64923, upload-time = "2025-12-30T18:43:53.719Z" },
+ { url = "https://files.pythonhosted.org/packages/d6/4f/101071f880b4da05771128c0b89f41e334cff044dee05fb013c8f4be661c/cbor2-5.8.0-py3-none-any.whl", hash = "sha256:3727d80f539567b03a7aa11890e57798c67092c38df9e6c23abb059e0f65069c", size = 24374, upload-time = "2025-12-30T18:44:21.476Z" },
+]
+
+[[package]]
+name = "certifi"
+version = "2026.1.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
+]
+
+[[package]]
+name = "cffi"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "pycparser", marker = "implementation_name != 'PyPy'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" },
+ { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" },
+ { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" },
+ { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" },
+ { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" },
+ { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" },
+ { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" },
+ { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" },
+ { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" },
+ { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" },
+ { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" },
+]
+
+[[package]]
+name = "charset-normalizer"
+version = "3.4.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" },
+ { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" },
+ { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" },
+ { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" },
+ { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" },
+ { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" },
+ { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" },
+ { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" },
+ { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" },
+ { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" },
+ { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" },
+ { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" },
+ { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" },
+ { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" },
+ { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" },
+ { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" },
+ { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" },
+]
+
+[[package]]
+name = "click"
+version = "8.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" },
+]
+
+[[package]]
+name = "cloudpickle"
+version = "3.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/27/fb/576f067976d320f5f0114a8d9fa1215425441bb35627b1993e5afd8111e5/cloudpickle-3.1.2.tar.gz", hash = "sha256:7fda9eb655c9c230dab534f1983763de5835249750e85fbcef43aaa30a9a2414", size = 22330, upload-time = "2025-11-03T09:25:26.604Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl", hash = "sha256:9acb47f6afd73f60dc1df93bb801b472f05ff42fa6c84167d25cb206be1fbf4a", size = 22228, upload-time = "2025-11-03T09:25:25.534Z" },
+]
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
+]
+
+[[package]]
+name = "compressed-tensors"
+version = "0.13.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "loguru" },
+ { name = "pydantic" },
+ { name = "torch" },
+ { name = "transformers" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/fc/65/88dd1c58fb9d0ded51b5c86471b937a1525f91fad2211a6f051dc1ea822d/compressed_tensors-0.13.0.tar.gz", hash = "sha256:23893824d3498ea3f1a829f14a8fa85f9a5e76a34c711a038b8d7c619ca9a67c", size = 200995, upload-time = "2025-12-16T16:03:55.397Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0b/b5/61ac2563c62490922b603c09113a083fd74af3630ec3931e769484d6dcb5/compressed_tensors-0.13.0-py3-none-any.whl", hash = "sha256:3518799c9baf034eb642efb551db6b0537b8713d45a64fe4def26f7f8d6cabec", size = 192620, upload-time = "2025-12-16T16:03:53.041Z" },
+]
+
+[[package]]
+name = "contourpy"
+version = "1.3.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "numpy" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/58/01/1253e6698a07380cd31a736d248a3f2a50a7c88779a1813da27503cadc2a/contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880", size = 13466174, upload-time = "2025-07-26T12:03:12.549Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/be/45/adfee365d9ea3d853550b2e735f9d66366701c65db7855cd07621732ccfc/contourpy-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b08a32ea2f8e42cf1d4be3169a98dd4be32bafe4f22b6c4cb4ba810fa9e5d2cb", size = 293419, upload-time = "2025-07-26T12:01:21.16Z" },
+ { url = "https://files.pythonhosted.org/packages/53/3e/405b59cfa13021a56bba395a6b3aca8cec012b45bf177b0eaf7a202cde2c/contourpy-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:556dba8fb6f5d8742f2923fe9457dbdd51e1049c4a43fd3986a0b14a1d815fc6", size = 273979, upload-time = "2025-07-26T12:01:22.448Z" },
+ { url = "https://files.pythonhosted.org/packages/d4/1c/a12359b9b2ca3a845e8f7f9ac08bdf776114eb931392fcad91743e2ea17b/contourpy-1.3.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92d9abc807cf7d0e047b95ca5d957cf4792fcd04e920ca70d48add15c1a90ea7", size = 332653, upload-time = "2025-07-26T12:01:24.155Z" },
+ { url = "https://files.pythonhosted.org/packages/63/12/897aeebfb475b7748ea67b61e045accdfcf0d971f8a588b67108ed7f5512/contourpy-1.3.3-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2e8faa0ed68cb29af51edd8e24798bb661eac3bd9f65420c1887b6ca89987c8", size = 379536, upload-time = "2025-07-26T12:01:25.91Z" },
+ { url = "https://files.pythonhosted.org/packages/43/8a/a8c584b82deb248930ce069e71576fc09bd7174bbd35183b7943fb1064fd/contourpy-1.3.3-cp312-cp312-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:626d60935cf668e70a5ce6ff184fd713e9683fb458898e4249b63be9e28286ea", size = 384397, upload-time = "2025-07-26T12:01:27.152Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/8f/ec6289987824b29529d0dfda0d74a07cec60e54b9c92f3c9da4c0ac732de/contourpy-1.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d00e655fcef08aba35ec9610536bfe90267d7ab5ba944f7032549c55a146da1", size = 362601, upload-time = "2025-07-26T12:01:28.808Z" },
+ { url = "https://files.pythonhosted.org/packages/05/0a/a3fe3be3ee2dceb3e615ebb4df97ae6f3828aa915d3e10549ce016302bd1/contourpy-1.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:451e71b5a7d597379ef572de31eeb909a87246974d960049a9848c3bc6c41bf7", size = 1331288, upload-time = "2025-07-26T12:01:31.198Z" },
+ { url = "https://files.pythonhosted.org/packages/33/1d/acad9bd4e97f13f3e2b18a3977fe1b4a37ecf3d38d815333980c6c72e963/contourpy-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:459c1f020cd59fcfe6650180678a9993932d80d44ccde1fa1868977438f0b411", size = 1403386, upload-time = "2025-07-26T12:01:33.947Z" },
+ { url = "https://files.pythonhosted.org/packages/cf/8f/5847f44a7fddf859704217a99a23a4f6417b10e5ab1256a179264561540e/contourpy-1.3.3-cp312-cp312-win32.whl", hash = "sha256:023b44101dfe49d7d53932be418477dba359649246075c996866106da069af69", size = 185018, upload-time = "2025-07-26T12:01:35.64Z" },
+ { url = "https://files.pythonhosted.org/packages/19/e8/6026ed58a64563186a9ee3f29f41261fd1828f527dd93d33b60feca63352/contourpy-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:8153b8bfc11e1e4d75bcb0bff1db232f9e10b274e0929de9d608027e0d34ff8b", size = 226567, upload-time = "2025-07-26T12:01:36.804Z" },
+ { url = "https://files.pythonhosted.org/packages/d1/e2/f05240d2c39a1ed228d8328a78b6f44cd695f7ef47beb3e684cf93604f86/contourpy-1.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:07ce5ed73ecdc4a03ffe3e1b3e3c1166db35ae7584be76f65dbbe28a7791b0cc", size = 193655, upload-time = "2025-07-26T12:01:37.999Z" },
+]
+
+[[package]]
+name = "cryptography"
+version = "46.0.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/60/04/ee2a9e8542e4fa2773b81771ff8349ff19cdd56b7258a0cc442639052edb/cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d", size = 750064, upload-time = "2026-02-10T19:18:38.255Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f7/81/b0bb27f2ba931a65409c6b8a8b358a7f03c0e46eceacddff55f7c84b1f3b/cryptography-46.0.5-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad", size = 7176289, upload-time = "2026-02-10T19:17:08.274Z" },
+ { url = "https://files.pythonhosted.org/packages/ff/9e/6b4397a3e3d15123de3b1806ef342522393d50736c13b20ec4c9ea6693a6/cryptography-46.0.5-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b", size = 4275637, upload-time = "2026-02-10T19:17:10.53Z" },
+ { url = "https://files.pythonhosted.org/packages/63/e7/471ab61099a3920b0c77852ea3f0ea611c9702f651600397ac567848b897/cryptography-46.0.5-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b", size = 4424742, upload-time = "2026-02-10T19:17:12.388Z" },
+ { url = "https://files.pythonhosted.org/packages/37/53/a18500f270342d66bf7e4d9f091114e31e5ee9e7375a5aba2e85a91e0044/cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263", size = 4277528, upload-time = "2026-02-10T19:17:13.853Z" },
+ { url = "https://files.pythonhosted.org/packages/22/29/c2e812ebc38c57b40e7c583895e73c8c5adb4d1e4a0cc4c5a4fdab2b1acc/cryptography-46.0.5-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d", size = 4947993, upload-time = "2026-02-10T19:17:15.618Z" },
+ { url = "https://files.pythonhosted.org/packages/6b/e7/237155ae19a9023de7e30ec64e5d99a9431a567407ac21170a046d22a5a3/cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed", size = 4456855, upload-time = "2026-02-10T19:17:17.221Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/87/fc628a7ad85b81206738abbd213b07702bcbdada1dd43f72236ef3cffbb5/cryptography-46.0.5-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2", size = 3984635, upload-time = "2026-02-10T19:17:18.792Z" },
+ { url = "https://files.pythonhosted.org/packages/84/29/65b55622bde135aedf4565dc509d99b560ee4095e56989e815f8fd2aa910/cryptography-46.0.5-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2", size = 4277038, upload-time = "2026-02-10T19:17:20.256Z" },
+ { url = "https://files.pythonhosted.org/packages/bc/36/45e76c68d7311432741faf1fbf7fac8a196a0a735ca21f504c75d37e2558/cryptography-46.0.5-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0", size = 4912181, upload-time = "2026-02-10T19:17:21.825Z" },
+ { url = "https://files.pythonhosted.org/packages/6d/1a/c1ba8fead184d6e3d5afcf03d569acac5ad063f3ac9fb7258af158f7e378/cryptography-46.0.5-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731", size = 4456482, upload-time = "2026-02-10T19:17:25.133Z" },
+ { url = "https://files.pythonhosted.org/packages/f9/e5/3fb22e37f66827ced3b902cf895e6a6bc1d095b5b26be26bd13c441fdf19/cryptography-46.0.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82", size = 4405497, upload-time = "2026-02-10T19:17:26.66Z" },
+ { url = "https://files.pythonhosted.org/packages/1a/df/9d58bb32b1121a8a2f27383fabae4d63080c7ca60b9b5c88be742be04ee7/cryptography-46.0.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1", size = 4667819, upload-time = "2026-02-10T19:17:28.569Z" },
+ { url = "https://files.pythonhosted.org/packages/ea/ed/325d2a490c5e94038cdb0117da9397ece1f11201f425c4e9c57fe5b9f08b/cryptography-46.0.5-cp311-abi3-win32.whl", hash = "sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48", size = 3028230, upload-time = "2026-02-10T19:17:30.518Z" },
+ { url = "https://files.pythonhosted.org/packages/e9/5a/ac0f49e48063ab4255d9e3b79f5def51697fce1a95ea1370f03dc9db76f6/cryptography-46.0.5-cp311-abi3-win_amd64.whl", hash = "sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4", size = 3480909, upload-time = "2026-02-10T19:17:32.083Z" },
+ { url = "https://files.pythonhosted.org/packages/e2/fa/a66aa722105ad6a458bebd64086ca2b72cdd361fed31763d20390f6f1389/cryptography-46.0.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31", size = 7170514, upload-time = "2026-02-10T19:17:56.267Z" },
+ { url = "https://files.pythonhosted.org/packages/0f/04/c85bdeab78c8bc77b701bf0d9bdcf514c044e18a46dcff330df5448631b0/cryptography-46.0.5-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18", size = 4275349, upload-time = "2026-02-10T19:17:58.419Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/32/9b87132a2f91ee7f5223b091dc963055503e9b442c98fc0b8a5ca765fab0/cryptography-46.0.5-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235", size = 4420667, upload-time = "2026-02-10T19:18:00.619Z" },
+ { url = "https://files.pythonhosted.org/packages/a1/a6/a7cb7010bec4b7c5692ca6f024150371b295ee1c108bdc1c400e4c44562b/cryptography-46.0.5-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a", size = 4276980, upload-time = "2026-02-10T19:18:02.379Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/7c/c4f45e0eeff9b91e3f12dbd0e165fcf2a38847288fcfd889deea99fb7b6d/cryptography-46.0.5-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76", size = 4939143, upload-time = "2026-02-10T19:18:03.964Z" },
+ { url = "https://files.pythonhosted.org/packages/37/19/e1b8f964a834eddb44fa1b9a9976f4e414cbb7aa62809b6760c8803d22d1/cryptography-46.0.5-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614", size = 4453674, upload-time = "2026-02-10T19:18:05.588Z" },
+ { url = "https://files.pythonhosted.org/packages/db/ed/db15d3956f65264ca204625597c410d420e26530c4e2943e05a0d2f24d51/cryptography-46.0.5-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229", size = 3978801, upload-time = "2026-02-10T19:18:07.167Z" },
+ { url = "https://files.pythonhosted.org/packages/41/e2/df40a31d82df0a70a0daf69791f91dbb70e47644c58581d654879b382d11/cryptography-46.0.5-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1", size = 4276755, upload-time = "2026-02-10T19:18:09.813Z" },
+ { url = "https://files.pythonhosted.org/packages/33/45/726809d1176959f4a896b86907b98ff4391a8aa29c0aaaf9450a8a10630e/cryptography-46.0.5-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d", size = 4901539, upload-time = "2026-02-10T19:18:11.263Z" },
+ { url = "https://files.pythonhosted.org/packages/99/0f/a3076874e9c88ecb2ecc31382f6e7c21b428ede6f55aafa1aa272613e3cd/cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c", size = 4452794, upload-time = "2026-02-10T19:18:12.914Z" },
+ { url = "https://files.pythonhosted.org/packages/02/ef/ffeb542d3683d24194a38f66ca17c0a4b8bf10631feef44a7ef64e631b1a/cryptography-46.0.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4", size = 4404160, upload-time = "2026-02-10T19:18:14.375Z" },
+ { url = "https://files.pythonhosted.org/packages/96/93/682d2b43c1d5f1406ed048f377c0fc9fc8f7b0447a478d5c65ab3d3a66eb/cryptography-46.0.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9", size = 4667123, upload-time = "2026-02-10T19:18:15.886Z" },
+ { url = "https://files.pythonhosted.org/packages/45/2d/9c5f2926cb5300a8eefc3f4f0b3f3df39db7f7ce40c8365444c49363cbda/cryptography-46.0.5-cp38-abi3-win32.whl", hash = "sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72", size = 3010220, upload-time = "2026-02-10T19:18:17.361Z" },
+ { url = "https://files.pythonhosted.org/packages/48/ef/0c2f4a8e31018a986949d34a01115dd057bf536905dca38897bacd21fac3/cryptography-46.0.5-cp38-abi3-win_amd64.whl", hash = "sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595", size = 3467050, upload-time = "2026-02-10T19:18:18.899Z" },
+]
+
+[[package]]
+name = "cuda-bindings"
+version = "12.9.4"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+ "sys_platform != 'emscripten' and sys_platform != 'win32'",
+]
+dependencies = [
+ { name = "cuda-pathfinder", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0c/c2/65bfd79292b8ff18be4dd7f7442cea37bcbc1a228c1886f1dea515c45b67/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:694ba35023846625ef471257e6b5a4bc8af690f961d197d77d34b1d1db393f56", size = 11760260, upload-time = "2025-10-21T14:51:40.79Z" },
+ { url = "https://files.pythonhosted.org/packages/a9/c1/dabe88f52c3e3760d861401bb994df08f672ec893b8f7592dc91626adcf3/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fda147a344e8eaeca0c6ff113d2851ffca8f7dfc0a6c932374ee5c47caa649c8", size = 12151019, upload-time = "2025-10-21T14:51:43.167Z" },
+]
+
+[[package]]
+name = "cuda-bindings"
+version = "13.1.1"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+ "sys_platform == 'win32'",
+ "sys_platform == 'emscripten'",
+]
+dependencies = [
+ { name = "cuda-pathfinder", marker = "sys_platform == 'emscripten' or sys_platform == 'win32'" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/60/1f/ecc4701ade3e85f091c625a920574527b9daf7fb354189fbfbc5516af6cd/cuda_bindings-13.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:ccde9c95c0e953b31fe7731bb08da9d0a34b1770498df9a3c156fdfdbe3951ad", size = 15250028, upload-time = "2025-12-09T22:06:00.346Z" },
+]
+
+[[package]]
+name = "cuda-pathfinder"
+version = "1.4.1"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/07/02/59a5bc738a09def0b49aea0e460bdf97f65206d0d041246147cf6207e69c/cuda_pathfinder-1.4.1-py3-none-any.whl", hash = "sha256:40793006082de88e0950753655e55558a446bed9a7d9d0bcb48b2506d50ed82a", size = 43903, upload-time = "2026-03-06T21:05:24.372Z" },
+]
+
+[[package]]
+name = "cuda-python"
+version = "12.9.4"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+ "sys_platform != 'emscripten' and sys_platform != 'win32'",
+]
+dependencies = [
+ { name = "cuda-bindings", version = "12.9.4", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/af/f3/6b032a554019cfb3447e671798c1bd3e79b5f1af20d10253f56cea269ef2/cuda_python-12.9.4-py3-none-any.whl", hash = "sha256:d2cacea882a69863f1e7d27ee71d75f0684f4c76910aff839067e4f89c902279", size = 7594, upload-time = "2025-10-21T14:55:12.846Z" },
+]
+
+[[package]]
+name = "cuda-python"
+version = "13.1.1"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+ "sys_platform == 'win32'",
+ "sys_platform == 'emscripten'",
+]
+dependencies = [
+ { name = "cuda-bindings", version = "13.1.1", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'emscripten' or sys_platform == 'win32'" },
+ { name = "cuda-pathfinder", marker = "sys_platform == 'emscripten' or sys_platform == 'win32'" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/cd/08/b5e3b9822662d72d540d830531e3ab6a7cabbda3dd56175696aabccfeb76/cuda_python-13.1.1-py3-none-any.whl", hash = "sha256:944cc4fe6482673d28dd545797a28840945a1668739328fa2ad1e9be4f7050d9", size = 8038, upload-time = "2025-12-09T22:13:10.719Z" },
+]
+
+[[package]]
+name = "cupy-cuda12x"
+version = "14.0.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "cuda-pathfinder" },
+ { name = "numpy" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/38/ca/b93ef9fca1471a65f136a73e10819634c0b83427362fc08fc9f29f935bf0/cupy_cuda12x-14.0.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:f244bc14fad6f1ef0c74abd98afa4b82d2534aecdba911197810ec0047f0d1f3", size = 145578614, upload-time = "2026-02-20T10:22:49.108Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/a6/944406223a190815d9df156a1d66f3b0352bd8827dc4a8c752196d616dbc/cupy_cuda12x-14.0.1-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:9f0c81c3509f77be3ae8444759d5b314201b2dfcbbf2ae0d0b5fb7a61f20893c", size = 134613763, upload-time = "2026-02-20T10:22:56.792Z" },
+ { url = "https://files.pythonhosted.org/packages/11/fd/62e6e3f3c0c9f785b2dbdc2bff01bc375f5c6669d52e5e151f7aeb577801/cupy_cuda12x-14.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:63dc8a3a88d2ffd0386796b915d27acc7f2332c2291efd1ff4f0021b96f02051", size = 96267167, upload-time = "2026-02-20T10:23:02.263Z" },
+]
+
+[[package]]
+name = "cycler"
+version = "0.12.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a9/95/a3dbbb5028f35eafb79008e7522a75244477d2838f38cbb722248dabc2a8/cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c", size = 7615, upload-time = "2023-10-07T05:32:18.335Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" },
+]
+
+[[package]]
+name = "datasets"
+version = "4.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "dill" },
+ { name = "filelock" },
+ { name = "fsspec", extra = ["http"] },
+ { name = "httpx" },
+ { name = "huggingface-hub" },
+ { name = "multiprocess" },
+ { name = "numpy" },
+ { name = "packaging" },
+ { name = "pandas" },
+ { name = "pyarrow" },
+ { name = "pyyaml" },
+ { name = "requests" },
+ { name = "tqdm" },
+ { name = "xxhash" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/55/bf/bb927bde63d649296c83e883171ae77074717c1b80fe2868b328bd0dbcbb/datasets-4.5.0.tar.gz", hash = "sha256:00c698ce1c2452e646cc5fad47fef39d3fe78dd650a8a6eb205bb45eb63cd500", size = 588384, upload-time = "2026-01-14T18:27:54.297Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/fc/d5/0d563ea3c205eee226dc8053cf7682a8ac588db8acecd0eda2b587987a0b/datasets-4.5.0-py3-none-any.whl", hash = "sha256:b5d7e08096ffa407dd69e58b1c0271c9b2506140839b8d99af07375ad31b6726", size = 515196, upload-time = "2026-01-14T18:27:52.419Z" },
+]
+
+[[package]]
+name = "depyf"
+version = "0.20.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "astor" },
+ { name = "dill" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/88/35/83fb0178212279aa0af031031905804c6de5618435d229f41ed21bb9ad2c/depyf-0.20.0.tar.gz", hash = "sha256:fb7683bd72c44f67b56029df2c47721e9a02ffa4d7b19095f1c54c4ebf797a98", size = 6168761, upload-time = "2025-10-13T12:33:38.589Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/cf/65/4df6936130b56e1429114e663e7c1576cf845f3aef1b2dd200c0a5d19dba/depyf-0.20.0-py3-none-any.whl", hash = "sha256:d31effad4261cebecb58955d832e448ace88f432328f95f82fd99c30fd9308d4", size = 39381, upload-time = "2025-10-13T12:33:33.647Z" },
+]
+
+[[package]]
+name = "dill"
+version = "0.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" },
+]
+
+[[package]]
+name = "diskcache"
+version = "5.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916, upload-time = "2023-08-31T06:12:00.316Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550, upload-time = "2023-08-31T06:11:58.822Z" },
+]
+
+[[package]]
+name = "distro"
+version = "1.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
+]
+
+[[package]]
+name = "dnspython"
+version = "2.8.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/57666417c0f90f08bcafa776861060426765fdb422eb10212086fb811d26/dnspython-2.8.0.tar.gz", hash = "sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f", size = 368251, upload-time = "2025-09-07T18:58:00.022Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" },
+]
+
+[[package]]
+name = "docstring-parser"
+version = "0.17.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload-time = "2025-07-21T07:35:01.868Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" },
+]
+
+[[package]]
+name = "einops"
+version = "0.8.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/2c/77/850bef8d72ffb9219f0b1aac23fbc1bf7d038ee6ea666f331fa273031aa2/einops-0.8.2.tar.gz", hash = "sha256:609da665570e5e265e27283aab09e7f279ade90c4f01bcfca111f3d3e13f2827", size = 56261, upload-time = "2026-01-26T04:13:17.638Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2a/09/f8d8f8f31e4483c10a906437b4ce31bdf3d6d417b73fe33f1a8b59e34228/einops-0.8.2-py3-none-any.whl", hash = "sha256:54058201ac7087911181bfec4af6091bb59380360f069276601256a76af08193", size = 65638, upload-time = "2026-01-26T04:13:18.546Z" },
+]
+
+[[package]]
+name = "email-validator"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "dnspython" },
+ { name = "idna" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f5/22/900cb125c76b7aaa450ce02fd727f452243f2e91a61af068b40adba60ea9/email_validator-2.3.0.tar.gz", hash = "sha256:9fc05c37f2f6cf439ff414f8fc46d917929974a82244c20eb10231ba60c54426", size = 51238, upload-time = "2025-08-26T13:09:06.831Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" },
+]
+
+[[package]]
+name = "fastapi"
+version = "0.135.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "annotated-doc" },
+ { name = "pydantic" },
+ { name = "starlette" },
+ { name = "typing-extensions" },
+ { name = "typing-inspection" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e7/7b/f8e0211e9380f7195ba3f3d40c292594fd81ba8ec4629e3854c353aaca45/fastapi-0.135.1.tar.gz", hash = "sha256:d04115b508d936d254cea545b7312ecaa58a7b3a0f84952535b4c9afae7668cd", size = 394962, upload-time = "2026-03-01T18:18:29.369Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e4/72/42e900510195b23a56bde950d26a51f8b723846bfcaa0286e90287f0422b/fastapi-0.135.1-py3-none-any.whl", hash = "sha256:46e2fc5745924b7c840f71ddd277382af29ce1cdb7d5eab5bf697e3fb9999c9e", size = 116999, upload-time = "2026-03-01T18:18:30.831Z" },
+]
+
+[package.optional-dependencies]
+standard = [
+ { name = "email-validator" },
+ { name = "fastapi-cli", extra = ["standard"] },
+ { name = "httpx" },
+ { name = "jinja2" },
+ { name = "pydantic-extra-types" },
+ { name = "pydantic-settings" },
+ { name = "python-multipart" },
+ { name = "uvicorn", extra = ["standard"] },
+]
+
+[[package]]
+name = "fastapi-cli"
+version = "0.0.24"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "rich-toolkit" },
+ { name = "typer" },
+ { name = "uvicorn", extra = ["standard"] },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/6e/58/74797ae9e4610cfa0c6b34c8309096d3b20bb29be3b8b5fbf1004d10fa5f/fastapi_cli-0.0.24.tar.gz", hash = "sha256:1afc9c9e21d7ebc8a3ca5e31790cd8d837742be7e4f8b9236e99cb3451f0de00", size = 19043, upload-time = "2026-02-24T10:45:10.476Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c7/4b/68f9fe268e535d79c76910519530026a4f994ce07189ac0dded45c6af825/fastapi_cli-0.0.24-py3-none-any.whl", hash = "sha256:4a1f78ed798f106b4fee85ca93b85d8fe33c0a3570f775964d37edb80b8f0edc", size = 12304, upload-time = "2026-02-24T10:45:09.552Z" },
+]
+
+[package.optional-dependencies]
+standard = [
+ { name = "fastapi-cloud-cli" },
+ { name = "uvicorn", extra = ["standard"] },
+]
+
+[[package]]
+name = "fastapi-cloud-cli"
+version = "0.14.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "fastar" },
+ { name = "httpx" },
+ { name = "pydantic", extra = ["email"] },
+ { name = "rich-toolkit" },
+ { name = "rignore" },
+ { name = "sentry-sdk" },
+ { name = "typer" },
+ { name = "uvicorn", extra = ["standard"] },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/2b/eb/e78ebd05a714c62a0578cdce4339cb6cd138421a7d865fbddedd7242420b/fastapi_cloud_cli-0.14.0.tar.gz", hash = "sha256:d3ecb8c942685a71df0af7bd59f463b5eff76f5818b48e5a03c6159726831e68", size = 39822, upload-time = "2026-02-25T14:19:53.535Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d9/18/7bf922ee0b6a737a9d88cf613182ecd6031f52298da893556f158eba763f/fastapi_cloud_cli-0.14.0-py3-none-any.whl", hash = "sha256:325fcb4b45e661184152da6db861d9fb718739fbcd561a4d334dbe78c026586f", size = 28350, upload-time = "2026-02-25T14:19:52.416Z" },
+]
+
+[[package]]
+name = "fastar"
+version = "0.8.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/69/e7/f89d54fb04104114dd0552836dc2b47914f416cc0e200b409dd04a33de5e/fastar-0.8.0.tar.gz", hash = "sha256:f4d4d68dbf1c4c2808f0e730fac5843493fc849f70fe3ad3af60dfbaf68b9a12", size = 68524, upload-time = "2025-11-26T02:36:00.72Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/58/f1/5b2ff898abac7f1a418284aad285e3a4f68d189c572ab2db0f6c9079dd16/fastar-0.8.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0f10d2adfe40f47ff228f4efaa32d409d732ded98580e03ed37c9535b5fc923d", size = 706369, upload-time = "2025-11-26T02:34:37.783Z" },
+ { url = "https://files.pythonhosted.org/packages/23/60/8046a386dca39154f80c927cbbeeb4b1c1267a3271bffe61552eb9995757/fastar-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b930da9d598e3bc69513d131f397e6d6be4643926ef3de5d33d1e826631eb036", size = 629097, upload-time = "2025-11-26T02:34:21.888Z" },
+ { url = "https://files.pythonhosted.org/packages/22/7e/1ae005addc789924a9268da2394d3bb5c6f96836f7e37b7e3d23c2362675/fastar-0.8.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9d210da2de733ca801de83e931012349d209f38b92d9630ccaa94bd445bdc9b8", size = 868938, upload-time = "2025-11-26T02:33:51.119Z" },
+ { url = "https://files.pythonhosted.org/packages/a6/77/290a892b073b84bf82e6b2259708dfe79c54f356e252c2dd40180b16fe07/fastar-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa02270721517078a5bd61a38719070ac2537a4aa6b6c48cf369cf2abc59174a", size = 765204, upload-time = "2025-11-26T02:32:47.02Z" },
+ { url = "https://files.pythonhosted.org/packages/d0/00/c3155171b976003af3281f5258189f1935b15d1221bfc7467b478c631216/fastar-0.8.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:83c391e5b789a720e4d0029b9559f5d6dee3226693c5b39c0eab8eaece997e0f", size = 764717, upload-time = "2025-11-26T02:33:02.453Z" },
+ { url = "https://files.pythonhosted.org/packages/b7/43/405b7ad76207b2c11b7b59335b70eac19e4a2653977f5588a1ac8fed54f4/fastar-0.8.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3258d7a78a72793cdd081545da61cabe85b1f37634a1d0b97ffee0ff11d105ef", size = 931502, upload-time = "2025-11-26T02:33:18.619Z" },
+ { url = "https://files.pythonhosted.org/packages/da/8a/a3dde6d37cc3da4453f2845cdf16675b5686b73b164f37e2cc579b057c2c/fastar-0.8.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6eab95dd985cdb6a50666cbeb9e4814676e59cfe52039c880b69d67cfd44767", size = 821454, upload-time = "2025-11-26T02:33:33.427Z" },
+ { url = "https://files.pythonhosted.org/packages/da/c1/904fe2468609c8990dce9fe654df3fbc7324a8d8e80d8240ae2c89757064/fastar-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:829b1854166141860887273c116c94e31357213fa8e9fe8baeb18bd6c38aa8d9", size = 821647, upload-time = "2025-11-26T02:34:07Z" },
+ { url = "https://files.pythonhosted.org/packages/c8/73/a0642ab7a400bc07528091785e868ace598fde06fcd139b8f865ec1b6f3c/fastar-0.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b1667eae13f9457a3c737f4376d68e8c3e548353538b28f7e4273a30cb3965cd", size = 986342, upload-time = "2025-11-26T02:34:53.371Z" },
+ { url = "https://files.pythonhosted.org/packages/af/af/60c1bfa6edab72366461a95f053d0f5f7ab1825fe65ca2ca367432cd8629/fastar-0.8.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b864a95229a7db0814cd9ef7987cb713fd43dce1b0d809dd17d9cd6f02fdde3e", size = 1040207, upload-time = "2025-11-26T02:35:10.65Z" },
+ { url = "https://files.pythonhosted.org/packages/f6/a0/0d624290dec622e7fa084b6881f456809f68777d54a314f5dde932714506/fastar-0.8.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c05fbc5618ce17675a42576fa49858d79734627f0a0c74c0875ab45ee8de340c", size = 1045031, upload-time = "2025-11-26T02:35:28.108Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/74/cf663af53c4706ba88e6b4af44a6b0c3bd7d7ca09f079dc40647a8f06585/fastar-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7f41c51ee96f338662ee3c3df4840511ba3f9969606840f1b10b7cb633a3c716", size = 994877, upload-time = "2025-11-26T02:35:45.797Z" },
+ { url = "https://files.pythonhosted.org/packages/52/17/444c8be6e77206050e350da7c338102b6cab384be937fa0b1d6d1f9ede73/fastar-0.8.0-cp312-cp312-win32.whl", hash = "sha256:d949a1a2ea7968b734632c009df0571c94636a5e1622c87a6e2bf712a7334f47", size = 455996, upload-time = "2025-11-26T02:36:26.938Z" },
+ { url = "https://files.pythonhosted.org/packages/dc/34/fc3b5e56d71a17b1904800003d9251716e8fd65f662e1b10a26881698a74/fastar-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:fc645994d5b927d769121094e8a649b09923b3c13a8b0b98696d8f853f23c532", size = 490429, upload-time = "2025-11-26T02:36:12.707Z" },
+ { url = "https://files.pythonhosted.org/packages/35/a8/5608cc837417107c594e2e7be850b9365bcb05e99645966a5d6a156285fe/fastar-0.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:d81ee82e8dc78a0adb81728383bd39611177d642a8fa2d601d4ad5ad59e5f3bd", size = 461297, upload-time = "2025-11-26T02:36:03.546Z" },
+]
+
+[[package]]
+name = "filelock"
+version = "3.24.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/73/92/a8e2479937ff39185d20dd6a851c1a63e55849e447a55e798cc2e1f49c65/filelock-3.24.3.tar.gz", hash = "sha256:011a5644dc937c22699943ebbfc46e969cdde3e171470a6e40b9533e5a72affa", size = 37935, upload-time = "2026-02-19T00:48:20.543Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/9c/0f/5d0c71a1aefeb08efff26272149e07ab922b64f46c63363756224bd6872e/filelock-3.24.3-py3-none-any.whl", hash = "sha256:426e9a4660391f7f8a810d71b0555bce9008b0a1cc342ab1f6947d37639e002d", size = 24331, upload-time = "2026-02-19T00:48:18.465Z" },
+]
+
+[[package]]
+name = "flashinfer-python"
+version = "0.6.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "apache-tvm-ffi" },
+ { name = "click" },
+ { name = "einops" },
+ { name = "ninja" },
+ { name = "numpy" },
+ { name = "nvidia-cudnn-frontend" },
+ { name = "nvidia-cutlass-dsl" },
+ { name = "nvidia-ml-py" },
+ { name = "packaging" },
+ { name = "requests" },
+ { name = "tabulate" },
+ { name = "torch" },
+ { name = "tqdm" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/77/45/15645d2a4ee81d08206f3e132a77323e48312f510462415d7cd1122eba43/flashinfer_python-0.6.4.tar.gz", hash = "sha256:e6ab798bd1030e5ff7a3bc6952f36386c406928f60b79cf964a6db7aa7ccde75", size = 5337134, upload-time = "2026-02-19T07:33:36.647Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/17/9a/d2bab76d2bb15062c6a2329614653e4f8bec9c78eec9069856ef0c7c0a79/flashinfer_python-0.6.4-py3-none-any.whl", hash = "sha256:105596b505892ae330af84e250ee0eb6fc2c3a22e8dc42bd46de1b90d36004c8", size = 7819999, upload-time = "2026-02-19T07:33:34.82Z" },
+]
+
+[[package]]
+name = "fonttools"
+version = "4.61.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ec/ca/cf17b88a8df95691275a3d77dc0a5ad9907f328ae53acbe6795da1b2f5ed/fonttools-4.61.1.tar.gz", hash = "sha256:6675329885c44657f826ef01d9e4fb33b9158e9d93c537d84ad8399539bc6f69", size = 3565756, upload-time = "2025-12-12T17:31:24.246Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/6f/16/7decaa24a1bd3a70c607b2e29f0adc6159f36a7e40eaba59846414765fd4/fonttools-4.61.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f3cb4a569029b9f291f88aafc927dd53683757e640081ca8c412781ea144565e", size = 2851593, upload-time = "2025-12-12T17:30:04.225Z" },
+ { url = "https://files.pythonhosted.org/packages/94/98/3c4cb97c64713a8cf499b3245c3bf9a2b8fd16a3e375feff2aed78f96259/fonttools-4.61.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41a7170d042e8c0024703ed13b71893519a1a6d6e18e933e3ec7507a2c26a4b2", size = 2400231, upload-time = "2025-12-12T17:30:06.47Z" },
+ { url = "https://files.pythonhosted.org/packages/b7/37/82dbef0f6342eb01f54bca073ac1498433d6ce71e50c3c3282b655733b31/fonttools-4.61.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10d88e55330e092940584774ee5e8a6971b01fc2f4d3466a1d6c158230880796", size = 4954103, upload-time = "2025-12-12T17:30:08.432Z" },
+ { url = "https://files.pythonhosted.org/packages/6c/44/f3aeac0fa98e7ad527f479e161aca6c3a1e47bb6996b053d45226fe37bf2/fonttools-4.61.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:15acc09befd16a0fb8a8f62bc147e1a82817542d72184acca9ce6e0aeda9fa6d", size = 5004295, upload-time = "2025-12-12T17:30:10.56Z" },
+ { url = "https://files.pythonhosted.org/packages/14/e8/7424ced75473983b964d09f6747fa09f054a6d656f60e9ac9324cf40c743/fonttools-4.61.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e6bcdf33aec38d16508ce61fd81838f24c83c90a1d1b8c68982857038673d6b8", size = 4944109, upload-time = "2025-12-12T17:30:12.874Z" },
+ { url = "https://files.pythonhosted.org/packages/c8/8b/6391b257fa3d0b553d73e778f953a2f0154292a7a7a085e2374b111e5410/fonttools-4.61.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5fade934607a523614726119164ff621e8c30e8fa1ffffbbd358662056ba69f0", size = 5093598, upload-time = "2025-12-12T17:30:15.79Z" },
+ { url = "https://files.pythonhosted.org/packages/d9/71/fd2ea96cdc512d92da5678a1c98c267ddd4d8c5130b76d0f7a80f9a9fde8/fonttools-4.61.1-cp312-cp312-win32.whl", hash = "sha256:75da8f28eff26defba42c52986de97b22106cb8f26515b7c22443ebc9c2d3261", size = 2269060, upload-time = "2025-12-12T17:30:18.058Z" },
+ { url = "https://files.pythonhosted.org/packages/80/3b/a3e81b71aed5a688e89dfe0e2694b26b78c7d7f39a5ffd8a7d75f54a12a8/fonttools-4.61.1-cp312-cp312-win_amd64.whl", hash = "sha256:497c31ce314219888c0e2fce5ad9178ca83fe5230b01a5006726cdf3ac9f24d9", size = 2319078, upload-time = "2025-12-12T17:30:22.862Z" },
+ { url = "https://files.pythonhosted.org/packages/c7/4e/ce75a57ff3aebf6fc1f4e9d508b8e5810618a33d900ad6c19eb30b290b97/fonttools-4.61.1-py3-none-any.whl", hash = "sha256:17d2bf5d541add43822bcf0c43d7d847b160c9bb01d15d5007d84e2217aaa371", size = 1148996, upload-time = "2025-12-12T17:31:21.03Z" },
+]
+
+[[package]]
+name = "frozenlist"
+version = "1.8.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/2d/f5/c831fac6cc817d26fd54c7eaccd04ef7e0288806943f7cc5bbf69f3ac1f0/frozenlist-1.8.0.tar.gz", hash = "sha256:3ede829ed8d842f6cd48fc7081d7a41001a56f1f38603f9d49bf3020d59a31ad", size = 45875, upload-time = "2025-10-06T05:38:17.865Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/69/29/948b9aa87e75820a38650af445d2ef2b6b8a6fab1a23b6bb9e4ef0be2d59/frozenlist-1.8.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78f7b9e5d6f2fdb88cdde9440dc147259b62b9d3b019924def9f6478be254ac1", size = 87782, upload-time = "2025-10-06T05:36:06.649Z" },
+ { url = "https://files.pythonhosted.org/packages/64/80/4f6e318ee2a7c0750ed724fa33a4bdf1eacdc5a39a7a24e818a773cd91af/frozenlist-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:229bf37d2e4acdaf808fd3f06e854a4a7a3661e871b10dc1f8f1896a3b05f18b", size = 50594, upload-time = "2025-10-06T05:36:07.69Z" },
+ { url = "https://files.pythonhosted.org/packages/2b/94/5c8a2b50a496b11dd519f4a24cb5496cf125681dd99e94c604ccdea9419a/frozenlist-1.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f833670942247a14eafbb675458b4e61c82e002a148f49e68257b79296e865c4", size = 50448, upload-time = "2025-10-06T05:36:08.78Z" },
+ { url = "https://files.pythonhosted.org/packages/6a/bd/d91c5e39f490a49df14320f4e8c80161cfcce09f1e2cde1edd16a551abb3/frozenlist-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:494a5952b1c597ba44e0e78113a7266e656b9794eec897b19ead706bd7074383", size = 242411, upload-time = "2025-10-06T05:36:09.801Z" },
+ { url = "https://files.pythonhosted.org/packages/8f/83/f61505a05109ef3293dfb1ff594d13d64a2324ac3482be2cedc2be818256/frozenlist-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96f423a119f4777a4a056b66ce11527366a8bb92f54e541ade21f2374433f6d4", size = 243014, upload-time = "2025-10-06T05:36:11.394Z" },
+ { url = "https://files.pythonhosted.org/packages/d8/cb/cb6c7b0f7d4023ddda30cf56b8b17494eb3a79e3fda666bf735f63118b35/frozenlist-1.8.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3462dd9475af2025c31cc61be6652dfa25cbfb56cbbf52f4ccfe029f38decaf8", size = 234909, upload-time = "2025-10-06T05:36:12.598Z" },
+ { url = "https://files.pythonhosted.org/packages/31/c5/cd7a1f3b8b34af009fb17d4123c5a778b44ae2804e3ad6b86204255f9ec5/frozenlist-1.8.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4c800524c9cd9bac5166cd6f55285957fcfc907db323e193f2afcd4d9abd69b", size = 250049, upload-time = "2025-10-06T05:36:14.065Z" },
+ { url = "https://files.pythonhosted.org/packages/c0/01/2f95d3b416c584a1e7f0e1d6d31998c4a795f7544069ee2e0962a4b60740/frozenlist-1.8.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d6a5df73acd3399d893dafc71663ad22534b5aa4f94e8a2fabfe856c3c1b6a52", size = 256485, upload-time = "2025-10-06T05:36:15.39Z" },
+ { url = "https://files.pythonhosted.org/packages/ce/03/024bf7720b3abaebcff6d0793d73c154237b85bdf67b7ed55e5e9596dc9a/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:405e8fe955c2280ce66428b3ca55e12b3c4e9c336fb2103a4937e891c69a4a29", size = 237619, upload-time = "2025-10-06T05:36:16.558Z" },
+ { url = "https://files.pythonhosted.org/packages/69/fa/f8abdfe7d76b731f5d8bd217827cf6764d4f1d9763407e42717b4bed50a0/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:908bd3f6439f2fef9e85031b59fd4f1297af54415fb60e4254a95f75b3cab3f3", size = 250320, upload-time = "2025-10-06T05:36:17.821Z" },
+ { url = "https://files.pythonhosted.org/packages/f5/3c/b051329f718b463b22613e269ad72138cc256c540f78a6de89452803a47d/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:294e487f9ec720bd8ffcebc99d575f7eff3568a08a253d1ee1a0378754b74143", size = 246820, upload-time = "2025-10-06T05:36:19.046Z" },
+ { url = "https://files.pythonhosted.org/packages/0f/ae/58282e8f98e444b3f4dd42448ff36fa38bef29e40d40f330b22e7108f565/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:74c51543498289c0c43656701be6b077f4b265868fa7f8a8859c197006efb608", size = 250518, upload-time = "2025-10-06T05:36:20.763Z" },
+ { url = "https://files.pythonhosted.org/packages/8f/96/007e5944694d66123183845a106547a15944fbbb7154788cbf7272789536/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:776f352e8329135506a1d6bf16ac3f87bc25b28e765949282dcc627af36123aa", size = 239096, upload-time = "2025-10-06T05:36:22.129Z" },
+ { url = "https://files.pythonhosted.org/packages/66/bb/852b9d6db2fa40be96f29c0d1205c306288f0684df8fd26ca1951d461a56/frozenlist-1.8.0-cp312-cp312-win32.whl", hash = "sha256:433403ae80709741ce34038da08511d4a77062aa924baf411ef73d1146e74faf", size = 39985, upload-time = "2025-10-06T05:36:23.661Z" },
+ { url = "https://files.pythonhosted.org/packages/b8/af/38e51a553dd66eb064cdf193841f16f077585d4d28394c2fa6235cb41765/frozenlist-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:34187385b08f866104f0c0617404c8eb08165ab1272e884abc89c112e9c00746", size = 44591, upload-time = "2025-10-06T05:36:24.958Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/06/1dc65480ab147339fecc70797e9c2f69d9cea9cf38934ce08df070fdb9cb/frozenlist-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:fe3c58d2f5db5fbd18c2987cba06d51b0529f52bc3a6cdc33d3f4eab725104bd", size = 40102, upload-time = "2025-10-06T05:36:26.333Z" },
+ { url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409, upload-time = "2025-10-06T05:38:16.721Z" },
+]
+
+[[package]]
+name = "fsspec"
+version = "2025.10.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/24/7f/2747c0d332b9acfa75dc84447a066fdf812b5a6b8d30472b74d309bfe8cb/fsspec-2025.10.0.tar.gz", hash = "sha256:b6789427626f068f9a83ca4e8a3cc050850b6c0f71f99ddb4f542b8266a26a59", size = 309285, upload-time = "2025-10-30T14:58:44.036Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
+]
+
+[package.optional-dependencies]
+http = [
+ { name = "aiohttp" },
+]
+
+[[package]]
+name = "gguf"
+version = "0.18.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "numpy" },
+ { name = "pyyaml" },
+ { name = "requests" },
+ { name = "tqdm" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3f/26/7622a41c39db9d7090225a4bf8368550e59694dcf7313b44f9a82b501209/gguf-0.18.0.tar.gz", hash = "sha256:b4659093d5d0dccdb5902a904d54b327f4052879fe5e90946ad5fce9f8018c2e", size = 107170, upload-time = "2026-02-27T15:05:39.254Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5e/0c/e0f1eae7535a97476fb903f65301e35da2a66182b8161066b7eb312b2cb8/gguf-0.18.0-py3-none-any.whl", hash = "sha256:af93f7ef198a265cbde5fa6a6b3101528bca285903949ab0a3e591cd993a1864", size = 114244, upload-time = "2026-02-27T15:05:37.991Z" },
+]
+
+[[package]]
+name = "google-auth"
+version = "2.48.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "cryptography" },
+ { name = "pyasn1-modules" },
+ { name = "rsa" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/0c/41/242044323fbd746615884b1c16639749e73665b718209946ebad7ba8a813/google_auth-2.48.0.tar.gz", hash = "sha256:4f7e706b0cd3208a3d940a19a822c37a476ddba5450156c3e6624a71f7c841ce", size = 326522, upload-time = "2026-01-26T19:22:47.157Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/83/1d/d6466de3a5249d35e832a52834115ca9d1d0de6abc22065f049707516d47/google_auth-2.48.0-py3-none-any.whl", hash = "sha256:2e2a537873d449434252a9632c28bfc268b0adb1e53f9fb62afc5333a975903f", size = 236499, upload-time = "2026-01-26T19:22:45.099Z" },
+]
+
+[package.optional-dependencies]
+requests = [
+ { name = "requests" },
+]
+
+[[package]]
+name = "google-genai"
+version = "1.65.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "anyio" },
+ { name = "distro" },
+ { name = "google-auth", extra = ["requests"] },
+ { name = "httpx" },
+ { name = "pydantic" },
+ { name = "requests" },
+ { name = "sniffio" },
+ { name = "tenacity" },
+ { name = "typing-extensions" },
+ { name = "websockets" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/79/f9/cc1191c2540d6a4e24609a586c4ed45d2db57cfef47931c139ee70e5874a/google_genai-1.65.0.tar.gz", hash = "sha256:d470eb600af802d58a79c7f13342d9ea0d05d965007cae8f76c7adff3d7a4750", size = 497206, upload-time = "2026-02-26T00:20:33.824Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/68/3c/3fea4e7c91357c71782d7dcaad7a2577d636c90317e003386893c25bc62c/google_genai-1.65.0-py3-none-any.whl", hash = "sha256:68c025205856919bc03edb0155c11b4b833810b7ce17ad4b7a9eeba5158f6c44", size = 724429, upload-time = "2026-02-26T00:20:32.186Z" },
+]
+
+[[package]]
+name = "googleapis-common-protos"
+version = "1.72.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e5/7b/adfd75544c415c487b33061fe7ae526165241c1ea133f9a9125a56b39fd8/googleapis_common_protos-1.72.0.tar.gz", hash = "sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5", size = 147433, upload-time = "2025-11-06T18:29:24.087Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" },
+]
+
+[[package]]
+name = "grpcio"
+version = "1.78.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1f/de/de568532d9907552700f80dcec38219d8d298ad9e71f5e0a095abaf2761e/grpcio-1.78.1.tar.gz", hash = "sha256:27c625532d33ace45d57e775edf1982e183ff8641c72e4e91ef7ba667a149d72", size = 12835760, upload-time = "2026-02-20T01:16:10.869Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ab/ed/d2eb9d27fded1a76b2a80eb9aa8b12101da7e41ce2bac0ad3651e88a14ae/grpcio-1.78.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:41e4605c923e0e9a84a2718e4948a53a530172bfaf1a6d1ded16ef9c5849fca2", size = 5913389, upload-time = "2026-02-20T01:13:49.005Z" },
+ { url = "https://files.pythonhosted.org/packages/69/1b/40034e9ab010eeb3fa41ec61d8398c6dbf7062f3872c866b8f72700e2522/grpcio-1.78.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:39da1680d260c0c619c3b5fa2dc47480ca24d5704c7a548098bca7de7f5dd17f", size = 11811839, upload-time = "2026-02-20T01:13:51.839Z" },
+ { url = "https://files.pythonhosted.org/packages/b4/69/fe16ef2979ea62b8aceb3a3f1e7a8bbb8b717ae2a44b5899d5d426073273/grpcio-1.78.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b5d5881d72a09b8336a8f874784a8eeffacde44a7bc1a148bce5a0243a265ef0", size = 6475805, upload-time = "2026-02-20T01:13:55.423Z" },
+ { url = "https://files.pythonhosted.org/packages/5b/1e/069e0a9062167db18446917d7c00ae2e91029f96078a072bedc30aaaa8c3/grpcio-1.78.1-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:888ceb7821acd925b1c90f0cdceaed1386e69cfe25e496e0771f6c35a156132f", size = 7169955, upload-time = "2026-02-20T01:13:59.553Z" },
+ { url = "https://files.pythonhosted.org/packages/38/fc/44a57e2bb4a755e309ee4e9ed2b85c9af93450b6d3118de7e69410ee05fa/grpcio-1.78.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8942bdfc143b467c264b048862090c4ba9a0223c52ae28c9ae97754361372e42", size = 6690767, upload-time = "2026-02-20T01:14:02.31Z" },
+ { url = "https://files.pythonhosted.org/packages/b8/87/21e16345d4c75046d453916166bc72a3309a382c8e97381ec4b8c1a54729/grpcio-1.78.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:716a544969660ed609164aff27b2effd3ff84e54ac81aa4ce77b1607ca917d22", size = 7266846, upload-time = "2026-02-20T01:14:12.974Z" },
+ { url = "https://files.pythonhosted.org/packages/11/df/d6261983f9ca9ef4d69893765007a9a3211b91d9faf85a2591063df381c7/grpcio-1.78.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4d50329b081c223d444751076bb5b389d4f06c2b32d51b31a1e98172e6cecfb9", size = 8253522, upload-time = "2026-02-20T01:14:17.407Z" },
+ { url = "https://files.pythonhosted.org/packages/de/7c/4f96a0ff113c5d853a27084d7590cd53fdb05169b596ea9f5f27f17e021e/grpcio-1.78.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7e836778c13ff70edada16567e8da0c431e8818eaae85b80d11c1ba5782eccbb", size = 7698070, upload-time = "2026-02-20T01:14:20.032Z" },
+ { url = "https://files.pythonhosted.org/packages/17/3c/7b55c0b5af88fbeb3d0c13e25492d3ace41ac9dbd0f5f8f6c0fb613b6706/grpcio-1.78.1-cp312-cp312-win32.whl", hash = "sha256:07eb016ea7444a22bef465cce045512756956433f54450aeaa0b443b8563b9ca", size = 4066474, upload-time = "2026-02-20T01:14:22.602Z" },
+ { url = "https://files.pythonhosted.org/packages/5d/17/388c12d298901b0acf10b612b650692bfed60e541672b1d8965acbf2d722/grpcio-1.78.1-cp312-cp312-win_amd64.whl", hash = "sha256:02b82dcd2fa580f5e82b4cf62ecde1b3c7cc9ba27b946421200706a6e5acaf85", size = 4797537, upload-time = "2026-02-20T01:14:25.444Z" },
+]
+
+[[package]]
+name = "grpcio-reflection"
+version = "1.71.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "grpcio" },
+ { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/41/14/4e5f8e902fa9461abae292773b921a578f68333c7c3e731bcff7514f78cd/grpcio_reflection-1.71.2.tar.gz", hash = "sha256:bedfac3d2095d6c066b16b66bfce85b4be3e92dc9f3b7121e6f019d24a9c09c0", size = 18798, upload-time = "2025-06-28T04:24:06.019Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a3/89/c99ff79b90315cf47dbcdd86babb637764e5f14f523d622020bfee57dc4d/grpcio_reflection-1.71.2-py3-none-any.whl", hash = "sha256:c4f1a0959acb94ec9e1369bb7dab827cc9a6efcc448bdb10436246c8e52e2f57", size = 22684, upload-time = "2025-06-28T04:23:44.759Z" },
+]
+
+[[package]]
+name = "h11"
+version = "0.16.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
+]
+
+[[package]]
+name = "hf-xet"
+version = "1.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4f/3a/9aa61729228fb03e946409c51963f0cd2fd7c109f4ab93edc5f04a10be86/hf_xet-1.3.0.tar.gz", hash = "sha256:9c154ad63e17aca970987b2cf17dbd8a0c09bb18aeb246f637647a8058e4522b", size = 641390, upload-time = "2026-02-24T00:16:19.935Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a1/00/22d3d896466ded4c46ef6465b85fa434fa97d79f8f61cea322afde1d6157/hf_xet-1.3.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:df4447f69086dcc6418583315eda6ed09033ac1fbbc784fedcbbbdf67bea1680", size = 3761293, upload-time = "2026-02-24T00:16:06.012Z" },
+ { url = "https://files.pythonhosted.org/packages/97/fd/ebb0ea49e9bd9eb9f52844e417e0e6e9c8a59a1e84790691873fa910adc5/hf_xet-1.3.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:39f4fe714628adc2214ab4a67391182ee751bc4db581868cb3204900817758a8", size = 3523345, upload-time = "2026-02-24T00:16:04.615Z" },
+ { url = "https://files.pythonhosted.org/packages/8a/bb/72ceaaf619cad23d151a281d52e15456bae72f52c3795e820c0b64a5f637/hf_xet-1.3.0-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9b16e53ed6b5c8197cefb3fd12047a430b7034428effed463c03cec68de7e9a3", size = 4178623, upload-time = "2026-02-24T00:15:57.857Z" },
+ { url = "https://files.pythonhosted.org/packages/19/30/3280f4b5e407b442923a80ac0b2d96a65be7494457c55695e63f9a2b33dd/hf_xet-1.3.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:92051a1f73019489be77f6837671024ec785a3d1b888466b09d3a9ea15c4a1b5", size = 3958884, upload-time = "2026-02-24T00:15:56.326Z" },
+ { url = "https://files.pythonhosted.org/packages/8f/13/5174c6d52583e54a761c88570ca657d621ac684747613f47846debfd6d4d/hf_xet-1.3.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:943046b160e7804a85e68a659d2eee1a83ce3661f72d1294d3cc5ece0f45a355", size = 4158146, upload-time = "2026-02-24T00:16:13.158Z" },
+ { url = "https://files.pythonhosted.org/packages/12/13/ea8619021b119e19efdcaeec72f762b5be923cf79b5d4434f2cbbff39829/hf_xet-1.3.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9b798a95d41b4f33b0b455c8aa76ff1fd26a587a4dd3bdec29f0a37c60b78a2f", size = 4395565, upload-time = "2026-02-24T00:16:14.574Z" },
+ { url = "https://files.pythonhosted.org/packages/64/cd/b81d922118a171bfbbecffd60a477e79188ab876260412fac47226a685bf/hf_xet-1.3.0-cp37-abi3-win_amd64.whl", hash = "sha256:227eee5b99d19b9f20c31d901a0c2373af610a24a34e6c2701072c9de48d6d95", size = 3637830, upload-time = "2026-02-24T00:16:22.474Z" },
+]
+
+[[package]]
+name = "httpcore"
+version = "1.0.9"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "certifi" },
+ { name = "h11" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
+]
+
+[[package]]
+name = "httptools"
+version = "0.7.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/46/120a669232c7bdedb9d52d4aeae7e6c7dfe151e99dc70802e2fc7a5e1993/httptools-0.7.1.tar.gz", hash = "sha256:abd72556974f8e7c74a259655924a717a2365b236c882c3f6f8a45fe94703ac9", size = 258961, upload-time = "2025-10-10T03:55:08.559Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/53/7f/403e5d787dc4942316e515e949b0c8a013d84078a915910e9f391ba9b3ed/httptools-0.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:38e0c83a2ea9746ebbd643bdfb521b9aa4a91703e2cd705c20443405d2fd16a5", size = 206280, upload-time = "2025-10-10T03:54:39.274Z" },
+ { url = "https://files.pythonhosted.org/packages/2a/0d/7f3fd28e2ce311ccc998c388dd1c53b18120fda3b70ebb022b135dc9839b/httptools-0.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f25bbaf1235e27704f1a7b86cd3304eabc04f569c828101d94a0e605ef7205a5", size = 110004, upload-time = "2025-10-10T03:54:40.403Z" },
+ { url = "https://files.pythonhosted.org/packages/84/a6/b3965e1e146ef5762870bbe76117876ceba51a201e18cc31f5703e454596/httptools-0.7.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2c15f37ef679ab9ecc06bfc4e6e8628c32a8e4b305459de7cf6785acd57e4d03", size = 517655, upload-time = "2025-10-10T03:54:41.347Z" },
+ { url = "https://files.pythonhosted.org/packages/11/7d/71fee6f1844e6fa378f2eddde6c3e41ce3a1fb4b2d81118dd544e3441ec0/httptools-0.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7fe6e96090df46b36ccfaf746f03034e5ab723162bc51b0a4cf58305324036f2", size = 511440, upload-time = "2025-10-10T03:54:42.452Z" },
+ { url = "https://files.pythonhosted.org/packages/22/a5/079d216712a4f3ffa24af4a0381b108aa9c45b7a5cc6eb141f81726b1823/httptools-0.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f72fdbae2dbc6e68b8239defb48e6a5937b12218e6ffc2c7846cc37befa84362", size = 495186, upload-time = "2025-10-10T03:54:43.937Z" },
+ { url = "https://files.pythonhosted.org/packages/e9/9e/025ad7b65278745dee3bd0ebf9314934c4592560878308a6121f7f812084/httptools-0.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e99c7b90a29fd82fea9ef57943d501a16f3404d7b9ee81799d41639bdaae412c", size = 499192, upload-time = "2025-10-10T03:54:45.003Z" },
+ { url = "https://files.pythonhosted.org/packages/6d/de/40a8f202b987d43afc4d54689600ff03ce65680ede2f31df348d7f368b8f/httptools-0.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:3e14f530fefa7499334a79b0cf7e7cd2992870eb893526fb097d51b4f2d0f321", size = 86694, upload-time = "2025-10-10T03:54:45.923Z" },
+]
+
+[[package]]
+name = "httpx"
+version = "0.28.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "anyio" },
+ { name = "certifi" },
+ { name = "httpcore" },
+ { name = "idna" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
+]
+
+[[package]]
+name = "httpx-sse"
+version = "0.4.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943, upload-time = "2025-10-10T21:48:22.271Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" },
+]
+
+[[package]]
+name = "huggingface-hub"
+version = "0.36.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "filelock" },
+ { name = "fsspec" },
+ { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
+ { name = "packaging" },
+ { name = "pyyaml" },
+ { name = "requests" },
+ { name = "tqdm" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7c/b7/8cb61d2eece5fb05a83271da168186721c450eb74e3c31f7ef3169fa475b/huggingface_hub-0.36.2.tar.gz", hash = "sha256:1934304d2fb224f8afa3b87007d58501acfda9215b334eed53072dd5e815ff7a", size = 649782, upload-time = "2026-02-06T09:24:13.098Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a8/af/48ac8483240de756d2438c380746e7130d1c6f75802ef22f3c6d49982787/huggingface_hub-0.36.2-py3-none-any.whl", hash = "sha256:48f0c8eac16145dfce371e9d2d7772854a4f591bcb56c9cf548accf531d54270", size = 566395, upload-time = "2026-02-06T09:24:11.133Z" },
+]
+
+[[package]]
+name = "idna"
+version = "3.11"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
+]
+
+[[package]]
+name = "ijson"
+version = "3.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f4/57/60d1a6a512f2f0508d0bc8b4f1cc5616fd3196619b66bd6a01f9155a1292/ijson-3.5.0.tar.gz", hash = "sha256:94688760720e3f5212731b3cb8d30267f9a045fb38fb3870254e7b9504246f31", size = 68658, upload-time = "2026-02-24T03:58:30.974Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/aa/17/9c63c7688025f3a8c47ea717b8306649c8c7244e49e20a2be4e3515dc75c/ijson-3.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1ebefbe149a6106cc848a3eaf536af51a9b5ccc9082de801389f152dba6ab755", size = 88536, upload-time = "2026-02-24T03:57:06.809Z" },
+ { url = "https://files.pythonhosted.org/packages/6f/dd/e15c2400244c117b06585452ebc63ae254f5a6964f712306afd1422daae0/ijson-3.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:19e30d9f00f82e64de689c0b8651b9cfed879c184b139d7e1ea5030cec401c21", size = 60499, upload-time = "2026-02-24T03:57:09.155Z" },
+ { url = "https://files.pythonhosted.org/packages/77/a9/bf4fe3538a0c965f16b406f180a06105b875da83f0743e36246be64ef550/ijson-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a04a33ee78a6f27b9b8528c1ca3c207b1df3b8b867a4cf2fcc4109986f35c227", size = 60330, upload-time = "2026-02-24T03:57:10.574Z" },
+ { url = "https://files.pythonhosted.org/packages/31/76/6f91bdb019dd978fce1bc5ea1cd620cfc096d258126c91db2c03a20a7f34/ijson-3.5.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7d48dc2984af02eb3c56edfb3f13b3f62f2f3e4fe36f058c8cfc75d93adf4fed", size = 138977, upload-time = "2026-02-24T03:57:11.932Z" },
+ { url = "https://files.pythonhosted.org/packages/11/be/bbc983059e48a54b0121ee60042979faed7674490bbe7b2c41560db3f436/ijson-3.5.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1e73a44844d9adbca9cf2c4132cd875933e83f3d4b23881fcaf82be83644c7d", size = 149785, upload-time = "2026-02-24T03:57:13.255Z" },
+ { url = "https://files.pythonhosted.org/packages/6d/81/2fee58f9024a3449aee83edfa7167fb5ccd7e1af2557300e28531bb68e16/ijson-3.5.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7389a56b8562a19948bdf1d7bae3a2edc8c7f86fb59834dcb1c4c722818e645a", size = 149729, upload-time = "2026-02-24T03:57:14.191Z" },
+ { url = "https://files.pythonhosted.org/packages/c7/56/f1706761fcc096c9d414b3dcd000b1e6e5c24364c21cfba429837f98ee8d/ijson-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3176f23f8ebec83f374ed0c3b4e5a0c4db7ede54c005864efebbed46da123608", size = 150697, upload-time = "2026-02-24T03:57:15.855Z" },
+ { url = "https://files.pythonhosted.org/packages/d9/6e/ee0d9c875a0193b632b3e9ccd1b22a50685fb510256ad57ba483b6529f77/ijson-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6babd88e508630c6ef86c9bebaaf13bb2fb8ec1d8f8868773a03c20253f599bc", size = 142873, upload-time = "2026-02-24T03:57:16.831Z" },
+ { url = "https://files.pythonhosted.org/packages/d2/bf/f9d4399d0e6e3fd615035290a71e97c843f17f329b43638c0a01cf112d73/ijson-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dc1b3836b174b6db2fa8319f1926fb5445abd195dc963368092103f8579cb8ed", size = 151583, upload-time = "2026-02-24T03:57:17.757Z" },
+ { url = "https://files.pythonhosted.org/packages/b2/71/a7254a065933c0e2ffd3586f46187d84830d3d7b6f41cfa5901820a4f87d/ijson-3.5.0-cp312-cp312-win32.whl", hash = "sha256:6673de9395fb9893c1c79a43becd8c8fbee0a250be6ea324bfd1487bb5e9ee4c", size = 53079, upload-time = "2026-02-24T03:57:18.703Z" },
+ { url = "https://files.pythonhosted.org/packages/8f/7b/2edca79b359fc9f95d774616867a03ecccdf333797baf5b3eea79733918c/ijson-3.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:f4f7fabd653459dcb004175235f310435959b1bb5dfa8878578391c6cc9ad944", size = 55500, upload-time = "2026-02-24T03:57:20.428Z" },
+]
+
+[[package]]
+name = "importlib-metadata"
+version = "8.7.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "zipp" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f3/49/3b30cad09e7771a4982d9975a8cbf64f00d4a1ececb53297f1d9a7be1b10/importlib_metadata-8.7.1.tar.gz", hash = "sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb", size = 57107, upload-time = "2025-12-21T10:00:19.278Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/fa/5e/f8e9a1d23b9c20a551a8a02ea3637b4642e22c2626e3a13a9a29cdea99eb/importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151", size = 27865, upload-time = "2025-12-21T10:00:18.329Z" },
+]
+
+[[package]]
+name = "interegular"
+version = "0.3.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/dc/9d/8b6dde58a028a3962ce17e84d5fe73758df61378e00ef8ac3d85da34b0ff/interegular-0.3.3.tar.gz", hash = "sha256:d9b697b21b34884711399ba0f0376914b81899ce670032486d0d048344a76600", size = 24705, upload-time = "2024-01-06T23:01:22.372Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c4/01/72d6472f80651673716d1deda2a5bbb633e563ecf94f4479da5519d69d25/interegular-0.3.3-py37-none-any.whl", hash = "sha256:b0c07007d48c89d6d19f7204972d369b2a77222722e126b6aa63aa721dc3b19c", size = 23635, upload-time = "2024-01-06T23:01:20.829Z" },
+]
+
+[[package]]
+name = "jinja2"
+version = "3.1.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "markupsafe" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
+]
+
+[[package]]
+name = "jiter"
+version = "0.13.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0d/5e/4ec91646aee381d01cdb9974e30882c9cd3b8c5d1079d6b5ff4af522439a/jiter-0.13.0.tar.gz", hash = "sha256:f2839f9c2c7e2dffc1bc5929a510e14ce0a946be9365fd1219e7ef342dae14f4", size = 164847, upload-time = "2026-02-02T12:37:56.441Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2e/30/7687e4f87086829955013ca12a9233523349767f69653ebc27036313def9/jiter-0.13.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0a2bd69fc1d902e89925fc34d1da51b2128019423d7b339a45d9e99c894e0663", size = 307958, upload-time = "2026-02-02T12:35:57.165Z" },
+ { url = "https://files.pythonhosted.org/packages/c3/27/e57f9a783246ed95481e6749cc5002a8a767a73177a83c63ea71f0528b90/jiter-0.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f917a04240ef31898182f76a332f508f2cc4b57d2b4d7ad2dbfebbfe167eb505", size = 318597, upload-time = "2026-02-02T12:35:58.591Z" },
+ { url = "https://files.pythonhosted.org/packages/cf/52/e5719a60ac5d4d7c5995461a94ad5ef962a37c8bf5b088390e6fad59b2ff/jiter-0.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1e2b199f446d3e82246b4fd9236d7cb502dc2222b18698ba0d986d2fecc6152", size = 348821, upload-time = "2026-02-02T12:36:00.093Z" },
+ { url = "https://files.pythonhosted.org/packages/61/db/c1efc32b8ba4c740ab3fc2d037d8753f67685f475e26b9d6536a4322bcdd/jiter-0.13.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04670992b576fa65bd056dbac0c39fe8bd67681c380cb2b48efa885711d9d726", size = 364163, upload-time = "2026-02-02T12:36:01.937Z" },
+ { url = "https://files.pythonhosted.org/packages/55/8a/fb75556236047c8806995671a18e4a0ad646ed255276f51a20f32dceaeec/jiter-0.13.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a1aff1fbdb803a376d4d22a8f63f8e7ccbce0b4890c26cc7af9e501ab339ef0", size = 483709, upload-time = "2026-02-02T12:36:03.41Z" },
+ { url = "https://files.pythonhosted.org/packages/7e/16/43512e6ee863875693a8e6f6d532e19d650779d6ba9a81593ae40a9088ff/jiter-0.13.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b3fb8c2053acaef8580809ac1d1f7481a0a0bdc012fd7f5d8b18fb696a5a089", size = 370480, upload-time = "2026-02-02T12:36:04.791Z" },
+ { url = "https://files.pythonhosted.org/packages/f8/4c/09b93e30e984a187bc8aaa3510e1ec8dcbdcd71ca05d2f56aac0492453aa/jiter-0.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdaba7d87e66f26a2c45d8cbadcbfc4bf7884182317907baf39cfe9775bb4d93", size = 360735, upload-time = "2026-02-02T12:36:06.994Z" },
+ { url = "https://files.pythonhosted.org/packages/1a/1b/46c5e349019874ec5dfa508c14c37e29864ea108d376ae26d90bee238cd7/jiter-0.13.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7b88d649135aca526da172e48083da915ec086b54e8e73a425ba50999468cc08", size = 391814, upload-time = "2026-02-02T12:36:08.368Z" },
+ { url = "https://files.pythonhosted.org/packages/15/9e/26184760e85baee7162ad37b7912797d2077718476bf91517641c92b3639/jiter-0.13.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e404ea551d35438013c64b4f357b0474c7abf9f781c06d44fcaf7a14c69ff9e2", size = 513990, upload-time = "2026-02-02T12:36:09.993Z" },
+ { url = "https://files.pythonhosted.org/packages/e9/34/2c9355247d6debad57a0a15e76ab1566ab799388042743656e566b3b7de1/jiter-0.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1f4748aad1b4a93c8bdd70f604d0f748cdc0e8744c5547798acfa52f10e79228", size = 548021, upload-time = "2026-02-02T12:36:11.376Z" },
+ { url = "https://files.pythonhosted.org/packages/ac/4a/9f2c23255d04a834398b9c2e0e665382116911dc4d06b795710503cdad25/jiter-0.13.0-cp312-cp312-win32.whl", hash = "sha256:0bf670e3b1445fc4d31612199f1744f67f889ee1bbae703c4b54dc097e5dd394", size = 203024, upload-time = "2026-02-02T12:36:12.682Z" },
+ { url = "https://files.pythonhosted.org/packages/09/ee/f0ae675a957ae5a8f160be3e87acea6b11dc7b89f6b7ab057e77b2d2b13a/jiter-0.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:15db60e121e11fe186c0b15236bd5d18381b9ddacdcf4e659feb96fc6c969c92", size = 205424, upload-time = "2026-02-02T12:36:13.93Z" },
+ { url = "https://files.pythonhosted.org/packages/1b/02/ae611edf913d3cbf02c97cdb90374af2082c48d7190d74c1111dde08bcdd/jiter-0.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:41f92313d17989102f3cb5dd533a02787cdb99454d494344b0361355da52fcb9", size = 186818, upload-time = "2026-02-02T12:36:15.308Z" },
+ { url = "https://files.pythonhosted.org/packages/80/60/e50fa45dd7e2eae049f0ce964663849e897300433921198aef94b6ffa23a/jiter-0.13.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:3d744a6061afba08dd7ae375dcde870cffb14429b7477e10f67e9e6d68772a0a", size = 305169, upload-time = "2026-02-02T12:37:50.376Z" },
+ { url = "https://files.pythonhosted.org/packages/d2/73/a009f41c5eed71c49bec53036c4b33555afcdee70682a18c6f66e396c039/jiter-0.13.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:ff732bd0a0e778f43d5009840f20b935e79087b4dc65bd36f1cd0f9b04b8ff7f", size = 303808, upload-time = "2026-02-02T12:37:52.092Z" },
+ { url = "https://files.pythonhosted.org/packages/c4/10/528b439290763bff3d939268085d03382471b442f212dca4ff5f12802d43/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab44b178f7981fcaea7e0a5df20e773c663d06ffda0198f1a524e91b2fde7e59", size = 337384, upload-time = "2026-02-02T12:37:53.582Z" },
+ { url = "https://files.pythonhosted.org/packages/67/8a/a342b2f0251f3dac4ca17618265d93bf244a2a4d089126e81e4c1056ac50/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19", size = 343768, upload-time = "2026-02-02T12:37:55.055Z" },
+]
+
+[[package]]
+name = "jmespath"
+version = "1.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" },
+]
+
+[[package]]
+name = "joblib"
+version = "1.5.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload-time = "2025-12-15T08:41:46.427Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" },
+]
+
+[[package]]
+name = "jsonschema"
+version = "4.26.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "attrs" },
+ { name = "jsonschema-specifications" },
+ { name = "referencing" },
+ { name = "rpds-py" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" },
+]
+
+[[package]]
+name = "jsonschema-specifications"
+version = "2025.9.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "referencing" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" },
+]
+
+[[package]]
+name = "kaldi-native-fbank"
+version = "1.22.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/3a/2c/84076b352107ce12d56f28c313f1aca1be332d953dd96aec7b84976e6d53/kaldi-native-fbank-1.22.3.tar.gz", hash = "sha256:387bf87225c6b83c93ae652eeaef1b4d531994b6e398e7a77189de340674f9af", size = 71013, upload-time = "2025-10-09T02:31:21.487Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c2/de/fbdbfcc75fad9d9a6f9a250bc986f1002902581eaa47a5948f53a7f11851/kaldi_native_fbank-1.22.3-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:7f636ccdea28bd187f93b06a1e4b9275e42e43af9405b0684fc739e829299c4b", size = 249003, upload-time = "2025-10-09T02:29:48.509Z" },
+ { url = "https://files.pythonhosted.org/packages/77/64/e57ce185dda028b7b9af72cdfb16825bfa52183653945681e7cb8e7c2dfa/kaldi_native_fbank-1.22.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:abd31a8bfe1db62a7ddb0beee84f3a5de9bb559fcdd2b96ca0fb729c551b9412", size = 228933, upload-time = "2025-10-09T02:31:35.8Z" },
+ { url = "https://files.pythonhosted.org/packages/43/28/6f4fd8953c0b3f30de4526fd024095032abcdc25b6736c77a891687c604e/kaldi_native_fbank-1.22.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f5a44b4a83cf9bf13d3f77858928068b06d3ec2238c27ff2e39393fbf7749c9f", size = 298887, upload-time = "2025-10-09T02:30:53.739Z" },
+ { url = "https://files.pythonhosted.org/packages/84/90/01ef7331c52b1eaf9916f3f7a535155aac2e9e2ddad12a141613d92758c7/kaldi_native_fbank-1.22.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f16e74372fe9e20abb4183f98a8e2288d5ee4c48d04d94b6160311170e007661", size = 322002, upload-time = "2025-10-09T02:30:13.04Z" },
+ { url = "https://files.pythonhosted.org/packages/66/1c/fce142bd3aeadb1292360a90ceb91f923c8e12081c21576fe69917243c5f/kaldi_native_fbank-1.22.3-cp312-cp312-win32.whl", hash = "sha256:a90f51377569575fc0d1a66ef7e89a36102bfb6dcd1d15d6c4afb930ce726672", size = 273308, upload-time = "2025-10-09T02:29:59.931Z" },
+ { url = "https://files.pythonhosted.org/packages/cb/8d/c0b0b6280edabad85d7e15093fad612c027e175fe4e0b960ce2f36485143/kaldi_native_fbank-1.22.3-cp312-cp312-win_amd64.whl", hash = "sha256:cbbeea19fe6d584c54e93fe6615a7185b10e0d78fdb6471f9e44596018437c38", size = 308023, upload-time = "2025-10-09T02:28:43.909Z" },
+]
+
+[[package]]
+name = "kiwisolver"
+version = "1.4.9"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5c/3c/85844f1b0feb11ee581ac23fe5fce65cd049a200c1446708cc1b7f922875/kiwisolver-1.4.9.tar.gz", hash = "sha256:c3b22c26c6fd6811b0ae8363b95ca8ce4ea3c202d3d0975b2914310ceb1bcc4d", size = 97564, upload-time = "2025-08-10T21:27:49.279Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/86/c9/13573a747838aeb1c76e3267620daa054f4152444d1f3d1a2324b78255b5/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ac5a486ac389dddcc5bef4f365b6ae3ffff2c433324fb38dd35e3fab7c957999", size = 123686, upload-time = "2025-08-10T21:26:10.034Z" },
+ { url = "https://files.pythonhosted.org/packages/51/ea/2ecf727927f103ffd1739271ca19c424d0e65ea473fbaeea1c014aea93f6/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2ba92255faa7309d06fe44c3a4a97efe1c8d640c2a79a5ef728b685762a6fd2", size = 66460, upload-time = "2025-08-10T21:26:11.083Z" },
+ { url = "https://files.pythonhosted.org/packages/5b/5a/51f5464373ce2aeb5194508298a508b6f21d3867f499556263c64c621914/kiwisolver-1.4.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a2899935e724dd1074cb568ce7ac0dce28b2cd6ab539c8e001a8578eb106d14", size = 64952, upload-time = "2025-08-10T21:26:12.058Z" },
+ { url = "https://files.pythonhosted.org/packages/70/90/6d240beb0f24b74371762873e9b7f499f1e02166a2d9c5801f4dbf8fa12e/kiwisolver-1.4.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f6008a4919fdbc0b0097089f67a1eb55d950ed7e90ce2cc3e640abadd2757a04", size = 1474756, upload-time = "2025-08-10T21:26:13.096Z" },
+ { url = "https://files.pythonhosted.org/packages/12/42/f36816eaf465220f683fb711efdd1bbf7a7005a2473d0e4ed421389bd26c/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:67bb8b474b4181770f926f7b7d2f8c0248cbcb78b660fdd41a47054b28d2a752", size = 1276404, upload-time = "2025-08-10T21:26:14.457Z" },
+ { url = "https://files.pythonhosted.org/packages/2e/64/bc2de94800adc830c476dce44e9b40fd0809cddeef1fde9fcf0f73da301f/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2327a4a30d3ee07d2fbe2e7933e8a37c591663b96ce42a00bc67461a87d7df77", size = 1294410, upload-time = "2025-08-10T21:26:15.73Z" },
+ { url = "https://files.pythonhosted.org/packages/5f/42/2dc82330a70aa8e55b6d395b11018045e58d0bb00834502bf11509f79091/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7a08b491ec91b1d5053ac177afe5290adacf1f0f6307d771ccac5de30592d198", size = 1343631, upload-time = "2025-08-10T21:26:17.045Z" },
+ { url = "https://files.pythonhosted.org/packages/22/fd/f4c67a6ed1aab149ec5a8a401c323cee7a1cbe364381bb6c9c0d564e0e20/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8fc5c867c22b828001b6a38d2eaeb88160bf5783c6cb4a5e440efc981ce286d", size = 2224963, upload-time = "2025-08-10T21:26:18.737Z" },
+ { url = "https://files.pythonhosted.org/packages/45/aa/76720bd4cb3713314677d9ec94dcc21ced3f1baf4830adde5bb9b2430a5f/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3b3115b2581ea35bb6d1f24a4c90af37e5d9b49dcff267eeed14c3893c5b86ab", size = 2321295, upload-time = "2025-08-10T21:26:20.11Z" },
+ { url = "https://files.pythonhosted.org/packages/80/19/d3ec0d9ab711242f56ae0dc2fc5d70e298bb4a1f9dfab44c027668c673a1/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:858e4c22fb075920b96a291928cb7dea5644e94c0ee4fcd5af7e865655e4ccf2", size = 2487987, upload-time = "2025-08-10T21:26:21.49Z" },
+ { url = "https://files.pythonhosted.org/packages/39/e9/61e4813b2c97e86b6fdbd4dd824bf72d28bcd8d4849b8084a357bc0dd64d/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ed0fecd28cc62c54b262e3736f8bb2512d8dcfdc2bcf08be5f47f96bf405b145", size = 2291817, upload-time = "2025-08-10T21:26:22.812Z" },
+ { url = "https://files.pythonhosted.org/packages/a0/41/85d82b0291db7504da3c2defe35c9a8a5c9803a730f297bd823d11d5fb77/kiwisolver-1.4.9-cp312-cp312-win_amd64.whl", hash = "sha256:f68208a520c3d86ea51acf688a3e3002615a7f0238002cccc17affecc86a8a54", size = 73895, upload-time = "2025-08-10T21:26:24.37Z" },
+ { url = "https://files.pythonhosted.org/packages/e2/92/5f3068cf15ee5cb624a0c7596e67e2a0bb2adee33f71c379054a491d07da/kiwisolver-1.4.9-cp312-cp312-win_arm64.whl", hash = "sha256:2c1a4f57df73965f3f14df20b80ee29e6a7930a57d2d9e8491a25f676e197c60", size = 64992, upload-time = "2025-08-10T21:26:25.732Z" },
+]
+
+[[package]]
+name = "lark"
+version = "1.2.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/af/60/bc7622aefb2aee1c0b4ba23c1446d3e30225c8770b38d7aedbfb65ca9d5a/lark-1.2.2.tar.gz", hash = "sha256:ca807d0162cd16cef15a8feecb862d7319e7a09bdb13aef927968e45040fed80", size = 252132, upload-time = "2024-08-13T19:49:00.652Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2d/00/d90b10b962b4277f5e64a78b6609968859ff86889f5b898c1a778c06ec00/lark-1.2.2-py3-none-any.whl", hash = "sha256:c2276486b02f0f1b90be155f2c8ba4a8e194d42775786db622faccd652d8e80c", size = 111036, upload-time = "2024-08-13T19:48:58.603Z" },
+]
+
+[[package]]
+name = "llguidance"
+version = "1.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/95/48/3f7a9d3ff1b36bba92b5107a3a21286821227afe9ea464736133994d61fb/llguidance-1.3.0.tar.gz", hash = "sha256:861249afd51dc325646834462ea827e57a5c2b2042e108e6aae7059fdad9104d", size = 1070460, upload-time = "2025-10-20T19:58:44.164Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/3b/33/be5acb85cd8cdc4afde33d9c234eece9f318e087920255af3c05864cd3e7/llguidance-1.3.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:f7685222660a762e481ac633d49cc559c64980fe2ee59c8f932a5bb5cbc0c2c2", size = 3220647, upload-time = "2025-10-20T19:58:42.542Z" },
+ { url = "https://files.pythonhosted.org/packages/82/e6/b48bda5b15efeaeb62bd0dba8fc6a01d4ae5457a85dbb5d18632385fe15c/llguidance-1.3.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:098030ff0687261a3f1bd54cf21fe951fc861d56d37a0671250dd36677eaf224", size = 3099830, upload-time = "2025-10-20T19:58:40.826Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/11/44389d3d1526d7a5c38ffd587a5ebc61d7bee443ac1dea95f2089ad58f5f/llguidance-1.3.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f6caca5d78db7f76e1fbb0fff8607b861c32d47fa3d5dee2fc49de27ee269df", size = 2835242, upload-time = "2025-10-20T19:58:34.518Z" },
+ { url = "https://files.pythonhosted.org/packages/83/a8/1ff2bedb8f9acb46a2d2d603415d272bb622c142ea86f5b95445cc6e366c/llguidance-1.3.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc17e9dd602c3879bf91664a64bf72f54c74dbfbeb24ccfab6a5fe435b12f7aa", size = 3033133, upload-time = "2025-10-20T19:58:38.721Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/7e/809349638231f469b9056c0e1bfd924d5ef5558b3b3ec72d093b6fad33b1/llguidance-1.3.0-cp39-abi3-win_amd64.whl", hash = "sha256:1d1cd1c8618d1a13605d3e057c978651e551c8c469b481ee4041f1d6c436002d", size = 2789946, upload-time = "2025-10-20T19:58:45.958Z" },
+]
+
+[[package]]
+name = "llvmlite"
+version = "0.44.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/89/6a/95a3d3610d5c75293d5dbbb2a76480d5d4eeba641557b69fe90af6c5b84e/llvmlite-0.44.0.tar.gz", hash = "sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4", size = 171880, upload-time = "2025-01-20T11:14:41.342Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/15/86/e3c3195b92e6e492458f16d233e58a1a812aa2bfbef9bdd0fbafcec85c60/llvmlite-0.44.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:1d671a56acf725bf1b531d5ef76b86660a5ab8ef19bb6a46064a705c6ca80aad", size = 28132297, upload-time = "2025-01-20T11:13:32.57Z" },
+ { url = "https://files.pythonhosted.org/packages/d6/53/373b6b8be67b9221d12b24125fd0ec56b1078b660eeae266ec388a6ac9a0/llvmlite-0.44.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f79a728e0435493611c9f405168682bb75ffd1fbe6fc360733b850c80a026db", size = 26201105, upload-time = "2025-01-20T11:13:38.744Z" },
+ { url = "https://files.pythonhosted.org/packages/cb/da/8341fd3056419441286c8e26bf436923021005ece0bff5f41906476ae514/llvmlite-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0143a5ef336da14deaa8ec26c5449ad5b6a2b564df82fcef4be040b9cacfea9", size = 42361901, upload-time = "2025-01-20T11:13:46.711Z" },
+ { url = "https://files.pythonhosted.org/packages/53/ad/d79349dc07b8a395a99153d7ce8b01d6fcdc9f8231355a5df55ded649b61/llvmlite-0.44.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d752f89e31b66db6f8da06df8b39f9b91e78c5feea1bf9e8c1fba1d1c24c065d", size = 41184247, upload-time = "2025-01-20T11:13:56.159Z" },
+ { url = "https://files.pythonhosted.org/packages/e2/3b/a9a17366af80127bd09decbe2a54d8974b6d8b274b39bf47fbaedeec6307/llvmlite-0.44.0-cp312-cp312-win_amd64.whl", hash = "sha256:eae7e2d4ca8f88f89d315b48c6b741dcb925d6a1042da694aa16ab3dd4cbd3a1", size = 30332380, upload-time = "2025-01-20T11:14:02.442Z" },
+]
+
+[[package]]
+name = "lm-format-enforcer"
+version = "0.11.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "interegular" },
+ { name = "packaging" },
+ { name = "pydantic" },
+ { name = "pyyaml" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/84/d5/41cd417ba7dfdbbcfe46cebf81fb3dfd7c591b89897560ad05bb410a465d/lm_format_enforcer-0.11.3.tar.gz", hash = "sha256:e68081c108719cce284a9bcc889709b26ffb085a1945b5eba3a12cfa96d528da", size = 40258, upload-time = "2025-08-24T19:37:47.527Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a0/ef/11292bb0b85cf4c93447cab5a29f64576ed14d3ab4280e35ddd23486594a/lm_format_enforcer-0.11.3-py3-none-any.whl", hash = "sha256:cf586350875def1ae7a8fba84fcbbfc8371424b6c9d05c1fcba70aa233fbf06f", size = 45418, upload-time = "2025-08-24T19:37:46.325Z" },
+]
+
+[[package]]
+name = "loguru"
+version = "0.7.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "colorama", marker = "sys_platform == 'win32'" },
+ { name = "win32-setctime", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559, upload-time = "2024-12-06T11:20:56.608Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" },
+]
+
+[[package]]
+name = "markdown-it-py"
+version = "4.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "mdurl" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
+]
+
+[[package]]
+name = "markupsafe"
+version = "3.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" },
+ { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" },
+ { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" },
+ { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" },
+ { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" },
+ { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" },
+ { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" },
+ { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" },
+]
+
+[[package]]
+name = "matplotlib"
+version = "3.10.8"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "contourpy" },
+ { name = "cycler" },
+ { name = "fonttools" },
+ { name = "kiwisolver" },
+ { name = "numpy" },
+ { name = "packaging" },
+ { name = "pillow" },
+ { name = "pyparsing" },
+ { name = "python-dateutil" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/8a/76/d3c6e3a13fe484ebe7718d14e269c9569c4eb0020a968a327acb3b9a8fe6/matplotlib-3.10.8.tar.gz", hash = "sha256:2299372c19d56bcd35cf05a2738308758d32b9eaed2371898d8f5bd33f084aa3", size = 34806269, upload-time = "2025-12-10T22:56:51.155Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/9e/67/f997cdcbb514012eb0d10cd2b4b332667997fb5ebe26b8d41d04962fa0e6/matplotlib-3.10.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:64fcc24778ca0404ce0cb7b6b77ae1f4c7231cdd60e6778f999ee05cbd581b9a", size = 8260453, upload-time = "2025-12-10T22:55:30.709Z" },
+ { url = "https://files.pythonhosted.org/packages/7e/65/07d5f5c7f7c994f12c768708bd2e17a4f01a2b0f44a1c9eccad872433e2e/matplotlib-3.10.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b9a5ca4ac220a0cdd1ba6bcba3608547117d30468fefce49bb26f55c1a3d5c58", size = 8148321, upload-time = "2025-12-10T22:55:33.265Z" },
+ { url = "https://files.pythonhosted.org/packages/3e/f3/c5195b1ae57ef85339fd7285dfb603b22c8b4e79114bae5f4f0fcf688677/matplotlib-3.10.8-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3ab4aabc72de4ff77b3ec33a6d78a68227bf1123465887f9905ba79184a1cc04", size = 8716944, upload-time = "2025-12-10T22:55:34.922Z" },
+ { url = "https://files.pythonhosted.org/packages/00/f9/7638f5cc82ec8a7aa005de48622eecc3ed7c9854b96ba15bd76b7fd27574/matplotlib-3.10.8-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:24d50994d8c5816ddc35411e50a86ab05f575e2530c02752e02538122613371f", size = 9550099, upload-time = "2025-12-10T22:55:36.789Z" },
+ { url = "https://files.pythonhosted.org/packages/57/61/78cd5920d35b29fd2a0fe894de8adf672ff52939d2e9b43cb83cd5ce1bc7/matplotlib-3.10.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:99eefd13c0dc3b3c1b4d561c1169e65fe47aab7b8158754d7c084088e2329466", size = 9613040, upload-time = "2025-12-10T22:55:38.715Z" },
+ { url = "https://files.pythonhosted.org/packages/30/4e/c10f171b6e2f44d9e3a2b96efa38b1677439d79c99357600a62cc1e9594e/matplotlib-3.10.8-cp312-cp312-win_amd64.whl", hash = "sha256:dd80ecb295460a5d9d260df63c43f4afbdd832d725a531f008dad1664f458adf", size = 8142717, upload-time = "2025-12-10T22:55:41.103Z" },
+ { url = "https://files.pythonhosted.org/packages/f1/76/934db220026b5fef85f45d51a738b91dea7d70207581063cd9bd8fafcf74/matplotlib-3.10.8-cp312-cp312-win_arm64.whl", hash = "sha256:3c624e43ed56313651bc18a47f838b60d7b8032ed348911c54906b130b20071b", size = 8012751, upload-time = "2025-12-10T22:55:42.684Z" },
+]
+
+[[package]]
+name = "mcp"
+version = "1.26.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "anyio" },
+ { name = "httpx" },
+ { name = "httpx-sse" },
+ { name = "jsonschema" },
+ { name = "pydantic" },
+ { name = "pydantic-settings" },
+ { name = "pyjwt", extra = ["crypto"] },
+ { name = "python-multipart" },
+ { name = "pywin32", marker = "sys_platform == 'win32'" },
+ { name = "sse-starlette" },
+ { name = "starlette" },
+ { name = "typing-extensions" },
+ { name = "typing-inspection" },
+ { name = "uvicorn", marker = "sys_platform != 'emscripten'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/fc/6d/62e76bbb8144d6ed86e202b5edd8a4cb631e7c8130f3f4893c3f90262b10/mcp-1.26.0.tar.gz", hash = "sha256:db6e2ef491eecc1a0d93711a76f28dec2e05999f93afd48795da1c1137142c66", size = 608005, upload-time = "2026-01-24T19:40:32.468Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/fd/d9/eaa1f80170d2b7c5ba23f3b59f766f3a0bb41155fbc32a69adfa1adaaef9/mcp-1.26.0-py3-none-any.whl", hash = "sha256:904a21c33c25aa98ddbeb47273033c435e595bbacfdb177f4bd87f6dceebe1ca", size = 233615, upload-time = "2026-01-24T19:40:30.652Z" },
+]
+
+[[package]]
+name = "mdurl"
+version = "0.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
+]
+
+[[package]]
+name = "mistral-common"
+version = "1.9.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "jsonschema" },
+ { name = "numpy" },
+ { name = "pillow" },
+ { name = "pydantic" },
+ { name = "pydantic-extra-types", extra = ["pycountry"] },
+ { name = "requests" },
+ { name = "tiktoken" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/db/ce/685b8127a326478e05501cb4c9ca23d1cd9f37e16c465a1e832c75aea709/mistral_common-1.9.1.tar.gz", hash = "sha256:550583d70a395c3586cfb748ffab53bd1d7c3409507f0efc0118bff30ffb26e9", size = 6338922, upload-time = "2026-02-12T10:53:41.639Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ac/72/a38bb1fd9fd4d4ef990341c9dd1a7c8061f1951e10efa6d50c0a3f04eced/mistral_common-1.9.1-py3-none-any.whl", hash = "sha256:9e2b2520b6f67bac2e2bb06fcf985b7a1277b01938da2b7cda8cf0fdbfa92e91", size = 6518623, upload-time = "2026-02-12T10:53:39.457Z" },
+]
+
+[package.optional-dependencies]
+image = [
+ { name = "opencv-python-headless" },
+]
+
+[[package]]
+name = "mlx"
+version = "0.31.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "mlx-metal", marker = "sys_platform == 'darwin'" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/1a/7d/87fb0daa006dbbbd8894c3d496c7d9dfc52e4ade260482276d3eca137a15/mlx-0.31.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:de6c0a3e8aa0e7d1365d46634fdbb3f835c164fbdb6ba8a239e039a4efa07fe2", size = 575834, upload-time = "2026-02-27T23:49:26.61Z" },
+ { url = "https://files.pythonhosted.org/packages/d4/e3/aa0fac5a9d52b1a4686c7097e56775c1a96dee3084f9c587b74e4c2cd284/mlx-0.31.0-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:d6af01b15177da995336a6fd9878e7c5994720a9f1614d8f4d1dbe9293167c30", size = 575836, upload-time = "2026-02-27T23:49:28.505Z" },
+ { url = "https://files.pythonhosted.org/packages/8d/15/6aa3edaa34aeef370634756b7d131b8dc1cdb0002ddecdd3d876b5f9fa0c/mlx-0.31.0-cp312-cp312-macosx_26_0_arm64.whl", hash = "sha256:1ad14ddc3a15818f5bba0de35e88559ed8dcb93ccff2ef879ff604d02d663b25", size = 575828, upload-time = "2026-02-27T23:49:29.684Z" },
+]
+
+[[package]]
+name = "mlx-lm"
+version = "0.29.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "jinja2", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+ { name = "mlx", marker = "sys_platform == 'darwin'" },
+ { name = "numpy", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+ { name = "protobuf", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+ { name = "pyyaml", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+ { name = "sentencepiece", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+ { name = "transformers", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e3/62/f46e1355256a114808517947f8e83ad6be310c7288c551db0fa678f47923/mlx_lm-0.29.1.tar.gz", hash = "sha256:b99180d8f33d33a077b814e550bfb2d8a59ae003d668fd1f4b3fff62a381d34b", size = 232302, upload-time = "2025-12-16T16:58:27.959Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e5/53/913099c91d384e115ea078325efd9a0bc1ea3eb3458c694b4596cbd267f2/mlx_lm-0.29.1-py3-none-any.whl", hash = "sha256:440941b3054c2a2216e97615de584cc90fa1ea874782e20699b9895721fad8dc", size = 324884, upload-time = "2025-12-16T16:58:26.36Z" },
+]
+
+[[package]]
+name = "mlx-metal"
+version = "0.31.0"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/94/4f/0a0671dfa62b59bf429edab0e2c9c7f9bc77865aa4218cd46f2f41d7d11a/mlx_metal-0.31.0-py3-none-macosx_14_0_arm64.whl", hash = "sha256:1c572a6e3634a63060c103b0c38ac309e2d217be15519e3d8f0d6b452bb015f5", size = 38596752, upload-time = "2026-02-27T23:29:39.52Z" },
+ { url = "https://files.pythonhosted.org/packages/8d/42/c6d7bfd097b777f932d6cf8c79e41b565070b63cc452a069b8804e505140/mlx_metal-0.31.0-py3-none-macosx_15_0_arm64.whl", hash = "sha256:554dc7cb29e0ea5fb6941df42f11a1de385b095848e6183c7a99d7c1f1a11f5d", size = 38595434, upload-time = "2026-02-27T23:29:43.285Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/8f/cdaffd759b4c71e74c294e773daacad8aafabac103b93e0aa56d4468d279/mlx_metal-0.31.0-py3-none-macosx_26_0_arm64.whl", hash = "sha256:7fd412f55ddf9f1d90c2cd86ce281d19e8eb93d093c6dbd784a49f8bd7d0a22c", size = 47879607, upload-time = "2026-02-27T23:29:46.571Z" },
+]
+
+[[package]]
+name = "model-hosting-container-standards"
+version = "0.1.13"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "fastapi" },
+ { name = "httpx" },
+ { name = "jmespath" },
+ { name = "pydantic" },
+ { name = "setuptools" },
+ { name = "starlette" },
+ { name = "supervisor" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d7/b7/a6a31b4dfd30d14b1019dc358f09c9d88ca38e555ba7c976e7d3e6b593fe/model_hosting_container_standards-0.1.13.tar.gz", hash = "sha256:27a1333410dde2719286a300a2803e24fdde407baa91894eb845c0f268aa194d", size = 79116, upload-time = "2026-01-09T21:45:20.683Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/8c/37/6dc61971ba31450bbed460b5f40543f0915e352680534e3bcaf57116d8d7/model_hosting_container_standards-0.1.13-py3-none-any.whl", hash = "sha256:be307d4a988cc660df4e6bd8bdedb7917844bac940e332f9fd001cb385d7994c", size = 105738, upload-time = "2026-01-09T21:45:18.959Z" },
+]
+
+[[package]]
+name = "mpmath"
+version = "1.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" },
+]
+
+[[package]]
+name = "msgpack"
+version = "1.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4d/f2/bfb55a6236ed8725a96b0aa3acbd0ec17588e6a2c3b62a93eb513ed8783f/msgpack-1.1.2.tar.gz", hash = "sha256:3b60763c1373dd60f398488069bcdc703cd08a711477b5d480eecc9f9626f47e", size = 173581, upload-time = "2025-10-08T09:15:56.596Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ad/bd/8b0d01c756203fbab65d265859749860682ccd2a59594609aeec3a144efa/msgpack-1.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:70a0dff9d1f8da25179ffcf880e10cf1aad55fdb63cd59c9a49a1b82290062aa", size = 81939, upload-time = "2025-10-08T09:15:01.472Z" },
+ { url = "https://files.pythonhosted.org/packages/34/68/ba4f155f793a74c1483d4bdef136e1023f7bcba557f0db4ef3db3c665cf1/msgpack-1.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:446abdd8b94b55c800ac34b102dffd2f6aa0ce643c55dfc017ad89347db3dbdb", size = 85064, upload-time = "2025-10-08T09:15:03.764Z" },
+ { url = "https://files.pythonhosted.org/packages/f2/60/a064b0345fc36c4c3d2c743c82d9100c40388d77f0b48b2f04d6041dbec1/msgpack-1.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c63eea553c69ab05b6747901b97d620bb2a690633c77f23feb0c6a947a8a7b8f", size = 417131, upload-time = "2025-10-08T09:15:05.136Z" },
+ { url = "https://files.pythonhosted.org/packages/65/92/a5100f7185a800a5d29f8d14041f61475b9de465ffcc0f3b9fba606e4505/msgpack-1.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:372839311ccf6bdaf39b00b61288e0557916c3729529b301c52c2d88842add42", size = 427556, upload-time = "2025-10-08T09:15:06.837Z" },
+ { url = "https://files.pythonhosted.org/packages/f5/87/ffe21d1bf7d9991354ad93949286f643b2bb6ddbeab66373922b44c3b8cc/msgpack-1.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2929af52106ca73fcb28576218476ffbb531a036c2adbcf54a3664de124303e9", size = 404920, upload-time = "2025-10-08T09:15:08.179Z" },
+ { url = "https://files.pythonhosted.org/packages/ff/41/8543ed2b8604f7c0d89ce066f42007faac1eaa7d79a81555f206a5cdb889/msgpack-1.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:be52a8fc79e45b0364210eef5234a7cf8d330836d0a64dfbb878efa903d84620", size = 415013, upload-time = "2025-10-08T09:15:09.83Z" },
+ { url = "https://files.pythonhosted.org/packages/41/0d/2ddfaa8b7e1cee6c490d46cb0a39742b19e2481600a7a0e96537e9c22f43/msgpack-1.1.2-cp312-cp312-win32.whl", hash = "sha256:1fff3d825d7859ac888b0fbda39a42d59193543920eda9d9bea44d958a878029", size = 65096, upload-time = "2025-10-08T09:15:11.11Z" },
+ { url = "https://files.pythonhosted.org/packages/8c/ec/d431eb7941fb55a31dd6ca3404d41fbb52d99172df2e7707754488390910/msgpack-1.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:1de460f0403172cff81169a30b9a92b260cb809c4cb7e2fc79ae8d0510c78b6b", size = 72708, upload-time = "2025-10-08T09:15:12.554Z" },
+ { url = "https://files.pythonhosted.org/packages/c5/31/5b1a1f70eb0e87d1678e9624908f86317787b536060641d6798e3cf70ace/msgpack-1.1.2-cp312-cp312-win_arm64.whl", hash = "sha256:be5980f3ee0e6bd44f3a9e9dea01054f175b50c3e6cdb692bc9424c0bbb8bf69", size = 64119, upload-time = "2025-10-08T09:15:13.589Z" },
+]
+
+[[package]]
+name = "msgspec"
+version = "0.20.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ea/9c/bfbd12955a49180cbd234c5d29ec6f74fe641698f0cd9df154a854fc8a15/msgspec-0.20.0.tar.gz", hash = "sha256:692349e588fde322875f8d3025ac01689fead5901e7fb18d6870a44519d62a29", size = 317862, upload-time = "2025-11-24T03:56:28.934Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d9/6f/1e25eee957e58e3afb2a44b94fa95e06cebc4c236193ed0de3012fff1e19/msgspec-0.20.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2aba22e2e302e9231e85edc24f27ba1f524d43c223ef5765bd8624c7df9ec0a5", size = 196391, upload-time = "2025-11-24T03:55:32.677Z" },
+ { url = "https://files.pythonhosted.org/packages/7f/ee/af51d090ada641d4b264992a486435ba3ef5b5634bc27e6eb002f71cef7d/msgspec-0.20.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:716284f898ab2547fedd72a93bb940375de9fbfe77538f05779632dc34afdfde", size = 188644, upload-time = "2025-11-24T03:55:33.934Z" },
+ { url = "https://files.pythonhosted.org/packages/49/d6/9709ee093b7742362c2934bfb1bbe791a1e09bed3ea5d8a18ce552fbfd73/msgspec-0.20.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:558ed73315efa51b1538fa8f1d3b22c8c5ff6d9a2a62eff87d25829b94fc5054", size = 218852, upload-time = "2025-11-24T03:55:35.575Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/a2/488517a43ccf5a4b6b6eca6dd4ede0bd82b043d1539dd6bb908a19f8efd3/msgspec-0.20.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:509ac1362a1d53aa66798c9b9fd76872d7faa30fcf89b2fba3bcbfd559d56eb0", size = 224937, upload-time = "2025-11-24T03:55:36.859Z" },
+ { url = "https://files.pythonhosted.org/packages/d5/e8/49b832808aa23b85d4f090d1d2e48a4e3834871415031ed7c5fe48723156/msgspec-0.20.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1353c2c93423602e7dea1aa4c92f3391fdfc25ff40e0bacf81d34dbc68adb870", size = 222858, upload-time = "2025-11-24T03:55:38.187Z" },
+ { url = "https://files.pythonhosted.org/packages/9f/56/1dc2fa53685dca9c3f243a6cbecd34e856858354e455b77f47ebd76cf5bf/msgspec-0.20.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cb33b5eb5adb3c33d749684471c6a165468395d7aa02d8867c15103b81e1da3e", size = 227248, upload-time = "2025-11-24T03:55:39.496Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/51/aba940212c23b32eedce752896205912c2668472ed5b205fc33da28a6509/msgspec-0.20.0-cp312-cp312-win_amd64.whl", hash = "sha256:fb1d934e435dd3a2b8cf4bbf47a8757100b4a1cfdc2afdf227541199885cdacb", size = 190024, upload-time = "2025-11-24T03:55:40.829Z" },
+ { url = "https://files.pythonhosted.org/packages/41/ad/3b9f259d94f183daa9764fef33fdc7010f7ecffc29af977044fa47440a83/msgspec-0.20.0-cp312-cp312-win_arm64.whl", hash = "sha256:00648b1e19cf01b2be45444ba9dc961bd4c056ffb15706651e64e5d6ec6197b7", size = 175390, upload-time = "2025-11-24T03:55:42.05Z" },
+]
+
+[[package]]
+name = "multidict"
+version = "6.7.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010, upload-time = "2026-01-26T02:46:45.979Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/8d/9c/f20e0e2cf80e4b2e4b1c365bf5fe104ee633c751a724246262db8f1a0b13/multidict-6.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a90f75c956e32891a4eda3639ce6dd86e87105271f43d43442a3aedf3cddf172", size = 76893, upload-time = "2026-01-26T02:43:52.754Z" },
+ { url = "https://files.pythonhosted.org/packages/fe/cf/18ef143a81610136d3da8193da9d80bfe1cb548a1e2d1c775f26b23d024a/multidict-6.7.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fccb473e87eaa1382689053e4a4618e7ba7b9b9b8d6adf2027ee474597128cd", size = 45456, upload-time = "2026-01-26T02:43:53.893Z" },
+ { url = "https://files.pythonhosted.org/packages/a9/65/1caac9d4cd32e8433908683446eebc953e82d22b03d10d41a5f0fefe991b/multidict-6.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0fa96985700739c4c7853a43c0b3e169360d6855780021bfc6d0f1ce7c123e7", size = 43872, upload-time = "2026-01-26T02:43:55.041Z" },
+ { url = "https://files.pythonhosted.org/packages/cf/3b/d6bd75dc4f3ff7c73766e04e705b00ed6dbbaccf670d9e05a12b006f5a21/multidict-6.7.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cb2a55f408c3043e42b40cc8eecd575afa27b7e0b956dfb190de0f8499a57a53", size = 251018, upload-time = "2026-01-26T02:43:56.198Z" },
+ { url = "https://files.pythonhosted.org/packages/fd/80/c959c5933adedb9ac15152e4067c702a808ea183a8b64cf8f31af8ad3155/multidict-6.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb0ce7b2a32d09892b3dd6cc44877a0d02a33241fafca5f25c8b6b62374f8b75", size = 258883, upload-time = "2026-01-26T02:43:57.499Z" },
+ { url = "https://files.pythonhosted.org/packages/86/85/7ed40adafea3d4f1c8b916e3b5cc3a8e07dfcdcb9cd72800f4ed3ca1b387/multidict-6.7.1-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c3a32d23520ee37bf327d1e1a656fec76a2edd5c038bf43eddfa0572ec49c60b", size = 242413, upload-time = "2026-01-26T02:43:58.755Z" },
+ { url = "https://files.pythonhosted.org/packages/d2/57/b8565ff533e48595503c785f8361ff9a4fde4d67de25c207cd0ba3befd03/multidict-6.7.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9c90fed18bffc0189ba814749fdcc102b536e83a9f738a9003e569acd540a733", size = 268404, upload-time = "2026-01-26T02:44:00.216Z" },
+ { url = "https://files.pythonhosted.org/packages/e0/50/9810c5c29350f7258180dfdcb2e52783a0632862eb334c4896ac717cebcb/multidict-6.7.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:da62917e6076f512daccfbbde27f46fed1c98fee202f0559adec8ee0de67f71a", size = 269456, upload-time = "2026-01-26T02:44:02.202Z" },
+ { url = "https://files.pythonhosted.org/packages/f3/8d/5e5be3ced1d12966fefb5c4ea3b2a5b480afcea36406559442c6e31d4a48/multidict-6.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bfde23ef6ed9db7eaee6c37dcec08524cb43903c60b285b172b6c094711b3961", size = 256322, upload-time = "2026-01-26T02:44:03.56Z" },
+ { url = "https://files.pythonhosted.org/packages/31/6e/d8a26d81ac166a5592782d208dd90dfdc0a7a218adaa52b45a672b46c122/multidict-6.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3758692429e4e32f1ba0df23219cd0b4fc0a52f476726fff9337d1a57676a582", size = 253955, upload-time = "2026-01-26T02:44:04.845Z" },
+ { url = "https://files.pythonhosted.org/packages/59/4c/7c672c8aad41534ba619bcd4ade7a0dc87ed6b8b5c06149b85d3dd03f0cd/multidict-6.7.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:398c1478926eca669f2fd6a5856b6de9c0acf23a2cb59a14c0ba5844fa38077e", size = 251254, upload-time = "2026-01-26T02:44:06.133Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/bd/84c24de512cbafbdbc39439f74e967f19570ce7924e3007174a29c348916/multidict-6.7.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c102791b1c4f3ab36ce4101154549105a53dc828f016356b3e3bcae2e3a039d3", size = 252059, upload-time = "2026-01-26T02:44:07.518Z" },
+ { url = "https://files.pythonhosted.org/packages/fa/ba/f5449385510825b73d01c2d4087bf6d2fccc20a2d42ac34df93191d3dd03/multidict-6.7.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a088b62bd733e2ad12c50dad01b7d0166c30287c166e137433d3b410add807a6", size = 263588, upload-time = "2026-01-26T02:44:09.382Z" },
+ { url = "https://files.pythonhosted.org/packages/d7/11/afc7c677f68f75c84a69fe37184f0f82fce13ce4b92f49f3db280b7e92b3/multidict-6.7.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3d51ff4785d58d3f6c91bdbffcb5e1f7ddfda557727043aa20d20ec4f65e324a", size = 259642, upload-time = "2026-01-26T02:44:10.73Z" },
+ { url = "https://files.pythonhosted.org/packages/2b/17/ebb9644da78c4ab36403739e0e6e0e30ebb135b9caf3440825001a0bddcb/multidict-6.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fc5907494fccf3e7d3f94f95c91d6336b092b5fc83811720fae5e2765890dfba", size = 251377, upload-time = "2026-01-26T02:44:12.042Z" },
+ { url = "https://files.pythonhosted.org/packages/ca/a4/840f5b97339e27846c46307f2530a2805d9d537d8b8bd416af031cad7fa0/multidict-6.7.1-cp312-cp312-win32.whl", hash = "sha256:28ca5ce2fd9716631133d0e9a9b9a745ad7f60bac2bccafb56aa380fc0b6c511", size = 41887, upload-time = "2026-01-26T02:44:14.245Z" },
+ { url = "https://files.pythonhosted.org/packages/80/31/0b2517913687895f5904325c2069d6a3b78f66cc641a86a2baf75a05dcbb/multidict-6.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcee94dfbd638784645b066074b338bc9cc155d4b4bffa4adce1615c5a426c19", size = 46053, upload-time = "2026-01-26T02:44:15.371Z" },
+ { url = "https://files.pythonhosted.org/packages/0c/5b/aba28e4ee4006ae4c7df8d327d31025d760ffa992ea23812a601d226e682/multidict-6.7.1-cp312-cp312-win_arm64.whl", hash = "sha256:ba0a9fb644d0c1a2194cf7ffb043bd852cea63a57f66fbd33959f7dae18517bf", size = 43307, upload-time = "2026-01-26T02:44:16.852Z" },
+ { url = "https://files.pythonhosted.org/packages/81/08/7036c080d7117f28a4af526d794aab6a84463126db031b007717c1a6676e/multidict-6.7.1-py3-none-any.whl", hash = "sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56", size = 12319, upload-time = "2026-01-26T02:46:44.004Z" },
+]
+
+[[package]]
+name = "multiprocess"
+version = "0.70.18"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "dill" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ba/d8/0cba6cf51a1a31f20471fbc823a716170c73012ddc4fb85d706630ed6e8f/multiprocess-0.70.18-py310-none-any.whl", hash = "sha256:60c194974c31784019c1f459d984e8f33ee48f10fcf42c309ba97b30d9bd53ea", size = 134948, upload-time = "2025-04-17T03:11:20.223Z" },
+ { url = "https://files.pythonhosted.org/packages/4b/88/9039f2fed1012ef584751d4ceff9ab4a51e5ae264898f0b7cbf44340a859/multiprocess-0.70.18-py311-none-any.whl", hash = "sha256:5aa6eef98e691281b3ad923be2832bf1c55dd2c859acd73e5ec53a66aae06a1d", size = 144462, upload-time = "2025-04-17T03:11:21.657Z" },
+ { url = "https://files.pythonhosted.org/packages/bf/b6/5f922792be93b82ec6b5f270bbb1ef031fd0622847070bbcf9da816502cc/multiprocess-0.70.18-py312-none-any.whl", hash = "sha256:9b78f8e5024b573730bfb654783a13800c2c0f2dfc0c25e70b40d184d64adaa2", size = 150287, upload-time = "2025-04-17T03:11:22.69Z" },
+ { url = "https://files.pythonhosted.org/packages/3b/c3/ca84c19bd14cdfc21c388fdcebf08b86a7a470ebc9f5c3c084fc2dbc50f7/multiprocess-0.70.18-py38-none-any.whl", hash = "sha256:dbf705e52a154fe5e90fb17b38f02556169557c2dd8bb084f2e06c2784d8279b", size = 132636, upload-time = "2025-04-17T03:11:24.936Z" },
+ { url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" },
+]
+
+[[package]]
+name = "networkx"
+version = "3.6.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" },
+]
+
+[[package]]
+name = "ninja"
+version = "1.13.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/43/73/79a0b22fc731989c708068427579e840a6cf4e937fe7ae5c5d0b7356ac22/ninja-1.13.0.tar.gz", hash = "sha256:4a40ce995ded54d9dc24f8ea37ff3bf62ad192b547f6c7126e7e25045e76f978", size = 242558, upload-time = "2025-08-11T15:10:19.421Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/3c/74/d02409ed2aa865e051b7edda22ad416a39d81a84980f544f8de717cab133/ninja-1.13.0-py3-none-macosx_10_9_universal2.whl", hash = "sha256:fa2a8bfc62e31b08f83127d1613d10821775a0eb334197154c4d6067b7068ff1", size = 310125, upload-time = "2025-08-11T15:09:50.971Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/de/6e1cd6b84b412ac1ef327b76f0641aeb5dcc01e9d3f9eee0286d0c34fd93/ninja-1.13.0-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3d00c692fb717fd511abeb44b8c5d00340c36938c12d6538ba989fe764e79630", size = 177467, upload-time = "2025-08-11T15:09:52.767Z" },
+ { url = "https://files.pythonhosted.org/packages/c8/83/49320fb6e58ae3c079381e333575fdbcf1cca3506ee160a2dcce775046fa/ninja-1.13.0-py3-none-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:be7f478ff9f96a128b599a964fc60a6a87b9fa332ee1bd44fa243ac88d50291c", size = 187834, upload-time = "2025-08-11T15:09:54.115Z" },
+ { url = "https://files.pythonhosted.org/packages/56/c7/ba22748fb59f7f896b609cd3e568d28a0a367a6d953c24c461fe04fc4433/ninja-1.13.0-py3-none-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:60056592cf495e9a6a4bea3cd178903056ecb0943e4de45a2ea825edb6dc8d3e", size = 202736, upload-time = "2025-08-11T15:09:55.745Z" },
+ { url = "https://files.pythonhosted.org/packages/79/22/d1de07632b78ac8e6b785f41fa9aad7a978ec8c0a1bf15772def36d77aac/ninja-1.13.0-py3-none-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:1c97223cdda0417f414bf864cfb73b72d8777e57ebb279c5f6de368de0062988", size = 179034, upload-time = "2025-08-11T15:09:57.394Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/de/0e6edf44d6a04dabd0318a519125ed0415ce437ad5a1ec9b9be03d9048cf/ninja-1.13.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fb46acf6b93b8dd0322adc3a4945452a4e774b75b91293bafcc7b7f8e6517dfa", size = 180716, upload-time = "2025-08-11T15:09:58.696Z" },
+ { url = "https://files.pythonhosted.org/packages/54/28/938b562f9057aaa4d6bfbeaa05e81899a47aebb3ba6751e36c027a7f5ff7/ninja-1.13.0-py3-none-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4be9c1b082d244b1ad7ef41eb8ab088aae8c109a9f3f0b3e56a252d3e00f42c1", size = 146843, upload-time = "2025-08-11T15:10:00.046Z" },
+ { url = "https://files.pythonhosted.org/packages/2a/fb/d06a3838de4f8ab866e44ee52a797b5491df823901c54943b2adb0389fbb/ninja-1.13.0-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:6739d3352073341ad284246f81339a384eec091d9851a886dfa5b00a6d48b3e2", size = 154402, upload-time = "2025-08-11T15:10:01.657Z" },
+ { url = "https://files.pythonhosted.org/packages/31/bf/0d7808af695ceddc763cf251b84a9892cd7f51622dc8b4c89d5012779f06/ninja-1.13.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:11be2d22027bde06f14c343f01d31446747dbb51e72d00decca2eb99be911e2f", size = 552388, upload-time = "2025-08-11T15:10:03.349Z" },
+ { url = "https://files.pythonhosted.org/packages/9d/70/c99d0c2c809f992752453cce312848abb3b1607e56d4cd1b6cded317351a/ninja-1.13.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:aa45b4037b313c2f698bc13306239b8b93b4680eb47e287773156ac9e9304714", size = 472501, upload-time = "2025-08-11T15:10:04.735Z" },
+ { url = "https://files.pythonhosted.org/packages/9f/43/c217b1153f0e499652f5e0766da8523ce3480f0a951039c7af115e224d55/ninja-1.13.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5f8e1e8a1a30835eeb51db05cf5a67151ad37542f5a4af2a438e9490915e5b72", size = 638280, upload-time = "2025-08-11T15:10:06.512Z" },
+ { url = "https://files.pythonhosted.org/packages/8c/45/9151bba2c8d0ae2b6260f71696330590de5850e5574b7b5694dce6023e20/ninja-1.13.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:3d7d7779d12cb20c6d054c61b702139fd23a7a964ec8f2c823f1ab1b084150db", size = 642420, upload-time = "2025-08-11T15:10:08.35Z" },
+ { url = "https://files.pythonhosted.org/packages/3c/fb/95752eb635bb8ad27d101d71bef15bc63049de23f299e312878fc21cb2da/ninja-1.13.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:d741a5e6754e0bda767e3274a0f0deeef4807f1fec6c0d7921a0244018926ae5", size = 585106, upload-time = "2025-08-11T15:10:09.818Z" },
+ { url = "https://files.pythonhosted.org/packages/c1/31/aa56a1a286703800c0cbe39fb4e82811c277772dc8cd084f442dd8e2938a/ninja-1.13.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:e8bad11f8a00b64137e9b315b137d8bb6cbf3086fbdc43bf1f90fd33324d2e96", size = 707138, upload-time = "2025-08-11T15:10:11.366Z" },
+ { url = "https://files.pythonhosted.org/packages/34/6f/5f5a54a1041af945130abdb2b8529cbef0cdcbbf9bcf3f4195378319d29a/ninja-1.13.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b4f2a072db3c0f944c32793e91532d8948d20d9ab83da9c0c7c15b5768072200", size = 581758, upload-time = "2025-08-11T15:10:13.295Z" },
+ { url = "https://files.pythonhosted.org/packages/95/97/51359c77527d45943fe7a94d00a3843b81162e6c4244b3579fe8fc54cb9c/ninja-1.13.0-py3-none-win32.whl", hash = "sha256:8cfbb80b4a53456ae8a39f90ae3d7a2129f45ea164f43fadfa15dc38c4aef1c9", size = 267201, upload-time = "2025-08-11T15:10:15.158Z" },
+ { url = "https://files.pythonhosted.org/packages/29/45/c0adfbfb0b5895aa18cec400c535b4f7ff3e52536e0403602fc1a23f7de9/ninja-1.13.0-py3-none-win_amd64.whl", hash = "sha256:fb8ee8719f8af47fed145cced4a85f0755dd55d45b2bddaf7431fa89803c5f3e", size = 309975, upload-time = "2025-08-11T15:10:16.697Z" },
+ { url = "https://files.pythonhosted.org/packages/df/93/a7b983643d1253bb223234b5b226e69de6cda02b76cdca7770f684b795f5/ninja-1.13.0-py3-none-win_arm64.whl", hash = "sha256:3c0b40b1f0bba764644385319028650087b4c1b18cdfa6f45cb39a3669b81aa9", size = 290806, upload-time = "2025-08-11T15:10:18.018Z" },
+]
+
+[[package]]
+name = "numba"
+version = "0.61.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "llvmlite" },
+ { name = "numpy" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1c/a0/e21f57604304aa03ebb8e098429222722ad99176a4f979d34af1d1ee80da/numba-0.61.2.tar.gz", hash = "sha256:8750ee147940a6637b80ecf7f95062185ad8726c8c28a2295b8ec1160a196f7d", size = 2820615, upload-time = "2025-04-09T02:58:07.659Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b4/a0/c6b7b9c615cfa3b98c4c63f4316e3f6b3bbe2387740277006551784218cd/numba-0.61.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:34fba9406078bac7ab052efbf0d13939426c753ad72946baaa5bf9ae0ebb8dd2", size = 2776626, upload-time = "2025-04-09T02:57:51.857Z" },
+ { url = "https://files.pythonhosted.org/packages/92/4a/fe4e3c2ecad72d88f5f8cd04e7f7cff49e718398a2fac02d2947480a00ca/numba-0.61.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4ddce10009bc097b080fc96876d14c051cc0c7679e99de3e0af59014dab7dfe8", size = 2779287, upload-time = "2025-04-09T02:57:53.658Z" },
+ { url = "https://files.pythonhosted.org/packages/9a/2d/e518df036feab381c23a624dac47f8445ac55686ec7f11083655eb707da3/numba-0.61.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b1bb509d01f23d70325d3a5a0e237cbc9544dd50e50588bc581ba860c213546", size = 3885928, upload-time = "2025-04-09T02:57:55.206Z" },
+ { url = "https://files.pythonhosted.org/packages/10/0f/23cced68ead67b75d77cfcca3df4991d1855c897ee0ff3fe25a56ed82108/numba-0.61.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48a53a3de8f8793526cbe330f2a39fe9a6638efcbf11bd63f3d2f9757ae345cd", size = 3577115, upload-time = "2025-04-09T02:57:56.818Z" },
+ { url = "https://files.pythonhosted.org/packages/68/1d/ddb3e704c5a8fb90142bf9dc195c27db02a08a99f037395503bfbc1d14b3/numba-0.61.2-cp312-cp312-win_amd64.whl", hash = "sha256:97cf4f12c728cf77c9c1d7c23707e4d8fb4632b46275f8f3397de33e5877af18", size = 2831929, upload-time = "2025-04-09T02:57:58.45Z" },
+]
+
+[[package]]
+name = "numpy"
+version = "2.2.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348, upload-time = "2025-05-17T21:34:39.648Z" },
+ { url = "https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362, upload-time = "2025-05-17T21:35:01.241Z" },
+ { url = "https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103, upload-time = "2025-05-17T21:35:10.622Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382, upload-time = "2025-05-17T21:35:21.414Z" },
+ { url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462, upload-time = "2025-05-17T21:35:42.174Z" },
+ { url = "https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618, upload-time = "2025-05-17T21:36:06.711Z" },
+ { url = "https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 15505511, upload-time = "2025-05-17T21:36:29.965Z" },
+ { url = "https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783, upload-time = "2025-05-17T21:36:56.883Z" },
+ { url = "https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506, upload-time = "2025-05-17T21:37:07.368Z" },
+ { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190, upload-time = "2025-05-17T21:37:26.213Z" },
+]
+
+[[package]]
+name = "nvidia-cublas-cu12"
+version = "12.8.4.1"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" },
+]
+
+[[package]]
+name = "nvidia-cuda-cupti-cu12"
+version = "12.8.90"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" },
+]
+
+[[package]]
+name = "nvidia-cuda-nvrtc-cu12"
+version = "12.8.93"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" },
+]
+
+[[package]]
+name = "nvidia-cuda-runtime-cu12"
+version = "12.8.90"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" },
+]
+
+[[package]]
+name = "nvidia-cudnn-cu12"
+version = "9.10.2.21"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
+]
+
+[[package]]
+name = "nvidia-cudnn-frontend"
+version = "1.18.0"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e3/b4/604e230378680ee117849a4e1045baca092f93161a829291a84d5acce70c/nvidia_cudnn_frontend-1.18.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:310b417f2848a83d1437203fcaeea320a74fb7f28af20bf42bf5afc9c01f1c12", size = 2027408, upload-time = "2026-01-27T23:32:46.576Z" },
+ { url = "https://files.pythonhosted.org/packages/c6/52/08f98262e77b1cbcc834cc1a5db494d0661ea1dbdea58c2e2d51a57fdaca/nvidia_cudnn_frontend-1.18.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c023539ca6de99234cf5102c3ec0d6af817f5396fc93028a22ba5b834a35b8a", size = 2159245, upload-time = "2026-01-27T23:07:32.664Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/1f/751a5a8cfdc95fb4dc556192d37369ae488c30c473fe9a3ec720b23d07ea/nvidia_cudnn_frontend-1.18.0-cp312-cp312-win_amd64.whl", hash = "sha256:e13f7dd46cdb4762dde87f181f06d1c5e15e9478bbdd547bfa74d9b11f415aae", size = 1591041, upload-time = "2026-01-27T23:09:04.118Z" },
+]
+
+[[package]]
+name = "nvidia-cufft-cu12"
+version = "11.3.3.83"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" },
+]
+
+[[package]]
+name = "nvidia-cufile-cu12"
+version = "1.13.1.3"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" },
+]
+
+[[package]]
+name = "nvidia-curand-cu12"
+version = "10.3.9.90"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" },
+]
+
+[[package]]
+name = "nvidia-cusolver-cu12"
+version = "11.7.3.90"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+ { name = "nvidia-cusparse-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+ { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" },
+]
+
+[[package]]
+name = "nvidia-cusparse-cu12"
+version = "12.5.8.93"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" },
+]
+
+[[package]]
+name = "nvidia-cusparselt-cu12"
+version = "0.7.1"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" },
+]
+
+[[package]]
+name = "nvidia-cutlass-dsl"
+version = "4.4.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "nvidia-cutlass-dsl-libs-base" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0d/a8/d9f2b82bf6f6b48502267fcf2fa7b229392affb68a6092da92b0edef7476/nvidia_cutlass_dsl-4.4.1-py3-none-any.whl", hash = "sha256:7b8ffa0117be35ef6c9a88f4462ee2a794efd0f7d9f65090e10a953e434fbfce", size = 10167, upload-time = "2026-02-27T09:37:34.551Z" },
+]
+
+[[package]]
+name = "nvidia-cutlass-dsl-libs-base"
+version = "4.4.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "cuda-python", version = "12.9.4", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+ { name = "cuda-python", version = "13.1.1", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'emscripten' or sys_platform == 'win32'" },
+ { name = "numpy" },
+ { name = "typing-extensions" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f5/cd/d09f6c998a9d52372d97d85d6561392d745ca00cf46de956d7cd7ec608cf/nvidia_cutlass_dsl_libs_base-4.4.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:74192716b18c1825382723891842f87fa2a045b4b100c5c0f474042731e21e86", size = 75458464, upload-time = "2026-02-27T09:45:01.155Z" },
+ { url = "https://files.pythonhosted.org/packages/b4/c1/acca814bc209562ef6cefbdec2ca36520f9a0380cdc7c6feaa69874bb50d/nvidia_cutlass_dsl_libs_base-4.4.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ba5e3d7148f7882911bb3cb453c313c790d1c2096bdfdd2d96da2123cf562201", size = 74347149, upload-time = "2026-02-27T09:45:56.602Z" },
+]
+
+[[package]]
+name = "nvidia-ml-py"
+version = "13.590.48"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/af/a0/f4fc18cf72f06821a9a665085435b901449986855519d5b3843532db35e9/nvidia_ml_py-13.590.48.tar.gz", hash = "sha256:8184d1be52914ac7f0991cd1c0d946c65dc88a840c754cd12c274b77b88760dd", size = 49732, upload-time = "2026-01-22T01:14:56.456Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/fd/72/fb2af0d259a651affdce65fd6a495f0e07a685a0136baf585c5065204ee7/nvidia_ml_py-13.590.48-py3-none-any.whl", hash = "sha256:fd43d30ee9cd0b7940f5f9f9220b68d42722975e3992b6c21d14144c48760e43", size = 50680, upload-time = "2026-01-22T01:14:55.281Z" },
+]
+
+[[package]]
+name = "nvidia-nccl-cu12"
+version = "2.27.5"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" },
+]
+
+[[package]]
+name = "nvidia-nvjitlink-cu12"
+version = "12.8.93"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" },
+]
+
+[[package]]
+name = "nvidia-nvshmem-cu12"
+version = "3.4.5"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095, upload-time = "2025-09-06T00:32:31.266Z" },
+]
+
+[[package]]
+name = "nvidia-nvtx-cu12"
+version = "12.8.90"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" },
+]
+
+[[package]]
+name = "openai"
+version = "2.24.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "anyio" },
+ { name = "distro" },
+ { name = "httpx" },
+ { name = "jiter" },
+ { name = "pydantic" },
+ { name = "sniffio" },
+ { name = "tqdm" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/55/13/17e87641b89b74552ed408a92b231283786523edddc95f3545809fab673c/openai-2.24.0.tar.gz", hash = "sha256:1e5769f540dbd01cb33bc4716a23e67b9d695161a734aff9c5f925e2bf99a673", size = 658717, upload-time = "2026-02-24T20:02:07.958Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c9/30/844dc675ee6902579b8eef01ed23917cc9319a1c9c0c14ec6e39340c96d0/openai-2.24.0-py3-none-any.whl", hash = "sha256:fed30480d7d6c884303287bde864980a4b137b60553ffbcf9ab4a233b7a73d94", size = 1120122, upload-time = "2026-02-24T20:02:05.669Z" },
+]
+
+[[package]]
+name = "openai-harmony"
+version = "0.0.8"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "pydantic" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3e/92/2d038d096f29179c7c9571b431f9e739f87a487121901725e23fe338dd9d/openai_harmony-0.0.8.tar.gz", hash = "sha256:6e43f98e6c242fa2de6f8ea12eab24af63fa2ed3e89c06341fb9d92632c5cbdf", size = 284777, upload-time = "2025-11-05T19:07:06.727Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/45/c6/2502f416d46be3ec08bb66d696cccffb57781a499e3ff2e4d7c174af4e8f/openai_harmony-0.0.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:029ec25ca74abe48fdb58eb9fdd2a8c1618581fc33ce8e5653f8a1ffbfbd9326", size = 2627806, upload-time = "2025-11-05T19:06:57.063Z" },
+ { url = "https://files.pythonhosted.org/packages/d3/d2/ce6953ca87db9cae3e775024184da7d1c5cb88cead19a2d75b42f00a959c/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4f709815924ec325b9a890e6ab2bbb0ceec8e319a4e257328eb752cf36b2efc", size = 2948463, upload-time = "2025-11-05T19:06:48.17Z" },
+ { url = "https://files.pythonhosted.org/packages/fa/4c/b553c9651662d6ce102ca7f3629d268b23df1abe5841e24bed81e8a8e949/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5cfcfd963b50a41fc656c84d3440ca6eecdccd6c552158ce790b8f2e33dfb5a9", size = 2704083, upload-time = "2025-11-05T19:06:50.205Z" },
+ { url = "https://files.pythonhosted.org/packages/9b/af/4eec8f9ab9c27bcdb444460c72cf43011d176fc44c79d6e113094ca1e152/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a3a16972aa1cee38ea958470cd04ac9a2d5ac38fdcf77ab686611246220c158", size = 2959765, upload-time = "2025-11-05T19:06:53.62Z" },
+ { url = "https://files.pythonhosted.org/packages/11/3c/33f3374e4624e0e776f6b13b73c45a7ead7f9c4529f8369ed5bfcaa30cac/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b4d5cfa168e74d08f8ba6d58a7e49bc7daef4d58951ec69b66b0d56f4927a68d", size = 3427031, upload-time = "2025-11-05T19:06:51.829Z" },
+ { url = "https://files.pythonhosted.org/packages/25/3f/1a192b93bb47c6b44cd98ba8cc1d3d2a9308f1bb700c3017e6352da11bda/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c007d277218a50db8839e599ed78e0fffe5130f614c3f6d93ae257f282071a29", size = 2953260, upload-time = "2025-11-05T19:06:55.406Z" },
+ { url = "https://files.pythonhosted.org/packages/5b/f8/93b582cad3531797c3db7c2db5400fd841538ccddfd9f5e3df61be99a630/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8565d4f5a0638da1bffde29832ed63c9e695c558611053add3b2dc0b56c92dbc", size = 3127044, upload-time = "2025-11-05T19:06:59.553Z" },
+ { url = "https://files.pythonhosted.org/packages/1d/10/4327dbf87f75ae813405fd9a9b4a5cde63d506ffed0a096a440a4cabd89c/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:cbaa3bda75ef0d8836e1f8cc84af62f971b1d756d740efc95c38c3e04c0bfde2", size = 2932931, upload-time = "2025-11-05T19:07:01.437Z" },
+ { url = "https://files.pythonhosted.org/packages/8a/c8/1774eec4f6f360ef57618fb8f52e3d3af245b2491bd0297513aa09eec04b/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:772922a9bd24e133950fad71eb1550836f415a88e8c77870e12d0c3bd688ddc2", size = 2996140, upload-time = "2025-11-05T19:07:03.438Z" },
+ { url = "https://files.pythonhosted.org/packages/60/c3/3d1e01e2dba517a91760e4a03e4f20ffc75039a6fe584d0e6f9b5c78fd15/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:007b0476a1f331f8130783f901f1da6f5a7057af1a4891f1b6a31dec364189b5", size = 3205080, upload-time = "2025-11-05T19:07:05.078Z" },
+ { url = "https://files.pythonhosted.org/packages/14/63/119de431572d7c70a7bf1037034a9be6ed0a7502a7498ba7302bca5b3242/openai_harmony-0.0.8-cp38-abi3-win32.whl", hash = "sha256:a9b5f893326b28d9e935ade14b4f655f5a840942473bc89b201c25f7a15af9cf", size = 2082457, upload-time = "2025-11-05T19:07:09.631Z" },
+ { url = "https://files.pythonhosted.org/packages/40/1f/c83cf5a206c263ee70448a5ae4264682555f4d0b5bed0d2cc6ca1108103d/openai_harmony-0.0.8-cp38-abi3-win_amd64.whl", hash = "sha256:39d44f0d8f466bd56698e7ead708bead3141e27b9b87e3ab7d5a6d0e4a869ee5", size = 2438369, upload-time = "2025-11-05T19:07:08.1Z" },
+]
+
+[[package]]
+name = "opencv-python-headless"
+version = "4.13.0.92"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "numpy" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/79/42/2310883be3b8826ac58c3f2787b9358a2d46923d61f88fedf930bc59c60c/opencv_python_headless-4.13.0.92-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:1a7d040ac656c11b8c38677cc8cccdc149f98535089dbe5b081e80a4e5903209", size = 46247192, upload-time = "2026-02-05T07:01:35.187Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/1e/6f9e38005a6f7f22af785df42a43139d0e20f169eb5787ce8be37ee7fcc9/opencv_python_headless-4.13.0.92-cp37-abi3-macosx_14_0_x86_64.whl", hash = "sha256:3e0a6f0a37994ec6ce5f59e936be21d5d6384a4556f2d2da9c2f9c5dc948394c", size = 32568914, upload-time = "2026-02-05T07:01:51.989Z" },
+ { url = "https://files.pythonhosted.org/packages/21/76/9417a6aef9def70e467a5bf560579f816148a4c658b7d525581b356eda9e/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c8cfc8e87ed452b5cecb9419473ee5560a989859fe1d10d1ce11ae87b09a2cb", size = 33703709, upload-time = "2026-02-05T10:24:46.469Z" },
+ { url = "https://files.pythonhosted.org/packages/92/ce/bd17ff5772938267fd49716e94ca24f616ff4cb1ff4c6be13085108037be/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0525a3d2c0b46c611e2130b5fdebc94cf404845d8fa64d2f3a3b679572a5bd22", size = 56016764, upload-time = "2026-02-05T10:26:48.904Z" },
+ { url = "https://files.pythonhosted.org/packages/8f/b4/b7bcbf7c874665825a8c8e1097e93ea25d1f1d210a3e20d4451d01da30aa/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eb60e36b237b1ebd40a912da5384b348df8ed534f6f644d8e0b4f103e272ba7d", size = 35010236, upload-time = "2026-02-05T10:28:11.031Z" },
+ { url = "https://files.pythonhosted.org/packages/4b/33/b5db29a6c00eb8f50708110d8d453747ca125c8b805bc437b289dbdcc057/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0bd48544f77c68b2941392fcdf9bcd2b9cdf00e98cb8c29b2455d194763cf99e", size = 60391106, upload-time = "2026-02-05T10:30:14.236Z" },
+ { url = "https://files.pythonhosted.org/packages/fb/c3/52cfea47cd33e53e8c0fbd6e7c800b457245c1fda7d61660b4ffe9596a7f/opencv_python_headless-4.13.0.92-cp37-abi3-win32.whl", hash = "sha256:a7cf08e5b191f4ebb530791acc0825a7986e0d0dee2a3c491184bd8599848a4b", size = 30812232, upload-time = "2026-02-05T07:02:29.594Z" },
+ { url = "https://files.pythonhosted.org/packages/4a/90/b338326131ccb2aaa3c2c85d00f41822c0050139a4bfe723cfd95455bd2d/opencv_python_headless-4.13.0.92-cp37-abi3-win_amd64.whl", hash = "sha256:77a82fe35ddcec0f62c15f2ba8a12ecc2ed4207c17b0902c7a3151ae29f37fb6", size = 40070414, upload-time = "2026-02-05T07:02:26.448Z" },
+]
+
+[[package]]
+name = "opentelemetry-api"
+version = "1.40.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "importlib-metadata" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/2c/1d/4049a9e8698361cc1a1aa03a6c59e4fa4c71e0c0f94a30f988a6876a2ae6/opentelemetry_api-1.40.0.tar.gz", hash = "sha256:159be641c0b04d11e9ecd576906462773eb97ae1b657730f0ecf64d32071569f", size = 70851, upload-time = "2026-03-04T14:17:21.555Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5f/bf/93795954016c522008da367da292adceed71cca6ee1717e1d64c83089099/opentelemetry_api-1.40.0-py3-none-any.whl", hash = "sha256:82dd69331ae74b06f6a874704be0cfaa49a1650e1537d4a813b86ecef7d0ecf9", size = 68676, upload-time = "2026-03-04T14:17:01.24Z" },
+]
+
+[[package]]
+name = "opentelemetry-exporter-otlp"
+version = "1.40.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-exporter-otlp-proto-grpc" },
+ { name = "opentelemetry-exporter-otlp-proto-http" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d0/37/b6708e0eff5c5fb9aba2e0ea09f7f3bcbfd12a592d2a780241b5f6014df7/opentelemetry_exporter_otlp-1.40.0.tar.gz", hash = "sha256:7caa0870b95e2fcb59d64e16e2b639ecffb07771b6cd0000b5d12e5e4fef765a", size = 6152, upload-time = "2026-03-04T14:17:23.235Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2d/fc/aea77c28d9f3ffef2fdafdc3f4a235aee4091d262ddabd25882f47ce5c5f/opentelemetry_exporter_otlp-1.40.0-py3-none-any.whl", hash = "sha256:48c87e539ec9afb30dc443775a1334cc5487de2f72a770a4c00b1610bf6c697d", size = 7023, upload-time = "2026-03-04T14:17:03.612Z" },
+]
+
+[[package]]
+name = "opentelemetry-exporter-otlp-proto-common"
+version = "1.40.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-proto" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/51/bc/1559d46557fe6eca0b46c88d4c2676285f1f3be2e8d06bb5d15fbffc814a/opentelemetry_exporter_otlp_proto_common-1.40.0.tar.gz", hash = "sha256:1cbee86a4064790b362a86601ee7934f368b81cd4cc2f2e163902a6e7818a0fa", size = 20416, upload-time = "2026-03-04T14:17:23.801Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/8b/ca/8f122055c97a932311a3f640273f084e738008933503d0c2563cd5d591fc/opentelemetry_exporter_otlp_proto_common-1.40.0-py3-none-any.whl", hash = "sha256:7081ff453835a82417bf38dccf122c827c3cbc94f2079b03bba02a3165f25149", size = 18369, upload-time = "2026-03-04T14:17:04.796Z" },
+]
+
+[[package]]
+name = "opentelemetry-exporter-otlp-proto-grpc"
+version = "1.40.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "googleapis-common-protos" },
+ { name = "grpcio" },
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-exporter-otlp-proto-common" },
+ { name = "opentelemetry-proto" },
+ { name = "opentelemetry-sdk" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/8f/7f/b9e60435cfcc7590fa87436edad6822240dddbc184643a2a005301cc31f4/opentelemetry_exporter_otlp_proto_grpc-1.40.0.tar.gz", hash = "sha256:bd4015183e40b635b3dab8da528b27161ba83bf4ef545776b196f0fb4ec47740", size = 25759, upload-time = "2026-03-04T14:17:24.4Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/96/6f/7ee0980afcbdcd2d40362da16f7f9796bd083bf7f0b8e038abfbc0300f5d/opentelemetry_exporter_otlp_proto_grpc-1.40.0-py3-none-any.whl", hash = "sha256:2aa0ca53483fe0cf6405087a7491472b70335bc5c7944378a0a8e72e86995c52", size = 20304, upload-time = "2026-03-04T14:17:05.942Z" },
+]
+
+[[package]]
+name = "opentelemetry-exporter-otlp-proto-http"
+version = "1.40.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "googleapis-common-protos" },
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-exporter-otlp-proto-common" },
+ { name = "opentelemetry-proto" },
+ { name = "opentelemetry-sdk" },
+ { name = "requests" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/2e/fa/73d50e2c15c56be4d000c98e24221d494674b0cc95524e2a8cb3856d95a4/opentelemetry_exporter_otlp_proto_http-1.40.0.tar.gz", hash = "sha256:db48f5e0f33217588bbc00274a31517ba830da576e59503507c839b38fa0869c", size = 17772, upload-time = "2026-03-04T14:17:25.324Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a0/3a/8865d6754e61c9fb170cdd530a124a53769ee5f740236064816eb0ca7301/opentelemetry_exporter_otlp_proto_http-1.40.0-py3-none-any.whl", hash = "sha256:a8d1dab28f504c5d96577d6509f80a8150e44e8f45f82cdbe0e34c99ab040069", size = 19960, upload-time = "2026-03-04T14:17:07.153Z" },
+]
+
+[[package]]
+name = "opentelemetry-proto"
+version = "1.40.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/4c/77/dd38991db037fdfce45849491cb61de5ab000f49824a00230afb112a4392/opentelemetry_proto-1.40.0.tar.gz", hash = "sha256:03f639ca129ba513f5819810f5b1f42bcb371391405d99c168fe6937c62febcd", size = 45667, upload-time = "2026-03-04T14:17:31.194Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b9/b2/189b2577dde745b15625b3214302605b1353436219d42b7912e77fa8dc24/opentelemetry_proto-1.40.0-py3-none-any.whl", hash = "sha256:266c4385d88923a23d63e353e9761af0f47a6ed0d486979777fe4de59dc9b25f", size = 72073, upload-time = "2026-03-04T14:17:16.673Z" },
+]
+
+[[package]]
+name = "opentelemetry-sdk"
+version = "1.40.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-semantic-conventions" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/58/fd/3c3125b20ba18ce2155ba9ea74acb0ae5d25f8cd39cfd37455601b7955cc/opentelemetry_sdk-1.40.0.tar.gz", hash = "sha256:18e9f5ec20d859d268c7cb3c5198c8d105d073714db3de50b593b8c1345a48f2", size = 184252, upload-time = "2026-03-04T14:17:31.87Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2c/c5/6a852903d8bfac758c6dc6e9a68b015d3c33f2f1be5e9591e0f4b69c7e0a/opentelemetry_sdk-1.40.0-py3-none-any.whl", hash = "sha256:787d2154a71f4b3d81f20524a8ce061b7db667d24e46753f32a7bc48f1c1f3f1", size = 141951, upload-time = "2026-03-04T14:17:17.961Z" },
+]
+
+[[package]]
+name = "opentelemetry-semantic-conventions"
+version = "0.61b0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-api" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/6d/c0/4ae7973f3c2cfd2b6e321f1675626f0dab0a97027cc7a297474c9c8f3d04/opentelemetry_semantic_conventions-0.61b0.tar.gz", hash = "sha256:072f65473c5d7c6dc0355b27d6c9d1a679d63b6d4b4b16a9773062cb7e31192a", size = 145755, upload-time = "2026-03-04T14:17:32.664Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b2/37/cc6a55e448deaa9b27377d087da8615a3416d8ad523d5960b78dbeadd02a/opentelemetry_semantic_conventions-0.61b0-py3-none-any.whl", hash = "sha256:fa530a96be229795f8cef353739b618148b0fe2b4b3f005e60e262926c4d38e2", size = 231621, upload-time = "2026-03-04T14:17:19.33Z" },
+]
+
+[[package]]
+name = "opentelemetry-semantic-conventions-ai"
+version = "0.4.15"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-sdk" },
+ { name = "opentelemetry-semantic-conventions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e9/75/455c15f8360b475dd31101a87eab316420388486f7941bf019cbf4e63d5b/opentelemetry_semantic_conventions_ai-0.4.15.tar.gz", hash = "sha256:12de172d1e11d21c6e82bbf578c7e8a713589a7fda76af9ed785632564a28b81", size = 18595, upload-time = "2026-03-02T15:36:50.254Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/12/49/819fb212386f77cfd93f81bd916d674f0e735f87c8ac2262ed14e3b852c2/opentelemetry_semantic_conventions_ai-0.4.15-py3-none-any.whl", hash = "sha256:011461f1fba30f27035c49ab3b8344367adc72da0a6c8d3c7428303c6779edc9", size = 5999, upload-time = "2026-03-02T15:36:51.44Z" },
+]
+
+[[package]]
+name = "outlines-core"
+version = "0.2.11"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1a/d3/e04e9145f8f806723dec9b9e5227ad695a3efcd3ced7794cf7c22b15df5e/outlines_core-0.2.11.tar.gz", hash = "sha256:dfce56f717ff5083e54cbcfdb66cad243365437fccbb5509adaa7e31e030f1d8", size = 197263, upload-time = "2025-05-19T10:12:51.719Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5f/2c/c7636823244c70e2960060bf9bd978248dffb55c5e7c91c46d18354b2a24/outlines_core-0.2.11-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:4a9db4872bae083631d720994f4cee603bce0536b33d5a988814576863b657cf", size = 1957668, upload-time = "2025-05-19T10:12:18.29Z" },
+ { url = "https://files.pythonhosted.org/packages/c7/09/5c62047da139d722317a444a4d01cd5f11943a8c2eaecce784341dd0844a/outlines_core-0.2.11-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:8359a45c59f6a8f2eb717245806501a59044c75f6ea8bd08faaa131cc8cdec45", size = 2130493, upload-time = "2025-05-19T10:12:19.537Z" },
+ { url = "https://files.pythonhosted.org/packages/89/7a/d6a2810f90e37d550168e0c0a9a915086ea721444727e3ca2c630898d1ef/outlines_core-0.2.11-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:5d26a46591377340e0b870b8a96ea8341058341a62ee0bded9098e0c88dd24f4", size = 1956804, upload-time = "2025-05-19T10:12:20.755Z" },
+ { url = "https://files.pythonhosted.org/packages/ca/ea/339e6c273b5581128c3b7ca27d428d8993c3085912af1a467aa32ef0e9d1/outlines_core-0.2.11-cp312-cp312-macosx_15_0_x86_64.whl", hash = "sha256:ae460a34675fb11d92a5c605a480fbae4cd6c1b2d11b3698da64a7fcaba64dcf", size = 2127085, upload-time = "2025-05-19T10:12:22.02Z" },
+ { url = "https://files.pythonhosted.org/packages/92/c7/a65d1fddf49830ebc41422294eacde35286d9f68994a8aa905cb14f5aade/outlines_core-0.2.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86df9740368866295077346440d911df4972da2b3f1f54b8125e6f329e8a8891", size = 2287677, upload-time = "2025-05-19T10:12:24.24Z" },
+ { url = "https://files.pythonhosted.org/packages/23/79/8795aed8be9b77dd69d78e7cfbfcf28c179e6b08da6e56bbbf48a09fe55f/outlines_core-0.2.11-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:96ce4dd78f106799be4a0a5795cefd1352806162973756a4b6fce4bb6eddd7e4", size = 2113000, upload-time = "2025-05-19T10:12:25.446Z" },
+ { url = "https://files.pythonhosted.org/packages/59/e3/cbe9294b06d92ee1892dbb6f2125d833d68e8629d45d080d6daba54eec2d/outlines_core-0.2.11-cp312-cp312-win32.whl", hash = "sha256:358db161cce3650ba822e118dcf0a1efa571c7deb4864ab9d64ca2c9cca7425d", size = 1765703, upload-time = "2025-05-19T10:12:26.693Z" },
+ { url = "https://files.pythonhosted.org/packages/1d/c9/ed3cf362515fac16e313368b9b2f2497051f4ded88679205830b6f889f54/outlines_core-0.2.11-cp312-cp312-win_amd64.whl", hash = "sha256:231f9d20d2630c70665345821780d7808b29539620a75c99f65113b518c51032", size = 2060945, upload-time = "2025-05-19T10:12:28.294Z" },
+]
+
+[[package]]
+name = "packaging"
+version = "26.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" },
+]
+
+[[package]]
+name = "pandas"
+version = "3.0.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "numpy" },
+ { name = "python-dateutil" },
+ { name = "tzdata", marker = "sys_platform == 'emscripten' or sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/2e/0c/b28ed414f080ee0ad153f848586d61d1878f91689950f037f976ce15f6c8/pandas-3.0.1.tar.gz", hash = "sha256:4186a699674af418f655dbd420ed87f50d56b4cd6603784279d9eef6627823c8", size = 4641901, upload-time = "2026-02-17T22:20:16.434Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/37/51/b467209c08dae2c624873d7491ea47d2b47336e5403309d433ea79c38571/pandas-3.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:476f84f8c20c9f5bc47252b66b4bb25e1a9fc2fa98cead96744d8116cb85771d", size = 10344357, upload-time = "2026-02-17T22:18:38.262Z" },
+ { url = "https://files.pythonhosted.org/packages/7c/f1/e2567ffc8951ab371db2e40b2fe068e36b81d8cf3260f06ae508700e5504/pandas-3.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0ab749dfba921edf641d4036c4c21c0b3ea70fea478165cb98a998fb2a261955", size = 9884543, upload-time = "2026-02-17T22:18:41.476Z" },
+ { url = "https://files.pythonhosted.org/packages/d7/39/327802e0b6d693182403c144edacbc27eb82907b57062f23ef5a4c4a5ea7/pandas-3.0.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8e36891080b87823aff3640c78649b91b8ff6eea3c0d70aeabd72ea43ab069b", size = 10396030, upload-time = "2026-02-17T22:18:43.822Z" },
+ { url = "https://files.pythonhosted.org/packages/3d/fe/89d77e424365280b79d99b3e1e7d606f5165af2f2ecfaf0c6d24c799d607/pandas-3.0.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:532527a701281b9dd371e2f582ed9094f4c12dd9ffb82c0c54ee28d8ac9520c4", size = 10876435, upload-time = "2026-02-17T22:18:45.954Z" },
+ { url = "https://files.pythonhosted.org/packages/b5/a6/2a75320849dd154a793f69c951db759aedb8d1dd3939eeacda9bdcfa1629/pandas-3.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:356e5c055ed9b0da1580d465657bc7d00635af4fd47f30afb23025352ba764d1", size = 11405133, upload-time = "2026-02-17T22:18:48.533Z" },
+ { url = "https://files.pythonhosted.org/packages/58/53/1d68fafb2e02d7881df66aa53be4cd748d25cbe311f3b3c85c93ea5d30ca/pandas-3.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9d810036895f9ad6345b8f2a338dd6998a74e8483847403582cab67745bff821", size = 11932065, upload-time = "2026-02-17T22:18:50.837Z" },
+ { url = "https://files.pythonhosted.org/packages/75/08/67cc404b3a966b6df27b38370ddd96b3b023030b572283d035181854aac5/pandas-3.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:536232a5fe26dd989bd633e7a0c450705fdc86a207fec7254a55e9a22950fe43", size = 9741627, upload-time = "2026-02-17T22:18:53.905Z" },
+ { url = "https://files.pythonhosted.org/packages/86/4f/caf9952948fb00d23795f09b893d11f1cacb384e666854d87249530f7cbe/pandas-3.0.1-cp312-cp312-win_arm64.whl", hash = "sha256:0f463ebfd8de7f326d38037c7363c6dacb857c5881ab8961fb387804d6daf2f7", size = 9052483, upload-time = "2026-02-17T22:18:57.31Z" },
+]
+
+[[package]]
+name = "partial-json-parser"
+version = "0.2.1.1.post7"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6a/6d/eed37d7ebc1e0bcd27b831c0cf1fe94881934316187c4b30d23f29ea0bd4/partial_json_parser-0.2.1.1.post7.tar.gz", hash = "sha256:86590e1ba6bcb6739a2dfc17d2323f028cb5884f4c6ce23db376999132c9a922", size = 10296, upload-time = "2025-11-17T07:27:41.202Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/42/32/658973117bf0fd82a24abbfb94fe73a5e86216e49342985e10acce54775a/partial_json_parser-0.2.1.1.post7-py3-none-any.whl", hash = "sha256:145119e5eabcf80cbb13844a6b50a85c68bf99d376f8ed771e2a3c3b03e653ae", size = 10877, upload-time = "2025-11-17T07:27:40.457Z" },
+]
+
+[[package]]
+name = "pillow"
+version = "12.1.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1f/42/5c74462b4fd957fcd7b13b04fb3205ff8349236ea74c7c375766d6c82288/pillow-12.1.1.tar.gz", hash = "sha256:9ad8fa5937ab05218e2b6a4cff30295ad35afd2f83ac592e68c0d871bb0fdbc4", size = 46980264, upload-time = "2026-02-11T04:23:07.146Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/07/d3/8df65da0d4df36b094351dce696f2989bec731d4f10e743b1c5f4da4d3bf/pillow-12.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ab323b787d6e18b3d91a72fc99b1a2c28651e4358749842b8f8dfacd28ef2052", size = 5262803, upload-time = "2026-02-11T04:20:47.653Z" },
+ { url = "https://files.pythonhosted.org/packages/d6/71/5026395b290ff404b836e636f51d7297e6c83beceaa87c592718747e670f/pillow-12.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:adebb5bee0f0af4909c30db0d890c773d1a92ffe83da908e2e9e720f8edf3984", size = 4657601, upload-time = "2026-02-11T04:20:49.328Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/2e/1001613d941c67442f745aff0f7cc66dd8df9a9c084eb497e6a543ee6f7e/pillow-12.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb66b7cc26f50977108790e2456b7921e773f23db5630261102233eb355a3b79", size = 6234995, upload-time = "2026-02-11T04:20:51.032Z" },
+ { url = "https://files.pythonhosted.org/packages/07/26/246ab11455b2549b9233dbd44d358d033a2f780fa9007b61a913c5b2d24e/pillow-12.1.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aee2810642b2898bb187ced9b349e95d2a7272930796e022efaf12e99dccd293", size = 8045012, upload-time = "2026-02-11T04:20:52.882Z" },
+ { url = "https://files.pythonhosted.org/packages/b2/8b/07587069c27be7535ac1fe33874e32de118fbd34e2a73b7f83436a88368c/pillow-12.1.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a0b1cd6232e2b618adcc54d9882e4e662a089d5768cd188f7c245b4c8c44a397", size = 6349638, upload-time = "2026-02-11T04:20:54.444Z" },
+ { url = "https://files.pythonhosted.org/packages/ff/79/6df7b2ee763d619cda2fb4fea498e5f79d984dae304d45a8999b80d6cf5c/pillow-12.1.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7aac39bcf8d4770d089588a2e1dd111cbaa42df5a94be3114222057d68336bd0", size = 7041540, upload-time = "2026-02-11T04:20:55.97Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/5e/2ba19e7e7236d7529f4d873bdaf317a318896bac289abebd4bb00ef247f0/pillow-12.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ab174cd7d29a62dd139c44bf74b698039328f45cb03b4596c43473a46656b2f3", size = 6462613, upload-time = "2026-02-11T04:20:57.542Z" },
+ { url = "https://files.pythonhosted.org/packages/03/03/31216ec124bb5c3dacd74ce8efff4cc7f52643653bad4825f8f08c697743/pillow-12.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:339ffdcb7cbeaa08221cd401d517d4b1fe7a9ed5d400e4a8039719238620ca35", size = 7166745, upload-time = "2026-02-11T04:20:59.196Z" },
+ { url = "https://files.pythonhosted.org/packages/1f/e7/7c4552d80052337eb28653b617eafdef39adfb137c49dd7e831b8dc13bc5/pillow-12.1.1-cp312-cp312-win32.whl", hash = "sha256:5d1f9575a12bed9e9eedd9a4972834b08c97a352bd17955ccdebfeca5913fa0a", size = 6328823, upload-time = "2026-02-11T04:21:01.385Z" },
+ { url = "https://files.pythonhosted.org/packages/3d/17/688626d192d7261bbbf98846fc98995726bddc2c945344b65bec3a29d731/pillow-12.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:21329ec8c96c6e979cd0dfd29406c40c1d52521a90544463057d2aaa937d66a6", size = 7033367, upload-time = "2026-02-11T04:21:03.536Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/fe/a0ef1f73f939b0eca03ee2c108d0043a87468664770612602c63266a43c4/pillow-12.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:af9a332e572978f0218686636610555ae3defd1633597be015ed50289a03c523", size = 2453811, upload-time = "2026-02-11T04:21:05.116Z" },
+]
+
+[[package]]
+name = "prometheus-client"
+version = "0.24.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f0/58/a794d23feb6b00fc0c72787d7e87d872a6730dd9ed7c7b3e954637d8f280/prometheus_client-0.24.1.tar.gz", hash = "sha256:7e0ced7fbbd40f7b84962d5d2ab6f17ef88a72504dcf7c0b40737b43b2a461f9", size = 85616, upload-time = "2026-01-14T15:26:26.965Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/74/c3/24a2f845e3917201628ecaba4f18bab4d18a337834c1df2a159ee9d22a42/prometheus_client-0.24.1-py3-none-any.whl", hash = "sha256:150db128af71a5c2482b36e588fc8a6b95e498750da4b17065947c16070f4055", size = 64057, upload-time = "2026-01-14T15:26:24.42Z" },
+]
+
+[[package]]
+name = "prometheus-fastapi-instrumentator"
+version = "7.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "prometheus-client" },
+ { name = "starlette" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/69/6d/24d53033cf93826aa7857699a4450c1c67e5b9c710e925b1ed2b320c04df/prometheus_fastapi_instrumentator-7.1.0.tar.gz", hash = "sha256:be7cd61eeea4e5912aeccb4261c6631b3f227d8924542d79eaf5af3f439cbe5e", size = 20220, upload-time = "2025-03-19T19:35:05.351Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/27/72/0824c18f3bc75810f55dacc2dd933f6ec829771180245ae3cc976195dec0/prometheus_fastapi_instrumentator-7.1.0-py3-none-any.whl", hash = "sha256:978130f3c0bb7b8ebcc90d35516a6fe13e02d2eb358c8f83887cdef7020c31e9", size = 19296, upload-time = "2025-03-19T19:35:04.323Z" },
+]
+
+[[package]]
+name = "propcache"
+version = "0.4.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/9e/da/e9fc233cf63743258bff22b3dfa7ea5baef7b5bc324af47a0ad89b8ffc6f/propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d", size = 46442, upload-time = "2025-10-08T19:49:02.291Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a2/0f/f17b1b2b221d5ca28b4b876e8bb046ac40466513960646bda8e1853cdfa2/propcache-0.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e153e9cd40cc8945138822807139367f256f89c6810c2634a4f6902b52d3b4e2", size = 80061, upload-time = "2025-10-08T19:46:46.075Z" },
+ { url = "https://files.pythonhosted.org/packages/76/47/8ccf75935f51448ba9a16a71b783eb7ef6b9ee60f5d14c7f8a8a79fbeed7/propcache-0.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cd547953428f7abb73c5ad82cbb32109566204260d98e41e5dfdc682eb7f8403", size = 46037, upload-time = "2025-10-08T19:46:47.23Z" },
+ { url = "https://files.pythonhosted.org/packages/0a/b6/5c9a0e42df4d00bfb4a3cbbe5cf9f54260300c88a0e9af1f47ca5ce17ac0/propcache-0.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f048da1b4f243fc44f205dfd320933a951b8d89e0afd4c7cacc762a8b9165207", size = 47324, upload-time = "2025-10-08T19:46:48.384Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/d3/6c7ee328b39a81ee877c962469f1e795f9db87f925251efeb0545e0020d0/propcache-0.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ec17c65562a827bba85e3872ead335f95405ea1674860d96483a02f5c698fa72", size = 225505, upload-time = "2025-10-08T19:46:50.055Z" },
+ { url = "https://files.pythonhosted.org/packages/01/5d/1c53f4563490b1d06a684742cc6076ef944bc6457df6051b7d1a877c057b/propcache-0.4.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:405aac25c6394ef275dee4c709be43745d36674b223ba4eb7144bf4d691b7367", size = 230242, upload-time = "2025-10-08T19:46:51.815Z" },
+ { url = "https://files.pythonhosted.org/packages/20/e1/ce4620633b0e2422207c3cb774a0ee61cac13abc6217763a7b9e2e3f4a12/propcache-0.4.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0013cb6f8dde4b2a2f66903b8ba740bdfe378c943c4377a200551ceb27f379e4", size = 238474, upload-time = "2025-10-08T19:46:53.208Z" },
+ { url = "https://files.pythonhosted.org/packages/46/4b/3aae6835b8e5f44ea6a68348ad90f78134047b503765087be2f9912140ea/propcache-0.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15932ab57837c3368b024473a525e25d316d8353016e7cc0e5ba9eb343fbb1cf", size = 221575, upload-time = "2025-10-08T19:46:54.511Z" },
+ { url = "https://files.pythonhosted.org/packages/6e/a5/8a5e8678bcc9d3a1a15b9a29165640d64762d424a16af543f00629c87338/propcache-0.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:031dce78b9dc099f4c29785d9cf5577a3faf9ebf74ecbd3c856a7b92768c3df3", size = 216736, upload-time = "2025-10-08T19:46:56.212Z" },
+ { url = "https://files.pythonhosted.org/packages/f1/63/b7b215eddeac83ca1c6b934f89d09a625aa9ee4ba158338854c87210cc36/propcache-0.4.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:ab08df6c9a035bee56e31af99be621526bd237bea9f32def431c656b29e41778", size = 213019, upload-time = "2025-10-08T19:46:57.595Z" },
+ { url = "https://files.pythonhosted.org/packages/57/74/f580099a58c8af587cac7ba19ee7cb418506342fbbe2d4a4401661cca886/propcache-0.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4d7af63f9f93fe593afbf104c21b3b15868efb2c21d07d8732c0c4287e66b6a6", size = 220376, upload-time = "2025-10-08T19:46:59.067Z" },
+ { url = "https://files.pythonhosted.org/packages/c4/ee/542f1313aff7eaf19c2bb758c5d0560d2683dac001a1c96d0774af799843/propcache-0.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cfc27c945f422e8b5071b6e93169679e4eb5bf73bbcbf1ba3ae3a83d2f78ebd9", size = 226988, upload-time = "2025-10-08T19:47:00.544Z" },
+ { url = "https://files.pythonhosted.org/packages/8f/18/9c6b015dd9c6930f6ce2229e1f02fb35298b847f2087ea2b436a5bfa7287/propcache-0.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:35c3277624a080cc6ec6f847cbbbb5b49affa3598c4535a0a4682a697aaa5c75", size = 215615, upload-time = "2025-10-08T19:47:01.968Z" },
+ { url = "https://files.pythonhosted.org/packages/80/9e/e7b85720b98c45a45e1fca6a177024934dc9bc5f4d5dd04207f216fc33ed/propcache-0.4.1-cp312-cp312-win32.whl", hash = "sha256:671538c2262dadb5ba6395e26c1731e1d52534bfe9ae56d0b5573ce539266aa8", size = 38066, upload-time = "2025-10-08T19:47:03.503Z" },
+ { url = "https://files.pythonhosted.org/packages/54/09/d19cff2a5aaac632ec8fc03737b223597b1e347416934c1b3a7df079784c/propcache-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:cb2d222e72399fcf5890d1d5cc1060857b9b236adff2792ff48ca2dfd46c81db", size = 41655, upload-time = "2025-10-08T19:47:04.973Z" },
+ { url = "https://files.pythonhosted.org/packages/68/ab/6b5c191bb5de08036a8c697b265d4ca76148efb10fa162f14af14fb5f076/propcache-0.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:204483131fb222bdaaeeea9f9e6c6ed0cac32731f75dfc1d4a567fc1926477c1", size = 37789, upload-time = "2025-10-08T19:47:06.077Z" },
+ { url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" },
+]
+
+[[package]]
+name = "protobuf"
+version = "5.29.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7e/57/394a763c103e0edf87f0938dafcd918d53b4c011dfc5c8ae80f3b0452dbb/protobuf-5.29.6.tar.gz", hash = "sha256:da9ee6a5424b6b30fd5e45c5ea663aef540ca95f9ad99d1e887e819cdf9b8723", size = 425623, upload-time = "2026-02-04T22:54:40.584Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d4/88/9ee58ff7863c479d6f8346686d4636dd4c415b0cbeed7a6a7d0617639c2a/protobuf-5.29.6-cp310-abi3-win32.whl", hash = "sha256:62e8a3114992c7c647bce37dcc93647575fc52d50e48de30c6fcb28a6a291eb1", size = 423357, upload-time = "2026-02-04T22:54:25.805Z" },
+ { url = "https://files.pythonhosted.org/packages/1c/66/2dc736a4d576847134fb6d80bd995c569b13cdc7b815d669050bf0ce2d2c/protobuf-5.29.6-cp310-abi3-win_amd64.whl", hash = "sha256:7e6ad413275be172f67fdee0f43484b6de5a904cc1c3ea9804cb6fe2ff366eda", size = 435175, upload-time = "2026-02-04T22:54:28.592Z" },
+ { url = "https://files.pythonhosted.org/packages/06/db/49b05966fd208ae3f44dcd33837b6243b4915c57561d730a43f881f24dea/protobuf-5.29.6-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:b5a169e664b4057183a34bdc424540e86eea47560f3c123a0d64de4e137f9269", size = 418619, upload-time = "2026-02-04T22:54:30.266Z" },
+ { url = "https://files.pythonhosted.org/packages/b7/d7/48cbf6b0c3c39761e47a99cb483405f0fde2be22cf00d71ef316ce52b458/protobuf-5.29.6-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:a8866b2cff111f0f863c1b3b9e7572dc7eaea23a7fae27f6fc613304046483e6", size = 320284, upload-time = "2026-02-04T22:54:31.782Z" },
+ { url = "https://files.pythonhosted.org/packages/e3/dd/cadd6ec43069247d91f6345fa7a0d2858bef6af366dbd7ba8f05d2c77d3b/protobuf-5.29.6-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:e3387f44798ac1106af0233c04fb8abf543772ff241169946f698b3a9a3d3ab9", size = 320478, upload-time = "2026-02-04T22:54:32.909Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/cb/e3065b447186cb70aa65acc70c86baf482d82bf75625bf5a2c4f6919c6a3/protobuf-5.29.6-py3-none-any.whl", hash = "sha256:6b9edb641441b2da9fa8f428760fc136a49cf97a52076010cf22a2ff73438a86", size = 173126, upload-time = "2026-02-04T22:54:39.462Z" },
+]
+
+[[package]]
+name = "psutil"
+version = "7.2.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" },
+ { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" },
+ { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" },
+ { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" },
+ { url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" },
+ { url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" },
+ { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" },
+]
+
+[[package]]
+name = "py-cpuinfo"
+version = "9.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/37/a8/d832f7293ebb21690860d2e01d8115e5ff6f2ae8bbdc953f0eb0fa4bd2c7/py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690", size = 104716, upload-time = "2022-10-25T20:38:06.303Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" },
+]
+
+[[package]]
+name = "pyarrow"
+version = "23.0.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/88/22/134986a4cc224d593c1afde5494d18ff629393d74cc2eddb176669f234a4/pyarrow-23.0.1.tar.gz", hash = "sha256:b8c5873e33440b2bc2f4a79d2b47017a89c5a24116c055625e6f2ee50523f019", size = 1167336, upload-time = "2026-02-16T10:14:12.39Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/9a/4b/4166bb5abbfe6f750fc60ad337c43ecf61340fa52ab386da6e8dbf9e63c4/pyarrow-23.0.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f4b0dbfa124c0bb161f8b5ebb40f1a680b70279aa0c9901d44a2b5a20806039f", size = 34214575, upload-time = "2026-02-16T10:09:56.225Z" },
+ { url = "https://files.pythonhosted.org/packages/e1/da/3f941e3734ac8088ea588b53e860baeddac8323ea40ce22e3d0baa865cc9/pyarrow-23.0.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:7707d2b6673f7de054e2e83d59f9e805939038eebe1763fe811ee8fa5c0cd1a7", size = 35832540, upload-time = "2026-02-16T10:10:03.428Z" },
+ { url = "https://files.pythonhosted.org/packages/88/7c/3d841c366620e906d54430817531b877ba646310296df42ef697308c2705/pyarrow-23.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:86ff03fb9f1a320266e0de855dee4b17da6794c595d207f89bba40d16b5c78b9", size = 44470940, upload-time = "2026-02-16T10:10:10.704Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/a5/da83046273d990f256cb79796a190bbf7ec999269705ddc609403f8c6b06/pyarrow-23.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:813d99f31275919c383aab17f0f455a04f5a429c261cc411b1e9a8f5e4aaaa05", size = 47586063, upload-time = "2026-02-16T10:10:17.95Z" },
+ { url = "https://files.pythonhosted.org/packages/5b/3c/b7d2ebcff47a514f47f9da1e74b7949138c58cfeb108cdd4ee62f43f0cf3/pyarrow-23.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bf5842f960cddd2ef757d486041d57c96483efc295a8c4a0e20e704cbbf39c67", size = 48173045, upload-time = "2026-02-16T10:10:25.363Z" },
+ { url = "https://files.pythonhosted.org/packages/43/b2/b40961262213beaba6acfc88698eb773dfce32ecdf34d19291db94c2bd73/pyarrow-23.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:564baf97c858ecc03ec01a41062e8f4698abc3e6e2acd79c01c2e97880a19730", size = 50621741, upload-time = "2026-02-16T10:10:33.477Z" },
+ { url = "https://files.pythonhosted.org/packages/f6/70/1fdda42d65b28b078e93d75d371b2185a61da89dda4def8ba6ba41ebdeb4/pyarrow-23.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:07deae7783782ac7250989a7b2ecde9b3c343a643f82e8a4df03d93b633006f0", size = 27620678, upload-time = "2026-02-16T10:10:39.31Z" },
+]
+
+[[package]]
+name = "pyasn1"
+version = "0.6.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fe/b6/6e630dff89739fcd427e3f72b3d905ce0acb85a45d4ec3e2678718a3487f/pyasn1-0.6.2.tar.gz", hash = "sha256:9b59a2b25ba7e4f8197db7686c09fb33e658b98339fadb826e9512629017833b", size = 146586, upload-time = "2026-01-16T18:04:18.534Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/44/b5/a96872e5184f354da9c84ae119971a0a4c221fe9b27a4d94bd43f2596727/pyasn1-0.6.2-py3-none-any.whl", hash = "sha256:1eb26d860996a18e9b6ed05e7aae0e9fc21619fcee6af91cca9bad4fbea224bf", size = 83371, upload-time = "2026-01-16T18:04:17.174Z" },
+]
+
+[[package]]
+name = "pyasn1-modules"
+version = "0.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "pyasn1" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" },
+]
+
+[[package]]
+name = "pybase64"
+version = "1.4.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/aa/b8/4ed5c7ad5ec15b08d35cc79ace6145d5c1ae426e46435f4987379439dfea/pybase64-1.4.3.tar.gz", hash = "sha256:c2ed274c9e0ba9c8f9c4083cfe265e66dd679126cd9c2027965d807352f3f053", size = 137272, upload-time = "2025-12-06T13:27:04.013Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/86/a7/efcaa564f091a2af7f18a83c1c4875b1437db56ba39540451dc85d56f653/pybase64-1.4.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:18d85e5ab8b986bb32d8446aca6258ed80d1bafe3603c437690b352c648f5967", size = 38167, upload-time = "2025-12-06T13:23:16.821Z" },
+ { url = "https://files.pythonhosted.org/packages/db/c7/c7ad35adff2d272bf2930132db2b3eea8c44bb1b1f64eb9b2b8e57cde7b4/pybase64-1.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3f5791a3491d116d0deaf4d83268f48792998519698f8751efb191eac84320e9", size = 31673, upload-time = "2025-12-06T13:23:17.835Z" },
+ { url = "https://files.pythonhosted.org/packages/43/1b/9a8cab0042b464e9a876d5c65fe5127445a2436da36fda64899b119b1a1b/pybase64-1.4.3-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:f0b3f200c3e06316f6bebabd458b4e4bcd4c2ca26af7c0c766614d91968dee27", size = 68210, upload-time = "2025-12-06T13:23:18.813Z" },
+ { url = "https://files.pythonhosted.org/packages/62/f7/965b79ff391ad208b50e412b5d3205ccce372a2d27b7218ae86d5295b105/pybase64-1.4.3-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb632edfd132b3eaf90c39c89aa314beec4e946e210099b57d40311f704e11d4", size = 71599, upload-time = "2025-12-06T13:23:20.195Z" },
+ { url = "https://files.pythonhosted.org/packages/03/4b/a3b5175130b3810bbb8ccfa1edaadbd3afddb9992d877c8a1e2f274b476e/pybase64-1.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:356ef1d74648ce997f5a777cf8f1aefecc1c0b4fe6201e0ef3ec8a08170e1b54", size = 59922, upload-time = "2025-12-06T13:23:21.487Z" },
+ { url = "https://files.pythonhosted.org/packages/da/5d/c38d1572027fc601b62d7a407721688b04b4d065d60ca489912d6893e6cf/pybase64-1.4.3-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:c48361f90db32bacaa5518419d4eb9066ba558013aaf0c7781620279ecddaeb9", size = 56712, upload-time = "2025-12-06T13:23:22.77Z" },
+ { url = "https://files.pythonhosted.org/packages/e7/d4/4e04472fef485caa8f561d904d4d69210a8f8fc1608ea15ebd9012b92655/pybase64-1.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:702bcaa16ae02139d881aeaef5b1c8ffb4a3fae062fe601d1e3835e10310a517", size = 59300, upload-time = "2025-12-06T13:23:24.543Z" },
+ { url = "https://files.pythonhosted.org/packages/86/e7/16e29721b86734b881d09b7e23dfd7c8408ad01a4f4c7525f3b1088e25ec/pybase64-1.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:53d0ffe1847b16b647c6413d34d1de08942b7724273dd57e67dcbdb10c574045", size = 60278, upload-time = "2025-12-06T13:23:25.608Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/02/18515f211d7c046be32070709a8efeeef8a0203de4fd7521e6b56404731b/pybase64-1.4.3-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:9a1792e8b830a92736dae58f0c386062eb038dfe8004fb03ba33b6083d89cd43", size = 54817, upload-time = "2025-12-06T13:23:26.633Z" },
+ { url = "https://files.pythonhosted.org/packages/e7/be/14e29d8e1a481dbff151324c96dd7b5d2688194bb65dc8a00ca0e1ad1e86/pybase64-1.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1d468b1b1ac5ad84875a46eaa458663c3721e8be5f155ade356406848d3701f6", size = 58611, upload-time = "2025-12-06T13:23:27.684Z" },
+ { url = "https://files.pythonhosted.org/packages/b4/8a/a2588dfe24e1bbd742a554553778ab0d65fdf3d1c9a06d10b77047d142aa/pybase64-1.4.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e97b7bdbd62e71898cd542a6a9e320d9da754ff3ebd02cb802d69087ee94d468", size = 52404, upload-time = "2025-12-06T13:23:28.714Z" },
+ { url = "https://files.pythonhosted.org/packages/27/fc/afcda7445bebe0cbc38cafdd7813234cdd4fc5573ff067f1abf317bb0cec/pybase64-1.4.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b33aeaa780caaa08ffda87fc584d5eab61e3d3bbb5d86ead02161dc0c20d04bc", size = 68817, upload-time = "2025-12-06T13:23:30.079Z" },
+ { url = "https://files.pythonhosted.org/packages/d3/3a/87c3201e555ed71f73e961a787241a2438c2bbb2ca8809c29ddf938a3157/pybase64-1.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1c0efcf78f11cf866bed49caa7b97552bc4855a892f9cc2372abcd3ed0056f0d", size = 57854, upload-time = "2025-12-06T13:23:31.17Z" },
+ { url = "https://files.pythonhosted.org/packages/fd/7d/931c2539b31a7b375e7d595b88401eeb5bd6c5ce1059c9123f9b608aaa14/pybase64-1.4.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:66e3791f2ed725a46593f8bd2761ff37d01e2cdad065b1dceb89066f476e50c6", size = 54333, upload-time = "2025-12-06T13:23:32.422Z" },
+ { url = "https://files.pythonhosted.org/packages/de/5e/537601e02cc01f27e9d75f440f1a6095b8df44fc28b1eef2cd739aea8cec/pybase64-1.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:72bb0b6bddadab26e1b069bb78e83092711a111a80a0d6b9edcb08199ad7299b", size = 56492, upload-time = "2025-12-06T13:23:33.515Z" },
+ { url = "https://files.pythonhosted.org/packages/96/97/2a2e57acf8f5c9258d22aba52e71f8050e167b29ed2ee1113677c1b600c1/pybase64-1.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5b3365dbcbcdb0a294f0f50af0c0a16b27a232eddeeb0bceeefd844ef30d2a23", size = 70974, upload-time = "2025-12-06T13:23:36.27Z" },
+ { url = "https://files.pythonhosted.org/packages/75/2e/a9e28941c6dab6f06e6d3f6783d3373044be9b0f9a9d3492c3d8d2260ac0/pybase64-1.4.3-cp312-cp312-win32.whl", hash = "sha256:7bca1ed3a5df53305c629ca94276966272eda33c0d71f862d2d3d043f1e1b91a", size = 33686, upload-time = "2025-12-06T13:23:37.848Z" },
+ { url = "https://files.pythonhosted.org/packages/83/e3/507ab649d8c3512c258819c51d25c45d6e29d9ca33992593059e7b646a33/pybase64-1.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:9f2da8f56d9b891b18b4daf463a0640eae45a80af548ce435be86aa6eff3603b", size = 35833, upload-time = "2025-12-06T13:23:38.877Z" },
+ { url = "https://files.pythonhosted.org/packages/bc/8a/6eba66cd549a2fc74bb4425fd61b839ba0ab3022d3c401b8a8dc2cc00c7a/pybase64-1.4.3-cp312-cp312-win_arm64.whl", hash = "sha256:0631d8a2d035de03aa9bded029b9513e1fee8ed80b7ddef6b8e9389ffc445da0", size = 31185, upload-time = "2025-12-06T13:23:39.908Z" },
+ { url = "https://files.pythonhosted.org/packages/17/45/92322aec1b6979e789b5710f73c59f2172bc37c8ce835305434796824b7b/pybase64-1.4.3-graalpy312-graalpy250_312_native-macosx_10_13_x86_64.whl", hash = "sha256:2baaa092f3475f3a9c87ac5198023918ea8b6c125f4c930752ab2cbe3cd1d520", size = 38746, upload-time = "2025-12-06T13:26:25.869Z" },
+ { url = "https://files.pythonhosted.org/packages/11/94/f1a07402870388fdfc2ecec0c718111189732f7d0f2d7fe1386e19e8fad0/pybase64-1.4.3-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:cde13c0764b1af07a631729f26df019070dad759981d6975527b7e8ecb465b6c", size = 32573, upload-time = "2025-12-06T13:26:27.792Z" },
+ { url = "https://files.pythonhosted.org/packages/fa/8f/43c3bb11ca9bacf81cb0b7a71500bb65b2eda6d5fe07433c09b543de97f3/pybase64-1.4.3-graalpy312-graalpy250_312_native-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5c29a582b0ea3936d02bd6fe9bf674ab6059e6e45ab71c78404ab2c913224414", size = 43461, upload-time = "2025-12-06T13:26:28.906Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/4c/2a5258329200be57497d3972b5308558c6de42e3749c6cc2aa1cbe34b25a/pybase64-1.4.3-graalpy312-graalpy250_312_native-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b6b664758c804fa919b4f1257aa8cf68e95db76fc331de5f70bfc3a34655afe1", size = 36058, upload-time = "2025-12-06T13:26:30.092Z" },
+ { url = "https://files.pythonhosted.org/packages/ea/6d/41faa414cde66ec023b0ca8402a8f11cb61731c3dc27c082909cbbd1f929/pybase64-1.4.3-graalpy312-graalpy250_312_native-win_amd64.whl", hash = "sha256:f7537fa22ae56a0bf51e4b0ffc075926ad91c618e1416330939f7ef366b58e3b", size = 36231, upload-time = "2025-12-06T13:26:31.656Z" },
+]
+
+[[package]]
+name = "pycountry"
+version = "26.2.16"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/de/1d/061b9e7a48b85cfd69f33c33d2ef784a531c359399ad764243399673c8f5/pycountry-26.2.16.tar.gz", hash = "sha256:5b6027d453fcd6060112b951dd010f01f168b51b4bf8a1f1fc8c95c8d94a0801", size = 7711342, upload-time = "2026-02-17T03:42:52.367Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/9c/42/7703bd45b62fecd44cd7d3495423097e2f7d28bc2e99e7c1af68892ab157/pycountry-26.2.16-py3-none-any.whl", hash = "sha256:115c4baf7cceaa30f59a4694d79483c9167dbce7a9de4d3d571c5f3ea77c305a", size = 8044600, upload-time = "2026-02-17T03:42:49.777Z" },
+]
+
+[[package]]
+name = "pycparser"
+version = "3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" },
+]
+
+[[package]]
+name = "pydantic"
+version = "2.12.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "annotated-types" },
+ { name = "pydantic-core" },
+ { name = "typing-extensions" },
+ { name = "typing-inspection" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" },
+]
+
+[package.optional-dependencies]
+email = [
+ { name = "email-validator" },
+]
+
+[[package]]
+name = "pydantic-core"
+version = "2.41.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" },
+ { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" },
+ { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" },
+ { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" },
+ { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" },
+ { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" },
+ { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" },
+ { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" },
+ { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" },
+ { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" },
+ { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" },
+ { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" },
+ { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" },
+ { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" },
+]
+
+[[package]]
+name = "pydantic-extra-types"
+version = "2.11.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "pydantic" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/fd/35/2fee58b1316a73e025728583d3b1447218a97e621933fc776fb8c0f2ebdd/pydantic_extra_types-2.11.0.tar.gz", hash = "sha256:4e9991959d045b75feb775683437a97991d02c138e00b59176571db9ce634f0e", size = 157226, upload-time = "2025-12-31T16:18:27.944Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/fe/17/fabd56da47096d240dd45ba627bead0333b0cf0ee8ada9bec579287dadf3/pydantic_extra_types-2.11.0-py3-none-any.whl", hash = "sha256:84b864d250a0fc62535b7ec591e36f2c5b4d1325fa0017eb8cda9aeb63b374a6", size = 74296, upload-time = "2025-12-31T16:18:26.38Z" },
+]
+
+[package.optional-dependencies]
+pycountry = [
+ { name = "pycountry" },
+]
+
+[[package]]
+name = "pydantic-settings"
+version = "2.13.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "pydantic" },
+ { name = "python-dotenv" },
+ { name = "typing-inspection" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/52/6d/fffca34caecc4a3f97bda81b2098da5e8ab7efc9a66e819074a11955d87e/pydantic_settings-2.13.1.tar.gz", hash = "sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025", size = 223826, upload-time = "2026-02-19T13:45:08.055Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/00/4b/ccc026168948fec4f7555b9164c724cf4125eac006e176541483d2c959be/pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237", size = 58929, upload-time = "2026-02-19T13:45:06.034Z" },
+]
+
+[[package]]
+name = "pygments"
+version = "2.19.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
+]
+
+[[package]]
+name = "pyjwt"
+version = "2.11.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5c/5a/b46fa56bf322901eee5b0454a34343cdbdae202cd421775a8ee4e42fd519/pyjwt-2.11.0.tar.gz", hash = "sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623", size = 98019, upload-time = "2026-01-30T19:59:55.694Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/6f/01/c26ce75ba460d5cd503da9e13b21a33804d38c2165dec7b716d06b13010c/pyjwt-2.11.0-py3-none-any.whl", hash = "sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469", size = 28224, upload-time = "2026-01-30T19:59:54.539Z" },
+]
+
+[package.optional-dependencies]
+crypto = [
+ { name = "cryptography" },
+]
+
+[[package]]
+name = "pyparsing"
+version = "3.3.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f3/91/9c6ee907786a473bf81c5f53cf703ba0957b23ab84c264080fb5a450416f/pyparsing-3.3.2.tar.gz", hash = "sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc", size = 6851574, upload-time = "2026-01-21T03:57:59.36Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" },
+]
+
+[[package]]
+name = "python-dateutil"
+version = "2.9.0.post0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "six" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
+]
+
+[[package]]
+name = "python-dotenv"
+version = "1.2.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = "2025-10-26T15:12:10.434Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" },
+]
+
+[[package]]
+name = "python-json-logger"
+version = "4.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/29/bf/eca6a3d43db1dae7070f70e160ab20b807627ba953663ba07928cdd3dc58/python_json_logger-4.0.0.tar.gz", hash = "sha256:f58e68eb46e1faed27e0f574a55a0455eecd7b8a5b88b85a784519ba3cff047f", size = 17683, upload-time = "2025-10-06T04:15:18.984Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/51/e5/fecf13f06e5e5f67e8837d777d1bc43fac0ed2b77a676804df5c34744727/python_json_logger-4.0.0-py3-none-any.whl", hash = "sha256:af09c9daf6a813aa4cc7180395f50f2a9e5fa056034c9953aec92e381c5ba1e2", size = 15548, upload-time = "2025-10-06T04:15:17.553Z" },
+]
+
+[[package]]
+name = "python-multipart"
+version = "0.0.22"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/94/01/979e98d542a70714b0cb2b6728ed0b7c46792b695e3eaec3e20711271ca3/python_multipart-0.0.22.tar.gz", hash = "sha256:7340bef99a7e0032613f56dc36027b959fd3b30a787ed62d310e951f7c3a3a58", size = 37612, upload-time = "2026-01-25T10:15:56.219Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" },
+]
+
+[[package]]
+name = "pywin32"
+version = "311"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" },
+ { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" },
+ { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" },
+]
+
+[[package]]
+name = "pyyaml"
+version = "6.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" },
+ { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" },
+ { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" },
+ { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" },
+ { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" },
+ { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" },
+ { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" },
+ { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" },
+ { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" },
+]
+
+[[package]]
+name = "pyzmq"
+version = "27.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "cffi", marker = "implementation_name == 'pypy'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/04/0b/3c9baedbdf613ecaa7aa07027780b8867f57b6293b6ee50de316c9f3222b/pyzmq-27.1.0.tar.gz", hash = "sha256:ac0765e3d44455adb6ddbf4417dcce460fc40a05978c08efdf2948072f6db540", size = 281750, upload-time = "2025-09-08T23:10:18.157Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/92/e7/038aab64a946d535901103da16b953c8c9cc9c961dadcbf3609ed6428d23/pyzmq-27.1.0-cp312-abi3-macosx_10_15_universal2.whl", hash = "sha256:452631b640340c928fa343801b0d07eb0c3789a5ffa843f6e1a9cee0ba4eb4fc", size = 1306279, upload-time = "2025-09-08T23:08:03.807Z" },
+ { url = "https://files.pythonhosted.org/packages/e8/5e/c3c49fdd0f535ef45eefcc16934648e9e59dace4a37ee88fc53f6cd8e641/pyzmq-27.1.0-cp312-abi3-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:1c179799b118e554b66da67d88ed66cd37a169f1f23b5d9f0a231b4e8d44a113", size = 895645, upload-time = "2025-09-08T23:08:05.301Z" },
+ { url = "https://files.pythonhosted.org/packages/f8/e5/b0b2504cb4e903a74dcf1ebae157f9e20ebb6ea76095f6cfffea28c42ecd/pyzmq-27.1.0-cp312-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3837439b7f99e60312f0c926a6ad437b067356dc2bc2ec96eb395fd0fe804233", size = 652574, upload-time = "2025-09-08T23:08:06.828Z" },
+ { url = "https://files.pythonhosted.org/packages/f8/9b/c108cdb55560eaf253f0cbdb61b29971e9fb34d9c3499b0e96e4e60ed8a5/pyzmq-27.1.0-cp312-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43ad9a73e3da1fab5b0e7e13402f0b2fb934ae1c876c51d0afff0e7c052eca31", size = 840995, upload-time = "2025-09-08T23:08:08.396Z" },
+ { url = "https://files.pythonhosted.org/packages/c2/bb/b79798ca177b9eb0825b4c9998c6af8cd2a7f15a6a1a4272c1d1a21d382f/pyzmq-27.1.0-cp312-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0de3028d69d4cdc475bfe47a6128eb38d8bc0e8f4d69646adfbcd840facbac28", size = 1642070, upload-time = "2025-09-08T23:08:09.989Z" },
+ { url = "https://files.pythonhosted.org/packages/9c/80/2df2e7977c4ede24c79ae39dcef3899bfc5f34d1ca7a5b24f182c9b7a9ca/pyzmq-27.1.0-cp312-abi3-musllinux_1_2_i686.whl", hash = "sha256:cf44a7763aea9298c0aa7dbf859f87ed7012de8bda0f3977b6fb1d96745df856", size = 2021121, upload-time = "2025-09-08T23:08:11.907Z" },
+ { url = "https://files.pythonhosted.org/packages/46/bd/2d45ad24f5f5ae7e8d01525eb76786fa7557136555cac7d929880519e33a/pyzmq-27.1.0-cp312-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f30f395a9e6fbca195400ce833c731e7b64c3919aa481af4d88c3759e0cb7496", size = 1878550, upload-time = "2025-09-08T23:08:13.513Z" },
+ { url = "https://files.pythonhosted.org/packages/e6/2f/104c0a3c778d7c2ab8190e9db4f62f0b6957b53c9d87db77c284b69f33ea/pyzmq-27.1.0-cp312-abi3-win32.whl", hash = "sha256:250e5436a4ba13885494412b3da5d518cd0d3a278a1ae640e113c073a5f88edd", size = 559184, upload-time = "2025-09-08T23:08:15.163Z" },
+ { url = "https://files.pythonhosted.org/packages/fc/7f/a21b20d577e4100c6a41795842028235998a643b1ad406a6d4163ea8f53e/pyzmq-27.1.0-cp312-abi3-win_amd64.whl", hash = "sha256:9ce490cf1d2ca2ad84733aa1d69ce6855372cb5ce9223802450c9b2a7cba0ccf", size = 619480, upload-time = "2025-09-08T23:08:17.192Z" },
+ { url = "https://files.pythonhosted.org/packages/78/c2/c012beae5f76b72f007a9e91ee9401cb88c51d0f83c6257a03e785c81cc2/pyzmq-27.1.0-cp312-abi3-win_arm64.whl", hash = "sha256:75a2f36223f0d535a0c919e23615fc85a1e23b71f40c7eb43d7b1dedb4d8f15f", size = 552993, upload-time = "2025-09-08T23:08:18.926Z" },
+]
+
+[[package]]
+name = "quack-kernels"
+version = "0.2.10"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "apache-tvm-ffi" },
+ { name = "nvidia-cutlass-dsl" },
+ { name = "torch" },
+ { name = "torch-c-dlpack-ext" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/23/28/231c862500fec531080cc733e5766b46518edaefe0a068d46b276c380a25/quack_kernels-0.2.10.tar.gz", hash = "sha256:df86e981ea76542467ae2cd9ac606d587658e8d648a51c34dc0f2913a3e26bf6", size = 161102, upload-time = "2026-02-18T22:20:50.17Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/10/ac/8f70ddd397aff1d606d7aa6fbe857a2bc58a817965099cd97d91264175a8/quack_kernels-0.2.10-py3-none-any.whl", hash = "sha256:a5b604c5cf28d9e601aae00488b6b603bb4060ccab8409a4443e72a649226f74", size = 165298, upload-time = "2026-02-18T22:20:48.978Z" },
+]
+
+[[package]]
+name = "ray"
+version = "2.54.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "click" },
+ { name = "filelock" },
+ { name = "jsonschema" },
+ { name = "msgpack" },
+ { name = "packaging" },
+ { name = "protobuf" },
+ { name = "pyyaml" },
+ { name = "requests" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0e/16/45eefb51eb1767342a6dbf41af0b432279e422e56160705fcd1098a7ec53/ray-2.54.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:cf5c33b4b13850ec24a5bd5f9d9e0a8161f8e586bfd297e52913d170dec447fe", size = 70084880, upload-time = "2026-02-18T04:05:22.007Z" },
+ { url = "https://files.pythonhosted.org/packages/60/ad/e07aca3637e9c3ec4857ec4366208099cf8488ece8061a9925ba29b66382/ray-2.54.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:795ae21d6b764245d3f521bc5833446d58569e7dfde9c5777417eb285d87450f", size = 72107346, upload-time = "2026-02-18T04:05:27.999Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/b9/cc5ea8460c3dc602e6b7198277a7c59ba2b8929374ab22efa8df9f3deac8/ray-2.54.0-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:a972afd5aa3dda99d0b2f369b5f62e5dd95865ab7d37bf2e0a0e0d2cfbd9b325", size = 72967230, upload-time = "2026-02-18T04:05:33.771Z" },
+ { url = "https://files.pythonhosted.org/packages/de/d7/744de3b1bb881701330ddcbb2f6efaccd65915d564ece899a3838f9fb105/ray-2.54.0-cp312-cp312-win_amd64.whl", hash = "sha256:2ee074ede491d0aacfa339c003f5d7a15826e1e2a72ce873234ccbc0446e19b3", size = 27427353, upload-time = "2026-02-18T04:05:38.853Z" },
+]
+
+[package.optional-dependencies]
+cgraph = [
+ { name = "cupy-cuda12x", marker = "sys_platform != 'darwin'" },
+]
+
+[[package]]
+name = "referencing"
+version = "0.37.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "attrs" },
+ { name = "rpds-py" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" },
+]
+
+[[package]]
+name = "regex"
+version = "2026.2.28"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/8b/71/41455aa99a5a5ac1eaf311f5d8efd9ce6433c03ac1e0962de163350d0d97/regex-2026.2.28.tar.gz", hash = "sha256:a729e47d418ea11d03469f321aaf67cdee8954cde3ff2cf8403ab87951ad10f2", size = 415184, upload-time = "2026-02-28T02:19:42.792Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/07/42/9061b03cf0fc4b5fa2c3984cbbaed54324377e440a5c5a29d29a72518d62/regex-2026.2.28-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fcf26c3c6d0da98fada8ae4ef0aa1c3405a431c0a77eb17306d38a89b02adcd7", size = 489574, upload-time = "2026-02-28T02:16:50.455Z" },
+ { url = "https://files.pythonhosted.org/packages/77/83/0c8a5623a233015595e3da499c5a1c13720ac63c107897a6037bb97af248/regex-2026.2.28-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02473c954af35dd2defeb07e44182f5705b30ea3f351a7cbffa9177beb14da5d", size = 291426, upload-time = "2026-02-28T02:16:52.52Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/06/3ef1ac6910dc3295ebd71b1f9bfa737e82cfead211a18b319d45f85ddd09/regex-2026.2.28-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9b65d33a17101569f86d9c5966a8b1d7fbf8afdda5a8aa219301b0a80f58cf7d", size = 289200, upload-time = "2026-02-28T02:16:54.08Z" },
+ { url = "https://files.pythonhosted.org/packages/dd/c9/8cc8d850b35ab5650ff6756a1cb85286e2000b66c97520b29c1587455344/regex-2026.2.28-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e71dcecaa113eebcc96622c17692672c2d104b1d71ddf7adeda90da7ddeb26fc", size = 796765, upload-time = "2026-02-28T02:16:55.905Z" },
+ { url = "https://files.pythonhosted.org/packages/e9/5d/57702597627fc23278ebf36fbb497ac91c0ce7fec89ac6c81e420ca3e38c/regex-2026.2.28-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:481df4623fa4969c8b11f3433ed7d5e3dc9cec0f008356c3212b3933fb77e3d8", size = 863093, upload-time = "2026-02-28T02:16:58.094Z" },
+ { url = "https://files.pythonhosted.org/packages/02/6d/f3ecad537ca2811b4d26b54ca848cf70e04fcfc138667c146a9f3157779c/regex-2026.2.28-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:64e7c6ad614573e0640f271e811a408d79a9e1fe62a46adb602f598df42a818d", size = 909455, upload-time = "2026-02-28T02:17:00.918Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/40/bb226f203caa22c1043c1ca79b36340156eca0f6a6742b46c3bb222a3a57/regex-2026.2.28-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6b08a06976ff4fb0d83077022fde3eca06c55432bb997d8c0495b9a4e9872f4", size = 802037, upload-time = "2026-02-28T02:17:02.842Z" },
+ { url = "https://files.pythonhosted.org/packages/44/7c/c6d91d8911ac6803b45ca968e8e500c46934e58c0903cbc6d760ee817a0a/regex-2026.2.28-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:864cdd1a2ef5716b0ab468af40139e62ede1b3a53386b375ec0786bb6783fc05", size = 775113, upload-time = "2026-02-28T02:17:04.506Z" },
+ { url = "https://files.pythonhosted.org/packages/dc/8d/4a9368d168d47abd4158580b8c848709667b1cd293ff0c0c277279543bd0/regex-2026.2.28-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:511f7419f7afab475fd4d639d4aedfc54205bcb0800066753ef68a59f0f330b5", size = 784194, upload-time = "2026-02-28T02:17:06.888Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/bf/2c72ab5d8b7be462cb1651b5cc333da1d0068740342f350fcca3bca31947/regex-2026.2.28-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b42f7466e32bf15a961cf09f35fa6323cc72e64d3d2c990b10de1274a5da0a59", size = 856846, upload-time = "2026-02-28T02:17:09.11Z" },
+ { url = "https://files.pythonhosted.org/packages/7c/f4/6b65c979bb6d09f51bb2d2a7bc85de73c01ec73335d7ddd202dcb8cd1c8f/regex-2026.2.28-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:8710d61737b0c0ce6836b1da7109f20d495e49b3809f30e27e9560be67a257bf", size = 763516, upload-time = "2026-02-28T02:17:11.004Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/32/29ea5e27400ee86d2cc2b4e80aa059df04eaf78b4f0c18576ae077aeff68/regex-2026.2.28-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4390c365fd2d45278f45afd4673cb90f7285f5701607e3ad4274df08e36140ae", size = 849278, upload-time = "2026-02-28T02:17:12.693Z" },
+ { url = "https://files.pythonhosted.org/packages/1d/91/3233d03b5f865111cd517e1c95ee8b43e8b428d61fa73764a80c9bb6f537/regex-2026.2.28-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cb3b1db8ff6c7b8bf838ab05583ea15230cb2f678e569ab0e3a24d1e8320940b", size = 790068, upload-time = "2026-02-28T02:17:14.9Z" },
+ { url = "https://files.pythonhosted.org/packages/76/92/abc706c1fb03b4580a09645b206a3fc032f5a9f457bc1a8038ac555658ab/regex-2026.2.28-cp312-cp312-win32.whl", hash = "sha256:f8ed9a5d4612df9d4de15878f0bc6aa7a268afbe5af21a3fdd97fa19516e978c", size = 266416, upload-time = "2026-02-28T02:17:17.15Z" },
+ { url = "https://files.pythonhosted.org/packages/fa/06/2a6f7dff190e5fa9df9fb4acf2fdf17a1aa0f7f54596cba8de608db56b3a/regex-2026.2.28-cp312-cp312-win_amd64.whl", hash = "sha256:01d65fd24206c8e1e97e2e31b286c59009636c022eb5d003f52760b0f42155d4", size = 277297, upload-time = "2026-02-28T02:17:18.723Z" },
+ { url = "https://files.pythonhosted.org/packages/b7/f0/58a2484851fadf284458fdbd728f580d55c1abac059ae9f048c63b92f427/regex-2026.2.28-cp312-cp312-win_arm64.whl", hash = "sha256:c0b5ccbb8ffb433939d248707d4a8b31993cb76ab1a0187ca886bf50e96df952", size = 270408, upload-time = "2026-02-28T02:17:20.328Z" },
+]
+
+[[package]]
+name = "requests"
+version = "2.32.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "certifi" },
+ { name = "charset-normalizer" },
+ { name = "idna" },
+ { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
+]
+
+[[package]]
+name = "rich"
+version = "14.3.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "markdown-it-py" },
+ { name = "pygments" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" },
+]
+
+[[package]]
+name = "rich-toolkit"
+version = "0.19.7"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "click" },
+ { name = "rich" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/42/ba/dae9e3096651042754da419a4042bc1c75e07d615f9b15066d738838e4df/rich_toolkit-0.19.7.tar.gz", hash = "sha256:133c0915872da91d4c25d85342d5ec1dfacc69b63448af1a08a0d4b4f23ef46e", size = 195877, upload-time = "2026-02-24T16:06:20.555Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/fb/3c/c923619f6d2f5fafcc96fec0aaf9550a46cd5b6481f06e0c6b66a2a4fed0/rich_toolkit-0.19.7-py3-none-any.whl", hash = "sha256:0288e9203728c47c5a4eb60fd2f0692d9df7455a65901ab6f898437a2ba5989d", size = 32963, upload-time = "2026-02-24T16:06:22.066Z" },
+]
+
+[[package]]
+name = "rignore"
+version = "0.7.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e5/f5/8bed2310abe4ae04b67a38374a4d311dd85220f5d8da56f47ae9361be0b0/rignore-0.7.6.tar.gz", hash = "sha256:00d3546cd793c30cb17921ce674d2c8f3a4b00501cb0e3dd0e82217dbeba2671", size = 57140, upload-time = "2025-11-05T21:41:21.968Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0b/0e/012556ef3047a2628842b44e753bb15f4dc46806780ff090f1e8fe4bf1eb/rignore-0.7.6-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:03e82348cb7234f8d9b2834f854400ddbbd04c0f8f35495119e66adbd37827a8", size = 883488, upload-time = "2025-11-05T20:42:41.359Z" },
+ { url = "https://files.pythonhosted.org/packages/93/b0/d4f1f3fe9eb3f8e382d45ce5b0547ea01c4b7e0b4b4eb87bcd66a1d2b888/rignore-0.7.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b9e624f6be6116ea682e76c5feb71ea91255c67c86cb75befe774365b2931961", size = 820411, upload-time = "2025-11-05T20:42:24.782Z" },
+ { url = "https://files.pythonhosted.org/packages/4a/c8/dea564b36dedac8de21c18e1851789545bc52a0c22ece9843444d5608a6a/rignore-0.7.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bda49950d405aa8d0ebe26af807c4e662dd281d926530f03f29690a2e07d649a", size = 897821, upload-time = "2025-11-05T20:40:52.613Z" },
+ { url = "https://files.pythonhosted.org/packages/b3/2b/ee96db17ac1835e024c5d0742eefb7e46de60020385ac883dd3d1cde2c1f/rignore-0.7.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b5fd5ab3840b8c16851d327ed06e9b8be6459702a53e5ab1fc4073b684b3789e", size = 873963, upload-time = "2025-11-05T20:41:07.49Z" },
+ { url = "https://files.pythonhosted.org/packages/a5/8c/ad5a57bbb9d14d5c7e5960f712a8a0b902472ea3f4a2138cbf70d1777b75/rignore-0.7.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ced2a248352636a5c77504cb755dc02c2eef9a820a44d3f33061ce1bb8a7f2d2", size = 1169216, upload-time = "2025-11-05T20:41:23.73Z" },
+ { url = "https://files.pythonhosted.org/packages/80/e6/5b00bc2a6bc1701e6878fca798cf5d9125eb3113193e33078b6fc0d99123/rignore-0.7.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a04a3b73b75ddc12c9c9b21efcdaab33ca3832941d6f1d67bffd860941cd448a", size = 942942, upload-time = "2025-11-05T20:41:39.393Z" },
+ { url = "https://files.pythonhosted.org/packages/85/e5/7f99bd0cc9818a91d0e8b9acc65b792e35750e3bdccd15a7ee75e64efca4/rignore-0.7.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d24321efac92140b7ec910ac7c53ab0f0c86a41133d2bb4b0e6a7c94967f44dd", size = 959787, upload-time = "2025-11-05T20:42:09.765Z" },
+ { url = "https://files.pythonhosted.org/packages/55/54/2ffea79a7c1eabcede1926347ebc2a81bc6b81f447d05b52af9af14948b9/rignore-0.7.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73c7aa109d41e593785c55fdaa89ad80b10330affa9f9d3e3a51fa695f739b20", size = 984245, upload-time = "2025-11-05T20:41:54.062Z" },
+ { url = "https://files.pythonhosted.org/packages/41/f7/e80f55dfe0f35787fa482aa18689b9c8251e045076c35477deb0007b3277/rignore-0.7.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1734dc49d1e9501b07852ef44421f84d9f378da9fbeda729e77db71f49cac28b", size = 1078647, upload-time = "2025-11-05T21:40:13.463Z" },
+ { url = "https://files.pythonhosted.org/packages/d4/cf/2c64f0b6725149f7c6e7e5a909d14354889b4beaadddaa5fff023ec71084/rignore-0.7.6-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5719ea14ea2b652c0c0894be5dfde954e1853a80dea27dd2fbaa749618d837f5", size = 1139186, upload-time = "2025-11-05T21:40:31.27Z" },
+ { url = "https://files.pythonhosted.org/packages/75/95/a86c84909ccc24af0d094b50d54697951e576c252a4d9f21b47b52af9598/rignore-0.7.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8e23424fc7ce35726854f639cb7968151a792c0c3d9d082f7f67e0c362cfecca", size = 1117604, upload-time = "2025-11-05T21:40:48.07Z" },
+ { url = "https://files.pythonhosted.org/packages/7f/5e/13b249613fd5d18d58662490ab910a9f0be758981d1797789913adb4e918/rignore-0.7.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3efdcf1dd84d45f3e2bd2f93303d9be103888f56dfa7c3349b5bf4f0657ec696", size = 1127725, upload-time = "2025-11-05T21:41:05.804Z" },
+ { url = "https://files.pythonhosted.org/packages/c7/28/fa5dcd1e2e16982c359128664e3785f202d3eca9b22dd0b2f91c4b3d242f/rignore-0.7.6-cp312-cp312-win32.whl", hash = "sha256:ccca9d1a8b5234c76b71546fc3c134533b013f40495f394a65614a81f7387046", size = 646145, upload-time = "2025-11-05T21:41:51.096Z" },
+ { url = "https://files.pythonhosted.org/packages/26/87/69387fb5dd81a0f771936381431780b8cf66fcd2cfe9495e1aaf41548931/rignore-0.7.6-cp312-cp312-win_amd64.whl", hash = "sha256:c96a285e4a8bfec0652e0bfcf42b1aabcdda1e7625f5006d188e3b1c87fdb543", size = 726090, upload-time = "2025-11-05T21:41:36.485Z" },
+ { url = "https://files.pythonhosted.org/packages/24/5f/e8418108dcda8087fb198a6f81caadbcda9fd115d61154bf0df4d6d3619b/rignore-0.7.6-cp312-cp312-win_arm64.whl", hash = "sha256:a64a750e7a8277a323f01ca50b7784a764845f6cce2fe38831cb93f0508d0051", size = 656317, upload-time = "2025-11-05T21:41:25.305Z" },
+]
+
+[[package]]
+name = "rpds-py"
+version = "0.30.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/03/e7/98a2f4ac921d82f33e03f3835f5bf3a4a40aa1bfdc57975e74a97b2b4bdd/rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad", size = 375086, upload-time = "2025-11-30T20:22:17.93Z" },
+ { url = "https://files.pythonhosted.org/packages/4d/a1/bca7fd3d452b272e13335db8d6b0b3ecde0f90ad6f16f3328c6fb150c889/rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05", size = 359053, upload-time = "2025-11-30T20:22:19.297Z" },
+ { url = "https://files.pythonhosted.org/packages/65/1c/ae157e83a6357eceff62ba7e52113e3ec4834a84cfe07fa4b0757a7d105f/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28", size = 390763, upload-time = "2025-11-30T20:22:21.661Z" },
+ { url = "https://files.pythonhosted.org/packages/d4/36/eb2eb8515e2ad24c0bd43c3ee9cd74c33f7ca6430755ccdb240fd3144c44/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd", size = 408951, upload-time = "2025-11-30T20:22:23.408Z" },
+ { url = "https://files.pythonhosted.org/packages/d6/65/ad8dc1784a331fabbd740ef6f71ce2198c7ed0890dab595adb9ea2d775a1/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f", size = 514622, upload-time = "2025-11-30T20:22:25.16Z" },
+ { url = "https://files.pythonhosted.org/packages/63/8e/0cfa7ae158e15e143fe03993b5bcd743a59f541f5952e1546b1ac1b5fd45/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1", size = 414492, upload-time = "2025-11-30T20:22:26.505Z" },
+ { url = "https://files.pythonhosted.org/packages/60/1b/6f8f29f3f995c7ffdde46a626ddccd7c63aefc0efae881dc13b6e5d5bb16/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23", size = 394080, upload-time = "2025-11-30T20:22:27.934Z" },
+ { url = "https://files.pythonhosted.org/packages/6d/d5/a266341051a7a3ca2f4b750a3aa4abc986378431fc2da508c5034d081b70/rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6", size = 408680, upload-time = "2025-11-30T20:22:29.341Z" },
+ { url = "https://files.pythonhosted.org/packages/10/3b/71b725851df9ab7a7a4e33cf36d241933da66040d195a84781f49c50490c/rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51", size = 423589, upload-time = "2025-11-30T20:22:31.469Z" },
+ { url = "https://files.pythonhosted.org/packages/00/2b/e59e58c544dc9bd8bd8384ecdb8ea91f6727f0e37a7131baeff8d6f51661/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5", size = 573289, upload-time = "2025-11-30T20:22:32.997Z" },
+ { url = "https://files.pythonhosted.org/packages/da/3e/a18e6f5b460893172a7d6a680e86d3b6bc87a54c1f0b03446a3c8c7b588f/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e", size = 599737, upload-time = "2025-11-30T20:22:34.419Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/e2/714694e4b87b85a18e2c243614974413c60aa107fd815b8cbc42b873d1d7/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394", size = 563120, upload-time = "2025-11-30T20:22:35.903Z" },
+ { url = "https://files.pythonhosted.org/packages/6f/ab/d5d5e3bcedb0a77f4f613706b750e50a5a3ba1c15ccd3665ecc636c968fd/rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf", size = 223782, upload-time = "2025-11-30T20:22:37.271Z" },
+ { url = "https://files.pythonhosted.org/packages/39/3b/f786af9957306fdc38a74cef405b7b93180f481fb48453a114bb6465744a/rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b", size = 240463, upload-time = "2025-11-30T20:22:39.021Z" },
+ { url = "https://files.pythonhosted.org/packages/f3/d2/b91dc748126c1559042cfe41990deb92c4ee3e2b415f6b5234969ffaf0cc/rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e", size = 230868, upload-time = "2025-11-30T20:22:40.493Z" },
+]
+
+[[package]]
+name = "rsa"
+version = "4.9.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "pyasn1" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034, upload-time = "2025-04-16T09:51:18.218Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" },
+]
+
+[[package]]
+name = "ruff"
+version = "0.15.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/06/04/eab13a954e763b0606f460443fcbf6bb5a0faf06890ea3754ff16523dce5/ruff-0.15.2.tar.gz", hash = "sha256:14b965afee0969e68bb871eba625343b8673375f457af4abe98553e8bbb98342", size = 4558148, upload-time = "2026-02-19T22:32:20.271Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2f/70/3a4dc6d09b13cb3e695f28307e5d889b2e1a66b7af9c5e257e796695b0e6/ruff-0.15.2-py3-none-linux_armv6l.whl", hash = "sha256:120691a6fdae2f16d65435648160f5b81a9625288f75544dc40637436b5d3c0d", size = 10430565, upload-time = "2026-02-19T22:32:41.824Z" },
+ { url = "https://files.pythonhosted.org/packages/71/0b/bb8457b56185ece1305c666dc895832946d24055be90692381c31d57466d/ruff-0.15.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a89056d831256099658b6bba4037ac6dd06f49d194199215befe2bb10457ea5e", size = 10820354, upload-time = "2026-02-19T22:32:07.366Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/c1/e0532d7f9c9e0b14c46f61b14afd563298b8b83f337b6789ddd987e46121/ruff-0.15.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e36dee3a64be0ebd23c86ffa3aa3fd3ac9a712ff295e192243f814a830b6bd87", size = 10170767, upload-time = "2026-02-19T22:32:13.188Z" },
+ { url = "https://files.pythonhosted.org/packages/47/e8/da1aa341d3af017a21c7a62fb5ec31d4e7ad0a93ab80e3a508316efbcb23/ruff-0.15.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9fb47b6d9764677f8c0a193c0943ce9a05d6763523f132325af8a858eadc2b9", size = 10529591, upload-time = "2026-02-19T22:32:02.547Z" },
+ { url = "https://files.pythonhosted.org/packages/93/74/184fbf38e9f3510231fbc5e437e808f0b48c42d1df9434b208821efcd8d6/ruff-0.15.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f376990f9d0d6442ea9014b19621d8f2aaf2b8e39fdbfc79220b7f0c596c9b80", size = 10260771, upload-time = "2026-02-19T22:32:36.938Z" },
+ { url = "https://files.pythonhosted.org/packages/05/ac/605c20b8e059a0bc4b42360414baa4892ff278cec1c91fff4be0dceedefd/ruff-0.15.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dcc987551952d73cbf5c88d9fdee815618d497e4df86cd4c4824cc59d5dd75f", size = 11045791, upload-time = "2026-02-19T22:32:31.642Z" },
+ { url = "https://files.pythonhosted.org/packages/fd/52/db6e419908f45a894924d410ac77d64bdd98ff86901d833364251bd08e22/ruff-0.15.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:42a47fd785cbe8c01b9ff45031af875d101b040ad8f4de7bbb716487c74c9a77", size = 11879271, upload-time = "2026-02-19T22:32:29.305Z" },
+ { url = "https://files.pythonhosted.org/packages/3e/d8/7992b18f2008bdc9231d0f10b16df7dda964dbf639e2b8b4c1b4e91b83af/ruff-0.15.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbe9f49354866e575b4c6943856989f966421870e85cd2ac94dccb0a9dcb2fea", size = 11303707, upload-time = "2026-02-19T22:32:22.492Z" },
+ { url = "https://files.pythonhosted.org/packages/d7/02/849b46184bcfdd4b64cde61752cc9a146c54759ed036edd11857e9b8443b/ruff-0.15.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7a672c82b5f9887576087d97be5ce439f04bbaf548ee987b92d3a7dede41d3a", size = 11149151, upload-time = "2026-02-19T22:32:44.234Z" },
+ { url = "https://files.pythonhosted.org/packages/70/04/f5284e388bab60d1d3b99614a5a9aeb03e0f333847e2429bebd2aaa1feec/ruff-0.15.2-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:72ecc64f46f7019e2bcc3cdc05d4a7da958b629a5ab7033195e11a438403d956", size = 11091132, upload-time = "2026-02-19T22:32:24.691Z" },
+ { url = "https://files.pythonhosted.org/packages/fa/ae/88d844a21110e14d92cf73d57363fab59b727ebeabe78009b9ccb23500af/ruff-0.15.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:8dcf243b15b561c655c1ef2f2b0050e5d50db37fe90115507f6ff37d865dc8b4", size = 10504717, upload-time = "2026-02-19T22:32:26.75Z" },
+ { url = "https://files.pythonhosted.org/packages/64/27/867076a6ada7f2b9c8292884ab44d08fd2ba71bd2b5364d4136f3cd537e1/ruff-0.15.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dab6941c862c05739774677c6273166d2510d254dac0695c0e3f5efa1b5585de", size = 10263122, upload-time = "2026-02-19T22:32:10.036Z" },
+ { url = "https://files.pythonhosted.org/packages/e7/ef/faf9321d550f8ebf0c6373696e70d1758e20ccdc3951ad7af00c0956be7c/ruff-0.15.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1b9164f57fc36058e9a6806eb92af185b0697c9fe4c7c52caa431c6554521e5c", size = 10735295, upload-time = "2026-02-19T22:32:39.227Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/55/e8089fec62e050ba84d71b70e7834b97709ca9b7aba10c1a0b196e493f97/ruff-0.15.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:80d24fcae24d42659db7e335b9e1531697a7102c19185b8dc4a028b952865fd8", size = 11241641, upload-time = "2026-02-19T22:32:34.617Z" },
+ { url = "https://files.pythonhosted.org/packages/23/01/1c30526460f4d23222d0fabd5888868262fd0e2b71a00570ca26483cd993/ruff-0.15.2-py3-none-win32.whl", hash = "sha256:fd5ff9e5f519a7e1bd99cbe8daa324010a74f5e2ebc97c6242c08f26f3714f6f", size = 10507885, upload-time = "2026-02-19T22:32:15.635Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/10/3d18e3bbdf8fc50bbb4ac3cc45970aa5a9753c5cb51bf9ed9a3cd8b79fa3/ruff-0.15.2-py3-none-win_amd64.whl", hash = "sha256:d20014e3dfa400f3ff84830dfb5755ece2de45ab62ecea4af6b7262d0fb4f7c5", size = 11623725, upload-time = "2026-02-19T22:32:04.947Z" },
+ { url = "https://files.pythonhosted.org/packages/6d/78/097c0798b1dab9f8affe73da9642bb4500e098cb27fd8dc9724816ac747b/ruff-0.15.2-py3-none-win_arm64.whl", hash = "sha256:cabddc5822acdc8f7b5527b36ceac55cc51eec7b1946e60181de8fe83ca8876e", size = 10941649, upload-time = "2026-02-19T22:32:18.108Z" },
+]
+
+[[package]]
+name = "safetensors"
+version = "0.7.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload-time = "2025-11-19T15:18:43.199Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload-time = "2025-11-19T15:18:35.84Z" },
+ { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload-time = "2025-11-19T15:18:34.416Z" },
+ { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748, upload-time = "2025-11-19T15:18:09.79Z" },
+ { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload-time = "2025-11-19T15:18:16.145Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload-time = "2025-11-19T15:18:21.11Z" },
+ { url = "https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload-time = "2025-11-19T15:18:25.719Z" },
+ { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload-time = "2025-11-19T15:18:33.023Z" },
+ { url = "https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, upload-time = "2025-11-19T15:18:31.075Z" },
+ { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, upload-time = "2025-11-19T15:18:37.211Z" },
+ { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload-time = "2025-11-19T15:18:38.689Z" },
+ { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload-time = "2025-11-19T15:18:40.162Z" },
+ { url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" },
+ { url = "https://files.pythonhosted.org/packages/05/e5/cb4b713c8a93469e3c5be7c3f8d77d307e65fe89673e731f5c2bfd0a9237/safetensors-0.7.0-cp38-abi3-win32.whl", hash = "sha256:c74af94bf3ac15ac4d0f2a7c7b4663a15f8c2ab15ed0fc7531ca61d0835eccba", size = 326423, upload-time = "2025-11-19T15:18:45.74Z" },
+ { url = "https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380, upload-time = "2025-11-19T15:18:44.427Z" },
+]
+
+[[package]]
+name = "scikit-learn"
+version = "1.8.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "joblib" },
+ { name = "numpy" },
+ { name = "scipy" },
+ { name = "threadpoolctl" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/0e/d4/40988bf3b8e34feec1d0e6a051446b1f66225f8529b9309becaeef62b6c4/scikit_learn-1.8.0.tar.gz", hash = "sha256:9bccbb3b40e3de10351f8f5068e105d0f4083b1a65fa07b6634fbc401a6287fd", size = 7335585, upload-time = "2025-12-10T07:08:53.618Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/90/74/e6a7cc4b820e95cc38cf36cd74d5aa2b42e8ffc2d21fe5a9a9c45c1c7630/scikit_learn-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5fb63362b5a7ddab88e52b6dbb47dac3fd7dafeee740dc6c8d8a446ddedade8e", size = 8548242, upload-time = "2025-12-10T07:07:51.568Z" },
+ { url = "https://files.pythonhosted.org/packages/49/d8/9be608c6024d021041c7f0b3928d4749a706f4e2c3832bbede4fb4f58c95/scikit_learn-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5025ce924beccb28298246e589c691fe1b8c1c96507e6d27d12c5fadd85bfd76", size = 8079075, upload-time = "2025-12-10T07:07:53.697Z" },
+ { url = "https://files.pythonhosted.org/packages/dd/47/f187b4636ff80cc63f21cd40b7b2d177134acaa10f6bb73746130ee8c2e5/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4496bb2cf7a43ce1a2d7524a79e40bc5da45cf598dbf9545b7e8316ccba47bb4", size = 8660492, upload-time = "2025-12-10T07:07:55.574Z" },
+ { url = "https://files.pythonhosted.org/packages/97/74/b7a304feb2b49df9fafa9382d4d09061a96ee9a9449a7cbea7988dda0828/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0bcfe4d0d14aec44921545fd2af2338c7471de9cb701f1da4c9d85906ab847a", size = 8931904, upload-time = "2025-12-10T07:07:57.666Z" },
+ { url = "https://files.pythonhosted.org/packages/9f/c4/0ab22726a04ede56f689476b760f98f8f46607caecff993017ac1b64aa5d/scikit_learn-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:35c007dedb2ffe38fe3ee7d201ebac4a2deccd2408e8621d53067733e3c74809", size = 8019359, upload-time = "2025-12-10T07:07:59.838Z" },
+ { url = "https://files.pythonhosted.org/packages/24/90/344a67811cfd561d7335c1b96ca21455e7e472d281c3c279c4d3f2300236/scikit_learn-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:8c497fff237d7b4e07e9ef1a640887fa4fb765647f86fbe00f969ff6280ce2bb", size = 7641898, upload-time = "2025-12-10T07:08:01.36Z" },
+]
+
+[[package]]
+name = "scipy"
+version = "1.17.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "numpy" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/35/48/b992b488d6f299dbe3f11a20b24d3dda3d46f1a635ede1c46b5b17a7b163/scipy-1.17.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:35c3a56d2ef83efc372eaec584314bd0ef2e2f0d2adb21c55e6ad5b344c0dcb8", size = 31610954, upload-time = "2026-02-23T00:17:49.855Z" },
+ { url = "https://files.pythonhosted.org/packages/b2/02/cf107b01494c19dc100f1d0b7ac3cc08666e96ba2d64db7626066cee895e/scipy-1.17.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:fcb310ddb270a06114bb64bbe53c94926b943f5b7f0842194d585c65eb4edd76", size = 28172662, upload-time = "2026-02-23T00:18:01.64Z" },
+ { url = "https://files.pythonhosted.org/packages/cf/a9/599c28631bad314d219cf9ffd40e985b24d603fc8a2f4ccc5ae8419a535b/scipy-1.17.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cc90d2e9c7e5c7f1a482c9875007c095c3194b1cfedca3c2f3291cdc2bc7c086", size = 20344366, upload-time = "2026-02-23T00:18:12.015Z" },
+ { url = "https://files.pythonhosted.org/packages/35/f5/906eda513271c8deb5af284e5ef0206d17a96239af79f9fa0aebfe0e36b4/scipy-1.17.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:c80be5ede8f3f8eded4eff73cc99a25c388ce98e555b17d31da05287015ffa5b", size = 22704017, upload-time = "2026-02-23T00:18:21.502Z" },
+ { url = "https://files.pythonhosted.org/packages/da/34/16f10e3042d2f1d6b66e0428308ab52224b6a23049cb2f5c1756f713815f/scipy-1.17.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e19ebea31758fac5893a2ac360fedd00116cbb7628e650842a6691ba7ca28a21", size = 32927842, upload-time = "2026-02-23T00:18:35.367Z" },
+ { url = "https://files.pythonhosted.org/packages/01/8e/1e35281b8ab6d5d72ebe9911edcdffa3f36b04ed9d51dec6dd140396e220/scipy-1.17.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02ae3b274fde71c5e92ac4d54bc06c42d80e399fec704383dcd99b301df37458", size = 35235890, upload-time = "2026-02-23T00:18:49.188Z" },
+ { url = "https://files.pythonhosted.org/packages/c5/5c/9d7f4c88bea6e0d5a4f1bc0506a53a00e9fcb198de372bfe4d3652cef482/scipy-1.17.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8a604bae87c6195d8b1045eddece0514d041604b14f2727bbc2b3020172045eb", size = 35003557, upload-time = "2026-02-23T00:18:54.74Z" },
+ { url = "https://files.pythonhosted.org/packages/65/94/7698add8f276dbab7a9de9fb6b0e02fc13ee61d51c7c3f85ac28b65e1239/scipy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f590cd684941912d10becc07325a3eeb77886fe981415660d9265c4c418d0bea", size = 37625856, upload-time = "2026-02-23T00:19:00.307Z" },
+ { url = "https://files.pythonhosted.org/packages/a2/84/dc08d77fbf3d87d3ee27f6a0c6dcce1de5829a64f2eae85a0ecc1f0daa73/scipy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:41b71f4a3a4cab9d366cd9065b288efc4d4f3c0b37a91a8e0947fb5bd7f31d87", size = 36549682, upload-time = "2026-02-23T00:19:07.67Z" },
+ { url = "https://files.pythonhosted.org/packages/bc/98/fe9ae9ffb3b54b62559f52dedaebe204b408db8109a8c66fdd04869e6424/scipy-1.17.1-cp312-cp312-win_arm64.whl", hash = "sha256:f4115102802df98b2b0db3cce5cb9b92572633a1197c77b7553e5203f284a5b3", size = 24547340, upload-time = "2026-02-23T00:19:12.024Z" },
+]
+
+[[package]]
+name = "seaborn"
+version = "0.13.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "matplotlib" },
+ { name = "numpy" },
+ { name = "pandas" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/86/59/a451d7420a77ab0b98f7affa3a1d78a313d2f7281a57afb1a34bae8ab412/seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7", size = 1457696, upload-time = "2024-01-25T13:21:52.551Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914, upload-time = "2024-01-25T13:21:49.598Z" },
+]
+
+[[package]]
+name = "sentence-transformers"
+version = "5.2.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "huggingface-hub" },
+ { name = "numpy" },
+ { name = "scikit-learn" },
+ { name = "scipy" },
+ { name = "torch" },
+ { name = "tqdm" },
+ { name = "transformers" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5b/30/21664028fc0776eb1ca024879480bbbab36f02923a8ff9e4cae5a150fa35/sentence_transformers-5.2.3.tar.gz", hash = "sha256:3cd3044e1f3fe859b6a1b66336aac502eaae5d3dd7d5c8fc237f37fbf58137c7", size = 381623, upload-time = "2026-02-17T14:05:20.238Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/46/9f/dba4b3e18ebbe1eaa29d9f1764fbc7da0cd91937b83f2b7928d15c5d2d36/sentence_transformers-5.2.3-py3-none-any.whl", hash = "sha256:6437c62d4112b615ddebda362dfc16a4308d604c5b68125ed586e3e95d5b2e30", size = 494225, upload-time = "2026-02-17T14:05:18.596Z" },
+]
+
+[[package]]
+name = "sentencepiece"
+version = "0.2.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/15/15/2e7a025fc62d764b151ae6d0f2a92f8081755ebe8d4a64099accc6f77ba6/sentencepiece-0.2.1.tar.gz", hash = "sha256:8138cec27c2f2282f4a34d9a016e3374cd40e5c6e9cb335063db66a0a3b71fad", size = 3228515, upload-time = "2025-08-12T07:00:51.718Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/4a/be/32ce495aa1d0e0c323dcb1ba87096037358edee539cac5baf8755a6bd396/sentencepiece-0.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:57cae326c8727de58c85977b175af132a7138d84c764635d7e71bbee7e774133", size = 1943152, upload-time = "2025-08-12T06:59:40.048Z" },
+ { url = "https://files.pythonhosted.org/packages/88/7e/ff23008899a58678e98c6ff592bf4d368eee5a71af96d0df6b38a039dd4f/sentencepiece-0.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:56dd39a3c4d6493db3cdca7e8cc68c6b633f0d4195495cbadfcf5af8a22d05a6", size = 1325651, upload-time = "2025-08-12T06:59:41.536Z" },
+ { url = "https://files.pythonhosted.org/packages/19/84/42eb3ce4796777a1b5d3699dfd4dca85113e68b637f194a6c8d786f16a04/sentencepiece-0.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d9381351182ff9888cc80e41c632e7e274b106f450de33d67a9e8f6043da6f76", size = 1253645, upload-time = "2025-08-12T06:59:42.903Z" },
+ { url = "https://files.pythonhosted.org/packages/89/fa/d3d5ebcba3cb9e6d3775a096251860c41a6bc53a1b9461151df83fe93255/sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:99f955df238021bf11f0fc37cdb54fd5e5b5f7fd30ecc3d93fb48b6815437167", size = 1316273, upload-time = "2025-08-12T06:59:44.476Z" },
+ { url = "https://files.pythonhosted.org/packages/04/88/14f2f4a2b922d8b39be45bf63d79e6cd3a9b2f248b2fcb98a69b12af12f5/sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0cdfecef430d985f1c2bcbfff3defd1d95dae876fbd0173376012d2d7d24044b", size = 1387881, upload-time = "2025-08-12T06:59:46.09Z" },
+ { url = "https://files.pythonhosted.org/packages/fd/b8/903e5ccb77b4ef140605d5d71b4f9e0ad95d456d6184688073ed11712809/sentencepiece-0.2.1-cp312-cp312-win32.whl", hash = "sha256:a483fd29a34c3e34c39ac5556b0a90942bec253d260235729e50976f5dba1068", size = 999540, upload-time = "2025-08-12T06:59:48.023Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/81/92df5673c067148c2545b1bfe49adfd775bcc3a169a047f5a0e6575ddaca/sentencepiece-0.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:4cdc7c36234fda305e85c32949c5211faaf8dd886096c7cea289ddc12a2d02de", size = 1054671, upload-time = "2025-08-12T06:59:49.895Z" },
+ { url = "https://files.pythonhosted.org/packages/fe/02/c5e3bc518655d714622bec87d83db9cdba1cd0619a4a04e2109751c4f47f/sentencepiece-0.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:daeb5e9e9fcad012324807856113708614d534f596d5008638eb9b40112cd9e4", size = 1033923, upload-time = "2025-08-12T06:59:51.952Z" },
+]
+
+[[package]]
+name = "sentry-sdk"
+version = "2.54.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "certifi" },
+ { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c8/e9/2e3a46c304e7fa21eaa70612f60354e32699c7102eb961f67448e222ad7c/sentry_sdk-2.54.0.tar.gz", hash = "sha256:2620c2575128d009b11b20f7feb81e4e4e8ae08ec1d36cbc845705060b45cc1b", size = 413813, upload-time = "2026-03-02T15:12:41.355Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/53/39/be412cc86bc6247b8f69e9383d7950711bd86f8d0a4a4b0fe8fad685bc21/sentry_sdk-2.54.0-py2.py3-none-any.whl", hash = "sha256:fd74e0e281dcda63afff095d23ebcd6e97006102cdc8e78a29f19ecdf796a0de", size = 439198, upload-time = "2026-03-02T15:12:39.546Z" },
+]
+
+[[package]]
+name = "setproctitle"
+version = "1.3.7"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/8d/48/49393a96a2eef1ab418b17475fb92b8fcfad83d099e678751b05472e69de/setproctitle-1.3.7.tar.gz", hash = "sha256:bc2bc917691c1537d5b9bca1468437176809c7e11e5694ca79a9ca12345dcb9e", size = 27002, upload-time = "2025-09-05T12:51:25.278Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/fb/f0/2dc88e842077719d7384d86cc47403e5102810492b33680e7dadcee64cd8/setproctitle-1.3.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2dc99aec591ab6126e636b11035a70991bc1ab7a261da428491a40b84376654e", size = 18049, upload-time = "2025-09-05T12:49:36.241Z" },
+ { url = "https://files.pythonhosted.org/packages/f0/b4/50940504466689cda65680c9e9a1e518e5750c10490639fa687489ac7013/setproctitle-1.3.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cdd8aa571b7aa39840fdbea620e308a19691ff595c3a10231e9ee830339dd798", size = 13079, upload-time = "2025-09-05T12:49:38.088Z" },
+ { url = "https://files.pythonhosted.org/packages/d0/99/71630546b9395b095f4082be41165d1078204d1696c2d9baade3de3202d0/setproctitle-1.3.7-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2906b6c7959cdb75f46159bf0acd8cc9906cf1361c9e1ded0d065fe8f9039629", size = 32932, upload-time = "2025-09-05T12:49:39.271Z" },
+ { url = "https://files.pythonhosted.org/packages/50/22/cee06af4ffcfb0e8aba047bd44f5262e644199ae7527ae2c1f672b86495c/setproctitle-1.3.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6915964a6dda07920a1159321dcd6d94fc7fc526f815ca08a8063aeca3c204f1", size = 33736, upload-time = "2025-09-05T12:49:40.565Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/00/a5949a8bb06ef5e7df214fc393bb2fb6aedf0479b17214e57750dfdd0f24/setproctitle-1.3.7-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cff72899861c765bd4021d1ff1c68d60edc129711a2fdba77f9cb69ef726a8b6", size = 35605, upload-time = "2025-09-05T12:49:42.362Z" },
+ { url = "https://files.pythonhosted.org/packages/b0/3a/50caca532a9343828e3bf5778c7a84d6c737a249b1796d50dd680290594d/setproctitle-1.3.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b7cb05bd446687ff816a3aaaf831047fc4c364feff7ada94a66024f1367b448c", size = 33143, upload-time = "2025-09-05T12:49:43.515Z" },
+ { url = "https://files.pythonhosted.org/packages/ca/14/b843a251296ce55e2e17c017d6b9f11ce0d3d070e9265de4ecad948b913d/setproctitle-1.3.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3a57b9a00de8cae7e2a1f7b9f0c2ac7b69372159e16a7708aa2f38f9e5cc987a", size = 34434, upload-time = "2025-09-05T12:49:45.31Z" },
+ { url = "https://files.pythonhosted.org/packages/c8/b7/06145c238c0a6d2c4bc881f8be230bb9f36d2bf51aff7bddcb796d5eed67/setproctitle-1.3.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d8828b356114f6b308b04afe398ed93803d7fca4a955dd3abe84430e28d33739", size = 32795, upload-time = "2025-09-05T12:49:46.419Z" },
+ { url = "https://files.pythonhosted.org/packages/ef/dc/ef76a81fac9bf27b84ed23df19c1f67391a753eed6e3c2254ebcb5133f56/setproctitle-1.3.7-cp312-cp312-win32.whl", hash = "sha256:b0304f905efc845829ac2bc791ddebb976db2885f6171f4a3de678d7ee3f7c9f", size = 12552, upload-time = "2025-09-05T12:49:47.635Z" },
+ { url = "https://files.pythonhosted.org/packages/e2/5b/a9fe517912cd6e28cf43a212b80cb679ff179a91b623138a99796d7d18a0/setproctitle-1.3.7-cp312-cp312-win_amd64.whl", hash = "sha256:9888ceb4faea3116cf02a920ff00bfbc8cc899743e4b4ac914b03625bdc3c300", size = 13247, upload-time = "2025-09-05T12:49:49.16Z" },
+]
+
+[[package]]
+name = "setuptools"
+version = "80.10.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/76/95/faf61eb8363f26aa7e1d762267a8d602a1b26d4f3a1e758e92cb3cb8b054/setuptools-80.10.2.tar.gz", hash = "sha256:8b0e9d10c784bf7d262c4e5ec5d4ec94127ce206e8738f29a437945fbc219b70", size = 1200343, upload-time = "2026-01-25T22:38:17.252Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/94/b8/f1f62a5e3c0ad2ff1d189590bfa4c46b4f3b6e49cef6f26c6ee4e575394d/setuptools-80.10.2-py3-none-any.whl", hash = "sha256:95b30ddfb717250edb492926c92b5221f7ef3fbcc2b07579bcd4a27da21d0173", size = 1064234, upload-time = "2026-01-25T22:38:15.216Z" },
+]
+
+[[package]]
+name = "shellingham"
+version = "1.5.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
+]
+
+[[package]]
+name = "six"
+version = "1.17.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
+]
+
+[[package]]
+name = "sniffio"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
+]
+
+[[package]]
+name = "sob"
+version = "0.1.3"
+source = { virtual = "." }
+dependencies = [
+ { name = "anthropic" },
+ { name = "datasets" },
+ { name = "google-genai" },
+ { name = "huggingface-hub" },
+ { name = "jsonschema" },
+ { name = "matplotlib" },
+ { name = "numpy" },
+ { name = "openai" },
+ { name = "pandas" },
+ { name = "pillow" },
+ { name = "pyarrow" },
+ { name = "pydantic" },
+ { name = "python-dotenv" },
+ { name = "ruff" },
+ { name = "seaborn" },
+ { name = "sentence-transformers" },
+ { name = "torch" },
+ { name = "tqdm" },
+ { name = "transformers" },
+ { name = "vllm" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "anthropic", specifier = ">=0.75.0" },
+ { name = "datasets", specifier = ">=4.5.0" },
+ { name = "google-genai", specifier = ">=1.0.0" },
+ { name = "huggingface-hub", specifier = ">=0.22.0" },
+ { name = "jsonschema", specifier = ">=4.0.0" },
+ { name = "matplotlib", specifier = ">=3.10.8" },
+ { name = "numpy", specifier = ">=1.24,<2.3" },
+ { name = "openai", specifier = ">=2.24.0" },
+ { name = "pandas", specifier = ">=3.0.1" },
+ { name = "pillow", specifier = ">=10.0.0" },
+ { name = "pyarrow", specifier = ">=16.0.0" },
+ { name = "pydantic", specifier = ">=2.0.0" },
+ { name = "python-dotenv", specifier = ">=1.2.1" },
+ { name = "ruff", specifier = ">=0.15.2" },
+ { name = "seaborn", specifier = ">=0.13.2" },
+ { name = "sentence-transformers", specifier = ">=5.2.3" },
+ { name = "torch", specifier = ">=2.10.0" },
+ { name = "tqdm", specifier = ">=4.67.3" },
+ { name = "transformers", specifier = ">=4.40.0" },
+ { name = "vllm", specifier = "==0.17.0" },
+]
+
+[[package]]
+name = "sse-starlette"
+version = "3.3.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "anyio" },
+ { name = "starlette" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5a/9f/c3695c2d2d4ef70072c3a06992850498b01c6bc9be531950813716b426fa/sse_starlette-3.3.2.tar.gz", hash = "sha256:678fca55a1945c734d8472a6cad186a55ab02840b4f6786f5ee8770970579dcd", size = 32326, upload-time = "2026-02-28T11:24:34.36Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/61/28/8cb142d3fe80c4a2d8af54ca0b003f47ce0ba920974e7990fa6e016402d1/sse_starlette-3.3.2-py3-none-any.whl", hash = "sha256:5c3ea3dad425c601236726af2f27689b74494643f57017cafcb6f8c9acfbb862", size = 14270, upload-time = "2026-02-28T11:24:32.984Z" },
+]
+
+[[package]]
+name = "starlette"
+version = "0.52.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "anyio" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" },
+]
+
+[[package]]
+name = "supervisor"
+version = "4.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a9/b5/37e7a3706de436a8a2d75334711dad1afb4ddffab09f25e31d89e467542f/supervisor-4.3.0.tar.gz", hash = "sha256:4a2bf149adf42997e1bb44b70c43b613275ec9852c3edacca86a9166b27e945e", size = 468912, upload-time = "2025-08-23T18:25:02.418Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0e/65/5e726c372da8a5e35022a94388b12252710aad0c2351699c3d76ae8dba78/supervisor-4.3.0-py2.py3-none-any.whl", hash = "sha256:0bcb763fddafba410f35cbde226aa7f8514b9fb82eb05a0c85f6588d1c13f8db", size = 320736, upload-time = "2025-08-23T18:25:00.767Z" },
+]
+
+[[package]]
+name = "sympy"
+version = "1.14.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "mpmath" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" },
+]
+
+[[package]]
+name = "tabulate"
+version = "0.10.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/46/58/8c37dea7bbf769b20d58e7ace7e5edfe65b849442b00ffcdd56be88697c6/tabulate-0.10.0.tar.gz", hash = "sha256:e2cfde8f79420f6deeffdeda9aaec3b6bc5abce947655d17ac662b126e48a60d", size = 91754, upload-time = "2026-03-04T18:55:34.402Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl", hash = "sha256:f0b0622e567335c8fabaaa659f1b33bcb6ddfe2e496071b743aa113f8774f2d3", size = 39814, upload-time = "2026-03-04T18:55:31.284Z" },
+]
+
+[[package]]
+name = "tenacity"
+version = "9.1.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/47/c6/ee486fd809e357697ee8a44d3d69222b344920433d3b6666ccd9b374630c/tenacity-9.1.4.tar.gz", hash = "sha256:adb31d4c263f2bd041081ab33b498309a57c77f9acf2db65aadf0898179cf93a", size = 49413, upload-time = "2026-02-07T10:45:33.841Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d7/c1/eb8f9debc45d3b7918a32ab756658a0904732f75e555402972246b0b8e71/tenacity-9.1.4-py3-none-any.whl", hash = "sha256:6095a360c919085f28c6527de529e76a06ad89b23659fa881ae0649b867a9d55", size = 28926, upload-time = "2026-02-07T10:45:32.24Z" },
+]
+
+[[package]]
+name = "threadpoolctl"
+version = "3.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" },
+]
+
+[[package]]
+name = "tiktoken"
+version = "0.12.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "regex" },
+ { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728, upload-time = "2025-10-06T20:21:52.756Z" },
+ { url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049, upload-time = "2025-10-06T20:21:53.782Z" },
+ { url = "https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008, upload-time = "2025-10-06T20:21:54.832Z" },
+ { url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665, upload-time = "2025-10-06T20:21:56.129Z" },
+ { url = "https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230, upload-time = "2025-10-06T20:21:57.546Z" },
+ { url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688, upload-time = "2025-10-06T20:21:58.619Z" },
+ { url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694, upload-time = "2025-10-06T20:21:59.876Z" },
+]
+
+[[package]]
+name = "tokenizers"
+version = "0.22.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "huggingface-hub" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275, upload-time = "2026-01-05T10:41:02.158Z" },
+ { url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472, upload-time = "2026-01-05T10:41:00.276Z" },
+ { url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736, upload-time = "2026-01-05T10:40:32.165Z" },
+ { url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835, upload-time = "2026-01-05T10:40:38.847Z" },
+ { url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673, upload-time = "2026-01-05T10:40:56.614Z" },
+ { url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818, upload-time = "2026-01-05T10:40:44.507Z" },
+ { url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195, upload-time = "2026-01-05T10:40:51.139Z" },
+ { url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982, upload-time = "2026-01-05T10:40:58.331Z" },
+ { url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245, upload-time = "2026-01-05T10:41:04.053Z" },
+ { url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069, upload-time = "2026-01-05T10:45:10.673Z" },
+ { url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263, upload-time = "2026-01-05T10:45:12.559Z" },
+ { url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429, upload-time = "2026-01-05T10:45:14.333Z" },
+ { url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363, upload-time = "2026-01-05T10:45:20.593Z" },
+ { url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786, upload-time = "2026-01-05T10:45:18.411Z" },
+ { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" },
+]
+
+[[package]]
+name = "torch"
+version = "2.10.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "cuda-bindings", version = "12.9.4", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "filelock" },
+ { name = "fsspec" },
+ { name = "jinja2" },
+ { name = "networkx" },
+ { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "setuptools" },
+ { name = "sympy" },
+ { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "typing-extensions" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d3/54/a2ba279afcca44bbd320d4e73675b282fcee3d81400ea1b53934efca6462/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:13ec4add8c3faaed8d13e0574f5cd4a323c11655546f91fbe6afa77b57423574", size = 79498202, upload-time = "2026-02-10T21:44:52.603Z" },
+ { url = "https://files.pythonhosted.org/packages/b3/7a/abada41517ce0011775f0f4eacc79659bc9bc6c361e6bfe6f7052a6b9363/torch-2.10.0-3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:98c01b8bb5e3240426dcde1446eed6f40c778091c8544767ef1168fc663a05a6", size = 915622781, upload-time = "2026-03-11T14:17:11.354Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/af/758e242e9102e9988969b5e621d41f36b8f258bb4a099109b7a4b4b50ea4/torch-2.10.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:5fd4117d89ffd47e3dcc71e71a22efac24828ad781c7e46aaaf56bf7f2796acf", size = 145996088, upload-time = "2026-01-21T16:24:44.171Z" },
+ { url = "https://files.pythonhosted.org/packages/23/8e/3c74db5e53bff7ed9e34c8123e6a8bfef718b2450c35eefab85bb4a7e270/torch-2.10.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:787124e7db3b379d4f1ed54dd12ae7c741c16a4d29b49c0226a89bea50923ffb", size = 915711952, upload-time = "2026-01-21T16:23:53.503Z" },
+ { url = "https://files.pythonhosted.org/packages/6e/01/624c4324ca01f66ae4c7cd1b74eb16fb52596dce66dbe51eff95ef9e7a4c/torch-2.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:2c66c61f44c5f903046cc696d088e21062644cbe541c7f1c4eaae88b2ad23547", size = 113757972, upload-time = "2026-01-21T16:24:39.516Z" },
+ { url = "https://files.pythonhosted.org/packages/c9/5c/dee910b87c4d5c0fcb41b50839ae04df87c1cfc663cf1b5fca7ea565eeaa/torch-2.10.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6d3707a61863d1c4d6ebba7be4ca320f42b869ee657e9b2c21c736bf17000294", size = 79498198, upload-time = "2026-01-21T16:24:34.704Z" },
+]
+
+[[package]]
+name = "torch-c-dlpack-ext"
+version = "0.1.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "torch" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/37/de/921b6491efce5c389a5ef9bbed3d2d6660005840dae488124173180859ab/torch_c_dlpack_ext-0.1.5.tar.gz", hash = "sha256:d06f0357d575d22a168cc77acb9020fc4bae30968ceb6718a055dcbe92bacabe", size = 12913, upload-time = "2026-01-12T11:25:08.484Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b1/67/10d236698525d7b7db4d74ec0a4b01f5b2db33968995fdd9ac6b4635e327/torch_c_dlpack_ext-0.1.5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:c0f2bd51fcd99c0e5b50314e1985f2728c4941bfa821f065e6c30951d1f995ca", size = 5291237, upload-time = "2026-01-12T11:24:44.011Z" },
+ { url = "https://files.pythonhosted.org/packages/87/06/8d760997307a5c3be4384424667bf31aae0a42060838c532c7d846516175/torch_c_dlpack_ext-0.1.5-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3562ee411258676f9c38b8ad39306d1c8d027b6a86f6a87c920d2d009a9d1510", size = 443069, upload-time = "2026-01-12T11:24:45.451Z" },
+ { url = "https://files.pythonhosted.org/packages/e2/79/a914539b4785f3e44f891aa012a886edb8bc10fe081c440981c57543ce21/torch_c_dlpack_ext-0.1.5-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e6f9da4bb9af70e27facc777458be62e10dbbbddda7672d16138db0553c5a524", size = 897846, upload-time = "2026-01-12T11:24:48.168Z" },
+ { url = "https://files.pythonhosted.org/packages/3a/e6/7d7a97a3953208d6d6ce749180c34d1dab48464ded9a76cecabe9d021ce6/torch_c_dlpack_ext-0.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:670fbbab70123cc228bed41693a3720757af57a0ad22669063c9db25321e8f55", size = 1482855, upload-time = "2026-01-12T11:24:49.581Z" },
+]
+
+[[package]]
+name = "torchaudio"
+version = "2.10.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "torch" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0f/36/28a6f3e857616cf7576bdbf8170e483b8c5d0a1f8d349ecb2b75921236aa/torchaudio-2.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9d0fbdbfd2f621c51d28571050d6d0c7287791034e5c7303b31480af1258f33f", size = 737144, upload-time = "2026-01-21T16:28:44.189Z" },
+ { url = "https://files.pythonhosted.org/packages/ea/3f/df620439a76ece170472d41438d11a1545d5db5dc9f1eaeab8c6e055a328/torchaudio-2.10.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:42b148a0921a3721abd1f6ae098b1ec9f89703e555c4f7a0d44da87b8decbcb9", size = 391973, upload-time = "2026-01-21T16:28:39.732Z" },
+ { url = "https://files.pythonhosted.org/packages/98/25/e55a30d7138f8fe56ed006df25b0a3c27681f0ec7bc9989e1778e6d559c3/torchaudio-2.10.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:0e77b2956448d63790a99beed0b74ac8b8cd3a94dcdd9ad01974411078f46278", size = 1895234, upload-time = "2026-01-21T16:28:37.034Z" },
+ { url = "https://files.pythonhosted.org/packages/be/a0/da53c7d20fac15f66f8838653b91162de1bf21fb40fee88cf839e4ef5174/torchaudio-2.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:7f76a01ecebf1869e1f2c50a261f1cf07e5fccb24402b4e9bbb82d6725b9c7dd", size = 475470, upload-time = "2026-01-21T16:28:40.615Z" },
+]
+
+[[package]]
+name = "torchvision"
+version = "0.25.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "numpy" },
+ { name = "pillow" },
+ { name = "torch" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/56/3a/6ea0d73f49a9bef38a1b3a92e8dd455cea58470985d25635beab93841748/torchvision-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c2abe430c90b1d5e552680037d68da4eb80a5852ebb1c811b2b89d299b10573b", size = 1874920, upload-time = "2026-01-21T16:27:45.348Z" },
+ { url = "https://files.pythonhosted.org/packages/51/f8/c0e1ef27c66e15406fece94930e7d6feee4cb6374bbc02d945a630d6426e/torchvision-0.25.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b75deafa2dfea3e2c2a525559b04783515e3463f6e830cb71de0fb7ea36fe233", size = 2344556, upload-time = "2026-01-21T16:27:40.125Z" },
+ { url = "https://files.pythonhosted.org/packages/68/2f/f24b039169db474e8688f649377de082a965fbf85daf4e46c44412f1d15a/torchvision-0.25.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f25aa9e380865b11ea6e9d99d84df86b9cc959f1a007cd966fc6f1ab2ed0e248", size = 8072351, upload-time = "2026-01-21T16:27:21.074Z" },
+ { url = "https://files.pythonhosted.org/packages/ad/16/8f650c2e288977cf0f8f85184b90ee56ed170a4919347fc74ee99286ed6f/torchvision-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:f9c55ae8d673ab493325d1267cbd285bb94d56f99626c00ac4644de32a59ede3", size = 4303059, upload-time = "2026-01-21T16:27:11.08Z" },
+]
+
+[[package]]
+name = "tqdm"
+version = "4.67.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" },
+]
+
+[[package]]
+name = "transformers"
+version = "4.57.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "filelock" },
+ { name = "huggingface-hub" },
+ { name = "numpy" },
+ { name = "packaging" },
+ { name = "pyyaml" },
+ { name = "regex" },
+ { name = "requests" },
+ { name = "safetensors" },
+ { name = "tokenizers" },
+ { name = "tqdm" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c4/35/67252acc1b929dc88b6602e8c4a982e64f31e733b804c14bc24b47da35e6/transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3", size = 10134912, upload-time = "2026-01-16T10:38:39.284Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498, upload-time = "2026-01-16T10:38:31.289Z" },
+]
+
+[[package]]
+name = "triton"
+version = "3.6.0"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850, upload-time = "2026-01-20T16:00:43.041Z" },
+]
+
+[[package]]
+name = "typer"
+version = "0.24.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "annotated-doc" },
+ { name = "click" },
+ { name = "rich" },
+ { name = "shellingham" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" },
+]
+
+[[package]]
+name = "typing-extensions"
+version = "4.15.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
+]
+
+[[package]]
+name = "typing-inspection"
+version = "0.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" },
+]
+
+[[package]]
+name = "tzdata"
+version = "2025.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5e/a7/c202b344c5ca7daf398f3b8a477eeb205cf3b6f32e7ec3a6bac0629ca975/tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7", size = 196772, upload-time = "2025-12-13T17:45:35.667Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" },
+]
+
+[[package]]
+name = "urllib3"
+version = "2.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
+]
+
+[[package]]
+name = "uvicorn"
+version = "0.41.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "click" },
+ { name = "h11" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/32/ce/eeb58ae4ac36fe09e3842eb02e0eb676bf2c53ae062b98f1b2531673efdd/uvicorn-0.41.0.tar.gz", hash = "sha256:09d11cf7008da33113824ee5a1c6422d89fbc2ff476540d69a34c87fab8b571a", size = 82633, upload-time = "2026-02-16T23:07:24.1Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/83/e4/d04a086285c20886c0daad0e026f250869201013d18f81d9ff5eada73a88/uvicorn-0.41.0-py3-none-any.whl", hash = "sha256:29e35b1d2c36a04b9e180d4007ede3bcb32a85fbdfd6c6aeb3f26839de088187", size = 68783, upload-time = "2026-02-16T23:07:22.357Z" },
+]
+
+[package.optional-dependencies]
+standard = [
+ { name = "colorama", marker = "sys_platform == 'win32'" },
+ { name = "httptools" },
+ { name = "python-dotenv" },
+ { name = "pyyaml" },
+ { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'" },
+ { name = "watchfiles" },
+ { name = "websockets" },
+]
+
+[[package]]
+name = "uvloop"
+version = "0.22.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = "sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, upload-time = "2025-10-16T22:17:19.342Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/3d/ff/7f72e8170be527b4977b033239a83a68d5c881cc4775fca255c677f7ac5d/uvloop-0.22.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fe94b4564e865d968414598eea1a6de60adba0c040ba4ed05ac1300de402cd42", size = 1359936, upload-time = "2025-10-16T22:16:29.436Z" },
+ { url = "https://files.pythonhosted.org/packages/c3/c6/e5d433f88fd54d81ef4be58b2b7b0cea13c442454a1db703a1eea0db1a59/uvloop-0.22.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51eb9bd88391483410daad430813d982010f9c9c89512321f5b60e2cddbdddd6", size = 752769, upload-time = "2025-10-16T22:16:30.493Z" },
+ { url = "https://files.pythonhosted.org/packages/24/68/a6ac446820273e71aa762fa21cdcc09861edd3536ff47c5cd3b7afb10eeb/uvloop-0.22.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:700e674a166ca5778255e0e1dc4e9d79ab2acc57b9171b79e65feba7184b3370", size = 4317413, upload-time = "2025-10-16T22:16:31.644Z" },
+ { url = "https://files.pythonhosted.org/packages/5f/6f/e62b4dfc7ad6518e7eff2516f680d02a0f6eb62c0c212e152ca708a0085e/uvloop-0.22.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b5b1ac819a3f946d3b2ee07f09149578ae76066d70b44df3fa990add49a82e4", size = 4426307, upload-time = "2025-10-16T22:16:32.917Z" },
+ { url = "https://files.pythonhosted.org/packages/90/60/97362554ac21e20e81bcef1150cb2a7e4ffdaf8ea1e5b2e8bf7a053caa18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e047cc068570bac9866237739607d1313b9253c3051ad84738cbb095be0537b2", size = 4131970, upload-time = "2025-10-16T22:16:34.015Z" },
+ { url = "https://files.pythonhosted.org/packages/99/39/6b3f7d234ba3964c428a6e40006340f53ba37993f46ed6e111c6e9141d18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:512fec6815e2dd45161054592441ef76c830eddaad55c8aa30952e6fe1ed07c0", size = 4296343, upload-time = "2025-10-16T22:16:35.149Z" },
+]
+
+[[package]]
+name = "vllm"
+version = "0.17.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "aiohttp" },
+ { name = "anthropic" },
+ { name = "blake3" },
+ { name = "cachetools" },
+ { name = "cbor2" },
+ { name = "cloudpickle" },
+ { name = "compressed-tensors" },
+ { name = "depyf" },
+ { name = "diskcache" },
+ { name = "einops" },
+ { name = "fastapi", extra = ["standard"] },
+ { name = "filelock" },
+ { name = "flashinfer-python" },
+ { name = "gguf" },
+ { name = "grpcio" },
+ { name = "grpcio-reflection" },
+ { name = "ijson" },
+ { name = "kaldi-native-fbank" },
+ { name = "lark" },
+ { name = "llguidance", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'ppc64le' or platform_machine == 's390x' or platform_machine == 'x86_64'" },
+ { name = "lm-format-enforcer" },
+ { name = "mcp" },
+ { name = "mistral-common", extra = ["image"] },
+ { name = "model-hosting-container-standards" },
+ { name = "msgspec" },
+ { name = "ninja" },
+ { name = "numba" },
+ { name = "numpy" },
+ { name = "nvidia-cutlass-dsl" },
+ { name = "openai" },
+ { name = "openai-harmony" },
+ { name = "opencv-python-headless" },
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-exporter-otlp" },
+ { name = "opentelemetry-sdk" },
+ { name = "opentelemetry-semantic-conventions-ai" },
+ { name = "outlines-core" },
+ { name = "partial-json-parser" },
+ { name = "pillow" },
+ { name = "prometheus-client" },
+ { name = "prometheus-fastapi-instrumentator" },
+ { name = "protobuf" },
+ { name = "psutil" },
+ { name = "py-cpuinfo" },
+ { name = "pybase64" },
+ { name = "pydantic" },
+ { name = "python-json-logger" },
+ { name = "pyyaml" },
+ { name = "pyzmq" },
+ { name = "quack-kernels" },
+ { name = "ray", extra = ["cgraph"] },
+ { name = "regex" },
+ { name = "requests" },
+ { name = "sentencepiece" },
+ { name = "setproctitle" },
+ { name = "setuptools" },
+ { name = "six" },
+ { name = "tiktoken" },
+ { name = "tokenizers" },
+ { name = "torch" },
+ { name = "torchaudio" },
+ { name = "torchvision" },
+ { name = "tqdm" },
+ { name = "transformers" },
+ { name = "typing-extensions" },
+ { name = "watchfiles" },
+ { name = "xgrammar", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'ppc64le' or platform_machine == 's390x' or platform_machine == 'x86_64'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/13/d5/af83a4262ca4d5692a93b3c322ae954e3e6c4e23f8f9db3ab87bd79c919e/vllm-0.17.0.tar.gz", hash = "sha256:b0b62e58ef4eb633ef371f2726976372cf6dfcb7ff2ea9ddf7194c1930d5629a", size = 30541311, upload-time = "2026-03-07T03:54:54.333Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f2/72/78a48668f2631def18bbaaa331d7878bcfc5c3137455422aafb0748e1261/vllm-0.17.0-cp38-abi3-manylinux_2_31_aarch64.whl", hash = "sha256:310fb82fe061ed75dceeb4aeb803cd8ee0d590337ec720f7abfb03a69314d710", size = 385329399, upload-time = "2026-03-07T03:54:34.261Z" },
+ { url = "https://files.pythonhosted.org/packages/25/4f/972726f9a501f01203b5c4796e1932abbe435fae6d7715a4c3f1aad14a58/vllm-0.17.0-cp38-abi3-manylinux_2_31_x86_64.whl", hash = "sha256:0296670a09d392ee43455d9bebf590d05a9bc2ebce5e25e2919222fc815158da", size = 432927988, upload-time = "2026-03-07T03:54:02.312Z" },
+]
+
+[[package]]
+name = "watchfiles"
+version = "1.1.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "anyio" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c2/c9/8869df9b2a2d6c59d79220a4db37679e74f807c559ffe5265e08b227a210/watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2", size = 94440, upload-time = "2025-10-14T15:06:21.08Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/74/d5/f039e7e3c639d9b1d09b07ea412a6806d38123f0508e5f9b48a87b0a76cc/watchfiles-1.1.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8c89f9f2f740a6b7dcc753140dd5e1ab9215966f7a3530d0c0705c83b401bd7d", size = 404745, upload-time = "2025-10-14T15:04:46.731Z" },
+ { url = "https://files.pythonhosted.org/packages/a5/96/a881a13aa1349827490dab2d363c8039527060cfcc2c92cc6d13d1b1049e/watchfiles-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd404be08018c37350f0d6e34676bd1e2889990117a2b90070b3007f172d0610", size = 391769, upload-time = "2025-10-14T15:04:48.003Z" },
+ { url = "https://files.pythonhosted.org/packages/4b/5b/d3b460364aeb8da471c1989238ea0e56bec24b6042a68046adf3d9ddb01c/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8526e8f916bb5b9a0a777c8317c23ce65de259422bba5b31325a6fa6029d33af", size = 449374, upload-time = "2025-10-14T15:04:49.179Z" },
+ { url = "https://files.pythonhosted.org/packages/b9/44/5769cb62d4ed055cb17417c0a109a92f007114a4e07f30812a73a4efdb11/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2edc3553362b1c38d9f06242416a5d8e9fe235c204a4072e988ce2e5bb1f69f6", size = 459485, upload-time = "2025-10-14T15:04:50.155Z" },
+ { url = "https://files.pythonhosted.org/packages/19/0c/286b6301ded2eccd4ffd0041a1b726afda999926cf720aab63adb68a1e36/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30f7da3fb3f2844259cba4720c3fc7138eb0f7b659c38f3bfa65084c7fc7abce", size = 488813, upload-time = "2025-10-14T15:04:51.059Z" },
+ { url = "https://files.pythonhosted.org/packages/c7/2b/8530ed41112dd4a22f4dcfdb5ccf6a1baad1ff6eed8dc5a5f09e7e8c41c7/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8979280bdafff686ba5e4d8f97840f929a87ed9cdf133cbbd42f7766774d2aa", size = 594816, upload-time = "2025-10-14T15:04:52.031Z" },
+ { url = "https://files.pythonhosted.org/packages/ce/d2/f5f9fb49489f184f18470d4f99f4e862a4b3e9ac2865688eb2099e3d837a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dcc5c24523771db3a294c77d94771abcfcb82a0e0ee8efd910c37c59ec1b31bb", size = 475186, upload-time = "2025-10-14T15:04:53.064Z" },
+ { url = "https://files.pythonhosted.org/packages/cf/68/5707da262a119fb06fbe214d82dd1fe4a6f4af32d2d14de368d0349eb52a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db5d7ae38ff20153d542460752ff397fcf5c96090c1230803713cf3147a6803", size = 456812, upload-time = "2025-10-14T15:04:55.174Z" },
+ { url = "https://files.pythonhosted.org/packages/66/ab/3cbb8756323e8f9b6f9acb9ef4ec26d42b2109bce830cc1f3468df20511d/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:28475ddbde92df1874b6c5c8aaeb24ad5be47a11f87cde5a28ef3835932e3e94", size = 630196, upload-time = "2025-10-14T15:04:56.22Z" },
+ { url = "https://files.pythonhosted.org/packages/78/46/7152ec29b8335f80167928944a94955015a345440f524d2dfe63fc2f437b/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:36193ed342f5b9842edd3532729a2ad55c4160ffcfa3700e0d54be496b70dd43", size = 622657, upload-time = "2025-10-14T15:04:57.521Z" },
+ { url = "https://files.pythonhosted.org/packages/0a/bf/95895e78dd75efe9a7f31733607f384b42eb5feb54bd2eb6ed57cc2e94f4/watchfiles-1.1.1-cp312-cp312-win32.whl", hash = "sha256:859e43a1951717cc8de7f4c77674a6d389b106361585951d9e69572823f311d9", size = 272042, upload-time = "2025-10-14T15:04:59.046Z" },
+ { url = "https://files.pythonhosted.org/packages/87/0a/90eb755f568de2688cb220171c4191df932232c20946966c27a59c400850/watchfiles-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:91d4c9a823a8c987cce8fa2690923b069966dabb196dd8d137ea2cede885fde9", size = 288410, upload-time = "2025-10-14T15:05:00.081Z" },
+ { url = "https://files.pythonhosted.org/packages/36/76/f322701530586922fbd6723c4f91ace21364924822a8772c549483abed13/watchfiles-1.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:a625815d4a2bdca61953dbba5a39d60164451ef34c88d751f6c368c3ea73d404", size = 278209, upload-time = "2025-10-14T15:05:01.168Z" },
+]
+
+[[package]]
+name = "websockets"
+version = "16.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/04/24/4b2031d72e840ce4c1ccb255f693b15c334757fc50023e4db9537080b8c4/websockets-16.0.tar.gz", hash = "sha256:5f6261a5e56e8d5c42a4497b364ea24d94d9563e8fbd44e78ac40879c60179b5", size = 179346, upload-time = "2026-01-10T09:23:47.181Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/84/7b/bac442e6b96c9d25092695578dda82403c77936104b5682307bd4deb1ad4/websockets-16.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:71c989cbf3254fbd5e84d3bff31e4da39c43f884e64f2551d14bb3c186230f00", size = 177365, upload-time = "2026-01-10T09:22:46.787Z" },
+ { url = "https://files.pythonhosted.org/packages/b0/fe/136ccece61bd690d9c1f715baaeefd953bb2360134de73519d5df19d29ca/websockets-16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8b6e209ffee39ff1b6d0fa7bfef6de950c60dfb91b8fcead17da4ee539121a79", size = 175038, upload-time = "2026-01-10T09:22:47.999Z" },
+ { url = "https://files.pythonhosted.org/packages/40/1e/9771421ac2286eaab95b8575b0cb701ae3663abf8b5e1f64f1fd90d0a673/websockets-16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:86890e837d61574c92a97496d590968b23c2ef0aeb8a9bc9421d174cd378ae39", size = 175328, upload-time = "2026-01-10T09:22:49.809Z" },
+ { url = "https://files.pythonhosted.org/packages/18/29/71729b4671f21e1eaa5d6573031ab810ad2936c8175f03f97f3ff164c802/websockets-16.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9b5aca38b67492ef518a8ab76851862488a478602229112c4b0d58d63a7a4d5c", size = 184915, upload-time = "2026-01-10T09:22:51.071Z" },
+ { url = "https://files.pythonhosted.org/packages/97/bb/21c36b7dbbafc85d2d480cd65df02a1dc93bf76d97147605a8e27ff9409d/websockets-16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e0334872c0a37b606418ac52f6ab9cfd17317ac26365f7f65e203e2d0d0d359f", size = 186152, upload-time = "2026-01-10T09:22:52.224Z" },
+ { url = "https://files.pythonhosted.org/packages/4a/34/9bf8df0c0cf88fa7bfe36678dc7b02970c9a7d5e065a3099292db87b1be2/websockets-16.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a0b31e0b424cc6b5a04b8838bbaec1688834b2383256688cf47eb97412531da1", size = 185583, upload-time = "2026-01-10T09:22:53.443Z" },
+ { url = "https://files.pythonhosted.org/packages/47/88/4dd516068e1a3d6ab3c7c183288404cd424a9a02d585efbac226cb61ff2d/websockets-16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:485c49116d0af10ac698623c513c1cc01c9446c058a4e61e3bf6c19dff7335a2", size = 184880, upload-time = "2026-01-10T09:22:55.033Z" },
+ { url = "https://files.pythonhosted.org/packages/91/d6/7d4553ad4bf1c0421e1ebd4b18de5d9098383b5caa1d937b63df8d04b565/websockets-16.0-cp312-cp312-win32.whl", hash = "sha256:eaded469f5e5b7294e2bdca0ab06becb6756ea86894a47806456089298813c89", size = 178261, upload-time = "2026-01-10T09:22:56.251Z" },
+ { url = "https://files.pythonhosted.org/packages/c3/f0/f3a17365441ed1c27f850a80b2bc680a0fa9505d733fe152fdf5e98c1c0b/websockets-16.0-cp312-cp312-win_amd64.whl", hash = "sha256:5569417dc80977fc8c2d43a86f78e0a5a22fee17565d78621b6bb264a115d4ea", size = 178693, upload-time = "2026-01-10T09:22:57.478Z" },
+ { url = "https://files.pythonhosted.org/packages/6f/28/258ebab549c2bf3e64d2b0217b973467394a9cea8c42f70418ca2c5d0d2e/websockets-16.0-py3-none-any.whl", hash = "sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec", size = 171598, upload-time = "2026-01-10T09:23:45.395Z" },
+]
+
+[[package]]
+name = "win32-setctime"
+version = "1.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867, upload-time = "2024-12-07T15:28:28.314Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" },
+]
+
+[[package]]
+name = "xgrammar"
+version = "0.1.29"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "mlx-lm", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'" },
+ { name = "numpy" },
+ { name = "pydantic" },
+ { name = "torch" },
+ { name = "transformers" },
+ { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/02/a3/70dbe3ffd331a1e7e1ad5a95690a4086e6c7cdb8089f5c7eda712219ccec/xgrammar-0.1.29.tar.gz", hash = "sha256:cf195afa81b489eebf35d4c6f37f27136d05420739ab4a6f7f065c938d7e4baa", size = 2321317, upload-time = "2025-12-19T08:23:54.53Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c1/d8/fb282fc78be6e9bbefb5cb389f66b22e4efd6ae14f06234f599651620da5/xgrammar-0.1.29-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:d992a3cee7594bbdaa64ae59f90da5ce21c5fe654719df3816014289ada6f04d", size = 16007376, upload-time = "2025-12-19T08:23:23.634Z" },
+ { url = "https://files.pythonhosted.org/packages/82/a7/2c9767620ee50f2f40f1eb95e55a3a29e1a0670f087ee6dc1bc1c887b906/xgrammar-0.1.29-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1bbdf02e45cfa8614218ba01ca7952d375f8bc1c13884e3d04daa4b54180cbc2", size = 17913535, upload-time = "2025-12-19T08:23:26.02Z" },
+ { url = "https://files.pythonhosted.org/packages/57/94/18793c64bf0368075a34c06e196bf002f1e6ab0aee332268f44e8d356d5a/xgrammar-0.1.29-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6eb370a16b27a683e5f2b9e429ab41440c69977d4a504849ed61831b94cc704c", size = 34705239, upload-time = "2025-12-19T08:23:28.369Z" },
+ { url = "https://files.pythonhosted.org/packages/3e/da/4c14e3e00be698009b52700f15326a23272b4b00475939b6acc86b151188/xgrammar-0.1.29-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79e6e4f5cd33be77418cf91efc482f2b3d773d309891224383bc8a4948ad7b07", size = 34906135, upload-time = "2025-12-19T08:23:30.838Z" },
+ { url = "https://files.pythonhosted.org/packages/22/d8/34423997f48627cef3b74cc894d9dfcaacae02941c06237ac5f3196406a7/xgrammar-0.1.29-cp312-cp312-win_amd64.whl", hash = "sha256:39bdfadedbce34599835486164fa80ba00248c6c75ad91f3843db90ef37e037f", size = 5928381, upload-time = "2025-12-19T08:23:33.428Z" },
+]
+
+[[package]]
+name = "xxhash"
+version = "3.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/02/84/30869e01909fb37a6cc7e18688ee8bf1e42d57e7e0777636bd47524c43c7/xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6", size = 85160, upload-time = "2025-10-02T14:37:08.097Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/9a/07/d9412f3d7d462347e4511181dea65e47e0d0e16e26fbee2ea86a2aefb657/xxhash-3.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:01362c4331775398e7bb34e3ab403bc9ee9f7c497bc7dee6272114055277dd3c", size = 32744, upload-time = "2025-10-02T14:34:34.622Z" },
+ { url = "https://files.pythonhosted.org/packages/79/35/0429ee11d035fc33abe32dca1b2b69e8c18d236547b9a9b72c1929189b9a/xxhash-3.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7b2df81a23f8cb99656378e72501b2cb41b1827c0f5a86f87d6b06b69f9f204", size = 30816, upload-time = "2025-10-02T14:34:36.043Z" },
+ { url = "https://files.pythonhosted.org/packages/b7/f2/57eb99aa0f7d98624c0932c5b9a170e1806406cdbcdb510546634a1359e0/xxhash-3.6.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dc94790144e66b14f67b10ac8ed75b39ca47536bf8800eb7c24b50271ea0c490", size = 194035, upload-time = "2025-10-02T14:34:37.354Z" },
+ { url = "https://files.pythonhosted.org/packages/4c/ed/6224ba353690d73af7a3f1c7cdb1fc1b002e38f783cb991ae338e1eb3d79/xxhash-3.6.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93f107c673bccf0d592cdba077dedaf52fe7f42dcd7676eba1f6d6f0c3efffd2", size = 212914, upload-time = "2025-10-02T14:34:38.6Z" },
+ { url = "https://files.pythonhosted.org/packages/38/86/fb6b6130d8dd6b8942cc17ab4d90e223653a89aa32ad2776f8af7064ed13/xxhash-3.6.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aa5ee3444c25b69813663c9f8067dcfaa2e126dc55e8dddf40f4d1c25d7effa", size = 212163, upload-time = "2025-10-02T14:34:39.872Z" },
+ { url = "https://files.pythonhosted.org/packages/ee/dc/e84875682b0593e884ad73b2d40767b5790d417bde603cceb6878901d647/xxhash-3.6.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7f99123f0e1194fa59cc69ad46dbae2e07becec5df50a0509a808f90a0f03f0", size = 445411, upload-time = "2025-10-02T14:34:41.569Z" },
+ { url = "https://files.pythonhosted.org/packages/11/4f/426f91b96701ec2f37bb2b8cec664eff4f658a11f3fa9d94f0a887ea6d2b/xxhash-3.6.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49e03e6fe2cac4a1bc64952dd250cf0dbc5ef4ebb7b8d96bce82e2de163c82a2", size = 193883, upload-time = "2025-10-02T14:34:43.249Z" },
+ { url = "https://files.pythonhosted.org/packages/53/5a/ddbb83eee8e28b778eacfc5a85c969673e4023cdeedcfcef61f36731610b/xxhash-3.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bd17fede52a17a4f9a7bc4472a5867cb0b160deeb431795c0e4abe158bc784e9", size = 210392, upload-time = "2025-10-02T14:34:45.042Z" },
+ { url = "https://files.pythonhosted.org/packages/1e/c2/ff69efd07c8c074ccdf0a4f36fcdd3d27363665bcdf4ba399abebe643465/xxhash-3.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6fb5f5476bef678f69db04f2bd1efbed3030d2aba305b0fc1773645f187d6a4e", size = 197898, upload-time = "2025-10-02T14:34:46.302Z" },
+ { url = "https://files.pythonhosted.org/packages/58/ca/faa05ac19b3b622c7c9317ac3e23954187516298a091eb02c976d0d3dd45/xxhash-3.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:843b52f6d88071f87eba1631b684fcb4b2068cd2180a0224122fe4ef011a9374", size = 210655, upload-time = "2025-10-02T14:34:47.571Z" },
+ { url = "https://files.pythonhosted.org/packages/d4/7a/06aa7482345480cc0cb597f5c875b11a82c3953f534394f620b0be2f700c/xxhash-3.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7d14a6cfaf03b1b6f5f9790f76880601ccc7896aff7ab9cd8978a939c1eb7e0d", size = 414001, upload-time = "2025-10-02T14:34:49.273Z" },
+ { url = "https://files.pythonhosted.org/packages/23/07/63ffb386cd47029aa2916b3d2f454e6cc5b9f5c5ada3790377d5430084e7/xxhash-3.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:418daf3db71e1413cfe211c2f9a528456936645c17f46b5204705581a45390ae", size = 191431, upload-time = "2025-10-02T14:34:50.798Z" },
+ { url = "https://files.pythonhosted.org/packages/0f/93/14fde614cadb4ddf5e7cebf8918b7e8fac5ae7861c1875964f17e678205c/xxhash-3.6.0-cp312-cp312-win32.whl", hash = "sha256:50fc255f39428a27299c20e280d6193d8b63b8ef8028995323bf834a026b4fbb", size = 30617, upload-time = "2025-10-02T14:34:51.954Z" },
+ { url = "https://files.pythonhosted.org/packages/13/5d/0d125536cbe7565a83d06e43783389ecae0c0f2ed037b48ede185de477c0/xxhash-3.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:c0f2ab8c715630565ab8991b536ecded9416d615538be8ecddce43ccf26cbc7c", size = 31534, upload-time = "2025-10-02T14:34:53.276Z" },
+ { url = "https://files.pythonhosted.org/packages/54/85/6ec269b0952ec7e36ba019125982cf11d91256a778c7c3f98a4c5043d283/xxhash-3.6.0-cp312-cp312-win_arm64.whl", hash = "sha256:eae5c13f3bc455a3bbb68bdc513912dc7356de7e2280363ea235f71f54064829", size = 27876, upload-time = "2025-10-02T14:34:54.371Z" },
+]
+
+[[package]]
+name = "yarl"
+version = "1.22.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "idna" },
+ { name = "multidict" },
+ { name = "propcache" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/57/63/0c6ebca57330cd313f6102b16dd57ffaf3ec4c83403dcb45dbd15c6f3ea1/yarl-1.22.0.tar.gz", hash = "sha256:bebf8557577d4401ba8bd9ff33906f1376c877aa78d1fe216ad01b4d6745af71", size = 187169, upload-time = "2025-10-06T14:12:55.963Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/75/ff/46736024fee3429b80a165a732e38e5d5a238721e634ab41b040d49f8738/yarl-1.22.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e340382d1afa5d32b892b3ff062436d592ec3d692aeea3bef3a5cfe11bbf8c6f", size = 142000, upload-time = "2025-10-06T14:09:44.631Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/9a/b312ed670df903145598914770eb12de1bac44599549b3360acc96878df8/yarl-1.22.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f1e09112a2c31ffe8d80be1b0988fa6a18c5d5cad92a9ffbb1c04c91bfe52ad2", size = 94338, upload-time = "2025-10-06T14:09:46.372Z" },
+ { url = "https://files.pythonhosted.org/packages/ba/f5/0601483296f09c3c65e303d60c070a5c19fcdbc72daa061e96170785bc7d/yarl-1.22.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:939fe60db294c786f6b7c2d2e121576628468f65453d86b0fe36cb52f987bd74", size = 94909, upload-time = "2025-10-06T14:09:48.648Z" },
+ { url = "https://files.pythonhosted.org/packages/60/41/9a1fe0b73dbcefce72e46cf149b0e0a67612d60bfc90fb59c2b2efdfbd86/yarl-1.22.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1651bf8e0398574646744c1885a41198eba53dc8a9312b954073f845c90a8df", size = 372940, upload-time = "2025-10-06T14:09:50.089Z" },
+ { url = "https://files.pythonhosted.org/packages/17/7a/795cb6dfee561961c30b800f0ed616b923a2ec6258b5def2a00bf8231334/yarl-1.22.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b8a0588521a26bf92a57a1705b77b8b59044cdceccac7151bd8d229e66b8dedb", size = 345825, upload-time = "2025-10-06T14:09:52.142Z" },
+ { url = "https://files.pythonhosted.org/packages/d7/93/a58f4d596d2be2ae7bab1a5846c4d270b894958845753b2c606d666744d3/yarl-1.22.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:42188e6a615c1a75bcaa6e150c3fe8f3e8680471a6b10150c5f7e83f47cc34d2", size = 386705, upload-time = "2025-10-06T14:09:54.128Z" },
+ { url = "https://files.pythonhosted.org/packages/61/92/682279d0e099d0e14d7fd2e176bd04f48de1484f56546a3e1313cd6c8e7c/yarl-1.22.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f6d2cb59377d99718913ad9a151030d6f83ef420a2b8f521d94609ecc106ee82", size = 396518, upload-time = "2025-10-06T14:09:55.762Z" },
+ { url = "https://files.pythonhosted.org/packages/db/0f/0d52c98b8a885aeda831224b78f3be7ec2e1aa4a62091f9f9188c3c65b56/yarl-1.22.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50678a3b71c751d58d7908edc96d332af328839eea883bb554a43f539101277a", size = 377267, upload-time = "2025-10-06T14:09:57.958Z" },
+ { url = "https://files.pythonhosted.org/packages/22/42/d2685e35908cbeaa6532c1fc73e89e7f2efb5d8a7df3959ea8e37177c5a3/yarl-1.22.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1e8fbaa7cec507aa24ea27a01456e8dd4b6fab829059b69844bd348f2d467124", size = 365797, upload-time = "2025-10-06T14:09:59.527Z" },
+ { url = "https://files.pythonhosted.org/packages/a2/83/cf8c7bcc6355631762f7d8bdab920ad09b82efa6b722999dfb05afa6cfac/yarl-1.22.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:433885ab5431bc3d3d4f2f9bd15bfa1614c522b0f1405d62c4f926ccd69d04fa", size = 365535, upload-time = "2025-10-06T14:10:01.139Z" },
+ { url = "https://files.pythonhosted.org/packages/25/e1/5302ff9b28f0c59cac913b91fe3f16c59a033887e57ce9ca5d41a3a94737/yarl-1.22.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b790b39c7e9a4192dc2e201a282109ed2985a1ddbd5ac08dc56d0e121400a8f7", size = 382324, upload-time = "2025-10-06T14:10:02.756Z" },
+ { url = "https://files.pythonhosted.org/packages/bf/cd/4617eb60f032f19ae3a688dc990d8f0d89ee0ea378b61cac81ede3e52fae/yarl-1.22.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:31f0b53913220599446872d757257be5898019c85e7971599065bc55065dc99d", size = 383803, upload-time = "2025-10-06T14:10:04.552Z" },
+ { url = "https://files.pythonhosted.org/packages/59/65/afc6e62bb506a319ea67b694551dab4a7e6fb7bf604e9bd9f3e11d575fec/yarl-1.22.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a49370e8f711daec68d09b821a34e1167792ee2d24d405cbc2387be4f158b520", size = 374220, upload-time = "2025-10-06T14:10:06.489Z" },
+ { url = "https://files.pythonhosted.org/packages/e7/3d/68bf18d50dc674b942daec86a9ba922d3113d8399b0e52b9897530442da2/yarl-1.22.0-cp312-cp312-win32.whl", hash = "sha256:70dfd4f241c04bd9239d53b17f11e6ab672b9f1420364af63e8531198e3f5fe8", size = 81589, upload-time = "2025-10-06T14:10:09.254Z" },
+ { url = "https://files.pythonhosted.org/packages/c8/9a/6ad1a9b37c2f72874f93e691b2e7ecb6137fb2b899983125db4204e47575/yarl-1.22.0-cp312-cp312-win_amd64.whl", hash = "sha256:8884d8b332a5e9b88e23f60bb166890009429391864c685e17bd73a9eda9105c", size = 87213, upload-time = "2025-10-06T14:10:11.369Z" },
+ { url = "https://files.pythonhosted.org/packages/44/c5/c21b562d1680a77634d748e30c653c3ca918beb35555cff24986fff54598/yarl-1.22.0-cp312-cp312-win_arm64.whl", hash = "sha256:ea70f61a47f3cc93bdf8b2f368ed359ef02a01ca6393916bc8ff877427181e74", size = 81330, upload-time = "2025-10-06T14:10:13.112Z" },
+ { url = "https://files.pythonhosted.org/packages/73/ae/b48f95715333080afb75a4504487cbe142cae1268afc482d06692d605ae6/yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff", size = 46814, upload-time = "2025-10-06T14:12:53.872Z" },
+]
+
+[[package]]
+name = "zipp"
+version = "3.23.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload-time = "2025-06-08T17:06:39.4Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" },
+]