Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion stackchan_server/speech_recognition/whisper_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,14 @@ def _normalize_transcript(text: str) -> str:


def _load_transcript_from_json(path: Path) -> str:
data = json.loads(path.read_text(encoding="utf-8"))
try:
raw_bytes = path.read_bytes()
text = raw_bytes.decode("utf-8", errors="replace")
if "\ufffd" in text:
logger.warning("whisper.cpp JSON output contains invalid UTF-8 bytes")
data = json.loads(text)
except (json.JSONDecodeError, OSError) as exc:
raise RuntimeError(f"Failed to read whisper.cpp JSON output: {exc}") from exc
transcription = data.get("transcription")
if not isinstance(transcription, list):
return ""
Expand Down
14 changes: 12 additions & 2 deletions stackchan_server/speech_recognition/whisper_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,14 @@ def _request_transcript(self, wav_bytes: bytes, language: str) -> str:
raise RuntimeError(f"whisper-server request failed: {exc.reason}") from exc

if self._response_format == "json":
payload = json.loads(response_body.decode("utf-8"))
payload = _load_json_response_bytes(response_body)
if not isinstance(payload, Mapping):
return ""
payload = cast(Mapping[str, object], payload)
text = payload.get("text")
return text.strip() if isinstance(text, str) else ""

payload = json.loads(response_body.decode("utf-8"))
payload = _load_json_response_bytes(response_body)
return _load_transcript_from_verbose_json(payload)


Expand All @@ -118,6 +121,13 @@ def _normalize_language(language_code: str) -> str:
return language_code.split("-", 1)[0].lower()


def _load_json_response_bytes(response_body: bytes) -> object:
response_text = response_body.decode("utf-8", errors="replace")
if "\ufffd" in response_text:
logger.warning("whisper-server JSON output contains invalid UTF-8 bytes")
return json.loads(response_text)


def _load_transcript_from_verbose_json(payload: object) -> str:
if not isinstance(payload, Mapping):
return ""
Expand Down
31 changes: 31 additions & 0 deletions tests/test_whisper_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from __future__ import annotations

import unittest

from stackchan_server.speech_recognition.whisper_server import (
_load_json_response_bytes,
_load_transcript_from_verbose_json,
)


class WhisperServerJsonTests(unittest.TestCase):
    """Checks how whisper-server JSON helpers treat undecodable UTF-8."""

    def test_load_json_response_bytes_replaces_invalid_utf8(self) -> None:
        """A truncated multi-byte sequence is decoded as one U+FFFD."""
        raw_body = b'{"transcription":[{"text":"\xe6\x90"},{"text":"ok"}]}'
        expected = {"transcription": [{"text": "�"}, {"text": "ok"}]}

        self.assertEqual(_load_json_response_bytes(raw_body), expected)

    def test_load_transcript_from_verbose_json_with_replacement_char(self) -> None:
        """Segments holding a replacement character are still joined."""
        segments = [{"text": "�"}, {"text": "ok"}]

        result = _load_transcript_from_verbose_json({"transcription": segments})

        self.assertEqual(result, "� ok")


# Run the test cases when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading