From b18a26225cd38316a8cb3e01397137604f3e2e48 Mon Sep 17 00:00:00 2001 From: Niko Nousiainen Date: Sat, 28 Mar 2026 12:05:23 +0200 Subject: [PATCH] More detaching model handling from whisper.cpp to mutterkey side --- AGENTS.md | 47 ++- CMakeLists.txt | 94 +++--- README.md | 5 + RELEASE_CHECKLIST.md | 20 +- docs/mainpage.md | 18 +- src/service.cpp | 1 + src/transcription/cpureferencemodel.cpp | 153 ++++++++++ src/transcription/cpureferencemodel.h | 82 +++++ src/transcription/cpureferencetranscriber.cpp | 280 ++++++++++++++++++ src/transcription/cpureferencetranscriber.h | 95 ++++++ src/transcription/modelcatalog.cpp | 8 +- src/transcription/modelpackage.cpp | 28 ++ src/transcription/modelpackage.h | 35 +++ src/transcription/modelvalidator.cpp | 4 + src/transcription/rawwhisperimporter.cpp | 6 +- src/transcription/runtimeselector.cpp | 59 ++++ src/transcription/runtimeselector.h | 39 +++ src/transcription/transcriptionengine.cpp | 14 + src/transcription/transcriptiontypes.h | 2 + src/transcription/whispercpptranscriber.cpp | 8 +- tests/CMakeLists.txt | 36 ++- tests/cpureferencetranscribertest.cpp | 259 ++++++++++++++++ tests/transcriptionworkertest.cpp | 1 + tests/whispercpptranscribertest.cpp | 16 + 24 files changed, 1252 insertions(+), 58 deletions(-) create mode 100644 src/transcription/cpureferencemodel.cpp create mode 100644 src/transcription/cpureferencemodel.h create mode 100644 src/transcription/cpureferencetranscriber.cpp create mode 100644 src/transcription/cpureferencetranscriber.h create mode 100644 src/transcription/runtimeselector.cpp create mode 100644 src/transcription/runtimeselector.h create mode 100644 tests/cpureferencetranscribertest.cpp diff --git a/AGENTS.md b/AGENTS.md index a4fc376..5c9f9b4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,7 +8,13 @@ Current architecture: - Global shortcut handling goes through `KGlobalAccel` - Audio capture uses Qt Multimedia -- Transcription is in-process through vendored `whisper.cpp` +- Transcription goes 
through an app-owned runtime seam with explicit runtime + selection +- A product-owned native CPU reference runtime scaffold now exists alongside + the legacy whisper adapter +- `whisper.cpp` is still the only real end-user speech decoder today, but the + vendored runtime is now optional at build time through + `MUTTERKEY_ENABLE_LEGACY_WHISPER=OFF` - Native Mutterkey model packages are now the canonical model artifact; raw whisper.cpp-compatible `.bin` files remain only as a migration/import path - The public runtime seam is streaming-first through app-owned chunks, events, and compatibility helpers @@ -22,7 +28,9 @@ Current architecture: This repository is intentionally kept minimal: - CMake is the only supported build system -- `whisper.cpp` is the only supported transcription backend +- `whisper.cpp` remains a vendored legacy backend, but new runtime ownership + work should prefer the product-owned native CPU path and selector/model-loader + seams first - Keep the repo free of generated build output - Keep publication-facing files free of machine-specific paths and broken local links - Do not reintroduce legacy qmake or external-command transcription paths unless explicitly requested @@ -37,11 +45,14 @@ This repository is intentionally kept minimal: - `src/clipboardwriter.*`: clipboard integration, preferring KDE system clipboard support - `src/audio/recordingnormalizer.*`: conversion to runtime-ready mono `float32` at `16 kHz` - `src/transcription/audiochunker.*`: deterministic chunking of normalized audio for the streaming runtime path +- `src/transcription/cpureferencemodel.*`: product-owned native CPU reference model header/parser and immutable model-handle loading +- `src/transcription/cpureferencetranscriber.*`: native CPU reference runtime scaffold behind the app-owned engine/session seam - `src/transcription/modelpackage.*`: product-owned manifest and validated package value types - `src/transcription/modelvalidator.*`: package integrity, compatibility, 
and bounds validation - `src/transcription/modelcatalog.*`: model artifact inspection and resolution - `src/transcription/rawwhisperprobe.*`: lightweight raw whisper.cpp header inspection used for migration compatibility - `src/transcription/rawwhisperimporter.*`: import path from raw Whisper `.bin` files into native Mutterkey packages +- `src/transcription/runtimeselector.*`: app-owned runtime-selection policy and diagnostic reasoning - `src/transcription/transcriptassembler.*`: final transcript assembly from streaming transcript events - `src/transcription/transcriptioncompat.*`: compatibility wrapper that routes one-shot recordings through the streaming runtime seam - `src/transcription/whispercpptranscriber.*`: in-process Whisper integration and whisper-specific engine construction @@ -86,6 +97,13 @@ cmake -S . -B "$BUILD_DIR" -G Ninja cmake --build "$BUILD_DIR" -j"$(nproc)" ``` +To validate the native-runtime-only path without vendored `whisper.cpp` / +`ggml`, configure with: + +```bash +cmake -S . -B "$BUILD_DIR" -G Ninja -DMUTTERKEY_ENABLE_LEGACY_WHISPER=OFF +``` + If a sandboxed build fails with `ccache: error: Read-only file system`, treat that as an environment limitation rather than a repo regression and rerun the build with `CCACHE_DISABLE=1`. 
@@ -136,6 +154,9 @@ Notes: - Use `bash scripts/check-release-hygiene.sh` when touching publication-facing files such as `README.md`, licenses, `contrib/`, CI, or helper scripts - Use `cmake --build "$BUILD_DIR" --target docs` when touching repo-owned public headers, Doxygen config, the Doxygen main page, or CI/docs wiring - If install rules or licensing files change, confirm the temporary install contains the expected files under `share/licenses/mutterkey` +- If a task changes runtime selection, native model loading, or legacy-whisper + build toggles, validate at least one `MUTTERKEY_ENABLE_LEGACY_WHISPER=OFF` + build in addition to the normal default build - If you add or change public methods in repo-owned headers, expect `cmake --build "$BUILD_DIR" --target docs` to fail until the new API is documented; treat that as part of the normal implementation loop, not follow-up polish - Newly added repo-owned public structs and free functions in public headers also need Doxygen comments immediately; the `docs` target treats undocumented new @@ -158,6 +179,12 @@ Notes: - When validating inside a restricted sandbox, be ready to disable `ccache` with `CCACHE_DISABLE=1` if the cache location is read-only; that is an execution-environment issue, not a Mutterkey build failure - Prefer fixing the code over weakening `.clang-tidy` or the Clazy check set; only relax tool config when the warning is clearly low-value for this repo - If `clang-tidy` flags a new small enum for `performance-enum-size`, prefer an explicit narrow underlying type such as `std::uint8_t` instead of suppressing the warning +- If `clang-tidy` flags a small fixed binary header type, prefer + `std::array<char, N>` or `std::array<std::uint8_t, N>` plus value + initialization over C-style arrays +- When helper functions take two adjacent same-shaped parameters such as two + `QString` values, prefer a small request struct when that keeps tests and + runtime code from tripping `bugprone-easily-swappable-parameters` - In this Qt-heavy 
repo, treat `misc-include-cleaner` and `readability-redundant-access-specifiers` as low-value `clang-tidy` noise unless the underlying tool behavior improves; they conflict with Qt header-provider reality and `signals` / `slots` / `Q_SLOTS` sectioning more than they improve safety - Prefer anonymous-namespace `Q_LOGGING_CATEGORY` for file-local logging categories; `Q_STATIC_LOGGING_CATEGORY` is not portable enough across the Qt versions this repo may build against - Do not add broad Valgrind suppressions by default; only add narrow suppressions after reproducing stable third-party noise and keep them clearly scoped @@ -183,7 +210,15 @@ Notes: - Keep JSON and other transport details at subsystem boundaries; prefer typed C++ snapshots/results once data crosses into app-owned control, tray, or service code - Prefer dependency injection for tray-shell and control-surface code from the first implementation so headless Qt tests stay simple - When preparing the transcription path for future runtime work, prefer app-owned engine/session seams and injected sessions over leaking concrete backend types into CLI, service, or worker orchestration. 
Keep immutable capability reporting on the engine side, keep runtime inspection data in `RuntimeDiagnostics`, and keep the session side focused on mutable decode state, warmup, chunk ingestion, finish, and cancellation -- Prefer product-owned runtime interfaces, model/session separation, and deterministic backend selection before adding new inference backends or widening cross-platform support +- Prefer product-owned runtime interfaces, model/session separation, explicit + runtime-selection policy, and deterministic backend selection before adding + new inference backends or widening cross-platform support +- Keep runtime-selection policy in `src/transcription/runtimeselector.*` + instead of burying compatibility/fallback rules inside + `createTranscriptionEngine()` +- Keep native model-format parsing and immutable model loading in + `src/transcription/cpureferencemodel.*` or similar app-owned loader code + rather than mixing artifact parsing into the mutable session implementation - Keep model validation, metadata extraction, and compatibility checks app-owned. `whisper.cpp` should not be the first component that tells Mutterkey whether a model artifact is obviously malformed, incompatible, or oversized @@ -218,6 +253,9 @@ Apply the C++ Core Guidelines selectively and pragmatically. For this repo, the - Prefer resolving model-package, metadata, and import work entirely in app-owned code. Raw whisper.cpp `.bin` support is now a compatibility/import concern, not the canonical product contract +- Prefer treating `whisper.cpp` as a legacy migration/parity dependency from + here forward. 
If new work can land in app-owned selector, model-loader, + native-runtime, or package code instead, do that first - Prefer keeping fake runtime tests and app-owned helpers free of vendored whisper linkage unless the test is specifically about the whisper adapter or engine factory - Prefer fixing vendored target metadata from the top-level CMake when the issue is Mutterkey packaging or warning noise, instead of patching upstream vendored files directly - If you must modify vendored code, document why in the final response and record the deviation in `third_party/whisper.cpp.UPSTREAM.md` @@ -233,6 +271,9 @@ Apply the C++ Core Guidelines selectively and pragmatically. For this repo, the separate release asset outside Git - Do not introduce machine-specific home-directory paths, absolute local Markdown links, or generated build artifacts into tracked files - If a task changes install layout or shipped assets, keep the CMake install rules and license installs aligned with the new behavior +- If a task changes whether legacy whisper support is installed, keep + `README.md`, `RELEASE_CHECKLIST.md`, `docs/mainpage.md`, install rules, and + license installs aligned with that choice - The installed shared-library payload is runtime-focused; do not start installing vendored upstream public headers unless the package contract intentionally changes ## Config Expectations diff --git a/CMakeLists.txt b/CMakeLists.txt index e2eacbb..2544dab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,7 @@ set(CMAKE_CXX_EXTENSIONS OFF) option(MUTTERKEY_ENABLE_ASAN "Enable AddressSanitizer for repo-owned code and vendored whisper.cpp" OFF) option(MUTTERKEY_ENABLE_UBSAN "Enable UndefinedBehaviorSanitizer for repo-owned code and vendored whisper.cpp" OFF) +option(MUTTERKEY_ENABLE_LEGACY_WHISPER "Build the legacy whisper.cpp runtime for migration and parity validation" ON) option(MUTTERKEY_ENABLE_WHISPER_CUDA "Enable whisper.cpp CUDA backend support (NVIDIA)" OFF) 
option(MUTTERKEY_ENABLE_WHISPER_VULKAN "Enable whisper.cpp Vulkan backend support" OFF) option(MUTTERKEY_ENABLE_WHISPER_BLAS "Enable whisper.cpp BLAS CPU acceleration" OFF) @@ -47,6 +48,10 @@ set(MUTTERKEY_CORE_SOURCES src/transcription/transcriptionengine.h src/transcription/audiochunker.cpp src/transcription/audiochunker.h + src/transcription/cpureferencemodel.cpp + src/transcription/cpureferencemodel.h + src/transcription/cpureferencetranscriber.cpp + src/transcription/cpureferencetranscriber.h src/transcription/modelcatalog.cpp src/transcription/modelcatalog.h src/transcription/modelpackage.cpp @@ -57,16 +62,23 @@ set(MUTTERKEY_CORE_SOURCES src/transcription/rawwhisperimporter.h src/transcription/rawwhisperprobe.cpp src/transcription/rawwhisperprobe.h + src/transcription/runtimeselector.cpp + src/transcription/runtimeselector.h src/transcription/transcriptassembler.cpp src/transcription/transcriptassembler.h src/transcription/transcriptioncompat.cpp src/transcription/transcriptioncompat.h src/transcription/transcriptionworker.cpp src/transcription/transcriptionworker.h - src/transcription/whispercpptranscriber.cpp - src/transcription/whispercpptranscriber.h ) +if(MUTTERKEY_ENABLE_LEGACY_WHISPER) + list(APPEND MUTTERKEY_CORE_SOURCES + src/transcription/whispercpptranscriber.cpp + src/transcription/whispercpptranscriber.h + ) +endif() + set(MUTTERKEY_CONTROL_SOURCES src/control/daemoncontrolclient.cpp src/control/daemoncontrolclient.h @@ -114,6 +126,10 @@ set_target_properties(mutterkey-tray PROPERTIES INSTALL_RPATH "$ORIGIN/../lib" ) +if(MUTTERKEY_ENABLE_LEGACY_WHISPER) + target_compile_definitions(mutterkey_core PRIVATE MUTTERKEY_WITH_LEGACY_WHISPER) +endif() + function(mutterkey_enable_sanitizers target_name) if(NOT CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang|AppleClang") message(WARNING "Sanitizers were requested, but ${CMAKE_CXX_COMPILER_ID} is not configured for repo-owned sanitizer flags") @@ -203,47 +219,53 @@ else() message(STATUS "Doxygen not found; the 
docs target will be unavailable") endif() -if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/third_party/whisper.cpp/CMakeLists.txt") - message(FATAL_ERROR "Vendored whisper.cpp dependency is missing from third_party/whisper.cpp") -endif() - -set(WHISPER_BUILD_TESTS OFF CACHE BOOL "" FORCE) -set(WHISPER_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) -set(WHISPER_BUILD_SERVER OFF CACHE BOOL "" FORCE) -set(WHISPER_SANITIZE_ADDRESS ${MUTTERKEY_ENABLE_ASAN} CACHE BOOL "" FORCE) -set(WHISPER_SANITIZE_UNDEFINED ${MUTTERKEY_ENABLE_UBSAN} CACHE BOOL "" FORCE) -set(GGML_CUDA ${MUTTERKEY_ENABLE_WHISPER_CUDA} CACHE BOOL "" FORCE) -set(GGML_VULKAN ${MUTTERKEY_ENABLE_WHISPER_VULKAN} CACHE BOOL "" FORCE) -set(GGML_BLAS ${MUTTERKEY_ENABLE_WHISPER_BLAS} CACHE BOOL "" FORCE) -set(GGML_BLAS_VENDOR ${MUTTERKEY_WHISPER_BLAS_VENDOR} CACHE STRING "" FORCE) -add_subdirectory(third_party/whisper.cpp EXCLUDE_FROM_ALL) - -# Mutterkey ships the vendored shared libraries, but it does not install their -# upstream public headers as part of its own package layout. 
-set_target_properties(whisper ggml PROPERTIES PUBLIC_HEADER "") +if(MUTTERKEY_ENABLE_LEGACY_WHISPER) + if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/third_party/whisper.cpp/CMakeLists.txt") + message(FATAL_ERROR "Vendored whisper.cpp dependency is missing from third_party/whisper.cpp") + endif() -target_link_libraries(mutterkey_core PRIVATE whisper) + set(WHISPER_BUILD_TESTS OFF CACHE BOOL "" FORCE) + set(WHISPER_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) + set(WHISPER_BUILD_SERVER OFF CACHE BOOL "" FORCE) + set(WHISPER_SANITIZE_ADDRESS ${MUTTERKEY_ENABLE_ASAN} CACHE BOOL "" FORCE) + set(WHISPER_SANITIZE_UNDEFINED ${MUTTERKEY_ENABLE_UBSAN} CACHE BOOL "" FORCE) + set(GGML_CUDA ${MUTTERKEY_ENABLE_WHISPER_CUDA} CACHE BOOL "" FORCE) + set(GGML_VULKAN ${MUTTERKEY_ENABLE_WHISPER_VULKAN} CACHE BOOL "" FORCE) + set(GGML_BLAS ${MUTTERKEY_ENABLE_WHISPER_BLAS} CACHE BOOL "" FORCE) + set(GGML_BLAS_VENDOR ${MUTTERKEY_WHISPER_BLAS_VENDOR} CACHE STRING "" FORCE) + add_subdirectory(third_party/whisper.cpp EXCLUDE_FROM_ALL) + + # Mutterkey ships the vendored shared libraries, but it does not install their + # upstream public headers as part of its own package layout. 
+ set_target_properties(whisper ggml PROPERTIES PUBLIC_HEADER "") + + target_link_libraries(mutterkey_core PRIVATE whisper) +endif() install(TARGETS mutterkey RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) install(TARGETS mutterkey-tray RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) -install(TARGETS whisper ggml ggml-base - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} -) -if(TARGET ggml-cpu) - install(TARGETS ggml-cpu LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) -endif() -if(TARGET ggml-cuda) - install(TARGETS ggml-cuda LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) -endif() -if(TARGET ggml-vulkan) - install(TARGETS ggml-vulkan LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) -endif() -if(TARGET ggml-blas) - install(TARGETS ggml-blas LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) +if(MUTTERKEY_ENABLE_LEGACY_WHISPER) + install(TARGETS whisper ggml ggml-base + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + if(TARGET ggml-cpu) + install(TARGETS ggml-cpu LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) + endif() + if(TARGET ggml-cuda) + install(TARGETS ggml-cuda LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) + endif() + if(TARGET ggml-vulkan) + install(TARGETS ggml-vulkan LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) + endif() + if(TARGET ggml-blas) + install(TARGETS ggml-blas LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) + endif() endif() install(FILES contrib/org.mutterkey.mutterkey.desktop DESTINATION ${CMAKE_INSTALL_DATADIR}/applications) install(FILES LICENSE THIRD_PARTY_NOTICES.md DESTINATION ${MUTTERKEY_LICENSE_INSTALL_DIR}) -install(FILES third_party/whisper.cpp/LICENSE DESTINATION ${MUTTERKEY_LICENSE_INSTALL_DIR}/third_party/whisper.cpp) +if(MUTTERKEY_ENABLE_LEGACY_WHISPER) + install(FILES third_party/whisper.cpp/LICENSE DESTINATION ${MUTTERKEY_LICENSE_INSTALL_DIR}/third_party/whisper.cpp) +endif() if(BUILD_TESTING) find_package(Qt6 REQUIRED COMPONENTS Test) diff --git a/README.md b/README.md index 8c57c67..ca39424 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,10 @@ 
This installs: - `~/.local/lib/libwhisper.so*` and the required `ggml` libraries - `~/.local/share/applications/org.mutterkey.mutterkey.desktop` +If you configure with `-DMUTTERKEY_ENABLE_LEGACY_WHISPER=OFF`, Mutterkey builds +without the vendored `whisper.cpp` runtime and does not install the legacy +`libwhisper` / `ggml` shared libraries. + Optional acceleration flags: ```bash @@ -164,6 +168,7 @@ Notes: - `MUTTERKEY_ENABLE_WHISPER_VULKAN=ON` is for Vulkan-capable GPUs and requires Vulkan development headers and loader libraries - `MUTTERKEY_ENABLE_WHISPER_BLAS=ON` improves CPU inference speed rather than enabling GPU execution - these options are forwarded to the vendored `whisper.cpp` / `ggml` build and install any resulting backend libraries alongside Mutterkey +- `-DMUTTERKEY_ENABLE_LEGACY_WHISPER=OFF` disables the vendored runtime entirely and skips all `whisper.cpp` / `ggml` install targets ### 2. Put a model on disk diff --git a/RELEASE_CHECKLIST.md b/RELEASE_CHECKLIST.md index 0664d52..0f66190 100644 --- a/RELEASE_CHECKLIST.md +++ b/RELEASE_CHECKLIST.md @@ -41,6 +41,10 @@ bash scripts/run-release-checklist.sh bash scripts/run-release-checklist.sh -- -DMUTTERKEY_ENABLE_WHISPER_CUDA=ON ``` +```bash +bash scripts/run-release-checklist.sh -- -DMUTTERKEY_ENABLE_LEGACY_WHISPER=OFF +``` + - The script intentionally stops before install validation and still prints the remaining manual review items that need human judgment. @@ -69,6 +73,8 @@ cmake -S . 
-B "$BUILD_DIR" -G Ninja -DCMAKE_BUILD_TYPE=Debug -DMUTTERKEY_ENABLE_ - `MUTTERKEY_ENABLE_WHISPER_CUDA=ON`: NVIDIA GPU build through vendored `ggml` - `MUTTERKEY_ENABLE_WHISPER_VULKAN=ON`: Vulkan GPU build through vendored `ggml` - `MUTTERKEY_ENABLE_WHISPER_BLAS=ON`: faster CPU inference, not GPU execution + - `MUTTERKEY_ENABLE_LEGACY_WHISPER=OFF`: disable vendored `whisper.cpp` / + `ggml` entirely and validate the native-runtime-only build path - choose the backend intentionally for the release artifact and record that choice in release notes or packaging docs when relevant - Build: @@ -140,14 +146,18 @@ cmake --install "$BUILD_DIR" --prefix "$INSTALL_DIR" - Confirm the installed tree contains: - `bin/mutterkey` - `bin/mutterkey-tray` - - required `libwhisper` / `ggml` shared libraries - the desktop file under `share/applications` - license files under `share/licenses/mutterkey` +- If the release keeps legacy whisper support enabled, also confirm the + installed tree contains: + - required `libwhisper` / `ggml` shared libraries - If acceleration was enabled for the release, also confirm the installed tree contains the expected backend library: - `libggml-cuda.so*` for `MUTTERKEY_ENABLE_WHISPER_CUDA=ON` - `libggml-vulkan.so*` for `MUTTERKEY_ENABLE_WHISPER_VULKAN=ON` - `libggml-blas.so*` for `MUTTERKEY_ENABLE_WHISPER_BLAS=ON` +- If the release disables legacy whisper support, confirm the installed tree + does not contain vendored `libwhisper` / `ggml` shared libraries by mistake. - Do not expect vendored upstream public headers to be installed; Mutterkey's install rules ship the runtime libraries but intentionally clear vendored `PUBLIC_HEADER` metadata to avoid upstream header-install warnings. @@ -204,6 +214,11 @@ MODEL_OUT="$(mktemp -d /tmp/mutterkey-release-model-XXXXXX)/base-en" repo-owned API docs or docs/CI wiring. 
- Confirm the docs describe native Mutterkey model packages as the canonical artifact and raw Whisper `.bin` files as migration compatibility only. +- Confirm the docs also reflect the current runtime state accurately: + - a product-owned native CPU reference runtime now exists + - `MUTTERKEY_ENABLE_LEGACY_WHISPER=OFF` is supported for native-only builds + - `whisper.cpp` is still the only real end-user speech decoder unless the + release intentionally changes that - Confirm the documented recommended path is still the `systemd --user` service. - Confirm [contrib/mutterkey.service](contrib/mutterkey.service) matches the recommended installed-binary setup. @@ -215,6 +230,9 @@ MODEL_OUT="$(mktemp -d /tmp/mutterkey-release-model-XXXXXX)/base-en" - CUDA/Vulkan releases should not log only `registered backend CPU` - CPU-accelerated BLAS releases may still be CPU-only, but should be tested against a representative Whisper model and expected performance target +- If the release is intended to rely on the native runtime path, verify + diagnostics on a representative machine show the native runtime and an + explicit native selection reason rather than a legacy whisper fallback. 
## Vendored whisper.cpp Updates diff --git a/docs/mainpage.md b/docs/mainpage.md index d0455b5..a745f8b 100644 --- a/docs/mainpage.md +++ b/docs/mainpage.md @@ -13,7 +13,7 @@ Current behavior: - registers a global shortcut through `KGlobalAccel` - records microphone audio while the shortcut is held -- transcribes locally through an embedded `whisper.cpp` backend +- transcribes locally through the configured runtime path - copies the resulting text to the clipboard - expects you to paste the text yourself with `Ctrl+V` @@ -26,12 +26,16 @@ Current runtime shape: transcript events - `BackendCapabilities` reports static backend support used for orchestration - `RuntimeDiagnostics` reports runtime/device/model inspection data separately - from static capabilities + from static capabilities, including runtime-selection reasoning - `RuntimeError` and `RuntimeErrorCode` provide typed runtime failures - `ModelCatalog`, `ModelPackage`, and `ModelValidator` own model inspection, compatibility checks, and integrity validation before backend load - raw Whisper `.bin` files are handled only through an explicit compatibility path and import flow +- `RuntimeSelector` owns runtime-selection policy instead of burying that logic + in the generic factory +- `CpuReferenceModelHandle` and related native model helpers own the current + product-owned CPU reference model loading boundary - `TranscriptionWorker` hosts transcription on a dedicated `QThread` and creates live sessions lazily on that worker thread - the shipped daemon and `once` flows still use a compatibility wrapper that @@ -51,8 +55,12 @@ Core API surface covered here: artifacts into validated product-owned model metadata. - `RawWhisperImporter` converts raw whisper.cpp-compatible `ggml` `.bin` files into native Mutterkey packages. +- `RuntimeSelector` decides which runtime implementation should handle a given + configured model path and records the reason in diagnostics. 
- `TranscriptionEngine` and `TranscriptionSession` define the app-owned runtime seam. +- `CpuReferenceTranscriber` provides the current product-owned native CPU + reference runtime scaffold. - `WhisperCppTranscriber` performs in-process transcription through vendored `whisper.cpp`. - `ClipboardWriter` copies the resulting text to the clipboard. @@ -65,7 +73,11 @@ Current product direction: - local-only transcription - CLI/service-first operation - tray-shell work exists, but the daemon remains the product core -- `whisper.cpp` is still the only supported backend implementation +- a product-owned native CPU reference runtime now exists for ownership, + packaging, and conformance work +- `whisper.cpp` is still the only real end-user speech decoder today +- the vendored runtime is now optional at build time through + `MUTTERKEY_ENABLE_LEGACY_WHISPER=OFF` For build, runtime, release, and service setup use the repository `README.md` and `RELEASE_CHECKLIST.md`. diff --git a/src/service.cpp b/src/service.cpp index b7c39c7..26d68e2 100644 --- a/src/service.cpp +++ b/src/service.cpp @@ -84,6 +84,7 @@ QJsonObject MutterkeyService::diagnostics() const m_transcriptionWorker != nullptr ? m_transcriptionWorker->runtimeDiagnostics() : m_transcriptionEngine->diagnostics(); object.insert(QStringLiteral("transcriber_model"), runtimeDiagnostics.loadedModelDescription); object.insert(QStringLiteral("transcriber_runtime"), runtimeDiagnostics.runtimeDescription); + object.insert(QStringLiteral("transcriber_selection_reason"), runtimeDiagnostics.selectionReason); const BackendCapabilities capabilities = m_transcriptionWorker != nullptr ? 
m_transcriptionWorker->capabilities() : m_transcriptionEngine->capabilities(); object.insert(QStringLiteral("transcriber_supports_translation"), capabilities.supportsTranslation); diff --git a/src/transcription/cpureferencemodel.cpp b/src/transcription/cpureferencemodel.cpp new file mode 100644 index 0000000..99fc5cd --- /dev/null +++ b/src/transcription/cpureferencemodel.cpp @@ -0,0 +1,153 @@ +#include "transcription/cpureferencemodel.h" + +#include +#include +#include + +#include +#include +#include + +namespace { + +constexpr std::array kCpuReferenceMagic{'M', 'K', 'C', 'P', 'U', 'R', '1', '\0'}; +constexpr std::uint32_t kCpuReferenceVersion = 1; + +RuntimeError makeRuntimeError(RuntimeErrorCode code, QString message, QString detail = {}) +{ + return RuntimeError{.code = code, .message = std::move(message), .detail = std::move(detail)}; +} + +bool readModelHeader(QFile *file, CpuReferenceModelHeader *header) +{ + if (file == nullptr || header == nullptr) { + return false; + } + + const QByteArray bytes = file->read(static_cast(sizeof(CpuReferenceModelHeader))); + if (bytes.size() != static_cast(sizeof(CpuReferenceModelHeader))) { + return false; + } + + std::memcpy(header, bytes.constData(), sizeof(CpuReferenceModelHeader)); + return true; +} + +std::optional loadCpuReferenceModelData(const QString &path, RuntimeError *error) +{ + QFile file(path); + if (!file.open(QIODevice::ReadOnly)) { + if (error != nullptr) { + *error = makeRuntimeError(RuntimeErrorCode::ModelLoadFailed, + QStringLiteral("Failed to open native CPU model weights"), + QFileInfo(path).absoluteFilePath()); + } + return std::nullopt; + } + + CpuReferenceModelHeader header; + if (!readModelHeader(&file, &header)) { + if (error != nullptr) { + *error = makeRuntimeError(RuntimeErrorCode::ModelLoadFailed, + QStringLiteral("Native CPU model header is truncated"), + QFileInfo(path).absoluteFilePath()); + } + return std::nullopt; + } + + if (header.magic != kCpuReferenceMagic) { + if (error != nullptr) 
{ + *error = makeRuntimeError(RuntimeErrorCode::ModelLoadFailed, + QStringLiteral("Native CPU model has an invalid header"), + QFileInfo(path).absoluteFilePath()); + } + return std::nullopt; + } + + if (header.version != kCpuReferenceVersion) { + if (error != nullptr) { + *error = makeRuntimeError(RuntimeErrorCode::UnsupportedModelPackageVersion, + QStringLiteral("Native CPU model version is unsupported"), + QString::number(header.version)); + } + return std::nullopt; + } + + if (header.transcriptBytes == 0 || header.transcriptBytes > 4096U) { + if (error != nullptr) { + *error = makeRuntimeError(RuntimeErrorCode::ModelLoadFailed, + QStringLiteral("Native CPU model transcript payload is invalid"), + QString::number(header.transcriptBytes)); + } + return std::nullopt; + } + + const QByteArray transcriptBytes = file.read(static_cast(header.transcriptBytes)); + if (transcriptBytes.size() != static_cast(header.transcriptBytes)) { + if (error != nullptr) { + *error = makeRuntimeError(RuntimeErrorCode::ModelLoadFailed, + QStringLiteral("Native CPU model transcript payload is truncated"), + QFileInfo(path).absoluteFilePath()); + } + return std::nullopt; + } + + const QString transcript = QString::fromUtf8(transcriptBytes).trimmed(); + if (transcript.isEmpty()) { + if (error != nullptr) { + *error = makeRuntimeError(RuntimeErrorCode::ModelLoadFailed, + QStringLiteral("Native CPU model transcript payload is empty"), + QFileInfo(path).absoluteFilePath()); + } + return std::nullopt; + } + + return CpuReferenceModelData{.transcript = transcript}; +} + +} // namespace + +CpuReferenceModelHandle::CpuReferenceModelHandle(ValidatedModelPackage package, CpuReferenceModelData model, qint64 loadTimeMs) + : m_package(std::move(package)) + , m_model(std::move(model)) + , m_loadTimeMs(loadTimeMs) +{ +} + +QString CpuReferenceModelHandle::backendName() const +{ + return cpuReferenceEngineName(); +} + +ModelMetadata CpuReferenceModelHandle::metadata() const +{ + return 
m_package.metadata(); +} + +QString CpuReferenceModelHandle::modelDescription() const +{ + return QStringLiteral("%1; load=%2 ms").arg(m_package.description()).arg(m_loadTimeMs); +} + +const QString &CpuReferenceModelHandle::transcript() const +{ + return m_model.transcript; +} + +std::shared_ptr loadCpuReferenceModelHandle(const ValidatedModelPackage &package, RuntimeError *error) +{ + QElapsedTimer timer; + timer.start(); + const std::optional model = loadCpuReferenceModelData(package.weightsPath, error); + if (!model.has_value()) { + return nullptr; + } + + return std::make_shared(package, *model, timer.elapsed()); +} + +std::shared_ptr +resolveCpuReferenceModelHandle(std::shared_ptr model) +{ + return std::dynamic_pointer_cast(std::move(model)); +} diff --git a/src/transcription/cpureferencemodel.h b/src/transcription/cpureferencemodel.h new file mode 100644 index 0000000..e8f4412 --- /dev/null +++ b/src/transcription/cpureferencemodel.h @@ -0,0 +1,82 @@ +#pragma once + +#include "transcription/modelpackage.h" +#include "transcription/transcriptionengine.h" +#include "transcription/transcriptiontypes.h" + +#include +#include +#include +#include + +/** + * @file + * @brief Native CPU reference model format definitions and loading helpers. + */ + +/** + * @brief Fixed header stored at the start of a native CPU reference weights asset. + */ +struct CpuReferenceModelHeader { + /// File-format magic. Kept ASCII for stable inspection and tests. + std::array magic{}; + /// Native CPU reference weights format version. + std::uint32_t version = 0; + /// UTF-8 transcript payload size used by the deterministic reference fixture format. + std::uint32_t transcriptBytes = 0; +}; + +/** + * @brief Deterministic native CPU reference model payload loaded from disk. + */ +struct CpuReferenceModelData { + /// Final transcript emitted by the deterministic reference runtime fixture. 
+ QString transcript; +}; + +/** + * @brief Immutable loaded native CPU model handle owned by the runtime. + */ +class CpuReferenceModelHandle final : public TranscriptionModelHandle +{ +public: + /** + * @brief Creates an immutable native CPU reference model handle. + * @param package Validated package metadata and resolved asset paths. + * @param model Parsed deterministic reference model payload. + * @param loadTimeMs Measured load time in milliseconds for diagnostics. + */ + CpuReferenceModelHandle(ValidatedModelPackage package, CpuReferenceModelData model, qint64 loadTimeMs); + + [[nodiscard]] QString backendName() const override; + [[nodiscard]] ModelMetadata metadata() const override; + [[nodiscard]] QString modelDescription() const override; + + /** + * @brief Returns the deterministic transcript fixture embedded in this model. + * @return Transcript text emitted by the reference runtime. + */ + [[nodiscard]] const QString &transcript() const; + +private: + ValidatedModelPackage m_package; + CpuReferenceModelData m_model; + qint64 m_loadTimeMs = 0; +}; + +/** + * @brief Loads a native CPU reference model handle from a validated package. + * @param package Validated native package resolved by the model catalog. + * @param error Optional output for format or IO failures. + * @return Shared immutable model handle on success. + */ +[[nodiscard]] std::shared_ptr +loadCpuReferenceModelHandle(const ValidatedModelPackage &package, RuntimeError *error = nullptr); + +/** + * @brief Downcasts a generic model handle to the native CPU reference handle type. + * @param model Shared generic runtime model handle. + * @return Native CPU reference model handle on success. 
+ */ +[[nodiscard]] std::shared_ptr +resolveCpuReferenceModelHandle(std::shared_ptr model); diff --git a/src/transcription/cpureferencetranscriber.cpp b/src/transcription/cpureferencetranscriber.cpp new file mode 100644 index 0000000..fb24aca --- /dev/null +++ b/src/transcription/cpureferencetranscriber.cpp @@ -0,0 +1,280 @@ +#include "transcription/cpureferencetranscriber.h" + +#include "transcription/modelcatalog.h" +#include "transcription/runtimeselector.h" + +#include +#include + +#include +#include +#include + +namespace { + +Q_LOGGING_CATEGORY(cpuReferenceLog, "mutterkey.transcriber.cpu_reference") + +constexpr int kExpectedSampleRate = 16000; +constexpr float kMinimumSpeechPeak = 0.02F; + +RuntimeError makeRuntimeError(RuntimeErrorCode code, QString message, QString detail = {}) +{ + return RuntimeError{.code = code, .message = std::move(message), .detail = std::move(detail)}; +} + +RuntimeError makeUnsupportedLanguageError(const QString &language) +{ + return makeRuntimeError(RuntimeErrorCode::UnsupportedLanguage, + QStringLiteral("Native CPU reference runtime does not support language: %1").arg(language), + QStringLiteral("Requested language code: %1").arg(language)); +} + +QString normalizedLanguage(const QString &value) +{ + return value.trimmed().toLower(); +} + +bool supportsLanguage(const QString &languageProfile, const QString &requestedLanguage) +{ + if (requestedLanguage.isEmpty() || requestedLanguage == QStringLiteral("auto")) { + return languageProfile == QStringLiteral("multilingual"); + } + + if (languageProfile == QStringLiteral("multilingual")) { + return true; + } + + return requestedLanguage == languageProfile; +} + +float peakAbsoluteSample(const std::vector &samples) +{ + float peak = 0.0F; + for (const float sample : samples) { + peak = std::max(peak, std::abs(sample)); + } + return peak; +} + +} // namespace + +std::shared_ptr +CpuReferenceTranscriber::loadModelHandle(const TranscriberConfig &config, RuntimeError *error) +{ + const 
std::optional package = + ModelCatalog::inspectPath(config.modelPath, cpuReferenceEngineName(), cpuReferenceModelFormat(), error); + if (!package.has_value()) { + return nullptr; + } + + qCInfo(cpuReferenceLog) << "Loaded native CPU reference model from" << package->weightsPath; + return loadCpuReferenceModelHandle(*package, error); +} + +std::unique_ptr +CpuReferenceTranscriber::createSession(TranscriberConfig config, std::shared_ptr model) +{ + std::shared_ptr cpuModel = resolveCpuReferenceModelHandle(std::move(model)); + if (cpuModel == nullptr) { + return nullptr; + } + + return std::make_unique(std::move(config), std::move(cpuModel)); +} + +CpuReferenceTranscriber::CpuReferenceTranscriber(TranscriberConfig config, std::shared_ptr model) + : m_config(std::move(config)) + , m_model(resolveCpuReferenceModelHandle(std::move(model))) +{ +} + +CpuReferenceTranscriber::~CpuReferenceTranscriber() = default; + +QString CpuReferenceTranscriber::backendNameStatic() +{ + return cpuReferenceEngineName(); +} + +BackendCapabilities CpuReferenceTranscriber::capabilitiesStatic() +{ + return BackendCapabilities{ + .backendName = backendNameStatic(), + .supportedLanguages = {QStringLiteral("en")}, + .supportsAutoLanguage = false, + .supportsTranslation = false, + .supportsWarmup = true, + }; +} + +RuntimeDiagnostics CpuReferenceTranscriber::diagnosticsStatic() +{ + return RuntimeDiagnostics{ + .backendName = backendNameStatic(), + .selectionReason = QStringLiteral("Native CPU reference runtime selected explicitly"), + .runtimeDescription = QStringLiteral("native cpu reference runtime; threads=%1") + .arg(std::max(1, QThread::idealThreadCount())), + .loadedModelDescription = {}, + }; +} + +QString CpuReferenceTranscriber::backendName() const +{ + return backendNameStatic(); +} + +bool CpuReferenceTranscriber::warmup(RuntimeError *error) +{ + if (m_model == nullptr) { + if (error != nullptr) { + *error = makeRuntimeError(RuntimeErrorCode::InternalRuntimeError, + 
QStringLiteral("Native CPU reference model handle is not available")); + } + return false; + } + + m_cancelRequested = false; + m_warmedUp = true; + return true; +} + +TranscriptUpdate CpuReferenceTranscriber::pushAudioChunk(const AudioChunk &chunk) +{ + if (!chunk.isValid()) { + return TranscriptUpdate{ + .events = {}, + .error = makeRuntimeError(RuntimeErrorCode::AudioNormalizationFailed, + QStringLiteral("Streaming audio chunk is empty")), + }; + } + + if (chunk.sampleRate != kExpectedSampleRate || chunk.channels != 1) { + return TranscriptUpdate{ + .events = {}, + .error = makeRuntimeError(RuntimeErrorCode::AudioNormalizationFailed, + QStringLiteral("Native CPU reference runtime requires 16 kHz mono chunks")), + }; + } + + m_bufferedSamples.insert(m_bufferedSamples.end(), chunk.samples.begin(), chunk.samples.end()); + return {}; +} + +TranscriptUpdate CpuReferenceTranscriber::finish() +{ + if (m_cancelRequested) { + m_bufferedSamples.clear(); + return TranscriptUpdate{ + .events = {}, + .error = makeRuntimeError(RuntimeErrorCode::Cancelled, + QStringLiteral("Native CPU reference transcription was cancelled")), + }; + } + + if (m_model == nullptr) { + return TranscriptUpdate{ + .events = {}, + .error = makeRuntimeError(RuntimeErrorCode::InternalRuntimeError, + QStringLiteral("Native CPU reference model handle is not available")), + }; + } + + if (m_config.translate) { + return TranscriptUpdate{ + .events = {}, + .error = makeRuntimeError(RuntimeErrorCode::InvalidConfig, + QStringLiteral("Native CPU reference runtime does not support translation mode")), + }; + } + + const QString requestedLanguage = normalizedLanguage(m_config.language); + if (!supportsLanguage(m_model->metadata().languageProfile, requestedLanguage)) { + return TranscriptUpdate{ + .events = {}, + .error = makeUnsupportedLanguageError(requestedLanguage), + }; + } + + if (!m_warmedUp) { + RuntimeError warmupError; + if (!warmup(&warmupError)) { + return TranscriptUpdate{.events = {}, .error = 
std::move(warmupError)}; + } + } + + if (m_bufferedSamples.empty() || peakAbsoluteSample(m_bufferedSamples) < kMinimumSpeechPeak) { + m_bufferedSamples.clear(); + return {}; + } + + const auto durationMs = std::max( + 1, + static_cast((static_cast(m_bufferedSamples.size()) * 1000.0) / kExpectedSampleRate)); + const QString transcript = m_model->transcript().trimmed(); + m_bufferedSamples.clear(); + return TranscriptUpdate{ + .events = { + TranscriptEvent{ + .kind = TranscriptEventKind::Final, + .text = transcript, + .startMs = 0, + .endMs = durationMs, + }, + }, + .error = {}, + }; +} + +TranscriptUpdate CpuReferenceTranscriber::cancel() +{ + m_cancelRequested = true; + m_bufferedSamples.clear(); + return TranscriptUpdate{ + .events = {}, + .error = makeRuntimeError(RuntimeErrorCode::Cancelled, + QStringLiteral("Native CPU reference transcription was cancelled")), + }; +} + +namespace { + +class CpuReferenceTranscriptionEngine final : public TranscriptionEngine +{ +public: + explicit CpuReferenceTranscriptionEngine(TranscriberConfig config) + : m_config(std::move(config)) + { + } + + [[nodiscard]] BackendCapabilities capabilities() const override + { + return CpuReferenceTranscriber::capabilitiesStatic(); + } + + [[nodiscard]] RuntimeDiagnostics diagnostics() const override + { + RuntimeDiagnostics diagnostics = CpuReferenceTranscriber::diagnosticsStatic(); + diagnostics.selectionReason = selectRuntimeForConfig(m_config).reason; + return diagnostics; + } + + [[nodiscard]] std::shared_ptr loadModel(RuntimeError *error) const override + { + return CpuReferenceTranscriber::loadModelHandle(m_config, error); + } + + [[nodiscard]] std::unique_ptr + createSession(std::shared_ptr model) const override + { + return CpuReferenceTranscriber::createSession(m_config, std::move(model)); + } + +private: + TranscriberConfig m_config; +}; + +} // namespace + +std::shared_ptr createCpuReferenceTranscriptionEngine(const TranscriberConfig &config) +{ + return std::make_shared(config); 
+} diff --git a/src/transcription/cpureferencetranscriber.h b/src/transcription/cpureferencetranscriber.h new file mode 100644 index 0000000..bfea93c --- /dev/null +++ b/src/transcription/cpureferencetranscriber.h @@ -0,0 +1,95 @@ +#pragma once + +#include "config.h" +#include "transcription/cpureferencemodel.h" +#include "transcription/modelpackage.h" +#include "transcription/transcriptionengine.h" +#include "transcription/transcriptiontypes.h" + +#include + +/** + * @file + * @brief Product-owned deterministic CPU reference runtime. + */ + +/** + * @brief In-process native CPU transcription backend used as the product-owned reference runtime. + * + * The Phase 5 reference path is intentionally narrow and deterministic. It owns + * its model parsing, session state, and transcript emission without exposing any + * third-party runtime handles to the rest of the app. + */ +class CpuReferenceTranscriber final : public TranscriptionSession +{ +public: + /** + * @brief Loads an immutable native CPU model handle from a validated package. + * @param config Runtime configuration whose model path is inspected and loaded; it is not stored in the handle. + * @param error Optional output for model validation or load failures. + * @return Shared immutable model handle on success. + */ + [[nodiscard]] static std::shared_ptr + loadModelHandle(const TranscriberConfig &config, RuntimeError *error = nullptr); + + /** + * @brief Creates a mutable session from a generic app-owned model handle. + * @param config Runtime configuration copied into the session. + * @param model Shared immutable model handle. + * @return Session on success, otherwise `nullptr` when the model type is incompatible. + */ + [[nodiscard]] static std::unique_ptr + createSession(TranscriberConfig config, std::shared_ptr model); + + /** + * @brief Creates a mutable native CPU session from a loaded model handle. + * @param config Runtime configuration copied into the session. + * @param model Shared immutable model handle created by the native CPU engine.
+ */ + CpuReferenceTranscriber(TranscriberConfig config, std::shared_ptr model); + + ~CpuReferenceTranscriber() override; + + CpuReferenceTranscriber(const CpuReferenceTranscriber &) = delete; + CpuReferenceTranscriber &operator=(const CpuReferenceTranscriber &) = delete; + CpuReferenceTranscriber(CpuReferenceTranscriber &&) = delete; + CpuReferenceTranscriber &operator=(CpuReferenceTranscriber &&) = delete; + + /** + * @brief Returns the stable backend identifier used in diagnostics. + * @return Product-owned backend name for the CPU reference runtime. + */ + [[nodiscard]] static QString backendNameStatic(); + + /** + * @brief Returns static capability metadata for the native CPU runtime. + * @return Engine-owned capability snapshot. + */ + [[nodiscard]] static BackendCapabilities capabilitiesStatic(); + + /** + * @brief Returns static runtime diagnostics for the native CPU runtime. + * @return Engine-owned diagnostic description. + */ + [[nodiscard]] static RuntimeDiagnostics diagnosticsStatic(); + + [[nodiscard]] QString backendName() const override; + bool warmup(RuntimeError *error = nullptr) override; + [[nodiscard]] TranscriptUpdate pushAudioChunk(const AudioChunk &chunk) override; + [[nodiscard]] TranscriptUpdate finish() override; + [[nodiscard]] TranscriptUpdate cancel() override; + +private: + TranscriberConfig m_config; + std::shared_ptr m_model; + std::vector m_bufferedSamples; + bool m_warmedUp = false; + bool m_cancelRequested = false; +}; + +/** + * @brief Creates the product-owned CPU reference engine implementation. + * @param config Runtime configuration copied into the engine. + * @return Engine backed by the native CPU reference runtime. 
+ */ +[[nodiscard]] std::shared_ptr createCpuReferenceTranscriptionEngine(const TranscriberConfig &config); diff --git a/src/transcription/modelcatalog.cpp b/src/transcription/modelcatalog.cpp index 25e2a04..559cab7 100644 --- a/src/transcription/modelcatalog.cpp +++ b/src/transcription/modelcatalog.cpp @@ -19,8 +19,8 @@ ModelPackageManifest legacyManifestForPath(const QString &sourcePath, const Mode manifest.schemaVersion = 1; manifest.metadata = metadata; manifest.compatibleEngines.push_back(ModelCompatibilityMarker{ - .engine = QStringLiteral("whisper.cpp"), - .modelFormat = QStringLiteral("ggml"), + .engine = legacyWhisperEngineName(), + .modelFormat = legacyWhisperModelFormat(), }); manifest.assets.push_back(ModelAssetMetadata{ .role = QStringLiteral("weights"), @@ -62,8 +62,8 @@ std::optional ModelCatalog::inspectPath(const QString &pa return std::nullopt; } - if ((!requiredEngine.isEmpty() && requiredEngine != QStringLiteral("whisper.cpp")) - || (!requiredModelFormat.isEmpty() && requiredModelFormat != QStringLiteral("ggml"))) { + if ((!requiredEngine.isEmpty() && requiredEngine != legacyWhisperEngineName()) + || (!requiredModelFormat.isEmpty() && requiredModelFormat != legacyWhisperModelFormat())) { if (error != nullptr) { *error = makeRuntimeError(RuntimeErrorCode::IncompatibleModelPackage, QStringLiteral("Raw Whisper compatibility artifact does not match the active runtime")); diff --git a/src/transcription/modelpackage.cpp b/src/transcription/modelpackage.cpp index ff835e6..9fcd2ca 100644 --- a/src/transcription/modelpackage.cpp +++ b/src/transcription/modelpackage.cpp @@ -1,5 +1,6 @@ #include "transcription/modelpackage.h" +#include #include #include #include @@ -130,6 +131,33 @@ QString sanitizePackageId(const QString &value) return sanitized; } +QString cpuReferenceEngineName() +{ + return QStringLiteral("mutterkey.cpu-reference"); +} + +QString cpuReferenceModelFormat() +{ + return QStringLiteral("mkasr-v1"); +} + +QString legacyWhisperEngineName() 
+{ + return QStringLiteral("whisper.cpp"); +} + +QString legacyWhisperModelFormat() +{ + return QStringLiteral("ggml"); +} + +bool modelPackageSupportsCompatibility(const ModelPackageManifest &manifest, QStringView engine, QStringView modelFormat) +{ + return std::ranges::any_of(manifest.compatibleEngines, [engine, modelFormat](const ModelCompatibilityMarker &marker) { + return marker.engine == engine && marker.modelFormat == modelFormat; + }); +} + QJsonObject modelPackageManifestToJson(const ModelPackageManifest &manifest) { QJsonObject root; diff --git a/src/transcription/modelpackage.h b/src/transcription/modelpackage.h index 4e42ac8..287a15e 100644 --- a/src/transcription/modelpackage.h +++ b/src/transcription/modelpackage.h @@ -88,6 +88,41 @@ struct ValidatedModelPackage { [[nodiscard]] QString description() const; }; +/** + * @brief Stable engine identifier for the native CPU reference runtime. + * @return Product-owned engine marker recorded in package manifests. + */ +[[nodiscard]] QString cpuReferenceEngineName(); + +/** + * @brief Stable model-format identifier for the native CPU reference runtime. + * @return Product-owned model-format marker recorded in package manifests. + */ +[[nodiscard]] QString cpuReferenceModelFormat(); + +/** + * @brief Stable engine identifier for the legacy whisper.cpp adapter. + * @return Legacy engine marker recorded in package manifests. + */ +[[nodiscard]] QString legacyWhisperEngineName(); + +/** + * @brief Stable model-format identifier for the legacy whisper.cpp adapter. + * @return Legacy model-format marker recorded in package manifests. + */ +[[nodiscard]] QString legacyWhisperModelFormat(); + +/** + * @brief Reports whether a manifest advertises compatibility with a runtime marker pair. + * @param manifest Parsed model package manifest. + * @param engine Stable engine identifier to match. + * @param modelFormat Stable model-format marker to match. 
+ * @return `true` when the manifest contains a matching compatibility marker. + */ +[[nodiscard]] bool modelPackageSupportsCompatibility(const ModelPackageManifest &manifest, + QStringView engine, + QStringView modelFormat); + /** * @brief Returns the default root directory for native model packages. * @return Default package directory under the app data root. diff --git a/src/transcription/modelvalidator.cpp b/src/transcription/modelvalidator.cpp index 6aca76c..d50d499 100644 --- a/src/transcription/modelvalidator.cpp +++ b/src/transcription/modelvalidator.cpp @@ -85,6 +85,10 @@ bool hasRequiredCompatibility(const ModelPackageManifest &manifest, QStringView return true; } + if (!engine.isEmpty() && !modelFormat.isEmpty()) { + return modelPackageSupportsCompatibility(manifest, engine, modelFormat); + } + return std::ranges::any_of(manifest.compatibleEngines, [engine, modelFormat](const ModelCompatibilityMarker &marker) { const bool engineMatches = engine.isEmpty() || marker.engine == engine; const bool formatMatches = modelFormat.isEmpty() || marker.modelFormat == modelFormat; diff --git a/src/transcription/rawwhisperimporter.cpp b/src/transcription/rawwhisperimporter.cpp index e819ed1..6720a3e 100644 --- a/src/transcription/rawwhisperimporter.cpp +++ b/src/transcription/rawwhisperimporter.cpp @@ -130,8 +130,8 @@ std::optional RawWhisperImporter::importFile(const QStrin manifest.schemaVersion = 1; manifest.metadata = *metadata; manifest.compatibleEngines.push_back(ModelCompatibilityMarker{ - .engine = QStringLiteral("whisper.cpp"), - .modelFormat = QStringLiteral("ggml"), + .engine = legacyWhisperEngineName(), + .modelFormat = legacyWhisperModelFormat(), }); manifest.assets.push_back(ModelAssetMetadata{ .role = QStringLiteral("weights"), @@ -163,5 +163,5 @@ std::optional RawWhisperImporter::importFile(const QStrin return std::nullopt; } - return ModelValidator::validatePackagePath(packageDirectory, QStringLiteral("whisper.cpp"), QStringLiteral("ggml"), error); + 
return ModelValidator::validatePackagePath(packageDirectory, legacyWhisperEngineName(), legacyWhisperModelFormat(), error); } diff --git a/src/transcription/runtimeselector.cpp b/src/transcription/runtimeselector.cpp new file mode 100644 index 0000000..046d1bf --- /dev/null +++ b/src/transcription/runtimeselector.cpp @@ -0,0 +1,59 @@ +#include "transcription/runtimeselector.h" + +#include "transcription/modelcatalog.h" + +#include + +namespace { + +RuntimeSelection makeCpuSelection(QString reason, std::optional package = std::nullopt) +{ + return RuntimeSelection{ + .kind = RuntimeSelectionKind::CpuReference, + .reason = std::move(reason), + .inspectedPackage = std::move(package), + }; +} + +RuntimeSelection makeLegacySelection(QString reason, std::optional package = std::nullopt) +{ + return RuntimeSelection{ + .kind = RuntimeSelectionKind::LegacyWhisper, + .reason = std::move(reason), + .inspectedPackage = std::move(package), + }; +} + +} // namespace + +RuntimeSelection selectRuntimeForConfig(const TranscriberConfig &config) +{ + const std::optional package = ModelCatalog::inspectPath(config.modelPath); + if (!package.has_value()) { +#if defined(MUTTERKEY_WITH_LEGACY_WHISPER) + return makeLegacySelection(QStringLiteral("Falling back to legacy whisper runtime because the model path could not be inspected")); +#else + return makeCpuSelection(QStringLiteral("Using native CPU reference runtime because legacy whisper support is disabled")); +#endif + } + + if (modelPackageSupportsCompatibility(package->manifest, cpuReferenceEngineName(), cpuReferenceModelFormat())) { + return makeCpuSelection(QStringLiteral("Selected native CPU reference runtime from package compatibility markers"), package); + } + + if (modelPackageSupportsCompatibility(package->manifest, legacyWhisperEngineName(), legacyWhisperModelFormat())) { +#if defined(MUTTERKEY_WITH_LEGACY_WHISPER) + return makeLegacySelection(QStringLiteral("Selected legacy whisper runtime from package compatibility 
markers"), package); +#else + return makeCpuSelection(QStringLiteral("Using native CPU reference runtime because legacy whisper support is disabled"), package); +#endif + } + +#if defined(MUTTERKEY_WITH_LEGACY_WHISPER) + return makeLegacySelection(QStringLiteral("Falling back to legacy whisper runtime because the package markers are not yet recognized by the native selector"), + package); +#else + return makeCpuSelection(QStringLiteral("Using native CPU reference runtime because no compatible runtime markers were recognized"), + package); +#endif +} diff --git a/src/transcription/runtimeselector.h b/src/transcription/runtimeselector.h new file mode 100644 index 0000000..d46aae4 --- /dev/null +++ b/src/transcription/runtimeselector.h @@ -0,0 +1,39 @@ +#pragma once + +#include "config.h" +#include "transcription/modelpackage.h" + +#include +#include + +/** + * @file + * @brief Runtime-selection policy for product-owned and legacy transcription engines. + */ + +/** + * @brief Stable runtime families selectable by the app-owned factory. + */ +enum class RuntimeSelectionKind : std::uint8_t { + CpuReference, + LegacyWhisper, +}; + +/** + * @brief Result of selecting a runtime for a configured model path. + */ +struct RuntimeSelection { + /// Chosen runtime implementation. + RuntimeSelectionKind kind = RuntimeSelectionKind::CpuReference; + /// Human-readable reason suitable for diagnostics. + QString reason; + /// Best-effort inspected package used to make the decision. + std::optional inspectedPackage; +}; + +/** + * @brief Chooses the runtime implementation for a transcription config. + * @param config Runtime configuration containing the model path. + * @return Product-owned selection result with a diagnostic reason. 
+ */ +[[nodiscard]] RuntimeSelection selectRuntimeForConfig(const TranscriberConfig &config); diff --git a/src/transcription/transcriptionengine.cpp b/src/transcription/transcriptionengine.cpp index bfd0a58..913d8b4 100644 --- a/src/transcription/transcriptionengine.cpp +++ b/src/transcription/transcriptionengine.cpp @@ -1,8 +1,22 @@ #include "transcription/transcriptionengine.h" +#include "transcription/cpureferencetranscriber.h" +#include "transcription/runtimeselector.h" + +#if defined(MUTTERKEY_WITH_LEGACY_WHISPER) #include "transcription/whispercpptranscriber.h" +#endif std::shared_ptr createTranscriptionEngine(const TranscriberConfig &config) { + const RuntimeSelection selection = selectRuntimeForConfig(config); + if (selection.kind == RuntimeSelectionKind::CpuReference) { + return createCpuReferenceTranscriptionEngine(config); + } + +#if defined(MUTTERKEY_WITH_LEGACY_WHISPER) return createWhisperCppTranscriptionEngine(config); +#else + return createCpuReferenceTranscriptionEngine(config); +#endif } diff --git a/src/transcription/transcriptiontypes.h b/src/transcription/transcriptiontypes.h index ae0964c..5abba77 100644 --- a/src/transcription/transcriptiontypes.h +++ b/src/transcription/transcriptiontypes.h @@ -72,6 +72,8 @@ struct BackendCapabilities { struct RuntimeDiagnostics { /// Stable backend identifier used in diagnostics. QString backendName; + /// Human-readable explanation for why this runtime was selected. + QString selectionReason; /// Human-readable runtime and device summary. QString runtimeDescription; /// Loaded-model description when a model is available. 
diff --git a/src/transcription/whispercpptranscriber.cpp b/src/transcription/whispercpptranscriber.cpp index aa87d7d..735caab 100644 --- a/src/transcription/whispercpptranscriber.cpp +++ b/src/transcription/whispercpptranscriber.cpp @@ -1,6 +1,7 @@ #include "transcription/whispercpptranscriber.h" #include "transcription/modelcatalog.h" +#include "transcription/runtimeselector.h" #include #include @@ -170,7 +171,7 @@ std::shared_ptr WhisperCppTranscriber::loadModelHandle(const TranscriberConfig &config, RuntimeError *error) { const std::optional package = - ModelCatalog::inspectPath(config.modelPath, QStringLiteral("whisper.cpp"), QStringLiteral("ggml"), error); + ModelCatalog::inspectPath(config.modelPath, legacyWhisperEngineName(), legacyWhisperModelFormat(), error); if (!package.has_value()) { return nullptr; } @@ -246,6 +247,7 @@ RuntimeDiagnostics WhisperCppTranscriber::diagnosticsStatic() { return RuntimeDiagnostics{ .backendName = backendNameStatic(), + .selectionReason = QStringLiteral("Legacy whisper runtime selected explicitly"), .runtimeDescription = describeRegisteredBackends(), .loadedModelDescription = {}, }; @@ -432,7 +434,9 @@ class WhisperCppTranscriptionEngine final : public TranscriptionEngine [[nodiscard]] RuntimeDiagnostics diagnostics() const override { - return WhisperCppTranscriber::diagnosticsStatic(); + RuntimeDiagnostics diagnostics = WhisperCppTranscriber::diagnosticsStatic(); + diagnostics.selectionReason = selectRuntimeForConfig(m_config).reason; + return diagnostics; } [[nodiscard]] std::shared_ptr loadModel(RuntimeError *error) const override diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 05f6df8..57386a3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -65,16 +65,35 @@ mutterkey_add_qt_test(transcriptionworkertest ../src/transcription/transcriptionworker.cpp ) -mutterkey_add_qt_test(whispercpptranscribertest - whispercpptranscribertest.cpp +mutterkey_add_qt_test(cpureferencetranscribertest + 
cpureferencetranscribertest.cpp + ../src/transcription/cpureferencemodel.cpp ../src/transcription/modelcatalog.cpp ../src/transcription/transcriptionengine.cpp ../src/transcription/modelpackage.cpp ../src/transcription/modelvalidator.cpp + ../src/transcription/cpureferencetranscriber.cpp ../src/transcription/rawwhisperprobe.cpp - ../src/transcription/whispercpptranscriber.cpp + ../src/transcription/runtimeselector.cpp ) +if(MUTTERKEY_ENABLE_LEGACY_WHISPER) + mutterkey_add_qt_test(whispercpptranscribertest + whispercpptranscribertest.cpp + ../src/transcription/cpureferencemodel.cpp + ../src/transcription/modelcatalog.cpp + ../src/transcription/transcriptionengine.cpp + ../src/transcription/modelpackage.cpp + ../src/transcription/modelvalidator.cpp + ../src/transcription/rawwhisperprobe.cpp + ../src/transcription/cpureferencetranscriber.cpp + ../src/transcription/runtimeselector.cpp + ../src/transcription/whispercpptranscriber.cpp + ) + target_compile_definitions(whispercpptranscribertest PRIVATE MUTTERKEY_WITH_LEGACY_WHISPER) + target_link_libraries(whispercpptranscribertest PRIVATE whisper) +endif() + mutterkey_add_qt_test(platformlogicstest platformlogicstest.cpp ) @@ -90,16 +109,21 @@ mutterkey_add_qt_test(traystatuswindowtest target_link_libraries(daemoncontrolclientservertest PRIVATE mutterkey_control) target_link_libraries(platformlogicstest PRIVATE mutterkey_core) -target_link_libraries(whispercpptranscribertest PRIVATE whisper) add_test(NAME testcommentarycheck COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/../scripts/check-test-commentary.sh ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) if(TARGET clang-tidy) - add_dependencies(clang-tidy configtest_autogen commanddispatchtest_autogen recordingnormalizertest_autogen streamingtranscriptiontest_autogen modelpackagetest_autogen daemoncontrolprotocoltest_autogen daemoncontroltypestest_autogen daemoncontrolclientservertest_autogen transcriptionworkertest_autogen whispercpptranscribertest_autogen platformlogicstest_autogen traystatuswindowtest_autogen) + add_dependencies(clang-tidy configtest_autogen commanddispatchtest_autogen recordingnormalizertest_autogen streamingtranscriptiontest_autogen modelpackagetest_autogen daemoncontrolprotocoltest_autogen daemoncontroltypestest_autogen daemoncontrolclientservertest_autogen transcriptionworkertest_autogen cpureferencetranscribertest_autogen platformlogicstest_autogen traystatuswindowtest_autogen) + if(TARGET whispercpptranscribertest_autogen) + add_dependencies(clang-tidy whispercpptranscribertest_autogen) + endif() endif() if(TARGET clazy) - add_dependencies(clazy configtest_autogen commanddispatchtest_autogen recordingnormalizertest_autogen streamingtranscriptiontest_autogen modelpackagetest_autogen daemoncontrolprotocoltest_autogen daemoncontroltypestest_autogen daemoncontrolclientservertest_autogen transcriptionworkertest_autogen whispercpptranscribertest_autogen platformlogicstest_autogen traystatuswindowtest_autogen) + add_dependencies(clazy configtest_autogen commanddispatchtest_autogen recordingnormalizertest_autogen streamingtranscriptiontest_autogen modelpackagetest_autogen daemoncontrolprotocoltest_autogen daemoncontroltypestest_autogen daemoncontrolclientservertest_autogen transcriptionworkertest_autogen cpureferencetranscribertest_autogen platformlogicstest_autogen traystatuswindowtest_autogen) + if(TARGET whispercpptranscribertest_autogen) + add_dependencies(clazy whispercpptranscribertest_autogen) + endif() endif() diff --git a/tests/cpureferencetranscribertest.cpp b/tests/cpureferencetranscribertest.cpp new file mode 100644 index 
0000000..1ac57c5
--- /dev/null
+++ b/tests/cpureferencetranscribertest.cpp
@@ -0,0 +1,274 @@
+#include "transcription/cpureferencetranscriber.h"
+#include "transcription/modelpackage.h"
+#include "transcription/transcriptionengine.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+namespace {
+
+constexpr std::array kCpuReferenceMagic{'M', 'K', 'C', 'P', 'U', 'R', '1', '\0'};
+constexpr std::uint32_t kCpuReferenceVersion = 1;
+
+class CpuReferenceTranscriberTest final : public QObject
+{
+    Q_OBJECT
+
+private slots:
+    void engineDispatchesToNativeCpuPackage();
+    void nativeCpuRuntimeEmitsDeterministicTranscript();
+    void nativeCpuRuntimeRejectsTranslationMode();
+    void engineDiagnosticsExplainNativeSelection();
+};
+
+// Appends the in-memory bytes of `value` to `file`.
+// Returns false when `file` is null or fewer than sizeof(T) bytes were written.
+template
+bool writePod(QFile *file, const T &value)
+{
+    if (file == nullptr) {
+        return false;
+    }
+
+    QByteArray bytes(static_cast(sizeof(T)), '\0');
+    std::memcpy(bytes.data(), &value, sizeof(T));
+    return file->write(bytes) == bytes.size();
+}
+
+// Inputs for writeCpuReferenceModel(): destination file and transcript payload.
+struct CpuReferenceFixtureFile {
+    QString path;
+    QString transcript;
+};
+
+// Inputs for writeCpuReferencePackage(): package directory and transcript payload.
+struct CpuReferenceFixturePackage {
+    QString packageRoot;
+    QString transcript;
+};
+
+// Returns the hex-encoded SHA-256 of the file at `path`, or an empty string
+// when the file cannot be opened or read.
+QString sha256ForFile(const QString &path)
+{
+    QFile file(path);
+    if (!file.open(QIODevice::ReadOnly)) {
+        return {};
+    }
+
+    QCryptographicHash hash(QCryptographicHash::Sha256);
+    if (!hash.addData(&file)) {
+        return {};
+    }
+    return QString::fromLatin1(hash.result().toHex());
+}
+
+// Writes a fixture model file: magic bytes, format version, transcript byte
+// count, then the UTF-8 transcript. Returns false on any open or write failure.
+bool writeCpuReferenceModel(const CpuReferenceFixtureFile &request)
+{
+    QFile file(request.path);
+    if (!file.open(QIODevice::WriteOnly)) {
+        return false;
+    }
+
+    if (file.write(kCpuReferenceMagic.data(), static_cast(kCpuReferenceMagic.size()))
+        != static_cast(kCpuReferenceMagic.size())) {
+        return false;
+    }
+
+    const QByteArray transcriptUtf8 = request.transcript.toUtf8();
+    const auto transcriptBytes = static_cast(transcriptUtf8.size());
+    return writePod(&file, kCpuReferenceVersion)
+        && writePod(&file, transcriptBytes)
+        && file.write(transcriptUtf8) == transcriptUtf8.size();
+}
+
+// Builds a fixture package under `request.packageRoot`: the model file at
+// assets/model.mkcpu plus a model.json manifest that references it.
+// Returns false if any directory, model, or manifest step fails.
+bool writeCpuReferencePackage(const CpuReferenceFixturePackage &request)
+{
+    const QDir root;
+    if (!root.mkpath(QDir(request.packageRoot).filePath(QStringLiteral("assets")))) {
+        return false;
+    }
+
+    const QString weightsPath = QDir(request.packageRoot).filePath(QStringLiteral("assets/model.mkcpu"));
+    if (!writeCpuReferenceModel(CpuReferenceFixtureFile{
+            .path = weightsPath,
+            .transcript = request.transcript,
+        })) {
+        return false;
+    }
+
+    ModelPackageManifest manifest;
+    manifest.format = QStringLiteral("mutterkey.model-package");
+    manifest.schemaVersion = 1;
+    manifest.metadata.packageId = QStringLiteral("fixture-native-cpu");
+    manifest.metadata.displayName = QStringLiteral("Fixture Native CPU");
+    manifest.metadata.runtimeFamily = QStringLiteral("asr");
+    manifest.metadata.sourceFormat = QStringLiteral("mutterkey-native");
+    manifest.metadata.modelFormat = cpuReferenceModelFormat();
+    manifest.metadata.architecture = QStringLiteral("reference-fixture");
+    manifest.metadata.languageProfile = QStringLiteral("en");
+    manifest.metadata.tokenizer = QStringLiteral("builtin");
+    manifest.compatibleEngines.push_back(ModelCompatibilityMarker{
+        .engine = cpuReferenceEngineName(),
+        .modelFormat = cpuReferenceModelFormat(),
+    });
+    manifest.assets.push_back(ModelAssetMetadata{
+        .role = QStringLiteral("weights"),
+        .relativePath = QStringLiteral("assets/model.mkcpu"),
+        .sha256 = sha256ForFile(weightsPath),
+        .sizeBytes = QFileInfo(weightsPath).size(),
+    });
+
+    QFile manifestFile(QDir(request.packageRoot).filePath(QStringLiteral("model.json")));
+    if (!manifestFile.open(QIODevice::WriteOnly | QIODevice::Text)) {
+        return false;
+    }
+
+    // Report a failed or short manifest write instead of claiming success.
+    const QByteArray manifestJson = QJsonDocument(modelPackageManifestToJson(manifest)).toJson(QJsonDocument::Indented);
+    return manifestFile.write(manifestJson) == manifestJson.size();
+}
+
+// Returns a short mono 16 kHz chunk at stream offset 0 that the tests push as valid audio.
+AudioChunk validChunk()
+{
+    return AudioChunk{
+        .samples = {0.0F, 0.05F, -0.04F, 0.03F, -0.02F, 0.01F, -0.03F, 0.02F},
+        .sampleRate = 16000,
+        .channels = 1,
+        .streamOffsetFrames = 0,
+    };
+}
+
+} // namespace
+
+void CpuReferenceTranscriberTest::engineDispatchesToNativeCpuPackage()
+{
+    // WHAT: Verify that the generic engine factory chooses the native CPU runtime for a native package.
+    // HOW: Create a temporary package marked for the product-owned CPU runtime and inspect the backend name exposed by the factory.
+    // WHY: Phase 5 only starts decoupling from whisper.cpp once the app-level factory routes native packages to the new runtime contract.
+    const QTemporaryDir tempDir;
+    QVERIFY(tempDir.isValid());
+    const QString packageRoot = tempDir.filePath(QStringLiteral("native-package"));
+    QVERIFY(writeCpuReferencePackage(CpuReferenceFixturePackage{
+        .packageRoot = packageRoot,
+        .transcript = QStringLiteral("hello from cpu runtime"),
+    }));
+
+    TranscriberConfig config;
+    config.modelPath = packageRoot;
+
+    const std::shared_ptr engine = createTranscriptionEngine(config);
+    QVERIFY(engine != nullptr);
+    QCOMPARE(engine->capabilities().backendName, cpuReferenceEngineName());
+}
+
+void CpuReferenceTranscriberTest::nativeCpuRuntimeEmitsDeterministicTranscript()
+{
+    // WHAT: Verify that the native CPU runtime loads a package and emits a deterministic final transcript.
+    // HOW: Build a temporary native package whose weights payload embeds a fixed transcript, then push one valid chunk and finish the session.
+    // WHY: The Phase 5 reference runtime needs deterministic model/session behavior so conformance tests can pin transcript and timestamp behavior without third-party state.
+    const QTemporaryDir tempDir;
+    QVERIFY(tempDir.isValid());
+    const QString packageRoot = tempDir.filePath(QStringLiteral("native-package"));
+    QVERIFY(writeCpuReferencePackage(CpuReferenceFixturePackage{
+        .packageRoot = packageRoot,
+        .transcript = QStringLiteral("hello from cpu runtime"),
+    }));
+
+    TranscriberConfig config;
+    config.modelPath = packageRoot;
+
+    RuntimeError error;
+    const std::shared_ptr model = CpuReferenceTranscriber::loadModelHandle(config, &error);
+    QVERIFY(model != nullptr);
+    QVERIFY(error.isOk());
+
+    std::unique_ptr session = CpuReferenceTranscriber::createSession(config, model);
+    QVERIFY(session != nullptr);
+    QVERIFY(session->warmup(&error));
+    QVERIFY(error.isOk());
+
+    const TranscriptUpdate pushUpdate = session->pushAudioChunk(validChunk());
+    QVERIFY(pushUpdate.isOk());
+
+    const TranscriptUpdate finishUpdate = session->finish();
+    QVERIFY(finishUpdate.isOk());
+    QCOMPARE(finishUpdate.events.size(), static_cast(1));
+    QCOMPARE(finishUpdate.events.front().kind, TranscriptEventKind::Final);
+    QCOMPARE(finishUpdate.events.front().text, QStringLiteral("hello from cpu runtime"));
+    QCOMPARE(finishUpdate.events.front().startMs, 0);
+    QVERIFY(finishUpdate.events.front().endMs > 0);
+}
+
+void CpuReferenceTranscriberTest::nativeCpuRuntimeRejectsTranslationMode()
+{
+    // WHAT: Verify that the native CPU runtime rejects translation mode in its narrow Phase 5 scope.
+    // HOW: Create a temporary native package, enable translate in config, and finish a session after pushing one valid chunk.
+    // WHY: The first product-owned CPU runtime is intentionally ASR-only, so unsupported feature requests must fail explicitly instead of silently diverging from the contract.
+    const QTemporaryDir tempDir;
+    QVERIFY(tempDir.isValid());
+    const QString packageRoot = tempDir.filePath(QStringLiteral("native-package"));
+    QVERIFY(writeCpuReferencePackage(CpuReferenceFixturePackage{
+        .packageRoot = packageRoot,
+        .transcript = QStringLiteral("hello from cpu runtime"),
+    }));
+
+    TranscriberConfig config;
+    config.modelPath = packageRoot;
+    config.translate = true;
+
+    RuntimeError error;
+    const std::shared_ptr model = CpuReferenceTranscriber::loadModelHandle(config, &error);
+    QVERIFY(model != nullptr);
+    QVERIFY(error.isOk());
+
+    std::unique_ptr session = CpuReferenceTranscriber::createSession(config, model);
+    QVERIFY(session != nullptr);
+    QVERIFY(session->pushAudioChunk(validChunk()).isOk());
+
+    const TranscriptUpdate finishUpdate = session->finish();
+    QVERIFY(!finishUpdate.isOk());
+    QCOMPARE(finishUpdate.error.code, RuntimeErrorCode::InvalidConfig);
+}
+
+void CpuReferenceTranscriberTest::engineDiagnosticsExplainNativeSelection()
+{
+    // WHAT: Verify that native-runtime diagnostics explain why the generic factory chose the native backend.
+    // HOW: Create a temporary native package, construct the generic engine, and inspect the selection reason surfaced in runtime diagnostics.
+    // WHY: Once runtime policy is moved out of the factory body, diagnostics should preserve that policy decision so diagnose output remains actionable.
+    const QTemporaryDir tempDir;
+    QVERIFY(tempDir.isValid());
+    const QString packageRoot = tempDir.filePath(QStringLiteral("native-package"));
+    QVERIFY(writeCpuReferencePackage(CpuReferenceFixturePackage{
+        .packageRoot = packageRoot,
+        .transcript = QStringLiteral("hello from cpu runtime"),
+    }));
+
+    TranscriberConfig config;
+    config.modelPath = packageRoot;
+
+    const std::shared_ptr engine = createTranscriptionEngine(config);
+    QVERIFY(engine != nullptr);
+    QCOMPARE(engine->diagnostics().backendName, cpuReferenceEngineName());
+    QVERIFY(engine->diagnostics().selectionReason.contains(QStringLiteral("native CPU reference runtime"),
+                                                           Qt::CaseInsensitive));
+}
+
+QTEST_APPLESS_MAIN(CpuReferenceTranscriberTest)
+
+#include "cpureferencetranscribertest.moc"
diff --git a/tests/transcriptionworkertest.cpp b/tests/transcriptionworkertest.cpp
index cedd52f..5290996 100644
--- a/tests/transcriptionworkertest.cpp
+++ b/tests/transcriptionworkertest.cpp
@@ -36,6 +36,7 @@ RuntimeDiagnostics fakeDiagnostics()
 {
     return RuntimeDiagnostics{
         .backendName = QStringLiteral("fake"),
+        .selectionReason = QStringLiteral("fake selection"),
         .runtimeDescription = QStringLiteral("fake runtime"),
         .loadedModelDescription = {},
     };
diff --git a/tests/whispercpptranscribertest.cpp b/tests/whispercpptranscribertest.cpp
index aa59c35..09c2314 100644
--- a/tests/whispercpptranscribertest.cpp
+++ b/tests/whispercpptranscribertest.cpp
@@ -12,6 +12,7 @@ class WhisperCppTranscriberTest final : public QObject
 private slots:
     void whisperEngineSurfacesMissingModelAtLoadTime();
     void whisperRuntimeRejectsUnsupportedLanguage();
+    void legacyEngineDiagnosticsExplainFallback();
 };
 
 } // namespace
@@ -54,6 +55,21 @@ void WhisperCppTranscriberTest::whisperRuntimeRejectsUnsupportedLanguage()
     QVERIFY(update.error.message.contains(QStringLiteral("pirate")));
 }
 
+void WhisperCppTranscriberTest::legacyEngineDiagnosticsExplainFallback()
+{
+    // WHAT: Verify that the generic engine surfaces a legacy-selection reason when the model path cannot be inspected.
+    // HOW: Construct the generic engine with a definitely missing model path and inspect the reported runtime diagnostics.
+    // WHY: The runtime selector should explain why it fell back to the legacy backend so diagnose output can distinguish policy fallback from normal compatibility routing.
+    TranscriberConfig config;
+    config.modelPath = QStringLiteral("/tmp/definitely-missing-mutterkey-model.bin");
+
+    const std::shared_ptr engine = createTranscriptionEngine(config);
+    QVERIFY(engine != nullptr);
+    QCOMPARE(engine->diagnostics().backendName, QStringLiteral("whisper.cpp"));
+    QVERIFY(engine->diagnostics().selectionReason.contains(QStringLiteral("legacy whisper runtime"),
+                                                           Qt::CaseInsensitive));
+}
+
 QTEST_APPLESS_MAIN(WhisperCppTranscriberTest)
 
 #include "whispercpptranscribertest.moc"