From b18a26225cd38316a8cb3e01397137604f3e2e48 Mon Sep 17 00:00:00 2001
From: Niko Nousiainen <nousiainen.niko@gmail.com>
Date: Sat, 28 Mar 2026 12:05:23 +0200
Subject: [PATCH] Move more model handling from whisper.cpp to the
 Mutterkey side

---
 AGENTS.md                                     |  47 ++-
 CMakeLists.txt                                |  94 +++---
 README.md                                     |   5 +
 RELEASE_CHECKLIST.md                          |  20 +-
 docs/mainpage.md                              |  18 +-
 src/service.cpp                               |   1 +
 src/transcription/cpureferencemodel.cpp       | 153 ++++++++++
 src/transcription/cpureferencemodel.h         |  82 +++++
 src/transcription/cpureferencetranscriber.cpp | 280 ++++++++++++++++++
 src/transcription/cpureferencetranscriber.h   |  95 ++++++
 src/transcription/modelcatalog.cpp            |   8 +-
 src/transcription/modelpackage.cpp            |  28 ++
 src/transcription/modelpackage.h              |  35 +++
 src/transcription/modelvalidator.cpp          |   4 +
 src/transcription/rawwhisperimporter.cpp      |   6 +-
 src/transcription/runtimeselector.cpp         |  59 ++++
 src/transcription/runtimeselector.h           |  39 +++
 src/transcription/transcriptionengine.cpp     |  14 +
 src/transcription/transcriptiontypes.h        |   2 +
 src/transcription/whispercpptranscriber.cpp   |   8 +-
 tests/CMakeLists.txt                          |  36 ++-
 tests/cpureferencetranscribertest.cpp         | 259 ++++++++++++++++
 tests/transcriptionworkertest.cpp             |   1 +
 tests/whispercpptranscribertest.cpp           |  16 +
 24 files changed, 1252 insertions(+), 58 deletions(-)
 create mode 100644 src/transcription/cpureferencemodel.cpp
 create mode 100644 src/transcription/cpureferencemodel.h
 create mode 100644 src/transcription/cpureferencetranscriber.cpp
 create mode 100644 src/transcription/cpureferencetranscriber.h
 create mode 100644 src/transcription/runtimeselector.cpp
 create mode 100644 src/transcription/runtimeselector.h
 create mode 100644 tests/cpureferencetranscribertest.cpp

diff --git a/AGENTS.md b/AGENTS.md
index a4fc376..5c9f9b4 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -8,7 +8,13 @@ Current architecture:
 
 - Global shortcut handling goes through `KGlobalAccel`
 - Audio capture uses Qt Multimedia
-- Transcription is in-process through vendored `whisper.cpp`
+- Transcription goes through an app-owned runtime seam with explicit runtime
+  selection
+- A product-owned native CPU reference runtime scaffold now exists alongside
+  the legacy whisper adapter
+- `whisper.cpp` is still the only real end-user speech decoder today, but the
+  vendored runtime is now optional at build time through
+  `MUTTERKEY_ENABLE_LEGACY_WHISPER=OFF`
 - Native Mutterkey model packages are now the canonical model artifact; raw
   whisper.cpp-compatible `.bin` files remain only as a migration/import path
 - The public runtime seam is streaming-first through app-owned chunks, events, and compatibility helpers
@@ -22,7 +28,9 @@ Current architecture:
 This repository is intentionally kept minimal:
 
 - CMake is the only supported build system
-- `whisper.cpp` is the only supported transcription backend
+- `whisper.cpp` remains a vendored legacy backend, but new runtime ownership
+  work should prefer the product-owned native CPU path and selector/model-loader
+  seams first
 - Keep the repo free of generated build output
 - Keep publication-facing files free of machine-specific paths and broken local links
 - Do not reintroduce legacy qmake or external-command transcription paths unless explicitly requested
@@ -37,11 +45,14 @@ This repository is intentionally kept minimal:
 - `src/clipboardwriter.*`: clipboard integration, preferring KDE system clipboard support
 - `src/audio/recordingnormalizer.*`: conversion to runtime-ready mono `float32` at `16 kHz`
 - `src/transcription/audiochunker.*`: deterministic chunking of normalized audio for the streaming runtime path
+- `src/transcription/cpureferencemodel.*`: product-owned native CPU reference model header/parser and immutable model-handle loading
+- `src/transcription/cpureferencetranscriber.*`: native CPU reference runtime scaffold behind the app-owned engine/session seam
 - `src/transcription/modelpackage.*`: product-owned manifest and validated package value types
 - `src/transcription/modelvalidator.*`: package integrity, compatibility, and bounds validation
 - `src/transcription/modelcatalog.*`: model artifact inspection and resolution
 - `src/transcription/rawwhisperprobe.*`: lightweight raw whisper.cpp header inspection used for migration compatibility
 - `src/transcription/rawwhisperimporter.*`: import path from raw Whisper `.bin` files into native Mutterkey packages
+- `src/transcription/runtimeselector.*`: app-owned runtime-selection policy and diagnostic reasoning
 - `src/transcription/transcriptassembler.*`: final transcript assembly from streaming transcript events
 - `src/transcription/transcriptioncompat.*`: compatibility wrapper that routes one-shot recordings through the streaming runtime seam
 - `src/transcription/whispercpptranscriber.*`: in-process Whisper integration and whisper-specific engine construction
@@ -86,6 +97,13 @@ cmake -S . -B "$BUILD_DIR" -G Ninja
 cmake --build "$BUILD_DIR" -j"$(nproc)"
 ```
 
+To validate the native-runtime-only path without vendored `whisper.cpp` /
+`ggml`, configure with:
+
+```bash
+cmake -S . -B "$BUILD_DIR" -G Ninja -DMUTTERKEY_ENABLE_LEGACY_WHISPER=OFF
+```
+
 If a sandboxed build fails with `ccache: error: Read-only file system`, treat
 that as an environment limitation rather than a repo regression and rerun the
 build with `CCACHE_DISABLE=1`.
@@ -136,6 +154,9 @@ Notes:
 - Use `bash scripts/check-release-hygiene.sh` when touching publication-facing files such as `README.md`, licenses, `contrib/`, CI, or helper scripts
 - Use `cmake --build "$BUILD_DIR" --target docs` when touching repo-owned public headers, Doxygen config, the Doxygen main page, or CI/docs wiring
 - If install rules or licensing files change, confirm the temporary install contains the expected files under `share/licenses/mutterkey`
+- If a task changes runtime selection, native model loading, or legacy-whisper
+  build toggles, validate at least one `MUTTERKEY_ENABLE_LEGACY_WHISPER=OFF`
+  build in addition to the normal default build
 - If you add or change public methods in repo-owned headers, expect `cmake --build "$BUILD_DIR" --target docs` to fail until the new API is documented; treat that as part of the normal implementation loop, not follow-up polish
 - Newly added repo-owned public structs and free functions in public headers also
   need Doxygen comments immediately; the `docs` target treats undocumented new
@@ -158,6 +179,12 @@ Notes:
 - When validating inside a restricted sandbox, be ready to disable `ccache` with `CCACHE_DISABLE=1` if the cache location is read-only; that is an execution-environment issue, not a Mutterkey build failure
 - Prefer fixing the code over weakening `.clang-tidy` or the Clazy check set; only relax tool config when the warning is clearly low-value for this repo
 - If `clang-tidy` flags a new small enum for `performance-enum-size`, prefer an explicit narrow underlying type such as `std::uint8_t` instead of suppressing the warning
+- If `clang-tidy` flags a small fixed binary header type, prefer
+  `std::array<std::byte, N>` or `std::array<char, N>` plus value
+  initialization over C-style arrays
+- When helper functions take two adjacent same-shaped parameters such as two
+  `QString` values, prefer a small request struct when that keeps tests and
+  runtime code from tripping `bugprone-easily-swappable-parameters`
 - In this Qt-heavy repo, treat `misc-include-cleaner` and `readability-redundant-access-specifiers` as low-value `clang-tidy` noise unless the underlying tool behavior improves; they conflict with Qt header-provider reality and `signals` / `slots` / `Q_SLOTS` sectioning more than they improve safety
 - Prefer anonymous-namespace `Q_LOGGING_CATEGORY` for file-local logging categories; `Q_STATIC_LOGGING_CATEGORY` is not portable enough across the Qt versions this repo may build against
 - Do not add broad Valgrind suppressions by default; only add narrow suppressions after reproducing stable third-party noise and keep them clearly scoped
@@ -183,7 +210,15 @@ Notes:
 - Keep JSON and other transport details at subsystem boundaries; prefer typed C++ snapshots/results once data crosses into app-owned control, tray, or service code
 - Prefer dependency injection for tray-shell and control-surface code from the first implementation so headless Qt tests stay simple
 - When preparing the transcription path for future runtime work, prefer app-owned engine/session seams and injected sessions over leaking concrete backend types into CLI, service, or worker orchestration. Keep immutable capability reporting on the engine side, keep runtime inspection data in `RuntimeDiagnostics`, and keep the session side focused on mutable decode state, warmup, chunk ingestion, finish, and cancellation
-- Prefer product-owned runtime interfaces, model/session separation, and deterministic backend selection before adding new inference backends or widening cross-platform support
+- Prefer product-owned runtime interfaces, model/session separation, explicit
+  runtime-selection policy, and deterministic backend selection before adding
+  new inference backends or widening cross-platform support
+- Keep runtime-selection policy in `src/transcription/runtimeselector.*`
+  instead of burying compatibility/fallback rules inside
+  `createTranscriptionEngine()`
+- Keep native model-format parsing and immutable model loading in
+  `src/transcription/cpureferencemodel.*` or similar app-owned loader code
+  rather than mixing artifact parsing into the mutable session implementation
 - Keep model validation, metadata extraction, and compatibility checks app-owned.
   `whisper.cpp` should not be the first component that tells Mutterkey whether a
   model artifact is obviously malformed, incompatible, or oversized
@@ -218,6 +253,9 @@ Apply the C++ Core Guidelines selectively and pragmatically. For this repo, the
 - Prefer resolving model-package, metadata, and import work entirely in app-owned
   code. Raw whisper.cpp `.bin` support is now a compatibility/import concern, not
   the canonical product contract
+- Prefer treating `whisper.cpp` as a legacy migration/parity dependency from
+  here forward. If new work can land in app-owned selector, model-loader,
+  native-runtime, or package code instead, do that first
 - Prefer keeping fake runtime tests and app-owned helpers free of vendored whisper linkage unless the test is specifically about the whisper adapter or engine factory
 - Prefer fixing vendored target metadata from the top-level CMake when the issue is Mutterkey packaging or warning noise, instead of patching upstream vendored files directly
 - If you must modify vendored code, document why in the final response and record the deviation in `third_party/whisper.cpp.UPSTREAM.md`
@@ -233,6 +271,9 @@ Apply the C++ Core Guidelines selectively and pragmatically. For this repo, the
   separate release asset outside Git
 - Do not introduce machine-specific home-directory paths, absolute local Markdown links, or generated build artifacts into tracked files
 - If a task changes install layout or shipped assets, keep the CMake install rules and license installs aligned with the new behavior
+- If a task changes whether legacy whisper support is installed, keep
+  `README.md`, `RELEASE_CHECKLIST.md`, `docs/mainpage.md`, install rules, and
+  license installs aligned with that choice
 - The installed shared-library payload is runtime-focused; do not start installing vendored upstream public headers unless the package contract intentionally changes
 
 ## Config Expectations
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e2eacbb..2544dab 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,7 @@ set(CMAKE_CXX_EXTENSIONS OFF)
 
 option(MUTTERKEY_ENABLE_ASAN "Enable AddressSanitizer for repo-owned code and vendored whisper.cpp" OFF)
 option(MUTTERKEY_ENABLE_UBSAN "Enable UndefinedBehaviorSanitizer for repo-owned code and vendored whisper.cpp" OFF)
+option(MUTTERKEY_ENABLE_LEGACY_WHISPER "Build the legacy whisper.cpp runtime for migration and parity validation" ON)
 option(MUTTERKEY_ENABLE_WHISPER_CUDA "Enable whisper.cpp CUDA backend support (NVIDIA)" OFF)
 option(MUTTERKEY_ENABLE_WHISPER_VULKAN "Enable whisper.cpp Vulkan backend support" OFF)
 option(MUTTERKEY_ENABLE_WHISPER_BLAS "Enable whisper.cpp BLAS CPU acceleration" OFF)
@@ -47,6 +48,10 @@ set(MUTTERKEY_CORE_SOURCES
     src/transcription/transcriptionengine.h
     src/transcription/audiochunker.cpp
     src/transcription/audiochunker.h
+    src/transcription/cpureferencemodel.cpp
+    src/transcription/cpureferencemodel.h
+    src/transcription/cpureferencetranscriber.cpp
+    src/transcription/cpureferencetranscriber.h
     src/transcription/modelcatalog.cpp
     src/transcription/modelcatalog.h
     src/transcription/modelpackage.cpp
@@ -57,16 +62,23 @@ set(MUTTERKEY_CORE_SOURCES
     src/transcription/rawwhisperimporter.h
     src/transcription/rawwhisperprobe.cpp
     src/transcription/rawwhisperprobe.h
+    src/transcription/runtimeselector.cpp
+    src/transcription/runtimeselector.h
     src/transcription/transcriptassembler.cpp
     src/transcription/transcriptassembler.h
     src/transcription/transcriptioncompat.cpp
     src/transcription/transcriptioncompat.h
     src/transcription/transcriptionworker.cpp
     src/transcription/transcriptionworker.h
-    src/transcription/whispercpptranscriber.cpp
-    src/transcription/whispercpptranscriber.h
 )
 
+if(MUTTERKEY_ENABLE_LEGACY_WHISPER)
+    list(APPEND MUTTERKEY_CORE_SOURCES
+        src/transcription/whispercpptranscriber.cpp
+        src/transcription/whispercpptranscriber.h
+    )
+endif()
+
 set(MUTTERKEY_CONTROL_SOURCES
     src/control/daemoncontrolclient.cpp
     src/control/daemoncontrolclient.h
@@ -114,6 +126,10 @@ set_target_properties(mutterkey-tray PROPERTIES
     INSTALL_RPATH "$ORIGIN/../lib"
 )
 
+if(MUTTERKEY_ENABLE_LEGACY_WHISPER)
+    target_compile_definitions(mutterkey_core PRIVATE MUTTERKEY_WITH_LEGACY_WHISPER)
+endif()
+
 function(mutterkey_enable_sanitizers target_name)
     if(NOT CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang|AppleClang")
         message(WARNING "Sanitizers were requested, but ${CMAKE_CXX_COMPILER_ID} is not configured for repo-owned sanitizer flags")
@@ -203,47 +219,53 @@ else()
     message(STATUS "Doxygen not found; the docs target will be unavailable")
 endif()
 
-if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/third_party/whisper.cpp/CMakeLists.txt")
-    message(FATAL_ERROR "Vendored whisper.cpp dependency is missing from third_party/whisper.cpp")
-endif()
-
-set(WHISPER_BUILD_TESTS OFF CACHE BOOL "" FORCE)
-set(WHISPER_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
-set(WHISPER_BUILD_SERVER OFF CACHE BOOL "" FORCE)
-set(WHISPER_SANITIZE_ADDRESS ${MUTTERKEY_ENABLE_ASAN} CACHE BOOL "" FORCE)
-set(WHISPER_SANITIZE_UNDEFINED ${MUTTERKEY_ENABLE_UBSAN} CACHE BOOL "" FORCE)
-set(GGML_CUDA ${MUTTERKEY_ENABLE_WHISPER_CUDA} CACHE BOOL "" FORCE)
-set(GGML_VULKAN ${MUTTERKEY_ENABLE_WHISPER_VULKAN} CACHE BOOL "" FORCE)
-set(GGML_BLAS ${MUTTERKEY_ENABLE_WHISPER_BLAS} CACHE BOOL "" FORCE)
-set(GGML_BLAS_VENDOR ${MUTTERKEY_WHISPER_BLAS_VENDOR} CACHE STRING "" FORCE)
-add_subdirectory(third_party/whisper.cpp EXCLUDE_FROM_ALL)
-
-# Mutterkey ships the vendored shared libraries, but it does not install their
-# upstream public headers as part of its own package layout.
-set_target_properties(whisper ggml PROPERTIES PUBLIC_HEADER "")
+if(MUTTERKEY_ENABLE_LEGACY_WHISPER)
+    if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/third_party/whisper.cpp/CMakeLists.txt")
+        message(FATAL_ERROR "Vendored whisper.cpp dependency is missing from third_party/whisper.cpp")
+    endif()
 
-target_link_libraries(mutterkey_core PRIVATE whisper)
+    set(WHISPER_BUILD_TESTS OFF CACHE BOOL "" FORCE)
+    set(WHISPER_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
+    set(WHISPER_BUILD_SERVER OFF CACHE BOOL "" FORCE)
+    set(WHISPER_SANITIZE_ADDRESS ${MUTTERKEY_ENABLE_ASAN} CACHE BOOL "" FORCE)
+    set(WHISPER_SANITIZE_UNDEFINED ${MUTTERKEY_ENABLE_UBSAN} CACHE BOOL "" FORCE)
+    set(GGML_CUDA ${MUTTERKEY_ENABLE_WHISPER_CUDA} CACHE BOOL "" FORCE)
+    set(GGML_VULKAN ${MUTTERKEY_ENABLE_WHISPER_VULKAN} CACHE BOOL "" FORCE)
+    set(GGML_BLAS ${MUTTERKEY_ENABLE_WHISPER_BLAS} CACHE BOOL "" FORCE)
+    set(GGML_BLAS_VENDOR ${MUTTERKEY_WHISPER_BLAS_VENDOR} CACHE STRING "" FORCE)
+    add_subdirectory(third_party/whisper.cpp EXCLUDE_FROM_ALL)
+
+    # Mutterkey ships the vendored shared libraries, but it does not install their
+    # upstream public headers as part of its own package layout.
+    set_target_properties(whisper ggml PROPERTIES PUBLIC_HEADER "")
+
+    target_link_libraries(mutterkey_core PRIVATE whisper)
+endif()
 
 install(TARGETS mutterkey RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 install(TARGETS mutterkey-tray RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-install(TARGETS whisper ggml ggml-base
-    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
-)
-if(TARGET ggml-cpu)
-    install(TARGETS ggml-cpu LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
-endif()
-if(TARGET ggml-cuda)
-    install(TARGETS ggml-cuda LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
-endif()
-if(TARGET ggml-vulkan)
-    install(TARGETS ggml-vulkan LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
-endif()
-if(TARGET ggml-blas)
-    install(TARGETS ggml-blas LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
+if(MUTTERKEY_ENABLE_LEGACY_WHISPER)
+    install(TARGETS whisper ggml ggml-base
+        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    )
+    if(TARGET ggml-cpu)
+        install(TARGETS ggml-cpu LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
+    endif()
+    if(TARGET ggml-cuda)
+        install(TARGETS ggml-cuda LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
+    endif()
+    if(TARGET ggml-vulkan)
+        install(TARGETS ggml-vulkan LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
+    endif()
+    if(TARGET ggml-blas)
+        install(TARGETS ggml-blas LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
+    endif()
 endif()
 install(FILES contrib/org.mutterkey.mutterkey.desktop DESTINATION ${CMAKE_INSTALL_DATADIR}/applications)
 install(FILES LICENSE THIRD_PARTY_NOTICES.md DESTINATION ${MUTTERKEY_LICENSE_INSTALL_DIR})
-install(FILES third_party/whisper.cpp/LICENSE DESTINATION ${MUTTERKEY_LICENSE_INSTALL_DIR}/third_party/whisper.cpp)
+if(MUTTERKEY_ENABLE_LEGACY_WHISPER)
+    install(FILES third_party/whisper.cpp/LICENSE DESTINATION ${MUTTERKEY_LICENSE_INSTALL_DIR}/third_party/whisper.cpp)
+endif()
 
 if(BUILD_TESTING)
     find_package(Qt6 REQUIRED COMPONENTS Test)
diff --git a/README.md b/README.md
index 8c57c67..ca39424 100644
--- a/README.md
+++ b/README.md
@@ -131,6 +131,10 @@ This installs:
 - `~/.local/lib/libwhisper.so*` and the required `ggml` libraries
 - `~/.local/share/applications/org.mutterkey.mutterkey.desktop`
 
+If you configure with `-DMUTTERKEY_ENABLE_LEGACY_WHISPER=OFF`, Mutterkey builds
+without the vendored `whisper.cpp` runtime and does not install the legacy
+`libwhisper` / `ggml` shared libraries.
+
 Optional acceleration flags:
 
 ```bash
@@ -164,6 +168,7 @@ Notes:
 - `MUTTERKEY_ENABLE_WHISPER_VULKAN=ON` is for Vulkan-capable GPUs and requires Vulkan development headers and loader libraries
 - `MUTTERKEY_ENABLE_WHISPER_BLAS=ON` improves CPU inference speed rather than enabling GPU execution
 - these options are forwarded to the vendored `whisper.cpp` / `ggml` build and install any resulting backend libraries alongside Mutterkey
+- `-DMUTTERKEY_ENABLE_LEGACY_WHISPER=OFF` disables the vendored runtime entirely and skips all `whisper.cpp` / `ggml` install targets
 
 ### 2. Put a model on disk
 
diff --git a/RELEASE_CHECKLIST.md b/RELEASE_CHECKLIST.md
index 0664d52..0f66190 100644
--- a/RELEASE_CHECKLIST.md
+++ b/RELEASE_CHECKLIST.md
@@ -41,6 +41,10 @@ bash scripts/run-release-checklist.sh
 bash scripts/run-release-checklist.sh -- -DMUTTERKEY_ENABLE_WHISPER_CUDA=ON
 ```
 
+```bash
+bash scripts/run-release-checklist.sh -- -DMUTTERKEY_ENABLE_LEGACY_WHISPER=OFF
+```
+
 - The script intentionally stops before install validation and still prints the
   remaining manual review items that need human judgment.
 
@@ -69,6 +73,8 @@ cmake -S . -B "$BUILD_DIR" -G Ninja -DCMAKE_BUILD_TYPE=Debug -DMUTTERKEY_ENABLE_
   - `MUTTERKEY_ENABLE_WHISPER_CUDA=ON`: NVIDIA GPU build through vendored `ggml`
   - `MUTTERKEY_ENABLE_WHISPER_VULKAN=ON`: Vulkan GPU build through vendored `ggml`
   - `MUTTERKEY_ENABLE_WHISPER_BLAS=ON`: faster CPU inference, not GPU execution
+  - `MUTTERKEY_ENABLE_LEGACY_WHISPER=OFF`: disable vendored `whisper.cpp` /
+    `ggml` entirely and validate the native-runtime-only build path
   - choose the backend intentionally for the release artifact and record that choice in release notes or packaging docs when relevant
 
 - Build:
@@ -140,14 +146,18 @@ cmake --install "$BUILD_DIR" --prefix "$INSTALL_DIR"
 - Confirm the installed tree contains:
   - `bin/mutterkey`
   - `bin/mutterkey-tray`
-  - required `libwhisper` / `ggml` shared libraries
   - the desktop file under `share/applications`
   - license files under `share/licenses/mutterkey`
+- If the release keeps legacy whisper support enabled, also confirm the
+  installed tree contains:
+  - required `libwhisper` / `ggml` shared libraries
 - If acceleration was enabled for the release, also confirm the installed tree
   contains the expected backend library:
   - `libggml-cuda.so*` for `MUTTERKEY_ENABLE_WHISPER_CUDA=ON`
   - `libggml-vulkan.so*` for `MUTTERKEY_ENABLE_WHISPER_VULKAN=ON`
   - `libggml-blas.so*` for `MUTTERKEY_ENABLE_WHISPER_BLAS=ON`
+- If the release disables legacy whisper support, confirm the installed tree
+  does not contain vendored `libwhisper` / `ggml` shared libraries by mistake.
 - Do not expect vendored upstream public headers to be installed; Mutterkey's
   install rules ship the runtime libraries but intentionally clear vendored
   `PUBLIC_HEADER` metadata to avoid upstream header-install warnings.
@@ -204,6 +214,11 @@ MODEL_OUT="$(mktemp -d /tmp/mutterkey-release-model-XXXXXX)/base-en"
   repo-owned API docs or docs/CI wiring.
 - Confirm the docs describe native Mutterkey model packages as the canonical
   artifact and raw Whisper `.bin` files as migration compatibility only.
+- Confirm the docs also reflect the current runtime state accurately:
+  - a product-owned native CPU reference runtime now exists
+  - `MUTTERKEY_ENABLE_LEGACY_WHISPER=OFF` is supported for native-only builds
+  - `whisper.cpp` is still the only real end-user speech decoder unless the
+    release intentionally changes that
 - Confirm the documented recommended path is still the `systemd --user` service.
 - Confirm [contrib/mutterkey.service](contrib/mutterkey.service) matches the
   recommended installed-binary setup.
@@ -215,6 +230,9 @@ MODEL_OUT="$(mktemp -d /tmp/mutterkey-release-model-XXXXXX)/base-en"
   - CUDA/Vulkan releases should not log only `registered backend CPU`
   - CPU-accelerated BLAS releases may still be CPU-only, but should be tested
     against a representative Whisper model and expected performance target
+- If the release is intended to rely on the native runtime path, verify
+  diagnostics on a representative machine show the native runtime and an
+  explicit native selection reason rather than a legacy whisper fallback.
 
 ## Vendored whisper.cpp Updates
 
diff --git a/docs/mainpage.md b/docs/mainpage.md
index d0455b5..a745f8b 100644
--- a/docs/mainpage.md
+++ b/docs/mainpage.md
@@ -13,7 +13,7 @@ Current behavior:
 
 - registers a global shortcut through `KGlobalAccel`
 - records microphone audio while the shortcut is held
-- transcribes locally through an embedded `whisper.cpp` backend
+- transcribes locally through the configured runtime path
 - copies the resulting text to the clipboard
 - expects you to paste the text yourself with `Ctrl+V`
 
@@ -26,12 +26,16 @@ Current runtime shape:
   transcript events
 - `BackendCapabilities` reports static backend support used for orchestration
 - `RuntimeDiagnostics` reports runtime/device/model inspection data separately
-  from static capabilities
+  from static capabilities, including runtime-selection reasoning
 - `RuntimeError` and `RuntimeErrorCode` provide typed runtime failures
 - `ModelCatalog`, `ModelPackage`, and `ModelValidator` own model inspection,
   compatibility checks, and integrity validation before backend load
 - raw Whisper `.bin` files are handled only through an explicit compatibility
   path and import flow
+- `RuntimeSelector` owns runtime-selection policy instead of burying that logic
+  in the generic factory
+- `CpuReferenceModelHandle` and related native model helpers own the current
+  product-owned CPU reference model loading boundary
 - `TranscriptionWorker` hosts transcription on a dedicated `QThread` and
   creates live sessions lazily on that worker thread
 - the shipped daemon and `once` flows still use a compatibility wrapper that
@@ -51,8 +55,12 @@ Core API surface covered here:
   artifacts into validated product-owned model metadata.
 - `RawWhisperImporter` converts raw whisper.cpp-compatible `ggml` `.bin` files
   into native Mutterkey packages.
+- `RuntimeSelector` decides which runtime implementation should handle a given
+  configured model path and records the reason in diagnostics.
 - `TranscriptionEngine` and `TranscriptionSession` define the app-owned runtime
   seam.
+- `CpuReferenceTranscriber` provides the current product-owned native CPU
+  reference runtime scaffold.
 - `WhisperCppTranscriber` performs in-process transcription through vendored
   `whisper.cpp`.
 - `ClipboardWriter` copies the resulting text to the clipboard.
@@ -65,7 +73,11 @@ Current product direction:
 - local-only transcription
 - CLI/service-first operation
 - tray-shell work exists, but the daemon remains the product core
-- `whisper.cpp` is still the only supported backend implementation
+- a product-owned native CPU reference runtime now exists for ownership,
+  packaging, and conformance work
+- `whisper.cpp` is still the only real end-user speech decoder today
+- the vendored runtime is now optional at build time through
+  `MUTTERKEY_ENABLE_LEGACY_WHISPER=OFF`
 
 For build, runtime, release, and service setup use the repository `README.md`
 and `RELEASE_CHECKLIST.md`.
diff --git a/src/service.cpp b/src/service.cpp
index b7c39c7..26d68e2 100644
--- a/src/service.cpp
+++ b/src/service.cpp
@@ -84,6 +84,7 @@ QJsonObject MutterkeyService::diagnostics() const
         m_transcriptionWorker != nullptr ? m_transcriptionWorker->runtimeDiagnostics() : m_transcriptionEngine->diagnostics();
     object.insert(QStringLiteral("transcriber_model"), runtimeDiagnostics.loadedModelDescription);
     object.insert(QStringLiteral("transcriber_runtime"), runtimeDiagnostics.runtimeDescription);
+    object.insert(QStringLiteral("transcriber_selection_reason"), runtimeDiagnostics.selectionReason);
     const BackendCapabilities capabilities =
         m_transcriptionWorker != nullptr ? m_transcriptionWorker->capabilities() : m_transcriptionEngine->capabilities();
     object.insert(QStringLiteral("transcriber_supports_translation"), capabilities.supportsTranslation);
diff --git a/src/transcription/cpureferencemodel.cpp b/src/transcription/cpureferencemodel.cpp
new file mode 100644
index 0000000..99fc5cd
--- /dev/null
+++ b/src/transcription/cpureferencemodel.cpp
@@ -0,0 +1,153 @@
+#include "transcription/cpureferencemodel.h"
+
+#include <QElapsedTimer>
+#include <QFile>
+#include <QFileInfo>
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+
+namespace {
+
+constexpr std::array<char, 8> kCpuReferenceMagic{'M', 'K', 'C', 'P', 'U', 'R', '1', '\0'};
+constexpr std::uint32_t kCpuReferenceVersion = 1;
+
+RuntimeError makeRuntimeError(RuntimeErrorCode code, QString message, QString detail = {})
+{
+    return {.code = code, .message = std::move(message), .detail = std::move(detail)}; // strings are moved, not copied
+}
+
+// Fills `header` from the next sizeof(CpuReferenceModelHeader) bytes of `file`; false on null arguments or a short read.
+bool readModelHeader(QFile *file, CpuReferenceModelHeader *header)
+{
+    constexpr auto kHeaderSize = sizeof(CpuReferenceModelHeader);
+    if (file == nullptr || header == nullptr) {
+        return false;
+    }
+    const QByteArray raw = file->read(static_cast<qint64>(kHeaderSize));
+    const bool complete = raw.size() == static_cast<qsizetype>(kHeaderSize);
+    if (complete) {
+        std::memcpy(header, raw.constData(), kHeaderSize); // raw copy: assumes the file matches the struct layout
+    }
+    return complete;
+}
+
+/**
+ * @brief Loads and validates a native CPU reference weights file.
+ *
+ * Checks run in a fixed order (open, header read, magic, version, payload size,
+ * payload read, decoded text), and the first failing check decides the error.
+ *
+ * @param path Location of the weights file.
+ * @param error Optional out-parameter; written only when a check fails.
+ * @return Parsed model data, or std::nullopt when any check fails.
+ */
+std::optional<CpuReferenceModelData> loadCpuReferenceModelData(const QString &path, RuntimeError *error)
+{
+    // Failure whose detail is the absolute weights path; the path is resolved only when it is reported.
+    const auto rejectAtPath = [&path, error](const QString &message) -> std::optional<CpuReferenceModelData> {
+        if (error != nullptr) {
+            *error = makeRuntimeError(RuntimeErrorCode::ModelLoadFailed, message, QFileInfo(path).absoluteFilePath());
+        }
+        return std::nullopt;
+    };
+
+    // Failure whose detail is the offending header field, rendered as a decimal number.
+    const auto rejectWithValue = [error](RuntimeErrorCode code, const QString &message, std::uint32_t value)
+        -> std::optional<CpuReferenceModelData> {
+        if (error != nullptr) {
+            *error = makeRuntimeError(code, message, QString::number(value));
+        }
+        return std::nullopt;
+    };
+
+    QFile file(path);
+    if (!file.open(QIODevice::ReadOnly)) {
+        return rejectAtPath(QStringLiteral("Failed to open native CPU model weights"));
+    }
+
+    // Header checks: short read first, then the magic bytes, then the format version.
+    CpuReferenceModelHeader header;
+    if (!readModelHeader(&file, &header)) {
+        return rejectAtPath(QStringLiteral("Native CPU model header is truncated"));
+    }
+    if (header.magic != kCpuReferenceMagic) {
+        return rejectAtPath(QStringLiteral("Native CPU model has an invalid header"));
+    }
+    if (header.version != kCpuReferenceVersion) {
+        return rejectWithValue(RuntimeErrorCode::UnsupportedModelPackageVersion,
+                               QStringLiteral("Native CPU model version is unsupported"),
+                               header.version);
+    }
+
+    // The declared payload must hold between 1 and 4096 bytes.
+    const std::uint32_t payloadSize = header.transcriptBytes;
+    const bool payloadSizeInRange = payloadSize >= 1U && payloadSize <= 4096U;
+    if (!payloadSizeInRange) {
+        return rejectWithValue(RuntimeErrorCode::ModelLoadFailed,
+                               QStringLiteral("Native CPU model transcript payload is invalid"),
+                               payloadSize);
+    }
+
+    // A short read is reported as a truncated payload.
+    const QByteArray payload = file.read(static_cast<qint64>(payloadSize));
+    if (payload.size() != static_cast<qsizetype>(payloadSize)) {
+        return rejectAtPath(QStringLiteral("Native CPU model transcript payload is truncated"));
+    }
+
+    // Decode as UTF-8 and trim; a payload that is empty after trimming is rejected.
+    const QString transcript = QString::fromUtf8(payload).trimmed();
+    if (transcript.isEmpty()) {
+        return rejectAtPath(QStringLiteral("Native CPU model transcript payload is empty"));
+    }
+
+    return CpuReferenceModelData{.transcript = transcript};
+}
+
+} // namespace
+
+CpuReferenceModelHandle::CpuReferenceModelHandle(ValidatedModelPackage package, CpuReferenceModelData model, qint64 loadTimeMs)
+    : m_package(std::move(package)) // by-value parameters are moved into the members
+    , m_model(std::move(model))
+    , m_loadTimeMs(loadTimeMs) // reported later by modelDescription()
+{
+}
+
+QString CpuReferenceModelHandle::backendName() const
+{
+    return cpuReferenceEngineName(); // helper declared in an included header, not in this file
+}
+
+ModelMetadata CpuReferenceModelHandle::metadata() const
+{
+    return m_package.metadata(); // forwarded unchanged from the validated package
+}
+
+QString CpuReferenceModelHandle::modelDescription() const
+{ // Both values go in with one arg() call so '%N' text inside the package description is never re-expanded.
+    return QStringLiteral("%1; load=%2 ms").arg(m_package.description(), QString::number(m_loadTimeMs));
+}
+
+const QString &CpuReferenceModelHandle::transcript() const
+{
+    return m_model.transcript; // reference to a member: valid only while this handle is alive
+}
+
+// Loads the weights named by `package.weightsPath` and wraps them in an immutable handle.
+// Returns nullptr on failure; a non-null `error` is filled in by the weights loader in that case.
+std::shared_ptr<const CpuReferenceModelHandle> loadCpuReferenceModelHandle(const ValidatedModelPackage &package, RuntimeError *error)
+{
+    QElapsedTimer stopwatch;
+    stopwatch.start();
+    const auto parsed = loadCpuReferenceModelData(package.weightsPath, error);
+    // The measured time spans the weights parse and is stored on the handle for diagnostics.
+    return parsed.has_value() ? std::make_shared<CpuReferenceModelHandle>(package, *parsed, stopwatch.elapsed())
+                              : std::shared_ptr<CpuReferenceModelHandle>{};
+}
+
+std::shared_ptr<const CpuReferenceModelHandle>
+resolveCpuReferenceModelHandle(std::shared_ptr<const TranscriptionModelHandle> model)
+{
+    return std::dynamic_pointer_cast<const CpuReferenceModelHandle>(std::move(model)); // null when the handle is another type
+}
diff --git a/src/transcription/cpureferencemodel.h b/src/transcription/cpureferencemodel.h
new file mode 100644
index 0000000..e8f4412
--- /dev/null
+++ b/src/transcription/cpureferencemodel.h
@@ -0,0 +1,82 @@
+#pragma once
+
+#include "transcription/modelpackage.h"
+#include "transcription/transcriptionengine.h"
+#include "transcription/transcriptiontypes.h"
+
+#include <array>
+#include <cstdint>
+#include <memory>
+#include <optional>
+
+/**
+ * @file
+ * @brief Native CPU reference model format definitions and loading helpers.
+ */
+
+/**
+ * @brief Fixed header stored at the start of a native CPU reference weights asset.
+ */
+struct CpuReferenceModelHeader {
+    /// File-format magic. Kept ASCII for stable inspection and tests.
+    std::array<char, 8> magic{};
+    /// Native CPU reference weights format version.
+    std::uint32_t version = 0;
+    /// UTF-8 transcript payload size used by the deterministic reference fixture format.
+    std::uint32_t transcriptBytes = 0;
+};
+
+/**
+ * @brief Deterministic native CPU reference model payload loaded from disk.
+ */
+struct CpuReferenceModelData {
+    /// Final transcript emitted by the deterministic reference runtime fixture.
+    QString transcript;
+};
+
+/**
+ * @brief Immutable loaded native CPU model handle owned by the runtime.
+ */
+class CpuReferenceModelHandle final : public TranscriptionModelHandle
+{
+public:
+    /**
+     * @brief Creates an immutable native CPU reference model handle.
+     * @param package Validated package metadata and resolved asset paths.
+     * @param model Parsed deterministic reference model payload.
+     * @param loadTimeMs Measured load time in milliseconds for diagnostics.
+     */
+    CpuReferenceModelHandle(ValidatedModelPackage package, CpuReferenceModelData model, qint64 loadTimeMs);
+
+    [[nodiscard]] QString backendName() const override;
+    [[nodiscard]] ModelMetadata metadata() const override;
+    [[nodiscard]] QString modelDescription() const override;
+
+    /**
+     * @brief Returns the deterministic transcript fixture embedded in this model.
+     * @return Transcript text emitted by the reference runtime.
+     */
+    [[nodiscard]] const QString &transcript() const;
+
+private:
+    ValidatedModelPackage m_package;
+    CpuReferenceModelData m_model;
+    qint64 m_loadTimeMs = 0;
+};
+
+/**
+ * @brief Loads a native CPU reference model handle from a validated package.
+ * @param package Validated native package resolved by the model catalog.
+ * @param error Optional output for format or IO failures.
+ * @return Shared immutable model handle on success.
+ */
+[[nodiscard]] std::shared_ptr<const CpuReferenceModelHandle>
+loadCpuReferenceModelHandle(const ValidatedModelPackage &package, RuntimeError *error = nullptr);
+
+/**
+ * @brief Downcasts a generic model handle to the native CPU reference handle type.
+ * @param model Shared generic runtime model handle.
+ * @return Native CPU reference model handle on success.
+ */
+[[nodiscard]] std::shared_ptr<const CpuReferenceModelHandle>
+resolveCpuReferenceModelHandle(std::shared_ptr<const TranscriptionModelHandle> model);
diff --git a/src/transcription/cpureferencetranscriber.cpp b/src/transcription/cpureferencetranscriber.cpp
new file mode 100644
index 0000000..fb24aca
--- /dev/null
+++ b/src/transcription/cpureferencetranscriber.cpp
@@ -0,0 +1,280 @@
+#include "transcription/cpureferencetranscriber.h"
+
+#include "transcription/modelcatalog.h"
+#include "transcription/runtimeselector.h"
+
+#include <QLoggingCategory>
+#include <QThread>
+
+#include <algorithm>
+#include <cmath>
+#include <utility>
+
+namespace {
+
+Q_LOGGING_CATEGORY(cpuReferenceLog, "mutterkey.transcriber.cpu_reference")
+
+constexpr int kExpectedSampleRate = 16000;
+constexpr float kMinimumSpeechPeak = 0.02F;
+
+RuntimeError makeRuntimeError(RuntimeErrorCode code, QString message, QString detail = {})
+{
+    return RuntimeError{.code = code, .message = std::move(message), .detail = std::move(detail)};
+}
+
+RuntimeError makeUnsupportedLanguageError(const QString &language)
+{
+    return makeRuntimeError(RuntimeErrorCode::UnsupportedLanguage,
+                            QStringLiteral("Native CPU reference runtime does not support language: %1").arg(language),
+                            QStringLiteral("Requested language code: %1").arg(language));
+}
+
+QString normalizedLanguage(const QString &value)
+{
+    return value.trimmed().toLower();
+}
+
+// Reports whether a model with the given language profile can serve the request.
+// An empty or "auto" request is only served by a "multilingual" profile; an
+// explicit code is served by a "multilingual" profile or an identical profile.
+bool supportsLanguage(const QString &languageProfile, const QString &requestedLanguage)
+{
+    const bool multilingualModel = languageProfile == QStringLiteral("multilingual");
+    const bool wantsAutoDetect = requestedLanguage.isEmpty() || requestedLanguage == QStringLiteral("auto");
+    if (multilingualModel || wantsAutoDetect) {
+        return multilingualModel;
+    }
+    return requestedLanguage == languageProfile;
+}
+
+float peakAbsoluteSample(const std::vector<float> &samples)
+{
+    float loudest = 0.0F; // Stays 0 for an empty buffer.
+    std::for_each(samples.begin(), samples.end(), [&loudest](const float value) {
+        loudest = std::max(loudest, std::abs(value));
+    });
+    return loudest;
+}
+
+} // namespace
+
+std::shared_ptr<const TranscriptionModelHandle>
+CpuReferenceTranscriber::loadModelHandle(const TranscriberConfig &config, RuntimeError *error)
+{
+    const std::optional<ValidatedModelPackage> package =
+        ModelCatalog::inspectPath(config.modelPath, cpuReferenceEngineName(), cpuReferenceModelFormat(), error);
+    // Report success only once the weights asset has actually been parsed, not merely inspected.
+    auto handle = package.has_value() ? loadCpuReferenceModelHandle(*package, error) : nullptr;
+    if (handle != nullptr) {
+        qCInfo(cpuReferenceLog) << "Loaded native CPU reference model from" << package->weightsPath;
+    }
+    return handle;
+}
+
+std::unique_ptr<TranscriptionSession>
+CpuReferenceTranscriber::createSession(TranscriberConfig config, std::shared_ptr<const TranscriptionModelHandle> model)
+{
+    // Reject handles that are not native CPU reference handles before building a session.
+    auto nativeModel = resolveCpuReferenceModelHandle(std::move(model));
+    if (nativeModel != nullptr) {
+        return std::make_unique<CpuReferenceTranscriber>(std::move(config), std::move(nativeModel));
+    }
+    return nullptr;
+}
+
+CpuReferenceTranscriber::CpuReferenceTranscriber(TranscriberConfig config, std::shared_ptr<const TranscriptionModelHandle> model)
+    : m_config(std::move(config))
+    , m_model(resolveCpuReferenceModelHandle(std::move(model)))
+{
+}
+
+CpuReferenceTranscriber::~CpuReferenceTranscriber() = default;
+
+QString CpuReferenceTranscriber::backendNameStatic()
+{
+    return cpuReferenceEngineName();
+}
+
+BackendCapabilities CpuReferenceTranscriber::capabilitiesStatic()
+{
+    return BackendCapabilities{
+        .backendName = backendNameStatic(),
+        .supportedLanguages = {QStringLiteral("en")},
+        .supportsAutoLanguage = false,
+        .supportsTranslation = false,
+        .supportsWarmup = true,
+    };
+}
+
+RuntimeDiagnostics CpuReferenceTranscriber::diagnosticsStatic()
+{
+    return RuntimeDiagnostics{
+        .backendName = backendNameStatic(),
+        .selectionReason = QStringLiteral("Native CPU reference runtime selected explicitly"),
+        .runtimeDescription = QStringLiteral("native cpu reference runtime; threads=%1")
+                                  .arg(std::max(1, QThread::idealThreadCount())),
+        .loadedModelDescription = {},
+    };
+}
+
+QString CpuReferenceTranscriber::backendName() const
+{
+    return backendNameStatic();
+}
+
+bool CpuReferenceTranscriber::warmup(RuntimeError *error)
+{
+    // With a model attached, warmup only resets the cancel flag and marks the session ready.
+    const bool hasModel = m_model != nullptr;
+    if (hasModel) {
+        m_cancelRequested = false;
+        m_warmedUp = true;
+    } else if (error != nullptr) {
+        // The error out-parameter is optional; fill it only when the caller supplied one.
+        *error = makeRuntimeError(RuntimeErrorCode::InternalRuntimeError,
+                                  QStringLiteral("Native CPU reference model handle is not available"));
+    }
+    return hasModel;
+}
+
+TranscriptUpdate CpuReferenceTranscriber::pushAudioChunk(const AudioChunk &chunk)
+{
+    // Both rejections share the same error code and differ only in the message.
+    const auto rejected = [](QString message) {
+        return TranscriptUpdate{
+            .events = {},
+            .error = makeRuntimeError(RuntimeErrorCode::AudioNormalizationFailed, std::move(message)),
+        };
+    };
+
+    if (!chunk.isValid()) {
+        return rejected(QStringLiteral("Streaming audio chunk is empty"));
+    }
+    if (!(chunk.sampleRate == kExpectedSampleRate && chunk.channels == 1)) {
+        return rejected(QStringLiteral("Native CPU reference runtime requires 16 kHz mono chunks"));
+    }
+
+    // Accepted chunks are only accumulated here; this call itself returns an empty update.
+    m_bufferedSamples.insert(m_bufferedSamples.end(), chunk.samples.begin(), chunk.samples.end());
+    return {};
+}
+
+TranscriptUpdate CpuReferenceTranscriber::finish()
+{
+    // Drain the buffer up front so every exit path, including the error
+    // returns below, leaves no stale audio behind for a later finish().
+    const std::vector<float> samples = std::exchange(m_bufferedSamples, {});
+    if (m_cancelRequested) {
+        return TranscriptUpdate{
+            .events = {},
+            .error = makeRuntimeError(RuntimeErrorCode::Cancelled,
+                                      QStringLiteral("Native CPU reference transcription was cancelled")),
+        };
+    }
+
+    if (m_model == nullptr) {
+        return TranscriptUpdate{
+            .events = {},
+            .error = makeRuntimeError(RuntimeErrorCode::InternalRuntimeError,
+                                      QStringLiteral("Native CPU reference model handle is not available")),
+        };
+    }
+
+    if (m_config.translate) {
+        return TranscriptUpdate{
+            .events = {},
+            .error = makeRuntimeError(RuntimeErrorCode::InvalidConfig,
+                                      QStringLiteral("Native CPU reference runtime does not support translation mode")),
+        };
+    }
+
+    const QString requestedLanguage = normalizedLanguage(m_config.language);
+    if (!supportsLanguage(m_model->metadata().languageProfile, requestedLanguage)) {
+        return TranscriptUpdate{
+            .events = {},
+            .error = makeUnsupportedLanguageError(requestedLanguage),
+        };
+    }
+
+    if (!m_warmedUp) {
+        RuntimeError warmupError;
+        if (!warmup(&warmupError)) {
+            return TranscriptUpdate{.events = {}, .error = std::move(warmupError)};
+        }
+    }
+    // Empty or near-silent audio produces neither events nor an error.
+    if (samples.empty() || peakAbsoluteSample(samples) < kMinimumSpeechPeak) {
+        return {};
+    }
+    // Duration follows from the sample count at the expected rate, floored at 1 ms.
+    const auto durationMs = std::max<std::int64_t>(
+        1,
+        static_cast<std::int64_t>((static_cast<double>(samples.size()) * 1000.0) / kExpectedSampleRate));
+    const QString transcript = m_model->transcript().trimmed();
+    return TranscriptUpdate{
+        .events = {
+            TranscriptEvent{
+                .kind = TranscriptEventKind::Final,
+                .text = transcript,
+                .startMs = 0,
+                .endMs = durationMs,
+            },
+        },
+        .error = {},
+    };
+}
+
+TranscriptUpdate CpuReferenceTranscriber::cancel()
+{
+    // Discard pending audio, then latch the flag that finish() checks.
+    m_bufferedSamples.clear();
+    m_cancelRequested = true;
+
+    RuntimeError cancelled = makeRuntimeError(RuntimeErrorCode::Cancelled,
+                                              QStringLiteral("Native CPU reference transcription was cancelled"));
+    return TranscriptUpdate{.events = {}, .error = std::move(cancelled)};
+}
+
+namespace {
+
+class CpuReferenceTranscriptionEngine final : public TranscriptionEngine
+{
+public:
+    explicit CpuReferenceTranscriptionEngine(TranscriberConfig config)
+        : m_config(std::move(config))
+    {
+    }
+    // Capabilities come straight from the transcriber's static snapshot.
+    [[nodiscard]] BackendCapabilities capabilities() const override
+    {
+        return CpuReferenceTranscriber::capabilitiesStatic();
+    }
+    // Static diagnostics, with the selection reason recomputed from the stored config on every call.
+    [[nodiscard]] RuntimeDiagnostics diagnostics() const override
+    {
+        RuntimeDiagnostics diagnostics = CpuReferenceTranscriber::diagnosticsStatic();
+        diagnostics.selectionReason = selectRuntimeForConfig(m_config).reason; // NOTE(review): re-inspects the model path each call.
+        return diagnostics;
+    }
+    // Loads the model named by the stored config; failures are reported through error when provided.
+    [[nodiscard]] std::shared_ptr<const TranscriptionModelHandle> loadModel(RuntimeError *error) const override
+    {
+        return CpuReferenceTranscriber::loadModelHandle(m_config, error);
+    }
+    // Creates a session from a copy of the stored config; null when the handle is not a native CPU handle.
+    [[nodiscard]] std::unique_ptr<TranscriptionSession>
+    createSession(std::shared_ptr<const TranscriptionModelHandle> model) const override
+    {
+        return CpuReferenceTranscriber::createSession(m_config, std::move(model));
+    }
+
+private:
+    TranscriberConfig m_config; // Captured at construction; used for diagnostics, loads, and sessions.
+};
+
+} // namespace
+
+std::shared_ptr<const TranscriptionEngine> createCpuReferenceTranscriptionEngine(const TranscriberConfig &config)
+{
+    return std::make_shared<CpuReferenceTranscriptionEngine>(config);
+}
diff --git a/src/transcription/cpureferencetranscriber.h b/src/transcription/cpureferencetranscriber.h
new file mode 100644
index 0000000..bfea93c
--- /dev/null
+++ b/src/transcription/cpureferencetranscriber.h
@@ -0,0 +1,95 @@
+#pragma once
+
+#include "config.h"
+#include "transcription/cpureferencemodel.h"
+#include "transcription/modelpackage.h"
+#include "transcription/transcriptionengine.h"
+#include "transcription/transcriptiontypes.h"
+
+#include <memory>
+
+/**
+ * @file
+ * @brief Product-owned deterministic CPU reference runtime.
+ */
+
+/**
+ * @brief In-process native CPU transcription backend used as the product-owned reference runtime.
+ *
+ * The Phase 5 reference path is intentionally narrow and deterministic. It owns
+ * its model parsing, session state, and transcript emission without exposing any
+ * third-party runtime handles to the rest of the app.
+ */
+class CpuReferenceTranscriber final : public TranscriptionSession
+{
+public:
+    /**
+     * @brief Inspects the configured model path and loads an immutable native CPU model handle.
+     * @param config Runtime configuration; only its model path is read by this call.
+     * @param error Optional output for model validation or load failures.
+     * @return Shared immutable model handle on success, otherwise `nullptr`.
+     */
+    [[nodiscard]] static std::shared_ptr<const TranscriptionModelHandle>
+    loadModelHandle(const TranscriberConfig &config, RuntimeError *error = nullptr);
+
+    /**
+     * @brief Creates a mutable session from a generic app-owned model handle.
+     * @param config Runtime configuration moved into the session.
+     * @param model Shared immutable model handle.
+     * @return Session on success, otherwise `nullptr` when the model type is incompatible.
+     */
+    [[nodiscard]] static std::unique_ptr<TranscriptionSession>
+    createSession(TranscriberConfig config, std::shared_ptr<const TranscriptionModelHandle> model);
+
+    /**
+     * @brief Creates a mutable native CPU session from a loaded model handle.
+     * @param config Runtime configuration moved into the session.
+     * @param model Shared model handle; a handle of any other type leaves the session without a model.
+     */
+    CpuReferenceTranscriber(TranscriberConfig config, std::shared_ptr<const TranscriptionModelHandle> model);
+
+    ~CpuReferenceTranscriber() override;
+
+    CpuReferenceTranscriber(const CpuReferenceTranscriber &) = delete;
+    CpuReferenceTranscriber &operator=(const CpuReferenceTranscriber &) = delete;
+    CpuReferenceTranscriber(CpuReferenceTranscriber &&) = delete;
+    CpuReferenceTranscriber &operator=(CpuReferenceTranscriber &&) = delete;
+
+    /**
+     * @brief Returns the stable backend identifier used in diagnostics.
+     * @return Product-owned backend name for the CPU reference runtime.
+     */
+    [[nodiscard]] static QString backendNameStatic();
+
+    /**
+     * @brief Returns static capability metadata for the native CPU runtime.
+     * @return Engine-owned capability snapshot.
+     */
+    [[nodiscard]] static BackendCapabilities capabilitiesStatic();
+
+    /**
+     * @brief Returns static runtime diagnostics for the native CPU runtime.
+     * @return Engine-owned diagnostic description.
+     */
+    [[nodiscard]] static RuntimeDiagnostics diagnosticsStatic();
+
+    [[nodiscard]] QString backendName() const override;
+    bool warmup(RuntimeError *error = nullptr) override;
+    [[nodiscard]] TranscriptUpdate pushAudioChunk(const AudioChunk &chunk) override;
+    [[nodiscard]] TranscriptUpdate finish() override;
+    [[nodiscard]] TranscriptUpdate cancel() override;
+
+private:
+    TranscriberConfig m_config; ///< Session configuration taken at construction.
+    std::shared_ptr<const CpuReferenceModelHandle> m_model; ///< Null when the supplied handle was not a native CPU handle.
+    std::vector<float> m_bufferedSamples; ///< Audio accumulated by pushAudioChunk() until finish() or cancel().
+    bool m_warmedUp = false; ///< Set by a successful warmup().
+    bool m_cancelRequested = false; ///< Set by cancel(); cleared again by a successful warmup().
+};
+
+/**
+ * @brief Creates the product-owned CPU reference engine implementation.
+ * @param config Runtime configuration copied into the engine.
+ * @return Engine backed by the native CPU reference runtime.
+ */
+[[nodiscard]] std::shared_ptr<const TranscriptionEngine> createCpuReferenceTranscriptionEngine(const TranscriberConfig &config);
diff --git a/src/transcription/modelcatalog.cpp b/src/transcription/modelcatalog.cpp
index 25e2a04..559cab7 100644
--- a/src/transcription/modelcatalog.cpp
+++ b/src/transcription/modelcatalog.cpp
@@ -19,8 +19,8 @@ ModelPackageManifest legacyManifestForPath(const QString &sourcePath, const Mode
     manifest.schemaVersion = 1;
     manifest.metadata = metadata;
     manifest.compatibleEngines.push_back(ModelCompatibilityMarker{
-        .engine = QStringLiteral("whisper.cpp"),
-        .modelFormat = QStringLiteral("ggml"),
+        .engine = legacyWhisperEngineName(),
+        .modelFormat = legacyWhisperModelFormat(),
     });
     manifest.assets.push_back(ModelAssetMetadata{
         .role = QStringLiteral("weights"),
@@ -62,8 +62,8 @@ std::optional<ValidatedModelPackage> ModelCatalog::inspectPath(const QString &pa
         return std::nullopt;
     }
 
-    if ((!requiredEngine.isEmpty() && requiredEngine != QStringLiteral("whisper.cpp"))
-        || (!requiredModelFormat.isEmpty() && requiredModelFormat != QStringLiteral("ggml"))) {
+    if ((!requiredEngine.isEmpty() && requiredEngine != legacyWhisperEngineName())
+        || (!requiredModelFormat.isEmpty() && requiredModelFormat != legacyWhisperModelFormat())) {
         if (error != nullptr) {
             *error = makeRuntimeError(RuntimeErrorCode::IncompatibleModelPackage,
                                       QStringLiteral("Raw Whisper compatibility artifact does not match the active runtime"));
diff --git a/src/transcription/modelpackage.cpp b/src/transcription/modelpackage.cpp
index ff835e6..9fcd2ca 100644
--- a/src/transcription/modelpackage.cpp
+++ b/src/transcription/modelpackage.cpp
@@ -1,5 +1,6 @@
 #include "transcription/modelpackage.h"
 
+#include <algorithm>
 #include <QDir>
 #include <QJsonArray>
 #include <QStandardPaths>
@@ -130,6 +131,33 @@ QString sanitizePackageId(const QString &value)
     return sanitized;
 }
 
+QString cpuReferenceEngineName()
+{
+    return QStringLiteral("mutterkey.cpu-reference");
+}
+
+QString cpuReferenceModelFormat()
+{
+    return QStringLiteral("mkasr-v1");
+}
+
+QString legacyWhisperEngineName()
+{
+    return QStringLiteral("whisper.cpp");
+}
+
+QString legacyWhisperModelFormat()
+{
+    return QStringLiteral("ggml");
+}
+
+bool modelPackageSupportsCompatibility(const ModelPackageManifest &manifest, QStringView engine, QStringView modelFormat)
+{
+    // A marker counts only when both its engine and its model format equal the requested pair.
+    const auto matchesBoth = [&](const ModelCompatibilityMarker &m) { return m.engine == engine && m.modelFormat == modelFormat; };
+    return std::ranges::any_of(manifest.compatibleEngines, matchesBoth);
+}
+
 QJsonObject modelPackageManifestToJson(const ModelPackageManifest &manifest)
 {
     QJsonObject root;
diff --git a/src/transcription/modelpackage.h b/src/transcription/modelpackage.h
index 4e42ac8..287a15e 100644
--- a/src/transcription/modelpackage.h
+++ b/src/transcription/modelpackage.h
@@ -88,6 +88,41 @@ struct ValidatedModelPackage {
     [[nodiscard]] QString description() const;
 };
 
+/**
+ * @brief Stable engine identifier for the native CPU reference runtime.
+ * @return Product-owned engine marker recorded in package manifests.
+ */
+[[nodiscard]] QString cpuReferenceEngineName();
+
+/**
+ * @brief Stable model-format identifier for the native CPU reference runtime.
+ * @return Product-owned model-format marker recorded in package manifests.
+ */
+[[nodiscard]] QString cpuReferenceModelFormat();
+
+/**
+ * @brief Stable engine identifier for the legacy whisper.cpp adapter.
+ * @return Legacy engine marker recorded in package manifests.
+ */
+[[nodiscard]] QString legacyWhisperEngineName();
+
+/**
+ * @brief Stable model-format identifier for the legacy whisper.cpp adapter.
+ * @return Legacy model-format marker recorded in package manifests.
+ */
+[[nodiscard]] QString legacyWhisperModelFormat();
+
+/**
+ * @brief Reports whether a manifest advertises compatibility with a runtime marker pair.
+ * @param manifest Parsed model package manifest.
+ * @param engine Stable engine identifier to match.
+ * @param modelFormat Stable model-format marker to match.
+ * @return `true` when the manifest contains a matching compatibility marker.
+ */
+[[nodiscard]] bool modelPackageSupportsCompatibility(const ModelPackageManifest &manifest,
+                                                     QStringView engine,
+                                                     QStringView modelFormat);
+
 /**
  * @brief Returns the default root directory for native model packages.
  * @return Default package directory under the app data root.
diff --git a/src/transcription/modelvalidator.cpp b/src/transcription/modelvalidator.cpp
index 6aca76c..d50d499 100644
--- a/src/transcription/modelvalidator.cpp
+++ b/src/transcription/modelvalidator.cpp
@@ -85,6 +85,10 @@ bool hasRequiredCompatibility(const ModelPackageManifest &manifest, QStringView
         return true;
     }
 
+    if (!engine.isEmpty() && !modelFormat.isEmpty()) {
+        return modelPackageSupportsCompatibility(manifest, engine, modelFormat);
+    }
+
     return std::ranges::any_of(manifest.compatibleEngines, [engine, modelFormat](const ModelCompatibilityMarker &marker) {
         const bool engineMatches = engine.isEmpty() || marker.engine == engine;
         const bool formatMatches = modelFormat.isEmpty() || marker.modelFormat == modelFormat;
diff --git a/src/transcription/rawwhisperimporter.cpp b/src/transcription/rawwhisperimporter.cpp
index e819ed1..6720a3e 100644
--- a/src/transcription/rawwhisperimporter.cpp
+++ b/src/transcription/rawwhisperimporter.cpp
@@ -130,8 +130,8 @@ std::optional<ValidatedModelPackage> RawWhisperImporter::importFile(const QStrin
     manifest.schemaVersion = 1;
     manifest.metadata = *metadata;
     manifest.compatibleEngines.push_back(ModelCompatibilityMarker{
-        .engine = QStringLiteral("whisper.cpp"),
-        .modelFormat = QStringLiteral("ggml"),
+        .engine = legacyWhisperEngineName(),
+        .modelFormat = legacyWhisperModelFormat(),
     });
     manifest.assets.push_back(ModelAssetMetadata{
         .role = QStringLiteral("weights"),
@@ -163,5 +163,5 @@ std::optional<ValidatedModelPackage> RawWhisperImporter::importFile(const QStrin
         return std::nullopt;
     }
 
-    return ModelValidator::validatePackagePath(packageDirectory, QStringLiteral("whisper.cpp"), QStringLiteral("ggml"), error);
+    return ModelValidator::validatePackagePath(packageDirectory, legacyWhisperEngineName(), legacyWhisperModelFormat(), error);
 }
diff --git a/src/transcription/runtimeselector.cpp b/src/transcription/runtimeselector.cpp
new file mode 100644
index 0000000..046d1bf
--- /dev/null
+++ b/src/transcription/runtimeselector.cpp
@@ -0,0 +1,59 @@
+#include "transcription/runtimeselector.h"
+
+#include "transcription/modelcatalog.h"
+
+#include <utility>
+
+namespace {
+
+RuntimeSelection makeCpuSelection(QString reason, std::optional<ValidatedModelPackage> package = std::nullopt)
+{
+    RuntimeSelection selection; // Filled field by field; reason and package are moved in.
+    selection.kind = RuntimeSelectionKind::CpuReference;
+    selection.reason = std::move(reason);
+    selection.inspectedPackage = std::move(package);
+    return selection;
+}
+
+// Builds a LegacyWhisper selection. Every call site sits behind MUTTERKEY_WITH_LEGACY_WHISPER,
+// so the attribute keeps builds without the legacy adapter free of unused-function warnings.
+[[maybe_unused]] RuntimeSelection makeLegacySelection(QString reason, std::optional<ValidatedModelPackage> package = std::nullopt)
+{
+    return RuntimeSelection{.kind = RuntimeSelectionKind::LegacyWhisper,
+                            .reason = std::move(reason),
+                            .inspectedPackage = std::move(package)};
+}
+
+} // namespace
+
+RuntimeSelection selectRuntimeForConfig(const TranscriberConfig &config)
+{
+    const std::optional<ValidatedModelPackage> package = ModelCatalog::inspectPath(config.modelPath); // No error out-parameter is passed.
+    if (!package.has_value()) {
+#if defined(MUTTERKEY_WITH_LEGACY_WHISPER)
+        return makeLegacySelection(QStringLiteral("Falling back to legacy whisper runtime because the model path could not be inspected"));
+#else
+        return makeCpuSelection(QStringLiteral("Using native CPU reference runtime because legacy whisper support is disabled"));
+#endif
+    }
+    // Native markers are checked first, so a package advertising both runtimes uses the native one.
+    if (modelPackageSupportsCompatibility(package->manifest, cpuReferenceEngineName(), cpuReferenceModelFormat())) {
+        return makeCpuSelection(QStringLiteral("Selected native CPU reference runtime from package compatibility markers"), package);
+    }
+    // Legacy-marked packages use whisper when it is compiled in; otherwise the native runtime is still returned.
+    if (modelPackageSupportsCompatibility(package->manifest, legacyWhisperEngineName(), legacyWhisperModelFormat())) {
+#if defined(MUTTERKEY_WITH_LEGACY_WHISPER)
+        return makeLegacySelection(QStringLiteral("Selected legacy whisper runtime from package compatibility markers"), package);
+#else
+        return makeCpuSelection(QStringLiteral("Using native CPU reference runtime because legacy whisper support is disabled"), package);
+#endif
+    }
+    // Neither marker pair matched: legacy whisper when compiled in, native CPU reference otherwise.
+#if defined(MUTTERKEY_WITH_LEGACY_WHISPER)
+    return makeLegacySelection(QStringLiteral("Falling back to legacy whisper runtime because the package markers are not yet recognized by the native selector"),
+                               package);
+#else
+    return makeCpuSelection(QStringLiteral("Using native CPU reference runtime because no compatible runtime markers were recognized"),
+                            package);
+#endif
+}
diff --git a/src/transcription/runtimeselector.h b/src/transcription/runtimeselector.h
new file mode 100644
index 0000000..d46aae4
--- /dev/null
+++ b/src/transcription/runtimeselector.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include "config.h"
+#include "transcription/modelpackage.h"
+
+#include <cstdint>
+#include <optional>
+
+/**
+ * @file
+ * @brief Runtime-selection policy for product-owned and legacy transcription engines.
+ */
+
+/**
+ * @brief Stable runtime families selectable by the app-owned factory.
+ */
+enum class RuntimeSelectionKind : std::uint8_t {
+    CpuReference,
+    LegacyWhisper,
+};
+
+/**
+ * @brief Result of selecting a runtime for a configured model path.
+ */
+struct RuntimeSelection {
+    /// Chosen runtime implementation.
+    RuntimeSelectionKind kind = RuntimeSelectionKind::CpuReference;
+    /// Human-readable reason suitable for diagnostics.
+    QString reason;
+    /// Best-effort inspected package used to make the decision.
+    std::optional<ValidatedModelPackage> inspectedPackage;
+};
+
+/**
+ * @brief Chooses the runtime implementation for a transcription config.
+ * @param config Runtime configuration containing the model path.
+ * @return Product-owned selection result with a diagnostic reason.
+ */
+[[nodiscard]] RuntimeSelection selectRuntimeForConfig(const TranscriberConfig &config);
diff --git a/src/transcription/transcriptionengine.cpp b/src/transcription/transcriptionengine.cpp
index bfd0a58..913d8b4 100644
--- a/src/transcription/transcriptionengine.cpp
+++ b/src/transcription/transcriptionengine.cpp
@@ -1,8 +1,22 @@
 #include "transcription/transcriptionengine.h"
 
+#include "transcription/cpureferencetranscriber.h"
+#include "transcription/runtimeselector.h"
+
+#if defined(MUTTERKEY_WITH_LEGACY_WHISPER)
 #include "transcription/whispercpptranscriber.h"
+#endif
 
 std::shared_ptr<const TranscriptionEngine> createTranscriptionEngine(const TranscriberConfig &config)
 {
+    const RuntimeSelection selection = selectRuntimeForConfig(config);
+    if (selection.kind == RuntimeSelectionKind::CpuReference) {
+        return createCpuReferenceTranscriptionEngine(config);
+    }
+
+#if defined(MUTTERKEY_WITH_LEGACY_WHISPER)
     return createWhisperCppTranscriptionEngine(config);
+#else
+    return createCpuReferenceTranscriptionEngine(config);
+#endif
 }
diff --git a/src/transcription/transcriptiontypes.h b/src/transcription/transcriptiontypes.h
index ae0964c..5abba77 100644
--- a/src/transcription/transcriptiontypes.h
+++ b/src/transcription/transcriptiontypes.h
@@ -72,6 +72,8 @@ struct BackendCapabilities {
 struct RuntimeDiagnostics {
     /// Stable backend identifier used in diagnostics.
     QString backendName;
+    /// Human-readable explanation for why this runtime was selected.
+    QString selectionReason;
     /// Human-readable runtime and device summary.
     QString runtimeDescription;
     /// Loaded-model description when a model is available.
diff --git a/src/transcription/whispercpptranscriber.cpp b/src/transcription/whispercpptranscriber.cpp
index aa87d7d..735caab 100644
--- a/src/transcription/whispercpptranscriber.cpp
+++ b/src/transcription/whispercpptranscriber.cpp
@@ -1,6 +1,7 @@
 #include "transcription/whispercpptranscriber.h"
 
 #include "transcription/modelcatalog.h"
+#include "transcription/runtimeselector.h"
 
 #include <algorithm>
 #include <atomic>
@@ -170,7 +171,7 @@ std::shared_ptr<const TranscriptionModelHandle>
 WhisperCppTranscriber::loadModelHandle(const TranscriberConfig &config, RuntimeError *error)
 {
     const std::optional<ValidatedModelPackage> package =
-        ModelCatalog::inspectPath(config.modelPath, QStringLiteral("whisper.cpp"), QStringLiteral("ggml"), error);
+        ModelCatalog::inspectPath(config.modelPath, legacyWhisperEngineName(), legacyWhisperModelFormat(), error);
     if (!package.has_value()) {
         return nullptr;
     }
@@ -246,6 +247,7 @@ RuntimeDiagnostics WhisperCppTranscriber::diagnosticsStatic()
 {
     return RuntimeDiagnostics{
         .backendName = backendNameStatic(),
+        .selectionReason = QStringLiteral("Legacy whisper runtime selected explicitly"),
         .runtimeDescription = describeRegisteredBackends(),
         .loadedModelDescription = {},
     };
@@ -432,7 +434,9 @@ class WhisperCppTranscriptionEngine final : public TranscriptionEngine
 
     [[nodiscard]] RuntimeDiagnostics diagnostics() const override
     {
-        return WhisperCppTranscriber::diagnosticsStatic();
+        RuntimeDiagnostics diagnostics = WhisperCppTranscriber::diagnosticsStatic();
+        diagnostics.selectionReason = selectRuntimeForConfig(m_config).reason;
+        return diagnostics;
     }
 
     [[nodiscard]] std::shared_ptr<const TranscriptionModelHandle> loadModel(RuntimeError *error) const override
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 05f6df8..57386a3 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -65,16 +65,35 @@ mutterkey_add_qt_test(transcriptionworkertest
     ../src/transcription/transcriptionworker.cpp
 )
 
-mutterkey_add_qt_test(whispercpptranscribertest
-    whispercpptranscribertest.cpp
+mutterkey_add_qt_test(cpureferencetranscribertest
+    cpureferencetranscribertest.cpp
+    ../src/transcription/cpureferencemodel.cpp
     ../src/transcription/modelcatalog.cpp
     ../src/transcription/transcriptionengine.cpp
     ../src/transcription/modelpackage.cpp
     ../src/transcription/modelvalidator.cpp
+    ../src/transcription/cpureferencetranscriber.cpp
     ../src/transcription/rawwhisperprobe.cpp
-    ../src/transcription/whispercpptranscriber.cpp
+    ../src/transcription/runtimeselector.cpp
 )
 
+if(MUTTERKEY_ENABLE_LEGACY_WHISPER)
+    mutterkey_add_qt_test(whispercpptranscribertest
+        whispercpptranscribertest.cpp
+        ../src/transcription/cpureferencemodel.cpp
+        ../src/transcription/modelcatalog.cpp
+        ../src/transcription/transcriptionengine.cpp
+        ../src/transcription/modelpackage.cpp
+        ../src/transcription/modelvalidator.cpp
+        ../src/transcription/rawwhisperprobe.cpp
+        ../src/transcription/cpureferencetranscriber.cpp
+        ../src/transcription/runtimeselector.cpp
+        ../src/transcription/whispercpptranscriber.cpp
+    )
+    target_compile_definitions(whispercpptranscribertest PRIVATE MUTTERKEY_WITH_LEGACY_WHISPER)
+    target_link_libraries(whispercpptranscribertest PRIVATE whisper)
+endif()
+
 mutterkey_add_qt_test(platformlogicstest
     platformlogicstest.cpp
 )
@@ -90,16 +109,21 @@ mutterkey_add_qt_test(traystatuswindowtest
 
 target_link_libraries(daemoncontrolclientservertest PRIVATE mutterkey_control)
 target_link_libraries(platformlogicstest PRIVATE mutterkey_core)
-target_link_libraries(whispercpptranscribertest PRIVATE whisper)
 
 add_test(NAME testcommentarycheck
     COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/../scripts/check-test-commentary.sh ${CMAKE_CURRENT_SOURCE_DIR}/..
 )
 
 if(TARGET clang-tidy)
-    add_dependencies(clang-tidy configtest_autogen commanddispatchtest_autogen recordingnormalizertest_autogen streamingtranscriptiontest_autogen modelpackagetest_autogen daemoncontrolprotocoltest_autogen daemoncontroltypestest_autogen daemoncontrolclientservertest_autogen transcriptionworkertest_autogen whispercpptranscribertest_autogen platformlogicstest_autogen traystatuswindowtest_autogen)
+    add_dependencies(clang-tidy configtest_autogen commanddispatchtest_autogen recordingnormalizertest_autogen streamingtranscriptiontest_autogen modelpackagetest_autogen daemoncontrolprotocoltest_autogen daemoncontroltypestest_autogen daemoncontrolclientservertest_autogen transcriptionworkertest_autogen cpureferencetranscribertest_autogen platformlogicstest_autogen traystatuswindowtest_autogen)
+    if(TARGET whispercpptranscribertest_autogen)
+        add_dependencies(clang-tidy whispercpptranscribertest_autogen)
+    endif()
 endif()
 
 if(TARGET clazy)
-    add_dependencies(clazy configtest_autogen commanddispatchtest_autogen recordingnormalizertest_autogen streamingtranscriptiontest_autogen modelpackagetest_autogen daemoncontrolprotocoltest_autogen daemoncontroltypestest_autogen daemoncontrolclientservertest_autogen transcriptionworkertest_autogen whispercpptranscribertest_autogen platformlogicstest_autogen traystatuswindowtest_autogen)
+    add_dependencies(clazy configtest_autogen commanddispatchtest_autogen recordingnormalizertest_autogen streamingtranscriptiontest_autogen modelpackagetest_autogen daemoncontrolprotocoltest_autogen daemoncontroltypestest_autogen daemoncontrolclientservertest_autogen transcriptionworkertest_autogen cpureferencetranscribertest_autogen platformlogicstest_autogen traystatuswindowtest_autogen)
+    if(TARGET whispercpptranscribertest_autogen)
+        add_dependencies(clazy whispercpptranscribertest_autogen)
+    endif()
 endif()
diff --git a/tests/cpureferencetranscribertest.cpp b/tests/cpureferencetranscribertest.cpp
new file mode 100644
index 0000000..1ac57c5
--- /dev/null
+++ b/tests/cpureferencetranscribertest.cpp
@@ -0,0 +1,259 @@
+#include "transcription/cpureferencetranscriber.h"
+#include "transcription/modelpackage.h"
+#include "transcription/transcriptionengine.h"
+
+#include <QCryptographicHash>
+#include <QDir>
+#include <QFile>
+#include <QFileInfo>
+#include <QJsonDocument>
+#include <QTemporaryDir>
+#include <QtTest/QTest>
+
+#include <array>
+#include <cstdint>
+#include <cstring>
+
+namespace {
+
+constexpr std::array<char, 8> kCpuReferenceMagic{'M', 'K', 'C', 'P', 'U', 'R', '1', '\0'}; // Leading bytes of a fixture model file.
+constexpr std::uint32_t kCpuReferenceVersion = 1; // Version field the fixture writer emits right after the magic bytes.
+
+class CpuReferenceTranscriberTest final : public QObject // Qt Test fixture covering the native CPU reference runtime.
+{
+    Q_OBJECT
+
+private slots:
+    void engineDispatchesToNativeCpuPackage(); // Generic factory reports the CPU reference backend name.
+    void nativeCpuRuntimeEmitsDeterministicTranscript(); // Session finishes with the transcript stored in the fixture.
+    void nativeCpuRuntimeRejectsTranslationMode(); // finish() fails with InvalidConfig when translate is enabled.
+    void engineDiagnosticsExplainNativeSelection(); // Diagnostics carry the native selection reason.
+};
+
+template <typename T>
+bool writePod(QFile *file, const T &value)
+{
+    // Copies the object representation of `value` into a scratch buffer and writes it to `file`.
+    // Returns false for a null `file` or when fewer than sizeof(T) bytes were accepted.
+
+    std::array<char, sizeof(T)> scratch{};
+    std::memcpy(scratch.data(), &value, scratch.size());
+    const auto expected = static_cast<qint64>(scratch.size());
+    return file != nullptr && file->write(scratch.data(), expected) == expected;
+}
+
+struct CpuReferenceFixtureFile { // Request describing one fixture model file to write.
+    QString path; // Destination path of the model file.
+    QString transcript; // Text stored as the file's UTF-8 payload.
+};
+
+struct CpuReferenceFixturePackage { // Request describing one fixture model package to write.
+    QString packageRoot; // Directory that receives model.json and assets/model.mkcpu.
+    QString transcript; // Text embedded in the package's weights file.
+};
+
+QString sha256ForFile(const QString &path)
+{
+    // Returns the hex-encoded SHA-256 of the file at `path`, or an empty string when it cannot be opened or read.
+    QFile file(path);
+    QCryptographicHash hash(QCryptographicHash::Sha256);
+    // addData(QIODevice *) reports read failures; surface them instead of hashing partial data.
+    if (!file.open(QIODevice::ReadOnly) || !hash.addData(&file)) {
+        return {};
+    }
+    return QString::fromLatin1(hash.result().toHex());
+}
+
+bool writeCpuReferenceModel(const CpuReferenceFixtureFile &request)
+{
+    // Fixture layout: magic bytes, uint32 version, uint32 payload length, then the UTF-8 transcript payload.
+    // Returns false as soon as the file cannot be opened or any field is written short.
+    QFile output(request.path);
+    if (!output.open(QIODevice::WriteOnly)) {
+        return false;
+    }
+
+    const auto magicLength = static_cast<qint64>(kCpuReferenceMagic.size());
+    const bool magicWritten = output.write(kCpuReferenceMagic.data(), magicLength) == magicLength;
+    if (!magicWritten || !writePod(&output, kCpuReferenceVersion)) {
+        return false;
+    }
+
+    const QByteArray payload = request.transcript.toUtf8();
+    return writePod(&output, static_cast<std::uint32_t>(payload.size())) && output.write(payload) == payload.size();
+}
+
+bool writeCpuReferencePackage(const CpuReferenceFixturePackage &request)
+{
+    // Creates <packageRoot>/assets/model.mkcpu and <packageRoot>/model.json; returns false when any step fails.
+    if (!QDir().mkpath(QDir(request.packageRoot).filePath(QStringLiteral("assets")))) {
+        return false;
+    }
+
+    const QString weightsPath = QDir(request.packageRoot).filePath(QStringLiteral("assets/model.mkcpu"));
+    if (!writeCpuReferenceModel(CpuReferenceFixtureFile{
+            .path = weightsPath,
+            .transcript = request.transcript,
+        })) {
+        return false;
+    }
+    // The manifest marks the package for the CPU reference engine and records the weights checksum and size.
+    ModelPackageManifest manifest;
+    manifest.format = QStringLiteral("mutterkey.model-package");
+    manifest.schemaVersion = 1;
+    manifest.metadata.packageId = QStringLiteral("fixture-native-cpu");
+    manifest.metadata.displayName = QStringLiteral("Fixture Native CPU");
+    manifest.metadata.runtimeFamily = QStringLiteral("asr");
+    manifest.metadata.sourceFormat = QStringLiteral("mutterkey-native");
+    manifest.metadata.modelFormat = cpuReferenceModelFormat();
+    manifest.metadata.architecture = QStringLiteral("reference-fixture");
+    manifest.metadata.languageProfile = QStringLiteral("en");
+    manifest.metadata.tokenizer = QStringLiteral("builtin");
+    manifest.compatibleEngines.push_back(ModelCompatibilityMarker{
+        .engine = cpuReferenceEngineName(),
+        .modelFormat = cpuReferenceModelFormat(),
+    });
+    manifest.assets.push_back(ModelAssetMetadata{
+        .role = QStringLiteral("weights"),
+        .relativePath = QStringLiteral("assets/model.mkcpu"),
+        .sha256 = sha256ForFile(weightsPath),
+        .sizeBytes = QFileInfo(weightsPath).size(),
+    });
+
+    QFile manifestFile(QDir(request.packageRoot).filePath(QStringLiteral("model.json")));
+    if (!manifestFile.open(QIODevice::WriteOnly | QIODevice::Text)) {
+        return false;
+    }
+    // A failed or short manifest write must fail the fixture instead of leaving a truncated model.json behind.
+    const QByteArray manifestJson = QJsonDocument(modelPackageManifestToJson(manifest)).toJson(QJsonDocument::Indented);
+    return manifestFile.write(manifestJson) == manifestJson.size();
+}
+
+AudioChunk validChunk() // Shared session input for the tests in this file.
+{
+    return AudioChunk{ // Eight small-magnitude samples, one channel, sampleRate 16000, stream offset 0.
+        .samples = {0.0F, 0.05F, -0.04F, 0.03F, -0.02F, 0.01F, -0.03F, 0.02F},
+        .sampleRate = 16000,
+        .channels = 1,
+        .streamOffsetFrames = 0,
+    };
+}
+
+} // namespace
+
+void CpuReferenceTranscriberTest::engineDispatchesToNativeCpuPackage()
+{
+    // WHAT: Check that createTranscriptionEngine() picks the native CPU reference backend for a native package.
+    // HOW: Write a fixture package that declares the CPU reference engine, build the engine, and compare the reported backend name.
+    // WHY: Native packages must reach the product-owned runtime through the app-level factory for the whisper.cpp decoupling to take effect.
+    const QTemporaryDir scratchDir;
+    QVERIFY(scratchDir.isValid());
+    const CpuReferenceFixturePackage fixture{
+        .packageRoot = scratchDir.filePath(QStringLiteral("native-package")),
+        .transcript = QStringLiteral("hello from cpu runtime"),
+    };
+    QVERIFY(writeCpuReferencePackage(fixture));
+
+    TranscriberConfig settings;
+    settings.modelPath = fixture.packageRoot;
+
+    const std::shared_ptr<const TranscriptionEngine> engine = createTranscriptionEngine(settings);
+    QVERIFY(engine != nullptr);
+    QCOMPARE(engine->capabilities().backendName, cpuReferenceEngineName());
+}
+
+void CpuReferenceTranscriberTest::nativeCpuRuntimeEmitsDeterministicTranscript()
+{
+    // WHAT: Check that a session on the native CPU runtime finishes with exactly one final event carrying the packaged transcript.
+    // HOW: Write a fixture package with a fixed transcript payload, load it, warm up a session, push one valid chunk, and finish.
+    // WHY: Conformance tests need transcript and timestamp output that is reproducible without third-party runtime state.
+    const QTemporaryDir scratchDir;
+    QVERIFY(scratchDir.isValid());
+    const CpuReferenceFixturePackage fixture{
+        .packageRoot = scratchDir.filePath(QStringLiteral("native-package")),
+        .transcript = QStringLiteral("hello from cpu runtime"),
+    };
+    QVERIFY(writeCpuReferencePackage(fixture));
+
+    TranscriberConfig settings;
+    settings.modelPath = fixture.packageRoot;
+
+    RuntimeError status;
+    const std::shared_ptr<const TranscriptionModelHandle> handle = CpuReferenceTranscriber::loadModelHandle(settings, &status);
+    QVERIFY(handle != nullptr);
+    QVERIFY(status.isOk());
+
+    std::unique_ptr<TranscriptionSession> session = CpuReferenceTranscriber::createSession(settings, handle);
+    QVERIFY(session != nullptr);
+    QVERIFY(session->warmup(&status));
+    QVERIFY(status.isOk());
+
+    QVERIFY(session->pushAudioChunk(validChunk()).isOk());
+
+    const TranscriptUpdate finished = session->finish();
+    QVERIFY(finished.isOk());
+    QCOMPARE(finished.events.size(), static_cast<std::size_t>(1));
+    const auto &finalEvent = finished.events.front();
+    QCOMPARE(finalEvent.kind, TranscriptEventKind::Final);
+    QCOMPARE(finalEvent.text, QStringLiteral("hello from cpu runtime"));
+    QCOMPARE(finalEvent.startMs, 0);
+    QVERIFY(finalEvent.endMs > 0);
+}
+
+void CpuReferenceTranscriberTest::nativeCpuRuntimeRejectsTranslationMode()
+{
+    // WHAT: Check that finishing a session on the native CPU runtime fails when translation is requested.
+    // HOW: Write a fixture package, set translate in the config, push one valid chunk, and inspect the error returned by finish().
+    // WHY: This runtime is ASR-only for now, so an unsupported request has to surface as an explicit InvalidConfig error.
+    const QTemporaryDir scratchDir;
+    QVERIFY(scratchDir.isValid());
+    const CpuReferenceFixturePackage fixture{
+        .packageRoot = scratchDir.filePath(QStringLiteral("native-package")),
+        .transcript = QStringLiteral("hello from cpu runtime"),
+    };
+    QVERIFY(writeCpuReferencePackage(fixture));
+
+    TranscriberConfig settings;
+    settings.modelPath = fixture.packageRoot;
+    settings.translate = true;
+
+    RuntimeError status;
+    const std::shared_ptr<const TranscriptionModelHandle> handle = CpuReferenceTranscriber::loadModelHandle(settings, &status);
+    QVERIFY(handle != nullptr);
+    QVERIFY(status.isOk());
+
+    std::unique_ptr<TranscriptionSession> session = CpuReferenceTranscriber::createSession(settings, handle);
+    QVERIFY(session != nullptr);
+    QVERIFY(session->pushAudioChunk(validChunk()).isOk());
+
+    const TranscriptUpdate rejected = session->finish();
+    QVERIFY(!rejected.isOk());
+    QCOMPARE(rejected.error.code, RuntimeErrorCode::InvalidConfig);
+}
+
+void CpuReferenceTranscriberTest::engineDiagnosticsExplainNativeSelection()
+{
+    // WHAT: Check that engine diagnostics name the native backend and state why it was selected.
+    // HOW: Write a fixture package, build the generic engine, then read backendName and selectionReason from its diagnostics.
+    // WHY: Diagnose output stays actionable only if the selection policy decision is carried through to the diagnostics.
+    const QTemporaryDir scratchDir;
+    QVERIFY(scratchDir.isValid());
+    const CpuReferenceFixturePackage fixture{
+        .packageRoot = scratchDir.filePath(QStringLiteral("native-package")),
+        .transcript = QStringLiteral("hello from cpu runtime"),
+    };
+    QVERIFY(writeCpuReferencePackage(fixture));
+
+    TranscriberConfig settings;
+    settings.modelPath = fixture.packageRoot;
+
+    const std::shared_ptr<const TranscriptionEngine> engine = createTranscriptionEngine(settings);
+    QVERIFY(engine != nullptr);
+    QCOMPARE(engine->diagnostics().backendName, cpuReferenceEngineName());
+    const QString reason = engine->diagnostics().selectionReason;
+    QVERIFY(reason.contains(QStringLiteral("native CPU reference runtime"), Qt::CaseInsensitive));
+}
+
+QTEST_APPLESS_MAIN(CpuReferenceTranscriberTest)
+
+#include "cpureferencetranscribertest.moc"
diff --git a/tests/transcriptionworkertest.cpp b/tests/transcriptionworkertest.cpp
index cedd52f..5290996 100644
--- a/tests/transcriptionworkertest.cpp
+++ b/tests/transcriptionworkertest.cpp
@@ -36,6 +36,7 @@ RuntimeDiagnostics fakeDiagnostics()
 {
     return RuntimeDiagnostics{
         .backendName = QStringLiteral("fake"),
+        .selectionReason = QStringLiteral("fake selection"),
         .runtimeDescription = QStringLiteral("fake runtime"),
         .loadedModelDescription = {},
     };
diff --git a/tests/whispercpptranscribertest.cpp b/tests/whispercpptranscribertest.cpp
index aa59c35..09c2314 100644
--- a/tests/whispercpptranscribertest.cpp
+++ b/tests/whispercpptranscribertest.cpp
@@ -12,6 +12,7 @@ class WhisperCppTranscriberTest final : public QObject
 private slots:
     void whisperEngineSurfacesMissingModelAtLoadTime();
     void whisperRuntimeRejectsUnsupportedLanguage();
+    void legacyEngineDiagnosticsExplainFallback();
 };
 
 } // namespace
@@ -54,6 +55,21 @@ void WhisperCppTranscriberTest::whisperRuntimeRejectsUnsupportedLanguage()
     QVERIFY(update.error.message.contains(QStringLiteral("pirate")));
 }
 
+void WhisperCppTranscriberTest::legacyEngineDiagnosticsExplainFallback()
+{
+    // WHAT: Check that diagnostics report the whisper.cpp backend together with a legacy-selection reason for an uninspectable model path.
+    // HOW: Build the generic engine with a model path that does not exist and read backendName and selectionReason from its diagnostics.
+    // WHY: Diagnose output has to tell a policy fallback to the legacy backend apart from ordinary compatibility routing.
+    TranscriberConfig settings;
+    settings.modelPath = QStringLiteral("/tmp/definitely-missing-mutterkey-model.bin");
+
+    const std::shared_ptr<const TranscriptionEngine> engine = createTranscriptionEngine(settings);
+    QVERIFY(engine != nullptr);
+    QCOMPARE(engine->diagnostics().backendName, QStringLiteral("whisper.cpp"));
+    const QString reason = engine->diagnostics().selectionReason;
+    QVERIFY(reason.contains(QStringLiteral("legacy whisper runtime"), Qt::CaseInsensitive));
+}
+
 QTEST_APPLESS_MAIN(WhisperCppTranscriberTest)
 
 #include "whispercpptranscribertest.moc"