From 8d7c827ef6a1d248fff425da2c209d92f04e888a Mon Sep 17 00:00:00 2001
From: Tobiszewski <adrian.tobiszewski@intel.com>
Date: Mon, 8 Jun 2026 13:15:37 +0200
Subject: [PATCH 1/3] Integrate GStreamer library for HW-accelerated audio
 decode/encode

- Add src/mpi/: Intel MPI C API (intel_mpi.h, imp_mpi_impl.h, intel_mpi.cpp)
  and OVMS adapter layer (imp_audio_utils.hpp/cpp)
- Add third_party/gstreamer/: Bazel BUILD for pre-installed GStreamer MSVC
- windows_build.bat, windows_test.bat: add GStreamer bin dir to PATH

The MPI library wraps GStreamer pipelines to provide HW-accelerated audio
decode (readAudio) and encode (writeAudio) supporting WAV, MP3, FLAC,
Opus, AAC and OGG. Upper layers call only through the MPI C API; GStreamer
is an implementation detail hidden inside intel_mpi.cpp.

On Linux the imp_audio_utils stubs return false, falling back to dr_libs.
---
 src/mpi/BUILD                                 |   89 +
 src/mpi/imp_audio_utils.cpp                   |  230 +++
 src/mpi/imp_audio_utils.hpp                   |   98 ++
 src/mpi/imp_mpi_impl.h                        |  286 +++
 src/mpi/intel_mpi.cpp                         | 1531 +++++++++++++++++
 src/mpi/intel_mpi.h                           | 1013 +++++++++++
 third_party/gstreamer/BUILD                   |    2 +
 third_party/gstreamer/gstreamer_windows.BUILD |  131 ++
 windows_build.bat                             |   12 +-
 windows_test.bat                              |    2 +-
 10 files changed, 3383 insertions(+), 11 deletions(-)
 create mode 100644 src/mpi/BUILD
 create mode 100644 src/mpi/imp_audio_utils.cpp
 create mode 100644 src/mpi/imp_audio_utils.hpp
 create mode 100644 src/mpi/imp_mpi_impl.h
 create mode 100644 src/mpi/intel_mpi.cpp
 create mode 100644 src/mpi/intel_mpi.h
 create mode 100644 third_party/gstreamer/BUILD
 create mode 100644 third_party/gstreamer/gstreamer_windows.BUILD

diff --git a/src/mpi/BUILD b/src/mpi/BUILD
new file mode 100644
index 0000000000..3239c5f622
--- /dev/null
+++ b/src/mpi/BUILD
@@ -0,0 +1,89 @@
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+load("//:common_settings.bzl", "COMMON_LOCAL_DEFINES", "ovms_cc_library")
+
+package(default_visibility = ["//visibility:public"])
+
+# Intel Media Processing Interface (Intel MPI) library
+# Windows-only for now — wraps GStreamer for HW-accelerated media decode/encode
+# with zero-copy OpenVINO tensor integration.
+#
+# On Linux this target is a no-op (empty lib) so dependents can
+# unconditionally list it without breaking the build.
+
+cc_library(
+    name = "intel_mpi",
+    srcs = select({
+        # imp_mpi_impl.h is a private header (it pulls in windows.h, D3D11 and
+        # GStreamer), so it is listed in srcs and not exported to dependents.
+        "//src:windows": [
+            "intel_mpi.cpp",
+            "imp_mpi_impl.h",
+        ],
+        "//conditions:default": [],
+    }),
+    # Only the public C API header is exported; it is the same on every platform.
+    hdrs = ["intel_mpi.h"],
+    copts = select({
+        "//src:windows": [
+            "/std:c++17",
+            "/EHsc",
+            "/MD",
+            "/DGST_USE_UNSTABLE_API",
+            "/external:anglebrackets",  # treat <...> includes as external headers
+            "/external:W0",  # ...and silence warnings coming from them
+            "/wd4005",  # macro redefinition
+            "/wd4100",  # unreferenced formal parameter
+            "/wd4267",  # size_t narrowing conversion
+            "/wd4244",  # narrowing conversion, possible loss of data
+            "/wd4996",  # deprecated declaration
+        ],
+        "//conditions:default": [],
+    }),
+    local_defines = COMMON_LOCAL_DEFINES + select({
+        "//src:windows": ["INTEL_MPI_AVAILABLE=1"],
+        "//conditions:default": [],
+    }),
+    deps = select({
+        "//src:windows": [
+            "//third_party:openvino",
+            "//third_party:gstreamer",
+        ],
+        "//conditions:default": [],
+    }),
+    visibility = ["//visibility:public"],
+)
+
+# Thin audio-decode/encode utilities that wrap Intel MPI for OVMS calculators.
+# On Linux this is a stub (all functions return false → callers fall back to dr_libs).
+ovms_cc_library(
+    name = "imp_audio_utils",
+    srcs = ["imp_audio_utils.cpp"],  # holds both the MPI-backed and the stub implementation
+    hdrs = ["imp_audio_utils.hpp"],
+    local_defines = COMMON_LOCAL_DEFINES + select({
+        "//src:windows": ["INTEL_MPI_AVAILABLE=1"],  # selects the MPI-backed path in the .cpp
+        "//conditions:default": [],  # macro left undefined, so the stub path is compiled
+    }),
+    deps = select({
+        "//src:windows": [
+            ":intel_mpi",
+            "//third_party:openvino",  # the .cpp reads decoded samples out of an ov::Tensor
+        ],
+        "//conditions:default": [],  # the stub path uses neither dependency
+    }),
+    visibility = ["//visibility:public"],
+)
diff --git a/src/mpi/imp_audio_utils.cpp b/src/mpi/imp_audio_utils.cpp
new file mode 100644
index 0000000000..44e95e8b33
--- /dev/null
+++ b/src/mpi/imp_audio_utils.cpp
@@ -0,0 +1,230 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+/**
+ * @file imp_audio_utils.cpp
+ * @brief Implementation of Intel MPI audio helpers for OVMS.
+ *
+ * All media operations go through the Intel MPI C API (intel_mpi.h).
+ * This file does NOT call GStreamer directly — the MPI library owns
+ * that abstraction.
+ *
+ * Two compile-time paths:
+ *   1. INTEL_MPI_AVAILABLE — calls imp_decode_audio() etc.
+ *   2. Fallback           — all functions return false / no-op
+ */
+
+#include "imp_audio_utils.hpp"
+
+#ifdef INTEL_MPI_AVAILABLE
+
+#include "intel_mpi.h"          // MPI C API
+
+#include <openvino/openvino.hpp>  // ov::Tensor for data extraction
+#include <cstring>
+#include <iostream>
+
+namespace ovms {
+namespace imp {
+
+bool isAvailable() {
+    // Linkage probe only: imp_get_version() reports the library's version
+    // numbers.  No context is created here, so a non-zero version merely
+    // confirms that the MPI library is linked in.
+    int ver[3] = {0, 0, 0};
+    imp_get_version(&ver[0], &ver[1], &ver[2]);
+    return ver[0] > 0 || ver[1] > 0 || ver[2] > 0;
+}
+
+bool decodeAudioBuffer(const std::string_view& audioData,
+                       uint32_t sampleRate,
+                       std::vector<float>& pcm) {
+    // Decodes an encoded audio buffer into mono FP32 PCM at `sampleRate` via the
+    // Intel MPI C API.  Returns false on any failure so the caller can fall back.
+    if (audioData.empty()) return false;
+
+    imp_audio_decode_opts_t opts{};
+    opts.sample_rate = sampleRate;
+    opts.channels    = 1;          // mono output for speech models
+    opts.output_type = IMP_TYPE_FP32;
+    opts.normalize   = true;
+
+    // Decode via Intel MPI (context = NULL → no GPU, pure media decode)
+    imp_tensor_t* tensor = nullptr;
+    imp_status_t status = imp_decode_audio(
+        &tensor,
+        audioData.data(),
+        audioData.size(),
+        nullptr,           // ctx — not needed for CPU audio decode
+        &opts,
+        nullptr,           // callback — synchronous
+        nullptr);          // user_data
+    if (status != IMP_OK || !tensor) {
+        if (tensor) imp_tensor_release(tensor);
+        return false;
+    }
+
+    // Shape → number of float samples.  Use at most the dims we provided room
+    // for, and treat a negative extent as empty instead of wrapping to size_t.
+    constexpr size_t kMaxDims = 4;
+    int64_t dims[kMaxDims] = {};
+    size_t ndims = kMaxDims;
+    imp_tensor_get_shape(tensor, dims, &ndims);
+    if (ndims > kMaxDims) ndims = kMaxDims;
+    size_t numSamples = 1;
+    for (size_t i = 0; i < ndims; i++) {
+        numSamples *= (dims[i] > 0) ? static_cast<size_t>(dims[i]) : 0;
+    }
+
+    void* ov_tensor_ptr = nullptr;
+    imp_device_type_t dev = IMP_DEVICE_CPU;
+    imp_tensor_get_ov(tensor, &ov_tensor_ptr, &dev);
+    // The ov_tensor_ptr is an ov::Tensor*.  data<float>() throws for an empty
+    // or non-f32 tensor (which would leak `tensor`), so validate up front.
+    auto* ovt = reinterpret_cast<ov::Tensor*>(ov_tensor_ptr);
+    if (!ovt || dev != IMP_DEVICE_CPU || !*ovt ||
+        ovt->get_element_type() != ov::element::f32) {
+        imp_tensor_release(tensor);
+        return false;
+    }
+
+    // Never copy more elements than the tensor actually holds.
+    if (numSamples > ovt->get_size()) numSamples = ovt->get_size();
+    const float* fdata = ovt->data<float>();
+    pcm.assign(fdata, fdata + numSamples);
+    imp_tensor_release(tensor);  // release through the MPI C API
+    return !pcm.empty();
+}
+
+bool encodeToWav(const std::vector<float>& pcm,
+                 uint32_t sampleRate,
+                 uint16_t bitsPerSample,
+                 std::vector<uint8_t>& wavData) {
+    // Writes a mono 32-bit IEEE-float WAV (44-byte header + raw samples) into
+    // wavData without GStreamer.  Returns false, leaving wavData untouched, for
+    // empty input or when the payload does not fit the 32-bit RIFF size fields.
+    (void)bitsPerSample;  // always output 32-bit float WAV
+    if (pcm.empty()) return false;
+    // Guard the uint32_t casts below: a truncated size yields a corrupt file.
+    if (pcm.size() > (UINT32_MAX - 36u) / sizeof(float)) return false;
+
+    const uint32_t numChannels = 1;
+    const uint32_t blockAlign = numChannels * static_cast<uint32_t>(sizeof(float));
+    const uint32_t byteRate = sampleRate * blockAlign;
+    const uint32_t dataSize = static_cast<uint32_t>(pcm.size() * sizeof(float));
+    const uint32_t chunkSize = 36 + dataSize;  // everything after the RIFF size field
+
+    wavData.clear();
+    wavData.reserve(static_cast<size_t>(44) + dataSize);
+    auto write16 = [&](uint16_t v) { wavData.push_back(v & 0xFF); wavData.push_back((v >> 8) & 0xFF); };
+    auto write32 = [&](uint32_t v) {
+        wavData.push_back(v & 0xFF); wavData.push_back((v >> 8) & 0xFF);
+        wavData.push_back((v >> 16) & 0xFF); wavData.push_back((v >> 24) & 0xFF);
+    };
+    auto writeStr = [&](const char* s, size_t n) { wavData.insert(wavData.end(), s, s + n); };
+
+    writeStr("RIFF", 4);
+    write32(chunkSize);
+    writeStr("WAVE", 4);
+    writeStr("fmt ", 4);
+    write32(16);              // subchunk1 size
+    write16(3);               // IEEE float format
+    write16(static_cast<uint16_t>(numChannels));
+    write32(sampleRate);
+    write32(byteRate);
+    write16(static_cast<uint16_t>(blockAlign));
+    write16(32);              // bits per sample
+    writeStr("data", 4);
+    write32(dataSize);
+    const uint8_t* raw = reinterpret_cast<const uint8_t*>(pcm.data());  // host byte order
+    wavData.insert(wavData.end(), raw, raw + dataSize);
+    return true;
+}
+
+bool encodeAudioBuffer(const float* samples,
+                       size_t numSamples,
+                       uint32_t sampleRate,
+                       uint16_t bitsPerSample,
+                       const std::string& format,
+                       std::vector<uint8_t>& output) {
+    // Encodes mono float PCM into `format`.  "wav" is produced by the local
+    // writer; any other format string is handed to the MPI encoder as codec.
+    if (samples == nullptr || numSamples == 0) return false;
+
+    if (format == "wav") {
+        // Fast path: no GStreamer initialisation required.
+        const std::vector<float> mono(samples, samples + numSamples);
+        return encodeToWav(mono, sampleRate, bitsPerSample, output);
+    }
+
+    imp_audio_encode_opts_t encodeOpts{};
+    encodeOpts.codec        = format.c_str();
+    encodeOpts.sample_rate  = sampleRate;
+    encodeOpts.channels     = 1;
+    encodeOpts.bitrate_kbps = 192;  // reasonable default for speech
+    encodeOpts.output_path  = nullptr;
+
+    void* encoded = nullptr;
+    size_t encodedBytes = 0;
+    const imp_status_t rc = imp_encode_audio(&encoded, &encodedBytes, samples, numSamples, &encodeOpts);
+    if (rc != IMP_OK || encoded == nullptr) {
+        if (encoded != nullptr) imp_free(encoded);
+        return false;
+    }
+    const auto* first = static_cast<const uint8_t*>(encoded);
+    output.assign(first, first + encodedBytes);
+    imp_free(encoded);
+    return true;
+}
+
+}  // namespace imp
+}  // namespace ovms
+
+#else  // !INTEL_MPI_AVAILABLE
+
+// ---- Stub implementation (Linux / no GStreamer) ------------------------------
+
+namespace ovms {
+namespace imp {
+
+bool isAvailable() { return false; }  // MPI support was not compiled in
+
+bool decodeAudioBuffer(const std::string_view& /*audioData*/,
+                       uint32_t /*sampleRate*/,
+                       std::vector<float>& /*pcm*/) {
+    return false;  // pcm is left untouched
+}
+
+bool encodeToWav(const std::vector<float>& /*pcm*/,
+                 uint32_t /*sampleRate*/,
+                 uint16_t /*bitsPerSample*/,
+                 std::vector<uint8_t>& /*wavData*/) {
+    return false;  // note: even the WAV writer is unavailable in the stub build
+}
+
+bool encodeAudioBuffer(const float* /*samples*/,
+                       size_t /*numSamples*/,
+                       uint32_t /*sampleRate*/,
+                       uint16_t /*bitsPerSample*/,
+                       const std::string& /*format*/,
+                       std::vector<uint8_t>& /*output*/) {
+    return false;  // every format is rejected, including "wav"
+}
+
+}  // namespace imp
+}  // namespace ovms
+
+#endif  // INTEL_MPI_AVAILABLE
diff --git a/src/mpi/imp_audio_utils.hpp b/src/mpi/imp_audio_utils.hpp
new file mode 100644
index 0000000000..332aa0c8c5
--- /dev/null
+++ b/src/mpi/imp_audio_utils.hpp
@@ -0,0 +1,98 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#pragma once
+
+/**
+ * @file imp_audio_utils.hpp
+ * @brief Intel MPI audio utilities — thin wrappers around the Intel MPI
+ *        decode/encode API for use inside OVMS audio calculators.
+ *
+ * When INTEL_MPI_AVAILABLE is defined (Windows builds with GStreamer),
+ * these helpers provide GStreamer-backed audio decode that supports a
+ * wider range of formats (ogg, flac, aac, opus, …) beyond the basic
+ * WAV/MP3 support offered by dr_libs.
+ *
+ * On Linux (or when GStreamer is not present), the functions return false
+ * so the caller can fall back to the existing dr_libs path.
+ */
+
+#include <string>
+#include <string_view>
+#include <vector>
+#include <cstdint>
+
+namespace ovms {
+namespace imp {
+
+/**
+ * Check whether the Intel MPI audio path was compiled into this build.
+ * Returns true only when built with INTEL_MPI_AVAILABLE and the linked MPI
+ * library reports a non-zero version; GStreamer itself is not probed.
+ */
+bool isAvailable();
+
+/**
+ * Try to decode an audio buffer (any format GStreamer supports) into
+ * mono float PCM samples at the requested sample rate.
+ *
+ * @param audioData   Raw encoded audio bytes (WAV, MP3, OGG, FLAC, …)
+ * @param sampleRate  Desired output sample rate (e.g. 16000)
+ * @param[out] pcm    Output float samples, normalised to [-1, 1]
+ * @return true on success, false if MPI is unavailable or decode fails.
+ */
+bool decodeAudioBuffer(const std::string_view& audioData,
+                       uint32_t sampleRate,
+                       std::vector<float>& pcm);
+
+/**
+ * Encode float PCM samples to a WAV buffer in memory.
+ *
+ * @param pcm          Input float samples (mono, normalised [-1,1])
+ * @param sampleRate   Sample rate (e.g. 16000)
+ * @param bitsPerSample  Currently ignored; output is always 32-bit float
+ * @param[out] wavData  Output WAV bytes
+ * @return true on success.
+ */
+bool encodeToWav(const std::vector<float>& pcm,
+                 uint32_t sampleRate,
+                 uint16_t bitsPerSample,
+                 std::vector<uint8_t>& wavData);
+
+/**
+ * Encode float PCM samples to the specified audio format.
+ *
+ * Supported formats: "wav", "mp3", "flac", "opus", "aac", "pcm".
+ * On Windows, non-WAV formats are encoded via the Intel MPI / GStreamer
+ * pipeline.  On Linux (stub) no format is supported: the call returns false.
+ *
+ * @param samples        Raw float PCM (mono)
+ * @param numSamples     Number of float values
+ * @param sampleRate     Sample rate (e.g. 16000)
+ * @param bitsPerSample  Passed to encodeToWav for "wav"; unused otherwise
+ * @param format         Target format string ("wav","mp3","flac",...)
+ * @param[out] output    Encoded bytes
+ * @return true on success.
+ */
+bool encodeAudioBuffer(const float* samples,
+                       size_t numSamples,
+                       uint32_t sampleRate,
+                       uint16_t bitsPerSample,
+                       const std::string& format,
+                       std::vector<uint8_t>& output);
+
+}  // namespace imp
+}  // namespace ovms
diff --git a/src/mpi/imp_mpi_impl.h b/src/mpi/imp_mpi_impl.h
new file mode 100644
index 0000000000..ad90e7f74c
--- /dev/null
+++ b/src/mpi/imp_mpi_impl.h
@@ -0,0 +1,286 @@
+/**
+ * Intel MPI - Internal Implementation Structures
+ * 
+ * NOT part of the public API. These are the C++ structs behind the
+ * opaque handles declared in intel_mpi.h.
+ *
+ * Only intel_mpi.cpp should include this file.
+ */
+
+#ifndef IMP_MPI_IMPL_H
+#define IMP_MPI_IMPL_H
+
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
+
+#include <string>
+#include <vector>
+#include <memory>
+#include <cstdint>
+#include <cstring>
+#include <chrono>
+
+// Windows + D3D11
+#include <windows.h>
+#include <d3d11.h>
+#include <dxgi.h>
+
+// OpenVINO C++ API
+#include <openvino/openvino.hpp>
+#include <openvino/runtime/intel_gpu/properties.hpp>
+
+// GStreamer
+#include <gst/gst.h>
+#include <gst/app/gstappsink.h>
+#include <gst/app/gstappsrc.h>
+#include <gst/video/video.h>
+#include <gst/d3d11/gstd3d11.h>
+#include <gst/pbutils/pbutils.h>
+
+// Public API types
+#include "intel_mpi.h"
+
+//////////////////////////////////////////////////////////////////////////////
+// NV12 frame buffer (used internally for CPU-side NV12 data)
+//////////////////////////////////////////////////////////////////////////////
+
+struct imp_nv12_frame_t {
+    std::vector<uint8_t> y_plane;   // luma plane, width * height bytes
+    std::vector<uint8_t> uv_plane;  // interleaved chroma plane
+    int width  = 0;
+    int height = 0;
+    bool valid     = false;         // reset by allocate() whenever the planes are resized
+    bool allocated = false;
+    // (Re)sizes both planes; negative sizes are ignored, odd sizes round chroma up.
+    void allocate(int w, int h) {
+        if (w < 0 || h < 0 || (allocated && width == w && height == h)) return;
+        y_plane.resize(static_cast<size_t>(w) * h);  // size_t math: w * h may overflow int
+        uv_plane.resize(2 * static_cast<size_t>(w / 2 + w % 2) * (h / 2 + h % 2));
+        width  = w;
+        height = h;
+        allocated = true;
+        valid = false;
+    }
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Timing data (accumulated per-context for benchmarking)
+//////////////////////////////////////////////////////////////////////////////
+
+struct imp_timing_data_t {  // plain accumulators; every field starts at zero
+    double context_create_ms           = 0;
+    double video_open_ms               = 0;
+    double encoder_open_ms             = 0;
+    double total_decode_fullres_ms     = 0;
+    double total_decode_small_ms       = 0;
+    double total_decode_pull_fullres_ms = 0;
+    double total_decode_pull_small_ms  = 0;
+    double total_decode_copy_fullres_ms = 0;
+    double total_decode_copy_small_ms  = 0;
+    double total_inference_ms          = 0;
+    double total_tensor_setup_ms       = 0;
+    double total_infer_call_ms         = 0;
+    double total_output_parse_ms       = 0;
+    double total_async_start_ms        = 0;
+    double total_async_wait_ms         = 0;
+    double total_encode_ms             = 0;
+    double total_display_ms            = 0;
+    double total_overlay_ms            = 0;
+    int    async_overlaps              = 0;  // NOTE(review): counting rule is not visible here — confirm
+    int    frame_count                 = 0;  // NOTE(review): presumably the divisor for per-frame averages — confirm
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Context — wraps a compiled model + infer requests
+//
+// The public API creates this from an already-compiled OV model.
+// The caller is responsible for:
+//   1. Loading the model XML
+//   2. Applying PrePostProcessor (NV12, resize, etc.)
+//   3. Compiling the model for a device
+//   4. Passing the compiled model here
+//////////////////////////////////////////////////////////////////////////////
+
+struct imp_context_s {
+    // Device info (extracted from compiled model or created standalone)
+    imp_device_type_t device_type   = IMP_DEVICE_GPU;  // defaults to GPU
+    std::string       device_name;         // e.g. "GPU.0"
+
+    // Model dimension hints (for branch auto-deduction)
+    // Set when context is created from a compiled model.
+    // Used by imp_video_open when branch width/height are 0.
+    int               model_w = 0;
+    int               model_h = 0;
+
+    bool              initialized   = false;
+    std::string       last_error;          // NOTE(review): presumably the latest failure text — confirm
+    imp_timing_data_t timing;              // benchmark accumulators owned by this context
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Tensor handle — wraps frame data for the C API
+//
+// Can hold either:
+//   - NV12 frame data (y_plane + uv_plane, format = NV12)
+//   - An ov::Tensor (for future use)
+//
+// NOTE: defined here (before imp_branch_info_t) because branches hold
+//       a tensor_cache instance.
+//////////////////////////////////////////////////////////////////////////////
+
+struct imp_tensor_s {
+    // NV12 frame data (when format is NV12)
+    uint8_t*          y_data  = nullptr;   // Y plane pointer (not owned)
+    uint8_t*          uv_data = nullptr;   // UV plane pointer (not owned)
+    int               width   = 0;
+    int               height  = 0;
+    imp_pixel_format_t format = IMP_FORMAT_NV12;  // defaults to NV12
+    bool              valid   = false;
+
+    // Optional ov::Tensor (for non-NV12 or GPU tensors)
+    ov::Tensor        ov_tensor;           // default-constructed until one is assigned
+    imp_device_type_t device_type = IMP_DEVICE_CPU;  // defaults to CPU
+    std::string       device_name;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Branch info — one per output branch in the tee pipeline
+//////////////////////////////////////////////////////////////////////////////
+
+struct imp_branch_info_t {
+    GstElement*      appsink = nullptr;  // NOTE(review): ownership / unref is not visible here — confirm
+    int              width   = 0;
+    int              height  = 0;
+    std::string      name;
+    imp_nv12_frame_t frame;             // pre-allocated frame buffer
+    imp_tensor_s     tensor_cache;      // reusable tensor wrapper
+
+    // Per-branch timing
+    double total_decode_ms      = 0;
+    double total_decode_pull_ms = 0;
+    double total_decode_copy_ms = 0;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Video stream — GStreamer tee pipeline with N output branches
+//////////////////////////////////////////////////////////////////////////////
+
+struct imp_video_stream_s {
+    GstElement* pipeline = nullptr;  // NOTE(review): ownership / teardown is not visible here — confirm
+    bool use_hw_decode   = false;
+
+    // Dynamic branches (replaces fixed fullres + small appsinks)
+    std::vector<imp_branch_info_t> branches;
+
+    // Timing reference
+    imp_timing_data_t* timing = nullptr;  // raw pointer, may be null; presumably not owned — confirm
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Video encoder — GStreamer appsrc → encoder → mux → file
+//////////////////////////////////////////////////////////////////////////////
+
+struct imp_video_encoder_s {
+    GstElement* pipeline = nullptr;
+    GstElement* appsrc   = nullptr;
+    int         width    = 0;
+    int         height   = 0;
+    int         fps_num  = 30;  // frame rate is the fraction fps_num / fps_den
+    int         fps_den  = 1;   // default 30/1
+    bool        initialized    = false;
+    bool        is_gpu_encoder = false;
+    std::string encoder_name;
+    int64_t     frame_count = 0;
+
+    // Timing reference
+    imp_timing_data_t* timing = nullptr;  // raw pointer, may be null; presumably not owned — confirm
+};
+
+////////////////////////////////////////////////////////////////////////////// 
+// Video source configuration
+//////////////////////////////////////////////////////////////////////////////
+
+struct imp_video_source_s {
+    imp_source_type_t type = IMP_SOURCE_FILE;  // defaults to a file source
+    std::string path;           // file path or URL
+    std::string device;         // camera device id
+    int width  = 0;
+    int height = 0;
+    int framerate = 0;
+    std::string format;         // capture format
+    // Network properties
+    std::string transport;
+    std::string username;
+    std::string password;       // NOTE(review): credential kept as a plain std::string
+    int timeout_ms = 0;
+    bool low_latency = false;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Detection result (demo-level, not part of API)
+//////////////////////////////////////////////////////////////////////////////
+
+struct imp_detection_t {  // one detection: a confidence plus a box given by two corners
+    float confidence = 0.0f;
+    int x_min = 0, y_min = 0, x_max = 0, y_max = 0;  // NOTE(review): presumably pixel coordinates — confirm
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Audio encoder — GStreamer pipeline for encoding audio samples to file
+//////////////////////////////////////////////////////////////////////////////
+
+struct imp_audio_encoder_s {
+    GstElement* pipeline = nullptr;
+    GstElement* appsrc   = nullptr;
+    
+    // Output configuration
+    std::string output_path;
+    std::string codec;                  // "mp3", "aac", "flac", "wav", "opus"
+    uint32_t bitrate_kbps = 192;        // default 192 kbit/s
+    uint32_t sample_rate  = 44100;      // default 44100
+    uint32_t channels     = 2;          // default 2 channels
+    
+    // Callback for completion notification
+    imp_encode_callback_t callback = nullptr;
+    void* user_data = nullptr;          // NOTE(review): presumably handed back to callback — confirm
+    
+    // State
+    bool initialized = false;
+    int64_t samples_written = 0;
+    
+    std::string last_error;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Audio stream — internal state for decode→encode pipeline
+//////////////////////////////////////////////////////////////////////////////
+
+struct imp_audio_stream_s {
+    // Input
+    std::string input_path;
+    
+    // Output
+    std::string output_path;
+    std::string output_codec;
+    uint32_t    output_sample_rate  = 44100;  // default 44100
+    uint32_t    output_channels     = 2;      // default 2 channels
+    uint32_t    output_bitrate_kbps = 192;    // default 192 kbit/s
+    
+    // Discovered info
+    uint32_t    sample_rate  = 0;    // 0 until discovered
+    uint32_t    channels     = 0;    // 0 until discovered
+    double      duration_sec = 0.0;
+    int64_t     num_samples  = 0;
+    
+    // Pipeline
+    GstElement* pipeline = nullptr;
+    
+    // State
+    bool        initialized  = false;
+    bool        processed    = false;
+    double      wall_time_sec = 0.0;
+    
+    std::string last_error;
+};
+
+#endif // IMP_MPI_IMPL_H
diff --git a/src/mpi/intel_mpi.cpp b/src/mpi/intel_mpi.cpp
new file mode 100644
index 0000000000..5e4122331f
--- /dev/null
+++ b/src/mpi/intel_mpi.cpp
@@ -0,0 +1,1531 @@
+/**
+ * Intel MPI - API Implementation (v8 — Layer 1 Media I/O)
+ *
+ * Implements the C-compatible functions declared in intel_mpi.h.
+ * Uses the internal C++ structs from imp_mpi_impl.h.
+ *
+ * This is a PURE MEDIA layer:
+ *   - Context = device handle (extracts GPU device from model, or standalone)
+ *   - Video decode = N-branch tee pipeline (caller defines branches)
+ *   - Tensor = NV12 frame wrapper returned by imp_video_read_frame
+ *   - Encoder = accepts imp_tensor_t* directly
+ *
+ * NO inference logic lives here. Inference helpers belong in the demo/OVMS layer.
+ */
+
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
+#ifndef GST_USE_UNSTABLE_API
+#define GST_USE_UNSTABLE_API
+#endif
+
+#include "imp_mpi_impl.h"  // same directory
+
+#include <iostream>
+#include <algorithm>
+#include <chrono>
+
+namespace chrono = std::chrono;
+
+//////////////////////////////////////////////////////////////////////////////
+// Version
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Report the library version triple.
+ *
+ * Each output pointer is optional: a null pointer simply skips that
+ * component. Values come from the IMP_VERSION_* macros.
+ */
+void imp_get_version(int* major, int* minor, int* patch) {
+    if (major != nullptr) {
+        *major = IMP_VERSION_MAJOR;
+    }
+    if (minor != nullptr) {
+        *minor = IMP_VERSION_MINOR;
+    }
+    if (patch != nullptr) {
+        *patch = IMP_VERSION_PATCH;
+    }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Context — device handle only, no inference
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Create a context from a compiled model.
+ *
+ * @param ctx             Receives the new context on success; left untouched on failure.
+ * @param compiled_model  Opaque handle that, in this POC, is really an
+ *                        ov::CompiledModel* cast through the C pointer type.
+ * @return IMP_OK on success, IMP_ERROR_INVALID_ARGUMENT when either argument
+ *         is null, IMP_ERROR_INTERNAL when querying the model throws.
+ *
+ * The first model input's shape, when it has rank 4, is recorded as
+ * model_h = shape[1] and model_w = shape[2]. The device is always reported
+ * as GPU.
+ */
+imp_status_t imp_context_create(imp_context_t** ctx,
+                                ov_compiled_model_t* compiled_model) {
+    if (!ctx) return IMP_ERROR_INVALID_ARGUMENT;
+
+    // In this POC, compiled_model is actually a C++ ov::CompiledModel*
+    // cast through the C opaque pointer.
+    auto* cm = reinterpret_cast<ov::CompiledModel*>(compiled_model);
+    if (!cm) return IMP_ERROR_INVALID_ARGUMENT;
+
+    auto start = chrono::high_resolution_clock::now();
+
+    // OpenVINO calls may throw (for example get_shape() on a dynamic shape).
+    // Exceptions must not cross this C entry point, and the half-built
+    // context must not leak, so everything that can throw is guarded.
+    imp_context_s* c = nullptr;
+    try {
+        c = new imp_context_s();
+
+        // Extract model input dimensions for branch auto-deduction.
+        // After NV12 PPP the Y input is [1, H, W, 1].
+        auto inputs = cm->inputs();
+        if (!inputs.empty()) {
+            auto shape = inputs[0].get_shape();
+            if (shape.size() == 4) {
+                c->model_h = static_cast<int>(shape[1]);
+                c->model_w = static_cast<int>(shape[2]);
+            }
+        }
+
+        c->device_type = IMP_DEVICE_GPU;
+        c->device_name = "GPU";
+        c->initialized = true;
+    } catch (...) {
+        delete c;  // delete on nullptr is a no-op if the allocation itself failed
+        return IMP_ERROR_INTERNAL;
+    }
+
+    auto end = chrono::high_resolution_clock::now();
+    c->timing.context_create_ms = chrono::duration<double, std::milli>(end - start).count();
+
+    *ctx = c;
+    return IMP_OK;
+}
+
+/**
+ * Placeholder: creating a context from a remote context is not implemented.
+ * All arguments are ignored and IMP_ERROR_INTERNAL is always returned.
+ */
+imp_status_t imp_context_create_from_remote(imp_context_t** ctx,
+                                            ov_remote_context_t* remote_ctx,
+                                            imp_context_type_t type) {
+    // Mark each parameter as intentionally unused.
+    static_cast<void>(ctx);
+    static_cast<void>(remote_ctx);
+    static_cast<void>(type);
+
+    // Not yet implemented
+    return IMP_ERROR_INTERNAL;
+}
+
+/**
+ * Placeholder: native handle retrieval is not implemented.
+ * All arguments are ignored and IMP_ERROR_INTERNAL is always returned.
+ */
+imp_status_t imp_context_get_native(imp_context_t* ctx,
+                                    imp_context_type_t* type,
+                                    void** native_handle) {
+    // Mark each parameter as intentionally unused.
+    static_cast<void>(ctx);
+    static_cast<void>(type);
+    static_cast<void>(native_handle);
+
+    // Not yet implemented
+    return IMP_ERROR_INTERNAL;
+}
+
+/**
+ * Placeholder: OpenVINO remote-context retrieval is not implemented.
+ * Both arguments are ignored and IMP_ERROR_INTERNAL is always returned.
+ */
+imp_status_t imp_context_get_ov_remote(imp_context_t* ctx,
+                                       ov_remote_context_t** remote_ctx) {
+    // Mark each parameter as intentionally unused.
+    static_cast<void>(ctx);
+    static_cast<void>(remote_ctx);
+
+    // Not yet implemented
+    return IMP_ERROR_INTERNAL;
+}
+
+/**
+ * Copy the context's device type into *device_type.
+ * Returns IMP_ERROR_INVALID_ARGUMENT when either pointer is null.
+ */
+imp_status_t imp_context_get_device_type(imp_context_t* ctx,
+                                         imp_device_type_t* device_type) {
+    const bool args_ok = (ctx != nullptr) && (device_type != nullptr);
+    if (!args_ok) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    *device_type = ctx->device_type;
+    return IMP_OK;
+}
+
+/**
+ * Expose the context's device name as a C string.
+ *
+ * The returned pointer refers to storage inside the context's own string,
+ * so it is only usable while the context exists and the name is unchanged.
+ * Returns IMP_ERROR_INVALID_ARGUMENT when either pointer is null.
+ */
+imp_status_t imp_context_get_device_name(imp_context_t* ctx,
+                                         const char** device_name) {
+    const bool args_ok = (ctx != nullptr) && (device_name != nullptr);
+    if (!args_ok) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    *device_name = ctx->device_name.c_str();
+    return IMP_OK;
+}
+
+/** Free a context created by this library. Passing null is allowed. */
+void imp_context_destroy(imp_context_t* ctx) {
+    // `delete` on a null pointer is a no-op, so no explicit check is needed.
+    delete ctx;
+}
+
+/**
+ * Return the context's last error text.
+ *
+ * A null context yields the literal "null context". Otherwise the pointer
+ * refers to the context's own string storage and is invalidated when that
+ * string changes or the context is destroyed.
+ */
+const char* imp_context_get_error(imp_context_t* ctx) {
+    return (ctx != nullptr) ? ctx->last_error.c_str() : "null context";
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Video Source Configuration
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Allocate a video source description of the given type.
+ *
+ * @param source  Receives the new object; must not be null.
+ * @param type    Stored verbatim in the new source.
+ * @return IMP_OK, or IMP_ERROR_INVALID_ARGUMENT when `source` is null.
+ */
+imp_status_t imp_video_source_create(imp_video_source_t** source,
+                                     imp_source_type_t type) {
+    if (source == nullptr) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    auto* created = new imp_video_source_s();
+    created->type = type;
+
+    *source = created;
+    return IMP_OK;
+}
+
+/**
+ * Set one string-keyed property on a video source.
+ *
+ * Recognised keys: "path"/"url", "device", "width", "height", "framerate",
+ * "format", "transport", "username", "password", "timeout", "low_latency".
+ * Integer-valued keys are parsed with std::stoi; "low_latency" is true for
+ * the values "1" and "true" and false for anything else.
+ *
+ * @return IMP_OK on success; IMP_ERROR_INVALID_ARGUMENT for a null argument,
+ *         an unknown key, or an integer value that cannot be parsed. In the
+ *         error case the source is left unchanged.
+ */
+imp_status_t imp_video_source_set(imp_video_source_t* source,
+                                  const char* key,
+                                  const char* value) {
+    if (!source || !key || !value) return IMP_ERROR_INVALID_ARGUMENT;
+
+    const std::string k(key), v(value);
+
+    // std::stoi throws on non-numeric or out-of-range text. This is a C entry
+    // point, so the failure is converted into a status code instead.
+    auto parse_int = [&v](int& out) -> bool {
+        try {
+            out = std::stoi(v);
+            return true;
+        } catch (...) {
+            return false;
+        }
+    };
+
+    int parsed = 0;
+    if (k == "path" || k == "url")  source->path = v;
+    else if (k == "device")         source->device = v;
+    else if (k == "width") {
+        if (!parse_int(parsed)) return IMP_ERROR_INVALID_ARGUMENT;
+        source->width = parsed;
+    }
+    else if (k == "height") {
+        if (!parse_int(parsed)) return IMP_ERROR_INVALID_ARGUMENT;
+        source->height = parsed;
+    }
+    else if (k == "framerate") {
+        if (!parse_int(parsed)) return IMP_ERROR_INVALID_ARGUMENT;
+        source->framerate = parsed;
+    }
+    else if (k == "format")         source->format = v;
+    else if (k == "transport")      source->transport = v;
+    else if (k == "username")       source->username = v;
+    else if (k == "password")       source->password = v;
+    else if (k == "timeout") {
+        if (!parse_int(parsed)) return IMP_ERROR_INVALID_ARGUMENT;
+        source->timeout_ms = parsed;
+    }
+    else if (k == "low_latency")    source->low_latency = (v == "1" || v == "true");
+    else return IMP_ERROR_INVALID_ARGUMENT;
+
+    return IMP_OK;
+}
+
+/** Free a video source description. Passing null is allowed. */
+void imp_video_source_destroy(imp_video_source_t* source) {
+    // `delete` on a null pointer is a no-op, so no explicit check is needed.
+    delete source;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Internal: read NV12 from appsink into frame buffer
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Pull one sample from an appsink and copy its two NV12 planes into `frame`.
+ *
+ * @param appsink   Sink to pull from; waits up to one second for a sample.
+ * @param frame     Destination; re-allocated to the sample's dimensions.
+ * @param total_ms  Accumulates the whole call's duration.
+ * @param pull_ms   Accumulates the time spent waiting for the sample.
+ * @param copy_ms   Accumulates the time spent mapping and copying planes.
+ * @return true when both planes were copied; false when no sample arrived,
+ *         the sample carried no usable buffer/caps, or mapping failed.
+ *
+ * Plane rows are copied tightly packed (frame->width bytes per row),
+ * dropping any stride padding present in the GStreamer buffer.
+ */
+static bool read_nv12_from_appsink(GstElement* appsink, imp_nv12_frame_t* frame,
+                                   double& total_ms, double& pull_ms, double& copy_ms) {
+    auto decode_start = chrono::high_resolution_clock::now();
+
+    auto ps = chrono::high_resolution_clock::now();
+    GstSample* sample = gst_app_sink_try_pull_sample(GST_APP_SINK(appsink), GST_SECOND);
+    if (!sample) return false;
+    pull_ms += chrono::duration<double, std::milli>(
+        chrono::high_resolution_clock::now() - ps).count();
+
+    GstBuffer* buffer = gst_sample_get_buffer(sample);
+    GstCaps*   caps   = gst_sample_get_caps(sample);
+
+    // A sample may lack a buffer or caps, and caps parsing can fail. Using
+    // `info` after a failed parse would read uninitialised dimensions.
+    GstVideoInfo info;
+    if (!buffer || !caps || !gst_video_info_from_caps(&info, caps)) {
+        frame->valid = false;
+        gst_sample_unref(sample);
+        return false;
+    }
+
+    frame->allocate(info.width, info.height);
+    frame->valid = false;
+
+    auto cs = chrono::high_resolution_clock::now();
+
+    GstVideoFrame vframe;
+    if (gst_video_frame_map(&vframe, &info, buffer, GST_MAP_READ)) {
+        // Offsets are computed in size_t so large frames cannot overflow int.
+        const size_t row_bytes = static_cast<size_t>(frame->width);
+        const size_t y_rows    = static_cast<size_t>(frame->height);
+        const size_t uv_rows   = static_cast<size_t>(frame->height / 2);
+
+        uint8_t* y_data  = (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&vframe, 0);
+        int      y_stride = GST_VIDEO_FRAME_PLANE_STRIDE(&vframe, 0);
+
+        if (y_stride == frame->width) {
+            // No padding: the plane is contiguous and can be copied at once.
+            memcpy(frame->y_plane.data(), y_data, row_bytes * y_rows);
+        } else {
+            for (size_t row = 0; row < y_rows; row++)
+                memcpy(frame->y_plane.data() + row * row_bytes,
+                       y_data + row * static_cast<size_t>(y_stride), row_bytes);
+        }
+
+        uint8_t* uv_data  = (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&vframe, 1);
+        int      uv_stride = GST_VIDEO_FRAME_PLANE_STRIDE(&vframe, 1);
+
+        if (uv_stride == frame->width) {
+            memcpy(frame->uv_plane.data(), uv_data, row_bytes * uv_rows);
+        } else {
+            for (size_t row = 0; row < uv_rows; row++)
+                memcpy(frame->uv_plane.data() + row * row_bytes,
+                       uv_data + row * static_cast<size_t>(uv_stride), row_bytes);
+        }
+
+        frame->valid = true;
+        gst_video_frame_unmap(&vframe);
+    }
+
+    gst_sample_unref(sample);
+
+    copy_ms  += chrono::duration<double, std::milli>(
+        chrono::high_resolution_clock::now() - cs).count();
+    total_ms += chrono::duration<double, std::milli>(
+        chrono::high_resolution_clock::now() - decode_start).count();
+
+    return frame->valid;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Video Stream — N-branch tee pipeline
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Open a video source and start a D3D11 NV12 decode pipeline exposing one
+ * appsink per branch.
+ *
+ * @param stream  Receives the opened stream on success.
+ * @param source  Source description (type, path, width, height are read here).
+ * @param ctx     Context; supplies model dimensions and receives timing
+ *                and error text.
+ * @param opts    Optional; when it lists branches, one output is created per
+ *                branch, otherwise a single "default" branch at source
+ *                resolution is used.
+ * @return IMP_OK, IMP_ERROR_INVALID_ARGUMENT for null stream/source/ctx, or
+ *         IMP_ERROR_DECODE_FAILED when the pipeline cannot be built or
+ *         started (ctx->last_error describes why).
+ *
+ * Branch size is resolved as: explicit branch size > model input size from
+ * the context > source size. Source size defaults to 1280x720 when the
+ * source does not specify it. Any source type other than IMP_SOURCE_FILE is
+ * treated as a camera (mfvideosrc).
+ *
+ * On every failure path all GStreamer references taken so far are released
+ * and the pipeline is brought to NULL state before being unreffed.
+ */
+imp_status_t imp_video_open(imp_video_stream_t** stream,
+                            imp_video_source_t* source,
+                            imp_context_t* ctx,
+                            const imp_video_decode_opts_t* opts) {
+    if (!stream || !source || !ctx) return IMP_ERROR_INVALID_ARGUMENT;
+
+    auto start = chrono::high_resolution_clock::now();
+
+    gst_init(nullptr, nullptr);
+
+    auto* s = new imp_video_stream_s();
+    s->timing = &ctx->timing;
+
+    // Determine source resolution from source config
+    int src_w = (source->width  > 0) ? source->width  : 1280;
+    int src_h = (source->height > 0) ? source->height : 720;
+
+    bool is_file = (source->type == IMP_SOURCE_FILE);
+
+    // Build resolved branch list.
+    // If no branches specified, create a single branch at source resolution.
+    struct resolved_branch {
+        int width;
+        int height;
+        std::string name;
+    };
+    std::vector<resolved_branch> resolved;
+
+    if (opts && opts->branches && opts->branch_count > 0) {
+        for (uint32_t i = 0; i < opts->branch_count; i++) {
+            resolved_branch rb;
+            int bw = (int)opts->branches[i].width;
+            int bh = (int)opts->branches[i].height;
+
+            // Layered resolution: explicit > model dims > source res
+            if (bw > 0 && bh > 0) {
+                rb.width  = bw;
+                rb.height = bh;
+            } else if (ctx->model_w > 0 && ctx->model_h > 0) {
+                rb.width  = ctx->model_w;
+                rb.height = ctx->model_h;
+            } else {
+                rb.width  = src_w;
+                rb.height = src_h;
+            }
+
+            rb.name = opts->branches[i].name ? opts->branches[i].name
+                                             : ("branch_" + std::to_string(i));
+            resolved.push_back(rb);
+        }
+    } else {
+        // Default: single branch at source resolution
+        resolved.push_back({src_w, src_h, "default"});
+    }
+
+    // A tee is only needed when there is more than one branch; a single
+    // branch is chained directly (with an optional d3d11scale).
+    bool needs_tee = (resolved.size() > 1);
+
+    // Build GStreamer pipeline string
+    std::string pipeline_str;
+
+    // Queue params differ for file vs camera
+    std::string queue_file = "queue max-size-buffers=0 max-size-time=0 max-size-bytes=0";
+    std::string queue_cam  = "queue max-size-buffers=2 leaky=downstream";
+    std::string appsink_file = "emit-signals=false sync=false max-buffers=0 drop=false";
+    std::string appsink_cam  = "emit-signals=false sync=false max-buffers=2 drop=true";
+    std::string queue_str   = is_file ? queue_file : queue_cam;
+    std::string sink_props  = is_file ? appsink_file : appsink_cam;
+
+    // Source + decode + NV12 convert
+    if (is_file) {
+        // NOTE(review): the path is spliced into the launch string unescaped;
+        // a path containing '"' (and possibly '\') will likely not parse as
+        // intended — consider setting the filesrc "location" property instead.
+        pipeline_str =
+            "filesrc location=\"" + source->path + "\" ! "
+            "decodebin ! "
+            "d3d11upload ! "
+            "d3d11convert ! "
+            "video/x-raw(memory:D3D11Memory),format=NV12,"
+            "width=" + std::to_string(src_w) + ",height=" + std::to_string(src_h);
+    } else {
+        pipeline_str =
+            "mfvideosrc ! "
+            "video/x-raw,width=" + std::to_string(src_w) + ",height=" + std::to_string(src_h) + " ! "
+            "d3d11upload ! "
+            "d3d11convert ! "
+            "video/x-raw(memory:D3D11Memory),format=NV12,"
+            "width=" + std::to_string(src_w) + ",height=" + std::to_string(src_h);
+    }
+
+    if (needs_tee) {
+        pipeline_str += " ! tee name=t";
+
+        for (size_t i = 0; i < resolved.size(); i++) {
+            auto& rb = resolved[i];
+            std::string sink_name = "branch_" + std::to_string(i);
+
+            pipeline_str += " t. ! " + queue_str + " ! ";
+
+            // If branch resolution differs from source, add d3d11scale
+            if (rb.width != src_w || rb.height != src_h) {
+                pipeline_str +=
+                    "d3d11scale ! "
+                    "video/x-raw(memory:D3D11Memory),format=NV12,"
+                    "width=" + std::to_string(rb.width) + ",height=" + std::to_string(rb.height) + " ! ";
+            }
+
+            pipeline_str +=
+                "d3d11download ! "
+                "video/x-raw,format=NV12,"
+                "width=" + std::to_string(rb.width) + ",height=" + std::to_string(rb.height) + " ! "
+                "appsink name=" + sink_name + " " + sink_props;
+        }
+    } else {
+        // Single branch — no tee needed
+        auto& rb = resolved[0];
+
+        if (rb.width != src_w || rb.height != src_h) {
+            pipeline_str +=
+                " ! d3d11scale ! "
+                "video/x-raw(memory:D3D11Memory),format=NV12,"
+                "width=" + std::to_string(rb.width) + ",height=" + std::to_string(rb.height);
+        }
+
+        pipeline_str +=
+            " ! d3d11download ! "
+            "video/x-raw,format=NV12,"
+            "width=" + std::to_string(rb.width) + ",height=" + std::to_string(rb.height) + " ! "
+            "appsink name=branch_0 " + sink_props;
+    }
+
+    std::cout << "Pipeline: " << pipeline_str << std::endl;
+
+    GError* error = nullptr;
+    s->pipeline = gst_parse_launch(pipeline_str.c_str(), &error);
+    if (error || !s->pipeline) {
+        ctx->last_error = error ? error->message : "pipeline creation failed";
+        if (error) g_error_free(error);
+        // gst_parse_launch may hand back a partially built pipeline together
+        // with an error; it still has to be released.
+        if (s->pipeline) gst_object_unref(s->pipeline);
+        delete s;
+        return IMP_ERROR_DECODE_FAILED;
+    }
+
+    // Tear down a partially constructed stream: drop the first `acquired`
+    // appsink references, stop and release the pipeline, free the stream.
+    auto discard_stream = [s](size_t acquired) {
+        for (size_t j = 0; j < acquired; j++) {
+            gst_object_unref(s->branches[j].appsink);
+        }
+        gst_element_set_state(s->pipeline, GST_STATE_NULL);
+        gst_object_unref(s->pipeline);
+        delete s;
+    };
+
+    // Retrieve appsinks and populate branch info
+    s->branches.resize(resolved.size());
+    for (size_t i = 0; i < resolved.size(); i++) {
+        std::string sink_name = "branch_" + std::to_string(i);
+        s->branches[i].appsink = gst_bin_get_by_name(GST_BIN(s->pipeline), sink_name.c_str());
+        if (!s->branches[i].appsink) {
+            ctx->last_error = "failed to get appsink: " + sink_name;
+            discard_stream(i);  // branches [0, i) already hold a reference
+            return IMP_ERROR_DECODE_FAILED;
+        }
+        s->branches[i].width  = resolved[i].width;
+        s->branches[i].height = resolved[i].height;
+        s->branches[i].name   = resolved[i].name;
+        s->branches[i].frame.allocate(resolved[i].width, resolved[i].height);
+    }
+
+    gst_element_set_state(s->pipeline, GST_STATE_PLAYING);
+
+    GstStateChangeReturn ret = gst_element_get_state(
+        s->pipeline, nullptr, nullptr, 5 * GST_SECOND);
+    if (ret == GST_STATE_CHANGE_FAILURE) {
+        ctx->last_error = "pipeline failed to start";
+        discard_stream(s->branches.size());
+        return IMP_ERROR_DECODE_FAILED;
+    }
+
+    // Get actual dimensions from first branch negotiated caps
+    GstPad* pad = gst_element_get_static_pad(s->branches[0].appsink, "sink");
+    if (pad) {
+        GstCaps* caps = gst_pad_get_current_caps(pad);
+        if (caps) {
+            GstStructure* st = gst_caps_get_structure(caps, 0);
+            int actual_w = 0, actual_h = 0;
+            gst_structure_get_int(st, "width", &actual_w);
+            gst_structure_get_int(st, "height", &actual_h);
+            if (actual_w > 0) s->branches[0].width = actual_w;
+            if (actual_h > 0) s->branches[0].height = actual_h;
+            gst_caps_unref(caps);
+        }
+        gst_object_unref(pad);
+    }
+
+    s->use_hw_decode = true;
+
+    auto end = chrono::high_resolution_clock::now();
+    ctx->timing.video_open_ms = chrono::duration<double, std::milli>(end - start).count();
+
+    // Print branch info
+    for (size_t i = 0; i < s->branches.size(); i++) {
+        auto& b = s->branches[i];
+        std::cout << "Branch " << i << " [" << b.name << "]: "
+                  << b.width << "x" << b.height << " NV12" << std::endl;
+    }
+    if (s->branches.size() > 1) {
+        std::cout << "GPU resize via d3d11scale (on D3D11)" << std::endl;
+    }
+
+    *stream = s;
+    return IMP_OK;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Video Read Frame — branch-aware
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Read the next NV12 frame from one branch of an open stream.
+ *
+ * @param tensor        Optional; receives a pointer to the branch's cached
+ *                      tensor wrapper, whose plane pointers refer to the
+ *                      branch's own frame buffers.
+ * @param stream        Open stream.
+ * @param branch_index  Zero-based branch index.
+ * @return IMP_OK on success, IMP_ERROR_INVALID_ARGUMENT for a null stream or
+ *         out-of-range index, IMP_ERROR_STREAM_END when no frame could be read.
+ */
+imp_status_t imp_video_read_frame(imp_tensor_t** tensor,
+                                  imp_video_stream_t* stream,
+                                  uint32_t branch_index) {
+    if (!stream) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+    if (branch_index >= (uint32_t)stream->branches.size()) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    auto& br = stream->branches[branch_index];
+
+    const bool got_frame = read_nv12_from_appsink(br.appsink,
+                                                  &br.frame,
+                                                  br.total_decode_ms,
+                                                  br.total_decode_pull_ms,
+                                                  br.total_decode_copy_ms);
+    if (!got_frame) {
+        return IMP_ERROR_STREAM_END;
+    }
+
+    // Refresh the cached wrapper so it describes the frame just read.
+    auto& view = br.tensor_cache;
+    view.format      = IMP_FORMAT_NV12;
+    view.device_type = IMP_DEVICE_CPU;
+    view.width       = br.frame.width;
+    view.height      = br.frame.height;
+    view.y_data      = br.frame.y_plane.data();
+    view.uv_data     = br.frame.uv_plane.data();
+    view.valid       = true;
+
+    if (tensor) {
+        *tensor = &view;
+    }
+    return IMP_OK;
+}
+
+/**
+ * Read the next frame from the first branch whose name equals `branch_name`.
+ *
+ * Delegates to imp_video_read_frame() once the index is known.
+ * Returns IMP_ERROR_INVALID_ARGUMENT for null arguments or an unknown name.
+ */
+imp_status_t imp_video_read_frame_by_name(imp_tensor_t** tensor,
+                                          imp_video_stream_t* stream,
+                                          const char* branch_name) {
+    if (stream == nullptr || branch_name == nullptr) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    const uint32_t branch_total = (uint32_t)stream->branches.size();
+
+    // Linear scan for the first matching name.
+    uint32_t idx = 0;
+    while (idx < branch_total && !(stream->branches[idx].name == branch_name)) {
+        ++idx;
+    }
+
+    if (idx == branch_total) {
+        return IMP_ERROR_INVALID_ARGUMENT; // branch name not found
+    }
+    return imp_video_read_frame(tensor, stream, idx);
+}
+
+/**
+ * Placeholder: asynchronous frame delivery is not implemented.
+ * All arguments are ignored and IMP_ERROR_INTERNAL is always returned.
+ */
+imp_status_t imp_video_start_async(imp_video_stream_t* stream,
+                                   imp_video_frame_callback_t callback,
+                                   void* user_data) {
+    // Mark each parameter as intentionally unused.
+    static_cast<void>(stream);
+    static_cast<void>(callback);
+    static_cast<void>(user_data);
+
+    // Not yet implemented
+    return IMP_ERROR_INTERNAL;
+}
+
+/** Currently a no-op: the stream argument is ignored. */
+void imp_video_stop(imp_video_stream_t* stream) {
+    static_cast<void>(stream);
+}
+
+/**
+ * Report basic stream properties, taken from the first branch.
+ *
+ * Every output pointer is optional. `fps` is currently a fixed 30.0 and
+ * `frame_count` is always -1; neither is read from the stream yet.
+ * Returns IMP_ERROR_INVALID_ARGUMENT for a null stream or one with no branches.
+ */
+imp_status_t imp_video_get_info(imp_video_stream_t* stream,
+                                uint32_t* width, uint32_t* height,
+                                float* fps, int64_t* frame_count) {
+    if (!stream) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+    if (stream->branches.empty()) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    // The first branch acts as the "primary" description of the stream.
+    const auto& primary = stream->branches[0];
+
+    if (width) {
+        *width = (uint32_t)primary.width;
+    }
+    if (height) {
+        *height = (uint32_t)primary.height;
+    }
+    if (fps) {
+        *fps = 30.0f; // TODO: detect from stream
+    }
+    if (frame_count) {
+        *frame_count = -1; // unknown for live
+    }
+    return IMP_OK;
+}
+
+/**
+ * Stop a stream's pipeline, release its GStreamer references and free it.
+ * Passing null is allowed.
+ */
+void imp_video_close(imp_video_stream_t* stream) {
+    if (stream == nullptr) {
+        return;
+    }
+
+    if (auto* pipe = stream->pipeline) {
+        // The pipeline goes to NULL state first so elements shut down
+        // before any reference is dropped.
+        gst_element_set_state(pipe, GST_STATE_NULL);
+
+        for (size_t i = 0; i < stream->branches.size(); ++i) {
+            auto* sink = stream->branches[i].appsink;
+            if (sink) {
+                gst_object_unref(sink);
+            }
+        }
+        gst_object_unref(pipe);
+    }
+
+    delete stream;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Video Encoder
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Create an H.264/MP4 file encoder fed through an appsrc.
+ *
+ * @param encoder  Receives the new encoder on success.
+ * @param width    Frame width advertised in the appsrc caps.
+ * @param height   Frame height advertised in the appsrc caps.
+ * @param ctx      Context; receives error text and owns the timing record
+ *                 the encoder updates.
+ * @param opts     Optional; supplies `framerate` (default 30) and
+ *                 `output_path` (default "output.mp4").
+ * @return IMP_OK, IMP_ERROR_INVALID_ARGUMENT for null encoder/ctx, or
+ *         IMP_ERROR_ENCODE_FAILED when the pipeline cannot be built or
+ *         started (ctx->last_error describes why).
+ */
+imp_status_t imp_video_encoder_create(imp_video_encoder_t** encoder,
+                                      uint32_t width, uint32_t height,
+                                      imp_context_t* ctx,
+                                      const imp_video_encode_opts_t* opts) {
+    if (!encoder || !ctx) return IMP_ERROR_INVALID_ARGUMENT;
+
+    // Safe to call repeatedly; lets the encoder be used even when no
+    // decode stream was opened first.
+    gst_init(nullptr, nullptr);
+
+    auto* e = new imp_video_encoder_s();
+    e->width   = (int)width;
+    e->height  = (int)height;
+    e->fps_num = (opts && opts->framerate > 0) ? (int)opts->framerate : 30;
+    e->fps_den = 1;
+    e->timing  = &ctx->timing;
+
+    std::string output_path = (opts && opts->output_path) ? opts->output_path : "output.mp4";
+
+    std::cout << "=== Opening GPU encoder: " << output_path << " ===" << std::endl;
+
+    // NOTE(review): output_path is spliced into the launch string unescaped;
+    // a path containing '"' (and possibly '\') will likely not parse as intended.
+    std::string pipeline =
+        "appsrc name=src format=time is-live=true do-timestamp=true ! "
+        "video/x-raw,format=NV12,width=" + std::to_string(width) +
+        ",height=" + std::to_string(height) +
+        ",framerate=" + std::to_string(e->fps_num) + "/" + std::to_string(e->fps_den) + " ! "
+        "mfh264enc ! h264parse ! mp4mux ! filesink location=\"" + output_path + "\"";
+
+    GError* error = nullptr;
+    e->pipeline = gst_parse_launch(pipeline.c_str(), &error);
+    if (error || !e->pipeline) {
+        ctx->last_error = error ? error->message : "encoder pipeline creation failed";
+        if (error) g_error_free(error);
+        // A partially built pipeline may be returned alongside the error.
+        if (e->pipeline) gst_object_unref(e->pipeline);
+        delete e;
+        return IMP_ERROR_ENCODE_FAILED;
+    }
+
+    e->appsrc = gst_bin_get_by_name(GST_BIN(e->pipeline), "src");
+    if (!e->appsrc) {
+        ctx->last_error = "failed to get encoder appsrc";
+        gst_object_unref(e->pipeline);
+        delete e;
+        return IMP_ERROR_ENCODE_FAILED;
+    }
+
+    g_object_set(e->appsrc, "stream-type", 0, "format", GST_FORMAT_TIME, nullptr);
+
+    GstStateChangeReturn ret = gst_element_set_state(e->pipeline, GST_STATE_PLAYING);
+    if (ret == GST_STATE_CHANGE_FAILURE) {
+        ctx->last_error = "encoder pipeline failed to start";
+        // Return to NULL state before dropping the last reference.
+        gst_element_set_state(e->pipeline, GST_STATE_NULL);
+        gst_object_unref(e->appsrc);
+        gst_object_unref(e->pipeline);
+        delete e;
+        return IMP_ERROR_ENCODE_FAILED;
+    }
+    // Give the asynchronous state change up to 500 ms to settle.
+    gst_element_get_state(e->pipeline, nullptr, nullptr, 500 * GST_MSECOND);
+
+    e->encoder_name    = "mfh264enc";
+    e->is_gpu_encoder  = true;
+    e->initialized     = true;
+
+    std::cout << "GPU encoder opened: mfh264enc" << std::endl;
+
+    *encoder = e;
+    return IMP_OK;
+}
+
+/**
+ * Encode a frame from an imp_tensor_t (NV12 format).
+ *
+ * The Y plane (width*height bytes) and the UV plane (width*(height/2) bytes)
+ * are copied back-to-back into a new GStreamer buffer, stamped with a
+ * PTS derived from the frame counter, and pushed into the encoder's appsrc.
+ *
+ * @param encoder  Initialized encoder.
+ * @param tensor   Valid tensor with non-null y_data/uv_data and positive
+ *                 dimensions.
+ * @return IMP_OK, IMP_ERROR_INVALID_ARGUMENT when a precondition above is
+ *         violated, or IMP_ERROR_ENCODE_FAILED when buffer allocation,
+ *         mapping or the push fails.
+ */
+imp_status_t imp_video_encoder_write(imp_video_encoder_t* encoder,
+                                     imp_tensor_t* tensor) {
+    if (!encoder || !encoder->initialized || !tensor || !tensor->valid)
+        return IMP_ERROR_INVALID_ARGUMENT;
+
+    // The plane pointers are fed straight to memcpy and the sizes are
+    // derived from the dimensions, so reject anything unusable up front.
+    if (!tensor->y_data || !tensor->uv_data ||
+        tensor->width <= 0 || tensor->height <= 0)
+        return IMP_ERROR_INVALID_ARGUMENT;
+
+    auto encode_start = chrono::high_resolution_clock::now();
+
+    // NOTE(review): the tensor size is not compared with the encoder's
+    // configured width/height; a mismatch would disagree with the appsrc
+    // caps — confirm whether callers can ever pass a different size.
+    int w = tensor->width;
+    int h = tensor->height;
+    size_t y_size  = (size_t)w * h;
+    size_t uv_size = (size_t)w * (h / 2);
+
+    GstBuffer* buffer = gst_buffer_new_allocate(nullptr, y_size + uv_size, nullptr);
+    if (!buffer) return IMP_ERROR_ENCODE_FAILED;
+
+    GstMapInfo map;
+    if (!gst_buffer_map(buffer, &map, GST_MAP_WRITE)) {
+        gst_buffer_unref(buffer);
+        return IMP_ERROR_ENCODE_FAILED;
+    }
+
+    memcpy(map.data, tensor->y_data, y_size);
+    memcpy(map.data + y_size, tensor->uv_data, uv_size);
+    gst_buffer_unmap(buffer, &map);
+
+    // One frame lasts fps_den/fps_num seconds.
+    GstClockTime duration = gst_util_uint64_scale(GST_SECOND,
+                                                  encoder->fps_den, encoder->fps_num);
+    GST_BUFFER_PTS(buffer)      = encoder->frame_count * duration;
+    GST_BUFFER_DURATION(buffer) = duration;
+
+    // gst_app_src_push_buffer takes ownership of the buffer in all cases,
+    // so it must not be unreffed here even on failure.
+    GstFlowReturn ret = gst_app_src_push_buffer(GST_APP_SRC(encoder->appsrc), buffer);
+    if (ret != GST_FLOW_OK) return IMP_ERROR_ENCODE_FAILED;
+
+    encoder->frame_count++;
+
+    if (encoder->timing) {
+        encoder->timing->total_encode_ms += chrono::duration<double, std::milli>(
+            chrono::high_resolution_clock::now() - encode_start).count();
+    }
+
+    return IMP_OK;
+}
+
+/**
+ * Finish encoding and free the encoder.
+ *
+ * Signals end-of-stream on the appsrc, waits up to five seconds for an EOS
+ * or ERROR message on the bus, then stops the pipeline and releases all
+ * references. A null or uninitialized encoder is ignored (and not freed).
+ */
+void imp_video_encoder_close(imp_video_encoder_t* encoder) {
+    if (encoder == nullptr) {
+        return;
+    }
+    if (!encoder->initialized) {
+        return;
+    }
+
+    gst_app_src_end_of_stream(GST_APP_SRC(encoder->appsrc));
+
+    // Wait for the muxer to finalize the file before tearing things down.
+    if (GstBus* msg_bus = gst_element_get_bus(encoder->pipeline)) {
+        const GstMessageType wanted =
+            (GstMessageType)(GST_MESSAGE_EOS | GST_MESSAGE_ERROR);
+        GstMessage* reply = gst_bus_timed_pop_filtered(msg_bus, 5 * GST_SECOND, wanted);
+        if (reply) {
+            gst_message_unref(reply);
+        }
+        gst_object_unref(msg_bus);
+    }
+
+    gst_element_set_state(encoder->pipeline, GST_STATE_NULL);
+    gst_object_unref(encoder->appsrc);
+    gst_object_unref(encoder->pipeline);
+
+    std::cout << "Encoder closed. Frames: " << encoder->frame_count << std::endl;
+    delete encoder;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Tensor utilities
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Copy the tensor's device type into *device_type.
+ * Returns IMP_ERROR_INVALID_ARGUMENT when either pointer is null.
+ */
+imp_status_t imp_tensor_get_device_type(imp_tensor_t* tensor,
+                                        imp_device_type_t* device_type) {
+    const bool args_ok = (tensor != nullptr) && (device_type != nullptr);
+    if (!args_ok) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    *device_type = tensor->device_type;
+    return IMP_OK;
+}
+
+/**
+ * Expose the tensor's device name as a C string.
+ *
+ * The pointer refers to the tensor's own string storage, so it is only
+ * usable while the tensor exists and the name is unchanged.
+ * Returns IMP_ERROR_INVALID_ARGUMENT when either pointer is null.
+ */
+imp_status_t imp_tensor_get_device_name(imp_tensor_t* tensor,
+                                        const char** device_name) {
+    const bool args_ok = (tensor != nullptr) && (device_name != nullptr);
+    if (!args_ok) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    *device_name = tensor->device_name.c_str();
+    return IMP_OK;
+}
+
+/**
+ * Placeholder: context-type lookup is not implemented.
+ * Both arguments are ignored and IMP_ERROR_INTERNAL is always returned.
+ */
+imp_status_t imp_tensor_get_context_type(imp_tensor_t* tensor,
+                                         imp_context_type_t* context_type) {
+    // Mark each parameter as intentionally unused.
+    static_cast<void>(tensor);
+    static_cast<void>(context_type);
+
+    return IMP_ERROR_INTERNAL;
+}
+
+/**
+ * Hand out the address of the tensor's embedded ov_tensor member.
+ *
+ * @param tensor       Source tensor.
+ * @param ov_tensor    Receives a pointer to the member inside `tensor`; it
+ *                     is valid only as long as the tensor itself.
+ * @param device_type  Optional; receives the tensor's device type.
+ * @return IMP_OK, or IMP_ERROR_INVALID_ARGUMENT when `tensor` or
+ *         `ov_tensor` is null.
+ */
+imp_status_t imp_tensor_get_ov(imp_tensor_t* tensor,
+                               void** ov_tensor,
+                               imp_device_type_t* device_type) {
+    if (tensor == nullptr || ov_tensor == nullptr) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    *ov_tensor = &tensor->ov_tensor;
+
+    if (device_type != nullptr) {
+        *device_type = tensor->device_type;
+    }
+    return IMP_OK;
+}
+
+/**
+ * Query a tensor's shape.
+ *
+ * @param tensor    Tensor to inspect.
+ * @param dims      Caller-provided array that receives the dimensions.
+ * @param num_dims  In: capacity of `dims`. Out: the tensor's full rank,
+ *                  which may exceed the number of entries written.
+ * @return IMP_OK, or IMP_ERROR_INVALID_ARGUMENT when any pointer is null.
+ *
+ * NV12 tensors report [height, width]; all other tensors report the shape
+ * of their ov_tensor. At most `*num_dims` (input value) entries are written.
+ */
+imp_status_t imp_tensor_get_shape(imp_tensor_t* tensor,
+                                  int64_t* dims,
+                                  size_t* num_dims) {
+    if (tensor == nullptr || dims == nullptr || num_dims == nullptr) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    const size_t capacity = *num_dims;
+
+    if (tensor->format != IMP_FORMAT_NV12) {
+        // For ov::Tensor backed tensors
+        auto full_shape = tensor->ov_tensor.get_shape();
+        const size_t rank = full_shape.size();
+        for (size_t d = 0; d < capacity && d < rank; ++d) {
+            dims[d] = (int64_t)full_shape[d];
+        }
+        *num_dims = rank;
+        return IMP_OK;
+    }
+
+    // For NV12 tensors, report [height, width] as the shape
+    if (capacity >= 1) {
+        dims[0] = tensor->height;
+    }
+    if (capacity >= 2) {
+        dims[1] = tensor->width;
+    }
+    *num_dims = 2;
+    return IMP_OK;
+}
+
+/**
+ * Report a tensor's element type.
+ *
+ * Only NV12 tensors are supported for now and always report IMP_TYPE_U8.
+ * Any other format returns IMP_ERROR_INTERNAL; null arguments return
+ * IMP_ERROR_INVALID_ARGUMENT.
+ */
+imp_status_t imp_tensor_get_element_type(imp_tensor_t* tensor,
+                                         imp_element_type_t* type) {
+    if (tensor == nullptr || type == nullptr) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    if (tensor->format != IMP_FORMAT_NV12) {
+        return IMP_ERROR_INTERNAL; // TODO: map ov::element::Type
+    }
+
+    // NV12 tensors are always U8
+    *type = IMP_TYPE_U8;
+    return IMP_OK;
+}
+
+/**
+ * Free a heap-allocated tensor. Passing null is allowed.
+ *
+ * NOTE(review): imp_video_read_frame() returns a pointer to a tensor that is
+ * a member of the stream's branch, not a separate allocation. Passing such
+ * a pointer here would `delete` memory that was never allocated with `new`.
+ * Confirm the intended ownership contract and guard against it if needed.
+ */
+void imp_tensor_release(imp_tensor_t* tensor) {
+    // `delete` on a null pointer is a no-op, so no explicit check is needed.
+    delete tensor;
+}
+
+/**
+ * Release memory with the C runtime's free().
+ * Passing null is allowed, as free() accepts it.
+ */
+void imp_free(void* ptr) {
+    void* const block = ptr;
+    free(block);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// HW support queries
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Report whether hardware decode is available.
+ *
+ * Currently unconditional: writes true to *supported when it is non-null
+ * and always returns IMP_OK. The context is not consulted.
+ */
+imp_status_t imp_hw_decode_supported(imp_context_t* ctx, bool* supported) {
+    static_cast<void>(ctx);
+
+    if (supported != nullptr) {
+        *supported = true;
+    }
+    return IMP_OK;
+}
+
+/**
+ * Report whether hardware encode is available.
+ *
+ * Currently unconditional: writes true to *supported when it is non-null
+ * and always returns IMP_OK. The context is not consulted.
+ */
+imp_status_t imp_hw_encode_supported(imp_context_t* ctx, bool* supported) {
+    static_cast<void>(ctx);
+
+    if (supported != nullptr) {
+        *supported = true;
+    }
+    return IMP_OK;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Image decode / encode — stubs
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Placeholder: in-memory image decode is not implemented.
+ * All arguments are ignored and IMP_ERROR_INTERNAL is always returned.
+ */
+imp_status_t imp_decode_image(imp_tensor_t** t, const void* d, size_t s,
+                              imp_context_t* c, const imp_image_decode_opts_t* o,
+                              imp_decode_callback_t cb, void* ud) {
+    // Mark each parameter as intentionally unused.
+    static_cast<void>(t);
+    static_cast<void>(d);
+    static_cast<void>(s);
+    static_cast<void>(c);
+    static_cast<void>(o);
+    static_cast<void>(cb);
+    static_cast<void>(ud);
+
+    return IMP_ERROR_INTERNAL;
+}
+
+/**
+ * Placeholder: image-file decode is not implemented.
+ * All arguments are ignored and IMP_ERROR_INTERNAL is always returned.
+ */
+imp_status_t imp_decode_image_file(imp_tensor_t** t, const char* f,
+                                   imp_context_t* c, const imp_image_decode_opts_t* o,
+                                   imp_decode_callback_t cb, void* ud) {
+    // Mark each parameter as intentionally unused.
+    static_cast<void>(t);
+    static_cast<void>(f);
+    static_cast<void>(c);
+    static_cast<void>(o);
+    static_cast<void>(cb);
+    static_cast<void>(ud);
+
+    return IMP_ERROR_INTERNAL;
+}
+
+/**
+ * Decode an in-memory audio blob to 32-bit float samples.
+ *
+ * The data is pushed through
+ *   appsrc → decodebin → audioconvert → audioresample → caps → appsink
+ * and every output buffer is appended to one sample vector, which is then
+ * copied into a new tensor of shape {1, sample_count}.
+ *
+ * @param tensor     Receives a newly allocated tensor on success.
+ * @param data,size  Encoded audio bytes; must be non-null / non-zero.
+ * @param ctx        Optional; receives error text on failure.
+ * @param opts       Optional; `sample_rate` (default 16000) and `channels`
+ *                   (default 1) select the output format. `normalize` is
+ *                   not consulted.
+ * @param callback, user_data  Ignored — async decode is not implemented.
+ * @return IMP_OK; IMP_ERROR_INVALID_ARGUMENT for bad arguments;
+ *         IMP_ERROR_DECODE_FAILED when the pipeline cannot be built, fed or
+ *         started, reports an error, or yields no samples;
+ *         IMP_ERROR_INTERNAL when elements cannot be looked up or the
+ *         result tensor cannot be allocated.
+ *
+ * NOTE(review): with channels > 1 the samples in the {1, N} tensor are
+ * presumably interleaved, and the channel count is not recorded in the
+ * shape — confirm that consumers expect this.
+ */
+imp_status_t imp_decode_audio(imp_tensor_t** tensor, const void* data, size_t size,
+                              imp_context_t* ctx, const imp_audio_decode_opts_t* opts,
+                              imp_decode_callback_t callback, void* user_data) {
+    (void)callback; (void)user_data;  // async not yet implemented
+    if (!tensor || !data || size == 0) return IMP_ERROR_INVALID_ARGUMENT;
+
+    gst_init(nullptr, nullptr);
+
+    // Resolve options
+    uint32_t target_rate = (opts && opts->sample_rate > 0) ? opts->sample_rate : 16000;
+    uint32_t target_channels = (opts && opts->channels > 0) ? opts->channels : 1;
+    // opts->normalize is deliberately unused: GStreamer F32LE is already [-1,1]
+
+    // Build pipeline: appsrc → decodebin → audioconvert → audioresample → caps → appsink
+    std::string capsStr =
+        "audio/x-raw,format=F32LE,channels=" + std::to_string(target_channels) +
+        ",rate=" + std::to_string(target_rate);
+
+    std::string pipelineStr =
+        "appsrc name=src ! decodebin ! audioconvert ! audioresample ! "
+        + capsStr + " ! appsink name=sink sync=false";
+
+    GError* error = nullptr;
+    GstElement* pipeline = gst_parse_launch(pipelineStr.c_str(), &error);
+    if (error || !pipeline) {
+        if (ctx) ctx->last_error = error ? error->message : "audio pipeline creation failed";
+        if (error) g_error_free(error);
+        // A partially built pipeline may be returned alongside the error.
+        if (pipeline) gst_object_unref(pipeline);
+        return IMP_ERROR_DECODE_FAILED;
+    }
+
+    GstElement* appsrc = gst_bin_get_by_name(GST_BIN(pipeline), "src");
+    GstElement* appsink = gst_bin_get_by_name(GST_BIN(pipeline), "sink");
+    if (!appsrc || !appsink) {
+        if (ctx) ctx->last_error = "failed to get audio appsrc/appsink";
+        if (appsrc) gst_object_unref(appsrc);
+        if (appsink) gst_object_unref(appsink);
+        gst_object_unref(pipeline);
+        return IMP_ERROR_INTERNAL;
+    }
+
+    // Stop the pipeline and drop every reference taken above.
+    auto teardown = [&]() {
+        gst_element_set_state(pipeline, GST_STATE_NULL);
+        gst_object_unref(appsrc);
+        gst_object_unref(appsink);
+        gst_object_unref(pipeline);
+    };
+
+    // Allocate GStreamer buffer and copy input data. Both steps can fail;
+    // pushing a null or unfilled buffer would feed garbage to the decoder.
+    GstBuffer* buf = gst_buffer_new_allocate(nullptr, size, nullptr);
+    if (!buf) {
+        if (ctx) ctx->last_error = "failed to allocate audio input buffer";
+        teardown();
+        return IMP_ERROR_DECODE_FAILED;
+    }
+    GstMapInfo map;
+    if (!gst_buffer_map(buf, &map, GST_MAP_WRITE)) {
+        if (ctx) ctx->last_error = "failed to map audio input buffer";
+        gst_buffer_unref(buf);
+        teardown();
+        return IMP_ERROR_DECODE_FAILED;
+    }
+    memcpy(map.data, data, size);
+    gst_buffer_unmap(buf, &map);
+
+    if (gst_element_set_state(pipeline, GST_STATE_PLAYING) == GST_STATE_CHANGE_FAILURE) {
+        if (ctx) ctx->last_error = "audio pipeline failed to start";
+        gst_buffer_unref(buf);  // not pushed yet, so still owned here
+        teardown();
+        return IMP_ERROR_DECODE_FAILED;
+    }
+
+    // Push buffer + EOS
+    GstFlowReturn pushed = gst_app_src_push_buffer(GST_APP_SRC(appsrc), buf);  // takes ownership of buf
+    if (pushed != GST_FLOW_OK) {
+        if (ctx) ctx->last_error = "failed to push audio data into pipeline";
+        teardown();
+        return IMP_ERROR_DECODE_FAILED;
+    }
+    gst_app_src_end_of_stream(GST_APP_SRC(appsrc));
+
+    // Pull all decoded float samples
+    std::vector<float> samples;
+    while (true) {
+        GstSample* sample = gst_app_sink_try_pull_sample(GST_APP_SINK(appsink), 5 * GST_SECOND);
+        if (!sample) break;
+
+        GstBuffer* outBuf = gst_sample_get_buffer(sample);
+        GstMapInfo outMap;
+        if (outBuf && gst_buffer_map(outBuf, &outMap, GST_MAP_READ)) {
+            size_t numFloats = outMap.size / sizeof(float);
+            const float* fdata = reinterpret_cast<const float*>(outMap.data);
+            samples.insert(samples.end(), fdata, fdata + numFloats);
+            gst_buffer_unmap(outBuf, &outMap);
+        }
+        gst_sample_unref(sample);
+    }
+
+    // Check for pipeline errors
+    imp_status_t status = IMP_OK;
+    GstBus* bus = gst_element_get_bus(pipeline);
+    GstMessage* msg = gst_bus_pop_filtered(bus,
+        static_cast<GstMessageType>(GST_MESSAGE_ERROR));
+    if (msg) {
+        GError* err = nullptr;
+        gst_message_parse_error(msg, &err, nullptr);
+        if (ctx && err) ctx->last_error = err->message;
+        if (err) g_error_free(err);
+        gst_message_unref(msg);
+        status = IMP_ERROR_DECODE_FAILED;
+    }
+    gst_object_unref(bus);
+
+    teardown();
+
+    if (status != IMP_OK || samples.empty()) {
+        return status != IMP_OK ? status : IMP_ERROR_DECODE_FAILED;
+    }
+
+    // Wrap samples in an imp_tensor_t backed by ov::Tensor. Allocation may
+    // throw; exceptions must not cross this C entry point or leak `t`.
+    imp_tensor_s* t = nullptr;
+    try {
+        t = new imp_tensor_s();
+        t->ov_tensor = ov::Tensor(ov::element::f32, {1, samples.size()});
+        std::memcpy(t->ov_tensor.data<float>(), samples.data(), samples.size() * sizeof(float));
+        t->device_type = IMP_DEVICE_CPU;
+        t->device_name = "CPU";
+        t->format = IMP_FORMAT_GRAY;  // 1-D audio — not a pixel format, but marks non-NV12
+        t->valid = true;
+    } catch (...) {
+        delete t;
+        return IMP_ERROR_INTERNAL;
+    }
+
+    *tensor = t;
+    return IMP_OK;
+}
+
+/**
+ * Decode an audio file into a tensor -- not implemented yet.
+ *
+ * Every argument is ignored and the call always fails.
+ *
+ * @return IMP_ERROR_INTERNAL unconditionally.
+ */
+imp_status_t imp_decode_audio_file(imp_tensor_t** t, const char* f,
+                                   imp_context_t* c, const imp_audio_decode_opts_t* o,
+                                   imp_decode_callback_t cb, void* ud) {
+    // TODO: implement - decode file to tensor with all samples
+    static_cast<void>(t);
+    static_cast<void>(f);
+    static_cast<void>(c);
+    static_cast<void>(o);
+    static_cast<void>(cb);
+    static_cast<void>(ud);
+    return IMP_ERROR_INTERNAL;
+}
+
+/**
+ * Probe an audio file with GstDiscoverer and report its basic properties.
+ *
+ * @param file_path    Path of the file to inspect (required, non-empty).
+ * @param sample_rate  Optional out: sample rate of the first audio stream, 0 if none.
+ * @param channels     Optional out: channel count of the first audio stream, 0 if none.
+ * @param duration_sec Optional out: duration in seconds, 0.0 when unknown.
+ * @return IMP_OK on success; IMP_ERROR_INVALID_ARGUMENT when the path is missing or
+ *         cannot be converted to a URI; IMP_ERROR_DECODE_FAILED when discovery fails.
+ */
+imp_status_t imp_audio_file_info(const char* file_path,
+                                 uint32_t* sample_rate,
+                                 uint32_t* channels,
+                                 double* duration_sec) {
+    if (!file_path || !*file_path) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    // Let GStreamer build the URI: it resolves relative paths and escapes every
+    // reserved character, not just spaces and backslashes.
+    GError* error = nullptr;
+    gchar* uri = gst_filename_to_uri(file_path, &error);
+    if (!uri) {
+        g_clear_error(&error);
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    GstDiscoverer* discoverer = gst_discoverer_new(10 * GST_SECOND, &error);
+    if (!discoverer) {
+        g_clear_error(&error);
+        g_free(uri);
+        return IMP_ERROR_DECODE_FAILED;
+    }
+
+    GstDiscovererInfo* info = gst_discoverer_discover_uri(discoverer, uri, &error);
+    g_free(uri);
+    // The discoverer may hand back an info object AND set the error, so the error
+    // has to be released regardless of whether info is NULL.
+    g_clear_error(&error);
+
+    if (!info || gst_discoverer_info_get_result(info) != GST_DISCOVERER_OK) {
+        if (info) gst_discoverer_info_unref(info);
+        g_object_unref(discoverer);
+        return IMP_ERROR_DECODE_FAILED;
+    }
+
+    // Get duration (GST_CLOCK_TIME_NONE means "unknown", reported as 0.0)
+    if (duration_sec) {
+        GstClockTime dur = gst_discoverer_info_get_duration(info);
+        *duration_sec = GST_CLOCK_TIME_IS_VALID(dur) ? (double)dur / GST_SECOND : 0.0;
+    }
+
+    // Get audio stream info from the first audio stream, if any
+    GList* audio_streams = gst_discoverer_info_get_audio_streams(info);
+    if (audio_streams) {
+        GstDiscovererAudioInfo* audio_info = (GstDiscovererAudioInfo*)audio_streams->data;
+        if (sample_rate) {
+            *sample_rate = gst_discoverer_audio_info_get_sample_rate(audio_info);
+        }
+        if (channels) {
+            *channels = gst_discoverer_audio_info_get_channels(audio_info);
+        }
+        gst_discoverer_stream_info_list_free(audio_streams);
+    } else {
+        // No audio stream found
+        if (sample_rate) *sample_rate = 0;
+        if (channels) *channels = 0;
+    }
+
+    gst_discoverer_info_unref(info);
+    g_object_unref(discoverer);
+
+    return IMP_OK;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Audio Stream Implementation (high-level decode → encode)
+//////////////////////////////////////////////////////////////////////////////
+
+// Helper: Get audio duration (seconds) using GstDiscoverer.
+// Returns 0.0 when the path cannot be converted to a URI, when discovery fails,
+// or when the duration is unknown.
+static double imp_audio_get_duration(const std::string& filepath) {
+    if (filepath.empty()) {
+        return 0.0;
+    }
+
+    // gst_filename_to_uri resolves relative paths and escapes all reserved
+    // characters, which manual "file:///" concatenation does not.
+    GError* error = nullptr;
+    gchar* uri = gst_filename_to_uri(filepath.c_str(), &error);
+    if (!uri) {
+        g_clear_error(&error);
+        return 0.0;
+    }
+
+    GstDiscoverer* discoverer = gst_discoverer_new(10 * GST_SECOND, &error);
+    if (!discoverer) {
+        g_clear_error(&error);
+        g_free(uri);
+        return 0.0;
+    }
+
+    GstDiscovererInfo* info = gst_discoverer_discover_uri(discoverer, uri, &error);
+    g_free(uri);
+    // The error can be set even when info is non-NULL; always release it.
+    g_clear_error(&error);
+
+    double duration_sec = 0.0;
+    if (info) {
+        if (gst_discoverer_info_get_result(info) == GST_DISCOVERER_OK) {
+            GstClockTime duration = gst_discoverer_info_get_duration(info);
+            // GST_CLOCK_TIME_NONE would otherwise turn into a huge bogus duration
+            if (GST_CLOCK_TIME_IS_VALID(duration)) {
+                duration_sec = (double)duration / GST_SECOND;
+            }
+        }
+        gst_discoverer_info_unref(info);
+    }
+
+    g_object_unref(discoverer);
+    return duration_sec;
+}
+
+// Forward declarations for helpers defined in the Audio Encoder section below
+static std::string imp_audio_get_encoder(const std::string& codec);
+static std::string imp_audio_get_muxer(const std::string& codec);
+
+/**
+ * Create an audio stream handle for a decode -> encode job.
+ *
+ * Stores the paths and output settings, probes the input duration and derives
+ * the expected sample count. No pipeline is built here.
+ *
+ * @param stream      Out: receives the new handle; set to NULL on failure.
+ * @param input_path  Input file (required).
+ * @param output_path Output file, may be NULL.
+ * @param opts        Output settings, may be NULL. Zero/NULL fields fall back to
+ *                    44100 Hz, 2 channels, "mp3", 192 kbps.
+ * @return IMP_OK, IMP_ERROR_INVALID_ARGUMENT, or IMP_ERROR_DECODE_FAILED when the
+ *         input duration cannot be determined.
+ */
+imp_status_t imp_audio_open(imp_audio_stream_t** stream,
+                            const char* input_path,
+                            const char* output_path,
+                            const imp_audio_stream_opts_t* opts) {
+    if (!stream || !input_path) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+    // Give the caller a well-defined handle even when we fail below.
+    *stream = nullptr;
+
+    auto* s = new imp_audio_stream_s();
+    s->input_path = input_path;
+    if (output_path) {
+        s->output_path = output_path;
+    }
+
+    // NOTE(review): with opts == NULL the output_* fields keep whatever
+    // imp_audio_stream_s initializes them to -- confirm those defaults match
+    // the fallbacks applied here.
+    if (opts) {
+        s->output_sample_rate = opts->sample_rate > 0 ? opts->sample_rate : 44100;
+        s->output_channels = opts->channels > 0 ? opts->channels : 2;
+        s->output_codec = opts->output_codec ? opts->output_codec : "mp3";
+        s->output_bitrate_kbps = opts->output_bitrate_kbps > 0 ? opts->output_bitrate_kbps : 192;
+    }
+
+    // Get duration from input file; the handle is discarded on failure, so
+    // there is nowhere to record an error string.
+    s->duration_sec = imp_audio_get_duration(input_path);
+    if (s->duration_sec <= 0) {
+        delete s;
+        return IMP_ERROR_DECODE_FAILED;
+    }
+
+    s->sample_rate = s->output_sample_rate;
+    s->channels = s->output_channels;
+    s->num_samples = (int64_t)(s->duration_sec * s->output_sample_rate);
+
+    s->initialized = true;
+    *stream = s;
+    return IMP_OK;
+}
+
+/**
+ * Copy the stream's sample rate, channel count, duration and sample count
+ * into the caller-provided info structure.
+ *
+ * @return IMP_ERROR_INVALID_ARGUMENT if either pointer is NULL, otherwise IMP_OK.
+ */
+imp_status_t imp_audio_get_info(imp_audio_stream_t* stream, imp_audio_info_t* info) {
+    const bool have_args = (stream != nullptr) && (info != nullptr);
+    if (!have_args) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    info->num_samples  = stream->num_samples;
+    info->duration_sec = stream->duration_sec;
+    info->channels     = stream->channels;
+    info->sample_rate  = stream->sample_rate;
+    return IMP_OK;
+}
+
+/**
+ * Run the whole decode -> convert -> resample -> encode job for a stream and
+ * block until the pipeline reports EOS or an error.
+ *
+ * The measured wall-clock time is stored in stream->wall_time_sec and
+ * stream->processed reflects whether the run succeeded. On failure a message
+ * is stored in stream->last_error.
+ *
+ * @return IMP_OK on success; IMP_ERROR_INVALID_ARGUMENT for a NULL or
+ *         uninitialized stream or a missing output path; IMP_ERROR_DECODE_FAILED
+ *         when the pipeline cannot be built or started; IMP_ERROR_INTERNAL when
+ *         the file elements cannot be located; IMP_ERROR_ENCODE_FAILED when the
+ *         pipeline posts an error while running.
+ */
+imp_status_t imp_audio_process(imp_audio_stream_t* stream) {
+    if (!stream || !stream->initialized) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    if (stream->output_path.empty()) {
+        stream->last_error = "No output path specified";
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    std::string input_path = stream->input_path;
+    std::string output_path = stream->output_path;
+
+    std::string encoder = imp_audio_get_encoder(stream->output_codec);
+    std::string muxer = imp_audio_get_muxer(stream->output_codec);
+
+    // Build pipeline: filesrc → decodebin → audioconvert → audioresample → caps → encoder → [muxer] → filesink
+    // File locations are NOT spliced into the launch string: a quote or
+    // backslash in a path would be interpreted by the parser. They are set as
+    // element properties after parsing instead.
+    std::string pipeline_str =
+        "filesrc name=imp_filesrc ! "
+        "decodebin ! "
+        "audioconvert ! "
+        "audioresample ! "
+        "audio/x-raw,format=S16LE,channels=" + std::to_string(stream->output_channels) +
+        ",rate=" + std::to_string(stream->output_sample_rate) + " ! ";
+
+    // lamemp3enc takes kbit/s, avenc_aac and opusenc take bit/s
+    if (encoder == "lamemp3enc") {
+        pipeline_str += "lamemp3enc bitrate=" + std::to_string(stream->output_bitrate_kbps) + " ! ";
+    } else if (encoder == "avenc_aac") {
+        pipeline_str += "avenc_aac bitrate=" + std::to_string(stream->output_bitrate_kbps * 1000) + " ! ";
+    } else if (encoder == "opusenc") {
+        pipeline_str += "opusenc bitrate=" + std::to_string(stream->output_bitrate_kbps * 1000) + " ! ";
+    } else {
+        pipeline_str += encoder + " ! ";
+    }
+
+    if (!muxer.empty()) {
+        pipeline_str += muxer + " ! ";
+    }
+
+    pipeline_str += "filesink name=imp_filesink";
+
+    GError* error = nullptr;
+    stream->pipeline = gst_parse_launch(pipeline_str.c_str(), &error);
+
+    if (error || !stream->pipeline) {
+        stream->last_error = error ? error->message : "Pipeline creation failed";
+        if (error) g_error_free(error);
+        // The parser may return a partial pipeline together with an error.
+        if (stream->pipeline) {
+            gst_object_unref(stream->pipeline);
+            stream->pipeline = nullptr;
+        }
+        return IMP_ERROR_DECODE_FAILED;
+    }
+
+    GstElement* filesrc = gst_bin_get_by_name(GST_BIN(stream->pipeline), "imp_filesrc");
+    GstElement* filesink = gst_bin_get_by_name(GST_BIN(stream->pipeline), "imp_filesink");
+    if (filesrc) g_object_set(filesrc, "location", input_path.c_str(), nullptr);
+    if (filesink) g_object_set(filesink, "location", output_path.c_str(), nullptr);
+    const bool have_file_elements = (filesrc != nullptr) && (filesink != nullptr);
+    if (filesrc) gst_object_unref(filesrc);
+    if (filesink) gst_object_unref(filesink);
+    if (!have_file_elements) {
+        stream->last_error = "Failed to get file elements";
+        gst_object_unref(stream->pipeline);
+        stream->pipeline = nullptr;
+        return IMP_ERROR_INTERNAL;
+    }
+
+    auto start_time = chrono::high_resolution_clock::now();
+
+    GstStateChangeReturn ret = gst_element_set_state(stream->pipeline, GST_STATE_PLAYING);
+    if (ret == GST_STATE_CHANGE_FAILURE) {
+        stream->last_error = "Failed to start pipeline";
+        // Elements may already have left NULL; bring them back before disposal.
+        gst_element_set_state(stream->pipeline, GST_STATE_NULL);
+        gst_object_unref(stream->pipeline);
+        stream->pipeline = nullptr;
+        return IMP_ERROR_DECODE_FAILED;
+    }
+
+    // Wait for EOS or error (no timeout)
+    GstBus* bus = gst_element_get_bus(stream->pipeline);
+    GstMessage* msg = gst_bus_timed_pop_filtered(bus, GST_CLOCK_TIME_NONE,
+        (GstMessageType)(GST_MESSAGE_EOS | GST_MESSAGE_ERROR));
+
+    auto end_time = chrono::high_resolution_clock::now();
+    stream->wall_time_sec = chrono::duration<double>(end_time - start_time).count();
+
+    imp_status_t status = IMP_OK;
+    if (msg) {
+        if (GST_MESSAGE_TYPE(msg) == GST_MESSAGE_ERROR) {
+            GError* err = nullptr;
+            gchar* debug = nullptr;
+            gst_message_parse_error(msg, &err, &debug);
+            stream->last_error = err ? err->message : "Unknown error";
+            if (err) g_error_free(err);
+            if (debug) g_free(debug);
+            status = IMP_ERROR_ENCODE_FAILED;
+        }
+        gst_message_unref(msg);
+    }
+
+    gst_object_unref(bus);
+    gst_element_set_state(stream->pipeline, GST_STATE_NULL);
+    gst_object_unref(stream->pipeline);
+    stream->pipeline = nullptr;
+
+    stream->processed = (status == IMP_OK);
+    return status;
+}
+
+/**
+ * Report how long the last processing run took.
+ *
+ * @param stream          Stream handle (required).
+ * @param wall_time_sec   Optional out: recorded wall-clock time in seconds.
+ * @param realtime_factor Optional out: audio duration divided by wall-clock
+ *                        time, or 0.0 when no positive time was recorded.
+ * @return IMP_ERROR_INVALID_ARGUMENT if stream is NULL, otherwise IMP_OK.
+ */
+imp_status_t imp_audio_get_timing(imp_audio_stream_t* stream,
+                                  double* wall_time_sec,
+                                  double* realtime_factor) {
+    if (stream == nullptr) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    if (wall_time_sec != nullptr) {
+        *wall_time_sec = stream->wall_time_sec;
+    }
+
+    if (realtime_factor != nullptr) {
+        *realtime_factor = (stream->wall_time_sec > 0)
+            ? stream->duration_sec / stream->wall_time_sec
+            : 0.0;
+    }
+
+    return IMP_OK;
+}
+
+/**
+ * Destroy a stream handle. Any pipeline still attached to it is set to NULL
+ * state and released first. Passing NULL is a no-op.
+ */
+void imp_audio_close(imp_audio_stream_t* stream) {
+    if (stream == nullptr) {
+        return;
+    }
+
+    if (auto* leftover = stream->pipeline) {
+        gst_element_set_state(leftover, GST_STATE_NULL);
+        gst_object_unref(leftover);
+    }
+
+    delete stream;
+}
+
+/**
+ * Encode an image tensor to memory -- placeholder.
+ *
+ * All arguments are ignored.
+ *
+ * @return IMP_ERROR_INTERNAL unconditionally.
+ */
+imp_status_t imp_encode_image(void** d, size_t* s, imp_tensor_t* t,
+                              imp_context_t* c, const imp_image_encode_opts_t* o,
+                              imp_encode_callback_t cb, void* ud) {
+    static_cast<void>(d);
+    static_cast<void>(s);
+    static_cast<void>(t);
+    static_cast<void>(c);
+    static_cast<void>(o);
+    static_cast<void>(cb);
+    static_cast<void>(ud);
+    return IMP_ERROR_INTERNAL;
+}
+
+/**
+ * Encode an image tensor to a file -- placeholder.
+ *
+ * All arguments are ignored.
+ *
+ * @return IMP_ERROR_INTERNAL unconditionally.
+ */
+imp_status_t imp_encode_image_file(const char* f, imp_tensor_t* t,
+                                   imp_context_t* c, const imp_image_encode_opts_t* o,
+                                   imp_encode_callback_t cb, void* ud) {
+    static_cast<void>(f);
+    static_cast<void>(t);
+    static_cast<void>(c);
+    static_cast<void>(o);
+    static_cast<void>(cb);
+    static_cast<void>(ud);
+    return IMP_ERROR_INTERNAL;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Audio Encoder Implementation
+//////////////////////////////////////////////////////////////////////////////
+
+// Helper: map a codec name to the GStreamer encoder element name.
+// Unrecognized codec names fall back to the MP3 encoder.
+static std::string imp_audio_get_encoder(const std::string& codec) {
+    struct CodecElement {
+        const char* codec;
+        const char* element;
+    };
+    static const CodecElement kEncoders[] = {
+        {"mp3",  "lamemp3enc"},
+        {"aac",  "avenc_aac"},
+        {"flac", "flacenc"},
+        {"opus", "opusenc"},
+        {"wav",  "wavenc"},
+    };
+
+    for (const CodecElement& entry : kEncoders) {
+        if (codec == entry.codec) {
+            return entry.element;
+        }
+    }
+    return "lamemp3enc"; // default
+}
+
+// Helper: map a codec name to the muxer element placed after the encoder.
+// Only "aac" (mp4mux) and "opus" (oggmux) get one; every other codec name,
+// including "mp3", "flac" and "wav", yields an empty string (no muxer).
+static std::string imp_audio_get_muxer(const std::string& codec) {
+    const char* element = (codec == "aac")  ? "mp4mux"
+                        : (codec == "opus") ? "oggmux"
+                                            : "";
+    return element;
+}
+
+/**
+ * Create a streaming audio encoder that writes to a file.
+ *
+ * Builds and starts the pipeline
+ * appsrc → audioconvert → audioresample → encoder → [muxer] → filesink.
+ *
+ * @param encoder   Out: receives the new handle; set to NULL on failure.
+ * @param opts      Required; opts->output_path must be set. Zero/NULL fields
+ *                  fall back to "mp3", 192 kbps, 44100 Hz, 2 channels.
+ * @param callback  Optional completion callback stored on the handle.
+ * @param user_data Opaque pointer stored alongside the callback.
+ * @return IMP_OK; IMP_ERROR_INVALID_ARGUMENT; IMP_ERROR_ENCODE_FAILED when the
+ *         pipeline cannot be built or started; IMP_ERROR_INTERNAL when a named
+ *         element cannot be found in the pipeline.
+ */
+imp_status_t imp_audio_encoder_create(imp_audio_encoder_t** encoder,
+                                      const imp_audio_encode_opts_t* opts,
+                                      imp_encode_callback_t callback,
+                                      void* user_data) {
+    if (!encoder || !opts || !opts->output_path) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+    // Give the caller a well-defined handle even when we fail below.
+    *encoder = nullptr;
+
+    auto* e = new imp_audio_encoder_s();
+    e->output_path = opts->output_path;
+    e->codec = opts->codec ? opts->codec : "mp3";
+    e->bitrate_kbps = opts->bitrate_kbps > 0 ? opts->bitrate_kbps : 192;
+    e->sample_rate = opts->sample_rate > 0 ? opts->sample_rate : 44100;
+    e->channels = opts->channels > 0 ? opts->channels : 2;
+    e->callback = callback;
+    e->user_data = user_data;
+
+    std::string output_path = e->output_path;
+
+    // Get encoder element
+    std::string enc_element = imp_audio_get_encoder(e->codec);
+    std::string muxer = imp_audio_get_muxer(e->codec);
+
+    // Build pipeline: appsrc → audioconvert → audioresample → encoder → [muxer] → filesink
+    // The output location is set as a property after parsing so that quotes or
+    // backslashes in the path cannot be interpreted by the launch parser.
+    std::string pipeline_str =
+        "appsrc name=src format=time ! "
+        "audioconvert ! "
+        "audioresample ! "
+        "audio/x-raw,format=S16LE,channels=" + std::to_string(e->channels) +
+        ",rate=" + std::to_string(e->sample_rate) + " ! ";
+
+    // Add encoder with appropriate options
+    // (lamemp3enc takes kbit/s, avenc_aac and opusenc take bit/s)
+    if (enc_element == "lamemp3enc") {
+        pipeline_str += "lamemp3enc bitrate=" + std::to_string(e->bitrate_kbps) + " ! ";
+    } else if (enc_element == "avenc_aac") {
+        pipeline_str += "avenc_aac bitrate=" + std::to_string(e->bitrate_kbps * 1000) + " ! ";
+    } else if (enc_element == "opusenc") {
+        pipeline_str += "opusenc bitrate=" + std::to_string(e->bitrate_kbps * 1000) + " ! ";
+    } else {
+        pipeline_str += enc_element + " ! ";
+    }
+
+    // Add muxer if needed
+    if (!muxer.empty()) {
+        pipeline_str += muxer + " ! ";
+    }
+
+    pipeline_str += "filesink name=imp_filesink";
+
+    // Create pipeline
+    GError* error = nullptr;
+    e->pipeline = gst_parse_launch(pipeline_str.c_str(), &error);
+
+    if (error || !e->pipeline) {
+        if (error) g_error_free(error);
+        // The parser may return a partial pipeline together with an error.
+        if (e->pipeline) gst_object_unref(e->pipeline);
+        delete e;
+        return IMP_ERROR_ENCODE_FAILED;
+    }
+
+    // Point the file sink at the requested output
+    GstElement* filesink = gst_bin_get_by_name(GST_BIN(e->pipeline), "imp_filesink");
+    if (!filesink) {
+        gst_object_unref(e->pipeline);
+        delete e;
+        return IMP_ERROR_INTERNAL;
+    }
+    g_object_set(filesink, "location", output_path.c_str(), nullptr);
+    gst_object_unref(filesink);
+
+    // Get appsrc element
+    e->appsrc = gst_bin_get_by_name(GST_BIN(e->pipeline), "src");
+    if (!e->appsrc) {
+        gst_object_unref(e->pipeline);
+        delete e;
+        return IMP_ERROR_INTERNAL;
+    }
+
+    // Configure appsrc caps
+    GstCaps* caps = gst_caps_new_simple("audio/x-raw",
+        "format", G_TYPE_STRING, "S16LE",
+        "rate", G_TYPE_INT, (int)e->sample_rate,
+        "channels", G_TYPE_INT, (int)e->channels,
+        "layout", G_TYPE_STRING, "interleaved",
+        nullptr);
+    g_object_set(e->appsrc, "caps", caps, "is-live", FALSE, nullptr);
+    gst_caps_unref(caps);
+
+    // Start pipeline
+    GstStateChangeReturn ret = gst_element_set_state(e->pipeline, GST_STATE_PLAYING);
+    if (ret == GST_STATE_CHANGE_FAILURE) {
+        // Elements may already have left NULL; bring them back before disposal.
+        gst_element_set_state(e->pipeline, GST_STATE_NULL);
+        gst_object_unref(e->appsrc);
+        gst_object_unref(e->pipeline);
+        delete e;
+        return IMP_ERROR_ENCODE_FAILED;
+    }
+
+    e->initialized = true;
+    *encoder = e;
+    return IMP_OK;
+}
+
+/**
+ * Push a tensor of audio samples into the encoder -- not implemented yet.
+ *
+ * @return IMP_ERROR_INVALID_ARGUMENT when encoder is NULL or uninitialized, or
+ *         tensor is NULL; otherwise IMP_ERROR_INTERNAL (stub).
+ */
+imp_status_t imp_audio_encoder_write(imp_audio_encoder_t* encoder,
+                                     imp_tensor_t* tensor) {
+    const bool usable = encoder && encoder->initialized && tensor;
+    if (!usable) {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    // TODO: Extract audio data from tensor and push to appsrc.
+    // Until tensor audio data access exists, this call always fails.
+    return IMP_ERROR_INTERNAL; // Not fully implemented yet
+}
+
+/**
+ * Finish and destroy a streaming encoder.
+ *
+ * Sends end-of-stream, waits up to 5 seconds for the pipeline to post EOS or
+ * an error, reports the outcome through the stored callback (if any), then
+ * tears the pipeline down and frees the handle. Passing NULL is a no-op.
+ *
+ * The callback receives IMP_OK only when EOS was actually observed; an error
+ * message, a timeout or a missing bus are all reported as
+ * IMP_ERROR_ENCODE_FAILED, since the pipeline is torn down without having
+ * confirmed that the output was finished.
+ */
+void imp_audio_encoder_close(imp_audio_encoder_t* encoder) {
+    if (!encoder) return;
+
+    if (encoder->appsrc) {
+        // Signal end of stream
+        gst_app_src_end_of_stream(GST_APP_SRC(encoder->appsrc));
+    }
+
+    if (encoder->pipeline) {
+        imp_status_t outcome = IMP_ERROR_ENCODE_FAILED;
+
+        // Wait for EOS to propagate
+        GstBus* bus = gst_element_get_bus(encoder->pipeline);
+        if (bus) {
+            GstMessage* msg = gst_bus_timed_pop_filtered(bus, 5 * GST_SECOND,
+                (GstMessageType)(GST_MESSAGE_EOS | GST_MESSAGE_ERROR));
+            if (msg) {
+                if (GST_MESSAGE_TYPE(msg) == GST_MESSAGE_EOS) {
+                    outcome = IMP_OK;
+                }
+                gst_message_unref(msg);
+            }
+            gst_object_unref(bus);
+        }
+
+        if (encoder->callback) {
+            // No data is returned for file output, only the status
+            encoder->callback(outcome, nullptr, 0, encoder->user_data);
+        }
+
+        gst_element_set_state(encoder->pipeline, GST_STATE_NULL);
+        gst_object_unref(encoder->pipeline);
+    }
+
+    // Released independently of the pipeline so the reference never leaks
+    if (encoder->appsrc) {
+        gst_object_unref(encoder->appsrc);
+    }
+
+    delete encoder;
+}
+
+// ---------------------------------------------------------------------------
+// One-shot audio encode (to memory)
+// ---------------------------------------------------------------------------
+
+// Internal: write a WAV file into a malloc'd buffer (no GStreamer).
+//
+// Emits a 44-byte RIFF/WAVE header (format tag 3 = IEEE float, 32 bits per
+// sample) followed by the samples copied verbatim from memory.
+//
+// data/data_size : out, malloc'd buffer and its size; caller frees with free()
+// samples        : num_samples floats (total across all channels)
+// Returns IMP_ERROR_INVALID_ARGUMENT when a value does not fit the header's
+// 16/32-bit fields, IMP_ERROR_INTERNAL when allocation fails.
+static imp_status_t imp_encode_audio_wav(void** data, size_t* data_size,
+                                         const float* samples, size_t num_samples,
+                                         uint32_t sampleRate, uint32_t channels) {
+    const uint32_t kHeaderSize = 44;
+    const uint32_t kMaxU32     = 0xFFFFFFFFu;
+    const uint32_t kMaxU16     = 0xFFFFu;
+
+    // blockAlign (channels * 4) is a 16-bit header field
+    if (channels == 0 || channels > kMaxU16 / sizeof(float))
+        return IMP_ERROR_INVALID_ARGUMENT;
+    // The RIFF chunk size (36 + payload) is a 32-bit header field
+    if (num_samples > (kMaxU32 - 36u) / sizeof(float))
+        return IMP_ERROR_INVALID_ARGUMENT;
+    // byteRate is a 32-bit header field
+    const uint64_t byteRate64 = static_cast<uint64_t>(sampleRate) * channels * sizeof(float);
+    if (byteRate64 > kMaxU32)
+        return IMP_ERROR_INVALID_ARGUMENT;
+
+    uint32_t byteRate    = static_cast<uint32_t>(byteRate64);
+    uint32_t blockAlign  = channels * static_cast<uint32_t>(sizeof(float));
+    uint32_t dataSize    = static_cast<uint32_t>(num_samples * sizeof(float));
+    uint32_t chunkSize   = 36 + dataSize;
+    size_t   totalSize   = static_cast<size_t>(kHeaderSize) + dataSize;
+
+    uint8_t* buf = static_cast<uint8_t*>(malloc(totalSize));
+    if (!buf) return IMP_ERROR_INTERNAL;
+
+    // Little-endian field writers
+    auto w16 = [](uint8_t* p, uint16_t v) { p[0]=v&0xFF; p[1]=(v>>8)&0xFF; };
+    auto w32 = [](uint8_t* p, uint32_t v) { p[0]=v&0xFF; p[1]=(v>>8)&0xFF; p[2]=(v>>16)&0xFF; p[3]=(v>>24)&0xFF; };
+
+    memcpy(buf,      "RIFF", 4);  w32(buf+4, chunkSize);
+    memcpy(buf+8,    "WAVE", 4);
+    memcpy(buf+12,   "fmt ", 4);  w32(buf+16, 16);
+    w16(buf+20, 3);  // IEEE float
+    w16(buf+22, static_cast<uint16_t>(channels));
+    w32(buf+24, sampleRate);
+    w32(buf+28, byteRate);
+    w16(buf+32, static_cast<uint16_t>(blockAlign));
+    w16(buf+34, 32);  // bits per sample
+    memcpy(buf+36, "data", 4);  w32(buf+40, dataSize);
+    memcpy(buf+44, samples, dataSize);
+
+    *data = buf;
+    *data_size = totalSize;
+    return IMP_OK;
+}
+
+/**
+ * One-shot encode of float samples into a malloc'd memory buffer.
+ *
+ * "wav" and "pcm" are produced directly; "mp3", "flac", "opus" and "aac" go
+ * through a GStreamer pipeline (appsrc → audioconvert → audioresample →
+ * encoder → [muxer] → appsink).
+ *
+ * @param data        Out: malloc'd encoded bytes; caller frees with free().
+ * @param data_size   Out: size of *data in bytes.
+ * @param samples     Input float samples.
+ * @param num_samples Number of floats in samples (must be > 0).
+ * @param opts        Required. NULL codec means "wav"; zero sample_rate and
+ *                    channels fall back to 16000 Hz and 1; zero bitrate to 192 kbps.
+ * @return IMP_OK; IMP_ERROR_INVALID_ARGUMENT for bad arguments or an unknown
+ *         codec; IMP_ERROR_ENCODE_FAILED when the pipeline cannot be built or
+ *         started, reports an error, times out before end-of-stream or
+ *         produces no output; IMP_ERROR_INTERNAL for allocation or
+ *         element-lookup failures.
+ */
+imp_status_t imp_encode_audio(void** data,
+                              size_t* data_size,
+                              const float* samples,
+                              size_t num_samples,
+                              const imp_audio_encode_opts_t* opts) {
+    if (!data || !data_size || !samples || num_samples == 0 || !opts)
+        return IMP_ERROR_INVALID_ARGUMENT;
+
+    std::string codec = opts->codec ? opts->codec : "wav";
+    // inputRate describes the caller's samples; sampleRate is the encoder-side
+    // rate and may be overridden below (Opus).
+    const uint32_t inputRate = opts->sample_rate > 0 ? opts->sample_rate : 16000;
+    uint32_t sampleRate = inputRate;
+    uint32_t channels   = opts->channels   > 0 ? opts->channels   : 1;
+
+    // ----- WAV: built-in writer (fast, no GStreamer) -----
+    if (codec == "wav") {
+        return imp_encode_audio_wav(data, data_size, samples, num_samples,
+                                    sampleRate, channels);
+    }
+
+    // ----- PCM: raw float bytes -----
+    if (codec == "pcm") {
+        size_t sz = num_samples * sizeof(float);
+        *data = malloc(sz);
+        if (!*data) return IMP_ERROR_INTERNAL;
+        memcpy(*data, samples, sz);
+        *data_size = sz;
+        return IMP_OK;
+    }
+
+    // ----- Lossy / lossless codecs via GStreamer -----
+    gst_init(nullptr, nullptr);
+
+    std::string encElement;
+    std::string muxElement;
+    uint32_t bitrate = opts->bitrate_kbps > 0 ? opts->bitrate_kbps : 192;
+
+    if (codec == "mp3") {
+        encElement = "lamemp3enc bitrate=" + std::to_string(bitrate);
+    } else if (codec == "flac") {
+        encElement = "flacenc";
+    } else if (codec == "opus") {
+        // Opus standard sample rates: 8k, 12k, 16k, 24k, 48k
+        uint32_t opusRate = (sampleRate <= 8000) ? 8000 :
+                            (sampleRate <= 12000) ? 12000 :
+                            (sampleRate <= 16000) ? 16000 :
+                            (sampleRate <= 24000) ? 24000 : 48000;
+        encElement = "opusenc bitrate=" + std::to_string(bitrate * 1000);
+        // audioresample will convert to opusRate automatically via caps
+        muxElement = "oggmux";
+        sampleRate = opusRate;  // override for caps
+    } else if (codec == "aac") {
+        encElement = "avenc_aac bitrate=" + std::to_string(bitrate * 1000);
+        // NOTE(review): confirm an element named "adtsmux" exists in the
+        // targeted GStreamer install; pipeline creation fails otherwise.
+        muxElement = "aacparse ! adtsmux";
+    } else {
+        return IMP_ERROR_INVALID_ARGUMENT;
+    }
+
+    std::string pipeStr =
+        "appsrc name=src format=time ! "
+        "audioconvert ! audioresample ! "
+        "audio/x-raw,format=S16LE,channels=" + std::to_string(channels) +
+        ",rate=" + std::to_string(sampleRate) + " ! " +
+        encElement + " ! ";
+    if (!muxElement.empty()) pipeStr += muxElement + " ! ";
+    pipeStr += "appsink name=sink";
+
+    GError* error = nullptr;
+    GstElement* pipeline = gst_parse_launch(pipeStr.c_str(), &error);
+    if (error || !pipeline) {
+        if (error) g_error_free(error);
+        if (pipeline) gst_object_unref(pipeline);
+        return IMP_ERROR_ENCODE_FAILED;
+    }
+
+    GstElement* appsrc  = gst_bin_get_by_name(GST_BIN(pipeline), "src");
+    GstElement* appsink = gst_bin_get_by_name(GST_BIN(pipeline), "sink");
+    if (!appsrc || !appsink) {
+        if (appsrc)  gst_object_unref(appsrc);
+        if (appsink) gst_object_unref(appsink);
+        gst_object_unref(pipeline);
+        return IMP_ERROR_INTERNAL;
+    }
+
+    // Shared teardown for every exit path from here on
+    auto releasePipeline = [&]() {
+        gst_element_set_state(pipeline, GST_STATE_NULL);
+        gst_object_unref(appsrc);
+        gst_object_unref(appsink);
+        gst_object_unref(pipeline);
+    };
+
+    // Configure appsrc for F32LE input
+    GstCaps* caps = gst_caps_new_simple("audio/x-raw",
+        "format",  G_TYPE_STRING, "F32LE",
+        "rate",    G_TYPE_INT, (int)inputRate,
+        "channels", G_TYPE_INT, (int)channels,
+        "layout",  G_TYPE_STRING, "interleaved",
+        nullptr);
+    g_object_set(appsrc, "caps", caps, "is-live", FALSE, nullptr);
+    gst_caps_unref(caps);
+
+    // Prepare the input buffer; allocation or mapping can fail for large inputs
+    size_t byteSize = num_samples * sizeof(float);
+    GstBuffer* buffer = gst_buffer_new_allocate(nullptr, byteSize, nullptr);
+    GstMapInfo map;
+    if (!buffer || !gst_buffer_map(buffer, &map, GST_MAP_WRITE)) {
+        if (buffer) gst_buffer_unref(buffer);
+        releasePipeline();
+        return IMP_ERROR_INTERNAL;
+    }
+    memcpy(map.data, samples, byteSize);
+    gst_buffer_unmap(buffer, &map);
+
+    GST_BUFFER_PTS(buffer)      = 0;
+    GST_BUFFER_DURATION(buffer) = gst_util_uint64_scale(
+        num_samples / channels, GST_SECOND, inputRate);
+
+    if (gst_element_set_state(pipeline, GST_STATE_PLAYING) == GST_STATE_CHANGE_FAILURE) {
+        gst_buffer_unref(buffer);
+        releasePipeline();
+        return IMP_ERROR_ENCODE_FAILED;
+    }
+
+    // Push float samples
+    gst_app_src_push_buffer(GST_APP_SRC(appsrc), buffer);  // takes ownership
+    gst_app_src_end_of_stream(GST_APP_SRC(appsrc));
+
+    // Collect encoded output from appsink
+    std::vector<uint8_t> encoded;
+    bool reachedEos = false;
+    for (;;) {
+        GstSample* sample = gst_app_sink_try_pull_sample(GST_APP_SINK(appsink), 10 * GST_SECOND);
+        if (!sample) {
+            // NULL means either EOS (normal completion) or a timeout; only
+            // the former leaves us with complete output.
+            reachedEos = gst_app_sink_is_eos(GST_APP_SINK(appsink));
+            break;
+        }
+        GstBuffer* outBuf = gst_sample_get_buffer(sample);
+        GstMapInfo outMap;
+        if (gst_buffer_map(outBuf, &outMap, GST_MAP_READ)) {
+            encoded.insert(encoded.end(), outMap.data, outMap.data + outMap.size);
+            gst_buffer_unmap(outBuf, &outMap);
+        }
+        gst_sample_unref(sample);
+    }
+
+    // Check for pipeline errors posted while encoding
+    bool pipelineError = false;
+    GstBus* bus = gst_element_get_bus(pipeline);
+    if (bus) {
+        GstMessage* msg = gst_bus_pop_filtered(bus, GST_MESSAGE_ERROR);
+        if (msg) {
+            pipelineError = true;
+            gst_message_unref(msg);
+        }
+        gst_object_unref(bus);
+    }
+
+    releasePipeline();
+
+    if (pipelineError || !reachedEos || encoded.empty()) return IMP_ERROR_ENCODE_FAILED;
+
+    *data = malloc(encoded.size());
+    if (!*data) return IMP_ERROR_INTERNAL;
+    memcpy(*data, encoded.data(), encoded.size());
+    *data_size = encoded.size();
+    return IMP_OK;
+}
+
+/**
+ * One-shot encode of float samples straight to a file.
+ *
+ * Encodes to memory with imp_encode_audio() and writes the result to
+ * file_path in binary mode.
+ *
+ * @return IMP_ERROR_INVALID_ARGUMENT when file_path is NULL; the status of
+ *         imp_encode_audio() when encoding fails; IMP_ERROR_INTERNAL when the
+ *         file cannot be opened, fully written or closed; IMP_OK otherwise.
+ */
+imp_status_t imp_encode_audio_file(const char* file_path,
+                                   const float* samples,
+                                   size_t num_samples,
+                                   const imp_audio_encode_opts_t* opts) {
+    // Reject a missing path before doing any encoding work
+    if (!file_path) return IMP_ERROR_INVALID_ARGUMENT;
+
+    // Encode to memory, then write to file
+    void* data = nullptr;
+    size_t data_size = 0;
+    imp_status_t st = imp_encode_audio(&data, &data_size, samples, num_samples, opts);
+    if (st != IMP_OK) return st;
+
+    FILE* fp = fopen(file_path, "wb");
+    if (!fp) { free(data); return IMP_ERROR_INTERNAL; }
+
+    // A short write or a failed close (e.g. disk full) must not be reported
+    // as success.
+    const size_t written = fwrite(data, 1, data_size, fp);
+    const bool closed = (fclose(fp) == 0);
+    free(data);
+
+    return (written == data_size && closed) ? IMP_OK : IMP_ERROR_INTERNAL;
+}
+
diff --git a/src/mpi/intel_mpi.h b/src/mpi/intel_mpi.h
new file mode 100644
index 0000000000..168d1562aa
--- /dev/null
+++ b/src/mpi/intel_mpi.h
@@ -0,0 +1,1013 @@
+/**
+ * Intel Media Processing Interface (Intel MPI)
+ * 
+ * Lightweight API for media decode/encode with zero-copy OpenVINO integration.
+ * Designed for Intel GPU acceleration with minimal CPU-GPU memory transfers.
+ * 
+ * Version: 0.1.0
+ */
+
+#ifndef INTEL_MPI_H
+#define INTEL_MPI_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {  // opened after the system includes so only this API's declarations get C linkage
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+// Forward Declarations (OpenVINO C API types)
+//////////////////////////////////////////////////////////////////////////////
+
+typedef struct ov_compiled_model ov_compiled_model_t;
+typedef struct ov_remote_context ov_remote_context_t;
+
+//////////////////////////////////////////////////////////////////////////////
+// Device Types
+//////////////////////////////////////////////////////////////////////////////
+
+typedef enum {
+    IMP_DEVICE_AUTO = 0,    // Auto-detect from context (match inference device)
+    IMP_DEVICE_CPU = 1,     // CPU (host memory)
+    IMP_DEVICE_GPU = 2,     // Intel GPU
+    IMP_DEVICE_NPU = 3,     // Intel NPU
+} imp_device_type_t;
+
+//////////////////////////////////////////////////////////////////////////////
+// Tensor Handle
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Opaque tensor handle (pointer type)
+ * 
+ * Wraps either ov::Tensor (CPU) or ov::RemoteTensor (GPU/NPU).
+ * Use imp_tensor_get_*() functions to query properties.
+ * 
+ * NOTE: Despite the abstraction, the underlying tensor type matters:
+ * - CPU context: wraps ov::Tensor (host memory)
+ * - GPU/NPU context: wraps ov::RemoteTensor (device memory)
+ * Use imp_tensor_get_device_type() to determine the actual type,
+ * and imp_tensor_get_ov() to access the underlying OpenVINO tensor.
+ */
+typedef struct imp_tensor_s imp_tensor_t;
+
+//////////////////////////////////////////////////////////////////////////////
+// Version
+//////////////////////////////////////////////////////////////////////////////
+
+#define IMP_VERSION_MAJOR 0
+#define IMP_VERSION_MINOR 1
+#define IMP_VERSION_PATCH 0
+
+//////////////////////////////////////////////////////////////////////////////
+// Error Codes
+//////////////////////////////////////////////////////////////////////////////
+
+typedef enum {
+    IMP_OK = 0,  // Success
+    IMP_ERROR_INVALID_ARGUMENT = -1,  // Invalid argument (e.g. context-type query on a CPU tensor)
+    IMP_ERROR_OUT_OF_MEMORY = -2,
+    IMP_ERROR_DEVICE_NOT_AVAILABLE = -3,
+    IMP_ERROR_UNSUPPORTED_FORMAT = -4,
+    IMP_ERROR_DECODE_FAILED = -5,
+    IMP_ERROR_ENCODE_FAILED = -6,  // Encoding failed (e.g. the encoder produced no output)
+    IMP_ERROR_CONTEXT_MISMATCH = -7,
+    IMP_ERROR_STREAM_END = -8,  // End of stream: no more frames to read / deliver to the frame callback
+    IMP_ERROR_TIMEOUT = -9,  // presumably a read exceeding timeout_ms — TODO confirm
+    IMP_ERROR_INTERNAL = -99  // Unclassified failure (e.g. the output file cannot be opened)
+} imp_status_t;
+
+//////////////////////////////////////////////////////////////////////////////
+// Context Types
+//////////////////////////////////////////////////////////////////////////////
+
+typedef enum {
+    IMP_CONTEXT_OPENCL = 0,      // OpenCL context (Linux/Windows)
+    IMP_CONTEXT_D3D11 = 1,       // Direct3D 11 (Windows)
+    IMP_CONTEXT_VAAPI = 2,       // VA-API (Linux)
+} imp_context_type_t;
+
+//////////////////////////////////////////////////////////////////////////////
+// Data Types
+//////////////////////////////////////////////////////////////////////////////
+
+typedef enum {
+    IMP_TYPE_U8 = 0,
+    IMP_TYPE_U16 = 1,
+    IMP_TYPE_FP16 = 2,
+    IMP_TYPE_FP32 = 3,
+    IMP_TYPE_I8 = 4,
+    IMP_TYPE_I32 = 5
+} imp_element_type_t;
+
+typedef enum {
+    IMP_FORMAT_RGB = 0,
+    IMP_FORMAT_BGR = 1,
+    IMP_FORMAT_NV12 = 2,        // YUV 4:2:0, two planes (Y + interleaved UV)
+    IMP_FORMAT_I420 = 3,        // YUV 4:2:0, three planes
+    IMP_FORMAT_GRAY = 4,
+    IMP_FORMAT_RGBA = 5,
+    IMP_FORMAT_BGRA = 6
+} imp_pixel_format_t;
+
+typedef enum {
+    IMP_LAYOUT_NHWC = 0,        // [batch, height, width, channels]
+    IMP_LAYOUT_NCHW = 1,        // [batch, channels, height, width]
+    IMP_LAYOUT_HWC = 2,         // [height, width, channels]
+    IMP_LAYOUT_CHW = 3          // [channels, height, width]
+} imp_layout_t;
+
+//////////////////////////////////////////////////////////////////////////////
+// Context
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Opaque context handle (pointer type)
+ */
+typedef struct imp_context_s imp_context_t;
+
+/**
+ * Create context from OpenVINO compiled model
+ * Extracts GPU context from model for zero-copy tensor sharing.
+ * 
+ * @param ctx Output: context handle pointer
+ * @param compiled_model OpenVINO compiled model (must be GPU-compiled)
+ * @return Status code
+ */
+imp_status_t imp_context_create(imp_context_t** ctx, 
+                                ov_compiled_model_t* compiled_model);
+
+/**
+ * Create context from OpenVINO remote context
+ * Use when sharing context between multiple models.
+ * 
+ * @param ctx Output: context handle pointer
+ * @param remote_ctx OpenVINO remote context
+ * @param type Context backend type
+ * @return Status code
+ */
+imp_status_t imp_context_create_from_remote(imp_context_t** ctx,
+                                            ov_remote_context_t* remote_ctx,
+                                            imp_context_type_t type);
+
+/**
+ * Get underlying native handle
+ * 
+ * @param ctx Context handle
+ * @param type Output: context type
+ * @param native_handle Output: native handle (cl_context, ID3D11Device*, etc.)
+ * @return Status code
+ */
+imp_status_t imp_context_get_native(imp_context_t* ctx,
+                                    imp_context_type_t* type,
+                                    void** native_handle);
+
+/**
+ * Get OpenVINO remote context
+ * 
+ * @param ctx Context handle
+ * @param remote_ctx Output: OpenVINO remote context
+ * @return Status code
+ */
+imp_status_t imp_context_get_ov_remote(imp_context_t* ctx,
+                                       ov_remote_context_t** remote_ctx);
+
+/**
+ * Get device type the context is configured for
+ * 
+ * @param ctx Context handle
+ * @param device_type Output: device type
+ * @return Status code
+ */
+imp_status_t imp_context_get_device_type(imp_context_t* ctx,
+                                          imp_device_type_t* device_type);
+
+/**
+ * Get device name (e.g., "GPU.0", "GPU.1", "CPU", "NPU")
+ * 
+ * @param ctx Context handle
+ * @param device_name Output: device name string (valid until context destroyed)
+ * @return Status code
+ */
+imp_status_t imp_context_get_device_name(imp_context_t* ctx,
+                                          const char** device_name);
+
+/**
+ * Destroy context
+ * 
+ * @param ctx Context handle
+ */
+void imp_context_destroy(imp_context_t* ctx);
+
+// TODO: imp_context_create_from_device(imp_context_t** ctx, const char* device_name)
+// Creates a pure media context for decode/encode without a model.
+// Needed for transcode workflows with no inference.
+
+/**
+ * Get last error message for context
+ * 
+ * @param ctx Context handle
+ * @return Error message string (valid until next API call)
+ */
+const char* imp_context_get_error(imp_context_t* ctx);
+
+//////////////////////////////////////////////////////////////////////////////
+// Video Source Configuration
+//////////////////////////////////////////////////////////////////////////////
+
+typedef enum {
+    IMP_SOURCE_FILE = 0,           // Local file path
+    IMP_SOURCE_URL = 1,            // Network URL (rtsp://, http://, udp://)
+    IMP_SOURCE_CAMERA = 2,         // Camera device
+} imp_source_type_t;
+
+/**
+ * Opaque video source configuration handle (pointer type)
+ */
+typedef struct imp_video_source_s imp_video_source_t;
+
+/**
+ * Create video source configuration
+ * 
+ * @param source Output: source configuration handle pointer
+ * @param type Source type
+ * @return Status code
+ */
+imp_status_t imp_video_source_create(imp_video_source_t** source,
+                                     imp_source_type_t type);
+
+/**
+ * Set source property
+ * 
+ * Standard keys by source type:
+ *   IMP_SOURCE_FILE:
+ *     - "path": file path (required)
+ *   IMP_SOURCE_URL:
+ *     - "url": stream URL (required)
+ *     - "transport": "tcp" or "udp" (RTSP only)
+ *     - "username": auth username
+ *     - "password": auth password
+ *     - "timeout": connection timeout in ms
+ *     - "low_latency": "1" to enable
+ *   IMP_SOURCE_CAMERA:
+ *     - "device": device index ("0", "1") or path ("/dev/video0")
+ *     - "width": preferred width
+ *     - "height": preferred height
+ *     - "framerate": preferred FPS
+ *     - "format": capture format ("mjpeg", "nv12", "yuyv")
+ * 
+ * @param source Source configuration handle
+ * @param key Property key
+ * @param value Property value (as string)
+ * @return Status code
+ */
+imp_status_t imp_video_source_set(imp_video_source_t* source,
+                                  const char* key,
+                                  const char* value);
+
+/**
+ * Destroy source configuration
+ * 
+ * @param source Source configuration handle
+ */
+void imp_video_source_destroy(imp_video_source_t* source);
+
+//////////////////////////////////////////////////////////////////////////////
+// Decode Configuration
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Image decode options
+ */
+typedef struct {
+    imp_pixel_format_t output_format;   // Desired output format (default: RGB)
+    imp_layout_t output_layout;         // Tensor layout (default: NHWC)
+    imp_element_type_t output_type;     // Element type (default: U8)
+    uint32_t resize_width;              // 0 = keep original
+    uint32_t resize_height;             // 0 = keep original
+    const char* decode_device;          // Where decoding runs: "CPU", "GPU", "GPU.0", "GPU.1", "NPU", NULL = match context
+    const char* output_device;          // Where result tensor is placed: NULL = same as decode_device
+} imp_image_decode_opts_t;
+
+/**
+ * Video decode output branch
+ * 
+ * Defines one output branch at a specific resolution/format.
+ * Multiple branches share a single decode + tee pipeline on GPU.
+ * Width/height of 0 means: deduce from context model dims.
+ * If context has no model and dims are 0, source resolution is used.
+ */
+typedef struct {
+    uint32_t width;                     // Output width  (0 = deduce from model, or source res)
+    uint32_t height;                    // Output height (0 = deduce from model, or source res)
+    imp_pixel_format_t format;          // Output format (default: NV12)
+    const char* name;                   // Optional branch name (for imp_video_read_frame_by_name)
+} imp_video_branch_t;
+
+/**
+ * Video stream configuration
+ * 
+ * If branches is NULL and branch_count is 0, a single branch at source
+ * resolution is created automatically.
+ */
+typedef struct {
+    imp_pixel_format_t output_format;   // Base format after decode (default: NV12)
+    imp_layout_t output_layout;         // Tensor layout (default: NHWC)
+    imp_element_type_t output_type;     // Element type (default: U8)
+    const char* decode_device;          // Where decoding runs: "CPU", "GPU", "GPU.0", "GPU.1", "NPU", NULL = match context
+    const char* output_device;          // Where result tensor is placed: NULL = same as decode_device
+    uint32_t buffer_count;              // Frame buffer pool size (default: 4)
+    int64_t timeout_ms;                 // Read timeout, -1 = infinite
+    const imp_video_branch_t* branches; // Array of output branches (NULL = single branch at source res) // TODO FIXME do we need more than one?
+    uint32_t branch_count;              // Number of branches (0 = single branch at source res)
+} imp_video_decode_opts_t;
+
+/**
+ * Audio decode options (the listed defaults presumably apply when opts is NULL — TODO confirm)
+ */
+typedef struct {
+    uint32_t sample_rate;               // Target sample rate (0 = keep original)
+    uint32_t channels;                  // Target channels (0 = keep original)
+    imp_element_type_t output_type;     // Element type (default: FP32; a zero-initialized struct selects IMP_TYPE_U8 = 0)
+    bool normalize;                     // Normalize to [-1,1] for float types (default: true; a zero-initialized struct gives false)
+} imp_audio_decode_opts_t;
+
+//////////////////////////////////////////////////////////////////////////////
+// Encode Configuration
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Image encode options
+ */
+typedef struct {
+    const char* format;                 // "jpeg", "png", "bmp"
+    int quality;                        // JPEG quality 1-100 (default: 90)
+    const char* input_device;           // Expected input tensor location: NULL = accept any, copy if needed
+    const char* encode_device;          // Where encoding runs: "CPU", "GPU", "GPU.0", etc., NULL = match context
+} imp_image_encode_opts_t;
+
+/**
+ * Video encode options
+ */
+typedef struct {
+    const char* codec;                  // "h264", "h265", "av1"
+    uint32_t bitrate_kbps;              // Target bitrate
+    uint32_t framerate;                 // Frames per second
+    const char* input_device;           // Expected input tensor location: NULL = accept any, copy if needed
+    const char* encode_device;          // Where encoding runs: "CPU", "GPU", "GPU.0", etc., NULL = match context
+    const char* output_path;            // Output file path (NULL for memory)
+} imp_video_encode_opts_t;
+
+//////////////////////////////////////////////////////////////////////////////
+// Async Callback Types
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Decode completion callback
+ * 
+ * @param status Operation status
+ * @param tensor Output tensor (owned by caller after callback)
+ * @param user_data User-provided data
+ */
+typedef void (*imp_decode_callback_t)(imp_status_t status,
+                                      imp_tensor_t* tensor,
+                                      void* user_data);
+
+/**
+ * Video frame callback (for streaming)
+ * 
+ * @param status Operation status (IMP_ERROR_STREAM_END when done)
+ * @param tensor Frame tensor (valid only during callback)
+ * @param frame_index Frame number (0-based)
+ * @param timestamp_us Presentation timestamp in microseconds
+ * @param user_data User-provided data
+ * @return true to continue, false to stop stream
+ */
+typedef bool (*imp_video_frame_callback_t)(imp_status_t status,
+                                           imp_tensor_t* tensor,
+                                           uint64_t frame_index,
+                                           int64_t timestamp_us,
+                                           void* user_data);
+
+/**
+ * Encode completion callback
+ * 
+ * @param status Operation status
+ * @param data Encoded data (owned by caller after callback)
+ * @param size Data size in bytes
+ * @param user_data User-provided data
+ */
+typedef void (*imp_encode_callback_t)(imp_status_t status,
+                                      void* data,
+                                      size_t size,
+                                      void* user_data);
+
+//////////////////////////////////////////////////////////////////////////////
+// Image Decode
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Decode image from memory buffer
+ * 
+ * @param tensor Output: tensor handle pointer (CPU or GPU depending on context/options)
+ * @param data Encoded image data (JPEG, PNG, etc.)
+ * @param size Data size in bytes
+ * @param ctx Context handle
+ * @param opts Decode options (NULL for defaults)
+ * @param callback Async callback (NULL for synchronous)
+ * @param user_data User data for callback
+ * @return Status code (IMP_OK if async started successfully)
+ */
+imp_status_t imp_decode_image(imp_tensor_t** tensor,
+                              const void* data,
+                              size_t size,
+                              imp_context_t* ctx,
+                              const imp_image_decode_opts_t* opts,
+                              imp_decode_callback_t callback,
+                              void* user_data);
+
+/**
+ * Decode image from file
+ * 
+ * @param tensor Output: tensor handle pointer (CPU or GPU depending on context/options)
+ * @param file_path Path to image file
+ * @param ctx Context handle
+ * @param opts Decode options (NULL for defaults)
+ * @param callback Async callback (NULL for synchronous)
+ * @param user_data User data for callback
+ * @return Status code
+ */
+imp_status_t imp_decode_image_file(imp_tensor_t** tensor,
+                                   const char* file_path,
+                                   imp_context_t* ctx,
+                                   const imp_image_decode_opts_t* opts,
+                                   imp_decode_callback_t callback,
+                                   void* user_data);
+
+//////////////////////////////////////////////////////////////////////////////
+// Video Decode
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Opaque video stream handle (pointer type)
+ */
+typedef struct imp_video_stream_s imp_video_stream_t;
+
+/**
+ * Open video stream from source configuration
+ * 
+ * @param stream Output: stream handle pointer
+ * @param source Source configuration (ownership transferred, do not destroy separately)
+ * @param ctx Context handle
+ * @param opts Stream options (NULL for defaults)
+ * @return Status code
+ */
+imp_status_t imp_video_open(imp_video_stream_t** stream,
+                            imp_video_source_t* source,
+                            imp_context_t* ctx,
+                            const imp_video_decode_opts_t* opts);
+
+/**
+ * Read next frame from a specific branch
+ * 
+ * @param tensor Output: frame tensor pointer (NV12 data, CPU side)
+ * @param stream Stream handle
+ * @param branch_index Branch index (0-based, must be < branch_count)
+ * @return Status code (IMP_ERROR_STREAM_END when no more frames)
+ */
+imp_status_t imp_video_read_frame(imp_tensor_t** tensor,
+                                  imp_video_stream_t* stream,
+                                  uint32_t branch_index);
+
+/**
+ * Read next frame from a named branch
+ * 
+ * @param tensor Output: frame tensor pointer (NV12 data, CPU side)
+ * @param stream Stream handle
+ * @param branch_name Branch name (as specified in imp_video_branch_t)
+ * @return Status code (IMP_ERROR_STREAM_END when no more frames)
+ */
+imp_status_t imp_video_read_frame_by_name(imp_tensor_t** tensor,
+                                          imp_video_stream_t* stream,
+                                          const char* branch_name);
+
+/**
+ * Start async frame processing with callback
+ * 
+ * @param stream Stream handle
+ * @param callback Frame callback
+ * @param user_data User data for callback
+ * @return Status code
+ */
+imp_status_t imp_video_start_async(imp_video_stream_t* stream,
+                                   imp_video_frame_callback_t callback,
+                                   void* user_data);
+
+/**
+ * Stop async processing
+ * 
+ * @param stream Stream handle
+ */
+void imp_video_stop(imp_video_stream_t* stream);
+
+/**
+ * Get video metadata
+ * 
+ * @param stream Stream handle
+ * @param width Output: frame width
+ * @param height Output: frame height
+ * @param fps Output: frames per second
+ * @param frame_count Output: total frames (-1 if unknown/live)
+ * @return Status code
+ */
+imp_status_t imp_video_get_info(imp_video_stream_t* stream,
+                                uint32_t* width,
+                                uint32_t* height,
+                                float* fps,
+                                int64_t* frame_count);
+
+/**
+ * Close video stream
+ * 
+ * @param stream Stream handle
+ */
+void imp_video_close(imp_video_stream_t* stream);
+
+/**
+ * Query video file metadata
+ * 
+ * Use before decode to determine file properties.
+ * 
+ * @param file_path Path to video file
+ * @param width Output: frame width (can be NULL)
+ * @param height Output: frame height (can be NULL)
+ * @param fps Output: frames per second (can be NULL)
+ * @param frame_count Output: total frames, -1 if unknown (can be NULL)
+ * @param duration_sec Output: duration in seconds (can be NULL)
+ * @return Status code
+ */
+imp_status_t imp_video_file_info(const char* file_path,
+                                 uint32_t* width,
+                                 uint32_t* height,
+                                 float* fps,
+                                 int64_t* frame_count,
+                                 double* duration_sec);
+
+// TODO: One-shot video decode (batch decode entire video at once)
+// Decode entire video to 4D tensor [frames, H, W, C]
+// imp_status_t imp_decode_video(imp_tensor_t** tensor, const void* data, size_t size, ...);
+// imp_status_t imp_decode_video_file(imp_tensor_t** tensor, const char* file_path, ...);
+
+//////////////////////////////////////////////////////////////////////////////
+// Audio Decode
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Decode audio from memory buffer
+ * 
+ * @param tensor Output: tensor handle pointer with audio samples
+ * @param data Encoded audio data
+ * @param size Data size in bytes
+ * @param ctx Context handle
+ * @param opts Decode options (NULL for defaults)
+ * @param callback Async callback (NULL for synchronous)
+ * @param user_data User data for callback
+ * @return Status code
+ */
+imp_status_t imp_decode_audio(imp_tensor_t** tensor,
+                              const void* data,
+                              size_t size,
+                              imp_context_t* ctx,
+                              const imp_audio_decode_opts_t* opts,
+                              imp_decode_callback_t callback,
+                              void* user_data);
+
+/**
+ * Decode audio from file
+ * 
+ * @param tensor Output: tensor handle pointer with audio samples
+ * @param file_path Path to audio file
+ * @param ctx Context handle
+ * @param opts Decode options (NULL for defaults)
+ * @param callback Async callback (NULL for synchronous)
+ * @param user_data User data for callback
+ * @return Status code
+ */
+imp_status_t imp_decode_audio_file(imp_tensor_t** tensor,
+                                   const char* file_path,
+                                   imp_context_t* ctx,
+                                   const imp_audio_decode_opts_t* opts,
+                                   imp_decode_callback_t callback,
+                                   void* user_data);
+
+/**
+ * Query audio file metadata
+ * 
+ * Use before decode to determine file properties and allocate appropriately.
+ * 
+ * @param file_path Path to audio file
+ * @param sample_rate Output: sample rate in Hz (can be NULL)
+ * @param channels Output: number of channels (can be NULL)
+ * @param duration_sec Output: duration in seconds (can be NULL)
+ * @return Status code
+ */
+imp_status_t imp_audio_file_info(const char* file_path,
+                                 uint32_t* sample_rate,
+                                 uint32_t* channels,
+                                 double* duration_sec);
+
+//////////////////////////////////////////////////////////////////////////////
+// Audio Stream (high-level decode → encode pipeline)
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Audio stream options
+ */
+typedef struct {
+    uint32_t    sample_rate;            // Output sample rate (0 = 44100)
+    uint32_t    channels;               // Output channels (0 = 2)
+    const char* output_codec;           // "mp3","aac","flac","wav","opus" (NULL = "mp3")
+    uint32_t    output_bitrate_kbps;    // Bitrate for lossy codecs (0 = 192)
+    bool        expose_samples;         // Reserved for future use
+} imp_audio_stream_opts_t;
+
+/**
+ * Audio stream info (filled in by imp_audio_get_info for an open imp_audio_stream_t)
+ */
+typedef struct {
+    uint32_t sample_rate;   // Sample rate; presumably in Hz, as in imp_audio_file_info — TODO confirm
+    uint32_t channels;      // Number of audio channels
+    double   duration_sec;  // Duration in seconds
+    int64_t  num_samples;   // NOTE(review): per-channel frames or total across channels? value when unknown? — confirm
+} imp_audio_info_t;
+
+/**
+ * Opaque audio stream handle (pointer type)
+ */
+typedef struct imp_audio_stream_s imp_audio_stream_t;
+
+/**
+ * Open audio stream for processing
+ * 
+ * Creates a decode → encode pipeline for file-based audio processing.
+ * 
+ * @param stream Output: stream handle pointer
+ * @param input_path Input audio file path
+ * @param output_path Output audio file path (NULL for decode-only)
+ * @param opts Stream options (NULL for defaults: 44100 Hz, stereo, MP3 192kbps)
+ * @return Status code
+ */
+imp_status_t imp_audio_open(imp_audio_stream_t** stream,
+                            const char* input_path,
+                            const char* output_path,
+                            const imp_audio_stream_opts_t* opts);
+
+/**
+ * Get audio stream info (duration, sample rate, channels)
+ * 
+ * @param stream Stream handle
+ * @param info Output: audio info structure
+ * @return Status code
+ */
+imp_status_t imp_audio_get_info(imp_audio_stream_t* stream,
+                                imp_audio_info_t* info);
+
+/**
+ * Process audio stream (run decode → encode pipeline to completion)
+ * 
+ * @param stream Stream handle
+ * @return Status code
+ */
+imp_status_t imp_audio_process(imp_audio_stream_t* stream);
+
+/**
+ * Get processing timing information
+ * 
+ * @param stream Stream handle
+ * @param wall_time_sec Output: wall-clock time in seconds
+ * @param realtime_factor Output: audio_duration / wall_time (>1 = faster than realtime)
+ * @return Status code
+ */
+imp_status_t imp_audio_get_timing(imp_audio_stream_t* stream,
+                                  double* wall_time_sec,
+                                  double* realtime_factor);
+
+/**
+ * Close audio stream and free resources
+ * 
+ * @param stream Stream handle
+ */
+void imp_audio_close(imp_audio_stream_t* stream);
+
+//////////////////////////////////////////////////////////////////////////////
+// Image Encode
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Encode tensor to image format
+ * 
+ * @param data Output: encoded data (caller must free with imp_free)
+ * @param size Output: data size
+ * @param tensor Input tensor
+ * @param ctx Context handle
+ * @param opts Encode options (NULL for defaults: JPEG quality 90)
+ * @param callback Async callback (NULL for synchronous)
+ * @param user_data User data for callback
+ * @return Status code
+ */
+imp_status_t imp_encode_image(void** data,
+                              size_t* size,
+                              imp_tensor_t* tensor,
+                              imp_context_t* ctx,
+                              const imp_image_encode_opts_t* opts,
+                              imp_encode_callback_t callback,
+                              void* user_data);
+
+/**
+ * Encode tensor to image file
+ * 
+ * @param file_path Output file path (format inferred from extension)
+ * @param tensor Input tensor
+ * @param ctx Context handle
+ * @param opts Encode options (NULL for defaults)
+ * @param callback Async callback (NULL for synchronous)
+ * @param user_data User data for callback
+ * @return Status code
+ */
+imp_status_t imp_encode_image_file(const char* file_path,
+                                   imp_tensor_t* tensor,
+                                   imp_context_t* ctx,
+                                   const imp_image_encode_opts_t* opts,
+                                   imp_encode_callback_t callback,
+                                   void* user_data);
+
+//////////////////////////////////////////////////////////////////////////////
+// Video Encode
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Opaque video encoder handle (pointer type)
+ */
+typedef struct imp_video_encoder_s imp_video_encoder_t;
+
+/**
+ * Create video encoder
+ * 
+ * @param encoder Output: encoder handle pointer
+ * @param width Frame width
+ * @param height Frame height
+ * @param ctx Context handle
+ * @param opts Encode options
+ * @return Status code
+ */
+imp_status_t imp_video_encoder_create(imp_video_encoder_t** encoder,
+                                      uint32_t width,
+                                      uint32_t height,
+                                      imp_context_t* ctx,
+                                      const imp_video_encode_opts_t* opts);
+
+/**
+ * Encode frame
+ * 
+ * @param encoder Encoder handle
+ * @param tensor Frame tensor
+ * @return Status code
+ */
+imp_status_t imp_video_encoder_write(imp_video_encoder_t* encoder,
+                                     imp_tensor_t* tensor);
+
+/**
+ * Finalize and close encoder
+ * 
+ * @param encoder Encoder handle
+ */
+void imp_video_encoder_close(imp_video_encoder_t* encoder);
+
+// TODO: One-shot video encode (batch encode entire video at once)
+// Encode 4D tensor [frames, H, W, C] to video
+// To memory (e.g. for network send):
+// imp_status_t imp_encode_video(void** data, size_t* size, imp_tensor_t* tensor, ...);
+// To file:
+// imp_status_t imp_encode_video_file(const char* file_path, imp_tensor_t* tensor, ...);
+
+//////////////////////////////////////////////////////////////////////////////
+// Audio Encode
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Audio encode options (taken by imp_audio_encoder_create, imp_encode_audio and imp_encode_audio_file)
+ */
+typedef struct {
+    const char* codec;                  // "mp3", "aac", "opus", "flac", "wav"; imp_encode_audio() also documents "pcm" (raw float passthrough)
+    uint32_t bitrate_kbps;              // Target bitrate for lossy codecs (default: 192)
+    uint32_t sample_rate;               // Output sample rate (0 = match input)
+    uint32_t channels;                  // Output channels (0 = match input)
+    const char* output_path;            // Output file path (ignored by imp_encode_audio; overridden by imp_encode_audio_file's file_path)
+} imp_audio_encode_opts_t;
+
+/**
+ * Opaque audio encoder handle (pointer type)
+ */
+typedef struct imp_audio_encoder_s imp_audio_encoder_t;
+
+/**
+ * Create audio encoder
+ * 
+ * @param encoder Output: encoder handle pointer
+ * @param opts Encode options
+ * @param callback Completion callback (NULL for synchronous close)
+ * @param user_data User data for callback
+ * @return Status code
+ */
+imp_status_t imp_audio_encoder_create(imp_audio_encoder_t** encoder,
+                                      const imp_audio_encode_opts_t* opts,
+                                      imp_encode_callback_t callback,
+                                      void* user_data);
+
+/**
+ * Write audio samples to encoder
+ * 
+ * @param encoder Encoder handle
+ * @param tensor Audio samples tensor (from imp_decode_audio_file or custom)
+ * @return Status code
+ */
+imp_status_t imp_audio_encoder_write(imp_audio_encoder_t* encoder,
+                                     imp_tensor_t* tensor);
+
+/**
+ * Finalize and close encoder
+ * 
+ * If callback was provided at create, it will be called when encoding completes.
+ * 
+ * @param encoder Encoder handle
+ */
+void imp_audio_encoder_close(imp_audio_encoder_t* encoder);
+
+/**
+ * One-shot audio encode to memory buffer.
+ *
+ * Encodes raw float PCM samples into the specified codec format and
+ * returns the encoded bytes.  For "wav" the library uses a built-in
+ * header writer (no GStreamer overhead).  For "pcm" the raw float
+ * bytes are returned as-is.  For lossy/lossless codecs ("mp3",
+ * "flac", "opus", "aac") a GStreamer pipeline is created internally.
+ *
+ * The caller must free the returned buffer with imp_free().
+ *
+ * @param data        Output: pointer to encoded data (heap-allocated)
+ * @param data_size   Output: size of encoded data in bytes
+ * @param samples     Input float PCM samples (single channel, or interleaved when multi-channel)
+ * @param num_samples Total number of float values in @p samples (all channels counted)
+ * @param opts        Encode options (codec, sample_rate, channels, bitrate).
+ *                    opts->output_path is ignored — output goes to memory.
+ * @return IMP_OK on success, an imp_status_t error code otherwise
+ */
+imp_status_t imp_encode_audio(void** data,
+                              size_t* data_size,
+                              const float* samples,
+                              size_t num_samples,
+                              const imp_audio_encode_opts_t* opts);
+
+/**
+ * One-shot audio encode to file.
+ *
+ * Same as imp_encode_audio() but writes directly to disk.
+ *
+ * @param file_path   Output file path
+ * @param samples     Input float PCM samples
+ * @param num_samples Number of float values
+ * @param opts        Encode options (codec, bitrate, sample_rate, channels).
+ *                    opts->output_path is overridden by @p file_path.
+ * @return IMP_OK on success
+ */
+imp_status_t imp_encode_audio_file(const char* file_path,
+                                   const float* samples,
+                                   size_t num_samples,
+                                   const imp_audio_encode_opts_t* opts);
+
+//////////////////////////////////////////////////////////////////////////////
+// Tensor Utilities
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Get tensor's device type
+ * 
+ * @param tensor Tensor handle
+ * @param device_type Output: device type (CPU, GPU, NPU)
+ * @return Status code
+ */
+imp_status_t imp_tensor_get_device_type(imp_tensor_t* tensor,
+                                         imp_device_type_t* device_type);
+
+/**
+ * Get tensor's device name (e.g., "GPU.0", "GPU.1", "CPU", "NPU")
+ * 
+ * @param tensor Tensor handle
+ * @param device_name Output: device name string. NOTE(review): ownership/lifetime is not stated here; confirm whether callers must copy it or may free it
+ * @return Status code
+ */
+imp_status_t imp_tensor_get_device_name(imp_tensor_t* tensor,
+                                         const char** device_name);
+
+/**
+ * Get tensor's context type (for GPU/NPU tensors)
+ * Returns IMP_ERROR_INVALID_ARGUMENT for CPU tensors.
+ * 
+ * @param tensor Tensor handle
+ * @param context_type Output: context type (OpenCL, D3D11, VA-API)
+ * @return Status code
+ */
+imp_status_t imp_tensor_get_context_type(imp_tensor_t* tensor,
+                                          imp_context_type_t* context_type);
+
+/**
+ * Get underlying OpenVINO tensor pointer
+ * 
+ * @param tensor Tensor handle
+ * @param ov_tensor Output: pointer to underlying OV tensor (ov::Tensor* or ov::RemoteTensor*)
+ * @param device_type Output: device type (to know actual type for casting)
+ * @return Status code
+ */
+imp_status_t imp_tensor_get_ov(imp_tensor_t* tensor,
+                                void** ov_tensor,
+                                imp_device_type_t* device_type);
+
+/**
+ * Get tensor shape
+ * 
+ * @param tensor Tensor handle
+ * @param dims Output: caller-provided array that receives the dimensions
+ * @param num_dims In: size of the @p dims array; Out: actual number of dimensions
+ * @return Status code
+ */
+imp_status_t imp_tensor_get_shape(imp_tensor_t* tensor,
+                                  int64_t* dims,
+                                  size_t* num_dims);
+
+/**
+ * Get tensor element type
+ * 
+ * @param tensor Tensor handle
+ * @param type Output: element type
+ * @return Status code
+ */
+imp_status_t imp_tensor_get_element_type(imp_tensor_t* tensor,
+                                          imp_element_type_t* type);
+
+/**
+ * Release tensor
+ * 
+ * @param tensor Tensor handle
+ */
+void imp_tensor_release(imp_tensor_t* tensor);
+
+//////////////////////////////////////////////////////////////////////////////
+// Memory Management
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Free memory allocated by IMP functions (e.g. the buffer returned by imp_encode_audio())
+ * 
+ * @param ptr Pointer to free
+ */
+void imp_free(void* ptr);
+
+//////////////////////////////////////////////////////////////////////////////
+// Utility
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Get API version
+ * 
+ * @param major Output: major version
+ * @param minor Output: minor version  
+ * @param patch Output: patch version
+ */
+void imp_get_version(int* major, int* minor, int* patch);
+
+/**
+ * Check hardware decode support
+ * 
+ * @param ctx Context handle
+ * @param supported Output: true if HW decode available
+ * @return Status code
+ */
+imp_status_t imp_hw_decode_supported(imp_context_t* ctx, bool* supported);
+
+/**
+ * Check hardware encode support
+ * 
+ * @param ctx Context handle
+ * @param supported Output: true if HW encode available
+ * @return Status code
+ */
+imp_status_t imp_hw_encode_supported(imp_context_t* ctx, bool* supported);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // INTEL_MPI_H
diff --git a/third_party/gstreamer/BUILD b/third_party/gstreamer/BUILD
new file mode 100644
index 0000000000..ae706597d0
--- /dev/null
+++ b/third_party/gstreamer/BUILD
@@ -0,0 +1,2 @@
+# Empty BUILD file — makes this directory a Bazel package so that
+# gstreamer_windows.BUILD can be referenced as @//third_party/gstreamer:gstreamer_windows.BUILD
diff --git a/third_party/gstreamer/gstreamer_windows.BUILD b/third_party/gstreamer/gstreamer_windows.BUILD
new file mode 100644
index 0000000000..6b53b18cac
--- /dev/null
+++ b/third_party/gstreamer/gstreamer_windows.BUILD
@@ -0,0 +1,131 @@
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# GStreamer pre-built import for Windows (msvc_x86_64 installer layout)
+#
+# Expected install location: C:\Program Files\gstreamer\1.0\msvc_x86_64
+# The new_local_repository in WORKSPACE points "path" there;
+# this BUILD file maps headers and import libs for Bazel consumption.
+
+package(default_visibility = ["//visibility:public"])
+
+# ---------- headers ----------
+
+cc_library(
+    name = "gstreamer_headers",
+    hdrs = glob([
+        "include/gstreamer-1.0/**/*.h",
+        "lib/gstreamer-1.0/include/**/*.h",
+    ]),
+    includes = [
+        "include/gstreamer-1.0",
+        "lib/gstreamer-1.0/include",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "glib_headers",
+    hdrs = glob([
+        "include/glib-2.0/**/*.h",
+        "lib/glib-2.0/include/**/*.h",
+    ]),
+    includes = [
+        "include/glib-2.0",
+        "lib/glib-2.0/include",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+# ---------- import libraries (.lib only; system_provided = True: the DLLs must be found at run time) ----------
+
+cc_import(
+    name = "gstreamer_lib",
+    interface_library = "lib/gstreamer-1.0.lib",
+    system_provided = True,
+)
+
+cc_import(
+    name = "gstapp_lib",
+    interface_library = "lib/gstapp-1.0.lib",
+    system_provided = True,
+)
+
+cc_import(
+    name = "gstvideo_lib",
+    interface_library = "lib/gstvideo-1.0.lib",
+    system_provided = True,
+)
+
+cc_import(
+    name = "gstd3d11_lib",
+    interface_library = "lib/gstd3d11-1.0.lib",
+    system_provided = True,
+)
+
+cc_import(
+    name = "gstpbutils_lib",
+    interface_library = "lib/gstpbutils-1.0.lib",
+    system_provided = True,
+)
+
+cc_import(
+    name = "gstbase_lib",
+    interface_library = "lib/gstbase-1.0.lib",
+    system_provided = True,
+)
+
+cc_import(
+    name = "gstaudio_lib",
+    interface_library = "lib/gstaudio-1.0.lib",
+    system_provided = True,
+)
+
+cc_import(
+    name = "glib_lib",
+    interface_library = "lib/glib-2.0.lib",
+    system_provided = True,
+)
+
+cc_import(
+    name = "gobject_lib",
+    interface_library = "lib/gobject-2.0.lib",
+    system_provided = True,
+)
+
+# ---------- aggregate target ----------
+
+cc_library(
+    name = "gstreamer",  # single target for consumers: headers + all import libraries
+    deps = [
+        ":gstreamer_headers",
+        ":glib_headers",
+        ":gstreamer_lib",
+        ":gstapp_lib",
+        ":gstvideo_lib",
+        ":gstd3d11_lib",
+        ":gstpbutils_lib",
+        ":gstbase_lib",
+        ":gstaudio_lib",
+        ":glib_lib",
+        ":gobject_lib",
+    ],
+    linkopts = [  # Windows SDK libraries; linkopts entries should start with "-" or "$"
+        "-DEFAULTLIB:d3d11.lib",
+        "-DEFAULTLIB:dxgi.lib",
+    ],
+    visibility = ["//visibility:public"],
+)
diff --git a/windows_build.bat b/windows_build.bat
index 4c3f100075..c8caa31cfd 100644
--- a/windows_build.bat
+++ b/windows_build.bat
@@ -41,23 +41,23 @@ IF "%~3"=="--with_tests" (
     set "buildTargets=//src:ovms"
 )
 
 IF "%~4"=="--integrity" (
     echo Building model server with integrity checks
     set "buildWithIntegrity=--config=win_integritycheck"
 ) ELSE (
     echo Building model server without integrity checks
     set "buildWithIntegrity="
 )
 
 set "bazelStartupCmd=--output_user_root=!BAZEL_SHORT_PATH!"
 set "openvino_dir=!BAZEL_SHORT_PATH!/openvino/runtime/cmake"
 
 set "buildCommand=bazel %bazelStartupCmd% build  %buildWithIntegrity% %bazelBuildArgs% --action_env OpenVINO_DIR=%openvino_dir% --jobs=%NUMBER_OF_PROCESSORS% --verbose_failures %buildTargets% 2>&1 | tee win_build.log"
 set "setOvmsVersionCmd=python windows_set_ovms_version.py"
 
 :: Setting PATH environment variable based on default windows node settings: Added ovms_windows specific python settings and c:/opt and removed unused Nvidia and OCL specific tools.
 :: When changing the values here you can print the node default PATH value and base your changes on it.
-set "setPath=C:\opt;C:\opt\Python312\;C:\opt\Python312\Scripts\;C:\opt\msys64\usr\bin\;%PATH%;"
+set "setPath=C:\opt;C:\opt\Python312\;C:\opt\Python312\Scripts\;C:\opt\msys64\usr\bin\;C:\Program Files\gstreamer\1.0\msvc_x86_64\bin;%PATH%;"
 set "PYTHONHOME=C:\opt\Python312"
 set "envPath=win_environment.log"
 set "setPythonPath=%cd%\bazel-out\x64_windows-opt\bin\src\python\binding"
diff --git a/windows_test.bat b/windows_test.bat
index f00948a18e..ba789bce88 100644
--- a/windows_test.bat
+++ b/windows_test.bat
@@ -41,7 +41,7 @@ IF "%~3"=="" (
     set "gtestFilter=%3"
 )
 
-set "buildTestCommand=bazel %bazelStartupCmd% build %bazelBuildArgs% --jobs=%NUMBER_OF_PROCESSORS% --verbose_failures //src:ovms_test"
+set "buildTestCommand=bazel %bazelStartupCmd% build %bazelBuildArgs% --jobs=6 --verbose_failures //src:ovms_test"
 set "changeConfigsCmd=python windows_change_test_configs.py"
 set "runTest=%cd%\bazel-bin\src\ovms_test.exe --gtest_filter=!gtestFilter! > win_full_test.log 2>&1"
 

From 36d34070f175e10007d959a47f29881b38ad9a10 Mon Sep 17 00:00:00 2001
From: Tobiszewski <adrian.tobiszewski@intel.com>
Date: Thu, 11 Jun 2026 16:40:55 +0200
Subject: [PATCH 2/3] Initial commit

---
 src/mpi/intel_mpi.cpp | 787 +++++++++++++++++++++++++++++++++++++-----
 src/mpi/intel_mpi.h   |   4 +-
 2 files changed, 695 insertions(+), 96 deletions(-)

diff --git a/src/mpi/intel_mpi.cpp b/src/mpi/intel_mpi.cpp
index 5e4122331f..317fd068f8 100644
--- a/src/mpi/intel_mpi.cpp
+++ b/src/mpi/intel_mpi.cpp
@@ -140,7 +140,7 @@ imp_status_t imp_video_source_set(imp_video_source_t* source,
     std::string k(key), v(value);
     if (k == "path" || k == "url")  source->path = v;
     else if (k == "device")         source->device = v;
-    else if (k == "width")          source->width = std::stoi(v);
+    else if (k == "width")          source->width = std::stoi(v); // no error handling FIXME stoi
     else if (k == "height")         source->height = std::stoi(v);
     else if (k == "framerate")      source->framerate = std::stoi(v);
     else if (k == "format")         source->format = v;
@@ -295,6 +295,22 @@ imp_status_t imp_video_open(imp_video_stream_t** stream,
     std::string queue_str   = is_file ? queue_file : queue_cam;
     std::string sink_props  = is_file ? appsink_file : appsink_cam;
 
+    // Audit mode: force a single-buffer, lock-step hand-off to the appsink (no GStreamer
+    // pre-buffering) to isolate the effect of pipeline parallelism; clock sync stays off.
+    if (is_file && opts && opts->sync_appsink) {
+        queue_str  = "queue max-size-buffers=1 max-size-time=0 max-size-bytes=0 leaky=no";
+        sink_props = "emit-signals=false sync=false max-buffers=1 drop=false";
+    }
+
+    // Audit mode: arbitrary bounded queue depth (overrides sync_appsink).
+    if (is_file && opts && opts->queue_depth > 0) {
+        uint32_t d = opts->queue_depth;
+        queue_str  = "queue max-size-buffers=" + std::to_string(d) +
+                     " max-size-time=0 max-size-bytes=0 leaky=no";
+        sink_props = "emit-signals=false sync=false max-buffers=" +
+                     std::to_string(d) + " drop=false";
+    }
+
     // Source + decode + NV12 convert
     if (is_file) {
         pipeline_str =
@@ -738,128 +754,459 @@ imp_status_t imp_hw_encode_supported(imp_context_t* ctx, bool* supported) {
 }
 
 //////////////////////////////////////////////////////////////////////////////
-// Image decode / encode — stubs
+// Image decode / encode
 //////////////////////////////////////////////////////////////////////////////
 
-imp_status_t imp_decode_image(imp_tensor_t** t, const void* d, size_t s,
-                              imp_context_t* c, const imp_image_decode_opts_t* o,
-                              imp_decode_callback_t cb, void* ud) {
-    (void)t;(void)d;(void)s;(void)c;(void)o;(void)cb;(void)ud;
-    return IMP_ERROR_INTERNAL;
+// Map imp_pixel_format_t to the GStreamer raw-video format name placed in "video/x-raw,format=..." caps
+static const char* imp_format_to_gst(imp_pixel_format_t fmt) {
+    switch (fmt) {
+        case IMP_FORMAT_RGB:  return "RGB";
+        case IMP_FORMAT_BGR:  return "BGR";
+        case IMP_FORMAT_RGBA: return "RGBA";
+        case IMP_FORMAT_BGRA: return "BGRA";
+        case IMP_FORMAT_NV12: return "NV12";
+        case IMP_FORMAT_I420: return "I420";
+        case IMP_FORMAT_GRAY: return "GRAY8";
+        default:              return "RGB";  // unrecognised values fall back to RGB
+    }
+}
+
+// Bytes-per-pixel channel count of a packed pixel format; 0 for NV12, I420 and unknown values
+static int imp_format_channels(imp_pixel_format_t fmt) {
+    switch (fmt) {
+        case IMP_FORMAT_GRAY: return 1;
+        case IMP_FORMAT_RGB:
+        case IMP_FORMAT_BGR:  return 3;
+        case IMP_FORMAT_RGBA:
+        case IMP_FORMAT_BGRA: return 4;
+        default:              return 0;
+    }
+}
+
+// Internal: start the pipeline, pull one decoded frame from appsink and copy it into a new CPU tensor
+static imp_status_t imp_image_decode_pull(GstElement* pipeline,
+                                          GstElement* appsink,
+                                          imp_tensor_t** out_tensor,
+                                          imp_pixel_format_t fmt,
+                                          imp_layout_t layout,
+                                          imp_element_type_t elem_type) {
+    GstStateChangeReturn ret = gst_element_set_state(pipeline, GST_STATE_PLAYING);
+    if (ret == GST_STATE_CHANGE_FAILURE) {
+        gst_element_set_state(pipeline, GST_STATE_NULL);
+        return IMP_ERROR_DECODE_FAILED;
+    }
+
+    GstSample* sample = gst_app_sink_try_pull_sample(GST_APP_SINK(appsink), 10 * GST_SECOND);  // wait up to 10 s
+    if (!sample) {
+        gst_element_set_state(pipeline, GST_STATE_NULL);
+        return IMP_ERROR_DECODE_FAILED;
+    }
+
+    GstBuffer* buffer = gst_sample_get_buffer(sample);
+    GstCaps*   caps   = gst_sample_get_caps(sample);
+    GstVideoInfo info;
+    // The non-NV12 path reads plane 0 as packed pixels, so reject multi-plane results (e.g. I420).
+    const bool info_ok = caps && gst_video_info_from_caps(&info, caps) &&
+                         (fmt == IMP_FORMAT_NV12 || GST_VIDEO_INFO_N_PLANES(&info) == 1);
+    int w = info_ok ? info.width  : 0;
+    int h = info_ok ? info.height : 0;
+
+    auto* tensor = new imp_tensor_s();
+    tensor->width  = w;
+    tensor->height = h;
+    tensor->format = fmt;
+    tensor->device_type = IMP_DEVICE_CPU;
+    tensor->valid  = false;
+
+    GstVideoFrame vframe;
+    if (!info_ok || !gst_video_frame_map(&vframe, &info, buffer, GST_MAP_READ)) {
+        delete tensor;
+        gst_sample_unref(sample);
+        gst_element_set_state(pipeline, GST_STATE_NULL);
+        return IMP_ERROR_DECODE_FAILED;
+    }
+
+    if (fmt == IMP_FORMAT_NV12) {
+        size_t y_size  = (size_t)w * h;
+        size_t uv_size = (size_t)w * (h / 2);
+        uint8_t* y_buf  = (uint8_t*)malloc(y_size);
+        uint8_t* uv_buf = (uint8_t*)malloc(uv_size);
+        if (!y_buf || !uv_buf) {
+            free(y_buf); free(uv_buf);
+            gst_video_frame_unmap(&vframe);
+            delete tensor;
+            gst_sample_unref(sample);
+            gst_element_set_state(pipeline, GST_STATE_NULL);
+            return IMP_ERROR_OUT_OF_MEMORY;
+        }
+
+        uint8_t* y_src  = (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&vframe, 0);
+        int y_stride    = GST_VIDEO_FRAME_PLANE_STRIDE(&vframe, 0);
+        for (int row = 0; row < h; row++)  // row-by-row copy drops stride padding
+            memcpy(y_buf + row * w, y_src + row * y_stride, w);
+
+        uint8_t* uv_src = (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&vframe, 1);
+        int uv_stride   = GST_VIDEO_FRAME_PLANE_STRIDE(&vframe, 1);
+        for (int row = 0; row < h / 2; row++)
+            memcpy(uv_buf + row * w, uv_src + row * uv_stride, w);
+
+        tensor->y_data  = y_buf;
+        tensor->uv_data = uv_buf;
+        tensor->valid   = true;
+    } else {
+        int channels = imp_format_channels(fmt);
+        if (channels == 0) channels = 3;  // unknown formats were requested as RGB
+
+        uint8_t* src_data = (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&vframe, 0);
+        int stride = GST_VIDEO_FRAME_PLANE_STRIDE(&vframe, 0);
+
+        if (elem_type == IMP_TYPE_FP32) {
+            size_t fp32_size = (size_t)w * h * channels;
+            std::vector<float> fp32_data(fp32_size);  // tightly packed HWC copy, scaled to [0,1]
+            for (int row = 0; row < h; row++) {
+                uint8_t* row_ptr = src_data + row * stride;
+                for (int col = 0; col < w * channels; col++)
+                    fp32_data[row * w * channels + col] = row_ptr[col] / 255.0f;
+            }
+
+            if (layout == IMP_LAYOUT_NCHW) {
+                ov::Shape shape = {1, (size_t)channels, (size_t)h, (size_t)w};
+                tensor->ov_tensor = ov::Tensor(ov::element::f32, shape);
+                float* dst = tensor->ov_tensor.data<float>();
+                for (int c = 0; c < channels; c++)
+                    for (int row = 0; row < h; row++)
+                        for (int col = 0; col < w; col++)
+                            dst[c * h * w + row * w + col] = fp32_data[(row * w + col) * channels + c];
+            } else {
+                ov::Shape shape = {1, (size_t)h, (size_t)w, (size_t)channels};
+                tensor->ov_tensor = ov::Tensor(ov::element::f32, shape);
+                memcpy(tensor->ov_tensor.data<float>(), fp32_data.data(), fp32_size * sizeof(float));
+            }
+        } else {
+            if (layout == IMP_LAYOUT_NCHW) {
+                ov::Shape shape = {1, (size_t)channels, (size_t)h, (size_t)w};
+                tensor->ov_tensor = ov::Tensor(ov::element::u8, shape);
+                uint8_t* dst = tensor->ov_tensor.data<uint8_t>();
+                for (int c = 0; c < channels; c++)
+                    for (int row = 0; row < h; row++) {
+                        uint8_t* row_ptr = src_data + row * stride;
+                        for (int col = 0; col < w; col++)
+                            dst[c * h * w + row * w + col] = row_ptr[col * channels + c];
+                    }
+            } else {
+                ov::Shape shape = {1, (size_t)h, (size_t)w, (size_t)channels};
+                tensor->ov_tensor = ov::Tensor(ov::element::u8, shape);
+                uint8_t* dst = tensor->ov_tensor.data<uint8_t>();
+                int row_bytes = w * channels;
+                for (int row = 0; row < h; row++)
+                    memcpy(dst + row * row_bytes, src_data + row * stride, row_bytes);
+            }
+        }
+        tensor->valid = true;
+    }
+
+    gst_video_frame_unmap(&vframe);
+    gst_sample_unref(sample);
+    gst_element_set_state(pipeline, GST_STATE_NULL);
+
+    *out_tensor = tensor;
+    return IMP_OK;
 }
 
 imp_status_t imp_decode_image_file(imp_tensor_t** t, const char* f,
                                    imp_context_t* c, const imp_image_decode_opts_t* o,
                                    imp_decode_callback_t cb, void* ud) {
-    (void)t;(void)f;(void)c;(void)o;(void)cb;(void)ud;
-    return IMP_ERROR_INTERNAL;
+    (void)cb; (void)ud;  // callback and user data are unused; the decode below is synchronous
+
+    if (!t || !f) return IMP_ERROR_INVALID_ARGUMENT;
+
+    gst_init(nullptr, nullptr);
+
+    imp_pixel_format_t fmt     = (o && o->output_format) ? o->output_format : IMP_FORMAT_RGB;
+    imp_layout_t       layout  = (o) ? o->output_layout : IMP_LAYOUT_NHWC;
+    imp_element_type_t etype   = (o) ? o->output_type   : IMP_TYPE_U8;
+    uint32_t rw = (o) ? o->resize_width  : 0;
+    uint32_t rh = (o) ? o->resize_height : 0;
+
+    const char* gst_fmt = imp_format_to_gst(fmt);
+
+    // Normalize path (backslashes break gst_parse_launch)
+    std::string path(f);
+    for (auto& ch : path) if (ch == '\\') ch = '/';
+    if (path.find('"') != std::string::npos) return IMP_ERROR_INVALID_ARGUMENT;  // a quote would end location="..." and inject pipeline text
+    std::string pipe = "filesrc location=\"" + path + "\" ! decodebin ! videoconvert ! "
+                       "video/x-raw,format=" + gst_fmt;
+    if (rw > 0 && rh > 0) {  // resize only when both target dimensions are given
+        pipe += " ! videoscale ! video/x-raw,width=" + std::to_string(rw) +
+                ",height=" + std::to_string(rh);
+    }
+    pipe += " ! appsink name=sink sync=false emit-signals=false";
+
+    GError* error = nullptr;
+    GstElement* pipeline = gst_parse_launch(pipe.c_str(), &error);
+    if (error || !pipeline) {
+        if (c) c->last_error = error ? error->message : "image pipeline creation failed";
+        if (error) g_error_free(error);
+        if (pipeline) gst_object_unref(pipeline);
+        return IMP_ERROR_DECODE_FAILED;
+    }
+
+    GstElement* appsink = gst_bin_get_by_name(GST_BIN(pipeline), "sink");
+    if (!appsink) {
+        gst_object_unref(pipeline);
+        return IMP_ERROR_DECODE_FAILED;
+    }
+
+    imp_status_t status = imp_image_decode_pull(pipeline, appsink, t, fmt, layout, etype);
+
+    gst_object_unref(appsink);
+    gst_object_unref(pipeline);
+    return status;
 }
 
-imp_status_t imp_decode_audio(imp_tensor_t** tensor, const void* data, size_t size,
-                              imp_context_t* ctx, const imp_audio_decode_opts_t* opts,
-                              imp_decode_callback_t callback, void* user_data) {
-    (void)callback; (void)user_data;  // async not yet implemented
-    if (!tensor || !data || size == 0) return IMP_ERROR_INVALID_ARGUMENT;
+imp_status_t imp_decode_image(imp_tensor_t** t, const void* d, size_t s,
+                              imp_context_t* c, const imp_image_decode_opts_t* o,
+                              imp_decode_callback_t cb, void* ud) {
+    (void)cb; (void)ud;  // callback and user data are unused; the decode below is synchronous
+
+    if (!t || !d || s == 0) return IMP_ERROR_INVALID_ARGUMENT;
 
     gst_init(nullptr, nullptr);
 
-    // Resolve options
-    uint32_t target_rate = (opts && opts->sample_rate > 0) ? opts->sample_rate : 16000;
-    uint32_t target_channels = (opts && opts->channels > 0) ? opts->channels : 1;
-    bool normalize = opts ? opts->normalize : true;
-    (void)normalize;  // GStreamer F32LE is already [-1,1]
+    imp_pixel_format_t fmt     = (o && o->output_format) ? o->output_format : IMP_FORMAT_RGB;
+    imp_layout_t       layout  = (o) ? o->output_layout : IMP_LAYOUT_NHWC;
+    imp_element_type_t etype   = (o) ? o->output_type   : IMP_TYPE_U8;
+    uint32_t rw = (o) ? o->resize_width  : 0;
+    uint32_t rh = (o) ? o->resize_height : 0;
 
-    // Build pipeline: appsrc → decodebin → audioconvert → audioresample → caps → appsink
-    std::string capsStr =
-        "audio/x-raw,format=F32LE,channels=" + std::to_string(target_channels) +
-        ",rate=" + std::to_string(target_rate);
+    const char* gst_fmt = imp_format_to_gst(fmt);
 
-    std::string pipelineStr =
-        "appsrc name=src ! decodebin ! audioconvert ! audioresample ! "
-        + capsStr + " ! appsink name=sink sync=false";
+    std::string pipe = "appsrc name=src ! decodebin ! videoconvert ! "
+                       "video/x-raw,format=" + std::string(gst_fmt);
+    if (rw > 0 && rh > 0) {  // resize only when both target dimensions are given
+        pipe += " ! videoscale ! video/x-raw,width=" + std::to_string(rw) +
+                ",height=" + std::to_string(rh);
+    }
+    pipe += " ! appsink name=sink sync=false emit-signals=false";
 
     GError* error = nullptr;
-    GstElement* pipeline = gst_parse_launch(pipelineStr.c_str(), &error);
-    if (error || !pipeline) {
-        if (ctx) ctx->last_error = error ? error->message : "audio pipeline creation failed";
-        if (error) g_error_free(error);
+    GstElement* pipeline = gst_parse_launch(pipe.c_str(), &error);
+    if (error || !pipeline) {
+        if (c) c->last_error = error ? error->message : "image pipeline creation failed";
+        if (error) g_error_free(error);
+        if (pipeline) gst_object_unref(pipeline);
         return IMP_ERROR_DECODE_FAILED;
     }
 
-    GstElement* appsrc = gst_bin_get_by_name(GST_BIN(pipeline), "src");
+    GstElement* appsrc  = gst_bin_get_by_name(GST_BIN(pipeline), "src");
     GstElement* appsink = gst_bin_get_by_name(GST_BIN(pipeline), "sink");
     if (!appsrc || !appsink) {
         if (appsrc) gst_object_unref(appsrc);
         if (appsink) gst_object_unref(appsink);
         gst_object_unref(pipeline);
-        return IMP_ERROR_INTERNAL;
+        return IMP_ERROR_DECODE_FAILED;
     }
 
-    // Allocate GStreamer buffer and copy input data
-    GstBuffer* buf = gst_buffer_new_allocate(nullptr, size, nullptr);
+    gst_element_set_state(pipeline, GST_STATE_PLAYING);
+
+    GstBuffer* buf = gst_buffer_new_allocate(nullptr, s, nullptr);
     GstMapInfo map;
     if (gst_buffer_map(buf, &map, GST_MAP_WRITE)) {
-        memcpy(map.data, data, size);
+        memcpy(map.data, d, s);
         gst_buffer_unmap(buf, &map);
     }
+    gst_app_src_push_buffer(GST_APP_SRC(appsrc), buf);
+    gst_app_src_end_of_stream(GST_APP_SRC(appsrc));
+
+    GstSample* sample = gst_app_sink_try_pull_sample(GST_APP_SINK(appsink), 10 * GST_SECOND);  // wait up to 10 s
+    imp_status_t status;
+    if (!sample) {
+        status = IMP_ERROR_DECODE_FAILED;
+    } else {
+        GstBuffer* sbuf = gst_sample_get_buffer(sample);
+        GstCaps*   caps = gst_sample_get_caps(sample);
+        GstVideoInfo info;  // the non-NV12 path reads plane 0 as packed pixels: reject multi-plane results (e.g. I420)
+        const bool info_ok = caps && gst_video_info_from_caps(&info, caps) &&
+                             (fmt == IMP_FORMAT_NV12 || GST_VIDEO_INFO_N_PLANES(&info) == 1);
+        int w = info_ok ? info.width  : 0;
+        int h = info_ok ? info.height : 0;
+        auto* tensor = new imp_tensor_s();
+        tensor->width  = w;
+        tensor->height = h;
+        tensor->format = fmt;
+        tensor->device_type = IMP_DEVICE_CPU;
+        tensor->valid  = false;
+
+        GstVideoFrame vframe;
+        if (info_ok && gst_video_frame_map(&vframe, &info, sbuf, GST_MAP_READ)) {
+            if (fmt == IMP_FORMAT_NV12) {
+                size_t y_size  = (size_t)w * h;
+                size_t uv_size = (size_t)w * (h / 2);
+                uint8_t* y_buf  = (uint8_t*)malloc(y_size);
+                uint8_t* uv_buf = (uint8_t*)malloc(uv_size);
+                if (y_buf && uv_buf) {
+                    uint8_t* y_src = (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&vframe, 0);
+                    int y_stride   = GST_VIDEO_FRAME_PLANE_STRIDE(&vframe, 0);
+                    for (int row = 0; row < h; row++)  // row-by-row copy drops stride padding
+                        memcpy(y_buf + row * w, y_src + row * y_stride, w);
+                    uint8_t* uv_src = (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&vframe, 1);
+                    int uv_stride   = GST_VIDEO_FRAME_PLANE_STRIDE(&vframe, 1);
+                    for (int row = 0; row < h / 2; row++)
+                        memcpy(uv_buf + row * w, uv_src + row * uv_stride, w);
+                    tensor->y_data  = y_buf;
+                    tensor->uv_data = uv_buf;
+                    tensor->valid   = true;
+                } else {
+                    free(y_buf); free(uv_buf);
+                }
+            } else {
+                int channels = imp_format_channels(fmt);
+                if (channels == 0) channels = 3;  // unknown formats were requested as RGB
+                uint8_t* src_data = (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&vframe, 0);
+                int stride = GST_VIDEO_FRAME_PLANE_STRIDE(&vframe, 0);
+
+                if (etype == IMP_TYPE_FP32) {
+                    size_t fp32_count = (size_t)w * h * channels;
+                    std::vector<float> fp32_data(fp32_count);  // tightly packed HWC copy, scaled to [0,1]
+                    for (int row = 0; row < h; row++) {
+                        uint8_t* rp = src_data + row * stride;
+                        for (int col = 0; col < w * channels; col++)
+                            fp32_data[row * w * channels + col] = rp[col] / 255.0f;
+                    }
+                    if (layout == IMP_LAYOUT_NCHW) {
+                        ov::Shape shape = {1, (size_t)channels, (size_t)h, (size_t)w};
+                        tensor->ov_tensor = ov::Tensor(ov::element::f32, shape);
+                        float* dst = tensor->ov_tensor.data<float>();
+                        for (int ch = 0; ch < channels; ch++)
+                            for (int row = 0; row < h; row++)
+                                for (int col = 0; col < w; col++)
+                                    dst[ch * h * w + row * w + col] = fp32_data[(row * w + col) * channels + ch];
+                    } else {
+                        ov::Shape shape = {1, (size_t)h, (size_t)w, (size_t)channels};
+                        tensor->ov_tensor = ov::Tensor(ov::element::f32, shape);
+                        memcpy(tensor->ov_tensor.data<float>(), fp32_data.data(), fp32_count * sizeof(float));
+                    }
+                } else {
+                    if (layout == IMP_LAYOUT_NCHW) {
+                        ov::Shape shape = {1, (size_t)channels, (size_t)h, (size_t)w};
+                        tensor->ov_tensor = ov::Tensor(ov::element::u8, shape);
+                        uint8_t* dst = tensor->ov_tensor.data<uint8_t>();
+                        for (int ch = 0; ch < channels; ch++)
+                            for (int row = 0; row < h; row++) {
+                                uint8_t* rp = src_data + row * stride;
+                                for (int col = 0; col < w; col++)
+                                    dst[ch * h * w + row * w + col] = rp[col * channels + ch];
+                            }
+                    } else {
+                        ov::Shape shape = {1, (size_t)h, (size_t)w, (size_t)channels};
+                        tensor->ov_tensor = ov::Tensor(ov::element::u8, shape);
+                        uint8_t* dst = tensor->ov_tensor.data<uint8_t>();
+                        int row_bytes = w * channels;
+                        for (int row = 0; row < h; row++)
+                            memcpy(dst + row * row_bytes, src_data + row * stride, row_bytes);
+                    }
+                }
+                tensor->valid = true;
+            }
+            gst_video_frame_unmap(&vframe);
+        }
+
+        gst_sample_unref(sample);
+        if (tensor->valid) {
+            *t = tensor;
+            status = IMP_OK;
+        } else {
+            delete tensor;
+            status = IMP_ERROR_DECODE_FAILED;
+        }
+    }
+
+    gst_element_set_state(pipeline, GST_STATE_NULL);
+    gst_object_unref(appsrc);
+    gst_object_unref(appsink);
+    gst_object_unref(pipeline);
+    return status;
+}
+
+// One-shot audio decode: feeds the `s` bytes at `d` through decodebin and returns F32LE PCM in *t
+// as a {1, N} f32 tensor. `o` selects rate/channels (0 -> 16000 Hz / 1 channel); c, cb and ud are unused.
+imp_status_t imp_decode_audio(imp_tensor_t** t, const void* d, size_t s,
+                              imp_context_t* c, const imp_audio_decode_opts_t* o,
+                              imp_decode_callback_t cb, void* ud) {
+    (void)c; (void)cb; (void)ud;
+    if (!t || !d || s == 0 || !o) return IMP_ERROR_INVALID_ARGUMENT;
+    gst_init(nullptr, nullptr);
+    uint32_t sampleRate = o->sample_rate > 0 ? o->sample_rate : 16000;
+    uint32_t channels   = o->channels   > 0 ? o->channels   : 1;
+
+    std::string pipeStr =
+        "appsrc name=src ! decodebin ! audioconvert ! audioresample ! "
+        "audio/x-raw,format=F32LE,channels=" + std::to_string(channels) +
+        ",rate=" + std::to_string(sampleRate) +
+        " ! appsink name=sink";
+
+    GError* error = nullptr;
+    GstElement* pipeline = gst_parse_launch(pipeStr.c_str(), &error);
+    if (error || !pipeline) {
+        if (error) g_error_free(error);
+        if (pipeline) gst_object_unref(pipeline);
+        return IMP_ERROR_DECODE_FAILED;
+    }
 
+    GstElement* appsrc  = gst_bin_get_by_name(GST_BIN(pipeline), "src");
+    GstElement* appsink = gst_bin_get_by_name(GST_BIN(pipeline), "sink");
+    if (!appsrc || !appsink) {
+        if (appsrc)  gst_object_unref(appsrc);
+        if (appsink) gst_object_unref(appsink);
+        gst_object_unref(pipeline);
+        return IMP_ERROR_INTERNAL;
+    }
+    g_object_set(appsrc, "is-live", FALSE, nullptr);
     gst_element_set_state(pipeline, GST_STATE_PLAYING);
 
-    // Push buffer + EOS
-    gst_app_src_push_buffer(GST_APP_SRC(appsrc), buf);  // takes ownership of buf
+    // Push input buffer; if allocation or mapping fails nothing is pushed and no samples are produced
+    GstBuffer* buffer = gst_buffer_new_allocate(nullptr, s, nullptr);
+    GstMapInfo map;
+    if (buffer && gst_buffer_map(buffer, &map, GST_MAP_WRITE)) {
+        memcpy(map.data, d, s);
+        gst_buffer_unmap(buffer, &map);
+        gst_app_src_push_buffer(GST_APP_SRC(appsrc), buffer);
+    } else if (buffer) gst_buffer_unref(buffer);  // mapping failed: release instead of leaking
     gst_app_src_end_of_stream(GST_APP_SRC(appsrc));
 
-    // Pull all decoded float samples
+    // Pull decoded samples until the appsink reports EOS
     std::vector<float> samples;
-    while (true) {
-        GstSample* sample = gst_app_sink_try_pull_sample(GST_APP_SINK(appsink), 5 * GST_SECOND);
-        if (!sample) break;
-
+    for (;;) {
+        GstSample* sample = gst_app_sink_try_pull_sample(GST_APP_SINK(appsink), 10 * GST_SECOND);
+        if (!sample) {  // nullptr means EOS, or a timeout / stalled pipeline
+            if (!gst_app_sink_is_eos(GST_APP_SINK(appsink))) samples.clear();  // truncated decode: fail, never return partial audio
+            break;
+        }
         GstBuffer* outBuf = gst_sample_get_buffer(sample);
         GstMapInfo outMap;
         if (gst_buffer_map(outBuf, &outMap, GST_MAP_READ)) {
-            size_t numFloats = outMap.size / sizeof(float);
-            const float* fdata = reinterpret_cast<const float*>(outMap.data);
-            samples.insert(samples.end(), fdata, fdata + numFloats);
+            size_t nFloats = outMap.size / sizeof(float);
+            const float* fptr = reinterpret_cast<const float*>(outMap.data);
+            samples.insert(samples.end(), fptr, fptr + nFloats);
             gst_buffer_unmap(outBuf, &outMap);
         }
         gst_sample_unref(sample);
     }
 
-    // Check for pipeline errors
-    imp_status_t status = IMP_OK;
-    GstBus* bus = gst_element_get_bus(pipeline);
-    GstMessage* msg = gst_bus_pop_filtered(bus,
-        static_cast<GstMessageType>(GST_MESSAGE_ERROR));
-    if (msg) {
-        GError* err = nullptr;
-        gst_message_parse_error(msg, &err, nullptr);
-        if (ctx && err) ctx->last_error = err->message;
-        if (err) g_error_free(err);
-        gst_message_unref(msg);
-        status = IMP_ERROR_DECODE_FAILED;
-    }
-    gst_object_unref(bus);
-
     gst_element_set_state(pipeline, GST_STATE_NULL);
     gst_object_unref(appsrc);
     gst_object_unref(appsink);
     gst_object_unref(pipeline);
 
-    if (status != IMP_OK || samples.empty()) {
-        return status != IMP_OK ? status : IMP_ERROR_DECODE_FAILED;
-    }
-
-    // Wrap samples in an imp_tensor_t backed by ov::Tensor
-    auto* t = new imp_tensor_s();
-    t->ov_tensor = ov::Tensor(ov::element::f32, {1, samples.size()});
-    std::memcpy(t->ov_tensor.data<float>(), samples.data(), samples.size() * sizeof(float));
-    t->device_type = IMP_DEVICE_CPU;
-    t->device_name = "CPU";
-    t->format = IMP_FORMAT_GRAY;  // 1-D audio — not a pixel format, but marks non-NV12
-    t->valid = true;
+    if (samples.empty()) return IMP_ERROR_DECODE_FAILED;
 
-    *tensor = t;
+    // Wrap in ov::Tensor and return as imp_tensor_t
+    auto* tensor = new imp_tensor_t();
+    tensor->ov_tensor = ov::Tensor(ov::element::f32, {1, samples.size()});
+    memcpy(tensor->ov_tensor.data<float>(), samples.data(), samples.size() * sizeof(float));
+    tensor->device_type = IMP_DEVICE_CPU;
+    tensor->valid = true;
+    *t = tensor;
     return IMP_OK;
 }
 
@@ -1147,18 +1494,282 @@ void imp_audio_close(imp_audio_stream_t* stream) {
     delete stream;
 }
 
+// Internal: copy the tensor's pixels into a freshly allocated GstBuffer for GStreamer encode.
+// Returns nullptr (caps_str untouched) if the format, pixel source or allocation is unusable.
+static GstBuffer* imp_image_prepare_encode_buffer(imp_tensor_t* t,
+                                                   imp_pixel_format_t src_fmt,
+                                                   int w, int h, int channels,
+                                                   std::string& caps_str) {
+    const char* gst_fmt = imp_format_to_gst(src_fmt);
+    if (!gst_fmt) return nullptr;  // no GStreamer name for this pixel format
+    GstVideoFormat vfmt = gst_video_format_from_string(gst_fmt);
+    GstVideoInfo vinfo;
+    if (!gst_video_info_set_format(&vinfo, vfmt, w, h)) return nullptr;  // unknown format or invalid size
+    size_t data_size = vinfo.size;  // stride-aligned frame size computed by GstVideoInfo
+
+    GstBuffer* buffer = gst_buffer_new_allocate(nullptr, data_size, nullptr);
+    if (!buffer) return nullptr;
+
+    GstVideoFrame vframe;
+    if (!gst_video_frame_map(&vframe, &vinfo, buffer, GST_MAP_WRITE)) {
+        gst_buffer_unref(buffer);
+        return nullptr;
+    }
+    bool filled = false;  // becomes true only after a validated pixel source is selected
+    if (src_fmt == IMP_FORMAT_NV12) {
+        filled = t->y_data && t->uv_data;  // source planes are read with a row pitch of w bytes
+        uint8_t* dst_y = (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&vframe, 0);
+        int dst_y_stride = GST_VIDEO_FRAME_PLANE_STRIDE(&vframe, 0);
+        for (int row = 0; filled && row < h; row++)
+            memcpy(dst_y + row * dst_y_stride, t->y_data + row * w, w);
+        uint8_t* dst_uv = (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&vframe, 1);
+        int dst_uv_stride = GST_VIDEO_FRAME_PLANE_STRIDE(&vframe, 1);
+        for (int row = 0; filled && row < h / 2; row++)
+            memcpy(dst_uv + row * dst_uv_stride, t->uv_data + row * w, w);
+    } else if (t->ov_tensor) {
+        uint8_t* dst = (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&vframe, 0);
+        int dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE(&vframe, 0);
+        int src_row_bytes = w * channels;
+        auto shape = t->ov_tensor.get_shape();
+        bool is_nchw = (shape.size() == 4 && shape[1] == (size_t)channels &&
+                        shape[2] == (size_t)h && shape[3] == (size_t)w);
+        bool is_fp32 = (t->ov_tensor.get_element_type() == ov::element::f32);
+        // Accept only u8/f32 tensors holding >= h*w*channels elements whose rows fit the destination stride
+        filled = (is_fp32 || t->ov_tensor.get_element_type() == ov::element::u8) && channels > 0 &&
+                 src_row_bytes <= dst_stride && t->ov_tensor.get_size() >= (size_t)h * (size_t)w * (size_t)channels;
+        if (filled && is_nchw) {
+            if (is_fp32) {
+                float* src = t->ov_tensor.data<float>();
+                for (int row = 0; row < h; row++)
+                    for (int col = 0; col < w; col++)
+                        for (int ch = 0; ch < channels; ch++)
+                            dst[row * dst_stride + col * channels + ch] =
+                                (uint8_t)(std::min(std::max(src[ch * h * w + row * w + col] * 255.0f, 0.0f), 255.0f));
+            } else {
+                uint8_t* src = t->ov_tensor.data<uint8_t>();
+                for (int row = 0; row < h; row++)
+                    for (int col = 0; col < w; col++)
+                        for (int ch = 0; ch < channels; ch++)
+                            dst[row * dst_stride + col * channels + ch] =
+                                src[ch * h * w + row * w + col];
+            }
+        } else if (filled) {
+            if (is_fp32) {
+                float* src = t->ov_tensor.data<float>();
+                for (int row = 0; row < h; row++)
+                    for (int col = 0; col < src_row_bytes; col++)
+                        dst[row * dst_stride + col] =
+                            (uint8_t)(std::min(std::max(src[row * src_row_bytes + col] * 255.0f, 0.0f), 255.0f));
+            } else {
+                uint8_t* src = t->ov_tensor.data<uint8_t>();
+                for (int row = 0; row < h; row++)
+                    memcpy(dst + row * dst_stride, src + row * src_row_bytes, src_row_bytes);
+            }
+        }
+    }
+    gst_video_frame_unmap(&vframe);
+    if (!filled) { gst_buffer_unref(buffer); return nullptr; }  // nothing valid was copied: do not encode garbage
+    caps_str = "video/x-raw,format=" + std::string(gst_fmt) +
+               ",width=" + std::to_string(w) +
+               ",height=" + std::to_string(h) +
+               ",framerate=1/1";
+    return buffer;
+}
+
 imp_status_t imp_encode_image(void** d, size_t* s, imp_tensor_t* t,
                               imp_context_t* c, const imp_image_encode_opts_t* o,
                               imp_encode_callback_t cb, void* ud) {
-    (void)d;(void)s;(void)t;(void)c;(void)o;(void)cb;(void)ud;
-    return IMP_ERROR_INTERNAL;
+    (void)c; (void)cb; (void)ud;  // context and callback are not used by this one-shot path
+    // Encodes tensor t as JPEG (default) or PNG and returns a malloc'd copy of the result via *d / *s.
+    if (!d || !s || !t || !t->valid) return IMP_ERROR_INVALID_ARGUMENT;
+
+    gst_init(nullptr, nullptr);
+
+    std::string format_str = (o && o->format) ? o->format : "jpeg";
+    std::string encoder_elem;
+    if (format_str == "jpeg" || format_str == "jpg")  encoder_elem = "jpegenc";
+    else if (format_str == "png")                      encoder_elem = "pngenc";
+    else return IMP_ERROR_UNSUPPORTED_FORMAT;
+
+    int quality = (o && o->quality > 0) ? std::min((int)o->quality, 100) : 90;  // jpegenc accepts 0..100
+
+    int w = t->width, h = t->height;
+    imp_pixel_format_t src_fmt = t->format;
+    int channels = imp_format_channels(src_fmt);
+    if (channels == 0 && src_fmt != IMP_FORMAT_NV12) channels = 3;
+
+    if ((w == 0 || h == 0) && t->ov_tensor) {
+        auto shape = t->ov_tensor.get_shape();
+        if (shape.size() == 4) {
+            if (shape[1] <= 4) { h = (int)shape[2]; w = (int)shape[3]; channels = (int)shape[1]; }
+            else { h = (int)shape[1]; w = (int)shape[2]; channels = (int)shape[3]; }
+        }
+    }
+    if (w == 0 || h == 0) return IMP_ERROR_INVALID_ARGUMENT;
+
+    std::string caps_str;
+    GstBuffer* buffer = imp_image_prepare_encode_buffer(t, src_fmt, w, h, channels, caps_str);
+    if (!buffer) return IMP_ERROR_OUT_OF_MEMORY;
+
+    GstElement* pipeline   = gst_pipeline_new("img-enc");
+    GstElement* appsrc     = gst_element_factory_make("appsrc", "src");
+    GstElement* convert    = gst_element_factory_make("videoconvert", "conv");
+    GstElement* encoder    = gst_element_factory_make(encoder_elem.c_str(), "enc");
+    GstElement* appsink    = gst_element_factory_make("appsink", "sink");
+
+    if (!pipeline || !appsrc || !convert || !encoder || !appsink) {
+        gst_buffer_unref(buffer);
+        for (GstElement* e : {pipeline, appsrc, convert, encoder, appsink}) if (e) gst_object_unref(e);  // none are in the bin yet
+        return IMP_ERROR_ENCODE_FAILED;
+    }
+
+    GstCaps* src_caps = gst_caps_from_string(caps_str.c_str());
+    g_object_set(appsrc, "caps", src_caps, "format", GST_FORMAT_TIME,
+                 "stream-type", 0, "is-live", FALSE, nullptr);
+    gst_caps_unref(src_caps);
+
+    if (encoder_elem == "jpegenc")
+        g_object_set(encoder, "quality", quality, nullptr);
+    g_object_set(appsink, "sync", FALSE, "emit-signals", FALSE, nullptr);
+
+    gst_bin_add_many(GST_BIN(pipeline), appsrc, convert, encoder, appsink, nullptr);
+    gst_element_link_many(appsrc, convert, encoder, appsink, nullptr);
+
+    GST_BUFFER_PTS(buffer) = 0;
+    GST_BUFFER_DURATION(buffer) = GST_CLOCK_TIME_NONE;
+
+    gst_element_set_state(pipeline, GST_STATE_PLAYING);
+    gst_app_src_push_buffer(GST_APP_SRC(appsrc), buffer);
+    gst_app_src_end_of_stream(GST_APP_SRC(appsrc));
+
+    GstSample* sample = gst_app_sink_try_pull_sample(GST_APP_SINK(appsink), 10 * GST_SECOND);
+    imp_status_t status;
+    if (!sample) {
+        status = IMP_ERROR_ENCODE_FAILED;
+    } else {
+        GstBuffer* enc_buf = gst_sample_get_buffer(sample);
+        GstMapInfo map;
+        if (gst_buffer_map(enc_buf, &map, GST_MAP_READ)) {
+            void* out = malloc(map.size);
+            if (out) {
+                memcpy(out, map.data, map.size);
+                *d = out;
+                *s = map.size;
+                status = IMP_OK;
+            } else {
+                status = IMP_ERROR_OUT_OF_MEMORY;
+            }
+            gst_buffer_unmap(enc_buf, &map);
+        } else {
+            status = IMP_ERROR_ENCODE_FAILED;
+        }
+        gst_sample_unref(sample);
+    }
+
+    gst_element_set_state(pipeline, GST_STATE_NULL);
+    gst_object_unref(pipeline);
+    return status;
 }
 
 imp_status_t imp_encode_image_file(const char* f, imp_tensor_t* t,
                                    imp_context_t* c, const imp_image_encode_opts_t* o,
                                    imp_encode_callback_t cb, void* ud) {
-    (void)f;(void)t;(void)c;(void)o;(void)cb;(void)ud;
-    return IMP_ERROR_INTERNAL;
+    (void)c; (void)cb; (void)ud;  // context and callback are not used by this one-shot path
+    // Encodes tensor t to file f; the codec comes from o->format, else the file extension, else JPEG.
+    if (!f || !t || !t->valid) return IMP_ERROR_INVALID_ARGUMENT;
+
+    gst_init(nullptr, nullptr);
+
+    std::string format_str;
+    if (o && o->format) {
+        format_str = o->format;
+    } else {
+        std::string path(f);
+        auto dot = path.rfind('.');
+        if (dot != std::string::npos) {
+            std::string ext = path.substr(dot + 1);
+            std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char ch) { return (char)::tolower(ch); });
+            if (ext == "jpg" || ext == "jpeg") format_str = "jpeg";
+            else if (ext == "png")             format_str = "png";
+        }
+    }
+    if (format_str.empty()) format_str = "jpeg";
+
+    std::string encoder_elem;
+    if (format_str == "jpeg" || format_str == "jpg")  encoder_elem = "jpegenc";
+    else if (format_str == "png")                      encoder_elem = "pngenc";
+    else return IMP_ERROR_UNSUPPORTED_FORMAT;
+
+    int quality = (o && o->quality > 0) ? std::min((int)o->quality, 100) : 90;  // jpegenc accepts 0..100
+
+    int w = t->width, h = t->height;
+    imp_pixel_format_t src_fmt = t->format;
+    int channels = imp_format_channels(src_fmt);
+    if (channels == 0 && src_fmt != IMP_FORMAT_NV12) channels = 3;
+
+    if ((w == 0 || h == 0) && t->ov_tensor) {
+        auto shape = t->ov_tensor.get_shape();
+        if (shape.size() == 4) {
+            if (shape[1] <= 4) { h = (int)shape[2]; w = (int)shape[3]; channels = (int)shape[1]; }
+            else { h = (int)shape[1]; w = (int)shape[2]; channels = (int)shape[3]; }
+        }
+    }
+    if (w == 0 || h == 0) return IMP_ERROR_INVALID_ARGUMENT;
+
+    std::string caps_str;
+    GstBuffer* buffer = imp_image_prepare_encode_buffer(t, src_fmt, w, h, channels, caps_str);
+    if (!buffer) return IMP_ERROR_OUT_OF_MEMORY;
+
+    std::string out_path(f);
+    for (auto& ch : out_path) if (ch == '\\') ch = '/';
+
+    GstElement* pipeline   = gst_pipeline_new("img-enc-file");
+    GstElement* appsrc     = gst_element_factory_make("appsrc", "src");
+    GstElement* convert    = gst_element_factory_make("videoconvert", "conv");
+    GstElement* encoder    = gst_element_factory_make(encoder_elem.c_str(), "enc");
+    GstElement* filesink   = gst_element_factory_make("filesink", "sink");
+
+    if (!pipeline || !appsrc || !convert || !encoder || !filesink) {
+        gst_buffer_unref(buffer);
+        for (GstElement* e : {pipeline, appsrc, convert, encoder, filesink}) if (e) gst_object_unref(e);  // none are in the bin yet
+        return IMP_ERROR_ENCODE_FAILED;
+    }
+
+    GstCaps* src_caps = gst_caps_from_string(caps_str.c_str());
+    g_object_set(appsrc, "caps", src_caps, "format", GST_FORMAT_TIME,
+                 "stream-type", 0, "is-live", FALSE, nullptr);
+    gst_caps_unref(src_caps);
+
+    if (encoder_elem == "jpegenc")
+        g_object_set(encoder, "quality", quality, nullptr);
+    g_object_set(filesink, "location", out_path.c_str(), nullptr);
+
+    gst_bin_add_many(GST_BIN(pipeline), appsrc, convert, encoder, filesink, nullptr);
+    gst_element_link_many(appsrc, convert, encoder, filesink, nullptr);
+
+    GST_BUFFER_PTS(buffer) = 0;
+    GST_BUFFER_DURATION(buffer) = GST_CLOCK_TIME_NONE;
+
+    gst_element_set_state(pipeline, GST_STATE_PLAYING);
+    gst_app_src_push_buffer(GST_APP_SRC(appsrc), buffer);
+    gst_app_src_end_of_stream(GST_APP_SRC(appsrc));
+
+    GstBus* bus = gst_element_get_bus(pipeline);
+    imp_status_t status = IMP_ERROR_ENCODE_FAILED;
+    if (bus) {
+        GstMessage* msg = gst_bus_timed_pop_filtered(bus, 10 * GST_SECOND,
+            (GstMessageType)(GST_MESSAGE_EOS | GST_MESSAGE_ERROR));
+        if (msg) {
+            if (GST_MESSAGE_TYPE(msg) == GST_MESSAGE_EOS)
+                status = IMP_OK;
+            gst_message_unref(msg);
+        }
+        gst_object_unref(bus);
+    }
+
+    gst_element_set_state(pipeline, GST_STATE_NULL);
+    gst_object_unref(pipeline);
+    return status;
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -1336,7 +1947,6 @@ void imp_audio_encoder_close(imp_audio_encoder_t* encoder) {
 // One-shot audio encode (to memory)
 // ---------------------------------------------------------------------------
 
-// Internal: write a WAV file into a malloc'd buffer (no GStreamer)
 static imp_status_t imp_encode_audio_wav(void** data, size_t* data_size,
                                          const float* samples, size_t num_samples,
                                          uint32_t sampleRate, uint32_t channels) {
@@ -1381,13 +1991,10 @@ imp_status_t imp_encode_audio(void** data,
     uint32_t sampleRate = opts->sample_rate > 0 ? opts->sample_rate : 16000;
     uint32_t channels   = opts->channels   > 0 ? opts->channels   : 1;
 
-    // ----- WAV: built-in writer (fast, no GStreamer) -----
     if (codec == "wav") {
-        return imp_encode_audio_wav(data, data_size, samples, num_samples,
-                                    sampleRate, channels);
+        return imp_encode_audio_wav(data, data_size, samples, num_samples, sampleRate, channels);
     }
 
-    // ----- PCM: raw float bytes -----
     if (codec == "pcm") {
         size_t sz = num_samples * sizeof(float);
         *data = malloc(sz);
@@ -1397,7 +2004,7 @@ imp_status_t imp_encode_audio(void** data,
         return IMP_OK;
     }
 
-    // ----- Lossy / lossless codecs via GStreamer -----
+    // Lossy / lossless codecs via GStreamer
     gst_init(nullptr, nullptr);
 
     std::string encElement;
@@ -1409,15 +2016,13 @@ imp_status_t imp_encode_audio(void** data,
     } else if (codec == "flac") {
         encElement = "flacenc";
     } else if (codec == "opus") {
-        // Opus standard sample rates: 8k, 12k, 16k, 24k, 48k
         uint32_t opusRate = (sampleRate <= 8000) ? 8000 :
                             (sampleRate <= 12000) ? 12000 :
                             (sampleRate <= 16000) ? 16000 :
                             (sampleRate <= 24000) ? 24000 : 48000;
         encElement = "opusenc bitrate=" + std::to_string(bitrate * 1000);
-        // audioresample will convert to opusRate automatically via caps
         muxElement = "oggmux";
-        sampleRate = opusRate;  // override for caps
+        sampleRate = opusRate;
     } else if (codec == "aac") {
         encElement = "avenc_aac bitrate=" + std::to_string(bitrate * 1000);
         muxElement = "aacparse ! adtsmux";
@@ -1451,7 +2056,6 @@ imp_status_t imp_encode_audio(void** data,
         return IMP_ERROR_INTERNAL;
     }
 
-    // Configure appsrc for F32LE input
     GstCaps* caps = gst_caps_new_simple("audio/x-raw",
         "format",  G_TYPE_STRING, "F32LE",
         "rate",    G_TYPE_INT, (int)(opts->sample_rate > 0 ? opts->sample_rate : 16000),
@@ -1463,29 +2067,23 @@ imp_status_t imp_encode_audio(void** data,
 
     gst_element_set_state(pipeline, GST_STATE_PLAYING);
 
-    // Push float samples
     size_t byteSize = num_samples * sizeof(float);
     GstBuffer* buffer = gst_buffer_new_allocate(nullptr, byteSize, nullptr);
     GstMapInfo map;
     gst_buffer_map(buffer, &map, GST_MAP_WRITE);
     memcpy(map.data, samples, byteSize);
     gst_buffer_unmap(buffer, &map);
-
     GST_BUFFER_PTS(buffer)      = 0;
     GST_BUFFER_DURATION(buffer) = gst_util_uint64_scale(
         num_samples / channels, GST_SECOND, opts->sample_rate > 0 ? opts->sample_rate : 16000);
-
-    gst_app_src_push_buffer(GST_APP_SRC(appsrc), buffer);  // takes ownership
+    gst_app_src_push_buffer(GST_APP_SRC(appsrc), buffer);
     gst_app_src_end_of_stream(GST_APP_SRC(appsrc));
 
-    // Collect encoded output from appsink
     std::vector<uint8_t> encoded;
     for (;;) {
         GstSample* sample = gst_app_sink_try_pull_sample(GST_APP_SINK(appsink), 10 * GST_SECOND);
         if (!sample) {
-            // Check if EOS was reached (normal completion)
             if (gst_app_sink_is_eos(GST_APP_SINK(appsink))) break;
-            // Timeout — abort
             break;
         }
         GstBuffer* outBuf = gst_sample_get_buffer(sample);
@@ -1515,7 +2113,6 @@ imp_status_t imp_encode_audio_file(const char* file_path,
                                    const float* samples,
                                    size_t num_samples,
                                    const imp_audio_encode_opts_t* opts) {
-    // Encode to memory, then write to file
     void* data = nullptr;
     size_t data_size = 0;
     imp_status_t st = imp_encode_audio(&data, &data_size, samples, num_samples, opts);
diff --git a/src/mpi/intel_mpi.h b/src/mpi/intel_mpi.h
index 168d1562aa..3de44b21e3 100644
--- a/src/mpi/intel_mpi.h
+++ b/src/mpi/intel_mpi.h
@@ -323,6 +323,8 @@ typedef struct {
     int64_t timeout_ms;                 // Read timeout, -1 = infinite
     const imp_video_branch_t* branches; // Array of output branches (NULL = single branch at source res) // TODO FIXME do we need more than one?
     uint32_t branch_count;              // Number of branches (0 = single branch at source res)
+    bool sync_appsink;                  // If true (file mode): force single-buffer queue+appsink (disable pre-buffering)
+    uint32_t queue_depth;               // If nonzero (file mode): bound queue + appsink max-buffers to N (overrides sync_appsink)
 } imp_video_decode_opts_t;
 
 /**
@@ -862,7 +864,7 @@ void imp_audio_encoder_close(imp_audio_encoder_t* encoder);
  * @param samples     Input float PCM samples (mono, interleaved if stereo)
  * @param num_samples Number of float values in @p samples
  * @param opts        Encode options (codec, sample_rate, channels, bitrate).
- *                    opts->output_path is ignored — output goes to memory.
+ *                    opts->output_path is ignored - output goes to memory.
  * @return IMP_OK on success
  */
 imp_status_t imp_encode_audio(void** data,

From f14a95d3b112730106ad3968e164ce305875db8f Mon Sep 17 00:00:00 2001
From: Adrian Tobiszewski <adrian.tobiszewski@intel.com>
Date: Fri, 19 Jun 2026 17:15:26 +0200
Subject: [PATCH 3/3] WIP Linux/Win gstreamer build

---
 .gitignore                                    |   1 +
 Dockerfile.ubuntu                             | 101 ++++
 WORKSPACE                                     |  16 +
 src/mpi/BUILD                                 |  31 +-
 src/mpi/gst_loader.cpp                        | 492 ++++++++++++++++++
 src/mpi/gst_loader.h                          | 105 ++++
 src/mpi/imp_mpi_impl.h                        |  25 +-
 src/mpi/intel_mpi.cpp                         | 243 ++++++++-
 src/mpi/intel_mpi.h                           |  16 +
 src/test/mediapipe/calculators/BUILD          |  30 ++
 .../video_decode_infer_calculator.cc          | 266 ++++++++++
 .../calculators/video_decode_infer_test.cpp   | 202 +++++++
 third_party/BUILD                             |   9 +
 third_party/gstreamer/gstreamer_linux.BUILD   |  53 ++
 14 files changed, 1562 insertions(+), 28 deletions(-)
 create mode 100644 src/mpi/gst_loader.cpp
 create mode 100644 src/mpi/gst_loader.h
 create mode 100644 src/test/mediapipe/calculators/video_decode_infer_calculator.cc
 create mode 100644 src/test/mediapipe/calculators/video_decode_infer_test.cpp
 create mode 100644 third_party/gstreamer/gstreamer_linux.BUILD

diff --git a/.gitignore b/.gitignore
index c404eb8cb8..5161fb94f0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,6 +28,7 @@ cppclean_test
 tags
 .venv-style/
 src/test/llm_testing
+gstreamer_task/
 node_modules/
 yarn.*
 out
diff --git a/Dockerfile.ubuntu b/Dockerfile.ubuntu
index 7f480d15a4..8ff64ae74a 100644
--- a/Dockerfile.ubuntu
+++ b/Dockerfile.ubuntu
@@ -88,6 +88,102 @@ ARG OPENCV_VERSION=4.13.0
 RUN OPENCV_VERSION=${OPENCV_VERSION} ./install_opencv.sh
 ####### End of OpenCV
 
+################### GSTREAMER BUILD ##########################
+# Build GStreamer from source so we control exact version and plugin set.
+# Legal: gpl=disabled, ugly=disabled — no GPL/patent-risky plugins.
+# Installed to /opt/gstreamer (self-contained, includes glib headers).
+FROM base_build as gstreamer-builder
+ARG GST_VERSION=1.26.2
+ARG JOBS=8
+ENV DEBIAN_FRONTEND=noninteractive
+SHELL ["/bin/bash", "-xo", "pipefail", "-c"]
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        python3-pip \
+        python3-venv \
+        flex \
+        bison \
+        libglib2.0-dev \
+        libmount-dev \
+        libpcre2-dev \
+        libffi-dev \
+        libssl-dev \
+        libdrm-dev \
+        nasm \
+        yasm \
+        libavcodec-dev \
+        libavformat-dev \
+        libavutil-dev \
+        libswscale-dev \
+        git \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+RUN python3 -m venv /build_venv && \
+    /build_venv/bin/pip install --no-cache-dir meson==1.4.1 ninja==1.11.1.1
+
+ENV PATH="/build_venv/bin:${PATH}"
+
+WORKDIR /src
+RUN git clone --depth 1 --branch ${GST_VERSION} \
+    https://gitlab.freedesktop.org/gstreamer/gstreamer.git
+
+WORKDIR /src/gstreamer
+
+# Install prefix: /opt/gstreamer
+RUN meson setup \
+    -Dexamples=disabled \
+    -Dtests=disabled \
+    -Dgst-examples=disabled \
+    -Ddevtools=disabled \
+    -Dorc=disabled \
+    -Dgpl=disabled \
+    -Dpython=disabled \
+    -Dlibnice=disabled \
+    -Dugly=disabled \
+    -Dgst-plugins-bad:gpl=disabled \
+    -Dgst-plugins-bad:va=disabled \
+    -Dgst-plugins-bad:doc=disabled \
+    -Dgst-plugins-bad:nls=disabled \
+    -Dgst-plugins-bad:directfb=disabled \
+    -Dgst-plugins-bad:openni2=disabled \
+    -Dgst-plugins-bad:fdkaac=disabled \
+    -Dgst-plugins-bad:ladspa=disabled \
+    -Dgst-plugins-bad:assrender=disabled \
+    -Dgst-plugins-bad:bs2b=disabled \
+    -Dgst-plugins-bad:flite=disabled \
+    -Dgst-plugins-bad:rtmp=disabled \
+    -Dgst-plugins-bad:opencv=disabled \
+    -Dgst-plugins-bad:sbc=disabled \
+    -Dgst-plugins-bad:teletext=disabled \
+    -Dgst-plugins-bad:x265=disabled \
+    -Dgst-plugins-bad:webrtcdsp=disabled \
+    -Dgst-plugins-bad:dash=disabled \
+    -Dgst-plugins-bad:openjpeg=disabled \
+    -Dgst-plugins-bad:soundtouch=disabled \
+    -Dgst-plugins-bad:isac=disabled \
+    -Dgst-plugins-base:nls=disabled \
+    -Dgst-plugins-base:gl=disabled \
+    -Dgst-plugins-base:pango=disabled \
+    -Dgst-plugins-base:xvideo=disabled \
+    -Dgst-plugins-good:nls=disabled \
+    -Dgst-plugins-good:libcaca=disabled \
+    -Dgst-plugins-good:lame=disabled \
+    -Dgst-plugins-good:flac=disabled \
+    -Dgst-plugins-good:dv=disabled \
+    -Dgst-plugins-good:adaptivedemux2=disabled \
+    --buildtype=release \
+    --prefix=/opt/gstreamer \
+    --libdir=lib \
+    --libexecdir=bin \
+    build/
+
+RUN ninja -C build/ -j${JOBS} && meson install -C build/
+
+# Copy glib/glib-2.0 headers into the prefix so it is self-contained.
+# glibconfig.h lives in the multiarch lib dir; the glob avoids hardcoding the x86_64 triplet.
+RUN cp -r /usr/include/glib-2.0 /opt/gstreamer/include/glib-2.0 && \
+    cp -r /usr/lib/*/glib-2.0/include/. /opt/gstreamer/include/glib-2.0/
+
 ################### BASE BUILD ##########################
 FROM base_build as build
 ARG BASE_IMAGE
@@ -125,6 +221,11 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
             vim && \
             apt-get clean && \
             rm -rf /var/lib/apt/lists/*
+
+# GStreamer built from source — bring in the self-contained prefix.
+# dlopen() resolves the libraries in /opt/gstreamer/lib through the ldconfig entry registered below (LD_LIBRARY_PATH also works).
+COPY --from=gstreamer-builder /opt/gstreamer /opt/gstreamer
+RUN echo /opt/gstreamer/lib > /etc/ld.so.conf.d/gstreamer.conf && ldconfig
 # on ubuntu 24.04 python3.12 is used as default python for ovms build and release
 # TF build needs python3.10 with numpy as it does not support python3.12
 RUN python3.10 -m pip install "numpy<2.0.0" --no-cache-dir
diff --git a/WORKSPACE b/WORKSPACE
index 49413cd595..c3966202c3 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -122,6 +122,22 @@ new_local_repository(
     build_file = "@//third_party/boringssl:BUILD",
 )
 
+# GStreamer — built from source in gstreamer-builder Docker stage.
+# Installed to /opt/gstreamer (self-contained: GStreamer + glib headers).
+# Linux: dlopen at runtime (no DT_NEEDED), headers-only for compile.
+# Windows: pre-built installer from gstreamer.freedesktop.org/data/pkg/windows/.
+new_local_repository(
+    name = "linux_gstreamer",
+    path = "/opt/gstreamer",
+    build_file = "@//third_party/gstreamer:gstreamer_linux.BUILD",
+)
+
+new_local_repository(
+    name = "windows_gstreamer",
+    path = "C:\\gstreamer\\1.0\\msvc_x86_64",
+    build_file = "@//third_party/gstreamer:gstreamer_windows.BUILD",
+)
+
 new_local_repository(
     name = "linux_curl",
     path = "/usr/",
diff --git a/src/mpi/BUILD b/src/mpi/BUILD
index 3239c5f622..b7ab637873 100644
--- a/src/mpi/BUILD
+++ b/src/mpi/BUILD
@@ -19,24 +19,26 @@ load("//:common_settings.bzl", "COMMON_LOCAL_DEFINES", "ovms_cc_library")
 package(default_visibility = ["//visibility:public"])
 
 # Intel Media Processing Interface (Intel MPI) library
-# Windows-only for now — wraps GStreamer for HW-accelerated media decode/encode
-# with zero-copy OpenVINO tensor integration.
-#
-# On Linux this target is a no-op (empty lib) so dependents can
-# unconditionally list it without breaking the build.
+# Windows: direct GStreamer linking with D3D11 HW pipeline.
+# Linux:   GStreamer loaded at runtime via dlopen (gst_loader.cpp).
+#          CPU decode only; VA-API GPU path deferred.
 
 cc_library(
     name = "intel_mpi",
     srcs = select({
         "//src:windows": ["intel_mpi.cpp"],
-        "//conditions:default": [],
+        "//conditions:default": ["intel_mpi.cpp", "gst_loader.cpp"],
     }),
     hdrs = select({
         "//src:windows": [
             "intel_mpi.h",
             "imp_mpi_impl.h",
         ],
-        "//conditions:default": ["intel_mpi.h"],
+        "//conditions:default": [
+            "intel_mpi.h",
+            "imp_mpi_impl.h",
+            "gst_loader.h",
+        ],
     }),
     copts = select({
         "//src:windows": [
@@ -52,18 +54,27 @@ cc_library(
             "/wd4244",
             "/wd4996",
         ],
-        "//conditions:default": [],
+        "//conditions:default": [
+            "-std=c++17",
+        ],
     }),
     local_defines = COMMON_LOCAL_DEFINES + select({
         "//src:windows": ["INTEL_MPI_AVAILABLE=1"],
-        "//conditions:default": [],
+        "//conditions:default": ["INTEL_MPI_AVAILABLE=1"],
+    }),
+    linkopts = select({
+        "//src:windows": [],
+        "//conditions:default": ["-ldl"],
     }),
     deps = select({
         "//src:windows": [
             "//third_party:openvino",
             "//third_party:gstreamer",
         ],
-        "//conditions:default": [],
+        "//conditions:default": [
+            "//third_party:openvino",
+            "//third_party:gstreamer",
+        ],
     }),
     visibility = ["//visibility:public"],
 )
diff --git a/src/mpi/gst_loader.cpp b/src/mpi/gst_loader.cpp
new file mode 100644
index 0000000000..a6ad852c96
--- /dev/null
+++ b/src/mpi/gst_loader.cpp
@@ -0,0 +1,492 @@
+// Copyright (c) 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * GStreamer runtime loader — Linux implementation.
+ *
+ * This is the ONLY translation unit that may include GStreamer headers.
+ * All gst_* calls go through function pointers resolved via dlopen so
+ * that the binary has no DT_NEEDED entry for GStreamer and can start on
+ * systems where GStreamer is not installed.
+ *
+ * TODO (Phase 2): verify graceful degradation in release image without
+ * GStreamer runtime packages.
+ */
+
+#ifndef _WIN32
+
+// ---- GStreamer headers (compile-time type definitions and macros) ----------
+// These are only needed here because we use GstVideoFrame / GstVideoInfo by
+// value (for GST_VIDEO_FRAME_PLANE_DATA / _STRIDE macros which are struct
+// field accesses, not function calls).
+#include <gst/gst.h>
+#include <gst/app/gstappsink.h>
+#include <gst/video/video.h>
+
+// ---- standard headers -------------------------------------------------------
+#include <dlfcn.h>
+#include <iostream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <chrono>
+#include <cstring>
+#include <cstdlib>
+
+// ---- Intel MPI public API ---------------------------------------------------
+#include "intel_mpi.h"
+
+// ---- Internal structs (needed for branch / stream layout) ------------------
+// We include only what's needed for the Linux video path; Windows-only
+// sections (D3D11, etc.) are guarded in imp_mpi_impl.h.
+#include "imp_mpi_impl.h"
+
+namespace chrono = std::chrono;
+
+// ============================================================================
+// Function pointer table
+// ============================================================================
+// One pointer per GStreamer/GLib entry point this file calls; filled by load_symbols().
+struct GstFunctionTable {
+    // Resolved through the libgstreamer-1.0.so.0 handle
+    void (*gst_init)(int*, char***);
+    GstElement* (*gst_parse_launch)(const gchar*, GError**);
+    GstStateChangeReturn (*gst_element_set_state)(GstElement*, GstState);
+    GstStateChangeReturn (*gst_element_get_state)(GstElement*, GstState*,
+                                                   GstState*, GstClockTime);
+    GstElement* (*gst_bin_get_by_name)(GstBin*, const gchar*);
+    GstPad* (*gst_element_get_static_pad)(GstElement*, const gchar*);
+    GstCaps* (*gst_pad_get_current_caps)(GstPad*);
+    GstStructure* (*gst_caps_get_structure)(const GstCaps*, guint);
+    gboolean (*gst_structure_get_int)(const GstStructure*, const gchar*, gint*);
+    void (*gst_caps_unref)(GstCaps*);
+    void (*gst_object_unref)(gpointer);
+    GstBus* (*gst_element_get_bus)(GstElement*);
+    gboolean (*gst_element_send_event)(GstElement*, GstEvent*);
+    GstEvent* (*gst_event_new_eos)(void);
+    GstMessage* (*gst_bus_timed_pop_filtered)(GstBus*, GstClockTime,
+                                               GstMessageType);
+    void (*gst_message_unref)(GstMessage*);
+
+    // Resolved through the libgstapp-1.0.so.0 handle
+    GstSample* (*gst_app_sink_try_pull_sample)(GstAppSink*, GstClockTime);
+    GstBuffer* (*gst_sample_get_buffer)(GstSample*);
+    GstCaps* (*gst_sample_get_caps)(GstSample*);
+    void (*gst_sample_unref)(GstSample*);
+
+    // Resolved through the libgstvideo-1.0.so.0 handle
+    gboolean (*gst_video_info_from_caps)(GstVideoInfo*, const GstCaps*);
+    gboolean (*gst_video_frame_map)(GstVideoFrame*, GstVideoInfo*,
+                                     GstBuffer*, GstMapFlags);
+    void (*gst_video_frame_unmap)(GstVideoFrame*);
+
+    // Resolved through the libglib-2.0.so.0 handle
+    void (*g_error_free)(GError*);
+};
+
+static GstFunctionTable s_fns = {};
+static bool s_available       = false;  // true once every symbol has been resolved
+static bool s_initialized     = false;  // set on the first gst_loader_init() call, success or not
+
+// ============================================================================
+// Symbol loading helpers
+// ============================================================================
+// Resolves s_fns.<sym> from `handle`; logs and makes the caller return false if absent.
+#define LOAD_SYM(handle, sym)                                           \
+    do {                                                                \
+        s_fns.sym = reinterpret_cast<decltype(s_fns.sym)>(             \
+            dlsym((handle), #sym));                                     \
+        if (!s_fns.sym) {                                               \
+            std::cerr << "[GstLoader] Missing symbol: " #sym "\n";     \
+            return false;                                               \
+        }                                                               \
+    } while (0)
+// Resolves every GstFunctionTable entry; returns false at the first missing symbol.
+static bool load_symbols(void* gst_h, void* app_h,
+                         void* vid_h, void* glib_h) {
+    LOAD_SYM(gst_h,  gst_init);
+    LOAD_SYM(gst_h,  gst_parse_launch);
+    LOAD_SYM(gst_h,  gst_element_set_state);
+    LOAD_SYM(gst_h,  gst_element_get_state);
+    LOAD_SYM(gst_h,  gst_bin_get_by_name);
+    LOAD_SYM(gst_h,  gst_element_get_static_pad);
+    LOAD_SYM(gst_h,  gst_pad_get_current_caps);
+    LOAD_SYM(gst_h,  gst_caps_get_structure);
+    LOAD_SYM(gst_h,  gst_structure_get_int);
+    LOAD_SYM(gst_h,  gst_caps_unref);
+    LOAD_SYM(gst_h,  gst_object_unref);
+    LOAD_SYM(gst_h,  gst_element_get_bus);
+    LOAD_SYM(gst_h,  gst_element_send_event);
+    LOAD_SYM(gst_h,  gst_event_new_eos);
+    LOAD_SYM(gst_h,  gst_bus_timed_pop_filtered);
+    LOAD_SYM(gst_h,  gst_message_unref);
+
+    LOAD_SYM(app_h,  gst_app_sink_try_pull_sample);
+    LOAD_SYM(app_h,  gst_sample_get_buffer);
+    LOAD_SYM(app_h,  gst_sample_get_caps);
+    LOAD_SYM(app_h,  gst_sample_unref);
+
+    LOAD_SYM(vid_h,  gst_video_info_from_caps);
+    LOAD_SYM(vid_h,  gst_video_frame_map);
+    LOAD_SYM(vid_h,  gst_video_frame_unmap);
+
+    LOAD_SYM(glib_h, g_error_free);
+
+    return true;
+}
+
+#undef LOAD_SYM
+
+// ============================================================================
+// Public loader API
+// ============================================================================
+
+// Loads the GStreamer/GLib libraries once and fills s_fns; repeat calls return
+// the cached result. NOTE(review): unsynchronized - confirm callers serialize.
+bool gst_loader_init() {
+    if (s_initialized) return s_available;
+    s_initialized = true;
+    static const char* const kLibs[] = {
+        "libgstreamer-1.0.so.0", "libgstapp-1.0.so.0",
+        "libgstvideo-1.0.so.0",  "libglib-2.0.so.0"};
+    void* h[4] = {};
+    for (int i = 0; i < 4; i++) {
+        h[i] = dlopen(kLibs[i], RTLD_LAZY | RTLD_GLOBAL);
+        if (h[i]) continue;
+        // dlerror() may be NULL; streaming a null char* to std::cerr is undefined.
+        const char* why = dlerror();
+        std::cerr << "[GstLoader] GStreamer not available: "
+                  << (why ? why : kLibs[i]) << "\n"
+                  << "[GstLoader] Install gstreamer1.0-* packages to enable "
+                     "media decode.\n";
+        return false;
+    }
+    s_available = load_symbols(h[0], h[1], h[2], h[3]);
+    if (s_available) std::cerr << "[GstLoader] GStreamer loaded successfully.\n";
+    return s_available;
+}
+
+bool gst_loader_available() { return s_available; }  // false until gst_loader_init() succeeds
+
+// ============================================================================
+// Internal: read one NV12 frame from an appsink
+// ============================================================================
+
+// Pulls one sample from `appsink` (waiting up to GST_SECOND) and copies its two
+// NV12 planes into `frame` without stride padding. Elapsed milliseconds are
+// added to the three accumulators. Returns false when no sample arrives, its
+// caps cannot be parsed, or the buffer cannot be mapped for reading.
+static bool read_nv12_from_appsink(GstElement* appsink,
+                                    imp_nv12_frame_t* frame,
+                                    double& total_ms,
+                                    double& pull_ms,
+                                    double& copy_ms) {
+    using hr_clock = chrono::high_resolution_clock;
+    using millis   = chrono::duration<double, std::milli>;
+    const auto& f = s_fns;
+    const auto decode_start = hr_clock::now();
+
+    // Plain cast on purpose: GST_APP_SINK() may expand to direct calls into
+    // libgstapp/libgobject, which this dlopen-only unit must not reference.
+    const auto ps = hr_clock::now();
+    GstSample* sample = f.gst_app_sink_try_pull_sample(
+        reinterpret_cast<GstAppSink*>(appsink), GST_SECOND);
+    if (!sample) return false;
+    pull_ms += millis(hr_clock::now() - ps).count();
+
+    GstBuffer* buffer = f.gst_sample_get_buffer(sample);
+    GstCaps*   caps   = f.gst_sample_get_caps(sample);
+
+    // `info` is only meaningful when the caps parse; otherwise give up early.
+    GstVideoInfo info;
+    if (!buffer || !caps || !f.gst_video_info_from_caps(&info, caps)) {
+        frame->valid = false;
+        f.gst_sample_unref(sample);
+        return false;
+    }
+
+    frame->allocate(info.width, info.height);
+    frame->valid = false;
+
+    const auto cs = hr_clock::now();
+
+    GstVideoFrame vframe;
+    if (f.gst_video_frame_map(&vframe, &info, buffer, GST_MAP_READ)) {
+        // Copies `rows` rows of frame->width bytes from plane `p` into `dst`.
+        auto copy_plane = [&](uint8_t* dst, int p, int rows) {
+            const auto* src = static_cast<const uint8_t*>(
+                GST_VIDEO_FRAME_PLANE_DATA(&vframe, p));
+            const int    stride = GST_VIDEO_FRAME_PLANE_STRIDE(&vframe, p);
+            const size_t width  = static_cast<size_t>(frame->width);
+            if (stride == frame->width) {
+                memcpy(dst, src, width * static_cast<size_t>(rows));
+                return;
+            }
+            for (int row = 0; row < rows; row++)
+                memcpy(dst + row * width,
+                       src + row * static_cast<std::ptrdiff_t>(stride), width);
+        };
+        copy_plane(frame->y_plane.data(),  0, frame->height);
+        copy_plane(frame->uv_plane.data(), 1, frame->height / 2);
+        frame->valid = true;
+        f.gst_video_frame_unmap(&vframe);
+    }
+
+    f.gst_sample_unref(sample);
+
+    copy_ms  += millis(hr_clock::now() - cs).count();
+    total_ms += millis(hr_clock::now() - decode_start).count();
+
+    return frame->valid;
+}
+
+// ============================================================================
+// Public: linux_video_open
+// ============================================================================
+
+/**
+ * Opens a file source and starts a CPU decode pipeline that feeds NV12 frames
+ * to one appsink per branch (named branch_<i>; a single "default" branch is
+ * used when `opts` lists none). `*stream` is written only on success.
+ *
+ * Returns IMP_ERROR_INVALID_ARGUMENT for a null stream/source/ctx,
+ * IMP_ERROR_DEVICE_NOT_AVAILABLE when GStreamer was not loaded,
+ * IMP_ERROR_UNSUPPORTED_FORMAT for non-file sources, and
+ * IMP_ERROR_DECODE_FAILED when the pipeline cannot be built or started.
+ * Every failure except the first also sets ctx->last_error.
+ */
+imp_status_t linux_video_open(imp_video_stream_t** stream,
+                               imp_video_source_t* source,
+                               imp_context_t* ctx,
+                               const imp_video_decode_opts_t* opts) {
+    if (!stream || !source || !ctx) return IMP_ERROR_INVALID_ARGUMENT;
+
+    if (!gst_loader_available()) {
+        ctx->last_error = "GStreamer not available; install gstreamer1.0-* "
+                          "packages and retry.";
+        return IMP_ERROR_DEVICE_NOT_AVAILABLE;
+    }
+
+    const auto& f = s_fns;
+    auto start = chrono::high_resolution_clock::now();
+
+    f.gst_init(nullptr, nullptr);
+
+    int src_w = (source->width  > 0) ? source->width  : 1280;
+    int src_h = (source->height > 0) ? source->height : 720;
+
+    bool is_file = (source->type == IMP_SOURCE_FILE);
+    if (!is_file) {
+        // Camera / live source not yet implemented on Linux
+        ctx->last_error = "Live camera source not yet supported on Linux";
+        return IMP_ERROR_UNSUPPORTED_FORMAT;
+    }
+
+    // Branch size: explicit request, else model input size, else source size.
+    struct resolved_branch { int width; int height; std::string name; };
+    std::vector<resolved_branch> resolved;
+
+    if (opts && opts->branches && opts->branch_count > 0) {
+        for (uint32_t i = 0; i < opts->branch_count; i++) {
+            resolved_branch rb;
+            int bw = static_cast<int>(opts->branches[i].width);
+            int bh = static_cast<int>(opts->branches[i].height);
+            if (bw > 0 && bh > 0) {
+                rb.width  = bw;
+                rb.height = bh;
+            } else if (ctx->model_w > 0 && ctx->model_h > 0) {
+                rb.width  = ctx->model_w;
+                rb.height = ctx->model_h;
+            } else {
+                rb.width  = src_w;
+                rb.height = src_h;
+            }
+            rb.name = opts->branches[i].name
+                      ? opts->branches[i].name
+                      : ("branch_" + std::to_string(i));
+            resolved.push_back(rb);
+        }
+    } else {
+        resolved.push_back({src_w, src_h, "default"});
+    }
+
+    // appsink properties
+    std::string sink_props = is_file
+        ? "emit-signals=false sync=false max-buffers=0 drop=false"
+        : "emit-signals=false sync=false max-buffers=2 drop=true";
+
+    if (is_file && opts && opts->sync_appsink) {
+        sink_props = "emit-signals=false sync=false max-buffers=1 drop=false";
+    }
+
+    // Linux CPU decode pipeline:
+    //   filesrc | decodebin | videoconvert | videoscale | caps | appsink
+    //
+    // TODO: VA-API GPU path (deferred — hardware not available yet):
+    //   filesrc | decodebin | vapostproc | caps | appsink
+
+    // Normalize path (backslashes break gst_parse_launch)
+    // NOTE(review): a '"' inside the path would still end the quoted value.
+    std::string path = source->path;
+    std::replace(path.begin(), path.end(), '\\', '/');
+
+    // Per-branch tail shared by the single- and multi-branch layouts.
+    auto branch_tail = [&](size_t i) {
+        return "videoscale ! "
+               "video/x-raw,format=NV12,"
+               "width=" + std::to_string(resolved[i].width) +
+               ",height=" + std::to_string(resolved[i].height) + " ! "
+               "appsink name=branch_" + std::to_string(i) + " " + sink_props;
+    };
+
+    std::string pipeline_str =
+        "filesrc location=\"" + path + "\" ! decodebin ! videoconvert ! ";
+    if (resolved.size() == 1) {
+        pipeline_str += branch_tail(0);
+    } else {
+        // Multi-branch via tee
+        pipeline_str += "tee name=t";
+        for (size_t i = 0; i < resolved.size(); i++)
+            pipeline_str += " t. ! queue ! " + branch_tail(i);
+    }
+
+    std::cout << "[linux_video_open] Pipeline: " << pipeline_str << "\n";
+
+    auto* s   = new imp_video_stream_s();
+    s->timing = &ctx->timing;
+
+    // Shared failure path: releases everything acquired so far, then `s`.
+    auto fail = [&](const std::string& why) -> imp_status_t {
+        ctx->last_error = why;
+        for (auto& b : s->branches)
+            if (b.appsink) f.gst_object_unref(b.appsink);
+        if (s->pipeline) {
+            f.gst_element_set_state(s->pipeline, GST_STATE_NULL);
+            f.gst_object_unref(s->pipeline);
+        }
+        delete s;
+        return IMP_ERROR_DECODE_FAILED;
+    };
+
+    GError* error = nullptr;
+    s->pipeline = f.gst_parse_launch(pipeline_str.c_str(), &error);
+    if (error || !s->pipeline) {
+        // A pipeline may come back together with an error; fail() frees it.
+        std::string why = error ? error->message : "pipeline creation failed";
+        if (error) f.g_error_free(error);
+        return fail(why);
+    }
+
+    s->branches.resize(resolved.size());
+    for (size_t i = 0; i < resolved.size(); i++) {
+        std::string sink_name = "branch_" + std::to_string(i);
+        // Plain cast: GST_BIN() could reference GObject symbols directly.
+        s->branches[i].appsink = f.gst_bin_get_by_name(
+            reinterpret_cast<GstBin*>(s->pipeline), sink_name.c_str());
+        if (!s->branches[i].appsink)
+            return fail("failed to get appsink: " + sink_name);
+        s->branches[i].width  = resolved[i].width;
+        s->branches[i].height = resolved[i].height;
+        s->branches[i].name   = resolved[i].name;
+        s->branches[i].frame.allocate(resolved[i].width, resolved[i].height);
+    }
+
+    f.gst_element_set_state(s->pipeline, GST_STATE_PLAYING);
+    GstStateChangeReturn ret = f.gst_element_get_state(
+        s->pipeline, nullptr, nullptr, 5 * GST_SECOND);
+    if (ret == GST_STATE_CHANGE_FAILURE) return fail("pipeline failed to start");
+
+    // Read negotiated caps from first appsink to get actual dimensions
+    GstPad* pad = f.gst_element_get_static_pad(s->branches[0].appsink, "sink");
+    if (pad) {
+        GstCaps* caps = f.gst_pad_get_current_caps(pad);
+        if (caps) {
+            GstStructure* st = f.gst_caps_get_structure(caps, 0);
+            int actual_w = 0, actual_h = 0;
+            f.gst_structure_get_int(st, "width",  &actual_w);
+            f.gst_structure_get_int(st, "height", &actual_h);
+            if (actual_w > 0) s->branches[0].width  = actual_w;
+            if (actual_h > 0) s->branches[0].height = actual_h;
+            f.gst_caps_unref(caps);
+        }
+        f.gst_object_unref(pad);
+    }
+
+    s->use_hw_decode = false;  // CPU decode
+
+    auto end = chrono::high_resolution_clock::now();
+    ctx->timing.video_open_ms =
+        chrono::duration<double, std::milli>(end - start).count();
+
+    for (size_t i = 0; i < s->branches.size(); i++) {
+        const auto& b = s->branches[i];
+        std::cout << "[linux_video_open] Branch " << i
+                  << " [" << b.name << "]: "
+                  << b.width << "x" << b.height << " NV12 (CPU)\n";
+    }
+
+    *stream = s;
+    return IMP_OK;
+}
+
+// ============================================================================
+// Public: linux_video_read_frame
+// ============================================================================
+
+// Reads the next frame of branch `branch_index` and publishes it through the
+// branch's cached tensor; a failed read is reported as IMP_ERROR_STREAM_END.
+imp_status_t linux_video_read_frame(imp_tensor_t** tensor,
+                                     imp_video_stream_t* stream,
+                                     uint32_t branch_index) {
+    if (!stream) return IMP_ERROR_INVALID_ARGUMENT;
+    if (branch_index >= static_cast<uint32_t>(stream->branches.size()))
+        return IMP_ERROR_INVALID_ARGUMENT;
+
+    auto& br = stream->branches[branch_index];
+    if (!read_nv12_from_appsink(br.appsink, &br.frame, br.total_decode_ms,
+                                br.total_decode_pull_ms,
+                                br.total_decode_copy_ms))
+        return IMP_ERROR_STREAM_END;
+
+    auto& out       = br.tensor_cache;
+    out.y_data      = br.frame.y_plane.data();
+    out.uv_data     = br.frame.uv_plane.data();
+    out.width       = br.frame.width;
+    out.height      = br.frame.height;
+    out.format      = IMP_FORMAT_NV12;
+    out.valid       = true;
+    out.device_type = IMP_DEVICE_CPU;
+
+    if (tensor) *tensor = &out;
+    return IMP_OK;
+}
+
+// ============================================================================
+// Public: linux_video_close
+// ============================================================================
+
+// Stops the pipeline, drops the appsink and pipeline references, frees `stream`.
+void linux_video_close(imp_video_stream_t* stream) {
+    if (!stream) return;
+    if (GstElement* pipe = stream->pipeline) {
+        // Move to GST_STATE_NULL before any reference is dropped.
+        s_fns.gst_element_set_state(pipe, GST_STATE_NULL);
+        for (auto& br : stream->branches)
+            if (br.appsink) s_fns.gst_object_unref(br.appsink);
+        s_fns.gst_object_unref(pipe);
+    }
+    delete stream;
+}
+
+#endif  // !_WIN32
diff --git a/src/mpi/gst_loader.h b/src/mpi/gst_loader.h
new file mode 100644
index 0000000000..2963c19517
--- /dev/null
+++ b/src/mpi/gst_loader.h
@@ -0,0 +1,105 @@
+// Copyright (c) 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * GStreamer runtime loader for Linux.
+ *
+ * Provides a dlopen-based loading of GStreamer shared libraries so that
+ * the Intel MPI binary can start even on systems without GStreamer installed.
+ * All GStreamer operations are only available when gst_loader_available()
+ * returns true.
+ *
+ * Usage:
+ *   gst_loader_init();          // call once; safe to call multiple times
+ *   if (!gst_loader_available()) return IMP_ERROR_DEVICE_NOT_AVAILABLE;
+ *   // use linux_video_open() / linux_video_read_frame() / linux_video_close()
+ *
+ * This header intentionally does NOT include any GStreamer headers so that
+ * files outside of gst_loader.cpp do not transitively link against GStreamer.
+ * All GStreamer types exposed in the public API are forward-declared below.
+ */
+
+#ifndef GST_LOADER_H
+#define GST_LOADER_H
+
+#ifndef _WIN32
+
+#include <cstdint>
+#include <cstddef>
+
+// Forward-declare GStreamer opaque types.  Consumers only hold pointers so
+// full definitions are not required here.
+typedef struct _GstElement GstElement;
+
+// Intel MPI public API — provides all imp_* types and status codes.
+#include "intel_mpi.h"
+
+/**
+ * Attempt to dlopen GStreamer shared libraries and resolve all required
+ * function pointers.  Safe to call multiple times (idempotent after first
+ * call).
+ *
+ * @return true  if all symbols were resolved and GStreamer is usable.
+ * @return false if any library or symbol was missing.
+ */
+bool gst_loader_init();
+
+/**
+ * Returns true after a successful gst_loader_init().
+ * Returns false if GStreamer is not installed or init has not been called.
+ */
+bool gst_loader_available();
+
+// ---------------------------------------------------------------------------
+// High-level Linux video operations.
+// These are the only entry-points needed by intel_mpi.cpp on Linux.
+// All GStreamer details are hidden inside gst_loader.cpp.
+// ---------------------------------------------------------------------------
+
+/**
+ * Open a video file for decode.
+ * Equivalent to imp_video_open() on Linux.
+ *
+ * @param stream   output: newly allocated stream handle
+ * @param source   video source configuration (path, dimensions, …)
+ * @param ctx      Intel MPI context (for dimensions hints and error reporting)
+ * @param opts     decode options (branches, sync mode, …) — may be nullptr
+ * @return IMP_OK on success, error code otherwise.
+ */
+imp_status_t linux_video_open(imp_video_stream_t** stream,
+                              imp_video_source_t* source,
+                              imp_context_t* ctx,
+                              const imp_video_decode_opts_t* opts);
+
+/**
+ * Read one NV12 frame from the given branch of an open stream.
+ * Equivalent to imp_video_read_frame() on Linux.
+ *
+ * @param tensor        output: pointer to internal tensor cache (not owned)
+ * @param stream        open stream
+ * @param branch_index  branch to read from (0 for single-branch streams)
+ * @return IMP_OK, IMP_ERROR_STREAM_END if no frame could be read, or error code.
+ */
+imp_status_t linux_video_read_frame(imp_tensor_t** tensor,
+                                    imp_video_stream_t* stream,
+                                    uint32_t branch_index);
+
+/**
+ * Close and free a stream opened with linux_video_open().
+ * Equivalent to imp_video_close() on Linux.
+ */
+void linux_video_close(imp_video_stream_t* stream);
+
+#endif  // !_WIN32
+#endif  // GST_LOADER_H
diff --git a/src/mpi/imp_mpi_impl.h b/src/mpi/imp_mpi_impl.h
index ad90e7f74c..8c2deafcc7 100644
--- a/src/mpi/imp_mpi_impl.h
+++ b/src/mpi/imp_mpi_impl.h
@@ -4,15 +4,12 @@
  * NOT part of the public API. These are the C++ structs behind the
  * opaque handles declared in intel_mpi.h.
  *
- * Only intel_mpi.cpp should include this file.
+ * Only intel_mpi.cpp and gst_loader.cpp should include this file.
  */
 
 #ifndef IMP_MPI_IMPL_H
 #define IMP_MPI_IMPL_H
 
-#define NOMINMAX
-#define WIN32_LEAN_AND_MEAN
-
 #include <string>
 #include <vector>
 #include <memory>
@@ -20,7 +17,10 @@
 #include <cstring>
 #include <chrono>
 
+#ifdef _WIN32
 // Windows + D3D11
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 #include <d3d11.h>
 #include <dxgi.h>
@@ -29,7 +29,7 @@
 #include <openvino/openvino.hpp>
 #include <openvino/runtime/intel_gpu/properties.hpp>
 
-// GStreamer
+// GStreamer (Windows: direct linking via import libs)
 #include <gst/gst.h>
 #include <gst/app/gstappsink.h>
 #include <gst/app/gstappsrc.h>
@@ -37,6 +37,21 @@
 #include <gst/d3d11/gstd3d11.h>
 #include <gst/pbutils/pbutils.h>
 
+#else  // Linux
+
+// OpenVINO C++ API
+#include <openvino/openvino.hpp>
+
+// GStreamer types needed for struct members (pointer-only use).
+// Full headers are only included in gst_loader.cpp where the dlopen
+// wrapper lives.  Using forward declarations here keeps intel_mpi.cpp
+// free of any direct GStreamer symbol references.
+typedef struct _GstElement   GstElement;
+typedef struct _GstBus       GstBus;
+typedef struct _GstMessage   GstMessage;
+
+#endif  // _WIN32
+
 // Public API types
 #include "intel_mpi.h"
 
diff --git a/src/mpi/intel_mpi.cpp b/src/mpi/intel_mpi.cpp
index 317fd068f8..173c5aa7b0 100644
--- a/src/mpi/intel_mpi.cpp
+++ b/src/mpi/intel_mpi.cpp
@@ -11,16 +11,29 @@
  *   - Encoder = accepts imp_tensor_t* directly
  *
  * NO inference logic lives here. Inference helpers belong in the demo/OVMS layer.
+ *
+ * Platform notes:
+ *   Windows — direct GStreamer linking via import libs; D3D11 HW pipeline.
+ *   Linux   — GStreamer loaded at runtime via dlopen (gst_loader.cpp);
+ *             CPU decode pipeline only (VA-API GPU path deferred).
+ *             Image / audio encode-decode functions are Windows-only for now
+ *             and return IMP_ERROR_INTERNAL on Linux.
  */
 
+#ifdef _WIN32
 #define NOMINMAX
 #define WIN32_LEAN_AND_MEAN
 #ifndef GST_USE_UNSTABLE_API
 #define GST_USE_UNSTABLE_API
 #endif
+#endif  // _WIN32
 
 #include "imp_mpi_impl.h"  // same directory
 
+#ifndef _WIN32
+#include "gst_loader.h"
+#endif
+
 #include <iostream>
 #include <algorithm>
 #include <chrono>
@@ -45,10 +58,13 @@ imp_status_t imp_context_create(imp_context_t** ctx,
                                 ov_compiled_model_t* compiled_model) {
     if (!ctx) return IMP_ERROR_INVALID_ARGUMENT;
 
-    // In this POC, compiled_model is actually a C++ ov::CompiledModel*
-    // cast through the C opaque pointer.
+    // compiled_model is a C++ ov::CompiledModel* cast through the C opaque pointer.
+    // On Linux / CPU-only mode, nullptr is valid — creates a standalone context
+    // with no model dimension hints.
     auto* cm = reinterpret_cast<ov::CompiledModel*>(compiled_model);
+#ifdef _WIN32
     if (!cm) return IMP_ERROR_INVALID_ARGUMENT;
+#endif
 
     auto start = chrono::high_resolution_clock::now();
 
@@ -56,17 +72,21 @@ imp_status_t imp_context_create(imp_context_t** ctx,
 
     // Extract model input dimensions for branch auto-deduction.
     // After NV12 PPP the Y input is [1, H, W, 1].
-    auto inputs = cm->inputs();
-    if (!inputs.empty()) {
-        auto shape = inputs[0].get_shape();
-        if (shape.size() == 4) {
-            c->model_h = static_cast<int>(shape[1]);
-            c->model_w = static_cast<int>(shape[2]);
+    if (cm) {
+        // Only possible with a compiled model; for a nullptr model the hints
+        // (model_w / model_h) are simply left as the context initialised them.
+        auto inputs = cm->inputs();
+        if (!inputs.empty()) {
+            auto shape = inputs[0].get_shape();
+            if (shape.size() == 4) {
+                c->model_h = static_cast<int>(shape[1]);
+                c->model_w = static_cast<int>(shape[2]);
+            }
         }
     }
 
-    c->device_type = IMP_DEVICE_GPU;
-    c->device_name = "GPU";
+    c->device_type = cm ? IMP_DEVICE_GPU : IMP_DEVICE_CPU;  // must agree with device_name below
+    c->device_name = cm ? "GPU" : "CPU";
     c->initialized = true;
 
     auto end = chrono::high_resolution_clock::now();
@@ -158,6 +178,12 @@ void imp_video_source_destroy(imp_video_source_t* source) {
     delete source;
 }
 
+// ============================================================================
+// Windows-only GStreamer video / audio / image pipeline implementations.
+// Linux equivalents live in gst_loader.cpp (dlopen-based).
+// ============================================================================
+#ifdef _WIN32
+
 //////////////////////////////////////////////////////////////////////////////
 // Internal: read NV12 from appsink into frame buffer
 //////////////////////////////////////////////////////////////////////////////
@@ -448,6 +474,18 @@ imp_status_t imp_video_open(imp_video_stream_t** stream,
     return IMP_OK;
 }
 
+#else  // Linux — delegate to gst_loader.cpp
+// Linux entry point: runs the (idempotent) loader, then delegates to gst_loader.cpp.
+imp_status_t imp_video_open(imp_video_stream_t** stream,
+                            imp_video_source_t* source,
+                            imp_context_t* ctx,
+                            const imp_video_decode_opts_t* opts) {
+    gst_loader_init();
+    return linux_video_open(stream, source, ctx, opts);
+}
+
+#endif  // _WIN32
+
 //////////////////////////////////////////////////////////////////////////////
 // Video Read Frame — branch-aware
 //////////////////////////////////////////////////////////////////////////////
@@ -455,8 +493,10 @@ imp_status_t imp_video_open(imp_video_stream_t** stream,
 imp_status_t imp_video_read_frame(imp_tensor_t** tensor,
                                   imp_video_stream_t* stream,
                                   uint32_t branch_index) {
+    // Checked on every platform: the Windows body indexes branches directly.
     if (!stream || branch_index >= (uint32_t)stream->branches.size())
         return IMP_ERROR_INVALID_ARGUMENT;
+#ifdef _WIN32
 
     auto& branch = stream->branches[branch_index];
 
@@ -479,6 +516,10 @@ imp_status_t imp_video_read_frame(imp_tensor_t** tensor,
 
     if (tensor) *tensor = &branch.tensor_cache;
     return IMP_OK;
+
+#else  // Linux
+    return linux_video_read_frame(tensor, stream, branch_index);
+#endif
 }
 
 imp_status_t imp_video_read_frame_by_name(imp_tensor_t** tensor,
@@ -518,6 +559,7 @@ imp_status_t imp_video_get_info(imp_video_stream_t* stream,
 }
 
 void imp_video_close(imp_video_stream_t* stream) {
+#ifdef _WIN32
     if (!stream) return;
     if (stream->pipeline) {
         gst_element_set_state(stream->pipeline, GST_STATE_NULL);
@@ -527,12 +569,17 @@ void imp_video_close(imp_video_stream_t* stream) {
         gst_object_unref(stream->pipeline);
     }
     delete stream;
+#else
+    linux_video_close(stream);
+#endif
 }
 
 //////////////////////////////////////////////////////////////////////////////
-// Video Encoder
+// Video Encoder (Windows-only — Linux stub returns IMP_ERROR_INTERNAL)
 //////////////////////////////////////////////////////////////////////////////
 
+#ifdef _WIN32
+
 imp_status_t imp_video_encoder_create(imp_video_encoder_t** encoder,
                                       uint32_t width, uint32_t height,
                                       imp_context_t* ctx,
@@ -662,6 +709,28 @@ void imp_video_encoder_close(imp_video_encoder_t* encoder) {
     delete encoder;
 }
 
+#else  // Linux stubs for video encoder
+
+imp_status_t imp_video_encoder_create(imp_video_encoder_t** encoder,
+                                      uint32_t width, uint32_t height,
+                                      imp_context_t* ctx,
+                                      const imp_video_encode_opts_t* opts) {
+    (void)encoder; (void)width; (void)height; (void)ctx; (void)opts;  // all parameters intentionally unused
+    return IMP_ERROR_INTERNAL;  // Not yet implemented on Linux; *encoder is left untouched
+}
+
+imp_status_t imp_video_encoder_write(imp_video_encoder_t* encoder,
+                                     imp_tensor_t* tensor) {
+    (void)encoder; (void)tensor;  // both parameters intentionally unused
+    return IMP_ERROR_INTERNAL;  // Not yet implemented on Linux
+}
+
+void imp_video_encoder_close(imp_video_encoder_t* encoder) {
+    (void)encoder;  // No-op: the Linux create stub never hands out an encoder
+}
+
+#endif  // _WIN32
+
 //////////////////////////////////////////////////////////////////////////////
 // Tensor utilities
 //////////////////////////////////////////////////////////////////////////////
@@ -728,6 +797,22 @@ imp_status_t imp_tensor_get_element_type(imp_tensor_t* tensor,
     return IMP_ERROR_INTERNAL; // TODO: map ov::element::Type
 }
 
+imp_status_t imp_tensor_get_nv12_planes(imp_tensor_t* tensor,
+                                        const uint8_t** y_data,
+                                        const uint8_t** uv_data,
+                                        int* width,
+                                        int* height) {
+    const bool has_all_args = tensor && y_data && uv_data && width && height;  // tensor and every output slot are mandatory
+    if (!has_all_args) return IMP_ERROR_INVALID_ARGUMENT;
+    const bool is_cpu_nv12 = tensor->format == IMP_FORMAT_NV12 && tensor->device_type == IMP_DEVICE_CPU;
+    if (!is_cpu_nv12) return IMP_ERROR_INVALID_ARGUMENT;  // plane pointers are only exposed for CPU NV12 tensors
+    *width   = tensor->width;
+    *height  = tensor->height;
+    *y_data  = tensor->y_data;   // pointer into the tensor, not a copy
+    *uv_data = tensor->uv_data;  // pointer into the tensor, not a copy
+    return IMP_OK;
+}
+
 void imp_tensor_release(imp_tensor_t* tensor) {
     if (!tensor) return;
     delete tensor;
@@ -753,6 +838,14 @@ imp_status_t imp_hw_encode_supported(imp_context_t* ctx, bool* supported) {
     return IMP_OK;
 }
 
+// ============================================================================
+// Image decode / encode and audio decode / encode — Windows-only.
+// These functions use direct GStreamer calls that are only available on
+// Windows (D3D11 pipeline, mfh264enc, etc.).  The Linux stubs live in the
+// #else branch of the #ifdef _WIN32 block opened below.
+// ============================================================================
+#ifdef _WIN32
+
 //////////////////////////////////////////////////////////////////////////////
 // Image decode / encode
 //////////////////////////////////////////////////////////////////////////////
@@ -2126,3 +2219,127 @@ imp_status_t imp_encode_audio_file(const char* file_path,
     return IMP_OK;
 }
 
+#else  // Linux stubs for image/audio decode-encode and audio stream/encoder
+
+imp_status_t imp_decode_image_file(imp_tensor_t** t, const char* f,
+                                   imp_context_t* c,
+                                   const imp_image_decode_opts_t* o,
+                                   imp_decode_callback_t cb, void* ud) {
+    (void)t; (void)f; (void)c; (void)o; (void)cb; (void)ud;  // stub: every argument is ignored, *t is not written
+    return IMP_ERROR_INTERNAL;  // Not yet implemented on Linux
+}
+
+imp_status_t imp_decode_image(imp_tensor_t** t, const void* d, size_t s,
+                              imp_context_t* c,
+                              const imp_image_decode_opts_t* o,
+                              imp_decode_callback_t cb, void* ud) {
+    (void)t; (void)d; (void)s; (void)c; (void)o; (void)cb; (void)ud;  // stub: every argument is ignored, *t is not written
+    return IMP_ERROR_INTERNAL;  // always fails in the non-Windows build
+}
+
+imp_status_t imp_decode_audio(imp_tensor_t** t, const void* d, size_t s,
+                              imp_context_t* c,
+                              const imp_audio_decode_opts_t* o,
+                              imp_decode_callback_t cb, void* ud) {
+    (void)t; (void)d; (void)s; (void)c; (void)o; (void)cb; (void)ud;  // stub: every argument is ignored, *t is not written
+    return IMP_ERROR_INTERNAL;  // always fails in the non-Windows build
+}
+
+imp_status_t imp_decode_audio_file(imp_tensor_t** t, const char* f,
+                                   imp_context_t* c,
+                                   const imp_audio_decode_opts_t* o,
+                                   imp_decode_callback_t cb, void* ud) {
+    (void)t; (void)f; (void)c; (void)o; (void)cb; (void)ud;  // stub: every argument is ignored, *t is not written
+    return IMP_ERROR_INTERNAL;  // always fails in the non-Windows build
+}
+
+imp_status_t imp_audio_file_info(const char* file_path,
+                                 uint32_t* sample_rate,
+                                 uint32_t* channels,
+                                 double* duration_sec) {
+    (void)file_path; (void)sample_rate; (void)channels; (void)duration_sec;  // stub: outputs are left untouched
+    return IMP_ERROR_INTERNAL;  // always fails in the non-Windows build
+}
+
+imp_status_t imp_audio_open(imp_audio_stream_t** stream,
+                            const char* input_path,
+                            const char* output_path,
+                            const imp_audio_stream_opts_t* opts) {
+    (void)stream; (void)input_path; (void)output_path; (void)opts;  // stub: every argument is ignored, *stream is not written
+    return IMP_ERROR_INTERNAL;  // always fails in the non-Windows build
+}
+
+imp_status_t imp_audio_get_info(imp_audio_stream_t* stream,
+                                imp_audio_info_t* info) {
+    (void)stream; (void)info;  // stub: *info is left untouched
+    return IMP_ERROR_INTERNAL;  // always fails in the non-Windows build
+}
+
+imp_status_t imp_audio_process(imp_audio_stream_t* stream) {
+    (void)stream;  // stub: the handle is ignored
+    return IMP_ERROR_INTERNAL;  // always fails in the non-Windows build
+}
+
+imp_status_t imp_audio_get_timing(imp_audio_stream_t* stream,
+                                  double* wall_time_sec,
+                                  double* realtime_factor) {
+    (void)stream; (void)wall_time_sec; (void)realtime_factor;  // stub: outputs are left untouched
+    return IMP_ERROR_INTERNAL;  // always fails in the non-Windows build
+}
+
+void imp_audio_close(imp_audio_stream_t* stream) {
+    (void)stream;  // stub: no-op, the handle is neither inspected nor freed
+}
+
+imp_status_t imp_encode_image(void** d, size_t* s, imp_tensor_t* t,
+                              imp_context_t* c,
+                              const imp_image_encode_opts_t* o,
+                              imp_encode_callback_t cb, void* ud) {
+    (void)d; (void)s; (void)t; (void)c; (void)o; (void)cb; (void)ud;  // stub: every argument is ignored, *d and *s are not written
+    return IMP_ERROR_INTERNAL;  // always fails in the non-Windows build
+}
+
+imp_status_t imp_encode_image_file(const char* f, imp_tensor_t* t,
+                                   imp_context_t* c,
+                                   const imp_image_encode_opts_t* o,
+                                   imp_encode_callback_t cb, void* ud) {
+    (void)f; (void)t; (void)c; (void)o; (void)cb; (void)ud;  // stub: every argument is ignored, no file is written
+    return IMP_ERROR_INTERNAL;  // always fails in the non-Windows build
+}
+
+imp_status_t imp_audio_encoder_create(imp_audio_encoder_t** encoder,
+                                      const imp_audio_encode_opts_t* opts,
+                                      imp_encode_callback_t callback,
+                                      void* user_data) {
+    (void)encoder; (void)opts; (void)callback; (void)user_data;  // stub: every argument is ignored, *encoder is not written
+    return IMP_ERROR_INTERNAL;  // always fails in the non-Windows build
+}
+
+imp_status_t imp_audio_encoder_write(imp_audio_encoder_t* encoder,
+                                     imp_tensor_t* tensor) {
+    (void)encoder; (void)tensor;  // stub: both arguments are ignored
+    return IMP_ERROR_INTERNAL;  // always fails in the non-Windows build
+}
+
+void imp_audio_encoder_close(imp_audio_encoder_t* encoder) {
+    (void)encoder;  // stub: no-op, the handle is neither inspected nor freed
+}
+
+imp_status_t imp_encode_audio(void** data,
+                              size_t* data_size,
+                              const float* samples,
+                              size_t num_samples,
+                              const imp_audio_encode_opts_t* opts) {
+    (void)data; (void)data_size; (void)samples; (void)num_samples; (void)opts;  // stub: *data and *data_size are not written
+    return IMP_ERROR_INTERNAL;  // always fails in the non-Windows build
+}
+
+imp_status_t imp_encode_audio_file(const char* file_path,
+                                   const float* samples,
+                                   size_t num_samples,
+                                   const imp_audio_encode_opts_t* opts) {
+    (void)file_path; (void)samples; (void)num_samples; (void)opts;  // stub: every argument is ignored, no file is written
+    return IMP_ERROR_INTERNAL;  // always fails in the non-Windows build
+}
+
+#endif  // _WIN32
diff --git a/src/mpi/intel_mpi.h b/src/mpi/intel_mpi.h
index 3de44b21e3..4a4786f628 100644
--- a/src/mpi/intel_mpi.h
+++ b/src/mpi/intel_mpi.h
@@ -959,6 +959,22 @@ imp_status_t imp_tensor_get_shape(imp_tensor_t* tensor,
 imp_status_t imp_tensor_get_element_type(imp_tensor_t* tensor,
                                           imp_element_type_t* type);
 
+/**
+ * Get NV12 plane pointers (pointing into the tensor, not copies) and dimensions for CPU-side NV12 tensors.
+ *
+ * @param tensor     Tensor handle (must be format IMP_FORMAT_NV12, device CPU)
+ * @param y_data     Output: pointer to Y plane (width * height bytes)
+ * @param uv_data    Output: pointer to interleaved UV plane (width * height/2 bytes)
+ * @param width      Output: frame width in pixels
+ * @param height     Output: frame height in pixels
+ * @return IMP_OK on success, IMP_ERROR_INVALID_ARGUMENT if any argument is NULL or the tensor is not NV12/CPU
+ */
+imp_status_t imp_tensor_get_nv12_planes(imp_tensor_t* tensor,
+                                        const uint8_t** y_data,
+                                        const uint8_t** uv_data,
+                                        int* width,
+                                        int* height);
+
 /**
  * Release tensor
  * 
diff --git a/src/test/mediapipe/calculators/BUILD b/src/test/mediapipe/calculators/BUILD
index a62497082b..ea9c9ece4e 100644
--- a/src/test/mediapipe/calculators/BUILD
+++ b/src/test/mediapipe/calculators/BUILD
@@ -95,3 +95,33 @@ cc_library(
     linkopts = LINKOPTS_ADJUSTED,
 )
 
+# Standalone smoke test: decode video with GStreamer + infer with OV directly
+# Run with:
+#   bazel run //src/test/mediapipe/calculators:video_decode_infer_test \
+#     -- /path/to/video.avi /path/to/model.xml
+cc_binary(
+    name = "video_decode_infer_test",
+    srcs = ["video_decode_infer_test.cpp"],
+    copts = COPTS_ADJUSTED,
+    linkopts = LINKOPTS_ADJUSTED,
+    deps = [
+        "//src/mpi:intel_mpi",
+        "//third_party:openvino",
+    ],
+)
+
+cc_library(
+    name = "video_decode_infer_calculator",
+    srcs = ["video_decode_infer_calculator.cc"],
+    copts = COPTS_ADJUSTED,
+    linkopts = LINKOPTS_ADJUSTED,
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+    deps = [
+        "//src/mpi:intel_mpi",
+        "@mediapipe_calculators//:mediapipe_calculators",
+        "//third_party:openvino",
+    ],
+    alwayslink = 1,  # the calculator is only reached through its static REGISTER_CALCULATOR registration
+)
+
diff --git a/src/test/mediapipe/calculators/video_decode_infer_calculator.cc b/src/test/mediapipe/calculators/video_decode_infer_calculator.cc
new file mode 100644
index 0000000000..a87c7601e1
--- /dev/null
+++ b/src/test/mediapipe/calculators/video_decode_infer_calculator.cc
@@ -0,0 +1,266 @@
+// Copyright (c) 2026 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * VideoDecodeInferCalculator — GStreamer integration smoke test.
+ *
+ * Reads a video file using Intel MPI (imp_* API), decodes frames to NV12,
+ * converts to BGR float32, and runs inference with face-detection-adas-0001
+ * via OpenVINO directly (not through an OVMS model instance).
+ *
+ * face-detection-adas-0001 expects: [1, 3, 384, 672]  BGR NCHW FP32
+ *
+ * Side-packet inputs:
+ *   VIDEO_PATH  : std::string — path to input video file
+ *   MODEL_PATH  : std::string — path to face-detection-adas-0001.xml
+ *
+ * Output stream:
+ *   DETECTIONS  : int — total detections above threshold across all frames
+ */
+
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/port/status.h"
+#pragma GCC diagnostic pop
+
+#include "src/mpi/intel_mpi.h"
+
+namespace mediapipe {
+
+// ============================================================================
+// NV12 → BGR float32 conversion (plain C, no opencv dependency)
+//
+// Output: contiguous BGR row-major buffer [H×W×3] float32, values [0,1]
+// ============================================================================
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+static void nv12_to_bgr_float(const uint8_t* y_plane, const uint8_t* uv_plane,
+                               int src_w, int src_h,
+                               int dst_w, int dst_h,
+                               std::vector<float>& out_bgr) {
+    // Converts one NV12 frame (Y plane + interleaved UV plane, both indexed
+    // with a row stride of src_w) to packed BGR float32 in [0,1], resizing to
+    // dst_w x dst_h by nearest neighbour.  out_bgr always ends up holding
+    // dst_h*dst_w*3 floats; it stays zero-filled if there is nothing to sample.
+    const size_t out_w = dst_w > 0 ? static_cast<size_t>(dst_w) : 0;
+    const size_t out_h = dst_h > 0 ? static_cast<size_t>(dst_h) : 0;
+    out_bgr.assign(out_h * out_w * 3, 0.0f);
+    if (!y_plane || !uv_plane || src_w <= 0 || src_h <= 0 || out_bgr.empty())
+        return;  // sampling would otherwise read through null or at index -1
+
+    const float scale_x = static_cast<float>(src_w) / dst_w;
+    const float scale_y = static_cast<float>(src_h) / dst_h;
+
+    // Clamp to the 8-bit range, then normalize to [0,1]
+    auto clamp01 = [](float v) { return v < 0.0f ? 0.0f : (v > 255.0f ? 1.0f : v / 255.0f); };
+
+    for (int dy = 0; dy < dst_h; dy++) {
+        int sy = static_cast<int>(dy * scale_y);
+        if (sy >= src_h) sy = src_h - 1;
+        const size_t y_row  = static_cast<size_t>(sy) * src_w;      // Y plane row offset
+        const size_t uv_row = static_cast<size_t>(sy / 2) * src_w;  // UV plane row offset
+
+        for (int dx = 0; dx < dst_w; dx++) {
+            int sx = static_cast<int>(dx * scale_x);
+            if (sx >= src_w) sx = src_w - 1;
+
+            // Y sample, plus the UV pair (interleaved: U at even index, V at odd)
+            const size_t uv_col = static_cast<size_t>(sx / 2) * 2;
+            float Y = static_cast<float>(y_plane[y_row + sx]) - 16.0f;
+            float U = static_cast<float>(uv_plane[uv_row + uv_col])     - 128.0f;
+            float V = static_cast<float>(uv_plane[uv_row + uv_col + 1]) - 128.0f;
+            // BT.601 limited range, written in B, G, R order
+            const size_t idx = (static_cast<size_t>(dy) * out_w + dx) * 3;
+            out_bgr[idx + 0] = clamp01(1.164f * Y + 2.017f * U);
+            out_bgr[idx + 1] = clamp01(1.164f * Y - 0.392f * U - 0.813f * V);
+            out_bgr[idx + 2] = clamp01(1.164f * Y + 1.596f * V);
+        }
+    }
+}
+#pragma GCC diagnostic pop
+
+// ============================================================================
+// Calculator
+// ============================================================================
+
+class VideoDecodeInferCalculator : public CalculatorBase {
+ public:
+    static absl::Status GetContract(CalculatorContract* cc) {
+        cc->InputSidePackets().Tag("VIDEO_PATH").Set<std::string>();
+        cc->InputSidePackets().Tag("MODEL_PATH").Set<std::string>();
+        cc->Outputs().Tag("DETECTIONS").Set<int>();
+        return absl::OkStatus();
+    }
+
+    absl::Status Open(CalculatorContext* cc) override {
+        video_path_ = cc->InputSidePackets().Tag("VIDEO_PATH").Get<std::string>();
+        model_path_ = cc->InputSidePackets().Tag("MODEL_PATH").Get<std::string>();
+        return absl::OkStatus();
+    }
+
+    // Decodes the whole video, runs inference on every frame and emits the
+    // total detection count (-1 if setup failed) as one DETECTIONS packet.
+    // NOTE(review): no input streams are declared, so this node is presumably
+    // scheduled as a source — confirm Process() is not invoked repeatedly.
+    absl::Status Process(CalculatorContext* cc) override {
+        int total_detections = run_decode_infer();
+
+        cc->Outputs()
+            .Tag("DETECTIONS")
+            .AddPacket(MakePacket<int>(total_detections)
+                           .At(cc->InputTimestamp()));
+        return absl::OkStatus();
+    }
+
+ private:
+    std::string video_path_;
+    std::string model_path_;
+
+    int run_decode_infer() {
+        // ----------------------------------------------------------------
+        // 1. Open standalone context (CPU, no GPU model needed)
+        // ----------------------------------------------------------------
+        imp_context_t* ctx = nullptr;
+        imp_status_t st = imp_context_create(&ctx, nullptr);
+        if (st != IMP_OK) {
+            LOG(ERROR) << "[VideoDecodeInferCalculator] imp_context_create failed: " << st;
+            return -1;
+        }
+
+        // ----------------------------------------------------------------
+        // 2. Open video — source dimensions come from the file
+        // ----------------------------------------------------------------
+        imp_video_source_t* src = nullptr;
+        imp_video_source_create(&src, IMP_SOURCE_FILE);
+        imp_video_source_set(src, "path", video_path_.c_str());
+
+        imp_video_stream_t* stream = nullptr;
+        st = imp_video_open(&stream, src, ctx, nullptr);
+        imp_video_source_destroy(src);
+
+        if (st != IMP_OK) {
+            LOG(ERROR) << "[VideoDecodeInferCalculator] imp_video_open failed: " << st
+                       << "  ctx_error: " << imp_context_get_error(ctx);
+            imp_context_destroy(ctx);
+            return -1;
+        }
+
+        uint32_t frame_w = 0, frame_h = 0;
+        imp_video_get_info(stream, &frame_w, &frame_h, nullptr, nullptr);
+        LOG(INFO) << "[VideoDecodeInferCalculator] Video: " << frame_w << "x" << frame_h;
+
+        // ----------------------------------------------------------------
+        // 3. Load face-detection-adas-0001 — expects [1,3,384,672] BGR FP32
+        // ----------------------------------------------------------------
+        ov::Core core;
+        ov::CompiledModel model;
+        try {
+            model = core.compile_model(model_path_, "CPU");
+        } catch (const std::exception& e) {
+            LOG(ERROR) << "[VideoDecodeInferCalculator] model load failed: " << e.what();
+            imp_video_close(stream);
+            imp_context_destroy(ctx);
+            return -1;
+        }
+
+        ov::InferRequest req = model.create_infer_request();
+        // Get actual model input shape
+        auto in_shape = model.input(0).get_shape();  // [1,3,H,W]
+        int model_h = static_cast<int>(in_shape[2]);
+        int model_w = static_cast<int>(in_shape[3]);
+        LOG(INFO) << "[VideoDecodeInferCalculator] Model input: "
+                  << model_w << "x" << model_h;
+
+        // ----------------------------------------------------------------
+        // 4. Decode loop
+        // ----------------------------------------------------------------
+        int frame_count      = 0;
+        int total_detections = 0;
+        const float detection_threshold = 0.5f;
+
+        std::vector<float> bgr_buf;
+        std::vector<float> nchw;  // NCHW staging buffer, reused across frames
+
+        for (;;) {
+            imp_tensor_t* tensor = nullptr;
+            st = imp_video_read_frame(&tensor, stream, 0);
+            if (st != IMP_OK || !tensor) break;  // IMP_ERROR_STREAM_END or a decode error
+
+            frame_count++;
+
+            // Get NV12 plane pointers via public API (tensor struct is opaque)
+            const uint8_t* y_ptr  = nullptr;
+            const uint8_t* uv_ptr = nullptr;
+            int fw = 0, fh = 0;
+            if (imp_tensor_get_nv12_planes(tensor, &y_ptr, &uv_ptr, &fw, &fh) != IMP_OK) {
+                imp_tensor_release(tensor);  // unusable frame: free it before skipping
+                continue;
+            }
+
+            // NV12 → BGR float, resize to model input
+            nv12_to_bgr_float(y_ptr, uv_ptr, fw, fh, model_w, model_h, bgr_buf);
+            imp_tensor_release(tensor);  // pixels are copied into bgr_buf; free the frame so it is not leaked
+
+            // NHWC → NCHW transpose
+            nchw.resize(bgr_buf.size());
+            for (int c = 0; c < 3; c++)
+                for (int y = 0; y < model_h; y++)
+                    for (int x = 0; x < model_w; x++)
+                        nchw[c * model_h * model_w + y * model_w + x] =
+                            bgr_buf[(y * model_w + x) * 3 + c];
+
+            ov::Shape shape = {1, 3,
+                               static_cast<size_t>(model_h),
+                               static_cast<size_t>(model_w)};
+            ov::Tensor input_tensor(ov::element::f32, shape, nchw.data());
+            req.set_input_tensor(input_tensor);
+            req.infer();
+
+            // Output: [1,1,N,7]  columns: [img_id, label, conf, x1,y1,x2,y2]
+            auto out = req.get_output_tensor(0);
+            const float* det = out.data<const float>();
+            size_t num_dets = out.get_shape()[2];
+
+            int frame_dets = 0;
+            for (size_t i = 0; i < num_dets; i++) {
+                float conf = det[i * 7 + 2];
+                if (conf > detection_threshold) frame_dets++;
+            }
+            total_detections += frame_dets;
+
+            if (frame_count <= 5 || frame_count % 50 == 0)
+                LOG(INFO) << "[VideoDecodeInferCalculator] frame " << frame_count
+                          << " detections=" << frame_dets;
+        }
+
+        LOG(INFO) << "[VideoDecodeInferCalculator] Done: " << frame_count
+                  << " frames, total detections=" << total_detections;
+
+        imp_video_close(stream);
+        imp_context_destroy(ctx);
+        return total_detections;
+    }
+};
+
+REGISTER_CALCULATOR(VideoDecodeInferCalculator);
+
+}  // namespace mediapipe
diff --git a/src/test/mediapipe/calculators/video_decode_infer_test.cpp b/src/test/mediapipe/calculators/video_decode_infer_test.cpp
new file mode 100644
index 0000000000..c6cc1e0934
--- /dev/null
+++ b/src/test/mediapipe/calculators/video_decode_infer_test.cpp
@@ -0,0 +1,202 @@
+// Copyright (c) 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * video_decode_infer_test — standalone end-to-end smoke test.
+ *
+ * Usage:
+ *   ./video_decode_infer_test <video_file> <model_xml>
+ *
+ * Returns exit code 0 if at least one detection found across all frames,
+ * non-zero otherwise.
+ */
+
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include "src/mpi/intel_mpi.h"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+// ============================================================================
+// NV12 → BGR float32 (nearest-neighbour resize, BT.601 limited range)
+// Output: NCHW [3 × H × W] float, values [0,255]
+// ============================================================================
+static void nv12_to_bgr_float_nchw(const uint8_t* y_plane,
+                                    const uint8_t* uv_plane,
+                                    int src_w, int src_h,
+                                    int dst_w, int dst_h,
+                                    std::vector<float>& out_nchw) {
+    // Converts one NV12 frame (row stride src_w) to planar B, G, R floats in
+    // [0,255], nearest-neighbour resized to dst_w x dst_h.  out_nchw always
+    // holds 3*dst_h*dst_w floats; it stays zero-filled if nothing can be sampled.
+    const size_t out_w = dst_w > 0 ? static_cast<size_t>(dst_w) : 0;
+    const size_t out_h = dst_h > 0 ? static_cast<size_t>(dst_h) : 0;
+    const size_t plane = out_h * out_w;
+    out_nchw.assign(3 * plane, 0.0f);
+    if (!y_plane || !uv_plane || src_w <= 0 || src_h <= 0 || plane == 0)
+        return;  // sampling would otherwise read through null or at index -1
+
+    const float scale_x = static_cast<float>(src_w) / dst_w;
+    const float scale_y = static_cast<float>(src_h) / dst_h;
+    auto clamp255 = [](float v) { return v < 0.f ? 0.f : (v > 255.f ? 255.f : v); };
+    // Pointers to B/G/R planes in NCHW output
+    float* B = out_nchw.data();
+    float* G = B + plane;
+    float* R = G + plane;
+
+    for (int dy = 0; dy < dst_h; dy++) {
+        int sy = static_cast<int>(dy * scale_y);
+        if (sy >= src_h) sy = src_h - 1;
+        const size_t y_row  = static_cast<size_t>(sy) * src_w;      // Y plane row offset
+        const size_t uv_row = static_cast<size_t>(sy / 2) * src_w;  // UV plane row offset
+        for (int dx = 0; dx < dst_w; dx++) {
+            int sx = static_cast<int>(dx * scale_x);
+            if (sx >= src_w) sx = src_w - 1;
+            const size_t uv_col = static_cast<size_t>(sx / 2) * 2;  // U at even index, V at odd
+            float Y = static_cast<float>(y_plane[y_row + sx]) - 16.0f;
+            float U = static_cast<float>(uv_plane[uv_row + uv_col])     - 128.0f;
+            float V = static_cast<float>(uv_plane[uv_row + uv_col + 1]) - 128.0f;
+            // BT.601 limited range
+            const size_t pix = static_cast<size_t>(dy) * out_w + dx;
+            B[pix] = clamp255(1.164f * Y + 2.017f * U);
+            G[pix] = clamp255(1.164f * Y - 0.392f * U - 0.813f * V);
+            R[pix] = clamp255(1.164f * Y + 1.596f * V);
+        }
+    }
+}
+#pragma GCC diagnostic pop
+
+int main(int argc, char* argv[]) {
+    if (argc < 3) {
+        std::cerr << "Usage: " << argv[0] << " <video_file> <model_xml>\n";
+        return 2;
+    }
+    const char* video_path = argv[1];
+    const char* model_path = argv[2];
+
+    // ---- Open standalone context -----------------------------------------
+    imp_context_t* ctx = nullptr;
+    imp_status_t st = imp_context_create(&ctx, nullptr);
+    if (st != IMP_OK) {
+        std::cerr << "imp_context_create failed: " << st << "\n";
+        return 1;
+    }
+
+    // ---- Open video ---------------------------------------------------------
+    imp_video_source_t* src = nullptr;
+    imp_video_source_create(&src, IMP_SOURCE_FILE);
+    imp_video_source_set(src, "path", video_path);
+
+    imp_video_stream_t* stream = nullptr;
+    st = imp_video_open(&stream, src, ctx, nullptr);
+    imp_video_source_destroy(src);
+
+    if (st != IMP_OK) {
+        std::cerr << "imp_video_open failed: " << st
+                  << "  (" << imp_context_get_error(ctx) << ")\n";
+        imp_context_destroy(ctx);
+        return 1;
+    }
+
+    uint32_t frame_w = 0, frame_h = 0;
+    imp_video_get_info(stream, &frame_w, &frame_h, nullptr, nullptr);
+    std::cout << "Video opened: " << frame_w << "x" << frame_h << "\n";
+
+    // ---- Load model ---------------------------------------------------------
+    ov::Core core;
+    ov::CompiledModel model;
+    try {
+        model = core.compile_model(model_path, "CPU");
+    } catch (const std::exception& e) {
+        std::cerr << "model compile failed: " << e.what() << "\n";
+        imp_video_close(stream);
+        imp_context_destroy(ctx);
+        return 1;
+    }
+
+    auto in_shape = model.input(0).get_shape();  // [1,3,H,W]
+    int model_h   = static_cast<int>(in_shape[2]);
+    int model_w   = static_cast<int>(in_shape[3]);
+    std::cout << "Model input: " << model_w << "x" << model_h << "\n";
+
+    ov::InferRequest req = model.create_infer_request();
+
+    // ---- Decode + infer loop ------------------------------------------------
+    int frame_count      = 0;
+    int total_detections = 0;
+    const float threshold = 0.5f;
+    std::vector<float> nchw_buf;
+
+    for (;;) {
+        imp_tensor_t* tensor = nullptr;
+        st = imp_video_read_frame(&tensor, stream, 0);
+        if (st == IMP_ERROR_STREAM_END) break;
+        if (st != IMP_OK || !tensor) {
+            std::cerr << "read_frame failed: " << st << "\n";
+            break;
+        }
+
+        frame_count++;
+        const uint8_t* y_ptr  = nullptr;
+        const uint8_t* uv_ptr = nullptr;
+        int fw = 0, fh = 0;
+        if (imp_tensor_get_nv12_planes(tensor, &y_ptr, &uv_ptr, &fw, &fh) != IMP_OK) {
+            std::cerr << "skipping frame " << frame_count << ": not a CPU NV12 tensor\n";
+            imp_tensor_release(tensor);  // free the unusable frame before skipping it
+            continue;
+        }
+
+        nv12_to_bgr_float_nchw(y_ptr, uv_ptr, fw, fh, model_w, model_h, nchw_buf);
+        imp_tensor_release(tensor);  // pixels are copied into nchw_buf; free the frame so it is not leaked
+
+        ov::Tensor input_tensor(ov::element::f32,
+                                {1, 3, static_cast<size_t>(model_h), static_cast<size_t>(model_w)},
+                                nchw_buf.data());
+        req.set_input_tensor(input_tensor);
+        req.infer();
+
+        auto out = req.get_output_tensor(0);
+        const float* det  = out.data<const float>();
+        size_t num_dets   = out.get_shape()[2];
+
+        int frame_dets = 0;
+        for (size_t i = 0; i < num_dets; i++) {
+            float conf = det[i * 7 + 2];
+            if (conf > threshold) frame_dets++;
+        }
+        total_detections += frame_dets;
+
+        if (frame_count <= 5 || frame_count % 50 == 0)
+            std::cout << "  frame " << frame_count
+                      << "  detections=" << frame_dets << "\n";
+    }
+
+    std::cout << "Done: " << frame_count << " frames, "
+              << "total detections=" << total_detections << "\n";
+
+    imp_video_close(stream);
+    imp_context_destroy(ctx);
+
+    if (total_detections < 1) {
+        std::cerr << "FAIL: expected at least 1 detection\n";
+        return 1;
+    }
+    std::cout << "PASS\n";
+    return 0;
+}
diff --git a/third_party/BUILD b/third_party/BUILD
index dcde0bd7e4..e9c08dd0b4 100644
--- a/third_party/BUILD
+++ b/third_party/BUILD
@@ -58,4 +58,13 @@ alias(
         "//conditions:default": "@linux_curl//:curl",
     }),
     visibility = ["//visibility:public"],
+)
+
+alias(
+    name = "gstreamer",
+    actual = select({
+        "//src:windows": "@windows_gstreamer//:gstreamer",
+        "//conditions:default": "@linux_gstreamer//:gstreamer_headers",  # every non-Windows build resolves to the headers target
+    }),
+    visibility = ["//visibility:public"],
 )
\ No newline at end of file
diff --git a/third_party/gstreamer/gstreamer_linux.BUILD b/third_party/gstreamer/gstreamer_linux.BUILD
new file mode 100644
index 0000000000..4efc0a6b56
--- /dev/null
+++ b/third_party/gstreamer/gstreamer_linux.BUILD
@@ -0,0 +1,53 @@
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# GStreamer headers for Linux.
+#
+# GStreamer is built from source in the gstreamer-builder Docker stage
+# and installed to /opt/gstreamer (prefix).  The glib headers are copied
+# into the same prefix so the tree is self-contained.
+#
+# Layout expected under /opt/gstreamer:
+#   include/gstreamer-1.0/      GStreamer core + plugins headers
+#   include/glib-2.0/           GLib/GObject headers (copied from system)
+#   lib/gstreamer-1.0/include/  per-plugin generated headers (if any)
+#
+# NOTE: This target provides HEADERS ONLY — no linkopts.
+# src/mpi:gst_loader dlopens the shared libs at runtime so the OVMS
+# binary starts without GStreamer present on end-user systems.
+
+package(default_visibility = ["//visibility:public"])
+
+cc_library(
+    name = "gstreamer_headers",
+    hdrs = glob([
+        "include/gstreamer-1.0/**/*.h",
+        "include/glib-2.0/**/*.h",  # NOTE(review): glibconfig.h is usually installed under lib/**/glib-2.0/include — confirm it is copied here
+        "lib/gstreamer-1.0/include/**/*.h",
+    ], allow_empty = True),  # an empty match (e.g. no generated per-plugin headers) is not an error
+    includes = [
+        "include/gstreamer-1.0",
+        "include/glib-2.0",
+        "lib/gstreamer-1.0/include",
+    ],
+    visibility = ["//visibility:public"],  # same as the package default declared above
+)
+
+cc_library(
+    name = "gstreamer",
+    deps = [":gstreamer_headers"],  # thin wrapper: adds no sources or linkopts beyond the headers target
+    visibility = ["//visibility:public"],
+)