diff --git a/src/runtime/HalideRuntimeVulkan.h b/src/runtime/HalideRuntimeVulkan.h index e150b7c6d00b..8583f69ec5e9 100644 --- a/src/runtime/HalideRuntimeVulkan.h +++ b/src/runtime/HalideRuntimeVulkan.h @@ -105,6 +105,64 @@ extern int halide_vulkan_release_context(void *user_context, VkDevice device, VkQueue queue, VkDebugUtilsMessengerEXT messenger); + +typedef int (*halide_vulkan_acquire_context_t)(void *user_context, + struct halide_vulkan_memory_allocator **allocator, + VkInstance *instance, + VkDevice *device, + VkPhysicalDevice *physical_device, + VkQueue *queue, + uint32_t *queue_family_index, + VkDebugUtilsMessengerEXT *messenger, + bool create); +typedef int (*halide_vulkan_release_context_t)(void *user_context, + VkInstance instance, + VkDevice device, + VkQueue queue, + VkDebugUtilsMessengerEXT messenger); + +/** Override the Vulkan context acquisition callback. Returns the previous + * handler. If unset, Halide uses its built-in Vulkan context management. + */ +extern halide_vulkan_acquire_context_t halide_set_vulkan_acquire_context(halide_vulkan_acquire_context_t handler); + +/** Override the Vulkan context release callback. Returns the previous handler. */ +extern halide_vulkan_release_context_t halide_set_vulkan_release_context(halide_vulkan_release_context_t handler); + +/** Ensure a Halide Vulkan memory allocator exists for an externally-managed + * Vulkan context. Intended for embedders that override + * halide_vulkan_acquire_context()/halide_vulkan_release_context(). + * + * The embedder should store the returned allocator with the same object that + * owns the external context, return it from later acquire-context calls for + * that context, and release it when that external context is torn down. + * + * This call refreshes Halide's Vulkan dispatch tables for the supplied + * instance/device. If `*allocator` is null, a new allocator bound to + * `device`/`physical_device` is created and stored back. If `*allocator` is + * non-null, it must already be bound to the supplied device. + */ +extern int halide_vulkan_acquire_memory_allocator(void *user_context, + struct halide_vulkan_memory_allocator **allocator, + VkInstance instance, + VkDevice device, + VkPhysicalDevice physical_device); + +/** Destroy a Halide Vulkan memory allocator created for an externally-managed + * Vulkan context after the embedder has ensured no in-flight Halide work is + * using it. This only releases Halide-owned allocator and shader-module state; + * it does not destroy the Vulkan instance, device, queue, or any + * embedder-owned debug messenger. + * + * This call refreshes Halide's Vulkan dispatch tables for the supplied + * instance/device. The supplied device and physical_device must match the + * allocator's context. + */ +extern int halide_vulkan_release_memory_allocator(void *user_context, + struct halide_vulkan_memory_allocator *allocator, + VkInstance instance, + VkDevice device, + VkPhysicalDevice physical_device); // -- // Override the default allocation callbacks (default uses Vulkan runtime implementation) diff --git a/src/runtime/gpu_context_common.h b/src/runtime/gpu_context_common.h index 618c6abb8da9..75b131899dd1 100644 --- a/src/runtime/gpu_context_common.h +++ b/src/runtime/gpu_context_common.h @@ -127,7 +127,7 @@ class GPUCompilationCache { } for (int i = 0; i < (1 << log2_compilations_size); i++) { - if (compilations[i].kernel_id > kInvalidId && + if (compilations[i].kernel_id > kDeletedId && (all || (compilations[i].context == context)) && compilations[i].use_count == 0) { debug(user_context) << "Releasing cached compilation: " << compilations[i].module_state diff --git a/src/runtime/runtime_api.cpp b/src/runtime/runtime_api.cpp index 734af982bf91..8c2acd8b6820 100644 --- a/src/runtime/runtime_api.cpp +++ b/src/runtime/runtime_api.cpp @@ -213,10 +213,14 @@ extern "C" __attribute__((used)) void *halide_runtime_api_functions[] = { (void *)&halide_d3d12compute_release_context, (void *)&halide_d3d12compute_run, (void *)&halide_vulkan_acquire_context, + (void *)&halide_vulkan_acquire_memory_allocator, (void *)&halide_vulkan_device_interface, (void *)&halide_vulkan_initialize_kernels, + (void *)&halide_vulkan_release_memory_allocator, (void *)&halide_vulkan_release_context, (void *)&halide_vulkan_run, + (void *)&halide_set_vulkan_acquire_context, + (void *)&halide_set_vulkan_release_context, (void *)&halide_webgpu_device_interface, (void *)&halide_webgpu_initialize_kernels, (void *)&halide_webgpu_finalize_kernels, diff --git a/src/runtime/vulkan.cpp b/src/runtime/vulkan.cpp index f44b60b90e94..ff2f77f941f3 100644 --- a/src/runtime/vulkan.cpp +++ b/src/runtime/vulkan.cpp @@ -13,9 +13,34 @@ using namespace Halide::Runtime::Internal::Vulkan; // -------------------------------------------------------------------------- -extern "C" { +namespace Halide { +namespace Runtime { +namespace Internal { +namespace Vulkan { -// -------------------------------------------------------------------------- +ALWAYS_INLINE int vk_load_external_context_functions(void *user_context, VkInstance instance, VkDevice device) { + if (vkGetInstanceProcAddr == nullptr) { + vk_load_vulkan_loader_functions(user_context); + if (vkGetInstanceProcAddr == nullptr) { + error(user_context) << "Vulkan: Failed to resolve loader functions for external context!\n"; + return halide_error_code_symbol_not_found; + } + } + + vk_load_vulkan_instance_functions(user_context, instance); + if (vkGetPhysicalDeviceProperties == nullptr || vkGetDeviceProcAddr == nullptr) { + error(user_context) << "Vulkan: Failed to resolve instance functions for external context!\n"; + return halide_error_code_symbol_not_found; + } + + vk_load_vulkan_device_functions(user_context, device); + if (vkCreateBuffer == nullptr || vkAllocateMemory == nullptr) { + error(user_context) << "Vulkan: Failed to resolve device functions for external context!\n"; + return halide_error_code_symbol_not_found; + } + + return halide_error_code_success; +} // The default implementation of halide_acquire_vulkan_context uses // the global pointers above, and serializes access with a spin lock. @@ -29,15 +54,15 @@ extern "C" { // call to halide_release_vulkan_context. halide_acquire_vulkan_context // should block while a previous call (if any) has not yet been // released via halide_release_vulkan_context. -WEAK int halide_vulkan_acquire_context(void *user_context, - halide_vulkan_memory_allocator **allocator, - VkInstance *instance, - VkDevice *device, - VkPhysicalDevice *physical_device, - VkQueue *queue, - uint32_t *queue_family_index, - VkDebugUtilsMessengerEXT *messenger, - bool create) { +WEAK int default_vulkan_acquire_context(void *user_context, + halide_vulkan_memory_allocator **allocator, + VkInstance *instance, + VkDevice *device, + VkPhysicalDevice *physical_device, + VkQueue *queue, + uint32_t *queue_family_index, + VkDebugUtilsMessengerEXT *messenger, + bool create) { #ifdef DEBUG_RUNTIME halide_start_clock(user_context); #endif @@ -74,11 +99,133 @@ WEAK int halide_vulkan_acquire_context(void *user_context, return halide_error_code_success; } -WEAK int halide_vulkan_release_context(void *user_context, VkInstance instance, VkDevice device, VkQueue queue, VkDebugUtilsMessengerEXT messenger) { +WEAK int default_vulkan_release_context(void *user_context, VkInstance instance, VkDevice device, VkQueue queue, VkDebugUtilsMessengerEXT messenger) { halide_mutex_unlock(&thread_lock); return halide_error_code_success; } +WEAK halide_vulkan_acquire_context_t vulkan_acquire_context_handler = + default_vulkan_acquire_context; +WEAK halide_vulkan_release_context_t vulkan_release_context_handler = + default_vulkan_release_context; + +} // namespace Vulkan +} // namespace Internal +} // namespace Runtime +} // namespace Halide + +// -------------------------------------------------------------------------- + +extern "C" { + +// -------------------------------------------------------------------------- + +WEAK int halide_vulkan_acquire_context(void *user_context, + halide_vulkan_memory_allocator **allocator, + VkInstance *instance, + VkDevice *device, + VkPhysicalDevice *physical_device, + VkQueue *queue, + uint32_t *queue_family_index, + VkDebugUtilsMessengerEXT *messenger, + bool create) { + return vulkan_acquire_context_handler(user_context, allocator, instance, device, + physical_device, queue, queue_family_index, + messenger, create); +} + +WEAK int halide_vulkan_release_context(void *user_context, VkInstance instance, VkDevice device, VkQueue queue, VkDebugUtilsMessengerEXT messenger) { + return vulkan_release_context_handler(user_context, instance, device, queue, messenger); +} + +WEAK halide_vulkan_acquire_context_t halide_set_vulkan_acquire_context(halide_vulkan_acquire_context_t handler) { + halide_vulkan_acquire_context_t result = vulkan_acquire_context_handler; + vulkan_acquire_context_handler = handler ? handler : default_vulkan_acquire_context; + return result; +} + +WEAK halide_vulkan_release_context_t halide_set_vulkan_release_context(halide_vulkan_release_context_t handler) { + halide_vulkan_release_context_t result = vulkan_release_context_handler; + vulkan_release_context_handler = handler ? handler : default_vulkan_release_context; + return result; +} + +WEAK int halide_vulkan_acquire_memory_allocator(void *user_context, + halide_vulkan_memory_allocator **allocator, + VkInstance instance, + VkDevice device, + VkPhysicalDevice physical_device) { + if (allocator == nullptr) { + error(user_context) << "Vulkan: allocator output pointer is null!\n"; + return halide_error_code_buffer_argument_is_null; + } + if (instance == VK_NULL_HANDLE || device == VK_NULL_HANDLE || physical_device == VK_NULL_HANDLE) { + error(user_context) << "Vulkan: invalid external context handles for allocator acquisition!\n"; + return halide_error_code_device_interface_no_device; + } + + int error_code = vk_load_external_context_functions(user_context, instance, device); + if (error_code != halide_error_code_success) { + return error_code; + } + + VulkanMemoryAllocator *runtime_allocator = + reinterpret_cast(*allocator); + if (runtime_allocator != nullptr) { + if (runtime_allocator->current_device() != device || + runtime_allocator->current_physical_device() != physical_device) { + error(user_context) << "Vulkan: external allocator does not match supplied device handles!\n"; + return halide_error_code_internal_error; + } + return halide_error_code_success; + } + + const VkAllocationCallbacks *alloc_callbacks = + halide_vulkan_get_allocation_callbacks(user_context); + runtime_allocator = + vk_create_memory_allocator(user_context, device, physical_device, alloc_callbacks); + if (runtime_allocator == nullptr) { + error(user_context) << "Vulkan: Failed to create memory allocator for external context!\n"; + return halide_error_code_out_of_memory; + } + + *allocator = reinterpret_cast(runtime_allocator); + return halide_error_code_success; +} + +WEAK int halide_vulkan_release_memory_allocator(void *user_context, + halide_vulkan_memory_allocator *allocator, + VkInstance instance, + VkDevice device, + VkPhysicalDevice physical_device) { + VulkanMemoryAllocator *runtime_allocator = + reinterpret_cast(allocator); + if (runtime_allocator == nullptr) { + return halide_error_code_success; + } + if (instance == VK_NULL_HANDLE || device == VK_NULL_HANDLE || physical_device == VK_NULL_HANDLE) { + error(user_context) << "Vulkan: invalid external context handles for allocator release!\n"; + return halide_error_code_device_interface_no_device; + } + if (runtime_allocator->current_device() != device || + runtime_allocator->current_physical_device() != physical_device) { + error(user_context) << "Vulkan: external allocator does not match supplied device handles during release!\n"; + return halide_error_code_internal_error; + } + + int error_code = vk_load_external_context_functions(user_context, instance, device); + if (error_code != halide_error_code_success) { + return error_code; + } + if (vkDestroyShaderModule == nullptr || vkFreeMemory == nullptr) { + error(user_context) << "Vulkan: Failed to resolve device functions for external allocator release!\n"; + return halide_error_code_symbol_not_found; + } + + vk_destroy_shader_modules(user_context, runtime_allocator); + return vk_destroy_memory_allocator(user_context, runtime_allocator); +} + WEAK bool halide_vulkan_is_initialized() { halide_mutex_lock(&thread_lock); bool is_initialized = (cached_instance != nullptr) && (cached_device != nullptr); @@ -159,7 +306,7 @@ WEAK int halide_vulkan_initialize_kernels(void *user_context, void **state_ptr, debug(user_context) << "halide_vulkan_initialize_kernels got compilation_cache mutex.\n"; VulkanCompilationCacheEntry *cache_entry = nullptr; - if (!compilation_cache.kernel_state_setup(user_context, state_ptr, ctx.device, cache_entry, + if (!compilation_cache.kernel_state_setup(user_context, state_ptr, ctx.allocator, cache_entry, Halide::Runtime::Internal::Vulkan::vk_compile_kernel_module, user_context, ctx.allocator, src, size)) { error(user_context) << "Vulkan: Failed to setup compilation cache!\n"; @@ -185,7 +332,7 @@ WEAK void halide_vulkan_finalize_kernels(void *user_context, void *state_ptr) { VulkanContext ctx(user_context); if (ctx.error == halide_error_code_success) { - compilation_cache.release_hold(user_context, ctx.device, state_ptr); + compilation_cache.release_hold(user_context, ctx.allocator, state_ptr); } #ifdef DEBUG_RUNTIME @@ -1151,7 +1298,7 @@ WEAK int halide_vulkan_run(void *user_context, // 1. Get the shader module cache entry VulkanCompilationCacheEntry *cache_entry = nullptr; - bool found = compilation_cache.lookup(ctx.device, state_ptr, cache_entry); + bool found = compilation_cache.lookup(ctx.allocator, state_ptr, cache_entry); if (!found || (cache_entry == nullptr)) { error(user_context) << "Vulkan: Failed to locate shader module! Unable to proceed!\n"; return halide_error_code_internal_error; diff --git a/src/runtime/vulkan_resources.h b/src/runtime/vulkan_resources.h index d2ef2ee5ba6f..50e4e88a6be6 100644 --- a/src/runtime/vulkan_resources.h +++ b/src/runtime/vulkan_resources.h @@ -72,7 +72,7 @@ struct VulkanCompilationCacheEntry { uint32_t module_count = 0; }; -WEAK Halide::Internal::GPUCompilationCache compilation_cache; +WEAK Halide::Internal::GPUCompilationCache compilation_cache; // -------------------------------------------------------------------------- @@ -1665,22 +1665,6 @@ void vk_destroy_compiled_shader_module(VulkanCompiledShaderModule *shader_module return; } - if (shader_module->descriptor_set_layouts) { - for (uint32_t n = 0; n < shader_module->shader_count; n++) { - debug(user_context) << " destroying descriptor set layout [" << n << "] " << shader_module->descriptor_set_layouts[n] << "\n"; - vk_destroy_descriptor_set_layout(user_context, allocator, shader_module->descriptor_set_layouts[n]); - shader_module->descriptor_set_layouts[n] = VK_NULL_HANDLE; - } - debug(user_context) << " destroying descriptor set layout " << (void *)shader_module->descriptor_set_layouts << "\n"; - vk_host_free(user_context, shader_module->descriptor_set_layouts, allocator->callbacks()); - shader_module->descriptor_set_layouts = nullptr; - } - if (shader_module->pipeline_layout) { - debug(user_context) << " destroying pipeline layout " << (void *)shader_module->pipeline_layout << "\n"; - vk_destroy_pipeline_layout(user_context, allocator, shader_module->pipeline_layout); - shader_module->pipeline_layout = VK_NULL_HANDLE; - } - if (shader_module->shader_bindings) { #ifdef DEBUG_RUNTIME debug(user_context) @@ -1688,6 +1672,13 @@ void vk_destroy_compiled_shader_module(VulkanCompiledShaderModule *shader_module << "shader_module: " << shader_module << ", " << "shader_bindings: " << shader_module->shader_bindings << ")\n"; #endif + for (uint32_t n = 0; n < shader_module->shader_count; n++) { + if (shader_module->shader_bindings[n].compute_pipeline) { + debug(user_context) << " destroying shader binding compute pipeline [" << n << "]\n"; + vk_destroy_compute_pipeline(user_context, allocator, shader_module->shader_bindings[n].compute_pipeline); + shader_module->shader_bindings[n].compute_pipeline = VK_NULL_HANDLE; + } + } for (uint32_t n = 0; n < shader_module->shader_count; n++) { debug(user_context) << " destroying shader binding [" << n << "] "; if (shader_module->shader_bindings[n].entry_point_name) { @@ -1717,15 +1708,25 @@ void vk_destroy_compiled_shader_module(VulkanCompiledShaderModule *shader_module vk_host_free(user_context, shader_module->shader_bindings[n].shared_memory_allocations, allocator->callbacks()); shader_module->shader_bindings[n].shared_memory_allocations = nullptr; } - if (shader_module->shader_bindings[n].compute_pipeline) { - debug(user_context) << " destroying shader binding compute pipeline [" << n << "]\n"; - vk_destroy_compute_pipeline(user_context, allocator, shader_module->shader_bindings[n].compute_pipeline); - shader_module->shader_bindings[n].compute_pipeline = VK_NULL_HANDLE; - } } vk_host_free(user_context, shader_module->shader_bindings, allocator->callbacks()); shader_module->shader_bindings = nullptr; } + if (shader_module->pipeline_layout) { + debug(user_context) << " destroying pipeline layout " << (void *)shader_module->pipeline_layout << "\n"; + vk_destroy_pipeline_layout(user_context, allocator, shader_module->pipeline_layout); + shader_module->pipeline_layout = VK_NULL_HANDLE; + } + if (shader_module->descriptor_set_layouts) { + for (uint32_t n = 0; n < shader_module->shader_count; n++) { + debug(user_context) << " destroying descriptor set layout [" << n << "] " << shader_module->descriptor_set_layouts[n] << "\n"; + vk_destroy_descriptor_set_layout(user_context, allocator, shader_module->descriptor_set_layouts[n]); + shader_module->descriptor_set_layouts[n] = VK_NULL_HANDLE; + } + debug(user_context) << " destroying descriptor set layout " << (void *)shader_module->descriptor_set_layouts << "\n"; + vk_host_free(user_context, shader_module->descriptor_set_layouts, allocator->callbacks()); + shader_module->descriptor_set_layouts = nullptr; + } if (shader_module->shader_module) { debug(user_context) << " destroying shader module " << (void *)shader_module->shader_module << "\n"; vkDestroyShaderModule(allocator->current_device(), shader_module->shader_module, allocator->callbacks()); @@ -1778,7 +1779,7 @@ int vk_destroy_shader_modules(void *user_context, VulkanMemoryAllocator *allocat uint64_t t_before = halide_current_time_ns(user_context); #endif if (allocator != nullptr) { - compilation_cache.delete_context(user_context, allocator->current_device(), vk_destroy_compilation_cache_entry); + compilation_cache.delete_context(user_context, allocator, vk_destroy_compilation_cache_entry); } #ifdef DEBUG_RUNTIME diff --git a/test/generator/acquire_release_aottest.cpp b/test/generator/acquire_release_aottest.cpp index 16b27d988aa7..1d0049c33324 100644 --- a/test/generator/acquire_release_aottest.cpp +++ b/test/generator/acquire_release_aottest.cpp @@ -143,6 +143,109 @@ extern "C" int halide_webgpu_release_context(void *user_context) { } #define HAS_MULTIPLE_CONTEXTS true +#elif defined(TEST_VULKAN) + +#include "HalideRuntimeVulkan.h" + +struct gpu_context { + halide_vulkan_memory_allocator *allocator = nullptr; + VkInstance instance = nullptr; + VkDevice device = nullptr; + VkPhysicalDevice physical_device = nullptr; + VkQueue queue = nullptr; + uint32_t queue_family_index = 0; + VkDebugUtilsMessengerEXT messenger = nullptr; + halide_vulkan_acquire_context_t previous_acquire = nullptr; + halide_vulkan_release_context_t previous_release = nullptr; + int acquires = 0; + int releases = 0; +} vulkan_context; + +extern "C" int custom_vulkan_acquire_context(void *user_context, + halide_vulkan_memory_allocator **allocator, + VkInstance *instance, + VkDevice *device, + VkPhysicalDevice *physical_device, + VkQueue *queue, + uint32_t *queue_family_index, + VkDebugUtilsMessengerEXT *messenger, + bool create) { + if (vulkan_context.allocator == nullptr || vulkan_context.instance == nullptr) { + return halide_error_code_device_interface_no_device; + } + *allocator = vulkan_context.allocator; + *instance = vulkan_context.instance; + *device = vulkan_context.device; + *physical_device = vulkan_context.physical_device; + *queue = vulkan_context.queue; + *queue_family_index = vulkan_context.queue_family_index; + *messenger = vulkan_context.messenger; + vulkan_context.acquires++; + return halide_error_code_success; +} + +extern "C" int custom_vulkan_release_context(void *user_context, + VkInstance instance, + VkDevice device, + VkQueue queue, + VkDebugUtilsMessengerEXT messenger) { + vulkan_context.releases++; + return halide_error_code_success; +} + +bool init_context() { + halide_vulkan_memory_allocator *default_allocator = nullptr; + // Use Halide's built-in Vulkan setup to get real handles, then exercise the + // same callback and allocator path an embedder would use for its own context. + int result = halide_vulkan_acquire_context(nullptr, + &default_allocator, + &vulkan_context.instance, + &vulkan_context.device, + &vulkan_context.physical_device, + &vulkan_context.queue, + &vulkan_context.queue_family_index, + &vulkan_context.messenger, + true); + if (result != halide_error_code_success) { + printf("[SKIP] Failed to create Vulkan context: %d\n", result); + return false; + } + halide_vulkan_release_context(nullptr, vulkan_context.instance, vulkan_context.device, + vulkan_context.queue, vulkan_context.messenger); + + result = halide_vulkan_acquire_memory_allocator(nullptr, + &vulkan_context.allocator, + vulkan_context.instance, + vulkan_context.device, + vulkan_context.physical_device); + if (result != halide_error_code_success) { + printf("Failed to create external Vulkan allocator: %d\n", result); + return false; + } + + vulkan_context.previous_acquire = halide_set_vulkan_acquire_context(custom_vulkan_acquire_context); + vulkan_context.previous_release = halide_set_vulkan_release_context(custom_vulkan_release_context); + return true; +} + +void destroy_context() { + int result = halide_vulkan_release_memory_allocator(nullptr, + vulkan_context.allocator, + vulkan_context.instance, + vulkan_context.device, + vulkan_context.physical_device); + if (result != halide_error_code_success) { + printf("Failed to release external Vulkan allocator: %d\n", result); + } + assert(vulkan_context.acquires == vulkan_context.releases); + vulkan_context.allocator = nullptr; + + halide_set_vulkan_acquire_context(vulkan_context.previous_acquire); + halide_set_vulkan_release_context(vulkan_context.previous_release); + halide_device_release(nullptr, halide_vulkan_device_interface()); + + vulkan_context = gpu_context{}; +} #else // Just use the default implementation of acquire/release. bool init_context() { @@ -194,10 +297,14 @@ bool run_test() { output.device_free(); if (interface != nullptr) { +#if defined(TEST_VULKAN) + destroy_context(); +#else halide_device_release(nullptr, interface); // Free the context we created. destroy_context(); +#endif } else { printf("Device interface is nullptr.\n"); } @@ -207,9 +314,6 @@ bool run_test() { } int main(int argc, char **argv) { -#if defined(TEST_VULKAN) - printf("[SKIP] Vulkan doesn't implement a custom context for this test.\n"); -#else if (!run_test()) { return 1; } @@ -217,7 +321,6 @@ int main(int argc, char **argv) { if (!run_test()) { return 1; } -#endif return 0; }