diff --git a/src/runtime/metal/metal_device_api.mm b/src/runtime/metal/metal_device_api.mm index 2a44d98f109f..93351b754bd7 100644 --- a/src/runtime/metal/metal_device_api.mm +++ b/src/runtime/metal/metal_device_api.mm @@ -24,12 +24,91 @@ #include #include #include + +#include +#include +#include +#include + #include "metal_common.h" namespace tvm { namespace runtime { namespace metal { +/*! + * \brief Resolve the storage mode used for device data buffers from the + * TVM_METAL_STORAGE_MODE environment variable (read once, cached). + * + * Accepted values (case-insensitive): + * - "private" (default; backward compatible) -> MTLResourceStorageModePrivate + * - "shared" -> MTLResourceStorageModeShared + * - "managed" (macOS discrete GPUs only) -> MTLResourceStorageModeManaged + * + * Why an environment variable instead of a CMake flag or a runtime API: + * - CMake flag would require rebuilding TVM to flip behaviour, which makes + * downstream packaging (mlc-llm, tvm-ffi consumers, prebuilt wheels) + * awkward — they would need two builds. + * - A runtime SetStorageMode() API has the right shape but adds a public + * C++/Python surface to MetalWorkspace; it is also racy if any allocation + * can happen before the user sets the mode, so consumers would still need + * to set an env var or call the API extremely early. + * - An env var read-once-at-init is the smallest possible behaviour change: + * default (unset) keeps the historical Private mode, opt-in is a one-line + * export. This matches how TVM exposes other low-level toggles + * (e.g. TVM_LOG_DEBUG, TVM_NUM_THREADS). + * + * The motivating use case is zero-copy DLPack interop with MLX (which + * allocates Metal buffers as Shared). Two allocators on the same MTLDevice + * cannot share an MTLBuffer that has different page-mapping semantics, so + * DLPack capsules cross-imported between TVM-NDArray and mx.array currently + * fail. With TVM_METAL_STORAGE_MODE=shared, the two allocators agree on the + * storage class and the underlying MTLBuffer can be aliased without copy. + * + * Notes: + * - This intentionally does NOT change the staging buffer pool + * (StagingBufferPool::GetOrCreate) or the GPU->CPU temp buffer + * (GetTempBuffer); both must remain Shared because they are host-staging + * buffers whose entire purpose is host visibility. Changing them would + * break CPU<->GPU memcpy, which is unrelated to the device-data + * allocation that this flag governs. + * - Shared-mode device buffers are valid arguments to compute kernels + * (setBuffer:offset:atIndex:) on every Metal device family — MLX itself + * proves this in production. The kernel dispatch path in metal_module.mm + * does not need any change. + */ +inline MTLResourceOptions GetMetalStorageOptions() { + static const MTLResourceOptions cached = []() -> MTLResourceOptions { + const char* raw = std::getenv("TVM_METAL_STORAGE_MODE"); + if (raw == nullptr || raw[0] == '\0') { + return MTLResourceStorageModePrivate; + } + // Lowercase a small bounded copy for case-insensitive comparison. + std::string v(raw); + for (auto& c : v) c = static_cast(std::tolower(static_cast(c))); + if (v == "shared") { + return MTLResourceStorageModeShared; + } + if (v == "managed") { +#if TARGET_OS_IPHONE + LOG(WARNING) << "TVM_METAL_STORAGE_MODE=managed is not supported on iOS, " + << "falling back to MTLResourceStorageModeShared."; + return MTLResourceStorageModeShared; +#else + return MTLResourceStorageModeManaged; +#endif + } + if (v == "private") { + return MTLResourceStorageModePrivate; + } + LOG(WARNING) << "Unknown TVM_METAL_STORAGE_MODE='" << raw + << "', expected one of {private,shared,managed}. " + << "Falling back to MTLResourceStorageModePrivate."; + return MTLResourceStorageModePrivate; + }(); + return cached; +} + AutoReleasePoolWrapper& AutoReleasePoolWrapper::GetInstance() { static AutoReleasePoolWrapper instance; return instance; @@ -184,15 +263,11 @@ int GetWarpSize(id dev) { id buf; AUTORELEASEPOOL { id dev = GetDevice(device); - // GPU memory only - MTLResourceOptions storage_mode = MTLResourceStorageModePrivate; - /* - #if TARGET_OS_IPHONE - storage_mode = MTLResourceStorageModeShared; - #else - storage_mode = MTLResourceStorageModeManaged; - #endif - */ + // Storage mode for device-data buffers. Defaults to Private (the + // historical behaviour); set TVM_METAL_STORAGE_MODE=shared to enable + // zero-copy DLPack interop with frameworks that allocate Shared (e.g. + // MLX). See GetMetalStorageOptions() for the rationale. + MTLResourceOptions storage_mode = GetMetalStorageOptions(); buf = [dev newBufferWithLength:nbytes options:storage_mode]; TVM_FFI_ICHECK(buf != nil); }; @@ -418,10 +493,24 @@ int GetWarpSize(id dev) { result.Set("free_syncs", static_cast(p.free_syncs)); return result; }) - .def("metal.ResetProfileCounters", [](int device_id) { - auto* ws = MetalWorkspace::Global(); - ws->CastStreamOrGetDefault(nullptr, device_id)->profile.Reset(); - }); + .def("metal.ResetProfileCounters", + [](int device_id) { + auto* ws = MetalWorkspace::Global(); + ws->CastStreamOrGetDefault(nullptr, device_id)->profile.Reset(); + }) + .def("metal.GetStorageMode", + []() -> ffi::String { + // Return a stable string for parity tests; mirrors the env-var + // names accepted by GetMetalStorageOptions(). + MTLResourceOptions opts = GetMetalStorageOptions(); + // The Resource enum encodes the storage mode in the high bits + // (MTLResourceStorageModeShift == 4), but the lower-level enum + // values overlap, so we compare directly with the resource enum + // values to remain robust on every SDK version. + if (opts == MTLResourceStorageModeShared) return ffi::String("shared"); + if (opts == MTLResourceStorageModeManaged) return ffi::String("managed"); + return ffi::String("private"); + }); } class MetalTimerNode : public TimerNode {