Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
#
# ==============================================================================

# Mark every runner target (plus clean and help) as phony so make never treats a
# same-named file or directory as an up-to-date build product.
# NOTE(review): the two declarations below differ only by eagle3-cuda and
# eagle3-mlx in the second one; they look like the before/after sides of a
# single diff line rather than two intended declarations -- confirm.
.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral-mlx voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal voxtral_realtime-mlx voxtral_tts-cpu voxtral_tts-cuda whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal parakeet-mlx parakeet-vulkan dinov2-cuda dinov2-cuda-debug sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu lfm_2_5-mlx llava-cpu gemma3-cuda gemma3-cpu gemma4_31b-cuda gemma4_31b-mlx qwen3_5_moe-cuda qwen3_5_moe-metal clean help
.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral-mlx voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal voxtral_realtime-mlx voxtral_tts-cpu voxtral_tts-cuda whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal parakeet-mlx parakeet-vulkan dinov2-cuda dinov2-cuda-debug sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu lfm_2_5-mlx llava-cpu gemma3-cuda gemma3-cpu gemma4_31b-cuda gemma4_31b-mlx eagle3-cuda eagle3-mlx qwen3_5_moe-cuda qwen3_5_moe-metal clean help

help:
@echo "This Makefile adds targets to build runners for various models on various backends. Run using \`make <target>\`. Available targets:"
Expand Down Expand Up @@ -129,6 +129,8 @@ help:
@echo " gemma3-cpu - Build Gemma3 runner with CPU backend"
@echo " gemma4_31b-cuda - Build Gemma 4 31B runner with CUDA backend"
@echo " gemma4_31b-mlx - Build Gemma 4 31B runner with MLX backend"
@echo " eagle3-cuda - Build EAGLE-3 speculator runner with CUDA backend"
@echo " eagle3-mlx - Build EAGLE-3 speculator runner with MLX backend"
@echo " qwen3_5_moe-cuda - Build Qwen3.5 MoE runner with CUDA backend"
@echo " qwen3_5_moe-metal - Build Qwen3.5 MoE runner with Metal backend"
@echo " clean - Clean build artifacts"
Expand Down Expand Up @@ -457,6 +459,24 @@ gemma4_31b-mlx:
@echo "✓ Build complete!"
@echo " Binary: cmake-out/examples/models/gemma4_31b/gemma4_31b_runner"

# eagle3-cuda: build the EAGLE-3 speculator runner for the CUDA backend.
# Two stages, in order: the llm-release-cuda workflow preset is run from the
# directory make was invoked in, then the eagle3-cuda workflow preset is run
# from examples/models/eagle3. The final echo reports where the binary is
# expected (cmake-out/examples/models/eagle3/eagle3_speculator_runner).
eagle3-cuda:
@echo "==> Building and installing ExecuTorch with CUDA..."
cmake --workflow --preset llm-release-cuda
@echo "==> Building EAGLE-3 speculator runner with CUDA..."
cd examples/models/eagle3 && cmake --workflow --preset eagle3-cuda
@echo ""
@echo "✓ Build complete!"
@echo " Binary: cmake-out/examples/models/eagle3/eagle3_speculator_runner"

# eagle3-mlx: build the EAGLE-3 speculator runner for the MLX backend.
# Two stages, in order: the mlx-release workflow preset is run from the
# directory make was invoked in, then the eagle3-mlx workflow preset is run
# from examples/models/eagle3. The reported binary path is the same one the
# eagle3-cuda target prints, so building one backend presumably replaces the
# other's binary -- confirm.
eagle3-mlx:
@echo "==> Building and installing ExecuTorch with MLX..."
cmake --workflow --preset mlx-release
@echo "==> Building EAGLE-3 speculator runner with MLX..."
cd examples/models/eagle3 && cmake --workflow --preset eagle3-mlx
@echo ""
@echo "✓ Build complete!"
@echo " Binary: cmake-out/examples/models/eagle3/eagle3_speculator_runner"

qwen3_5_moe-metal:
@echo "==> Building and installing ExecuTorch with Metal..."
cmake --workflow --preset llm-release-metal
Expand Down
13 changes: 11 additions & 2 deletions examples/models/eagle3/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,19 @@ list(
extension_flat_tensor
)

# Backend: CUDA (AOTI). The EAGLE-3 speculator export is CUDA-only.
# Backend: CUDA (AOTI) or MLX (exactly one required). CUDA returns greedy ids;
# MLX returns logits and the runner argmaxes + maps draft ids via d2t on the
# host.
# Choose the delegate backend appended to link_libraries. EXECUTORCH_BUILD_CUDA
# is tested first, so the CUDA branch wins even when a mlxdelegate target is
# also defined; nothing here rejects having both available.
if(EXECUTORCH_BUILD_CUDA)
# CUDA branch: the CUDA toolkit must be found or configuration fails.
find_package(CUDAToolkit REQUIRED)
list(APPEND link_libraries aoti_cuda_backend)
# Project helper defined outside this view; presumably adjusts link options so
# the backend library is linked in whole -- confirm.
executorch_target_link_options_shared_lib(aoti_cuda_backend)
# Directory-scoped: defines EXECUTORCH_BUILD_CUDA for targets in this
# directory, not only for the runner.
add_compile_definitions(EXECUTORCH_BUILD_CUDA)
elseif(TARGET mlxdelegate)
# MLX branch: keyed on the mlxdelegate target existing, not on a cache option.
list(APPEND link_libraries mlxdelegate mlx)
executorch_target_link_options_shared_lib(mlxdelegate)
else()
# No usable backend: abort configuration.
# NOTE(review): FATAL_ERROR stops processing at the first message() call, so
# the second message below can never be reached as written. The two lines look
# like the old and new wording of one diff line -- confirm which one the file
# actually contains.
message(FATAL_ERROR "EAGLE-3 speculator runner requires EXECUTORCH_BUILD_CUDA=ON")
message(FATAL_ERROR "Set EXECUTORCH_BUILD_CUDA=ON or EXECUTORCH_BUILD_MLX=ON")
endif()

# Tokenizer (HuggingFace tokenizer.json)
Expand All @@ -67,3 +72,7 @@ if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
target_link_options(eagle3_speculator_runner PRIVATE "LINKER:-s")
endif()
endif()

# When the mlxdelegate target exists, run the project helper
# executorch_target_copy_mlx_metallib on the runner target (presumably places
# the MLX metallib where the built binary can find it -- confirm).
# NOTE(review): this test does not look at EXECUTORCH_BUILD_CUDA, so it also
# fires in a CUDA build whenever mlxdelegate happens to be defined -- confirm
# that is intended.
if(TARGET mlxdelegate)
executorch_target_copy_mlx_metallib(eagle3_speculator_runner)
endif()
39 changes: 39 additions & 0 deletions examples/models/eagle3/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,21 @@
"string": "${hostSystemName}",
"list": ["Linux", "Windows"]
}
},
{
"name": "eagle3-mlx",
"displayName": "EAGLE-3 speculator runner (MLX)",
"binaryDir": "${sourceDir}/../../../cmake-out/examples/models/eagle3",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release",
"CMAKE_FIND_ROOT_PATH": "${sourceDir}/../../../cmake-out",
"CMAKE_PREFIX_PATH": "${sourceDir}/../../../cmake-out"
},
"condition": {
"type": "equals",
"lhs": "${hostSystemName}",
"rhs": "Darwin"
}
}
],
"buildPresets": [
Expand All @@ -24,6 +39,30 @@
"displayName": "Build EAGLE-3 speculator runner (CUDA)",
"configurePreset": "eagle3-cuda",
"targets": ["eagle3_speculator_runner"]
},
{
"name": "eagle3-mlx",
"displayName": "Build EAGLE-3 speculator runner (MLX)",
"configurePreset": "eagle3-mlx",
"targets": ["eagle3_speculator_runner"]
}
],
"workflowPresets": [
{
"name": "eagle3-cuda",
"displayName": "Configure and build EAGLE-3 speculator runner (CUDA)",
"steps": [
{"type": "configure", "name": "eagle3-cuda"},
{"type": "build", "name": "eagle3-cuda"}
]
},
{
"name": "eagle3-mlx",
"displayName": "Configure and build EAGLE-3 speculator runner (MLX)",
"steps": [
{"type": "configure", "name": "eagle3-mlx"},
{"type": "build", "name": "eagle3-mlx"}
]
}
]
}
Loading
Loading