Skip to content

Commit 04a5f02

Browse files
Merge branch 'main' into fix/tool-call-streaming-with-reasoning-parser
2 parents 2df2ff4 + 2108a57 commit 04a5f02

File tree

96 files changed

+1333
-1309
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

96 files changed

+1333
-1309
lines changed

CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -241,7 +241,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
241241
message(STATUS "Enabling cumem allocator extension.")
242242
# link against cuda driver library
243243
list(APPEND CUMEM_LIBS CUDA::cuda_driver)
244-
define_gpu_extension_target(
244+
define_extension_target(
245245
cumem_allocator
246246
DESTINATION vllm
247247
LANGUAGE CXX
@@ -858,7 +858,7 @@ if (VLLM_GPU_LANG STREQUAL "HIP")
858858
endif()
859859

860860
message(STATUS "Enabling C extension.")
861-
define_gpu_extension_target(
861+
define_extension_target(
862862
_C
863863
DESTINATION vllm
864864
LANGUAGE ${VLLM_GPU_LANG}
@@ -973,7 +973,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
973973
endif()
974974

975975
message(STATUS "Enabling moe extension.")
976-
define_gpu_extension_target(
976+
define_extension_target(
977977
_moe_C
978978
DESTINATION vllm
979979
LANGUAGE ${VLLM_GPU_LANG}
@@ -994,7 +994,7 @@ if(VLLM_GPU_LANG STREQUAL "HIP")
994994
"csrc/rocm/skinny_gemms.cu"
995995
"csrc/rocm/attention.cu")
996996

997-
define_gpu_extension_target(
997+
define_extension_target(
998998
_rocm_C
999999
DESTINATION vllm
10001000
LANGUAGE ${VLLM_GPU_LANG}

cmake/cpu_extension.cmake

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -343,7 +343,7 @@ message(STATUS "CPU extension source files: ${VLLM_EXT_SRC}")
343343
# Define extension targets
344344
#
345345

346-
define_gpu_extension_target(
346+
define_extension_target(
347347
_C
348348
DESTINATION vllm
349349
LANGUAGE CXX
@@ -354,4 +354,4 @@ define_gpu_extension_target(
354354
WITH_SOABI
355355
)
356356

357-
message(STATUS "Enabling C extension.")
357+
message(STATUS "Enabling C extension.")

cmake/external_projects/flashmla.cmake

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ if(FLASH_MLA_ARCHS)
9292
SRCS "${FlashMLA_Extension_SOURCES}"
9393
CUDA_ARCHS "${FLASH_MLA_ARCHS}")
9494

95-
define_gpu_extension_target(
95+
define_extension_target(
9696
_flashmla_C
9797
DESTINATION vllm
9898
LANGUAGE ${VLLM_GPU_LANG}
@@ -109,7 +109,7 @@ if(FLASH_MLA_ARCHS)
109109
$<$<COMPILE_LANGUAGE:CUDA>:-UPy_LIMITED_API>
110110
$<$<COMPILE_LANGUAGE:CXX>:-UPy_LIMITED_API>)
111111

112-
define_gpu_extension_target(
112+
define_extension_target(
113113
_flashmla_extension_C
114114
DESTINATION vllm
115115
LANGUAGE ${VLLM_GPU_LANG}

cmake/utils.cmake

Lines changed: 34 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -453,21 +453,20 @@ macro(override_gpu_arches GPU_ARCHES GPU_LANG GPU_SUPPORTED_ARCHES)
453453
endmacro()
454454

455455
#
456-
# Define a target named `GPU_MOD_NAME` for a single extension. The
456+
# Define a target named `MOD_NAME` for a single extension. The
457457
# arguments are:
458458
#
459459
# DESTINATION <dest> - Module destination directory.
460-
# LANGUAGE <lang> - The GPU language for this module, e.g CUDA, HIP,
461-
# etc.
460+
# LANGUAGE <lang> - The language for this module, e.g. CUDA, HIP,
461+
# CXX, etc.
462462
# SOURCES <sources> - List of source files relative to CMakeLists.txt
463463
# directory.
464464
#
465465
# Optional arguments:
466466
#
467-
# ARCHITECTURES <arches> - A list of target GPU architectures in cmake
468-
# format.
469-
# Refer `CMAKE_CUDA_ARCHITECTURES` documentation
470-
# and `CMAKE_HIP_ARCHITECTURES` for more info.
467+
# ARCHITECTURES <arches> - A list of target architectures in cmake format.
468+
# For GPU, refer to CMAKE_CUDA_ARCHITECTURES and
469+
# CMAKE_HIP_ARCHITECTURES for more info.
471470
# ARCHITECTURES will use cmake's defaults if
472471
# not provided.
473472
# COMPILE_FLAGS <flags> - Extra compiler flags passed to NVCC/hip.
@@ -478,63 +477,61 @@ endmacro()
478477
#
479478
# Note: optimization level/debug info is set via cmake build type.
480479
#
481-
function (define_gpu_extension_target GPU_MOD_NAME)
480+
function (define_extension_target MOD_NAME)
482481
cmake_parse_arguments(PARSE_ARGV 1
483-
GPU
482+
ARG
484483
"WITH_SOABI"
485484
"DESTINATION;LANGUAGE;USE_SABI"
486485
"SOURCES;ARCHITECTURES;COMPILE_FLAGS;INCLUDE_DIRECTORIES;LIBRARIES")
487486

488487
# Add hipify preprocessing step when building with HIP/ROCm.
489-
if (GPU_LANGUAGE STREQUAL "HIP")
490-
hipify_sources_target(GPU_SOURCES ${GPU_MOD_NAME} "${GPU_SOURCES}")
488+
if (ARG_LANGUAGE STREQUAL "HIP")
489+
hipify_sources_target(ARG_SOURCES ${MOD_NAME} "${ARG_SOURCES}")
491490
endif()
492491

493-
if (GPU_WITH_SOABI)
494-
set(GPU_WITH_SOABI WITH_SOABI)
492+
if (ARG_WITH_SOABI)
493+
set(SOABI_KEYWORD WITH_SOABI)
495494
else()
496-
set(GPU_WITH_SOABI)
495+
set(SOABI_KEYWORD "")
497496
endif()
498497

499-
if (GPU_USE_SABI)
500-
Python_add_library(${GPU_MOD_NAME} MODULE USE_SABI ${GPU_USE_SABI} ${GPU_WITH_SOABI} "${GPU_SOURCES}")
498+
if (ARG_USE_SABI)
499+
Python_add_library(${MOD_NAME} MODULE USE_SABI ${ARG_USE_SABI} ${SOABI_KEYWORD} "${ARG_SOURCES}")
501500
else()
502-
Python_add_library(${GPU_MOD_NAME} MODULE ${GPU_WITH_SOABI} "${GPU_SOURCES}")
501+
Python_add_library(${MOD_NAME} MODULE ${SOABI_KEYWORD} "${ARG_SOURCES}")
503502
endif()
504503

505-
if (GPU_LANGUAGE STREQUAL "HIP")
504+
if (ARG_LANGUAGE STREQUAL "HIP")
506505
# Make this target dependent on the hipify preprocessor step.
507-
add_dependencies(${GPU_MOD_NAME} hipify${GPU_MOD_NAME})
506+
add_dependencies(${MOD_NAME} hipify${MOD_NAME})
508507
# Make sure we include the hipified versions of the headers, and avoid conflicts with the ones in the original source folder
509-
target_include_directories(${GPU_MOD_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/csrc
510-
${GPU_INCLUDE_DIRECTORIES})
508+
target_include_directories(${MOD_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/csrc
509+
${ARG_INCLUDE_DIRECTORIES})
511510
else()
512-
target_include_directories(${GPU_MOD_NAME} PRIVATE csrc
513-
${GPU_INCLUDE_DIRECTORIES})
511+
target_include_directories(${MOD_NAME} PRIVATE csrc
512+
${ARG_INCLUDE_DIRECTORIES})
514513
endif()
515514

516-
if (GPU_ARCHITECTURES)
517-
set_target_properties(${GPU_MOD_NAME} PROPERTIES
518-
${GPU_LANGUAGE}_ARCHITECTURES "${GPU_ARCHITECTURES}")
515+
if (ARG_ARCHITECTURES)
516+
set_target_properties(${MOD_NAME} PROPERTIES
517+
${ARG_LANGUAGE}_ARCHITECTURES "${ARG_ARCHITECTURES}")
519518
endif()
520519

520+
target_compile_options(${MOD_NAME} PRIVATE
521+
$<$<COMPILE_LANGUAGE:${ARG_LANGUAGE}>:${ARG_COMPILE_FLAGS}>)
521522

522-
target_compile_options(${GPU_MOD_NAME} PRIVATE
523-
$<$<COMPILE_LANGUAGE:${GPU_LANGUAGE}>:${GPU_COMPILE_FLAGS}>)
523+
target_compile_definitions(${MOD_NAME} PRIVATE
524+
"-DTORCH_EXTENSION_NAME=${MOD_NAME}")
524525

525-
target_compile_definitions(${GPU_MOD_NAME} PRIVATE
526-
"-DTORCH_EXTENSION_NAME=${GPU_MOD_NAME}")
527-
528-
529-
target_link_libraries(${GPU_MOD_NAME} PRIVATE torch ${GPU_LIBRARIES})
526+
target_link_libraries(${MOD_NAME} PRIVATE torch ${ARG_LIBRARIES})
530527

531528
# Don't use `TORCH_LIBRARIES` for CUDA since it pulls in a bunch of
532529
# dependencies that are not necessary and may not be installed.
533-
if (GPU_LANGUAGE STREQUAL "CUDA")
534-
target_link_libraries(${GPU_MOD_NAME} PRIVATE CUDA::cudart CUDA::cuda_driver)
530+
if (ARG_LANGUAGE STREQUAL "CUDA")
531+
target_link_libraries(${MOD_NAME} PRIVATE torch CUDA::cudart CUDA::cuda_driver ${ARG_LIBRARIES})
535532
else()
536-
target_link_libraries(${GPU_MOD_NAME} PRIVATE ${TORCH_LIBRARIES})
533+
target_link_libraries(${MOD_NAME} PRIVATE torch ${TORCH_LIBRARIES} ${ARG_LIBRARIES})
537534
endif()
538535

539-
install(TARGETS ${GPU_MOD_NAME} LIBRARY DESTINATION ${GPU_DESTINATION} COMPONENT ${GPU_MOD_NAME})
536+
install(TARGETS ${MOD_NAME} LIBRARY DESTINATION ${ARG_DESTINATION} COMPONENT ${MOD_NAME})
540537
endfunction()

0 commit comments

Comments
 (0)