From 6e9d7f100fee817f165d0340ae79e0ea74f2a515 Mon Sep 17 00:00:00 2001 From: zhangyue Date: Wed, 28 Jan 2026 05:59:58 +0000 Subject: [PATCH] =?UTF-8?q?demo131=E5=88=86=E6=94=AF=E9=80=82=E9=85=8D?= =?UTF-8?q?=E5=A4=A9=E6=95=B0=E6=94=B9=E5=8A=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ops/paged_attention/cuda/kernel_v2.cuh | 4 ++++ src/infiniop/ops/paged_attention/operator.cc | 14 +++++++++++++- .../ops/paged_attention_prefill/operator.cc | 14 +++++++++++++- src/infiniop/ops/paged_caching/operator.cc | 14 +++++++++++++- test/infinicore/ops/embedding.py | 2 +- xmake/iluvatar.lua | 6 +++--- 6 files changed, 47 insertions(+), 7 deletions(-) diff --git a/src/infiniop/ops/paged_attention/cuda/kernel_v2.cuh b/src/infiniop/ops/paged_attention/cuda/kernel_v2.cuh index e63dd68e2..1bb909a15 100644 --- a/src/infiniop/ops/paged_attention/cuda/kernel_v2.cuh +++ b/src/infiniop/ops/paged_attention/cuda/kernel_v2.cuh @@ -30,7 +30,11 @@ __device__ __forceinline__ float warpReduceMax(float x) { } __device__ __forceinline__ unsigned int cvtaToShared(const void *ptr) { +#if defined(__CUDA_ARCH__) && defined(__cvta_generic_to_shared) return static_cast(__cvta_generic_to_shared(ptr)); +#else + return static_cast(reinterpret_cast(ptr)); +#endif } __device__ __forceinline__ void cpAsyncCaSharedGlobal16(void *dst_shared, const void *src_global) { diff --git a/src/infiniop/ops/paged_attention/operator.cc b/src/infiniop/ops/paged_attention/operator.cc index 46bea9e1e..8bb603cdb 100644 --- a/src/infiniop/ops/paged_attention/operator.cc +++ b/src/infiniop/ops/paged_attention/operator.cc @@ -2,7 +2,7 @@ #include "../../handle.h" #include "infiniop/ops/paged_attention.h" -#ifdef ENABLE_NVIDIA_API +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) #include "nvidia/paged_attention_nvidia.cuh" #endif #ifdef ENABLE_METAX_API @@ -36,6 +36,9 @@ __C infiniStatus_t infiniopCreatePagedAttentionDescriptor( #endif #ifdef ENABLE_METAX_API CREATE(INFINI_DEVICE_METAX, metax) +#endif +#ifdef ENABLE_ILUVATAR_API + CREATE(INFINI_DEVICE_ILUVATAR, nvidia) #endif default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; @@ -57,6 +60,9 @@ __C infiniStatus_t infiniopGetPagedAttentionWorkspaceSize( #endif #ifdef ENABLE_METAX_API GET(INFINI_DEVICE_METAX, metax) +#endif +#ifdef ENABLE_ILUVATAR_API + GET(INFINI_DEVICE_ILUVATAR, nvidia) #endif default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; @@ -82,6 +88,9 @@ __C infiniStatus_t infiniopPagedAttention( #endif #ifdef ENABLE_METAX_API CALCULATE(INFINI_DEVICE_METAX, metax) +#endif +#ifdef ENABLE_ILUVATAR_API + CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia) #endif default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; @@ -102,6 +111,9 @@ __C infiniStatus_t infiniopDestroyPagedAttentionDescriptor( #endif #ifdef ENABLE_METAX_API DESTROY(INFINI_DEVICE_METAX, metax) +#endif +#ifdef ENABLE_ILUVATAR_API + DESTROY(INFINI_DEVICE_ILUVATAR, nvidia) #endif default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; diff --git a/src/infiniop/ops/paged_attention_prefill/operator.cc b/src/infiniop/ops/paged_attention_prefill/operator.cc index af21df651..207157b22 100644 --- a/src/infiniop/ops/paged_attention_prefill/operator.cc +++ b/src/infiniop/ops/paged_attention_prefill/operator.cc @@ -2,7 +2,7 @@ #include "../../handle.h" #include "infiniop/ops/paged_attention_prefill.h" -#ifdef ENABLE_NVIDIA_API +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) #include "nvidia/paged_attention_prefill_nvidia.cuh" #endif #ifdef ENABLE_METAX_API @@ -38,6 +38,9 @@ __C infiniStatus_t infiniopCreatePagedAttentionPrefillDescriptor( #endif #ifdef ENABLE_METAX_API CREATE(INFINI_DEVICE_METAX, metax) +#endif +#ifdef ENABLE_ILUVATAR_API + CREATE(INFINI_DEVICE_ILUVATAR, nvidia) #endif default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; @@ -59,6 +62,9 @@ __C infiniStatus_t infiniopGetPagedAttentionPrefillWorkspaceSize( #endif #ifdef ENABLE_METAX_API GET(INFINI_DEVICE_METAX, metax) +#endif +#ifdef ENABLE_ILUVATAR_API + GET(INFINI_DEVICE_ILUVATAR, nvidia) #endif default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; @@ -87,6 +93,9 @@ __C infiniStatus_t infiniopPagedAttentionPrefill( #endif #ifdef ENABLE_METAX_API CALCULATE(INFINI_DEVICE_METAX, metax) +#endif +#ifdef ENABLE_ILUVATAR_API + CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia) #endif default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; @@ -107,6 +116,9 @@ __C infiniStatus_t infiniopDestroyPagedAttentionPrefillDescriptor( #endif #ifdef ENABLE_METAX_API DESTROY(INFINI_DEVICE_METAX, metax) +#endif +#ifdef ENABLE_ILUVATAR_API + DESTROY(INFINI_DEVICE_ILUVATAR, nvidia) #endif default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; diff --git a/src/infiniop/ops/paged_caching/operator.cc b/src/infiniop/ops/paged_caching/operator.cc index 6eb746f9f..3afc7a84b 100644 --- a/src/infiniop/ops/paged_caching/operator.cc +++ b/src/infiniop/ops/paged_caching/operator.cc @@ -2,7 +2,7 @@ #include "../../handle.h" #include "infiniop/ops/paged_caching.h" -#ifdef ENABLE_NVIDIA_API +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) #include "nvidia/paged_caching_nvidia.cuh" #endif #ifdef ENABLE_METAX_API @@ -31,6 +31,9 @@ __C infiniStatus_t infiniopCreatePagedCachingDescriptor( #endif #ifdef ENABLE_METAX_API CREATE(INFINI_DEVICE_METAX, metax) +#endif +#ifdef ENABLE_ILUVATAR_API + CREATE(INFINI_DEVICE_ILUVATAR, nvidia) #endif default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; @@ -52,6 +55,9 @@ __C infiniStatus_t infiniopGetPagedCachingWorkspaceSize( #endif #ifdef ENABLE_METAX_API GET(INFINI_DEVICE_METAX, metax) +#endif +#ifdef ENABLE_ILUVATAR_API + GET(INFINI_DEVICE_ILUVATAR, nvidia) #endif default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; @@ -77,6 +83,9 @@ __C infiniStatus_t infiniopPagedCaching( #endif #ifdef ENABLE_METAX_API CALCULATE(INFINI_DEVICE_METAX, metax) +#endif +#ifdef ENABLE_ILUVATAR_API + CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia) #endif default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; @@ -97,6 +106,9 @@ __C infiniStatus_t infiniopDestroyPagedCachingDescriptor( #endif #ifdef ENABLE_METAX_API DESTROY(INFINI_DEVICE_METAX, metax) +#endif +#ifdef ENABLE_ILUVATAR_API + DESTROY(INFINI_DEVICE_ILUVATAR, nvidia) #endif default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; diff --git a/test/infinicore/ops/embedding.py b/test/infinicore/ops/embedding.py index 6cb7755af..f720cfe2b 100644 --- a/test/infinicore/ops/embedding.py +++ b/test/infinicore/ops/embedding.py @@ -3,6 +3,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) +import infinicore import torch from framework import BaseOperatorTest, TensorSpec, TestCase, GenericTestRunner from framework.tensor import TensorInitializer @@ -12,7 +13,6 @@ to_torch_dtype, ) -import infinicore # ============================================================================== # Operator-specific configuration diff --git a/xmake/iluvatar.lua b/xmake/iluvatar.lua index 1bb5f6c4c..4c641d459 100644 --- a/xmake/iluvatar.lua +++ b/xmake/iluvatar.lua @@ -42,14 +42,14 @@ target("infiniop-iluvatar") add_links("cudart", "cublas", "cudnn") set_warnings("all", "error") - add_cuflags("-Wno-error=unused-private-field") + add_cuflags("-Wno-error=unused-private-field", "-Wno-error=unused-variable", "-Wno-unused-variable") add_cuflags("-fPIC", "-x", "ivcore", "-std=c++17", {force = true}) if has_config("ivcore-20") then add_cuflags("--cuda-gpu-arch=ivcore20", {force = true}) end add_culdflags("-fPIC") - add_cxflags("-fPIC") - add_cxxflags("-fPIC") + add_cxflags("-fPIC", "-Wno-error=unused-variable", "-Wno-unused-variable") + add_cxxflags("-fPIC", "-Wno-error=unused-variable", "-Wno-unused-variable") -- set_languages("cxx17") 天数似乎不能用这个配置 add_files("../src/infiniop/devices/nvidia/*.cu", "../src/infiniop/ops/*/nvidia/*.cu")