diff --git a/clang/lib/Basic/Targets/SPIR.cpp b/clang/lib/Basic/Targets/SPIR.cpp index 19f160e279b21..e527464a81515 100644 --- a/clang/lib/Basic/Targets/SPIR.cpp +++ b/clang/lib/Basic/Targets/SPIR.cpp @@ -84,6 +84,17 @@ void SPIR64TargetInfo::getTargetDefines(const LangOptions &Opts, DefineStd(Builder, "SPIR64", Opts); } +bool WindowsX86_64_SPIR64TargetInfo::initFeatureMap( + llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, + const std::vector &FeaturesVec) const { + // Mirror X86TargetInfo's "x86_64 always has SSE2" baseline: the matching + // _M_X64 macro makes MSVC STL headers take the x86 intrinsics path, whose + // _mm_* intrinsics require sse/sse2 in the target feature set. + Features["sse"] = true; + Features["sse2"] = true; + return SPIR64TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); +} + void BaseSPIRVTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { DefineStd(Builder, "SPIRV", Opts); diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index f5f97af54dca7..8e6e3a0a1dd00 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -395,6 +395,11 @@ class LLVM_LIBRARY_VISIBILITY WindowsX86_64_SPIR64TargetInfo return (CC == CC_SpirFunction || CC == CC_DeviceKernel) ? CCCR_OK : CCCR_Warning; } + + bool + initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, + StringRef CPU, + const std::vector &FeaturesVec) const override; }; // x86-64 SPIR64 Windows Visual Studio target diff --git a/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp index a3c62ddbd4058..f6a4e799991b8 100644 --- a/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp +++ b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp @@ -2,24 +2,24 @@ // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV,CHECK-SPIR-NO-SSE2 %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV,CHECK-SPIR-NO-SSE2 %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV,CHECK-SPIR-NO-SSE2 %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV,CHECK-SPIR-NO-SSE2 %s // RUN: %clang_cc1 -fsycl-is-host -emit-llvm -triple x86_64-pc-windows-msvc -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-HOST,CHECK-HOST-WINDOWS %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV,CHECK-SPIR-SSE2 %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV,CHECK-SPIR-NO-SSE2 %s // RUN: %clang_cc1 -fsycl-is-host -emit-llvm -triple x86_64-uefi -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-HOST,CHECK-HOST-WINDOWS %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV,CHECK-SPIR-NO-SSE2 %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV,CHECK-SPIR-NO-SSE2 %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV,CHECK-SPIR-NO-SSE2 %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV,CHECK-SPIR-NO-SSE2 %s // Test code generation for functions declared with the sycl_kernel_entry_point // attribute. During host compilation, the bodies of such functions are replaced @@ -704,5 +704,9 @@ int main() { // CHECK-NVPTX: #[[NVPTX_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-optlevel"="0" } // CHECK-NVPTX: #[[NVPTX_ATTR1]] = { convergent nounwind } // -// CHECK-SPIR: #[[SPIR_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-optlevel"="0" } +// WindowsX86_64_SPIR64TargetInfo::initFeatureMap adds +sse/+sse2 to the +// device-target feature baseline; every other SPIR/SPIRV target class used by +// the RUN lines above leaves it empty. +// CHECK-SPIR-NO-SSE2: #[[SPIR_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-optlevel"="0" } +// CHECK-SPIR-SSE2: #[[SPIR_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-optlevel"="0" "target-features"="+sse,+sse2" } // CHECK-SPIR: #[[SPIR_ATTR1]] = { convergent nounwind } diff --git a/clang/test/CodeGenSYCL/windows-msvc-spir64-sse2.cpp b/clang/test/CodeGenSYCL/windows-msvc-spir64-sse2.cpp new file mode 100644 index 0000000000000..021b17a9cf904 --- /dev/null +++ b/clang/test/CodeGenSYCL/windows-msvc-spir64-sse2.cpp @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc \ +// RUN: -fsycl-is-device -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +// When SYCL device code is compiled with a Windows-MSVC host, the device +// target (spir64) defines _M_X64 so that MSVC STL headers take the x86 +// intrinsics path. The device target feature set must correspondingly carry +// sse/sse2 so that function-level __target__ attributes (e.g. VS2026 +// 's [[gnu::target("fma")]] on _Sqr_error_x86_x64_fma) don't strip +// the baseline, which would break intrinsic calls like _mm_set_sd / _mm_store_sd. + +#include "Inputs/sycl.hpp" + +int main() { + sycl::queue q; + q.submit([&](sycl::handler &h) { h.single_task([=] {}); }); + return 0; +} + +// CHECK: spir_kernel void @{{.*}}TestK{{.*}}() [[ATTRS:#[0-9]+]] +// CHECK: attributes [[ATTRS]] = {{.*}}"target-features"="+sse,+sse2" diff --git a/sycl/include/sycl/stl_wrappers/complex b/sycl/include/sycl/stl_wrappers/complex index 861784f33f109..c07c0c358153c 100644 --- a/sycl/include/sycl/stl_wrappers/complex +++ b/sycl/include/sycl/stl_wrappers/complex @@ -15,6 +15,21 @@ #pragma once +// Provide __isa_available for MSVC device code BEFORE including STL headers. +// Must come before #include_next so our definition is seen first. +#if defined(__SYCL_DEVICE_ONLY__) && defined(_MSC_VER) +// VS2026 STL headers use __isa_available (a runtime global variable) to +// detect CPU features: `if (__isa_available >= _Stl_isa_available_avx2) ...`. +// SYCL device code cannot access host runtime globals, so provide a device- +// side definition. The VALUE of this variable only steers the STL's runtime +// feature dispatch — both branches of the dispatch compile either way. We +// pick __ISA_AVAILABLE_X86 (== 0, the baseline in ), +// which matches a spir64 device's reality (no x86 ISA), and so selects the +// STL's scalar fallback paths if these dispatches are ever reached. +extern "C" int __isa_available __attribute__((sycl_global_var)) +__attribute__((weak)) = 0; +#endif // defined(__SYCL_DEVICE_ONLY__) && defined(_MSC_VER) + // Include real STL header - the next one from the include search // directories. #if defined(__has_include_next)