From 02705e03827ed004a514097a1d6d7a3044ab13b4 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 8 May 2026 10:27:35 -0700 Subject: [PATCH 1/7] VS2026 STL is using a runtime global (isa_available) which is not available on GPU. This fix masks and works around it. --- sycl/include/sycl/stl_wrappers/complex | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sycl/include/sycl/stl_wrappers/complex b/sycl/include/sycl/stl_wrappers/complex index 861784f33f109..20155bc0803b0 100644 --- a/sycl/include/sycl/stl_wrappers/complex +++ b/sycl/include/sycl/stl_wrappers/complex @@ -15,6 +15,28 @@ #pragma once +// Provide __isa_available for MSVC device code BEFORE including STL headers. +// Must come before #include_next so our definition is seen first. +#if defined(__SYCL_DEVICE_ONLY__) && defined(_MSC_VER) +// VS2022+ STL headers use __isa_available (a runtime global variable) to detect +// CPU features. SYCL device code cannot access host runtime globals. Provide a +// definition that assumes AVX2+ features are available (value >= 5 enables FMA +// optimizations). Must be non-const to match STL's 'extern int' declaration. +// sycl_global_var attribute suppresses the "non-const global in device code" +// diagnostic. +extern "C" int __isa_available __attribute__((sycl_global_var)) = 10; +#endif // defined(__SYCL_DEVICE_ONLY__) && defined(_MSC_VER) + +// VS2026's <complex> annotates _Sqr_error_x86_x64_fma with +// [[__gnu__::__target__("fma")]]. clang/icx treats target(...) as REPLACING the +// function's target feature set (not augmenting it), so sse2 drops out and the +// function's _mm_set_sd / _mm_store_sd calls fail to compile on spir64. Augment +// the target string so sse2 (and sse) stay in the feature set. +#if defined(__SYCL_DEVICE_ONLY__) && defined(_MSC_VER) +#pragma push_macro("__target__") +#define __target__(x) __target__(x ",sse2,sse") +#endif + // Include real STL header - the next one from the include search // directories. 
#if defined(__has_include_next) @@ -29,6 +51,10 @@ #include <../include/complex> #endif +#if defined(__SYCL_DEVICE_ONLY__) && defined(_MSC_VER) +#pragma pop_macro("__target__") +#endif + #if defined(__NVPTX__) || defined(__AMDGCN__) #include "__sycl_complex_impl.hpp" #endif From 1231205f747d6dfe0c4840e0e07345d68e6fa605 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 11 May 2026 15:12:15 -0700 Subject: [PATCH 2/7] overriding __target__ seemed really hacky. But driver already has to deal with this, so adjusting there. --- clang/lib/Basic/Targets/SPIR.cpp | 16 ++++++++++++ clang/lib/Basic/Targets/SPIR.h | 5 ++++ sycl/include/sycl/stl_wrappers/complex | 35 +++++++++++--------------- 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/clang/lib/Basic/Targets/SPIR.cpp b/clang/lib/Basic/Targets/SPIR.cpp index 19f160e279b21..b81960560d8be 100644 --- a/clang/lib/Basic/Targets/SPIR.cpp +++ b/clang/lib/Basic/Targets/SPIR.cpp @@ -84,6 +84,22 @@ void SPIR64TargetInfo::getTargetDefines(const LangOptions &Opts, DefineStd(Builder, "SPIR64", Opts); } +bool WindowsX86_64_SPIR64TargetInfo::initFeatureMap( + llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, + const std::vector<std::string> &FeaturesVec) const { + // When SYCL device code compiles MSVC STL headers, the headers take the + // x86 intrinsics path under _M_X64 (defined by MicrosoftX86_64_SPIR64- + // TargetInfo). Those intrinsics require sse/sse2 in the target feature set; + // without them, any function-level __target__ attribute that recomputes + // features (e.g. VS2026 <complex>'s [[gnu::target("fma")]] on + // _Sqr_error_x86_x64_fma) strips the baseline and the function's sse2 + // intrinsic calls (_mm_set_sd, _mm_store_sd, ...) fail to compile. Mirror + // X86TargetInfo's "x86_64 always has SSE2" assumption. 
+ Features["sse"] = true; + Features["sse2"] = true; + return SPIR64TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); +} + void BaseSPIRVTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { DefineStd(Builder, "SPIRV", Opts); diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index f5f97af54dca7..8ab65aa56db29 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -395,6 +395,11 @@ class LLVM_LIBRARY_VISIBILITY WindowsX86_64_SPIR64TargetInfo return (CC == CC_SpirFunction || CC == CC_DeviceKernel) ? CCCR_OK : CCCR_Warning; } + + bool initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, + StringRef CPU, + const std::vector<std::string> &FeaturesVec) + const override; }; // x86-64 SPIR64 Windows Visual Studio target diff --git a/sycl/include/sycl/stl_wrappers/complex b/sycl/include/sycl/stl_wrappers/complex index 20155bc0803b0..614d5523d7dc8 100644 --- a/sycl/include/sycl/stl_wrappers/complex +++ b/sycl/include/sycl/stl_wrappers/complex @@ -18,24 +18,21 @@ // Provide __isa_available for MSVC device code BEFORE including STL headers. // Must come before #include_next so our definition is seen first. #if defined(__SYCL_DEVICE_ONLY__) && defined(_MSC_VER) -// VS2022+ STL headers use __isa_available (a runtime global variable) to detect -// CPU features. SYCL device code cannot access host runtime globals. Provide a -// definition that assumes AVX2+ features are available (value >= 5 enables FMA -// optimizations). Must be non-const to match STL's 'extern int' declaration. -// sycl_global_var attribute suppresses the "non-const global in device code" -// diagnostic. -extern "C" int __isa_available __attribute__((sycl_global_var)) = 10; -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(_MSC_VER) - -// VS2026's <complex> annotates _Sqr_error_x86_x64_fma with -// [[__gnu__::__target__("fma")]]. clang/icx treats target(...) 
as REPLACING the -// function's target feature set (not augmenting it), so sse2 drops out and the -// function's _mm_set_sd / _mm_store_sd calls fail to compile on spir64. Augment -// the target string so sse2 (and sse) stay in the feature set. -#if defined(__SYCL_DEVICE_ONLY__) && defined(_MSC_VER) -#pragma push_macro("__target__") -#define __target__(x) __target__(x ",sse2,sse") +// VS2026 STL headers use __isa_available (a runtime global variable) to detect +// CPU features. SYCL device code cannot access host runtime globals, so +// provide a workaround. +// __isa_available is an integer "level" the MSVC STL compares against named +// constants in <isa_availability.h> (e.g. __ISA_AVAILABLE_AVX2 == 5, +// __ISA_AVAILABLE_AVX512 == 6). We pick a value greater than any currently +// defined level so every feature gate in the STL evaluates "available." +#ifndef __SYCL_MSVC_ISA_AVAILABLE_DEVICE_STUB +#define __SYCL_MSVC_ISA_AVAILABLE_DEVICE_STUB 10 // > __ISA_AVAILABLE_AVX512 #endif +extern "C" int __isa_available + __attribute__((sycl_global_var)) + __attribute__((weak)) + = __SYCL_MSVC_ISA_AVAILABLE_DEVICE_STUB; +#endif // defined(__SYCL_DEVICE_ONLY__) && defined(_MSC_VER) // Include real STL header - the next one from the include search // directories. 
@@ -51,10 +48,6 @@ extern "C" int __isa_available __attribute__((sycl_global_var)) = 10; #include <../include/complex> #endif -#if defined(__SYCL_DEVICE_ONLY__) && defined(_MSC_VER) -#pragma pop_macro("__target__") -#endif - #if defined(__NVPTX__) || defined(__AMDGCN__) #include "__sycl_complex_impl.hpp" #endif From 2573f86a4408af97e03444c4eb4ca41582fc913b Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 11 May 2026 16:13:59 -0700 Subject: [PATCH 3/7] tighten up --- sycl/include/sycl/stl_wrappers/complex | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/sycl/include/sycl/stl_wrappers/complex b/sycl/include/sycl/stl_wrappers/complex index 614d5523d7dc8..4c8ca8b93fc54 100644 --- a/sycl/include/sycl/stl_wrappers/complex +++ b/sycl/include/sycl/stl_wrappers/complex @@ -18,20 +18,18 @@ // Provide __isa_available for MSVC device code BEFORE including STL headers. // Must come before #include_next so our definition is seen first. #if defined(__SYCL_DEVICE_ONLY__) && defined(_MSC_VER) -// VS2026 STL headers use __isa_available (a runtime global variable) to detect -// CPU features. SYCL device code cannot access host runtime globals, so -// provide a workaround. -// __isa_available is an integer "level" the MSVC STL compares against named -// constants in <isa_availability.h> (e.g. __ISA_AVAILABLE_AVX2 == 5, -// __ISA_AVAILABLE_AVX512 == 6). We pick a value greater than any currently -// defined level so every feature gate in the STL evaluates "available." -#ifndef __SYCL_MSVC_ISA_AVAILABLE_DEVICE_STUB -#define __SYCL_MSVC_ISA_AVAILABLE_DEVICE_STUB 10 // > __ISA_AVAILABLE_AVX512 -#endif +// VS2026 STL headers use __isa_available (a runtime global variable) to +// detect CPU features: `if (__isa_available >= _Stl_isa_available_avx2) ...`. +// SYCL device code cannot access host runtime globals, so provide a device- +// side definition. 
The VALUE of this variable only steers the STL's runtime +// feature dispatch — both branches of the dispatch compile either way. We +// pick __ISA_AVAILABLE_X86 (== 0, the baseline in <isa_availability.h>), +// which matches a spir64 device's reality (no x86 ISA), and so selects the +// STL's scalar fallback paths if these dispatches are ever reached. extern "C" int __isa_available __attribute__((sycl_global_var)) __attribute__((weak)) - = __SYCL_MSVC_ISA_AVAILABLE_DEVICE_STUB; + = 0; #endif // defined(__SYCL_DEVICE_ONLY__) && defined(_MSC_VER) // Include real STL header - the next one from the include search From bc8bb8dc19528b9b76c464a63756e1c1b6cf32c6 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 11 May 2026 16:27:27 -0700 Subject: [PATCH 4/7] more tight --- clang/lib/Basic/Targets/SPIR.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/clang/lib/Basic/Targets/SPIR.cpp b/clang/lib/Basic/Targets/SPIR.cpp index b81960560d8be..e527464a81515 100644 --- a/clang/lib/Basic/Targets/SPIR.cpp +++ b/clang/lib/Basic/Targets/SPIR.cpp @@ -87,14 +87,9 @@ void SPIR64TargetInfo::getTargetDefines(const LangOptions &Opts, bool WindowsX86_64_SPIR64TargetInfo::initFeatureMap( llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector<std::string> &FeaturesVec) const { - // When SYCL device code compiles MSVC STL headers, the headers take the - // x86 intrinsics path under _M_X64 (defined by MicrosoftX86_64_SPIR64- - // TargetInfo). Those intrinsics require sse/sse2 in the target feature set; - // without them, any function-level __target__ attribute that recomputes - // features (e.g. VS2026 <complex>'s [[gnu::target("fma")]] on - // _Sqr_error_x86_x64_fma) strips the baseline and the function's sse2 - // intrinsic calls (_mm_set_sd, _mm_store_sd, ...) fail to compile. Mirror - // X86TargetInfo's "x86_64 always has SSE2" assumption. 
+ // Mirror X86TargetInfo's "x86_64 always has SSE2" baseline: the matching + // _M_X64 macro makes MSVC STL headers take the x86 intrinsics path, whose + // _mm_* intrinsics require sse/sse2 in the target feature set. Features["sse"] = true; Features["sse2"] = true; return SPIR64TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); From b209ccbc28fa2118dbb2c63f9bff474f3710ed17 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 11 May 2026 17:15:30 -0700 Subject: [PATCH 5/7] test --- .../CodeGenSYCL/windows-msvc-spir64-sse2.cpp | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 clang/test/CodeGenSYCL/windows-msvc-spir64-sse2.cpp diff --git a/clang/test/CodeGenSYCL/windows-msvc-spir64-sse2.cpp b/clang/test/CodeGenSYCL/windows-msvc-spir64-sse2.cpp new file mode 100644 index 0000000000000..021b17a9cf904 --- /dev/null +++ b/clang/test/CodeGenSYCL/windows-msvc-spir64-sse2.cpp @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc \ +// RUN: -fsycl-is-device -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +// When SYCL device code is compiled with a Windows-MSVC host, the device +// target (spir64) defines _M_X64 so that MSVC STL headers take the x86 +// intrinsics path. The device target feature set must correspondingly carry +// sse/sse2 so that function-level __target__ attributes (e.g. VS2026 +// <complex>'s [[gnu::target("fma")]] on _Sqr_error_x86_x64_fma) don't strip +// the baseline, which would break intrinsic calls like _mm_set_sd / _mm_store_sd. 
+ +#include "Inputs/sycl.hpp" + +int main() { + sycl::queue q; + q.submit([&](sycl::handler &h) { h.single_task<class TestK>([=] {}); }); + return 0; +} + +// CHECK: spir_kernel void @{{.*}}TestK{{.*}}() [[ATTRS:#[0-9]+]] +// CHECK: attributes [[ATTRS]] = {{.*}}"target-features"="+sse,+sse2" From a5ebd4ec4e95a3e80e774e07301152eaad5283e7 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 11 May 2026 17:36:35 -0700 Subject: [PATCH 6/7] clang-format to the rescue --- clang/lib/Basic/Targets/SPIR.h | 8 ++++---- sycl/include/sycl/stl_wrappers/complex | 6 ++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index 8ab65aa56db29..8e6e3a0a1dd00 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -396,10 +396,10 @@ class LLVM_LIBRARY_VISIBILITY WindowsX86_64_SPIR64TargetInfo : CCCR_Warning; } - bool initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, - StringRef CPU, - const std::vector<std::string> &FeaturesVec) - const override; + bool + initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, + StringRef CPU, + const std::vector<std::string> &FeaturesVec) const override; }; // x86-64 SPIR64 Windows Visual Studio target diff --git a/sycl/include/sycl/stl_wrappers/complex b/sycl/include/sycl/stl_wrappers/complex index 4c8ca8b93fc54..c07c0c358153c 100644 --- a/sycl/include/sycl/stl_wrappers/complex +++ b/sycl/include/sycl/stl_wrappers/complex @@ -26,10 +26,8 @@ // pick __ISA_AVAILABLE_X86 (== 0, the baseline in <isa_availability.h>), // which matches a spir64 device's reality (no x86 ISA), and so selects the // STL's scalar fallback paths if these dispatches are ever reached. 
-extern "C" int __isa_available - __attribute__((sycl_global_var)) - __attribute__((weak)) - = 0; +extern "C" int __isa_available __attribute__((sycl_global_var)) +__attribute__((weak)) = 0; #endif // defined(__SYCL_DEVICE_ONLY__) && defined(_MSC_VER) // Include real STL header - the next one from the include search From 168383e9d58a1b154419893c51c58737f5f9359c Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 12 May 2026 11:46:03 -0700 Subject: [PATCH 7/7] update test --- .../CodeGenSYCL/kernel-caller-entry-point.cpp | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp index a3c62ddbd4058..f6a4e799991b8 100644 --- a/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp +++ b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp @@ -2,24 +2,24 @@ // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv32-unknown-unknown -std=c++17 
%s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV,CHECK-SPIR-NO-SSE2 %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV,CHECK-SPIR-NO-SSE2 %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV,CHECK-SPIR-NO-SSE2 %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV,CHECK-SPIR-NO-SSE2 %s // RUN: %clang_cc1 -fsycl-is-host -emit-llvm -triple x86_64-pc-windows-msvc -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-HOST,CHECK-HOST-WINDOWS %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spir64-unknown-unknown 
-std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV,CHECK-SPIR-SSE2 %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV,CHECK-SPIR-NO-SSE2 %s // RUN: %clang_cc1 -fsycl-is-host -emit-llvm -triple x86_64-uefi -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-HOST,CHECK-HOST-WINDOWS %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s -// RUN: %clang_cc1 
-fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV,CHECK-SPIR-NO-SSE2 %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV,CHECK-SPIR-NO-SSE2 %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV,CHECK-SPIR-NO-SSE2 %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV,CHECK-SPIR-NO-SSE2 %s // Test code generation for functions declared with the sycl_kernel_entry_point // attribute. During host compilation, the bodies of such functions are replaced @@ -704,5 +704,9 @@ int main() { // CHECK-NVPTX: #[[NVPTX_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-optlevel"="0" } // CHECK-NVPTX: #[[NVPTX_ATTR1]] = { convergent nounwind } // -// CHECK-SPIR: #[[SPIR_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-optlevel"="0" } +// WindowsX86_64_SPIR64TargetInfo::initFeatureMap adds +sse/+sse2 to the +// device-target feature baseline; every other SPIR/SPIRV target class used by +// the RUN lines above leaves it empty. 
+// CHECK-SPIR-NO-SSE2: #[[SPIR_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-optlevel"="0" } +// CHECK-SPIR-SSE2: #[[SPIR_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-optlevel"="0" "target-features"="+sse,+sse2" } // CHECK-SPIR: #[[SPIR_ATTR1]] = { convergent nounwind }