From fe97f537b6357e6280190ce5faf69454c274f0dc Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 10 Mar 2026 17:20:19 -0700 Subject: [PATCH 01/34] Add min precision type infrastructure for long vector tests Add device support helper, wrapper types, input data sets, type registration, and validation for min16float, min16int, min16uint. - doesDeviceSupportMinPrecision() checks D3D12 MinPrecisionSupport - HLSLMin16Float_t/HLSLMin16Int_t/HLSLMin16Uint_t wrapper structs (32-bit storage, matching DXIL layout without -enable-16bit-types) - Input data constrained to 16-bit representable range - DATA_TYPE registrations and isFloatingPointType/isMinPrecisionType traits - doValuesMatch overloads: min16float compares in half-precision space (reuses CompareHalfULP/CompareHalfEpsilon), integers use exact match - TrigonometricValidation specializations matching HLSLHalf_t tolerances Part of: https://github.com/microsoft/DirectXShaderCompiler/issues/7780 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../unittests/HLSLExec/HlslExecTestUtils.cpp | 9 + .../unittests/HLSLExec/HlslExecTestUtils.h | 1 + .../unittests/HLSLExec/LongVectorTestData.h | 244 ++++++++++++++++++ .../clang/unittests/HLSLExec/LongVectors.cpp | 57 +++- 4 files changed, 310 insertions(+), 1 deletion(-) diff --git a/tools/clang/unittests/HLSLExec/HlslExecTestUtils.cpp b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.cpp index b5b05f0d87..1f98dc4574 100644 --- a/tools/clang/unittests/HLSLExec/HlslExecTestUtils.cpp +++ b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.cpp @@ -497,6 +497,15 @@ bool doesDeviceSupportDouble(ID3D12Device *pDevice) { return O.DoublePrecisionFloatShaderOps != FALSE; } +bool doesDeviceSupportMinPrecision(ID3D12Device *pDevice) { + D3D12_FEATURE_DATA_D3D12_OPTIONS O; + if (FAILED(pDevice->CheckFeatureSupport( + (D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS, &O, sizeof(O)))) + return false; + return (O.MinPrecisionSupport & 
D3D12_SHADER_MIN_PRECISION_SUPPORT_16_BIT) != + 0; +} + bool doesDeviceSupportWaveOps(ID3D12Device *pDevice) { D3D12_FEATURE_DATA_D3D12_OPTIONS1 O; if (FAILED(pDevice->CheckFeatureSupport( diff --git a/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h index a16be6a16c..17f1545029 100644 --- a/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h +++ b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h @@ -55,6 +55,7 @@ void readHlslDataIntoNewStream(LPCWSTR RelativePath, IStream **Stream, bool doesDeviceSupportInt64(ID3D12Device *pDevice); bool doesDeviceSupportDouble(ID3D12Device *pDevice); +bool doesDeviceSupportMinPrecision(ID3D12Device *pDevice); bool doesDeviceSupportWaveOps(ID3D12Device *pDevice); bool doesDeviceSupportBarycentrics(ID3D12Device *pDevice); bool doesDeviceSupportNative16bitOps(ID3D12Device *pDevice); diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index 4126d861ac..ff88f243bf 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -237,6 +237,203 @@ struct HLSLHalf_t { DirectX::PackedVector::HALF Val = 0; }; +// Min precision wrapper types. Without -enable-16bit-types, min precision types +// are 32-bit in DXIL storage. These thin wrappers provide distinct C++ types +// that map to different HLSL type strings via DATA_TYPE. 
+struct HLSLMin16Float_t { + HLSLMin16Float_t() : Val(0.0f) {} + HLSLMin16Float_t(float F) : Val(F) {} + HLSLMin16Float_t(double D) : Val(static_cast(D)) {} + HLSLMin16Float_t(int I) : Val(static_cast(I)) {} + HLSLMin16Float_t(uint32_t U) : Val(static_cast(U)) {} + + operator float() const { return Val; } + operator double() const { return static_cast(Val); } + operator int32_t() const { return static_cast(Val); } + operator uint32_t() const { return static_cast(Val); } + operator bool() const { return Val != 0.0f; } + + bool operator==(const HLSLMin16Float_t &O) const { return Val == O.Val; } + bool operator!=(const HLSLMin16Float_t &O) const { return Val != O.Val; } + bool operator<(const HLSLMin16Float_t &O) const { return Val < O.Val; } + bool operator>(const HLSLMin16Float_t &O) const { return Val > O.Val; } + bool operator<=(const HLSLMin16Float_t &O) const { return Val <= O.Val; } + bool operator>=(const HLSLMin16Float_t &O) const { return Val >= O.Val; } + + HLSLMin16Float_t operator+(const HLSLMin16Float_t &O) const { + return HLSLMin16Float_t(Val + O.Val); + } + HLSLMin16Float_t operator-(const HLSLMin16Float_t &O) const { + return HLSLMin16Float_t(Val - O.Val); + } + HLSLMin16Float_t operator*(const HLSLMin16Float_t &O) const { + return HLSLMin16Float_t(Val * O.Val); + } + HLSLMin16Float_t operator/(const HLSLMin16Float_t &O) const { + return HLSLMin16Float_t(Val / O.Val); + } + HLSLMin16Float_t operator%(const HLSLMin16Float_t &O) const { + return HLSLMin16Float_t(std::fmod(Val, O.Val)); + } + + friend std::wostream &operator<<(std::wostream &Os, + const HLSLMin16Float_t &Obj) { + Os << Obj.Val; + return Os; + } + friend std::ostream &operator<<(std::ostream &Os, + const HLSLMin16Float_t &Obj) { + Os << Obj.Val; + return Os; + } + + float Val; +}; +static_assert(sizeof(HLSLMin16Float_t) == sizeof(float), + "HLSLMin16Float_t must be same size as float"); + +struct HLSLMin16Int_t { + HLSLMin16Int_t() : Val(0) {} + HLSLMin16Int_t(int32_t I) : Val(I) {} + 
HLSLMin16Int_t(int64_t I) : Val(static_cast(I)) {} + HLSLMin16Int_t(float F) : Val(static_cast(F)) {} + HLSLMin16Int_t(double D) : Val(static_cast(D)) {} + + operator int32_t() const { return Val; } + operator int64_t() const { return static_cast(Val); } + operator uint32_t() const { return static_cast(Val); } + operator float() const { return static_cast(Val); } + operator double() const { return static_cast(Val); } + operator bool() const { return Val != 0; } + + bool operator==(const HLSLMin16Int_t &O) const { return Val == O.Val; } + bool operator!=(const HLSLMin16Int_t &O) const { return Val != O.Val; } + bool operator<(const HLSLMin16Int_t &O) const { return Val < O.Val; } + bool operator>(const HLSLMin16Int_t &O) const { return Val > O.Val; } + bool operator<=(const HLSLMin16Int_t &O) const { return Val <= O.Val; } + bool operator>=(const HLSLMin16Int_t &O) const { return Val >= O.Val; } + + HLSLMin16Int_t operator+(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val + O.Val); + } + HLSLMin16Int_t operator-(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val - O.Val); + } + HLSLMin16Int_t operator*(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val * O.Val); + } + HLSLMin16Int_t operator/(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val / O.Val); + } + HLSLMin16Int_t operator%(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val % O.Val); + } + HLSLMin16Int_t operator&(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val & O.Val); + } + HLSLMin16Int_t operator|(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val | O.Val); + } + HLSLMin16Int_t operator^(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val ^ O.Val); + } + HLSLMin16Int_t operator<<(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val << O.Val); + } + HLSLMin16Int_t operator>>(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val >> O.Val); + } + HLSLMin16Int_t operator&&(const HLSLMin16Int_t &O) const { + 
return HLSLMin16Int_t(Val && O.Val); + } + HLSLMin16Int_t operator||(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val || O.Val); + } + + friend std::wostream &operator<<(std::wostream &Os, + const HLSLMin16Int_t &Obj) { + Os << Obj.Val; + return Os; + } + friend std::ostream &operator<<(std::ostream &Os, const HLSLMin16Int_t &Obj) { + Os << Obj.Val; + return Os; + } + + int32_t Val; +}; +static_assert(sizeof(HLSLMin16Int_t) == sizeof(int32_t), + "HLSLMin16Int_t must be same size as int32_t"); + +struct HLSLMin16Uint_t { + HLSLMin16Uint_t() : Val(0) {} + HLSLMin16Uint_t(uint32_t U) : Val(U) {} + HLSLMin16Uint_t(uint64_t U) : Val(static_cast(U)) {} + HLSLMin16Uint_t(int32_t I) : Val(static_cast(I)) {} + HLSLMin16Uint_t(float F) : Val(static_cast(F)) {} + HLSLMin16Uint_t(double D) : Val(static_cast(D)) {} + + operator uint32_t() const { return Val; } + operator uint64_t() const { return static_cast(Val); } + operator int32_t() const { return static_cast(Val); } + operator float() const { return static_cast(Val); } + operator double() const { return static_cast(Val); } + operator bool() const { return Val != 0; } + + bool operator==(const HLSLMin16Uint_t &O) const { return Val == O.Val; } + bool operator!=(const HLSLMin16Uint_t &O) const { return Val != O.Val; } + bool operator<(const HLSLMin16Uint_t &O) const { return Val < O.Val; } + bool operator>(const HLSLMin16Uint_t &O) const { return Val > O.Val; } + bool operator<=(const HLSLMin16Uint_t &O) const { return Val <= O.Val; } + bool operator>=(const HLSLMin16Uint_t &O) const { return Val >= O.Val; } + + HLSLMin16Uint_t operator+(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val + O.Val); + } + HLSLMin16Uint_t operator-(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val - O.Val); + } + HLSLMin16Uint_t operator*(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val * O.Val); + } + HLSLMin16Uint_t operator/(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val / 
O.Val); + } + HLSLMin16Uint_t operator%(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val % O.Val); + } + HLSLMin16Uint_t operator&(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val & O.Val); + } + HLSLMin16Uint_t operator|(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val | O.Val); + } + HLSLMin16Uint_t operator^(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val ^ O.Val); + } + HLSLMin16Uint_t operator<<(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val << O.Val); + } + HLSLMin16Uint_t operator>>(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val >> O.Val); + } + + friend std::wostream &operator<<(std::wostream &Os, + const HLSLMin16Uint_t &Obj) { + Os << Obj.Val; + return Os; + } + friend std::ostream &operator<<(std::ostream &Os, + const HLSLMin16Uint_t &Obj) { + Os << Obj.Val; + return Os; + } + + uint32_t Val; +}; +static_assert(sizeof(HLSLMin16Uint_t) == sizeof(uint32_t), + "HLSLMin16Uint_t must be same size as uint32_t"); + enum class InputSet { #define INPUT_SET(SYMBOL) SYMBOL, #include "LongVectorOps.def" @@ -450,6 +647,53 @@ INPUT_SET(InputSet::SelectCond, 0.0, 1.0); INPUT_SET(InputSet::AllOnes, 1.0); END_INPUT_SETS() +// Min precision input sets. Values are within the fp16 representable range. +// No FP specials (INF/NaN/denorm) per issue #7780. 
+BEGIN_INPUT_SETS(HLSLMin16Float_t) +INPUT_SET(InputSet::Default1, -1.0f, -1.0f, 1.0f, -0.01f, 1.0f, -0.01f, 1.0f, + -0.01f, 1.0f, -0.01f); +INPUT_SET(InputSet::Default2, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, + -1.0f, 1.0f, -1.0f); +INPUT_SET(InputSet::Default3, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, + 1.0f, -1.0f, 1.0f); +INPUT_SET(InputSet::Zero, 0.0f); +INPUT_SET(InputSet::RangeHalfPi, -1.073f, 0.044f, -1.047f, 0.313f, 1.447f, + -0.865f, 1.364f, -0.715f, -0.800f, 0.541f); +INPUT_SET(InputSet::RangeOne, 0.331f, 0.727f, -0.957f, 0.677f, -0.025f, 0.495f, + 0.855f, -0.673f, -0.678f, -0.905f); +INPUT_SET(InputSet::Positive, 1.0f, 1.0f, 342.0f, 0.01f, 5531.0f, 0.01f, 1.0f, + 0.01f, 331.233f, 3250.01f); +INPUT_SET(InputSet::SelectCond, 0.0f, 1.0f); +INPUT_SET(InputSet::AllOnes, 1.0f); +END_INPUT_SETS() + +// Values constrained to int16 range. Kept small to avoid overflow ambiguity. +BEGIN_INPUT_SETS(HLSLMin16Int_t) +INPUT_SET(InputSet::Default1, -6, 1, 7, 3, 8, 4, -3, 8, 8, -2); +INPUT_SET(InputSet::Default2, 5, -6, -3, -2, 9, 3, 1, -3, -7, 2); +INPUT_SET(InputSet::Default3, -5, 6, 3, 2, -9, -3, -1, 3, 7, -2); +INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 12, 13, 14, 15); +INPUT_SET(InputSet::Zero, 0); +INPUT_SET(InputSet::NoZero, 1); +INPUT_SET(InputSet::SelectCond, 0, 1); +INPUT_SET(InputSet::AllOnes, 1); +INPUT_SET(InputSet::WaveMultiPrefixBitwise, 0x0, 0x1, 0x3, 0x4, 0x10, 0x12, 0xF, + -1); +END_INPUT_SETS() + +// Values constrained to uint16 range. Kept small to avoid overflow ambiguity. 
+BEGIN_INPUT_SETS(HLSLMin16Uint_t) +INPUT_SET(InputSet::Default1, 1, 699, 3, 1023, 5, 6, 0, 8, 9, 10); +INPUT_SET(InputSet::Default2, 2, 111, 3, 4, 5, 9, 21, 8, 9, 10); +INPUT_SET(InputSet::Default3, 4, 112, 4, 5, 3, 7, 21, 1, 11, 9); +INPUT_SET(InputSet::Zero, 0); +INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 12, 13, 14, 15); +INPUT_SET(InputSet::SelectCond, 0, 1); +INPUT_SET(InputSet::AllOnes, 1); +INPUT_SET(InputSet::WaveMultiPrefixBitwise, 0x0, 0x1, 0x3, 0x4, 0x10, 0x12, 0xF, + std::numeric_limits::max()); +END_INPUT_SETS() + #undef BEGIN_INPUT_SETS #undef INPUT_SET #undef END_INPUT_SETS diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index dbb8a8d672..66ecb2bd57 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -59,6 +59,9 @@ DATA_TYPE(uint16_t, "uint16_t", 2) DATA_TYPE(uint32_t, "uint32_t", 4) DATA_TYPE(uint64_t, "uint64_t", 8) DATA_TYPE(HLSLHalf_t, "half", 2) +DATA_TYPE(HLSLMin16Float_t, "min16float", 4) +DATA_TYPE(HLSLMin16Int_t, "min16int", 4) +DATA_TYPE(HLSLMin16Uint_t, "min16uint", 4) DATA_TYPE(float, "float", 4) DATA_TYPE(double, "double", 8) @@ -66,7 +69,13 @@ DATA_TYPE(double, "double", 8) template constexpr bool isFloatingPointType() { return std::is_same_v || std::is_same_v || - std::is_same_v; + std::is_same_v || std::is_same_v; +} + +template constexpr bool isMinPrecisionType() { + return std::is_same_v || + std::is_same_v || + std::is_same_v; } // @@ -218,6 +227,34 @@ bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, double Tolerance, } } +// Min precision float comparison: convert to half and compare in fp16 space. +// This reuses the same tolerance values as HLSLHalf_t. Min precision is at +// least 16-bit, so fp16 tolerances are an upper bound for all cases. 
+bool doValuesMatch(HLSLMin16Float_t A, HLSLMin16Float_t B, double Tolerance, + ValidationType ValidationType) { + auto HalfA = DirectX::PackedVector::XMConvertFloatToHalf(A.Val); + auto HalfB = DirectX::PackedVector::XMConvertFloatToHalf(B.Val); + switch (ValidationType) { + case ValidationType::Epsilon: + return CompareHalfEpsilon(HalfA, HalfB, static_cast(Tolerance)); + case ValidationType::Ulp: + return CompareHalfULP(HalfA, HalfB, static_cast(Tolerance)); + default: + hlsl_test::LogErrorFmt( + L"Invalid ValidationType. Expecting Epsilon or ULP."); + return false; + } +} + +bool doValuesMatch(HLSLMin16Int_t A, HLSLMin16Int_t B, double, ValidationType) { + return A == B; +} + +bool doValuesMatch(HLSLMin16Uint_t A, HLSLMin16Uint_t B, double, + ValidationType) { + return A == B; +} + bool doValuesMatch(float A, float B, double Tolerance, ValidationType ValidationType) { switch (ValidationType) { @@ -917,6 +954,24 @@ template <> struct TrigonometricValidation { ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f); }; +// Min precision trig tolerances: same as half precision since min precision +// is at least 16-bit and our doValuesMatch compares in half-precision space. 
+template struct TrigonometricValidation { + ValidationConfig ValidationConfig = ValidationConfig::Epsilon(0.003f); +}; + +template <> struct TrigonometricValidation { + ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f); +}; + +template <> struct TrigonometricValidation { + ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f); +}; + +template <> struct TrigonometricValidation { + ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f); +}; + #define TRIG_OP(OP, IMPL) \ template struct Op : TrigonometricValidation { \ T operator()(T A) { return IMPL; } \ From 4172cd810af70a6394a7735934e9610a8edf1495 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 10 Mar 2026 17:22:03 -0700 Subject: [PATCH 02/34] Add min precision test class and test entries for long vectors Add DxilConf_SM69_Vectorized_MinPrecision test class with HLK_TEST_MINP and HLK_WAVEOP_TEST_MINP macros. Mirrors 16-bit counterpart coverage (HLSLHalf_t/int16_t/uint16_t) minus documented exclusions. 
- New test class with Kits.Specification = Device.Graphics.D3D12.DXILCore.ShaderModel69.MinPrecision - setupClass skips when device lacks min precision support - ~160 test entries across 3 types (min16float/min16int/min16uint) - MakeDifferent overloads in ShaderOpArith.xml (not gated by __HLSL_ENABLE_16_BIT since min precision is always available) - Excluded: FP specials, AsType, Cast, bit-manipulation ops Part of: https://github.com/microsoft/DirectXShaderCompiler/issues/7780 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../unittests/HLSLExec/LongVectorTestData.h | 3 + .../clang/unittests/HLSLExec/LongVectors.cpp | 324 ++++++++++++++++++ .../unittests/HLSLExec/ShaderOpArith.xml | 4 + 3 files changed, 331 insertions(+) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index ff88f243bf..c90b45f744 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -418,6 +418,9 @@ struct HLSLMin16Uint_t { return HLSLMin16Uint_t(Val >> O.Val); } + bool operator&&(const HLSLMin16Uint_t &O) const { return Val && O.Val; } + bool operator||(const HLSLMin16Uint_t &O) const { return Val || O.Val; } + friend std::wostream &operator<<(std::wostream &Os, const HLSLMin16Uint_t &Obj) { Os << Obj.Val; diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 66ecb2bd57..15551cb3b6 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -2919,3 +2919,327 @@ class DxilConf_SM69_Vectorized_Double : public TestClassCommon { HLK_WAVEOP_TEST_DOUBLE(WaveMultiPrefixProduct, double); HLK_WAVEOP_TEST_DOUBLE(WaveMatch, double); }; + +#define HLK_TEST_MINP(Op, DataType) \ + TEST_METHOD(Op##_##DataType) { \ + BEGIN_TEST_METHOD_PROPERTIES() \ + TEST_METHOD_PROPERTY( \ + "Kits.Specification", \ + 
"Device.Graphics.D3D12.DXILCore.ShaderModel69.MinPrecision") \ + END_TEST_METHOD_PROPERTIES() \ + runTest(); \ + } + +#define HLK_WAVEOP_TEST_MINP(Op, DataType) \ + TEST_METHOD(Op##_##DataType) { \ + BEGIN_TEST_METHOD_PROPERTIES() \ + TEST_METHOD_PROPERTY( \ + "Kits.Specification", \ + "Device.Graphics.D3D12.DXILCore.ShaderModel69.MinPrecision") \ + END_TEST_METHOD_PROPERTIES() \ + runWaveOpTest(); \ + } + +class DxilConf_SM69_Vectorized_MinPrecision : public TestClassCommon { +public: + BEGIN_TEST_CLASS(DxilConf_SM69_Vectorized_MinPrecision) + TEST_CLASS_PROPERTY( + "Kits.TestName", + "D3D12 - Shader Model 6.9 - Vectorized DXIL - Min Precision Tests") + TEST_CLASS_PROPERTY("Kits.TestId", "b4e2c8a1-3f7d-4a9e-8b5c-1d6e0f2a3b4c") + TEST_CLASS_PROPERTY( + "Kits.Description", + "Validates required min precision SM 6.9 vectorized DXIL operations") + TEST_METHOD_PROPERTY( + "Kits.Specification", + "Device.Graphics.D3D12.DXILCore.ShaderModel69.MinPrecision") + TEST_METHOD_PROPERTY(L"Priority", L"0") + END_TEST_CLASS() + + TEST_CLASS_SETUP(setupClass) { + const bool result = TestClassCommon::setupClass(); +#ifndef _HLK_CONF + if (result && !doesDeviceSupportMinPrecision(D3DDevice)) { + WEX::Logging::Log::Comment( + L"Skipping test as device does not support min precision."); + WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); + return false; + } +#endif + return result; + } + + TEST_METHOD_SETUP(setupMethod) { return TestClassCommon::setupMethod(); } + + // ---- HLSLMin16Float_t (mirrors HLSLHalf_t) ---- + + // TernaryMath + HLK_TEST_MINP(Mad, HLSLMin16Float_t); + + // BinaryMath + HLK_TEST_MINP(Add, HLSLMin16Float_t); + HLK_TEST_MINP(Subtract, HLSLMin16Float_t); + HLK_TEST_MINP(Multiply, HLSLMin16Float_t); + HLK_TEST_MINP(Divide, HLSLMin16Float_t); + HLK_TEST_MINP(Modulus, HLSLMin16Float_t); + HLK_TEST_MINP(Min, HLSLMin16Float_t); + HLK_TEST_MINP(Max, HLSLMin16Float_t); + HLK_TEST_MINP(Ldexp, HLSLMin16Float_t); + + // Saturate + HLK_TEST_MINP(Saturate, 
HLSLMin16Float_t); + + // Unary + HLK_TEST_MINP(Initialize, HLSLMin16Float_t); + HLK_TEST_MINP(ArrayOperator_StaticAccess, HLSLMin16Float_t); + HLK_TEST_MINP(ArrayOperator_DynamicAccess, HLSLMin16Float_t); + HLK_TEST_MINP(ShuffleVector, HLSLMin16Float_t); + + // Trigonometric + HLK_TEST_MINP(Acos, HLSLMin16Float_t); + HLK_TEST_MINP(Asin, HLSLMin16Float_t); + HLK_TEST_MINP(Atan, HLSLMin16Float_t); + HLK_TEST_MINP(Cos, HLSLMin16Float_t); + HLK_TEST_MINP(Cosh, HLSLMin16Float_t); + HLK_TEST_MINP(Sin, HLSLMin16Float_t); + HLK_TEST_MINP(Sinh, HLSLMin16Float_t); + HLK_TEST_MINP(Tan, HLSLMin16Float_t); + HLK_TEST_MINP(Tanh, HLSLMin16Float_t); + + // UnaryMath + HLK_TEST_MINP(Abs, HLSLMin16Float_t); + HLK_TEST_MINP(Ceil, HLSLMin16Float_t); + HLK_TEST_MINP(Exp, HLSLMin16Float_t); + HLK_TEST_MINP(Floor, HLSLMin16Float_t); + HLK_TEST_MINP(Frac, HLSLMin16Float_t); + HLK_TEST_MINP(Log, HLSLMin16Float_t); + HLK_TEST_MINP(Rcp, HLSLMin16Float_t); + HLK_TEST_MINP(Round, HLSLMin16Float_t); + HLK_TEST_MINP(Rsqrt, HLSLMin16Float_t); + HLK_TEST_MINP(Sign, HLSLMin16Float_t); + HLK_TEST_MINP(Sqrt, HLSLMin16Float_t); + HLK_TEST_MINP(Trunc, HLSLMin16Float_t); + HLK_TEST_MINP(Exp2, HLSLMin16Float_t); + HLK_TEST_MINP(Log10, HLSLMin16Float_t); + HLK_TEST_MINP(Log2, HLSLMin16Float_t); + + // BinaryComparison + HLK_TEST_MINP(LessThan, HLSLMin16Float_t); + HLK_TEST_MINP(LessEqual, HLSLMin16Float_t); + HLK_TEST_MINP(GreaterThan, HLSLMin16Float_t); + HLK_TEST_MINP(GreaterEqual, HLSLMin16Float_t); + HLK_TEST_MINP(Equal, HLSLMin16Float_t); + HLK_TEST_MINP(NotEqual, HLSLMin16Float_t); + + // Select + HLK_TEST_MINP(Select, HLSLMin16Float_t); + + // Dot + HLK_TEST_MINP(Dot, HLSLMin16Float_t); + + // LoadAndStore + HLK_TEST_MINP(LoadAndStore_RDH_BAB_SRV, HLSLMin16Float_t); + HLK_TEST_MINP(LoadAndStore_RDH_BAB_UAV, HLSLMin16Float_t); + HLK_TEST_MINP(LoadAndStore_DT_BAB_SRV, HLSLMin16Float_t); + HLK_TEST_MINP(LoadAndStore_DT_BAB_UAV, HLSLMin16Float_t); + HLK_TEST_MINP(LoadAndStore_RD_BAB_SRV, 
HLSLMin16Float_t); + HLK_TEST_MINP(LoadAndStore_RD_BAB_UAV, HLSLMin16Float_t); + HLK_TEST_MINP(LoadAndStore_RDH_SB_SRV, HLSLMin16Float_t); + HLK_TEST_MINP(LoadAndStore_RDH_SB_UAV, HLSLMin16Float_t); + HLK_TEST_MINP(LoadAndStore_DT_SB_SRV, HLSLMin16Float_t); + HLK_TEST_MINP(LoadAndStore_DT_SB_UAV, HLSLMin16Float_t); + HLK_TEST_MINP(LoadAndStore_RD_SB_SRV, HLSLMin16Float_t); + HLK_TEST_MINP(LoadAndStore_RD_SB_UAV, HLSLMin16Float_t); + + // Derivative + HLK_TEST_MINP(DerivativeDdx, HLSLMin16Float_t); + HLK_TEST_MINP(DerivativeDdy, HLSLMin16Float_t); + HLK_TEST_MINP(DerivativeDdxFine, HLSLMin16Float_t); + HLK_TEST_MINP(DerivativeDdyFine, HLSLMin16Float_t); + + // Quad + HLK_TEST_MINP(QuadReadLaneAt, HLSLMin16Float_t); + HLK_TEST_MINP(QuadReadAcrossX, HLSLMin16Float_t); + HLK_TEST_MINP(QuadReadAcrossY, HLSLMin16Float_t); + HLK_TEST_MINP(QuadReadAcrossDiagonal, HLSLMin16Float_t); + + // Wave + HLK_WAVEOP_TEST_MINP(WaveActiveSum, HLSLMin16Float_t); + HLK_WAVEOP_TEST_MINP(WaveActiveMin, HLSLMin16Float_t); + HLK_WAVEOP_TEST_MINP(WaveActiveMax, HLSLMin16Float_t); + HLK_WAVEOP_TEST_MINP(WaveActiveProduct, HLSLMin16Float_t); + HLK_WAVEOP_TEST_MINP(WaveActiveAllEqual, HLSLMin16Float_t); + HLK_WAVEOP_TEST_MINP(WaveReadLaneAt, HLSLMin16Float_t); + HLK_WAVEOP_TEST_MINP(WaveReadLaneFirst, HLSLMin16Float_t); + HLK_WAVEOP_TEST_MINP(WavePrefixSum, HLSLMin16Float_t); + HLK_WAVEOP_TEST_MINP(WavePrefixProduct, HLSLMin16Float_t); + HLK_WAVEOP_TEST_MINP(WaveMultiPrefixSum, HLSLMin16Float_t); + HLK_WAVEOP_TEST_MINP(WaveMultiPrefixProduct, HLSLMin16Float_t); + HLK_WAVEOP_TEST_MINP(WaveMatch, HLSLMin16Float_t); + + // ---- HLSLMin16Int_t (mirrors int16_t) ---- + + // TernaryMath + HLK_TEST_MINP(Mad, HLSLMin16Int_t); + + // BinaryMath + HLK_TEST_MINP(Add, HLSLMin16Int_t); + HLK_TEST_MINP(Subtract, HLSLMin16Int_t); + HLK_TEST_MINP(Multiply, HLSLMin16Int_t); + HLK_TEST_MINP(Divide, HLSLMin16Int_t); + HLK_TEST_MINP(Modulus, HLSLMin16Int_t); + HLK_TEST_MINP(Min, HLSLMin16Int_t); + 
HLK_TEST_MINP(Max, HLSLMin16Int_t); + + // Bitwise (logical and shift — bit-manipulation excluded) + HLK_TEST_MINP(And, HLSLMin16Int_t); + HLK_TEST_MINP(Or, HLSLMin16Int_t); + HLK_TEST_MINP(Xor, HLSLMin16Int_t); + HLK_TEST_MINP(LeftShift, HLSLMin16Int_t); + HLK_TEST_MINP(RightShift, HLSLMin16Int_t); + + // UnaryMath + HLK_TEST_MINP(Abs, HLSLMin16Int_t); + HLK_TEST_MINP(Sign, HLSLMin16Int_t); + + // Unary + HLK_TEST_MINP(Initialize, HLSLMin16Int_t); + HLK_TEST_MINP(ArrayOperator_StaticAccess, HLSLMin16Int_t); + HLK_TEST_MINP(ArrayOperator_DynamicAccess, HLSLMin16Int_t); + HLK_TEST_MINP(ShuffleVector, HLSLMin16Int_t); + + // BinaryComparison + HLK_TEST_MINP(LessThan, HLSLMin16Int_t); + HLK_TEST_MINP(LessEqual, HLSLMin16Int_t); + HLK_TEST_MINP(GreaterThan, HLSLMin16Int_t); + HLK_TEST_MINP(GreaterEqual, HLSLMin16Int_t); + HLK_TEST_MINP(Equal, HLSLMin16Int_t); + HLK_TEST_MINP(NotEqual, HLSLMin16Int_t); + + // Select + HLK_TEST_MINP(Select, HLSLMin16Int_t); + + // Reduction + HLK_TEST_MINP(Any_Mixed, HLSLMin16Int_t); + HLK_TEST_MINP(Any_Zero, HLSLMin16Int_t); + HLK_TEST_MINP(Any_NoZero, HLSLMin16Int_t); + HLK_TEST_MINP(All_Mixed, HLSLMin16Int_t); + HLK_TEST_MINP(All_Zero, HLSLMin16Int_t); + HLK_TEST_MINP(All_NoZero, HLSLMin16Int_t); + + // LoadAndStore + HLK_TEST_MINP(LoadAndStore_RDH_BAB_SRV, HLSLMin16Int_t); + HLK_TEST_MINP(LoadAndStore_RDH_BAB_UAV, HLSLMin16Int_t); + HLK_TEST_MINP(LoadAndStore_DT_BAB_SRV, HLSLMin16Int_t); + HLK_TEST_MINP(LoadAndStore_DT_BAB_UAV, HLSLMin16Int_t); + HLK_TEST_MINP(LoadAndStore_RD_BAB_SRV, HLSLMin16Int_t); + HLK_TEST_MINP(LoadAndStore_RD_BAB_UAV, HLSLMin16Int_t); + HLK_TEST_MINP(LoadAndStore_RDH_SB_SRV, HLSLMin16Int_t); + HLK_TEST_MINP(LoadAndStore_RDH_SB_UAV, HLSLMin16Int_t); + HLK_TEST_MINP(LoadAndStore_DT_SB_SRV, HLSLMin16Int_t); + HLK_TEST_MINP(LoadAndStore_DT_SB_UAV, HLSLMin16Int_t); + HLK_TEST_MINP(LoadAndStore_RD_SB_SRV, HLSLMin16Int_t); + HLK_TEST_MINP(LoadAndStore_RD_SB_UAV, HLSLMin16Int_t); + + // Quad + 
HLK_TEST_MINP(QuadReadLaneAt, HLSLMin16Int_t); + HLK_TEST_MINP(QuadReadAcrossX, HLSLMin16Int_t); + HLK_TEST_MINP(QuadReadAcrossY, HLSLMin16Int_t); + HLK_TEST_MINP(QuadReadAcrossDiagonal, HLSLMin16Int_t); + + // Wave + HLK_WAVEOP_TEST_MINP(WaveActiveSum, HLSLMin16Int_t); + HLK_WAVEOP_TEST_MINP(WaveActiveMin, HLSLMin16Int_t); + HLK_WAVEOP_TEST_MINP(WaveActiveMax, HLSLMin16Int_t); + HLK_WAVEOP_TEST_MINP(WaveActiveProduct, HLSLMin16Int_t); + HLK_WAVEOP_TEST_MINP(WaveActiveAllEqual, HLSLMin16Int_t); + HLK_WAVEOP_TEST_MINP(WaveReadLaneAt, HLSLMin16Int_t); + HLK_WAVEOP_TEST_MINP(WaveReadLaneFirst, HLSLMin16Int_t); + HLK_WAVEOP_TEST_MINP(WavePrefixSum, HLSLMin16Int_t); + HLK_WAVEOP_TEST_MINP(WavePrefixProduct, HLSLMin16Int_t); + HLK_WAVEOP_TEST_MINP(WaveMultiPrefixSum, HLSLMin16Int_t); + HLK_WAVEOP_TEST_MINP(WaveMultiPrefixProduct, HLSLMin16Int_t); + HLK_WAVEOP_TEST_MINP(WaveMultiPrefixBitAnd, HLSLMin16Int_t); + HLK_WAVEOP_TEST_MINP(WaveMultiPrefixBitOr, HLSLMin16Int_t); + HLK_WAVEOP_TEST_MINP(WaveMultiPrefixBitXor, HLSLMin16Int_t); + HLK_WAVEOP_TEST_MINP(WaveMatch, HLSLMin16Int_t); + + // ---- HLSLMin16Uint_t (mirrors uint16_t) ---- + + // TernaryMath + HLK_TEST_MINP(Mad, HLSLMin16Uint_t); + + // BinaryMath + HLK_TEST_MINP(Add, HLSLMin16Uint_t); + HLK_TEST_MINP(Subtract, HLSLMin16Uint_t); + HLK_TEST_MINP(Multiply, HLSLMin16Uint_t); + HLK_TEST_MINP(Divide, HLSLMin16Uint_t); + HLK_TEST_MINP(Modulus, HLSLMin16Uint_t); + HLK_TEST_MINP(Min, HLSLMin16Uint_t); + HLK_TEST_MINP(Max, HLSLMin16Uint_t); + + // Bitwise (logical and shift — bit-manipulation excluded) + HLK_TEST_MINP(And, HLSLMin16Uint_t); + HLK_TEST_MINP(Or, HLSLMin16Uint_t); + HLK_TEST_MINP(Xor, HLSLMin16Uint_t); + HLK_TEST_MINP(LeftShift, HLSLMin16Uint_t); + HLK_TEST_MINP(RightShift, HLSLMin16Uint_t); + + // UnaryMath + HLK_TEST_MINP(Abs, HLSLMin16Uint_t); + HLK_TEST_MINP(Sign, HLSLMin16Uint_t); + + // Unary + HLK_TEST_MINP(Initialize, HLSLMin16Uint_t); + HLK_TEST_MINP(ArrayOperator_StaticAccess, HLSLMin16Uint_t); + 
HLK_TEST_MINP(ArrayOperator_DynamicAccess, HLSLMin16Uint_t); + HLK_TEST_MINP(ShuffleVector, HLSLMin16Uint_t); + + // BinaryComparison + HLK_TEST_MINP(LessThan, HLSLMin16Uint_t); + HLK_TEST_MINP(LessEqual, HLSLMin16Uint_t); + HLK_TEST_MINP(GreaterThan, HLSLMin16Uint_t); + HLK_TEST_MINP(GreaterEqual, HLSLMin16Uint_t); + HLK_TEST_MINP(Equal, HLSLMin16Uint_t); + HLK_TEST_MINP(NotEqual, HLSLMin16Uint_t); + + // Select + HLK_TEST_MINP(Select, HLSLMin16Uint_t); + + // LoadAndStore + HLK_TEST_MINP(LoadAndStore_RDH_BAB_SRV, HLSLMin16Uint_t); + HLK_TEST_MINP(LoadAndStore_RDH_BAB_UAV, HLSLMin16Uint_t); + HLK_TEST_MINP(LoadAndStore_DT_BAB_SRV, HLSLMin16Uint_t); + HLK_TEST_MINP(LoadAndStore_DT_BAB_UAV, HLSLMin16Uint_t); + HLK_TEST_MINP(LoadAndStore_RD_BAB_SRV, HLSLMin16Uint_t); + HLK_TEST_MINP(LoadAndStore_RD_BAB_UAV, HLSLMin16Uint_t); + HLK_TEST_MINP(LoadAndStore_RDH_SB_SRV, HLSLMin16Uint_t); + HLK_TEST_MINP(LoadAndStore_RDH_SB_UAV, HLSLMin16Uint_t); + HLK_TEST_MINP(LoadAndStore_DT_SB_SRV, HLSLMin16Uint_t); + HLK_TEST_MINP(LoadAndStore_DT_SB_UAV, HLSLMin16Uint_t); + HLK_TEST_MINP(LoadAndStore_RD_SB_SRV, HLSLMin16Uint_t); + HLK_TEST_MINP(LoadAndStore_RD_SB_UAV, HLSLMin16Uint_t); + + // Quad + HLK_TEST_MINP(QuadReadLaneAt, HLSLMin16Uint_t); + HLK_TEST_MINP(QuadReadAcrossX, HLSLMin16Uint_t); + HLK_TEST_MINP(QuadReadAcrossY, HLSLMin16Uint_t); + HLK_TEST_MINP(QuadReadAcrossDiagonal, HLSLMin16Uint_t); + + // Wave + HLK_WAVEOP_TEST_MINP(WaveActiveSum, HLSLMin16Uint_t); + HLK_WAVEOP_TEST_MINP(WaveActiveMin, HLSLMin16Uint_t); + HLK_WAVEOP_TEST_MINP(WaveActiveMax, HLSLMin16Uint_t); + HLK_WAVEOP_TEST_MINP(WaveActiveProduct, HLSLMin16Uint_t); + HLK_WAVEOP_TEST_MINP(WaveActiveAllEqual, HLSLMin16Uint_t); + HLK_WAVEOP_TEST_MINP(WaveReadLaneAt, HLSLMin16Uint_t); + HLK_WAVEOP_TEST_MINP(WaveReadLaneFirst, HLSLMin16Uint_t); + HLK_WAVEOP_TEST_MINP(WavePrefixSum, HLSLMin16Uint_t); + HLK_WAVEOP_TEST_MINP(WavePrefixProduct, HLSLMin16Uint_t); + HLK_WAVEOP_TEST_MINP(WaveMultiPrefixSum, 
HLSLMin16Uint_t); + HLK_WAVEOP_TEST_MINP(WaveMultiPrefixProduct, HLSLMin16Uint_t); + HLK_WAVEOP_TEST_MINP(WaveMultiPrefixBitAnd, HLSLMin16Uint_t); + HLK_WAVEOP_TEST_MINP(WaveMultiPrefixBitOr, HLSLMin16Uint_t); + HLK_WAVEOP_TEST_MINP(WaveMultiPrefixBitXor, HLSLMin16Uint_t); + HLK_WAVEOP_TEST_MINP(WaveMatch, HLSLMin16Uint_t); +}; diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index 3e8dd1ee6e..319d468a43 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -4218,6 +4218,10 @@ void MSMain(uint GID : SV_GroupIndex, float MakeDifferent(float A) { return A + 1.0f; } double MakeDifferent(double A) { return A + 1.0; } + min16float MakeDifferent(min16float A) { return A + (min16float)1.0f; } + min16int MakeDifferent(min16int A) { return A ^ 1; } + min16uint MakeDifferent(min16uint A) { return A ^ 1; } + #if __HLSL_ENABLE_16_BIT uint16_t MakeDifferent(uint16_t A) { return A ^ 1; } int16_t MakeDifferent(int16_t A) { return A ^ 1; } From 54731ffe662d0a581e6f876d307e63cfc0db6d24 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 10 Mar 2026 17:36:04 -0700 Subject: [PATCH 03/34] Fix ambiguous operator overload in min precision wrapper types Make all but one conversion operator explicit per wrapper type to avoid C2666 ambiguity with built-in arithmetic operators. Matches HLSLHalf_t pattern: one implicit conversion to the natural type (float/int32_t/uint32_t), all others explicit. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../unittests/HLSLExec/LongVectorTestData.h | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index c90b45f744..61d7074777 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -248,10 +248,10 @@ struct HLSLMin16Float_t { HLSLMin16Float_t(uint32_t U) : Val(static_cast(U)) {} operator float() const { return Val; } - operator double() const { return static_cast(Val); } - operator int32_t() const { return static_cast(Val); } - operator uint32_t() const { return static_cast(Val); } - operator bool() const { return Val != 0.0f; } + explicit operator double() const { return static_cast(Val); } + explicit operator int32_t() const { return static_cast(Val); } + explicit operator uint32_t() const { return static_cast(Val); } + explicit operator bool() const { return Val != 0.0f; } bool operator==(const HLSLMin16Float_t &O) const { return Val == O.Val; } bool operator!=(const HLSLMin16Float_t &O) const { return Val != O.Val; } @@ -300,11 +300,11 @@ struct HLSLMin16Int_t { HLSLMin16Int_t(double D) : Val(static_cast(D)) {} operator int32_t() const { return Val; } - operator int64_t() const { return static_cast(Val); } - operator uint32_t() const { return static_cast(Val); } - operator float() const { return static_cast(Val); } - operator double() const { return static_cast(Val); } - operator bool() const { return Val != 0; } + explicit operator int64_t() const { return static_cast(Val); } + explicit operator uint32_t() const { return static_cast(Val); } + explicit operator float() const { return static_cast(Val); } + explicit operator double() const { return static_cast(Val); } + explicit operator bool() const { return Val != 0; } bool operator==(const HLSLMin16Int_t &O) const { return Val == 
O.Val; } bool operator!=(const HLSLMin16Int_t &O) const { return Val != O.Val; } @@ -374,11 +374,11 @@ struct HLSLMin16Uint_t { HLSLMin16Uint_t(double D) : Val(static_cast<uint32_t>(D)) {} operator uint32_t() const { return Val; } - operator uint64_t() const { return static_cast<uint64_t>(Val); } - operator int32_t() const { return static_cast<int32_t>(Val); } - operator float() const { return static_cast<float>(Val); } - operator double() const { return static_cast<double>(Val); } - operator bool() const { return Val != 0; } + explicit operator uint64_t() const { return static_cast<uint64_t>(Val); } + explicit operator int32_t() const { return static_cast<int32_t>(Val); } + explicit operator float() const { return static_cast<float>(Val); } + explicit operator double() const { return static_cast<double>(Val); } + explicit operator bool() const { return Val != 0; } bool operator==(const HLSLMin16Uint_t &O) const { return Val == O.Val; } bool operator!=(const HLSLMin16Uint_t &O) const { return Val != O.Val; } From 011c9c575f5b044d79826da9b46a1ddd90ae3f20 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 10 Mar 2026 17:47:17 -0700 Subject: [PATCH 04/34] Fix C2666: make int/uint constructors explicit on HLSLMin16Float_t The implicit operator float() combined with implicit constructors from int/uint32_t created ambiguity for expressions like 'A + 4': the compiler could not choose between member operator+(HLSLMin16Float_t) via constructor and built-in float+int via conversion. Making the int/uint constructors explicit eliminates the member operator+ path for int literals while preserving T(0) direct construction and implicit float conversion for std:: math functions.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tools/clang/unittests/HLSLExec/LongVectorTestData.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index 61d7074777..0050908db4 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -244,8 +244,8 @@ struct HLSLMin16Float_t { HLSLMin16Float_t() : Val(0.0f) {} HLSLMin16Float_t(float F) : Val(F) {} HLSLMin16Float_t(double D) : Val(static_cast<float>(D)) {} - HLSLMin16Float_t(int I) : Val(static_cast<float>(I)) {} - HLSLMin16Float_t(uint32_t U) : Val(static_cast<float>(U)) {} + explicit HLSLMin16Float_t(int I) : Val(static_cast<float>(I)) {} + explicit HLSLMin16Float_t(uint32_t U) : Val(static_cast<float>(U)) {} operator float() const { return Val; } explicit operator double() const { return static_cast<double>(Val); } From 3e4aa581d0f68d0fceca1eebf2c81ea727656f66 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 10 Mar 2026 17:51:16 -0700 Subject: [PATCH 05/34] Add missing constructors and operators to min precision int types HLSLMin16Int_t: add uint32_t and uint64_t constructors for static_cast<T>(UINT) and static_cast<T>(size_t) patterns used in shift masking and wave ops. Add operator~() for bitwise NOT in WaveMultiPrefixBit ops. HLSLMin16Uint_t: add operator~() for the same reason.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tools/clang/unittests/HLSLExec/LongVectorTestData.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index 0050908db4..d2790d18cb 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -296,6 +296,8 @@ struct HLSLMin16Int_t { HLSLMin16Int_t() : Val(0) {} HLSLMin16Int_t(int32_t I) : Val(I) {} HLSLMin16Int_t(int64_t I) : Val(static_cast<int32_t>(I)) {} + HLSLMin16Int_t(uint32_t U) : Val(static_cast<int32_t>(U)) {} + HLSLMin16Int_t(uint64_t U) : Val(static_cast<int32_t>(U)) {} HLSLMin16Int_t(float F) : Val(static_cast<int32_t>(F)) {} HLSLMin16Int_t(double D) : Val(static_cast<int32_t>(D)) {} @@ -349,6 +351,7 @@ struct HLSLMin16Int_t { HLSLMin16Int_t operator||(const HLSLMin16Int_t &O) const { return HLSLMin16Int_t(Val || O.Val); } + HLSLMin16Int_t operator~() const { return HLSLMin16Int_t(~Val); } friend std::wostream &operator<<(std::wostream &Os, const HLSLMin16Int_t &Obj) { @@ -420,6 +423,7 @@ struct HLSLMin16Uint_t { bool operator&&(const HLSLMin16Uint_t &O) const { return Val && O.Val; } bool operator||(const HLSLMin16Uint_t &O) const { return Val || O.Val; } + HLSLMin16Uint_t operator~() const { return HLSLMin16Uint_t(~Val); } friend std::wostream &operator<<(std::wostream &Os, const HLSLMin16Uint_t &Obj) { From bf0b3f57d7b8dd44f314795e431b34dbf1990390 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 10 Mar 2026 17:53:38 -0700 Subject: [PATCH 06/34] Fix abs ambiguity for unsigned wrapper and constexpr literal type UnaryMathAbs: extend unsigned check to include HLSLMin16Uint_t (std::is_unsigned_v is false for class types, so abs was called with ambiguous overloads via implicit operator uint32_t). MaskShiftAmount: change constexpr to const since wrapper types are not literal types (no constexpr constructors).
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tools/clang/unittests/HLSLExec/LongVectors.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 15551cb3b6..0d0dd373fd 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -797,7 +797,7 @@ DEFAULT_OP_2(OpType::Xor, (A ^ B)); // 32-bit, 6 bits for 64-bit). We must do the same in C++ to avoid undefined // behavior when shift amount >= bit width, and to match GPU results. template <typename T> T MaskShiftAmount(T ShiftAmount) { - constexpr T ShiftMask = static_cast<T>(sizeof(T) * 8 - 1); + const T ShiftMask = static_cast<T>(sizeof(T) * 8 - 1); return ShiftAmount & ShiftMask; } @@ -1128,7 +1128,7 @@ template <> struct ExpectedBuilder { // template <typename T> T UnaryMathAbs(T A) { - if constexpr (std::is_unsigned_v<T>) + if constexpr (std::is_unsigned_v<T> || std::is_same_v<T, HLSLMin16Uint_t>) return A; else return static_cast<T>(std::abs(A)); From a5e03b9ba53ed3b4925a226f0e19d66aef7ffc91 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 10 Mar 2026 17:59:58 -0700 Subject: [PATCH 07/34] Update MinPrecision test class GUID Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tools/clang/unittests/HLSLExec/LongVectors.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 0d0dd373fd..c235141f7e 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -2946,7 +2946,7 @@ class DxilConf_SM69_Vectorized_MinPrecision : public TestClassCommon { TEST_CLASS_PROPERTY( "Kits.TestName", "D3D12 - Shader Model 6.9 - Vectorized DXIL - Min Precision Tests") - TEST_CLASS_PROPERTY("Kits.TestId", "b4e2c8a1-3f7d-4a9e-8b5c-1d6e0f2a3b4c") + TEST_CLASS_PROPERTY("Kits.TestId",
"ca1373a5-09c3-43f4-8455-7a752c6443c9") TEST_CLASS_PROPERTY( "Kits.Description", "Validates required min precision SM 6.9 vectorized DXIL operations") From 250f29fd1a7f47a3a64d53b8e4d73dca8aa4bc49 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 10 Mar 2026 18:09:55 -0700 Subject: [PATCH 08/34] Update comment: clarify why FP specials are excluded Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tools/clang/unittests/HLSLExec/LongVectorTestData.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index d2790d18cb..1b65b4a3fc 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -655,7 +655,7 @@ INPUT_SET(InputSet::AllOnes, 1.0); END_INPUT_SETS() // Min precision input sets. Values are within the fp16 representable range. -// No FP specials (INF/NaN/denorm) per issue #7780. +// No FP specials (INF/NaN/denorm) as min precision types do not support them. BEGIN_INPUT_SETS(HLSLMin16Float_t) INPUT_SET(InputSet::Default1, -1.0f, -1.0f, 1.0f, -0.01f, 1.0f, -0.01f, 1.0f, -0.01f, 1.0f, -0.01f); From 15cdf7adaa22e51a4a342a4e1e49274b7d29a7fa Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 10 Mar 2026 18:19:51 -0700 Subject: [PATCH 09/34] Add Cast test entries and remove unused explicit conversion operators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ~27 Cast test entries (CastToBool, CastToInt16, CastToInt32, CastToInt64, CastToUint16/32/64, CastToFloat16/32) for all three min precision types. The generic Cast templates work via the single implicit conversion operator on each wrapper type — C-style casts chain through it (e.g. (int32_t)min16float goes float->int32_t). Remove explicit conversion operators (operator double, operator int32_t, etc.) 
that were not exercised since Cast tests were not previously included and no other code paths use them. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../unittests/HLSLExec/LongVectorTestData.h | 14 --------- .../clang/unittests/HLSLExec/LongVectors.cpp | 31 +++++++++++++++++++ 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index 1b65b4a3fc..082be2d6f9 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -248,10 +248,6 @@ struct HLSLMin16Float_t { explicit HLSLMin16Float_t(uint32_t U) : Val(static_cast<float>(U)) {} operator float() const { return Val; } - explicit operator double() const { return static_cast<double>(Val); } - explicit operator int32_t() const { return static_cast<int32_t>(Val); } - explicit operator uint32_t() const { return static_cast<uint32_t>(Val); } - explicit operator bool() const { return Val != 0.0f; } bool operator==(const HLSLMin16Float_t &O) const { return Val == O.Val; } bool operator!=(const HLSLMin16Float_t &O) const { return Val != O.Val; } @@ -302,11 +298,6 @@ struct HLSLMin16Int_t { HLSLMin16Int_t(double D) : Val(static_cast<int32_t>(D)) {} operator int32_t() const { return Val; } - explicit operator int64_t() const { return static_cast<int64_t>(Val); } - explicit operator uint32_t() const { return static_cast<uint32_t>(Val); } - explicit operator float() const { return static_cast<float>(Val); } - explicit operator double() const { return static_cast<double>(Val); } - explicit operator bool() const { return Val != 0; } bool operator==(const HLSLMin16Int_t &O) const { return Val == O.Val; } bool operator!=(const HLSLMin16Int_t &O) const { return Val != O.Val; } @@ -377,11 +368,6 @@ struct HLSLMin16Uint_t { HLSLMin16Uint_t(double D) : Val(static_cast<uint32_t>(D)) {} operator uint32_t() const { return Val; } - explicit operator uint64_t() const { return static_cast<uint64_t>(Val); } - explicit operator
int32_t() const { return static_cast<int32_t>(Val); } - explicit operator float() const { return static_cast<float>(Val); } - explicit operator double() const { return static_cast<double>(Val); } - explicit operator bool() const { return Val != 0; } bool operator==(const HLSLMin16Uint_t &O) const { return Val == O.Val; } bool operator!=(const HLSLMin16Uint_t &O) const { return Val != O.Val; } diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index c235141f7e..46a4fea82b 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -2995,6 +2995,17 @@ class DxilConf_SM69_Vectorized_MinPrecision : public TestClassCommon { HLK_TEST_MINP(ArrayOperator_DynamicAccess, HLSLMin16Float_t); HLK_TEST_MINP(ShuffleVector, HLSLMin16Float_t); + // Cast + HLK_TEST_MINP(CastToBool, HLSLMin16Float_t); + HLK_TEST_MINP(CastToInt16, HLSLMin16Float_t); + HLK_TEST_MINP(CastToInt32, HLSLMin16Float_t); + HLK_TEST_MINP(CastToInt64, HLSLMin16Float_t); + HLK_TEST_MINP(CastToUint16_FromFP, HLSLMin16Float_t); + HLK_TEST_MINP(CastToUint32_FromFP, HLSLMin16Float_t); + HLK_TEST_MINP(CastToUint64_FromFP, HLSLMin16Float_t); + HLK_TEST_MINP(CastToFloat16, HLSLMin16Float_t); + HLK_TEST_MINP(CastToFloat32, HLSLMin16Float_t); + // Trigonometric HLK_TEST_MINP(Acos, HLSLMin16Float_t); HLK_TEST_MINP(Asin, HLSLMin16Float_t); @@ -3108,6 +3119,16 @@ class DxilConf_SM69_Vectorized_MinPrecision : public TestClassCommon { HLK_TEST_MINP(ArrayOperator_DynamicAccess, HLSLMin16Int_t); HLK_TEST_MINP(ShuffleVector, HLSLMin16Int_t); + // Cast + HLK_TEST_MINP(CastToBool, HLSLMin16Int_t); + HLK_TEST_MINP(CastToInt32, HLSLMin16Int_t); + HLK_TEST_MINP(CastToInt64, HLSLMin16Int_t); + HLK_TEST_MINP(CastToUint16, HLSLMin16Int_t); + HLK_TEST_MINP(CastToUint32, HLSLMin16Int_t); + HLK_TEST_MINP(CastToUint64, HLSLMin16Int_t); + HLK_TEST_MINP(CastToFloat16, HLSLMin16Int_t); + HLK_TEST_MINP(CastToFloat32, HLSLMin16Int_t); + // BinaryComparison
HLK_TEST_MINP(LessThan, HLSLMin16Int_t); HLK_TEST_MINP(LessEqual, HLSLMin16Int_t); @@ -3195,6 +3216,16 @@ class DxilConf_SM69_Vectorized_MinPrecision : public TestClassCommon { HLK_TEST_MINP(ArrayOperator_DynamicAccess, HLSLMin16Uint_t); HLK_TEST_MINP(ShuffleVector, HLSLMin16Uint_t); + // Cast + HLK_TEST_MINP(CastToBool, HLSLMin16Uint_t); + HLK_TEST_MINP(CastToInt16, HLSLMin16Uint_t); + HLK_TEST_MINP(CastToInt32, HLSLMin16Uint_t); + HLK_TEST_MINP(CastToInt64, HLSLMin16Uint_t); + HLK_TEST_MINP(CastToUint32, HLSLMin16Uint_t); + HLK_TEST_MINP(CastToUint64, HLSLMin16Uint_t); + HLK_TEST_MINP(CastToFloat16, HLSLMin16Uint_t); + HLK_TEST_MINP(CastToFloat32, HLSLMin16Uint_t); + // BinaryComparison HLK_TEST_MINP(LessThan, HLSLMin16Uint_t); HLK_TEST_MINP(LessEqual, HLSLMin16Uint_t); From d9e5f735b88e8dda683be705e4a2b48dccf9e898 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Wed, 11 Mar 2026 15:56:26 -0700 Subject: [PATCH 10/34] Remove WaveMultiPrefixBit* tests for min precision integer types WaveMultiPrefixBitAnd/BitOr/BitXor use the any_int type set (g_AnyIntCT) which is defined as {int16, int32, int64, uint16, uint32, uint64} and does not include min precision integer types (min16int, min16uint). Remove the 6 invalid test entries and the now-unused operator~() from both integer wrapper types. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tools/clang/unittests/HLSLExec/LongVectorTestData.h | 4 ---- tools/clang/unittests/HLSLExec/LongVectors.cpp | 10 ++++------ 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index 082be2d6f9..9b44ff35b3 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -342,8 +342,6 @@ struct HLSLMin16Int_t { HLSLMin16Int_t operator||(const HLSLMin16Int_t &O) const { return HLSLMin16Int_t(Val || O.Val); } - HLSLMin16Int_t operator~() const { return HLSLMin16Int_t(~Val); } - friend std::wostream &operator<<(std::wostream &Os, const HLSLMin16Int_t &Obj) { Os << Obj.Val; @@ -409,8 +407,6 @@ struct HLSLMin16Uint_t { bool operator&&(const HLSLMin16Uint_t &O) const { return Val && O.Val; } bool operator||(const HLSLMin16Uint_t &O) const { return Val || O.Val; } - HLSLMin16Uint_t operator~() const { return HLSLMin16Uint_t(~Val); } - friend std::wostream &operator<<(std::wostream &Os, const HLSLMin16Uint_t &Obj) { Os << Obj.Val; diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 46a4fea82b..0330d65d88 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -3180,9 +3180,8 @@ class DxilConf_SM69_Vectorized_MinPrecision : public TestClassCommon { HLK_WAVEOP_TEST_MINP(WavePrefixProduct, HLSLMin16Int_t); HLK_WAVEOP_TEST_MINP(WaveMultiPrefixSum, HLSLMin16Int_t); HLK_WAVEOP_TEST_MINP(WaveMultiPrefixProduct, HLSLMin16Int_t); - HLK_WAVEOP_TEST_MINP(WaveMultiPrefixBitAnd, HLSLMin16Int_t); - HLK_WAVEOP_TEST_MINP(WaveMultiPrefixBitOr, HLSLMin16Int_t); - HLK_WAVEOP_TEST_MINP(WaveMultiPrefixBitXor, HLSLMin16Int_t); + // WaveMultiPrefixBitAnd/BitOr/BitXor excluded: these intrinsics use the + // any_int type set which 
does not include min precision integer types. HLK_WAVEOP_TEST_MINP(WaveMatch, HLSLMin16Int_t); // ---- HLSLMin16Uint_t (mirrors uint16_t) ---- @@ -3269,8 +3268,7 @@ class DxilConf_SM69_Vectorized_MinPrecision : public TestClassCommon { HLK_WAVEOP_TEST_MINP(WavePrefixProduct, HLSLMin16Uint_t); HLK_WAVEOP_TEST_MINP(WaveMultiPrefixSum, HLSLMin16Uint_t); HLK_WAVEOP_TEST_MINP(WaveMultiPrefixProduct, HLSLMin16Uint_t); - HLK_WAVEOP_TEST_MINP(WaveMultiPrefixBitAnd, HLSLMin16Uint_t); - HLK_WAVEOP_TEST_MINP(WaveMultiPrefixBitOr, HLSLMin16Uint_t); - HLK_WAVEOP_TEST_MINP(WaveMultiPrefixBitXor, HLSLMin16Uint_t); + // WaveMultiPrefixBitAnd/BitOr/BitXor excluded: these intrinsics use the + // any_int type set which does not include min precision integer types. HLK_WAVEOP_TEST_MINP(WaveMatch, HLSLMin16Uint_t); }; From f087900bcaa9e53e5018f95daea85f30e149956b Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Thu, 12 Mar 2026 15:14:50 -0700 Subject: [PATCH 11/34] Merge min precision tests into Core class and remove device check Move all min precision test entries (min16float, min16int, min16uint) from the separate DxilConf_SM69_Vectorized_MinPrecision class into DxilConf_SM69_Vectorized_Core, using HLK_TEST/HLK_WAVEOP_TEST macros. Remove the HLK_TEST_MINP and HLK_WAVEOP_TEST_MINP macro definitions, the DxilConf_SM69_Vectorized_MinPrecision class, and the doesDeviceSupportMinPrecision utility function since min precision support checking is not required. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../unittests/HLSLExec/HlslExecTestUtils.cpp | 9 - .../unittests/HLSLExec/HlslExecTestUtils.h | 1 - .../clang/unittests/HLSLExec/LongVectors.cpp | 654 ++++++++---------- 3 files changed, 301 insertions(+), 363 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/HlslExecTestUtils.cpp b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.cpp index 1f98dc4574..b5b05f0d87 100644 --- a/tools/clang/unittests/HLSLExec/HlslExecTestUtils.cpp +++ b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.cpp @@ -497,15 +497,6 @@ bool doesDeviceSupportDouble(ID3D12Device *pDevice) { return O.DoublePrecisionFloatShaderOps != FALSE; } -bool doesDeviceSupportMinPrecision(ID3D12Device *pDevice) { - D3D12_FEATURE_DATA_D3D12_OPTIONS O; - if (FAILED(pDevice->CheckFeatureSupport( - (D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS, &O, sizeof(O)))) - return false; - return (O.MinPrecisionSupport & D3D12_SHADER_MIN_PRECISION_SUPPORT_16_BIT) != - 0; -} - bool doesDeviceSupportWaveOps(ID3D12Device *pDevice) { D3D12_FEATURE_DATA_D3D12_OPTIONS1 O; if (FAILED(pDevice->CheckFeatureSupport( diff --git a/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h index 17f1545029..a16be6a16c 100644 --- a/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h +++ b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h @@ -55,7 +55,6 @@ void readHlslDataIntoNewStream(LPCWSTR RelativePath, IStream **Stream, bool doesDeviceSupportInt64(ID3D12Device *pDevice); bool doesDeviceSupportDouble(ID3D12Device *pDevice); -bool doesDeviceSupportMinPrecision(ID3D12Device *pDevice); bool doesDeviceSupportWaveOps(ID3D12Device *pDevice); bool doesDeviceSupportBarycentrics(ID3D12Device *pDevice); bool doesDeviceSupportNative16bitOps(ID3D12Device *pDevice); diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 0330d65d88..55dfc08713 100644 --- 
a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -2769,6 +2769,307 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_WAVEOP_TEST(WaveMultiPrefixSum, float); HLK_WAVEOP_TEST(WaveMultiPrefixProduct, float); HLK_WAVEOP_TEST(WaveMatch, float); + + // ---- HLSLMin16Float_t (mirrors HLSLHalf_t) ---- + + // TernaryMath + HLK_TEST(Mad, HLSLMin16Float_t); + + // BinaryMath + HLK_TEST(Add, HLSLMin16Float_t); + HLK_TEST(Subtract, HLSLMin16Float_t); + HLK_TEST(Multiply, HLSLMin16Float_t); + HLK_TEST(Divide, HLSLMin16Float_t); + HLK_TEST(Modulus, HLSLMin16Float_t); + HLK_TEST(Min, HLSLMin16Float_t); + HLK_TEST(Max, HLSLMin16Float_t); + HLK_TEST(Ldexp, HLSLMin16Float_t); + + // Saturate + HLK_TEST(Saturate, HLSLMin16Float_t); + + // Unary + HLK_TEST(Initialize, HLSLMin16Float_t); + HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Float_t); + HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Float_t); + HLK_TEST(ShuffleVector, HLSLMin16Float_t); + + // Cast + HLK_TEST(CastToBool, HLSLMin16Float_t); + HLK_TEST(CastToInt16, HLSLMin16Float_t); + HLK_TEST(CastToInt32, HLSLMin16Float_t); + HLK_TEST(CastToInt64, HLSLMin16Float_t); + HLK_TEST(CastToUint16_FromFP, HLSLMin16Float_t); + HLK_TEST(CastToUint32_FromFP, HLSLMin16Float_t); + HLK_TEST(CastToUint64_FromFP, HLSLMin16Float_t); + HLK_TEST(CastToFloat16, HLSLMin16Float_t); + HLK_TEST(CastToFloat32, HLSLMin16Float_t); + + // Trigonometric + HLK_TEST(Acos, HLSLMin16Float_t); + HLK_TEST(Asin, HLSLMin16Float_t); + HLK_TEST(Atan, HLSLMin16Float_t); + HLK_TEST(Cos, HLSLMin16Float_t); + HLK_TEST(Cosh, HLSLMin16Float_t); + HLK_TEST(Sin, HLSLMin16Float_t); + HLK_TEST(Sinh, HLSLMin16Float_t); + HLK_TEST(Tan, HLSLMin16Float_t); + HLK_TEST(Tanh, HLSLMin16Float_t); + + // UnaryMath + HLK_TEST(Abs, HLSLMin16Float_t); + HLK_TEST(Ceil, HLSLMin16Float_t); + HLK_TEST(Exp, HLSLMin16Float_t); + HLK_TEST(Floor, HLSLMin16Float_t); + HLK_TEST(Frac, HLSLMin16Float_t); + HLK_TEST(Log, 
HLSLMin16Float_t); + HLK_TEST(Rcp, HLSLMin16Float_t); + HLK_TEST(Round, HLSLMin16Float_t); + HLK_TEST(Rsqrt, HLSLMin16Float_t); + HLK_TEST(Sign, HLSLMin16Float_t); + HLK_TEST(Sqrt, HLSLMin16Float_t); + HLK_TEST(Trunc, HLSLMin16Float_t); + HLK_TEST(Exp2, HLSLMin16Float_t); + HLK_TEST(Log10, HLSLMin16Float_t); + HLK_TEST(Log2, HLSLMin16Float_t); + + // BinaryComparison + HLK_TEST(LessThan, HLSLMin16Float_t); + HLK_TEST(LessEqual, HLSLMin16Float_t); + HLK_TEST(GreaterThan, HLSLMin16Float_t); + HLK_TEST(GreaterEqual, HLSLMin16Float_t); + HLK_TEST(Equal, HLSLMin16Float_t); + HLK_TEST(NotEqual, HLSLMin16Float_t); + + // Select + HLK_TEST(Select, HLSLMin16Float_t); + + // Dot + HLK_TEST(Dot, HLSLMin16Float_t); + + // LoadAndStore + HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Float_t); + + // Derivative + HLK_TEST(DerivativeDdx, HLSLMin16Float_t); + HLK_TEST(DerivativeDdy, HLSLMin16Float_t); + HLK_TEST(DerivativeDdxFine, HLSLMin16Float_t); + HLK_TEST(DerivativeDdyFine, HLSLMin16Float_t); + + // Quad + HLK_TEST(QuadReadLaneAt, HLSLMin16Float_t); + HLK_TEST(QuadReadAcrossX, HLSLMin16Float_t); + HLK_TEST(QuadReadAcrossY, HLSLMin16Float_t); + HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Float_t); + + // Wave + HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Float_t); + 
HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Float_t); + + // ---- HLSLMin16Int_t (mirrors int16_t) ---- + + // TernaryMath + HLK_TEST(Mad, HLSLMin16Int_t); + + // BinaryMath + HLK_TEST(Add, HLSLMin16Int_t); + HLK_TEST(Subtract, HLSLMin16Int_t); + HLK_TEST(Multiply, HLSLMin16Int_t); + HLK_TEST(Divide, HLSLMin16Int_t); + HLK_TEST(Modulus, HLSLMin16Int_t); + HLK_TEST(Min, HLSLMin16Int_t); + HLK_TEST(Max, HLSLMin16Int_t); + + // Bitwise (logical and shift — bit-manipulation excluded) + HLK_TEST(And, HLSLMin16Int_t); + HLK_TEST(Or, HLSLMin16Int_t); + HLK_TEST(Xor, HLSLMin16Int_t); + HLK_TEST(LeftShift, HLSLMin16Int_t); + HLK_TEST(RightShift, HLSLMin16Int_t); + + // UnaryMath + HLK_TEST(Abs, HLSLMin16Int_t); + HLK_TEST(Sign, HLSLMin16Int_t); + + // Unary + HLK_TEST(Initialize, HLSLMin16Int_t); + HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Int_t); + HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Int_t); + HLK_TEST(ShuffleVector, HLSLMin16Int_t); + + // Cast + HLK_TEST(CastToBool, HLSLMin16Int_t); + HLK_TEST(CastToInt32, HLSLMin16Int_t); + HLK_TEST(CastToInt64, HLSLMin16Int_t); + HLK_TEST(CastToUint16, HLSLMin16Int_t); + HLK_TEST(CastToUint32, HLSLMin16Int_t); + HLK_TEST(CastToUint64, HLSLMin16Int_t); + HLK_TEST(CastToFloat16, HLSLMin16Int_t); + HLK_TEST(CastToFloat32, HLSLMin16Int_t); + + // BinaryComparison + HLK_TEST(LessThan, HLSLMin16Int_t); + HLK_TEST(LessEqual, HLSLMin16Int_t); + HLK_TEST(GreaterThan, HLSLMin16Int_t); + HLK_TEST(GreaterEqual, HLSLMin16Int_t); + HLK_TEST(Equal, HLSLMin16Int_t); + HLK_TEST(NotEqual, HLSLMin16Int_t); + 
+ // Select + HLK_TEST(Select, HLSLMin16Int_t); + + // Reduction + HLK_TEST(Any_Mixed, HLSLMin16Int_t); + HLK_TEST(Any_Zero, HLSLMin16Int_t); + HLK_TEST(Any_NoZero, HLSLMin16Int_t); + HLK_TEST(All_Mixed, HLSLMin16Int_t); + HLK_TEST(All_Zero, HLSLMin16Int_t); + HLK_TEST(All_NoZero, HLSLMin16Int_t); + + // LoadAndStore + HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Int_t); + + // Quad + HLK_TEST(QuadReadLaneAt, HLSLMin16Int_t); + HLK_TEST(QuadReadAcrossX, HLSLMin16Int_t); + HLK_TEST(QuadReadAcrossY, HLSLMin16Int_t); + HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Int_t); + + // Wave + HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Int_t); + // WaveMultiPrefixBitAnd/BitOr/BitXor excluded: these intrinsics use the + // any_int type set which does not include min precision integer types. 
+ HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Int_t); + + // ---- HLSLMin16Uint_t (mirrors uint16_t) ---- + + // TernaryMath + HLK_TEST(Mad, HLSLMin16Uint_t); + + // BinaryMath + HLK_TEST(Add, HLSLMin16Uint_t); + HLK_TEST(Subtract, HLSLMin16Uint_t); + HLK_TEST(Multiply, HLSLMin16Uint_t); + HLK_TEST(Divide, HLSLMin16Uint_t); + HLK_TEST(Modulus, HLSLMin16Uint_t); + HLK_TEST(Min, HLSLMin16Uint_t); + HLK_TEST(Max, HLSLMin16Uint_t); + + // Bitwise (logical and shift — bit-manipulation excluded) + HLK_TEST(And, HLSLMin16Uint_t); + HLK_TEST(Or, HLSLMin16Uint_t); + HLK_TEST(Xor, HLSLMin16Uint_t); + HLK_TEST(LeftShift, HLSLMin16Uint_t); + HLK_TEST(RightShift, HLSLMin16Uint_t); + + // UnaryMath + HLK_TEST(Abs, HLSLMin16Uint_t); + HLK_TEST(Sign, HLSLMin16Uint_t); + + // Unary + HLK_TEST(Initialize, HLSLMin16Uint_t); + HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Uint_t); + HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Uint_t); + HLK_TEST(ShuffleVector, HLSLMin16Uint_t); + + // Cast + HLK_TEST(CastToBool, HLSLMin16Uint_t); + HLK_TEST(CastToInt16, HLSLMin16Uint_t); + HLK_TEST(CastToInt32, HLSLMin16Uint_t); + HLK_TEST(CastToInt64, HLSLMin16Uint_t); + HLK_TEST(CastToUint32, HLSLMin16Uint_t); + HLK_TEST(CastToUint64, HLSLMin16Uint_t); + HLK_TEST(CastToFloat16, HLSLMin16Uint_t); + HLK_TEST(CastToFloat32, HLSLMin16Uint_t); + + // BinaryComparison + HLK_TEST(LessThan, HLSLMin16Uint_t); + HLK_TEST(LessEqual, HLSLMin16Uint_t); + HLK_TEST(GreaterThan, HLSLMin16Uint_t); + HLK_TEST(GreaterEqual, HLSLMin16Uint_t); + HLK_TEST(Equal, HLSLMin16Uint_t); + HLK_TEST(NotEqual, HLSLMin16Uint_t); + + // Select + HLK_TEST(Select, HLSLMin16Uint_t); + + // LoadAndStore + HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Uint_t); + 
HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Uint_t); + + // Quad + HLK_TEST(QuadReadLaneAt, HLSLMin16Uint_t); + HLK_TEST(QuadReadAcrossX, HLSLMin16Uint_t); + HLK_TEST(QuadReadAcrossY, HLSLMin16Uint_t); + HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Uint_t); + + // Wave + HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Uint_t); + // WaveMultiPrefixBitAnd/BitOr/BitXor excluded: these intrinsics use the + // any_int type set which does not include min precision integer types. 
+ HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Uint_t); }; #define HLK_TEST_DOUBLE(Op, DataType) \ @@ -2919,356 +3220,3 @@ class DxilConf_SM69_Vectorized_Double : public TestClassCommon { HLK_WAVEOP_TEST_DOUBLE(WaveMultiPrefixProduct, double); HLK_WAVEOP_TEST_DOUBLE(WaveMatch, double); }; - -#define HLK_TEST_MINP(Op, DataType) \ - TEST_METHOD(Op##_##DataType) { \ - BEGIN_TEST_METHOD_PROPERTIES() \ - TEST_METHOD_PROPERTY( \ - "Kits.Specification", \ - "Device.Graphics.D3D12.DXILCore.ShaderModel69.MinPrecision") \ - END_TEST_METHOD_PROPERTIES() \ - runTest(); \ - } - -#define HLK_WAVEOP_TEST_MINP(Op, DataType) \ - TEST_METHOD(Op##_##DataType) { \ - BEGIN_TEST_METHOD_PROPERTIES() \ - TEST_METHOD_PROPERTY( \ - "Kits.Specification", \ - "Device.Graphics.D3D12.DXILCore.ShaderModel69.MinPrecision") \ - END_TEST_METHOD_PROPERTIES() \ - runWaveOpTest(); \ - } - -class DxilConf_SM69_Vectorized_MinPrecision : public TestClassCommon { -public: - BEGIN_TEST_CLASS(DxilConf_SM69_Vectorized_MinPrecision) - TEST_CLASS_PROPERTY( - "Kits.TestName", - "D3D12 - Shader Model 6.9 - Vectorized DXIL - Min Precision Tests") - TEST_CLASS_PROPERTY("Kits.TestId", "ca1373a5-09c3-43f4-8455-7a752c6443c9") - TEST_CLASS_PROPERTY( - "Kits.Description", - "Validates required min precision SM 6.9 vectorized DXIL operations") - TEST_METHOD_PROPERTY( - "Kits.Specification", - "Device.Graphics.D3D12.DXILCore.ShaderModel69.MinPrecision") - TEST_METHOD_PROPERTY(L"Priority", L"0") - END_TEST_CLASS() - - TEST_CLASS_SETUP(setupClass) { - const bool result = TestClassCommon::setupClass(); -#ifndef _HLK_CONF - if (result && !doesDeviceSupportMinPrecision(D3DDevice)) { - WEX::Logging::Log::Comment( - L"Skipping test as device does not support min precision."); - WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); - return false; - } -#endif - return result; - } - - TEST_METHOD_SETUP(setupMethod) { return TestClassCommon::setupMethod(); } - - // ---- HLSLMin16Float_t (mirrors HLSLHalf_t) ---- - - // TernaryMath 
- HLK_TEST_MINP(Mad, HLSLMin16Float_t); - - // BinaryMath - HLK_TEST_MINP(Add, HLSLMin16Float_t); - HLK_TEST_MINP(Subtract, HLSLMin16Float_t); - HLK_TEST_MINP(Multiply, HLSLMin16Float_t); - HLK_TEST_MINP(Divide, HLSLMin16Float_t); - HLK_TEST_MINP(Modulus, HLSLMin16Float_t); - HLK_TEST_MINP(Min, HLSLMin16Float_t); - HLK_TEST_MINP(Max, HLSLMin16Float_t); - HLK_TEST_MINP(Ldexp, HLSLMin16Float_t); - - // Saturate - HLK_TEST_MINP(Saturate, HLSLMin16Float_t); - - // Unary - HLK_TEST_MINP(Initialize, HLSLMin16Float_t); - HLK_TEST_MINP(ArrayOperator_StaticAccess, HLSLMin16Float_t); - HLK_TEST_MINP(ArrayOperator_DynamicAccess, HLSLMin16Float_t); - HLK_TEST_MINP(ShuffleVector, HLSLMin16Float_t); - - // Cast - HLK_TEST_MINP(CastToBool, HLSLMin16Float_t); - HLK_TEST_MINP(CastToInt16, HLSLMin16Float_t); - HLK_TEST_MINP(CastToInt32, HLSLMin16Float_t); - HLK_TEST_MINP(CastToInt64, HLSLMin16Float_t); - HLK_TEST_MINP(CastToUint16_FromFP, HLSLMin16Float_t); - HLK_TEST_MINP(CastToUint32_FromFP, HLSLMin16Float_t); - HLK_TEST_MINP(CastToUint64_FromFP, HLSLMin16Float_t); - HLK_TEST_MINP(CastToFloat16, HLSLMin16Float_t); - HLK_TEST_MINP(CastToFloat32, HLSLMin16Float_t); - - // Trigonometric - HLK_TEST_MINP(Acos, HLSLMin16Float_t); - HLK_TEST_MINP(Asin, HLSLMin16Float_t); - HLK_TEST_MINP(Atan, HLSLMin16Float_t); - HLK_TEST_MINP(Cos, HLSLMin16Float_t); - HLK_TEST_MINP(Cosh, HLSLMin16Float_t); - HLK_TEST_MINP(Sin, HLSLMin16Float_t); - HLK_TEST_MINP(Sinh, HLSLMin16Float_t); - HLK_TEST_MINP(Tan, HLSLMin16Float_t); - HLK_TEST_MINP(Tanh, HLSLMin16Float_t); - - // UnaryMath - HLK_TEST_MINP(Abs, HLSLMin16Float_t); - HLK_TEST_MINP(Ceil, HLSLMin16Float_t); - HLK_TEST_MINP(Exp, HLSLMin16Float_t); - HLK_TEST_MINP(Floor, HLSLMin16Float_t); - HLK_TEST_MINP(Frac, HLSLMin16Float_t); - HLK_TEST_MINP(Log, HLSLMin16Float_t); - HLK_TEST_MINP(Rcp, HLSLMin16Float_t); - HLK_TEST_MINP(Round, HLSLMin16Float_t); - HLK_TEST_MINP(Rsqrt, HLSLMin16Float_t); - HLK_TEST_MINP(Sign, HLSLMin16Float_t); - 
HLK_TEST_MINP(Sqrt, HLSLMin16Float_t); - HLK_TEST_MINP(Trunc, HLSLMin16Float_t); - HLK_TEST_MINP(Exp2, HLSLMin16Float_t); - HLK_TEST_MINP(Log10, HLSLMin16Float_t); - HLK_TEST_MINP(Log2, HLSLMin16Float_t); - - // BinaryComparison - HLK_TEST_MINP(LessThan, HLSLMin16Float_t); - HLK_TEST_MINP(LessEqual, HLSLMin16Float_t); - HLK_TEST_MINP(GreaterThan, HLSLMin16Float_t); - HLK_TEST_MINP(GreaterEqual, HLSLMin16Float_t); - HLK_TEST_MINP(Equal, HLSLMin16Float_t); - HLK_TEST_MINP(NotEqual, HLSLMin16Float_t); - - // Select - HLK_TEST_MINP(Select, HLSLMin16Float_t); - - // Dot - HLK_TEST_MINP(Dot, HLSLMin16Float_t); - - // LoadAndStore - HLK_TEST_MINP(LoadAndStore_RDH_BAB_SRV, HLSLMin16Float_t); - HLK_TEST_MINP(LoadAndStore_RDH_BAB_UAV, HLSLMin16Float_t); - HLK_TEST_MINP(LoadAndStore_DT_BAB_SRV, HLSLMin16Float_t); - HLK_TEST_MINP(LoadAndStore_DT_BAB_UAV, HLSLMin16Float_t); - HLK_TEST_MINP(LoadAndStore_RD_BAB_SRV, HLSLMin16Float_t); - HLK_TEST_MINP(LoadAndStore_RD_BAB_UAV, HLSLMin16Float_t); - HLK_TEST_MINP(LoadAndStore_RDH_SB_SRV, HLSLMin16Float_t); - HLK_TEST_MINP(LoadAndStore_RDH_SB_UAV, HLSLMin16Float_t); - HLK_TEST_MINP(LoadAndStore_DT_SB_SRV, HLSLMin16Float_t); - HLK_TEST_MINP(LoadAndStore_DT_SB_UAV, HLSLMin16Float_t); - HLK_TEST_MINP(LoadAndStore_RD_SB_SRV, HLSLMin16Float_t); - HLK_TEST_MINP(LoadAndStore_RD_SB_UAV, HLSLMin16Float_t); - - // Derivative - HLK_TEST_MINP(DerivativeDdx, HLSLMin16Float_t); - HLK_TEST_MINP(DerivativeDdy, HLSLMin16Float_t); - HLK_TEST_MINP(DerivativeDdxFine, HLSLMin16Float_t); - HLK_TEST_MINP(DerivativeDdyFine, HLSLMin16Float_t); - - // Quad - HLK_TEST_MINP(QuadReadLaneAt, HLSLMin16Float_t); - HLK_TEST_MINP(QuadReadAcrossX, HLSLMin16Float_t); - HLK_TEST_MINP(QuadReadAcrossY, HLSLMin16Float_t); - HLK_TEST_MINP(QuadReadAcrossDiagonal, HLSLMin16Float_t); - - // Wave - HLK_WAVEOP_TEST_MINP(WaveActiveSum, HLSLMin16Float_t); - HLK_WAVEOP_TEST_MINP(WaveActiveMin, HLSLMin16Float_t); - HLK_WAVEOP_TEST_MINP(WaveActiveMax, HLSLMin16Float_t); - 
HLK_WAVEOP_TEST_MINP(WaveActiveProduct, HLSLMin16Float_t); - HLK_WAVEOP_TEST_MINP(WaveActiveAllEqual, HLSLMin16Float_t); - HLK_WAVEOP_TEST_MINP(WaveReadLaneAt, HLSLMin16Float_t); - HLK_WAVEOP_TEST_MINP(WaveReadLaneFirst, HLSLMin16Float_t); - HLK_WAVEOP_TEST_MINP(WavePrefixSum, HLSLMin16Float_t); - HLK_WAVEOP_TEST_MINP(WavePrefixProduct, HLSLMin16Float_t); - HLK_WAVEOP_TEST_MINP(WaveMultiPrefixSum, HLSLMin16Float_t); - HLK_WAVEOP_TEST_MINP(WaveMultiPrefixProduct, HLSLMin16Float_t); - HLK_WAVEOP_TEST_MINP(WaveMatch, HLSLMin16Float_t); - - // ---- HLSLMin16Int_t (mirrors int16_t) ---- - - // TernaryMath - HLK_TEST_MINP(Mad, HLSLMin16Int_t); - - // BinaryMath - HLK_TEST_MINP(Add, HLSLMin16Int_t); - HLK_TEST_MINP(Subtract, HLSLMin16Int_t); - HLK_TEST_MINP(Multiply, HLSLMin16Int_t); - HLK_TEST_MINP(Divide, HLSLMin16Int_t); - HLK_TEST_MINP(Modulus, HLSLMin16Int_t); - HLK_TEST_MINP(Min, HLSLMin16Int_t); - HLK_TEST_MINP(Max, HLSLMin16Int_t); - - // Bitwise (logical and shift — bit-manipulation excluded) - HLK_TEST_MINP(And, HLSLMin16Int_t); - HLK_TEST_MINP(Or, HLSLMin16Int_t); - HLK_TEST_MINP(Xor, HLSLMin16Int_t); - HLK_TEST_MINP(LeftShift, HLSLMin16Int_t); - HLK_TEST_MINP(RightShift, HLSLMin16Int_t); - - // UnaryMath - HLK_TEST_MINP(Abs, HLSLMin16Int_t); - HLK_TEST_MINP(Sign, HLSLMin16Int_t); - - // Unary - HLK_TEST_MINP(Initialize, HLSLMin16Int_t); - HLK_TEST_MINP(ArrayOperator_StaticAccess, HLSLMin16Int_t); - HLK_TEST_MINP(ArrayOperator_DynamicAccess, HLSLMin16Int_t); - HLK_TEST_MINP(ShuffleVector, HLSLMin16Int_t); - - // Cast - HLK_TEST_MINP(CastToBool, HLSLMin16Int_t); - HLK_TEST_MINP(CastToInt32, HLSLMin16Int_t); - HLK_TEST_MINP(CastToInt64, HLSLMin16Int_t); - HLK_TEST_MINP(CastToUint16, HLSLMin16Int_t); - HLK_TEST_MINP(CastToUint32, HLSLMin16Int_t); - HLK_TEST_MINP(CastToUint64, HLSLMin16Int_t); - HLK_TEST_MINP(CastToFloat16, HLSLMin16Int_t); - HLK_TEST_MINP(CastToFloat32, HLSLMin16Int_t); - - // BinaryComparison - HLK_TEST_MINP(LessThan, HLSLMin16Int_t); - 
HLK_TEST_MINP(LessEqual, HLSLMin16Int_t); - HLK_TEST_MINP(GreaterThan, HLSLMin16Int_t); - HLK_TEST_MINP(GreaterEqual, HLSLMin16Int_t); - HLK_TEST_MINP(Equal, HLSLMin16Int_t); - HLK_TEST_MINP(NotEqual, HLSLMin16Int_t); - - // Select - HLK_TEST_MINP(Select, HLSLMin16Int_t); - - // Reduction - HLK_TEST_MINP(Any_Mixed, HLSLMin16Int_t); - HLK_TEST_MINP(Any_Zero, HLSLMin16Int_t); - HLK_TEST_MINP(Any_NoZero, HLSLMin16Int_t); - HLK_TEST_MINP(All_Mixed, HLSLMin16Int_t); - HLK_TEST_MINP(All_Zero, HLSLMin16Int_t); - HLK_TEST_MINP(All_NoZero, HLSLMin16Int_t); - - // LoadAndStore - HLK_TEST_MINP(LoadAndStore_RDH_BAB_SRV, HLSLMin16Int_t); - HLK_TEST_MINP(LoadAndStore_RDH_BAB_UAV, HLSLMin16Int_t); - HLK_TEST_MINP(LoadAndStore_DT_BAB_SRV, HLSLMin16Int_t); - HLK_TEST_MINP(LoadAndStore_DT_BAB_UAV, HLSLMin16Int_t); - HLK_TEST_MINP(LoadAndStore_RD_BAB_SRV, HLSLMin16Int_t); - HLK_TEST_MINP(LoadAndStore_RD_BAB_UAV, HLSLMin16Int_t); - HLK_TEST_MINP(LoadAndStore_RDH_SB_SRV, HLSLMin16Int_t); - HLK_TEST_MINP(LoadAndStore_RDH_SB_UAV, HLSLMin16Int_t); - HLK_TEST_MINP(LoadAndStore_DT_SB_SRV, HLSLMin16Int_t); - HLK_TEST_MINP(LoadAndStore_DT_SB_UAV, HLSLMin16Int_t); - HLK_TEST_MINP(LoadAndStore_RD_SB_SRV, HLSLMin16Int_t); - HLK_TEST_MINP(LoadAndStore_RD_SB_UAV, HLSLMin16Int_t); - - // Quad - HLK_TEST_MINP(QuadReadLaneAt, HLSLMin16Int_t); - HLK_TEST_MINP(QuadReadAcrossX, HLSLMin16Int_t); - HLK_TEST_MINP(QuadReadAcrossY, HLSLMin16Int_t); - HLK_TEST_MINP(QuadReadAcrossDiagonal, HLSLMin16Int_t); - - // Wave - HLK_WAVEOP_TEST_MINP(WaveActiveSum, HLSLMin16Int_t); - HLK_WAVEOP_TEST_MINP(WaveActiveMin, HLSLMin16Int_t); - HLK_WAVEOP_TEST_MINP(WaveActiveMax, HLSLMin16Int_t); - HLK_WAVEOP_TEST_MINP(WaveActiveProduct, HLSLMin16Int_t); - HLK_WAVEOP_TEST_MINP(WaveActiveAllEqual, HLSLMin16Int_t); - HLK_WAVEOP_TEST_MINP(WaveReadLaneAt, HLSLMin16Int_t); - HLK_WAVEOP_TEST_MINP(WaveReadLaneFirst, HLSLMin16Int_t); - HLK_WAVEOP_TEST_MINP(WavePrefixSum, HLSLMin16Int_t); - HLK_WAVEOP_TEST_MINP(WavePrefixProduct, 
HLSLMin16Int_t); - HLK_WAVEOP_TEST_MINP(WaveMultiPrefixSum, HLSLMin16Int_t); - HLK_WAVEOP_TEST_MINP(WaveMultiPrefixProduct, HLSLMin16Int_t); - // WaveMultiPrefixBitAnd/BitOr/BitXor excluded: these intrinsics use the - // any_int type set which does not include min precision integer types. - HLK_WAVEOP_TEST_MINP(WaveMatch, HLSLMin16Int_t); - - // ---- HLSLMin16Uint_t (mirrors uint16_t) ---- - - // TernaryMath - HLK_TEST_MINP(Mad, HLSLMin16Uint_t); - - // BinaryMath - HLK_TEST_MINP(Add, HLSLMin16Uint_t); - HLK_TEST_MINP(Subtract, HLSLMin16Uint_t); - HLK_TEST_MINP(Multiply, HLSLMin16Uint_t); - HLK_TEST_MINP(Divide, HLSLMin16Uint_t); - HLK_TEST_MINP(Modulus, HLSLMin16Uint_t); - HLK_TEST_MINP(Min, HLSLMin16Uint_t); - HLK_TEST_MINP(Max, HLSLMin16Uint_t); - - // Bitwise (logical and shift — bit-manipulation excluded) - HLK_TEST_MINP(And, HLSLMin16Uint_t); - HLK_TEST_MINP(Or, HLSLMin16Uint_t); - HLK_TEST_MINP(Xor, HLSLMin16Uint_t); - HLK_TEST_MINP(LeftShift, HLSLMin16Uint_t); - HLK_TEST_MINP(RightShift, HLSLMin16Uint_t); - - // UnaryMath - HLK_TEST_MINP(Abs, HLSLMin16Uint_t); - HLK_TEST_MINP(Sign, HLSLMin16Uint_t); - - // Unary - HLK_TEST_MINP(Initialize, HLSLMin16Uint_t); - HLK_TEST_MINP(ArrayOperator_StaticAccess, HLSLMin16Uint_t); - HLK_TEST_MINP(ArrayOperator_DynamicAccess, HLSLMin16Uint_t); - HLK_TEST_MINP(ShuffleVector, HLSLMin16Uint_t); - - // Cast - HLK_TEST_MINP(CastToBool, HLSLMin16Uint_t); - HLK_TEST_MINP(CastToInt16, HLSLMin16Uint_t); - HLK_TEST_MINP(CastToInt32, HLSLMin16Uint_t); - HLK_TEST_MINP(CastToInt64, HLSLMin16Uint_t); - HLK_TEST_MINP(CastToUint32, HLSLMin16Uint_t); - HLK_TEST_MINP(CastToUint64, HLSLMin16Uint_t); - HLK_TEST_MINP(CastToFloat16, HLSLMin16Uint_t); - HLK_TEST_MINP(CastToFloat32, HLSLMin16Uint_t); - - // BinaryComparison - HLK_TEST_MINP(LessThan, HLSLMin16Uint_t); - HLK_TEST_MINP(LessEqual, HLSLMin16Uint_t); - HLK_TEST_MINP(GreaterThan, HLSLMin16Uint_t); - HLK_TEST_MINP(GreaterEqual, HLSLMin16Uint_t); - HLK_TEST_MINP(Equal, HLSLMin16Uint_t); 
- HLK_TEST_MINP(NotEqual, HLSLMin16Uint_t); - - // Select - HLK_TEST_MINP(Select, HLSLMin16Uint_t); - - // LoadAndStore - HLK_TEST_MINP(LoadAndStore_RDH_BAB_SRV, HLSLMin16Uint_t); - HLK_TEST_MINP(LoadAndStore_RDH_BAB_UAV, HLSLMin16Uint_t); - HLK_TEST_MINP(LoadAndStore_DT_BAB_SRV, HLSLMin16Uint_t); - HLK_TEST_MINP(LoadAndStore_DT_BAB_UAV, HLSLMin16Uint_t); - HLK_TEST_MINP(LoadAndStore_RD_BAB_SRV, HLSLMin16Uint_t); - HLK_TEST_MINP(LoadAndStore_RD_BAB_UAV, HLSLMin16Uint_t); - HLK_TEST_MINP(LoadAndStore_RDH_SB_SRV, HLSLMin16Uint_t); - HLK_TEST_MINP(LoadAndStore_RDH_SB_UAV, HLSLMin16Uint_t); - HLK_TEST_MINP(LoadAndStore_DT_SB_SRV, HLSLMin16Uint_t); - HLK_TEST_MINP(LoadAndStore_DT_SB_UAV, HLSLMin16Uint_t); - HLK_TEST_MINP(LoadAndStore_RD_SB_SRV, HLSLMin16Uint_t); - HLK_TEST_MINP(LoadAndStore_RD_SB_UAV, HLSLMin16Uint_t); - - // Quad - HLK_TEST_MINP(QuadReadLaneAt, HLSLMin16Uint_t); - HLK_TEST_MINP(QuadReadAcrossX, HLSLMin16Uint_t); - HLK_TEST_MINP(QuadReadAcrossY, HLSLMin16Uint_t); - HLK_TEST_MINP(QuadReadAcrossDiagonal, HLSLMin16Uint_t); - - // Wave - HLK_WAVEOP_TEST_MINP(WaveActiveSum, HLSLMin16Uint_t); - HLK_WAVEOP_TEST_MINP(WaveActiveMin, HLSLMin16Uint_t); - HLK_WAVEOP_TEST_MINP(WaveActiveMax, HLSLMin16Uint_t); - HLK_WAVEOP_TEST_MINP(WaveActiveProduct, HLSLMin16Uint_t); - HLK_WAVEOP_TEST_MINP(WaveActiveAllEqual, HLSLMin16Uint_t); - HLK_WAVEOP_TEST_MINP(WaveReadLaneAt, HLSLMin16Uint_t); - HLK_WAVEOP_TEST_MINP(WaveReadLaneFirst, HLSLMin16Uint_t); - HLK_WAVEOP_TEST_MINP(WavePrefixSum, HLSLMin16Uint_t); - HLK_WAVEOP_TEST_MINP(WavePrefixProduct, HLSLMin16Uint_t); - HLK_WAVEOP_TEST_MINP(WaveMultiPrefixSum, HLSLMin16Uint_t); - HLK_WAVEOP_TEST_MINP(WaveMultiPrefixProduct, HLSLMin16Uint_t); - // WaveMultiPrefixBitAnd/BitOr/BitXor excluded: these intrinsics use the - // any_int type set which does not include min precision integer types. 
- HLK_WAVEOP_TEST_MINP(WaveMatch, HLSLMin16Uint_t); -}; From ad5bb9a299c8a4ea8df76969bd9ad77999a1af4a Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Fri, 13 Mar 2026 21:51:26 -0700 Subject: [PATCH 12/34] Use full-precision types for buffer I/O with min precision types Min precision types (min16float, min16int, min16uint) are hints that allow hardware to use any precision >= the specified minimum, making buffer storage width implementation-defined. Add IO_TYPE/IO_OUT_TYPE compiler defines that map min precision types to their full-precision equivalents (float, int, uint) for buffer Load/Store operations. For all other types, IO_TYPE equals TYPE and IO_OUT_TYPE equals OUT_TYPE. This ensures deterministic buffer data layout regardless of the device's min precision implementation, while still testing min precision computation via explicit casts between the I/O types and the min precision types. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../clang/unittests/HLSLExec/LongVectors.cpp | 19 +++ .../unittests/HLSLExec/ShaderOpArith.xml | 111 +++++++++++------- 2 files changed, 87 insertions(+), 43 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 55dfc08713..3ff4d7385b 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -78,6 +78,20 @@ template constexpr bool isMinPrecisionType() { std::is_same_v; } +// Min precision types (min16float, min16int, min16uint) are hints that allow +// hardware to use any precision >= the specified minimum, making buffer storage +// width implementation-defined. We use full-precision types for buffer I/O to +// ensure deterministic data layout regardless of the device's implementation. 
+const char *getIOTypeString(const char *HLSLType) { + if (strcmp(HLSLType, "min16float") == 0) + return "float"; + if (strcmp(HLSLType, "min16int") == 0) + return "int"; + if (strcmp(HLSLType, "min16uint") == 0) + return "uint"; + return HLSLType; +} + // // Operation Types // @@ -374,6 +388,11 @@ std::string getCompilerOptionsString( CompilerOptions << " -DOUT_TYPE=" << OutDataType.HLSLTypeString; + CompilerOptions << " -DIO_TYPE=" + << getIOTypeString(OpDataType.HLSLTypeString); + CompilerOptions << " -DIO_OUT_TYPE=" + << getIOTypeString(OutDataType.HLSLTypeString); + CompilerOptions << " -DBASIC_OP_TYPE=0x" << std::hex << Operation.Arity; if (AdditionalOptions) diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index 319d468a43..a14b45181d 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -3796,7 +3796,7 @@ void MSMain(uint GID : SV_GroupIndex, data; + vector data; }; RWStructuredBuffer InputVector : register(u0); RWStructuredBuffer OutputVector: register(u1); @@ -3808,10 +3808,12 @@ void MSMain(uint GID : SV_GroupIndex, [numthreads(1,1,1)] void main(uint GI : SV_GroupIndex) { #if USE_STRUCTURED_BUFFER - OutputVector[0].data = InputVector[0].data; + vector temp = (vector)InputVector[0].data; + OutputVector[0].data = (vector)temp; #else - vector Input = InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, Input); + vector RawInput = InputVector.Load< vector >(0); + vector Input = (vector)RawInput; + OutputVector.Store< vector >(0, (vector)Input); #endif }; ]]> @@ -3835,7 +3837,7 @@ void MSMain(uint GID : SV_GroupIndex, data; + vector data; }; StructuredBuffer InputVector : register(t0); RWStructuredBuffer OutputVector : register(u1); @@ -3847,10 +3849,12 @@ void MSMain(uint GID : SV_GroupIndex, [numthreads(1,1,1)] void main(uint GI : SV_GroupIndex) { #if USE_STRUCTURED_BUFFER - OutputVector[0].data = InputVector[0].data; 
+ vector temp = (vector)InputVector[0].data; + OutputVector[0].data = (vector)temp; #else - vector Input = InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, Input); + vector RawInput = InputVector.Load< vector >(0); + vector Input = (vector)RawInput; + OutputVector.Store< vector >(0, (vector)Input); #endif }; ]]> @@ -3882,7 +3886,7 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER struct SLongVec { - vector data; + vector data; }; RWStructuredBuffer InputVector : register(u0); RWStructuredBuffer OutputVector: register(u1); @@ -3896,10 +3900,12 @@ void MSMain(uint GID : SV_GroupIndex, void main(uint GI : SV_GroupIndex) { #if USE_STRUCTURED_BUFFER - OutputVector[0].data = InputVector[0].data; + vector temp = (vector)InputVector[0].data; + OutputVector[0].data = (vector)temp; #else - vector Input = InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, Input); + vector RawInput = InputVector.Load< vector >(0); + vector Input = (vector)RawInput; + OutputVector.Store< vector >(0, (vector)Input); #endif }; @@ -3930,7 +3936,7 @@ void MSMain(uint GID : SV_GroupIndex, data; + vector data; }; StructuredBuffer InputVector : register(t0); RWStructuredBuffer OutputVector: register(u0); @@ -3942,10 +3948,12 @@ void MSMain(uint GID : SV_GroupIndex, [numthreads(1,1,1)] void main(uint GI : SV_GroupIndex) { #if USE_STRUCTURED_BUFFER - OutputVector[0].data = InputVector[0].data; + vector temp = (vector)InputVector[0].data; + OutputVector[0].data = (vector)temp; #else - vector Input = InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, Input); + vector RawInput = InputVector.Load< vector >(0); + vector Input = (vector)RawInput; + OutputVector.Store< vector >(0, (vector)Input); #endif }; ]]> @@ -3973,7 +3981,7 @@ void MSMain(uint GID : SV_GroupIndex, data; + vector data; }; #endif @@ -3983,14 +3991,15 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER StructuredBuffer InputVector = ResourceDescriptorHeap[0]; 
RWStructuredBuffer OutputVector = ResourceDescriptorHeap[1]; - OutputVector[0].data = InputVector[0].data; + vector temp = (vector)InputVector[0].data; + OutputVector[0].data = (vector)temp; #else ByteAddressBuffer InputVector = ResourceDescriptorHeap[0]; RWByteAddressBuffer OutputVector = ResourceDescriptorHeap[1]; - vector Input = InputVector.Load< vector >(0); - - OutputVector.Store< vector >(0, Input); + vector RawInput = InputVector.Load< vector >(0); + vector Input = (vector)RawInput; + OutputVector.Store< vector >(0, (vector)Input); #endif }; ]]> @@ -4017,7 +4026,7 @@ void MSMain(uint GID : SV_GroupIndex, data; + vector data; }; #endif @@ -4026,14 +4035,15 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER RWStructuredBuffer InputVector = ResourceDescriptorHeap[0]; RWStructuredBuffer OutputVector = ResourceDescriptorHeap[1]; - OutputVector[0].data = InputVector[0].data; + vector temp = (vector)InputVector[0].data; + OutputVector[0].data = (vector)temp; #else RWByteAddressBuffer InputVector = ResourceDescriptorHeap[0]; RWByteAddressBuffer OutputVector = ResourceDescriptorHeap[1]; - vector Input = InputVector.Load< vector >(0); - - OutputVector.Store< vector >(0, Input); + vector RawInput = InputVector.Load< vector >(0); + vector Input = (vector)RawInput; + OutputVector.Store< vector >(0, (vector)Input); #endif }; ]]> @@ -4061,6 +4071,17 @@ void MSMain(uint GID : SV_GroupIndex, OUT_TYPE : The type of the output vector, e.g. float, double, int, uint. In most cases OUT_TYPE == TYPE. + IO_TYPE : Full-precision type used for buffer Load operations. For min + precision types (min16float, min16int, min16uint), this maps to + their full-precision equivalent (float, int, uint) because min + precision is a hint that allows hardware to use any precision >= + the specified minimum, making buffer storage width + implementation-defined. Using full-precision types for buffer I/O + ensures deterministic data layout. For all other types, IO_TYPE + equals TYPE. 
+ IO_OUT_TYPE : Same as IO_TYPE but for the output/store type. For all + non-min-precision types, IO_OUT_TYPE equals OUT_TYPE. + NUM : The number of elements in the vector, e.g. 2, 3, 4, 8, 16, 32, FUNC : Used to expand to the HLSL intrinsic being tested. e.g cos, cosh, @@ -4120,7 +4141,7 @@ void MSMain(uint GID : SV_GroupIndex, // Store the high bits in the second half of the output vector. // Because we know the outputs of asuint are always 32 bits, we can // use 4 bytes per element for our offset. - g_OutputVector.Store< vector >(4 * NUM, HighBits); + g_OutputVector.Store< vector >(4 * NUM, HighBits); // Generic store logic in main handles storing LowBits in // g_OutputVector. @@ -4139,7 +4160,7 @@ void MSMain(uint GID : SV_GroupIndex, // Store the exponent outputs in the second half of the output vector. // Exponent values are always floats, so we can use 4 bytes per // element for our offset. - g_OutputVector.Store< vector >(4 * NUM, Exponent); + g_OutputVector.Store< vector >(4 * NUM, Exponent); return Mantissa; } @@ -4280,7 +4301,7 @@ void MSMain(uint GID : SV_GroupIndex, Vector = WavePrefixSum(Vector); if(WaveGetLaneIndex() == MidLane) { - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); } } #endif @@ -4291,7 +4312,7 @@ void MSMain(uint GID : SV_GroupIndex, Vector = WavePrefixProduct(Vector); if(WaveGetLaneIndex() == 2) { - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); } } #endif @@ -4324,7 +4345,7 @@ void MSMain(uint GID : SV_GroupIndex, { // Lane 3 is the last lane in the mask that we care about. Store the // result from it. - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); } } #endif @@ -4356,7 +4377,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Lane 3 is the last lane in the mask. Store the result from it. 
- g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); } } #endif @@ -4387,7 +4408,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Lane 3 is the last lane in the mask. Store the result from it. - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); } } #endif @@ -4425,7 +4446,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Lane 3 is the last lane in the mask. Store the result from it. - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); } } #endif @@ -4466,7 +4487,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Store result from lane 3 (last lane in mask) - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); } } #endif @@ -4514,7 +4535,7 @@ void MSMain(uint GID : SV_GroupIndex, Mantissa = modf(Vector, Exponent); - g_OutputVector.Store< vector >(sizeof(OUT_TYPE) * NUM, Exponent); + g_OutputVector.Store< vector >(sizeof(OUT_TYPE) * NUM, Exponent); return Mantissa; } @@ -4557,7 +4578,7 @@ void MSMain(uint GID : SV_GroupIndex, // lane we arbitrarily chose for validation with fine derivatives. if(LaneIndex == 3) { - g_OutputVector.Store< vector >(0, Result); + g_OutputVector.Store< vector >(0, Result); } } #endif @@ -4586,7 +4607,7 @@ void MSMain(uint GID : SV_GroupIndex, if(LaneIndex == 3) { - g_OutputVector.Store< vector >(0, Result); + g_OutputVector.Store< vector >(0, Result); } } #endif @@ -4609,21 +4630,25 @@ void MSMain(uint GID : SV_GroupIndex, #ifdef FUNC_SHUFFLE_VECTOR // For shuffle vector, the input is a scalar, not a vector. - TYPE Input1 = g_InputVector1.Load(0); + IO_TYPE RawInput1 = g_InputVector1.Load(0); + TYPE Input1 = (TYPE)RawInput1; #else // For all other basic op types the first input is always a vector. 
- vector Input1 = g_InputVector1.Load< vector RawInput1 = g_InputVector1.Load< vector >(0); + vector Input1 = (vector)RawInput1; #endif #if (IS_BINARY_OP || IS_TERNARY_OP) - vector Input2 = g_InputVector2.Load< vector RawInput2 = g_InputVector2.Load< vector >(0); + vector Input2 = (vector)RawInput2; #endif #if IS_TERNARY_OP - vector Input3 = g_InputVector3.Load< vector RawInput3 = g_InputVector3.Load< vector >(0); + vector Input3 = (vector)RawInput3; #endif #ifdef IS_REDUCTION_OP @@ -4676,7 +4701,7 @@ void MSMain(uint GID : SV_GroupIndex, OutputVector = FUNC(Input1, Input2, Input3); #endif - g_OutputVector.Store< vector >(0, OutputVector); + g_OutputVector.Store< vector >(0, OutputVector); }; ]]> From 59e56167fc594f09d7df70e852e42a5fc0ab1f00 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Fri, 13 Mar 2026 22:23:37 -0700 Subject: [PATCH 13/34] Remove Divide and Modulus tests for min16int HLSL does not support signed integer division on minimum-precision types. The compiler rejects these with: 'signed integer division is not supported on minimum-precision types, cast to int to use 32-bit division'. Remove the Divide and Modulus test entries for HLSLMin16Int_t. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tools/clang/unittests/HLSLExec/LongVectors.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 3ff4d7385b..17c5b4a135 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -2912,11 +2912,11 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_TEST(Mad, HLSLMin16Int_t); // BinaryMath + // Note: Divide and Modulus excluded — HLSL does not support signed integer + // division on minimum-precision types. 
HLK_TEST(Add, HLSLMin16Int_t); HLK_TEST(Subtract, HLSLMin16Int_t); HLK_TEST(Multiply, HLSLMin16Int_t); - HLK_TEST(Divide, HLSLMin16Int_t); - HLK_TEST(Modulus, HLSLMin16Int_t); HLK_TEST(Min, HLSLMin16Int_t); HLK_TEST(Max, HLSLMin16Int_t); From 4604c6e989e30f922618b59edc849b6b1fdb4e29 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Fri, 13 Mar 2026 22:24:05 -0700 Subject: [PATCH 14/34] Use float16-exact values for min16float test input data Replace min16float input values that are not exactly representable in float16 with values that are. This avoids precision mismatches between CPU-side expected value computation (float32) and GPU-side min precision results, where the cast to min16float rounds values to the nearest float16 representation. Key changes: - Default1: -0.01f -> -0.03125f (exact power-of-2 fraction) - Positive: 0.01f -> 0.03125f, 5531.0f -> 5504.0f, 331.233f -> 331.25f, 3250.01f -> 3250.0f - RangeHalfPi/RangeOne: replaced with float16-exact fractions covering the same ranges Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../unittests/HLSLExec/LongVectorTestData.h | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index 9b44ff35b3..4ab0e7dacb 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -636,22 +636,24 @@ INPUT_SET(InputSet::SelectCond, 0.0, 1.0); INPUT_SET(InputSet::AllOnes, 1.0); END_INPUT_SETS() -// Min precision input sets. Values are within the fp16 representable range. -// No FP specials (INF/NaN/denorm) as min precision types do not support them. +// Min precision input sets. All values are exactly representable in float16 +// to avoid precision mismatch between CPU-side expected values and GPU-side +// min precision computation. 
No FP specials (INF/NaN/denorm) as min precision +// types do not support them. BEGIN_INPUT_SETS(HLSLMin16Float_t) -INPUT_SET(InputSet::Default1, -1.0f, -1.0f, 1.0f, -0.01f, 1.0f, -0.01f, 1.0f, - -0.01f, 1.0f, -0.01f); +INPUT_SET(InputSet::Default1, -1.0f, -1.0f, 1.0f, -0.03125f, 1.0f, -0.03125f, + 1.0f, -0.03125f, 1.0f, -0.03125f); INPUT_SET(InputSet::Default2, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f); INPUT_SET(InputSet::Default3, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f); INPUT_SET(InputSet::Zero, 0.0f); -INPUT_SET(InputSet::RangeHalfPi, -1.073f, 0.044f, -1.047f, 0.313f, 1.447f, - -0.865f, 1.364f, -0.715f, -0.800f, 0.541f); -INPUT_SET(InputSet::RangeOne, 0.331f, 0.727f, -0.957f, 0.677f, -0.025f, 0.495f, - 0.855f, -0.673f, -0.678f, -0.905f); -INPUT_SET(InputSet::Positive, 1.0f, 1.0f, 342.0f, 0.01f, 5531.0f, 0.01f, 1.0f, - 0.01f, 331.233f, 3250.01f); +INPUT_SET(InputSet::RangeHalfPi, -1.0625f, 0.046875f, -1.046875f, 0.3125f, + 1.4375f, -0.875f, 1.375f, -0.71875f, -0.8125f, 0.5625f); +INPUT_SET(InputSet::RangeOne, 0.328125f, 0.71875f, -0.953125f, 0.671875f, + -0.03125f, 0.5f, 0.84375f, -0.671875f, -0.6875f, -0.90625f); +INPUT_SET(InputSet::Positive, 1.0f, 1.0f, 342.0f, 0.03125f, 5504.0f, 0.03125f, + 1.0f, 0.03125f, 331.25f, 3250.0f); INPUT_SET(InputSet::SelectCond, 0.0f, 1.0f); INPUT_SET(InputSet::AllOnes, 1.0f); END_INPUT_SETS() From 4e654b6d380313176309f03fd37afd1dc26474b2 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Fri, 13 Mar 2026 22:24:34 -0700 Subject: [PATCH 15/34] Constrain min16int/uint input data to 16-bit range Adjust input values so that arithmetic results (multiply, mad, subtract, left-shift, wave prefix products) do not overflow 16-bit integer range. Min precision types compute at >= 16 bits, so results that overflow at 16 bits differ from the 32-bit expected values. 
min16uint changes: - Default1: reduced large values (699->199, 1023->200) so products and wave prefix products fit in uint16 - Default1: ensured all values >= Default2 to avoid subtract underflow (1->3, 6->10, 0->22) - BitShiftRhs: reduced large shifts (12->11, 13->12, 14->12, 15->12) so shifted values fit in uint16 min16int changes: - BitShiftRhs: reduced large shifts (13->11, 14->11, 15->14) so shifted values fit in int16 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tools/clang/unittests/HLSLExec/LongVectorTestData.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index 4ab0e7dacb..b37dad9848 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -659,11 +659,12 @@ INPUT_SET(InputSet::AllOnes, 1.0f); END_INPUT_SETS() // Values constrained to int16 range. Kept small to avoid overflow ambiguity. +// Shift amounts limited so results fit in int16 (-32768..32767). BEGIN_INPUT_SETS(HLSLMin16Int_t) INPUT_SET(InputSet::Default1, -6, 1, 7, 3, 8, 4, -3, 8, 8, -2); INPUT_SET(InputSet::Default2, 5, -6, -3, -2, 9, 3, 1, -3, -7, 2); INPUT_SET(InputSet::Default3, -5, 6, 3, 2, -9, -3, -1, 3, 7, -2); -INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 12, 13, 14, 15); +INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 12, 11, 11, 14); INPUT_SET(InputSet::Zero, 0); INPUT_SET(InputSet::NoZero, 1); INPUT_SET(InputSet::SelectCond, 0, 1); @@ -672,13 +673,14 @@ INPUT_SET(InputSet::WaveMultiPrefixBitwise, 0x0, 0x1, 0x3, 0x4, 0x10, 0x12, 0xF, -1); END_INPUT_SETS() -// Values constrained to uint16 range. Kept small to avoid overflow ambiguity. +// Values constrained to uint16 range. Kept small so that multiply, mad, +// subtract, shift, and wave prefix products do not overflow 16 bits. 
BEGIN_INPUT_SETS(HLSLMin16Uint_t) -INPUT_SET(InputSet::Default1, 1, 699, 3, 1023, 5, 6, 0, 8, 9, 10); +INPUT_SET(InputSet::Default1, 3, 199, 3, 200, 5, 10, 22, 8, 9, 10); INPUT_SET(InputSet::Default2, 2, 111, 3, 4, 5, 9, 21, 8, 9, 10); INPUT_SET(InputSet::Default3, 4, 112, 4, 5, 3, 7, 21, 1, 11, 9); INPUT_SET(InputSet::Zero, 0); -INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 12, 13, 14, 15); +INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 11, 12, 12, 12); INPUT_SET(InputSet::SelectCond, 0, 1); INPUT_SET(InputSet::AllOnes, 1); INPUT_SET(InputSet::WaveMultiPrefixBitwise, 0x0, 0x1, 0x3, 0x4, 0x10, 0x12, 0xF, From 351dda7bd0e9e7ee7400603498b6d9ed8bc213e7 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Fri, 13 Mar 2026 22:49:16 -0700 Subject: [PATCH 16/34] Remove wave/quad op tests for min precision types Wave and quad intrinsics (WaveReadLaneAt, WaveReadLaneFirst, WaveActiveSum/Min/Max/Product/AllEqual, WavePrefixSum/Product, WaveMultiPrefixSum/Product, WaveMatch, QuadReadLaneAt, QuadReadAcrossX/Y/Diagonal) do not support min precision types (min16float, min16int, min16uint). The DXIL wave/quad shuffle operations operate on 32-bit or 64-bit register slots and do not handle 16-bit min precision payloads. Removes 48 test entries (16 per min precision type) and adds explanatory comments. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../clang/unittests/HLSLExec/LongVectors.cpp | 70 +++---------------- 1 file changed, 9 insertions(+), 61 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 17c5b4a135..d2134e9c00 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -2886,25 +2886,9 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_TEST(DerivativeDdxFine, HLSLMin16Float_t); HLK_TEST(DerivativeDdyFine, HLSLMin16Float_t); - // Quad - HLK_TEST(QuadReadLaneAt, HLSLMin16Float_t); - HLK_TEST(QuadReadAcrossX, HLSLMin16Float_t); - HLK_TEST(QuadReadAcrossY, HLSLMin16Float_t); - HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Float_t); - - // Wave - HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Float_t); - HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Float_t); - HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Float_t); - HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Float_t); - HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Float_t); - HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Float_t); - HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Float_t); - HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Float_t); - HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Float_t); - HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Float_t); - HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Float_t); - HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Float_t); + // Wave and Quad ops excluded: these intrinsics do not support min precision + // types. The DXIL wave/quad shuffle operations operate on 32-bit or 64-bit + // register slots and do not handle 16-bit min precision payloads. 
// ---- HLSLMin16Int_t (mirrors int16_t) ---- @@ -2980,27 +2964,9 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Int_t); HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Int_t); - // Quad - HLK_TEST(QuadReadLaneAt, HLSLMin16Int_t); - HLK_TEST(QuadReadAcrossX, HLSLMin16Int_t); - HLK_TEST(QuadReadAcrossY, HLSLMin16Int_t); - HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Int_t); - - // Wave - HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Int_t); - HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Int_t); - HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Int_t); - HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Int_t); - HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Int_t); - HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Int_t); - HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Int_t); - HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Int_t); - HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Int_t); - HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Int_t); - HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Int_t); - // WaveMultiPrefixBitAnd/BitOr/BitXor excluded: these intrinsics use the - // any_int type set which does not include min precision integer types. - HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Int_t); + // Wave and Quad ops excluded: these intrinsics do not support min precision + // types. The DXIL wave/quad shuffle operations operate on 32-bit or 64-bit + // register slots and do not handle 16-bit min precision payloads. 
// ---- HLSLMin16Uint_t (mirrors uint16_t) ---- @@ -3068,27 +3034,9 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Uint_t); HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Uint_t); - // Quad - HLK_TEST(QuadReadLaneAt, HLSLMin16Uint_t); - HLK_TEST(QuadReadAcrossX, HLSLMin16Uint_t); - HLK_TEST(QuadReadAcrossY, HLSLMin16Uint_t); - HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Uint_t); - - // Wave - HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Uint_t); - HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Uint_t); - HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Uint_t); - HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Uint_t); - HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Uint_t); - HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Uint_t); - HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Uint_t); - HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Uint_t); - HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Uint_t); - HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Uint_t); - HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Uint_t); - // WaveMultiPrefixBitAnd/BitOr/BitXor excluded: these intrinsics use the - // any_int type set which does not include min precision integer types. - HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Uint_t); + // Wave and Quad ops excluded: these intrinsics do not support min precision + // types. The DXIL wave/quad shuffle operations operate on 32-bit or 64-bit + // register slots and do not handle 16-bit min precision payloads. }; #define HLK_TEST_DOUBLE(Op, DataType) \ From d8cfc9ee5314d1723724028acffcdb4cb04d02da Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Fri, 13 Mar 2026 22:53:40 -0700 Subject: [PATCH 17/34] Use half-precision ULP for min16float dot product tolerance The dot product tolerance computation was using float32 ULPs for HLSLMin16Float_t, but the GPU may compute at float16 precision. With NUM=256 elements the accumulated error exceeds the float32-based epsilon. 
Use HLSLHalf_t::GetULP to compute half-precision ULPs for min16float, matching the approach already used for HLSLHalf_t. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tools/clang/unittests/HLSLExec/LongVectors.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index d2134e9c00..da2823f3a2 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1359,7 +1359,12 @@ static double computeAbsoluteEpsilon(double A, double ULPTolerance) { if constexpr (std::is_same_v) ULP = HLSLHalf_t::GetULP(A); - else + else if constexpr (std::is_same_v) { + // Min precision floats may be computed at float16 on the GPU, so use + // half-precision ULP for tolerance. Reuse HLSLHalf_t::GetULP which + // computes ULP by incrementing the float16 bit representation. + ULP = HLSLHalf_t::GetULP(HLSLHalf_t(static_cast(A))); + } else ULP = std::nextafter(static_cast(A), std::numeric_limits::infinity()) - static_cast(A); From 7088172b433d6763931dd3d57f2cd03f6118052e Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Mon, 16 Mar 2026 16:46:45 -0700 Subject: [PATCH 18/34] Add comment explaining why IO_TYPE and IO_OUT_TYPE are both needed Cast operations have different input and output types (e.g. min16float input with int32 output), so each side needs its own IO type mapping. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tools/clang/unittests/HLSLExec/LongVectors.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index da2823f3a2..8ca5ebd3fe 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -388,6 +388,11 @@ std::string getCompilerOptionsString( CompilerOptions << " -DOUT_TYPE=" << OutDataType.HLSLTypeString; + // IO_TYPE / IO_OUT_TYPE: full-precision equivalents of TYPE / OUT_TYPE for + // buffer Load/Store. For non-min-precision types these equal TYPE / OUT_TYPE. + // Both are needed because cast operations can have different input and output + // types (e.g. TYPE=min16float → IO_TYPE=float, OUT_TYPE=int → + // IO_OUT_TYPE=int). CompilerOptions << " -DIO_TYPE=" << getIOTypeString(OpDataType.HLSLTypeString); CompilerOptions << " -DIO_OUT_TYPE=" From b5e69b5df285f3d84ed17a5f34a7f6626a700984 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Mon, 16 Mar 2026 17:09:06 -0700 Subject: [PATCH 19/34] Make min precision wrapper constructors constexpr Enables constexpr on MaskShiftAmount's ShiftMask local variable, restoring the original constexpr qualifier that was downgraded to const. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../unittests/HLSLExec/LongVectorTestData.h | 45 ++++++++++--------- .../clang/unittests/HLSLExec/LongVectors.cpp | 2 +- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index b37dad9848..abec41afd3 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -241,13 +241,14 @@ struct HLSLHalf_t { // are 32-bit in DXIL storage. 
These thin wrappers provide distinct C++ types // that map to different HLSL type strings via DATA_TYPE. struct HLSLMin16Float_t { - HLSLMin16Float_t() : Val(0.0f) {} - HLSLMin16Float_t(float F) : Val(F) {} - HLSLMin16Float_t(double D) : Val(static_cast(D)) {} - explicit HLSLMin16Float_t(int I) : Val(static_cast(I)) {} - explicit HLSLMin16Float_t(uint32_t U) : Val(static_cast(U)) {} + constexpr HLSLMin16Float_t() : Val(0.0f) {} + constexpr HLSLMin16Float_t(float F) : Val(F) {} + constexpr HLSLMin16Float_t(double D) : Val(static_cast(D)) {} + explicit constexpr HLSLMin16Float_t(int I) : Val(static_cast(I)) {} + explicit constexpr HLSLMin16Float_t(uint32_t U) + : Val(static_cast(U)) {} - operator float() const { return Val; } + constexpr operator float() const { return Val; } bool operator==(const HLSLMin16Float_t &O) const { return Val == O.Val; } bool operator!=(const HLSLMin16Float_t &O) const { return Val != O.Val; } @@ -289,15 +290,15 @@ static_assert(sizeof(HLSLMin16Float_t) == sizeof(float), "HLSLMin16Float_t must be same size as float"); struct HLSLMin16Int_t { - HLSLMin16Int_t() : Val(0) {} - HLSLMin16Int_t(int32_t I) : Val(I) {} - HLSLMin16Int_t(int64_t I) : Val(static_cast(I)) {} - HLSLMin16Int_t(uint32_t U) : Val(static_cast(U)) {} - HLSLMin16Int_t(uint64_t U) : Val(static_cast(U)) {} - HLSLMin16Int_t(float F) : Val(static_cast(F)) {} - HLSLMin16Int_t(double D) : Val(static_cast(D)) {} + constexpr HLSLMin16Int_t() : Val(0) {} + constexpr HLSLMin16Int_t(int32_t I) : Val(I) {} + constexpr HLSLMin16Int_t(int64_t I) : Val(static_cast(I)) {} + constexpr HLSLMin16Int_t(uint32_t U) : Val(static_cast(U)) {} + constexpr HLSLMin16Int_t(uint64_t U) : Val(static_cast(U)) {} + constexpr HLSLMin16Int_t(float F) : Val(static_cast(F)) {} + constexpr HLSLMin16Int_t(double D) : Val(static_cast(D)) {} - operator int32_t() const { return Val; } + constexpr operator int32_t() const { return Val; } bool operator==(const HLSLMin16Int_t &O) const { return Val == O.Val; } bool 
operator!=(const HLSLMin16Int_t &O) const { return Val != O.Val; } @@ -358,14 +359,14 @@ static_assert(sizeof(HLSLMin16Int_t) == sizeof(int32_t), "HLSLMin16Int_t must be same size as int32_t"); struct HLSLMin16Uint_t { - HLSLMin16Uint_t() : Val(0) {} - HLSLMin16Uint_t(uint32_t U) : Val(U) {} - HLSLMin16Uint_t(uint64_t U) : Val(static_cast(U)) {} - HLSLMin16Uint_t(int32_t I) : Val(static_cast(I)) {} - HLSLMin16Uint_t(float F) : Val(static_cast(F)) {} - HLSLMin16Uint_t(double D) : Val(static_cast(D)) {} - - operator uint32_t() const { return Val; } + constexpr HLSLMin16Uint_t() : Val(0) {} + constexpr HLSLMin16Uint_t(uint32_t U) : Val(U) {} + constexpr HLSLMin16Uint_t(uint64_t U) : Val(static_cast(U)) {} + constexpr HLSLMin16Uint_t(int32_t I) : Val(static_cast(I)) {} + constexpr HLSLMin16Uint_t(float F) : Val(static_cast(F)) {} + constexpr HLSLMin16Uint_t(double D) : Val(static_cast(D)) {} + + constexpr operator uint32_t() const { return Val; } bool operator==(const HLSLMin16Uint_t &O) const { return Val == O.Val; } bool operator!=(const HLSLMin16Uint_t &O) const { return Val != O.Val; } diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 8ca5ebd3fe..0e684a0326 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -821,7 +821,7 @@ DEFAULT_OP_2(OpType::Xor, (A ^ B)); // 32-bit, 6 bits for 64-bit). We must do the same in C++ to avoid undefined // behavior when shift amount >= bit width, and to match GPU results. 
template T MaskShiftAmount(T ShiftAmount) { - const T ShiftMask = static_cast(sizeof(T) * 8 - 1); + constexpr T ShiftMask = static_cast(sizeof(T) * 8 - 1); return ShiftAmount & ShiftMask; } From 501e73ca70bf6b1893c78e0e3bbd65607937f284 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Mon, 16 Mar 2026 17:29:30 -0700 Subject: [PATCH 20/34] Remove unnecessary static_asserts from min precision wrappers Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tools/clang/unittests/HLSLExec/LongVectorTestData.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index abec41afd3..42268dc47c 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -286,9 +286,6 @@ struct HLSLMin16Float_t { float Val; }; -static_assert(sizeof(HLSLMin16Float_t) == sizeof(float), - "HLSLMin16Float_t must be same size as float"); - struct HLSLMin16Int_t { constexpr HLSLMin16Int_t() : Val(0) {} constexpr HLSLMin16Int_t(int32_t I) : Val(I) {} @@ -355,9 +352,6 @@ struct HLSLMin16Int_t { int32_t Val; }; -static_assert(sizeof(HLSLMin16Int_t) == sizeof(int32_t), - "HLSLMin16Int_t must be same size as int32_t"); - struct HLSLMin16Uint_t { constexpr HLSLMin16Uint_t() : Val(0) {} constexpr HLSLMin16Uint_t(uint32_t U) : Val(U) {} @@ -421,9 +415,6 @@ struct HLSLMin16Uint_t { uint32_t Val; }; -static_assert(sizeof(HLSLMin16Uint_t) == sizeof(uint32_t), - "HLSLMin16Uint_t must be same size as uint32_t"); - enum class InputSet { #define INPUT_SET(SYMBOL) SYMBOL, #include "LongVectorOps.def" From 7f8613dab95c8bc8d7b36f5dbbf866cc41e1f66c Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Mon, 16 Mar 2026 17:38:21 -0700 Subject: [PATCH 21/34] Remove unused WaveMultiPrefixBitwise input sets for min precision types These input sets are not referenced by any test entry since wave ops are excluded for min precision 
types. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tools/clang/unittests/HLSLExec/LongVectorTestData.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index 42268dc47c..0367d8b8ee 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -661,8 +661,6 @@ INPUT_SET(InputSet::Zero, 0); INPUT_SET(InputSet::NoZero, 1); INPUT_SET(InputSet::SelectCond, 0, 1); INPUT_SET(InputSet::AllOnes, 1); -INPUT_SET(InputSet::WaveMultiPrefixBitwise, 0x0, 0x1, 0x3, 0x4, 0x10, 0x12, 0xF, - -1); END_INPUT_SETS() // Values constrained to uint16 range. Kept small so that multiply, mad, @@ -675,8 +673,6 @@ INPUT_SET(InputSet::Zero, 0); INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 11, 12, 12, 12); INPUT_SET(InputSet::SelectCond, 0, 1); INPUT_SET(InputSet::AllOnes, 1); -INPUT_SET(InputSet::WaveMultiPrefixBitwise, 0x0, 0x1, 0x3, 0x4, 0x10, 0x12, 0xF, - std::numeric_limits::max()); END_INPUT_SETS() #undef BEGIN_INPUT_SETS From b723e8673a63eeb993bbf532d52ec70b7720313c Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Mon, 16 Mar 2026 17:48:43 -0700 Subject: [PATCH 22/34] Gate IO_TYPE/IO_OUT_TYPE behind MIN_PRECISION preprocessor flag Only define -DMIN_PRECISION, -DIO_TYPE, and -DIO_OUT_TYPE for min precision test types. Shader templates use #ifdef MIN_PRECISION to gate the load-with-cast paths, leaving non-min-precision shaders completely unchanged. Fallback #ifndef defines ensure IO_OUT_TYPE resolves to OUT_TYPE for Store calls when MIN_PRECISION is not set. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../clang/unittests/HLSLExec/LongVectors.cpp | 25 ++- .../unittests/HLSLExec/ShaderOpArith.xml | 155 ++++++++++++++---- 2 files changed, 138 insertions(+), 42 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 0e684a0326..d294ceacde 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -388,15 +388,22 @@ std::string getCompilerOptionsString( CompilerOptions << " -DOUT_TYPE=" << OutDataType.HLSLTypeString; - // IO_TYPE / IO_OUT_TYPE: full-precision equivalents of TYPE / OUT_TYPE for - // buffer Load/Store. For non-min-precision types these equal TYPE / OUT_TYPE. - // Both are needed because cast operations can have different input and output - // types (e.g. TYPE=min16float → IO_TYPE=float, OUT_TYPE=int → - // IO_OUT_TYPE=int). - CompilerOptions << " -DIO_TYPE=" - << getIOTypeString(OpDataType.HLSLTypeString); - CompilerOptions << " -DIO_OUT_TYPE=" - << getIOTypeString(OutDataType.HLSLTypeString); + // For min precision types, buffer storage width is implementation-defined. + // We define IO_TYPE/IO_OUT_TYPE as full-precision equivalents and set + // MIN_PRECISION so shader templates use them for Load/Store operations. + // Both IO defines are needed because cast operations can have different input + // and output types (e.g. TYPE=min16float, IO_TYPE=float, OUT_TYPE=int, + // IO_OUT_TYPE=int). Non-min-precision tests are unaffected. 
+ if (strcmp(getIOTypeString(OpDataType.HLSLTypeString), + OpDataType.HLSLTypeString) != 0 || + strcmp(getIOTypeString(OutDataType.HLSLTypeString), + OutDataType.HLSLTypeString) != 0) { + CompilerOptions << " -DMIN_PRECISION"; + CompilerOptions << " -DIO_TYPE=" + << getIOTypeString(OpDataType.HLSLTypeString); + CompilerOptions << " -DIO_OUT_TYPE=" + << getIOTypeString(OutDataType.HLSLTypeString); + } CompilerOptions << " -DBASIC_OP_TYPE=0x" << std::hex << Operation.Arity; diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index a14b45181d..8354269ef4 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -3796,7 +3796,11 @@ void MSMain(uint GID : SV_GroupIndex, data; + #else + vector data; + #endif }; RWStructuredBuffer InputVector : register(u0); RWStructuredBuffer OutputVector: register(u1); @@ -3808,12 +3812,20 @@ void MSMain(uint GID : SV_GroupIndex, [numthreads(1,1,1)] void main(uint GI : SV_GroupIndex) { #if USE_STRUCTURED_BUFFER + #ifdef MIN_PRECISION vector temp = (vector)InputVector[0].data; OutputVector[0].data = (vector)temp; + #else + OutputVector[0].data = InputVector[0].data; + #endif #else - vector RawInput = InputVector.Load< vector >(0); - vector Input = (vector)RawInput; + #ifdef MIN_PRECISION + vector Input = (vector)InputVector.Load< vector >(0); OutputVector.Store< vector >(0, (vector)Input); + #else + vector Input = InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, Input); + #endif #endif }; ]]> @@ -3837,7 +3849,11 @@ void MSMain(uint GID : SV_GroupIndex, data; + #else + vector data; + #endif }; StructuredBuffer InputVector : register(t0); RWStructuredBuffer OutputVector : register(u1); @@ -3849,12 +3865,20 @@ void MSMain(uint GID : SV_GroupIndex, [numthreads(1,1,1)] void main(uint GI : SV_GroupIndex) { #if USE_STRUCTURED_BUFFER + #ifdef MIN_PRECISION vector temp = (vector)InputVector[0].data; 
OutputVector[0].data = (vector)temp; + #else + OutputVector[0].data = InputVector[0].data; + #endif #else - vector RawInput = InputVector.Load< vector >(0); - vector Input = (vector)RawInput; + #ifdef MIN_PRECISION + vector Input = (vector)InputVector.Load< vector >(0); OutputVector.Store< vector >(0, (vector)Input); + #else + vector Input = InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, Input); + #endif #endif }; ]]> @@ -3886,7 +3910,11 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER struct SLongVec { + #ifdef MIN_PRECISION vector data; + #else + vector data; + #endif }; RWStructuredBuffer InputVector : register(u0); RWStructuredBuffer OutputVector: register(u1); @@ -3900,12 +3928,20 @@ void MSMain(uint GID : SV_GroupIndex, void main(uint GI : SV_GroupIndex) { #if USE_STRUCTURED_BUFFER + #ifdef MIN_PRECISION vector temp = (vector)InputVector[0].data; OutputVector[0].data = (vector)temp; + #else + OutputVector[0].data = InputVector[0].data; + #endif #else - vector RawInput = InputVector.Load< vector >(0); - vector Input = (vector)RawInput; + #ifdef MIN_PRECISION + vector Input = (vector)InputVector.Load< vector >(0); OutputVector.Store< vector >(0, (vector)Input); + #else + vector Input = InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, Input); + #endif #endif }; @@ -3936,7 +3972,11 @@ void MSMain(uint GID : SV_GroupIndex, data; + #else + vector data; + #endif }; StructuredBuffer InputVector : register(t0); RWStructuredBuffer OutputVector: register(u0); @@ -3948,12 +3988,20 @@ void MSMain(uint GID : SV_GroupIndex, [numthreads(1,1,1)] void main(uint GI : SV_GroupIndex) { #if USE_STRUCTURED_BUFFER + #ifdef MIN_PRECISION vector temp = (vector)InputVector[0].data; OutputVector[0].data = (vector)temp; + #else + OutputVector[0].data = InputVector[0].data; + #endif #else - vector RawInput = InputVector.Load< vector >(0); - vector Input = (vector)RawInput; + #ifdef MIN_PRECISION + vector Input = (vector)InputVector.Load< 
vector >(0); OutputVector.Store< vector >(0, (vector)Input); + #else + vector Input = InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, Input); + #endif #endif }; ]]> @@ -3981,7 +4029,11 @@ void MSMain(uint GID : SV_GroupIndex, data; + #else + vector data; + #endif }; #endif @@ -3991,15 +4043,23 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER StructuredBuffer InputVector = ResourceDescriptorHeap[0]; RWStructuredBuffer OutputVector = ResourceDescriptorHeap[1]; + #ifdef MIN_PRECISION vector temp = (vector)InputVector[0].data; OutputVector[0].data = (vector)temp; + #else + OutputVector[0].data = InputVector[0].data; + #endif #else ByteAddressBuffer InputVector = ResourceDescriptorHeap[0]; RWByteAddressBuffer OutputVector = ResourceDescriptorHeap[1]; - vector RawInput = InputVector.Load< vector >(0); - vector Input = (vector)RawInput; + #ifdef MIN_PRECISION + vector Input = (vector)InputVector.Load< vector >(0); OutputVector.Store< vector >(0, (vector)Input); + #else + vector Input = InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, Input); + #endif #endif }; ]]> @@ -4026,7 +4086,11 @@ void MSMain(uint GID : SV_GroupIndex, data; + #else + vector data; + #endif }; #endif @@ -4035,15 +4099,23 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER RWStructuredBuffer InputVector = ResourceDescriptorHeap[0]; RWStructuredBuffer OutputVector = ResourceDescriptorHeap[1]; + #ifdef MIN_PRECISION vector temp = (vector)InputVector[0].data; OutputVector[0].data = (vector)temp; + #else + OutputVector[0].data = InputVector[0].data; + #endif #else RWByteAddressBuffer InputVector = ResourceDescriptorHeap[0]; RWByteAddressBuffer OutputVector = ResourceDescriptorHeap[1]; - vector RawInput = InputVector.Load< vector >(0); - vector Input = (vector)RawInput; + #ifdef MIN_PRECISION + vector Input = (vector)InputVector.Load< vector >(0); OutputVector.Store< vector >(0, (vector)Input); + #else + vector Input = InputVector.Load< 
vector >(0); + OutputVector.Store< vector >(0, Input); + #endif #endif }; ]]> @@ -4071,16 +4143,14 @@ void MSMain(uint GID : SV_GroupIndex, OUT_TYPE : The type of the output vector, e.g. float, double, int, uint. In most cases OUT_TYPE == TYPE. - IO_TYPE : Full-precision type used for buffer Load operations. For min - precision types (min16float, min16int, min16uint), this maps to - their full-precision equivalent (float, int, uint) because min - precision is a hint that allows hardware to use any precision >= - the specified minimum, making buffer storage width - implementation-defined. Using full-precision types for buffer I/O - ensures deterministic data layout. For all other types, IO_TYPE - equals TYPE. - IO_OUT_TYPE : Same as IO_TYPE but for the output/store type. For all - non-min-precision types, IO_OUT_TYPE equals OUT_TYPE. + IO_TYPE : (Min precision only) Full-precision type used for buffer Load/Store. + Maps min16float/min16int/min16uint to float/int/uint for + deterministic buffer layout. Only defined when MIN_PRECISION is set; + otherwise falls back to TYPE via #ifndef in the shader. + IO_OUT_TYPE : (Min precision only) Same as IO_TYPE but derived from OUT_TYPE. + Falls back to OUT_TYPE via #ifndef when not defined. + MIN_PRECISION : Defined when TYPE is a min precision type. Gates the + IO_TYPE/IO_OUT_TYPE load/store paths in shader templates. NUM : The number of elements in the vector, e.g. 2, 3, 4, 8, 16, 32, @@ -4104,6 +4174,16 @@ void MSMain(uint GID : SV_GroupIndex, RWByteAddressBuffer g_InputVector3 : register(u2); RWByteAddressBuffer g_OutputVector : register(u3); + // For min precision types, IO_TYPE/IO_OUT_TYPE are defined as + // full-precision equivalents for deterministic buffer I/O layout. + // For all other types, fall back to TYPE/OUT_TYPE (no-op). 
+ #ifndef IO_TYPE + #define IO_TYPE TYPE + #endif + #ifndef IO_OUT_TYPE + #define IO_OUT_TYPE OUT_TYPE + #endif + #define IS_UNARY_OP (BASIC_OP_TYPE == 0x1) #define IS_BINARY_OP (BASIC_OP_TYPE == 0x2) #define IS_TERNARY_OP (BASIC_OP_TYPE == 0x3) @@ -4630,25 +4710,34 @@ void MSMain(uint GID : SV_GroupIndex, #ifdef FUNC_SHUFFLE_VECTOR // For shuffle vector, the input is a scalar, not a vector. - IO_TYPE RawInput1 = g_InputVector1.Load(0); - TYPE Input1 = (TYPE)RawInput1; + #ifdef MIN_PRECISION + TYPE Input1 = (TYPE)g_InputVector1.Load(0); + #else + TYPE Input1 = g_InputVector1.Load(0); + #endif #else // For all other basic op types the first input is always a vector. - vector RawInput1 = g_InputVector1.Load< vector >(0); - vector Input1 = (vector)RawInput1; + #ifdef MIN_PRECISION + vector Input1 = (vector)g_InputVector1.Load< vector >(0); + #else + vector Input1 = g_InputVector1.Load< vector >(0); + #endif #endif #if (IS_BINARY_OP || IS_TERNARY_OP) - vector RawInput2 = g_InputVector2.Load< vector >(0); - vector Input2 = (vector)RawInput2; + #ifdef MIN_PRECISION + vector Input2 = (vector)g_InputVector2.Load< vector >(0); + #else + vector Input2 = g_InputVector2.Load< vector >(0); + #endif #endif #if IS_TERNARY_OP - vector RawInput3 = g_InputVector3.Load< vector >(0); - vector Input3 = (vector)RawInput3; + #ifdef MIN_PRECISION + vector Input3 = (vector)g_InputVector3.Load< vector >(0); + #else + vector Input3 = g_InputVector3.Load< vector >(0); + #endif #endif #ifdef IS_REDUCTION_OP From bbfb62434a753c7ea726d6d2f306dc50d1b0db84 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Mon, 16 Mar 2026 17:53:27 -0700 Subject: [PATCH 23/34] Revert IO_OUT_TYPE to OUT_TYPE for ops excluded from min precision Wave, quad, splitdouble, frexp, and modf stores will never be reached by min precision test types, so IO_OUT_TYPE is unnecessary. Keep it only for the final main store and derivative stores which are exercised by min precision tests. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../unittests/HLSLExec/ShaderOpArith.xml | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index 8354269ef4..694eda77f1 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -4221,7 +4221,7 @@ void MSMain(uint GID : SV_GroupIndex, // Store the high bits in the second half of the output vector. // Because we know the outputs of asuint are always 32 bits, we can // use 4 bytes per element for our offset. - g_OutputVector.Store< vector >(4 * NUM, HighBits); + g_OutputVector.Store< vector >(4 * NUM, HighBits); // Generic store logic in main handles storing LowBits in // g_OutputVector. @@ -4240,7 +4240,7 @@ void MSMain(uint GID : SV_GroupIndex, // Store the exponent outputs in the second half of the output vector. // Exponent values are always floats, so we can use 4 bytes per // element for our offset. - g_OutputVector.Store< vector >(4 * NUM, Exponent); + g_OutputVector.Store< vector >(4 * NUM, Exponent); return Mantissa; } @@ -4381,7 +4381,7 @@ void MSMain(uint GID : SV_GroupIndex, Vector = WavePrefixSum(Vector); if(WaveGetLaneIndex() == MidLane) { - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); } } #endif @@ -4392,7 +4392,7 @@ void MSMain(uint GID : SV_GroupIndex, Vector = WavePrefixProduct(Vector); if(WaveGetLaneIndex() == 2) { - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); } } #endif @@ -4425,7 +4425,7 @@ void MSMain(uint GID : SV_GroupIndex, { // Lane 3 is the last lane in the mask that we care about. Store the // result from it. 
- g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); } } #endif @@ -4457,7 +4457,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Lane 3 is the last lane in the mask. Store the result from it. - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); } } #endif @@ -4488,7 +4488,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Lane 3 is the last lane in the mask. Store the result from it. - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); } } #endif @@ -4526,7 +4526,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Lane 3 is the last lane in the mask. Store the result from it. - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); } } #endif @@ -4567,7 +4567,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Store result from lane 3 (last lane in mask) - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); } } #endif @@ -4615,7 +4615,7 @@ void MSMain(uint GID : SV_GroupIndex, Mantissa = modf(Vector, Exponent); - g_OutputVector.Store< vector >(sizeof(OUT_TYPE) * NUM, Exponent); + g_OutputVector.Store< vector >(sizeof(OUT_TYPE) * NUM, Exponent); return Mantissa; } From cc2c4b17d5c6ade3e8384992f80ad0bee60cb062 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Mon, 16 Mar 2026 18:02:41 -0700 Subject: [PATCH 24/34] Refactor: use dispatchMinPrecisionTest and HLK_MIN_PRECISION_TEST macro Move min precision IO_TYPE/IO_OUT_TYPE/MIN_PRECISION defines from getCompilerOptionsString into a dedicated dispatchMinPrecisionTest function that passes them via AdditionalCompilerOptions. This matches the existing pattern used by dispatchWaveOpTest and keeps the shared compiler options builder clean. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../clang/unittests/HLSLExec/LongVectors.cpp | 397 ++++++++++-------- 1 file changed, 213 insertions(+), 184 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index d294ceacde..0444ed12a4 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -388,23 +388,6 @@ std::string getCompilerOptionsString( CompilerOptions << " -DOUT_TYPE=" << OutDataType.HLSLTypeString; - // For min precision types, buffer storage width is implementation-defined. - // We define IO_TYPE/IO_OUT_TYPE as full-precision equivalents and set - // MIN_PRECISION so shader templates use them for Load/Store operations. - // Both IO defines are needed because cast operations can have different input - // and output types (e.g. TYPE=min16float, IO_TYPE=float, OUT_TYPE=int, - // IO_OUT_TYPE=int). Non-min-precision tests are unaffected. 
- if (strcmp(getIOTypeString(OpDataType.HLSLTypeString), - OpDataType.HLSLTypeString) != 0 || - strcmp(getIOTypeString(OutDataType.HLSLTypeString), - OutDataType.HLSLTypeString) != 0) { - CompilerOptions << " -DMIN_PRECISION"; - CompilerOptions << " -DIO_TYPE=" - << getIOTypeString(OpDataType.HLSLTypeString); - CompilerOptions << " -DIO_OUT_TYPE=" - << getIOTypeString(OutDataType.HLSLTypeString); - } - CompilerOptions << " -DBASIC_OP_TYPE=0x" << std::hex << Operation.Arity; if (AdditionalOptions) @@ -1897,6 +1880,41 @@ void dispatchWaveOpTest(ID3D12Device *D3DDevice, bool VerboseLogging, } } +template +void dispatchMinPrecisionTest(ID3D12Device *D3DDevice, bool VerboseLogging, + size_t OverrideInputSize) { + static_assert(isMinPrecisionType(), + "dispatchMinPrecisionTest only for min precision types"); + + const std::vector InputVectorSizes = + getInputSizesToTest(OverrideInputSize); + + constexpr const Operation &Operation = getOperation(OP); + Op Op; + + // Min precision buffer storage width is implementation-defined, so we use + // full-precision types for Load/Store via IO_TYPE/IO_OUT_TYPE defines. 
+ const DataType &OpDataType = getDataType(); + + for (size_t VectorSize : InputVectorSizes) { + std::vector> Inputs = + buildTestInputs(VectorSize, Operation.InputSets, Operation.Arity); + + auto Expected = ExpectedBuilder::buildExpected(Op, Inputs); + + using OutT = typename decltype(Expected)::value_type; + const DataType &OutDataType = getDataType(); + + const std::string AdditionalCompilerOptions = + std::string("-DMIN_PRECISION") + + " -DIO_TYPE=" + getIOTypeString(OpDataType.HLSLTypeString) + + " -DIO_OUT_TYPE=" + getIOTypeString(OutDataType.HLSLTypeString); + + runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected, + Op.ValidationConfig, AdditionalCompilerOptions); + } +} + } // namespace LongVector using namespace LongVector; @@ -1905,6 +1923,9 @@ using namespace LongVector; #define HLK_TEST(Op, DataType) \ TEST_METHOD(Op##_##DataType) { runTest(); } +#define HLK_MIN_PRECISION_TEST(Op, DataType) \ + TEST_METHOD(Op##_##DataType) { runMinPrecisionTest(); } + #define HLK_WAVEOP_TEST(Op, DataType) \ TEST_METHOD(Op##_##DataType) { \ BEGIN_TEST_METHOD_PROPERTIES() \ @@ -2034,6 +2055,14 @@ class TestClassCommon { dispatchTest(D3DDevice, VerboseLogging, OverrideInputSize); } + template void runMinPrecisionTest() { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + dispatchMinPrecisionTest(D3DDevice, VerboseLogging, + OverrideInputSize); + } + protected: CComPtr D3DDevice; @@ -2809,99 +2838,99 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { // ---- HLSLMin16Float_t (mirrors HLSLHalf_t) ---- // TernaryMath - HLK_TEST(Mad, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Mad, HLSLMin16Float_t); // BinaryMath - HLK_TEST(Add, HLSLMin16Float_t); - HLK_TEST(Subtract, HLSLMin16Float_t); - HLK_TEST(Multiply, HLSLMin16Float_t); - HLK_TEST(Divide, HLSLMin16Float_t); - HLK_TEST(Modulus, HLSLMin16Float_t); - HLK_TEST(Min, HLSLMin16Float_t); - HLK_TEST(Max, HLSLMin16Float_t); - 
HLK_TEST(Ldexp, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Add, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Subtract, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Multiply, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Divide, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Modulus, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Min, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Max, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Ldexp, HLSLMin16Float_t); // Saturate - HLK_TEST(Saturate, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Saturate, HLSLMin16Float_t); // Unary - HLK_TEST(Initialize, HLSLMin16Float_t); - HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Float_t); - HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Float_t); - HLK_TEST(ShuffleVector, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Initialize, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(ArrayOperator_StaticAccess, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(ArrayOperator_DynamicAccess, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(ShuffleVector, HLSLMin16Float_t); // Cast - HLK_TEST(CastToBool, HLSLMin16Float_t); - HLK_TEST(CastToInt16, HLSLMin16Float_t); - HLK_TEST(CastToInt32, HLSLMin16Float_t); - HLK_TEST(CastToInt64, HLSLMin16Float_t); - HLK_TEST(CastToUint16_FromFP, HLSLMin16Float_t); - HLK_TEST(CastToUint32_FromFP, HLSLMin16Float_t); - HLK_TEST(CastToUint64_FromFP, HLSLMin16Float_t); - HLK_TEST(CastToFloat16, HLSLMin16Float_t); - HLK_TEST(CastToFloat32, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(CastToBool, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(CastToInt16, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(CastToInt32, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(CastToInt64, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(CastToUint16_FromFP, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(CastToUint32_FromFP, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(CastToUint64_FromFP, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(CastToFloat16, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(CastToFloat32, HLSLMin16Float_t); // 
Trigonometric - HLK_TEST(Acos, HLSLMin16Float_t); - HLK_TEST(Asin, HLSLMin16Float_t); - HLK_TEST(Atan, HLSLMin16Float_t); - HLK_TEST(Cos, HLSLMin16Float_t); - HLK_TEST(Cosh, HLSLMin16Float_t); - HLK_TEST(Sin, HLSLMin16Float_t); - HLK_TEST(Sinh, HLSLMin16Float_t); - HLK_TEST(Tan, HLSLMin16Float_t); - HLK_TEST(Tanh, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Acos, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Asin, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Atan, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Cos, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Cosh, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Sin, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Sinh, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Tan, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Tanh, HLSLMin16Float_t); // UnaryMath - HLK_TEST(Abs, HLSLMin16Float_t); - HLK_TEST(Ceil, HLSLMin16Float_t); - HLK_TEST(Exp, HLSLMin16Float_t); - HLK_TEST(Floor, HLSLMin16Float_t); - HLK_TEST(Frac, HLSLMin16Float_t); - HLK_TEST(Log, HLSLMin16Float_t); - HLK_TEST(Rcp, HLSLMin16Float_t); - HLK_TEST(Round, HLSLMin16Float_t); - HLK_TEST(Rsqrt, HLSLMin16Float_t); - HLK_TEST(Sign, HLSLMin16Float_t); - HLK_TEST(Sqrt, HLSLMin16Float_t); - HLK_TEST(Trunc, HLSLMin16Float_t); - HLK_TEST(Exp2, HLSLMin16Float_t); - HLK_TEST(Log10, HLSLMin16Float_t); - HLK_TEST(Log2, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Abs, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Ceil, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Exp, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Floor, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Frac, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Log, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Rcp, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Round, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Rsqrt, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Sign, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Sqrt, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Trunc, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Exp2, HLSLMin16Float_t); + 
HLK_MIN_PRECISION_TEST(Log10, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Log2, HLSLMin16Float_t); // BinaryComparison - HLK_TEST(LessThan, HLSLMin16Float_t); - HLK_TEST(LessEqual, HLSLMin16Float_t); - HLK_TEST(GreaterThan, HLSLMin16Float_t); - HLK_TEST(GreaterEqual, HLSLMin16Float_t); - HLK_TEST(Equal, HLSLMin16Float_t); - HLK_TEST(NotEqual, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(LessThan, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(LessEqual, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(GreaterThan, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(GreaterEqual, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Equal, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(NotEqual, HLSLMin16Float_t); // Select - HLK_TEST(Select, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Select, HLSLMin16Float_t); // Dot - HLK_TEST(Dot, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(Dot, HLSLMin16Float_t); // LoadAndStore - HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Float_t); - HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Float_t); - HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Float_t); - HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Float_t); - HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Float_t); - HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Float_t); - HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Float_t); - HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Float_t); - HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Float_t); - HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Float_t); - HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Float_t); - HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Float_t); + 
HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Float_t); // Derivative - HLK_TEST(DerivativeDdx, HLSLMin16Float_t); - HLK_TEST(DerivativeDdy, HLSLMin16Float_t); - HLK_TEST(DerivativeDdxFine, HLSLMin16Float_t); - HLK_TEST(DerivativeDdyFine, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(DerivativeDdx, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(DerivativeDdy, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(DerivativeDdxFine, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(DerivativeDdyFine, HLSLMin16Float_t); // Wave and Quad ops excluded: these intrinsics do not support min precision // types. The DXIL wave/quad shuffle operations operate on 32-bit or 64-bit @@ -2910,76 +2939,76 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { // ---- HLSLMin16Int_t (mirrors int16_t) ---- // TernaryMath - HLK_TEST(Mad, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Mad, HLSLMin16Int_t); // BinaryMath // Note: Divide and Modulus excluded — HLSL does not support signed integer // division on minimum-precision types. 
- HLK_TEST(Add, HLSLMin16Int_t); - HLK_TEST(Subtract, HLSLMin16Int_t); - HLK_TEST(Multiply, HLSLMin16Int_t); - HLK_TEST(Min, HLSLMin16Int_t); - HLK_TEST(Max, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Add, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Subtract, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Multiply, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Min, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Max, HLSLMin16Int_t); // Bitwise (logical and shift — bit-manipulation excluded) - HLK_TEST(And, HLSLMin16Int_t); - HLK_TEST(Or, HLSLMin16Int_t); - HLK_TEST(Xor, HLSLMin16Int_t); - HLK_TEST(LeftShift, HLSLMin16Int_t); - HLK_TEST(RightShift, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(And, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Or, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Xor, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LeftShift, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(RightShift, HLSLMin16Int_t); // UnaryMath - HLK_TEST(Abs, HLSLMin16Int_t); - HLK_TEST(Sign, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Abs, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Sign, HLSLMin16Int_t); // Unary - HLK_TEST(Initialize, HLSLMin16Int_t); - HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Int_t); - HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Int_t); - HLK_TEST(ShuffleVector, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Initialize, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(ArrayOperator_StaticAccess, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(ArrayOperator_DynamicAccess, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(ShuffleVector, HLSLMin16Int_t); // Cast - HLK_TEST(CastToBool, HLSLMin16Int_t); - HLK_TEST(CastToInt32, HLSLMin16Int_t); - HLK_TEST(CastToInt64, HLSLMin16Int_t); - HLK_TEST(CastToUint16, HLSLMin16Int_t); - HLK_TEST(CastToUint32, HLSLMin16Int_t); - HLK_TEST(CastToUint64, HLSLMin16Int_t); - HLK_TEST(CastToFloat16, HLSLMin16Int_t); - HLK_TEST(CastToFloat32, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(CastToBool, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(CastToInt32, HLSLMin16Int_t); + 
HLK_MIN_PRECISION_TEST(CastToInt64, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(CastToUint16, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(CastToUint32, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(CastToUint64, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(CastToFloat16, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(CastToFloat32, HLSLMin16Int_t); // BinaryComparison - HLK_TEST(LessThan, HLSLMin16Int_t); - HLK_TEST(LessEqual, HLSLMin16Int_t); - HLK_TEST(GreaterThan, HLSLMin16Int_t); - HLK_TEST(GreaterEqual, HLSLMin16Int_t); - HLK_TEST(Equal, HLSLMin16Int_t); - HLK_TEST(NotEqual, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LessThan, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LessEqual, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(GreaterThan, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(GreaterEqual, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Equal, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(NotEqual, HLSLMin16Int_t); // Select - HLK_TEST(Select, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Select, HLSLMin16Int_t); // Reduction - HLK_TEST(Any_Mixed, HLSLMin16Int_t); - HLK_TEST(Any_Zero, HLSLMin16Int_t); - HLK_TEST(Any_NoZero, HLSLMin16Int_t); - HLK_TEST(All_Mixed, HLSLMin16Int_t); - HLK_TEST(All_Zero, HLSLMin16Int_t); - HLK_TEST(All_NoZero, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Any_Mixed, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Any_Zero, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(Any_NoZero, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(All_Mixed, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(All_Zero, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(All_NoZero, HLSLMin16Int_t); // LoadAndStore - HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Int_t); - HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Int_t); - HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Int_t); - HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Int_t); - HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Int_t); - HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Int_t); - HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Int_t); - HLK_TEST(LoadAndStore_RDH_SB_UAV, 
HLSLMin16Int_t); - HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Int_t); - HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Int_t); - HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Int_t); - HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Int_t); // Wave and Quad ops excluded: these intrinsics do not support min precision // types. 
The DXIL wave/quad shuffle operations operate on 32-bit or 64-bit @@ -2988,68 +3017,68 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { // ---- HLSLMin16Uint_t (mirrors uint16_t) ---- // TernaryMath - HLK_TEST(Mad, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Mad, HLSLMin16Uint_t); // BinaryMath - HLK_TEST(Add, HLSLMin16Uint_t); - HLK_TEST(Subtract, HLSLMin16Uint_t); - HLK_TEST(Multiply, HLSLMin16Uint_t); - HLK_TEST(Divide, HLSLMin16Uint_t); - HLK_TEST(Modulus, HLSLMin16Uint_t); - HLK_TEST(Min, HLSLMin16Uint_t); - HLK_TEST(Max, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Add, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Subtract, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Multiply, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Divide, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Modulus, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Min, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Max, HLSLMin16Uint_t); // Bitwise (logical and shift — bit-manipulation excluded) - HLK_TEST(And, HLSLMin16Uint_t); - HLK_TEST(Or, HLSLMin16Uint_t); - HLK_TEST(Xor, HLSLMin16Uint_t); - HLK_TEST(LeftShift, HLSLMin16Uint_t); - HLK_TEST(RightShift, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(And, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Or, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Xor, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LeftShift, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(RightShift, HLSLMin16Uint_t); // UnaryMath - HLK_TEST(Abs, HLSLMin16Uint_t); - HLK_TEST(Sign, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Abs, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Sign, HLSLMin16Uint_t); // Unary - HLK_TEST(Initialize, HLSLMin16Uint_t); - HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Uint_t); - HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Uint_t); - HLK_TEST(ShuffleVector, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Initialize, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(ArrayOperator_StaticAccess, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(ArrayOperator_DynamicAccess, HLSLMin16Uint_t); 
+ HLK_MIN_PRECISION_TEST(ShuffleVector, HLSLMin16Uint_t); // Cast - HLK_TEST(CastToBool, HLSLMin16Uint_t); - HLK_TEST(CastToInt16, HLSLMin16Uint_t); - HLK_TEST(CastToInt32, HLSLMin16Uint_t); - HLK_TEST(CastToInt64, HLSLMin16Uint_t); - HLK_TEST(CastToUint32, HLSLMin16Uint_t); - HLK_TEST(CastToUint64, HLSLMin16Uint_t); - HLK_TEST(CastToFloat16, HLSLMin16Uint_t); - HLK_TEST(CastToFloat32, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(CastToBool, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(CastToInt16, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(CastToInt32, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(CastToInt64, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(CastToUint32, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(CastToUint64, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(CastToFloat16, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(CastToFloat32, HLSLMin16Uint_t); // BinaryComparison - HLK_TEST(LessThan, HLSLMin16Uint_t); - HLK_TEST(LessEqual, HLSLMin16Uint_t); - HLK_TEST(GreaterThan, HLSLMin16Uint_t); - HLK_TEST(GreaterEqual, HLSLMin16Uint_t); - HLK_TEST(Equal, HLSLMin16Uint_t); - HLK_TEST(NotEqual, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LessThan, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LessEqual, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(GreaterThan, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(GreaterEqual, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Equal, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(NotEqual, HLSLMin16Uint_t); // Select - HLK_TEST(Select, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(Select, HLSLMin16Uint_t); // LoadAndStore - HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Uint_t); - HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Uint_t); - HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Uint_t); - HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Uint_t); - HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Uint_t); - HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Uint_t); - HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Uint_t); - HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Uint_t); - 
HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Uint_t); - HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Uint_t); - HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Uint_t); - HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Uint_t); // Wave and Quad ops excluded: these intrinsics do not support min precision // types. The DXIL wave/quad shuffle operations operate on 32-bit or 64-bit From 17c5a2e2bc1b62dfc88d94345ba3ec45fbee52aa Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Mon, 16 Mar 2026 18:06:31 -0700 Subject: [PATCH 25/34] Replace getIOTypeString with template-based type trait Use if constexpr on the C++ type instead of strcmp on HLSL type strings. Cleaner and resolved at compile time. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../clang/unittests/HLSLExec/LongVectors.cpp | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 0444ed12a4..e80f97375d 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -82,14 +82,15 @@ template constexpr bool isMinPrecisionType() { // hardware to use any precision >= the specified minimum, making buffer storage // width implementation-defined. We use full-precision types for buffer I/O to // ensure deterministic data layout regardless of the device's implementation. -const char *getIOTypeString(const char *HLSLType) { - if (strcmp(HLSLType, "min16float") == 0) +template const char *getIOTypeString() { + if constexpr (std::is_same_v) return "float"; - if (strcmp(HLSLType, "min16int") == 0) + else if constexpr (std::is_same_v) return "int"; - if (strcmp(HLSLType, "min16uint") == 0) + else if constexpr (std::is_same_v) return "uint"; - return HLSLType; + else + return getDataType().HLSLTypeString; } // @@ -1894,8 +1895,6 @@ void dispatchMinPrecisionTest(ID3D12Device *D3DDevice, bool VerboseLogging, // Min precision buffer storage width is implementation-defined, so we use // full-precision types for Load/Store via IO_TYPE/IO_OUT_TYPE defines. 
- const DataType &OpDataType = getDataType(); - for (size_t VectorSize : InputVectorSizes) { std::vector> Inputs = buildTestInputs(VectorSize, Operation.InputSets, Operation.Arity); @@ -1903,12 +1902,10 @@ void dispatchMinPrecisionTest(ID3D12Device *D3DDevice, bool VerboseLogging, auto Expected = ExpectedBuilder::buildExpected(Op, Inputs); using OutT = typename decltype(Expected)::value_type; - const DataType &OutDataType = getDataType(); const std::string AdditionalCompilerOptions = - std::string("-DMIN_PRECISION") + - " -DIO_TYPE=" + getIOTypeString(OpDataType.HLSLTypeString) + - " -DIO_OUT_TYPE=" + getIOTypeString(OutDataType.HLSLTypeString); + std::string("-DMIN_PRECISION") + " -DIO_TYPE=" + getIOTypeString() + + " -DIO_OUT_TYPE=" + getIOTypeString(); runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected, Op.ValidationConfig, AdditionalCompilerOptions); From 0c39a8e829ab89287fcbaecd9bce3d270dce86f1 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Mon, 16 Mar 2026 18:10:51 -0700 Subject: [PATCH 26/34] Move IO type string into DataType struct Add IOTypeString field to DataType and MIN_PRECISION_DATA_TYPE macro. Remove standalone getIOTypeString template function. The IO type mapping now lives alongside the other type metadata. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../clang/unittests/HLSLExec/LongVectors.cpp | 38 +++++++++---------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index e80f97375d..3401994acc 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -39,6 +39,7 @@ struct DataType { const char *HLSLTypeString; bool Is16Bit; size_t HLSLSizeInBytes; + const char *IOTypeString; // Full-precision type for buffer I/O. 
}; template const DataType &getDataType() { @@ -47,7 +48,15 @@ template const DataType &getDataType() { #define DATA_TYPE(TYPE, HLSL_STRING, HLSL_SIZE) \ template <> const DataType &getDataType() { \ - static DataType DataType{HLSL_STRING, is16BitType(), HLSL_SIZE}; \ + static DataType DataType{HLSL_STRING, is16BitType(), HLSL_SIZE, \ + HLSL_STRING}; \ + return DataType; \ + } + +#define MIN_PRECISION_DATA_TYPE(TYPE, HLSL_STRING, HLSL_SIZE, IO_STRING) \ + template <> const DataType &getDataType() { \ + static DataType DataType{HLSL_STRING, is16BitType(), HLSL_SIZE, \ + IO_STRING}; \ return DataType; \ } @@ -59,13 +68,14 @@ DATA_TYPE(uint16_t, "uint16_t", 2) DATA_TYPE(uint32_t, "uint32_t", 4) DATA_TYPE(uint64_t, "uint64_t", 8) DATA_TYPE(HLSLHalf_t, "half", 2) -DATA_TYPE(HLSLMin16Float_t, "min16float", 4) -DATA_TYPE(HLSLMin16Int_t, "min16int", 4) -DATA_TYPE(HLSLMin16Uint_t, "min16uint", 4) +MIN_PRECISION_DATA_TYPE(HLSLMin16Float_t, "min16float", 4, "float") +MIN_PRECISION_DATA_TYPE(HLSLMin16Int_t, "min16int", 4, "int") +MIN_PRECISION_DATA_TYPE(HLSLMin16Uint_t, "min16uint", 4, "uint") DATA_TYPE(float, "float", 4) DATA_TYPE(double, "double", 8) #undef DATA_TYPE +#undef MIN_PRECISION_DATA_TYPE template constexpr bool isFloatingPointType() { return std::is_same_v || std::is_same_v || @@ -78,21 +88,6 @@ template constexpr bool isMinPrecisionType() { std::is_same_v; } -// Min precision types (min16float, min16int, min16uint) are hints that allow -// hardware to use any precision >= the specified minimum, making buffer storage -// width implementation-defined. We use full-precision types for buffer I/O to -// ensure deterministic data layout regardless of the device's implementation. 
-template const char *getIOTypeString() { - if constexpr (std::is_same_v) - return "float"; - else if constexpr (std::is_same_v) - return "int"; - else if constexpr (std::is_same_v) - return "uint"; - else - return getDataType().HLSLTypeString; -} - // // Operation Types // @@ -1904,8 +1899,9 @@ void dispatchMinPrecisionTest(ID3D12Device *D3DDevice, bool VerboseLogging, using OutT = typename decltype(Expected)::value_type; const std::string AdditionalCompilerOptions = - std::string("-DMIN_PRECISION") + " -DIO_TYPE=" + getIOTypeString() + - " -DIO_OUT_TYPE=" + getIOTypeString(); + std::string("-DMIN_PRECISION") + + " -DIO_TYPE=" + getDataType().IOTypeString + + " -DIO_OUT_TYPE=" + getDataType().IOTypeString; runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected, Op.ValidationConfig, AdditionalCompilerOptions); From e97d4bd7ff47a59906b25bbe04963a0f7eba4618 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Mon, 16 Mar 2026 18:16:40 -0700 Subject: [PATCH 27/34] Some cleanup --- tools/clang/unittests/HLSLExec/LongVectors.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 3401994acc..130f9c9acb 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -68,11 +68,11 @@ DATA_TYPE(uint16_t, "uint16_t", 2) DATA_TYPE(uint32_t, "uint32_t", 4) DATA_TYPE(uint64_t, "uint64_t", 8) DATA_TYPE(HLSLHalf_t, "half", 2) +DATA_TYPE(float, "float", 4) +DATA_TYPE(double, "double", 8) MIN_PRECISION_DATA_TYPE(HLSLMin16Float_t, "min16float", 4, "float") MIN_PRECISION_DATA_TYPE(HLSLMin16Int_t, "min16int", 4, "int") MIN_PRECISION_DATA_TYPE(HLSLMin16Uint_t, "min16uint", 4, "uint") -DATA_TYPE(float, "float", 4) -DATA_TYPE(double, "double", 8) #undef DATA_TYPE #undef MIN_PRECISION_DATA_TYPE @@ -1879,8 +1879,6 @@ void dispatchWaveOpTest(ID3D12Device *D3DDevice, bool VerboseLogging, template void 
dispatchMinPrecisionTest(ID3D12Device *D3DDevice, bool VerboseLogging, size_t OverrideInputSize) { - static_assert(isMinPrecisionType(), - "dispatchMinPrecisionTest only for min precision types"); const std::vector InputVectorSizes = getInputSizesToTest(OverrideInputSize); From eb9c0e5a2ece2b9ccaed26a7bbdc68f57881d6dd Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 17 Mar 2026 13:43:23 -0700 Subject: [PATCH 28/34] Remove bit shift tests for min precision types Bit shifting is not supported for min precision data types (min16int, min16uint). Remove LeftShift/RightShift test entries and their associated BitShiftRhs input data sets. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tools/clang/unittests/HLSLExec/LongVectorTestData.h | 2 -- tools/clang/unittests/HLSLExec/LongVectors.cpp | 6 ++---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index 0367d8b8ee..3a69ab68c2 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -656,7 +656,6 @@ BEGIN_INPUT_SETS(HLSLMin16Int_t) INPUT_SET(InputSet::Default1, -6, 1, 7, 3, 8, 4, -3, 8, 8, -2); INPUT_SET(InputSet::Default2, 5, -6, -3, -2, 9, 3, 1, -3, -7, 2); INPUT_SET(InputSet::Default3, -5, 6, 3, 2, -9, -3, -1, 3, 7, -2); -INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 12, 11, 11, 14); INPUT_SET(InputSet::Zero, 0); INPUT_SET(InputSet::NoZero, 1); INPUT_SET(InputSet::SelectCond, 0, 1); @@ -670,7 +669,6 @@ INPUT_SET(InputSet::Default1, 3, 199, 3, 200, 5, 10, 22, 8, 9, 10); INPUT_SET(InputSet::Default2, 2, 111, 3, 4, 5, 9, 21, 8, 9, 10); INPUT_SET(InputSet::Default3, 4, 112, 4, 5, 3, 7, 21, 1, 11, 9); INPUT_SET(InputSet::Zero, 0); -INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 11, 12, 12, 12); INPUT_SET(InputSet::SelectCond, 0, 1); INPUT_SET(InputSet::AllOnes, 1); END_INPUT_SETS() diff --git 
a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 130f9c9acb..4e7ac033e0 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -2945,8 +2945,7 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_MIN_PRECISION_TEST(And, HLSLMin16Int_t); HLK_MIN_PRECISION_TEST(Or, HLSLMin16Int_t); HLK_MIN_PRECISION_TEST(Xor, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(LeftShift, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(RightShift, HLSLMin16Int_t); + // UnaryMath HLK_MIN_PRECISION_TEST(Abs, HLSLMin16Int_t); @@ -3023,8 +3022,7 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_MIN_PRECISION_TEST(And, HLSLMin16Uint_t); HLK_MIN_PRECISION_TEST(Or, HLSLMin16Uint_t); HLK_MIN_PRECISION_TEST(Xor, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(LeftShift, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(RightShift, HLSLMin16Uint_t); + // UnaryMath HLK_MIN_PRECISION_TEST(Abs, HLSLMin16Uint_t); From 354fe618b4eb00db072896dcfc9da9d05a73627b Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 17 Mar 2026 14:09:38 -0700 Subject: [PATCH 29/34] Add wave and quad op tests for min precision types Add wave and quad op test entries for min16float, min16int, and min16uint long vector types, mirroring the ops supported by their 16-bit equivalents. 
Test entries added: - Min16Float: 12 wave ops + 4 quad ops - Min16Int: 15 wave ops (includes WaveMultiPrefixBit*) + 4 quad ops - Min16Uint: 15 wave ops (includes WaveMultiPrefixBit*) + 4 quad ops Infrastructure changes: - ShaderOpArith.xml: Add #ifdef MIN_PRECISION guards to 7 wave op Store calls so they use IO_OUT_TYPE for min precision buffer I/O while keeping OUT_TYPE for standard types (no DXIL change for existing tests) - LongVectors.cpp: Add dispatchMinPrecisionWaveOpTest combining WaveSize with IO_TYPE/MIN_PRECISION compiler options, and HLK_MIN_PRECISION_WAVEOP_TEST macro - LongVectorTestData.h: Add WaveMultiPrefixBitwise input sets for HLSLMin16Int_t and HLSLMin16Uint_t - Fix ambiguous operator& in waveMultiPrefixBitOr for min precision types Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../unittests/HLSLExec/LongVectorTestData.h | 4 + .../clang/unittests/HLSLExec/LongVectors.cpp | 143 ++++++++++++++++-- .../unittests/HLSLExec/ShaderOpArith.xml | 28 ++++ 3 files changed, 165 insertions(+), 10 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index 3a69ab68c2..84affce45f 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -660,6 +660,8 @@ INPUT_SET(InputSet::Zero, 0); INPUT_SET(InputSet::NoZero, 1); INPUT_SET(InputSet::SelectCond, 0, 1); INPUT_SET(InputSet::AllOnes, 1); +INPUT_SET(InputSet::WaveMultiPrefixBitwise, 0x0, 0x1, 0x3, 0x4, 0x10, 0x12, 0xF, + -1); END_INPUT_SETS() // Values constrained to uint16 range. 
Kept small so that multiply, mad, @@ -671,6 +673,8 @@ INPUT_SET(InputSet::Default3, 4, 112, 4, 5, 3, 7, 21, 1, 11, 9); INPUT_SET(InputSet::Zero, 0); INPUT_SET(InputSet::SelectCond, 0, 1); INPUT_SET(InputSet::AllOnes, 1); +INPUT_SET(InputSet::WaveMultiPrefixBitwise, 0x0, 0x1, 0x3, 0x4, 0x10, 0x12, 0xF, + std::numeric_limits::max()); END_INPUT_SETS() #undef BEGIN_INPUT_SETS diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 4e7ac033e0..75b98c0ffd 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1566,7 +1566,7 @@ WAVE_OP(OpType::WaveMultiPrefixBitOr, waveMultiPrefixBitOr(A, WaveSize)); template T waveMultiPrefixBitOr(T A, UINT) { // All lanes in the group mask clear the second LSB. - return static_cast(A & ~static_cast(0x2)); + return static_cast(A & static_cast(~static_cast(0x2))); } template @@ -1906,6 +1906,40 @@ void dispatchMinPrecisionTest(ID3D12Device *D3DDevice, bool VerboseLogging, } } +template +void dispatchMinPrecisionWaveOpTest(ID3D12Device *D3DDevice, + bool VerboseLogging, + size_t OverrideInputSize, UINT WaveSize) { + + const std::vector InputVectorSizes = + getInputSizesToTest(OverrideInputSize); + + constexpr const Operation &Operation = getOperation(OP); + Op Op; + + // Min precision buffer storage width is implementation-defined, so we use + // full-precision types for Load/Store via IO_TYPE/IO_OUT_TYPE defines. 
+ for (size_t VectorSize : InputVectorSizes) { + std::vector> Inputs = + buildTestInputs(VectorSize, Operation.InputSets, Operation.Arity); + + auto Expected = + ExpectedBuilder::buildExpected(Op, Inputs, WaveSize); + + using OutT = typename decltype(Expected)::value_type; + + const std::string AdditionalCompilerOptions = + std::string("-DMIN_PRECISION") + + " -DIO_TYPE=" + getDataType().IOTypeString + + " -DIO_OUT_TYPE=" + getDataType().IOTypeString + + " -DWAVE_SIZE=" + std::to_string(WaveSize) + + " -DNUMTHREADS_XYZ=" + std::to_string(WaveSize) + ",1,1 "; + + runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected, + Op.ValidationConfig, AdditionalCompilerOptions); + } +} + } // namespace LongVector using namespace LongVector; @@ -1927,6 +1961,16 @@ using namespace LongVector; runWaveOpTest(); \ } +#define HLK_MIN_PRECISION_WAVEOP_TEST(Op, DataType) \ + TEST_METHOD(Op##_##DataType) { \ + BEGIN_TEST_METHOD_PROPERTIES() \ + TEST_METHOD_PROPERTY( \ + "Kits.Specification", \ + "Device.Graphics.D3D12.DXILCore.ShaderModel69.CoreRequirement") \ + END_TEST_METHOD_PROPERTIES() \ + runMinPrecisionWaveOpTest(); \ + } + class TestClassCommon { public: bool setupClass() { @@ -2054,6 +2098,31 @@ class TestClassCommon { OverrideInputSize); } + template void runMinPrecisionWaveOpTest() { + WEX::TestExecution::SetVerifyOutput VerifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + UINT WaveSize = 0; + + if (OverrideWaveLaneCount > 0) { + WaveSize = OverrideWaveLaneCount; + hlsl_test::LogCommentFmt( + L"Using overridden WaveLaneCount of %d for this test.", WaveSize); + } else { + D3D12_FEATURE_DATA_D3D12_OPTIONS1 WaveOpts; + VERIFY_SUCCEEDED(D3DDevice->CheckFeatureSupport( + D3D12_FEATURE_D3D12_OPTIONS1, &WaveOpts, sizeof(WaveOpts))); + + WaveSize = WaveOpts.WaveLaneCountMin; + } + + DXASSERT_NOMSG(WaveSize > 0); + DXASSERT((WaveSize & (WaveSize - 1)) == 0, "must be a power of 2"); + + dispatchMinPrecisionWaveOpTest(D3DDevice, VerboseLogging, 
+ OverrideInputSize, WaveSize); + } + protected: CComPtr D3DDevice; @@ -2923,9 +2992,25 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_MIN_PRECISION_TEST(DerivativeDdxFine, HLSLMin16Float_t); HLK_MIN_PRECISION_TEST(DerivativeDdyFine, HLSLMin16Float_t); - // Wave and Quad ops excluded: these intrinsics do not support min precision - // types. The DXIL wave/quad shuffle operations operate on 32-bit or 64-bit - // register slots and do not handle 16-bit min precision payloads. + // Quad + HLK_MIN_PRECISION_TEST(QuadReadLaneAt, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(QuadReadAcrossX, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(QuadReadAcrossY, HLSLMin16Float_t); + HLK_MIN_PRECISION_TEST(QuadReadAcrossDiagonal, HLSLMin16Float_t); + + // Wave + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveSum, HLSLMin16Float_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveMin, HLSLMin16Float_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveMax, HLSLMin16Float_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Float_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Float_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Float_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Float_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WavePrefixSum, HLSLMin16Float_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Float_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Float_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Float_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMatch, HLSLMin16Float_t); // ---- HLSLMin16Int_t (mirrors int16_t) ---- @@ -3000,9 +3085,28 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Int_t); HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Int_t); - // Wave and Quad ops excluded: these intrinsics do not support min precision - // types. 
The DXIL wave/quad shuffle operations operate on 32-bit or 64-bit - // register slots and do not handle 16-bit min precision payloads. + // Quad + HLK_MIN_PRECISION_TEST(QuadReadLaneAt, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(QuadReadAcrossX, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(QuadReadAcrossY, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(QuadReadAcrossDiagonal, HLSLMin16Int_t); + + // Wave + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveSum, HLSLMin16Int_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveMin, HLSLMin16Int_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveMax, HLSLMin16Int_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Int_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Int_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Int_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Int_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WavePrefixSum, HLSLMin16Int_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Int_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Int_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Int_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixBitAnd, HLSLMin16Int_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixBitOr, HLSLMin16Int_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixBitXor, HLSLMin16Int_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMatch, HLSLMin16Int_t); // ---- HLSLMin16Uint_t (mirrors uint16_t) ---- @@ -3069,9 +3173,28 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Uint_t); HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Uint_t); - // Wave and Quad ops excluded: these intrinsics do not support min precision - // types. The DXIL wave/quad shuffle operations operate on 32-bit or 64-bit - // register slots and do not handle 16-bit min precision payloads. 
+ // Quad + HLK_MIN_PRECISION_TEST(QuadReadLaneAt, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(QuadReadAcrossX, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(QuadReadAcrossY, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(QuadReadAcrossDiagonal, HLSLMin16Uint_t); + + // Wave + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveSum, HLSLMin16Uint_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveMin, HLSLMin16Uint_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveMax, HLSLMin16Uint_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Uint_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Uint_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Uint_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Uint_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WavePrefixSum, HLSLMin16Uint_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Uint_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Uint_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Uint_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixBitAnd, HLSLMin16Uint_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixBitOr, HLSLMin16Uint_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixBitXor, HLSLMin16Uint_t); + HLK_MIN_PRECISION_WAVEOP_TEST(WaveMatch, HLSLMin16Uint_t); }; #define HLK_TEST_DOUBLE(Op, DataType) \ diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index 694eda77f1..31a09ee57d 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -4381,7 +4381,11 @@ void MSMain(uint GID : SV_GroupIndex, Vector = WavePrefixSum(Vector); if(WaveGetLaneIndex() == MidLane) { + #ifdef MIN_PRECISION + g_OutputVector.Store< vector >(0, Vector); + #else g_OutputVector.Store< vector >(0, Vector); + #endif } } #endif @@ -4392,7 +4396,11 @@ void MSMain(uint GID : SV_GroupIndex, Vector = WavePrefixProduct(Vector); if(WaveGetLaneIndex() == 2) { + #ifdef 
MIN_PRECISION + g_OutputVector.Store< vector >(0, Vector); + #else g_OutputVector.Store< vector >(0, Vector); + #endif } } #endif @@ -4425,7 +4433,11 @@ void MSMain(uint GID : SV_GroupIndex, { // Lane 3 is the last lane in the mask that we care about. Store the // result from it. + #ifdef MIN_PRECISION + g_OutputVector.Store< vector >(0, Vector); + #else g_OutputVector.Store< vector >(0, Vector); + #endif } } #endif @@ -4457,7 +4469,11 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Lane 3 is the last lane in the mask. Store the result from it. + #ifdef MIN_PRECISION + g_OutputVector.Store< vector >(0, Vector); + #else g_OutputVector.Store< vector >(0, Vector); + #endif } } #endif @@ -4488,7 +4504,11 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Lane 3 is the last lane in the mask. Store the result from it. + #ifdef MIN_PRECISION + g_OutputVector.Store< vector >(0, Vector); + #else g_OutputVector.Store< vector >(0, Vector); + #endif } } #endif @@ -4526,7 +4546,11 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Lane 3 is the last lane in the mask. Store the result from it. 
+ #ifdef MIN_PRECISION + g_OutputVector.Store< vector >(0, Vector); + #else g_OutputVector.Store< vector >(0, Vector); + #endif } } #endif @@ -4567,7 +4591,11 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Store result from lane 3 (last lane in mask) + #ifdef MIN_PRECISION + g_OutputVector.Store< vector >(0, Vector); + #else g_OutputVector.Store< vector >(0, Vector); + #endif } } #endif From f07f7b28c210fd849fd6f872c5eddb92c536a122 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 17 Mar 2026 14:41:35 -0700 Subject: [PATCH 30/34] Add comment explaining extra static_cast in waveMultiPrefixBitOr Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tools/clang/unittests/HLSLExec/LongVectors.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 75b98c0ffd..eb66ed3cc8 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1566,6 +1566,8 @@ WAVE_OP(OpType::WaveMultiPrefixBitOr, waveMultiPrefixBitOr(A, WaveSize)); template T waveMultiPrefixBitOr(T A, UINT) { // All lanes in the group mask clear the second LSB. + // Extra static_cast around ~ needed because bitwise NOT promotes min + // precision wrapper types to int32/uint32, making operator& ambiguous. return static_cast(A & static_cast(~static_cast(0x2))); } From 212db3104ad79c5f14257b71e7ff60a24673c31b Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 17 Mar 2026 14:46:33 -0700 Subject: [PATCH 31/34] Rename IO_TYPE/IO_OUT_TYPE to BUFFER_TYPE/BUFFER_OUT_TYPE Rename the shader defines and C++ field to better communicate their purpose: these specify the full-precision buffer storage type for Load/Store operations, not a generic I/O type. 
- IO_TYPE -> BUFFER_TYPE - IO_OUT_TYPE -> BUFFER_OUT_TYPE - IOTypeString -> BufferTypeString Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../clang/unittests/HLSLExec/LongVectors.cpp | 14 +-- .../unittests/HLSLExec/ShaderOpArith.xml | 92 +++++++++---------- 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index eb66ed3cc8..441955ecb6 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -39,7 +39,7 @@ struct DataType { const char *HLSLTypeString; bool Is16Bit; size_t HLSLSizeInBytes; - const char *IOTypeString; // Full-precision type for buffer I/O. + const char *BufferTypeString; // Full-precision type for buffer I/O. }; template const DataType &getDataType() { @@ -1889,7 +1889,7 @@ void dispatchMinPrecisionTest(ID3D12Device *D3DDevice, bool VerboseLogging, Op Op; // Min precision buffer storage width is implementation-defined, so we use - // full-precision types for Load/Store via IO_TYPE/IO_OUT_TYPE defines. + // full-precision types for Load/Store via BUFFER_TYPE/BUFFER_OUT_TYPE defines. 
for (size_t VectorSize : InputVectorSizes) { std::vector> Inputs = buildTestInputs(VectorSize, Operation.InputSets, Operation.Arity); @@ -1900,8 +1900,8 @@ void dispatchMinPrecisionTest(ID3D12Device *D3DDevice, bool VerboseLogging, const std::string AdditionalCompilerOptions = std::string("-DMIN_PRECISION") + - " -DIO_TYPE=" + getDataType().IOTypeString + - " -DIO_OUT_TYPE=" + getDataType().IOTypeString; + " -DBUFFER_TYPE=" + getDataType().BufferTypeString + + " -DBUFFER_OUT_TYPE=" + getDataType().BufferTypeString; runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected, Op.ValidationConfig, AdditionalCompilerOptions); @@ -1920,7 +1920,7 @@ void dispatchMinPrecisionWaveOpTest(ID3D12Device *D3DDevice, Op Op; // Min precision buffer storage width is implementation-defined, so we use - // full-precision types for Load/Store via IO_TYPE/IO_OUT_TYPE defines. + // full-precision types for Load/Store via BUFFER_TYPE/BUFFER_OUT_TYPE defines. for (size_t VectorSize : InputVectorSizes) { std::vector> Inputs = buildTestInputs(VectorSize, Operation.InputSets, Operation.Arity); @@ -1932,8 +1932,8 @@ void dispatchMinPrecisionWaveOpTest(ID3D12Device *D3DDevice, const std::string AdditionalCompilerOptions = std::string("-DMIN_PRECISION") + - " -DIO_TYPE=" + getDataType().IOTypeString + - " -DIO_OUT_TYPE=" + getDataType().IOTypeString + + " -DBUFFER_TYPE=" + getDataType().BufferTypeString + + " -DBUFFER_OUT_TYPE=" + getDataType().BufferTypeString + " -DWAVE_SIZE=" + std::to_string(WaveSize) + " -DNUMTHREADS_XYZ=" + std::to_string(WaveSize) + ",1,1 "; diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index 31a09ee57d..0677d4d16c 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -3797,7 +3797,7 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER struct SLongVec { #ifdef MIN_PRECISION - vector data; + vector data; #else vector 
data; #endif @@ -3814,14 +3814,14 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER #ifdef MIN_PRECISION vector temp = (vector)InputVector[0].data; - OutputVector[0].data = (vector)temp; + OutputVector[0].data = (vector)temp; #else OutputVector[0].data = InputVector[0].data; #endif #else #ifdef MIN_PRECISION - vector Input = (vector)InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, (vector)Input); + vector Input = (vector)InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, (vector)Input); #else vector Input = InputVector.Load< vector >(0); OutputVector.Store< vector >(0, Input); @@ -3850,7 +3850,7 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER struct SLongVec { #ifdef MIN_PRECISION - vector data; + vector data; #else vector data; #endif @@ -3867,14 +3867,14 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER #ifdef MIN_PRECISION vector temp = (vector)InputVector[0].data; - OutputVector[0].data = (vector)temp; + OutputVector[0].data = (vector)temp; #else OutputVector[0].data = InputVector[0].data; #endif #else #ifdef MIN_PRECISION - vector Input = (vector)InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, (vector)Input); + vector Input = (vector)InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, (vector)Input); #else vector Input = InputVector.Load< vector >(0); OutputVector.Store< vector >(0, Input); @@ -3911,7 +3911,7 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER struct SLongVec { #ifdef MIN_PRECISION - vector data; + vector data; #else vector data; #endif @@ -3930,14 +3930,14 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER #ifdef MIN_PRECISION vector temp = (vector)InputVector[0].data; - OutputVector[0].data = (vector)temp; + OutputVector[0].data = (vector)temp; #else OutputVector[0].data = InputVector[0].data; #endif #else #ifdef MIN_PRECISION - vector Input = (vector)InputVector.Load< vector >(0); - 
OutputVector.Store< vector >(0, (vector)Input); + vector Input = (vector)InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, (vector)Input); #else vector Input = InputVector.Load< vector >(0); OutputVector.Store< vector >(0, Input); @@ -3973,7 +3973,7 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER struct SLongVec { #ifdef MIN_PRECISION - vector data; + vector data; #else vector data; #endif @@ -3990,14 +3990,14 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER #ifdef MIN_PRECISION vector temp = (vector)InputVector[0].data; - OutputVector[0].data = (vector)temp; + OutputVector[0].data = (vector)temp; #else OutputVector[0].data = InputVector[0].data; #endif #else #ifdef MIN_PRECISION - vector Input = (vector)InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, (vector)Input); + vector Input = (vector)InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, (vector)Input); #else vector Input = InputVector.Load< vector >(0); OutputVector.Store< vector >(0, Input); @@ -4030,7 +4030,7 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER struct SLongVec { #ifdef MIN_PRECISION - vector data; + vector data; #else vector data; #endif @@ -4045,7 +4045,7 @@ void MSMain(uint GID : SV_GroupIndex, RWStructuredBuffer OutputVector = ResourceDescriptorHeap[1]; #ifdef MIN_PRECISION vector temp = (vector)InputVector[0].data; - OutputVector[0].data = (vector)temp; + OutputVector[0].data = (vector)temp; #else OutputVector[0].data = InputVector[0].data; #endif @@ -4054,8 +4054,8 @@ void MSMain(uint GID : SV_GroupIndex, RWByteAddressBuffer OutputVector = ResourceDescriptorHeap[1]; #ifdef MIN_PRECISION - vector Input = (vector)InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, (vector)Input); + vector Input = (vector)InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, (vector)Input); #else vector Input = InputVector.Load< vector >(0); OutputVector.Store< vector >(0, Input); @@ 
-4087,7 +4087,7 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER struct SLongVec { #ifdef MIN_PRECISION - vector data; + vector data; #else vector data; #endif @@ -4101,7 +4101,7 @@ void MSMain(uint GID : SV_GroupIndex, RWStructuredBuffer OutputVector = ResourceDescriptorHeap[1]; #ifdef MIN_PRECISION vector temp = (vector)InputVector[0].data; - OutputVector[0].data = (vector)temp; + OutputVector[0].data = (vector)temp; #else OutputVector[0].data = InputVector[0].data; #endif @@ -4110,8 +4110,8 @@ void MSMain(uint GID : SV_GroupIndex, RWByteAddressBuffer OutputVector = ResourceDescriptorHeap[1]; #ifdef MIN_PRECISION - vector Input = (vector)InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, (vector)Input); + vector Input = (vector)InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, (vector)Input); #else vector Input = InputVector.Load< vector >(0); OutputVector.Store< vector >(0, Input); @@ -4143,14 +4143,14 @@ void MSMain(uint GID : SV_GroupIndex, OUT_TYPE : The type of the output vector, e.g. float, double, int, uint. In most cases OUT_TYPE == TYPE. - IO_TYPE : (Min precision only) Full-precision type used for buffer Load/Store. + BUFFER_TYPE : (Min precision only) Full-precision type used for buffer Load/Store. Maps min16float/min16int/min16uint to float/int/uint for deterministic buffer layout. Only defined when MIN_PRECISION is set; otherwise falls back to TYPE via #ifndef in the shader. - IO_OUT_TYPE : (Min precision only) Same as IO_TYPE but derived from OUT_TYPE. + BUFFER_OUT_TYPE : (Min precision only) Same as BUFFER_TYPE but derived from OUT_TYPE. Falls back to OUT_TYPE via #ifndef when not defined. MIN_PRECISION : Defined when TYPE is a min precision type. Gates the - IO_TYPE/IO_OUT_TYPE load/store paths in shader templates. + BUFFER_TYPE/BUFFER_OUT_TYPE load/store paths in shader templates. NUM : The number of elements in the vector, e.g. 
2, 3, 4, 8, 16, 32, @@ -4174,14 +4174,14 @@ void MSMain(uint GID : SV_GroupIndex, RWByteAddressBuffer g_InputVector3 : register(u2); RWByteAddressBuffer g_OutputVector : register(u3); - // For min precision types, IO_TYPE/IO_OUT_TYPE are defined as + // For min precision types, BUFFER_TYPE/BUFFER_OUT_TYPE are defined as // full-precision equivalents for deterministic buffer I/O layout. // For all other types, fall back to TYPE/OUT_TYPE (no-op). - #ifndef IO_TYPE - #define IO_TYPE TYPE + #ifndef BUFFER_TYPE + #define BUFFER_TYPE TYPE #endif - #ifndef IO_OUT_TYPE - #define IO_OUT_TYPE OUT_TYPE + #ifndef BUFFER_OUT_TYPE + #define BUFFER_OUT_TYPE OUT_TYPE #endif #define IS_UNARY_OP (BASIC_OP_TYPE == 0x1) @@ -4382,7 +4382,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == MidLane) { #ifdef MIN_PRECISION - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); #else g_OutputVector.Store< vector >(0, Vector); #endif @@ -4397,7 +4397,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 2) { #ifdef MIN_PRECISION - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); #else g_OutputVector.Store< vector >(0, Vector); #endif @@ -4434,7 +4434,7 @@ void MSMain(uint GID : SV_GroupIndex, // Lane 3 is the last lane in the mask that we care about. Store the // result from it. #ifdef MIN_PRECISION - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); #else g_OutputVector.Store< vector >(0, Vector); #endif @@ -4470,7 +4470,7 @@ void MSMain(uint GID : SV_GroupIndex, { // Lane 3 is the last lane in the mask. Store the result from it. #ifdef MIN_PRECISION - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); #else g_OutputVector.Store< vector >(0, Vector); #endif @@ -4505,7 +4505,7 @@ void MSMain(uint GID : SV_GroupIndex, { // Lane 3 is the last lane in the mask. Store the result from it. 
#ifdef MIN_PRECISION - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); #else g_OutputVector.Store< vector >(0, Vector); #endif @@ -4547,7 +4547,7 @@ void MSMain(uint GID : SV_GroupIndex, { // Lane 3 is the last lane in the mask. Store the result from it. #ifdef MIN_PRECISION - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); #else g_OutputVector.Store< vector >(0, Vector); #endif @@ -4592,7 +4592,7 @@ void MSMain(uint GID : SV_GroupIndex, { // Store result from lane 3 (last lane in mask) #ifdef MIN_PRECISION - g_OutputVector.Store< vector >(0, Vector); + g_OutputVector.Store< vector >(0, Vector); #else g_OutputVector.Store< vector >(0, Vector); #endif @@ -4686,7 +4686,7 @@ void MSMain(uint GID : SV_GroupIndex, // lane we arbitrarily chose for validation with fine derivatives. if(LaneIndex == 3) { - g_OutputVector.Store< vector >(0, Result); + g_OutputVector.Store< vector >(0, Result); } } #endif @@ -4715,7 +4715,7 @@ void MSMain(uint GID : SV_GroupIndex, if(LaneIndex == 3) { - g_OutputVector.Store< vector >(0, Result); + g_OutputVector.Store< vector >(0, Result); } } #endif @@ -4739,14 +4739,14 @@ void MSMain(uint GID : SV_GroupIndex, #ifdef FUNC_SHUFFLE_VECTOR // For shuffle vector, the input is a scalar, not a vector. #ifdef MIN_PRECISION - TYPE Input1 = (TYPE)g_InputVector1.Load(0); + TYPE Input1 = (TYPE)g_InputVector1.Load(0); #else TYPE Input1 = g_InputVector1.Load(0); #endif #else // For all other basic op types the first input is always a vector. 
#ifdef MIN_PRECISION - vector Input1 = (vector)g_InputVector1.Load< vector >(0); + vector Input1 = (vector)g_InputVector1.Load< vector >(0); #else vector Input1 = g_InputVector1.Load< vector >(0); #endif @@ -4754,7 +4754,7 @@ void MSMain(uint GID : SV_GroupIndex, #if (IS_BINARY_OP || IS_TERNARY_OP) #ifdef MIN_PRECISION - vector Input2 = (vector)g_InputVector2.Load< vector >(0); + vector Input2 = (vector)g_InputVector2.Load< vector >(0); #else vector Input2 = g_InputVector2.Load< vector >(0); #endif @@ -4762,7 +4762,7 @@ void MSMain(uint GID : SV_GroupIndex, #if IS_TERNARY_OP #ifdef MIN_PRECISION - vector Input3 = (vector)g_InputVector3.Load< vector >(0); + vector Input3 = (vector)g_InputVector3.Load< vector >(0); #else vector Input3 = g_InputVector3.Load< vector >(0); #endif @@ -4818,7 +4818,7 @@ void MSMain(uint GID : SV_GroupIndex, OutputVector = FUNC(Input1, Input2, Input3); #endif - g_OutputVector.Store< vector >(0, OutputVector); + g_OutputVector.Store< vector >(0, OutputVector); }; ]]> From cb6175aa8b8258ce062fed1677f474b9f9e02a50 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 17 Mar 2026 17:30:49 -0700 Subject: [PATCH 32/34] Extract getWaveSize helper and clarify mirrors comments Extract duplicated WaveSize computation from runWaveOpTest and runMinPrecisionWaveOpTest into a shared getWaveSize() method. Update section comments to say 'mirrors applicable ops' since not all ops from the 16-bit equivalent types are supported for min precision. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../clang/unittests/HLSLExec/LongVectors.cpp | 39 +++++++------------ 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 441955ecb6..28fba8d0a9 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -2060,10 +2060,7 @@ class TestClassCommon { return true; } - template void runWaveOpTest() { - WEX::TestExecution::SetVerifyOutput VerifySettings( - WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); - + UINT getWaveSize() { UINT WaveSize = 0; if (OverrideWaveLaneCount > 0) { @@ -2081,8 +2078,15 @@ class TestClassCommon { DXASSERT_NOMSG(WaveSize > 0); DXASSERT((WaveSize & (WaveSize - 1)) == 0, "must be a power of 2"); + return WaveSize; + } + + template void runWaveOpTest() { + WEX::TestExecution::SetVerifyOutput VerifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + dispatchWaveOpTest(D3DDevice, VerboseLogging, OverrideInputSize, - WaveSize); + getWaveSize()); } template void runTest() { @@ -2104,25 +2108,8 @@ class TestClassCommon { WEX::TestExecution::SetVerifyOutput VerifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); - UINT WaveSize = 0; - - if (OverrideWaveLaneCount > 0) { - WaveSize = OverrideWaveLaneCount; - hlsl_test::LogCommentFmt( - L"Using overridden WaveLaneCount of %d for this test.", WaveSize); - } else { - D3D12_FEATURE_DATA_D3D12_OPTIONS1 WaveOpts; - VERIFY_SUCCEEDED(D3DDevice->CheckFeatureSupport( - D3D12_FEATURE_D3D12_OPTIONS1, &WaveOpts, sizeof(WaveOpts))); - - WaveSize = WaveOpts.WaveLaneCountMin; - } - - DXASSERT_NOMSG(WaveSize > 0); - DXASSERT((WaveSize & (WaveSize - 1)) == 0, "must be a power of 2"); - dispatchMinPrecisionWaveOpTest(D3DDevice, VerboseLogging, - OverrideInputSize, WaveSize); + OverrideInputSize, getWaveSize()); } protected: @@ 
-2897,7 +2884,7 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_WAVEOP_TEST(WaveMultiPrefixProduct, float); HLK_WAVEOP_TEST(WaveMatch, float); - // ---- HLSLMin16Float_t (mirrors HLSLHalf_t) ---- + // ---- HLSLMin16Float_t (mirrors applicable HLSLHalf_t ops) ---- // TernaryMath HLK_MIN_PRECISION_TEST(Mad, HLSLMin16Float_t); @@ -3014,7 +3001,7 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Float_t); HLK_MIN_PRECISION_WAVEOP_TEST(WaveMatch, HLSLMin16Float_t); - // ---- HLSLMin16Int_t (mirrors int16_t) ---- + // ---- HLSLMin16Int_t (mirrors applicable int16_t ops) ---- // TernaryMath HLK_MIN_PRECISION_TEST(Mad, HLSLMin16Int_t); @@ -3110,7 +3097,7 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixBitXor, HLSLMin16Int_t); HLK_MIN_PRECISION_WAVEOP_TEST(WaveMatch, HLSLMin16Int_t); - // ---- HLSLMin16Uint_t (mirrors uint16_t) ---- + // ---- HLSLMin16Uint_t (mirrors applicable uint16_t ops) ---- // TernaryMath HLK_MIN_PRECISION_TEST(Mad, HLSLMin16Uint_t); From e8d9169d44339ecaa45b45d46d0af626e66f48a4 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 17 Mar 2026 17:43:03 -0700 Subject: [PATCH 33/34] Add bitwise and shift ops for min precision types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add LeftShift and RightShift test entries for min16int and min16uint. Both produce valid min-precision DXIL (shl/ashr/lshr i16 with 4-bit shift masking). ReverseBits, CountBits, FirstBitHigh, FirstBitLow are excluded — DXC promotes min precision to i32 before calling these DXIL intrinsics, so they don't actually test min precision behavior. Infrastructure changes: - LongVectorTestData.h: Add Bitwise and BitShiftRhs input sets for HLSLMin16Int_t and HLSLMin16Uint_t matching int16_t/uint16_t names. Values constrained to 16-bit safe range. 
- LongVectorTestData.h: Add compound assignment operators (<<=, >>=, |=, &=, ^=) and unary ~ to both wrapper types to resolve ambiguity with integer promotion in template functions. - LongVectorTestData.h: Specialize std::is_signed for wrapper types so FirstBitHigh SFINAE selects the correct signed/unsigned variant. - LongVectors.cpp: Fix ReverseBits, ScanFromMSB, FirstBitLow to use explicit static_cast for integer literals, avoiding ambiguous operator overload resolution with wrapper types. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../unittests/HLSLExec/LongVectorTestData.h | 45 +++++++++++++++++++ .../clang/unittests/HLSLExec/LongVectors.cpp | 44 ++++++++++-------- 2 files changed, 71 insertions(+), 18 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index 84affce45f..876ff2dab6 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -334,6 +334,27 @@ struct HLSLMin16Int_t { HLSLMin16Int_t operator>>(const HLSLMin16Int_t &O) const { return HLSLMin16Int_t(Val >> O.Val); } + HLSLMin16Int_t operator~() const { return HLSLMin16Int_t(~Val); } + HLSLMin16Int_t &operator<<=(const HLSLMin16Int_t &O) { + Val <<= O.Val; + return *this; + } + HLSLMin16Int_t &operator>>=(const HLSLMin16Int_t &O) { + Val >>= O.Val; + return *this; + } + HLSLMin16Int_t &operator|=(const HLSLMin16Int_t &O) { + Val |= O.Val; + return *this; + } + HLSLMin16Int_t &operator&=(const HLSLMin16Int_t &O) { + Val &= O.Val; + return *this; + } + HLSLMin16Int_t &operator^=(const HLSLMin16Int_t &O) { + Val ^= O.Val; + return *this; + } HLSLMin16Int_t operator&&(const HLSLMin16Int_t &O) const { return HLSLMin16Int_t(Val && O.Val); } @@ -399,6 +420,27 @@ struct HLSLMin16Uint_t { HLSLMin16Uint_t operator>>(const HLSLMin16Uint_t &O) const { return HLSLMin16Uint_t(Val >> O.Val); } + HLSLMin16Uint_t operator~() const { return 
HLSLMin16Uint_t(~Val); } + HLSLMin16Uint_t &operator<<=(const HLSLMin16Uint_t &O) { + Val <<= O.Val; + return *this; + } + HLSLMin16Uint_t &operator>>=(const HLSLMin16Uint_t &O) { + Val >>= O.Val; + return *this; + } + HLSLMin16Uint_t &operator|=(const HLSLMin16Uint_t &O) { + Val |= O.Val; + return *this; + } + HLSLMin16Uint_t &operator&=(const HLSLMin16Uint_t &O) { + Val &= O.Val; + return *this; + } + HLSLMin16Uint_t &operator^=(const HLSLMin16Uint_t &O) { + Val ^= O.Val; + return *this; + } bool operator&&(const HLSLMin16Uint_t &O) const { return Val && O.Val; } bool operator||(const HLSLMin16Uint_t &O) const { return Val || O.Val; } @@ -415,6 +457,7 @@ struct HLSLMin16Uint_t { uint32_t Val; }; + enum class InputSet { #define INPUT_SET(SYMBOL) SYMBOL, #include "LongVectorOps.def" @@ -656,6 +699,7 @@ BEGIN_INPUT_SETS(HLSLMin16Int_t) INPUT_SET(InputSet::Default1, -6, 1, 7, 3, 8, 4, -3, 8, 8, -2); INPUT_SET(InputSet::Default2, 5, -6, -3, -2, 9, 3, 1, -3, -7, 2); INPUT_SET(InputSet::Default3, -5, 6, 3, 2, -9, -3, -1, 3, 7, -2); +INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 12, 11, 11, 14); INPUT_SET(InputSet::Zero, 0); INPUT_SET(InputSet::NoZero, 1); INPUT_SET(InputSet::SelectCond, 0, 1); @@ -671,6 +715,7 @@ INPUT_SET(InputSet::Default1, 3, 199, 3, 200, 5, 10, 22, 8, 9, 10); INPUT_SET(InputSet::Default2, 2, 111, 3, 4, 5, 9, 21, 8, 9, 10); INPUT_SET(InputSet::Default3, 4, 112, 4, 5, 3, 7, 21, 1, 11, 9); INPUT_SET(InputSet::Zero, 0); +INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 11, 12, 12, 12); INPUT_SET(InputSet::SelectCond, 0, 1); INPUT_SET(InputSet::AllOnes, 1); INPUT_SET(InputSet::WaveMultiPrefixBitwise, 0x0, 0x1, 0x3, 0x4, 0x10, 0x12, 0xF, diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 28fba8d0a9..b797bb350f 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -743,12 +743,12 @@ template T Saturate(T A) { } template T 
ReverseBits(T A) { - T Result = 0; + T Result = static_cast(0); const size_t NumBits = sizeof(T) * 8; for (size_t I = 0; I < NumBits; I++) { - Result <<= 1; - Result |= (A & 1); - A >>= 1; + Result <<= static_cast(1); + Result |= (A & static_cast(1)); + A >>= static_cast(1); } return Result; } @@ -760,12 +760,13 @@ template uint32_t CountBits(T A) { // General purpose bit scan from the MSB. Based on the value of LookingForZero // returns the index of the first high/low bit found. template uint32_t ScanFromMSB(T A, bool LookingForZero) { - if (A == 0) + if (A == static_cast(0)) return std::numeric_limits::max(); constexpr uint32_t NumBits = sizeof(T) * 8; for (int32_t I = NumBits - 1; I >= 0; --I) { - bool BitSet = (A & (static_cast(1) << I)) != 0; + bool BitSet = + (A & (static_cast(1) << static_cast(I))) != static_cast(0); if (BitSet != LookingForZero) return static_cast(I); } @@ -788,11 +789,11 @@ FirstBitHigh(T A) { template uint32_t FirstBitLow(T A) { const uint32_t NumBits = sizeof(T) * 8; - if (A == 0) + if (A == static_cast(0)) return std::numeric_limits::max(); for (uint32_t I = 0; I < NumBits; ++I) { - if (A & (static_cast(1) << I)) + if (A & (static_cast(1) << static_cast(I))) return static_cast(I); } @@ -1888,8 +1889,8 @@ void dispatchMinPrecisionTest(ID3D12Device *D3DDevice, bool VerboseLogging, constexpr const Operation &Operation = getOperation(OP); Op Op; - // Min precision buffer storage width is implementation-defined, so we use - // full-precision types for Load/Store via BUFFER_TYPE/BUFFER_OUT_TYPE defines. + // Min precision buffer storage width is implementation-defined, so we + // use full-precision types for buffer I/O via BUFFER_TYPE/BUFFER_OUT_TYPE. 
for (size_t VectorSize : InputVectorSizes) { std::vector> Inputs = buildTestInputs(VectorSize, Operation.InputSets, Operation.Arity); @@ -1919,14 +1920,13 @@ void dispatchMinPrecisionWaveOpTest(ID3D12Device *D3DDevice, constexpr const Operation &Operation = getOperation(OP); Op Op; - // Min precision buffer storage width is implementation-defined, so we use - // full-precision types for Load/Store via BUFFER_TYPE/BUFFER_OUT_TYPE defines. + // Min precision buffer storage width is implementation-defined, so we + // use full-precision types for buffer I/O via BUFFER_TYPE/BUFFER_OUT_TYPE. for (size_t VectorSize : InputVectorSizes) { std::vector> Inputs = buildTestInputs(VectorSize, Operation.InputSets, Operation.Arity); - auto Expected = - ExpectedBuilder::buildExpected(Op, Inputs, WaveSize); + auto Expected = ExpectedBuilder::buildExpected(Op, Inputs, WaveSize); using OutT = typename decltype(Expected)::value_type; @@ -3015,11 +3015,15 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_MIN_PRECISION_TEST(Min, HLSLMin16Int_t); HLK_MIN_PRECISION_TEST(Max, HLSLMin16Int_t); - // Bitwise (logical and shift — bit-manipulation excluded) + // Bitwise HLK_MIN_PRECISION_TEST(And, HLSLMin16Int_t); HLK_MIN_PRECISION_TEST(Or, HLSLMin16Int_t); HLK_MIN_PRECISION_TEST(Xor, HLSLMin16Int_t); - + HLK_MIN_PRECISION_TEST(LeftShift, HLSLMin16Int_t); + HLK_MIN_PRECISION_TEST(RightShift, HLSLMin16Int_t); + // Note: ReverseBits, CountBits, FirstBitHigh, FirstBitLow excluded - + // DXC promotes min precision to i32 before these intrinsics, so they + // don't operate at min precision. 
// UnaryMath HLK_MIN_PRECISION_TEST(Abs, HLSLMin16Int_t); @@ -3111,11 +3115,15 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_MIN_PRECISION_TEST(Min, HLSLMin16Uint_t); HLK_MIN_PRECISION_TEST(Max, HLSLMin16Uint_t); - // Bitwise (logical and shift — bit-manipulation excluded) + // Bitwise HLK_MIN_PRECISION_TEST(And, HLSLMin16Uint_t); HLK_MIN_PRECISION_TEST(Or, HLSLMin16Uint_t); HLK_MIN_PRECISION_TEST(Xor, HLSLMin16Uint_t); - + HLK_MIN_PRECISION_TEST(LeftShift, HLSLMin16Uint_t); + HLK_MIN_PRECISION_TEST(RightShift, HLSLMin16Uint_t); + // Note: ReverseBits, CountBits, FirstBitHigh, FirstBitLow excluded - + // DXC promotes min precision to i32 before these intrinsics, so they + // don't operate at min precision. // UnaryMath HLK_MIN_PRECISION_TEST(Abs, HLSLMin16Uint_t); From 44a281ea3f5215ddeb6ff11af6d37480864ec9dd Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 24 Mar 2026 14:24:58 -0700 Subject: [PATCH 34/34] Remove IO_TYPE/BUFFER_TYPE min precision buffer workaround The DXC compiler fix in PR #8274 handles min precision vector load/store correctly at the DXIL level, so the test-side IO type indirection (MIN_PRECISION, IO_TYPE, IO_OUT_TYPE, BUFFER_TYPE, dispatchMinPrecisionTest, HLK_MIN_PRECISION_TEST) is no longer needed. Min precision tests now use the same HLK_TEST/HLK_WAVEOP_TEST macros and dispatch paths as all other types. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../clang/unittests/HLSLExec/LongVectors.cpp | 552 +++++++----------- .../unittests/HLSLExec/ShaderOpArith.xml | 164 +----- 2 files changed, 236 insertions(+), 480 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index b797bb350f..12ae2c66ab 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -39,7 +39,6 @@ struct DataType { const char *HLSLTypeString; bool Is16Bit; size_t HLSLSizeInBytes; - const char *BufferTypeString; // Full-precision type for buffer I/O. }; template const DataType &getDataType() { @@ -48,15 +47,7 @@ template const DataType &getDataType() { #define DATA_TYPE(TYPE, HLSL_STRING, HLSL_SIZE) \ template <> const DataType &getDataType() { \ - static DataType DataType{HLSL_STRING, is16BitType(), HLSL_SIZE, \ - HLSL_STRING}; \ - return DataType; \ - } - -#define MIN_PRECISION_DATA_TYPE(TYPE, HLSL_STRING, HLSL_SIZE, IO_STRING) \ - template <> const DataType &getDataType() { \ - static DataType DataType{HLSL_STRING, is16BitType(), HLSL_SIZE, \ - IO_STRING}; \ + static DataType DataType{HLSL_STRING, is16BitType(), HLSL_SIZE}; \ return DataType; \ } @@ -68,14 +59,13 @@ DATA_TYPE(uint16_t, "uint16_t", 2) DATA_TYPE(uint32_t, "uint32_t", 4) DATA_TYPE(uint64_t, "uint64_t", 8) DATA_TYPE(HLSLHalf_t, "half", 2) +DATA_TYPE(HLSLMin16Float_t, "min16float", 4) +DATA_TYPE(HLSLMin16Int_t, "min16int", 4) +DATA_TYPE(HLSLMin16Uint_t, "min16uint", 4) DATA_TYPE(float, "float", 4) DATA_TYPE(double, "double", 8) -MIN_PRECISION_DATA_TYPE(HLSLMin16Float_t, "min16float", 4, "float") -MIN_PRECISION_DATA_TYPE(HLSLMin16Int_t, "min16int", 4, "int") -MIN_PRECISION_DATA_TYPE(HLSLMin16Uint_t, "min16uint", 4, "uint") #undef DATA_TYPE -#undef MIN_PRECISION_DATA_TYPE template constexpr bool isFloatingPointType() { return std::is_same_v || std::is_same_v || @@ -1879,69 +1869,6 @@ 
void dispatchWaveOpTest(ID3D12Device *D3DDevice, bool VerboseLogging, } } -template -void dispatchMinPrecisionTest(ID3D12Device *D3DDevice, bool VerboseLogging, - size_t OverrideInputSize) { - - const std::vector InputVectorSizes = - getInputSizesToTest(OverrideInputSize); - - constexpr const Operation &Operation = getOperation(OP); - Op Op; - - // Min precision buffer storage width is implementation-defined, so we - // use full-precision types for buffer I/O via BUFFER_TYPE/BUFFER_OUT_TYPE. - for (size_t VectorSize : InputVectorSizes) { - std::vector> Inputs = - buildTestInputs(VectorSize, Operation.InputSets, Operation.Arity); - - auto Expected = ExpectedBuilder::buildExpected(Op, Inputs); - - using OutT = typename decltype(Expected)::value_type; - - const std::string AdditionalCompilerOptions = - std::string("-DMIN_PRECISION") + - " -DBUFFER_TYPE=" + getDataType().BufferTypeString + - " -DBUFFER_OUT_TYPE=" + getDataType().BufferTypeString; - - runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected, - Op.ValidationConfig, AdditionalCompilerOptions); - } -} - -template -void dispatchMinPrecisionWaveOpTest(ID3D12Device *D3DDevice, - bool VerboseLogging, - size_t OverrideInputSize, UINT WaveSize) { - - const std::vector InputVectorSizes = - getInputSizesToTest(OverrideInputSize); - - constexpr const Operation &Operation = getOperation(OP); - Op Op; - - // Min precision buffer storage width is implementation-defined, so we - // use full-precision types for buffer I/O via BUFFER_TYPE/BUFFER_OUT_TYPE. 
- for (size_t VectorSize : InputVectorSizes) { - std::vector> Inputs = - buildTestInputs(VectorSize, Operation.InputSets, Operation.Arity); - - auto Expected = ExpectedBuilder::buildExpected(Op, Inputs, WaveSize); - - using OutT = typename decltype(Expected)::value_type; - - const std::string AdditionalCompilerOptions = - std::string("-DMIN_PRECISION") + - " -DBUFFER_TYPE=" + getDataType().BufferTypeString + - " -DBUFFER_OUT_TYPE=" + getDataType().BufferTypeString + - " -DWAVE_SIZE=" + std::to_string(WaveSize) + - " -DNUMTHREADS_XYZ=" + std::to_string(WaveSize) + ",1,1 "; - - runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected, - Op.ValidationConfig, AdditionalCompilerOptions); - } -} - } // namespace LongVector using namespace LongVector; @@ -1950,9 +1877,6 @@ using namespace LongVector; #define HLK_TEST(Op, DataType) \ TEST_METHOD(Op##_##DataType) { runTest(); } -#define HLK_MIN_PRECISION_TEST(Op, DataType) \ - TEST_METHOD(Op##_##DataType) { runMinPrecisionTest(); } - #define HLK_WAVEOP_TEST(Op, DataType) \ TEST_METHOD(Op##_##DataType) { \ BEGIN_TEST_METHOD_PROPERTIES() \ @@ -1963,16 +1887,6 @@ using namespace LongVector; runWaveOpTest(); \ } -#define HLK_MIN_PRECISION_WAVEOP_TEST(Op, DataType) \ - TEST_METHOD(Op##_##DataType) { \ - BEGIN_TEST_METHOD_PROPERTIES() \ - TEST_METHOD_PROPERTY( \ - "Kits.Specification", \ - "Device.Graphics.D3D12.DXILCore.ShaderModel69.CoreRequirement") \ - END_TEST_METHOD_PROPERTIES() \ - runMinPrecisionWaveOpTest(); \ - } - class TestClassCommon { public: bool setupClass() { @@ -2096,22 +2010,6 @@ class TestClassCommon { dispatchTest(D3DDevice, VerboseLogging, OverrideInputSize); } - template void runMinPrecisionTest() { - WEX::TestExecution::SetVerifyOutput verifySettings( - WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); - - dispatchMinPrecisionTest(D3DDevice, VerboseLogging, - OverrideInputSize); - } - - template void runMinPrecisionWaveOpTest() { - WEX::TestExecution::SetVerifyOutput VerifySettings( - 
WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); - - dispatchMinPrecisionWaveOpTest(D3DDevice, VerboseLogging, - OverrideInputSize, getWaveSize()); - } - protected: CComPtr D3DDevice; @@ -2887,311 +2785,311 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { // ---- HLSLMin16Float_t (mirrors applicable HLSLHalf_t ops) ---- // TernaryMath - HLK_MIN_PRECISION_TEST(Mad, HLSLMin16Float_t); + HLK_TEST(Mad, HLSLMin16Float_t); // BinaryMath - HLK_MIN_PRECISION_TEST(Add, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Subtract, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Multiply, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Divide, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Modulus, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Min, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Max, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Ldexp, HLSLMin16Float_t); + HLK_TEST(Add, HLSLMin16Float_t); + HLK_TEST(Subtract, HLSLMin16Float_t); + HLK_TEST(Multiply, HLSLMin16Float_t); + HLK_TEST(Divide, HLSLMin16Float_t); + HLK_TEST(Modulus, HLSLMin16Float_t); + HLK_TEST(Min, HLSLMin16Float_t); + HLK_TEST(Max, HLSLMin16Float_t); + HLK_TEST(Ldexp, HLSLMin16Float_t); // Saturate - HLK_MIN_PRECISION_TEST(Saturate, HLSLMin16Float_t); + HLK_TEST(Saturate, HLSLMin16Float_t); // Unary - HLK_MIN_PRECISION_TEST(Initialize, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(ArrayOperator_StaticAccess, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(ArrayOperator_DynamicAccess, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(ShuffleVector, HLSLMin16Float_t); + HLK_TEST(Initialize, HLSLMin16Float_t); + HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Float_t); + HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Float_t); + HLK_TEST(ShuffleVector, HLSLMin16Float_t); // Cast - HLK_MIN_PRECISION_TEST(CastToBool, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(CastToInt16, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(CastToInt32, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(CastToInt64, HLSLMin16Float_t); - 
HLK_MIN_PRECISION_TEST(CastToUint16_FromFP, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(CastToUint32_FromFP, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(CastToUint64_FromFP, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(CastToFloat16, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(CastToFloat32, HLSLMin16Float_t); + HLK_TEST(CastToBool, HLSLMin16Float_t); + HLK_TEST(CastToInt16, HLSLMin16Float_t); + HLK_TEST(CastToInt32, HLSLMin16Float_t); + HLK_TEST(CastToInt64, HLSLMin16Float_t); + HLK_TEST(CastToUint16_FromFP, HLSLMin16Float_t); + HLK_TEST(CastToUint32_FromFP, HLSLMin16Float_t); + HLK_TEST(CastToUint64_FromFP, HLSLMin16Float_t); + HLK_TEST(CastToFloat16, HLSLMin16Float_t); + HLK_TEST(CastToFloat32, HLSLMin16Float_t); // Trigonometric - HLK_MIN_PRECISION_TEST(Acos, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Asin, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Atan, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Cos, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Cosh, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Sin, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Sinh, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Tan, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Tanh, HLSLMin16Float_t); + HLK_TEST(Acos, HLSLMin16Float_t); + HLK_TEST(Asin, HLSLMin16Float_t); + HLK_TEST(Atan, HLSLMin16Float_t); + HLK_TEST(Cos, HLSLMin16Float_t); + HLK_TEST(Cosh, HLSLMin16Float_t); + HLK_TEST(Sin, HLSLMin16Float_t); + HLK_TEST(Sinh, HLSLMin16Float_t); + HLK_TEST(Tan, HLSLMin16Float_t); + HLK_TEST(Tanh, HLSLMin16Float_t); // UnaryMath - HLK_MIN_PRECISION_TEST(Abs, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Ceil, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Exp, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Floor, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Frac, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Log, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Rcp, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Round, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Rsqrt, HLSLMin16Float_t); - 
HLK_MIN_PRECISION_TEST(Sign, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Sqrt, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Trunc, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Exp2, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Log10, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Log2, HLSLMin16Float_t); + HLK_TEST(Abs, HLSLMin16Float_t); + HLK_TEST(Ceil, HLSLMin16Float_t); + HLK_TEST(Exp, HLSLMin16Float_t); + HLK_TEST(Floor, HLSLMin16Float_t); + HLK_TEST(Frac, HLSLMin16Float_t); + HLK_TEST(Log, HLSLMin16Float_t); + HLK_TEST(Rcp, HLSLMin16Float_t); + HLK_TEST(Round, HLSLMin16Float_t); + HLK_TEST(Rsqrt, HLSLMin16Float_t); + HLK_TEST(Sign, HLSLMin16Float_t); + HLK_TEST(Sqrt, HLSLMin16Float_t); + HLK_TEST(Trunc, HLSLMin16Float_t); + HLK_TEST(Exp2, HLSLMin16Float_t); + HLK_TEST(Log10, HLSLMin16Float_t); + HLK_TEST(Log2, HLSLMin16Float_t); // BinaryComparison - HLK_MIN_PRECISION_TEST(LessThan, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(LessEqual, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(GreaterThan, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(GreaterEqual, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(Equal, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(NotEqual, HLSLMin16Float_t); + HLK_TEST(LessThan, HLSLMin16Float_t); + HLK_TEST(LessEqual, HLSLMin16Float_t); + HLK_TEST(GreaterThan, HLSLMin16Float_t); + HLK_TEST(GreaterEqual, HLSLMin16Float_t); + HLK_TEST(Equal, HLSLMin16Float_t); + HLK_TEST(NotEqual, HLSLMin16Float_t); // Select - HLK_MIN_PRECISION_TEST(Select, HLSLMin16Float_t); + HLK_TEST(Select, HLSLMin16Float_t); // Dot - HLK_MIN_PRECISION_TEST(Dot, HLSLMin16Float_t); + HLK_TEST(Dot, HLSLMin16Float_t); // LoadAndStore - HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Float_t); - 
HLK_MIN_PRECISION_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Float_t); // Derivative - HLK_MIN_PRECISION_TEST(DerivativeDdx, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(DerivativeDdy, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(DerivativeDdxFine, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(DerivativeDdyFine, HLSLMin16Float_t); + HLK_TEST(DerivativeDdx, HLSLMin16Float_t); + HLK_TEST(DerivativeDdy, HLSLMin16Float_t); + HLK_TEST(DerivativeDdxFine, HLSLMin16Float_t); + HLK_TEST(DerivativeDdyFine, HLSLMin16Float_t); // Quad - HLK_MIN_PRECISION_TEST(QuadReadLaneAt, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(QuadReadAcrossX, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(QuadReadAcrossY, HLSLMin16Float_t); - HLK_MIN_PRECISION_TEST(QuadReadAcrossDiagonal, HLSLMin16Float_t); + HLK_TEST(QuadReadLaneAt, HLSLMin16Float_t); + HLK_TEST(QuadReadAcrossX, HLSLMin16Float_t); + HLK_TEST(QuadReadAcrossY, HLSLMin16Float_t); + 
HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Float_t); // Wave - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveSum, HLSLMin16Float_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveMin, HLSLMin16Float_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveMax, HLSLMin16Float_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Float_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Float_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Float_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Float_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WavePrefixSum, HLSLMin16Float_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Float_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Float_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Float_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMatch, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Float_t); // ---- HLSLMin16Int_t (mirrors applicable int16_t ops) ---- // TernaryMath - HLK_MIN_PRECISION_TEST(Mad, HLSLMin16Int_t); + HLK_TEST(Mad, HLSLMin16Int_t); // BinaryMath // Note: Divide and Modulus excluded — HLSL does not support signed integer // division on minimum-precision types. 
- HLK_MIN_PRECISION_TEST(Add, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(Subtract, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(Multiply, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(Min, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(Max, HLSLMin16Int_t); + HLK_TEST(Add, HLSLMin16Int_t); + HLK_TEST(Subtract, HLSLMin16Int_t); + HLK_TEST(Multiply, HLSLMin16Int_t); + HLK_TEST(Min, HLSLMin16Int_t); + HLK_TEST(Max, HLSLMin16Int_t); // Bitwise - HLK_MIN_PRECISION_TEST(And, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(Or, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(Xor, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(LeftShift, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(RightShift, HLSLMin16Int_t); + HLK_TEST(And, HLSLMin16Int_t); + HLK_TEST(Or, HLSLMin16Int_t); + HLK_TEST(Xor, HLSLMin16Int_t); + HLK_TEST(LeftShift, HLSLMin16Int_t); + HLK_TEST(RightShift, HLSLMin16Int_t); // Note: ReverseBits, CountBits, FirstBitHigh, FirstBitLow excluded - // DXC promotes min precision to i32 before these intrinsics, so they // don't operate at min precision. 
// UnaryMath - HLK_MIN_PRECISION_TEST(Abs, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(Sign, HLSLMin16Int_t); + HLK_TEST(Abs, HLSLMin16Int_t); + HLK_TEST(Sign, HLSLMin16Int_t); // Unary - HLK_MIN_PRECISION_TEST(Initialize, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(ArrayOperator_StaticAccess, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(ArrayOperator_DynamicAccess, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(ShuffleVector, HLSLMin16Int_t); + HLK_TEST(Initialize, HLSLMin16Int_t); + HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Int_t); + HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Int_t); + HLK_TEST(ShuffleVector, HLSLMin16Int_t); // Cast - HLK_MIN_PRECISION_TEST(CastToBool, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(CastToInt32, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(CastToInt64, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(CastToUint16, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(CastToUint32, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(CastToUint64, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(CastToFloat16, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(CastToFloat32, HLSLMin16Int_t); + HLK_TEST(CastToBool, HLSLMin16Int_t); + HLK_TEST(CastToInt32, HLSLMin16Int_t); + HLK_TEST(CastToInt64, HLSLMin16Int_t); + HLK_TEST(CastToUint16, HLSLMin16Int_t); + HLK_TEST(CastToUint32, HLSLMin16Int_t); + HLK_TEST(CastToUint64, HLSLMin16Int_t); + HLK_TEST(CastToFloat16, HLSLMin16Int_t); + HLK_TEST(CastToFloat32, HLSLMin16Int_t); // BinaryComparison - HLK_MIN_PRECISION_TEST(LessThan, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(LessEqual, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(GreaterThan, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(GreaterEqual, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(Equal, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(NotEqual, HLSLMin16Int_t); + HLK_TEST(LessThan, HLSLMin16Int_t); + HLK_TEST(LessEqual, HLSLMin16Int_t); + HLK_TEST(GreaterThan, HLSLMin16Int_t); + HLK_TEST(GreaterEqual, HLSLMin16Int_t); + HLK_TEST(Equal, HLSLMin16Int_t); + HLK_TEST(NotEqual, HLSLMin16Int_t); // Select 
- HLK_MIN_PRECISION_TEST(Select, HLSLMin16Int_t); + HLK_TEST(Select, HLSLMin16Int_t); // Reduction - HLK_MIN_PRECISION_TEST(Any_Mixed, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(Any_Zero, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(Any_NoZero, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(All_Mixed, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(All_Zero, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(All_NoZero, HLSLMin16Int_t); + HLK_TEST(Any_Mixed, HLSLMin16Int_t); + HLK_TEST(Any_Zero, HLSLMin16Int_t); + HLK_TEST(Any_NoZero, HLSLMin16Int_t); + HLK_TEST(All_Mixed, HLSLMin16Int_t); + HLK_TEST(All_Zero, HLSLMin16Int_t); + HLK_TEST(All_NoZero, HLSLMin16Int_t); // LoadAndStore - HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Int_t); 
+ HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Int_t); // Quad - HLK_MIN_PRECISION_TEST(QuadReadLaneAt, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(QuadReadAcrossX, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(QuadReadAcrossY, HLSLMin16Int_t); - HLK_MIN_PRECISION_TEST(QuadReadAcrossDiagonal, HLSLMin16Int_t); + HLK_TEST(QuadReadLaneAt, HLSLMin16Int_t); + HLK_TEST(QuadReadAcrossX, HLSLMin16Int_t); + HLK_TEST(QuadReadAcrossY, HLSLMin16Int_t); + HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Int_t); // Wave - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveSum, HLSLMin16Int_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveMin, HLSLMin16Int_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveMax, HLSLMin16Int_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Int_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Int_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Int_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Int_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WavePrefixSum, HLSLMin16Int_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Int_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Int_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Int_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixBitAnd, HLSLMin16Int_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixBitOr, HLSLMin16Int_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixBitXor, HLSLMin16Int_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMatch, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Int_t); + 
HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Int_t); // ---- HLSLMin16Uint_t (mirrors applicable uint16_t ops) ---- // TernaryMath - HLK_MIN_PRECISION_TEST(Mad, HLSLMin16Uint_t); + HLK_TEST(Mad, HLSLMin16Uint_t); // BinaryMath - HLK_MIN_PRECISION_TEST(Add, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(Subtract, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(Multiply, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(Divide, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(Modulus, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(Min, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(Max, HLSLMin16Uint_t); + HLK_TEST(Add, HLSLMin16Uint_t); + HLK_TEST(Subtract, HLSLMin16Uint_t); + HLK_TEST(Multiply, HLSLMin16Uint_t); + HLK_TEST(Divide, HLSLMin16Uint_t); + HLK_TEST(Modulus, HLSLMin16Uint_t); + HLK_TEST(Min, HLSLMin16Uint_t); + HLK_TEST(Max, HLSLMin16Uint_t); // Bitwise - HLK_MIN_PRECISION_TEST(And, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(Or, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(Xor, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(LeftShift, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(RightShift, HLSLMin16Uint_t); + HLK_TEST(And, HLSLMin16Uint_t); + HLK_TEST(Or, HLSLMin16Uint_t); + HLK_TEST(Xor, HLSLMin16Uint_t); + HLK_TEST(LeftShift, HLSLMin16Uint_t); + HLK_TEST(RightShift, HLSLMin16Uint_t); // Note: ReverseBits, CountBits, FirstBitHigh, FirstBitLow excluded - // DXC promotes min precision to i32 before these intrinsics, so they // don't operate at min precision. 
// UnaryMath - HLK_MIN_PRECISION_TEST(Abs, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(Sign, HLSLMin16Uint_t); + HLK_TEST(Abs, HLSLMin16Uint_t); + HLK_TEST(Sign, HLSLMin16Uint_t); // Unary - HLK_MIN_PRECISION_TEST(Initialize, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(ArrayOperator_StaticAccess, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(ArrayOperator_DynamicAccess, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(ShuffleVector, HLSLMin16Uint_t); + HLK_TEST(Initialize, HLSLMin16Uint_t); + HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Uint_t); + HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Uint_t); + HLK_TEST(ShuffleVector, HLSLMin16Uint_t); // Cast - HLK_MIN_PRECISION_TEST(CastToBool, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(CastToInt16, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(CastToInt32, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(CastToInt64, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(CastToUint32, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(CastToUint64, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(CastToFloat16, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(CastToFloat32, HLSLMin16Uint_t); + HLK_TEST(CastToBool, HLSLMin16Uint_t); + HLK_TEST(CastToInt16, HLSLMin16Uint_t); + HLK_TEST(CastToInt32, HLSLMin16Uint_t); + HLK_TEST(CastToInt64, HLSLMin16Uint_t); + HLK_TEST(CastToUint32, HLSLMin16Uint_t); + HLK_TEST(CastToUint64, HLSLMin16Uint_t); + HLK_TEST(CastToFloat16, HLSLMin16Uint_t); + HLK_TEST(CastToFloat32, HLSLMin16Uint_t); // BinaryComparison - HLK_MIN_PRECISION_TEST(LessThan, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(LessEqual, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(GreaterThan, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(GreaterEqual, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(Equal, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(NotEqual, HLSLMin16Uint_t); + HLK_TEST(LessThan, HLSLMin16Uint_t); + HLK_TEST(LessEqual, HLSLMin16Uint_t); + HLK_TEST(GreaterThan, HLSLMin16Uint_t); + HLK_TEST(GreaterEqual, HLSLMin16Uint_t); + HLK_TEST(Equal, HLSLMin16Uint_t); + 
HLK_TEST(NotEqual, HLSLMin16Uint_t); // Select - HLK_MIN_PRECISION_TEST(Select, HLSLMin16Uint_t); + HLK_TEST(Select, HLSLMin16Uint_t); // LoadAndStore - HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Uint_t); // Quad - HLK_MIN_PRECISION_TEST(QuadReadLaneAt, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(QuadReadAcrossX, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(QuadReadAcrossY, HLSLMin16Uint_t); - HLK_MIN_PRECISION_TEST(QuadReadAcrossDiagonal, HLSLMin16Uint_t); + HLK_TEST(QuadReadLaneAt, HLSLMin16Uint_t); + HLK_TEST(QuadReadAcrossX, HLSLMin16Uint_t); + HLK_TEST(QuadReadAcrossY, 
HLSLMin16Uint_t); + HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Uint_t); // Wave - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveSum, HLSLMin16Uint_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveMin, HLSLMin16Uint_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveMax, HLSLMin16Uint_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Uint_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Uint_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Uint_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Uint_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WavePrefixSum, HLSLMin16Uint_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Uint_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Uint_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Uint_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixBitAnd, HLSLMin16Uint_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixBitOr, HLSLMin16Uint_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMultiPrefixBitXor, HLSLMin16Uint_t); - HLK_MIN_PRECISION_WAVEOP_TEST(WaveMatch, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Uint_t); }; #define HLK_TEST_DOUBLE(Op, DataType) \ diff --git 
a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index 0677d4d16c..319d468a43 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -3796,11 +3796,7 @@ void MSMain(uint GID : SV_GroupIndex, data; - #else vector data; - #endif }; RWStructuredBuffer InputVector : register(u0); RWStructuredBuffer OutputVector: register(u1); @@ -3812,20 +3808,10 @@ void MSMain(uint GID : SV_GroupIndex, [numthreads(1,1,1)] void main(uint GI : SV_GroupIndex) { #if USE_STRUCTURED_BUFFER - #ifdef MIN_PRECISION - vector temp = (vector)InputVector[0].data; - OutputVector[0].data = (vector)temp; - #else OutputVector[0].data = InputVector[0].data; - #endif #else - #ifdef MIN_PRECISION - vector Input = (vector)InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, (vector)Input); - #else vector Input = InputVector.Load< vector >(0); OutputVector.Store< vector >(0, Input); - #endif #endif }; ]]> @@ -3849,11 +3835,7 @@ void MSMain(uint GID : SV_GroupIndex, data; - #else vector data; - #endif }; StructuredBuffer InputVector : register(t0); RWStructuredBuffer OutputVector : register(u1); @@ -3865,20 +3847,10 @@ void MSMain(uint GID : SV_GroupIndex, [numthreads(1,1,1)] void main(uint GI : SV_GroupIndex) { #if USE_STRUCTURED_BUFFER - #ifdef MIN_PRECISION - vector temp = (vector)InputVector[0].data; - OutputVector[0].data = (vector)temp; - #else OutputVector[0].data = InputVector[0].data; - #endif #else - #ifdef MIN_PRECISION - vector Input = (vector)InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, (vector)Input); - #else vector Input = InputVector.Load< vector >(0); OutputVector.Store< vector >(0, Input); - #endif #endif }; ]]> @@ -3910,11 +3882,7 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER struct SLongVec { - #ifdef MIN_PRECISION - vector data; - #else vector data; - #endif }; RWStructuredBuffer InputVector : register(u0); 
RWStructuredBuffer OutputVector: register(u1); @@ -3928,20 +3896,10 @@ void MSMain(uint GID : SV_GroupIndex, void main(uint GI : SV_GroupIndex) { #if USE_STRUCTURED_BUFFER - #ifdef MIN_PRECISION - vector temp = (vector)InputVector[0].data; - OutputVector[0].data = (vector)temp; - #else OutputVector[0].data = InputVector[0].data; - #endif #else - #ifdef MIN_PRECISION - vector Input = (vector)InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, (vector)Input); - #else vector Input = InputVector.Load< vector >(0); OutputVector.Store< vector >(0, Input); - #endif #endif }; @@ -3972,11 +3930,7 @@ void MSMain(uint GID : SV_GroupIndex, data; - #else vector data; - #endif }; StructuredBuffer InputVector : register(t0); RWStructuredBuffer OutputVector: register(u0); @@ -3988,20 +3942,10 @@ void MSMain(uint GID : SV_GroupIndex, [numthreads(1,1,1)] void main(uint GI : SV_GroupIndex) { #if USE_STRUCTURED_BUFFER - #ifdef MIN_PRECISION - vector temp = (vector)InputVector[0].data; - OutputVector[0].data = (vector)temp; - #else OutputVector[0].data = InputVector[0].data; - #endif #else - #ifdef MIN_PRECISION - vector Input = (vector)InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, (vector)Input); - #else vector Input = InputVector.Load< vector >(0); OutputVector.Store< vector >(0, Input); - #endif #endif }; ]]> @@ -4029,11 +3973,7 @@ void MSMain(uint GID : SV_GroupIndex, data; - #else vector data; - #endif }; #endif @@ -4043,23 +3983,14 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER StructuredBuffer InputVector = ResourceDescriptorHeap[0]; RWStructuredBuffer OutputVector = ResourceDescriptorHeap[1]; - #ifdef MIN_PRECISION - vector temp = (vector)InputVector[0].data; - OutputVector[0].data = (vector)temp; - #else OutputVector[0].data = InputVector[0].data; - #endif #else ByteAddressBuffer InputVector = ResourceDescriptorHeap[0]; RWByteAddressBuffer OutputVector = ResourceDescriptorHeap[1]; - #ifdef MIN_PRECISION - vector Input = 
(vector)InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, (vector)Input); - #else vector Input = InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, Input); - #endif #endif }; ]]> @@ -4086,11 +4017,7 @@ void MSMain(uint GID : SV_GroupIndex, data; - #else vector data; - #endif }; #endif @@ -4099,23 +4026,14 @@ void MSMain(uint GID : SV_GroupIndex, #if USE_STRUCTURED_BUFFER RWStructuredBuffer InputVector = ResourceDescriptorHeap[0]; RWStructuredBuffer OutputVector = ResourceDescriptorHeap[1]; - #ifdef MIN_PRECISION - vector temp = (vector)InputVector[0].data; - OutputVector[0].data = (vector)temp; - #else OutputVector[0].data = InputVector[0].data; - #endif #else RWByteAddressBuffer InputVector = ResourceDescriptorHeap[0]; RWByteAddressBuffer OutputVector = ResourceDescriptorHeap[1]; - #ifdef MIN_PRECISION - vector Input = (vector)InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, (vector)Input); - #else vector Input = InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, Input); - #endif #endif }; ]]> @@ -4143,15 +4061,6 @@ void MSMain(uint GID : SV_GroupIndex, OUT_TYPE : The type of the output vector, e.g. float, double, int, uint. In most cases OUT_TYPE == TYPE. - BUFFER_TYPE : (Min precision only) Full-precision type used for buffer Load/Store. - Maps min16float/min16int/min16uint to float/int/uint for - deterministic buffer layout. Only defined when MIN_PRECISION is set; - otherwise falls back to TYPE via #ifndef in the shader. - BUFFER_OUT_TYPE : (Min precision only) Same as BUFFER_TYPE but derived from OUT_TYPE. - Falls back to OUT_TYPE via #ifndef when not defined. - MIN_PRECISION : Defined when TYPE is a min precision type. Gates the - BUFFER_TYPE/BUFFER_OUT_TYPE load/store paths in shader templates. - NUM : The number of elements in the vector, e.g. 2, 3, 4, 8, 16, 32, FUNC : Used to expand to the HLSL intrinsic being tested. 
e.g cos, cosh, @@ -4174,16 +4083,6 @@ void MSMain(uint GID : SV_GroupIndex, RWByteAddressBuffer g_InputVector3 : register(u2); RWByteAddressBuffer g_OutputVector : register(u3); - // For min precision types, BUFFER_TYPE/BUFFER_OUT_TYPE are defined as - // full-precision equivalents for deterministic buffer I/O layout. - // For all other types, fall back to TYPE/OUT_TYPE (no-op). - #ifndef BUFFER_TYPE - #define BUFFER_TYPE TYPE - #endif - #ifndef BUFFER_OUT_TYPE - #define BUFFER_OUT_TYPE OUT_TYPE - #endif - #define IS_UNARY_OP (BASIC_OP_TYPE == 0x1) #define IS_BINARY_OP (BASIC_OP_TYPE == 0x2) #define IS_TERNARY_OP (BASIC_OP_TYPE == 0x3) @@ -4381,11 +4280,7 @@ void MSMain(uint GID : SV_GroupIndex, Vector = WavePrefixSum(Vector); if(WaveGetLaneIndex() == MidLane) { - #ifdef MIN_PRECISION - g_OutputVector.Store< vector >(0, Vector); - #else g_OutputVector.Store< vector >(0, Vector); - #endif } } #endif @@ -4396,11 +4291,7 @@ void MSMain(uint GID : SV_GroupIndex, Vector = WavePrefixProduct(Vector); if(WaveGetLaneIndex() == 2) { - #ifdef MIN_PRECISION - g_OutputVector.Store< vector >(0, Vector); - #else g_OutputVector.Store< vector >(0, Vector); - #endif } } #endif @@ -4433,11 +4324,7 @@ void MSMain(uint GID : SV_GroupIndex, { // Lane 3 is the last lane in the mask that we care about. Store the // result from it. - #ifdef MIN_PRECISION - g_OutputVector.Store< vector >(0, Vector); - #else g_OutputVector.Store< vector >(0, Vector); - #endif } } #endif @@ -4469,11 +4356,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Lane 3 is the last lane in the mask. Store the result from it. - #ifdef MIN_PRECISION - g_OutputVector.Store< vector >(0, Vector); - #else g_OutputVector.Store< vector >(0, Vector); - #endif } } #endif @@ -4504,11 +4387,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Lane 3 is the last lane in the mask. Store the result from it. 
- #ifdef MIN_PRECISION - g_OutputVector.Store< vector >(0, Vector); - #else g_OutputVector.Store< vector >(0, Vector); - #endif } } #endif @@ -4546,11 +4425,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Lane 3 is the last lane in the mask. Store the result from it. - #ifdef MIN_PRECISION - g_OutputVector.Store< vector >(0, Vector); - #else g_OutputVector.Store< vector >(0, Vector); - #endif } } #endif @@ -4591,11 +4466,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { // Store result from lane 3 (last lane in mask) - #ifdef MIN_PRECISION - g_OutputVector.Store< vector >(0, Vector); - #else g_OutputVector.Store< vector >(0, Vector); - #endif } } #endif @@ -4686,7 +4557,7 @@ void MSMain(uint GID : SV_GroupIndex, // lane we arbitrarily chose for validation with fine derivatives. if(LaneIndex == 3) { - g_OutputVector.Store< vector >(0, Result); + g_OutputVector.Store< vector >(0, Result); } } #endif @@ -4715,7 +4586,7 @@ void MSMain(uint GID : SV_GroupIndex, if(LaneIndex == 3) { - g_OutputVector.Store< vector >(0, Result); + g_OutputVector.Store< vector >(0, Result); } } #endif @@ -4738,34 +4609,21 @@ void MSMain(uint GID : SV_GroupIndex, #ifdef FUNC_SHUFFLE_VECTOR // For shuffle vector, the input is a scalar, not a vector. - #ifdef MIN_PRECISION - TYPE Input1 = (TYPE)g_InputVector1.Load(0); - #else TYPE Input1 = g_InputVector1.Load(0); - #endif #else // For all other basic op types the first input is always a vector. 
- #ifdef MIN_PRECISION - vector Input1 = (vector)g_InputVector1.Load< vector >(0); - #else - vector Input1 = g_InputVector1.Load< vector >(0); - #endif + vector Input1 = g_InputVector1.Load< vector >(0); #endif #if (IS_BINARY_OP || IS_TERNARY_OP) - #ifdef MIN_PRECISION - vector Input2 = (vector)g_InputVector2.Load< vector >(0); - #else - vector Input2 = g_InputVector2.Load< vector >(0); - #endif + vector Input2 = g_InputVector2.Load< vector >(0); #endif #if IS_TERNARY_OP - #ifdef MIN_PRECISION - vector Input3 = (vector)g_InputVector3.Load< vector >(0); - #else - vector Input3 = g_InputVector3.Load< vector >(0); - #endif + vector Input3 = g_InputVector3.Load< vector >(0); #endif #ifdef IS_REDUCTION_OP @@ -4818,7 +4676,7 @@ void MSMain(uint GID : SV_GroupIndex, OutputVector = FUNC(Input1, Input2, Input3); #endif - g_OutputVector.Store< vector >(0, OutputVector); + g_OutputVector.Store< vector >(0, OutputVector); }; ]]>