diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index 4126d861ac..876ff2dab6 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -237,6 +237,227 @@ struct HLSLHalf_t { DirectX::PackedVector::HALF Val = 0; }; +// Min precision wrapper types. Without -enable-16bit-types, min precision types +// are 32-bit in DXIL storage. These thin wrappers provide distinct C++ types +// that map to different HLSL type strings via DATA_TYPE. +struct HLSLMin16Float_t { + constexpr HLSLMin16Float_t() : Val(0.0f) {} + constexpr HLSLMin16Float_t(float F) : Val(F) {} + constexpr HLSLMin16Float_t(double D) : Val(static_cast(D)) {} + explicit constexpr HLSLMin16Float_t(int I) : Val(static_cast(I)) {} + explicit constexpr HLSLMin16Float_t(uint32_t U) + : Val(static_cast(U)) {} + + constexpr operator float() const { return Val; } + + bool operator==(const HLSLMin16Float_t &O) const { return Val == O.Val; } + bool operator!=(const HLSLMin16Float_t &O) const { return Val != O.Val; } + bool operator<(const HLSLMin16Float_t &O) const { return Val < O.Val; } + bool operator>(const HLSLMin16Float_t &O) const { return Val > O.Val; } + bool operator<=(const HLSLMin16Float_t &O) const { return Val <= O.Val; } + bool operator>=(const HLSLMin16Float_t &O) const { return Val >= O.Val; } + + HLSLMin16Float_t operator+(const HLSLMin16Float_t &O) const { + return HLSLMin16Float_t(Val + O.Val); + } + HLSLMin16Float_t operator-(const HLSLMin16Float_t &O) const { + return HLSLMin16Float_t(Val - O.Val); + } + HLSLMin16Float_t operator*(const HLSLMin16Float_t &O) const { + return HLSLMin16Float_t(Val * O.Val); + } + HLSLMin16Float_t operator/(const HLSLMin16Float_t &O) const { + return HLSLMin16Float_t(Val / O.Val); + } + HLSLMin16Float_t operator%(const HLSLMin16Float_t &O) const { + return HLSLMin16Float_t(std::fmod(Val, O.Val)); + } + + friend std::wostream &operator<<(std::wostream &Os, + const HLSLMin16Float_t &Obj) { + Os << Obj.Val; + return Os; + } + friend std::ostream &operator<<(std::ostream &Os, + const HLSLMin16Float_t &Obj) { + Os << Obj.Val; + return Os; + } + + float Val; +}; +struct HLSLMin16Int_t { + constexpr HLSLMin16Int_t() : Val(0) {} + constexpr HLSLMin16Int_t(int32_t I) : Val(I) {} + constexpr HLSLMin16Int_t(int64_t I) : Val(static_cast(I)) {} + constexpr HLSLMin16Int_t(uint32_t U) : Val(static_cast(U)) {} + constexpr HLSLMin16Int_t(uint64_t U) : Val(static_cast(U)) {} + constexpr HLSLMin16Int_t(float F) : Val(static_cast(F)) {} + constexpr HLSLMin16Int_t(double D) : Val(static_cast(D)) {} + + constexpr operator int32_t() const { return Val; } + + bool operator==(const HLSLMin16Int_t &O) const { return Val == O.Val; } + bool operator!=(const HLSLMin16Int_t &O) const { return Val != O.Val; } + bool operator<(const HLSLMin16Int_t &O) const { return Val < O.Val; } + bool operator>(const HLSLMin16Int_t &O) const { return Val > O.Val; } + bool operator<=(const HLSLMin16Int_t &O) const { return Val <= O.Val; } + bool operator>=(const HLSLMin16Int_t &O) const { return Val >= O.Val; } + + HLSLMin16Int_t operator+(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val + O.Val); + } + HLSLMin16Int_t operator-(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val - O.Val); + } + HLSLMin16Int_t operator*(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val * O.Val); + } + HLSLMin16Int_t operator/(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val / O.Val); + } + HLSLMin16Int_t operator%(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val % O.Val); + } + HLSLMin16Int_t operator&(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val & O.Val); + } + HLSLMin16Int_t operator|(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val | O.Val); + } + HLSLMin16Int_t operator^(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val ^ O.Val); + } + HLSLMin16Int_t operator<<(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val << O.Val); + } + HLSLMin16Int_t operator>>(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val >> O.Val); + } + HLSLMin16Int_t operator~() const { return HLSLMin16Int_t(~Val); } + HLSLMin16Int_t &operator<<=(const HLSLMin16Int_t &O) { + Val <<= O.Val; + return *this; + } + HLSLMin16Int_t &operator>>=(const HLSLMin16Int_t &O) { + Val >>= O.Val; + return *this; + } + HLSLMin16Int_t &operator|=(const HLSLMin16Int_t &O) { + Val |= O.Val; + return *this; + } + HLSLMin16Int_t &operator&=(const HLSLMin16Int_t &O) { + Val &= O.Val; + return *this; + } + HLSLMin16Int_t &operator^=(const HLSLMin16Int_t &O) { + Val ^= O.Val; + return *this; + } + HLSLMin16Int_t operator&&(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val && O.Val); + } + HLSLMin16Int_t operator||(const HLSLMin16Int_t &O) const { + return HLSLMin16Int_t(Val || O.Val); + } + friend std::wostream &operator<<(std::wostream &Os, + const HLSLMin16Int_t &Obj) { + Os << Obj.Val; + return Os; + } + friend std::ostream &operator<<(std::ostream &Os, const HLSLMin16Int_t &Obj) { + Os << Obj.Val; + return Os; + } + + int32_t Val; +}; +struct HLSLMin16Uint_t { + constexpr HLSLMin16Uint_t() : Val(0) {} + constexpr HLSLMin16Uint_t(uint32_t U) : Val(U) {} + constexpr HLSLMin16Uint_t(uint64_t U) : Val(static_cast(U)) {} + constexpr HLSLMin16Uint_t(int32_t I) : Val(static_cast(I)) {} + constexpr HLSLMin16Uint_t(float F) : Val(static_cast(F)) {} + constexpr HLSLMin16Uint_t(double D) : Val(static_cast(D)) {} + + constexpr operator uint32_t() const { return Val; } + + bool operator==(const HLSLMin16Uint_t &O) const { return Val == O.Val; } + bool operator!=(const HLSLMin16Uint_t &O) const { return Val != O.Val; } + bool operator<(const HLSLMin16Uint_t &O) const { return Val < O.Val; } + bool operator>(const HLSLMin16Uint_t &O) const { return Val > O.Val; } + bool operator<=(const HLSLMin16Uint_t &O) const { return Val <= O.Val; } + bool operator>=(const HLSLMin16Uint_t &O) const { return Val >= O.Val; } + + HLSLMin16Uint_t operator+(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val + O.Val); + } + HLSLMin16Uint_t operator-(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val - O.Val); + } + HLSLMin16Uint_t operator*(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val * O.Val); + } + HLSLMin16Uint_t operator/(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val / O.Val); + } + HLSLMin16Uint_t operator%(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val % O.Val); + } + HLSLMin16Uint_t operator&(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val & O.Val); + } + HLSLMin16Uint_t operator|(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val | O.Val); + } + HLSLMin16Uint_t operator^(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val ^ O.Val); + } + HLSLMin16Uint_t operator<<(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val << O.Val); + } + HLSLMin16Uint_t operator>>(const HLSLMin16Uint_t &O) const { + return HLSLMin16Uint_t(Val >> O.Val); + } + HLSLMin16Uint_t operator~() const { return HLSLMin16Uint_t(~Val); } + HLSLMin16Uint_t &operator<<=(const HLSLMin16Uint_t &O) { + Val <<= O.Val; + return *this; + } + HLSLMin16Uint_t &operator>>=(const HLSLMin16Uint_t &O) { + Val >>= O.Val; + return *this; + } + HLSLMin16Uint_t &operator|=(const HLSLMin16Uint_t &O) { + Val |= O.Val; + return *this; + } + HLSLMin16Uint_t &operator&=(const HLSLMin16Uint_t &O) { + Val &= O.Val; + return *this; + } + HLSLMin16Uint_t &operator^=(const HLSLMin16Uint_t &O) { + Val ^= O.Val; + return *this; + } + + bool operator&&(const HLSLMin16Uint_t &O) const { return Val && O.Val; } + bool operator||(const HLSLMin16Uint_t &O) const { return Val || O.Val; } + friend std::wostream &operator<<(std::wostream &Os, + const HLSLMin16Uint_t &Obj) { + Os << Obj.Val; + return Os; + } + friend std::ostream &operator<<(std::ostream &Os, + const HLSLMin16Uint_t &Obj) { + Os << Obj.Val; + return Os; + } + + uint32_t Val; +}; + enum class InputSet { #define INPUT_SET(SYMBOL) SYMBOL, #include "LongVectorOps.def" @@ -450,6 +671,57 @@ INPUT_SET(InputSet::SelectCond, 0.0, 1.0); INPUT_SET(InputSet::AllOnes, 1.0); END_INPUT_SETS() +// Min precision input sets. All values are exactly representable in float16 +// to avoid precision mismatch between CPU-side expected values and GPU-side +// min precision computation. No FP specials (INF/NaN/denorm) as min precision +// types do not support them. +BEGIN_INPUT_SETS(HLSLMin16Float_t) +INPUT_SET(InputSet::Default1, -1.0f, -1.0f, 1.0f, -0.03125f, 1.0f, -0.03125f, + 1.0f, -0.03125f, 1.0f, -0.03125f); +INPUT_SET(InputSet::Default2, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, + -1.0f, 1.0f, -1.0f); +INPUT_SET(InputSet::Default3, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, + 1.0f, -1.0f, 1.0f); +INPUT_SET(InputSet::Zero, 0.0f); +INPUT_SET(InputSet::RangeHalfPi, -1.0625f, 0.046875f, -1.046875f, 0.3125f, + 1.4375f, -0.875f, 1.375f, -0.71875f, -0.8125f, 0.5625f); +INPUT_SET(InputSet::RangeOne, 0.328125f, 0.71875f, -0.953125f, 0.671875f, + -0.03125f, 0.5f, 0.84375f, -0.671875f, -0.6875f, -0.90625f); +INPUT_SET(InputSet::Positive, 1.0f, 1.0f, 342.0f, 0.03125f, 5504.0f, 0.03125f, + 1.0f, 0.03125f, 331.25f, 3250.0f); +INPUT_SET(InputSet::SelectCond, 0.0f, 1.0f); +INPUT_SET(InputSet::AllOnes, 1.0f); +END_INPUT_SETS() + +// Values constrained to int16 range. Kept small to avoid overflow ambiguity. +// Shift amounts limited so results fit in int16 (-32768..32767). +BEGIN_INPUT_SETS(HLSLMin16Int_t) +INPUT_SET(InputSet::Default1, -6, 1, 7, 3, 8, 4, -3, 8, 8, -2); +INPUT_SET(InputSet::Default2, 5, -6, -3, -2, 9, 3, 1, -3, -7, 2); +INPUT_SET(InputSet::Default3, -5, 6, 3, 2, -9, -3, -1, 3, 7, -2); +INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 12, 11, 11, 14); +INPUT_SET(InputSet::Zero, 0); +INPUT_SET(InputSet::NoZero, 1); +INPUT_SET(InputSet::SelectCond, 0, 1); +INPUT_SET(InputSet::AllOnes, 1); +INPUT_SET(InputSet::WaveMultiPrefixBitwise, 0x0, 0x1, 0x3, 0x4, 0x10, 0x12, 0xF, + -1); +END_INPUT_SETS() + +// Values constrained to uint16 range. Kept small so that multiply, mad, +// subtract, shift, and wave prefix products do not overflow 16 bits. +BEGIN_INPUT_SETS(HLSLMin16Uint_t) +INPUT_SET(InputSet::Default1, 3, 199, 3, 200, 5, 10, 22, 8, 9, 10); +INPUT_SET(InputSet::Default2, 2, 111, 3, 4, 5, 9, 21, 8, 9, 10); +INPUT_SET(InputSet::Default3, 4, 112, 4, 5, 3, 7, 21, 1, 11, 9); +INPUT_SET(InputSet::Zero, 0); +INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 11, 12, 12, 12); +INPUT_SET(InputSet::SelectCond, 0, 1); +INPUT_SET(InputSet::AllOnes, 1); +INPUT_SET(InputSet::WaveMultiPrefixBitwise, 0x0, 0x1, 0x3, 0x4, 0x10, 0x12, 0xF, + std::numeric_limits::max()); +END_INPUT_SETS() + #undef BEGIN_INPUT_SETS #undef INPUT_SET #undef END_INPUT_SETS diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index dbb8a8d672..12ae2c66ab 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -59,6 +59,9 @@ DATA_TYPE(uint16_t, "uint16_t", 2) DATA_TYPE(uint32_t, "uint32_t", 4) DATA_TYPE(uint64_t, "uint64_t", 8) DATA_TYPE(HLSLHalf_t, "half", 2) +DATA_TYPE(HLSLMin16Float_t, "min16float", 4) +DATA_TYPE(HLSLMin16Int_t, "min16int", 4) +DATA_TYPE(HLSLMin16Uint_t, "min16uint", 4) DATA_TYPE(float, "float", 4) DATA_TYPE(double, "double", 8) @@ -66,7 +69,13 @@ DATA_TYPE(double, "double", 8) template constexpr bool isFloatingPointType() { return std::is_same_v || std::is_same_v || - std::is_same_v; + std::is_same_v || std::is_same_v; +} + +template constexpr bool isMinPrecisionType() { + return std::is_same_v || + std::is_same_v || + std::is_same_v; } // @@ -218,6 +227,34 @@ bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, double Tolerance, } } +// Min precision float comparison: convert to half and compare in fp16 space. +// This reuses the same tolerance values as HLSLHalf_t. Min precision is at +// least 16-bit, so fp16 tolerances are an upper bound for all cases. +bool doValuesMatch(HLSLMin16Float_t A, HLSLMin16Float_t B, double Tolerance, + ValidationType ValidationType) { + auto HalfA = DirectX::PackedVector::XMConvertFloatToHalf(A.Val); + auto HalfB = DirectX::PackedVector::XMConvertFloatToHalf(B.Val); + switch (ValidationType) { + case ValidationType::Epsilon: + return CompareHalfEpsilon(HalfA, HalfB, static_cast(Tolerance)); + case ValidationType::Ulp: + return CompareHalfULP(HalfA, HalfB, static_cast(Tolerance)); + default: + hlsl_test::LogErrorFmt( + L"Invalid ValidationType. Expecting Epsilon or ULP."); + return false; + } +} + +bool doValuesMatch(HLSLMin16Int_t A, HLSLMin16Int_t B, double, ValidationType) { + return A == B; +} + +bool doValuesMatch(HLSLMin16Uint_t A, HLSLMin16Uint_t B, double, + ValidationType) { + return A == B; +} + bool doValuesMatch(float A, float B, double Tolerance, ValidationType ValidationType) { switch (ValidationType) { @@ -696,12 +733,12 @@ template T Saturate(T A) { } template T ReverseBits(T A) { - T Result = 0; + T Result = static_cast(0); const size_t NumBits = sizeof(T) * 8; for (size_t I = 0; I < NumBits; I++) { - Result <<= 1; - Result |= (A & 1); - A >>= 1; + Result <<= static_cast(1); + Result |= (A & static_cast(1)); + A >>= static_cast(1); } return Result; } @@ -713,12 +750,13 @@ template uint32_t CountBits(T A) { // General purpose bit scan from the MSB. Based on the value of LookingForZero // returns the index of the first high/low bit found. template uint32_t ScanFromMSB(T A, bool LookingForZero) { - if (A == 0) + if (A == static_cast(0)) return std::numeric_limits::max(); constexpr uint32_t NumBits = sizeof(T) * 8; for (int32_t I = NumBits - 1; I >= 0; --I) { - bool BitSet = (A & (static_cast(1) << I)) != 0; + bool BitSet = + (A & (static_cast(1) << static_cast(I))) != static_cast(0); if (BitSet != LookingForZero) return static_cast(I); } @@ -741,11 +779,11 @@ FirstBitHigh(T A) { template uint32_t FirstBitLow(T A) { const uint32_t NumBits = sizeof(T) * 8; - if (A == 0) + if (A == static_cast(0)) return std::numeric_limits::max(); for (uint32_t I = 0; I < NumBits; ++I) { - if (A & (static_cast(1) << I)) + if (A & (static_cast(1) << static_cast(I))) return static_cast(I); } @@ -917,6 +955,24 @@ template <> struct TrigonometricValidation { ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f); }; +// Min precision trig tolerances: same as half precision since min precision +// is at least 16-bit and our doValuesMatch compares in half-precision space. +template struct TrigonometricValidation { + ValidationConfig ValidationConfig = ValidationConfig::Epsilon(0.003f); +}; + +template <> struct TrigonometricValidation { + ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f); +}; + +template <> struct TrigonometricValidation { + ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f); +}; + +template <> struct TrigonometricValidation { + ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f); +}; + #define TRIG_OP(OP, IMPL) \ template struct Op : TrigonometricValidation { \ T operator()(T A) { return IMPL; } \ @@ -1073,7 +1129,7 @@ template <> struct ExpectedBuilder { // template T UnaryMathAbs(T A) { - if constexpr (std::is_unsigned_v) + if constexpr (std::is_unsigned_v || std::is_same_v) return A; else return static_cast(std::abs(A)); @@ -1285,7 +1341,12 @@ static double computeAbsoluteEpsilon(double A, double ULPTolerance) { if constexpr (std::is_same_v) ULP = HLSLHalf_t::GetULP(A); - else + else if constexpr (std::is_same_v) { + // Min precision floats may be computed at float16 on the GPU, so use + // half-precision ULP for tolerance. Reuse HLSLHalf_t::GetULP which + // computes ULP by incrementing the float16 bit representation. + ULP = HLSLHalf_t::GetULP(HLSLHalf_t(static_cast(A))); + } else ULP = std::nextafter(static_cast(A), std::numeric_limits::infinity()) - static_cast(A); @@ -1496,7 +1557,9 @@ WAVE_OP(OpType::WaveMultiPrefixBitOr, waveMultiPrefixBitOr(A, WaveSize)); template T waveMultiPrefixBitOr(T A, UINT) { // All lanes in the group mask clear the second LSB. - return static_cast(A & ~static_cast(0x2)); + // Extra static_cast around ~ needed because bitwise NOT promotes min + // precision wrapper types to int32/uint32, making operator& ambiguous. + return static_cast(A & static_cast(~static_cast(0x2))); } template @@ -1911,10 +1974,7 @@ class TestClassCommon { return true; } - template void runWaveOpTest() { - WEX::TestExecution::SetVerifyOutput VerifySettings( - WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); - + UINT getWaveSize() { UINT WaveSize = 0; if (OverrideWaveLaneCount > 0) { @@ -1932,8 +1992,15 @@ class TestClassCommon { DXASSERT_NOMSG(WaveSize > 0); DXASSERT((WaveSize & (WaveSize - 1)) == 0, "must be a power of 2"); + return WaveSize; + } + + template void runWaveOpTest() { + WEX::TestExecution::SetVerifyOutput VerifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + dispatchWaveOpTest(D3DDevice, VerboseLogging, OverrideInputSize, - WaveSize); + getWaveSize()); } template void runTest() { @@ -2714,6 +2781,315 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon { HLK_WAVEOP_TEST(WaveMultiPrefixSum, float); HLK_WAVEOP_TEST(WaveMultiPrefixProduct, float); HLK_WAVEOP_TEST(WaveMatch, float); + + // ---- HLSLMin16Float_t (mirrors applicable HLSLHalf_t ops) ---- + + // TernaryMath + HLK_TEST(Mad, HLSLMin16Float_t); + + // BinaryMath + HLK_TEST(Add, HLSLMin16Float_t); + HLK_TEST(Subtract, HLSLMin16Float_t); + HLK_TEST(Multiply, HLSLMin16Float_t); + HLK_TEST(Divide, HLSLMin16Float_t); + HLK_TEST(Modulus, HLSLMin16Float_t); + HLK_TEST(Min, HLSLMin16Float_t); + HLK_TEST(Max, HLSLMin16Float_t); + HLK_TEST(Ldexp, HLSLMin16Float_t); + + // Saturate + HLK_TEST(Saturate, HLSLMin16Float_t); + + // Unary + HLK_TEST(Initialize, HLSLMin16Float_t); + HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Float_t); + HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Float_t); + HLK_TEST(ShuffleVector, HLSLMin16Float_t); + + // Cast + HLK_TEST(CastToBool, HLSLMin16Float_t); + HLK_TEST(CastToInt16, HLSLMin16Float_t); + HLK_TEST(CastToInt32, HLSLMin16Float_t); + HLK_TEST(CastToInt64, HLSLMin16Float_t); + HLK_TEST(CastToUint16_FromFP, HLSLMin16Float_t); + HLK_TEST(CastToUint32_FromFP, HLSLMin16Float_t); + HLK_TEST(CastToUint64_FromFP, HLSLMin16Float_t); + HLK_TEST(CastToFloat16, HLSLMin16Float_t); + HLK_TEST(CastToFloat32, HLSLMin16Float_t); + + // Trigonometric + HLK_TEST(Acos, HLSLMin16Float_t); + HLK_TEST(Asin, HLSLMin16Float_t); + HLK_TEST(Atan, HLSLMin16Float_t); + HLK_TEST(Cos, HLSLMin16Float_t); + HLK_TEST(Cosh, HLSLMin16Float_t); + HLK_TEST(Sin, HLSLMin16Float_t); + HLK_TEST(Sinh, HLSLMin16Float_t); + HLK_TEST(Tan, HLSLMin16Float_t); + HLK_TEST(Tanh, HLSLMin16Float_t); + + // UnaryMath + HLK_TEST(Abs, HLSLMin16Float_t); + HLK_TEST(Ceil, HLSLMin16Float_t); + HLK_TEST(Exp, HLSLMin16Float_t); + HLK_TEST(Floor, HLSLMin16Float_t); + HLK_TEST(Frac, HLSLMin16Float_t); + HLK_TEST(Log, HLSLMin16Float_t); + HLK_TEST(Rcp, HLSLMin16Float_t); + HLK_TEST(Round, HLSLMin16Float_t); + HLK_TEST(Rsqrt, HLSLMin16Float_t); + HLK_TEST(Sign, HLSLMin16Float_t); + HLK_TEST(Sqrt, HLSLMin16Float_t); + HLK_TEST(Trunc, HLSLMin16Float_t); + HLK_TEST(Exp2, HLSLMin16Float_t); + HLK_TEST(Log10, HLSLMin16Float_t); + HLK_TEST(Log2, HLSLMin16Float_t); + + // BinaryComparison + HLK_TEST(LessThan, HLSLMin16Float_t); + HLK_TEST(LessEqual, HLSLMin16Float_t); + HLK_TEST(GreaterThan, HLSLMin16Float_t); + HLK_TEST(GreaterEqual, HLSLMin16Float_t); + HLK_TEST(Equal, HLSLMin16Float_t); + HLK_TEST(NotEqual, HLSLMin16Float_t); + + // Select + HLK_TEST(Select, HLSLMin16Float_t); + + // Dot + HLK_TEST(Dot, HLSLMin16Float_t); + + // LoadAndStore + HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Float_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Float_t); + + // Derivative + HLK_TEST(DerivativeDdx, HLSLMin16Float_t); + HLK_TEST(DerivativeDdy, HLSLMin16Float_t); + HLK_TEST(DerivativeDdxFine, HLSLMin16Float_t); + HLK_TEST(DerivativeDdyFine, HLSLMin16Float_t); + + // Quad + HLK_TEST(QuadReadLaneAt, HLSLMin16Float_t); + HLK_TEST(QuadReadAcrossX, HLSLMin16Float_t); + HLK_TEST(QuadReadAcrossY, HLSLMin16Float_t); + HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Float_t); + + // Wave + HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Float_t); + HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Float_t); + + // ---- HLSLMin16Int_t (mirrors applicable int16_t ops) ---- + + // TernaryMath + HLK_TEST(Mad, HLSLMin16Int_t); + + // BinaryMath + // Note: Divide and Modulus excluded — HLSL does not support signed integer + // division on minimum-precision types. + HLK_TEST(Add, HLSLMin16Int_t); + HLK_TEST(Subtract, HLSLMin16Int_t); + HLK_TEST(Multiply, HLSLMin16Int_t); + HLK_TEST(Min, HLSLMin16Int_t); + HLK_TEST(Max, HLSLMin16Int_t); + + // Bitwise + HLK_TEST(And, HLSLMin16Int_t); + HLK_TEST(Or, HLSLMin16Int_t); + HLK_TEST(Xor, HLSLMin16Int_t); + HLK_TEST(LeftShift, HLSLMin16Int_t); + HLK_TEST(RightShift, HLSLMin16Int_t); + // Note: ReverseBits, CountBits, FirstBitHigh, FirstBitLow excluded - + // DXC promotes min precision to i32 before these intrinsics, so they + // don't operate at min precision. + + // UnaryMath + HLK_TEST(Abs, HLSLMin16Int_t); + HLK_TEST(Sign, HLSLMin16Int_t); + + // Unary + HLK_TEST(Initialize, HLSLMin16Int_t); + HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Int_t); + HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Int_t); + HLK_TEST(ShuffleVector, HLSLMin16Int_t); + + // Cast + HLK_TEST(CastToBool, HLSLMin16Int_t); + HLK_TEST(CastToInt32, HLSLMin16Int_t); + HLK_TEST(CastToInt64, HLSLMin16Int_t); + HLK_TEST(CastToUint16, HLSLMin16Int_t); + HLK_TEST(CastToUint32, HLSLMin16Int_t); + HLK_TEST(CastToUint64, HLSLMin16Int_t); + HLK_TEST(CastToFloat16, HLSLMin16Int_t); + HLK_TEST(CastToFloat32, HLSLMin16Int_t); + + // BinaryComparison + HLK_TEST(LessThan, HLSLMin16Int_t); + HLK_TEST(LessEqual, HLSLMin16Int_t); + HLK_TEST(GreaterThan, HLSLMin16Int_t); + HLK_TEST(GreaterEqual, HLSLMin16Int_t); + HLK_TEST(Equal, HLSLMin16Int_t); + HLK_TEST(NotEqual, HLSLMin16Int_t); + + // Select + HLK_TEST(Select, HLSLMin16Int_t); + + // Reduction + HLK_TEST(Any_Mixed, HLSLMin16Int_t); + HLK_TEST(Any_Zero, HLSLMin16Int_t); + HLK_TEST(Any_NoZero, HLSLMin16Int_t); + HLK_TEST(All_Mixed, HLSLMin16Int_t); + HLK_TEST(All_Zero, HLSLMin16Int_t); + HLK_TEST(All_NoZero, HLSLMin16Int_t); + + // LoadAndStore + HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Int_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Int_t); + + // Quad + HLK_TEST(QuadReadLaneAt, HLSLMin16Int_t); + HLK_TEST(QuadReadAcrossX, HLSLMin16Int_t); + HLK_TEST(QuadReadAcrossY, HLSLMin16Int_t); + HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Int_t); + + // Wave + HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, HLSLMin16Int_t); + HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Int_t); + + // ---- HLSLMin16Uint_t (mirrors applicable uint16_t ops) ---- + + // TernaryMath + HLK_TEST(Mad, HLSLMin16Uint_t); + + // BinaryMath + HLK_TEST(Add, HLSLMin16Uint_t); + HLK_TEST(Subtract, HLSLMin16Uint_t); + HLK_TEST(Multiply, HLSLMin16Uint_t); + HLK_TEST(Divide, HLSLMin16Uint_t); + HLK_TEST(Modulus, HLSLMin16Uint_t); + HLK_TEST(Min, HLSLMin16Uint_t); + HLK_TEST(Max, HLSLMin16Uint_t); + + // Bitwise + HLK_TEST(And, HLSLMin16Uint_t); + HLK_TEST(Or, HLSLMin16Uint_t); + HLK_TEST(Xor, HLSLMin16Uint_t); + HLK_TEST(LeftShift, HLSLMin16Uint_t); + HLK_TEST(RightShift, HLSLMin16Uint_t); + // Note: ReverseBits, CountBits, FirstBitHigh, FirstBitLow excluded - + // DXC promotes min precision to i32 before these intrinsics, so they + // don't operate at min precision. + + // UnaryMath + HLK_TEST(Abs, HLSLMin16Uint_t); + HLK_TEST(Sign, HLSLMin16Uint_t); + + // Unary + HLK_TEST(Initialize, HLSLMin16Uint_t); + HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Uint_t); + HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Uint_t); + HLK_TEST(ShuffleVector, HLSLMin16Uint_t); + + // Cast + HLK_TEST(CastToBool, HLSLMin16Uint_t); + HLK_TEST(CastToInt16, HLSLMin16Uint_t); + HLK_TEST(CastToInt32, HLSLMin16Uint_t); + HLK_TEST(CastToInt64, HLSLMin16Uint_t); + HLK_TEST(CastToUint32, HLSLMin16Uint_t); + HLK_TEST(CastToUint64, HLSLMin16Uint_t); + HLK_TEST(CastToFloat16, HLSLMin16Uint_t); + HLK_TEST(CastToFloat32, HLSLMin16Uint_t); + + // BinaryComparison + HLK_TEST(LessThan, HLSLMin16Uint_t); + HLK_TEST(LessEqual, HLSLMin16Uint_t); + HLK_TEST(GreaterThan, HLSLMin16Uint_t); + HLK_TEST(GreaterEqual, HLSLMin16Uint_t); + HLK_TEST(Equal, HLSLMin16Uint_t); + HLK_TEST(NotEqual, HLSLMin16Uint_t); + + // Select + HLK_TEST(Select, HLSLMin16Uint_t); + + // LoadAndStore + HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Uint_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Uint_t); + + // Quad + HLK_TEST(QuadReadLaneAt, HLSLMin16Uint_t); + HLK_TEST(QuadReadAcrossX, HLSLMin16Uint_t); + HLK_TEST(QuadReadAcrossY, HLSLMin16Uint_t); + HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Uint_t); + + // Wave + HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, HLSLMin16Uint_t); + HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Uint_t); }; #define HLK_TEST_DOUBLE(Op, DataType) \ diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index 3e8dd1ee6e..319d468a43 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -4218,6 +4218,10 @@ void MSMain(uint GID : SV_GroupIndex, float MakeDifferent(float A) { return A + 1.0f; } double MakeDifferent(double A) { return A + 1.0; } + min16float MakeDifferent(min16float A) { return A + (min16float)1.0f; } + min16int MakeDifferent(min16int A) { return A ^ 1; } + min16uint MakeDifferent(min16uint A) { return A ^ 1; } + #if __HLSL_ENABLE_16_BIT uint16_t MakeDifferent(uint16_t A) { return A ^ 1; } int16_t MakeDifferent(int16_t A) { return A ^ 1; }