diff --git a/include/nbl/builtin/hlsl/algorithm.hlsl b/include/nbl/builtin/hlsl/algorithm.hlsl
index 7eca7d51df..66442a11a1 100644
--- a/include/nbl/builtin/hlsl/algorithm.hlsl
+++ b/include/nbl/builtin/hlsl/algorithm.hlsl
@@ -19,7 +19,7 @@ namespace impl

    // TODO: use structs
    template<typename T>
-    NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
+    NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
    {
        T tmp = lhs;
        lhs = rhs;
@@ -27,7 +27,7 @@ namespace impl
    }

    template<>
-    NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs)
+    NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs)
    {
        lhs ^= rhs;
        rhs ^= lhs;
@@ -35,7 +35,7 @@ namespace impl
    }

    template<>
-    NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs)
+    NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs)
    {
        lhs ^= rhs;
        rhs ^= lhs;
@@ -43,7 +43,7 @@ namespace impl
    }

    template<>
-    NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs)
+    NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs)
    {
        lhs ^= rhs;
        rhs ^= lhs;
@@ -51,7 +51,7 @@ namespace impl
    }

    template<>
-    NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs)
+    NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs)
    {
        lhs ^= rhs;
        rhs ^= lhs;
@@ -59,7 +59,7 @@ namespace impl
    }

    template<>
-    NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs)
+    NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs)
    {
        lhs ^= rhs;
        rhs ^= lhs;
@@ -67,7 +67,7 @@ namespace impl
    }

    template<>
-    NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs)
+    NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs)
    {
        lhs ^= rhs;
        rhs ^= lhs;
@@ -75,7 +75,7 @@ namespace impl
    }
#else
    template<typename T>
-    NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
+    NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
    {
        std::swap(lhs, rhs);
    }
@@ -83,7 +83,7 @@ namespace impl
}

template<typename T>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
{
    impl::swap(lhs, rhs);
}
diff --git a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl
index b27c892abe..40f64d9cf8 100644
--- a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl
@@ -406,7 +406,7 @@
template<typename T>
struct is_ggx : impl::is_ggx<T> {};

template<typename T>
-NBL_CONSTEXPR bool is_ggx_v = is_ggx<T>::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_ggx_v = is_ggx<T>::value;

}
}
diff --git a/include/nbl/builtin/hlsl/complex.hlsl b/include/nbl/builtin/hlsl/complex.hlsl
index f61b707865..1e5621be5b 100644
--- a/include/nbl/builtin/hlsl/complex.hlsl
+++ b/include/nbl/builtin/hlsl/complex.hlsl
@@ -436,22 +436,6 @@ complex_t<Scalar> rotateRight(NBL_CONST_REF_ARG(complex_t<Scalar>) value)
    return retVal;
}

-template<typename Scalar>
-struct ternary_operator< complex_t<Scalar> >
-{
-    using type_t = complex_t<Scalar>;
-
-    complex_t<Scalar> operator()(bool condition, NBL_CONST_REF_ARG(complex_t<Scalar>) lhs, NBL_CONST_REF_ARG(complex_t<Scalar>) rhs)
-    {
-        const vector<Scalar, 2> lhsVector = vector<Scalar, 2>(lhs.real(), lhs.imag());
-        const vector<Scalar, 2> rhsVector = vector<Scalar, 2>(rhs.real(), rhs.imag());
-        const vector<Scalar, 2> resultVector = condition ?
lhsVector : rhsVector; - const complex_t result = { resultVector.x, resultVector.y }; - return result; - } -}; - - } } diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl index dcbafae8a5..e3ff3f611f 100644 --- a/include/nbl/builtin/hlsl/concepts/core.hlsl +++ b/include/nbl/builtin/hlsl/concepts/core.hlsl @@ -72,13 +72,23 @@ namespace impl template struct is_emulating_floating_point_scalar { - NBL_CONSTEXPR_STATIC_INLINE bool value = FloatingPointScalar; + NBL_CONSTEXPR_STATIC_INLINE bool value = false; +}; + +template +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = false; }; } //! Floating point types are native floating point types or types that imitate native floating point types (for example emulated_float64_t) template -NBL_BOOL_CONCEPT FloatingPointLikeScalar = impl::is_emulating_floating_point_scalar::value; +NBL_BOOL_CONCEPT FloatingPointLikeScalar = FloatingPointScalar || impl::is_emulating_floating_point_scalar::value; + +//! Integral-like types are native integral types or types that imitate native integral types (for example emulated_uint64_t) +template +NBL_BOOL_CONCEPT IntegralLikeScalar = IntegralScalar || impl::is_emulating_integral_scalar::value; } } diff --git a/include/nbl/builtin/hlsl/concepts/vector.hlsl b/include/nbl/builtin/hlsl/concepts/vector.hlsl index 468838730a..f132531cb9 100644 --- a/include/nbl/builtin/hlsl/concepts/vector.hlsl +++ b/include/nbl/builtin/hlsl/concepts/vector.hlsl @@ -40,14 +40,12 @@ NBL_BOOL_CONCEPT FloatingPointLikeVectorial = concepts::Vectorial && concepts template NBL_BOOL_CONCEPT IntVectorial = concepts::Vectorial && (is_integral_v::scalar_type>); template +NBL_BOOL_CONCEPT IntegralLikeVectorial = concepts::Vectorial && concepts::IntegralLikeScalar::scalar_type>; +template NBL_BOOL_CONCEPT SignedIntVectorial = concepts::Vectorial && concepts::SignedIntegralScalar::scalar_type>; } -template -NBL_PARTIAL_REQ_TOP(concepts::Vectorial) -struct extent) > : integral_constant::Dimension> {}; - } } #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl index 175a3e76c1..03d47864fb 100644 --- a/include/nbl/builtin/hlsl/cpp_compat.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl @@ -5,5 +5,9 @@ // it includes vector and matrix #include #include +#include + +// Had to push some stuff here to avoid circular dependencies +#include #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 87baa1f0d6..3ca499c567 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -3,39 +3,12 @@ #include -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ -template -struct static_cast_helper -{ - static inline To cast(From u) - { -#ifndef __HLSL_VERSION - return static_cast(u); -#else - return To(u); -#endif - } -}; -} - -template -inline To _static_cast(From v) -{ - return impl::static_cast_helper::cast(v); -} - -} -} #ifndef __HLSL_VERSION #include #define ARROW -> +#define NBL_DEREF_THIS (*this) #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static @@ -43,7 +16,12 @@ inline To _static_cast(From v) #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const +#define 
NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR constexpr inline +#define NBL_CONSTEXPR_FUNC_SCOPE_VAR constexpr +#define NBL_CONSTEXPR_OOL_MEMBER constexpr +#define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) +#define NBL_ASSERT(...) assert(__VA_ARGS__) namespace nbl::hlsl { @@ -67,6 +45,7 @@ namespace nbl::hlsl #else #define ARROW .arrow(). +#define NBL_DEREF_THIS this #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static @@ -74,7 +53,12 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_INLINE_FUNC inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC +#define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR const static +#define NBL_CONSTEXPR_FUNC_SCOPE_VAR const +#define NBL_CONSTEXPR_OOL_MEMBER const +#define NBL_CONSTEXPR_INLINE_OOL_MEMBER const #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) +#define NBL_ASSERT(...) namespace nbl { @@ -102,4 +86,33 @@ struct add_pointer #endif +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_INLINE To cast(From u) + { +#ifndef __HLSL_VERSION + return static_cast(u); +#else + return To(u); +#endif + } +}; +} + +template +NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) +{ + return impl::static_cast_helper::cast(v); +} + +} +} + #endif diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index cd89ce45d1..7850fd7cf3 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -27,7 +27,7 @@ template::type; - NBL_CONSTEXPR UnsignedInteger Mask = (UnsignedInteger(0) - 1) >> 1; + NBL_CONSTEXPR_FUNC_SCOPE_VAR UnsignedInteger Mask = (UnsignedInteger(0) - 1) >> 1; UnsignedInteger absVal = val & Mask; return absVal > (ieee754::traits::specialValueExp << ieee754::traits::mantissaBitCnt); } @@ -90,6 +90,8 @@ template struct all_helper; template struct any_helper; +template +struct select_helper; template struct bitReverseAs_helper; template @@ -119,6 +121,12 @@ struct nMax_helper; template struct nClamp_helper; template +struct addCarry_helper; +template +struct subBorrow_helper; +template +struct undef_helper; +template struct fma_helper; #ifdef __HLSL_VERSION // HLSL only specializations @@ -133,8 +141,8 @@ struct fma_helper; // the template<> needs to be written ourselves // return type is __VA_ARGS__ to protect against `,` in templated return types #define AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(HELPER_NAME, SPIRV_FUNCTION_NAME, ARG_TYPE_LIST, ARG_TYPE_SET, ...)\ -NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ -struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ +NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ +struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ {\ using return_t = __VA_ARGS__;\ static inline return_t __call( BOOST_PP_SEQ_FOR_EACH_I(DECL_ARG, _, ARG_TYPE_SET) )\ @@ -156,8 +164,9 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(length_helper, length, template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(normalize_helper, normalize, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(rsqrt_helper, inverseSqrt, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fract_helper, fract, 
(T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, any, (T), (T), bool) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, all, (T), (T), bool) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(any_helper, any, (T), (T), bool) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(select_helper, select, (B)(T), (B)(T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, fSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, sSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(radians_helper, radians, (T), (T), T) @@ -179,6 +188,10 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(refract_hel template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMax_helper, nMax, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMin_helper, nMin, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nClamp_helper, nClamp, (T), (T)(T), T) +// Can use trivial case and not worry about restricting `T` with a concept since `spirv::AddCarryOutput / SubBorrowOutput` already take care of that +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(addCarry_helper, addCarry, (T), (T)(T), spirv::AddCarryOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(subBorrow_helper, subBorrow, (T), (T)(T), spirv::SubBorrowOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(undef_helper, undef, (T), , T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fma_helper, fma, (T), (T)(T)(T), T) #define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> @@ -629,6 +642,72 @@ struct nClamp_helper } }; +// Once again no need to restrict the two below with concepts for same reason as HLSL version +template +struct addCarry_helper +{ + using return_t = spirv::AddCarryOutput; + constexpr static inline return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = operand1 + operand2; + retVal.carry = T(retVal.result < operand1); + return retVal; + } +}; + +template +struct subBorrow_helper +{ + using return_t = spirv::SubBorrowOutput; + constexpr static inline return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = static_cast(operand1 - operand2); + retVal.borrow = T(operand1 < operand2); + return retVal; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) +struct select_helper) > +{ + NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + return condition ? object1 : object2; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) +struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > +{ + NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + using traits = hlsl::vector_traits; + array_get conditionGetter; + array_get objectGetter; + array_set setter; + + T selected; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(selected, i, conditionGetter(condition, i) ? 
objectGetter(object1, i) : objectGetter(object2, i)); + + return selected; + } +}; + +template +struct undef_helper +{ + NBL_CONSTEXPR_STATIC T __call() + { + T t; + return t; + } +}; + template requires concepts::FloatingPointScalar struct fma_helper diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index 7198bae563..27518222b3 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -23,6 +23,12 @@ namespace nbl namespace hlsl { +template +NBL_CONSTEXPR_FUNC T undef() +{ + return cpp_compat_intrinsics_impl::undef_helper::__call(); +} + template inline typename cpp_compat_intrinsics_impl::bitCount_helper::return_t bitCount(NBL_CONST_REF_ARG(T) val) { @@ -150,6 +156,12 @@ inline bool any(Vector vec) return cpp_compat_intrinsics_impl::any_helper::__call(vec); } +template +NBL_CONSTEXPR_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) +{ + return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); +} + /** * @brief Returns x - floor(x). * @@ -217,6 +229,19 @@ inline T refract(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N, NBL_CONST_REF_A return cpp_compat_intrinsics_impl::refract_helper::__call(I, N, eta); } +template +NBL_CONSTEXPR_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::addCarry_helper::__call(operand1, operand2); +} + +template +NBL_CONSTEXPR_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::subBorrow_helper::__call(operand1, operand2); +} + + #ifdef __HLSL_VERSION #define NAMESPACE spirv #else diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 51ca73f6d3..6a8476e644 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -12,68 +12,39 @@ namespace impl { // partial specialize this for `T=matrix|vector` and `U=matrix|vector|scalar_t` -template +template struct Promote { - T operator()(U v) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) { return T(v); } }; -#ifdef __HLSL_VERSION - -template -struct Promote, U> -{ - enable_if_t::value && is_scalar::value, vector > operator()(U v) - { - vector promoted = {Scalar(v)}; - return promoted; - } -}; - -template -struct Promote, U> +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && (concepts::IntegralLikeScalar || concepts::FloatingPointLikeScalar) && is_same_v::scalar_type, From>) +struct Promote && is_scalar_v && is_same_v::scalar_type, From>) > { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC To operator()(const From v) { - vector promoted = {Scalar(v), Scalar(v)}; - return promoted; + array_set setter; + To output; + [[unroll]] + for (int i = 0; i < vector_traits::Dimension; ++i) + setter(output, i, v); + return output; } }; -template -struct Promote, U> -{ - enable_if_t::value && is_scalar::value, vector > operator()(U v) - { - vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; - return promoted; - } -}; - -template -struct Promote, U> -{ - enable_if_t::value && is_scalar::value, vector > operator()(U v) - { - vector promoted = {Scalar(v), Scalar(v), Scalar(v), Scalar(v)}; - return promoted; - } -}; - -#endif - } template -T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) 
instead of U v (circular ref) +NBL_CONSTEXPR_FUNC T promote(NBL_CONST_REF_ARG(U) v) { impl::Promote _promote; return _promote(v); } + } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl new file mode 100644 index 0000000000..ffe3d12641 --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -0,0 +1,58 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ + +#include "nbl/builtin/hlsl/type_traits.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +namespace concepts +{ + template + NBL_BOOL_CONCEPT can_truncate_vector = concepts::Vectorial && concepts::Vectorial && concepts::same_as::scalar_type, typename vector_traits::scalar_type > && vector_traits::Dimension <= vector_traits::Dimension; +} + +namespace impl +{ + +template +struct Truncate +{ + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) + { + return T(v); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::can_truncate_vector) +struct Truncate) > +{ + NBL_CONSTEXPR_FUNC To operator()(const From v) + { + array_get::scalar_type> getter; + array_set::scalar_type> setter; + To output; + [[unroll]] + for (int i = 0; i < vector_traits::Dimension; ++i) + setter(output, i, getter(v, i)); + return output; + } + +}; + +} //namespace impl + +template +NBL_CONSTEXPR_FUNC T truncate(NBL_CONST_REF_ARG(U) v) +{ + impl::Truncate _truncate; + return _truncate(v); +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index 9872675e3a..da32fab7b0 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -471,25 +471,25 @@ inline int extractExponent(__VA_ARGS__ x)\ }\ \ template<>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, biasedExp));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ {\ return extractMantissa(x.data);\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ {\ return extractNormalizeMantissa(x.data);\ }\ @@ -636,10 +636,10 @@ namespace ieee754 { namespace impl { -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t 
bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } } IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index 44b881345d..df785e3e8f 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -41,7 +41,7 @@ namespace hlsl { namespace emulated_float64_t_impl { -NBL_CONSTEXPR_INLINE_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) +NBL_CONSTEXPR_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) { uint64_t2 output; output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); @@ -74,7 +74,7 @@ inline uint64_t castFloat32ToStorageType(float32_t val) } }; -NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) +NBL_CONSTEXPR_FUNC bool isZero(uint64_t val) { return (val << 1) == 0ull; } @@ -137,18 +137,18 @@ inline uint64_t reinterpretAsFloat64BitPattern(int64_t val) return sign | reinterpretAsFloat64BitPattern(absVal); }; -NBL_CONSTEXPR_INLINE_FUNC uint64_t flushDenormToZero(uint64_t value) +NBL_CONSTEXPR_FUNC uint64_t flushDenormToZero(uint64_t value) { const uint64_t biasBits = value & ieee754::traits::exponentMask; return biasBits ? value : (value & ieee754::traits::signMask); } -NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) +NBL_CONSTEXPR_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) { return signShifted | expShifted | mantissa; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) { lhs &= ~ieee754::traits::signMask; rhs &= ~ieee754::traits::signMask; @@ -156,18 +156,18 @@ NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) return lhs == rhs && lhs == ieee754::traits::inf; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs | rhs) << 1); } -NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs) << 1) && (lhs == rhs); } template -NBL_CONSTEXPR_INLINE_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) { if (!FastMath) { diff --git a/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl new file mode 100644 index 0000000000..3818814a49 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl @@ -0,0 +1,155 @@ +using storage_t = vector; +storage_t data; + +/** +* @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern +* +* @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. 
Stored as little endian (first component are the lower 32 bits) +*/ +NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(storage_t) _data) +{ + this_t retVal; + retVal.data = _data; + return retVal; +} + +/** +* @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern +* +* @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated +* @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated +*/ +NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) +{ + return create(storage_t(lo, hi)); +} + +// ------------------------------------------------------- CONVERSION OPERATORS--------------------------------------------------------------- +// GLM requires these for vector casts + +#ifndef __HLSL_VERSION + +template +constexpr explicit operator I() const noexcept; + +#endif + +// ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC +{ + return data.x; +} + +NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC +{ + return data.y; +} + +// ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data & rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data | rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data ^ rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(~data); + return retVal; +} + +// Only valid in CPP +#ifndef __HLSL_VERSION +constexpr inline this_t operator>>(uint32_t bits) const; + +constexpr inline this_t operator<<(uint32_t bits) const; + +constexpr inline this_t& operator&=(const this_t& val) +{ + data &= val.data; + return *this; +} + +constexpr inline this_t& operator|=(const this_t& val) +{ + data |= val.data; + return *this; +} + +constexpr inline this_t& operator^=(const this_t& val) +{ + data ^= val.data; + return *this; +} + +#endif + +// ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); + return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); +} + +NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); + return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); +} + +// ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- +NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + equal_to equals; + return all(equals(data, rhs.data)); +} + +NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + not_equal_to notEquals; + return any(notEquals(data, rhs.data)); +} + +NBL_CONSTEXPR_FUNC bool 
operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less + // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) + const bool MSBEqual = __getMSB() == rhs.__getMSB(); + const bool MSB = Signed ? (bit_cast(__getMSB()) < bit_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); + const bool LSB = __getLSB() < rhs.__getLSB(); + return MSBEqual ? LSB : MSB; +} + +NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + // Same reasoning as above + const bool MSBEqual = __getMSB() == rhs.__getMSB(); + const bool MSB = Signed ? (bit_cast(__getMSB()) > bit_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); + const bool LSB = __getLSB() > rhs.__getLSB(); + return MSBEqual ? LSB : MSB; +} + +NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + return !operator>(rhs); +} + +NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + return !operator<(rhs); +} diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl new file mode 100644 index 0000000000..30c23d8693 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -0,0 +1,411 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" + +// Didn't bother with operator*, operator/, implement if you need them. 
Multiplication is pretty straightforward, division requires switching on signs +// and whether the topmost bits of the divisor are equal to 0 +// - Francisco + +namespace nbl +{ +namespace hlsl +{ + +struct emulated_int64_t; + +struct emulated_uint64_t +{ + using this_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC_INLINE bool Signed = false; + + #include "int64_common_member_inc.hlsl" + + #ifndef __HLSL_VERSION + emulated_uint64_t() = default; + // GLM requires these to cast vectors because it uses a native `static_cast` + template + constexpr explicit emulated_uint64_t(const I& toEmulate); + + constexpr explicit emulated_uint64_t(const emulated_int64_t& other); + #endif +}; + + +struct emulated_int64_t +{ + using this_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC_INLINE bool Signed = true; + + #include "int64_common_member_inc.hlsl" + + #ifndef __HLSL_VERSION + emulated_int64_t() = default; + // GLM requires these to cast vectors because it uses a native `static_cast` + template + constexpr explicit emulated_int64_t(const I& toEmulate); + + constexpr explicit emulated_int64_t(const emulated_uint64_t& other); + #endif + + NBL_CONSTEXPR_FUNC emulated_int64_t operator-() NBL_CONST_MEMBER_FUNC + { + storage_t inverted = ~data; + return create(_static_cast(inverted)) + _static_cast(1); + } + +}; + +// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- + +template<> +struct is_signed : bool_constant {}; + +template<> +struct is_unsigned : bool_constant {}; + +// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- +namespace concepts +{ + +template +NBL_BOOL_CONCEPT EmulatedIntegralScalar64 = same_as || same_as; + +namespace impl +{ + +template<> +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; + +template<> +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; +} + + +} + + +namespace impl +{ + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64 && concepts::EmulatedIntegralScalar64 && !concepts::same_as) +struct static_cast_helper && concepts::EmulatedIntegralScalar64 && !concepts::same_as) > +{ + + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) other) + { + To retVal; + retVal.data = other.data; + return retVal; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(To) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > +{ + // Return only the lowest bits + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) + { + return _static_cast(val.data.x); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(To) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > +{ + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) + { + return bit_cast(val.data); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(From) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > +{ + // Set only lower bits + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) + { + return To::create(_static_cast(i), uint32_t(0)); + } +}; + +template 
NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(From) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > +{ + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) + { + // `bit_cast` blocked by GLM vectors using a union + #ifndef __HLSL_VERSION + return To::create(_static_cast(i), _static_cast(i >> 32)); + #else + To retVal; + retVal.data = bit_cast >(i); + return retVal; + #endif + } +}; + +} //namespace impl + +// Define constructor and conversion operators + +#ifndef __HLSL_VERSION + +constexpr emulated_int64_t::emulated_int64_t(const emulated_uint64_t& other) : data(other.data) {} + +constexpr emulated_uint64_t::emulated_uint64_t(const emulated_int64_t& other) : data(other.data) {} + +template +constexpr emulated_int64_t::emulated_int64_t(const I& toEmulate) +{ + *this = _static_cast(toEmulate); +} + +template +constexpr emulated_uint64_t::emulated_uint64_t(const I& toEmulate) +{ + *this = _static_cast(toEmulate); +} + +template +constexpr emulated_int64_t::operator I() const noexcept +{ + return _static_cast(*this); +} + +template +constexpr emulated_uint64_t::operator I() const noexcept +{ + return _static_cast(*this); +} + +#endif + +// ---------------------- Functional operators ------------------------ + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct left_shift_operator) > +{ + using type_t = T; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(0, operand.__getLSB() << shift) + : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); + } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? 
vector(operand.__getMSB() >> shift, 0) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); + } + + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), int32_t(operand.__getMSB()) < 0 ? ~uint32_t(0) : uint32_t(0)) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); + } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +#ifndef __HLSL_VERSION + +constexpr inline emulated_int64_t emulated_int64_t::operator<<(uint32_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint32_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + + +// ---------------------- STD arithmetic operators ------------------------ +// Specializations of the structs found in functional.hlsl +// These all have to be specialized because of the identity that can't be initialized inside the struct definition + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct plus) > +{ + using type_t = T; + + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + return lhs + rhs; + } + + const static type_t identity; +}; + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct minus) > +{ + using type_t = T; + + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + return lhs - rhs; + } + + const static type_t identity; +}; + +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t plus::identity = _static_cast(int64_t(0)); +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus::identity = _static_cast(int64_t(0)); + +// 
--------------------------------- Compound assignment operators ------------------------------------------ +// Specializations of the structs found in functional.hlsl + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct plus_assign) > +{ + using type_t = T; + using base_t = plus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + lhs = baseOp(lhs, rhs); + } + + const static type_t identity; +}; + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct minus_assign) > +{ + using type_t = T; + using base_t = minus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + lhs = baseOp(lhs, rhs); + } + + const static type_t identity; +}; + +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus_assign::identity = minus::identity; +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus_assign::identity = minus::identity; + +} //namespace nbl +} //namespace hlsl + +// Declare them as signed/unsigned versions of each other + +#ifndef __HLSL_VERSION +#define NBL_ADD_STD std:: +#else +#define NBL_ADD_STD nbl::hlsl:: +#endif + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +#undef NBL_ADD_STD + + + +#endif diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 0053008aa4..4eb8b7bf06 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ #include +#include #include #include #include @@ -23,7 +24,7 @@ struct _2_component_vec static_assert(sizeof(T) <= 8); - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -31,7 +32,7 @@ struct _2_component_vec y = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -39,9 +40,7 @@ struct _2_component_vec return y; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); - return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 2; @@ -55,7 +54,7 @@ struct _3_component_vec T z; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -65,7 +64,7 @@ struct _3_component_vec z = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -75,9 +74,7 @@ struct _3_component_vec return z; // TODO: avoid code duplication, make it constexpr - 
using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 3; @@ -91,7 +88,7 @@ struct _4_component_vec T z; T w; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -103,7 +100,7 @@ struct _4_component_vec w = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -115,133 +112,207 @@ struct _4_component_vec return w; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 4; }; -template ::value> -struct emulated_vector : CRTP -{ - using this_t = emulated_vector; - using component_t = ComponentType; - - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) - { - CRTP output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - } - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other[i]); - - return output; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(component_t val) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + val); +template +struct emulated_vector; - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other.getComponent(i)); - - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other[i]); - - return output; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(component_t val) - { - this_t output; +// Generic ComponentType vectors still have to be partial specialized based on whether they're fundamental and/or integral - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - val); - - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other.getComponent(i)); +#define NBL_EMULATED_VECTOR_UNARY_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator OP() NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i).operator OP());\ + return output;\ +} - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(vector other) - { - this_t output; +#define NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator OP (component_t val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + 
output.setComponent(i, this_t::getComponent(i) OP val);\ + return output;\ +}\ +NBL_CONSTEXPR_FUNC this_t operator OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other[i]); +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(OP) NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator OP(vector other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other[i]);\ + return output;\ +} - return output; - } +#define NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP) NBL_CONSTEXPR_FUNC vector operator OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ +} - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(component_t val) - { - this_t output; +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(OP) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC vector operator OP (vector other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other[i];\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * val); +#define NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +using this_t = emulated_vector;\ +using component_t = ComponentType;\ +NBL_CONSTEXPR_STATIC this_t create(this_t other)\ +{\ + CRTP output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other.getComponent(i));\ +}\ +NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC \ +{\ + component_t sum = CRTP::getComponent(0);\ + [[unroll]]\ + for (uint32_t i = 1u; i < CRTP::Dimension; ++i)\ + sum = sum + CRTP::getComponent(i);\ + return sum;\ +} - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) - { - this_t output; +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +NBL_CONSTEXPR_STATIC this_t create(vector other)\ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other[i]);\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other.getComponent(i)); +// Fundamental, integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector&& concepts::IntegralLikeScalar) > : CRTP +{ + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) + 
NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) +}; - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(vector other) - { - this_t output; +// Fundamental, not integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP +{ + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) +}; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other[i]); +// Not fundamental, integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector && concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) +}; - return output; - } +// Not fundamental, not integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) +}; - NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() - { - component_t sum = 0; - for 
(uint32_t i = 0u; i < CRTP::Dimension; ++i) - sum = sum + CRTP::getComponent(i); +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_UNARY_OPERATOR - return sum; - } -}; +// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- #define DEFINE_OPERATORS_FOR_TYPE(...)\ -NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator+(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -250,7 +321,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator-(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -259,7 +330,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator*(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -269,14 +340,14 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ }\ \ -// TODO: some of code duplication could be avoided -template -struct emulated_vector : CRTP + +template +struct emulated_vector, CRTP> : CRTP { - using component_t = ComponentType; - using this_t = emulated_vector; + using component_t = emulated_float64_t; + using this_t = emulated_vector; - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) + NBL_CONSTEXPR_STATIC this_t create(this_t other) { this_t output; @@ -287,17 +358,17 @@ struct emulated_vector : CRTP } template - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) + NBL_CONSTEXPR_STATIC this_t create(vector other) { this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, ComponentType::create(other[i])); + output.setComponent(i, component_t::create(other[i])); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) + NBL_CONSTEXPR_FUNC this_t operator+(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -306,7 +377,7 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) + NBL_CONSTEXPR_FUNC this_t operator-(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -315,7 +386,7 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) + NBL_CONSTEXPR_FUNC this_t operator*(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -338,9 +409,9 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() + NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC { - ComponentType sum = ComponentType::create(0); + component_t sum = component_t::create(0); for (uint32_t i = 0u; i < CRTP::Dimension; ++i) sum = sum + CRTP::getComponent(i); @@ -420,12 +491,21 @@ DEFINE_SCALAR_OF_SPECIALIZATION(3) DEFINE_SCALAR_OF_SPECIALIZATION(4) #undef DEFINE_SCALAR_OF_SPECIALIZATION 
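For orientation before the rest of this hunk: the `extent`, `Promote`, and `Truncate` specializations added below are what let emulated vectors flow through the generic `promote`/`truncate` entry points of cpp_compat. A minimal usage sketch, assuming the `emulated_vector_t2`/`emulated_vector_t3` aliases from this header and a fully-specialized `emulated_float64_t`; the `ef64_t` alias, the `create(1.0f)` call, and the explicit template arguments are illustrative assumptions, not lines from this PR:

// hypothetical usage sketch, not part of the diff
using ef64_t = nbl::hlsl::emulated_float64_t<true, true>;
using evec3_t = nbl::hlsl::emulated_vector_t3<ef64_t>;
using evec2_t = nbl::hlsl::emulated_vector_t2<ef64_t>;
// promote: splat a single emulated scalar across all three components
const evec3_t v3 = nbl::hlsl::promote<evec3_t, ef64_t>(ef64_t::create(1.0f));
// truncate: keep only the leading two components, so extent_v<evec2_t> == 2 holds for the result
const evec2_t v2 = nbl::hlsl::truncate<evec2_t, evec3_t>(v3);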
+#define DEFINE_EXTENT_SPECIALIZATION(DIMENSION)\ +template\ +struct extent, I> : extent {}; + +DEFINE_EXTENT_SPECIALIZATION(2) +DEFINE_EXTENT_SPECIALIZATION(3) +DEFINE_EXTENT_SPECIALIZATION(4) +#undef DEFINE_EXTENT_SPECIALIZATION + namespace impl { template struct static_cast_helper, vector, void> { - static inline emulated_vector_t2 cast(vector vec) + NBL_CONSTEXPR_STATIC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -438,7 +518,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t3 cast(vector vec) + NBL_CONSTEXPR_STATIC emulated_vector_t3 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -452,7 +532,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t4 cast(vector vec) + NBL_CONSTEXPR_STATIC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -470,12 +550,13 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - static inline OutputVecType cast(InputVecType vec) + NBL_CONSTEXPR_STATIC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec) { array_get getter; array_set setter; OutputVecType output; + [[unroll]] for (int i = 0; i < N; ++i) setter(output, i, _static_cast(getter(vec, i))); @@ -483,7 +564,77 @@ struct static_cast_helper, emulated_vector_t\ +struct static_cast_helper, emulated_vector_t##N , void>\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_STATIC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(output, i, _static_cast(getter(vec, i)));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(2) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(3) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) + +#undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST + +#define NBL_EMULATED_VEC_PROMOTION(N) template\ +struct Promote, ComponentType>\ +{\ + using VecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_FUNC VecType operator()(NBL_CONST_REF_ARG(ComponentType) v)\ + {\ + array_set setter;\ + VecType promoted;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(promoted, i, v);\ + return promoted;\ + }\ +}; + +NBL_EMULATED_VEC_PROMOTION(2) +NBL_EMULATED_VEC_PROMOTION(3) +NBL_EMULATED_VEC_PROMOTION(4) + +#undef NBL_EMULATED_VEC_PROMOTION + +#define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ +struct Truncate, emulated_vector_t##M >\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##M ;\ + NBL_CONSTEXPR_FUNC OutputVecType operator()(NBL_CONST_REF_ARG(InputVecType) vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(output, i, getter(vec, i));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TRUNCATION(2, 2) +NBL_EMULATED_VEC_TRUNCATION(2, 3) +NBL_EMULATED_VEC_TRUNCATION(2, 4) +NBL_EMULATED_VEC_TRUNCATION(3, 3) +NBL_EMULATED_VEC_TRUNCATION(3, 4) +NBL_EMULATED_VEC_TRUNCATION(4, 4) + +#undef NBL_EMULATED_VEC_TRUNCATION + +} //namespace impl } } diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 25d822a940..da416a538f 100644 
--- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/limits.hlsl" +#include "nbl/builtin/hlsl/concepts/vector.hlsl" namespace nbl { @@ -79,7 +80,7 @@ struct reference_wrapper : enable_if_t< // TODO: partial specializations for T being a special SPIR-V type for image ops, etc. -#define ALIAS_STD(NAME,OP) template struct NAME { \ +#define ALIAS_STD(NAME,OP) template struct NAME { \ using type_t = T; \ \ T operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) \ @@ -90,7 +91,6 @@ struct reference_wrapper : enable_if_t< #else // CPP - #define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ using type_t = T; @@ -135,16 +135,56 @@ ALIAS_STD(divides,/) }; -ALIAS_STD(greater,>) }; -ALIAS_STD(less,<) }; -ALIAS_STD(greater_equal,>=) }; -ALIAS_STD(less_equal,<=) }; +ALIAS_STD(equal_to, ==) }; +ALIAS_STD(not_equal_to, !=) }; +ALIAS_STD(greater, >) }; +ALIAS_STD(less, <) }; +ALIAS_STD(greater_equal, >=) }; +ALIAS_STD(less_equal, <=) }; #undef ALIAS_STD -// ------------------------ Compound assignment operators ---------------------- +// The above comparison operators return bool in STD, but in HLSL they're supposed to yield bool vectors, so here's a specialization so that they return `vector` for vectorial types + +// GLM doesn't have operators on vectors +#ifndef __HLSL_VERSION + +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template requires (concepts::Vectorial)\ +struct NAME \ +{\ + using type_t = T;\ + vector::Dimension> operator()(const T& lhs, const T& rhs)\ +{\ + return glm::GLM_OP (lhs, rhs);\ +}\ +}; + +#else + +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +struct NAME ) >\ +{\ + using type_t = T;\ + vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\ +{\ + return lhs OP rhs;\ +}\ +}; + +#endif + +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==, equal) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=, notEqual) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >, greaterThan) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <, lessThan) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=, greaterThanEqual) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=, lessThanEqual) + +#undef NBL_COMPARISON_VECTORIAL_SPECIALIZATION -#define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ +// ------------------------------------------------------------- COMPOUND ASSIGNMENT OPERATORS -------------------------------------------------------------------- + +#define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ using type_t = T; \ using base_t = NAME ; \ base_t baseOp; \ @@ -163,9 +203,9 @@ COMPOUND_ASSIGN(divides) #undef COMPOUND_ASSIGN -// ----------------- End of compound assignment ops ---------------- +// ---------------------------------------------------------------- MIN, MAX, TERNARY ------------------------------------------------------------------------- -// Min, Max and Ternary Operator don't use ALIAS_STD because they don't exist in STD +// Min, Max, Ternary, and Shift operators don't use ALIAS_STD because they don't exist in STD // TODO: implement as mix(rhs struct minimum @@ -195,18 +235,257 @@ struct maximum NBL_CONSTEXPR_STATIC_INLINE T identity = numeric_limits::lowest; // TODO: `all_components` }; -template +template struct ternary_operator { using type_t = T; - T
operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) { - return condition ? lhs : rhs; + return select(condition, lhs, rhs); } }; -} -} +// ----------------------------------------------------------------- SHIFT OPERATORS -------------------------------------------------------------------- + +template +struct left_shift_operator +{ + using type_t = T; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand << bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntVector) +struct left_shift_operator) > +{ + using type_t = T; + using scalar_t = scalar_type_t; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand << bits; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + return operand << bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(!concepts::IntVector && concepts::IntegralLikeVectorial) +struct left_shift_operator && concepts::IntegralLikeVectorial) > +{ + using type_t = T; + using scalar_t = typename vector_traits::scalar_type; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), getter(bits, i))); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits)); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits[i])); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint16_t) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits)); + } + return shifted; + } +}; + +template +struct arithmetic_right_shift_operator +{ + using type_t = T; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand >> bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntVector) +struct arithmetic_right_shift_operator) > +{ + using type_t = T; + using scalar_t = scalar_type_t; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand >> bits; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + return operand >> bits; + } +}; + +template 
NBL_PARTIAL_REQ_TOP(!concepts::IntVector&& concepts::IntegralLikeVectorial) +struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > +{ + using type_t = T; + using scalar_t = typename vector_traits::scalar_type; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), getter(bits, i))); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits)); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits[i])); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint16_t) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits)); + } + return shifted; + } +}; + +// Left unimplemented for vectorial types by default +template +struct logical_right_shift_operator +{ + using type_t = T; + using unsigned_type_t = make_unsigned_t; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + arithmetic_right_shift_operator arithmeticRightShift; + return _static_cast(arithmeticRightShift(_static_cast(operand), _static_cast(bits))); + } +}; + +// ----------------------------------------------------------------- UNARY OPERATORS -------------------------------------------------------------------- +#ifndef __HLSL_VERSION +#define NBL_UNARY_OP_SPECIALIZATION(NAME, OP) template \ +struct NAME : std::NAME { \ + using type_t = T; \ +}; +#else +#define NBL_UNARY_OP_SPECIALIZATION(NAME, OP) template \ +struct NAME \ +{ \ + using type_t = T; \ + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) \ + { \ + return operand.operator OP(); \ + } \ +}; \ +template NBL_PARTIAL_REQ_TOP(concepts::Scalar || concepts::Vector || concepts::Matrix ) \ +struct NAME || concepts::Vector || concepts::Matrix ) > \ +{ \ + using type_t = T; \ + NBL_CONSTEXPR_FUNC T operator()(const T operand) \ + { \ + return (OP operand); \ + } \ +}; +#endif + +NBL_UNARY_OP_SPECIALIZATION(bit_not, ~) +NBL_UNARY_OP_SPECIALIZATION(negate, -) + + + +#endif +} //namespace hlsl +} //namespace nbl #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 307a11101f..29c48a79d1 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -90,7 +90,7 @@ inline int
extractExponent(T x) } template -NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) +NBL_CONSTEXPR_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) { using AsFloat = typename float_of_size::type; return impl::castBackToFloatType(glsl::bitfieldInsert(ieee754::impl::bitCastToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt)); @@ -98,20 +98,20 @@ NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer // performs no overflow tests, returns x*exp2(n) template -NBL_CONSTEXPR_INLINE_FUNC T fastMulExp2(T x, int n) +NBL_CONSTEXPR_FUNC T fastMulExp2(T x, int n) { return replaceBiasedExponent(x, extractBiasedExponent(x) + uint32_t(n)); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::type>::mantissaMask; } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; using AsFloat = typename float_of_size::type; @@ -119,21 +119,21 @@ NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type ext } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSign(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSign(T x) { using AsFloat = typename float_of_size::type; return (ieee754::impl::bitCastToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) { using AsFloat = typename float_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::signMask; } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) +NBL_CONSTEXPR_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) { using AsUint = typename unsigned_integer_of_size::type; @@ -240,13 +240,13 @@ struct flipSignIfRHSNegative_helper -NBL_CONSTEXPR_INLINE_FUNC T flipSign(T val, U flip) +NBL_CONSTEXPR_FUNC T flipSign(T val, U flip) { return impl::flipSign_helper::__call(val, flip); } template -NBL_CONSTEXPR_INLINE_FUNC T flipSignIfRHSNegative(T val, T flip) +NBL_CONSTEXPR_FUNC T flipSignIfRHSNegative(T val, T flip) { return impl::flipSignIfRHSNegative_helper::__call(val, flip); } diff --git a/include/nbl/builtin/hlsl/ieee754/impl.hlsl b/include/nbl/builtin/hlsl/ieee754/impl.hlsl index ad8a3f9228..69fba9795f 100644 --- a/include/nbl/builtin/hlsl/ieee754/impl.hlsl +++ b/include/nbl/builtin/hlsl/ieee754/impl.hlsl @@ -15,25 +15,25 @@ namespace ieee754 namespace impl { template -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) +NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) { using AsUint = unsigned_integer_of_size_t; return bit_cast(x); } // to avoid bit cast from uintN_t to uintN_t -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } -template <> 
NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } template -NBL_CONSTEXPR_INLINE_FUNC T castBackToFloatType(T x) +NBL_CONSTEXPR_FUNC T castBackToFloatType(T x) { using AsFloat = typename float_of_size::type; return bit_cast(x); } -template<> NBL_CONSTEXPR_INLINE_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } } } diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index 20442c467b..21f0e6ef2b 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -123,9 +123,9 @@ void frisvad(NBL_CONST_REF_ARG(T) normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(float) rcpChoiceProb) { #ifdef __HLSL_VERSION - NBL_CONSTEXPR float NEXT_ULP_AFTER_UNITY = asfloat(0x3f800001u); + NBL_CONSTEXPR_FUNC_SCOPE_VAR float NEXT_ULP_AFTER_UNITY = asfloat(0x3f800001u); #else - NBL_CONSTEXPR float32_t NEXT_ULP_AFTER_UNITY = bit_cast(0x3f800001u); + NBL_CONSTEXPR_FUNC_SCOPE_VAR float32_t NEXT_ULP_AFTER_UNITY = bit_cast(0x3f800001u); #endif const bool pickRight = xi >= leftProb * NEXT_ULP_AFTER_UNITY; diff --git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl index 3bcfbb2388..cd402d0cd4 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl @@ -14,25 +14,25 @@ namespace float_t_namespace { -NBL_CONSTEXPR float_t xi_2[2] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_2[2] = { TYPED_NUMBER(-0.5773502691896257), TYPED_NUMBER(0.5773502691896257) }; -NBL_CONSTEXPR float_t xi_3[3] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_3[3] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.7745966692414833), TYPED_NUMBER(0.7745966692414833) }; -NBL_CONSTEXPR float_t xi_4[4] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_4[4] = { TYPED_NUMBER(-0.3399810435848562), TYPED_NUMBER(0.3399810435848562), TYPED_NUMBER(-0.8611363115940525), TYPED_NUMBER(0.8611363115940525) }; -NBL_CONSTEXPR float_t xi_5[5] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_5[5] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.5384693101056830), TYPED_NUMBER(0.5384693101056830), @@ -40,7 +40,7 @@ NBL_CONSTEXPR float_t xi_5[5] = { TYPED_NUMBER(0.9061798459386639) }; -NBL_CONSTEXPR float_t xi_6[6] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_6[6] = { TYPED_NUMBER(0.6612093864662645), TYPED_NUMBER(-0.6612093864662645), TYPED_NUMBER(-0.2386191860831969), @@ -49,7 +49,7 @@ NBL_CONSTEXPR float_t xi_6[6] = { TYPED_NUMBER(0.9324695142031520) }; -NBL_CONSTEXPR float_t xi_7[7] = { 
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_7[7] = { TYPED_NUMBER(0.0), TYPED_NUMBER(0.4058451513773971), TYPED_NUMBER(-0.4058451513773971), @@ -59,7 +59,7 @@ NBL_CONSTEXPR float_t xi_7[7] = { TYPED_NUMBER(0.9491079123427585) }; -NBL_CONSTEXPR float_t xi_8[8] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_8[8] = { TYPED_NUMBER(-0.1834346424956498), TYPED_NUMBER(0.1834346424956498), TYPED_NUMBER(-0.5255324099163289), @@ -70,7 +70,7 @@ NBL_CONSTEXPR float_t xi_8[8] = { TYPED_NUMBER(0.9602898564975362) }; -NBL_CONSTEXPR float_t xi_9[9] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_9[9] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.8360311073266357), TYPED_NUMBER(0.8360311073266357), @@ -82,7 +82,7 @@ NBL_CONSTEXPR float_t xi_9[9] = { TYPED_NUMBER(0.6133714327005903) }; -NBL_CONSTEXPR float_t xi_10[10] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_10[10] = { TYPED_NUMBER(-0.1488743389816312), TYPED_NUMBER(0.1488743389816312), TYPED_NUMBER(-0.4333953941292471), @@ -95,7 +95,7 @@ NBL_CONSTEXPR float_t xi_10[10] = { TYPED_NUMBER(0.9739065285171717) }; -NBL_CONSTEXPR float_t xi_11[11] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_11[11] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2695431559523449), TYPED_NUMBER(0.2695431559523449), @@ -109,7 +109,7 @@ NBL_CONSTEXPR float_t xi_11[11] = { TYPED_NUMBER(0.9782286581460569) }; -NBL_CONSTEXPR float_t xi_12[12] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_12[12] = { TYPED_NUMBER(-0.1252334085114689), TYPED_NUMBER(0.1252334085114689), TYPED_NUMBER(-0.3678314989981801), @@ -124,7 +124,7 @@ NBL_CONSTEXPR float_t xi_12[12] = { TYPED_NUMBER(0.9815606342467192) }; -NBL_CONSTEXPR float_t xi_13[13] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_13[13] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2304583159551347), TYPED_NUMBER(0.2304583159551347), @@ -140,7 +140,7 @@ NBL_CONSTEXPR float_t xi_13[13] = { TYPED_NUMBER(0.9841830547185881) }; -NBL_CONSTEXPR float_t xi_14[14] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_14[14] = { TYPED_NUMBER(-0.1080549487073436), TYPED_NUMBER(0.1080549487073436), TYPED_NUMBER(-0.3191123689278897), @@ -157,7 +157,7 @@ NBL_CONSTEXPR float_t xi_14[14] = { TYPED_NUMBER(0.9862838086968123) }; -NBL_CONSTEXPR float_t xi_15[15] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_15[15] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2011940939974345), TYPED_NUMBER(0.2011940939974345), @@ -175,25 +175,25 @@ NBL_CONSTEXPR float_t xi_15[15] = { TYPED_NUMBER(0.9879925180204854) }; -NBL_CONSTEXPR float_t wi_2[2] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_2[2] = { TYPED_NUMBER(1.0000000000000000), TYPED_NUMBER(1.0000000000000000) }; -NBL_CONSTEXPR float_t wi_3[3] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_3[3] = { TYPED_NUMBER(0.8888888888888888), TYPED_NUMBER(0.5555555555555555), TYPED_NUMBER(0.5555555555555555) }; -NBL_CONSTEXPR float_t wi_4[4] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_4[4] = { TYPED_NUMBER(0.6521451548625461), TYPED_NUMBER(0.6521451548625461), TYPED_NUMBER(0.3478548451374538), TYPED_NUMBER(0.3478548451374538) }; -NBL_CONSTEXPR float_t wi_5[5] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_5[5] = { TYPED_NUMBER(0.5688888888888888), TYPED_NUMBER(0.4786286704993664), TYPED_NUMBER(0.4786286704993664), @@ -201,7 +201,7 @@ NBL_CONSTEXPR float_t wi_5[5] = { TYPED_NUMBER(0.2369268850561890) }; -NBL_CONSTEXPR float_t wi_6[6] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_6[6] = { TYPED_NUMBER(0.3607615730481386), TYPED_NUMBER(0.3607615730481386), TYPED_NUMBER(0.4679139345726910), 
@@ -210,7 +210,7 @@ NBL_CONSTEXPR float_t wi_6[6] = { TYPED_NUMBER(0.1713244923791703) }; -NBL_CONSTEXPR float_t wi_7[7] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_7[7] = { TYPED_NUMBER(0.4179591836734693), TYPED_NUMBER(0.3818300505051189), TYPED_NUMBER(0.3818300505051189), @@ -220,7 +220,7 @@ NBL_CONSTEXPR float_t wi_7[7] = { TYPED_NUMBER(0.1294849661688696) }; -NBL_CONSTEXPR float_t wi_8[8] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_8[8] = { TYPED_NUMBER(0.3626837833783619), TYPED_NUMBER(0.3626837833783619), TYPED_NUMBER(0.3137066458778872), @@ -231,7 +231,7 @@ NBL_CONSTEXPR float_t wi_8[8] = { TYPED_NUMBER(0.1012285362903762) }; -NBL_CONSTEXPR float_t wi_9[9] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_9[9] = { TYPED_NUMBER(0.3302393550012597), TYPED_NUMBER(0.1806481606948574), TYPED_NUMBER(0.1806481606948574), @@ -243,7 +243,7 @@ NBL_CONSTEXPR float_t wi_9[9] = { TYPED_NUMBER(0.2606106964029354) }; -NBL_CONSTEXPR float_t wi_10[10] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_10[10] = { TYPED_NUMBER(0.2955242247147528), TYPED_NUMBER(0.2955242247147528), TYPED_NUMBER(0.2692667193099963), @@ -256,7 +256,7 @@ NBL_CONSTEXPR float_t wi_10[10] = { TYPED_NUMBER(0.0666713443086881) }; -NBL_CONSTEXPR float_t wi_11[11] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_11[11] = { TYPED_NUMBER(0.2729250867779006), TYPED_NUMBER(0.2628045445102466), TYPED_NUMBER(0.2628045445102466), @@ -270,7 +270,7 @@ NBL_CONSTEXPR float_t wi_11[11] = { TYPED_NUMBER(0.0556685671161736) }; -NBL_CONSTEXPR float_t wi_12[12] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_12[12] = { TYPED_NUMBER(0.2491470458134027), TYPED_NUMBER(0.2491470458134027), TYPED_NUMBER(0.2334925365383548), @@ -285,7 +285,7 @@ NBL_CONSTEXPR float_t wi_12[12] = { TYPED_NUMBER(0.0471753363865118) }; -NBL_CONSTEXPR float_t wi_13[13] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_13[13] = { TYPED_NUMBER(0.2325515532308739), TYPED_NUMBER(0.2262831802628972), TYPED_NUMBER(0.2262831802628972), @@ -301,7 +301,7 @@ NBL_CONSTEXPR float_t wi_13[13] = { TYPED_NUMBER(0.0404840047653158) }; -NBL_CONSTEXPR float_t wi_14[14] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_14[14] = { TYPED_NUMBER(0.2152638534631577), TYPED_NUMBER(0.2152638534631577), TYPED_NUMBER(0.2051984637212956), @@ -318,7 +318,7 @@ NBL_CONSTEXPR float_t wi_14[14] = { TYPED_NUMBER(0.0351194603317518) }; -NBL_CONSTEXPR float_t wi_15[15] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_15[15] = { TYPED_NUMBER(0.2025782419255612), TYPED_NUMBER(0.1984314853271115), TYPED_NUMBER(0.1984314853271115), diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl new file mode 100644 index 0000000000..d7a781fad9 --- /dev/null +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -0,0 +1,658 @@ +#ifndef _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/emulated/int64_t.hlsl" +#include "nbl/builtin/hlsl/mpl.hlsl" +#include "nbl/builtin/hlsl/portable/vector_t.hlsl" + +// TODO: mega macro to get functional plus, minus, plus_assign, minus_assign + +namespace nbl +{ +namespace hlsl +{ +namespace morton +{ + +namespace impl +{ + +// Valid dimension for a morton code +template +NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5; + +template && concepts::Scalar) +NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegral(T val) 
+{ + NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = ~((T(1) << Bits) - 1); + const bool allZero = ((val & mask) == 0); + NBL_IF_CONSTEXPR(is_signed_v) + { + const bool allOne = ((val & mask) == mask); + return allZero || allOne; + } + return allZero; +} + +template && concepts::Scalar) +NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegralVec(vector vec) +{ + array_get, T> getter; + NBL_UNROLL + for (uint16_t i = 0; i < Dim; i++) + if (!verifyAnyBitIntegral(getter(vec, i))) return false; + return true; +} + + +// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- + +NBL_CONSTEXPR uint16_t CodingStages = 5; + +template +struct coding_mask; + +template +NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value); + +// It's a complete coin toss whether template variables work or not, since it's a C++14 feature (not supported in HLSL2021). Most of the ones we use in Nabla work, +// but this one will only work for some parameters and not for others. Therefore, this was made into a macro to inline where used. + +#define NBL_MORTON_INTERLEAVE_MASKS(STORAGE_T, DIM, BITS, NAMESPACE_PREFIX) _static_cast >(\ + truncate >(\ + vector(NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0>,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 1,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 2,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 3))) + + +template +struct sign_mask : integral_constant {}; + +template +NBL_CONSTEXPR T sign_mask_v = _static_cast(sign_mask::value); + +#define NBL_MORTON_SIGN_MASKS(STORAGE_T, DIM, BITS) _static_cast >(\ + truncate >(\ + vector(sign_mask_v< DIM, BITS >,\ + sign_mask_v< DIM, BITS > << 1,\ + sign_mask_v< DIM, BITS > << 2,\ + sign_mask_v< DIM, BITS > << 3))) + +// 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage +// mask also considers the total number of bits we're considering for a code (all other masks operate on a bit-agnostic basis). +#define NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(DIM, BASE_VALUE) template struct coding_mask\ +{\ + enum : uint64_t { _Bits = Bits };\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t KilloffMask = _Bits * DIM < 64 ?
(uint64_t(1) << (_Bits * DIM)) - 1 : ~uint64_t(0);\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE) & KilloffMask;\ +}; + +#define NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(DIM, STAGE, BASE_VALUE) template struct coding_mask\ +{\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE);\ +}; + +// Final stage mask also counts exact number of bits, although maybe it's not necessary +#define NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template struct coding_mask\ +{\ + enum : uint64_t { _Bits = Bits };\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = (uint64_t(1) << _Bits) - 1;\ +}; + +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555ull) // Groups bits by 1 on, 1 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, 0x3333333333333333ull) // Groups bits by 2 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, 0x0F0F0F0F0F0F0F0Full) // Groups bits by 4 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, 0x00FF00FF00FF00FFull) // Groups bits by 8 on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, 0x0000FFFF0000FFFFull) // Groups bits by 16 on, 16 off + +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249ull) // Groups bits by 1 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, 0x30C30C30C30C30C3ull) // Groups bits by 2 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, 0xF00F00F00F00F00Full) // Groups bits by 4 on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, 0x00FF0000FF0000FFull) // Groups bits by 8 on, 16 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, 0xFFFF00000000FFFFull) // Groups bits by 16 on, 32 off + +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111ull) // Groups bits by 1 on, 3 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, 0x0303030303030303ull) // Groups bits by 2 on, 6 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, 0x000F000F000F000Full) // Groups bits by 4 on, 12 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, 0x000000FF000000FFull) // Groups bits by 8 on, 24 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, 0x000000000000FFFFull) // Groups bits by 16 on, 48 off (unused but here for completeness + likely keeps compiler from complaining) + +NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS + +#undef NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS +#undef NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK +#undef NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK + +// ----------------------------------------------------------------- MORTON TRANSCODER --------------------------------------------------- +template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>) +struct Transcoder +{ + using decode_component_t = conditional_t<(Bits > 16), uint32_t, uint16_t>; + using decode_t = vector; + + template ) + /** + * @brief Interleaves each coordinate with `Dim - 1` zeros in between each bit, and left-shifts each by its coordinate index + * + * @param [in] decodedValue Cartesian coordinates to interleave and shift + */ + NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(T) decodedValue) + { + left_shift_operator > leftShift; + portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; + + // Read this to understand how interleaving and spreading bits works: https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ + #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + interleaved = interleaved | leftShift(interleaved, (uint16_t(1) << I) * (Dim - 1));\ + interleaved = interleaved & coding_mask_v;\ + }
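+ // Worked example (assuming Dim = 2, Bits = 8): starting from 0b00000000abcdefgh, only the + // I = 2, 1, 0 iterations below fire, spreading the bits as + // 0b0000abcd0000efgh -> 0b00ab00cd00ef00gh -> 0b0a0b0c0d0e0f0g0h, + // i.e. every source bit ends up separated from the next by Dim - 1 = 1 zero bit.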
+ ENCODE_LOOP_ITERATION(4) + ENCODE_LOOP_ITERATION(3) + ENCODE_LOOP_ITERATION(2) + ENCODE_LOOP_ITERATION(1) + ENCODE_LOOP_ITERATION(0) + + #undef ENCODE_LOOP_ITERATION + + // After interleaving, shift each coordinate left by its index + return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); + } + + template + /** + * @brief Encodes a vector of cartesian coordinates as a Morton code + * + * @param [in] decodedValue Cartesian coordinates to encode + */ + NBL_CONSTEXPR_STATIC encode_t encode(NBL_CONST_REF_ARG(T) decodedValue) + { + const portable_vector_t interleaveShifted = interleaveShift(decodedValue); + + array_get, encode_t> getter; + encode_t encoded = getter(interleaveShifted, 0); + + NBL_UNROLL + for (uint16_t i = 1; i < Dim; i++) + encoded = encoded | getter(interleaveShifted, i); + + return encoded; + } + + /** + * @brief Decodes a Morton code back to a vector of cartesian coordinates + * + * @param [in] encodedValue Representation of a Morton code (binary code, not the morton class defined below) + */ + NBL_CONSTEXPR_STATIC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator encodedRightShift; + portable_vector_t decoded; + array_set, encode_t> setter; + // Write initial values into decoded + NBL_UNROLL + for (uint16_t i = 0; i < Dim; i++) + setter(decoded, i, encodedRightShift(encodedValue, i)); + + arithmetic_right_shift_operator > rightShift; + + #define DECODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + decoded = decoded & coding_mask_v;\ + decoded = decoded | rightShift(decoded, (uint16_t(1) << I) * (Dim - 1));\ + } + + DECODE_LOOP_ITERATION(0) + DECODE_LOOP_ITERATION(1) + DECODE_LOOP_ITERATION(2) + DECODE_LOOP_ITERATION(3) + DECODE_LOOP_ITERATION(4) + + #undef DECODE_LOOP_ITERATION + + // If `Bits` is greater than half the bitwidth of the decode type, then we can avoid `&`ing against the last mask since duplicated MSBs get truncated + NBL_IF_CONSTEXPR(Bits > 4 * sizeof(typename vector_traits::scalar_type)) + return _static_cast(decoded); + else + return _static_cast(decoded & coding_mask_v); + } +}; + +// ---------------------------------------------------- COMPARISON OPERATORS --------------------------------------------------------------- +// Defined here because methods can't be partially specialized +// `BitsAlreadySpread` assumes the operand is both pre-interleaved and pre-shifted + +template +NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::max_v, uint64_t(16)>)); + +template +struct Equal; + +template +struct Equal +{ + template) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); + const portable_vector_t zeros = promote >(_static_cast(0)); + + const portable_vector_t rhsCasted = _static_cast >(rhs); + const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); + equal_to > _equal; + return _equal(xored, zeros); + } +}; + +template +struct Equal +{ + template) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + using right_sign_t = conditional_t, make_unsigned_t >; + using transcoder_t = Transcoder; + const portable_vector_t interleaved = _static_cast >(transcoder_t::interleaveShift(_static_cast(rhs))); + return Equal::template __call(value,
interleaved); + } +}; + +template +struct BaseComparison; + +// Aux variable that has only the sign bit for the first of D dimensions +template +NBL_CONSTEXPR uint64_t SignMask = uint64_t(1) << (D * (Bits - 1)); + +template +struct BaseComparison +{ + template) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); + const portable_vector_t SignMasks = NBL_MORTON_SIGN_MASKS(storage_t, D, Bits); + ComparisonOp comparison; + NBL_IF_CONSTEXPR(Signed) + { + // Obtain a vector of deinterleaved coordinates and flip their sign bits + portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + // rhs already deinterleaved, just have to cast type and flip sign + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + + return comparison(thisCoord, rhsCoord); + } + else + { + // Obtain a vector of deinterleaved coordinates + portable_vector_t thisCoord = InterleaveMasks & value; + // rhs already deinterleaved, just have to cast type + const portable_vector_t rhsCoord = _static_cast >(rhs); + + return comparison(thisCoord, rhsCoord); + } + + } +}; + +template +struct BaseComparison +{ + template) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + using right_sign_t = conditional_t, make_unsigned_t >; + using transcoder_t = Transcoder; + const portable_vector_t interleaved = _static_cast >(transcoder_t::interleaveShift(_static_cast(rhs))); + return BaseComparison::template __call(value, interleaved); + } +}; + +template +struct LessThan : BaseComparison > > {}; + +template +struct LessEqual : BaseComparison > > {}; + +template +struct GreaterThan : BaseComparison > > {}; + +template +struct GreaterEqual : BaseComparison > > {}; + +} //namespace impl + +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it +template && D * Bits <= 64) +struct code +{ + using this_t = code; + using this_signed_t = code; + NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; + using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; + + using transcoder_t = impl::Transcoder; + using decode_component_t = conditional_t, + typename transcoder_t::decode_component_t>; + + storage_t value; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION + + code() = default; + + #endif + + /** + * @brief Creates a Morton code from a set of integral cartesian coordinates + * + * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class + */ + template + NBL_CONSTEXPR_STATIC enable_if_t , this_t> + create(NBL_CONST_REF_ARG(vector) cartesian) + { + this_t retVal; + NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian))); + using decode_t = typename transcoder_t::decode_t; + retVal.value = transcoder_t::encode(_static_cast(cartesian)); + return retVal; + } + + // CPP can also have an actual constructor + #ifndef __HLSL_VERSION + + /** + * @brief Creates a Morton code from a set of cartesian coordinates + * + * @param [in] cartesian Coordinates to encode + */ + template + inline explicit code(NBL_CONST_REF_ARG(vector) cartesian) + { + *this = create(cartesian); + } + + /** + * @brief Decodes this Morton code back to a set of cartesian coordinates + */ + template == Signed) + constexpr explicit operator vector() const noexcept; + + #endif + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value & rhs.value; + return retVal; + } + + NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value | rhs.value; + return retVal; + } + + NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value ^ rhs.value; + return retVal; + } + + NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = ~value; + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + constexpr this_t operator<<(uint16_t bits) const; + + constexpr this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC + { + this_t zero; + zero.value = _static_cast(0); + #ifndef __HLSL_VERSION + return zero - *this; + #else + return zero - this; + #endif + } + + // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + + // put 1 bits everywhere in the bits the current axis is not using + // then extract just the axis bits for the right hand coordinate + // carry-1 will propagate the bits across the already set bits + // then clear out the bits not belonging to current axis + // Note: It's possible to clear on `this` and fill on `rhs` but that will + // disable optimizations; we expect the compiler to optimize a lot if the + // value of `rhs` is known at compile time, e.g.
`static_cast>(glm::ivec3(1,0,0))` + NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); + bit_not > bitnot; + // For each coordinate, leave its bits intact and turn every other bit ON + const portable_vector_t counterMaskedValue = bitnot(InterleaveMasks) | value; + // For each coordinate in rhs, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; + // Add these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; + this_t retVal; + retVal.value = getter(interleaveShiftedResult, 0); + NBL_UNROLL + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + return retVal; + } + + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); + // For each coordinate, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedValue = InterleaveMasks & value; + // Do the same for each coordinate in rhs + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; + // Subtract these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; + this_t retVal; + retVal.value = getter(interleaveShiftedResult, 0); + NBL_UNROLL + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + + return retVal; + } + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value == rhs.value; + } + + template) + NBL_CONSTEXPR_FUNC vector equal(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::Equal::template __call(value, rhs); + } + + NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value != rhs.value; + } + + template) + NBL_CONSTEXPR_FUNC vector notEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return !equal(rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector lessThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::LessThan::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector lessThanEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::LessEqual::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector greaterThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::GreaterThan::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector greaterThanEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::GreaterEqual::template __call(value, rhs); + } + +}; + +} //namespace morton + +// Specialize the `static_cast_helper` +namespace impl +{ + +// I must be of same 
signedness as the morton code, and be wide enough to hold each component +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar) > +{ + NBL_CONSTEXPR_STATIC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) + { + using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; + return morton::impl::Transcoder::decode(val.value); + } +}; + +} // namespace impl + +template +struct left_shift_operator > +{ + using type_t = morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + left_shift_operator valueLeftShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueLeftShift(operand.value, bits * D); + // Previous shift might move bits to positions that storage has available but the morton code does not use + // Un-decoding the resulting morton is still fine and produces expected results, but some operations such as equality expect these unused bits to be 0 so we mask them off + const uint64_t UsedBitsMask = Bits * D < 64 ? (uint64_t(1) << (Bits * D)) - 1 : ~uint64_t(0); + retVal.value = retVal.value & _static_cast(UsedBitsMask); + return retVal; + } +}; + +template +struct arithmetic_right_shift_operator > +{ + using type_t = morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + arithmetic_right_shift_operator valueArithmeticRightShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueArithmeticRightShift(operand.value, bits * D); + return retVal; + } +}; + +// This one's uglier - have to unpack to get the expected behaviour +template +struct arithmetic_right_shift_operator > +{ + using type_t = morton::code; + using scalar_t = conditional_t<(Bits > 16), int32_t, int16_t>; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + vector cartesian = _static_cast >(operand); + // To avoid branching, we left-shift each coordinate to put the MSB (of the encoded Morton) at the position of the MSB (of the `scalar_t` used for the decoded coordinate), + // then right-shift again to get correct sign on each coordinate + // The number of bits we shift by to put MSB of Morton at MSB of `scalar_t` is the difference between the bitwidth of `scalar_t` and Bits + const scalar_t ShiftFactor = scalar_t(8 * sizeof(scalar_t) - Bits); + cartesian <<= ShiftFactor; + cartesian >>= ShiftFactor + scalar_t(bits); + return type_t::create(cartesian); + } +}; + +#ifndef __HLSL_VERSION + +template&& D* Bits <= 64) +constexpr morton::code morton::code::operator<<(uint16_t bits) const +{ + left_shift_operator> leftShift; + return leftShift(*this, bits); +} + +template&& D* Bits <= 64) +constexpr morton::code morton::code::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator> rightShift; + return rightShift(*this, bits); +} + +template && D* Bits <= 64) +template == Signed) +constexpr morton::code::operator vector() const noexcept +{ + return _static_cast, morton::code>(*this); +} + +#endif + +#undef NBL_MORTON_INTERLEAVE_MASKS +#undef NBL_MORTON_SIGN_MASKS + +} //namespace hlsl +} //namespace nbl + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/mpl.hlsl b/include/nbl/builtin/hlsl/mpl.hlsl index 8fb13db872..7734dea15f 100644 --- 
a/include/nbl/builtin/hlsl/mpl.hlsl +++ b/include/nbl/builtin/hlsl/mpl.hlsl @@ -41,7 +41,12 @@ struct countl_zero : impl::countl_zero static_assert(is_integral::value, "countl_zero type parameter must be an integral type"); }; template -NBL_CONSTEXPR T countl_zero_v = countl_zero::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T countl_zero_v = countl_zero::value; + +template +struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_pot_v = is_pot::value; template struct log2 @@ -49,7 +54,12 @@ struct log2 NBL_CONSTEXPR_STATIC_INLINE uint16_t value = X ? (1ull<<6)-countl_zero::value-1 : -1ull; }; template -NBL_CONSTEXPR uint64_t log2_v = log2::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint16_t log2_v = log2::value; + +template +struct log2_ceil : integral_constant + uint16_t(!is_pot_v)> {}; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint16_t log2_ceil_v = log2_ceil::value; template struct rotl @@ -59,7 +69,7 @@ struct rotl NBL_CONSTEXPR_STATIC_INLINE T value = (S >= 0) ? ((X << r) | (X >> (N - r))) : (X >> (-r)) | (X << (N - (-r))); }; template -NBL_CONSTEXPR T rotl_v = rotl::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T rotl_v = rotl::value; template struct rotr @@ -69,7 +79,7 @@ struct rotr NBL_CONSTEXPR_STATIC_INLINE T value = (S >= 0) ? ((X >> r) | (X << (N - r))) : (X << (-r)) | (X >> (N - (-r))); }; template -NBL_CONSTEXPR T rotr_v = rotr::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T rotr_v = rotr::value; template struct align_up @@ -77,12 +87,7 @@ struct align_up NBL_CONSTEXPR_STATIC_INLINE uint64_t value = X ? (((X-1)/M+1)*M):0; }; template -NBL_CONSTEXPR uint64_t align_up_v = align_up::value; - -template -struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; -template -NBL_CONSTEXPR bool is_pot_v = is_pot::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t align_up_v = align_up::value; template struct max @@ -90,7 +95,7 @@ struct max NBL_CONSTEXPR_STATIC_INLINE T value = X -NBL_CONSTEXPR T max_v = max::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T max_v = max::value; template struct min @@ -98,7 +103,18 @@ struct min NBL_CONSTEXPR_STATIC_INLINE T value = X -NBL_CONSTEXPR T min_v = min::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T min_v = min::value; + +template +struct round_up_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t round_up_to_pot_v = round_up_to_pot::value; + +// TODO: should rename log2 to log2_floor +template +struct round_down_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t round_down_to_pot_v = round_down_to_pot::value; template struct find_lsb @@ -106,7 +122,7 @@ struct find_lsb NBL_CONSTEXPR_STATIC_INLINE uint16_t value = log2::value; }; template -NBL_CONSTEXPR uint64_t find_lsb_v = find_lsb::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t find_lsb_v = find_lsb::value; } } } diff --git a/include/nbl/builtin/hlsl/numbers.hlsl b/include/nbl/builtin/hlsl/numbers.hlsl index 6671a44756..4594596590 100644 --- a/include/nbl/builtin/hlsl/numbers.hlsl +++ b/include/nbl/builtin/hlsl/numbers.hlsl @@ -11,33 +11,33 @@ namespace numbers { template -NBL_CONSTEXPR float_t e = float_t(2.718281828459045); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t e = float_t(2.718281828459045); template -NBL_CONSTEXPR float_t log2e = float_t(1.4426950408889634); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t log2e = float_t(1.4426950408889634); template -NBL_CONSTEXPR float_t log10e = float_t(0.4342944819032518); 
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t log10e = float_t(0.4342944819032518); template -NBL_CONSTEXPR float_t pi = float_t(3.141592653589793); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t pi = float_t(3.141592653589793); template -NBL_CONSTEXPR float_t inv_pi = float_t(0.3183098861837907); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_pi = float_t(0.3183098861837907); template -NBL_CONSTEXPR float_t inv_sqrtpi = float_t(0.5641895835477563); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_sqrtpi = float_t(0.5641895835477563); template -NBL_CONSTEXPR float_t ln2 = float_t(0.6931471805599453); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t ln2 = float_t(0.6931471805599453); template -NBL_CONSTEXPR float_t inv_ln2 = float_t(1.44269504088896); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_ln2 = float_t(1.44269504088896); template -NBL_CONSTEXPR float_t ln10 = float_t(2.302585092994046); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t ln10 = float_t(2.302585092994046); template -NBL_CONSTEXPR float_t sqrt2 = float_t(1.4142135623730951); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t sqrt2 = float_t(1.4142135623730951); template -NBL_CONSTEXPR float_t sqrt3 = float_t(1.7320508075688772); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t sqrt3 = float_t(1.7320508075688772); template -NBL_CONSTEXPR float_t inv_sqrt3 = float_t(0.5773502691896257); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_sqrt3 = float_t(0.5773502691896257); template -NBL_CONSTEXPR float_t egamma = float_t(0.5772156649015329); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t egamma = float_t(0.5772156649015329); template -NBL_CONSTEXPR float_t phi = float_t(1.618033988749895); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t phi = float_t(1.618033988749895); } } diff --git a/include/nbl/builtin/hlsl/portable/int64_t.hlsl b/include/nbl/builtin/hlsl/portable/int64_t.hlsl new file mode 100644 index 0000000000..2dffa40a2d --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/int64_t.hlsl @@ -0,0 +1,36 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ + +#include +#include + +// define NBL_FORCE_EMULATED_INT_64 to force using emulated int64 types + +namespace nbl +{ +namespace hlsl +{ +#ifdef __HLSL_VERSION +#ifdef NBL_FORCE_EMULATED_INT_64 +template +using portable_uint64_t = emulated_uint64_t; +template +using portable_int64_t = emulated_int64_t; +#else +template +using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +template +using portable_int64_t = typename conditional::shaderInt64, int64_t, emulated_int64_t>::type; +#endif + +#else +template +using portable_uint64_t = uint64_t; +template +using portable_int64_t = int64_t; +#endif + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index ace199e20b..16d5b40f81 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -3,6 +3,7 @@ #include #include +#include namespace nbl { @@ -36,19 +37,53 @@ template using portable_vector_t4 = portable_vector_t; #ifdef __HLSL_VERSION +// Float template using portable_float64_t2 = portable_vector_t2 >; template using portable_float64_t3 = portable_vector_t3 >; template using portable_float64_t4 = portable_vector_t4 >; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2 >; +template +using portable_uint64_t3 = portable_vector_t3 >; +template +using portable_uint64_t4 = 
diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl
index ace199e20b..16d5b40f81 100644
--- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl
+++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl
@@ -3,6 +3,7 @@
 
 #include <nbl/builtin/hlsl/portable/float64_t.hlsl>
 #include <nbl/builtin/hlsl/emulated/vector_t.hlsl>
+#include <nbl/builtin/hlsl/portable/int64_t.hlsl>
 
 namespace nbl
 {
@@ -36,19 +37,53 @@ template
 using portable_vector_t4 = portable_vector_t<T, 4, device_capabilities>;
 
 #ifdef __HLSL_VERSION
+// Float
 template<typename device_capabilities>
 using portable_float64_t2 = portable_vector_t2<portable_float64_t<device_capabilities> >;
 template<typename device_capabilities>
 using portable_float64_t3 = portable_vector_t3<portable_float64_t<device_capabilities> >;
 template<typename device_capabilities>
 using portable_float64_t4 = portable_vector_t4<portable_float64_t<device_capabilities> >;
+
+// Uint
+template<typename device_capabilities>
+using portable_uint64_t2 = portable_vector_t2<portable_uint64_t<device_capabilities> >;
+template<typename device_capabilities>
+using portable_uint64_t3 = portable_vector_t3<portable_uint64_t<device_capabilities> >;
+template<typename device_capabilities>
+using portable_uint64_t4 = portable_vector_t4<portable_uint64_t<device_capabilities> >;
+
+//Int
+template<typename device_capabilities>
+using portable_int64_t2 = portable_vector_t2<portable_int64_t<device_capabilities> >;
+template<typename device_capabilities>
+using portable_int64_t3 = portable_vector_t3<portable_int64_t<device_capabilities> >;
+template<typename device_capabilities>
+using portable_int64_t4 = portable_vector_t4<portable_int64_t<device_capabilities> >;
 #else
+// Float
 template<typename device_capabilities=void>
 using portable_float64_t2 = portable_vector_t2<float64_t>;
 template<typename device_capabilities=void>
 using portable_float64_t3 = portable_vector_t3<float64_t>;
 template<typename device_capabilities=void>
 using portable_float64_t4 = portable_vector_t4<float64_t>;
+
+// Uint
+template<typename device_capabilities=void>
+using portable_uint64_t2 = portable_vector_t2<uint64_t>;
+template<typename device_capabilities=void>
+using portable_uint64_t3 = portable_vector_t3<uint64_t>;
+template<typename device_capabilities=void>
+using portable_uint64_t4 = portable_vector_t4<uint64_t>;
+
+// Int
+template<typename device_capabilities=void>
+using portable_int64_t2 = portable_vector_t2<int64_t>;
+template<typename device_capabilities=void>
+using portable_int64_t3 = portable_vector_t3<int64_t>;
+template<typename device_capabilities=void>
+using portable_int64_t4 = portable_vector_t4<int64_t>;
 #endif
 
 }
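The vector aliases simply plug the portable scalar into portable_vector_tN, so (continuing the hypothetical MyDeviceCaps from the previous sketch) a 64-bit coordinate triple stays usable on devices without native int64:

// Sketch only: native uint64_t3 when shaderInt64 is available, an emulated
// 64-bit vector otherwise; the interface stays the same either way.
#include <nbl/builtin/hlsl/portable/vector_t.hlsl>

nbl::hlsl::portable_uint64_t3<MyDeviceCaps> cellCoord;
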
diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl
index a7614469dd..b22ab1490d 100644
--- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl
+++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl
@@ -4,6 +4,8 @@
 #ifndef _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_
 #define _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_
 
+#include <...>
+
 #ifdef __HLSL_VERSION
 // TODO: AnastZIuk fix public search paths so we don't choke
 #include "spirv/unified1/spirv.hpp"
@@ -11,7 +13,6 @@
 #include <...>
 #include <...>
 #include <...>
-#include <...>
 
 namespace nbl
 {
@@ -115,7 +116,12 @@
 NBL_CONSTEXPR_STATIC_INLINE bool is_bda_pointer_v = is_bda_pointer<T>::value;
 
 //! General Operations
-
+
+//! Miscellaneous Instructions
+template<typename T>
+[[vk::ext_instruction(spv::OpUndef)]]
+T undef();
+
 // template
 [[vk::ext_instruction(spv::OpAccessChain)]]
@@ -341,6 +347,11 @@
 template<typename BooleanVector>
 [[vk::ext_instruction(spv::OpAny)]]
 enable_if_t<is_vector_v<BooleanVector> && is_same_v<typename vector_traits<BooleanVector>::scalar_type, bool>, bool> any(BooleanVector vec);
 
+// If Condition is a vector, ResultType must be a vector with the same number of components. Using (p -> q) = (~p v q)
+template<typename Condition, typename ResultType NBL_FUNC_REQUIRES(concepts::Boolean<Condition> && (!concepts::Vector<Condition> || (concepts::Vector<ResultType> && (extent_v<Condition> == extent_v<ResultType>))))
+[[vk::ext_instruction(spv::OpSelect)]]
+ResultType select(Condition condition, ResultType object1, ResultType object2);
+
 template<typename T NBL_FUNC_REQUIRES(is_integral_v<T>)
 [[vk::ext_instruction(spv::OpIAddCarry)]]
 AddCarryOutput<T> addCarry(T operand1, T operand2);
diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl
index a9701619dd..257a753129 100644
--- a/include/nbl/builtin/hlsl/type_traits.hlsl
+++ b/include/nbl/builtin/hlsl/type_traits.hlsl
@@ -636,28 +636,39 @@ template
 using conditional_t = typename conditional<C, T, F>::type;
 
-// Template variables
+// Template Variables
+template<typename T, T v>
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T integral_constant_v = integral_constant<T, v>::value;
 template<typename A, typename B>
-NBL_CONSTEXPR bool is_same_v = is_same<A, B>::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_same_v = is_same<A, B>::value;
 template<typename T>
-NBL_CONSTEXPR bool is_unsigned_v = is_unsigned<T>::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_unsigned_v = is_unsigned<T>::value;
 template<typename T>
-NBL_CONSTEXPR bool is_integral_v = is_integral<T>::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_integral_v = is_integral<T>::value;
 template<typename T>
-NBL_CONSTEXPR bool is_floating_point_v = is_floating_point<T>::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_floating_point_v = is_floating_point<T>::value;
 template<typename T>
-NBL_CONSTEXPR bool is_signed_v = is_signed<T>::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_signed_v = is_signed<T>::value;
 template<typename T>
-NBL_CONSTEXPR bool is_scalar_v = is_scalar<T>::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_scalar_v = is_scalar<T>::value;
 template<typename T>
-NBL_CONSTEXPR uint64_t size_of_v = size_of<T>::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t size_of_v = size_of<T>::value;
 template<typename T>
-NBL_CONSTEXPR uint32_t alignment_of_v = alignment_of<T>::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t alignment_of_v = alignment_of<T>::value;
+template<typename T>
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_fundamental_v = is_fundamental<T>::value;
+
 // Overlapping definitions
 template<typename T>
 using make_void_t = typename make_void<T>::type;
+template<typename T>
+using make_signed_t = typename make_signed<T>::type;
+
+template<typename T>
+using make_unsigned_t = typename make_unsigned<T>::type;
+
 template<bool C, typename T, T A, T B>
 struct conditional_value
 {
@@ -674,7 +685,7 @@ template
 struct is_vector<vector<T, N> > : bool_constant<true> {};
 
 template<typename T>
-NBL_CONSTEXPR bool is_vector_v = is_vector<T>::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_vector_v = is_vector<T>::value;
 
 #ifndef __HLSL_VERSION
 template<typename T>
@@ -685,7 +696,7 @@ template
 struct is_matrix<matrix<T, N, M> > : bool_constant<true> {};
 
 template<typename T>
-NBL_CONSTEXPR bool is_matrix_v = is_matrix<T>::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_matrix_v = is_matrix<T>::value;
 
 template
@@ -721,16 +732,16 @@ struct extent : integral_constant<uint64_t, extent<T, I-1>::value> {};
 template<class T, uint64_t N, uint32_t I>
 struct extent<T[N], I> : integral_constant<uint64_t, extent<T, I-1>::value> {};
 
-template<typename T, uint32_t N>
-struct extent<vector<T, N>, 0> : integral_constant<uint64_t, N> {};
+template<typename T, uint32_t N, uint32_t I>
+struct extent<vector<T, N>, I> : extent<T[N], I> {};
 
 template<typename T, uint32_t N, uint32_t M, uint32_t I>
-struct extent<matrix<T, N, M>, I> : integral_constant<uint64_t, extent<T, I-1>::value> {};
+struct extent<matrix<T, N, M>, I> : extent<T[N][M], I> {};
 
 // Template Variables
 
 template<typename T, uint32_t I>
-NBL_CONSTEXPR uint64_t extent_v = extent<T, I>::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t extent_v = extent<T, I>::value;
 
 template<typename T, bool C = is_fundamental<T>::value>
@@ -844,15 +855,6 @@ struct float_of_size<8>
 template<uint32_t N>
 using float_of_size_t = typename float_of_size<N>::type;
 
-template<typename T, uint32_t N>
-struct extent<vector<T, N>, 0> : integral_constant<uint64_t, N> {};
-
-template<typename T, uint32_t N, uint32_t M>
-struct extent<matrix<T, N, M>, 0> : integral_constant<uint64_t, N> {};
-
-template<typename T, uint32_t N, uint32_t M>
-struct extent<matrix<T, N, M>, 1> : integral_constant<uint64_t, M> {};
-
 }
 }
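One note on the select constraint above: the comment's (p -> q) = (~p v q) is just the implication "if Condition is a vector, then ResultType must be a vector of matching extent" encoded as !Vector<Condition> || (Vector<ResultType> && equal extents), since OpSelect with a vector condition picks per component. A usage sketch (template arguments are normally deduced):

// Sketch only: component-wise OpSelect through the new wrapper.
const bool3 mask = bool3(true, false, true);
const float32_t3 a = float32_t3(1.0, 2.0, 3.0);
const float32_t3 b = float32_t3(9.0, 8.0, 7.0);
const float32_t3 picked = nbl::hlsl::spirv::select(mask, a, b); // (1.0, 8.0, 3.0)
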
diff --git a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl
index 03ccd64d4e..22c93ce193 100644
--- a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl
+++ b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl
@@ -225,7 +225,7 @@ template
 struct is_configuration<ArithmeticConfiguration<...> > : bool_constant<true> {};
 
 template<typename T>
-NBL_CONSTEXPR bool is_configuration_v = is_configuration<T>::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_configuration_v = is_configuration<T>::value;
 
 }
 }
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index 1d6c224ce9..bbadd5f6fc 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -145,10 +145,12 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/indirect_commands.hlsl")
 # emulated
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/int64_t.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl")
 # portable
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/float64_t.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/int64_t.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/vector_t.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/matrix_t.hlsl")
 # ieee754
@@ -177,6 +179,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/basic.h")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/intrinsics.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/truncate.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/intrinsics_impl.hlsl")
 #glsl compat
@@ -348,5 +351,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl")
 #blur
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl")
+#morton codes
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl")
 
 ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL")
\ No newline at end of file
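Build-system note: LIST_BUILTIN_RESOURCE is what embeds a header and makes it reachable under its nbl/builtin include path, so after this change shaders can pull the new pieces in directly (sketch):

// Sketch only: the freshly registered headers resolve like any other builtin.
#include <nbl/builtin/hlsl/emulated/int64_t.hlsl>
#include <nbl/builtin/hlsl/portable/int64_t.hlsl>
#include <nbl/builtin/hlsl/morton.hlsl>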