diff --git a/examples_tests b/examples_tests index eebde787c2..e377c1f524 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit eebde787c233367ade8eb0580bc79c0d562e97aa +Subproject commit e377c1f52402a79a6d1be6d0a1655910241ed08f diff --git a/include/nbl/asset/asset.h b/include/nbl/asset/asset.h index 3d21842195..1767908a6b 100644 --- a/include/nbl/asset/asset.h +++ b/include/nbl/asset/asset.h @@ -41,6 +41,9 @@ #include "nbl/asset/utils/CGLSLCompiler.h" #include "nbl/asset/utils/CSPIRVIntrospector.h" +// material compiler +#include "nbl/asset/material_compiler3/CFrontendIR.h" + // pipelines // skinning diff --git a/include/nbl/asset/material_compiler3/CFrontendIR.h b/include/nbl/asset/material_compiler3/CFrontendIR.h index 4269af5d6f..f8427aba5e 100644 --- a/include/nbl/asset/material_compiler3/CFrontendIR.h +++ b/include/nbl/asset/material_compiler3/CFrontendIR.h @@ -1,20 +1,13 @@ -// Copyright (C) 2022-2025 - DevSH Graphics Programming Sp. z O.O. + +// Copyright (C) 2022-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_MATERIAL_COMPILER_V3_C_FRONTEND_IR_H_INCLUDED_ #define _NBL_ASSET_MATERIAL_COMPILER_V3_C_FRONTEND_IR_H_INCLUDED_ -#include "nbl/system/ILogger.h" +#include "nbl/asset/material_compiler3/CTrueIR.h" -#include "nbl/asset/material_compiler3/CNodePool.h" -#include "nbl/asset/format/EColorSpace.h" -#include "nbl/asset/ICPUImageView.h" - - - -// temporary -#define NBL_API namespace nbl::asset::material_compiler3 { @@ -35,7 +28,7 @@ namespace nbl::asset::material_compiler3 // // f(w_i,w_o) = Sum_i^N Product_j^{N_i} h_{ij}(w_i,w_o) l_i(w_i,w_o) // -// Where `l(w_i,w_o)` is a Contributor Node BxDF such as Oren Nayar or Cook-Torrance, which is doesn't model absorption and is usually Monochrome. +// Where `l(w_i,w_o)` is a Contributor Node BxDF such as Oren Nayar or Cook-Torrance, which doesn't model absorption and is usually Monochrome. // These are assumed to be 100% valid BxDFs with White Furnace Test <= 1 and obeying Helmholtz Reciprocity. This is why you can't multiply two "Contributor Nodes" together. // We make an attempt to implement Energy Normalized versions of `l_i` but not always, so there might be energy loss due to single scattering assumptions. // @@ -71,9 +64,9 @@ namespace nbl::asset::material_compiler3 // I've considered expressing the layers using only a BTDF and BRDF (same top and bottom hemisphere) but that would lead to more layers for materials, // requiring the placing of a mirror then a vantablack layer for most one-sided materials, and most importantly disallow the expression of certain front-back correlations. // -// Because we implement Schussler et. al 2017 we also ensure that signs of dot products with shading normals are identical to smooth normals. +// Because we implement Schussler et al. 2017 or Yining 2019 we also ensure that signs of dot products with shading normals are identical to smooth normals. // However the smooth normals are not identical to geometric normals, so we reserve the right to use the "normal pull up trick" to make them consistent. -// Schussler can't help with disparity of Smooth Normal and Geometric Normal, it turns smooth surfaces into glistening "disco balls" really outlining the +// Schussler and Yining can't help with the disparity of Smooth Normal and Geometric Normal, it turns smooth surfaces into glistening "disco balls" really outlining the // polygonization.
Using PN-Triangles/displacement would be the optimal solution here. class CFrontendIR final : public CNodePool { @@ -90,116 +83,6 @@ class CFrontendIR final : public CNodePool return nullptr; return core::smart_refctd_ptr(new CFrontendIR(std::move(params)),core::dont_grab); } - template - static inline void printMatrix(std::ostringstream& sstr, const hlsl::matrix& m) - { - for (uint16_t i=0; i::infinity() && (!view || viewChannelgetCreationParameters().format)); - } - inline bool operator!=(const SParameter& other) const - { - if (scale!=other.scale) - return true; - if (viewChannel!=other.viewChannel) - return true; - // don't compare paddings! - if (view!=other.view) - return true; - return sampler!=other.sampler; - } - inline bool operator==(const SParameter& other) const {return !operator!=(other);} - - NBL_API2 void printDot(std::ostringstream& sstr, const core::string& selfID) const; - - // at this stage we store the multipliers in highest precision - float scale = std::numeric_limits::infinity(); - // rest are ignored if the view is null - uint8_t viewChannel : 2 = 0; - uint8_t padding[3] = {0,0,0}; // TODO: padding stores metadata, shall we exclude from assignment and copy operators? - core::smart_refctd_ptr view = {}; - // shadow comparison functions are ignored - // NOTE: could take only things that matter from the sampler and pack the viewChannel and reduce padding - ICPUSampler::SParams sampler = {}; - }; - // In the forest, this is not a node, we'll deduplicate later - template - struct SParameterSet - { - private: - friend class CSpectralVariable; - template - inline void printDot(const uint8_t _count, std::ostringstream& sstr, const core::string& selfID, StringConstIterator paramNameBegin={}, const bool uvRequired=false) const - { - bool imageUsed = false; - for (uint8_t i=0; i<_count; i++) - { - const auto paramID = selfID+"_param"+std::to_string(i); - if (params[i].view) - imageUsed = true; - params[i].printDot(sstr,paramID); - sstr << "\n\t" << selfID << " -> " << paramID; - if (paramNameBegin) - sstr <<" [label=\"" << *(paramNameBegin++) << "\"]"; - else - sstr <<" [label=\"Param " << std::to_string(i) <<"\"]"; - } - if (uvRequired || imageUsed) - { - const auto uvTransformID = selfID+"_uvTransform"; - sstr << "\n\t" << uvTransformID << " [label=\"uvSlot = " << std::to_string(uvSlot()) << "\\n"; - printMatrix(sstr,*reinterpret_cast(params+_count)); - sstr << "\"]"; - sstr << "\n\t" << selfID << " -> " << uvTransformID << "[label=\"UV Transform\"]"; - } - } - - public: - inline operator bool() const - { - for (uint8_t i=0; i0); - - template - inline void printDot(std::ostringstream& sstr, const core::string& selfID, StringConstIterator paramNameBegin={}, const bool uvRequired=false) const - { - printDot(Count,sstr,selfID,std::forward(paramNameBegin),uvRequired); - } - - // identity transform by default, ignored if no UVs - // NOTE: a transform could be applied per-param, whats important that the UV slot remains the smae across all of them. 
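// Example: a rough sketch of how the Sum-Product form `f(w_i,w_o) = Sum_i^N Product_j^{N_i} h_{ij} l_i`
// from the header comment maps onto frontend nodes, for a Fresnel-weighted glossy lobe plus a diffuse
// remainder. A `createAdd` helper and a public `setChild` are assumed to exist alongside the
// `createMul`/`createFresnel` helpers declared further down; exact emplace signatures may differ:
//
//   auto ast = CFrontendIR::create({.composed={.blockSizeKBLog2=10},.maxBlocks=64});
//   auto& pool = ast->getObjectPool();
//   // term 0: l_0 = Cook-Torrance contributor, h_00 = Fresnel factor
//   const auto fresnelH = ast->createFresnel(1.5f);
//   const auto glossyH = ast->createMul(pool.emplace<CCookTorrance>(),fresnelH);
//   // term 1: l_1 = Oren-Nayar contributor, h_10 = 1-Fresnel (energy left after reflection)
//   const auto complementH = pool.emplace<CComplement>();
//   pool.deref(complementH)->setChild(0,fresnelH);
//   const auto diffuseH = ast->createMul(pool.emplace<COrenNayar>(),complementH);
//   // f = h_00*l_0 + h_10*l_1, contributors only ever sit under the first (lhs) child of a MUL
//   const auto rootExprH = ast->createAdd(glossyH,diffuseH);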
- hlsl::float32_t2x3 uvTransform = hlsl::float32_t2x3( - 1,0,0, - 0,1,0 - ); - SParameter params[Count] = {}; - - // to make sure there will be no padding inbetween - static_assert(alignof(SParameter)>=alignof(hlsl::float32_t2x3)); - }; // basic "built-in" nodes class INode : public CNodePool::INode @@ -258,6 +141,7 @@ class CFrontendIR final : public CNodePool return retval; } + // TODO: rename to `EType` and the `getType` to `getExprNodeType` // A "contributor" of a term to the lighting equation: a BxDF (reflection or tranmission) or Emitter term // Contributors are not allowed to be multiplied together, but every additive term in the Expression must contain a contributor factor. enum class Type : uint8_t @@ -265,7 +149,9 @@ class CFrontendIR final : public CNodePool Contributor = 0, Mul = 1, Add = 2, - Other = 3 + Complement = 3, + SpectralVariable = 4, + Other = 5 }; virtual inline Type getType() const {return Type::Other;} @@ -275,11 +161,7 @@ class CFrontendIR final : public CNodePool virtual _typed_pointer_type copy(CFrontendIR* ir) const = 0; #define COPY_DEFAULT_IMPL inline _typed_pointer_type copy(CFrontendIR* ir) const override final \ { \ - auto& pool = ir->getObjectPool(); \ - const auto copyH = pool.emplace > >(); \ - if (auto* const copy = pool.deref(copyH); copyH) \ - *copy = *this; \ - return copyH; \ + return CNodePool::copyNode > >(this,ir); \ } // child managment @@ -326,156 +208,44 @@ class CFrontendIR final : public CNodePool { public: inline Type getType() const override final {return Type::Contributor;} + + protected: + friend class CFrontendIR; + using ir_contributor_handle_t = CTrueIR::typed_pointer_type; + virtual ir_contributor_handle_t createIRNode(const bool forBTDF, const CFrontendIR* ast, CTrueIR* ir) const = 0; }; // This node could also represent non directional emission, but we have another node for that - class CSpectralVariable final : public obj_pool_type::IVariableSize, public IExprNode + class ISpectralVariableExpr : public CTrueIR::ISpectralVariable, public IExprNode { public: - inline uint8_t getChildCount() const override final { return 0; } - inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(CSpectralVariable);} // Variable length but has no children - - enum class Semantics : uint8_t - { - NoneUndefined = 0, - // 3 knots, their wavelengths are implied and fixed at color primaries - Fixed3_SRGB = 1, - Fixed3_DCI_P3 = 2, - Fixed3_BT2020 = 3, - Fixed3_AdobeRGB = 4, - Fixed3_AcesCG = 5, - // Ideas: each node is described by (wavelength,value) pair - // PairsLinear = 5, // linear interpolation - // PairsLogLinear = 5, // linear interpolation in wavelenght log space - }; + inline uint8_t getChildCount() const override final {return 0;} // - template - struct SCreationParams - { - // Knots are "data points" on the (wavelength,value) plot, from which we can interpolate the rest of the spectrum - SParameterSet knots = {}; - - // a little bit of abuse and padding reuse - static_assert(sizeof(SParameter::padding)>2); - template requires (Enable==(Count>1)) - Semantics& getSemantics() {return reinterpret_cast(knots.params[0].padding[2]); } - template requires (Enable==(Count>1)) - const Semantics& getSemantics() const {return const_cast(const_cast*>(this)->getSemantics());} - }; - // - template - inline CSpectralVariable(SCreationParams&& params) - { - // back up the count - params.knots.params[0].padding[1] = Count; - // set it correctly for monochrome - if constexpr (Count==1) - params.knots.params[0].padding[2] = 
static_cast(Semantics::NoneUndefined); - else - { - assert(params.getSemantics()!=Semantics::NoneUndefined); - } - std::construct_at(reinterpret_cast*>(this+1),std::move(params)); - } - inline CSpectralVariable(const CSpectralVariable& other) - { - std::uninitialized_copy_n(other.pWonky(),other.getKnotCount(),pWonky()); - } - - // encapsulation due to padding abuse - inline uint8_t& uvSlot() {return pWonky()->knots.uvSlot();} - inline const uint8_t& uvSlot() const {return pWonky()->knots.uvSlot();} - - // these getters are immutable - inline uint8_t getKnotCount() const - { - static_assert(sizeof(SParameter::padding)>1); - return paramsBeginPadding()[1]; - } - inline Semantics getSemantics() const - { - static_assert(sizeof(SParameter::padding)>2); - const auto retval = static_cast(paramsBeginPadding()[2]); - assert((getKnotCount()==1)==(retval==Semantics::NoneUndefined)); - return retval; - } + inline IExprNode::Type getType() const override final {return Type::SpectralVariable;} // - inline SParameter* getParam(const uint8_t i) - { - if (iknots.params[i]; - return nullptr; - } - inline const SParameter* getParam(const uint8_t i) const {return const_cast(const_cast(this)->getParam(i));} - - // - template - static inline uint32_t calc_size(const SCreationParams&) - { - return sizeof(CSpectralVariable)+sizeof(SCreationParams); - } - // for copying - static inline uint32_t calc_size(const CSpectralVariable& other) - { - return sizeof(CSpectralVariable)+sizeof(SCreationParams<1>)+(other.getKnotCount()-1)*sizeof(SParameter); - } - - inline operator bool() const {return !invalid(SInvalidCheckArgs{.pool=nullptr,.logger=nullptr});} + inline operator bool() const {return valid(nullptr);} protected: - inline ~CSpectralVariable() - { - std::destroy_n(pWonky()->knots.params,getKnotCount()); - } inline _typed_pointer_type copy(CFrontendIR* ir) const override final { - auto& pool = ir->getObjectPool(); - const uint8_t count = getKnotCount(); - return pool.emplace(*this); + return static_cast(this)->copy(ir->getObjectPool()); } - inline bool invalid(const SInvalidCheckArgs& args) const override - { - const auto knotCount = getKnotCount(); - // non-monochrome spectral variable - if (const auto semantic=getSemantics(); knotCount>1) - switch (semantic) - { - case Semantics::Fixed3_SRGB: [[fallthrough]]; - case Semantics::Fixed3_DCI_P3: [[fallthrough]]; - case Semantics::Fixed3_BT2020: [[fallthrough]]; - case Semantics::Fixed3_AdobeRGB: [[fallthrough]]; - case Semantics::Fixed3_AcesCG: - if (knotCount!=3) - { - args.logger.log("Semantic %d is only usable with 3 knots, this has %d knots",system::ILogger::ELL_ERROR,static_cast(semantic),knotCount); - return false; - } - break; - default: - args.logger.log("Invalid Semantic %d",system::ILogger::ELL_ERROR,static_cast(semantic)); - return true; - } - for (auto i=0u; i* pWonky() {return reinterpret_cast*>(this+1);} - const SCreationParams<1>* pWonky() const {return reinterpret_cast*>(this+1);} - const uint8_t* paramsBeginPadding() const {return pWonky()->knots.params[0].padding;} + // + friend class CFrontendIR; + NBL_API2 CTrueIR::typed_pointer_type createIRNode(const CFrontendIR* ast, CTrueIR* ir) const; }; + using CSpectralVariableExpr = CTrueIR::CSpectralVariable; // class IUnaryOp : public obj_pool_type::INonTrivial, public IExprNode { @@ -532,6 +302,7 @@ class CFrontendIR final : public CNodePool { public: inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(CComplement);} + inline Type getType() const override {return 
Type::Complement;} // you can set the children later inline CComplement() = default; @@ -552,7 +323,7 @@ class CFrontendIR final : public CNodePool // This can be anything like an IES profile, if invalid, there's no directionality to the emission // `profile.scale` can still be used to influence the light strength without influencing NEE light picking probabilities - SParameter profile = {}; + CTrueIR::SParameter profile = {}; hlsl::float32_t3x3 profileTransform = hlsl::float32_t3x3( 1,0,0, 0,1,0, @@ -566,6 +337,8 @@ class CFrontendIR final : public CNodePool NBL_API2 bool invalid(const SInvalidCheckArgs& args) const override; NBL_API2 void printDot(std::ostringstream& sstr, const core::string& selfID) const override; + + NBL_API2 ir_contributor_handle_t createIRNode(const bool forBTDF, const CFrontendIR* ast, CTrueIR* ir) const; }; //! Special nodes meant to be used as `CMul::rhs`, their behaviour depends on the IContributor in its MUL node relative subgraph. //! If you use a different contributor node type or normal for shading, these nodes get split and duplicated into two in our Final IR. @@ -589,10 +362,9 @@ class CFrontendIR final : public CNodePool // you can set the members later inline CBeer() = default; - // Effective transparency = exp2(log2(perpTransmittance)*thickness/dot(refract(V,X,eta),X)) = exp2(log2(perpTransmittance)*thickness*inversesqrt(1.f+(LdotX-1)*rcpEta)) - // Eta and `LdotX` is taken from the leaf BTDF node. With refractions from Dielectrics, we get just `1/LdotX`, for Delta Transmission we get `1/VdotN` since its the same - typed_pointer_type perpTransmittance = {}; - typed_pointer_type thickness = {}; + // See `CTrueIR::Beer` for docs + typed_pointer_type perpTransmittance = {}; + typed_pointer_type thickness = {}; protected: COPY_DEFAULT_IMPL @@ -600,10 +372,10 @@ class CFrontendIR final : public CNodePool inline typed_pointer_type getChildHandle_impl(const uint8_t ix) const override {return ix ? perpTransmittance:thickness;} inline void setChild_impl(const uint8_t ix, _typed_pointer_type newChild) override { - *(ix ? &perpTransmittance:&thickness) = block_allocator_type::_static_cast(newChild); + *(ix ? &perpTransmittance:&thickness) = block_allocator_type::_static_cast(newChild); } - inline std::string_view getChildName_impl(const uint8_t ix) const override {return "Perpendicular\\nTransmittance";} + inline std::string_view getChildName_impl(const uint8_t ix) const override {return ix ? "Thickness":"Perpendicular\\nTransmittance";} NBL_API2 bool invalid(const SInvalidCheckArgs& args) const override; }; // The "oriented" in the Etas means from frontface to backface, so there's no need to reciprocate them when creating matching BTDF for BRDF @@ -617,9 +389,9 @@ class CFrontendIR final : public CNodePool inline CFresnel() = default; // Already pre-divided Index of Refraction, e.g. exterior/interior since VdotG>0 the ray always arrives from the exterior. - typed_pointer_type orientedRealEta = {}; + typed_pointer_type orientedRealEta = {}; // Specifying this turns your Fresnel into a conductor one, note that currently these are disallowed on BTDFs! - typed_pointer_type orientedImagEta = {}; + typed_pointer_type orientedImagEta = {}; // if you want to reuse the same parameter but want to flip the interfaces around uint8_t reciprocateEtas : 1 = false; @@ -629,7 +401,7 @@ class CFrontendIR final : public CNodePool inline typed_pointer_type getChildHandle_impl(const uint8_t ix) const override {return ix ? 
orientedImagEta:orientedRealEta;} inline void setChild_impl(const uint8_t ix, _typed_pointer_type newChild) override { - *(ix ? &orientedImagEta:&orientedRealEta) = block_allocator_type::_static_cast(newChild); + *(ix ? &orientedImagEta:&orientedRealEta) = block_allocator_type::_static_cast(newChild); } NBL_API2 bool invalid(const SInvalidCheckArgs& args) const override; @@ -710,46 +482,7 @@ class CFrontendIR final : public CNodePool class IBxDF : public obj_pool_type::INonTrivial, public IContributor { public: - // Why are all of these kept together and forced to fetch from the same UV ? - // Because they're supposed to be filtered together with the knowledge of the NDF - // TODO: should really be 5 parameters (2+3) cause of rotatable anisotropic roughness - struct SBasicNDFParams : SParameterSet<4> - { - inline auto getDerivMap() {return std::span(params,2);} - inline auto getDerivMap() const {return std::span(params,2);} - inline auto getRougness() {return std::span(params+2,2);} - inline auto getRougness() const {return std::span(params+2,2);} - - inline SBasicNDFParams() - { - // initialize with constant flat deriv map and smooth roughness - for (auto& param : params) - param.scale = 0.f; - } - - // conservative check, checks if we can optimize certain things this way - inline bool definitelyIsotropic() const - { - // a derivative map from a texture allows for anisotropic NDFs at higher mip levels when pre-filtered properly - for (auto i=0; i<2; i++) - if (getDerivMap()[i].scale!=0.f && getDerivMap()[i].view) - return false; - // if roughness inputs are not equal (same scale, same texture) then NDF can be anisotropic in places - if (getRougness()[0]!=getRougness()[1]) - return false; - // if a reference stretch is used, stretched triangles can turn the distribution anisotropic - return stretchInvariant(); - } - // whether the derivative map and roughness is constant regardless of UV-space texture stretching - inline bool stretchInvariant() const {return !(abs(hlsl::determinant(reference))>std::numeric_limits::min());} - - NBL_API2 void printDot(std::ostringstream& sstr, const core::string& selfID) const; - - // Ignored if not invertible, otherwise its the reference "stretch" (UV derivatives) at which identity roughness and normalmapping occurs - hlsl::float32_t2x2 reference = hlsl::float32_t2x2(0,0,0,0); - }; - - // For Schussler et. al 2017 we'll spawn 2-3 additional BRDF leaf nodes in the proper IR for every normalmap present + // ? }; // Delta Transmission is the only Special Delta Distribution Node, because of how useful it is for compiling Anyhit shaders, the rest can be done easily with: // - Delta Reflection -> Any Cook Torrance BxDF with roughness=0 attached as BRDF @@ -775,6 +508,8 @@ class CFrontendIR final : public CNodePool protected: COPY_DEFAULT_IMPL + + NBL_API2 ir_contributor_handle_t createIRNode(const bool forBTDF, const CFrontendIR* ast, CTrueIR* ir) const; }; //! Because of Schussler et. 
al 2017 every one of these nodes splits into 2 (if no L dependence) or 3 during canonicalization // Base diffuse node @@ -783,9 +518,12 @@ class CFrontendIR final : public CNodePool public: inline uint8_t getChildCount() const override {return 0;} inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(COrenNayar);} - inline COrenNayar() = default; + inline COrenNayar() + { + ndParams.getDistribution() = CTrueIR::SBasicNDFParams::EDistribution::Invalid; + } - SBasicNDFParams ndParams = {}; + CTrueIR::SBasicNDFParams ndParams = {}; protected: COPY_DEFAULT_IMPL @@ -793,6 +531,8 @@ class CFrontendIR final : public CNodePool NBL_API2 bool invalid(const SInvalidCheckArgs& args) const override; NBL_API2 void printDot(std::ostringstream& sstr, const core::string& selfID) const override; + + NBL_API2 ir_contributor_handle_t createIRNode(const bool forBTDF, const CFrontendIR* ast, CTrueIR* ir) const; }; // Supports anisotropy for all models class CCookTorrance final : public IBxDF @@ -800,47 +540,51 @@ class CFrontendIR final : public CNodePool public: inline uint8_t getChildCount() const override {return 1;} - enum class NDF : uint8_t + inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(CCookTorrance);} + + inline CCookTorrance() { - GGX = 0, - Beckmann = 1 - }; + ndParams.getDistribution() = CTrueIR::SBasicNDFParams::EDistribution::GGX; + } + + inline bool isEtaReciprocal() const {return ndParams.params[2].padding[0];} + inline void setEtaReciprocal(const bool value) {ndParams.params[2].padding[0] = value;} - inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(CCookTorrance);} - inline CCookTorrance() = default; - - SBasicNDFParams ndParams = {}; - // We need this eta to compute the refractions of `L` when importance sampling and the Jacobian during H to L generation for rough dielectrics - // It does not mean we compute the Fresnel weights though! You might ask why we don't do that given that state of the art importance sampling - // (at time of writing) is to decide upon reflection vs. refraction after the microfacet normal `H` is already sampled, - // producing an estimator with just Masking and Shadowing function ratios. The reason is because we can simplify our IR by separating out - // BRDFs and BTDFs components into separate expressions, and also importance sample much better, for details see comments in CTrueIR. - typed_pointer_type orientedRealEta = {}; - // - NDF ndf : 7 = NDF::GGX; - uint8_t reciprocateEta : 1 = false; + CTrueIR::SBasicNDFParams ndParams = {}; + // See the comments in CTrueIR about this on a matching class + typed_pointer_type orientedRealEta = {}; protected: COPY_DEFAULT_IMPL inline typed_pointer_type getChildHandle_impl(const uint8_t ix) const override {return orientedRealEta;} - inline void setChild_impl(const uint8_t ix, _typed_pointer_type newChild) override {orientedRealEta = block_allocator_type::_static_cast(newChild);} + inline void setChild_impl(const uint8_t ix, _typed_pointer_type newChild) override {orientedRealEta = block_allocator_type::_static_cast(newChild);} NBL_API2 bool invalid(const SInvalidCheckArgs& args) const override; inline bool reciprocatable() const override {return true;} inline void reciprocate(IExprNode* dst) const override { - (*static_cast(dst) = *this).reciprocateEta = ~reciprocateEta; + (*static_cast(dst) = *this).setEtaReciprocal(!isEtaReciprocal()); } - inline core::string getLabelSuffix() const override {return ndf!=NDF::GGX ? 
"\\nNDF = Beckmann":"\\nNDF = GGX";} + inline core::string getLabelSuffix() const override + { + return ndParams.getDistribution()!=CTrueIR::SBasicNDFParams::EDistribution::GGX ? "\\nNDF = Beckmann":"\\nNDF = GGX"; + } inline std::string_view getChildName_impl(const uint8_t ix) const override {return "Oriented η";} NBL_API2 void printDot(std::ostringstream& sstr, const core::string& selfID) const override; + + NBL_API2 ir_contributor_handle_t createIRNode(const bool forBTDF, const CFrontendIR* ast, CTrueIR* ir) const; }; #undef COPY_DEFAULT_IMPL #undef TYPE_NAME_STR + // + inline void reset() + { + getObjectPool().reset(); + } // basic utilities inline typed_pointer_type createMul(const typed_pointer_type lhs, const typed_pointer_type rhs) @@ -888,20 +632,27 @@ class CFrontendIR final : public CNodePool { auto& pool = getObjectPool(); const auto fresnelH = pool.emplace(); - CSpectralVariable::SCreationParams<1> params = {}; - params.knots.params[0].scale = orientedRealEta; if (auto* const fresnel=pool.deref(fresnelH); fresnel) - fresnel->orientedRealEta = pool.emplace(std::move(params)); + { + fresnel->orientedRealEta = pool.emplace(uint8_t(1)); + if (auto* const var=pool.deref(fresnel->orientedRealEta); var) + var->setParameter(0,{.scale=orientedRealEta}); + else + return {}; + } return fresnelH; } + + // To copy every node in the tree keeping same dedup, optionally can take an `orig` from another AST/pool and have the reciprocal copy over to our pool + NBL_API2 typed_pointer_type deepCopy(const typed_pointer_type orig, const CFrontendIR* pSourceIR=nullptr); - // To quickly make a matching backface BxDF from a frontface or vice versa - NBL_API2 typed_pointer_type reciprocate(const typed_pointer_type orig); + // To quickly make a matching backface BxDF from a frontface or vice versa, optionally can take an `orig` from another AST/pool and have the reciprocal copy over to our pool + NBL_API2 typed_pointer_type reciprocate(const typed_pointer_type orig, const CFrontendIR* pSourceIR=nullptr); - // a deep copy of the layer stack, wont copy the BxDFs - NBL_API2 typed_pointer_type copyLayers(const typed_pointer_type orig); - // Reverse the linked list of layers and reciprocate their Etas - NBL_API2 typed_pointer_type reverse(const typed_pointer_type orig); + // a deep copy of the layer stack, wont copy the BxDFs, optionally can take an `orig` from another AST/pool and have the reciprocal copy over to our pool + NBL_API2 typed_pointer_type copyLayers(const typed_pointer_type orig, const CFrontendIR* pSourceIR=nullptr); + // Reverse the linked list of layers and reciprocate their Etas, optionally can take an `orig` from another AST/pool and have the reciprocal copy over to our pool + NBL_API2 typed_pointer_type reverse(const typed_pointer_type orig, const CFrontendIR* pSourceIR=nullptr); // first query, we check presence of btdf layers all the way through the layer stack inline bool transmissive(const typed_pointer_type rootHandle) const @@ -921,24 +672,53 @@ class CFrontendIR final : public CNodePool // Some things we can't check such as the compatibility of the BTDF with the BRDF (matching indices of refraction, etc.) 
NBL_API2 bool valid(const typed_pointer_type rootHandle, system::logger_opt_ptr logger) const; - inline std::span> getMaterials() {return m_rootNodes;} + // Each material comes down to this, after lowering to the true IR `ir` the indices into `ir->getMaterials()` are returned + // We take the trees from the forest, and canonicalize them into our weird Domain Specific IR with Upside down expression trees. + // Process: + // 1. Decompression (duplicating nodes, etc.) + // 2. Canonicalize Expressions (Transform into Sum-Product form, DCE, etc.) + // 3. Split BTDFs (front vs. back part), reciprocate Etas + // 4. Simplify and Hoist Layer terms (delta sampling property) + // 5. Subexpression elimination + // Further transforms in the IR can be done by invoking IR passes + struct SAddMaterialsArgs + { + explicit inline operator bool() const {return !rootNodes.empty() && ir && result;} - // Each material comes down to this, YOU MUST NOT MODIFY THE NODES AFTER ADDING THEIR PARENT TO THE ROOT NODES! - // TODO: shall we copy and hand out a new handle? Allow RootNode from a foreign const pool - inline bool addMaterial(const typed_pointer_type rootNode, system::logger_opt_ptr logger) + std::span> rootNodes; + CTrueIR* ir; + CTrueIR::SMaterialHandle* result; + system::logger_opt_ptr logger; + }; + // returns the number of materials successfully converted + inline uint32_t addMaterials(const SAddMaterialsArgs args) const { - if (valid(rootNode,logger)) + uint32_t retval = 0; + if (!args) { - m_rootNodes.push_back(rootNode); - return true; + args.logger.log("Invalid Arguments to `CFrontendIR::addMaterials`",system::ILogger::ELL_ERROR); + return retval; } - return false; + SAdd2IRSession session = {args}; + auto outIt = args.result; + for (const auto& rootH : args.rootNodes) + { + if (!rootH) // it's a valid material (blackhole) + *outIt = CTrueIR::BlackholeMaterialHandle; + else if (valid(rootH,args.logger)) + *outIt = session.makeFinalIR(rootH,this); + else // keep failed slots well defined + *outIt = {}; + // now check for failure + if (*outIt) + retval++; + outIt++; + } + return retval; } // For Debug Visualization struct SDotPrinter final { public: + inline SDotPrinter() = default; inline SDotPrinter(const CFrontendIR* ir) : m_ir(ir) {} // assign in reverse because we want materials to print in order inline SDotPrinter(const CFrontendIR* ir, std::span> roots) : m_ir(ir), layerStack(roots.rbegin(),roots.rend()) @@ -947,6 +727,14 @@ class CFrontendIR final : public CNodePool visitedNodes.reserve(roots.size()<<3); } + inline void reset(const CFrontendIR* ir) + { + visitedNodes.clear(); + layerStack.clear(); + exprStack.clear(); + m_ir = ir; + } + NBL_API2 void operator()(std::ostringstream& output); inline core::string operator()() { @@ -958,13 +746,121 @@ class CFrontendIR final : public CNodePool core::unordered_set> visitedNodes; // TODO: track layering depth and indent accordingly?
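// Example: a sketch of driving the lowering above, with `frontend`, `rootNodes` and `logger` as
// placeholders for an already-built and validated AST:
//
//   auto trueIR = CTrueIR::create({.composed={.blockSizeKBLog2=12},.maxBlocks=64});
//   std::vector<CTrueIR::SMaterialHandle> results(rootNodes.size());
//   const uint32_t converted = frontend->addMaterials({
//     .rootNodes = rootNodes, // null handles lower to BlackholeMaterialHandle
//     .ir = trueIR.get(),
//     .result = results.data(),
//     .logger = logger
//   });
//   // converted < rootNodes.size() means some materials failed validation or lowering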
core::vector> layerStack; - core::stack> exprStack; + core::vector> exprStack; private: - const CFrontendIR* m_ir; + const CFrontendIR* m_ir = nullptr; }; protected: using CNodePool::CNodePool; + + struct SAdd2IRSession final + { + public: + inline SAdd2IRSession(const SAddMaterialsArgs& _args) : args(_args) + { + tmpAST = CFrontendIR::create({.composed={.blockSizeKBLog2=10},.maxBlocks=64}); + // give slightly more memory to IR, since the AST tends to be a bit more compact + tmpIR = CTrueIR::create({.composed={.blockSizeKBLog2=12},.maxBlocks=64}); + } + + NBL_API2 CTrueIR::SMaterialHandle makeFinalIR(const typed_pointer_type rootH, const CFrontendIR* ast); + + private: + inline void printSubtree(const CFrontendIR::typed_pointer_type nodeH) + { + assert(astPrinter.exprStack.empty()); + astPrinter.exprStack.push_back(nodeH); + args.logger.log("Subtree Dot3 : \n%s\n",system::ILogger::ELL_DEBUG,astPrinter().c_str()); + assert(astPrinter.exprStack.empty()); + astPrinter.visitedNodes.clear(); + } + inline void printIRLayer(const CTrueIR::typed_pointer_type layerH, const CTrueIR* ir) + { + irPrinter.reset(ir); + irPrinter.layerStack.push_back(layerH); + args.logger.log("IR Layer Dot3 : \n%s\n",system::ILogger::ELL_DEBUG,irPrinter().c_str()); + irPrinter.visitedNodes.clear(); + } + + using oriented_material_t = CTrueIR::SMaterial::SOriented; + NBL_API2 oriented_material_t makeOrientedMaterial(const CFrontendIR::typed_pointer_type rootH, const CFrontendIR* _srcAST); + + NBL_API2 CTrueIR::typed_pointer_type makeContributors(const CFrontendIR::typed_pointer_type bxdfRootH); + + // inputs to the addMaterials function + const SAddMaterialsArgs& args; + // for rewriting AST expressions + core::smart_refctd_ptr tmpAST; + // for making IR nodes before we Merkle Hash them and remove duplicates (so main IR doesn't get bloated) + core::smart_refctd_ptr tmpIR; + // changes dynamically + const CFrontendIR* srcAST; + SDotPrinter astPrinter; + CTrueIR::SDotPrinter irPrinter; + bool btdfSubtree = false; + // for going over layers in the AST + core::vector layerStack; + // Distribute/hoist the ADD over the MUL. So replace a `MUL ADD A B C` with `ADD MUL A C MUL B C` + struct SFactor + { + struct SContributor + { + CTrueIR::typed_pointer_type handle = {}; + uint32_t monochrome : 1 = true; + uint32_t isContributor : 1 = true; + uint32_t zeroValue : 30 = 0; + }; + // these are the parts that need to be sorted + struct SOrdered + { + CTrueIR::typed_pointer_type handle = {}; + uint32_t monochrome : 1 = true; + // 0 for contributors and leaf factors, contributors can be told apart from leaf factors by having 0s in the whole DWORD here + uint32_t padding : 31 = 0; + }; + + // TODO: do this better, check whole DWORD is 0 + inline bool isContributor() const {return contributor.monochrome && contributor.isContributor && contributor.zeroValue==0;} + + union + { + CTrueIR::typed_pointer_type typeless; + // contributor is always monochrome, etc. + SContributor contributor; + // the fatter thing which actually can be monochrome, etc. + SOrdered factor = {}; + }; + }; + // Holds the single `Product_j` of full expression in the form: + // f(w_i,w_o) = Sum_i^N Product_j^{N_i} h_{ij}(w_i,w_o) l_i(w_i,w_o) + // Everything on the `irChain` multiplies together, everything on the `astStack` before the current top is our relative through a MUL node. + // CONTRIBUTOR and OTHER are leaf nodes which don't add any children onto the `astStack`. 
+ // ADD node (and COMPLEMENT which is a specialization of `ADD 1 (-X)`) duplicates the `astStack`; the ADD node at the top of the stack + // itself was in a MUL relationship with all of the preceding AST nodes, which are not explored yet, so its children will be too. + // The key is to not add both children of the ADD onto the same `astStack` because they themselves are not MUL together. + struct SCanonicalProduct + { + // Deal with optimizing this later on, not sure if `DoublyLinkedList` is appropriate, maybe I'd need a `DoublyLinkedBeadedCurtain` data structure + // also the mulChain needs to be sorted later on, and a doubly linked list is a PITA to sort + core::vector> astStack = {}; // it's also a stack + core::vector irChain = {}; + // this is to help us hash in reverse properly + CTrueIR::typed_pointer_type sumTermH = {}; + // Expressions for `h_{ij}` can also have ADD/MUL inside and we distribute and canonicalize them at the same time + uint8_t hasContributor : 1 = false; + // extend later when allowing variable bucket count + uint8_t negate : 3 = 0b000; + uint8_t liveSpectralChannels : 3 = 0b111; + }; + // We rework the expression Top down because Bottom up would require descent from the top anyway to find ADD within MUL. + // The List> allows us to descend the AST dually with multiple traversals at once, so we never actually need to rewrite the AST into something else. + core::list canonicalSum; + // TODO: Visit Cache? Every original AST node has its own set of partial mul chains which can be slapped on later ? + // For the visited cache, we'd somehow need to cache a "2D slice" from this, meaning storing a part of the `irChain` prefix of a node + // (would need linked list IR so span/front-back of an irChain subsection can be kept) + // Maybe we could actually keep each IExprNode's irChains as a reference to another IExprNode's irChains with a bitmask of the terms to take (merge sort to apply during traversal) + }; inline core::string getNodeID(const typed_pointer_type handle) const {return core::string("_")+std::to_string(handle.value);} inline core::string getLabelledNodeID(const typed_pointer_type handle) const @@ -983,9 +879,43 @@ class CFrontendIR final : public CNodePool retval += "\"]"; return retval; } +}; + +template class CTrueIR::CSpectralVariable; +} + +// specialize the `to_string` helper +namespace nbl::system::impl +{ +template<> +struct to_string_helper +{ + using type = nbl::asset::material_compiler3::CFrontendIR::IExprNode::Type; - core::vector> m_rootNodes; + static inline std::string __call(const type value) + { + switch (value) + { + case type::Contributor: + return "Contributor"; + case type::Mul: + return "Mul"; + case type::Add: + return "Add"; + case type::Complement: + return "Complement"; + case type::SpectralVariable: + return "SpectralVariable"; + case type::Other: + return "Other"; + default: + break; + } + return ""; + } }; +} + +namespace nbl::asset::material_compiler3 +{ inline bool CFrontendIR::valid(const typed_pointer_type rootHandle, system::logger_opt_ptr logger) const { @@ -1008,10 +938,9 @@ inline bool CFrontendIR::valid(const typed_pointer_type rootHandle core::stack exprStack; // why a separate stack to the main one? Because we don't push siblings.
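// Worked example of the hoist rule described in `SAdd2IRSession` above: the AST `MUL(ADD(A,B),C)`
// denotes (A+B)*C and canonicalizes into `ADD(MUL(A,C),MUL(B,C))`, i.e. one SCanonicalProduct holding
// {A,C} and another holding {B,C}. A `COMPLEMENT(X)` expands like `ADD(1,-X)`, so `MUL(COMPLEMENT(F),D)`
// with contributor D becomes the two products {D} and {D,F}, the latter negated (presumably tracked via
// `SCanonicalProduct::negate` rather than as an extra node).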
core::vector> ancestorPrefix; - // unused yet + // TODO: unused yet core::unordered_set> visitedNodes; - // should probably size it better, if I knew total node count allocated or live - visitedNodes.reserve(m_rootNodes.size()<<3); + visitedNodes.reserve(128); // auto validateExpression = [&](const typed_pointer_type exprRoot, const bool isBTDF) -> bool { @@ -1058,7 +987,8 @@ inline bool CFrontendIR::valid(const typed_pointer_type rootHandle const auto childHandle = node->getChildHandle(childIx); if (const auto child=getObjectPool().deref(childHandle); child) { - const bool noContribBelow = entry.contribState==SubtreeContributorState::Forbidden || childIx!=0 && nodeIsMul; + // Only Add nodes can have Contributors in any subtree, Mul only the first, and others can't have them at all. Especially don't allow the complementing of a BxDF! + const bool noContribBelow = entry.contribState==SubtreeContributorState::Forbidden || childIx!=0 && !nodeIsAdd || nodeType==IExprNode::Type::Other || nodeType==IExprNode::Type::Complement; StackEntry newEntry = {.node=child,.handle=childHandle}; if (noContribBelow) { @@ -1109,8 +1039,12 @@ inline bool CFrontendIR::valid(const typed_pointer_type rootHandle logger.log("Node %u of type %s is invalid!",ELL_ERROR,entry.handle.value,node->getTypeName().data()); return false; } - if (entry.contribSlot + static inline void printMatrix(std::ostringstream& sstr, const hlsl::matrix& m) + { + for (uint16_t i=0; i; @@ -79,7 +95,18 @@ class CNodePool : public core::IReferenceCounted protected: const uint32_t m_size; }; + // copy the debug nodes between pools + inline typed_pointer_type copyDebugInfo(const typed_pointer_type orig, const CNodePool* pSource) + { + assert(pSource); + if (!orig) + return {}; + const auto span = pSource->getObjectPool().deref(orig)->data(); + const auto oldView = std::string_view(reinterpret_cast(span.data()),span.size()-1); + return getObjectPool().emplace(oldView); + } + // template inline const std::string_view getTypeName(const typed_pointer_type h) const { @@ -91,9 +118,20 @@ class CNodePool : public core::IReferenceCounted protected: inline CNodePool(typename obj_pool_type::creation_params_type&& params) : m_composed(std::move(params)) {} + // does a shallow copy (no need to deref any of the children/deeper references), the pool itself must have a `deepCopy` method for that + template requires std::is_copy_assignable_v + static typed_pointer_type copyNode(const T* src, CNodePool* dstPool) + { + assert(src); + auto& pool = dstPool->getObjectPool(); + const auto copyH = pool.emplace(); + if (auto* const copy=pool.deref(copyH); copyH) + *copy = *src; + return copyH; + } + obj_pool_type m_composed; }; - } // namespace nbl::asset::material_compiler3 #endif \ No newline at end of file diff --git a/include/nbl/asset/material_compiler3/CTrueIR.h b/include/nbl/asset/material_compiler3/CTrueIR.h index fd304b9375..496ae3487f 100644 --- a/include/nbl/asset/material_compiler3/CTrueIR.h +++ b/include/nbl/asset/material_compiler3/CTrueIR.h @@ -1,11 +1,16 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_MATERIAL_COMPILER_V3_C_TRUE_IR_H_INCLUDED_ #define _NBL_ASSET_MATERIAL_COMPILER_V3_C_TRUE_IR_H_INCLUDED_ -#include "nbl/asset/material_compiler3/CFrontendIR.h" +#include "nbl/system/ILogger.h" +#include "nbl/system/to_string.h" + +#include "nbl/asset/material_compiler3/CNodePool.h" +#include "nbl/asset/format/EColorSpace.h" +#include "nbl/asset/ICPUImageView.h" namespace nbl::asset::material_compiler3 @@ -13,7 +18,7 @@ namespace nbl::asset::material_compiler3 // You make the Materials with a classical expression IR, one Root Node per material's interface layer, but here they're in "Accumulator Form" // They appear "flipped upside down", it's expected our backends will evaluate contributors first, and then bother with the attenuators. -class CTrueIR : public CNodePool +class CTrueIR : public CNodePool // TODO: turn into an asset! { template using _typed_pointer_type = CNodePool::obj_pool_type::mem_pool_type::typed_pointer_type; @@ -28,56 +33,1408 @@ class CTrueIR : public CNodePool return core::smart_refctd_ptr(new CTrueIR(std::move(params)),core::dont_grab); } + //! Stuff that's kind-of common to CFrontendIR but doesn't make sense in CNodePool + struct SParameter + { + inline operator bool() const + { + return abs(scale)::infinity() && (!view || viewChannelgetCreationParameters().format)); + } + inline bool operator!=(const SParameter& other) const + { + if (scale!=other.scale) + return true; + if (view) + { + if (viewChannel!=other.viewChannel) + return true; + if (wrapU!=other.wrapU) + return true; + if (wrapV!=other.wrapV) + return true; + if (wrapW!=other.wrapW) + return true; + if (borderColor!=other.borderColor) + return true; + if (linearMagnification!=other.linearMagnification) + return true; + } + // don't compare paddings! + return view!=other.view; + } + inline bool operator==(const SParameter& other) const {return !operator!=(other);} + + void printDot(std::ostringstream& sstr, const core::string& selfID) const; + + // at this stage we store the multipliers in highest precision + float scale = std::numeric_limits::infinity(); + // rest are ignored if the view is null, only take the things that matter from ISampler + static_assert(std::is_same_v,uint16_t>); + uint16_t viewChannel : 2 = 0; + uint16_t linearMagnification : 1 = true; + hlsl::TextureClamp wrapU : 3 = hlsl::TextureClamp::ETC_REPEAT; + hlsl::TextureClamp wrapV : 3 = hlsl::TextureClamp::ETC_REPEAT; + hlsl::TextureClamp wrapW : 3 = hlsl::TextureClamp::ETC_REPEAT; + uint16_t borderColor : 3 = ISampler::E_TEXTURE_BORDER_COLOR::ETBC_FLOAT_OPAQUE_BLACK; + // 1 bit left over + uint8_t padding[2] = {0,0}; // TODO: padding stores metadata, shall we exclude from assignment and copy operators? + core::smart_refctd_ptr view = {}; + }; + // We worry about keeping parameters in the same image view later (the backend) + template + struct SParameterSet + { + inline operator bool() const + { + for (uint8_t i=0; i0); + + template + inline void printDot(std::ostringstream& sstr, const core::string& selfID, StringConstIterator paramNameBegin={}, const bool uvRequired=false) const + { + CTrueIR::printDotParameterSet(*this,Count,sstr,selfID,std::forward(paramNameBegin),uvRequired); + } + + // identity transform by default, ignored if no UVs + // NOTE: a transform could be applied per-param, what's important is that the UV slot remains the same across all of them.
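// Example: a sketch of the two intended ways to fill an SParameter per the rules above
// (`someView` is a placeholder ICPUImageView):
//
//   CTrueIR::SParameter constant = {};
//   constant.scale = 0.5f; // no view: a plain constant 0.5
//
//   CTrueIR::SParameter textured = {};
//   textured.scale = 1.f; // multiplies the texel fetched from the view
//   textured.viewChannel = 0; // which channel of the view to read, must be below the format's channel count
//   textured.wrapU = hlsl::TextureClamp::ETC_REPEAT;
//   textured.view = someView;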
+ hlsl::float32_t2x3 uvTransform = hlsl::float32_t2x3( + 1,0,0, + 0,1,0 + ); + SParameter params[Count] = {}; + + // to make sure there will be no padding inbetween + static_assert(alignof(SParameter)>=alignof(hlsl::float32_t2x3)); + }; + // Why are all of these kept together and forced to fetch from the same UV ? + // Because they're supposed to be filtered together with the knowledge of the NDF + // TODO: should really be 5 parameters (2+3) cause of rotatable anisotropic roughness + struct SBasicNDFParams : SParameterSet<4> + { + enum class EDistribution : uint8_t + { + GGX = 0, + Beckmann = 1, + Invalid = 255 + }; + inline EDistribution& getDistribution() {return reinterpret_cast(params[1].padding[0]);} + inline const EDistribution& getDistribution() const {return reinterpret_cast(params[1].padding[0]);} + + inline auto getDerivMap() {return std::span(params,2);} + inline auto getDerivMap() const {return std::span(params,2);} + inline auto getRougness() {return std::span(params+2,2);} + inline auto getRougness() const {return std::span(params+2,2);} + + inline SBasicNDFParams() + { + getDistribution() = EDistribution::Invalid; + // initialize with constant flat deriv map and smooth roughness + for (auto& param : params) + param.scale = 0.f; + } + + // The usage of a normal modifier implies potential anisotropic roughness when filtering (CLEAR, CLEAN, Neural), so all 4 (or 5) parameters should come from a texture. + // When normal modifier is not used, the roughness can still come from a texture but can be isotropic or anisotropic. Weird combos will require making tiny textures when converting from AST. + enum class EParamType : uint8_t + { + TotallyMapped, + AnisotropicMapped, + IsotropicMapped, + AnisotropicConstant, + IsotropicConstant + }; + // This is about how we load our data into the NDF not whether the NDF is really isotropic + inline EParamType determineParamType() const + { + // a derivative map from a texture allows for anisotropic NDFs at higher mip levels when pre-filtered properly + for (auto i=0; i<2; i++) + if (getDerivMap()[i].scale!=0.f && getDerivMap()[i].view) + return EParamType::TotallyMapped; + const auto roughness = getRougness(); + // having one roughness be mapped and another not mapped, isn't very useful in any renderer + const bool roughnessIsMapped = roughness[0].scale!=0.f && roughness[0].view || roughness[1].scale!=0.f && roughness[1].view; + // if roughness inputs are not equal (same scale, same texture) then NDF can be anisotropic in places + if (roughness[0]!=roughness[1]) + { + return roughnessIsMapped ? 
EParamType::AnisotropicMapped:EParamType::AnisotropicConstant; + } + else if (roughnessIsMapped) + { + return EParamType::IsotropicMapped; + } + else + return EParamType::IsotropicConstant; + } + + // conservative check, checks if we can optimize certain things this way + inline bool definitelyIsotropic() const + { + switch (determineParamType()) + { + case EParamType::IsotropicMapped: [[fallthrough]]; + case EParamType::IsotropicConstant: + break; + default: + return false; + } + // if a reference stretch is used, stretched triangles can turn the distribution anisotropic + return stretchInvariant(); + } + // whether the derivative map and roughness is constant regardless of UV-space texture stretching + inline bool stretchInvariant() const {return !(abs(hlsl::determinant(reference))>std::numeric_limits::min());} + + void printDot(std::ostringstream& sstr, const core::string& selfID) const; + + // Ignored if not invertible, otherwise its the reference "stretch" (UV derivatives) at which identity roughness and normalmapping occurs + hlsl::float32_t2x2 reference = hlsl::float32_t2x2(0,0,0,0); + }; + // basic "built-in" nodes class INode : public CNodePool::INode { public: + enum class EFinalType : uint8_t + { + COrientedLayer=0, + CCorellatedTransmission, + CContributorSum, + CWeightedContributor, + CEmitter, + CDeltaTransmission, + CSpectralVariable, + COrenNayar, + CCookTorrance, + CFactorCombiner, + CBeer, + CFresnel, + CThinInfiniteScatterCorrection + }; + virtual EFinalType getFinalType() const = 0; + + const auto& getHash() const {return hash;} + + // only call once the nodes underneath are linked up (because it doesn't call recursively), returning empty hash means error/invalid node + inline core::blake3_hash_t computeHash(const obj_pool_type& pool) const + { + core::blake3_hasher hasher = {}; + // always put the node type into the hash + hasher << static_cast(getFinalType()); + if (!computeHash_impl(pool,hasher)) + return {}; + return hasher.operator core::blake3_hash_t(); + } + + virtual inline std::string_view getChildName_impl(const uint8_t ix) const { return ""; } + virtual inline void printDot(std::ostringstream& sstr, const core::string& selfID) const {} protected: + friend class CTrueIR; + inline bool recomputeHash(const obj_pool_type& pool) + { + hash = computeHash(pool); + return hash!=core::blake3_hash_t::EmptyInput(); + } + virtual bool computeHash_impl(const obj_pool_type& pool, core::blake3_hasher& hasher) const = 0; +#define HASH_REQUIREDS_HASH(HANDLE) { \ + if (const auto hash=pool.deref(HANDLE)->getHash(); hash==core::blake3_hash_t::EmptyInput()) \ + return false; \ + else \ + hasher << hash; \ + } +#define HASH_OPTIONALS_HASH(HANDLE) if (HANDLE) {HASH_REQUIREDS_HASH(HANDLE);} else {hasher << core::blake3_hash_t::EmptyInput();} + // Each node is final and immutable, has a precomputed hash for the whole subtree beneath it. // Debug info does not form part of the hash, so can get wildly replaced. 
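// Example: a sketch of the dedup flow this subtree (Merkle) hash enables, with `dedupMap` as a
// placeholder map from blake3 hash to node handle and `nodeH` the handle of `node`:
//
//   // only after all children of `node` are linked up, freeze its subtree hash
//   if (!node->recomputeHash(pool))
//     return {}; // some child hashed to EmptyInput, i.e. was invalid
//   // identical subtrees collapse onto a single handle
//   if (const auto found=dedupMap.find(node->getHash()); found!=dedupMap.end())
//     return found->second;
//   dedupMap.emplace(node->getHash(),nodeH);
//   return nodeH;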
- core::blake3_hash_t hash; + core::blake3_hash_t hash = core::blake3_hash_t::EmptyInput(); }; // template requires std::is_base_of_v> using typed_pointer_type = _typed_pointer_type; + // Contributors are things which can either be importance sampled to continue a path or impart a contribution to the integral + // We don't make the contributor hold its factor so that we still have a separate hash value and good codegen (all identical contributors go to the same function call) + class IContributor : public INode + { + public: + virtual bool isEmitter() const = 0; + }; + // this is for a term in a mul or add expression + class IFactor : public INode + { + protected: + uint64_t padding = 0; + }; +#define TYPE_NAME_STR(NAME) "nbl::asset::material_compiler3::CTrueIR::"#NAME + // we step away from our usual way of doing things via linked lists, because this is simpler to rearrange + class CFactorCombiner final : public obj_pool_type::IVariableSize, public IFactor + { + public: + inline EFinalType getFinalType() const override {return EFinalType::CFactorCombiner;} + + inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(CFactorCombiner);} + + // There are only two ops we support + enum Type : uint8_t + { + Mul = 0, + Add = 1 + }; + struct SState + { + uint64_t type : 1 = Type::Mul; + uint64_t childCount : 6 = 0; + // which factors get `1-x` for an Add node or `-x` for a Mul node before getting used + uint64_t childIxComplementMask : 57 = 0x0u; + }; + static_assert(sizeof(SState) == sizeof(IFactor::padding)); + + // + static inline uint32_t calc_size(const SState state) + { + return sizeof(CFactorCombiner)+sizeof(child[0])*(state.childCount-1); + } + inline CFactorCombiner(const SState state) + { + padding = std::bit_cast(state); + } + + // + inline SState getState() const {return std::bit_cast(padding);} + // + inline void setComplementMask(const uint64_t mask) + { + assert(mask < (0x1ull<<57)); + auto state = getState(); + state.childIxComplementMask = mask; + padding = std::bit_cast(state); + } + + // Only sane child count allowed + inline typed_pointer_type getChildHandle(const uint8_t ix) const + { + if (ix handle) + { + if (ix child[1] = {{}}; + }; + // Note that this is not a root node, it's a flipped leaf! + class CWeightedContributor final : public obj_pool_type::INonTrivial, public INode + { + inline bool computeHash_impl(const obj_pool_type& pool, core::blake3_hasher& hasher) const override + { + if (!contributor) + return false; + HASH_REQUIREDS_HASH(contributor); + HASH_OPTIONALS_HASH(factor); + // TODO: else where it hashes with a premade hash of a `IFactor = CConstantFactor` of monochrome 1.0 scalar + return true; + } + + public: + inline EFinalType getFinalType() const override {return EFinalType::CWeightedContributor;} + + inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(CWeightedContributor);} + inline std::string_view getChildName_impl(const uint8_t ix) const override final { return ix ?
"factor" : "contributor"; } + + + typed_pointer_type contributor = {}; + // if null then assumed to be 1 + typed_pointer_type factor = {}; + }; + // One BRDF or BTDF component of a layer is represented as + // f(w_i,w_o) = Sum_i^N Product_j^{N_i} h_{ij}(w_i,w_o) l_i(w_i,w_o) + class CContributorSum final : public obj_pool_type::INonTrivial, public INode + { + + // CANONICALIZATION NOTE: The conntributors shall be ordered in following order: + // - according to their type, emitters first, then BxDFs, within those + // + Emitters with IES profile, then without + // + BxDFs by their type + // * within the same type, by the parameters (with/without bump, then rest + // - all things being equal, order by hash + // For factors we want to order from lowest spectrality to highest, the computational expense. + // Due to Schussler and Yining, BxDFs are highly unlikely to evaluate to 0 but they can produce invalid samples. + // Also BxDFs and Emitters (which only hold IES profiles) are monochromatic so accumulating their contribution first uses least registers. + // Fresnel, Beer extinction and other analytic modifiers never produce 0s so should be evaluated last. + // Function factors which have to be evaluated via composition go even later, but within their dimension category. + inline bool computeHash_impl(const obj_pool_type& pool, core::blake3_hasher& hasher) const override + { + if (product) + { + HASH_REQUIREDS_HASH(product); + HASH_OPTIONALS_HASH(rest); + } + else if (rest) // this is an invalid combo, because `rest->product` could be hoisted in place of `product` + return false; + return true; + } + + public: + inline EFinalType getFinalType() const override {return EFinalType::CContributorSum;} + + inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(CContributorSum);} + inline std::string_view getChildName_impl(const uint8_t ix) const override final { return ix ? "rest" : "product"; } + + // the product is ... + typed_pointer_type product = {}; + // the rest node is ... + _typed_pointer_type rest = {}; + }; + + // For codegen, we can compute total uncorrelated layering by convolving every `h_{ij}(w_i,w_o) l_i(w_i,w_o)` term in the layer above with layer below + class COrientedLayer; + // Corellated layering is a far far far TODO, all it means that certain convolutions don't happen - certain BxDFs don't layer over each other (tricky to express in AST and IR) + class CCorellatedTransmission final : public obj_pool_type::INonTrivial, public INode + { + inline bool computeHash_impl(const obj_pool_type& pool, core::blake3_hasher& hasher) const override + { + // invalid combo, null btdf prevents you crossing over to the next layer and hitting current from below + // also if there's no btdf it makes no sense to have a `next` sibling. + if (!btdf) + return false; + HASH_REQUIREDS_HASH(btdf); + HASH_OPTIONALS_HASH(brdfBottom); + HASH_OPTIONALS_HASH(coated); + HASH_OPTIONALS_HASH(next); + return true; + } + + public: + inline EFinalType getFinalType() const override {return EFinalType::CCorellatedTransmission;} + + inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(CCorellatedTransmission);} + inline std::string_view getChildName_impl(const uint8_t ix) const override final { return ix ? (ix > 1 ? 
"next" : "brdfBottom") : "btdf"; } + + // you can set the children later + inline CCorellatedTransmission() = default; + + // Obligatory if you don't want transmission then don't put a valid handle in `COrientedLayer::firstTransmission` + // Also shouldn't contain `CDeltaTransmission` in the BTDF unless `coated` is null (TODO a check for that) + typed_pointer_type btdf = {}; + // because the layer is oriented, these last two members must be null when the coating stops + typed_pointer_type brdfBottom = {}; + _typed_pointer_type coated = {}; + // optional, indicates a "sibling" transmission thats next to this one + _typed_pointer_type next = {}; + }; + // The oriented layer is a layer with already all the Etas reciprocated, etc. + class COrientedLayer final : public obj_pool_type::INonTrivial, public INode + { + inline bool computeHash_impl(const obj_pool_type& pool, core::blake3_hasher& hasher) const override + { + HASH_OPTIONALS_HASH(brdfTop); + HASH_OPTIONALS_HASH(firstTransmission); + return true; + } + + public: + inline EFinalType getFinalType() const override {return EFinalType::COrientedLayer;} + + inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(COrientedLayer);} + + // you can set the children later + inline COrientedLayer() = default; + + // These are same as the frontend's except that all the etas are oriented and reciprocated + typed_pointer_type brdfTop = {}; + // this node must be non-null until the last layer + typed_pointer_type firstTransmission = {}; + }; + // + class IFactorLeaf : public IFactor + { + public: + inline bool isScalar() const {return getSpectralBins()==1;} + + virtual uint8_t getSpectralBins() const = 0; + }; + // + class ISpectralVariable + { + inline SParameter& getParameter(const uint8_t i) {return pWonky()->params[i];} + + public: + enum class ESemantics : uint8_t + { + NoneUndefined = 0, + // 3 knots, their wavelengths are implied and fixed at color primaries + Fixed3_SRGB = 1, + Fixed3_DCI_P3 = 2, + Fixed3_BT2020 = 3, + Fixed3_AdobeRGB = 4, + Fixed3_AcesCG = 5, + // Ideas: each node is described by (wavelength,value) pair + // PairsLinear = 5, // linear interpolation + // PairsLogLinear = 5, // linear interpolation in wavelenght log space + }; + + // essential! 
+      inline uint8_t getKnotCount() const
+      {
+        static_assert(sizeof(SParameter::padding)>1);
+        return pWonky()->params[0].padding[1];
+      }
+
+      // encapsulation due to padding abuse
+      inline auto& uvTransform() {return pWonky()->uvTransform;}
+      inline const auto& uvTransform() const {return pWonky()->uvTransform;}
+
+      inline uint8_t& uvSlot() {return pWonky()->uvSlot();}
+      inline const uint8_t& uvSlot() const {return pWonky()->uvSlot();}
+
+      inline const SParameter& getParameter(const uint8_t i) const {assert(i<getKnotCount()); return pWonky()->params[i];}
+      inline void setParameter(const uint8_t i, const SParameter& value)
+      {
+        assert(i<getKnotCount());
+        pWonky()->params[i] = value;
+      }
+
+      inline ESemantics getSemantics() const
+      {
+        if (getKnotCount()>1)
+          return static_cast<ESemantics>(getParameter(1).padding[0]);
+        else
+          return ESemantics::NoneUndefined;
+      }
+      inline void setSemantics(const ESemantics value)
+      {
+        if (getKnotCount()>1)
+          getParameter(1).padding[0] = static_cast<uint8_t>(value);
+      }
+
+    protected:
+      inline ISpectralVariable() = default;
+      // delete all these so we don't implicitly copy
+      inline ISpectralVariable(const ISpectralVariable&) = delete;
+      inline ISpectralVariable(ISpectralVariable&&) = delete;
+      inline ISpectralVariable& operator=(const ISpectralVariable&) = delete;
+      inline ISpectralVariable& operator=(ISpectralVariable&&) = delete;
+      // fill out the params later
+      inline void init(const uint8_t knotCount)
+      {
+        std::uninitialized_default_construct_n(pWonky(),1);
+        if (knotCount>1)
+          std::uninitialized_default_construct_n(pWonky()->params+1,knotCount-1);
+        // back up the count
+        static_assert(sizeof(SParameter::padding)>1);
+        getParameter(0).padding[1] = knotCount;
+        // set it correctly for monochrome
+        setSemantics(ESemantics::NoneUndefined);
+      }
+      inline void init(const ISpectralVariable& other)
+      {
+        const auto* const src = other.pWonky();
+        auto* const dst = pWonky();
+        std::uninitialized_copy_n(src,1,dst);
+        const size_t count = other.getKnotCount();
+        if (count>1)
+          std::uninitialized_copy_n(src->params+1,count-1,dst->params+1);
+      }
+      inline void init(const uint8_t knotCount, const ISpectralVariable& other)
+      {
+        init(knotCount);
+        const auto count = hlsl::min(other.getKnotCount(),knotCount);
+        for (uint8_t c=0; c<count; c++)
+          setParameter(c,other.getParameter(c));
+      }
+
+      virtual SParameterSet<1>* pWonky() = 0;
+      inline const SParameterSet<1>* pWonky() const {return const_cast<ISpectralVariable*>(this)->pWonky();}
+  };
+  // This node could also represent non-directional emission, but we have another node for that
+  template<class OtherBase> requires std::is_base_of_v<ISpectralVariable,OtherBase>
+  class alignas(SParameterSet<1>) CSpectralVariable final : public obj_pool_type::IVariableSize, public OtherBase
+  {
+      using this_t = CSpectralVariable;
+
+    protected:
+      inline ~CSpectralVariable()
+      {
+        const auto count = ISpectralVariable::getKnotCount();
+        std::destroy_n(pWonky(),1);
+        if (count>1)
+          std::destroy_n(pWonky()->params+1,count-1);
+      }
+
+      inline SParameterSet<1>* pWonky() override final {return reinterpret_cast<SParameterSet<1>*>(this+1);}
+
+    public:
+      inline CSpectralVariable(const uint8_t knotCount) : OtherBase() {ISpectralVariable::init(knotCount);}
+      inline CSpectralVariable(const ISpectralVariable& other) : OtherBase() {ISpectralVariable::init(other);}
+      inline CSpectralVariable(const uint8_t knotCount, const ISpectralVariable& other) : OtherBase() {ISpectralVariable::init(knotCount,other);}
+
+      //
+      inline _typed_pointer_type copy(obj_pool_type& pool) const
+      {
+        // need to coax the compiler into compiling
+        const ISpectralVariable& rThis = *this;
+        return pool.emplace(rThis);
+      }
+
+      //
+      static inline uint32_t calc_size(const uint8_t knotCount)
+      {
+        return sizeof(this_t)+sizeof(SParameterSet<1>)+sizeof(SParameter)*(knotCount-1);
+      }
+      // for copying
+      static
inline uint32_t calc_size(const ISpectralVariable& other)
+      {
+        return calc_size(other.getKnotCount());
+      }
+      static inline uint32_t calc_size(const uint8_t knotCount, const ISpectralVariable& other)
+      {
+        return calc_size(knotCount);
+      }
+
+      // TODO: improve the token pasting here
+      inline const std::string_view getTypeName() const override final {return TYPE_NAME_STR(CSpectralVariable);}
+  };
+  //
+  class ISpectralVariableFactor : public ISpectralVariable, public IFactorLeaf
+  {
+    inline bool computeHash_impl(const obj_pool_type& pool, core::blake3_hasher& hasher) const override final
+    {
+      const auto count = getKnotCount();
+      hasher << count;
+      {
+        const ESemantics semantics = getSemantics();
+        if (getKnotCount()>1 && semantics==ESemantics::NoneUndefined)
+          return false;
+        hasher << semantics;
+      }
+      bool hasTextures = false;
+      const auto* const wonky = pWonky();
+      for (uint8_t i=0; i<count; i++)
+      {
+        hasher << wonky->params[i];
+        hasTextures = hasTextures || wonky->params[i].view;
+      }
+      if (hasTextures)
+      {
+        hasher << wonky->uvTransform;
+        hasher << wonky->uvSlot();
+      }
+      return true;
+    }
+
+    public:
+      inline EFinalType getFinalType() const override final
+      {
+        return EFinalType::CSpectralVariable;
+      }
+
+      //
+      inline uint8_t getSpectralBins() const override final {return getKnotCount();}
+
+      NBL_API2 void printDot(std::ostringstream& sstr, const core::string& selfID) const override final;
+  };
+  using CSpectralVariableFactor = CSpectralVariable<ISpectralVariableFactor>;
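+
+  // Illustrative usage (a sketch, not part of the declarations above): a 3-knot RGB
+  // constant is created the way `createNamedFresnel` does further down in this patch,
+  // i.e. emplace with a knot count, then fill in semantics and per-knot scales.
+  // `pool` and `etaH` are example names and the emplace template argument is assumed:
+  //
+  //  auto etaH = pool.emplace<CSpectralVariableFactor>(3);
+  //  auto* const eta = pool.deref(etaH);
+  //  eta->setSemantics(ISpectralVariable::ESemantics::Fixed3_SRGB);
+  //  for (uint8_t c=0; c<3; c++)
+  //    eta->setParameter(c,{.scale=1.33f}); // e.g. water's real IoR in every channel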
+
+  // Unit Radiance emitter modulated by an IES profile
+  class CEmitter final : public obj_pool_type::INonTrivial, public IContributor
+  {
+    inline bool computeHash_impl(const obj_pool_type& pool, core::blake3_hasher& hasher) const override
+    {
+      hasher << profileTransform;
+      if (profile.view)
+      {
+        // we ignore most of the sampler, it needs to always be set the same
+        auto copy = profile;
+        copy.wrapU = hlsl::TextureClamp::ETC_CLAMP_TO_EDGE;
+        copy.wrapV = hlsl::TextureClamp::ETC_CLAMP_TO_EDGE;
+        copy.wrapW = hlsl::TextureClamp::ETC_CLAMP_TO_EDGE;
+        copy.linearMagnification = true;
+        copy.borderColor = ISampler::E_TEXTURE_BORDER_COLOR::ETBC_FLOAT_TRANSPARENT_BLACK;
+        // there's a limited set of symmetries we can exploit in our IES tabulations, TODO: check which (probably not REPEAT)
+        hasher << copy;
+      }
+      else
+        hasher << profile.scale;
+      return true;
+    }
+
+    public:
+      inline EFinalType getFinalType() const override {return EFinalType::CEmitter;}
+
+      inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(CEmitter);}
+
+      inline bool isEmitter() const override {return true;}
+
+      // you can set the members later
+      inline CEmitter() = default;
+
+      // This can be anything like an IES profile; if invalid, there's no directionality to the emission
+      // `profile.scale` can still be used to influence the light strength without influencing NEE light picking probabilities
+      SParameter profile = {};
+      hlsl::float32_t3x3 profileTransform = hlsl::float32_t3x3(
+        1,0,0,
+        0,1,0,
+        0,0,1
+      );
+      // TODO: semantic flags/metadata (symmetries of the profile)
+
+      NBL_API2 void printDot(std::ostringstream& sstr, const core::string& selfID) const override final;
+  };
+
+  // To use a bump map, the Material needs to be provided UVs (which may or may not have associated tangents and smooth normals), but that's the responsibility of the backend.
+  // The bump map just needs to be provided a TBN basis that's orthogonal so that normals or derivatives can be transformed.
+  // Normal maps will use Olano so won't be assumed to be normalized (they wouldn't be anyway due to interpolation), and derivative maps are never normalized and have a huge range.
+  // Depending on the mode (deformation stretch enabled or not) the TB of the matrix will be normalized before the multiplication so that TBN is orthonormal and does not impart a UV stretch.
+  // After multiplication the normal will be normalized again, which is when we should apply Schussler or Yining.
+  //
+  // Schussler and Yining require we define a tangential microfacet from the perturbed normal `P` but differently:
+  // - Schussler wants normalize({-P.x,-P.y,0}) which is totally tangential and orthogonal to the geometric normal
+  // - Yining wants normalize({-P.xy*P.z,1-P.z*P.z}) which is orthogonal to the perturbed normal
+  // Both give an NDF whose average normal, projected onto the geometric normal, is collinear with the geometric normal.
+  // Each microfacet has different pros and cons:
+  // - Tangential microfacet can use a 100% perfect mirror material because it's impossible to get 1st-order scattering from it (camera and light can't see it at the same time)
+  // - Mirror Tangential microfacet makes 2nd-order scattering tractable, just evaluate the BRDF 3 times with original L and V, then once with reflected L and reflected V
+  //   - for a Lambertian BRDF this is only 2 evaluations because reflected V doesn't change anything
+  //   - the dot products with reflected L or V can be computed quickly from the regular ones, but `H` always changes in this case
+  // - Orthogonal microfacet makes sure zero masking is achieved when LdotP=1 and appearance is 100% preserved in this case
+  // - Orthogonal microfacet can be seen directly and therefore it needs to have the same BxDF as the perturbed microfacet
+  //   - The above requirements make 2nd-order scattering intractable for orthogonal microfacets without random walks in the surface profile
+  // - We only need to evaluate the BxDF twice with Yining and all products with the perturbed normal can be obtained similarly to obtaining the reflected L and V for Schussler (permutations and scaling)
+  // Both approaches are imperfect and in the limit will cause "fresnel lens" like appearance with Cook Torrance low roughness materials; you'll see a blend between classical bump mapped appearance and:
+  // - for 2nd-order Schussler a reflection of the objects by a flipped normal {-P.xy,P.z} due to the mirror microfacet making a "virtual normal" behind the mirror
+  // - for 1st-order Yining a reflection of the objects by an orthogonal normal which approaches the geometric normal as the perturbed normal gets more extreme
+  // This means that a tangential microfacet will show no blend/retroreflection if both L and V are behind the tangential microfacet.
+  // Whereas an orthogonal microfacet will show no blend/retroreflection if both L and V are below the orthogonal microfacet, so tangential ensures this property for half the hemisphere.
+  // But with extreme normal perturbation, the appearance of Schussler will match that of a very deep V-groove losing a lot of energy, but Yining will look like a flat surface of the same material.
+  // This is because Schussler has no bound on the ratio of the projected microsurface area on the geometric surface, whereas Yining bounds it to be <= 2.0
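+  //
+  // As a concrete sketch of the two constructions above (`P` is the perturbed unit normal
+  // in tangent space, `hlsl::float32_t3` as used elsewhere in this header; illustrative only):
+  //
+  //  hlsl::float32_t3 schusslerFacet(const hlsl::float32_t3 P)
+  //  {
+  //    return hlsl::normalize(hlsl::float32_t3(-P.x,-P.y,0.f)); // tangential, orthogonal to the geometric normal
+  //  }
+  //  hlsl::float32_t3 yiningFacet(const hlsl::float32_t3 P)
+  //  {
+  //    return hlsl::normalize(hlsl::float32_t3(-P.x*P.z,-P.y*P.z,1.f-P.z*P.z)); // orthogonal to P (the dot product with P cancels exactly)
+  //  }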
+  //
+  // There's also a Schussler variant where the tangential microfacet has the same BRDF as the perturbed one, with only 1st-order scattering. This is nice because diffuse doesn't become a special case.
+  // Then with this you can actually see the tangential microfacet (a smooth mirror makes it black with only 1st order), which works better for BSDFs than 2nd-order scattering with a mirror facet.
+  // Note that a transmissive or smooth glass tangential facet could never work, because it presents the same issues as a non-tangential mirror facet (V and L can interact directly in 1st order).
+  // A mirror facet would cause you to see refractions from a reflected V, which would be weird as VdotP->0 and you'd expect 100% fresnel reflection. Worse yet you'd see them in the "wrong" direction.
+  // Having a tangential microfacet with the same BSDF does blend towards transmission as `VdotP->0`, but at least the refractive rays correctly have `L.xy = -V.xy` and are going "forward" although
+  // they are bending upwards vs. the transmitted direction, not downwards like they would against the macro surface or the perturbed surface before VdotP==0.
+  //
+  // Both this and Yining are faster because of the 2 BxDF evaluations instead of 3; they only differ by the inter-facet shadowing and masking functions and by how the tangent facet normal is computed.
+  // However an orthogonal microfacet will work better for BSDFs because the facet tangents and normals form a square and not a rhombus, which means that a V to the "left" of the perturbed normal
+  // will always be to the "right" of the orthogonal microfacet, meaning we'll get a consistent refraction (both L=refract(V,facet) will curve away from -V in the same direction).
+  class IBxDF : public IContributor
+  {
+    public:
+      inline bool isEmitter() const override {return false;}
+  };
+  class CDeltaTransmission final : public obj_pool_type::INonTrivial, public IBxDF
+  {
+    // nothing to do
+    inline bool computeHash_impl(const obj_pool_type& pool, core::blake3_hasher& hasher) const override {return true;}
+
+    public:
+      inline EFinalType getFinalType() const override {return EFinalType::CDeltaTransmission;}
+
+      inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(CDeltaTransmission);}
+
+      inline CDeltaTransmission() = default;
+  };
+  class IBxDFWithNDF : public IBxDF
+  {
+    protected:
+      inline bool computeHash_impl(const obj_pool_type& pool, core::blake3_hasher& hasher) const override
+      {
+        hasher << ndfParams;
+        return true;
+      }
+
+    public:
+      SBasicNDFParams ndfParams = {};
+  };
+  class COrenNayar final : public obj_pool_type::INonTrivial, public IBxDFWithNDF
+  {
+    public:
+      inline bool computeHash_impl(const obj_pool_type& pool, core::blake3_hasher& hasher) const override
+      {
+        if (ndfParams.getDistribution()!=SBasicNDFParams::EDistribution::Invalid)
+          return false;
+        IBxDFWithNDF::computeHash_impl(pool,hasher);
+        return true;
+      }
+
+      inline EFinalType getFinalType() const override {return EFinalType::COrenNayar;}
+
+      inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(COrenNayar);}
+
+      inline COrenNayar() = default;
+
+      NBL_API2 void printDot(std::ostringstream& sstr, const core::string& selfID) const override final;
+  };
+  class CCookTorrance final : public obj_pool_type::INonTrivial, public IBxDFWithNDF
+  {
+    inline bool computeHash_impl(const obj_pool_type& pool, core::blake3_hasher& hasher) const override
+    {
+      if (ndfParams.getDistribution()>SBasicNDFParams::EDistribution::Beckmann)
+        return false;
+      IBxDFWithNDF::computeHash_impl(pool,hasher);
+      hasher << static_cast<uint8_t>(ndfParams.getDistribution());
+      hasher << isEtaReciprocal();
+      HASH_OPTIONALS_HASH(orientedRealEta);
+      return true;
+    }
+
+    public:
+      inline EFinalType getFinalType() const override {return EFinalType::CCookTorrance;}
+
+      inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(CCookTorrance);}
+
+      inline CCookTorrance() = default;
+
+      //
+      inline bool isEtaReciprocal() const {return ndfParams.params[2].padding[0];}
+      inline void setEtaReciprocal(const bool value) {ndfParams.params[2].padding[0] = value;}
+      NBL_API2 void printDot(std::ostringstream& sstr, const core::string& selfID) const override final;
-  // Each material comes down to this
-  struct Material
+      // BTDF ONLY! We need this eta to compute the refractions of `L` when importance sampling and the Jacobian during H to L generation for rough dielectrics
+      // It does not mean we compute the Fresnel weights though! You might ask why we don't do that given that state of the art importance sampling
+      // (at time of writing) is to decide upon reflection vs. refraction after the microfacet normal `H` is already sampled,
+      // producing an estimator with just Masking and Shadowing function ratios. The reason is because we can simplify our IR by separating out
+      // BRDF and BTDF components into separate expressions, and also importance sample much better.
+      typed_pointer_type orientedRealEta = {};
+  };
+  //! Basic factor nodes
+  // Effective transparency = exp2(log2(perpTransmittance)*thickness/dot(refract(V,X,eta),X)) = exp2(log2(perpTransmittance)*thickness*inversesqrt(1.f+(LdotX*LdotX-1.f)*rcpEta*rcpEta))
+  // Eta and `LdotX` are taken from the contributor BxDF node. With refractions from Dielectrics, we get just `1/LdotX`; for Delta Transmission we get `1/VdotN` since it's the same.
+  // Note: it's allowed to apply Beer directly on a BRDF as well as a BTDF to simulate foggy extinction on the top layer
+  // (a numeric sketch of this formula follows the class below)
+  class CBeer final : public obj_pool_type::INonTrivial, public IFactorLeaf
   {
-  // TypedHandle root;
-  CNodePool::typed_pointer_type debugInfo;
+    inline bool computeHash_impl(const obj_pool_type& pool, core::blake3_hasher& hasher) const override
+    {
+      hasher << channels;
+      HASH_REQUIREDS_HASH(perpTransmittance);
+      HASH_REQUIREDS_HASH(thickness);
+      return true;
+    }
+
+    public:
+      inline EFinalType getFinalType() const override {return EFinalType::CBeer;}
+
+      inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(CBeer);}
+
+      inline std::string_view getChildName_impl(const uint8_t ix) const override final {return ix ? "Thickness":"Perpendicular\\nTransmittance";}
+
+      inline uint8_t getSpectralBins() const override {return channels;}
+
+      // cannot be null, otherwise there's no point in being there as the term will multiply to 0
+      typed_pointer_type perpTransmittance = {};
+      // cannot be null, otherwise it's always exp2(0) and the term will always be 1
+      typed_pointer_type thickness = {};
+      // can be worked out by analyzing what we point to, but not needed
+      uint8_t channels = 3;
+  };
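+  // A scalar sketch of the effective-transparency formula above (illustrative only; the
+  // real node evaluates the `perpTransmittance` and `thickness` subtrees per channel):
+  //
+  //  float beerFactor(const float perpT, const float thickness, const float LdotX, const float rcpEta)
+  //  {
+  //    const float rcpRefractedCos = 1.f/std::sqrt(1.f+(LdotX*LdotX-1.f)*rcpEta*rcpEta);
+  //    return std::exp2(std::log2(perpT)*thickness*rcpRefractedCos);
+  //  }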
"Imaginary":"Real";} + + inline uint8_t getSpectralBins() const override {return channels;} + + // cannot be null or a constant of 1 while imaginary is null + typed_pointer_type orientedRealEta = {}; + // If null, then treated as a 0 (important to optimize those to 0). MUST be null for BTDFs! + typed_pointer_type orientedImagEta = {}; + // easier on the codegen + uint8_t reciprocateEtas : 1 = false; + // can be worked out by analyzing what we point to, but not needed + uint8_t channels : 7 = 3; + }; + class CThinInfiniteScatterCorrection final : public obj_pool_type::INonTrivial, public IFactorLeaf + { + inline bool computeHash_impl(const obj_pool_type& pool, core::blake3_hasher& hasher) const override + { + hasher << channels; + HASH_REQUIREDS_HASH(reflectanceTop); + HASH_OPTIONALS_HASH(extinction); + if (reflectanceBottom) + hasher << reflectanceBottom; + else + hasher << reflectanceTop; + return true; + } + + public: + inline EFinalType getFinalType() const override {return EFinalType::CThinInfiniteScatterCorrection;} + + inline const std::string_view getTypeName() const override {return TYPE_NAME_STR(CThinInfiniteScatterCorrection);} + + inline std::string_view getChildName_impl(const uint8_t ix) const override final {return ix ? (ix>1 ? "reflectanceBottom":"extinction"):"reflectanceTop";} + + inline uint8_t getSpectralBins() const override {return channels;} + + // cannot be null otherwise no point being there + typed_pointer_type reflectanceTop = {}; + // optional, if null then treated as E=1.0 + typed_pointer_type extinction = {}; + // optional, if null then `reflectanceTop` used in its place + typed_pointer_type reflectanceBottom = {}; + // can be worked out by analyzing what we point to, but not needed + uint8_t channels = 3; + }; +#undef TYPE_NAME_STR +#undef HASH_THE_HASH + + // + struct SBasicNodes final + { + public: + inline bool operator==(const SBasicNodes& other) const = default; + + typed_pointer_type blackHoleBxDF = {}; + typed_pointer_type scalarNegation = {}; + // these are never meant to be hashed and inserted into `m_uniqueNodes` + typed_pointer_type errorBxDF = {}; + typed_pointer_type errorLayer = {}; + + private: + friend class CTrueIR; + NBL_API2 SBasicNodes(CTrueIR* ir); + }; + const SBasicNodes& getBasicNodes() const {return m_basicNodes;} + + // + template requires std::is_base_of_v + inline typed_pointer_type hashNCache(const typed_pointer_type origH) + { + auto* const orig = getObjectPool().deref(origH); + if (orig) + if (orig->getHash()!=core::blake3_hash_t::EmptyInput() || orig->recomputeHash(getObjectPool())) + { + auto& slot = m_uniqueNodes[orig->getHash()]; + if (slot) + return CNodePool::obj_pool_type::block_allocator_type::_static_cast(slot); + slot = origH; + return origH; + } + return {}; + } + + // Each material comes down to this, this is the only struct we don't de-duplicate + struct SMaterial + { + // + struct SMetadata + { + // Stats needed by a renderer to skip loading parts of a material or remove expensive code altogether + enum class ECapabilityBits : uint16_t + { + None = 0, + // if any such contributor present + NotBlackhole = 0x1u<<0, // actually have a material + NonDelta = 0x1u<<1, // can evaluate against point lights (or other samplings) + DeltaTransmissive = 0x1u<<2, // can use stochastic transparency for closest hit rays and blending for anyhit + NonSpatiallyVaryingEmissive = 0x1u<<3, // definitely register for NEE + SpatiallyVaryingEmissive = 0x1u<<4, // maybe register for NEE but needs different kind of NEE + // TODO: 5,6,7 left + 
+
+  // Each material comes down to this; it's the only struct we don't de-duplicate
+  struct SMaterial
+  {
+    //
+    struct SMetadata
+    {
+      // Stats needed by a renderer to skip loading parts of a material or remove expensive code altogether
+      enum class ECapabilityBits : uint16_t
+      {
+        None = 0,
+        // if any such contributor present
+        NotBlackhole = 0x1u<<0, // actually have a material
+        NonDelta = 0x1u<<1, // can evaluate against point lights (or other samplings)
+        DeltaTransmissive = 0x1u<<2, // can use stochastic transparency for closest hit rays and blending for anyhit
+        NonSpatiallyVaryingEmissive = 0x1u<<3, // definitely register for NEE
+        SpatiallyVaryingEmissive = 0x1u<<4, // maybe register for NEE but needs a different kind of NEE
+        // TODO: 5,6,7 left
+        // Bits that help us remove expensive code from the implementation
+        DerivativeMap = 0x1u<<8,
+        DirectionallyVaryingEmissive = 0x1u<<9, // IES profile
+        Lambertian = 0x1u<<10,
+        OrenNayar = 0x1u<<11,
+        GGX = 0x1u<<12,
+        AnisotropicGGX = 0x1u<<13,
+        Beckmann = 0x1u<<14,
+        AnisotropicBeckmann = 0x1u<<15,
+      };
+      //
+      constexpr static inline uint8_t MaxUVSlots = 32;
+      std::bitset<MaxUVSlots> usedUVSlots = {};
+      // the tangent frames are a subset of used UV slots, unless there's an anisotropic BRDF involved
+      std::bitset<MaxUVSlots> usedTangentFrames = {};
+      //
+      core::bitflag<ECapabilityBits> capabilities = {};
+    };
 //
-  constexpr static inline uint8_t MaxUVSlots = 32;
-  std::bitset usedUVSlots;
-  // the tangent frames are a subset of used UV slots, unless there's an anisotropic BRDF involved
-  std::bitset usedTangentFrames;
-  };
-  inline std::span getMaterials() const {return m_materials;}
-
-  // We take the trees from the forest, and canonicalize them into our weird Domain Specific IR with Upside down expression trees.
-  // Process:
-  // 1. Schusslerization (for derivative map usage) and Decompression (duplicating nodes, etc.)
-  // 2. Canonicalize Expressions (Transform into Sum-Product form, DCE, etc.)
-  // 3. Split BTDFs (front vs. back part), reciprocate Etas
-  // 4. Simplify and Hoist Layer terms (delta sampling property)
-  // 5. Subexpression elimination
+    struct SOriented
+    {
+      // null means no material
+      typed_pointer_type root = {};
+      //
+      SMetadata metadata = {};
+    };
+    SOriented front = {};
+    SOriented back = {};
+    // TODO: more detailed debug info
+    CNodePool::typed_pointer_type debugInfo = {};
+  };
+  inline std::span<const SMaterial> getMaterials() const {return m_materials;}
+
+  struct SMaterialHandle
+  {
+    constexpr static inline uint32_t Invalid = ~0u;
+    explicit inline operator bool() const {return value!=Invalid;}
+
+    uint32_t value = Invalid;
+  };
+  constexpr static inline SMaterialHandle BlackholeMaterialHandle = { 0u };
+  //
+  inline void reset()
+  {
+    getObjectPool().reset();
+    m_materials.clear();
+    m_uniqueNodes.clear();
+    // re-make and re-insert the unique nodes
+    const SBasicNodes tmp = SBasicNodes(this);
+    assert(m_basicNodes==tmp);
+    // create the `BlackholeMaterialHandle`
+    m_materials = {SMaterial{.debugInfo=getObjectPool().emplace("CTrueIR's BlackHole Material")}};
+  }
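+
+  // Illustrative metadata query (a sketch; assumes `core::bitflag::hasFlags`, the handle
+  // and variable names are examples):
+  //
+  //  const auto& mat = ir->getMaterials()[handle.value];
+  //  if (mat.front.metadata.capabilities.hasFlags(SMaterial::SMetadata::ECapabilityBits::NonDelta))
+  //  {
+  //    // the front side can be evaluated against point lights (or other samplings)
+  //  }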
+  //! Utilities for copying from another IR into ours while optimizing
+  // This one doesn't touch your coated nodes, but only the first coating
+  inline bool rewriteSingleLayer(typed_pointer_type& singleLayer, SMaterial::SMetadata& metadata, CTrueIR* srcIR)
+  {
+    SRewriteSession session = SRewriteArgs{.src=srcIR,.dst=this,.pMetadata=&metadata};
+    return session.rewriteSingleLayer(singleLayer);
+  }
+  inline bool rewrite(SMaterial::SOriented& material, CTrueIR* srcIR)
+  {
+    SRewriteSession session = SRewriteArgs{.src=srcIR,.dst=this,.pMetadata=&material.metadata};
+    return session.rewrite(material.root);
+  }
+  inline bool rewrite(SMaterial& material, CTrueIR* srcIR)
+  {
+    SRewriteSession session = SRewriteArgs{.src=srcIR,.dst=this};
+    if (material.front.root)
+    {
+      session.changeMetadata(&material.front.metadata);
+      session.rewrite(material.front.root);
+    }
+    if (material.back.root)
+    {
+      session.changeMetadata(&material.back.metadata);
+      session.rewrite(material.back.root);
+    }
+    if (material.debugInfo && srcIR!=this)
+    {
+      // TODO: copy the debug info across into the new pool
+      assert(false);
+      material = {};
+      return false;
+    }
+    return true;
+  }
+
+  // TODO: Optimization passes on the IR
 // It is the backend's job to handle:
 // - constant encoding precision (scale factors, UV matrices, IoRs)
 // - multiscatter compensation
 // - compilation failure due to unsupported complex layering
-  bool addMaterials(const CFrontendIR* forest);
+  SMaterialHandle addMaterial(SMaterial material, CTrueIR* srcIR=nullptr)
+  {
+    if (!srcIR)
+      srcIR = this;
+    if (rewrite(material,srcIR))
+    {
+      m_materials.push_back(material);
+      return {.value=static_cast<uint32_t>(m_materials.size()-1)};
+    }
+    else
+      return {};
+  }
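+
+  // Illustrative call sequence (a sketch; `rewrite` runs as part of `addMaterial`, and
+  // the `trueIR`/`sourceIR`/`frontRoot` names are examples):
+  //
+  //  CTrueIR::SMaterial material = {};
+  //  material.front.root = frontRoot; // a `COrientedLayer` handle built in `sourceIR`
+  //  const auto handle = trueIR->addMaterial(material,sourceIR.get());
+  //  if (!handle)
+  //  {
+  //    // the rewrite failed and the material was rejected
+  //  }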
+
+  // For Debug Visualization
+  struct SDotPrinter final
+  {
+    public:
+      inline SDotPrinter() = default;
+      inline SDotPrinter(const CTrueIR* ir) : m_ir(ir) {}
+      // assign in reverse because we want materials to print in order
+      inline SDotPrinter(const CTrueIR* ir, std::span<const SMaterial> roots) : m_ir(ir)//, layerStack(roots.rbegin(),roots.rend())
+      {
+        // should probably size it better, if I knew the total node count allocated or live
+        visitedNodes.reserve(roots.size()<<4);
+        layerStack.reserve(roots.size());
+        for (const auto& m : roots)
+        {
+          layerStack.push_back(m.front.root);
+          layerStack.push_back(m.back.root);
+        }
+      }
+
+      inline void reset(const CTrueIR* ir)
+      {
+        visitedNodes.clear();
+        layerStack.clear();
+        nodeStack.clear();
+        m_ir = ir;
+      }
+
+      NBL_API2 void operator()(std::ostringstream& output);
+      inline core::string operator()()
+      {
+        std::ostringstream tmp;
+        operator()(tmp);
+        return tmp.str();
+      }
+
+      core::unordered_set> visitedNodes;
+      // TODO: track layering depth and indent accordingly?
+      core::vector> layerStack;
+      core::vector> nodeStack;
+
+    private:
+      const CTrueIR* m_ir;
+  };
+
+  // Use `_count` instead of `Count` because of how wonkily this stuff gets used
+  template<uint8_t Count, class StringConstIterator>
+  static inline void printDotParameterSet(const SParameterSet<Count>& _set, const uint8_t _count, std::ostringstream& sstr, const core::string& selfID, StringConstIterator paramNameBegin={}, const bool uvRequired=false)
+  {
+    bool imageUsed = false;
+    for (uint8_t i=0; i<_count; i++)
+    {
+      const auto paramID = selfID+"_param"+std::to_string(i);
+      if (_set.params[i].view)
+        imageUsed = true;
+      _set.params[i].printDot(sstr,paramID);
+      sstr << "\n\t" << selfID << " -> " << paramID;
+      if (paramNameBegin)
+        sstr << " [label=\"" << *(paramNameBegin++) << "\"]";
+      else
+        sstr << " [label=\"Param " << std::to_string(i) << "\"]";
+    }
+    if (uvRequired || imageUsed)
+    {
+      const auto uvTransformID = selfID+"_uvTransform";
+      sstr << "\n\t" << uvTransformID << " [label=\"uvSlot = " << std::to_string(_set.uvSlot()) << "\\n";
+      printMatrix(sstr,_set.uvTransform);
+      sstr << "\"]";
+      sstr << "\n\t" << selfID << " -> " << uvTransformID << "[label=\"UV Transform\"]";
+    }
+  }
 protected:
-  using CNodePool::CNodePool;
+  struct SRewriteArgs final
+  {
+    inline bool needDeepCopy() const {return src!=dst;}
-  core::vector m_materials;
-  core::unordered_map> m_uniqueNodes;
+    const CTrueIR* const src;
+    CTrueIR* const dst;
+    const SMaterial::SMetadata* pMetadata;
+  };
+  struct SRewriteSession final
+  {
+    public:
+      inline SRewriteSession(const SRewriteArgs& _args) : args(_args) {}
+      NBL_API2 ~SRewriteSession();
+
+      inline void changeMetadata(SMaterial::SMetadata* pMetadata) {args.pMetadata = pMetadata;}
+
+      NBL_API2 bool rewrite(typed_pointer_type& oriented);
+      NBL_API2 bool rewriteSingleLayer(typed_pointer_type& oriented);
+
+    private:
+      template<class NodeType, typename... FuncArgs>
+      inline typed_pointer_type<NodeType> emplace(FuncArgs&&... funcArgs)
+      {
+        const auto retval = args.dst->getObjectPool().emplace<NodeType>(1u,std::forward<FuncArgs>(funcArgs)...);
+        if (retval)
+          createdNodes.push_back(retval);
+        return retval;
+      }
+
+      SRewriteArgs args;
+      core::vector> createdNodes;
+      bool success = true;
+  };
+
+  inline core::string getNodeID(const typed_pointer_type handle) const { return core::string("_") + std::to_string(handle.value); }
+  inline core::string getLabelledNodeID(const typed_pointer_type handle) const
+  {
+    const INode* node = getObjectPool().deref(handle);
+    assert(node);
+    core::string retval = getNodeID(handle);
+    retval += " [label=\"";
+    retval += node->getTypeName();
+    retval += "\\n" + system::to_string(node->getHash());
+    // maybe a label suffix?
+    retval += "\"]";
+    return retval;
+  }
+
+  NBL_API2 CTrueIR(creation_params_type&& params);
+
+  core::vector<SMaterial> m_materials;
+  // TODO: either we put the typeid in the hash, or we have a type of hashmap per type
+  core::unordered_map> m_uniqueNodes;
+  friend struct SBasicNodes;
+  const SBasicNodes m_basicNodes;
+};
-//! DAG (baked)
+template class CTrueIR::CSpectralVariable<CTrueIR::ISpectralVariableFactor>;
+
+NBL_ENUM_ADD_BITWISE_OPERATORS(CTrueIR::SMaterial::SMetadata::ECapabilityBits)
+
+}
+
+//
+namespace nbl::system::impl
+{
+template<>
+struct to_string_helper<asset::material_compiler3::CTrueIR::ISpectralVariable::ESemantics>
+{
+  using type = asset::material_compiler3::CTrueIR::ISpectralVariable::ESemantics;
+
+  static inline std::string __call(const type value)
+  {
+    switch (value)
+    {
+      case type::NoneUndefined:
+        return "NoneUndefined";
+      case type::Fixed3_SRGB:
+        return "Fixed3_SRGB";
+      case type::Fixed3_DCI_P3:
+        return "Fixed3_DCI_P3";
+      case type::Fixed3_BT2020:
+        return "Fixed3_BT2020";
+      case type::Fixed3_AdobeRGB:
+        return "Fixed3_AdobeRGB";
+      case type::Fixed3_AcesCG:
+        return "Fixed3_AcesCG";
+      default:
+        break;
+    }
+    return "";
+  }
+};
+}
+
+//
+namespace nbl::asset::material_compiler3
+{
+
+inline bool CTrueIR::ISpectralVariable::valid(const system::logger_opt_ptr logger) const
+{
+  const auto knotCount = getKnotCount();
+  // non-monochrome spectral variable
+  if (const auto semantic=getSemantics(); knotCount>1)
+  switch (semantic)
+  {
+    case ESemantics::Fixed3_SRGB: [[fallthrough]];
+    case ESemantics::Fixed3_DCI_P3: [[fallthrough]];
+    case ESemantics::Fixed3_BT2020: [[fallthrough]];
+    case ESemantics::Fixed3_AdobeRGB: [[fallthrough]];
+    case ESemantics::Fixed3_AcesCG:
+      if (knotCount!=3)
+      {
+        logger.log("Semantic %s is only usable with 3 knots, this has %d knots",system::ILogger::ELL_ERROR,system::to_string(semantic).c_str(),knotCount);
+        return false;
+      }
+      break;
+    default:
+      logger.log("Invalid Semantic %s",system::ILogger::ELL_ERROR,system::to_string(semantic).c_str());
+      return false;
+  }
+  for (auto i=0u; i<knotCount; i++)
+  {
+    const auto& param = getParameter(i);
+    if (param.scale<std::numeric_limits<float>::infinity() && (!param.view || param.viewChannel<getFormatChannelCount(param.view->getCreationParameters().format)))
+      continue;
+    logger.log("Parameter at knot %u is invalid",system::ILogger::ELL_ERROR,i);
+    return false;
+  }
+  return true;
+}
+
+inline void CTrueIR::SParameter::printDot(std::ostringstream& sstr, const core::string& selfID) const
+{
+  sstr << "\n\t" << selfID << "[label=\"scale = " << std::to_string(scale);
+  if (view)
+    sstr << "\\nchannel = " << std::to_string(viewChannel);
+  sstr << "\"]";
+  // TODO: do specialized printing for image views (they need to be gathered into a view set -> need a printing context struct)
+  /*
+  struct SDotPrintContext
+  {
+    std::ostringstream* sstr;
+    core::unordered_map* usedViews;
+    uint16_t indentation = 0;
+  };
+  */
+  if (view)
+    sstr << "\n\t" << selfID << " -> _view_" << std::to_string(reinterpret_cast<size_t>(view.get()));
+}
+
+inline void CTrueIR::SBasicNDFParams::printDot(std::ostringstream& sstr, const core::string& selfID) const
+{
+  constexpr const char* paramSemantics[] = {
+    "dh/du",
+    "dh/dv",
+    "alpha_u",
+    "alpha_v"
+  };
+  SParameterSet<4>::printDot(sstr,selfID,paramSemantics,!definitelyIsotropic());
+  if (!stretchInvariant())
+  {
+    const auto referenceID = selfID+"_reference";
+    sstr << "\n\t" << referenceID << " [label=\"";
+    printMatrix(sstr,reference);
+    sstr << "\"]";
+    sstr << "\n\t" << selfID << " -> " << referenceID << " [label=\"Stretch Reference\"]";
+  }
+}
+
+// specialization of parameter hashing
+template<typename Dummy>
+struct core::blake3_hasher::update_impl<asset::material_compiler3::CTrueIR::SParameter,Dummy>
+{
+  using input_t = asset::material_compiler3::CTrueIR::SParameter;
+
+  static inline void __call(blake3_hasher& hasher, const input_t& param)
+  {
+    hasher << param.scale;
+    if (!param.view)
+      return;
+    const auto& viewParams = param.view->getCreationParameters();
+    // TODO: hash it like CAssetConverter
+    {
+      hasher << ptrdiff_t(param.view.get());
+    }
+    // in the future this might change
+    hasher << param.viewChannel;
+    hasher << param.linearMagnification;
+    bool wrapModeUsesBorder = false;
+    auto hashWrap = [&](const hlsl::TextureClamp wrap)->void
+    {
+      hasher << wrap;
+      switch (wrap)
+      {
+        case hlsl::TextureClamp::ETC_CLAMP_TO_BORDER:
+          wrapModeUsesBorder = true;
+          break;
+        default:
+          break;
+      }
+    };
+    using view_type_e = asset::IImageView<asset::ICPUImage>::E_TYPE;
+    switch (viewParams.viewType)
+    {
+      case view_type_e::ET_3D:
+        hashWrap(param.wrapW);
+        [[fallthrough]];
+      case view_type_e::ET_2D: [[fallthrough]];
+      case view_type_e::ET_2D_ARRAY: [[fallthrough]];
+      case view_type_e::ET_CUBE_MAP: [[fallthrough]];
+      case view_type_e::ET_CUBE_MAP_ARRAY:
+        hashWrap(param.wrapV);
+        [[fallthrough]];
+      default:
+        hashWrap(param.wrapU);
+        break;
+    }
+    if (wrapModeUsesBorder)
+      hasher << param.borderColor;
+  }
+};
+template<uint8_t Count, typename Dummy>
+struct core::blake3_hasher::update_impl<asset::material_compiler3::CTrueIR::SParameterSet<Count>,Dummy>
+{
+  using input_t = asset::material_compiler3::CTrueIR::SParameterSet<Count>;
+
+  static inline void __call(blake3_hasher& hasher, const input_t& input)
+  {
+    bool noTextures = true;
+    for (uint8_t i=0; i<Count; i++)
+    {
+      hasher << input.params[i];
+      noTextures = noTextures && !input.params[i].view;
+    }
+    if (!noTextures)
+    {
+      hasher << input.uvTransform;
+      hasher << input.uvSlot();
+    }
+  }
+};
+template<typename Dummy>
+struct core::blake3_hasher::update_impl<asset::material_compiler3::CTrueIR::SBasicNDFParams,Dummy>
+{
+  using input_t = asset::material_compiler3::CTrueIR::SBasicNDFParams;
+
+  static inline void __call(blake3_hasher& hasher, const input_t& input)
+  {
+    using type_e = input_t::EParamType;
+    const type_e type = input.determineParamType();
+    update_impl<uint8_t>::__call(hasher,static_cast<uint8_t>(type));
+    update_impl<asset::material_compiler3::CTrueIR::SParameterSet<4>>::__call(hasher,input);
+    hasher << input.getDistribution();
+    // reference stretch can be applied on non-mapped NDFs too
+    if (!input.stretchInvariant())
+      hasher << input.reference;
+  }
+};
 
} // namespace nbl::asset::material_compiler3
diff --git a/include/nbl/core/alloc/SimpleBlockBasedAllocator.h b/include/nbl/core/alloc/SimpleBlockBasedAllocator.h
index 503a673c78..dce33987f8 100644
--- a/include/nbl/core/alloc/SimpleBlockBasedAllocator.h
+++ b/include/nbl/core/alloc/SimpleBlockBasedAllocator.h
@@ -404,7 +404,7 @@ class SimpleBlockBasedAllocator final : protected
 block_t* block = entry.second;
 if (recycledBlocks.size()addrAlloc.reset();
+ block->getAllocator().reset();
 const auto id = m_blockIndexAlloc.alloc_addr(1,1);
 assert(id!=block_id_alloc_t::invalid_address);
 recycledBlocks[id] = block;
@@ -533,6 +533,33 @@ class SimpleBlockBasedAllocatorMT final
 };
 
 // no aliases
+
+// specialize the blake3 hasher
+template
+struct blake3_hasher::update_impl,Dummy>
+{
+ static inline void __call(blake3_hasher& hasher, const ConstHandle input)
+ {
+ update_impl::__call(hasher,input.value);
+ }
+};
+template
+struct blake3_hasher::update_impl >,Dummy>
+{
+ static inline void __call(blake3_hasher& hasher, const Handle > input)
+ {
+ update_impl::__call(hasher,input.value);
+ }
+};
+template
+struct blake3_hasher::update_impl > >,Dummy>
+{
+ static inline void __call(blake3_hasher& hasher, const TypedHandle > > input)
+ {
+ update_impl::__call(hasher,input.value);
+ }
+};
+
 }
 
 namespace std
diff --git a/include/nbl/core/containers/CMemoryPool.h b/include/nbl/core/containers/CMemoryPool.h
index 118a80a695..207cc36362 100644
--- a/include/nbl/core/containers/CMemoryPool.h
+++ b/include/nbl/core/containers/CMemoryPool.h
@@ -38,6 +38,12 @@ class CMemoryPool final : public Uncopyable
 using creation_params_type = block_allocator_st_type::SCreationParams;
 inline CMemoryPool(creation_params_type&& params) : m_block_alctr(std::move(params)) {}
+
+ //
+ inline void reset()
+ {
+ m_block_alctr.reset();
+ }
 
 //
 template requires (!std::is_const_v)
diff --git a/include/nbl/core/containers/CObjectPool.h b/include/nbl/core/containers/CObjectPool.h
index 71c54be5ed..b49c668e71 100644
--- a/include/nbl/core/containers/CObjectPool.h
+++ b/include/nbl/core/containers/CObjectPool.h
@@ -86,6 +86,15 @@ class CObjectPool final : public IObjectPoolBase
 for (auto& entry : m_allocations)
 destroy(deref(entry.first),entry.second.count,entry.second.stride);
 }
+
+ //
+ inline void reset()
+ {
+ for (auto& entry : m_allocations)
+ destroy(deref(entry.first),entry.second.count,entry.second.stride);
+ m_allocations.clear();
+ m_pool.reset();
+ }
 
 //
 struct check_t
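For context, the `update_impl` pattern in the hunks above is the generic way a type opts into `core::blake3_hasher`; a minimal sketch for a hypothetical POD (illustrative only, `MyPOD` is not part of this patch):

	namespace nbl::core
	{
	template<typename Dummy>
	struct blake3_hasher::update_impl<MyPOD,Dummy>
	{
		static inline void __call(blake3_hasher& hasher, const MyPOD& input)
		{
			// members that already have an `update_impl` chain through `operator<<`
			hasher << input.someMember;
		}
	};
	}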
diff --git a/include/nbl/core/containers/DoublyLinkedList.h b/include/nbl/core/containers/DoublyLinkedList.h
index af7edcc01a..d6757d9843 100644
--- a/include/nbl/core/containers/DoublyLinkedList.h
+++ b/include/nbl/core/containers/DoublyLinkedList.h
@@ -1,9 +1,8 @@
-// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
+// Copyright (C) 2018-2026 - DevSH Graphics Programming Sp. z O.O.
 // This file is part of the "Nabla Engine".
 // For conditions of distribution and use, see copyright notice in nabla.h
-
-#ifndef __NBL_CORE_CONTAINERS_DOUBLY_LINKED_LIST_H_INCLUDED__
-#define __NBL_CORE_CONTAINERS_DOUBLY_LINKED_LIST_H_INCLUDED__
+#ifndef _NBL_CORE_CONTAINERS_DOUBLY_LINKED_LIST_H_INCLUDED_
+#define _NBL_CORE_CONTAINERS_DOUBLY_LINKED_LIST_H_INCLUDED_
 
 #include "nbl/core/alloc/PoolAddressAllocator.h"
 
@@ -11,9 +10,8 @@
 
 #include 
 
-namespace nbl
-{
-namespace core
+
+namespace nbl::core
 {
 
 //Struct for use in a doubly linked list. Stores data and pointers to next and previous elements the list, or invalid iterator if it is first/last
@@ -65,6 +63,7 @@ struct alignas(void*) SDoublyLinkedNode
 }
 };
 
+// TODO: this could use the ObjectPool that CNodePool does
 template> >
 class DoublyLinkedList
 {
@@ -99,6 +98,9 @@ class DoublyLinkedList
 return (m_array + address);
 }
 
+ //
+ inline bool empty() const {return m_begin==invalid_iterator;}
+
 //get node ptr of the first item in the list
 inline node_t* getBegin() { return m_array + m_begin; }
 inline const node_t* getBegin() const { return m_array + m_begin; }
@@ -120,9 +122,9 @@ class DoublyLinkedList
 }
 
 template
- inline void emplaceFront(Args&&... args)
+ inline node_t* emplaceFront(Args&&... args)
 {
- insertAt(reserveAddress(), std::forward(args)...);
+ return insertAt(reserveAddress(), std::forward(args)...);
 }
 
 /**
@@ -328,7 +330,7 @@ class DoublyLinkedList
 //create a new node which stores data at already allocated address
 template
- inline void insertAt(uint32_t addr, Args&&... args)
+ inline node_t* insertAt(uint32_t addr, Args&&...
args) { assert(addr < m_cap); assert(addr != invalid_iterator); @@ -342,6 +344,7 @@ class DoublyLinkedList if (m_back == invalid_iterator) m_back = addr; m_begin = addr; + return n; } /** @@ -506,8 +509,6 @@ std::reverse_iterator::const_iterato return std::reverse_iterator(const_iterator(this, m_begin)); } -} //namespace core -} //namespace nbl - +} //namespace nbl::core #endif diff --git a/include/nbl/core/hash/blake.h b/include/nbl/core/hash/blake.h index fb91c9969f..a50d587e86 100644 --- a/include/nbl/core/hash/blake.h +++ b/include/nbl/core/hash/blake.h @@ -9,12 +9,19 @@ #include "blake3.h" #include +#include namespace nbl::core { struct blake3_hash_t final { + constexpr static inline core::blake3_hash_t EmptyInput() + { + constexpr uint64_t data[4] = {0xa6a1f9f5b94913afull,0x49c9dc36ea4d40a0ull,0xb712c1adc925cb9bull,0x62321fe4ca939accull}; + return std::bit_cast(data); + } + inline bool operator==(const blake3_hash_t&) const = default; // could initialize this to a hash of a zero-length array, diff --git a/include/nbl/ext/MitsubaLoader/SContext.h b/include/nbl/ext/MitsubaLoader/SContext.h index 17b8f48bb3..dfba92d0fa 100644 --- a/include/nbl/ext/MitsubaLoader/SContext.h +++ b/include/nbl/ext/MitsubaLoader/SContext.h @@ -38,7 +38,7 @@ struct SContext final inline void writeFrontendForestDot3(system::ISystem* system, const system::path& filepath) { - asset::material_compiler3::CFrontendIR::SDotPrinter printer = {frontIR.get(),frontIR->getMaterials()}; + asset::material_compiler3::CFrontendIR::SDotPrinter printer = {frontIR.get(),debugAllMaterials}; writeDot3File(system,filepath,printer); } @@ -48,6 +48,10 @@ struct SContext final meta->setGeometryCollectionMeta(std::move(groupCache)); } + using true_ir_t = asset::material_compiler3::CTrueIR; + using frontend_ir_t = asset::material_compiler3::CFrontendIR; + inline const frontend_ir_t* getMaterialFrontend() const {return frontIR.get();} + const asset::IAssetLoader::SAssetLoadContext inner; asset::IAssetLoader::IAssetLoaderOverride* override_; std::function interm_getAssetInHierarchy; @@ -56,7 +60,6 @@ struct SContext final core::smart_refctd_ptr scene; private: - using frontend_ir_t = asset::material_compiler3::CFrontendIR; using frontend_material_t = frontend_ir_t::typed_pointer_type; // not `frontend_ir_t::CEmitter` because the color factor gets multiplied in using frontend_emitter_t = frontend_ir_t::typed_pointer_type; @@ -65,10 +68,12 @@ struct SContext final // void writeDot3File(system::ISystem* system, const system::path& filepath, frontend_ir_t::SDotPrinter& printer); // - hlsl::float32_t2x3 getParameters(const std::span out, const CElementTexture::FloatOrTexture& src); - hlsl::float32_t2x3 getParameters(const std::span out, const CElementTexture::SpectrumOrTexture& src); - frontend_ir_t::SParameter getTexture(const CElementTexture* tex, hlsl::float32_t2x3* outUvTransform); - frontend_ir_t::SParameter genProfile(const CElementEmissionProfile* profile); + hlsl::float32_t2x3 getParameters(const std::span out, const CElementTexture::FloatOrTexture& src); + void getParameters(asset::material_compiler3::CFrontendIR::CSpectralVariableExpr* const out, const CElementTexture::FloatOrTexture& src); + hlsl::float32_t2x3 getParameters(const std::span out, const CElementTexture::SpectrumOrTexture& src); + void getParameters(asset::material_compiler3::CFrontendIR::CSpectralVariableExpr* const out, const CElementTexture::SpectrumOrTexture& src); + true_ir_t::SParameter getTexture(const CElementTexture* tex, hlsl::float32_t2x3* outUvTransform); + 
true_ir_t::SParameter genProfile(const CElementEmissionProfile* profile); // core::unordered_map shapeCache; @@ -77,7 +82,7 @@ struct SContext final // core::unordered_map emitterCache; core::unordered_map bsdfCache; - core::unordered_map profileCache; + core::unordered_map profileCache; #if 0 // stuff that belongs in the Material Compiler backend //image, sampler @@ -86,8 +91,10 @@ struct SContext final core::map,float> derivMapCache; #endif core::smart_refctd_ptr frontIR; + // for a debug print + core::vector> debugAllMaterials; // common frontend nodes - frontend_ir_t::typed_pointer_type unityFactor; + frontend_ir_t::typed_pointer_type unityFactor; frontend_ir_t::typed_pointer_type errorBRDF; frontend_ir_t::typed_pointer_type errorMaterial, unsupportedPhong, unsupportedWard; frontend_ir_t::typed_pointer_type deltaTransmission; diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h index 1f8988566e..d746f66ce6 100644 --- a/include/nbl/system/to_string.h +++ b/include/nbl/system/to_string.h @@ -31,6 +31,31 @@ struct to_string_helper } }; +template<> +struct to_string_helper +{ + static std::string __call(const core::blake3_hash_t& value) + { + // fast base64 optimized for this without leaking deps + std::string retval(44,'='); + constexpr const char* base = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + auto out = retval.data(); + // every 3 bytes of input create 4 bytes of output + for (auto i=0; i<11; i++) + { + const uint8_t bytes[3] = {value.data[3*i+0],value.data[3*i+1],i!=10 ? value.data[3*i+2]:uint8_t(0)}; + *(out++) = base[bytes[0]>>2]; + // take bottom bits of first byte to be top of the 6 bit, and 4 top bits of next byte to be bottom 4 bits of 6 bit + *(out++) = base[((bytes[0]&0x03u)<<4)|(bytes[1]>>4)]; + // take bottom bits of the second byte to be top 4 bits of next byte, and top 2 bits of last byte to be bottom 2 + *(out++) = base[((bytes[1]&0x0fu)<<2)|(bytes[2]>>6)]; + *(out++) = base[bytes[2]&0x3Fu]; + } + // padding is inferred from length + return retval; + } +}; + template<> struct to_string_helper { diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 9c994bfa41..502d0c70c8 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -169,6 +169,7 @@ set(NBL_ASSET_SOURCES # Materials asset/material_compiler3/CFrontendIR.cpp + asset/material_compiler3/CTrueIR.cpp # Shaders asset/utils/ISPIRVOptimizer.cpp diff --git a/src/nbl/asset/material_compiler3/CFrontendIR.cpp b/src/nbl/asset/material_compiler3/CFrontendIR.cpp index d77ac287da..5af5f25e01 100644 --- a/src/nbl/asset/material_compiler3/CFrontendIR.cpp +++ b/src/nbl/asset/material_compiler3/CFrontendIR.cpp @@ -1,6 +1,7 @@ // Copyright (C) 2022-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h
+#define _NBL_ASSET_MATERIAL_COMPILER3_C_FRONTEND_IR_CPP_
 #include "nbl/asset/material_compiler3/CFrontendIR.h"
 
 #include "nbl/builtin/hlsl/complex.hlsl"
@@ -11,6 +12,7 @@ namespace nbl::asset::material_compiler3
 {
 constexpr auto ELL_ERROR = nbl::system::ILogger::E_LOG_LEVEL::ELL_ERROR;
+constexpr auto ELL_DEBUG = nbl::system::ILogger::E_LOG_LEVEL::ELL_DEBUG;
 using namespace nbl::system;
 
 bool CFrontendIR::CEmitter::invalid(const SInvalidCheckArgs& args) const
@@ -54,7 +56,7 @@ bool CFrontendIR::CFresnel::invalid(const SInvalidCheckArgs& args) const
 const auto knotCount = imagEta->getKnotCount();
 for (uint8_t i=0; i<knotCount; i++)
 {
- const auto& param = imagEta->getParam(i);
+ const auto& param = imagEta->getParameter(i);
 if (param.scale==0.f)
 continue;
 args.logger.log("Fresnels used for BTDFs cannot have Imaginary Eta, scale must be 0.f for all knots, is %.*e at knot %u",ELL_ERROR,param.scale,i);
@@ -116,41 +118,98 @@ bool CFrontendIR::CCookTorrance::invalid(const SInvalidCheckArgs& args) const
 }
 
-auto CFrontendIR::reciprocate(const typed_pointer_type orig) -> typed_pointer_type
+auto CFrontendIR::deepCopy(const typed_pointer_type orig, const CFrontendIR* pSourceIR) -> typed_pointer_type
 {
- auto& pool = getObjectPool();
- struct SEntry
+ auto& dstPool = getObjectPool();
+ // if not explicitly another, then it's ours
+ if (!pSourceIR)
+ pSourceIR = this;
+ const auto& srcPool = pSourceIR->getObjectPool();
+
+ core::vector> stack;
+ stack.reserve(32);
+ stack.push_back(orig);
+ // use a hashmap to not explore the whole DAG
+ core::unordered_map,typed_pointer_type> substitutions;
+ while (!stack.empty())
 {
- typed_pointer_type handle;
- bool visited = false;
- };
- core::vector stack;
+ const auto entry = stack.back();
+ const auto* const node = srcPool.deref(entry);
+ if (!node) // this is an error
+ return {};
+ const auto childCount = node->getChildCount();
+ if (auto& copyH=substitutions[entry]; !copyH)
+ {
+ for (uint8_t c=0; c<childCount; c++)
+ {
+ const auto childH = node->getChildHandle(c);
+ if (auto child=srcPool.deref(childH); !child)
+ continue; // this is not an error
+ stack.push_back(childH);
+ }
+ // copy copies everything including child handles
+ copyH = node->copy(this);
+ if (!copyH)
+ return {};
+ }
+ else
+ {
+ auto* const copy = dstPool.deref(copyH);
+ copy->debugInfo = copyDebugInfo(node->debugInfo,pSourceIR);
+ for (uint8_t c=0; c<childCount; c++)
+ {
+ const auto childH = node->getChildHandle(c);
+ if (!childH)
+ continue;
+ auto found = substitutions.find(childH);
+ assert(found!=substitutions.end());
+ copy->setChild(c,found->second);
+ }
+ stack.pop_back();
+ }
+ }
+ return substitutions[orig];
+}
+
+auto CFrontendIR::reciprocate(const typed_pointer_type orig, const CFrontendIR* pSourceIR) -> typed_pointer_type
+{
+ if (!orig)
+ return {};
+
+ auto& dstPool = getObjectPool();
+ // if not explicitly another, then it's ours
+ if (!pSourceIR)
+ pSourceIR = this;
+ const auto& srcPool = pSourceIR->getObjectPool();
+
+ core::vector> stack;
 stack.reserve(32);
- stack.push_back({.handle=orig});
+ stack.push_back(orig);
+ // use a hashmap to not explore the whole DAG
+ core::unordered_set> visited;
 // use a hashmap because of holes in child arrays
 core::unordered_map,typed_pointer_type> substitutions;
 while (!stack.empty())
 {
- auto& entry = stack.back();
- const auto* const node = pool.deref(entry.handle);
+ const auto entry = stack.back();
+ const auto* const node = srcPool.deref(entry);
 if (!node) // this is an error
 return {};
 const auto childCount = node->getChildCount();
- if (entry.visited)
+ if (auto [it,inserted] = visited.insert(entry); inserted)
 {
- entry.visited = true;
 for (uint8_t c=0; c<childCount; c++)
 {
 const auto childH = node->getChildHandle(c);
- if (auto child=pool.deref(childH); !child)
+ if (auto child=srcPool.deref(childH); !child)
 continue; // this is not an error
- stack.push_back({.handle=childH});
+ stack.push_back(childH);
 }
 }
 else
 {
 const bool needToReciprocate = node->reciprocatable();
- bool needToCopy = needToReciprocate;
+ bool needToCopy = pSourceIR!=this || needToReciprocate;
 // if one descendant has changed then we need to copy the node
 if (!needToCopy)
 {
@@ -165,12 +224,15 @@ auto CFrontendIR::reciprocate(const typed_pointer_type orig) ->
 if (needToCopy)
 {
 const auto copyH = node->copy(this);
- // copy copies everything including children
- auto* const copy = pool.deref(copyH);
+ // copy copies everything including child handles
+ auto* const copy = dstPool.deref(copyH);
 if (!copy)
 return {};
 if (needToReciprocate)
 node->reciprocate(copy);
+ // reciprocate might take full copies, and copy pointers across, so do all modifications after
+ if (pSourceIR!=this)
+ copy->debugInfo = copyDebugInfo(node->debugInfo,pSourceIR);
 // only changed children need to be set
 for (uint8_t c=0; c<childCount; c++)
 {
 const auto childH = node->getChildHandle(c);
 if (auto found=substitutions.find(childH); found!=substitutions.end())
 copy->setChild(c,found->second);
 }
- substitutions.insert({entry.handle,copyH});
+ substitutions.insert({entry,copyH});
 }
 stack.pop_back();
 }
@@ -191,15 +253,28 @@ auto CFrontendIR::reciprocate(const typed_pointer_type orig) ->
 return substitutions[orig];
 }
 
-auto CFrontendIR::copyLayers(const typed_pointer_type orig) -> typed_pointer_type
+auto CFrontendIR::copyLayers(const typed_pointer_type orig, const CFrontendIR* pSourceIR) -> typed_pointer_type
 {
- auto& pool = getObjectPool();
- auto copyH = pool.emplace();
+ auto& dstPool = getObjectPool();
+ // if not explicitly another, then it's ours
+ if (!pSourceIR)
+ pSourceIR = this;
+ const auto& srcPool = pSourceIR->getObjectPool();
+
+ auto copyH = dstPool.emplace();
 {
- auto* outLayer = pool.deref(copyH);
- for (const auto* layer=pool.deref(orig); true; layer=pool.deref(layer->coated))
+ auto* outLayer = dstPool.deref(copyH);
+ for (const auto* layer=srcPool.deref(orig); true; layer=srcPool.deref(layer->coated))
 {
 *outLayer = *layer;
+ // need to deep copy the nodes
+ if (pSourceIR!=this)
+ {
+ outLayer->debugInfo = copyDebugInfo(layer->debugInfo,pSourceIR);
+ outLayer->brdfBottom = deepCopy(layer->brdfBottom,pSourceIR)._const_cast();
+ outLayer->btdf = deepCopy(layer->btdf,pSourceIR)._const_cast();
+ outLayer->brdfTop = deepCopy(layer->brdfTop,pSourceIR)._const_cast();
+ }
 if (!layer->coated)
 {
 // terminate the new stack
@@ -207,33 +282,47 @@ auto CFrontendIR::copyLayers(const typed_pointer_type orig) -> typ
 break;
 }
 // continue the new stack
- outLayer->coated = pool.emplace();
- outLayer = pool.deref(outLayer->coated);
+ outLayer->coated = dstPool.emplace();
+ outLayer = dstPool.deref(outLayer->coated);
 }
 }
 return copyH;
 }
 
-auto CFrontendIR::reverse(const typed_pointer_type orig) -> typed_pointer_type
+auto CFrontendIR::reverse(const typed_pointer_type orig, const CFrontendIR* pSourceIR) -> typed_pointer_type
 {
- auto& pool = getObjectPool();
+ auto& dstPool = getObjectPool();
+ // if not explicitly another, then it's ours
+ if (!pSourceIR)
+ pSourceIR = this;
+ const auto& srcPool = pSourceIR->getObjectPool();
+
 // we build the new linked list from the tail
- auto copyH = pool.emplace();
+ auto copyH = dstPool.emplace();
 {
- auto* outLayer = pool.deref(copyH);
+ auto* outLayer = dstPool.deref(copyH);
 typed_pointer_type underLayerH={};
- for (const auto*
layer=pool.deref(orig); true; layer=pool.deref(layer->coated)) + std::string debugData; debugData.reserve(4096); + for (const auto* layer=srcPool.deref(orig); true; layer=srcPool.deref(layer->coated)) { + debugData = "REVERSED {"; + if (auto* debugInfo=srcPool.deref(layer->debugInfo); debugInfo) + { + const auto span = debugInfo->data(); + debugData += std::string_view(reinterpret_cast(span.data()),span.size()-1); + } + debugData += '}'; + outLayer->debugInfo = dstPool.emplace(debugData); outLayer->coated = underLayerH; // we reciprocate everything because numerator and denominator switch (top and bottom of layer stack) - outLayer->brdfBottom = reciprocate(layer->brdfTop)._const_cast(); - outLayer->btdf = reciprocate(layer->btdf)._const_cast(); - outLayer->brdfTop = reciprocate(layer->brdfBottom)._const_cast(); + outLayer->brdfBottom = reciprocate(layer->brdfTop,pSourceIR)._const_cast(); + outLayer->btdf = reciprocate(layer->btdf,pSourceIR)._const_cast(); + outLayer->brdfTop = reciprocate(layer->brdfBottom,pSourceIR)._const_cast(); if (!layer->coated) break; underLayerH = copyH; - copyH = pool.emplace(); - outLayer = pool.deref(copyH); + copyH = dstPool.emplace(); + outLayer = dstPool.deref(copyH); } } return copyH; @@ -315,21 +404,19 @@ auto CFrontendIR::createNamedFresnel(const std::string_view name) -> typed_point const auto frH = getObjectPool().emplace(); auto* fr = getObjectPool().deref(frH); fr->debugInfo = getObjectPool().emplace(found->first); + fr->orientedRealEta = getObjectPool().emplace(3); { - CSpectralVariable::SCreationParams<3> params = {}; - params.getSemantics() = CSpectralVariable::Semantics::Fixed3_SRGB; - params.knots.params[0].scale = found->second.x.real(); - params.knots.params[1].scale = found->second.y.real(); - params.knots.params[2].scale = found->second.z.real(); - fr->orientedRealEta = getObjectPool().emplace(std::move(params)); + auto* const eta = getObjectPool().deref(fr->orientedRealEta); + eta->setSemantics(CTrueIR::ISpectralVariable::ESemantics::Fixed3_SRGB); + for (uint8_t c=0; c<3; c++) + eta->setParameter(c,{.scale=found->second[c].real()}); } + fr->orientedImagEta = getObjectPool().emplace(3); { - CSpectralVariable::SCreationParams<3> params = {}; - params.getSemantics() = CSpectralVariable::Semantics::Fixed3_SRGB; - params.knots.params[0].scale = found->second.x.imag(); - params.knots.params[1].scale = found->second.y.imag(); - params.knots.params[2].scale = found->second.z.imag(); - fr->orientedImagEta = getObjectPool().emplace(std::move(params)); + auto* const eta = getObjectPool().deref(fr->orientedImagEta); + eta->setSemantics(CTrueIR::ISpectralVariable::ESemantics::Fixed3_SRGB); + for (uint8_t c=0; c<3; c++) + eta->setParameter(c,{.scale=found->second[c].imag()}); } return frH; } @@ -342,8 +429,8 @@ void CFrontendIR::SDotPrinter::operator()(std::ostringstream& str) { while (!exprStack.empty()) { - const auto entry = exprStack.top(); - exprStack.pop(); + const auto entry = exprStack.back(); + exprStack.pop_back(); const auto nodeID = m_ir->getNodeID(entry); str << "\n\t" << m_ir->getLabelledNodeID(entry); const auto* node = m_ir->getObjectPool().deref(entry); @@ -359,7 +446,7 @@ void CFrontendIR::SDotPrinter::operator()(std::ostringstream& str) const auto visited = visitedNodes.find(childHandle); if (visited!=visitedNodes.end()) continue; - exprStack.push(childHandle); + exprStack.push_back(childHandle); visitedNodes.insert(childHandle); } } @@ -399,7 +486,7 @@ void CFrontendIR::SDotPrinter::operator()(std::ostringstream& str) const auto visited 
= visitedNodes.find(root); if (visited!=visitedNodes.end()) return; - exprStack.push(root); + exprStack.push_back(root); visitedNodes.insert(root); }; pushExprRoot(layerNode->brdfTop,"Top BRDF"); @@ -413,55 +500,25 @@ void CFrontendIR::SDotPrinter::operator()(std::ostringstream& str) str << "\n}\n"; } -void CFrontendIR::SParameter::printDot(std::ostringstream& sstr, const core::string& selfID) const -{ - sstr << "\n\t" << selfID << "[label=\"scale = " << std::to_string(scale); - if (view) - { - sstr << "\\nchannel = " << std::to_string(viewChannel); - const auto& viewParams = view->getCreationParameters(); - sstr << "\\nWraps = {" << sampler.TextureWrapU; - if (viewParams.viewType!=ICPUImageView::ET_1D && viewParams.viewType!=ICPUImageView::ET_1D_ARRAY) - sstr << "," << sampler.TextureWrapV; - if (viewParams.viewType==ICPUImageView::ET_3D) - sstr << "," << sampler.TextureWrapW; - sstr << "}\\nBorder = " << sampler.BorderColor; - // don't bother printing the rest, we really don't care much about those - } - sstr << "\"]"; - // TODO: do specialized printing for image views (they need to be gathered into a view set -> need a printing context struct) - /* - struct SDotPrintContext - { - std::ostringstream* sstr; - core::unordered_map* usedViews; - uint16_t indentation = 0; - }; - */ - if (view) - sstr << "\n\t" << selfID << " -> _view_" << std::to_string(reinterpret_cast(view)); -} - -core::string CFrontendIR::CSpectralVariable::getLabelSuffix() const +core::string CFrontendIR::ISpectralVariableExpr::getLabelSuffix() const { if (getKnotCount()<2) return ""; constexpr const char* SemanticNames[] = { - "", + "", "\\nSemantics = Fixed3_SRGB", "\\nSemantics = Fixed3_DCI_P3", "\\nSemantics = Fixed3_BT2020", "\\nSemantics = Fixed3_AdobeRGB", "\\nSemantics = Fixed3_AcesCG" }; - auto pWonky = reinterpret_cast*>(this+1); - return SemanticNames[static_cast(pWonky->getSemantics())]; + return SemanticNames[static_cast(getSemantics())]; } -void CFrontendIR::CSpectralVariable::printDot(std::ostringstream& sstr, const core::string& selfID) const +// TODO: move `printDot` to CNodePool ? +void CFrontendIR::ISpectralVariableExpr::printDot(std::ostringstream& sstr, const core::string& selfID) const { - auto pWonky = reinterpret_cast*>(this+1); - pWonky->knots.printDot(getKnotCount(),sstr,selfID,{}); + CTrueIR::printDotParameterSet(*pWonky(),getKnotCount(),sstr,selfID,{}); } void CFrontendIR::CEmitter::printDot(std::ostringstream& sstr, const core::string& selfID) const @@ -483,33 +540,657 @@ void CFrontendIR::CFresnel::printDot(std::ostringstream& sstr, const core::strin { } -void CFrontendIR::IBxDF::SBasicNDFParams::printDot(std::ostringstream& sstr, const core::string& selfID) const +void CFrontendIR::COrenNayar::printDot(std::ostringstream& sstr, const core::string& selfID) const +{ + ndParams.printDot(sstr,selfID); +} + +void CFrontendIR::CCookTorrance::printDot(std::ostringstream& sstr, const core::string& selfID) const +{ + ndParams.printDot(sstr,selfID); +} + + +//! 
IR making +CTrueIR::SMaterialHandle CFrontendIR::SAdd2IRSession::makeFinalIR(const typed_pointer_type rootH, const CFrontendIR* ast) { + const auto& astPool = ast->getObjectPool(); + const auto* astRoot = astPool.deref(rootH); + // no material + if (!astRoot) + return CTrueIR::BlackholeMaterialHandle; + tmpIR->reset(); + // reverse AST into another tree + tmpAST->reset(); + const auto backRootH = tmpAST->reverse(rootH,ast); + // do not attempt to cache these; a normal person will not send the same material over and over to convert to IR + CTrueIR::SMaterial material = { + .front = makeOrientedMaterial(rootH,ast), + .back = makeOrientedMaterial(backRootH,tmpAST.get()) + }; + + const auto errorLayer = args.ir->getBasicNodes().errorLayer; + auto printLayer = [&](const typed_pointer_type _rootH, const CFrontendIR* _ast)->void + { + astPrinter.reset(_ast); + astPrinter.layerStack.push_back(_rootH); + args.logger.log("Subtree Dot3 : \n%s\n",ELL_DEBUG,astPrinter().c_str()); + assert(astPrinter.layerStack.empty()); + }; + if (material.front.root==errorLayer) + { + args.logger.log("Failed to create Frontface Material",ELL_ERROR); + printLayer(rootH,ast); + return {}; + } + if (material.back.root==errorLayer) + { + args.logger.log("Failed to create Backface Material for reversed AST",ELL_ERROR); + printLayer(backRootH,tmpAST.get()); + return {}; + } + + auto retval = args.ir->addMaterial(material,tmpIR.get()); + if (retval) + { + // TODO: better debug info (e.g. concat all the layer info during `makeOrientedMaterial` via the `session` object + if (const auto* debug=astPool.deref(astRoot->debugInfo); debug && !debug->data().empty()) + { + material.debugInfo = args.ir->getObjectPool().emplace(debug->data().data(),static_cast(debug->data().size())); + } + } + return retval; +} + +auto CFrontendIR::SAdd2IRSession::makeOrientedMaterial(const CFrontendIR::typed_pointer_type rootH, const CFrontendIR* _srcAST) -> oriented_material_t +{ + oriented_material_t retval = {}; + + // TODO: cache root expressions for layers since they tend to be reused quite a bit + // HOWEVER the `tmpAST` gets cleared every call to `makeOrientedMaterial` so only cache within a `makeOrientedMaterial` call, + // so clear when AST changes or keep separate caches for original AST and tmp.
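+	// Illustrative sketch of the caching idea above (not part of this change; the key and mapped
+	// handle types of the commented-out maps below are assumptions) - probe before converting a
+	// subexpression, record afterwards, e.g. for a layer's top BRDF:
+	//   if (const auto found=topBRDFcache.find(inLayer->brdfTop); found!=topBRDFcache.end())
+	//       outLayer->brdfTop = found->second;
+	//   else
+	//       topBRDFcache[inLayer->brdfTop] = outLayer->brdfTop = makeContributors(inLayer->brdfTop);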
+// core::unordered_map layers; +// core::unordered_map transmissions; +// core::unordered_map topBRDFcache; +// core::unordered_map BTDFcache; + + srcAST = _srcAST; + astPrinter.reset(srcAST); + + const auto& astPool = srcAST->getObjectPool(); + assert(layerStack.empty()); + auto clearLayerStackOnExit = core::makeRAIIExiter([this]()->void{layerStack.clear();}); + + auto& irPool = tmpIR->getObjectPool(); + // go down through layers and enqueue them so the layers can be added in reverse + for (const auto* layer=astPool.deref(rootH); layer; layer=astPool.deref(layer->coated)) + { + // TODO: actually re-check the expressions for being null after optimization + bool noTopReflection = !layer->brdfTop; + bool noTransmission = !layer->btdf; + // if there's literally nothing on the top level, you can't get to the next layer to retroreflect from it + if (noTopReflection && noTransmission) + { + if (layer->coated) + args.logger.log("Skipping current layer and further ones due to no transmission and reflection",ELL_DEBUG); + break; + } + layerStack.push_back(layer); + // find out whether the rest of the layers don't matter because they're blocked from being seen; it's not a complete check + if (noTransmission) + { + if (layer->coated) + args.logger.log("Skipping remaining layers due to no transmission",ELL_DEBUG); + break; + } + } + + const auto errorBxDF = tmpIR->getBasicNodes().errorBxDF; + const auto errorLayer = tmpIR->getBasicNodes().errorLayer; + // Some metadata needed for us + bool layersBelowCanScatterBack = false; + // then go in reverse and do the layers bottom up + for (auto layerIt=layerStack.rbegin(); layerIt!=layerStack.rend(); layerIt++) + { + const auto& inLayer = *layerIt; + // allocate a layer + const auto layerH = irPool.emplace(); + { + auto* const outLayer = irPool.deref(layerH); + // process the top BRDF + outLayer->brdfTop = makeContributors(inLayer->brdfTop); + if (outLayer->brdfTop==errorBxDF) + return {.root=errorLayer}; + // process the BTDF + btdfSubtree = true; + const auto btdfH = makeContributors(inLayer->btdf); + btdfSubtree = false; + // because we're oriented, the bottom brdf can't exist without a BTDF on top (there's no ray that can reach it from our oriented side) + if (btdfH) + { + if (btdfH==errorBxDF) + return {.root=errorLayer}; + const auto transmissionH = irPool.emplace(); + { + auto* const transmission = irPool.deref(transmissionH); + transmission->btdf = btdfH; + // Only if we have a layer below us capable of reflecting the ray back, do we care about the bottom BRDF (you can't hit it otherwise) + if (layersBelowCanScatterBack) + { + transmission->brdfBottom = makeContributors(inLayer->brdfBottom); + if (transmission->brdfBottom==errorBxDF) + return {.root=errorLayer}; + } + // we check if previous layer didn't get optimized away + if (retval.root) + transmission->coated = retval.root; + } + outLayer->firstTransmission = tmpIR->hashNCache(transmissionH); + } + } + retval.root = tmpIR->hashNCache(layerH); + // Now optimize everything, inserting it into the proper IR + { + // temporary debug print + constexpr bool DebugBeforeAndAfterOpt = true; + if constexpr(DebugBeforeAndAfterOpt) + { + args.logger.log("Before optimization:",ELL_DEBUG); + printIRLayer(layerH,tmpIR.get()); + } + // avoid O(Layer^2) scaling from processing bottom layers over and over + if (!tmpIR->rewriteSingleLayer(retval.root,retval.metadata,tmpIR.get())) + { + args.logger.log("Failed to rewrite and optimize IR layer (printing layer and everything it coats):\n",ELL_ERROR); +
printIRLayer(layerH,tmpIR.get()); + return {.root=errorLayer}; + } + // Now remember that our rewriter can optimize us into a blackhole/null layer + if constexpr(DebugBeforeAndAfterOpt) + { + args.logger.log("After optimization:",ELL_DEBUG); + printIRLayer(retval.root,tmpIR.get()); + } + // Set this for the next layer above us: we are reflective, or the previous layer scatters back towards us and we don't block it + if (const auto* const outLayer=irPool.deref(retval.root); outLayer) + layersBelowCanScatterBack = bool(outLayer->brdfTop) || layersBelowCanScatterBack && outLayer->firstTransmission; + else + layersBelowCanScatterBack = false; + } + } + // last checks + if (retval.root) + { +// assert(retval.metadata.capabilities|CTrueIR::SMaterial::SMetadata::ECapabilityBits::NotBlackhole); + } + else + { + assert(!layersBelowCanScatterBack); + } + return retval; +} -void CFrontendIR::CCookTorrance::printDot(std::ostringstream& sstr, const core::string& selfID) const + +// +auto CFrontendIR::SAdd2IRSession::makeContributors(const CFrontendIR::typed_pointer_type bxdfRootH) -> CTrueIR::typed_pointer_type +{ + CTrueIR::typed_pointer_type headH = {}; + if (!bxdfRootH) + return headH; + // temporary debug for WIP + printSubtree(bxdfRootH); + + auto& astPool = srcAST->getObjectPool(); + auto& irPool = tmpIR->getObjectPool(); + + // basic checks + assert(canonicalSum.empty()); + + // Multiplication Chains need to be sorted in a canonical order so it's easier to spot them being the same + auto sortMuls = [](const SFactor& lhs, const SFactor& rhs)->bool + { + // contributor goes first + if (lhs.isContributor()!=rhs.isContributor()) + return lhs.isContributor(); + // only one contributor allowed per chain! + assert(&lhs==&rhs || !lhs.isContributor() && !rhs.isContributor()); + // monochrome is cheaper + if (lhs.factor.monochrome!=rhs.factor.monochrome) + return lhs.factor.monochrome; + // then by handle + return lhs.typeless.value<rhs.typeless.value; + }; + const auto errorRetval = tmpIR->getBasicNodes().errorBxDF; + // negationNode + const auto scalarNegation = tmpIR->getBasicNodes().scalarNegation; + + // good code start + assert(canonicalSum.empty()); + // add the first term to explore + canonicalSum.emplace_back().astStack.emplace_back() = bxdfRootH; + // error on exit + auto printFailAndCleanupOnExit = core::makeRAIIExiter([&]()->void + { + if (headH!=errorRetval) + return; + args.logger.log("Within BxDF:",ELL_DEBUG); + printSubtree(bxdfRootH); + // no point emitting an error contributor, we don't want a best-effort compilation within a layer, nor contributors missing or substituted + canonicalSum.clear(); + } + ); + for (auto it=canonicalSum.begin(); it!=canonicalSum.end(); it++) + { + auto& irChain = it->irChain; + auto& astStack = it->astStack; + while (!astStack.empty()) + { + auto* pEntry = &astStack.back(); + const auto nodeH = *pEntry; + const auto* const node = astPool.deref(nodeH); + // only non null nodes get pushed onto the stack + assert(node); + // depending on the type we have different things to do + using ast_expr_type_e = CFrontendIR::IExprNode::Type; + const ast_expr_type_e astExprType = node->getType(); + constexpr auto ELL_WARNING = system::ILogger::ELL_WARNING; + switch (astExprType) + { + case ast_expr_type_e::Contributor: + { + // This must be the only contributor, as a contributor can only be in the leftmost branch of a Mul node's subtree; it also cannot be under any node other than Add or Mul.
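+					// Worked example (illustrative comment only): an AST like Mul(OrenNayar,Add(A,B))
+					// gets distributed by this loop into the canonical sum of mul-chains
+					//   OrenNayar*A + OrenNayar*B
+					// so every chain in `canonicalSum` carries at most one contributor, which
+					// `sortMuls` always orders to the front of its chain.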
+ assert(!it->hasContributor); + // + astStack.pop_back(); + // shouldn't invalidate iterator, but underline our vector changes + pEntry = nullptr; + // push onto the irChain + const auto contributorH = tmpIR->hashNCache(static_cast(node)->createIRNode(btdfSubtree,srcAST,tmpIR.get())); + if (!contributorH) + { + args.logger.log("Failed to Create IR Contributor from AST",ELL_ERROR); + printSubtree(nodeH); + return (headH=errorRetval); + } + it->hasContributor = true; + irChain.emplace_back().contributor = {.handle=contributorH}; + break; + } + case ast_expr_type_e::Mul: // pop self but push the two children + { + // add in reverse so children are visited left to right + for (auto c=node->getChildCount(); c;) + { + const auto childH = node->getChildHandle(--c); + // if any child is null, kill everything + if (auto* const child=astPool.deref(childH); !child) + { + printSubtree(nodeH); + constexpr bool HardFail = true; + if constexpr (HardFail) + { + args.logger.log("Undef node in a MUL",ELL_ERROR); + printSubtree(nodeH); + return (headH=errorRetval); + } + else + { + args.logger.log("Undef node in a MUL, turning the MUL into a 0",ELL_WARNING); + irChain.clear(); + astStack.clear(); + break; + } + } + // replace self with second child + if (c) + *pEntry = childH; + else + { + astStack.emplace_back() = childH; + pEntry = nullptr; + } + } + break; + } + case ast_expr_type_e::Add: // start one new chain + { + constexpr bool HardFail = true; + bool takeOverChain = true; + // add in reverse so children are visited left to right + for (auto c=node->getChildCount(); c;) + { + const auto childH = node->getChildHandle(--c); + // if child is null, skip it + if (auto* const child=astPool.deref(childH); !child) + { + printSubtree(nodeH); + if constexpr (HardFail) + { + args.logger.log("Undef node in an ADD",ELL_ERROR); + printSubtree(nodeH); + return (headH=errorRetval); + } + else + { + args.logger.log("Undef node in an ADD, substituting with a 0\n",ELL_WARNING); + continue; + } + } + if (takeOverChain) + { + // second child (but first to be pushed) takes over our ASTchain spot + *pEntry = childH; + // does not invalidate but changes contents + pEntry = nullptr; + takeOverChain = false; + } + else + { + // first child (second to be pushed) copies our chains and carries on + auto afterIt = it; + // but we need to remove the first child from the copy's astStack + canonicalSum.insert(++afterIt,*it)->astStack.pop_back(); + } + } + // didn't push anything, need to kill current sum term + if constexpr (!HardFail) + if (takeOverChain) + { + args.logger.log("Both children of ADD are NULL, deleting whole Contributor Sum Term",ELL_WARNING); + irChain.clear(); + astStack.clear(); + } + break; + } + case ast_expr_type_e::Complement: // MUL PREFIX ADD 1.0 CHILD + { + constexpr bool HardFail = true; + if (const auto childH=node->getChildHandle(0); astPool.deref(childH)) + { + // insert a copy of the current chain before the current with AST cleared, so chain stopped (gets us our MUL PREFIX 1.0 term) + canonicalSum.insert(it,*it)->astStack.clear(); + // start a new chain with a copy of ours but negate it (now `it` points to the copy) + it->negate ^= 0b111; + // now the expression which was getting complemented needs to be on the AST stack so we don't visit the complement again + it->astStack.back() = childH; + // need to finalize the irChain, so go back to the entry stopped + it--; + } + else // the child is OpUndef, replace complement with 1 node + { + printSubtree(nodeH); + if constexpr (HardFail) + { + 
args.logger.log("Child of COMPLEMENT is null",ELL_ERROR); + printSubtree(nodeH); + return (headH=errorRetval); + } + else + { + args.logger.log("Child of COMPLEMENT is null, replacing with 1.0",ELL_WARNING); + // keep irChain but stop AST exploration + astStack.clear(); + } + } + break; + } + case ast_expr_type_e::SpectralVariable: + { + // + astStack.pop_back(); + // shouldn't invalidate iterator, but underline our vector changes + pEntry = nullptr; + // + const auto varH = tmpIR->hashNCache(static_cast<const ISpectralVariableExpr*>(node)->createIRNode(srcAST,tmpIR.get())); + const auto* const var = irPool.deref(varH); + // no soft fail, the node wasn't null to begin with + if (!var) + { + args.logger.log("Failed to create the Spectral Variable.",ELL_ERROR); + printSubtree(nodeH); + return (headH=errorRetval); + } + // see what channels are dead + uint8_t liveMask = 0b000; + const auto channels = var->getSpectralBins(); + for (uint8_t c=0; c<channels; c++) + { + const auto absScale = std::abs(var->getParameter(c).scale); // NOTE: reconstructed span, the exact abs flavour is an assumption + if (std::numeric_limits<float>::min()<=absScale && absScale<std::numeric_limits<float>::infinity()) + liveMask |= uint8_t(1)<<c; + } + // NOTE: reconstructed span, assumes a single-knot variable is monochrome and multiplies all channels + const bool monochrome = channels<2; + if (monochrome && liveMask) + liveMask = 0b111; + if ((it->liveSpectralChannels&=liveMask)==0) + { + const auto logLevel = system::ILogger::ELL_PERFORMANCE; + // if we REALLY want to we can print all the nodes in the `irChain` so far, but then the irChain needs to track debug data from AST + args.logger.log("Product turns to 0 by multiplying the chain so far with the Spectral Variable:\n",logLevel); + printSubtree(nodeH); + args.logger.log("Forms a 0 constant factor across its ancestors in the mul chain, within the BxDF:\n",logLevel); + printSubtree(bxdfRootH); + // kill whole term + irChain.clear(); + astStack.clear(); + pEntry = nullptr; + break; + } + irChain.emplace_back().factor = {.handle=varH,.monochrome=monochrome}; + break; + } + case ast_expr_type_e::Other: + { + // + astStack.pop_back(); + // TODO: We need to start a new `canonicalSum` and save a link to it in the current IR + args.logger.log("Unsupported AST Expression type \"%s\"",ELL_ERROR,system::to_string(astExprType).c_str()); + printSubtree(nodeH); + return (headH=errorRetval); + break; + } + default: + assert(false); + printSubtree(nodeH); + return (headH=errorRetval); + } + } + // only once the astStack is empty: if the ir chain is empty skip it, but don't remove it because some Other AST node might need it + if (irChain.empty()) + { + printSubtree(bxdfRootH); + // can't really print the subtree, because a lot of ADDs and MULs have to collude to produce this + args.logger.log("Empty IR mul chain encountered, skipping...",system::ILogger::ELL_PERFORMANCE); + continue; + } + // should have gotten removed beforehand + assert(it->liveSpectralChannels); + // sort the factors in the mulchain + std::sort(irChain.begin(),irChain.end(),sortMuls); + // make the combiner + if (it->hasContributor) + { + // if we have a contributor, the first node in the sorted chain must be the contributor + assert(irChain.front().isContributor()); + // produce the weighted contributor + const auto weightedContribH = irPool.emplace(); + if (auto* const weightedContrib=irPool.deref(weightedContribH); weightedContrib) + { + weightedContrib->contributor = irChain.front().contributor.handle; + // now the mul chain + if (irChain.size()>1) + { + CTrueIR::CFactorCombiner::SState combinerState = { + .type = CTrueIR::CFactorCombiner::Type::Mul, + .childCount = irChain.size() + }; + // if we negate we need to add one more mul factor, otherwise nothing + if (it->negate==0) + --combinerState.childCount; + const auto factorH = irPool.emplace(combinerState); + if (auto* const
factor=irPool.deref(factorH); factor) + { + auto i = 0; + // monochrome negation + if (it->negate && it->negate==0b111) + factor->setChildHandle(i++,scalarNegation); + bool monochromeNodes = true; + for (auto itChain=irChain.begin()+1; itChain!=irChain.end(); itChain++) + { + // only one contributor per chain is allowed + assert(!itChain->isContributor()); + if (monochromeNodes) + { + // not monochrome for the first time + if (!itChain->factor.monochrome) + { + monochromeNodes = false; + // make the non-monochrome negation node, not using premade cause that would tie me up with spectral buckets + if (it->negate && it->negate!=0b111) + { + const auto negationH = irPool.emplace(uint8_t(3)); + if (auto* const negation=irPool.deref(negationH); negation) + { + for (uint8_t c=0; c<3; c++) + negation->setParameter(c,{.scale=bool((it->negate>>c)&0x1u) ? (-1.f):1.f}); + } + const auto uniqueH = tmpIR->hashNCache(negationH); + if (!uniqueH) + { + args.logger.log("Couldn't create a unique spectral negation node",ELL_ERROR); + return (headH=errorRetval); + } + factor->setChildHandle(i++,negationH); + } + } + } + else + { + // once you go spectral, you never go back + assert(!itChain->factor.monochrome); + } + factor->setChildHandle(i++,itChain->factor.handle); + } + } + const auto uniqueH = tmpIR->hashNCache(factorH); + if (!uniqueH) + { + args.logger.log("Couldn't allocate or hash a `CTrueIR::CFactorCombiner` node",ELL_ERROR); + return (headH=errorRetval); + } + weightedContrib->factor = uniqueH; + } + } + const auto uniqueWeightedH = tmpIR->hashNCache(weightedContribH); + if (!uniqueWeightedH) + { + args.logger.log("Couldn't allocate or hash a `CTrueIR::CWeightedContributor` node",ELL_ERROR); + return (headH=errorRetval); + } + // allocate new tail and slap the mul chain onto it + it->sumTermH = irPool.emplace(); + // note that we hold off on hashing the tail node, cause its subject to change + if (auto* const tail=irPool.deref(it->sumTermH); tail) + tail->product = uniqueWeightedH; + else + { + args.logger.log("Couldn't allocate a `CTrueIR::CContributorSum` node",ELL_ERROR); + return (headH=errorRetval); + } + // append it + if (it!=canonicalSum.begin()) + { + auto itCopy = it; + irPool.deref((--itCopy)->sumTermH)->rest = it->sumTermH; + } + } + else + { + // TODO: make without a contributor + assert(false); + } + } + // now hash in reverse + for (auto it=canonicalSum.rbegin(); it!=canonicalSum.rend(); it++) + it->sumTermH = tmpIR->hashNCache(it->sumTermH)._const_cast(); + // set result + if (!canonicalSum.empty()) + { + headH = canonicalSum.begin()->sumTermH; + canonicalSum.clear(); + } + return headH; +} + +auto CFrontendIR::ISpectralVariableExpr::createIRNode(const CFrontendIR* ast, CTrueIR* ir) const -> CTrueIR::typed_pointer_type { - ndParams.printDot(sstr,selfID); + auto& irPool = ir->getObjectPool(); + uint8_t realCount = 1; + for (uint8_t c=0; c(realCount,*this); +} + +// AST Node -> IR methods +auto CFrontendIR::CEmitter::createIRNode(const bool forBTDF, const CFrontendIR* ast, CTrueIR* ir) const -> ir_contributor_handle_t +{ + assert(!forBTDF); + auto& irPool = ir->getObjectPool(); + const auto retval = irPool.emplace(); + if (auto* const contributor=irPool.deref(retval); contributor) + { + contributor->profile = profile; + contributor->profileTransform = profileTransform; + } + return retval; +} + +auto CFrontendIR::CDeltaTransmission::createIRNode(const bool forBTDF, const CFrontendIR* ast, CTrueIR* ir) const -> ir_contributor_handle_t +{ + assert(forBTDF); + return 
ir->getObjectPool().emplace(); +} + +auto CFrontendIR::COrenNayar::createIRNode(const bool forBTDF, const CFrontendIR* ast, CTrueIR* ir) const -> ir_contributor_handle_t +{ + if (ndParams.getDistribution()!=CTrueIR::SBasicNDFParams::EDistribution::Invalid) + return {}; + auto& irPool = ir->getObjectPool(); + const auto retval = irPool.emplace(); + if (auto* const contributor = irPool.deref(retval)) + contributor->ndfParams = ndParams; // the padding abuse is the same between the classes + return retval; +} + +auto CFrontendIR::CCookTorrance::createIRNode(const bool forBTDF, const CFrontendIR* ast, CTrueIR* ir) const -> ir_contributor_handle_t +{ + if (ndParams.getDistribution()>CTrueIR::SBasicNDFParams::EDistribution::Beckmann) + return {}; + auto& irPool = ir->getObjectPool(); + CTrueIR::typed_pointer_type etaH = {}; + if (forBTDF) + { + const auto* const srcEta = ast->getObjectPool().deref(orientedRealEta); + if (!srcEta) + return {}; + etaH = srcEta->createIRNode(ast,ir); + if (!etaH) + return {}; + } + const auto retval = irPool.emplace(); + if (auto* const ct=irPool.deref(retval); ct) + { + ct->ndfParams = ndParams; // the padding abuse is the same between the classes + ct->orientedRealEta = etaH; + } + return retval; } +template class CTrueIR::CSpectralVariable; } \ No newline at end of file diff --git a/src/nbl/asset/material_compiler3/CTrueIR.cpp b/src/nbl/asset/material_compiler3/CTrueIR.cpp new file mode 100644 index 0000000000..497bdf9f23 --- /dev/null +++ b/src/nbl/asset/material_compiler3/CTrueIR.cpp @@ -0,0 +1,340 @@ +// Copyright (C) 2022-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#define _NBL_ASSET_MATERIAL_COMPILER3_C_TRUE_IR_CPP_ +#include "nbl/asset/material_compiler3/CTrueIR.h" + +#include "nbl/builtin/hlsl/complex.hlsl" +#include "nbl/builtin/hlsl/portable/vector_t.hlsl" + + +namespace nbl::asset::material_compiler3 +{ +constexpr auto ELL_ERROR = nbl::system::ILogger::E_LOG_LEVEL::ELL_ERROR; +constexpr auto ELL_DEBUG = nbl::system::ILogger::E_LOG_LEVEL::ELL_DEBUG; +using namespace nbl::system; + +// TODO: Move these notes, they're for the backend. +// +// Any material (unless its emission only) requires a shading normal to be computed and kept, this should be the first register. +// Materials are primitive agnostic, don't assume triangles, so any attribute interpolation happens in Material System attribute getting callbacks. +// In practice most renderers will apply them on triangles, the advice is to keep triangle indices in own live state raw or compressed (uint32_t base, and then uint16_t diffs). +// To use the material outside of Raytracing or Visibility Buffer one could use HW interpolators, so all interpolated attributes will sit in global input variables. +// Whenever Material Compiler calls `getUV` or `getTBN` its the user's choice how to implement that. +// +// However whenever HW interpolators are not used, to compute a UV it costs: +// - 3x index buffer fetch (from primitive ID and index buffer BDA) +// - 3x dependent UV attribute fetch +// - 2 FMA per channel to interpolate +// And to compute a TBN its: +// - 3x index buffer fetch (from primitive ID and index buffer BDA) +// - 3x dependent fetch of Tangent (and optional Bitangent), or Rotation Around normal + scales +// - 2 FMA per channel to interpolate +// - 1 reconstruction of a TBN matrix (can be expensive, i.e. 
from a quaternion or include renormalizations of interpolated tangents) +// For this reason, it makes sense to spill interpolated UVs and Tangent+BitangentScale instead of recomputing. +// The material compiler will keep a bitmask of UVs and TBNs (TBN depends on UV set) already requested and will spill if user marks sets as expensive to recompute. +// +// If everyone is using the same TBN set, we could back up the interaction and sample because these are just V and L transformed into the TBN. +// In theory this requires us to use anisotropic interactions whenever the isotropic BRDF has a normal perturbation, which seems like an overhead. +// However specular AA mitigations which derive roughness from per-pixel surface curvature and path regularizations would force this upon us anyway. +// The problem is that importance sampling requires us to transform a sampled tangent space L into worldspace and that requires knowing the TBN. +// I can see some optimizations for TBN calc where we only recompute the tangent space .xy of L and V for each TBN since N stays constant and NdotX2 can be known. +// Since we must have full TBN for anisotropic BxDF, tangent space V can be computed quicker than fetching it. +// +// Perturbed normal can be spilled or refetched. If it gets refetched some coefficients need to be recomputed which allow for taking dot products of V and L with it. +// If the BRDF is anisotropic, a small nested TBN needs construction so the perturbed normal is 3 coefficients. +// Spilling as a float16_t3 or float16_t4 is identical memory traffic to a bilinear derivative map tap assuming UNORM8 storage. +// Tangents consistent with the unperturbed TBN can be worked out as `B_p = normalize(cross(N_p,T))` and `T_p = cross(B_p,N_p)`, unless `N_p==T`, in which case cross N_p with B instead. +// If tangents want to be rotated/explicitly controlled, then the spilled normal plus a rotation are still 3 coefficients if encoded as a quaternion (painful to decode) or 4 coeffs. +// +// Already computed shading normals and other BRDF parameters will be computed on demand but also cached (statically or dynamically) in registers. +// Register leftovers can be passed between command streams to not re-fetch or recompute BxDF parameters. +// Register allocation should spill whole quantities (all of RGB of a color or XYZ of a normal) not singular channels. +// Registers spilled should be the ones with the fewest upcoming uses (should assume streams will run as AOV emission, AOV throughputs, Emission, NEE Eval, Generate, Quotient order) +// Relative cost to recompute or refetch should be included in the decision whether to keep around when register allocating. +// Perturbed Normals should be spilled to VRAM instead of recomputed as they require fetching at least a quaternion and a texture sample.
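+// (Back-of-envelope check for the spill-vs-tap claim above, assuming a two-channel RG8 UNORM
+// derivative map: a float16_t4 spill moves 8 bytes, while one bilinear tap touches 4 texels * 2
+// bytes = 8 bytes, ignoring texture cache effects - so spilling the perturbed normal costs about
+// the same bandwidth as refetching it with a single tap.)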
+// Spill should be a big circular buffer allocated per-subgroup - hard to control in Pipeline Shaders https://github.com/KhronosGroup/Vulkan-Docs/issues/2717 +// So need to allocate worst case spill for all rays in a dispatch (although can do persistent threads and reduce the dispatch size a little) + + +CTrueIR::CTrueIR(creation_params_type&& params) : CNodePool(std::move(params)), m_basicNodes(this) +{ + reset(); +} + +CTrueIR::SBasicNodes::SBasicNodes(CTrueIR* ir) +{ + auto& pool = ir->getObjectPool(); + // + blackHoleBxDF = pool.emplace(); + { + auto* const node = pool.deref(blackHoleBxDF._const_cast()); + node->product = {}; + const bool success = node->recomputeHash(pool); + assert(success); + ir->m_uniqueNodes[node->getHash()] = blackHoleBxDF; + } + // + scalarNegation = pool.emplace(uint8_t(1)); + { + auto* const node = pool.deref(scalarNegation._const_cast()); + node->setParameter(0,{.scale=-1.f}); + const bool success = node->recomputeHash(pool); + assert(success); + ir->m_uniqueNodes[node->getHash()] = scalarNegation; + } + // we never compute the hashes on these ones, they're supposed to have invalid hash + errorLayer = pool.emplace(); + { + auto* const node = pool.deref(errorLayer._const_cast()); + node->brdfTop = errorBxDF; + node->firstTransmission = {}; + } + errorBxDF = pool.emplace(); + { + auto* const node = pool.deref(errorBxDF._const_cast()); + node->product = pool.emplace(); + { + auto* const weighted = pool.deref(node->product._const_cast()); + weighted->contributor = pool.emplace(); + const auto factorH = pool.emplace(uint8_t(3)); + { + auto* const factor = pool.deref(factorH); + // make a magenta constant color (can do checkerboard of green & magenta in the future with a small texture) + for (auto i=0; i<3; i++) + factor->setParameter(i,{.scale = i!=1 ? 
1.f:0.f}); + } + weighted->factor = factorH; + } + node->rest = {}; + } +} + +void CTrueIR::SDotPrinter::operator()(std::ostringstream& output) +{ + output << "digraph {\n"; + + const auto errorBxDF = m_ir->getBasicNodes().errorBxDF; + const auto errorLayer = m_ir->getBasicNodes().errorLayer; + auto drainNodeStack = [&]()->void + { + while (!nodeStack.empty()) + { + const auto entry = nodeStack.back(); + // don't print null nodes + assert(entry); + nodeStack.pop_back(); + const auto nodeID = m_ir->getNodeID(entry); + const auto* node = m_ir->getObjectPool().deref(entry); + if (!node) + continue; + output << "\n\t" << m_ir->getLabelledNodeID(entry); + auto printChildren = [&](std::span> children, const INode* node)->void { + uint32_t childIx = 0u; + for (const auto childHandle : children) + { + if (const auto child = m_ir->getObjectPool().deref(childHandle); child) + { + output << "\n\t" << nodeID << " -> " << m_ir->getNodeID(childHandle) << "[label=\"" << node->getChildName_impl(childIx) << "\"]"; + const auto visited = visitedNodes.find(childHandle); + if (visited != visitedNodes.end()) + continue; + nodeStack.push_back(childHandle); + visitedNodes.insert(childHandle); + } + childIx++; + } + }; + switch (node->getFinalType()) + { + case INode::EFinalType::CFactorCombiner: + { + const auto* combiner = dynamic_cast(node); + const auto state = combiner->getState(); + const auto childCount = state.childCount; + if (childCount) + { + for (auto childIx = 0; childIx < childCount; childIx++) + { + const auto childHandle = combiner->getChildHandle(childIx); + if (const auto child = m_ir->getObjectPool().deref(childHandle); child) + { + output << "\n\t" << nodeID << " -> " << m_ir->getNodeID(childHandle); + const auto visited = visitedNodes.find(childHandle); + if (visited != visitedNodes.end()) + continue; + nodeStack.push_back(childHandle); + visitedNodes.insert(childHandle); + } + } + } + break; + } + case INode::EFinalType::CContributorSum: + { + const auto* contributeSum = dynamic_cast(node); + if (contributeSum) + { + typed_pointer_type children[] = {contributeSum->product, contributeSum->rest}; + printChildren(children, node); + } + break; + } + case INode::EFinalType::CCorellatedTransmission: + { + const auto* transmission = dynamic_cast(node); + if (transmission) + { + typed_pointer_type children[] = { transmission->btdf, transmission->brdfBottom, transmission->next }; + printChildren(children, node); + layerStack.push_back(transmission->coated); + } + break; + } + case INode::EFinalType::CWeightedContributor: + { + const auto* contributor = dynamic_cast(node); + if (contributor) + { + typed_pointer_type children[] = { contributor->contributor, contributor->factor }; + printChildren(children, node); + } + break; + } + case INode::EFinalType::CCookTorrance: + { + const auto* ct = dynamic_cast(node); + if (ct) + { + if (const auto eta = m_ir->getObjectPool().deref(ct->orientedRealEta); eta) + { + output << "\n\t" << nodeID << " -> " << m_ir->getNodeID(ct->orientedRealEta) << "[label=\"orientedRealEta\"]"; + const auto visited = visitedNodes.find(ct->orientedRealEta); + if (visited != visitedNodes.end()) + continue; + nodeStack.push_back(ct->orientedRealEta); + visitedNodes.insert(ct->orientedRealEta); + } + } + break; + } + default: + break; + } + // special printing + node->printDot(output, nodeID); + } + }; + drainNodeStack(); + + while (!layerStack.empty()) + { + const auto layerHandle = layerStack.back(); + layerStack.pop_back(); + // don't print layer nodes multiple times + const auto 
visited = visitedNodes.find(layerHandle); + if (visited != visitedNodes.end()) + continue; + visitedNodes.insert(layerHandle); + const auto* layerNode = m_ir->getObjectPool().deref(layerHandle); + if (!layerNode) + continue; + // + const auto layerID = m_ir->getNodeID(layerHandle); + output << "\n\t" << m_ir->getLabelledNodeID(layerHandle); + // + auto pushNodeRoot = [&](const typed_pointer_type root, const std::string_view edgeLabel)->void + { + if (!root) + return; + // print the link from the layer to the expression + output << "\n\t" << layerID << " -> " << m_ir->getNodeID(root) << "[label=\"" << edgeLabel << "\"]"; + // but not the expression again + const auto visited = visitedNodes.find(root); + if (visited != visitedNodes.end()) + return; + nodeStack.push_back(root); + visitedNodes.insert(root); + drainNodeStack(); + }; + pushNodeRoot(layerNode->brdfTop, "Top BRDF"); + pushNodeRoot(layerNode->firstTransmission, "Corellated Trans"); + } + + // TODO: print image views + + output << "\n}\n"; +} + +CTrueIR::SRewriteSession::~SRewriteSession() +{ + if (success) + return; + // if session was not a success, then clean up all the nodes + for (const auto& handle : createdNodes) + args.dst->getObjectPool()._delete(handle,1); +} + +bool CTrueIR::SRewriteSession::rewrite(typed_pointer_type& oriented) +{ + if (!success) + return false; + + success = args.src==args.dst; + // TODO: go through the layers +// assert(success); + return success; +} + +bool CTrueIR::SRewriteSession::rewriteSingleLayer(typed_pointer_type& oriented) +{ + if (!success) + return false; + // ideas for the pass: + // - skip/replace delta transmissions by the layer underneath, if null then keep as delta + // - if BTDF has delta transmissions, then via the sampling property hoist next layer into current layer BRDFs with the DeltaTransmission weights applied + // - order the `CWeightedContributor` within the `CContributorSum` linked list so emitters are first, and so on (null products go last) + // observations: + // - Any V-dependent factors cannot be commonalized across layers; most of the CSE has to be done within a layer + + // TODO: combine layer meta with `retval.metadata` + + // TODO: deduplicate, collect metadata and insert into current IR + + success = args.src==args.dst; + assert(success); + return success; +} + +void CTrueIR::ISpectralVariableFactor::printDot(std::ostringstream& sstr, const core::string& selfID) const +{ + printDotParameterSet(*pWonky(), getKnotCount(), sstr, selfID, {}); +} + +void CTrueIR::CEmitter::printDot(std::ostringstream& sstr, const core::string& selfID) const +{ + if (profile) + profile.printDot(sstr, selfID); + if (profile.view) + { + const auto transformNodeID = selfID + "_pTform"; + sstr << "\n\t" << transformNodeID << " [label=\""; + printMatrix(sstr, profileTransform); + sstr << "\"]"; + // connect up + sstr << "\n\t" << selfID << " -> " << transformNodeID << "[label=\"Profile Transform\"]"; + } +} + +void CTrueIR::COrenNayar::printDot(std::ostringstream& sstr, const core::string& selfID) const +{ + ndfParams.printDot(sstr, selfID); +} + +void CTrueIR::CCookTorrance::printDot(std::ostringstream& sstr, const core::string& selfID) const +{ + ndfParams.printDot(sstr, selfID); +} + +template class CTrueIR::CSpectralVariable; } \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp index dac1d783d0..2beb94eeb8 100644 --- a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp +++ 
b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp @@ -271,8 +271,17 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: instances.getInitialTransforms()[index] = shape->getTransform(); // const core::string debugName = shape->id.empty() ? std::format("0x{:x}",ptrdiff_t(shape)):shape->id; - ctx.getMaterial(shape->bsdf,shape->obtainEmitter(),debugName,PrintMaterialDot3 ? m_system.get():nullptr); - // TODO: compile the material into the true IR and push it into the instances + const auto astRootH = ctx.getMaterial(shape->bsdf,shape->obtainEmitter(),debugName,PrintMaterialDot3 ? m_system.get():nullptr); + // TODO: get the output slot for the material in the ICPUScene from `instances.getMaterialTables()` + material_compiler3::CTrueIR::SMaterialHandle rMaterialHandle = {}; // instances.getMaterialTables() + if (astRootH) + { + // we can't batch because of how the parser works + const auto addedCount = ctx.getMaterialFrontend()->addMaterials({.rootNodes={&astRootH,1},.ir=ctx.scene->getMaterialPool(),.result=&rMaterialHandle,.logger=_params.logger}); + // we could check `addedCount` but the function above does its own logging + } + else // TODO: make a special Error material + rMaterialHandle = material_compiler3::CTrueIR::BlackholeMaterialHandle; }; // first go over all actually used shapes which are not shapegroups (regular shapes and instances) @@ -385,7 +394,8 @@ auto SContext::getMaterial( } else root->brdfTop = foundEmitter->second._const_cast(); - const bool success = frontIR->addMaterial(rootH,inner.params.logger); + // + const bool success = frontIR->valid(rootH,inner.params.logger); auto logger = inner.params.logger; if (!success) @@ -398,13 +408,14 @@ auto SContext::getMaterial( const frontend_ir_t::typed_pointer_type constRootH = rootH; frontend_ir_t::SDotPrinter printer = {frontIR.get(),{&constRootH,1}}; writeDot3File(debugFileWriter,DebugDir/"material_frontend"/(debugName+".dot"),printer); + debugAllMaterials.push_back(constRootH); } return rootH; } -using parameter_t = asset::material_compiler3::CFrontendIR::SParameter; +using parameter_t = asset::material_compiler3::CTrueIR::SParameter; parameter_t SContext::getTexture(const CElementTexture* const rootTex, hlsl::float32_t2x3* outUvTransform) { parameter_t retval = {}; @@ -494,6 +505,7 @@ parameter_t SContext::getTexture(const CElementTexture* const rootTex, hlsl::flo } +using spectral_var_t = asset::material_compiler3::CFrontendIR::CSpectralVariableExpr; template void getParameters(const std::span out, const hlsl::vector value) { @@ -505,6 +517,21 @@ void getParameters(const std::span out, const float32_t value) for (auto it=out.begin(); it!=out.end(); it++) it->scale = value; } +template requires hlsl::concepts::FloatingPoint +void getParameters(spectral_var_t* const out, const T value) +{ + out->setSemantics(spectral_var_t::ESemantics::Fixed3_SRGB); + for (uint8_t i=0; igetKnotCount(); i++) + { + parameter_t param = {}; + if constexpr (hlsl::is_vector_v) + getParameters({¶m,1},value[i]); + else + getParameters({¶m,1},value); + out->setParameter(i,std::move(param)); + } +} + hlsl::float32_t2x3 SContext::getParameters(const std::span out, const CElementTexture::SpectrumOrTexture& src) { auto retval = hlsl::math::linalg::diagonal(0.f); @@ -545,6 +572,16 @@ hlsl::float32_t2x3 SContext::getParameters(const std::span out, c } return retval; } +void SContext::getParameters(spectral_var_t* const out, const CElementTexture::SpectrumOrTexture& src) +{ + assert(out->getKnotCount()==3); + 
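+	// the three sRGB knots go through the generic parameter path first and only then get moved
+	// into the spectral variable, so the shared UV transform is recorded once on the variable itself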
out->setSemantics(spectral_var_t::ESemantics::Fixed3_SRGB); + parameter_t params[3] = {}; + out->uvTransform() = getParameters(params,src); + for (uint8_t i=0; igetKnotCount(); i++) + out->setParameter(i,std::move(params[i])); +} + hlsl::float32_t2x3 SContext::getParameters(const std::span out, const CElementTexture::FloatOrTexture& src) { auto retval = hlsl::math::linalg::diagonal(0.f); @@ -564,8 +601,15 @@ hlsl::float32_t2x3 SContext::getParameters(const std::span out, con MitsubaLoader::getParameters(out,src.value); return retval; } +void SContext::getParameters(spectral_var_t* const out, const CElementTexture::FloatOrTexture& src) +{ + out->setSemantics(spectral_var_t::ESemantics::Fixed3_SRGB); + parameter_t param = {}; + out->uvTransform() = getParameters({¶m,1},src); + for (uint8_t i=0; igetKnotCount(); i++) + out->setParameter(i,std::move(param)); +} -using spectral_var_t = asset::material_compiler3::CFrontendIR::CSpectralVariable; auto SContext::genEmitter(const CElementEmitter* _emitter, system::ISystem* debugFileWriter) -> frontend_emitter_t { auto& frontPool = frontIR->getObjectPool(); @@ -590,26 +634,22 @@ auto SContext::genEmitter(const CElementEmitter* _emitter, system::ISystem* debu emitter->profileTransform = hlsl::math::linalg::truncate<3,3>(_emitter->transform.matrix); } } - { - spectral_var_t::SCreationParams<3> params = {}; - // if you wanted a textured emitter, this would be the place to do it - MitsubaLoader::getParameters<3>(params.knots.params,_emitter->area.radiance); - params.getSemantics() = spectral_var_t::Semantics::Fixed3_SRGB; - mul->rhs = frontPool.emplace(std::move(params)); - } + const auto varH = frontPool.emplace(3); + MitsubaLoader::getParameters(frontPool.deref(varH),_emitter->area.radiance); + mul->rhs = varH; if (debugFileWriter) { frontend_ir_t::SDotPrinter printer = {frontIR.get()}; - printer.exprStack.push(handle); + printer.exprStack.push_back(handle); writeDot3File(debugFileWriter,DebugDir/"material_frontend/emitters"/(debugName+".dot"),printer); } return handle; } -auto SContext::genProfile(const CElementEmissionProfile* profile) -> frontend_ir_t::SParameter +auto SContext::genProfile(const CElementEmissionProfile* profile) -> true_ir_t::SParameter { - frontend_ir_t::SParameter retval = {}; + true_ir_t::SParameter retval = {}; // load it! using namespace nbl::asset; const CIESProfileMetadata* iesMeta = nullptr; @@ -655,13 +695,12 @@ auto SContext::genMaterial(const CElementBSDF* bsdf, system::ISystem* debugFileW auto logger = inner.params.logger; const core::string debugName = bsdf->id.empty() ? 
std::format("0x{:x}",ptrdiff_t(bsdf)):bsdf->id; - auto createFactorNode = [&](const CElementTexture::SpectrumOrTexture& factor, const ECommonDebug debug)->auto + auto createFactorNode = [&](const CElementTexture::SpectrumOrTexture& _factor, const ECommonDebug debug)->auto { - spectral_var_t::SCreationParams<3> params = {}; - params.knots.uvTransform = getParameters(params.knots.params,factor); - params.getSemantics() = spectral_var_t::Semantics::Fixed3_SRGB; - const auto factorH = frontPool.emplace(std::move(params)); - frontPool.deref(factorH)->debugInfo = commonDebugNames[uint16_t(debug)]._const_cast(); + const auto factorH = frontPool.emplace(3); + auto* const factor = frontPool.deref(factorH); + getParameters(factor,_factor); + factor->debugInfo = commonDebugNames[uint16_t(debug)]._const_cast(); return factorH; }; @@ -678,7 +717,7 @@ auto SContext::genMaterial(const CElementBSDF* bsdf, system::ISystem* debugFileW auto* mul = frontPool.deref(mulH); const auto ctH = frontPool.emplace(); { - using ndf_e = frontend_ir_t::CCookTorrance::NDF; + using ndf_e = true_ir_t::SBasicNDFParams::EDistribution; constexpr ndf_e ndfMap[4] = { ndf_e::Beckmann, ndf_e::GGX, @@ -690,7 +729,7 @@ auto SContext::genMaterial(const CElementBSDF* bsdf, system::ISystem* debugFileW if (base) { // ct->orientedRealEta gets set in the fresnel part - ct->ndf = ndfMap[base->distribution]; + ct->ndParams.getDistribution() = ndfMap[base->distribution]; // this function sets both roughnesses to same alpha ct->ndParams.uvTransform = getParameters(roughness,base->alpha); if (base->alphaV.texture || !hlsl::isnan(base->alphaV.value)) @@ -816,14 +855,13 @@ auto SContext::genMaterial(const CElementBSDF* bsdf, system::ISystem* debugFileW roughness[0].scale = roughness[1].scale = 0.f; mul->lhs = orenNayarH; } + const auto albedoH = frontPool.emplace(3); { - spectral_var_t::SCreationParams<3> params = {}; - params.knots.uvTransform = getParameters(params.knots.params,_bsdf->diffuse.reflectance); - params.getSemantics() = spectral_var_t::Semantics::Fixed3_SRGB; - const auto albedoH = frontPool.emplace(std::move(params)); - frontPool.deref(albedoH)->debugInfo = commonDebugNames[uint16_t(ECommonDebug::Albedo)]._const_cast(); - mul->rhs = albedoH; + auto* const albedo = frontPool.deref(albedoH); + getParameters(albedo,_bsdf->diffuse.reflectance); + albedo->debugInfo = commonDebugNames[uint16_t(ECommonDebug::Albedo)]._const_cast(); } + mul->rhs = albedoH; } leaf->brdfTop = roughDiffuseH; break; @@ -855,18 +893,16 @@ auto SContext::genMaterial(const CElementBSDF* bsdf, system::ISystem* debugFileW auto* const fresnel = frontPool.deref(fresnelH); const float extEta = _bsdf->conductor.extEta; { - spectral_var_t::SCreationParams<3> params = {}; + const auto varH = frontPool.emplace(1); const hlsl::float32_t3 eta = _bsdf->conductor.eta.vvalue.xyz; - MitsubaLoader::getParameters<3>(params.knots.params,eta/extEta); - params.getSemantics() = spectral_var_t::Semantics::Fixed3_SRGB; - fresnel->orientedRealEta = frontPool.emplace(std::move(params)); + MitsubaLoader::getParameters(frontPool.deref(varH),eta/extEta); + fresnel->orientedRealEta = varH; } { - spectral_var_t::SCreationParams<3> params = {}; + const auto varH = frontPool.emplace(1); const hlsl::float32_t3 k = _bsdf->conductor.k.vvalue.xyz; - MitsubaLoader::getParameters<3>(params.knots.params,k/extEta); - params.getSemantics() = spectral_var_t::Semantics::Fixed3_SRGB; - fresnel->orientedImagEta = frontPool.emplace(std::move(params)); + 
MitsubaLoader::getParameters(frontPool.deref(varH),k/extEta); + fresnel->orientedImagEta = varH; } } else @@ -947,11 +983,9 @@ auto SContext::genMaterial(const CElementBSDF* bsdf, system::ISystem* debugFileW const CElementTexture::FloatOrTexture constZero = {0.f}; mul->lhs = createOrenNayar(_bsdf->difftrans.transmittance,constZero,constZero); // normalize the Oren Nayar over the full sphere - { - spectral_var_t::SCreationParams<1> params = {}; - params.knots.params[0].scale = 0.5f; - mul->rhs = frontPool.emplace(std::move(params)); - } + const auto varH = frontPool.emplace(1); + frontPool.deref(varH)->setParameter(0,{.scale=0.5f}); + mul->rhs = varH; } leaf->brdfTop = diffTransH; leaf->btdf = diffTransH; @@ -1047,10 +1081,9 @@ auto SContext::genMaterial(const CElementBSDF* bsdf, system::ISystem* debugFileW const auto beerH = hasExtinction ? frontPool.emplace():frontend_ir_t::typed_pointer_type{}; if (auto* const beer=frontPool.deref(beerH); beer) { - spectral_var_t::SCreationParams<3> params = {}; - params.knots.uvTransform = getParameters(params.knots.params,sigmaA); - params.getSemantics() = spectral_var_t::Semantics::Fixed3_SRGB; - beer->perpTransmittance = frontPool.emplace(std::move(params)); + const auto varH = frontPool.emplace(1); + getParameters(frontPool.deref(varH),sigmaA); + beer->perpTransmittance = varH; } fillCoatingLayer(coating,_bsdf->coating,_bsdf->type==CElementBSDF::ROUGHCOATING,beerH); // attach the nested as layer @@ -1091,19 +1124,18 @@ auto SContext::genMaterial(const CElementBSDF* bsdf, system::ISystem* debugFileW auto* const mix = frontPool.deref(mixH); const uint8_t realChildCount = hlsl::min(childCount,_bsdf->mixturebsdf.weightCount); const auto firstH = realChildCount>=1 ? getChildFromCache(_bsdf->mixturebsdf.bsdf[0]):frontend_ir_t::typed_pointer_type{}; - spectral_var_t::SCreationParams<1> firstParams = {}; - firstParams.knots.params[0].scale = _bsdf->mixturebsdf.weights[0]; - const auto firstWeightH = frontPool.emplace(std::move(firstParams)); + const auto firstWeightH = frontPool.emplace(1); + frontPool.deref(firstWeightH)->setParameter(0,{.scale=_bsdf->mixturebsdf.weights[0]}); if (realChildCount<2) createWeightedSum(mix,firstH,firstWeightH,{},{}); else for (uint8_t c=1; cmixturebsdf.bsdf[c]); - spectral_var_t::SCreationParams<1> params = {}; - params.knots.params[0].scale = _bsdf->mixturebsdf.weights[c]; + const auto weightH = frontPool.emplace(1); + frontPool.deref(weightH)->setParameter(0,{.scale=_bsdf->mixturebsdf.weights[c]}); // don't feel like spending time on this, since its never used, trust CTrueIR optimizer to remove this during canonicalization - createWeightedSum(mix,mixH,unityFactor._const_cast(),otherH,frontPool.emplace(std::move(params))); + createWeightedSum(mix,mixH,unityFactor._const_cast(),otherH,weightH); } newMaterialH = mixH; break; @@ -1172,7 +1204,7 @@ auto SContext::genMaterial(const CElementBSDF* bsdf, system::ISystem* debugFileW auto* const root = frontPool.deref(rootH); root->debugInfo = frontPool.emplace(debugName); - const bool success = frontIR->addMaterial(rootH,inner.params.logger); + const bool success = frontIR->valid(rootH,inner.params.logger); if (!success) { logger.log("Failed to add Material for %s",LoggerError,debugName.c_str()); @@ -1281,8 +1313,7 @@ SContext::SContext( ) : inner(_ctx), override_(_override), meta(_metadata) //,ir(core::make_smart_refctd_ptr()), frontend(this) { - auto materialPool = material_compiler3::CTrueIR::create({.composed={.blockSizeKBLog2=4}}); - scene = 
ICPUScene::create(core::smart_refctd_ptr(materialPool)); // TODO: feed it max shapes per group + scene = ICPUScene::create(material_compiler3::CTrueIR::create({.composed={.blockSizeKBLog2=4}})); // TODO: feed it max shapes per group // { frontIR = frontend_ir_t::create({.composed={.blockSizeKBLog2=4}}); @@ -1302,19 +1333,17 @@ SContext::SContext( // { { - spectral_var_t::SCreationParams<1> params = {}; - params.knots.params[0].scale = 1.f; - unityFactor = frontPool.emplace(std::move(params)); + unityFactor = frontPool.emplace(1); + frontPool.deref(unityFactor._const_cast())->setParameter(0,{.scale=1.f}); } deltaTransmission = frontPool.emplace(); const auto mulH = frontPool.emplace(); { auto* const mul = frontPool.deref(mulH); mul->lhs = frontPool.emplace(); - spectral_var_t::SCreationParams<3> params = {}; - MitsubaLoader::getParameters<3>(params.knots.params,{1.f,0.f,1.f}); - params.getSemantics() = spectral_var_t::Semantics::Fixed3_SRGB; - mul->rhs = frontPool.emplace(std::move(params)); + const auto varH = frontPool.emplace(3); + MitsubaLoader::getParameters(frontPool.deref(varH),float32_t3(1.f,0.f,1.f)); + mul->rhs = varH; } errorBRDF = mulH; auto constructUnsupported = [&](const std::string_view debugName)->auto