From 1f3fed4ac4161f56c88a9e310c4611d8c29c2f50 Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Thu, 16 Apr 2026 15:09:27 +0200 Subject: [PATCH 1/7] factorize out a bunch of stuff, make the Spherical Rectangle Solid Angle Sampler global like its Triangle counterpart, we need to keep the basis around --- examples_tests | 2 +- .../hlsl/sampling/spherical_rectangle.hlsl | 116 ++++++++++++------ .../hlsl/shapes/spherical_rectangle.hlsl | 5 +- 3 files changed, 86 insertions(+), 37 deletions(-) diff --git a/examples_tests b/examples_tests index 2fd21fd891..5eeb47351e 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2fd21fd8917660d2a559b97d80afa95dd6b591d5 +Subproject commit 5eeb47351e29cb6ee02f8d8319f131a2c012b5a2 diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index 131cc92d70..8029205384 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -26,6 +26,7 @@ struct SphericalRectangle using vector2_type = vector; using vector3_type = vector; using vector4_type = vector; + using matrix3x3_type = matrix; // BackwardTractableSampler concept types using domain_type = vector2_type; @@ -35,16 +36,15 @@ struct SphericalRectangle struct cache_type {}; - NBL_CONSTEXPR_STATIC_INLINE scalar_type ClampEps = 1e-5; - static SphericalRectangle create(NBL_CONST_REF_ARG(shapes::SphericalRectangle) rect, const vector3_type observer) { - return create(rect.solidAngle(observer), rect.extents); + return create(rect.basis,rect.solidAngle(observer),rect.extents); } - static SphericalRectangle create(NBL_CONST_REF_ARG(typename shapes::SphericalRectangle::solid_angle_type) sa, const vector2_type _extents) + static SphericalRectangle create(const matrix3x3_type _basis, NBL_CONST_REF_ARG(typename shapes::SphericalRectangle::solid_angle_type) sa, const vector2_type _extents) { SphericalRectangle retval; + retval.basis = _basis; retval.r0 = sa.r0; retval.extents = _extents; @@ -62,7 +62,7 @@ struct SphericalRectangle // Create directly from a local-frame corner position and rectangle extents. // Use when you already know r0 (e.g. from a gnomonic projection) and don't // need the shapes::SphericalRectangle + solidAngle(observer) roundtrip. - static SphericalRectangle create(const vector3_type _r0, const vector2_type _extents) + static SphericalRectangle create(const matrix3x3_type _basis, const vector3_type _r0, const vector2_type _extents) { // Same math as shapes::SphericalRectangle::solidAngle() but without // the mul(basis, origin - observer) step since we already have r0. @@ -82,13 +82,22 @@ struct SphericalRectangle acc.addCosine(sa.cosGamma[3]); sa.value = acc.getSumOfArccos() - scalar_type(2.0) * numbers::pi; - return create(sa, _extents); + return create(_basis, sa, _extents); } // shared core of generate and generateSurfaceOffset // returns (xu, hv, d) packed into a vector3; caller derives either 2D offset or 3D direction - vector3_type __generate(const domain_type u) NBL_CONST_MEMBER_FUNC + struct SCommonGen + { + scalar_type xu; + scalar_type d2; + scalar_type hv; + scalar_type cosElevation2; + }; + SCommonGen __generate(const domain_type u) NBL_CONST_MEMBER_FUNC { + SCommonGen retval; + // algorithm needs r0.z < 0; use -abs(r0.z) without storing the flip const scalar_type negAbsR0z = -hlsl::abs(r0.z); const scalar_type r0zSq = r0.z * r0.z; @@ -103,44 +112,80 @@ struct SphericalRectangle const scalar_type rcpCu_2 = hlsl::max(absNegFu * absNegFu + b0 * b0, scalar_type(1.0)); // sign(negFu) = sign(numerator) * sign(sin(au)); sin(au) < 0 iff au > PI const scalar_type negFuSign = hlsl::select((au > numbers::pi) != (numerator < scalar_type(0.0)), scalar_type(-1.0), scalar_type(1.0)); - scalar_type xu = negAbsR0z * negFuSign * hlsl::rsqrt(rcpCu_2 - scalar_type(1.0)); - xu = hlsl::clamp(xu, r0.x, r1.x); // avoid Infs - const scalar_type d_2 = xu * xu + r0zSq; - const scalar_type d = hlsl::sqrt(d_2); + retval.xu = negAbsR0z * negFuSign * hlsl::rsqrt(rcpCu_2 - scalar_type(1.0)); + retval.xu = hlsl::clamp(retval.xu, r0.x, r1.x); // avoid Infs + // TODO: see if we can compute this direct from definition of `d_2` ignoring the clamp on `xu` and `cosElevation2` and reclamping this result in a different way + // with `d2 = r0zSq*(rcpCu_2*rcpCu_2-rcpCu_2)` or `d2 = r0zSq*rcpCu_2*(rcpCu_2-1)` + retval.d2 = retval.xu * retval.xu + r0zSq; - const scalar_type h0 = r0.y * hlsl::rsqrt(d_2 + r0.y * r0.y); - const scalar_type h1 = r1.y * hlsl::rsqrt(d_2 + r1.y * r1.y); - const scalar_type hv = h0 + u.y * (h1 - h0); + const scalar_type h0 = r0.y * hlsl::rsqrt(retval.d2 + r0.y * r0.y); + const scalar_type h1 = r1.y * hlsl::rsqrt(retval.d2 + r1.y * r1.y); + retval.hv = h0 + u.y * (h1 - h0); + retval.cosElevation2 = scalar_type(1.0) - hlsl::min(retval.hv * retval.hv,1); - return vector3_type(xu, hv, d); + return retval; } // returns a normalized 3D direction in the local frame with correct r0.z sign + vector3_type generateNormalizedLocal(const domain_type u, NBL_REF_ARG(cache_type) cache, NBL_REF_ARG(scalar_type) hitDist) NBL_CONST_MEMBER_FUNC + { + const SCommonGen core = __generate(u); + scalar_type cosElevationOverD = hlsl::rsqrt(core.d2/core.cosElevation2); + // TODO: or shall we do some other more sophisticated clamp or correction? Is this even the right one to use? + cosElevationOverD = hlsl::select(hlsl::isnan(cosElevationOverD),1.f,cosElevationOverD); + + // TODO: investigate if due to precision we need to compute this as a `sqrt` then the quantity being computed is `core.cosElevation2/core.d2` + // which what alss `generateLocalBasisXY` needs, in which case `__generate` can already compute it and return it as `hitDist2` + hitDist = 1.f/cosElevationOverD; + + const vector3_type retval = vector3_type(core.xu / hitDist, core.hv, r0.z / hitDist); + assert(!hlsl::isnan(computeHitT(retval))); + return retval; + } + codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC { - const vector3_type core = __generate(u); - const scalar_type xu = core.x; - const scalar_type hv = core.y; - const scalar_type d = core.z; - const scalar_type hv2 = hv * hv; - const scalar_type cosElevation = hlsl::sqrt(hlsl::max(scalar_type(1.0) - hv2, scalar_type(0.0))); - const scalar_type rcpD = scalar_type(1.0) / d; - - return vector3_type(xu * cosElevation * rcpD, hv, r0.z * cosElevation * rcpD); + scalar_type dummy; + const vector3_type localL = generateNormalizedLocal(u,cache,dummy); + // could return `hlsl::mul(hlsl::tranpose(basis),localL)` or just this + return basis[0]*localL[0]+basis[1]*localL[1]+basis[2]*localL[2]; } - // returns a 2D offset on the rectangle surface from the r0 corner - vector2_type generateSurfaceOffset(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC + // utility to determine maxT for a ray from L shot from the origin which we're sure intersects the rectangle + scalar_type computeHitT(const vector3_type L) { - const vector3_type core = __generate(u); - const scalar_type xu = core.x; - const scalar_type hv = core.y; - const scalar_type d = core.z; + const scalar_type retval = hlsl::abs(r0.z/hlsl::dot(L,basis[2])); + { + const vector3_type hitPointRelative = L*retval; + const vector2_type uv = mul(basis,L).xy-r0.xy; + assert(uv[0]>=0.0 && uv[0]<=rectExtents[0]); + assert(uv[1]>=0.0 && uv[1]<=rectExtents[1]); + } + return retval; + } + + // returns a 2D offset on the rectangle surface including the r0 corner -useful for generating unnormalized worldspace L + vector2_type generateLocalBasisXY(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC + { + const SCommonGen core = __generate(u); const scalar_type r1y = r0.y + extents.y; - const scalar_type hv2 = hv * hv; - const scalar_type yv = hlsl::mix(r1y, (hv * d) / hlsl::sqrt(scalar_type(1.0) - hv2), hv2 < scalar_type(1.0) - ClampEps); + // TODO: see if we can compute this direct from definition of `d_2` ignoring the clamp on `xu` and `cosElevation2` + const scalar_type yv = core.hv * hlsl::rsqrt(core.cosElevation2/core.d2); + + // fun fact, when one of the operands to min or max is NaN, the SPIR-V builtin will select the other one + // TODO: maybe try just `min(yv,ry1)` + const vector2_type retval = vector2_type(core.xu,hlsl::clamp(yv,r0.y,r1y)); + assert(retval[0] >= r0.x && retval[1] >= r0.y); + assert(retval[0] <= r0.x+rectExtents[0] && retval[1] <= r0.y+rectExtents[1]); + return retval; + } - return vector2_type((xu - r0.x), (yv - r0.y)); + // its basically the hitpoint minus the observer origin + codomain_type generateUnnormalized(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC + { + const vector2_type localXY = generateLocalBasisXY(u,cache); + // the `localXY` already contains r0.xy + return basis[0]*localXY[0]+basis[1]*localXY[1]+basis[2]*r0.z; } density_type forwardPdf(const domain_type u, const cache_type cache) NBL_CONST_MEMBER_FUNC @@ -163,12 +208,13 @@ struct SphericalRectangle return backwardPdf(L); } + matrix3x3_type basis; + vector3_type r0; + vector2_type extents; scalar_type solidAngle; scalar_type k; scalar_type b0; scalar_type b1; - vector3_type r0; - vector2_type extents; }; } // namespace sampling diff --git a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl index 5895e4bc80..bc864f7a19 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl @@ -102,7 +102,8 @@ struct SphericalRectangle retval.basis[0] = compressed.right / retval.extents[0]; retval.basis[1] = compressed.up / retval.extents[1]; assert(hlsl::abs(hlsl::dot(retval.basis[0], retval.basis[1])) < scalar_type(1e-5)); - retval.basis[2] = hlsl::normalize(hlsl::cross(retval.basis[0], retval.basis[1])); + // don't normalize, the `right` and `up` vectors are orthogonal and the two bases are normalized already! + retval.basis[2] = hlsl::cross(retval.basis[0], retval.basis[1]); return retval; } @@ -184,6 +185,8 @@ struct SphericalRectangle // rcpLen[i]*rcpLen[j] for adjacent pairs: (0,1), (1,2), (2,3), (3,0) const vector4_type cos_sides = unnormDots * rcpLen * rcpLen.yzwx; + // TODO: there's the same opportunity for optimization of this as the Spherical Triangle + // https://www.linkedin.com/posts/matt-kielan-9b054a165_untitled-graph-activity-7442910005671923712-jHz6?utm_source=share&utm_medium=member_desktop&rcm=ACoAACdp2RQBqq2bJfC2zxpsme-vRv2zh9oP-8E const vector4_type pyramidAngles = hlsl::acos(cos_sides); return hlsl::dot(pyramidAngles, externalProducts) * scalar_type(0.5); From 1df4116e33dfb26f6a8fc81b109b8f3c27a97146 Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Thu, 16 Apr 2026 23:14:57 +0200 Subject: [PATCH 2/7] epic optimization! --- .../hlsl/shapes/spherical_triangle.hlsl | 52 ++++++++++++------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index 1a5681b39e..4ebe52d312 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -20,6 +20,26 @@ namespace hlsl namespace shapes { +// TODO: move to where fast_acos lives +template +T acos_csc_approx(const T arg) +{ + const T u = hlsl::log2(_static_cast(1)+arg); + // The curve fit "revealed in a dream" to me is `exp2(F(log2(x+1)))` where `F(u)` is a polynomial + // I have a feeling that a polynomial of ((Au+B)u+C)u+D could be sufficient if it has following properties: + // `F(0) = 0` and + // `F(u) <= log2(\frac{\cos^{-1}\left(2^{x}-1\right)}{\sqrt{1-\left(2^{x}-1\right)^{2}}})` because you want to consistently under-estimate the Projected Solid Angle to avoid creating energy + // See https://www.desmos.com/calculator/sdptomhbju + // Furthermore we could clip the polynomial calc to `Cu+D or `(Bu+C)u+D` for small arguments + T poly; + // TODO: actually optimize these constants in real world scenarios (renders) + if (order==1) + poly = (_static_cast(1)-u)*_static_cast(0.6); + else if (order==2) + poly = (_static_cast(1)-u)*_static_cast(0.637)+(_static_cast(1) - u * u) * _static_cast(0.0115); + return hlsl::exp2(poly); +} + template struct SphericalTriangle { @@ -87,34 +107,30 @@ struct SphericalTriangle return 0; matrix awayFromEdgePlane; - awayFromEdgePlane[0] = hlsl::cross(vertices[1], vertices[2]) * csc_sides[0]; - awayFromEdgePlane[1] = hlsl::cross(vertices[2], vertices[0]) * csc_sides[1]; - awayFromEdgePlane[2] = hlsl::cross(vertices[0], vertices[1]) * csc_sides[2]; + // `cross(A,B)*acos(dot(A,B))/sin(1-dot^2)` can be done with `cross(A,B)*acos_csc_approx(dot(A,B))` +#define ACOS_CSC(I) acos_csc_approx(cos_sides[I]) +//#define ACOS_CSC(I) hlsl::acos(cos_sides[I])*csc_sides[I] + awayFromEdgePlane[0] = hlsl::cross(vertices[1], vertices[2]) * ACOS_CSC(0); + awayFromEdgePlane[1] = hlsl::cross(vertices[2], vertices[0]) * ACOS_CSC(1); + awayFromEdgePlane[2] = hlsl::cross(vertices[0], vertices[1]) * ACOS_CSC(2); +#undef ACOS_CSC + const vector3_type externalProductsWeightedByPyramidAngles = hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal); + // The ABS makes it so that the computation is correct for an `abs(cos(theta))` factor which is the projected solid angle used for a BSDF. + // It also makes the computation insensitive to the CW or CCW winding of the vertices in the triangle. // Proof: Kelvin-Stokes theorem, if you split the set into two along the horizon with constant CCW winding, the `cross` along the shared edge // goes in different directions and cancels out, while `acos` of the clipped great arcs corresponding to polygon edges add up to the original sides again. - const vector3_type externalProducts = hlsl::abs(hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal)); - - // Far TODO: `cross(A,B)*acos(dot(A,B))/sin(1-dot^2)` can be done with `cross*acos_csc_approx(dot(A,B))` - // We could skip the `csc_sides` factor, and computing `pyramidAngles` and replace them with this approximation weighting before the dot product with the receiver notmal - // The curve fit "revealed in a dream" to me is `exp2(F(log2(x+1)))` where `F(u)` is a polynomial, so far I've calculated `F = (1-u)0.635+(1-u^2)0.0118` which gives <5% error until 165 degrees - // I have a feeling that a polynomial of ((Au+B)u+C)u+D could be sufficient if it has following properties: - // `F(0) = 0` and - // `F(u) <= log2(\frac{\cos^{-1}\left(2^{x}-1\right)}{\sqrt{1-\left(2^{x}-1\right)^{2}}})` because you want to consistently under-estimate the Projected Solid Angle to avoid creating energy - // See https://www.desmos.com/calculator/sdptomhbju - // Furthermore we could clip the polynomial calc to `Cu+D or `(Bu+C)u+D` for small arguments - const vector3_type pyramidAngles = hlsl::acos(cos_sides); - // So that triangle covering almost whole hemisphere sums to PI - return hlsl::dot(pyramidAngles, externalProducts) * scalar_type(0.5); + // The 0.5 is so that triangle covering almost whole hemisphere sums to PI + return hlsl::abs(externalProductsWeightedByPyramidAngles[0]+externalProductsWeightedByPyramidAngles[1]+externalProductsWeightedByPyramidAngles[2]) * scalar_type(0.5); } vector3_type vertices[3]; // angles of vertices with origin, so the sides are INSIDE the sphere vector3_type cos_sides; - vector3_type csc_sides; + vector3_type csc_sides; // TODO: spherical triangle sampling only needs `csc_sides[1]` and possibly `csc_sides[2]` // angles between arcs on the sphere, so angles in the TANGENT plane at each vertex vector3_type cos_vertices; - vector3_type sin_vertices; + vector3_type sin_vertices; // TODO: spherical triangle sampling only needs `sin_vertices[0]` a.k.a `sinA` scalar_type solid_angle; }; From a3ed91a1b8354162027adfb2370820299bdbcf2e Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Fri, 17 Apr 2026 02:00:44 +0200 Subject: [PATCH 3/7] `UsePdfAsWeight==false` implementation was 100% wrong --- .../projected_spherical_triangle.hlsl | 59 +++++++++---------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index c4bc5fcea8..923fc6b537 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -47,73 +47,68 @@ struct ProjectedSphericalTriangle struct cache_type { - scalar_type abs_cos_theta; - vector2_type warped; typename Bilinear::cache_type bilinearCache; + vector3_type L; // TODO: erase when UsePdfAsWeight==false }; - // NOTE: produces a degenerate (all-zero) bilinear patch when the receiver normal faces away - // from all three triangle vertices, resulting in NaN PDFs (0 * inf). Callers must ensure - // at least one vertex has positive projection onto the receiver normal. + // Shouldn't produce NAN if all corners have 0 proj solid angle due to min density adds/clamps in the linear sampler static ProjectedSphericalTriangle create(NBL_REF_ARG(shapes::SphericalTriangle) shape, const vector3_type _receiverNormal, const bool _receiverWasBSDF) { ProjectedSphericalTriangle retval; - retval.sphtri = SphericalTriangle::create(shape); - retval.receiverNormal = _receiverNormal; - retval.receiverWasBSDF = _receiverWasBSDF; + retval.sphtri = SphericalTriangle::create(shape); const scalar_type minimumProjSolidAngle = 0.0; matrix m = matrix(shape.vertices[0], shape.vertices[1], shape.vertices[2]); const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(_receiverWasBSDF, hlsl::mul(m, _receiverNormal), hlsl::promote(minimumProjSolidAngle)); retval.bilinearPatch = Bilinear::create(bxdfPdfAtVertex.yyxz); - const scalar_type projSA = shape.projectedSolidAngle(_receiverNormal); - retval.rcpProjSolidAngle = projSA > scalar_type(0.0) ? scalar_type(1.0) / projSA : scalar_type(0.0); + NBL_IF_CONSTEXPR(!UsePdfAsWeight) + { + retval.receiverNormal = _receiverNormal; + // prevent division of 0 cosine by 0 + retval.projSolidAngle = max(shape.projectedSolidAngle(_receiverNormal),numeric_limits::min); + } return retval; } codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC { - Bilinear bilinear = bilinearPatch; - cache.warped = bilinear.generate(u, cache.bilinearCache); - typename SphericalTriangle::cache_type sphtriCache; - const vector3_type L = sphtri.generate(cache.warped, sphtriCache); - cache.abs_cos_theta = bilinear.forwardWeight(u, cache.bilinearCache); + const vector2_type warped = bilinearPatch.generate(u, cache.bilinearCache); + typename SphericalTriangle::cache_type sphtriCache; // PDF is constant caches nothing, its empty + const codomain_type L = sphtri.generate(warped, sphtriCache); + NBL_IF_CONSTEXPR(!UsePdfAsWeight) + cache.L = L; return L; } density_type forwardPdf(const domain_type u, const cache_type cache) NBL_CONST_MEMBER_FUNC { - return sphtri.rcpSolidAngle * bilinearPatch.forwardPdf(u, cache.bilinearCache); + return sphtri.rcpSolidAngle * bilinearPatch.forwardPdf(u,cache.bilinearCache); } weight_type forwardWeight(const domain_type u, const cache_type cache) NBL_CONST_MEMBER_FUNC { - if (UsePdfAsWeight) - return forwardPdf(u, cache); - return cache.abs_cos_theta * rcpProjSolidAngle; - } - - density_type backwardPdf(const codomain_type L) NBL_CONST_MEMBER_FUNC - { - const vector2_type u = sphtri.generateInverse(L); - return sphtri.rcpSolidAngle * bilinearPatch.backwardPdf(u); + NBL_IF_CONSTEXPR (UsePdfAsWeight) + return forwardPdf(u,cache); + return backwardWeight(cache.L); } weight_type backwardWeight(const codomain_type L) NBL_CONST_MEMBER_FUNC { - NBL_IF_CONSTEXPR(UsePdfAsWeight) - return backwardPdf(L); - const scalar_type minimumProjSolidAngle = 0.0; - const scalar_type abs_cos_theta = math::conditionalAbsOrMax(receiverWasBSDF, hlsl::dot(receiverNormal, L), minimumProjSolidAngle); - return abs_cos_theta * rcpProjSolidAngle; + NBL_IF_CONSTEXPR (UsePdfAsWeight) + { + const vector2_type u = sphtri.generateInverse(L); + return sphtri.rcpSolidAngle * bilinearPatch.backwardPdf(u); + } + // make the MIS weight always abs because even when receiver is a BRDF, the samples in lower hemisphere will get killed and MIS weight never used + return hlsl::abs(hlsl::dot(L,receiverNormal))/projSolidAngle; } sampling::SphericalTriangle sphtri; Bilinear bilinearPatch; - scalar_type rcpProjSolidAngle; + // TODO: erase when UsePdfAsWeight==false vector3_type receiverNormal; - bool receiverWasBSDF; + vector3_type projSolidAngle; }; } // namespace sampling From 99e9495506616dda793df3fd63b22ba1dbcdde39 Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Fri, 17 Apr 2026 03:32:20 +0200 Subject: [PATCH 4/7] why its always to use accessing methods than members directly in other code :P --- .../sampling/projected_spherical_triangle.hlsl | 4 ++-- .../builtin/hlsl/sampling/spherical_triangle.hlsl | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index 923fc6b537..a542357ead 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -83,7 +83,7 @@ struct ProjectedSphericalTriangle density_type forwardPdf(const domain_type u, const cache_type cache) NBL_CONST_MEMBER_FUNC { - return sphtri.rcpSolidAngle * bilinearPatch.forwardPdf(u,cache.bilinearCache); + return sphtri.getRcpSolidAngle() * bilinearPatch.forwardPdf(u,cache.bilinearCache); } weight_type forwardWeight(const domain_type u, const cache_type cache) NBL_CONST_MEMBER_FUNC @@ -98,7 +98,7 @@ struct ProjectedSphericalTriangle NBL_IF_CONSTEXPR (UsePdfAsWeight) { const vector2_type u = sphtri.generateInverse(L); - return sphtri.rcpSolidAngle * bilinearPatch.backwardPdf(u); + return sphtri.getRcpSolidAngle() * bilinearPatch.backwardPdf(u); } // make the MIS weight always abs because even when receiver is a BRDF, the samples in lower hemisphere will get killed and MIS weight never used return hlsl::abs(hlsl::dot(L,receiverNormal))/projSolidAngle; diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index 6f29582e04..eb714a2505 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -172,6 +172,8 @@ struct SphericalTriangle return backwardPdf(L); } + scalar_type getRcpSolidAngle() NBL_CONST_MEMBER_FUNC {return rcpSolidAngle;} + scalar_type rcpSolidAngle; scalar_type cosA; scalar_type sinA; @@ -203,7 +205,6 @@ struct SphericalTriangle { SphericalTriangle retval; retval.base = base_type::create(tri); - retval.rcpSolidAngle = retval.base.rcpSolidAngle; retval.vertexC = tri.vertices[2]; // precompute great circle normal of arc AC (needed for generateInverse) const scalar_type cscB = tri.csc_sides[1]; @@ -277,7 +278,7 @@ struct SphericalTriangle const scalar_type cosCpB = nbl::hlsl::dot(base.tri_vertices[1], cp); const scalar_type den = scalar_type(1.0) + base.triCosC + cosCpB + cosBp_inv; const scalar_type subSolidAngle = scalar_type(2.0) * nbl::hlsl::atan2(nbl::hlsl::abs(num), den); - const scalar_type u = nbl::hlsl::clamp(subSolidAngle * rcpSolidAngle, scalar_type(0.0), scalar_type(1.0)); + const scalar_type u = nbl::hlsl::clamp(subSolidAngle * base.rcpSolidAngle, scalar_type(0.0), scalar_type(1.0)); // Step 3: u.y = |L-B|^2 / |C'-B|^2 // Squared Euclidean distance avoids catastrophic cancellation vs (1-dot)/(1-dot) @@ -295,7 +296,7 @@ struct SphericalTriangle density_type forwardPdf(const domain_type u, const cache_type cache) NBL_CONST_MEMBER_FUNC { - return rcpSolidAngle; + return base.rcpSolidAngle; } weight_type forwardWeight(const domain_type u, const cache_type cache) NBL_CONST_MEMBER_FUNC @@ -305,16 +306,15 @@ struct SphericalTriangle density_type backwardPdf(const codomain_type L) NBL_CONST_MEMBER_FUNC { - return rcpSolidAngle; + return base.rcpSolidAngle; } weight_type backwardWeight(const codomain_type L) NBL_CONST_MEMBER_FUNC { return backwardPdf(L); } - - // mirrored from base for uniform access across both specializations - scalar_type rcpSolidAngle; + + scalar_type getRcpSolidAngle() NBL_CONST_MEMBER_FUNC {return base.rcpSolidAngle;} base_type base; vector3_type vertexC; From 6da021b89138b4d5cfd058b1ca62cfaf980c1fc0 Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Fri, 17 Apr 2026 03:34:42 +0200 Subject: [PATCH 5/7] The Spherical Rectangle Shape needs to be created with the observer (local coordinate system) already given and fixed. This way we can share computation between Solid Angle and Projected Solid Angle calculations Also apply the `abs` fix from the triangle --- .../hlsl/shapes/spherical_rectangle.hlsl | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl index bc864f7a19..7ffe2ef407 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl @@ -94,6 +94,8 @@ struct SphericalRectangle vector4_type cosGamma; }; + // TODO: take an observer already, this way we can precompute and store the `r0`, `denorm_n_z` and `rcpLen_denorm_n_z` + // we need all of the above for solid angle and projected solid angle computation static SphericalRectangle create(NBL_CONST_REF_ARG(CompressedSphericalRectangle) compressed) { SphericalRectangle retval; @@ -113,7 +115,8 @@ struct SphericalRectangle result.r0 = hlsl::mul(basis, origin - observer); const vector4_type denorm_n_z = vector4_type(-result.r0.y, result.r0.x + extents.x, result.r0.y + extents.y, -result.r0.x); - result.n_z = denorm_n_z * hlsl::rsqrt(hlsl::promote(result.r0.z * result.r0.z) + denorm_n_z * denorm_n_z); + const vector4_type rcpLen_denorm_n_z = hlsl::rsqrt(hlsl::promote(result.r0.z * result.r0.z) + denorm_n_z * denorm_n_z); + result.n_z = denorm_n_z * rcpLen_denorm_n_z; result.cosGamma = vector4_type( -result.n_z[0] * result.n_z[1], -result.n_z[1] * result.n_z[2], @@ -129,17 +132,17 @@ struct SphericalRectangle } // Kelvin-Stokes theorem: signed projected solid angle = integral_{rect} (n . omega) d_omega + // TODO: don't take the observer, observer should be taken at creation scalar_type projectedSolidAngle(const vector3_type observer, const vector3_type receiverNormal) NBL_CONST_MEMBER_FUNC { return projectedSolidAngleFromLocal(hlsl::mul(basis, origin - observer), hlsl::mul(basis, receiverNormal)); } - // Overload for when r0 and localNormal are already computed (avoids redundant mul(basis, ...)). - // Exploits rectangle structure: all 4 corners share the same z, so cross products - // have only 2 nonzero components each, and externalProducts can be computed without - // normalizing the corner directions. + // TODO: only take a `localN` scalar_type projectedSolidAngleFromLocal(const vector3_type r0, const vector3_type n) NBL_CONST_MEMBER_FUNC { + // FUN FACT: `n_z` already holds Z coordinate the NORMALIZED `awayFromEdgePlane`, the non-zero coordinate absolute value is equal to `r0.z * rcpLen_denorm_n_z` +// TODO: skip all this code until just call `acos` on the `unnormDots` const scalar_type x0 = r0.x, y0 = r0.y, z = r0.z; const scalar_type x1 = x0 + extents.x; const scalar_type y1 = y0 + extents.y; @@ -155,8 +158,9 @@ struct SphericalRectangle zSq + x1 * x1, zSq + y1 * y1, zSq + x0 * x0 - ); + ); // TODO: this is already computed as `rcpLen_denorm_n_z` +// TODO: this can be computed from `denorm_n_z`, `z` and `rcpLen_denorm_n_z` instead // dot(cross(ri,rj), n) / |cross(ri,rj)| the ex/ey scale factors cancel const vector4_type crossDotN = vector4_type( z * n.y - y0 * n.z, @@ -165,8 +169,12 @@ struct SphericalRectangle z * n.x - x0 * n.z ); // The ABS makes the computation correct for abs(cos(theta)) (BSDF projected solid angle). - const vector4_type externalProducts = hlsl::abs(crossDotN) * hlsl::rsqrt(crossLenSq); + const vector4_type externalProducts = crossDotN * hlsl::rsqrt(crossLenSq); +// TODO: isn't `rcpLen_denorm_n_z` related to the sin^-1() of arclengths ? Wouldn't `ACOS_CSC` apply instead of `acos*rsqrt(1-cos^2)` +// wouldn't then `hlsl::promote(result.r0.z * result.r0.z) + denorm_n_z * denorm_n_z` be the sin^2 ? +// it would probably have to be a different, here's the `acos(sqrt(1-x*x))/x` curve fit again revealed to me in a dream +// https://www.desmos.com/calculator/sbdrulot5a = exp2(-1.6*sqrt(1-x*x))*A+B // cos(arc length) between adjacent corners: dot(ri,rj) / (|ri|*|rj|) const vector4_type lenSq = vector4_type( x0 * x0 + y0 * y0, @@ -189,7 +197,7 @@ struct SphericalRectangle // https://www.linkedin.com/posts/matt-kielan-9b054a165_untitled-graph-activity-7442910005671923712-jHz6?utm_source=share&utm_medium=member_desktop&rcm=ACoAACdp2RQBqq2bJfC2zxpsme-vRv2zh9oP-8E const vector4_type pyramidAngles = hlsl::acos(cos_sides); - return hlsl::dot(pyramidAngles, externalProducts) * scalar_type(0.5); + return hlsl::abs(hlsl::dot(pyramidAngles, externalProducts)) * scalar_type(0.5); } vector3_type origin; From 404592196897a57ba991cbd24cea0ecc7ec797a6 Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Fri, 17 Apr 2026 03:39:43 +0200 Subject: [PATCH 6/7] Spherical Rectangle needs `generateInverse` implemented, otherwise PRactical Warps are broken wrong weight was getting returned and MIS weights were inconsistent. They're broken now, NaN returned as the backwardWeight --- examples_tests | 2 +- .../projected_spherical_rectangle.hlsl | 120 +++++++++--------- 2 files changed, 60 insertions(+), 62 deletions(-) diff --git a/examples_tests b/examples_tests index 5eeb47351e..89ecce1444 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 5eeb47351e29cb6ee02f8d8319f131a2c012b5a2 +Subproject commit 89ecce14443c216b30ff84b837b899045bb5513f diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_rectangle.hlsl index 5bf652cb4c..618f401ba9 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_rectangle.hlsl @@ -25,12 +25,6 @@ namespace sampling // 2. Warp uniform [0,1]^2 through the bilinear to importance-sample NdotL // 3. Feed the warped UV into the solid angle sampler to get a rect offset // 4. PDF = (1/SolidAngle) * bilinearPdf -// -// Template parameter `UsePdfAsWeight`: when true (default), forwardWeight/backwardWeight -// return the PDF instead of the projected-solid-angle MIS weight. -// TODO: the projected-solid-angle MIS weight (UsePdfAsWeight=false) has been shown to be -// poor in practice. Once confirmed by testing, remove the false path and stop storing -// receiverNormal, receiverWasBSDF, and rcpProjSolidAngle as members. template struct ProjectedSphericalRectangle { @@ -47,24 +41,32 @@ struct ProjectedSphericalRectangle struct cache_type { - scalar_type abs_cos_theta; - vector2_type warped; typename Bilinear::cache_type bilinearCache; + vector3_type L; // TODO: same as projected triangle w.r.t. UsePdfAsWeight==false }; - // NOTE: produces a degenerate (all-zero) bilinear patch when the receiver normal faces away - // from all four rectangle vertices, resulting in NaN PDFs (0 * inf). Callers must ensure - // at least one vertex has positive projection onto the receiver normal. - static ProjectedSphericalRectangle create(NBL_CONST_REF_ARG(shapes::SphericalRectangle) shape, const vector3_type observer, const vector3_type _receiverNormal, const bool _receiverWasBSDF) + // Shouldn't produce NAN if all corners have 0 proj solid angle due to min density adds/clamps in the linear sampler + static ProjectedSphericalRectangle create(NBL_CONST_REF_ARG(shapes::CompressedSphericalRectangle) shape, const vector3_type observer, const vector3_type _receiverNormal, const bool _receiverWasBSDF) { - ProjectedSphericalRectangle retval; + ProjectedSphericalRectangle retval; +// TODO:https://github.com/Devsh-Graphics-Programming/Nabla/pull/1001#discussion_r3088570145 + return retval; + } + + // Shouldn't produce NAN if all corners have 0 proj solid angle due to min density adds/clamps in the linear sampler + static ProjectedSphericalRectangle create(NBL_CONST_REF_ARG(shapes::SphericalRectangle) shape, const vector3_type observer, const vector3_type _receiverNormal, const bool _receiverWasBSDF) + { + ProjectedSphericalRectangle retval; const vector3_type n = hlsl::mul(shape.basis, _receiverNormal); - retval.localReceiverNormal = n; - retval.receiverWasBSDF = _receiverWasBSDF; // Compute solid angle and get r0 in local frame (before z-flip) const typename shapes::SphericalRectangle::solid_angle_type sa = shape.solidAngle(observer); const vector3_type r0 = sa.r0; + NBL_IF_CONSTEXPR(!UsePdfAsWeight) + { + retval.receiverNormal = _receiverNormal; + retval.projSolidAngle = shape.projectedSolidAngleFromLocal(r0,n); + } // All 4 corners share r0.z; x is r0.x or r0.x+ex, y is r0.y or r0.y+ey const scalar_type r1x = r0.x + shape.extents.x; @@ -94,78 +96,74 @@ struct ProjectedSphericalRectangle retval.bilinearPatch = Bilinear::create(bxdfPdfAtVertex); // Reuse the already-computed solid_angle_type to avoid recomputing mul(basis, origin - observer) - retval.sphrect = SphericalRectangle::create(sa, shape.extents); - retval.rcpSolidAngle = retval.sphrect.solidAngle > scalar_type(0.0) ? scalar_type(1.0) / retval.sphrect.solidAngle : scalar_type(0.0); - - NBL_IF_CONSTEXPR(!UsePdfAsWeight) - { - const scalar_type projSA = shape.projectedSolidAngleFromLocal(r0, n); - retval.rcpProjSolidAngle = projSA > scalar_type(0.0) ? scalar_type(1.0) / projSA : scalar_type(0.0); - } + retval.sphrect = SphericalRectangle::create(shape.basis, sa, shape.extents); return retval; } // returns a normalized 3D direction in the local frame - codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC + codomain_type generateNormalizedLocal(const domain_type u, NBL_REF_ARG(cache_type) cache, NBL_REF_ARG(scalar_type) hitDist) NBL_CONST_MEMBER_FUNC { - Bilinear bilinear = bilinearPatch; - cache.warped = bilinear.generate(u, cache.bilinearCache); - typename SphericalRectangle::cache_type sphrectCache; - const vector3_type dir = sphrect.generate(cache.warped, sphrectCache); - cache.abs_cos_theta = bilinear.forwardWeight(u, cache.bilinearCache); + const vector2_type warped = bilinearPatch.generate(u, cache.bilinearCache); + typename SphericalRectangle::cache_type sphrectCache; // there's nothing in the cache + const vector3_type dir = sphrect.generateNormalizedLocal(warped,sphrectCache,hitDist); + NBL_IF_CONSTEXPR(!UsePdfAsWeight) + cache.L = hlsl::mul(hlsl::transpose(sphrect.basis),dir); return dir; } - // returns a 2D offset on the rectangle surface from the r0 corner - vector2_type generateSurfaceOffset(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC + // returns a unnormalized 3D direction in the global frame + codomain_type generateUnnormalized(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC { - Bilinear bilinear = bilinearPatch; - cache.warped = bilinear.generate(u, cache.bilinearCache); - typename SphericalRectangle::cache_type sphrectCache; - const vector2_type sampleOffset = sphrect.generateSurfaceOffset(cache.warped, sphrectCache); - cache.abs_cos_theta = bilinear.forwardWeight(u, cache.bilinearCache); - return sampleOffset; + const vector2_type warped = bilinearPatch.generate(u, cache.bilinearCache); + typename SphericalRectangle::cache_type sphrectCache; // there's nothing in the cache + const vector3_type dir = sphrect.generateUnnormalized(warped,sphrectCache); + NBL_IF_CONSTEXPR(!UsePdfAsWeight) + cache.L = dir * hlsl::rsqrt(hlsl::dot(dir,dir)); + return dir; } - density_type forwardPdf(const domain_type u, const cache_type cache) NBL_CONST_MEMBER_FUNC + // returns a normalized 3D direction in the global frame + codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC { - return rcpSolidAngle * bilinearPatch.forwardPdf(u, cache.bilinearCache); + const vector2_type warped = bilinearPatch.generate(u, cache.bilinearCache); + typename SphericalRectangle::cache_type sphrectCache; // there's nothing in the cache + const vector3_type dir = sphrect.generate(warped, sphrectCache); + NBL_IF_CONSTEXPR(!UsePdfAsWeight) + cache.L = dir; + return dir; } - weight_type forwardWeight(const domain_type u, const cache_type cache) NBL_CONST_MEMBER_FUNC + density_type forwardPdf(const domain_type u, const cache_type cache) NBL_CONST_MEMBER_FUNC { - if (UsePdfAsWeight) - return forwardPdf(u, cache); - return cache.abs_cos_theta * rcpProjSolidAngle; + return bilinearPatch.forwardPdf(u,cache.bilinearCache) / sphrect.solidAngle; } - // `p` is the normalized [0,1]^2 position on the rectangle - density_type backwardPdf(const vector2_type p) NBL_CONST_MEMBER_FUNC + weight_type forwardWeight(const domain_type u, const cache_type cache) NBL_CONST_MEMBER_FUNC { - return rcpSolidAngle * bilinearPatch.backwardPdf(p); + NBL_IF_CONSTEXPR (UsePdfAsWeight) + return forwardPdf(u,cache); + return backwardWeight(cache.L); } - weight_type backwardWeight(const vector2_type p) NBL_CONST_MEMBER_FUNC + weight_type backwardWeight(const codomain_type L) NBL_CONST_MEMBER_FUNC { NBL_IF_CONSTEXPR(UsePdfAsWeight) - return backwardPdf(p); - const scalar_type minimumProjSolidAngle = 0.0; - // Reconstruct local direction from normalized rect position - const vector3_type localDir = hlsl::normalize(sphrect.r0 + vector3_type( - p.x * sphrect.extents.x, - p.y * sphrect.extents.y, - scalar_type(0) - )); - const scalar_type abs_cos_theta = math::conditionalAbsOrMax(receiverWasBSDF, hlsl::dot(localReceiverNormal, localDir), minimumProjSolidAngle); - return abs_cos_theta * rcpProjSolidAngle; + { +#if 0 + const vector2_type warped = sphrect.generateInvese(L); // TODO: implement `generateInverse` + return bilinearPatch.backwardPdf(warped) / sphrect.solidAngle; +#endif + return 0.f/0.f; + } + // make the MIS weight always abs because even when receiver is a BRDF, the samples in lower hemisphere will get killed and MIS weight never used + return hlsl::abs(hlsl::dot(L,receiverNormal))/projSolidAngle; } sampling::SphericalRectangle sphrect; Bilinear bilinearPatch; - scalar_type rcpSolidAngle; - scalar_type rcpProjSolidAngle; - vector3_type localReceiverNormal; - bool receiverWasBSDF; + // TODO: same as projected triangle w.r.t. UsePdfAsWeight==false + vector3_type receiverNormal; + vector3_type projSolidAngle; }; } // namespace sampling From 45a68a625475e3bd49245ac71da1a8d9c787a00d Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Fri, 17 Apr 2026 23:40:00 +0200 Subject: [PATCH 7/7] Update spherical_triangle.hlsl always move the scalar multiplication of a vector in a dot product outside the dot product --- .../nbl/builtin/hlsl/shapes/spherical_triangle.hlsl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index 4ebe52d312..b8a2c7229e 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -74,6 +74,7 @@ struct SphericalTriangle // degenerate triangle: any side has near-zero sin, so csc blows up if (hlsl::any >(retval.csc_sides >= hlsl::promote(numeric_limits::max))) { + // TODO: can't do this, still need to be able to sample thin triangle like a line light, so need to know all the angles which are still valid retval.cos_vertices = hlsl::promote(0.0); retval.sin_vertices = hlsl::promote(0.0); retval.solid_angle = 0; @@ -106,22 +107,20 @@ struct SphericalTriangle if (solid_angle <= numeric_limits::epsilon) return 0; - matrix awayFromEdgePlane; // `cross(A,B)*acos(dot(A,B))/sin(1-dot^2)` can be done with `cross(A,B)*acos_csc_approx(dot(A,B))` #define ACOS_CSC(I) acos_csc_approx(cos_sides[I]) //#define ACOS_CSC(I) hlsl::acos(cos_sides[I])*csc_sides[I] - awayFromEdgePlane[0] = hlsl::cross(vertices[1], vertices[2]) * ACOS_CSC(0); - awayFromEdgePlane[1] = hlsl::cross(vertices[2], vertices[0]) * ACOS_CSC(1); - awayFromEdgePlane[2] = hlsl::cross(vertices[0], vertices[1]) * ACOS_CSC(2); + scalar_type externalProductsWeightedByPyramidAngles = hlsl::dot(hlsl::cross(vertices[1], vertices[2]),receiverNormal) * ACOS_CSC(0); + externalProductsWeightedByPyramidAngles += hlsl::dot(hlsl::cross(vertices[2], vertices[0]),receiverNormal) * ACOS_CSC(1); + externalProductsWeightedByPyramidAngles += hlsl::dot(hlsl::cross(vertices[0], vertices[1]),receiverNormal) * ACOS_CSC(2); #undef ACOS_CSC - const vector3_type externalProductsWeightedByPyramidAngles = hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal); // The ABS makes it so that the computation is correct for an `abs(cos(theta))` factor which is the projected solid angle used for a BSDF. // It also makes the computation insensitive to the CW or CCW winding of the vertices in the triangle. // Proof: Kelvin-Stokes theorem, if you split the set into two along the horizon with constant CCW winding, the `cross` along the shared edge // goes in different directions and cancels out, while `acos` of the clipped great arcs corresponding to polygon edges add up to the original sides again. // The 0.5 is so that triangle covering almost whole hemisphere sums to PI - return hlsl::abs(externalProductsWeightedByPyramidAngles[0]+externalProductsWeightedByPyramidAngles[1]+externalProductsWeightedByPyramidAngles[2]) * scalar_type(0.5); + return externalProductsWeightedByPyramidAngles * scalar_type(0.5); } vector3_type vertices[3];