diff --git a/31_HLSLPathTracer/CMakeLists.txt b/31_HLSLPathTracer/CMakeLists.txt index 2e769bb18..b6dacdfa5 100644 --- a/31_HLSLPathTracer/CMakeLists.txt +++ b/31_HLSLPathTracer/CMakeLists.txt @@ -1,10 +1,19 @@ -include(common RESULT_VARIABLE RES) - -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() +include(common) +include("${CMAKE_CURRENT_SOURCE_DIR}/pt.cmake") +include("${CMAKE_CURRENT_SOURCE_DIR}/pt.variant_ids.cmake") if(NBL_BUILD_IMGUI) + # EX31 keeps triangle polygon-method variants as separate precompiled entrypoints. + # This keeps polygon-method choice compile-time and avoids runtime shader switching on this axis. + # On AMD Ryzen 5 5600G with Radeon Graphics (6C/12T), + # a Visual Studio Debug x64 full rebuild of the SPIR-V project completed in about 19.789 s. + set(PT_CACHE_ROOT "pipeline/cache" CACHE STRING + "Relative cache root written to path_tracer.runtime.json in the common bin directory. The runtime resolves this path relative to the JSON file location. Empty disables the generated dev-mode JSON and falls back to --pipeline-cache-dir or LocalAppData." 
+ ) + if(IS_ABSOLUTE "${PT_CACHE_ROOT}") + message(FATAL_ERROR "PT_CACHE_ROOT must stay relative because the runtime resolves it against path_tracer.runtime.json") + endif() + set(NBL_INCLUDE_SERACH_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include" ) @@ -16,25 +25,69 @@ if(NBL_BUILD_IMGUI) ) nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + add_dependencies(${EXECUTABLE_NAME} argparse) + target_include_directories(${EXECUTABLE_NAME} PUBLIC $) + target_compile_definitions(${EXECUTABLE_NAME} PRIVATE PATH_TRACER_BUILD_CONFIG_NAME=\"$\") + if(NOT PT_CACHE_ROOT STREQUAL "") + string(REPLACE "\\" "/" PT_CACHE_ROOT_JSON "${PT_CACHE_ROOT}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/path_tracer.runtime.json.in" + "${CMAKE_CURRENT_BINARY_DIR}/path_tracer.runtime.json" + @ONLY + ) + file(GENERATE + OUTPUT "$/path_tracer.runtime.json" + INPUT "${CMAKE_CURRENT_BINARY_DIR}/path_tracer.runtime.json" + ) + endif() - if(NBL_EMBED_BUILTIN_RESOURCES) - set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) - set(RESOURCE_DIR "app_resources") - - get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) - - file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + set(SM 6_8) + set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}/include" + -I "${CMAKE_CURRENT_SOURCE_DIR}/app_resources/hlsl" + -I "${NBL_ROOT_PATH}/include" # workaround, the same thing like in IES I will address this issue later + -T "lib_${SM}" + -Wno-conversion + -Wno-sign-conversion + -Wno-float-conversion + -Wno-shorten-64-to-32 + -Wno-shadow + 
-Wno-literal-range + ) - foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") - endforeach() + # Keep the payload flat and explicit here. Once Nabla PR #988 lands, these per-rule compile axes should move to first-class packaged-variant support there. + PT_APPEND_SPIRV_RULE(VAR JSON INPUT "app_resources/hlsl/spirv/pt.compute.sphere.proxy.hlsl" KEY "pt.compute.sphere" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=0" "-DPT_VARIANT_SCENE_KIND=${PT_SCENE_SPHERE}" "-DPT_VARIANT_ENTRYPOINT_KIND=${PT_ENTRYPOINT_COMBINED}") + PT_APPEND_SPIRV_RULE(VAR JSON INPUT "app_resources/hlsl/spirv/pt.compute.sphere.rwmc.proxy.hlsl" KEY "pt.compute.sphere.rwmc" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=1" "-DPT_VARIANT_SCENE_KIND=${PT_SCENE_SPHERE}" "-DPT_VARIANT_ENTRYPOINT_KIND=${PT_ENTRYPOINT_COMBINED}") + PT_APPEND_SPIRV_RULE(VAR JSON INPUT "app_resources/hlsl/spirv/pt.compute.triangle.linear.proxy.hlsl" KEY "pt.compute.triangle.linear" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=0" "-DPT_VARIANT_ENTRYPOINT_KIND=${PT_ENTRYPOINT_LINEAR}") + PT_APPEND_SPIRV_RULE(VAR JSON INPUT "app_resources/hlsl/spirv/pt.compute.triangle.persistent.proxy.hlsl" KEY "pt.compute.triangle.persistent" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=0" "-DPT_VARIANT_ENTRYPOINT_KIND=${PT_ENTRYPOINT_PERSISTENT}") + PT_APPEND_SPIRV_RULE(VAR JSON INPUT "app_resources/hlsl/spirv/pt.compute.triangle.rwmc.linear.proxy.hlsl" KEY "pt.compute.triangle.rwmc.linear" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=1" "-DPT_VARIANT_ENTRYPOINT_KIND=${PT_ENTRYPOINT_LINEAR}") + PT_APPEND_SPIRV_RULE(VAR JSON INPUT "app_resources/hlsl/spirv/pt.compute.triangle.rwmc.persistent.proxy.hlsl" KEY "pt.compute.triangle.rwmc.persistent" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=1" "-DPT_VARIANT_ENTRYPOINT_KIND=${PT_ENTRYPOINT_PERSISTENT}") + PT_APPEND_SPIRV_RULE(VAR JSON INPUT "app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.linear.proxy.hlsl" KEY "pt.compute.rectangle.rwmc.linear" COMPILE_OPTIONS 
"-DPT_VARIANT_USE_RWMC=1" "-DPT_VARIANT_SCENE_KIND=${PT_SCENE_RECTANGLE}" "-DPT_VARIANT_ENTRYPOINT_KIND=${PT_ENTRYPOINT_LINEAR}") + PT_APPEND_SPIRV_RULE(VAR JSON INPUT "app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.persistent.proxy.hlsl" KEY "pt.compute.rectangle.rwmc.persistent" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=1" "-DPT_VARIANT_SCENE_KIND=${PT_SCENE_RECTANGLE}" "-DPT_VARIANT_ENTRYPOINT_KIND=${PT_ENTRYPOINT_PERSISTENT}") + PT_APPEND_SPIRV_RULE(VAR JSON INPUT "app_resources/hlsl/spirv/pt.compute.rectangle.proxy.hlsl" KEY "pt.compute.rectangle" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=0" "-DPT_VARIANT_SCENE_KIND=${PT_SCENE_RECTANGLE}" "-DPT_VARIANT_ENTRYPOINT_KIND=${PT_ENTRYPOINT_COMBINED}") + PT_APPEND_SPIRV_RULE(VAR JSON INPUT "app_resources/hlsl/resolve.comp.hlsl" KEY "pt.compute.resolve") + PT_APPEND_SPIRV_RULE(VAR JSON INPUT "app_resources/hlsl/spirv/pt.misc.proxy.hlsl" KEY "pt.misc") + PT_FINALIZE_JSON_PAYLOAD(INOUT JSON) - ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} + ) - LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) - endif() + NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} + ) endif() - - diff --git a/31_HLSLPathTracer/app_resources/hlsl/compute.render.common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/compute.render.common.hlsl new file mode 100644 index 
000000000..be055bd83 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/compute.render.common.hlsl @@ -0,0 +1,62 @@ +#ifndef PATH_TRACER_USE_RWMC +#error PATH_TRACER_USE_RWMC must be defined before including compute.render.common.hlsl +#endif + +#ifndef PATH_TRACER_ENABLE_LINEAR +#define PATH_TRACER_ENABLE_LINEAR 1 +#endif + +#ifndef PATH_TRACER_ENABLE_PERSISTENT +#define PATH_TRACER_ENABLE_PERSISTENT 1 +#endif + +#if !PATH_TRACER_ENABLE_LINEAR && !PATH_TRACER_ENABLE_PERSISTENT +#error At least one path tracer entrypoint mode must be enabled +#endif + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/random/pcg.hlsl" +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" +#if PATH_TRACER_ENABLE_PERSISTENT +#include "nbl/builtin/hlsl/morton.hlsl" +#endif +#include "nbl/builtin/hlsl/bxdf/reflection.hlsl" +#include "nbl/builtin/hlsl/bxdf/transmission.hlsl" +#include "nbl/builtin/hlsl/path_tracing/basic_ray_gen.hlsl" +#include "nbl/builtin/hlsl/path_tracing/unidirectional.hlsl" +#include "render_common.hlsl" + +#if PATH_TRACER_USE_RWMC +#include "nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl" +#include "render_rwmc_common.hlsl" +#else +#include "nbl/builtin/hlsl/path_tracing/default_accumulator.hlsl" +#endif + +#if PATH_TRACER_USE_RWMC +[[vk::push_constant]] RenderRWMCPushConstants pc; +#else +[[vk::push_constant]] RenderPushConstants pc; +#endif + +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D envMap; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState envSampler; + +[[vk::combinedImageSampler]] [[vk::binding(1, 0)]] Texture2D scramblebuf; +[[vk::combinedImageSampler]] [[vk::binding(1, 0)]] SamplerState scrambleSampler; + +[[vk::image_format("rgba16f")]] [[vk::binding(2, 0)]] RWTexture2DArray outImage; + +#if PATH_TRACER_USE_RWMC +[[vk::image_format("rgba16f")]] [[vk::binding(3, 0)]] RWTexture2DArray cascade; +#endif + +#include "example_common.hlsl" +#include 
"rand_gen.hlsl" +#include "intersector.hlsl" +#include "material_system.hlsl" +#include "next_event_estimator.hlsl" + +using namespace nbl; +using namespace hlsl; diff --git a/31_HLSLPathTracer/app_resources/hlsl/compute.render.linear.entrypoints.hlsl b/31_HLSLPathTracer/app_resources/hlsl/compute.render.linear.entrypoints.hlsl new file mode 100644 index 000000000..be3d20dd3 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/compute.render.linear.entrypoints.hlsl @@ -0,0 +1,21 @@ +#ifndef PATH_TRACER_ENTRYPOINT_NAME +#define PATH_TRACER_ENTRYPOINT_NAME main +#endif + +#ifndef PATH_TRACER_ENTRYPOINT_POLYGON_METHOD +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_APPROX_PROJECTED_SOLID_ANGLE +#endif + +#if !PATH_TRACER_ENABLE_LINEAR +#error Linear entrypoint requested while PATH_TRACER_ENABLE_LINEAR is disabled +#endif + +[numthreads(RenderWorkgroupSize, 1, 1)] +[shader("compute")] +void PATH_TRACER_ENTRYPOINT_NAME(uint32_t3 threadID : SV_DispatchThreadID) +{ + pathtracer_render_variant::runLinear(threadID); +} + +#undef PATH_TRACER_ENTRYPOINT_POLYGON_METHOD +#undef PATH_TRACER_ENTRYPOINT_NAME diff --git a/31_HLSLPathTracer/app_resources/hlsl/compute.render.persistent.entrypoints.hlsl b/31_HLSLPathTracer/app_resources/hlsl/compute.render.persistent.entrypoints.hlsl new file mode 100644 index 000000000..9e4765348 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/compute.render.persistent.entrypoints.hlsl @@ -0,0 +1,21 @@ +#ifndef PATH_TRACER_ENTRYPOINT_NAME +#define PATH_TRACER_ENTRYPOINT_NAME mainPersistent +#endif + +#ifndef PATH_TRACER_ENTRYPOINT_POLYGON_METHOD +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_APPROX_PROJECTED_SOLID_ANGLE +#endif + +#if !PATH_TRACER_ENABLE_PERSISTENT +#error Persistent entrypoint requested while PATH_TRACER_ENABLE_PERSISTENT is disabled +#endif + +[numthreads(RenderWorkgroupSize, 1, 1)] +[shader("compute")] +void PATH_TRACER_ENTRYPOINT_NAME(uint32_t3 threadID : SV_DispatchThreadID) +{ + 
pathtracer_render_variant::runPersistent(); +} + +#undef PATH_TRACER_ENTRYPOINT_POLYGON_METHOD +#undef PATH_TRACER_ENTRYPOINT_NAME diff --git a/31_HLSLPathTracer/app_resources/hlsl/compute_render_scene_impl.hlsl b/31_HLSLPathTracer/app_resources/hlsl/compute_render_scene_impl.hlsl new file mode 100644 index 000000000..ca47e57f5 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/compute_render_scene_impl.hlsl @@ -0,0 +1,159 @@ +#ifndef PATH_TRACER_USE_RWMC +#error PATH_TRACER_USE_RWMC must be defined before including compute_render_scene_impl.hlsl +#endif + +namespace pathtracer_render_variant +{ +using namespace nbl; +using namespace hlsl; + +using ray_dir_info_t = bxdf::ray_dir_info::SBasic; +using iso_interaction = PTIsotropicInteraction; +using aniso_interaction = PTAnisotropicInteraction; +using sample_t = bxdf::SLightSample; +using iso_cache = bxdf::SIsotropicMicrofacetCache; +using aniso_cache = bxdf::SAnisotropicMicrofacetCache; + +using iso_config_t = PTIsoConfiguration; +using iso_microfacet_config_t = PTIsoMicrofacetConfiguration; + +using diffuse_bxdf_type = bxdf::reflection::SOrenNayar; +using conductor_bxdf_type = bxdf::reflection::SGGXIsotropic; +using dielectric_bxdf_type = bxdf::transmission::SGGXDielectricIsotropic; +using iri_conductor_bxdf_type = bxdf::reflection::SIridescent; +using iri_dielectric_bxdf_type = bxdf::transmission::SIridescent; + +using payload_type = Payload; +using randgen_type = RandomUniformND; +using material_system_type = MaterialSystem; + +#if PATH_TRACER_USE_RWMC +using accumulator_type = rwmc::CascadeAccumulator >; +#else +using accumulator_type = path_tracing::DefaultAccumulator; +#endif + +template +struct SVariantTypes +{ + using ray_type = Ray; + using raygen_type = path_tracing::BasicRayGenerator; + using intersector_type = Intersector; + using nee_type = NextEventEstimator; + using pathtracer_type = path_tracing::Unidirectional; +}; + +RenderPushConstants getRenderPushConstants() +{ +#if PATH_TRACER_USE_RWMC + 
return ::pc.renderPushConstants; +#else + return ::pc; +#endif +} + +template +void tracePixel(int32_t2 coords) +{ + const RenderPushConstants renderPushConstants = getRenderPushConstants(); + using variant_types = SVariantTypes; + + uint32_t width, height, imageArraySize; + ::outImage.GetDimensions(width, height, imageArraySize); + if (any(coords < int32_t2(0, 0)) || any(coords >= int32_t2(width, height))) + return; + + float32_t2 texCoord = float32_t2(coords) / float32_t2(width, height); + texCoord.y = 1.0 - texCoord.y; + + if (((renderPushConstants.depth - 1) >> MaxDepthLog2) > 0 || ((renderPushConstants.sampleCount - 1) >> MaxSamplesLog2) > 0) + { + ::outImage[uint3(coords.x, coords.y, 0)] = float32_t4(1.0, 0.0, 0.0, 1.0); + return; + } + + typename variant_types::pathtracer_type pathtracer; + + uint2 scrambleDim; + ::scramblebuf.GetDimensions(scrambleDim.x, scrambleDim.y); + const float32_t2 pixOffsetParam = float32_t2(1.0, 1.0) / float32_t2(scrambleDim); + + float32_t4 NDC = float32_t4(texCoord * float32_t2(2.0, -2.0) + float32_t2(-1.0, 1.0), 0.0, 1.0); + float32_t3 camPos; + { + float32_t4 tmp = mul(renderPushConstants.invMVP, NDC); + camPos = tmp.xyz / tmp.w; + NDC.z = 1.0; + } + + scene_type scene; + scene.updateLight(renderPushConstants.generalPurposeLightMatrix); + + typename variant_types::raygen_type rayGen; + rayGen.pixOffsetParam = pixOffsetParam; + rayGen.camPos = camPos; + rayGen.NDC = NDC; + rayGen.invMVP = renderPushConstants.invMVP; + + pathtracer.scene = scene; + pathtracer.randGen = randgen_type::create(::scramblebuf[coords].rg, renderPushConstants.pSampleSequence); + pathtracer.nee.lights = lights; + pathtracer.materialSystem.bxdfs = bxdfs; + pathtracer.bxdfPdfThreshold = 0.0001; + pathtracer.lumaContributionThreshold = hlsl::dot(colorspace::scRGBtoXYZ[1], colorspace::eotf::sRGB(hlsl::promote(1.0 / 255.0))); + pathtracer.spectralTypeToLumaCoeffs = colorspace::scRGBtoXYZ[1]; + +#if PATH_TRACER_USE_RWMC + accumulator_type accumulator = 
accumulator_type::create(::pc.splattingParameters); +#else + accumulator_type accumulator = accumulator_type::create(); +#endif + + for (int i = 0; i < renderPushConstants.sampleCount; ++i) + { + const float32_t3 uvw = pathtracer.randGen(0u, i); + typename variant_types::ray_type ray = rayGen.generate(uvw); + ray.initPayload(); + pathtracer.sampleMeasure(ray, i, renderPushConstants.depth, accumulator); + } + +#if PATH_TRACER_USE_RWMC + for (uint32_t i = 0; i < CascadeCount; ++i) + ::cascade[uint3(coords.x, coords.y, i)] = float32_t4(accumulator.accumulation.data[i], 1.0f); +#else + ::outImage[uint3(coords.x, coords.y, 0)] = float32_t4(accumulator.accumulation, 1.0); +#endif +} + +#if PATH_TRACER_ENABLE_LINEAR +template +void runLinear(uint32_t3 threadID) +{ + uint32_t width, height, imageArraySize; + ::outImage.GetDimensions(width, height, imageArraySize); + tracePixel(int32_t2(threadID.x % width, threadID.x / width)); +} +#endif + +#if PATH_TRACER_ENABLE_PERSISTENT +template +void runPersistent() +{ + uint32_t width, height, imageArraySize; + ::outImage.GetDimensions(width, height, imageArraySize); + const uint32_t numWorkgroupsX = width / RenderWorkgroupSizeSqrt; + const uint32_t numWorkgroupsY = height / RenderWorkgroupSizeSqrt; + + [loop] + for (uint32_t wgBase = glsl::gl_WorkGroupID().x; wgBase < numWorkgroupsX * numWorkgroupsY; wgBase += glsl::gl_NumWorkGroups().x) + { + const int32_t2 wgCoords = int32_t2(wgBase % numWorkgroupsX, wgBase / numWorkgroupsX); + morton::code mc; + mc.value = glsl::gl_LocalInvocationIndex().x; + const int32_t2 localCoords = _static_cast(mc); + tracePixel(wgCoords * int32_t2(RenderWorkgroupSizeSqrt, RenderWorkgroupSizeSqrt) + localCoords); + } +} +#endif +} +#undef PATH_TRACER_USE_RWMC diff --git a/31_HLSLPathTracer/app_resources/hlsl/imgui.unified.hlsl b/31_HLSLPathTracer/app_resources/hlsl/imgui.unified.hlsl new file mode 100644 index 000000000..f36d98144 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/imgui.unified.hlsl 
@@ -0,0 +1,9 @@ +#define NBL_TEXTURES_BINDING_IX 0 +#define NBL_SAMPLER_STATES_BINDING_IX 1 +#define NBL_TEXTURES_SET_IX 0 +#define NBL_SAMPLER_STATES_SET_IX 0 +#define NBL_TEXTURES_COUNT 1 +#define NBL_SAMPLERS_COUNT 2 + +#include "nbl/ext/ImGui/builtin/hlsl/fragment.hlsl" +#include "nbl/ext/ImGui/builtin/hlsl/vertex.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl b/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl index 2a50c71a6..cd4be9929 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl @@ -9,7 +9,6 @@ using namespace hlsl; template struct ShapeSampling; -// Sphere only supports solid angle template struct ShapeSampling { @@ -304,10 +303,26 @@ struct ShapeSampling Shape rect; }; -// PPM_APPROX_PROJECTED_SOLID_ANGLE not available for PST_TRIANGLE +template +struct EffectivePolygonMethod +{ + NBL_CONSTEXPR_STATIC_INLINE NEEPolygonMethod value = PPM; +}; + +template +struct EffectivePolygonMethod +{ + NBL_CONSTEXPR_STATIC_INLINE NEEPolygonMethod value = PPM_SOLID_ANGLE; +}; + +template<> +struct EffectivePolygonMethod +{ + NBL_CONSTEXPR_STATIC_INLINE NEEPolygonMethod value = PPM_SOLID_ANGLE; +}; -template +template struct NextEventEstimator { using scalar_type = typename Ray::scalar_type; @@ -324,7 +339,6 @@ struct NextEventEstimator using tolerance_method_type = Tolerance; using shape_type = Shape; - using shape_sampling_type = ShapeSampling; struct SampleQuotientReturn { @@ -344,24 +358,23 @@ struct NextEventEstimator object_handle_type getLightObjectID() NBL_CONST_MEMBER_FUNC { return lightObjectID; } }; using sample_quotient_return_type = SampleQuotientReturn; + NBL_CONSTEXPR_STATIC_INLINE NEEPolygonMethod EffectivePPM = EffectivePolygonMethod::value; + using shape_sampling_type = ShapeSampling; template NBL_FUNC_REQUIRES(C::value && PST==PST_SPHERE) - shape_sampling_type __getShapeSampling(uint32_t lightObjectID, 
NBL_CONST_REF_ARG(scene_type) scene) + shape_type __getShape(uint32_t lightObjectID, NBL_CONST_REF_ARG(scene_type) scene) { - const shape_type sphere = scene.getSphere(lightObjectID); - return shape_sampling_type::create(sphere); + return scene.getSphere(lightObjectID); } template NBL_FUNC_REQUIRES(C::value && PST==PST_TRIANGLE) - shape_sampling_type __getShapeSampling(uint32_t lightObjectID, NBL_CONST_REF_ARG(scene_type) scene) + shape_type __getShape(uint32_t lightObjectID, NBL_CONST_REF_ARG(scene_type) scene) { - const shape_type tri = scene.getTriangle(lightObjectID); - return shape_sampling_type::create(tri); + return scene.getTriangle(lightObjectID); } template NBL_FUNC_REQUIRES(C::value && PST==PST_RECTANGLE) - shape_sampling_type __getShapeSampling(uint32_t lightObjectID, NBL_CONST_REF_ARG(scene_type) scene) + shape_type __getShape(uint32_t lightObjectID, NBL_CONST_REF_ARG(scene_type) scene) { - const shape_type rect = scene.getRectangle(lightObjectID); - return shape_sampling_type::create(rect); + return scene.getRectangle(lightObjectID); } scalar_type deferred_pdf(NBL_CONST_REF_ARG(scene_type) scene, light_id_type lightID, NBL_CONST_REF_ARG(ray_type) ray) @@ -369,7 +382,8 @@ struct NextEventEstimator if (lightID.id == 0u) return scalar_type(0.0); // env light pdf=0 const light_type light = lights[0u]; - const shape_sampling_type sampling = __getShapeSampling(light.objectID.id, scene); + const shape_type shape = __getShape(light.objectID.id, scene); + const shape_sampling_type sampling = shape_sampling_type::create(shape); return sampling.template deferredPdf(ray) / scalar_type(scene_type::SCENE_LIGHT_COUNT); } @@ -381,10 +395,11 @@ struct NextEventEstimator // use constant indices because with variables, driver (at least nvidia) seemed to nuke the light array and propagated constants throughout the code // which caused frame times to increase from 16ms to 85ms const light_type light = lights[0u]; - const shape_sampling_type sampling = 
__getShapeSampling(light.objectID.id, scene); + const shape_type shape = __getShape(light.objectID.id, scene); sample_quotient_return_type retval; scalar_type pdf, newRayMaxT; + const shape_sampling_type sampling = shape_sampling_type::create(shape); const vector3_type sampleL = sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, xi); const vector3_type N = interaction.getN(); diff --git a/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl index d556a7162..d69815fba 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl @@ -2,8 +2,6 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#pragma wave shader_stage(fragment) - // vertex shader is provided by the fullScreenTriangle extension #include using namespace nbl::hlsl; @@ -13,7 +11,8 @@ using namespace ext::FullScreenTriangle; [[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2DArray texture; [[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; -[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 +[shader("pixel")] +float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 { return float32_t4(texture.Sample(samplerState, float3(vxAttr.uv, 0)).rgb, 1.0f); -} \ No newline at end of file +} diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl deleted file mode 100644 index 204020719..000000000 --- a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl +++ /dev/null @@ -1,222 +0,0 @@ -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" -#include "nbl/builtin/hlsl/random/pcg.hlsl" -#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" -#ifdef PERSISTENT_WORKGROUPS -#include "nbl/builtin/hlsl/morton.hlsl" -#endif - -#include 
"nbl/builtin/hlsl/bxdf/reflection.hlsl" -#include "nbl/builtin/hlsl/bxdf/transmission.hlsl" - -#include "nbl/builtin/hlsl/path_tracing/basic_ray_gen.hlsl" -#include "nbl/builtin/hlsl/path_tracing/unidirectional.hlsl" - -// add these defines (one at a time) using -D argument to dxc -// #define SPHERE_LIGHT -// #define TRIANGLE_LIGHT -// #define RECTANGLE_LIGHT - -#include "render_common.hlsl" -#include "resolve_common.hlsl" - -#ifdef RWMC_ENABLED -#include -#include -#endif - -#ifdef RWMC_ENABLED -[[vk::push_constant]] RenderRWMCPushConstants pc; -#else -[[vk::push_constant]] RenderPushConstants pc; -#endif - -[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D envMap; // unused -[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState envSampler; - -[[vk::combinedImageSampler]] [[vk::binding(1, 0)]] Texture2D scramblebuf; -[[vk::combinedImageSampler]] [[vk::binding(1, 0)]] SamplerState scrambleSampler; - -[[vk::image_format("rgba16f")]] [[vk::binding(2, 0)]] RWTexture2DArray outImage; -[[vk::image_format("rgba16f")]] [[vk::binding(3, 0)]] RWTexture2DArray cascade; - -#include "example_common.hlsl" -#include "rand_gen.hlsl" -#include "intersector.hlsl" -#include "material_system.hlsl" -#include "next_event_estimator.hlsl" - -using namespace nbl; -using namespace hlsl; - -#ifdef SPHERE_LIGHT -#include "scene_sphere_light.hlsl" -#endif -#ifdef TRIANGLE_LIGHT -#include "scene_triangle_light.hlsl" -#endif -#ifdef RECTANGLE_LIGHT -#include "scene_rectangle_light.hlsl" -#endif - -NBL_CONSTEXPR NEEPolygonMethod POLYGON_METHOD = PPM_APPROX_PROJECTED_SOLID_ANGLE; - -int32_t2 getCoordinates() -{ - uint32_t width, height, imageArraySize; - outImage.GetDimensions(width, height, imageArraySize); - return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); -} - -float32_t2 getTexCoords() -{ - uint32_t width, height, imageArraySize; - outImage.GetDimensions(width, height, imageArraySize); - int32_t2 iCoords = 
getCoordinates(); - return float32_t2(float(iCoords.x) / width, 1.0 - float(iCoords.y) / height); -} - -using spectral_t = vector; -using ray_dir_info_t = bxdf::ray_dir_info::SBasic; -using iso_interaction = PTIsotropicInteraction; -using aniso_interaction = PTAnisotropicInteraction; -using sample_t = bxdf::SLightSample; -using iso_cache = bxdf::SIsotropicMicrofacetCache; -using aniso_cache = bxdf::SAnisotropicMicrofacetCache; -using quotient_pdf_t = sampling::quotient_and_pdf; - -using iso_config_t = PTIsoConfiguration; -using iso_microfacet_config_t = PTIsoMicrofacetConfiguration; - -using diffuse_bxdf_type = bxdf::reflection::SOrenNayar; -using conductor_bxdf_type = bxdf::reflection::SGGXIsotropic; -using dielectric_bxdf_type = bxdf::transmission::SGGXDielectricIsotropic; -using iri_conductor_bxdf_type = bxdf::reflection::SIridescent; -using iri_dielectric_bxdf_type = bxdf::transmission::SIridescent; - -using payload_type = Payload; -using ray_type = Ray; -using randgen_type = RandomUniformND; -using raygen_type = path_tracing::BasicRayGenerator; -using intersector_type = Intersector; -using material_system_type = MaterialSystem; -using nee_type = NextEventEstimator; - -#ifdef RWMC_ENABLED -using accumulator_type = rwmc::CascadeAccumulator >; -#else -#include "nbl/builtin/hlsl/path_tracing/default_accumulator.hlsl" -using accumulator_type = path_tracing::DefaultAccumulator; -#endif - -using pathtracer_type = path_tracing::Unidirectional; - -RenderPushConstants retireveRenderPushConstants() -{ -#ifdef RWMC_ENABLED - return pc.renderPushConstants; -#else - return pc; -#endif -} - -[numthreads(RenderWorkgroupSize, 1, 1)] -void main(uint32_t3 threadID : SV_DispatchThreadID) -{ - const RenderPushConstants renderPushConstants = retireveRenderPushConstants(); - - uint32_t width, height, imageArraySize; - outImage.GetDimensions(width, height, imageArraySize); -#ifdef PERSISTENT_WORKGROUPS - const uint32_t NumWorkgroupsX = width / RenderWorkgroupSizeSqrt; - const 
uint32_t NumWorkgroupsY = height / RenderWorkgroupSizeSqrt; - [loop] - for (uint32_t wgBase = glsl::gl_WorkGroupID().x; wgBase < NumWorkgroupsX*NumWorkgroupsY; wgBase += glsl::gl_NumWorkGroups().x) - { - const int32_t2 wgCoords = int32_t2(wgBase % NumWorkgroupsX, wgBase / NumWorkgroupsX); - morton::code mc; - mc.value = glsl::gl_LocalInvocationIndex().x; - const int32_t2 localCoords = _static_cast(mc); - const int32_t2 coords = wgCoords * int32_t2(RenderWorkgroupSizeSqrt,RenderWorkgroupSizeSqrt) + localCoords; -#else - const int32_t2 coords = getCoordinates(); -#endif - float32_t2 texCoord = float32_t2(coords) / float32_t2(width, height); - texCoord.y = 1.0 - texCoord.y; - - if (any(coords < int32_t2(0,0)) || any(coords >= int32_t2(width, height))) { -#ifdef PERSISTENT_WORKGROUPS - continue; -#else - return; -#endif - } - - if (((renderPushConstants.depth - 1) >> MaxDepthLog2) > 0 || ((renderPushConstants.sampleCount - 1) >> MaxSamplesLog2) > 0) - { - float32_t4 pixelCol = float32_t4(1.0,0.0,0.0,1.0); - outImage[uint3(coords.x, coords.y, 0)] = pixelCol; -#ifdef PERSISTENT_WORKGROUPS - continue; -#else - return; -#endif - } - - // set up path tracer - pathtracer_type pathtracer; - - uint2 scrambleDim; - scramblebuf.GetDimensions(scrambleDim.x, scrambleDim.y); - float32_t2 pixOffsetParam = (float2)1.0 / float2(scrambleDim); - - float32_t4 NDC = float4(texCoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - float32_t3 camPos; - { - float4 tmp = mul(renderPushConstants.invMVP, NDC); - camPos = tmp.xyz / tmp.w; - NDC.z = 1.0; - } - - scene_type scene; - scene.updateLight(renderPushConstants.generalPurposeLightMatrix); - - raygen_type rayGen; - rayGen.pixOffsetParam = pixOffsetParam; - rayGen.camPos = camPos; - rayGen.NDC = NDC; - rayGen.invMVP = renderPushConstants.invMVP; - - pathtracer.scene = scene; - pathtracer.randGen = randgen_type::create(scramblebuf[coords].rg, renderPushConstants.pSampleSequence); - pathtracer.nee.lights = lights; - 
pathtracer.materialSystem.bxdfs = bxdfs; - pathtracer.bxdfPdfThreshold = 0.0001; - pathtracer.lumaContributionThreshold = hlsl::dot(colorspace::scRGBtoXYZ[1], colorspace::eotf::sRGB(hlsl::promote(1.0 / 255.0))); - pathtracer.spectralTypeToLumaCoeffs = colorspace::scRGBtoXYZ[1]; - -#ifdef RWMC_ENABLED - accumulator_type accumulator = accumulator_type::create(pc.splattingParameters); -#else - accumulator_type accumulator = accumulator_type::create(); -#endif - // path tracing loop - for(int i = 0; i < renderPushConstants.sampleCount; ++i) - { - float32_t3 uvw = pathtracer.randGen(0u, i); - ray_type ray = rayGen.generate(uvw); - ray.initPayload(); - pathtracer.sampleMeasure(ray, i, renderPushConstants.depth, accumulator); - } - -#ifdef RWMC_ENABLED - for (uint32_t i = 0; i < CascadeCount; ++i) - cascade[uint3(coords.x, coords.y, i)] = float32_t4(accumulator.accumulation.data[i], 1.0f); -#else - outImage[uint3(coords.x, coords.y, 0)] = float32_t4(accumulator.accumulation, 1.0); -#endif - -#ifdef PERSISTENT_WORKGROUPS - } -#endif -} \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl index 540aadf76..7f1794b15 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl @@ -1,6 +1,7 @@ #ifndef _PATHTRACER_EXAMPLE_RENDER_RWMC_COMMON_INCLUDED_ #define _PATHTRACER_EXAMPLE_RENDER_RWMC_COMMON_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "rwmc_common.hlsl" #include "nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl" #include "render_common.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl index c0982e9f2..346ff7322 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl @@ -47,7 +47,8 @@ int32_t2 getImageExtents() } 
[numthreads(ResolveWorkgroupSizeX, ResolveWorkgroupSizeY, 1)] -void main(uint32_t3 threadID : SV_DispatchThreadID) +[shader("compute")] +void resolve(uint32_t3 threadID : SV_DispatchThreadID) { const int32_t2 coords = int32_t2(threadID.x, threadID.y); const int32_t2 imageExtents = getImageExtents(); diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl index 66fb20acb..ec13c0080 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl @@ -1,6 +1,7 @@ #ifndef _PATHTRACER_EXAMPLE_RESOLVE_COMMON_INCLUDED_ #define _PATHTRACER_EXAMPLE_RESOLVE_COMMON_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "rwmc_common.hlsl" #include "nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl" struct ResolvePushConstants @@ -10,6 +11,4 @@ struct ResolvePushConstants NBL_CONSTEXPR uint32_t ResolveWorkgroupSizeX = 32u; NBL_CONSTEXPR uint32_t ResolveWorkgroupSizeY = 16u; -NBL_CONSTEXPR uint32_t CascadeCount = 6u; - #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/rwmc_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/rwmc_common.hlsl new file mode 100644 index 000000000..77020ce17 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/rwmc_common.hlsl @@ -0,0 +1,8 @@ +#ifndef _PATHTRACER_EXAMPLE_RWMC_COMMON_INCLUDED_ +#define _PATHTRACER_EXAMPLE_RWMC_COMMON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +NBL_CONSTEXPR uint32_t CascadeCount = 6u; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/scene_base.hlsl b/31_HLSLPathTracer/app_resources/hlsl/scene_base.hlsl index 070a7c164..59a9f3c57 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/scene_base.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/scene_base.hlsl @@ -1,7 +1,13 @@ -#ifndef _PATHTRACER_EXAMPLE_SCENE_BASE_INCLUDED_ -#define _PATHTRACER_EXAMPLE_SCENE_BASE_INCLUDED_ +#if !defined(PATHTRACER_SCENE_BASE_MULTI_INCLUDE) + #ifndef 
_PATHTRACER_EXAMPLE_SCENE_BASE_INCLUDED_ + #define _PATHTRACER_EXAMPLE_SCENE_BASE_INCLUDED_ + #define PATHTRACER_SCENE_BASE_EMIT_BODY 1 + #endif +#else + #define PATHTRACER_SCENE_BASE_EMIT_BODY 1 +#endif -#include "example_common.hlsl" +#if PATHTRACER_SCENE_BASE_EMIT_BODY using namespace nbl; using namespace hlsl; @@ -73,4 +79,5 @@ static const bxdfnode_type bxdfs[SceneBase::SCENE_BXDF_COUNT] = { bxdfnode_type::create(MaterialType::EMISSIVE, LightEminence) }; +#undef PATHTRACER_SCENE_BASE_EMIT_BODY #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.proxy.hlsl new file mode 100644 index 000000000..8e04bcfc1 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.variant.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.linear.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.linear.proxy.hlsl new file mode 100644 index 000000000..8e04bcfc1 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.linear.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.variant.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.persistent.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.persistent.proxy.hlsl new file mode 100644 index 000000000..8e04bcfc1 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.persistent.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.variant.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.sphere.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.sphere.proxy.hlsl new file mode 100644 index 000000000..8e04bcfc1 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.sphere.proxy.hlsl @@ -0,0 +1 
@@ +#include "pt.compute.variant.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.sphere.rwmc.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.sphere.rwmc.proxy.hlsl new file mode 100644 index 000000000..8e04bcfc1 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.sphere.rwmc.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.variant.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.linear.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.linear.proxy.hlsl new file mode 100644 index 000000000..5035e960d --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.linear.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.triangle.methods.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.methods.shared.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.methods.shared.hlsl new file mode 100644 index 000000000..405b25dd2 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.methods.shared.hlsl @@ -0,0 +1,50 @@ +#pragma once + +#if !defined(PT_VARIANT_USE_RWMC) || !defined(PT_VARIANT_ENTRYPOINT_KIND) +#error Missing triangle method compile options +#endif + +#define PT_VARIANT_ENTRYPOINT_LINEAR 1 +#define PT_VARIANT_ENTRYPOINT_PERSISTENT 2 +#define PATH_TRACER_USE_RWMC PT_VARIANT_USE_RWMC +#if PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_LINEAR +#define PATH_TRACER_ENABLE_LINEAR 1 +#define PATH_TRACER_ENABLE_PERSISTENT 0 +#elif PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_PERSISTENT +#define PATH_TRACER_ENABLE_LINEAR 0 +#define PATH_TRACER_ENABLE_PERSISTENT 1 +#else +#error Unsupported PT_VARIANT_ENTRYPOINT_KIND +#endif + +#include "compute.render.common.hlsl" +#include "scene_triangle_light.hlsl" +#include "compute_render_scene_impl.hlsl" + +#if PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_LINEAR +#define 
PATH_TRACER_ENTRYPOINT_NAME main +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_APPROX_PROJECTED_SOLID_ANGLE +#include "compute.render.linear.entrypoints.hlsl" + +#define PATH_TRACER_ENTRYPOINT_NAME mainArea +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_AREA +#include "compute.render.linear.entrypoints.hlsl" + +#define PATH_TRACER_ENTRYPOINT_NAME mainSolidAngle +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_SOLID_ANGLE +#include "compute.render.linear.entrypoints.hlsl" +#endif + +#if PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_PERSISTENT +#define PATH_TRACER_ENTRYPOINT_NAME mainPersistent +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_APPROX_PROJECTED_SOLID_ANGLE +#include "compute.render.persistent.entrypoints.hlsl" + +#define PATH_TRACER_ENTRYPOINT_NAME mainPersistentArea +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_AREA +#include "compute.render.persistent.entrypoints.hlsl" + +#define PATH_TRACER_ENTRYPOINT_NAME mainPersistentSolidAngle +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_SOLID_ANGLE +#include "compute.render.persistent.entrypoints.hlsl" +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.persistent.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.persistent.proxy.hlsl new file mode 100644 index 000000000..5035e960d --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.persistent.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.triangle.methods.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.linear.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.linear.proxy.hlsl new file mode 100644 index 000000000..5035e960d --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.linear.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.triangle.methods.shared.hlsl" diff --git 
a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.persistent.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.persistent.proxy.hlsl new file mode 100644 index 000000000..5035e960d --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.persistent.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.triangle.methods.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.variant.shared.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.variant.shared.hlsl new file mode 100644 index 000000000..39c46b402 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.variant.shared.hlsl @@ -0,0 +1,52 @@ +#pragma once + +#if !defined(PT_VARIANT_USE_RWMC) || !defined(PT_VARIANT_SCENE_KIND) || !defined(PT_VARIANT_ENTRYPOINT_KIND) +#error Missing path tracer variant compile options +#endif + +#define PT_VARIANT_SCENE_SPHERE 0 +#define PT_VARIANT_SCENE_RECTANGLE 2 +#define PT_VARIANT_ENTRYPOINT_COMBINED 0 +#define PT_VARIANT_ENTRYPOINT_LINEAR 1 +#define PT_VARIANT_ENTRYPOINT_PERSISTENT 2 + +#define PATH_TRACER_USE_RWMC PT_VARIANT_USE_RWMC +#if PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_COMBINED +#define PATH_TRACER_ENABLE_LINEAR 1 +#define PATH_TRACER_ENABLE_PERSISTENT 1 +#elif PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_LINEAR +#define PATH_TRACER_ENABLE_LINEAR 1 +#define PATH_TRACER_ENABLE_PERSISTENT 0 +#elif PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_PERSISTENT +#define PATH_TRACER_ENABLE_LINEAR 0 +#define PATH_TRACER_ENABLE_PERSISTENT 1 +#else +#error Unsupported PT_VARIANT_ENTRYPOINT_KIND +#endif + +#include "compute.render.common.hlsl" +#if PT_VARIANT_SCENE_KIND == PT_VARIANT_SCENE_SPHERE +#define PT_VARIANT_POLYGON_METHOD PPM_SOLID_ANGLE +#include "scene_sphere_light.hlsl" +#elif PT_VARIANT_SCENE_KIND == PT_VARIANT_SCENE_RECTANGLE +#define PT_VARIANT_POLYGON_METHOD PPM_SOLID_ANGLE +#include "scene_rectangle_light.hlsl" 
+#else +#error Unsupported PT_VARIANT_SCENE_KIND +#endif +#include "compute_render_scene_impl.hlsl" + +#if PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_COMBINED +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PT_VARIANT_POLYGON_METHOD +#include "compute.render.linear.entrypoints.hlsl" +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PT_VARIANT_POLYGON_METHOD +#include "compute.render.persistent.entrypoints.hlsl" +#elif PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_LINEAR +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PT_VARIANT_POLYGON_METHOD +#include "compute.render.linear.entrypoints.hlsl" +#else +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PT_VARIANT_POLYGON_METHOD +#include "compute.render.persistent.entrypoints.hlsl" +#endif + +#undef PT_VARIANT_POLYGON_METHOD diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.misc.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.misc.proxy.hlsl new file mode 100644 index 000000000..8a2882574 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.misc.proxy.hlsl @@ -0,0 +1,5 @@ +#include "present.frag.hlsl" + +#define pc ex31_imgui_pc +#include "imgui.unified.hlsl" +#undef pc diff --git a/31_HLSLPathTracer/include/nbl/builtin/hlsl/bxdf/ndf/microfacet_to_light_transform.hlsl b/31_HLSLPathTracer/include/nbl/builtin/hlsl/bxdf/ndf/microfacet_to_light_transform.hlsl new file mode 100644 index 000000000..bdcd5ae31 --- /dev/null +++ b/31_HLSLPathTracer/include/nbl/builtin/hlsl/bxdf/ndf/microfacet_to_light_transform.hlsl @@ -0,0 +1,119 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_BXDF_NDF_MICROFACET_LIGHT_TRANSFORM_INCLUDED_ +#define _NBL_BUILTIN_HLSL_BXDF_NDF_MICROFACET_LIGHT_TRANSFORM_INCLUDED_ + +#include "nbl/builtin/hlsl/limits.hlsl" +#include "nbl/builtin/hlsl/bxdf/common.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace bxdf +{ +namespace ndf +{ + +enum MicrofacetTransformTypes : uint16_t +{ + MTT_REFLECT = 0b01, + MTT_REFRACT = 0b10, + MTT_REFLECT_REFRACT = 0b11 +}; + +namespace microfacet_transform_concepts +{ +#define NBL_CONCEPT_NAME QuantQuery +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (query, T) +NBL_CONCEPT_BEGIN(1) +#define query NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((query.getVdotHLdotH()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((query.getNeg_rcp2_refractionDenom()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) +); +#undef query +#include +} + +template +struct DualMeasureQuantQuery +{ + using scalar_type = T; + + template + static DualMeasureQuantQuery create(NBL_CONST_REF_ARG(Interaction) interaction, NBL_CONST_REF_ARG(MicrofacetCache) cache, scalar_type orientedEta) + { + DualMeasureQuantQuery retval; + retval.VdotHLdotH = cache.getVdotHLdotH(); + const scalar_type VdotH = cache.getVdotH(); + const scalar_type VdotH_etaLdotH = hlsl::mix(VdotH + orientedEta * cache.getLdotH(), + VdotH / orientedEta + cache.getLdotH(), + interaction.getPathOrigin() == PathOrigin::PO_SENSOR); + retval.neg_rcp2_refractionDenom = scalar_type(-1.0) / (VdotH_etaLdotH * VdotH_etaLdotH); + return retval; + } + + scalar_type getVdotHLdotH() NBL_CONST_MEMBER_FUNC { return VdotHLdotH; } + scalar_type getNeg_rcp2_refractionDenom() NBL_CONST_MEMBER_FUNC { return neg_rcp2_refractionDenom ; } + + scalar_type VdotHLdotH; + 
scalar_type neg_rcp2_refractionDenom; +}; + + +template +struct SDualMeasureQuant +{ + using value_type = T; + + T microfacetMeasure; + T projectedLightMeasure; +}; + +namespace impl +{ +template +struct createDualMeasureQuantity_helper +{ + using scalar_type = typename vector_traits::scalar_type; + + static SDualMeasureQuant __call(const T microfacetMeasure, scalar_type clampedNdotV, scalar_type clampedNdotL, scalar_type VdotHLdotH, scalar_type neg_rcp2_refractionDenom) + { + assert(clampedNdotV >= scalar_type(0.0) && clampedNdotL >= scalar_type(0.0)); + SDualMeasureQuant retval; + retval.microfacetMeasure = microfacetMeasure; + const bool transmitted = reflect_refract==MTT_REFRACT || (reflect_refract!=MTT_REFLECT && VdotHLdotH < scalar_type(0.0)); + retval.projectedLightMeasure = microfacetMeasure * hlsl::mix(scalar_type(0.25),VdotHLdotH*neg_rcp2_refractionDenom,transmitted)/clampedNdotV; + return retval; + } +}; +} + +template +SDualMeasureQuant createDualMeasureQuantity(const T specialMeasure, typename vector_traits::scalar_type clampedNdotV, typename vector_traits::scalar_type clampedNdotL) +{ + typename vector_traits::scalar_type dummy; + return impl::createDualMeasureQuantity_helper::__call(specialMeasure,clampedNdotV,clampedNdotL,dummy,dummy); +} +template +SDualMeasureQuant createDualMeasureQuantity(const T specialMeasure, typename vector_traits::scalar_type clampedNdotV, typename vector_traits::scalar_type clampedNdotL, typename vector_traits::scalar_type VdotHLdotH, typename vector_traits::scalar_type neg_rcp2_refractionDenom) +{ + return impl::createDualMeasureQuantity_helper::__call(specialMeasure,clampedNdotV,clampedNdotL,VdotHLdotH,neg_rcp2_refractionDenom); +} +template +SDualMeasureQuant createDualMeasureQuantity(const T specialMeasure, typename vector_traits::scalar_type clampedNdotV, typename vector_traits::scalar_type clampedNdotL, NBL_CONST_REF_ARG(Query) query) +{ + return 
impl::createDualMeasureQuantity_helper::__call(specialMeasure,clampedNdotV,clampedNdotL,query.getVdotHLdotH(),query.getNeg_rcp2_refractionDenom()); +} + +} +} +} +} + +#endif diff --git a/31_HLSLPathTracer/include/nbl/this_example/render_variant_enums.hlsl b/31_HLSLPathTracer/include/nbl/this_example/render_variant_enums.hlsl new file mode 100644 index 000000000..b0095aa6d --- /dev/null +++ b/31_HLSLPathTracer/include/nbl/this_example/render_variant_enums.hlsl @@ -0,0 +1,20 @@ +#ifndef __NBL_THIS_EXAMPLE_RENDER_VARIANT_ENUMS_HLSL_INCLUDED__ +#define __NBL_THIS_EXAMPLE_RENDER_VARIANT_ENUMS_HLSL_INCLUDED__ + +enum E_LIGHT_GEOMETRY : uint8_t +{ + ELG_SPHERE, + ELG_TRIANGLE, + ELG_RECTANGLE, + ELG_COUNT +}; + +enum E_POLYGON_METHOD : uint8_t +{ + EPM_AREA, + EPM_SOLID_ANGLE, + EPM_PROJECTED_SOLID_ANGLE, + EPM_COUNT +}; + +#endif diff --git a/31_HLSLPathTracer/include/nbl/this_example/render_variant_strings.hpp b/31_HLSLPathTracer/include/nbl/this_example/render_variant_strings.hpp new file mode 100644 index 000000000..0d7434758 --- /dev/null +++ b/31_HLSLPathTracer/include/nbl/this_example/render_variant_strings.hpp @@ -0,0 +1,98 @@ +#ifndef __NBL_THIS_EXAMPLE_RENDER_VARIANT_STRINGS_HPP_INCLUDED__ +#define __NBL_THIS_EXAMPLE_RENDER_VARIANT_STRINGS_HPP_INCLUDED__ + +#include +#include + +#include "nbl/system/to_string.h" +#include "nbl/this_example/render_variant_enums.hlsl" + +namespace nbl::system::impl +{ +template<> +struct to_string_helper +{ + static inline std::string __call(const E_LIGHT_GEOMETRY value) + { + switch (value) + { + case ELG_SPHERE: + return "ELG_SPHERE"; + case ELG_TRIANGLE: + return "ELG_TRIANGLE"; + case ELG_RECTANGLE: + return "ELG_RECTANGLE"; + default: + return "ERROR (geometry)"; + } + } +}; + +template<> +struct to_string_helper +{ + static inline std::string __call(const E_POLYGON_METHOD value) + { + switch (value) + { + case EPM_AREA: + return "Area"; + case EPM_SOLID_ANGLE: + return "Solid Angle"; + case EPM_PROJECTED_SOLID_ANGLE: + 
return "Projected Solid Angle"; + default: + return "ERROR (method)"; + } + } +}; +} + +namespace nbl::this_example +{ +inline const auto& getLightGeometryNameStorage() +{ + static const auto names = std::to_array({ + system::to_string(ELG_SPHERE), + system::to_string(ELG_TRIANGLE), + system::to_string(ELG_RECTANGLE) + }); + return names; +} + +inline const auto& getLightGeometryNamePointers() +{ + static const auto ptrs = [] { + std::array retval = {}; + const auto& names = getLightGeometryNameStorage(); + for (size_t i = 0u; i < names.size(); ++i) + retval[i] = names[i].c_str(); + return retval; + }(); + return ptrs; +} + +inline const auto& getPolygonMethodNameStorage() +{ + static const auto names = std::to_array({ + system::to_string(EPM_AREA), + system::to_string(EPM_SOLID_ANGLE), + system::to_string(EPM_PROJECTED_SOLID_ANGLE) + }); + return names; +} + +inline const auto& getPolygonMethodNamePointers() +{ + static const auto ptrs = [] { + std::array retval = {}; + const auto& names = getPolygonMethodNameStorage(); + for (size_t i = 0u; i < names.size(); ++i) + retval[i] = names[i].c_str(); + return retval; + }(); + return ptrs; +} +} + +#endif diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index f6f180a2c..b2bb96cd3 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -2,17 +2,34 @@ // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#include "argparse/argparse.hpp" #include "nbl/examples/examples.hpp" #include "nbl/this_example/transform.hpp" +#include "nbl/this_example/render_variant_strings.hpp" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl" #include "nbl/this_example/common.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" #include "nbl/builtin/hlsl/sampling/quantized_sequence.hlsl" +#include "nbl/asset/utils/ISPIRVEntryPointTrimmer.h" +#include "nbl/system/ModuleLookupUtils.h" #include "app_resources/hlsl/render_common.hlsl" #include "app_resources/hlsl/render_rwmc_common.hlsl" #include "app_resources/hlsl/resolve_common.hlsl" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nlohmann/json.hpp" + using namespace nbl; using namespace core; using namespace hlsl; @@ -30,32 +47,13 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui using asset_base_t = BuiltinResourcesApplication; using clock_t = std::chrono::steady_clock; - enum E_LIGHT_GEOMETRY : uint8_t - { - ELG_SPHERE, - ELG_TRIANGLE, - ELG_RECTANGLE, - ELG_COUNT - }; - constexpr static inline uint32_t2 WindowDimensions = { 1280, 720 }; constexpr static inline uint32_t MaxFramesInFlight = 5; + static constexpr size_t BinaryToggleCount = 2ull; + static constexpr std::string_view BuildConfigName = PATH_TRACER_BUILD_CONFIG_NAME; + static constexpr std::string_view RuntimeConfigFilename = "path_tracer.runtime.json"; static inline std::string DefaultImagePathsFile = "envmap/envmap_0.exr"; static inline std::string OwenSamplerFilePath = "owen_sampler_buffer.bin"; - static inline std::string PTHLSLShaderPath = "app_resources/hlsl/render.comp.hlsl"; - static inline std::array PTHLSLShaderVariants = { - "SPHERE_LIGHT", - "TRIANGLE_LIGHT", - 
"RECTANGLE_LIGHT" - }; - static inline std::string ResolveShaderPath = "app_resources/hlsl/resolve.comp.hlsl"; - static inline std::string PresentShaderPath = "app_resources/hlsl/present.frag.hlsl"; - - const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { - "ELG_SPHERE", - "ELG_TRIANGLE", - "ELG_RECTANGLE" - }; public: inline HLSLComputePathtracer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) @@ -63,14 +61,12 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui inline bool isComputeOnly() const override { return false; } - inline video::SPhysicalDeviceLimits getRequiredDeviceLimits() const override + inline core::bitflag getLogLevelMask() override { - video::SPhysicalDeviceLimits retval = device_base_t::getRequiredDeviceLimits(); - retval.storagePushConstant16 = true; - return retval; + return core::bitflag(system::ILogger::ELL_INFO) | system::ILogger::ELL_WARNING | system::ILogger::ELL_PERFORMANCE | system::ILogger::ELL_ERROR; } - virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + inline video::SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override { auto retval = device_base_t::getPreferredDeviceFeatures(); retval.pipelineExecutableInfo = true; @@ -107,6 +103,8 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui inline bool onAppInitialized(smart_refctd_ptr&& system) override { + m_startupBeganAt = clock_t::now(); + // Init systems { m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); @@ -123,6 +121,8 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui return logFail("Failed to create semaphore!"); } + if (!parseCommandLine()) + return false; // Create renderpass and init surface nbl::video::IGPURenderpass* renderpass; { @@ -165,7 +165,6 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui 
if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) return logFail("Could not create Window & Surface or initialize the Surface!"); } - // Create command pool and buffers { auto gQueue = getGraphicsQueue(); @@ -176,7 +175,21 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), MaxFramesInFlight })) return logFail("Couldn't create Command Buffer!"); } - + { + m_scratchSemaphore = m_device->createSemaphore(0); + if (!m_scratchSemaphore) + return logFail("Could not create Scratch Semaphore"); + m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); + m_intendedSubmit.queue = getGraphicsQueue(); + m_intendedSubmit.waitSemaphores = {}; + m_intendedSubmit.scratchCommandBuffers = {}; + m_intendedSubmit.scratchSemaphore = { + .semaphore = m_scratchSemaphore.get(), + .value = 0, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + }; + } + initializePipelineCache(); ISampler::SParams samplerParams = { .AnisotropicFilter = 0 }; @@ -289,202 +302,101 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui } m_presentDescriptorSet = presentDSPool->createDescriptorSet(gpuPresentDescriptorSetLayout); - // Create Shaders - auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "", bool persistentWorkGroups = false, bool rwmc = false) -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.workingDirectory = localInputCWD; - auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - { - m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); - std::exit(-1); - } - - auto source = smart_refctd_ptr_static_cast(assets[0]); - // The down-cast should not fail! 
- assert(source); - - auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - CHLSLCompiler::SOptions options = {}; - options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; - options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; -#ifndef _NBL_DEBUG - ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; - auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); - options.spirvOptimizer = opt.get(); -#endif - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; - options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); - options.preprocessorOptions.logger = m_logger.get(); - options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - - core::vector defines; - defines.reserve(3); - if (!defineMacro.empty()) - defines.push_back({ defineMacro, "" }); - if(persistentWorkGroups) - defines.push_back({ "PERSISTENT_WORKGROUPS", "1" }); - if(rwmc) - defines.push_back({ "RWMC_ENABLED", "" }); - - options.preprocessorOptions.extraDefines = defines; - - source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); - - auto shader = m_device->compileShader({ source.get(), nullptr, nullptr, nullptr }); - if (!shader) - { - m_logger->log("HLSL shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader); - std::exit(-1); - } - - return shader; - }; - const uint32_t deviceMinSubgroupSize = m_device->getPhysicalDevice()->getLimits().minSubgroupSize; - const bool pipelineExecutableInfo = m_device->getEnabledFeatures().pipelineExecutableInfo; - auto getComputePipelineCreationParams = [deviceMinSubgroupSize, pipelineExecutableInfo](IShader* shader, IGPUPipelineLayout* pipelineLayout) -> IGPUComputePipeline::SCreationParams - { - IGPUComputePipeline::SCreationParams params = {}; - params.layout = pipelineLayout; - params.shader.shader = shader; - 
params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.cached.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(hlsl::log2(float(deviceMinSubgroupSize))); - if (pipelineExecutableInfo) - { - params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; - params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; - } - return params; - }; + m_requiredSubgroupSize = static_cast(hlsl::log2(float(deviceMinSubgroupSize))); - // Create compute pipelines { - for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) - { - const nbl::asset::SPushConstantRange pcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .offset = 0, - .size = sizeof(RenderPushConstants) - }; - auto ptPipelineLayout = m_device->createPipelineLayout( - { &pcRange, 1 }, - core::smart_refctd_ptr(gpuDescriptorSetLayout), - nullptr, - nullptr, - nullptr - ); - if (!ptPipelineLayout) - return logFail("Failed to create Pathtracing pipeline layout"); - - const nbl::asset::SPushConstantRange rwmcPcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .offset = 0, - .size = sizeof(RenderRWMCPushConstants) - }; - auto rwmcPtPipelineLayout = m_device->createPipelineLayout( - { &rwmcPcRange, 1 }, - core::smart_refctd_ptr(gpuDescriptorSetLayout), - nullptr, - nullptr, - nullptr - ); - if (!rwmcPtPipelineLayout) - return logFail("Failed to create RWMC Pathtracing pipeline layout"); - - { - auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index]); - auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); - - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelines.data() + index)) - return logFail("Failed to create HLSL compute pipeline!\n"); - - if (m_device->getEnabledFeatures().pipelineExecutableInfo) - { - auto report = system::to_string(m_PTHLSLPipelines[index]->getExecutableInfo()); - 
m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, shaderNames[index], report.c_str()); - } - } - { - auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true); - auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); - - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelines.data() + index)) - return logFail("Failed to create HLSL PersistentWG compute pipeline!\n"); - - if (m_device->getEnabledFeatures().pipelineExecutableInfo) - { - auto report = system::to_string(m_PTHLSLPersistentWGPipelines[index]->getExecutableInfo()); - m_logger->log("%s PersistentWG Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, shaderNames[index], report.c_str()); - } - } - - // rwmc pipelines - { - auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], false, true); - auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); - - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelinesRWMC.data() + index)) - return logFail("Failed to create HLSL RWMC compute pipeline!\n"); - - if (m_device->getEnabledFeatures().pipelineExecutableInfo) - { - auto report = system::to_string(m_PTHLSLPipelinesRWMC[index]->getExecutableInfo()); - m_logger->log("%s RWMC Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, shaderNames[index], report.c_str()); - } - } - { - auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true, true); - auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); - - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelinesRWMC.data() + index)) - return logFail("Failed to create HLSL RWMC PersistentWG compute pipeline!\n"); - - if (m_device->getEnabledFeatures().pipelineExecutableInfo) - { - auto report = 
system::to_string(m_PTHLSLPersistentWGPipelinesRWMC[index]->getExecutableInfo()); - m_logger->log("%s RWMC PersistentWG Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, shaderNames[index], report.c_str()); - } - } - } + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(RenderPushConstants) + }; + m_renderPipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + if (!m_renderPipelineLayout) + return logFail("Failed to create Pathtracing pipeline layout"); } - // Create resolve pipelines { const nbl::asset::SPushConstantRange pcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .offset = 0, - .size = sizeof(ResolvePushConstants) + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(RenderRWMCPushConstants) }; + m_rwmcRenderPipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + if (!m_rwmcRenderPipelineLayout) + return logFail("Failed to create RWMC Pathtracing pipeline layout"); + } - auto pipelineLayout = m_device->createPipelineLayout( + { + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0u, + .size = sizeof(ResolvePushConstants) + }; + m_resolvePipelineState.layout = m_device->createPipelineLayout( { &pcRange, 1 }, core::smart_refctd_ptr(gpuDescriptorSetLayout) ); - - if (!pipelineLayout) { + if (!m_resolvePipelineState.layout) return logFail("Failed to create resolve pipeline layout"); - } + } - { - auto shader = loadAndCompileHLSLShader(ResolveShaderPath); - auto params = getComputePipelineCreationParams(shader.get(), pipelineLayout.get()); + const auto ensureRenderShaderLoaded = [this](const E_LIGHT_GEOMETRY geometry, const bool persistentWorkGroups, const 
bool rwmc) -> bool + { + auto& shaderSlot = m_renderPipelines.getShaders(persistentWorkGroups, rwmc)[geometry]; + if (shaderSlot) + return true; + shaderSlot = loadRenderShader(geometry, persistentWorkGroups, rwmc); + return static_cast(shaderSlot); + }; + const auto ensureResolveShaderLoaded = [this]() -> bool + { + if (m_resolvePipelineState.shader) + return true; + m_resolvePipelineState.shader = loadPrecompiledShader(); + return static_cast(m_resolvePipelineState.shader); + }; - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_resolvePipeline)) - return logFail("Failed to create HLSL resolve compute pipeline!\n"); + const auto startupGeometry = static_cast(guiControlled.PTPipeline); + if (!ensureRenderShaderLoaded(startupGeometry, guiControlled.usePersistentWorkGroups, guiControlled.useRWMC)) + return logFail("Failed to load current precompiled compute shader variant"); + if (guiControlled.useRWMC && !ensureResolveShaderLoaded()) + return logFail("Failed to load precompiled resolve compute shader"); + + ensureRenderPipeline( + startupGeometry, + guiControlled.usePersistentWorkGroups, + guiControlled.useRWMC, + static_cast(guiControlled.polygonMethod) + ); + if (guiControlled.useRWMC) + ensureResolvePipeline(); - if (m_device->getEnabledFeatures().pipelineExecutableInfo) + for (auto geometry = 0u; geometry < ELG_COUNT; ++geometry) + { + for (const auto persistentWorkGroups : { false, true }) + { + for (const auto rwmc : { false, true }) { - auto report = system::to_string(m_resolvePipeline->getExecutableInfo()); - m_logger->log("Resolve Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + if (!ensureRenderShaderLoaded(static_cast(geometry), persistentWorkGroups, rwmc)) + return logFail("Failed to load precompiled compute shader variant"); } } } + if (!ensureResolveShaderLoaded()) + return logFail("Failed to load precompiled resolve compute shader"); // Create graphics pipeline { @@ -493,8 +405,7 @@ class 
HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui if (!fsTriProtoPPln) return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); - // Load Fragment Shader - auto fragmentShader = loadAndCompileHLSLShader(PresentShaderPath); + auto fragmentShader = loadPrecompiledShader(); if (!fragmentShader) return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); @@ -510,9 +421,10 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui nullptr, nullptr ); - m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass(), 0u, {}, hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT, m_pipelineCache.object.get()); if (!m_presentPipeline) return logFail("Could not create Graphics Pipeline!"); + m_pipelineCache.dirty = true; } } @@ -742,65 +654,80 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui { // TODO: do this better use asset manager to get the ICPUBuffer from `.bin` auto createBufferFromCacheFile = [this]( - system::path filename, - size_t bufferSize, - void *data, + const system::path& filePath, + size_t byteSize, + void* data, smart_refctd_ptr& buffer - ) -> std::pair, bool> + ) -> bool { ISystem::future_t> owenSamplerFileFuture; ISystem::future_t owenSamplerFileReadFuture; - size_t owenSamplerFileBytesRead; + size_t owenSamplerFileBytesRead = 0ull; - m_system->createFile(owenSamplerFileFuture, localOutputCWD / filename, IFile::ECF_READ); + m_system->createFile(owenSamplerFileFuture, filePath, IFile::ECF_READ); smart_refctd_ptr owenSamplerFile; if (owenSamplerFileFuture.wait()) { owenSamplerFileFuture.acquire().move_into(owenSamplerFile); if (!owenSamplerFile) - return { nullptr, false }; + return false; - owenSamplerFile->read(owenSamplerFileReadFuture, data, 0, bufferSize); + 
owenSamplerFile->read(owenSamplerFileReadFuture, data, 0, byteSize); if (owenSamplerFileReadFuture.wait()) { owenSamplerFileReadFuture.acquire().move_into(owenSamplerFileBytesRead); - if (owenSamplerFileBytesRead < bufferSize) - { - buffer = asset::ICPUBuffer::create({ sizeof(uint32_t) * bufferSize }); - return { owenSamplerFile, false }; - } + if (owenSamplerFileBytesRead < byteSize) + return false; - buffer = asset::ICPUBuffer::create({ { sizeof(uint32_t) * bufferSize }, data }); + buffer = asset::ICPUBuffer::create({ { byteSize }, data }); + return true; } } - return { owenSamplerFile, true }; + return false; }; - auto writeBufferIntoCacheFile = [this](smart_refctd_ptr file, size_t bufferSize, void* data) + auto writeBufferIntoCacheFile = [this](const system::path& filePath, size_t byteSize, const void* data) { + std::filesystem::create_directories(filePath.parent_path()); + + ISystem::future_t> owenSamplerFileFuture; ISystem::future_t owenSamplerFileWriteFuture; - size_t owenSamplerFileBytesWritten; + size_t owenSamplerFileBytesWritten = 0ull; + + m_system->createFile(owenSamplerFileFuture, filePath, IFile::ECF_WRITE); + if (!owenSamplerFileFuture.wait()) + return; + + smart_refctd_ptr file; + owenSamplerFileFuture.acquire().move_into(file); + if (!file) + return; - file->write(owenSamplerFileWriteFuture, data, 0, bufferSize); + file->write(owenSamplerFileWriteFuture, const_cast(data), 0, byteSize); if (owenSamplerFileWriteFuture.wait()) owenSamplerFileWriteFuture.acquire().move_into(owenSamplerFileBytesWritten); }; constexpr uint32_t quantizedDimensions = MaxBufferDimensions / 3u; - constexpr size_t bufferSize = quantizedDimensions * MaxSamplesBuffer; using sequence_type = sampling::QuantizedSequence; - std::array data = {}; + constexpr size_t sequenceCount = quantizedDimensions * MaxSamplesBuffer; + constexpr size_t sequenceByteSize = sequenceCount * sizeof(sequence_type); + std::array data = {}; smart_refctd_ptr sampleSeq; - auto cacheBufferResult = 
createBufferFromCacheFile(sharedOutputCWD/OwenSamplerFilePath, bufferSize, data.data(), sampleSeq); - if (!cacheBufferResult.second) + const auto packagedOwenSamplerPath = sharedInputCWD / OwenSamplerFilePath; + const auto generatedOwenSamplerPath = sharedOutputCWD / OwenSamplerFilePath; + const bool cacheLoaded = + createBufferFromCacheFile(packagedOwenSamplerPath, sequenceByteSize, data.data(), sampleSeq) || + createBufferFromCacheFile(generatedOwenSamplerPath, sequenceByteSize, data.data(), sampleSeq); + if (!cacheLoaded) { core::OwenSampler sampler(MaxBufferDimensions, 0xdeadbeefu); ICPUBuffer::SCreationParams params = {}; - params.size = quantizedDimensions * MaxSamplesBuffer * sizeof(sequence_type); + params.size = sequenceByteSize; sampleSeq = ICPUBuffer::create(std::move(params)); auto out = reinterpret_cast(sampleSeq->getPointer()); @@ -813,20 +740,31 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui const uint32_t sample = sampler.sample(dim, i); seq.set(offset, sample); } - if (cacheBufferResult.first) - writeBufferIntoCacheFile(cacheBufferResult.first, bufferSize, out); + writeBufferIntoCacheFile(generatedOwenSamplerPath, sequenceByteSize, out); } IGPUBuffer::SCreationParams params = {}; params.usage = asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_STORAGE_BUFFER_BIT | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - params.size = bufferSize; - - // we don't want to overcomplicate the example with multi-queue - m_utils->createFilledDeviceLocalBufferOnDedMem( - SIntendedSubmitInfo{ .queue = getGraphicsQueue() }, + params.size = sampleSeq->getSize(); + + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; + m_intendedSubmit.queue = queue; + m_intendedSubmit.scratchCommandBuffers = { &cmdbufInfo, 1 }; + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + auto 
bufferFuture = m_utils->createFilledDeviceLocalBufferOnDedMem( + m_intendedSubmit, std::move(params), sampleSeq->getPointer() - ).move_into(m_sequenceBuffer); + ); + bufferFuture.wait(); + const auto uploadedBuffer = bufferFuture.get(); + if (!uploadedBuffer || !uploadedBuffer->get()) + return logFail("Failed to upload sequence buffer"); + m_sequenceBuffer = smart_refctd_ptr(*uploadedBuffer); m_sequenceBuffer->setObjectDebugName("Sequence buffer"); } @@ -941,15 +879,23 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; params.assetManager = m_assetMgr; - params.pipelineCache = nullptr; + params.pipelineCache = m_pipelineCache.object; params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); params.renderpass = smart_refctd_ptr(renderpass); params.streamingBuffer = nullptr; params.subpassIx = 0u; params.transfer = getTransferUpQueue(); params.utilities = m_utils; + params.spirv = nbl::ext::imgui::UI::SCreationParameters::PrecompiledShaders{ + .vertex = loadPrecompiledShader(), + .fragment = loadPrecompiledShader() + }; + if (!params.spirv->vertex || !params.spirv->fragment) + return logFail("Failed to load precompiled ImGui shaders"); { m_ui.manager = ext::imgui::UI::create(std::move(params)); + if (m_ui.manager) + m_pipelineCache.dirty = true; // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); @@ -978,41 +924,342 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui const auto aspectRatio = io.DisplaySize.x / io.DisplaySize.y; 
m_camera.setProjectionMatrix(hlsl::math::thin_lens::rhPerspectiveFovMatrix(hlsl::radians(guiControlled.fov), aspectRatio, guiControlled.zNear, guiControlled.zFar)); - ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); - ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); - - // create a window and insert the inspector - ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); - ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); - ImGui::Begin("Controls"); - - ImGui::SameLine(); - - ImGui::Text("Camera"); + const ImGuiViewport* viewport = ImGui::GetMainViewport(); + const ImVec2 viewportPos = viewport->Pos; + const ImVec2 viewportSize = viewport->Size; + const ImGuiStyle& style = ImGui::GetStyle(); + const float panelMargin = 10.f; + const auto currentGeometry = static_cast(guiControlled.PTPipeline); + const auto requestedMethod = static_cast(guiControlled.polygonMethod); + const auto currentVariant = getRenderVariantInfo(currentGeometry, guiControlled.usePersistentWorkGroups, requestedMethod); + const size_t readyRenderPipelines = getReadyRenderPipelineCount(); + const size_t totalRenderPipelines = getKnownRenderPipelineCount(); + const size_t readyTotalPipelines = readyRenderPipelines + (m_resolvePipelineState.pipeline ? 
1ull : 0ull); + const size_t totalKnownPipelines = totalRenderPipelines + 1ull; + const size_t runningPipelineBuilds = getRunningPipelineBuildCount(); + const size_t queuedPipelineBuilds = m_pipelineCache.warmup.queue.size(); + const bool warmupInProgress = m_startupLog.hasPathtraceOutput && !m_pipelineCache.warmup.loggedComplete; + const char* const effectiveEntryPoint = currentVariant.entryPoint; + struct SFloatSliderRow + { + const char* label; + float* value; + float min; + float max; + const char* format; + }; + struct SIntSliderRow + { + const char* label; + int* value; + int min; + int max; + }; + struct SCheckboxRow + { + const char* label; + bool* value; + }; + struct SComboRow + { + const char* label; + int* value; + const char* const* items; + int count; + }; + struct STextRow + { + const char* label; + std::string value; + }; + const auto calcMaxTextWidth = [](const auto& items, auto&& toText) -> float + { + float width = 0.f; + for (const auto& item : items) + width = std::max(width, ImGui::CalcTextSize(toText(item)).x); + return width; + }; + const auto makeReadyText = [](const size_t ready, const size_t total) -> std::string + { + return std::to_string(ready) + "/" + std::to_string(total); + }; + const auto makeRunQueueText = [](const size_t running, const size_t queued) -> std::string + { + return std::to_string(running) + " / " + std::to_string(queued); + }; + const auto& shaderNames = this_example::getLightGeometryNamePointers(); + const auto& polygonMethodNames = this_example::getPolygonMethodNamePointers(); + const std::string pipelineStatusText = !m_startupLog.hasPathtraceOutput ? + "Building pipeline..." : + (warmupInProgress ? + ("Warmup " + std::to_string(readyTotalPipelines) + "/" + std::to_string(totalKnownPipelines)) : + "All pipelines ready"); + const std::string cacheStateText = m_pipelineCache.loadedFromDisk ? 
"loaded from disk" : "cold start"; + const std::string trimCacheText = std::to_string(m_pipelineCache.trimmedShaders.loadedFromDiskCount + m_pipelineCache.trimmedShaders.generatedCount) + " ready"; + const std::string parallelismText = std::to_string(m_pipelineCache.warmup.budget); + const std::string renderStateText = makeReadyText(readyTotalPipelines, totalKnownPipelines); + const std::string warmupStateText = makeRunQueueText(runningPipelineBuilds, queuedPipelineBuilds); + const std::string cursorText = "cursor " + std::to_string(static_cast(io.MousePos.x)) + " " + std::to_string(static_cast(io.MousePos.y)); + const SFloatSliderRow cameraFloatRows[] = { + { "move", &guiControlled.moveSpeed, 0.1f, 10.f, "%.2f" }, + { "rotate", &guiControlled.rotateSpeed, 0.1f, 10.f, "%.2f" }, + { "fov", &guiControlled.fov, 20.f, 150.f, "%.0f" }, + { "zNear", &guiControlled.zNear, 0.1f, 100.f, "%.2f" }, + { "zFar", &guiControlled.zFar, 110.f, 10000.f, "%.0f" }, + }; + const SComboRow renderComboRows[] = { + { "shader", &guiControlled.PTPipeline, shaderNames.data(), static_cast(shaderNames.size()) }, + { "method", &guiControlled.polygonMethod, polygonMethodNames.data(), static_cast(polygonMethodNames.size()) }, + }; + const SIntSliderRow renderIntRows[] = { + { "spp", &guiControlled.spp, 1, MaxSamplesBuffer }, + { "depth", &guiControlled.depth, 1, MaxBufferDimensions / 4 }, + }; + const SCheckboxRow renderCheckboxRows[] = { + { "persistent WG", &guiControlled.usePersistentWorkGroups }, + }; + const SCheckboxRow rwmcCheckboxRows[] = { + { "enable", &guiControlled.useRWMC }, + }; + const SFloatSliderRow rwmcFloatRows[] = { + { "start", &guiControlled.rwmcParams.start, 1.0f, 32.0f, "%.3f" }, + { "base", &guiControlled.rwmcParams.base, 1.0f, 32.0f, "%.3f" }, + { "min rel.", &guiControlled.rwmcParams.minReliableLuma, 0.1f, 1024.0f, "%.3f" }, + { "kappa", &guiControlled.rwmcParams.kappa, 0.1f, 1024.0f, "%.3f" }, + }; + const STextRow diagnosticsRows[] = { + { "geometry", 
system::to_string(currentGeometry) }, + { "req. method", system::to_string(requestedMethod) }, + { "eff. method", system::to_string(currentVariant.effectiveMethod) }, + { "entrypoint", effectiveEntryPoint }, + { "config", std::string(BuildConfigName) }, + { "cache", cacheStateText }, + { "trim cache", trimCacheText }, + { "parallel", parallelismText }, + { "render", renderStateText }, + { "run/queue", warmupStateText }, + }; + const char* const standaloneTexts[] = { + "PATH_TRACER", + "Home camera End light", + pipelineStatusText.c_str(), + cursorText.c_str(), + }; + const char* const sliderPreviewTexts[] = { + "10000.000", + "1024.000", + effectiveEntryPoint, + BuildConfigName.data(), + cacheStateText.c_str(), + renderStateText.c_str(), + warmupStateText.c_str(), + }; + const float maxStandaloneTextWidth = calcMaxTextWidth(standaloneTexts, [](const char* text) { return text; }); + const float maxLabelTextWidth = std::max({ + calcMaxTextWidth(cameraFloatRows, [](const auto& row) { return row.label; }), + calcMaxTextWidth(renderComboRows, [](const auto& row) { return row.label; }), + calcMaxTextWidth(renderIntRows, [](const auto& row) { return row.label; }), + calcMaxTextWidth(renderCheckboxRows, [](const auto& row) { return row.label; }), + calcMaxTextWidth(rwmcCheckboxRows, [](const auto& row) { return row.label; }), + calcMaxTextWidth(rwmcFloatRows, [](const auto& row) { return row.label; }), + calcMaxTextWidth(diagnosticsRows, [](const auto& row) { return row.label; }) + }); + const float comboPreviewWidth = std::max( + calcMaxTextWidth(shaderNames, [](const char* text) { return text; }), + calcMaxTextWidth(polygonMethodNames, [](const char* text) { return text; }) + ); + const float sliderPreviewWidth = calcMaxTextWidth(sliderPreviewTexts, [](const char* text) { return text; }); + const float tableLabelColumnWidth = std::ceil(maxLabelTextWidth + style.FramePadding.x * 2.f + style.CellPadding.x * 2.f); + const float tableValueColumnMinWidth = + 
std::ceil(std::max(comboPreviewWidth, sliderPreviewWidth) + style.FramePadding.x * 2.f + style.ItemInnerSpacing.x + ImGui::GetFrameHeight() + 18.f); + const float sectionTableWidth = tableLabelColumnWidth + tableValueColumnMinWidth + style.CellPadding.x * 4.f + style.ItemSpacing.x; + const float contentWidth = std::max(maxStandaloneTextWidth, sectionTableWidth); + const float panelWidth = std::min( + std::ceil(contentWidth + style.WindowPadding.x * 2.f), + std::max(0.f, viewportSize.x - panelMargin * 2.f) + ); + const float panelMaxHeight = ImMax(300.0f, viewportSize.y * 0.84f); + ImGui::SetNextWindowPos(ImVec2(viewportPos.x + panelMargin, viewportPos.y + panelMargin), ImGuiCond_Always); + ImGui::SetNextWindowSizeConstraints(ImVec2(panelWidth, 0.0f), ImVec2(panelWidth, panelMaxHeight)); + ImGui::SetNextWindowBgAlpha(0.72f); + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(5.f, 5.f)); + ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 10.f); + ImGui::PushStyleVar(ImGuiStyleVar_FrameRounding, 4.f); + ImGui::PushStyleVar(ImGuiStyleVar_GrabRounding, 4.f); + ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(5.f, 2.f)); + ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0.08f, 0.10f, 0.13f, 0.88f)); + ImGui::PushStyleColor(ImGuiCol_Border, ImVec4(0.32f, 0.39f, 0.47f, 0.65f)); + ImGui::PushStyleColor(ImGuiCol_Header, ImVec4(0.18f, 0.28f, 0.36f, 0.92f)); + ImGui::PushStyleColor(ImGuiCol_HeaderHovered, ImVec4(0.24f, 0.36f, 0.46f, 0.96f)); + ImGui::PushStyleColor(ImGuiCol_HeaderActive, ImVec4(0.28f, 0.42f, 0.54f, 1.0f)); + + const ImGuiWindowFlags panelFlags = + ImGuiWindowFlags_NoDecoration | + ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoSavedSettings | + ImGuiWindowFlags_NoNav | + ImGuiWindowFlags_AlwaysAutoResize | + ImGuiWindowFlags_NoResize; + + const auto beginSectionTable = [](const char* id) -> bool + { + return ImGui::BeginTable(id, 2, ImGuiTableFlags_SizingFixedFit); + }; + const auto setupSectionTable = [tableLabelColumnWidth]() -> void + { + 
ImGui::TableSetupColumn("label", ImGuiTableColumnFlags_WidthFixed, tableLabelColumnWidth); + ImGui::TableSetupColumn("value", ImGuiTableColumnFlags_WidthStretch); + }; + const auto sliderFloatRow = [](const SFloatSliderRow& row) -> void + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::TextUnformatted(row.label); + ImGui::TableSetColumnIndex(1); + ImGui::SetNextItemWidth(-FLT_MIN); + ImGui::PushID(row.label); + ImGui::SliderFloat("##value", row.value, row.min, row.max, row.format, ImGuiSliderFlags_AlwaysClamp); + ImGui::PopID(); + }; + const auto sliderIntRow = [](const SIntSliderRow& row) -> void + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::TextUnformatted(row.label); + ImGui::TableSetColumnIndex(1); + ImGui::SetNextItemWidth(-FLT_MIN); + ImGui::PushID(row.label); + ImGui::SliderInt("##value", row.value, row.min, row.max); + ImGui::PopID(); + }; + const auto comboRow = [](const SComboRow& row) -> void + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::TextUnformatted(row.label); + ImGui::TableSetColumnIndex(1); + ImGui::SetNextItemWidth(-FLT_MIN); + ImGui::PushID(row.label); + ImGui::Combo("##value", row.value, row.items, row.count); + ImGui::PopID(); + }; + const auto checkboxRow = [](const SCheckboxRow& row) -> void + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::TextUnformatted(row.label); + ImGui::TableSetColumnIndex(1); + ImGui::PushID(row.label); + ImGui::Checkbox("##value", row.value); + ImGui::PopID(); + }; + const auto textRow = [](const STextRow& row) -> void + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::TextUnformatted(row.label); + ImGui::TableSetColumnIndex(1); + ImGui::TextUnformatted(row.value.c_str()); + }; - ImGui::Text("Press Home to reset camera."); - ImGui::Text("Press End to reset light."); + if (ImGui::Begin("Path Tracer Controls", nullptr, panelFlags)) + { + ImGui::TextUnformatted("PATH_TRACER"); + ImGui::Separator(); + 
ImGui::TextDisabled("Home camera End light"); + if (!m_startupLog.hasPathtraceOutput) + ImGui::TextColored(ImVec4(0.83f, 0.86f, 0.90f, 1.0f), "Building pipeline..."); + else if (warmupInProgress) + ImGui::TextColored(ImVec4(0.83f, 0.86f, 0.90f, 1.0f), "Warmup %zu/%zu", readyTotalPipelines, totalKnownPipelines); + else + ImGui::TextDisabled("All pipelines ready"); + ImGui::Dummy(ImVec2(0.f, 2.f)); + + if (ImGui::CollapsingHeader("Controls", ImGuiTreeNodeFlags_DefaultOpen)) + { + if (ImGui::CollapsingHeader("Camera")) + { + if (beginSectionTable("##camera_controls_table")) + { + setupSectionTable(); + for (const auto& row : cameraFloatRows) + sliderFloatRow(row); + ImGui::EndTable(); + } + } - ImGui::SliderFloat("Move speed", &guiControlled.moveSpeed, 0.1f, 10.f); - ImGui::SliderFloat("Rotate speed", &guiControlled.rotateSpeed, 0.1f, 10.f); - ImGui::SliderFloat("Fov", &guiControlled.fov, 20.f, 150.f); - ImGui::SliderFloat("zNear", &guiControlled.zNear, 0.1f, 100.f); - ImGui::SliderFloat("zFar", &guiControlled.zFar, 110.f, 10000.f); - ImGui::Combo("Shader", &guiControlled.PTPipeline, shaderNames, E_LIGHT_GEOMETRY::ELG_COUNT); - ImGui::SliderInt("SPP", &guiControlled.spp, 1, MaxSamplesBuffer); - ImGui::SliderInt("Depth", &guiControlled.depth, 1, MaxBufferDimensions / 4); - ImGui::Checkbox("Persistent WorkGroups", &guiControlled.usePersistentWorkGroups); + if (ImGui::CollapsingHeader("Render", ImGuiTreeNodeFlags_DefaultOpen)) + { + if (beginSectionTable("##render_controls_table")) + { + setupSectionTable(); + for (const auto& row : renderComboRows) + comboRow(row); + for (const auto& row : renderIntRows) + sliderIntRow(row); + for (const auto& row : renderCheckboxRows) + checkboxRow(row); + ImGui::EndTable(); + } + } - ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + if (ImGui::CollapsingHeader("RWMC", ImGuiTreeNodeFlags_DefaultOpen)) + { + if (beginSectionTable("##rwmc_controls_table")) + { + setupSectionTable(); + for (const auto& row : rwmcCheckboxRows) + 
checkboxRow(row); + for (const auto& row : rwmcFloatRows) + sliderFloatRow(row); + ImGui::EndTable(); + } + } - ImGui::Text("\nRWMC settings:"); - ImGui::Checkbox("Enable RWMC", &guiControlled.useRWMC); - ImGui::SliderFloat("start", &guiControlled.rwmcParams.start, 1.0f, 32.0f); - ImGui::SliderFloat("base", &guiControlled.rwmcParams.base, 1.0f, 32.0f); - ImGui::SliderFloat("minReliableLuma", &guiControlled.rwmcParams.minReliableLuma, 0.1f, 1024.0f); - ImGui::SliderFloat("kappa", &guiControlled.rwmcParams.kappa, 0.1f, 1024.0f); + if (ImGui::CollapsingHeader("Diagnostics")) + { + if (beginSectionTable("##diagnostics_controls_table")) + { + setupSectionTable(); + for (const auto& row : diagnosticsRows) + textRow(row); + ImGui::EndTable(); + } + } + ImGui::Dummy(ImVec2(0.f, 2.f)); + ImGui::Separator(); + ImGui::TextDisabled("%s", cursorText.c_str()); + } + } ImGui::End(); + + if (!m_startupLog.hasPathtraceOutput || warmupInProgress) + { + ImGui::SetNextWindowPos(ImVec2(viewportPos.x + viewportSize.x - panelMargin, viewportPos.y + panelMargin), ImGuiCond_Always, ImVec2(1.0f, 0.0f)); + ImGui::SetNextWindowBgAlpha(0.62f); + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(12.f, 10.f)); + ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 8.f); + ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0.07f, 0.09f, 0.12f, 0.90f)); + const ImGuiWindowFlags overlayFlags = + ImGuiWindowFlags_NoDecoration | + ImGuiWindowFlags_NoSavedSettings | + ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoNav | + ImGuiWindowFlags_AlwaysAutoResize | + ImGuiWindowFlags_NoInputs; + if (ImGui::Begin("##path_tracer_status_overlay", nullptr, overlayFlags)) + { + ImGui::TextUnformatted(pipelineStatusText.c_str()); + ImGui::Text("Run %zu Queue %zu", runningPipelineBuilds, queuedPipelineBuilds); + ImGui::Text("Cache: %s", m_pipelineCache.loadedFromDisk ? 
"disk" : "cold"); + } + ImGui::End(); + ImGui::PopStyleColor(1); + ImGui::PopStyleVar(2); + } + ImGui::PopStyleColor(5); + ImGui::PopStyleVar(5); } ); @@ -1112,6 +1359,14 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui inline void workLoopBody() override { + pollPendingPipelines(); + pumpPipelineWarmup(); + if (!m_startupLog.loggedFirstFrameLoop) + { + logStartupEvent("first_frame_loop"); + m_startupLog.loggedFirstFrameLoop = true; + } + // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); // We block for semaphores for 2 reasons here: @@ -1177,6 +1432,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui } }; updatePathtracerPushConstants(); + bool producedRenderableOutput = false; // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) { @@ -1240,15 +1496,18 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui 1 + (WindowDimensions.x * WindowDimensions.y - 1) / RenderWorkgroupSize; IGPUComputePipeline* pipeline = pickPTPipeline(); + if (pipeline) + { + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet.get()); - cmdbuf->bindComputePipeline(pipeline); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet.get()); - - const uint32_t pushConstantsSize = guiControlled.useRWMC ? sizeof(RenderRWMCPushConstants) : sizeof(RenderPushConstants); - const void* pushConstantsPtr = guiControlled.useRWMC ? 
reinterpret_cast(&rwmcPushConstants) : reinterpret_cast(&pc); - cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, pushConstantsSize, pushConstantsPtr); + const uint32_t pushConstantsSize = guiControlled.useRWMC ? sizeof(RenderRWMCPushConstants) : sizeof(RenderPushConstants); + const void* pushConstantsPtr = guiControlled.useRWMC ? reinterpret_cast(&rwmcPushConstants) : reinterpret_cast(&pc); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, pushConstantsSize, pushConstantsPtr); - cmdbuf->dispatch(dispatchSize, 1u, 1u); + cmdbuf->dispatch(dispatchSize, 1u, 1u); + producedRenderableOutput = !guiControlled.useRWMC; + } } // m_cascadeView synchronization - wait for previous compute shader to write into the cascade @@ -1283,15 +1542,28 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui (m_window->getHeight() + ResolveWorkgroupSizeY - 1) / ResolveWorkgroupSizeY ); - IGPUComputePipeline* pipeline = m_resolvePipeline.get(); + IGPUComputePipeline* pipeline = ensureResolvePipeline(); + if (pipeline) + { + resolvePushConstants.resolveParameters = rwmc::SResolveParameters::create(guiControlled.rwmcParams); - resolvePushConstants.resolveParameters = rwmc::SResolveParameters::create(guiControlled.rwmcParams); + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0u, sizeof(ResolvePushConstants), &resolvePushConstants); - cmdbuf->bindComputePipeline(pipeline); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet.get()); - cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(ResolvePushConstants), &resolvePushConstants); + cmdbuf->dispatch(dispatchSize.x, dispatchSize.y, 1u); + producedRenderableOutput = true; + } + } - 
cmdbuf->dispatch(dispatchSize.x, dispatchSize.y, 1u); + if (producedRenderableOutput) + { + m_startupLog.hasPathtraceOutput = true; + if (!m_startupLog.loggedFirstRenderDispatch) + { + logStartupEvent("first_render_dispatch"); + m_startupLog.loggedFirstRenderDispatch = true; + } } // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) @@ -1358,9 +1630,12 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); - cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDescriptorSet.get()); - ext::FullScreenTriangle::recordDrawCall(cmdbuf); + if (m_startupLog.hasPathtraceOutput) + { + cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDescriptorSet.get()); + ext::FullScreenTriangle::recordDrawCall(cmdbuf); + } if (m_showUI) { @@ -1417,6 +1692,17 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui } } + if (producedRenderableOutput && !m_startupLog.loggedFirstRenderSubmit) + { + logStartupEvent("first_render_submit"); + m_startupLog.loggedFirstRenderSubmit = true; + } + if (m_startupLog.hasPathtraceOutput && !m_pipelineCache.warmup.started) + { + kickoffPipelineWarmup(); + } + maybeCheckpointPipelineCache(); + m_window->setCaption("[Nabla Engine] HLSL Compute Path Tracer"); m_surface->present(m_currentImageAcquire.imageIndex, rendered); } @@ -1432,6 +1718,8 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui inline bool onAppTerminated() override { + waitForPendingPipelines(); + savePipelineCache(); return device_base_t::onAppTerminated(); } @@ -1524,16 +1812,1166 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui } private: + template + 
smart_refctd_ptr loadPrecompiledShader() + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + + const auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load precompiled shader: %s", ILogger::ELL_ERROR, key.c_str()); + return nullptr; + } - IGPUComputePipeline* pickPTPipeline() + auto shader = IAsset::castDown(assets[0]); + if (!shader) + { + m_logger->log("Failed to cast %s asset to IShader!", ILogger::ELL_ERROR, key.c_str()); + return nullptr; + } + + shader->setFilePathHint(std::string(std::string_view(ShaderKey.value))); + return shader; + } + + void logStartupEvent(const char* const eventName) + { + const auto elapsedMs = std::chrono::duration_cast(clock_t::now() - m_startupBeganAt).count(); + m_logger->log("PATH_TRACER_STARTUP %s_ms=%lld", ILogger::ELL_INFO, eventName, static_cast(elapsedMs)); + } + + bool parseCommandLine() + { + argparse::ArgumentParser parser("31_hlslpathtracer"); + parser.add_argument("--pipeline-cache-dir") + .nargs(1) + .help("Override the PATH_TRACER pipeline cache root directory"); + parser.add_argument("--clear-pipeline-cache") + .help("Clear the PATH_TRACER cache root before startup") + .flag(); + + try + { + parser.parse_args({ argv.data(), argv.data() + argv.size() }); + } + catch (const std::exception& e) + { + m_logger->log("Failed to parse arguments: %s", ILogger::ELL_ERROR, e.what()); + return false; + } + + m_commandLine.pipelineCacheDirOverride.reset(); + if (parser.present("--pipeline-cache-dir")) + m_commandLine.pipelineCacheDirOverride = path(parser.get("--pipeline-cache-dir")); + m_commandLine.clearPipelineCache = parser.get("--clear-pipeline-cache"); + return true; + } + + static std::string hashToHex(const core::blake3_hash_t& hash) + { + static constexpr char digits[] = 
"0123456789abcdef"; + static constexpr size_t HexCharsPerByte = 2ull; + static constexpr uint32_t HighNibbleBitOffset = 4u; + static constexpr uint8_t NibbleMask = 0xfu; + const auto hashByteCount = sizeof(hash.data); + std::string retval; + retval.resize(hashByteCount * HexCharsPerByte); + for (size_t i = 0ull; i < hashByteCount; ++i) + { + const auto hexOffset = i * HexCharsPerByte; + retval[hexOffset] = digits[(hash.data[i] >> HighNibbleBitOffset) & NibbleMask]; + retval[hexOffset + 1ull] = digits[hash.data[i] & NibbleMask]; + } + return retval; + } + + path getDefaultPipelineCacheDir() const + { + if (const auto* localAppData = std::getenv("LOCALAPPDATA"); localAppData && localAppData[0] != '\0') + return path(localAppData) / "nabla/examples/31_HLSLPathTracer/pipeline/cache"; + return localOutputCWD / "pipeline/cache"; + } + + path getRuntimeConfigPath() const + { + return system::executableDirectory() / RuntimeConfigFilename; + } + + std::optional tryGetPipelineCacheDirFromRuntimeConfig() const + { + const auto configPath = getRuntimeConfigPath(); + if (!m_system->exists(configPath, IFile::ECF_READ)) + return std::nullopt; + + std::ifstream input(configPath); + if (!input.is_open()) + return std::nullopt; + + nlohmann::json json; + try + { + input >> json; + } + catch (const std::exception& e) + { + m_logger->log("Failed to parse PATH_TRACER runtime config %s: %s", ILogger::ELL_WARNING, configPath.string().c_str(), e.what()); + return std::nullopt; + } + + const auto cacheRootIt = json.find("cache_root"); + if (cacheRootIt == json.end() || !cacheRootIt->is_string()) + return std::nullopt; + + const auto cacheRoot = cacheRootIt->get(); + if (cacheRoot.empty()) + return std::nullopt; + + const path relativeRoot(cacheRoot); + if (relativeRoot.is_absolute()) + { + m_logger->log("Ignoring absolute cache_root in %s", ILogger::ELL_WARNING, configPath.string().c_str()); + return std::nullopt; + } + + return (configPath.parent_path() / relativeRoot).lexically_normal(); 
+ } + + path getPipelineCacheRootDir() const + { + if (m_commandLine.pipelineCacheDirOverride.has_value()) + return m_commandLine.pipelineCacheDirOverride.value(); + if (const auto runtimeConfigDir = tryGetPipelineCacheDirFromRuntimeConfig(); runtimeConfigDir.has_value()) + return runtimeConfigDir.value(); + return getDefaultPipelineCacheDir(); + } + + path getPipelineCacheBlobPath() const + { + const auto key = m_device->getPipelineCacheKey(); + return getPipelineCacheRootDir() / "blob" / BuildConfigName / (std::string(key.deviceAndDriverUUID) + ".bin"); + } + + path getSpirvCacheDir() const + { + return getPipelineCacheRootDir() / "spirv" / BuildConfigName; + } + + path getTrimmedShaderCachePath(const IShader* shader, const char* const entryPoint) const + { + core::blake3_hasher hasher; + hasher << std::string_view(shader ? shader->getFilepathHint() : std::string_view{}); + hasher << std::string_view(entryPoint); + return getSpirvCacheDir() / (hashToHex(static_cast(hasher)) + ".spv"); + } + + path getValidatedSpirvMarkerPath(const ICPUBuffer* spirvBuffer) const { - IGPUComputePipeline* pipeline; - if (guiControlled.useRWMC) - pipeline = guiControlled.usePersistentWorkGroups ? m_PTHLSLPersistentWGPipelinesRWMC[guiControlled.PTPipeline].get() : m_PTHLSLPipelinesRWMC[guiControlled.PTPipeline].get(); - else - pipeline = guiControlled.usePersistentWorkGroups ? 
m_PTHLSLPersistentWGPipelines[guiControlled.PTPipeline].get() : m_PTHLSLPipelines[guiControlled.PTPipeline].get(); + auto contentHash = spirvBuffer->getContentHash(); + if (contentHash == ICPUBuffer::INVALID_HASH) + contentHash = spirvBuffer->computeContentHash(); + return getSpirvCacheDir() / (hashToHex(contentHash) + ".hash"); + } + + size_t getBackgroundPipelineBuildBudget() const + { + static constexpr uint32_t ReservedForegroundThreadCount = 1u; + const auto concurrency = std::thread::hardware_concurrency(); + if (concurrency > ReservedForegroundThreadCount) + return static_cast(concurrency - ReservedForegroundThreadCount); + return ReservedForegroundThreadCount; + } - return pipeline; + bool ensureCacheDirectoryExists(const path& dir, const char* const description) + { + if (dir.empty() || m_system->isDirectory(dir)) + return true; + + if (m_system->createDirectory(dir) || m_system->isDirectory(dir)) + return true; + + m_logger->log("Failed to create %s %s", ILogger::ELL_WARNING, description, dir.string().c_str()); + return false; + } + + bool finalizeCacheFile(const path& tempPath, const path& finalPath, const char* const description) + { + m_system->deleteFile(finalPath); + const auto ec = m_system->moveFileOrDirectory(tempPath, finalPath); + if (!ec) + return true; + + m_system->deleteFile(tempPath); + m_logger->log("Failed to finalize %s %s", ILogger::ELL_WARNING, description, finalPath.string().c_str()); + return false; + } + + void initializePipelineCache() + { + m_pipelineCache.blobPath = getPipelineCacheBlobPath(); + m_pipelineCache.trimmedShaders.rootDir = getSpirvCacheDir(); + m_pipelineCache.trimmedShaders.validationDir = getSpirvCacheDir(); + if (!m_pipelineCache.trimmedShaders.trimmer) + m_pipelineCache.trimmedShaders.trimmer = core::make_smart_refctd_ptr(); + const auto pipelineCacheRootDir = getPipelineCacheRootDir(); + std::error_code ec; + m_pipelineCache.loadedBytes = 0ull; + m_pipelineCache.loadedFromDisk = false; + 
m_pipelineCache.clearedOnStartup = m_commandLine.clearPipelineCache; + m_pipelineCache.newlyReadyPipelinesSinceLastSave = 0ull; + m_pipelineCache.checkpointedAfterFirstSubmit = false; + m_pipelineCache.lastSaveAt = clock_t::now(); + if (m_commandLine.clearPipelineCache) + { + if (m_system->isDirectory(pipelineCacheRootDir) && !m_system->deleteDirectory(pipelineCacheRootDir)) + m_logger->log("Failed to clear pipeline cache directory %s", ILogger::ELL_WARNING, pipelineCacheRootDir.string().c_str()); + else + m_logger->log("PATH_TRACER_PIPELINE_CACHE clear root=%s", ILogger::ELL_INFO, pipelineCacheRootDir.string().c_str()); + } + ensureCacheDirectoryExists(m_pipelineCache.blobPath.parent_path(), "pipeline cache directory"); + ensureCacheDirectoryExists(m_pipelineCache.trimmedShaders.rootDir, "trimmed shader cache directory"); + ensureCacheDirectoryExists(m_pipelineCache.trimmedShaders.validationDir, "validated shader cache directory"); + + std::vector initialData; + { + std::ifstream input(m_pipelineCache.blobPath, std::ios::binary | std::ios::ate); + if (input.is_open()) + { + const auto size = input.tellg(); + if (size > 0) + { + initialData.resize(static_cast(size)); + input.seekg(0, std::ios::beg); + input.read(reinterpret_cast(initialData.data()), static_cast(initialData.size())); + if (!input) + initialData.clear(); + } + } + } + + std::span initialDataSpan = {}; + if (!initialData.empty()) + { + initialDataSpan = { initialData.data(), initialData.size() }; + m_pipelineCache.loadedBytes = initialData.size(); + m_pipelineCache.loadedFromDisk = true; + } + + m_pipelineCache.object = m_device->createPipelineCache(initialDataSpan); + if (!m_pipelineCache.object && !initialData.empty()) + { + m_logger->log("Pipeline cache blob at %s was rejected. 
Falling back to empty cache.", ILogger::ELL_WARNING, m_pipelineCache.blobPath.string().c_str()); + m_pipelineCache.object = m_device->createPipelineCache(std::span{}); + } + if (!m_pipelineCache.object) + { + m_logger->log("Failed to create PATH_TRACER pipeline cache.", ILogger::ELL_WARNING); + return; + } + + m_pipelineCache.object->setObjectDebugName("PATH_TRACER Pipeline Cache"); + m_logger->log("PATH_TRACER pipeline cache path: %s", ILogger::ELL_INFO, m_pipelineCache.blobPath.string().c_str()); + m_logger->log("PATH_TRACER trimmed shader cache path: %s", ILogger::ELL_INFO, m_pipelineCache.trimmedShaders.rootDir.string().c_str()); + m_logger->log("PATH_TRACER validated shader cache path: %s", ILogger::ELL_INFO, m_pipelineCache.trimmedShaders.validationDir.string().c_str()); + m_logger->log( + "PATH_TRACER_PIPELINE_CACHE init clear=%u loaded_from_disk=%u loaded_bytes=%zu path=%s", + ILogger::ELL_INFO, + m_pipelineCache.clearedOnStartup ? 1u : 0u, + m_pipelineCache.loadedFromDisk ? 1u : 0u, + m_pipelineCache.loadedBytes, + m_pipelineCache.blobPath.string().c_str() + ); + if (!initialData.empty()) + m_logger->log("Loaded PATH_TRACER pipeline cache blob: %s", ILogger::ELL_INFO, m_pipelineCache.blobPath.string().c_str()); + } + + smart_refctd_ptr tryLoadTrimmedShaderFromDisk(const IShader* sourceShader, const char* const entryPoint) + { + const auto cachePath = getTrimmedShaderCachePath(sourceShader, entryPoint); + std::ifstream input(cachePath, std::ios::binary | std::ios::ate); + if (!input.is_open()) + return nullptr; + + const auto size = input.tellg(); + if (size <= 0) + return nullptr; + + std::vector bytes(static_cast(size)); + input.seekg(0, std::ios::beg); + input.read(reinterpret_cast(bytes.data()), static_cast(bytes.size())); + if (!input) + return nullptr; + + auto buffer = ICPUBuffer::create({ { bytes.size() }, bytes.data() }); + if (!buffer) + return nullptr; + buffer->setContentHash(buffer->computeContentHash()); + { + std::lock_guard 
lock(m_pipelineCache.trimmedShaders.mutex); + m_pipelineCache.trimmedShaders.loadedBytes += bytes.size(); + ++m_pipelineCache.trimmedShaders.loadedFromDiskCount; + } + m_logger->log( + "PATH_TRACER_SHADER_CACHE load entrypoint=%s bytes=%zu path=%s", + ILogger::ELL_INFO, + entryPoint, + bytes.size(), + cachePath.string().c_str() + ); + return core::make_smart_refctd_ptr(std::move(buffer), IShader::E_CONTENT_TYPE::ECT_SPIRV, std::string(sourceShader->getFilepathHint())); + } + + bool hasValidatedSpirvMarker(const ICPUBuffer* spirvBuffer) const + { + return m_system->exists(getValidatedSpirvMarkerPath(spirvBuffer), IFile::ECF_READ); + } + + void saveValidatedSpirvMarker(const ICPUBuffer* spirvBuffer) + { + const auto markerPath = getValidatedSpirvMarkerPath(spirvBuffer); + if (!ensureCacheDirectoryExists(markerPath.parent_path(), "validated shader cache directory")) + return; + + auto tempPath = markerPath; + tempPath += ".tmp"; + { + std::ofstream output(tempPath, std::ios::binary | std::ios::trunc); + if (!output.is_open()) + { + m_logger->log("Failed to open validated shader marker temp file %s", ILogger::ELL_WARNING, tempPath.string().c_str()); + return; + } + output << "ok\n"; + output.flush(); + if (!output) + { + output.close(); + m_system->deleteFile(tempPath); + m_logger->log("Failed to write validated shader marker %s", ILogger::ELL_WARNING, tempPath.string().c_str()); + return; + } + } + + finalizeCacheFile(tempPath, markerPath, "validated shader marker"); + } + + bool ensurePreparedShaderValidated(const smart_refctd_ptr& preparedShader) + { + if (!preparedShader) + return false; + + auto* const content = preparedShader->getContent(); + if (!content) + return false; + + if (hasValidatedSpirvMarker(content)) + { + m_pipelineCache.trimmedShaders.trimmer->markValidated(content); + return true; + } + + if (!m_pipelineCache.trimmedShaders.trimmer->ensureValidated(content, m_logger.get())) + return false; + + saveValidatedSpirvMarker(content); + return true; + } 
+ + /* Writes the shader's content bytes to cachePath. Returns without writing when the shader has no content, the content has no pointer, or cachePath is empty. On success it bumps savedBytes / savedToDiskCount under the trimmedShaders mutex and logs a PATH_TRACER_SHADER_CACHE save line. entryPoint is only used for that log line. */ void saveTrimmedShaderToDisk(const IShader* shader, const char* const entryPoint, const path& cachePath) + { + const auto* content = shader->getContent(); + if (!content || !content->getPointer() || cachePath.empty()) + return; + + if (!ensureCacheDirectoryExists(cachePath.parent_path(), "trimmed shader cache directory")) + return; + + /* the bytes go to a sibling ".tmp" file first; finalizeCacheFile then moves it onto cachePath */ auto tempPath = cachePath; + tempPath += ".tmp"; + { + std::ofstream output(tempPath, std::ios::binary | std::ios::trunc); + if (!output.is_open()) + { + m_logger->log("Failed to open trimmed shader cache temp file %s", ILogger::ELL_WARNING, tempPath.string().c_str()); + return; + } + output.write(reinterpret_cast(content->getPointer()), static_cast(content->getSize())); + output.flush(); + if (!output) + { + /* a failed write removes the partial temp file so it is never finalized */ output.close(); + m_system->deleteFile(tempPath); + m_logger->log("Failed to write trimmed shader cache blob to %s", ILogger::ELL_WARNING, tempPath.string().c_str()); + return; + } + } + + if (!finalizeCacheFile(tempPath, cachePath, "trimmed shader cache blob")) + return; + + { + std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); + m_pipelineCache.trimmedShaders.savedBytes += content->getSize(); + ++m_pipelineCache.trimmedShaders.savedToDiskCount; + } + m_logger->log( + "PATH_TRACER_SHADER_CACHE save entrypoint=%s bytes=%zu path=%s", + ILogger::ELL_INFO, + entryPoint, + content->getSize(), + cachePath.string().c_str() + ); + } + + /* Returns the shader to hand to pipeline creation for entryPoint. A null or non-SPIR-V module is returned unchanged. Otherwise the lookup order is: the in-memory runtimeShaders map, then the on-disk cache file, then the trimmer. Returns the original shaderModule when trimming fails and nullptr when the prepared shader does not pass validation. */ smart_refctd_ptr getPreparedShaderForEntryPoint(const smart_refctd_ptr& shaderModule, const char* const entryPoint) + { + if (!shaderModule || shaderModule->getContentType() != IShader::E_CONTENT_TYPE::ECT_SPIRV) + return shaderModule; + + const auto cachePath = getTrimmedShaderCachePath(shaderModule.get(), entryPoint); + /* the cache file path, as a string, doubles as the key of the in-memory map */ const auto cacheKey = cachePath.string(); + { + std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); + const auto found = m_pipelineCache.trimmedShaders.runtimeShaders.find(cacheKey); + if (found != m_pipelineCache.trimmedShaders.runtimeShaders.end()) + return 
found->second; + } + + const auto startedAt = clock_t::now(); + auto preparedShader = tryLoadTrimmedShaderFromDisk(shaderModule.get(), entryPoint); + bool cameFromDisk = static_cast(preparedShader); + bool wasTrimmed = false; + if (!preparedShader) + { + /* nothing usable on disk: trim the module down to the single compute entry point */ const core::set entryPoints = { asset::ISPIRVEntryPointTrimmer::EntryPoint{ .name = entryPoint, .stage = hlsl::ShaderStage::ESS_COMPUTE } }; + const auto result = m_pipelineCache.trimmedShaders.trimmer->trim(shaderModule->getContent(), entryPoints, nullptr); + if (!result) + { + m_logger->log("Failed to prepare trimmed PATH_TRACER shader for %s. Falling back to the original module.", ILogger::ELL_WARNING, entryPoint); + return shaderModule; + } + if (result.spirv) + { + result.spirv->setContentHash(result.spirv->computeContentHash()); + preparedShader = core::make_smart_refctd_ptr(core::smart_refctd_ptr(result.spirv), IShader::E_CONTENT_TYPE::ECT_SPIRV, std::string(shaderModule->getFilepathHint())); + } + else + /* the trim succeeded but returned no new buffer, so the original module is used and is also what gets written to cachePath below */ preparedShader = shaderModule; + + saveTrimmedShaderToDisk(preparedShader.get(), entryPoint, cachePath); + { + std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); + ++m_pipelineCache.trimmedShaders.generatedCount; + } + wasTrimmed = (preparedShader != shaderModule); + } + + /* NOTE(review): a blob loaded from disk that fails validation also ends up here and yields nullptr; no re-trim from shaderModule is attempted and the cache file is left in place. Confirm this is intended. */ if (!ensurePreparedShaderValidated(preparedShader)) + { + m_logger->log("Prepared PATH_TRACER shader for %s is not valid SPIR-V", ILogger::ELL_ERROR, entryPoint); + return nullptr; + } + + { + std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); + /* when the key is already present the existing entry is kept and returned instead of this one */ const auto [it, inserted] = m_pipelineCache.trimmedShaders.runtimeShaders.emplace(cacheKey, preparedShader); + if (!inserted) + preparedShader = it->second; + } + + const auto wallMs = std::chrono::duration_cast(clock_t::now() - startedAt).count(); + m_logger->log( + "PATH_TRACER_SHADER_CACHE ready entrypoint=%s wall_ms=%lld from_disk=%u trimmed=%u", + ILogger::ELL_INFO, + entryPoint, + static_cast(wallMs), + cameFromDisk ? 1u : 0u, + wasTrimmed ? 
1u : 0u + ); + return preparedShader; + } + + /* Writes the pipeline cache blob to m_pipelineCache.blobPath. Does nothing unless the cache object exists, the dirty flag is set and blobPath is non-empty. Every early return leaves the dirty flag unchanged. */ void savePipelineCache() + { + if (!m_pipelineCache.object || !m_pipelineCache.dirty || m_pipelineCache.blobPath.empty()) + return; + + const auto saveStartedAt = clock_t::now(); + auto cpuCache = m_pipelineCache.object->convertToCPUCache(); + if (!cpuCache) + return; + + /* only the entry stored under this device's pipeline cache key is written out */ const auto& entries = cpuCache->getEntries(); + const auto found = entries.find(m_device->getPipelineCacheKey()); + if (found == entries.end() || !found->second.bin || found->second.bin->empty()) + return; + + if (!ensureCacheDirectoryExists(m_pipelineCache.blobPath.parent_path(), "pipeline cache directory")) + return; + + /* same temp-file-then-finalizeCacheFile sequence as the shader cache files */ auto tempPath = m_pipelineCache.blobPath; + tempPath += ".tmp"; + { + std::ofstream output(tempPath, std::ios::binary | std::ios::trunc); + if (!output.is_open()) + { + m_logger->log("Failed to open pipeline cache temp file %s", ILogger::ELL_WARNING, tempPath.string().c_str()); + return; + } + output.write(reinterpret_cast(found->second.bin->data()), static_cast(found->second.bin->size())); + output.flush(); + if (!output) + { + output.close(); + m_system->deleteFile(tempPath); + m_logger->log("Failed to write pipeline cache blob to %s", ILogger::ELL_WARNING, tempPath.string().c_str()); + return; + } + } + + if (!finalizeCacheFile(tempPath, m_pipelineCache.blobPath, "pipeline cache blob")) + return; + + /* the save succeeded: clear the dirty flag and reset the counters read by maybeCheckpointPipelineCache */ m_pipelineCache.dirty = false; + m_pipelineCache.savedBytes = found->second.bin->size(); + m_pipelineCache.newlyReadyPipelinesSinceLastSave = 0ull; + m_pipelineCache.lastSaveAt = clock_t::now(); + const auto saveElapsedMs = std::chrono::duration_cast(clock_t::now() - saveStartedAt).count(); + m_logger->log( + "PATH_TRACER_PIPELINE_CACHE save bytes=%zu wall_ms=%lld path=%s", + ILogger::ELL_INFO, + m_pipelineCache.savedBytes, + static_cast(saveElapsedMs), + m_pipelineCache.blobPath.string().c_str() + ); + m_logger->log("Saved PATH_TRACER pipeline cache blob: %s", ILogger::ELL_INFO, m_pipelineCache.blobPath.string().c_str()); + } + + 
void maybeCheckpointPipelineCache() + { + if (!m_pipelineCache.object || !m_pipelineCache.dirty) + return; + + if (m_startupLog.loggedFirstRenderSubmit && !m_pipelineCache.checkpointedAfterFirstSubmit) + { + savePipelineCache(); + m_pipelineCache.checkpointedAfterFirstSubmit = true; + return; + } + + if (!m_pipelineCache.warmup.started || m_pipelineCache.warmup.loggedComplete) + return; + + static constexpr size_t WarmupCheckpointThreshold = 4ull; + if (m_pipelineCache.newlyReadyPipelinesSinceLastSave < WarmupCheckpointThreshold) + return; + + const auto elapsedSinceLastSave = std::chrono::duration_cast(clock_t::now() - m_pipelineCache.lastSaveAt).count(); + if (elapsedSinceLastSave < 1000ll) + return; + + savePipelineCache(); + } + + smart_refctd_ptr loadRenderShader(const E_LIGHT_GEOMETRY geometry, const bool persistentWorkGroups, const bool rwmc) + { + switch (geometry) + { + case ELG_SPHERE: + if (rwmc) + return loadPrecompiledShader(); + return loadPrecompiledShader(); + case ELG_TRIANGLE: + if (rwmc) + return persistentWorkGroups ? + loadPrecompiledShader() : + loadPrecompiledShader(); + return persistentWorkGroups ? + loadPrecompiledShader() : + loadPrecompiledShader(); + case ELG_RECTANGLE: + if (rwmc) + return persistentWorkGroups ? 
+ loadPrecompiledShader() : + loadPrecompiledShader(); + return loadPrecompiledShader(); + default: + return nullptr; + } + } + + using pipeline_future_t = std::future>; + using shader_array_t = std::array, E_LIGHT_GEOMETRY::ELG_COUNT>; + using pipeline_method_array_t = std::array, EPM_COUNT>; + using pipeline_future_method_array_t = std::array; + using pipeline_array_t = std::array; + using pipeline_future_array_t = std::array; + struct SRenderPipelineStorage + { + std::array, BinaryToggleCount> shaders = {}; + std::array, BinaryToggleCount> pipelines = {}; + std::array, BinaryToggleCount> pendingPipelines = {}; + + static constexpr size_t boolToIndex(const bool value) + { + return static_cast(value); + } + + shader_array_t& getShaders(const bool persistentWorkGroups, const bool rwmc) + { + return shaders[boolToIndex(rwmc)][boolToIndex(persistentWorkGroups)]; + } + + const shader_array_t& getShaders(const bool persistentWorkGroups, const bool rwmc) const + { + return shaders[boolToIndex(rwmc)][boolToIndex(persistentWorkGroups)]; + } + + pipeline_array_t& getPipelines(const bool persistentWorkGroups, const bool rwmc) + { + return pipelines[boolToIndex(rwmc)][boolToIndex(persistentWorkGroups)]; + } + + const pipeline_array_t& getPipelines(const bool persistentWorkGroups, const bool rwmc) const + { + return pipelines[boolToIndex(rwmc)][boolToIndex(persistentWorkGroups)]; + } + + pipeline_future_array_t& getPendingPipelines(const bool persistentWorkGroups, const bool rwmc) + { + return pendingPipelines[boolToIndex(rwmc)][boolToIndex(persistentWorkGroups)]; + } + + const pipeline_future_array_t& getPendingPipelines(const bool persistentWorkGroups, const bool rwmc) const + { + return pendingPipelines[boolToIndex(rwmc)][boolToIndex(persistentWorkGroups)]; + } + }; + + struct SResolvePipelineState + { + smart_refctd_ptr layout; + smart_refctd_ptr shader; + smart_refctd_ptr pipeline; + pipeline_future_t pendingPipeline; + }; + struct SWarmupJob + { + enum class E_TYPE : 
uint8_t + { + Render, + Resolve + }; + + E_TYPE type = E_TYPE::Render; + E_LIGHT_GEOMETRY geometry = ELG_SPHERE; + bool persistentWorkGroups = false; + bool rwmc = false; + E_POLYGON_METHOD polygonMethod = EPM_PROJECTED_SOLID_ANGLE; + }; + + struct SPipelineCacheState + { + struct STrimmedShaderCache + { + smart_refctd_ptr trimmer; + path rootDir; + path validationDir; + size_t loadedFromDiskCount = 0ull; + size_t generatedCount = 0ull; + size_t savedToDiskCount = 0ull; + size_t loadedBytes = 0ull; + size_t savedBytes = 0ull; + core::unordered_map> runtimeShaders; + std::mutex mutex; + } trimmedShaders; + + struct SWarmupState + { + bool started = false; + bool loggedComplete = false; + clock_t::time_point beganAt = clock_t::now(); + size_t budget = 1ull; + size_t queuedJobs = 0ull; + size_t launchedJobs = 0ull; + size_t skippedJobs = 0ull; + std::deque queue; + } warmup; + + smart_refctd_ptr object; + path blobPath; + bool dirty = false; + bool loadedFromDisk = false; + bool clearedOnStartup = false; + size_t loadedBytes = 0ull; + size_t savedBytes = 0ull; + size_t newlyReadyPipelinesSinceLastSave = 0ull; + bool checkpointedAfterFirstSubmit = false; + clock_t::time_point lastSaveAt = clock_t::now(); + }; + + struct SCommandLineOptions + { + std::optional pipelineCacheDirOverride; + bool clearPipelineCache = false; + }; + + struct SStartupLogState + { + bool hasPathtraceOutput = false; + bool loggedFirstFrameLoop = false; + bool loggedFirstRenderDispatch = false; + bool loggedFirstRenderSubmit = false; + }; + + struct SRenderVariantInfo + { + E_POLYGON_METHOD effectiveMethod; + E_POLYGON_METHOD pipelineMethod; + const char* entryPoint; + }; + + static constexpr const char* getDefaultRenderEntryPointName(const bool persistentWorkGroups) + { + return persistentWorkGroups ? 
"mainPersistent" : "main"; + } + + static constexpr SRenderVariantInfo getRenderVariantInfo(const E_LIGHT_GEOMETRY geometry, const bool persistentWorkGroups, const E_POLYGON_METHOD requestedMethod) + { + const char* const defaultEntryPoint = getDefaultRenderEntryPointName(persistentWorkGroups); + switch (geometry) + { + case ELG_SPHERE: + return { EPM_SOLID_ANGLE, EPM_SOLID_ANGLE, defaultEntryPoint }; + case ELG_TRIANGLE: + switch (requestedMethod) + { + case EPM_AREA: + return { EPM_AREA, EPM_AREA, persistentWorkGroups ? "mainPersistentArea" : "mainArea" }; + case EPM_SOLID_ANGLE: + return { EPM_SOLID_ANGLE, EPM_SOLID_ANGLE, persistentWorkGroups ? "mainPersistentSolidAngle" : "mainSolidAngle" }; + case EPM_PROJECTED_SOLID_ANGLE: + default: + return { EPM_PROJECTED_SOLID_ANGLE, EPM_PROJECTED_SOLID_ANGLE, defaultEntryPoint }; + } + case ELG_RECTANGLE: + return { EPM_SOLID_ANGLE, EPM_SOLID_ANGLE, defaultEntryPoint }; + default: + return { EPM_PROJECTED_SOLID_ANGLE, EPM_PROJECTED_SOLID_ANGLE, defaultEntryPoint }; + } + } + + size_t getRunningPipelineBuildCount() const + { + size_t count = 0ull; + const auto countPending = [&count](const pipeline_future_array_t& futures, const pipeline_array_t& pipelines) -> void + { + for (auto geometry = 0u; geometry < ELG_COUNT; ++geometry) + { + for (auto method = 0u; method < EPM_COUNT; ++method) + { + if (futures[geometry][method].valid() && !pipelines[geometry][method]) + ++count; + } + } + }; + for (const auto rwmc : { false, true }) + { + for (const auto persistentWorkGroups : { false, true }) + countPending(m_renderPipelines.getPendingPipelines(persistentWorkGroups, rwmc), m_renderPipelines.getPipelines(persistentWorkGroups, rwmc)); + } + if (m_resolvePipelineState.pendingPipeline.valid() && !m_resolvePipelineState.pipeline) + ++count; + return count; + } + + size_t getKnownRenderPipelineCount() const + { + size_t count = 0ull; + bool seen[ELG_COUNT][BinaryToggleCount][BinaryToggleCount][EPM_COUNT] = {}; + for (auto geometry 
= 0u; geometry < ELG_COUNT; ++geometry) + { + for (auto persistentWorkGroups = 0u; persistentWorkGroups < BinaryToggleCount; ++persistentWorkGroups) + { + for (auto rwmc = 0u; rwmc < BinaryToggleCount; ++rwmc) + { + for (auto method = 0u; method < EPM_COUNT; ++method) + { + const auto pipelineMethod = static_cast(getRenderVariantInfo( + static_cast(geometry), + static_cast(persistentWorkGroups), + static_cast(method) + ).pipelineMethod); + if (seen[geometry][persistentWorkGroups][rwmc][pipelineMethod]) + continue; + seen[geometry][persistentWorkGroups][rwmc][pipelineMethod] = true; + ++count; + } + } + } + } + return count; + } + + size_t getReadyRenderPipelineCount() const + { + size_t count = 0ull; + const auto countReady = [&count](const pipeline_array_t& pipelines) -> void + { + for (const auto& perGeometry : pipelines) + { + for (const auto& pipeline : perGeometry) + { + if (pipeline) + ++count; + } + } + }; + for (const auto rwmc : { false, true }) + { + for (const auto persistentWorkGroups : { false, true }) + countReady(m_renderPipelines.getPipelines(persistentWorkGroups, rwmc)); + } + return count; + } + + void enqueueWarmupJob(const SWarmupJob& job) + { + for (const auto& existing : m_pipelineCache.warmup.queue) + { + if (existing.type != job.type) + continue; + if (existing.type == SWarmupJob::E_TYPE::Resolve) + return; + if ( + existing.geometry == job.geometry && + existing.persistentWorkGroups == job.persistentWorkGroups && + existing.rwmc == job.rwmc && + getRenderVariantInfo(existing.geometry, existing.persistentWorkGroups, existing.polygonMethod).pipelineMethod == + getRenderVariantInfo(job.geometry, job.persistentWorkGroups, job.polygonMethod).pipelineMethod + ) + return; + } + m_pipelineCache.warmup.queue.push_back(job); + } + + bool launchWarmupJobIfNeeded(const SWarmupJob& job) + { + if (job.type == SWarmupJob::E_TYPE::Resolve) + { + if (m_resolvePipelineState.pipeline || m_resolvePipelineState.pendingPipeline.valid()) + return false; + 
ensureResolvePipeline(); + return m_resolvePipelineState.pendingPipeline.valid(); + } + + auto& pipelines = m_renderPipelines.getPipelines(job.persistentWorkGroups, job.rwmc); + auto& pendingPipelines = m_renderPipelines.getPendingPipelines(job.persistentWorkGroups, job.rwmc); + const auto methodIx = static_cast(getRenderVariantInfo(job.geometry, job.persistentWorkGroups, job.polygonMethod).pipelineMethod); + if (pipelines[job.geometry][methodIx] || pendingPipelines[job.geometry][methodIx].valid()) + return false; + + ensureRenderPipeline(job.geometry, job.persistentWorkGroups, job.rwmc, job.polygonMethod); + return pendingPipelines[job.geometry][methodIx].valid(); + } + + void pumpPipelineWarmup() + { + if (!m_pipelineCache.warmup.started) + return; + + while (!m_pipelineCache.warmup.queue.empty() && getRunningPipelineBuildCount() < m_pipelineCache.warmup.budget) + { + const auto job = m_pipelineCache.warmup.queue.front(); + m_pipelineCache.warmup.queue.pop_front(); + if (launchWarmupJobIfNeeded(job)) + ++m_pipelineCache.warmup.launchedJobs; + else + ++m_pipelineCache.warmup.skippedJobs; + } + + if (!m_pipelineCache.warmup.loggedComplete && m_pipelineCache.warmup.queue.empty() && getRunningPipelineBuildCount() == 0ull) + { + m_pipelineCache.warmup.loggedComplete = true; + const auto warmupElapsedMs = std::chrono::duration_cast(clock_t::now() - m_pipelineCache.warmup.beganAt).count(); + const auto readyRenderPipelines = getReadyRenderPipelineCount(); + const auto totalRenderPipelines = getKnownRenderPipelineCount(); + m_logger->log( + "PATH_TRACER_PIPELINE_CACHE warmup_complete wall_ms=%lld queued_jobs=%zu launched_jobs=%zu skipped_jobs=%zu max_parallel=%zu ready_render=%zu total_render=%zu resolve_ready=%u", + ILogger::ELL_INFO, + static_cast(warmupElapsedMs), + m_pipelineCache.warmup.queuedJobs, + m_pipelineCache.warmup.launchedJobs, + m_pipelineCache.warmup.skippedJobs, + m_pipelineCache.warmup.budget, + readyRenderPipelines, + totalRenderPipelines, + 
m_resolvePipelineState.pipeline ? 1u : 0u + ); + logStartupEvent("pipeline_warmup_complete"); + savePipelineCache(); + } + } + + pipeline_future_t requestComputePipelineBuild(smart_refctd_ptr shaderModule, IGPUPipelineLayout* const pipelineLayout, const char* const entryPoint) + { + if (!shaderModule) + return {}; + + return std::async( + std::launch::async, + [ + this, + device = m_device, + pipelineCache = m_pipelineCache.object, + shader = std::move(shaderModule), + layout = smart_refctd_ptr(pipelineLayout), + requiredSubgroupSize = m_requiredSubgroupSize, + logger = m_logger.get(), + entryPointName = std::string(entryPoint), + cacheLoadedFromDisk = m_pipelineCache.loadedFromDisk + ]() -> smart_refctd_ptr + { + const auto startedAt = clock_t::now(); + auto preparedShader = getPreparedShaderForEntryPoint(shader, entryPointName.c_str()); + if (!preparedShader) + return nullptr; + smart_refctd_ptr pipeline; + IGPUComputePipeline::SCreationParams params = {}; + params.layout = layout.get(); + params.shader.shader = preparedShader.get(); + params.shader.entryPoint = entryPointName.c_str(); + params.shader.entries = nullptr; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = requiredSubgroupSize; + if (!device->createComputePipelines(pipelineCache.get(), { ¶ms, 1 }, &pipeline)) + { + if (logger) + logger->log("Failed to create precompiled path tracing pipeline for %s", ILogger::ELL_ERROR, entryPointName.c_str()); + return nullptr; + } + if (logger) + { + const auto wallMs = std::chrono::duration_cast(clock_t::now() - startedAt).count(); + logger->log( + "PATH_TRACER_PIPELINE_BUILD entrypoint=%s wall_ms=%lld cache_loaded_from_disk=%u", + ILogger::ELL_INFO, + entryPointName.c_str(), + static_cast(wallMs), + cacheLoadedFromDisk ? 
1u : 0u + ); + } + return pipeline; + } + ); + } + + void pollPendingPipeline(pipeline_future_t& future, smart_refctd_ptr& pipeline) + { + if (!future.valid() || pipeline) + return; + if (future.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) + return; + pipeline = future.get(); + if (pipeline) + { + m_pipelineCache.dirty = true; + ++m_pipelineCache.newlyReadyPipelinesSinceLastSave; + } + } + + void pollPendingPipelines() + { + for (const auto rwmc : { false, true }) + { + for (const auto persistentWorkGroups : { false, true }) + { + auto& pendingPipelines = m_renderPipelines.getPendingPipelines(persistentWorkGroups, rwmc); + auto& pipelines = m_renderPipelines.getPipelines(persistentWorkGroups, rwmc); + for (auto geometry = 0u; geometry < ELG_COUNT; ++geometry) + { + for (auto method = 0u; method < EPM_COUNT; ++method) + pollPendingPipeline(pendingPipelines[geometry][method], pipelines[geometry][method]); + } + } + } + pollPendingPipeline(m_resolvePipelineState.pendingPipeline, m_resolvePipelineState.pipeline); + } + + void waitForPendingPipelines() + { + auto waitAndStore = [](pipeline_future_t& future, smart_refctd_ptr& pipeline) -> void + { + if (!future.valid() || pipeline) + return; + future.wait(); + pipeline = future.get(); + }; + + for (const auto rwmc : { false, true }) + { + for (const auto persistentWorkGroups : { false, true }) + { + auto& pendingPipelines = m_renderPipelines.getPendingPipelines(persistentWorkGroups, rwmc); + auto& pipelines = m_renderPipelines.getPipelines(persistentWorkGroups, rwmc); + for (auto geometry = 0u; geometry < ELG_COUNT; ++geometry) + { + for (auto method = 0u; method < EPM_COUNT; ++method) + { + const auto hadPipeline = static_cast(pipelines[geometry][method]); + waitAndStore(pendingPipelines[geometry][method], pipelines[geometry][method]); + const auto pipelineBecameReady = !hadPipeline && static_cast(pipelines[geometry][method]); + m_pipelineCache.dirty = m_pipelineCache.dirty || 
pipelineBecameReady; + m_pipelineCache.newlyReadyPipelinesSinceLastSave += pipelineBecameReady ? 1ull : 0ull; + } + } + } + } + const auto hadResolvePipeline = static_cast(m_resolvePipelineState.pipeline); + waitAndStore(m_resolvePipelineState.pendingPipeline, m_resolvePipelineState.pipeline); + m_pipelineCache.dirty = m_pipelineCache.dirty || (!hadResolvePipeline && static_cast(m_resolvePipelineState.pipeline)); + if (!hadResolvePipeline && static_cast(m_resolvePipelineState.pipeline)) + ++m_pipelineCache.newlyReadyPipelinesSinceLastSave; + } + + IGPUComputePipeline* ensureRenderPipeline(const E_LIGHT_GEOMETRY geometry, const bool persistentWorkGroups, const bool rwmc, const E_POLYGON_METHOD polygonMethod) + { + auto& pipelines = m_renderPipelines.getPipelines(persistentWorkGroups, rwmc); + auto& pendingPipelines = m_renderPipelines.getPendingPipelines(persistentWorkGroups, rwmc); + const auto variantInfo = getRenderVariantInfo(geometry, persistentWorkGroups, polygonMethod); + const auto methodIx = static_cast(variantInfo.pipelineMethod); + auto& pipeline = pipelines[geometry][methodIx]; + auto& future = pendingPipelines[geometry][methodIx]; + + pollPendingPipeline(future, pipeline); + if (pipeline) + return pipeline.get(); + + if (!future.valid()) + { + const auto& shaders = m_renderPipelines.getShaders(persistentWorkGroups, rwmc); + auto* const layout = rwmc ? 
m_rwmcRenderPipelineLayout.get() : m_renderPipelineLayout.get(); + future = requestComputePipelineBuild(shaders[geometry], layout, variantInfo.entryPoint); + } + + return nullptr; + } + + IGPUComputePipeline* ensureResolvePipeline() + { + pollPendingPipeline(m_resolvePipelineState.pendingPipeline, m_resolvePipelineState.pipeline); + if (m_resolvePipelineState.pipeline) + return m_resolvePipelineState.pipeline.get(); + + if (!m_resolvePipelineState.pendingPipeline.valid()) + m_resolvePipelineState.pendingPipeline = requestComputePipelineBuild(m_resolvePipelineState.shader, m_resolvePipelineState.layout.get(), "resolve"); + + return nullptr; + } + + void kickoffPipelineWarmup() + { + m_pipelineCache.warmup.started = true; + m_pipelineCache.warmup.queue.clear(); + m_pipelineCache.warmup.loggedComplete = false; + m_pipelineCache.warmup.beganAt = clock_t::now(); + m_pipelineCache.warmup.budget = getBackgroundPipelineBuildBudget(); + m_pipelineCache.warmup.queuedJobs = 0ull; + m_pipelineCache.warmup.launchedJobs = 0ull; + m_pipelineCache.warmup.skippedJobs = 0ull; + const auto currentGeometry = static_cast(guiControlled.PTPipeline); + const auto currentMethod = static_cast(guiControlled.polygonMethod); + const auto enqueueRenderVariants = [this, currentGeometry](const E_LIGHT_GEOMETRY geometry, const E_POLYGON_METHOD preferredMethod) -> void + { + const auto enqueueForMethods = [this, geometry](const std::initializer_list methods, const bool preferPersistent, const bool preferRWMC) -> void + { + const bool persistentOrder[2] = { preferPersistent, !preferPersistent }; + const bool rwmcOrder[2] = { preferRWMC, !preferRWMC }; + for (const auto method : methods) + { + for (const auto persistentWorkGroups : persistentOrder) + { + for (const auto rwmc : rwmcOrder) + { + enqueueWarmupJob({ + .type = SWarmupJob::E_TYPE::Render, + .geometry = geometry, + .persistentWorkGroups = persistentWorkGroups, + .rwmc = rwmc, + .polygonMethod = method + }); + } + } + } + }; + + const bool 
preferPersistent = geometry == currentGeometry ? guiControlled.usePersistentWorkGroups : false; + const bool preferRWMC = geometry == currentGeometry ? guiControlled.useRWMC : false; + switch (geometry) + { + case ELG_SPHERE: + enqueueForMethods({ EPM_SOLID_ANGLE }, preferPersistent, preferRWMC); + break; + case ELG_TRIANGLE: + { + switch (preferredMethod) + { + case EPM_AREA: + enqueueForMethods({ EPM_AREA, EPM_SOLID_ANGLE, EPM_PROJECTED_SOLID_ANGLE }, preferPersistent, preferRWMC); + break; + case EPM_SOLID_ANGLE: + enqueueForMethods({ EPM_SOLID_ANGLE, EPM_AREA, EPM_PROJECTED_SOLID_ANGLE }, preferPersistent, preferRWMC); + break; + case EPM_PROJECTED_SOLID_ANGLE: + default: + enqueueForMethods({ EPM_PROJECTED_SOLID_ANGLE, EPM_AREA, EPM_SOLID_ANGLE }, preferPersistent, preferRWMC); + break; + } + break; + } + case ELG_RECTANGLE: + enqueueForMethods({ EPM_SOLID_ANGLE }, preferPersistent, preferRWMC); + break; + default: + break; + } + }; + + enqueueRenderVariants(currentGeometry, currentMethod); + for (auto geometry = 0u; geometry < ELG_COUNT; ++geometry) + { + const auto geometryEnum = static_cast(geometry); + if (geometryEnum == currentGeometry) + continue; + enqueueRenderVariants(geometryEnum, currentMethod); + } + enqueueWarmupJob({ .type = SWarmupJob::E_TYPE::Resolve }); + m_pipelineCache.warmup.queuedJobs = m_pipelineCache.warmup.queue.size(); + const auto logicalConcurrency = std::thread::hardware_concurrency(); + m_logger->log( + "PATH_TRACER_PIPELINE_CACHE warmup_start queued_jobs=%zu max_parallel=%zu logical_threads=%u current_geometry=%u current_method=%u", + ILogger::ELL_INFO, + m_pipelineCache.warmup.queuedJobs, + m_pipelineCache.warmup.budget, + logicalConcurrency, + static_cast(currentGeometry), + static_cast(currentMethod) + ); + pumpPipelineWarmup(); + } + + IGPUComputePipeline* pickPTPipeline() + { + return ensureRenderPipeline( + static_cast(guiControlled.PTPipeline), + guiControlled.usePersistentWorkGroups, + guiControlled.useRWMC, + 
static_cast(guiControlled.polygonMethod) + ); } private: @@ -1542,12 +2980,12 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui // gpu resources smart_refctd_ptr m_cmdPool; - std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelines; - std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelines; - std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelinesRWMC; - std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelinesRWMC; - smart_refctd_ptr m_resolvePipeline; + SRenderPipelineStorage m_renderPipelines; + smart_refctd_ptr m_renderPipelineLayout; + smart_refctd_ptr m_rwmcRenderPipelineLayout; + SResolvePipelineState m_resolvePipelineState; smart_refctd_ptr m_presentPipeline; + IPipelineBase::SUBGROUP_SIZE m_requiredSubgroupSize = IPipelineBase::SUBGROUP_SIZE::UNKNOWN; uint64_t m_realFrameIx = 0; std::array, MaxFramesInFlight> m_cmdBufs; ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; @@ -1590,7 +3028,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui video::CDumbPresentationOracle m_oracle; - uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed + uint16_t gcIndex = {}; struct GUIControllables { @@ -1599,6 +3037,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui float camYAngle = 165.f / 180.f * 3.14159f; float camXAngle = 32.f / 180.f * 3.14159f; int PTPipeline = E_LIGHT_GEOMETRY::ELG_SPHERE; + int polygonMethod = EPM_PROJECTED_SOLID_ANGLE; int spp = 32; int depth = 3; rwmc::SResolveParameters::SCreateParams rwmcParams; @@ -1615,7 +3054,10 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui }; TransformRequestParams m_transformParams; - bool m_firstFrame = true; + clock_t::time_point m_startupBeganAt = clock_t::now(); + SCommandLineOptions 
# Appends one SPIR-V compile rule, encoded as a JSON object, to a list variable
# in the caller's scope.
#
#   PT_APPEND_SPIRV_RULE(
#     VAR <list-variable>          # list that receives the rule (required)
#     INPUT <value>                # stored under the "INPUT" key (required)
#     KEY <value>                  # stored under the "KEY" key (required)
#     [COMPILE_OPTIONS <opt>...]   # stored as the "COMPILE_OPTIONS" array
#   )
#
# Values are inserted verbatim (no JSON escaping is performed), so callers must
# pass strings that are already safe inside a JSON string literal.
function(PT_APPEND_SPIRV_RULE)
  cmake_parse_arguments(PARSE_ARGV 0 PT_RULE "" "VAR;INPUT;KEY" "COMPILE_OPTIONS")

  # Absent keywords are left undefined by cmake_parse_arguments, and if() treats
  # an undefined bare name as a literal string, so test the quoted expansion.
  if("${PT_RULE_VAR}" STREQUAL "" OR "${PT_RULE_INPUT}" STREQUAL "" OR "${PT_RULE_KEY}" STREQUAL "")
    message(FATAL_ERROR "PT_APPEND_SPIRV_RULE requires VAR, INPUT and KEY")
  endif()

  set(PT_RULE_JSON "{\"INPUT\":\"${PT_RULE_INPUT}\",\"KEY\":\"${PT_RULE_KEY}\"")
  if(NOT "${PT_RULE_COMPILE_OPTIONS}" STREQUAL "")
    # Quote every option, then join with commas to form the JSON array body.
    set(PT_RULE_COMPILE_OPTIONS_JSON "${PT_RULE_COMPILE_OPTIONS}")
    list(TRANSFORM PT_RULE_COMPILE_OPTIONS_JSON PREPEND "\"")
    list(TRANSFORM PT_RULE_COMPILE_OPTIONS_JSON APPEND "\"")
    list(JOIN PT_RULE_COMPILE_OPTIONS_JSON "," PT_RULE_COMPILE_OPTIONS_JSON)
    string(APPEND PT_RULE_JSON ",\"COMPILE_OPTIONS\":[${PT_RULE_COMPILE_OPTIONS_JSON}]")
  endif()
  string(APPEND PT_RULE_JSON "}")

  # Append locally, then publish the updated list to the caller.
  list(APPEND ${PT_RULE_VAR} "${PT_RULE_JSON}")
  set(${PT_RULE_VAR} "${${PT_RULE_VAR}}" PARENT_SCOPE)
endfunction()

# Turns a list of JSON rule objects into a JSON array document, in place.
#
#   PT_FINALIZE_JSON_PAYLOAD(INOUT <list-variable>)
#
# On return <list-variable> holds "[", the rules joined by ",<newline>", and "]",
# each on its own line and followed by a trailing newline.
function(PT_FINALIZE_JSON_PAYLOAD)
  cmake_parse_arguments(PARSE_ARGV 0 PT_PAYLOAD "" "INOUT" "")

  # Same reasoning as above: a missing INOUT leaves the variable undefined.
  if("${PT_PAYLOAD_INOUT}" STREQUAL "")
    message(FATAL_ERROR "PT_FINALIZE_JSON_PAYLOAD requires INOUT")
  endif()

  # Unquoted on purpose: each list element becomes one joined entry.
  string(JOIN ",\n" PT_RULES_BODY ${${PT_PAYLOAD_INOUT}})
  set(${PT_PAYLOAD_INOUT} "[\n${PT_RULES_BODY}\n]\n" PARENT_SCOPE)
endfunction()