Precompile and cache EX31 path tracer variants #262
Precompile and cache EX31 path tracer variants #262 — AnastaZIuk wants to merge 29 commits into master from
Conversation
| // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. | ||
| // This file is part of the "Nabla Engine". | ||
| // For conditions of distribution and use, see copyright notice in nabla.h | ||
| #ifndef _NBL_BUILTIN_HLSL_BXDF_NDF_MICROFACET_LIGHT_TRANSFORM_INCLUDED_ | ||
| #define _NBL_BUILTIN_HLSL_BXDF_NDF_MICROFACET_LIGHT_TRANSFORM_INCLUDED_ | ||
|
|
||
| #include "nbl/builtin/hlsl/limits.hlsl" | ||
| #include "nbl/builtin/hlsl/bxdf/common.hlsl" | ||
|
|
||
| namespace nbl | ||
| { | ||
| namespace hlsl | ||
| { | ||
| namespace bxdf | ||
| { | ||
| namespace ndf | ||
| { | ||
|
|
||
| enum MicrofacetTransformTypes : uint16_t | ||
| { | ||
| MTT_REFLECT = 0b01, | ||
| MTT_REFRACT = 0b10, | ||
| MTT_REFLECT_REFRACT = 0b11 | ||
| }; | ||
|
|
||
| namespace microfacet_transform_concepts | ||
| { | ||
| #define NBL_CONCEPT_NAME QuantQuery | ||
| #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) | ||
| #define NBL_CONCEPT_TPLT_PRM_NAMES (T) | ||
| #define NBL_CONCEPT_PARAM_0 (query, T) | ||
| NBL_CONCEPT_BEGIN(1) | ||
| #define query NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 | ||
| NBL_CONCEPT_END( | ||
| ((NBL_CONCEPT_REQ_TYPE)(T::scalar_type)) | ||
| ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((query.getVdotHLdotH()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) | ||
| ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((query.getNeg_rcp2_VdotH_etaLdotH()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) | ||
| ); | ||
| #undef query | ||
| #include <nbl/builtin/hlsl/concepts/__end.hlsl> | ||
| } | ||
|
|
||
| template<typename T> | ||
| struct DualMeasureQuantQuery | ||
| { | ||
| using scalar_type = T; | ||
|
|
||
| template<class Interaction, class MicrofacetCache> | ||
| static DualMeasureQuantQuery<T> create(NBL_CONST_REF_ARG(Interaction) interaction, NBL_CONST_REF_ARG(MicrofacetCache) cache, scalar_type orientedEta) | ||
| { | ||
| DualMeasureQuantQuery<T> retval; | ||
| retval.VdotHLdotH = cache.getVdotHLdotH(); | ||
| const scalar_type VdotH = cache.getVdotH(); | ||
| const scalar_type VdotH_etaLdotH = hlsl::mix(VdotH + orientedEta * cache.getLdotH(), |
There was a problem hiding this comment.
What's this file doing here!?
31_HLSLPathTracer/main.cpp
Outdated
| enum E_POLYGON_METHOD : uint8_t | ||
| { | ||
| EPM_AREA, | ||
| EPM_SOLID_ANGLE, | ||
| EPM_PROJECTED_SOLID_ANGLE, | ||
| EPM_COUNT | ||
| }; |
There was a problem hiding this comment.
Why not a shared enum in an HLSL common include?
There was a problem hiding this comment.
Moved to a separate header here, but I think I might slap it into Nabla.
There was a problem hiding this comment.
Not Nabla; it's a thing only for this example.
31_HLSLPathTracer/main.cpp
Outdated
| const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { | ||
| "ELG_SPHERE", | ||
| "ELG_TRIANGLE", | ||
| "ELG_RECTANGLE" | ||
| }; | ||
| const char* polygonMethodNames[EPM_COUNT] = { | ||
| "Area", | ||
| "Solid Angle", | ||
| "Projected Solid Angle" | ||
| }; |
There was a problem hiding this comment.
system::to_string_helper specializations are now our preferred way.
There was a problem hiding this comment.
Resolved, and moved to a separate header here, but I think I might slap it into Nabla.
There was a problem hiding this comment.
Not Nabla; it's a thing only for this example.
| // TODO: do this better use asset manager to get the ICPUBuffer from `.bin` | ||
| auto createBufferFromCacheFile = [this]( | ||
| system::path filename, | ||
| size_t bufferSize, | ||
| void *data, | ||
| const system::path& filePath, | ||
| size_t byteSize, | ||
| void* data, | ||
| smart_refctd_ptr<ICPUBuffer>& buffer | ||
| ) -> std::pair<smart_refctd_ptr<IFile>, bool> | ||
| ) -> bool | ||
| { | ||
| ISystem::future_t<smart_refctd_ptr<nbl::system::IFile>> owenSamplerFileFuture; | ||
| ISystem::future_t<size_t> owenSamplerFileReadFuture; | ||
| size_t owenSamplerFileBytesRead; | ||
| size_t owenSamplerFileBytesRead = 0ull; | ||
|
|
||
| m_system->createFile(owenSamplerFileFuture, localOutputCWD / filename, IFile::ECF_READ); | ||
| m_system->createFile(owenSamplerFileFuture, filePath, IFile::ECF_READ); | ||
| smart_refctd_ptr<IFile> owenSamplerFile; | ||
|
|
||
| if (owenSamplerFileFuture.wait()) | ||
| { | ||
| owenSamplerFileFuture.acquire().move_into(owenSamplerFile); | ||
| if (!owenSamplerFile) | ||
| return { nullptr, false }; | ||
| return false; | ||
|
|
||
| owenSamplerFile->read(owenSamplerFileReadFuture, data, 0, bufferSize); | ||
| owenSamplerFile->read(owenSamplerFileReadFuture, data, 0, byteSize); | ||
| if (owenSamplerFileReadFuture.wait()) | ||
| { | ||
| owenSamplerFileReadFuture.acquire().move_into(owenSamplerFileBytesRead); | ||
|
|
||
| if (owenSamplerFileBytesRead < bufferSize) | ||
| { | ||
| buffer = asset::ICPUBuffer::create({ sizeof(uint32_t) * bufferSize }); | ||
| return { owenSamplerFile, false }; | ||
| } | ||
| if (owenSamplerFileBytesRead < byteSize) | ||
| return false; | ||
|
|
||
| buffer = asset::ICPUBuffer::create({ { sizeof(uint32_t) * bufferSize }, data }); | ||
| buffer = asset::ICPUBuffer::create({ { byteSize }, data }); | ||
| return true; | ||
| } | ||
| } | ||
|
|
||
| return { owenSamplerFile, true }; | ||
| return false; | ||
| }; | ||
| auto writeBufferIntoCacheFile = [this](smart_refctd_ptr<IFile> file, size_t bufferSize, void* data) | ||
| auto writeBufferIntoCacheFile = [this](const system::path& filePath, size_t byteSize, const void* data) | ||
| { | ||
| std::filesystem::create_directories(filePath.parent_path()); | ||
|
|
||
| ISystem::future_t<smart_refctd_ptr<nbl::system::IFile>> owenSamplerFileFuture; | ||
| ISystem::future_t<size_t> owenSamplerFileWriteFuture; | ||
| size_t owenSamplerFileBytesWritten; | ||
| size_t owenSamplerFileBytesWritten = 0ull; | ||
|
|
||
| file->write(owenSamplerFileWriteFuture, data, 0, bufferSize); | ||
| m_system->createFile(owenSamplerFileFuture, filePath, IFile::ECF_WRITE); | ||
| if (!owenSamplerFileFuture.wait()) | ||
| return; | ||
|
|
||
| smart_refctd_ptr<IFile> file; | ||
| owenSamplerFileFuture.acquire().move_into(file); | ||
| if (!file) | ||
| return; | ||
|
|
||
| file->write(owenSamplerFileWriteFuture, const_cast<void*>(data), 0, byteSize); | ||
| if (owenSamplerFileWriteFuture.wait()) | ||
| owenSamplerFileWriteFuture.acquire().move_into(owenSamplerFileBytesWritten); | ||
| }; |
There was a problem hiding this comment.
@keptsecret can you put it in a header in examples_common, so that example 40 can use this cache and generate the Scrambled Owen Sequence as well, and we don't have a huge block of code like this sitting duplicated in both examples?
| template<typename T> | ||
| struct RuntimeShapeSamplingSelector<T, PST_SPHERE> | ||
| { | ||
| using scalar_type = T; | ||
| using vector3_type = vector<T, 3>; | ||
| using shape_type = Shape<T, PST_SPHERE>; | ||
|
|
||
| template<typename Ray> | ||
| static scalar_type deferredPdf(NEEPolygonMethod, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(Ray) ray) | ||
| { | ||
| const ShapeSampling<scalar_type, PST_SPHERE, PPM_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_SPHERE, PPM_SOLID_ANGLE>::create(shape); | ||
| return sampling.template deferredPdf<Ray>(ray); | ||
| } | ||
|
|
||
| template<class Aniso> | ||
| static vector3_type generate_and_pdf(NEEPolygonMethod, NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, NBL_CONST_REF_ARG(vector3_type) xi) | ||
| { | ||
| const ShapeSampling<scalar_type, PST_SPHERE, PPM_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_SPHERE, PPM_SOLID_ANGLE>::create(shape); | ||
| return sampling.template generate_and_pdf<Aniso>(pdf, newRayMaxT, origin, interaction, xi); | ||
| } | ||
| }; | ||
|
|
||
| template<typename T> | ||
| struct RuntimeShapeSamplingSelector<T, PST_TRIANGLE> | ||
| { | ||
| using scalar_type = T; | ||
| using vector3_type = vector<T, 3>; | ||
| using shape_type = Shape<T, PST_TRIANGLE>; | ||
|
|
||
| template<typename Ray> | ||
| static scalar_type deferredPdf(const NEEPolygonMethod polygonMethod, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(Ray) ray) | ||
| { | ||
| switch (polygonMethod) | ||
| { | ||
| case PPM_AREA: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_TRIANGLE, PPM_AREA> sampling = ShapeSampling<scalar_type, PST_TRIANGLE, PPM_AREA>::create(shape); | ||
| return sampling.template deferredPdf<Ray>(ray); | ||
| } | ||
| case PPM_SOLID_ANGLE: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_TRIANGLE, PPM_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_TRIANGLE, PPM_SOLID_ANGLE>::create(shape); | ||
| return sampling.template deferredPdf<Ray>(ray); | ||
| } | ||
| case PPM_APPROX_PROJECTED_SOLID_ANGLE: | ||
| default: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_TRIANGLE, PPM_APPROX_PROJECTED_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_TRIANGLE, PPM_APPROX_PROJECTED_SOLID_ANGLE>::create(shape); | ||
| return sampling.template deferredPdf<Ray>(ray); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| template<class Aniso> | ||
| static vector3_type generate_and_pdf(const NEEPolygonMethod polygonMethod, NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, NBL_CONST_REF_ARG(vector3_type) xi) | ||
| { | ||
| switch (polygonMethod) | ||
| { | ||
| case PPM_AREA: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_TRIANGLE, PPM_AREA> sampling = ShapeSampling<scalar_type, PST_TRIANGLE, PPM_AREA>::create(shape); | ||
| return sampling.template generate_and_pdf<Aniso>(pdf, newRayMaxT, origin, interaction, xi); | ||
| } | ||
| case PPM_SOLID_ANGLE: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_TRIANGLE, PPM_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_TRIANGLE, PPM_SOLID_ANGLE>::create(shape); | ||
| return sampling.template generate_and_pdf<Aniso>(pdf, newRayMaxT, origin, interaction, xi); | ||
| } | ||
| case PPM_APPROX_PROJECTED_SOLID_ANGLE: | ||
| default: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_TRIANGLE, PPM_APPROX_PROJECTED_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_TRIANGLE, PPM_APPROX_PROJECTED_SOLID_ANGLE>::create(shape); | ||
| return sampling.template generate_and_pdf<Aniso>(pdf, newRayMaxT, origin, interaction, xi); | ||
| } | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| template<typename T> | ||
| struct RuntimeShapeSamplingSelector<T, PST_RECTANGLE> | ||
| { | ||
| using scalar_type = T; | ||
| using vector3_type = vector<T, 3>; | ||
| using shape_type = Shape<T, PST_RECTANGLE>; | ||
|
|
||
| template<typename Ray> | ||
| static scalar_type deferredPdf(const NEEPolygonMethod polygonMethod, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(Ray) ray) | ||
| { | ||
| switch (polygonMethod) | ||
| { | ||
| case PPM_AREA: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_RECTANGLE, PPM_AREA> sampling = ShapeSampling<scalar_type, PST_RECTANGLE, PPM_AREA>::create(shape); | ||
| return sampling.template deferredPdf<Ray>(ray); | ||
| } | ||
| case PPM_SOLID_ANGLE: | ||
| case PPM_APPROX_PROJECTED_SOLID_ANGLE: | ||
| default: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_RECTANGLE, PPM_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_RECTANGLE, PPM_SOLID_ANGLE>::create(shape); | ||
| return sampling.template deferredPdf<Ray>(ray); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| template<class Aniso> | ||
| static vector3_type generate_and_pdf(const NEEPolygonMethod polygonMethod, NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, NBL_CONST_REF_ARG(vector3_type) xi) | ||
| { | ||
| switch (polygonMethod) | ||
| { | ||
| case PPM_AREA: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_RECTANGLE, PPM_AREA> sampling = ShapeSampling<scalar_type, PST_RECTANGLE, PPM_AREA>::create(shape); | ||
| return sampling.template generate_and_pdf<Aniso>(pdf, newRayMaxT, origin, interaction, xi); | ||
| } | ||
| case PPM_SOLID_ANGLE: | ||
| case PPM_APPROX_PROJECTED_SOLID_ANGLE: | ||
| default: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_RECTANGLE, PPM_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_RECTANGLE, PPM_SOLID_ANGLE>::create(shape); | ||
| return sampling.template generate_and_pdf<Aniso>(pdf, newRayMaxT, origin, interaction, xi); | ||
| } | ||
| } | ||
| } | ||
| }; | ||
|
|
There was a problem hiding this comment.
Undo, you just went from 10 ms to 80+ ms.
There was a problem hiding this comment.
resolved resolved resolved
| #ifndef PATH_TRACER_ENTRYPOINT_NAME | ||
| #define PATH_TRACER_ENTRYPOINT_NAME mainPersistent | ||
| #endif | ||
|
|
||
| #ifndef PATH_TRACER_ENTRYPOINT_POLYGON_METHOD | ||
| #ifdef PATH_TRACER_RUNTIME_POLYGON_METHOD | ||
| #define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PATH_TRACER_RUNTIME_POLYGON_METHOD | ||
| #else | ||
| #define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_APPROX_PROJECTED_SOLID_ANGLE | ||
| #endif | ||
| #endif | ||
|
|
||
| #if !PATH_TRACER_ENABLE_PERSISTENT | ||
| #error Persistent entrypoint requested while PATH_TRACER_ENABLE_PERSISTENT is disabled | ||
| #endif |
There was a problem hiding this comment.
Why not just make a Config struct with NBL_CONSTEXPR_STATIC_INLINE members? Then we don't need to change definitions, so:
- Boost Wave preprocessing is reusable
- Clang-AST actually spots similar instantiations
- Unity Build is possible
| #if PATH_TRACER_USE_RWMC | ||
| accumulator_type accumulator = accumulator_type::create(::pc.splattingParameters); | ||
| #else | ||
| accumulator_type accumulator = accumulator_type::create(); | ||
| #endif | ||
|
|
||
| for (int i = 0; i < renderPushConstants.sampleCount; ++i) | ||
| { | ||
| const float32_t3 uvw = pathtracer.randGen(0u, i); | ||
| ray_type ray = rayGen.generate(uvw); | ||
| ray.initPayload(); | ||
| pathtracer.sampleMeasure(ray, i, renderPushConstants.depth, accumulator); | ||
| } | ||
|
|
||
| #if PATH_TRACER_USE_RWMC | ||
| for (uint32_t i = 0; i < CascadeCount; ++i) | ||
| ::cascade[uint3(coords.x, coords.y, i)] = float32_t4(accumulator.accumulation.data[i], 1.0f); | ||
| #else | ||
| ::outImage[uint3(coords.x, coords.y, 0)] = float32_t4(accumulator.accumulation, 1.0); | ||
| #endif | ||
| } | ||
|
|
||
| #if PATH_TRACER_ENABLE_LINEAR | ||
| void runLinear(uint32_t3 threadID, NEEPolygonMethod polygonMethod) | ||
| { | ||
| uint32_t width, height, imageArraySize; | ||
| ::outImage.GetDimensions(width, height, imageArraySize); | ||
| tracePixel(int32_t2(threadID.x % width, threadID.x / width), polygonMethod); | ||
| } | ||
| #endif | ||
|
|
||
| #if PATH_TRACER_ENABLE_PERSISTENT | ||
| void runPersistent(NEEPolygonMethod polygonMethod) | ||
| { | ||
| uint32_t width, height, imageArraySize; | ||
| ::outImage.GetDimensions(width, height, imageArraySize); | ||
| const uint32_t numWorkgroupsX = width / RenderWorkgroupSizeSqrt; | ||
| const uint32_t numWorkgroupsY = height / RenderWorkgroupSizeSqrt; | ||
|
|
||
| [loop] | ||
| for (uint32_t wgBase = glsl::gl_WorkGroupID().x; wgBase < numWorkgroupsX * numWorkgroupsY; wgBase += glsl::gl_NumWorkGroups().x) | ||
| { | ||
| const int32_t2 wgCoords = int32_t2(wgBase % numWorkgroupsX, wgBase / numWorkgroupsX); | ||
| morton::code<true, 32, 2> mc; | ||
| mc.value = glsl::gl_LocalInvocationIndex().x; | ||
| const int32_t2 localCoords = _static_cast<int32_t2>(mc); | ||
| tracePixel(wgCoords * int32_t2(RenderWorkgroupSizeSqrt, RenderWorkgroupSizeSqrt) + localCoords, polygonMethod); | ||
| } | ||
| } | ||
| #endif |
There was a problem hiding this comment.
A lot of this stuff would be nicer if you packaged it into a struct with constexpr members of templates and used NBL_IF_CONSTEXPR wherever possible.
| video::SPhysicalDeviceLimits retval = device_base_t::getRequiredDeviceLimits(); | ||
| retval.storagePushConstant16 = true; | ||
| auto retval = device_base_t::getPreferredDeviceFeatures(); | ||
| retval.pipelineExecutableInfo = true; |
There was a problem hiding this comment.
Also output the pipeline info, like the example used to.
| const ImGuiViewport* viewport = ImGui::GetMainViewport(); | ||
| const ImVec2 viewportPos = viewport->Pos; | ||
| const ImVec2 viewportSize = viewport->Size; | ||
| const ImGuiStyle& style = ImGui::GetStyle(); | ||
| const float panelMargin = 10.f; | ||
| const auto currentGeometry = static_cast<E_LIGHT_GEOMETRY>(guiControlled.PTPipeline); | ||
| const auto requestedMethod = static_cast<E_POLYGON_METHOD>(guiControlled.polygonMethod); | ||
| const auto currentVariant = getRenderVariantInfo(currentGeometry, guiControlled.usePersistentWorkGroups, requestedMethod); | ||
| const size_t readyRenderPipelines = getReadyRenderPipelineCount(); | ||
| const size_t totalRenderPipelines = getKnownRenderPipelineCount(); | ||
| const size_t readyTotalPipelines = readyRenderPipelines + (m_resolvePipelineState.pipeline ? 1ull : 0ull); | ||
| const size_t totalKnownPipelines = totalRenderPipelines + 1ull; | ||
| const size_t runningPipelineBuilds = getRunningPipelineBuildCount(); | ||
| const size_t queuedPipelineBuilds = m_pipelineCache.warmup.queue.size(); | ||
| const bool warmupInProgress = m_startupLog.hasPathtraceOutput && !m_pipelineCache.warmup.loggedComplete; | ||
| const char* const effectiveEntryPoint = currentVariant.entryPoint; | ||
| struct SFloatSliderRow | ||
| { | ||
| const char* label; | ||
| float* value; | ||
| float min; | ||
| float max; | ||
| const char* format; | ||
| }; | ||
| struct SIntSliderRow | ||
| { | ||
| const char* label; | ||
| int* value; | ||
| int min; | ||
| int max; | ||
| }; | ||
| struct SCheckboxRow | ||
| { | ||
| const char* label; | ||
| bool* value; | ||
| }; | ||
| struct SComboRow | ||
| { | ||
| const char* label; | ||
| int* value; | ||
| const char* const* items; | ||
| int count; | ||
| }; | ||
| struct STextRow | ||
| { | ||
| const char* label; | ||
| std::string value; | ||
| }; | ||
| const auto calcMaxTextWidth = [](const auto& items, auto&& toText) -> float | ||
| { | ||
| float width = 0.f; | ||
| for (const auto& item : items) | ||
| width = std::max(width, ImGui::CalcTextSize(toText(item)).x); | ||
| return width; | ||
| }; | ||
| const auto makeReadyText = [](const size_t ready, const size_t total) -> std::string | ||
| { | ||
| return std::to_string(ready) + "/" + std::to_string(total); | ||
| }; | ||
| const auto makeRunQueueText = [](const size_t running, const size_t queued) -> std::string | ||
| { | ||
| return std::to_string(running) + " / " + std::to_string(queued); | ||
| }; | ||
| const auto& shaderNames = this_example::getLightGeometryNamePointers(); | ||
| const auto& polygonMethodNames = this_example::getPolygonMethodNamePointers(); | ||
| const std::string pipelineStatusText = !m_startupLog.hasPathtraceOutput ? | ||
| "Building pipeline..." : | ||
| (warmupInProgress ? | ||
| ("Warmup " + std::to_string(readyTotalPipelines) + "/" + std::to_string(totalKnownPipelines)) : | ||
| "All pipelines ready"); | ||
| const std::string cacheStateText = m_pipelineCache.loadedFromDisk ? "loaded from disk" : "cold start"; | ||
| const std::string trimCacheText = std::to_string(m_pipelineCache.trimmedShaders.loadedFromDiskCount + m_pipelineCache.trimmedShaders.generatedCount) + " ready"; | ||
| const std::string parallelismText = std::to_string(m_pipelineCache.warmup.budget); | ||
| const std::string renderStateText = makeReadyText(readyTotalPipelines, totalKnownPipelines); | ||
| const std::string warmupStateText = makeRunQueueText(runningPipelineBuilds, queuedPipelineBuilds); | ||
| const std::string cursorText = "cursor " + std::to_string(static_cast<int>(io.MousePos.x)) + " " + std::to_string(static_cast<int>(io.MousePos.y)); | ||
| const SFloatSliderRow cameraFloatRows[] = { | ||
| { "move", &guiControlled.moveSpeed, 0.1f, 10.f, "%.2f" }, | ||
| { "rotate", &guiControlled.rotateSpeed, 0.1f, 10.f, "%.2f" }, | ||
| { "fov", &guiControlled.fov, 20.f, 150.f, "%.0f" }, | ||
| { "zNear", &guiControlled.zNear, 0.1f, 100.f, "%.2f" }, | ||
| { "zFar", &guiControlled.zFar, 110.f, 10000.f, "%.0f" }, | ||
| }; | ||
| const SComboRow renderComboRows[] = { | ||
| { "shader", &guiControlled.PTPipeline, shaderNames.data(), static_cast<int>(shaderNames.size()) }, | ||
| { "method", &guiControlled.polygonMethod, polygonMethodNames.data(), static_cast<int>(polygonMethodNames.size()) }, | ||
| }; | ||
| const SIntSliderRow renderIntRows[] = { | ||
| { "spp", &guiControlled.spp, 1, MaxSamplesBuffer }, | ||
| { "depth", &guiControlled.depth, 1, MaxBufferDimensions / 4 }, | ||
| }; | ||
| const SCheckboxRow renderCheckboxRows[] = { | ||
| { "persistent WG", &guiControlled.usePersistentWorkGroups }, | ||
| }; | ||
| const SCheckboxRow rwmcCheckboxRows[] = { | ||
| { "enable", &guiControlled.useRWMC }, | ||
| }; | ||
| const SFloatSliderRow rwmcFloatRows[] = { | ||
| { "start", &guiControlled.rwmcParams.start, 1.0f, 32.0f, "%.3f" }, | ||
| { "base", &guiControlled.rwmcParams.base, 1.0f, 32.0f, "%.3f" }, | ||
| { "min rel.", &guiControlled.rwmcParams.minReliableLuma, 0.1f, 1024.0f, "%.3f" }, | ||
| { "kappa", &guiControlled.rwmcParams.kappa, 0.1f, 1024.0f, "%.3f" }, | ||
| }; | ||
| const STextRow diagnosticsRows[] = { | ||
| { "geometry", system::to_string(currentGeometry) }, | ||
| { "req. method", system::to_string(requestedMethod) }, | ||
| { "eff. method", system::to_string(currentVariant.effectiveMethod) }, | ||
| { "entrypoint", effectiveEntryPoint }, | ||
| { "config", std::string(BuildConfigName) }, | ||
| { "cache", cacheStateText }, | ||
| { "trim cache", trimCacheText }, | ||
| { "parallel", parallelismText }, | ||
| { "render", renderStateText }, | ||
| { "run/queue", warmupStateText }, | ||
| }; | ||
| const char* const standaloneTexts[] = { | ||
| "PATH_TRACER", | ||
| "Home camera End light", | ||
| pipelineStatusText.c_str(), | ||
| cursorText.c_str(), | ||
| }; | ||
| const char* const sliderPreviewTexts[] = { | ||
| "10000.000", | ||
| "1024.000", | ||
| effectiveEntryPoint, | ||
| BuildConfigName.data(), | ||
| cacheStateText.c_str(), | ||
| renderStateText.c_str(), | ||
| warmupStateText.c_str(), | ||
| }; | ||
| const float maxStandaloneTextWidth = calcMaxTextWidth(standaloneTexts, [](const char* text) { return text; }); | ||
| const float maxLabelTextWidth = std::max({ | ||
| calcMaxTextWidth(cameraFloatRows, [](const auto& row) { return row.label; }), | ||
| calcMaxTextWidth(renderComboRows, [](const auto& row) { return row.label; }), | ||
| calcMaxTextWidth(renderIntRows, [](const auto& row) { return row.label; }), | ||
| calcMaxTextWidth(renderCheckboxRows, [](const auto& row) { return row.label; }), | ||
| calcMaxTextWidth(rwmcCheckboxRows, [](const auto& row) { return row.label; }), | ||
| calcMaxTextWidth(rwmcFloatRows, [](const auto& row) { return row.label; }), | ||
| calcMaxTextWidth(diagnosticsRows, [](const auto& row) { return row.label; }) | ||
| }); | ||
| const float comboPreviewWidth = std::max( | ||
| calcMaxTextWidth(shaderNames, [](const char* text) { return text; }), | ||
| calcMaxTextWidth(polygonMethodNames, [](const char* text) { return text; }) | ||
| ); | ||
| const float sliderPreviewWidth = calcMaxTextWidth(sliderPreviewTexts, [](const char* text) { return text; }); | ||
| const float tableLabelColumnWidth = std::ceil(maxLabelTextWidth + style.FramePadding.x * 2.f + style.CellPadding.x * 2.f); | ||
| const float tableValueColumnMinWidth = | ||
| std::ceil(std::max(comboPreviewWidth, sliderPreviewWidth) + style.FramePadding.x * 2.f + style.ItemInnerSpacing.x + ImGui::GetFrameHeight() + 18.f); | ||
| const float sectionTableWidth = tableLabelColumnWidth + tableValueColumnMinWidth + style.CellPadding.x * 4.f + style.ItemSpacing.x; | ||
| const float contentWidth = std::max(maxStandaloneTextWidth, sectionTableWidth); | ||
| const float panelWidth = std::min( | ||
| std::ceil(contentWidth + style.WindowPadding.x * 2.f), | ||
| std::max(0.f, viewportSize.x - panelMargin * 2.f) | ||
| ); | ||
| const float panelMaxHeight = ImMax(300.0f, viewportSize.y * 0.84f); | ||
| ImGui::SetNextWindowPos(ImVec2(viewportPos.x + panelMargin, viewportPos.y + panelMargin), ImGuiCond_Always); | ||
| ImGui::SetNextWindowSizeConstraints(ImVec2(panelWidth, 0.0f), ImVec2(panelWidth, panelMaxHeight)); | ||
| ImGui::SetNextWindowBgAlpha(0.72f); | ||
| ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(5.f, 5.f)); | ||
| ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 10.f); | ||
| ImGui::PushStyleVar(ImGuiStyleVar_FrameRounding, 4.f); | ||
| ImGui::PushStyleVar(ImGuiStyleVar_GrabRounding, 4.f); | ||
| ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(5.f, 2.f)); | ||
| ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0.08f, 0.10f, 0.13f, 0.88f)); | ||
| ImGui::PushStyleColor(ImGuiCol_Border, ImVec4(0.32f, 0.39f, 0.47f, 0.65f)); | ||
| ImGui::PushStyleColor(ImGuiCol_Header, ImVec4(0.18f, 0.28f, 0.36f, 0.92f)); | ||
| ImGui::PushStyleColor(ImGuiCol_HeaderHovered, ImVec4(0.24f, 0.36f, 0.46f, 0.96f)); | ||
| ImGui::PushStyleColor(ImGuiCol_HeaderActive, ImVec4(0.28f, 0.42f, 0.54f, 1.0f)); | ||
|
|
||
| const ImGuiWindowFlags panelFlags = | ||
| ImGuiWindowFlags_NoDecoration | | ||
| ImGuiWindowFlags_NoMove | | ||
| ImGuiWindowFlags_NoSavedSettings | | ||
| ImGuiWindowFlags_NoNav | | ||
| ImGuiWindowFlags_AlwaysAutoResize | | ||
| ImGuiWindowFlags_NoResize; | ||
|
|
||
| const auto beginSectionTable = [](const char* id) -> bool | ||
| { | ||
| return ImGui::BeginTable(id, 2, ImGuiTableFlags_SizingFixedFit); | ||
| }; | ||
| const auto setupSectionTable = [tableLabelColumnWidth]() -> void | ||
| { | ||
| ImGui::TableSetupColumn("label", ImGuiTableColumnFlags_WidthFixed, tableLabelColumnWidth); | ||
| ImGui::TableSetupColumn("value", ImGuiTableColumnFlags_WidthStretch); | ||
| }; | ||
| const auto sliderFloatRow = [](const SFloatSliderRow& row) -> void | ||
| { | ||
| ImGui::TableNextRow(); | ||
| ImGui::TableSetColumnIndex(0); | ||
| ImGui::TextUnformatted(row.label); | ||
| ImGui::TableSetColumnIndex(1); | ||
| ImGui::SetNextItemWidth(-FLT_MIN); | ||
| ImGui::PushID(row.label); | ||
| ImGui::SliderFloat("##value", row.value, row.min, row.max, row.format, ImGuiSliderFlags_AlwaysClamp); | ||
| ImGui::PopID(); | ||
| }; | ||
| const auto sliderIntRow = [](const SIntSliderRow& row) -> void | ||
| { | ||
| ImGui::TableNextRow(); | ||
| ImGui::TableSetColumnIndex(0); | ||
| ImGui::TextUnformatted(row.label); | ||
| ImGui::TableSetColumnIndex(1); | ||
| ImGui::SetNextItemWidth(-FLT_MIN); | ||
| ImGui::PushID(row.label); | ||
| ImGui::SliderInt("##value", row.value, row.min, row.max); | ||
| ImGui::PopID(); | ||
| }; | ||
| const auto comboRow = [](const SComboRow& row) -> void | ||
| { | ||
| ImGui::TableNextRow(); | ||
| ImGui::TableSetColumnIndex(0); | ||
| ImGui::TextUnformatted(row.label); | ||
| ImGui::TableSetColumnIndex(1); | ||
| ImGui::SetNextItemWidth(-FLT_MIN); | ||
| ImGui::PushID(row.label); | ||
| ImGui::Combo("##value", row.value, row.items, row.count); | ||
| ImGui::PopID(); | ||
| }; | ||
| const auto checkboxRow = [](const SCheckboxRow& row) -> void | ||
| { | ||
| ImGui::TableNextRow(); | ||
| ImGui::TableSetColumnIndex(0); | ||
| ImGui::TextUnformatted(row.label); | ||
| ImGui::TableSetColumnIndex(1); | ||
| ImGui::PushID(row.label); | ||
| ImGui::Checkbox("##value", row.value); | ||
| ImGui::PopID(); | ||
| }; | ||
| const auto textRow = [](const STextRow& row) -> void | ||
| { | ||
| ImGui::TableNextRow(); | ||
| ImGui::TableSetColumnIndex(0); | ||
| ImGui::TextUnformatted(row.label); | ||
| ImGui::TableSetColumnIndex(1); | ||
| ImGui::TextUnformatted(row.value.c_str()); | ||
| }; | ||
|
|
||
| ImGui::Text("Press Home to reset camera."); | ||
| ImGui::Text("Press End to reset light."); | ||
| if (ImGui::Begin("Path Tracer Controls", nullptr, panelFlags)) | ||
| { | ||
| ImGui::TextUnformatted("PATH_TRACER"); | ||
| ImGui::Separator(); | ||
| ImGui::TextDisabled("Home camera End light"); | ||
| if (!m_startupLog.hasPathtraceOutput) | ||
| ImGui::TextColored(ImVec4(0.83f, 0.86f, 0.90f, 1.0f), "Building pipeline..."); | ||
| else if (warmupInProgress) | ||
| ImGui::TextColored(ImVec4(0.83f, 0.86f, 0.90f, 1.0f), "Warmup %zu/%zu", readyTotalPipelines, totalKnownPipelines); | ||
| else | ||
| ImGui::TextDisabled("All pipelines ready"); | ||
| ImGui::Dummy(ImVec2(0.f, 2.f)); | ||
|
|
||
| if (ImGui::CollapsingHeader("Controls", ImGuiTreeNodeFlags_DefaultOpen)) | ||
| { | ||
| if (ImGui::CollapsingHeader("Camera")) | ||
| { | ||
| if (beginSectionTable("##camera_controls_table")) | ||
| { | ||
| setupSectionTable(); | ||
| for (const auto& row : cameraFloatRows) | ||
| sliderFloatRow(row); | ||
| ImGui::EndTable(); | ||
| } | ||
| } | ||
|
|
||
| ImGui::SliderFloat("Move speed", &guiControlled.moveSpeed, 0.1f, 10.f); | ||
| ImGui::SliderFloat("Rotate speed", &guiControlled.rotateSpeed, 0.1f, 10.f); | ||
| ImGui::SliderFloat("Fov", &guiControlled.fov, 20.f, 150.f); | ||
| ImGui::SliderFloat("zNear", &guiControlled.zNear, 0.1f, 100.f); | ||
| ImGui::SliderFloat("zFar", &guiControlled.zFar, 110.f, 10000.f); | ||
| ImGui::Combo("Shader", &guiControlled.PTPipeline, shaderNames, E_LIGHT_GEOMETRY::ELG_COUNT); | ||
| ImGui::SliderInt("SPP", &guiControlled.spp, 1, MaxSamplesBuffer); | ||
| ImGui::SliderInt("Depth", &guiControlled.depth, 1, MaxBufferDimensions / 4); | ||
| ImGui::Checkbox("Persistent WorkGroups", &guiControlled.usePersistentWorkGroups); | ||
| if (ImGui::CollapsingHeader("Render", ImGuiTreeNodeFlags_DefaultOpen)) | ||
| { | ||
| if (beginSectionTable("##render_controls_table")) | ||
| { | ||
| setupSectionTable(); | ||
| for (const auto& row : renderComboRows) | ||
| comboRow(row); | ||
| for (const auto& row : renderIntRows) | ||
| sliderIntRow(row); | ||
| for (const auto& row : renderCheckboxRows) | ||
| checkboxRow(row); | ||
| ImGui::EndTable(); | ||
| } | ||
| } | ||
|
|
||
| ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); | ||
| if (ImGui::CollapsingHeader("RWMC", ImGuiTreeNodeFlags_DefaultOpen)) | ||
| { | ||
| if (beginSectionTable("##rwmc_controls_table")) | ||
| { | ||
| setupSectionTable(); | ||
| for (const auto& row : rwmcCheckboxRows) | ||
| checkboxRow(row); | ||
| for (const auto& row : rwmcFloatRows) | ||
| sliderFloatRow(row); | ||
| ImGui::EndTable(); | ||
| } | ||
| } | ||
|
|
||
| ImGui::Text("\nRWMC settings:"); | ||
| ImGui::Checkbox("Enable RWMC", &guiControlled.useRWMC); | ||
| ImGui::SliderFloat("start", &guiControlled.rwmcParams.start, 1.0f, 32.0f); | ||
| ImGui::SliderFloat("base", &guiControlled.rwmcParams.base, 1.0f, 32.0f); | ||
| ImGui::SliderFloat("minReliableLuma", &guiControlled.rwmcParams.minReliableLuma, 0.1f, 1024.0f); | ||
| ImGui::SliderFloat("kappa", &guiControlled.rwmcParams.kappa, 0.1f, 1024.0f); | ||
| if (ImGui::CollapsingHeader("Diagnostics")) | ||
| { | ||
| if (beginSectionTable("##diagnostics_controls_table")) | ||
| { | ||
| setupSectionTable(); | ||
| for (const auto& row : diagnosticsRows) | ||
| textRow(row); | ||
| ImGui::EndTable(); | ||
| } | ||
| } | ||
|
|
||
| ImGui::Dummy(ImVec2(0.f, 2.f)); | ||
| ImGui::Separator(); | ||
| ImGui::TextDisabled("%s", cursorText.c_str()); | ||
| } | ||
| } | ||
| ImGui::End(); | ||
|
|
||
| if (!m_startupLog.hasPathtraceOutput || warmupInProgress) | ||
| { | ||
| ImGui::SetNextWindowPos(ImVec2(viewportPos.x + viewportSize.x - panelMargin, viewportPos.y + panelMargin), ImGuiCond_Always, ImVec2(1.0f, 0.0f)); | ||
| ImGui::SetNextWindowBgAlpha(0.62f); | ||
| ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(12.f, 10.f)); | ||
| ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 8.f); | ||
| ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0.07f, 0.09f, 0.12f, 0.90f)); | ||
| const ImGuiWindowFlags overlayFlags = | ||
| ImGuiWindowFlags_NoDecoration | | ||
| ImGuiWindowFlags_NoSavedSettings | | ||
| ImGuiWindowFlags_NoMove | | ||
| ImGuiWindowFlags_NoNav | | ||
| ImGuiWindowFlags_AlwaysAutoResize | | ||
| ImGuiWindowFlags_NoInputs; | ||
| if (ImGui::Begin("##path_tracer_status_overlay", nullptr, overlayFlags)) | ||
| { | ||
| ImGui::TextUnformatted(pipelineStatusText.c_str()); | ||
| ImGui::Text("Run %zu Queue %zu", runningPipelineBuilds, queuedPipelineBuilds); | ||
| ImGui::Text("Cache: %s", m_pipelineCache.loadedFromDisk ? "disk" : "cold"); | ||
| } | ||
| ImGui::End(); | ||
| ImGui::PopStyleColor(1); | ||
| ImGui::PopStyleVar(2); | ||
| } | ||
| ImGui::PopStyleColor(5); | ||
| ImGui::PopStyleVar(5); |
There was a problem hiding this comment.
can we tuck the UI away in a header ?
| template<core::StringLiteral ShaderKey> | ||
| smart_refctd_ptr<IShader> loadPrecompiledShader() | ||
| { | ||
| IAssetLoader::SAssetLoadParams lp = {}; | ||
| lp.logger = m_logger.get(); | ||
| lp.workingDirectory = "app_resources"; | ||
|
|
||
| const auto key = nbl::this_example::builtin::build::get_spirv_key<ShaderKey>(m_device.get()); | ||
| auto assetBundle = m_assetMgr->getAsset(key, lp); | ||
| const auto assets = assetBundle.getContents(); | ||
| if (assets.empty()) | ||
| { | ||
| m_logger->log("Could not load precompiled shader: %s", ILogger::ELL_ERROR, key.c_str()); | ||
| return nullptr; | ||
| } | ||
|
|
||
| IGPUComputePipeline* pickPTPipeline() | ||
| auto shader = IAsset::castDown<IShader>(assets[0]); | ||
| if (!shader) | ||
| { | ||
| m_logger->log("Failed to cast %s asset to IShader!", ILogger::ELL_ERROR, key.c_str()); | ||
| return nullptr; | ||
| } | ||
|
|
||
| shader->setFilePathHint(std::string(std::string_view(ShaderKey.value))); | ||
| return shader; | ||
| } | ||
|
|
||
| void logStartupEvent(const char* const eventName) | ||
| { | ||
| const auto elapsedMs = std::chrono::duration_cast<std::chrono::milliseconds>(clock_t::now() - m_startupBeganAt).count(); | ||
| m_logger->log("PATH_TRACER_STARTUP %s_ms=%lld", ILogger::ELL_INFO, eventName, static_cast<long long>(elapsedMs)); | ||
| } | ||
|
|
||
| bool parseCommandLine() | ||
| { | ||
| argparse::ArgumentParser parser("31_hlslpathtracer"); | ||
| parser.add_argument("--pipeline-cache-dir") | ||
| .nargs(1) | ||
| .help("Override the PATH_TRACER pipeline cache root directory"); | ||
| parser.add_argument("--clear-pipeline-cache") | ||
| .help("Clear the PATH_TRACER cache root before startup") | ||
| .flag(); | ||
|
|
||
| try | ||
| { | ||
| parser.parse_args({ argv.data(), argv.data() + argv.size() }); | ||
| } | ||
| catch (const std::exception& e) | ||
| { | ||
| m_logger->log("Failed to parse arguments: %s", ILogger::ELL_ERROR, e.what()); | ||
| return false; | ||
| } | ||
|
|
||
| m_commandLine.pipelineCacheDirOverride.reset(); | ||
| if (parser.present("--pipeline-cache-dir")) | ||
| m_commandLine.pipelineCacheDirOverride = path(parser.get<std::string>("--pipeline-cache-dir")); | ||
| m_commandLine.clearPipelineCache = parser.get<bool>("--clear-pipeline-cache"); | ||
| return true; | ||
| } | ||
|
|
||
| static std::string hashToHex(const core::blake3_hash_t& hash) | ||
| { | ||
| static constexpr char digits[] = "0123456789abcdef"; | ||
| static constexpr size_t HexCharsPerByte = 2ull; | ||
| static constexpr uint32_t HighNibbleBitOffset = 4u; | ||
| static constexpr uint8_t NibbleMask = 0xfu; | ||
| const auto hashByteCount = sizeof(hash.data); | ||
| std::string retval; | ||
| retval.resize(hashByteCount * HexCharsPerByte); | ||
| for (size_t i = 0ull; i < hashByteCount; ++i) | ||
| { | ||
| const auto hexOffset = i * HexCharsPerByte; | ||
| retval[hexOffset] = digits[(hash.data[i] >> HighNibbleBitOffset) & NibbleMask]; | ||
| retval[hexOffset + 1ull] = digits[hash.data[i] & NibbleMask]; | ||
| } | ||
| return retval; | ||
| } | ||
|
|
||
| path getDefaultPipelineCacheDir() const | ||
| { | ||
| if (const auto* localAppData = std::getenv("LOCALAPPDATA"); localAppData && localAppData[0] != '\0') | ||
| return path(localAppData) / "nabla/examples/31_HLSLPathTracer/pipeline/cache"; | ||
| return localOutputCWD / "pipeline/cache"; | ||
| } | ||
|
|
||
| path getRuntimeConfigPath() const | ||
| { | ||
| return system::executableDirectory() / RuntimeConfigFilename; | ||
| } | ||
|
|
||
| std::optional<path> tryGetPipelineCacheDirFromRuntimeConfig() const | ||
| { | ||
| const auto configPath = getRuntimeConfigPath(); | ||
| if (!m_system->exists(configPath, IFile::ECF_READ)) | ||
| return std::nullopt; | ||
|
|
||
| std::ifstream input(configPath); | ||
| if (!input.is_open()) | ||
| return std::nullopt; | ||
|
|
||
| nlohmann::json json; | ||
| try | ||
| { | ||
| input >> json; | ||
| } | ||
| catch (const std::exception& e) | ||
| { | ||
| m_logger->log("Failed to parse PATH_TRACER runtime config %s: %s", ILogger::ELL_WARNING, configPath.string().c_str(), e.what()); | ||
| return std::nullopt; | ||
| } | ||
|
|
||
| const auto cacheRootIt = json.find("cache_root"); | ||
| if (cacheRootIt == json.end() || !cacheRootIt->is_string()) | ||
| return std::nullopt; | ||
|
|
||
| const auto cacheRoot = cacheRootIt->get<std::string>(); | ||
| if (cacheRoot.empty()) | ||
| return std::nullopt; | ||
|
|
||
| const path relativeRoot(cacheRoot); | ||
| if (relativeRoot.is_absolute()) | ||
| { | ||
| m_logger->log("Ignoring absolute cache_root in %s", ILogger::ELL_WARNING, configPath.string().c_str()); | ||
| return std::nullopt; | ||
| } | ||
|
|
||
| return (configPath.parent_path() / relativeRoot).lexically_normal(); | ||
| } | ||
|
|
||
| path getPipelineCacheRootDir() const | ||
| { | ||
| if (m_commandLine.pipelineCacheDirOverride.has_value()) | ||
| return m_commandLine.pipelineCacheDirOverride.value(); | ||
| if (const auto runtimeConfigDir = tryGetPipelineCacheDirFromRuntimeConfig(); runtimeConfigDir.has_value()) | ||
| return runtimeConfigDir.value(); | ||
| return getDefaultPipelineCacheDir(); | ||
| } | ||
|
|
||
| path getPipelineCacheBlobPath() const | ||
| { | ||
| const auto key = m_device->getPipelineCacheKey(); | ||
| return getPipelineCacheRootDir() / "blob" / BuildConfigName / (std::string(key.deviceAndDriverUUID) + ".bin"); | ||
| } | ||
|
|
||
| path getSpirvCacheDir() const | ||
| { | ||
| return getPipelineCacheRootDir() / "spirv" / BuildConfigName; | ||
| } | ||
|
|
||
| path getTrimmedShaderCachePath(const IShader* shader, const char* const entryPoint) const | ||
| { | ||
| core::blake3_hasher hasher; | ||
| hasher << std::string_view(shader ? shader->getFilepathHint() : std::string_view{}); | ||
| hasher << std::string_view(entryPoint); | ||
| return getSpirvCacheDir() / (hashToHex(static_cast<core::blake3_hash_t>(hasher)) + ".spv"); | ||
| } | ||
|
|
||
| path getValidatedSpirvMarkerPath(const ICPUBuffer* spirvBuffer) const | ||
| { | ||
| IGPUComputePipeline* pipeline; | ||
| if (guiControlled.useRWMC) | ||
| pipeline = guiControlled.usePersistentWorkGroups ? m_PTHLSLPersistentWGPipelinesRWMC[guiControlled.PTPipeline].get() : m_PTHLSLPipelinesRWMC[guiControlled.PTPipeline].get(); | ||
| else | ||
| pipeline = guiControlled.usePersistentWorkGroups ? m_PTHLSLPersistentWGPipelines[guiControlled.PTPipeline].get() : m_PTHLSLPipelines[guiControlled.PTPipeline].get(); | ||
| auto contentHash = spirvBuffer->getContentHash(); | ||
| if (contentHash == ICPUBuffer::INVALID_HASH) | ||
| contentHash = spirvBuffer->computeContentHash(); | ||
| return getSpirvCacheDir() / (hashToHex(contentHash) + ".hash"); | ||
| } | ||
|
|
||
| size_t getBackgroundPipelineBuildBudget() const | ||
| { | ||
| static constexpr uint32_t ReservedForegroundThreadCount = 1u; | ||
| const auto concurrency = std::thread::hardware_concurrency(); | ||
| if (concurrency > ReservedForegroundThreadCount) | ||
| return static_cast<size_t>(concurrency - ReservedForegroundThreadCount); | ||
| return ReservedForegroundThreadCount; | ||
| } | ||
|
|
||
| return pipeline; | ||
| bool ensureCacheDirectoryExists(const path& dir, const char* const description) | ||
| { | ||
| if (dir.empty() || m_system->isDirectory(dir)) | ||
| return true; | ||
|
|
||
| if (m_system->createDirectory(dir) || m_system->isDirectory(dir)) | ||
| return true; | ||
|
|
||
| m_logger->log("Failed to create %s %s", ILogger::ELL_WARNING, description, dir.string().c_str()); | ||
| return false; | ||
| } | ||
|
|
||
| bool finalizeCacheFile(const path& tempPath, const path& finalPath, const char* const description) | ||
| { | ||
| m_system->deleteFile(finalPath); | ||
| const auto ec = m_system->moveFileOrDirectory(tempPath, finalPath); | ||
| if (!ec) | ||
| return true; | ||
|
|
||
| m_system->deleteFile(tempPath); | ||
| m_logger->log("Failed to finalize %s %s", ILogger::ELL_WARNING, description, finalPath.string().c_str()); | ||
| return false; | ||
| } | ||
|
|
||
| void initializePipelineCache() | ||
| { | ||
| m_pipelineCache.blobPath = getPipelineCacheBlobPath(); | ||
| m_pipelineCache.trimmedShaders.rootDir = getSpirvCacheDir(); | ||
| m_pipelineCache.trimmedShaders.validationDir = getSpirvCacheDir(); | ||
| if (!m_pipelineCache.trimmedShaders.trimmer) | ||
| m_pipelineCache.trimmedShaders.trimmer = core::make_smart_refctd_ptr<asset::ISPIRVEntryPointTrimmer>(); | ||
| const auto pipelineCacheRootDir = getPipelineCacheRootDir(); | ||
| std::error_code ec; | ||
| m_pipelineCache.loadedBytes = 0ull; | ||
| m_pipelineCache.loadedFromDisk = false; | ||
| m_pipelineCache.clearedOnStartup = m_commandLine.clearPipelineCache; | ||
| m_pipelineCache.newlyReadyPipelinesSinceLastSave = 0ull; | ||
| m_pipelineCache.checkpointedAfterFirstSubmit = false; | ||
| m_pipelineCache.lastSaveAt = clock_t::now(); | ||
| if (m_commandLine.clearPipelineCache) | ||
| { | ||
| if (m_system->isDirectory(pipelineCacheRootDir) && !m_system->deleteDirectory(pipelineCacheRootDir)) | ||
| m_logger->log("Failed to clear pipeline cache directory %s", ILogger::ELL_WARNING, pipelineCacheRootDir.string().c_str()); | ||
| else | ||
| m_logger->log("PATH_TRACER_PIPELINE_CACHE clear root=%s", ILogger::ELL_INFO, pipelineCacheRootDir.string().c_str()); | ||
| } | ||
| ensureCacheDirectoryExists(m_pipelineCache.blobPath.parent_path(), "pipeline cache directory"); | ||
| ensureCacheDirectoryExists(m_pipelineCache.trimmedShaders.rootDir, "trimmed shader cache directory"); | ||
| ensureCacheDirectoryExists(m_pipelineCache.trimmedShaders.validationDir, "validated shader cache directory"); | ||
|
|
||
| std::vector<uint8_t> initialData; | ||
| { | ||
| std::ifstream input(m_pipelineCache.blobPath, std::ios::binary | std::ios::ate); | ||
| if (input.is_open()) | ||
| { | ||
| const auto size = input.tellg(); | ||
| if (size > 0) | ||
| { | ||
| initialData.resize(static_cast<size_t>(size)); | ||
| input.seekg(0, std::ios::beg); | ||
| input.read(reinterpret_cast<char*>(initialData.data()), static_cast<std::streamsize>(initialData.size())); | ||
| if (!input) | ||
| initialData.clear(); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| std::span<const uint8_t> initialDataSpan = {}; | ||
| if (!initialData.empty()) | ||
| { | ||
| initialDataSpan = { initialData.data(), initialData.size() }; | ||
| m_pipelineCache.loadedBytes = initialData.size(); | ||
| m_pipelineCache.loadedFromDisk = true; | ||
| } | ||
|
|
||
| m_pipelineCache.object = m_device->createPipelineCache(initialDataSpan); | ||
| if (!m_pipelineCache.object && !initialData.empty()) | ||
| { | ||
| m_logger->log("Pipeline cache blob at %s was rejected. Falling back to empty cache.", ILogger::ELL_WARNING, m_pipelineCache.blobPath.string().c_str()); | ||
| m_pipelineCache.object = m_device->createPipelineCache(std::span<const uint8_t>{}); | ||
| } | ||
| if (!m_pipelineCache.object) | ||
| { | ||
| m_logger->log("Failed to create PATH_TRACER pipeline cache.", ILogger::ELL_WARNING); | ||
| return; | ||
| } | ||
|
|
||
| m_pipelineCache.object->setObjectDebugName("PATH_TRACER Pipeline Cache"); | ||
| m_logger->log("PATH_TRACER pipeline cache path: %s", ILogger::ELL_INFO, m_pipelineCache.blobPath.string().c_str()); | ||
| m_logger->log("PATH_TRACER trimmed shader cache path: %s", ILogger::ELL_INFO, m_pipelineCache.trimmedShaders.rootDir.string().c_str()); | ||
| m_logger->log("PATH_TRACER validated shader cache path: %s", ILogger::ELL_INFO, m_pipelineCache.trimmedShaders.validationDir.string().c_str()); | ||
| m_logger->log( | ||
| "PATH_TRACER_PIPELINE_CACHE init clear=%u loaded_from_disk=%u loaded_bytes=%zu path=%s", | ||
| ILogger::ELL_INFO, | ||
| m_pipelineCache.clearedOnStartup ? 1u : 0u, | ||
| m_pipelineCache.loadedFromDisk ? 1u : 0u, | ||
| m_pipelineCache.loadedBytes, | ||
| m_pipelineCache.blobPath.string().c_str() | ||
| ); | ||
| if (!initialData.empty()) | ||
| m_logger->log("Loaded PATH_TRACER pipeline cache blob: %s", ILogger::ELL_INFO, m_pipelineCache.blobPath.string().c_str()); | ||
| } | ||
|
|
||
| smart_refctd_ptr<IShader> tryLoadTrimmedShaderFromDisk(const IShader* sourceShader, const char* const entryPoint) | ||
| { | ||
| const auto cachePath = getTrimmedShaderCachePath(sourceShader, entryPoint); | ||
| std::ifstream input(cachePath, std::ios::binary | std::ios::ate); | ||
| if (!input.is_open()) | ||
| return nullptr; | ||
|
|
||
| const auto size = input.tellg(); | ||
| if (size <= 0) | ||
| return nullptr; | ||
|
|
||
| std::vector<uint8_t> bytes(static_cast<size_t>(size)); | ||
| input.seekg(0, std::ios::beg); | ||
| input.read(reinterpret_cast<char*>(bytes.data()), static_cast<std::streamsize>(bytes.size())); | ||
| if (!input) | ||
| return nullptr; | ||
|
|
||
| auto buffer = ICPUBuffer::create({ { bytes.size() }, bytes.data() }); | ||
| if (!buffer) | ||
| return nullptr; | ||
| buffer->setContentHash(buffer->computeContentHash()); | ||
| { | ||
| std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); | ||
| m_pipelineCache.trimmedShaders.loadedBytes += bytes.size(); | ||
| ++m_pipelineCache.trimmedShaders.loadedFromDiskCount; | ||
| } | ||
| m_logger->log( | ||
| "PATH_TRACER_SHADER_CACHE load entrypoint=%s bytes=%zu path=%s", | ||
| ILogger::ELL_INFO, | ||
| entryPoint, | ||
| bytes.size(), | ||
| cachePath.string().c_str() | ||
| ); | ||
| return core::make_smart_refctd_ptr<IShader>(std::move(buffer), IShader::E_CONTENT_TYPE::ECT_SPIRV, std::string(sourceShader->getFilepathHint())); | ||
| } | ||
|
|
||
| bool hasValidatedSpirvMarker(const ICPUBuffer* spirvBuffer) const | ||
| { | ||
| return m_system->exists(getValidatedSpirvMarkerPath(spirvBuffer), IFile::ECF_READ); | ||
| } | ||
|
|
||
| void saveValidatedSpirvMarker(const ICPUBuffer* spirvBuffer) | ||
| { | ||
| const auto markerPath = getValidatedSpirvMarkerPath(spirvBuffer); | ||
| if (!ensureCacheDirectoryExists(markerPath.parent_path(), "validated shader cache directory")) | ||
| return; | ||
|
|
||
| auto tempPath = markerPath; | ||
| tempPath += ".tmp"; | ||
| { | ||
| std::ofstream output(tempPath, std::ios::binary | std::ios::trunc); | ||
| if (!output.is_open()) | ||
| { | ||
| m_logger->log("Failed to open validated shader marker temp file %s", ILogger::ELL_WARNING, tempPath.string().c_str()); | ||
| return; | ||
| } | ||
| output << "ok\n"; | ||
| output.flush(); | ||
| if (!output) | ||
| { | ||
| output.close(); | ||
| m_system->deleteFile(tempPath); | ||
| m_logger->log("Failed to write validated shader marker %s", ILogger::ELL_WARNING, tempPath.string().c_str()); | ||
| return; | ||
| } | ||
| } | ||
|
|
||
| finalizeCacheFile(tempPath, markerPath, "validated shader marker"); | ||
| } | ||
|
|
||
| bool ensurePreparedShaderValidated(const smart_refctd_ptr<IShader>& preparedShader) | ||
| { | ||
| if (!preparedShader) | ||
| return false; | ||
|
|
||
| auto* const content = preparedShader->getContent(); | ||
| if (!content) | ||
| return false; | ||
|
|
||
| if (hasValidatedSpirvMarker(content)) | ||
| { | ||
| m_pipelineCache.trimmedShaders.trimmer->markValidated(content); | ||
| return true; | ||
| } | ||
|
|
||
| if (!m_pipelineCache.trimmedShaders.trimmer->ensureValidated(content, m_logger.get())) | ||
| return false; | ||
|
|
||
| saveValidatedSpirvMarker(content); | ||
| return true; | ||
| } | ||
|
|
||
| void saveTrimmedShaderToDisk(const IShader* shader, const char* const entryPoint, const path& cachePath) | ||
| { | ||
| const auto* content = shader->getContent(); | ||
| if (!content || !content->getPointer() || cachePath.empty()) | ||
| return; | ||
|
|
||
| if (!ensureCacheDirectoryExists(cachePath.parent_path(), "trimmed shader cache directory")) | ||
| return; | ||
|
|
||
| auto tempPath = cachePath; | ||
| tempPath += ".tmp"; | ||
| { | ||
| std::ofstream output(tempPath, std::ios::binary | std::ios::trunc); | ||
| if (!output.is_open()) | ||
| { | ||
| m_logger->log("Failed to open trimmed shader cache temp file %s", ILogger::ELL_WARNING, tempPath.string().c_str()); | ||
| return; | ||
| } | ||
| output.write(reinterpret_cast<const char*>(content->getPointer()), static_cast<std::streamsize>(content->getSize())); | ||
| output.flush(); | ||
| if (!output) | ||
| { | ||
| output.close(); | ||
| m_system->deleteFile(tempPath); | ||
| m_logger->log("Failed to write trimmed shader cache blob to %s", ILogger::ELL_WARNING, tempPath.string().c_str()); | ||
| return; | ||
| } | ||
| } | ||
|
|
||
| if (!finalizeCacheFile(tempPath, cachePath, "trimmed shader cache blob")) | ||
| return; | ||
|
|
||
| { | ||
| std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); | ||
| m_pipelineCache.trimmedShaders.savedBytes += content->getSize(); | ||
| ++m_pipelineCache.trimmedShaders.savedToDiskCount; | ||
| } | ||
| m_logger->log( | ||
| "PATH_TRACER_SHADER_CACHE save entrypoint=%s bytes=%zu path=%s", | ||
| ILogger::ELL_INFO, | ||
| entryPoint, | ||
| content->getSize(), | ||
| cachePath.string().c_str() | ||
| ); | ||
| } | ||
|
|
||
| smart_refctd_ptr<IShader> getPreparedShaderForEntryPoint(const smart_refctd_ptr<IShader>& shaderModule, const char* const entryPoint) | ||
| { | ||
| if (!shaderModule || shaderModule->getContentType() != IShader::E_CONTENT_TYPE::ECT_SPIRV) | ||
| return shaderModule; | ||
|
|
||
| const auto cachePath = getTrimmedShaderCachePath(shaderModule.get(), entryPoint); | ||
| const auto cacheKey = cachePath.string(); | ||
| { | ||
| std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); | ||
| const auto found = m_pipelineCache.trimmedShaders.runtimeShaders.find(cacheKey); | ||
| if (found != m_pipelineCache.trimmedShaders.runtimeShaders.end()) | ||
| return found->second; | ||
| } | ||
|
|
||
| const auto startedAt = clock_t::now(); | ||
| auto preparedShader = tryLoadTrimmedShaderFromDisk(shaderModule.get(), entryPoint); | ||
| bool cameFromDisk = static_cast<bool>(preparedShader); | ||
| bool wasTrimmed = false; | ||
| if (!preparedShader) | ||
| { | ||
| const core::set entryPoints = { asset::ISPIRVEntryPointTrimmer::EntryPoint{ .name = entryPoint, .stage = hlsl::ShaderStage::ESS_COMPUTE } }; | ||
| const auto result = m_pipelineCache.trimmedShaders.trimmer->trim(shaderModule->getContent(), entryPoints, nullptr); | ||
| if (!result) | ||
| { | ||
| m_logger->log("Failed to prepare trimmed PATH_TRACER shader for %s. Falling back to the original module.", ILogger::ELL_WARNING, entryPoint); | ||
| return shaderModule; | ||
| } | ||
| if (result.spirv) | ||
| { | ||
| result.spirv->setContentHash(result.spirv->computeContentHash()); | ||
| preparedShader = core::make_smart_refctd_ptr<IShader>(core::smart_refctd_ptr(result.spirv), IShader::E_CONTENT_TYPE::ECT_SPIRV, std::string(shaderModule->getFilepathHint())); | ||
| } | ||
| else | ||
| preparedShader = shaderModule; | ||
|
|
||
| saveTrimmedShaderToDisk(preparedShader.get(), entryPoint, cachePath); | ||
| { | ||
| std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); | ||
| ++m_pipelineCache.trimmedShaders.generatedCount; | ||
| } | ||
| wasTrimmed = (preparedShader != shaderModule); | ||
| } | ||
|
|
||
| if (!ensurePreparedShaderValidated(preparedShader)) | ||
| { | ||
| m_logger->log("Prepared PATH_TRACER shader for %s is not valid SPIR-V", ILogger::ELL_ERROR, entryPoint); | ||
| return nullptr; | ||
| } | ||
|
|
||
| { | ||
| std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); | ||
| const auto [it, inserted] = m_pipelineCache.trimmedShaders.runtimeShaders.emplace(cacheKey, preparedShader); | ||
| if (!inserted) | ||
| preparedShader = it->second; | ||
| } | ||
|
|
||
| const auto wallMs = std::chrono::duration_cast<std::chrono::milliseconds>(clock_t::now() - startedAt).count(); | ||
| m_logger->log( | ||
| "PATH_TRACER_SHADER_CACHE ready entrypoint=%s wall_ms=%lld from_disk=%u trimmed=%u", | ||
| ILogger::ELL_INFO, | ||
| entryPoint, | ||
| static_cast<long long>(wallMs), | ||
| cameFromDisk ? 1u : 0u, | ||
| wasTrimmed ? 1u : 0u | ||
| ); | ||
| return preparedShader; | ||
| } | ||
|
|
||
| void savePipelineCache() | ||
| { | ||
| if (!m_pipelineCache.object || !m_pipelineCache.dirty || m_pipelineCache.blobPath.empty()) | ||
| return; | ||
|
|
||
| const auto saveStartedAt = clock_t::now(); | ||
| auto cpuCache = m_pipelineCache.object->convertToCPUCache(); | ||
| if (!cpuCache) | ||
| return; | ||
|
|
||
| const auto& entries = cpuCache->getEntries(); | ||
| const auto found = entries.find(m_device->getPipelineCacheKey()); | ||
| if (found == entries.end() || !found->second.bin || found->second.bin->empty()) | ||
| return; | ||
|
|
||
| if (!ensureCacheDirectoryExists(m_pipelineCache.blobPath.parent_path(), "pipeline cache directory")) | ||
| return; | ||
|
|
||
| auto tempPath = m_pipelineCache.blobPath; | ||
| tempPath += ".tmp"; | ||
| { | ||
| std::ofstream output(tempPath, std::ios::binary | std::ios::trunc); | ||
| if (!output.is_open()) | ||
| { | ||
| m_logger->log("Failed to open pipeline cache temp file %s", ILogger::ELL_WARNING, tempPath.string().c_str()); | ||
| return; | ||
| } | ||
| output.write(reinterpret_cast<const char*>(found->second.bin->data()), static_cast<std::streamsize>(found->second.bin->size())); | ||
| output.flush(); | ||
| if (!output) | ||
| { | ||
| output.close(); | ||
| m_system->deleteFile(tempPath); | ||
| m_logger->log("Failed to write pipeline cache blob to %s", ILogger::ELL_WARNING, tempPath.string().c_str()); | ||
| return; | ||
| } | ||
| } | ||
|
|
||
| if (!finalizeCacheFile(tempPath, m_pipelineCache.blobPath, "pipeline cache blob")) | ||
| return; | ||
|
|
||
| m_pipelineCache.dirty = false; | ||
| m_pipelineCache.savedBytes = found->second.bin->size(); | ||
| m_pipelineCache.newlyReadyPipelinesSinceLastSave = 0ull; | ||
| m_pipelineCache.lastSaveAt = clock_t::now(); | ||
| const auto saveElapsedMs = std::chrono::duration_cast<std::chrono::milliseconds>(clock_t::now() - saveStartedAt).count(); | ||
| m_logger->log( | ||
| "PATH_TRACER_PIPELINE_CACHE save bytes=%zu wall_ms=%lld path=%s", | ||
| ILogger::ELL_INFO, | ||
| m_pipelineCache.savedBytes, | ||
| static_cast<long long>(saveElapsedMs), | ||
| m_pipelineCache.blobPath.string().c_str() | ||
| ); | ||
| m_logger->log("Saved PATH_TRACER pipeline cache blob: %s", ILogger::ELL_INFO, m_pipelineCache.blobPath.string().c_str()); | ||
| } | ||
|
|
||
| void maybeCheckpointPipelineCache() | ||
| { | ||
| if (!m_pipelineCache.object || !m_pipelineCache.dirty) | ||
| return; | ||
|
|
||
| if (m_startupLog.loggedFirstRenderSubmit && !m_pipelineCache.checkpointedAfterFirstSubmit) | ||
| { | ||
| savePipelineCache(); | ||
| m_pipelineCache.checkpointedAfterFirstSubmit = true; | ||
| return; | ||
| } | ||
|
|
||
| if (!m_pipelineCache.warmup.started || m_pipelineCache.warmup.loggedComplete) | ||
| return; | ||
|
|
||
| static constexpr size_t WarmupCheckpointThreshold = 4ull; | ||
| if (m_pipelineCache.newlyReadyPipelinesSinceLastSave < WarmupCheckpointThreshold) | ||
| return; | ||
|
|
||
| const auto elapsedSinceLastSave = std::chrono::duration_cast<std::chrono::milliseconds>(clock_t::now() - m_pipelineCache.lastSaveAt).count(); | ||
| if (elapsedSinceLastSave < 1000ll) | ||
| return; | ||
|
|
||
| savePipelineCache(); | ||
| } | ||
|
|
||
| smart_refctd_ptr<IShader> loadRenderShader(const E_LIGHT_GEOMETRY geometry, const bool persistentWorkGroups, const bool rwmc) | ||
| { | ||
| switch (geometry) | ||
| { | ||
| case ELG_SPHERE: | ||
| if (rwmc) | ||
| return loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.sphere.rwmc")>(); | ||
| return loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.sphere")>(); | ||
| case ELG_TRIANGLE: | ||
| if (rwmc) | ||
| return persistentWorkGroups ? | ||
| loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.triangle.rwmc.persistent")>() : | ||
| loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.triangle.rwmc.linear")>(); | ||
| return persistentWorkGroups ? | ||
| loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.triangle.persistent")>() : | ||
| loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.triangle.linear")>(); | ||
| case ELG_RECTANGLE: | ||
| if (rwmc) | ||
| return persistentWorkGroups ? | ||
| loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.rectangle.rwmc.persistent")>() : | ||
| loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.rectangle.rwmc.linear")>(); | ||
| return loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.rectangle")>(); | ||
| default: | ||
| return nullptr; | ||
| } | ||
| } |
There was a problem hiding this comment.
can you:
- move this 1500 LoC addition to its own file
- make as much of this reusable between the examples ?
| pipeline_future_t requestComputePipelineBuild(smart_refctd_ptr<IShader> shaderModule, IGPUPipelineLayout* const pipelineLayout, const char* const entryPoint) | ||
| { | ||
| if (!shaderModule) | ||
| return {}; | ||
|
|
||
| return std::async( | ||
| std::launch::async, | ||
| [ | ||
| this, | ||
| device = m_device, | ||
| pipelineCache = m_pipelineCache.object, | ||
| shader = std::move(shaderModule), | ||
| layout = smart_refctd_ptr<IGPUPipelineLayout>(pipelineLayout), | ||
| requiredSubgroupSize = m_requiredSubgroupSize, | ||
| logger = m_logger.get(), | ||
| entryPointName = std::string(entryPoint), | ||
| cacheLoadedFromDisk = m_pipelineCache.loadedFromDisk | ||
| ]() -> smart_refctd_ptr<IGPUComputePipeline> | ||
| { | ||
| const auto startedAt = clock_t::now(); | ||
| auto preparedShader = getPreparedShaderForEntryPoint(shader, entryPointName.c_str()); | ||
| if (!preparedShader) | ||
| return nullptr; | ||
| smart_refctd_ptr<IGPUComputePipeline> pipeline; | ||
| IGPUComputePipeline::SCreationParams params = {}; | ||
| params.layout = layout.get(); | ||
| params.shader.shader = preparedShader.get(); | ||
| params.shader.entryPoint = entryPointName.c_str(); | ||
| params.shader.entries = nullptr; | ||
| params.cached.requireFullSubgroups = true; | ||
| params.shader.requiredSubgroupSize = requiredSubgroupSize; | ||
| if (!device->createComputePipelines(pipelineCache.get(), { ¶ms, 1 }, &pipeline)) | ||
| { | ||
| if (logger) | ||
| logger->log("Failed to create precompiled path tracing pipeline for %s", ILogger::ELL_ERROR, entryPointName.c_str()); | ||
| return nullptr; | ||
| } | ||
| if (logger) | ||
| { | ||
| const auto wallMs = std::chrono::duration_cast<std::chrono::milliseconds>(clock_t::now() - startedAt).count(); | ||
| logger->log( | ||
| "PATH_TRACER_PIPELINE_BUILD entrypoint=%s wall_ms=%lld cache_loaded_from_disk=%u", | ||
| ILogger::ELL_INFO, | ||
| entryPointName.c_str(), | ||
| static_cast<long long>(wallMs), | ||
| cacheLoadedFromDisk ? 1u : 0u | ||
| ); | ||
| } | ||
| return pipeline; | ||
| } | ||
| ); | ||
| } | ||
|
|
||
| void pollPendingPipeline(pipeline_future_t& future, smart_refctd_ptr<IGPUComputePipeline>& pipeline) | ||
| { | ||
| if (!future.valid() || pipeline) | ||
| return; | ||
| if (future.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) | ||
| return; | ||
| pipeline = future.get(); | ||
| if (pipeline) | ||
| { | ||
| m_pipelineCache.dirty = true; | ||
| ++m_pipelineCache.newlyReadyPipelinesSinceLastSave; | ||
| } | ||
| } |
There was a problem hiding this comment.
make something like a CachedPipelinesManager in the examples_common
Summary
- `PATH_TRACER_BUILD_MODE` with `WALLTIME_OPTIMIZED` as the default and `SPECIALIZED` as the alternate triangle-method layout
- `--pipeline-cache-dir`, `--clear-pipeline-cache`, and a generated `path_tracer.runtime.json` that resolves a relative `pipeline/cache` root from the common bin directory and falls back to `LocalAppData` outside the CMake flow

Note on shape
A noticeable part of the current example-side proxy and permutation scaffolding exists because this branch cannot assume Devsh-Graphics-Programming/Nabla#988 is merged. If that PR lands, a large part of this glue can move out of the example and the packaged SPIR-V setup can be reduced materially.
Root cause
The base EX31 path had two separate problems.
First, EX31 started as a runtime-oriented example in `eab0f70c` and `2f77555ce`. Shader selection and compute pipeline creation lived in runtime from the start. That runtime matrix then expanded with persistent workgroups in `153556152` and with RWMC in `3d206fd4`. The current line locations in `main.cpp` come from later refactors, but the semantic shape predates them.

Second, once EX31 is moved to packaged SPIR-V, startup repays pipeline creation unless those packaged variants share a real pipeline cache and the prepared SPIR-V path avoids revalidating the same blob every run. The base render and resolve compute pipeline creation sites pass `nullptr` cache in `main.cpp#L404-L478`. That runtime creation model originates in `2f77555ce` and was widened by `153556152` and `3d206fd4`.

Only triangle has three distinct polygon-method implementations. Specializing those methods into separate precompiled entrypoints does not add only thin wrappers. It multiplies heavy triangle-side path tracing instantiations and pushes much more work into the DXC/SPIR-V backend.
Validation
Validation was run on AMD Ryzen 5 5600G with Radeon Graphics (6C/12T).
A Visual Studio `Debug x64` full rebuild of the SPIR-V project completed in:
- `WALLTIME_OPTIMIZED` = 12.785 s
- `SPECIALIZED` = 18.314 s
- `SPECIALIZED` regression: +5.529 s, which is +43.25%
- `WALLTIME_OPTIMIZED` improvement over `SPECIALIZED`: 30.19%

`SPECIALIZED` is materially slower because it multiplies the heavy triangle-side path tracing instantiations and pushes more work into the DXC/SPIR-V backend, so the default is `WALLTIME_OPTIMIZED`.

Runtime validation on the final state:
- `Release` cold clear: `first_render_submit_ms=2383`
- `Release` warm cache hit: `loaded_from_disk=1`, `first_render_submit_ms=1793`
- `RelWithDebInfo` cold clear: `first_render_submit_ms=2245`
- `RelWithDebInfo` warm cache hit: `loaded_from_disk=1`, `first_render_submit_ms=1598`
- `Debug` cold clear: `first_render_submit_ms=11781`
- `Debug` warm cache hit: `loaded_from_disk=1`, `first_render_submit_ms=2698`
- `queued_jobs=21` and `max_parallel=11` on this 6C/12T CPU
- `--pipeline-cache-dir <path>` and `--clear-pipeline-cache`
- `Release`, `RelWithDebInfo`, and `Debug`; the generated `path_tracer.runtime.json` resolves `pipeline/cache` relative to the common bin directory