Conversation
…rge to use compare with 0
40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl
Outdated
Show resolved
Hide resolved
| [[vk::binding(SensorDSBindings::UBO,SessionDSIndex)]] ConstantBuffer<SSensorUniforms> gSensor; | ||
| // could be uint32_t2 | ||
| [[vk::binding(SensorDSBindings::ScrambleKey,SessionDSIndex)]] RWTexture2DArray<uint32_t4> gScrambleKey; | ||
| [[vk::binding(SensorDSBindings::ScrambleKey,SessionDSIndex)]] RWTexture2D<uint32_t2> gScrambleKey; |
There was a problem hiding this comment.
keep it an array texture
| // storage buffer with sobol sequence | ||
| OwenSampler sampler(SSensorUniforms::MaxBufferDimensions, 0xdeadbeefu); | ||
|
|
||
| constexpr uint32_t quantizedDimensions = SSensorUniforms::MaxBufferDimensions / 3u; | ||
| constexpr size_t bufferSize = quantizedDimensions * SSensorUniforms::MaxSamplesBuffer; | ||
| using sequence_type = hlsl::sampling::QuantizedSequence<hlsl::uint32_t2, 3>; | ||
| std::vector<sequence_type> data(bufferSize); | ||
|
|
||
| for (auto dim = 0u; dim < SSensorUniforms::MaxBufferDimensions; dim++) | ||
| for (uint32_t i = 0; i < SSensorUniforms::MaxSamplesBuffer; i++) | ||
| { | ||
| const uint32_t quant_dim = dim / 3u; | ||
| const uint32_t offset = dim % 3u; | ||
| auto& seq = data[i * quantizedDimensions + quant_dim]; | ||
| const uint32_t sample = sampler.sample(dim, i); | ||
| seq.set(offset, sample); | ||
| } | ||
|
|
There was a problem hiding this comment.
where's the caching? you're making the example start horribly slow
| // TODO: reset m_framesDispatched to 0 every time camera moves considerable amount | ||
| m_framesDispatched++; |
There was a problem hiding this comment.
this m_framesDispatched should live in the session!
| auto mreqs = memBacked->getMemoryReqs(); | ||
| mreqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); | ||
| if (!device->allocate(mreqs,memBacked,IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_NONE).isValid()) | ||
| if (!device->allocate(mreqs,memBacked,deviceAddress ? IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT : IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_NONE).isValid()) |
There was a problem hiding this comment.
you can deduce it from whether the memBacked is a GPUBuffer and if it has the shader address bit usage
| immutables.scrambleKey.image = scrambleKey; | ||
|
|
||
| const auto& params = immutables.scrambleKey.image->getCreationParameters(); | ||
| const auto viewFormat = params.format; | ||
| const auto thisFormatUsages = static_cast<core::bitflag<IGPUImage::E_USAGE_FLAGS>>(allowedFormatUsages[viewFormat]); | ||
| auto view = device->createImageView({ | ||
| .subUsages = immutables.scrambleKey.image->getCreationParameters().usage & thisFormatUsages, | ||
| .image = immutables.scrambleKey.image, | ||
| .viewType = IGPUImageView::E_TYPE::ET_2D, | ||
| .format = viewFormat | ||
| }); | ||
| string viewDebugName = "Scramble Key " + to_string(viewFormat) + " View"; | ||
| if (!view) | ||
| { | ||
| logger.log("Failed to create Sensor \"%s\"'s \"%s\" in CSession::init()", ILogger::ELL_ERROR, m_params.name.c_str(), viewDebugName.c_str()); | ||
| return {}; | ||
| } | ||
| view->setObjectDebugName(viewDebugName.c_str()); | ||
| immutables.scrambleKey.views[viewFormat] = std::move(view); |
There was a problem hiding this comment.
each session should get its own Scramble Key image made when we initialize, and decouple from the renderer, because we may want to use Heitz' rank and key permutation techniques to produce blue noise
There was a problem hiding this comment.
or later experiment with having a scramble key image with as many layers as we have max pixel depth, so as not to store the xoroshiro64 state in the ray payload and read from an image instead
| template<typename RNG, uint16_t N> | ||
| struct RandomUniformND | ||
| { | ||
| using rng_type = RNG; | ||
| using return_type = vector<float32_t, N>; | ||
|
|
||
| static RandomUniformND<RNG,N> create(uint32_t2 seed, uint64_t pSampleSequence) | ||
| { | ||
| RandomUniformND<RNG,N> retval; | ||
| retval.rng = rng_type::construct(seed); | ||
| retval.pSampleBuffer = pSampleSequence; | ||
| return retval; | ||
| } | ||
|
|
||
| // baseDimension: offset index of the sequence | ||
| // sampleIndex: iteration number of current pixel (samples per pixel) | ||
| return_type operator()(uint32_t baseDimension, uint32_t sampleIndex) | ||
| { | ||
| using sequence_type = hlsl::sampling::QuantizedSequence<uint32_t2,3>; | ||
| uint32_t address = hlsl::glsl::bitfieldInsert<uint32_t>(baseDimension, sampleIndex, SSensorUniforms::MaxPathDepthLog2, SSensorUniforms::MaxSamplesLog2); | ||
| sequence_type tmpSeq = vk::RawBufferLoad<sequence_type>(pSampleBuffer + address * sizeof(sequence_type)); | ||
| return tmpSeq.template decode<float32_t>(hlsl::random::DimAdaptorRecursive<rng_type, N>::__call(rng)); | ||
| } | ||
|
|
||
| rng_type rng; | ||
| uint64_t pSampleBuffer; | ||
| }; |
There was a problem hiding this comment.
didn't we commonalize this between ex 31 and nabla master?
| uint32_t sampleCount = pc.sensorDynamics.maxSPP; | ||
| float rcpSampleCount = 1.0 / float(sampleCount); | ||
| for (uint32_t i = 0; i < sampleCount; i++) | ||
| { |
There was a problem hiding this comment.
maxSPP shouldn't be used here; use the SPP per frame instead
| const bool firstFrame = pc.sensorDynamics.rcpFramesDispatched == 1.0; | ||
| // clear accumulations totally if beginning a new frame | ||
| if (firstFrame) | ||
| { | ||
| gAlbedo[launchID] = float32_t4(acc_albedo * rcpSampleCount, 1.0); | ||
| gNormal[launchID] = float32_t4(acc_normal * rcpSampleCount, 1.0); | ||
| } | ||
| else | ||
| { | ||
| float32_t3 prev_albedo = gAlbedo[launchID]; | ||
| float32_t3 delta = (acc_albedo * rcpSampleCount - prev_albedo) * pc.sensorDynamics.rcpFramesDispatched; | ||
| if (hlsl::any(delta > hlsl::promote<float32_t3>(1.0/1024.0))) | ||
| gAlbedo[launchID] = float32_t4(prev_albedo + delta, 1.0); | ||
|
|
||
| float32_t3 prev_normal = gNormal[launchID]; | ||
| delta = (acc_normal * rcpSampleCount - prev_normal) * pc.sensorDynamics.rcpFramesDispatched; | ||
| if (hlsl::any(delta > hlsl::promote<float32_t3>(1.0/512.0))) | ||
| gNormal[launchID] = float32_t4(prev_normal + delta, 1.0); | ||
| } |
There was a problem hiding this comment.
this time I want the accumulation to be done variably per pixel which is why I added a pixel count texture
| hlsl::float32_t2x3 ndcToRay; | ||
| hlsl::float32_t nearClip; | ||
| hlsl::float32_t tMax; | ||
| hlsl::float32_t rcpFramesDispatched; |
There was a problem hiding this comment.
shouldn't exist
| hlsl::float32_t nearClip; | ||
| hlsl::float32_t tMax; | ||
| hlsl::float32_t rcpFramesDispatched; | ||
| uint64_t pSampleSequence; |
There was a problem hiding this comment.
this should be in the gSensor UBO, not the push constant
| uint32_t minSPP : MAX_SPP_LOG2; | ||
| uint32_t maxSPP : MAX_SPP_LOG2; |
There was a problem hiding this comment.
I've set aside this texture
It keeps count of the number of samples in a particular pixel: each frame we do 1 spp (or 2 spp), but the job is then to arrive at maxSPP at each pixel (for now)
| { | ||
| return session->init(info.getCommandBufferForRecording()->cmdbuf); | ||
| const auto& params = m_renderer->getConstructionParams(); | ||
| return session->init(info.getCommandBufferForRecording()->cmdbuf, smart_refctd_ptr(params.sampleSequenceBuffer), smart_refctd_ptr(params.scrambleKey)); |
There was a problem hiding this comment.
sampleSequenceBuffer should be passed to the session during session creation, not init
Init is only for making resources that are tied to the session and VRAM heavy.
No description provided.