Merge branch 'master' into erfan_todos

devshgraphicsprogramming · web-flow · commit 2f75a8d36a8a · 2022-02-23T13:29:29.000+01:00
diff --git a/artifacts/CMakeLists.txt b/artifacts/CMakeLists.txt
@@ -36,5 +36,7 @@ add_custom_target(pack_artifact_ditt
 	COMMAND cmake -E copy ${EXAMPLES_TESTS_PATH}/media/kernels/physical_flare_256.exr ${CMAKE_CURRENT_BINARY_DIR}/Ditt/pack/media/kernels/physical_flare_256.exr
 	COMMAND cmake -E copy ${EXAMPLES_TESTS_PATH}/media/kernels/physical_flare_512.exr ${CMAKE_CURRENT_BINARY_DIR}/Ditt/pack/media/kernels/physical_flare_512.exr
 	
+	COMMAND cmake -E copy ${EXAMPLES_TESTS_PATH}/media/mitsuba/staircase2.zip ${CMAKE_CURRENT_BINARY_DIR}/Ditt/pack/media/mitsuba/staircase2.zip
+	
 	COMMAND cd ${CMAKE_CURRENT_BINARY_DIR}/Ditt && cmake -E tar -cvj Ditt.tar.bz2 pack/
 )
diff --git a/examples_tests/22.RaytracedAO/closestHit.comp b/examples_tests/22.RaytracedAO/closestHit.comp
@@ -69,16 +69,17 @@ void main()
 			const uvec3 indices = get_triangle_indices(batchInstanceData,triangleID);
 			
 			// positions
-			vec3 geomDenormal;
-			const vec3 lastVxPos = load_positions(geomDenormal,batchInstanceData,indices);
-			const bool frontfacing = bool((batchInstanceData.determinantSignBit^floatBitsToUint(dot(geomDenormal,normalizedV)))&0x80000000u);
+			vec3 geomNormal;
+			const vec3 lastVxPos = load_positions(geomNormal,batchInstanceData,indices);
+
+			const bool frontfacing = bool((batchInstanceData.determinantSignBit^floatBitsToUint(dot(normalizedV,geomNormal)))&0x80000000u);
 
 			// get material
 			const nbl_glsl_MC_oriented_material_t material = nbl_glsl_MC_material_data_t_getOriented(batchInstanceData.material,frontfacing);
 			contrib.color = contrib.albedo = nbl_glsl_MC_oriented_material_t_getEmissive(material);
 	
 			const uint pathDepth = bitfieldExtract(staticViewData.pathDepth_noRussianRouletteDepth_samplesPerPixelPerDispatch,0,8);
-			const bool _continue = vertex_depth!=pathDepth && ray.maxT==nbl_glsl_FLT_MAX; // not last vertex and not NEE path
+			const bool _continue = vertex_depth!=pathDepth && material.genchoice_count!=0u && ray.maxT==nbl_glsl_FLT_MAX; // not last vertex and has a BxDF and not NEE path
 			if (_continue)
 			{
 				// if we ever support spatially varying emissive, we'll need to hoist barycentric computation and UV fetching to the position fetching
@@ -89,7 +90,7 @@ void main()
 				
 				//
 				normalizedN = load_normal_and_prefetch_textures(
-					batchInstanceData,indices,compactBary,geomDenormal,material
+					batchInstanceData,indices,compactBary,geomNormal,material
 #ifdef TEX_PREFETCH_STREAM
 					,mat2(0.0) // TODO: Covariance Rendering
 #endif
@@ -103,10 +104,10 @@ void main()
 				);
 			}
 			else
-				contrib.worldspaceNormal = normalize(geomDenormal)*nbl_glsl_MC_colorToScalar(contrib.albedo);
+				contrib.worldspaceNormal = geomNormal*nbl_glsl_MC_colorToScalar(contrib.albedo);
 		}
 		else
-			Contribution_initMiss(contrib);
+			Contribution_initMiss(contrib,aovThroughputScale);
 			
 		Contribution_normalizeAoV(contrib);
 		
diff --git a/examples_tests/22.RaytracedAO/raygen.comp b/examples_tests/22.RaytracedAO/raygen.comp
@@ -79,35 +79,41 @@ void main()
 			// get material while waiting for indices
 			const nbl_glsl_MC_oriented_material_t material = nbl_glsl_MC_material_data_t_getOriented(batchInstanceData.material,frontfacing);
 			contrib.color = contrib.albedo = nbl_glsl_MC_oriented_material_t_getEmissive(material);
-
+			
 			// load vertex data
-			vec3 geomDenormal;
-			const vec3 lastVxPos = load_positions(geomDenormal,batchInstanceData,indices);
-						
-			// get initial scramble key while waiting for vertex positions
-			const nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,ivec2(outPixelLocation),0).rg;
+			vec3 geomNormal;
+			const vec3 lastVxPos = load_positions(geomNormal,batchInstanceData,indices);
 
-			//
-			normalizedN = load_normal_and_prefetch_textures(
-				batchInstanceData,indices,compactBary,geomDenormal,material
-				#ifdef TEX_PREFETCH_STREAM
-				,dBarydScreen
-				#endif
-			);
+			// little optimization for non-twosided materials
+			if (material.genchoice_count!=0u)
+			{			
+				// get initial scramble key while waiting for vertex positions
+				const nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,ivec2(outPixelLocation),0).rg;
+
+				//
+				normalizedN = load_normal_and_prefetch_textures(
+					batchInstanceData,indices,compactBary,geomNormal,material
+					#ifdef TEX_PREFETCH_STREAM
+					,dBarydScreen
+					#endif
+				);
 			
-			const vec3 origin = dPdBary*compactBary+lastVxPos;
-			normalizedV = normalize(pc.cummon.camPos-origin);
+				const vec3 origin = dPdBary*compactBary+lastVxPos;
+				normalizedV = normalize(pc.cummon.camPos-origin);
 
-			// generate rays
-			const uint vertex_depth = 1u;
-			generate_next_rays(
-				samplesPerPixelPerDispatch,material,frontfacing,vertex_depth,
-				scramble_start_state,pc.cummon.samplesComputed,outPixelLocation,origin,
-				vec3(pc.cummon.rcpFramesDispatched),1.f,contrib.albedo,contrib.worldspaceNormal
-			);
+				// generate rays
+				const uint vertex_depth = 1u;
+				generate_next_rays(
+					samplesPerPixelPerDispatch,material,frontfacing,vertex_depth,
+					scramble_start_state,pc.cummon.samplesComputed,outPixelLocation,origin,
+					vec3(pc.cummon.rcpFramesDispatched),1.f,contrib.albedo,contrib.worldspaceNormal
+				);
+			}
+			else
+				contrib.worldspaceNormal = geomNormal*nbl_glsl_MC_colorToScalar(contrib.albedo);
 		}
 		else
-			Contribution_initMiss(contrib);
+			Contribution_initMiss(contrib,1.f);
 
 		if (bool(pc.cummon.depth))
 		{
diff --git a/examples_tests/22.RaytracedAO/raytraceCommon.glsl b/examples_tests/22.RaytracedAO/raytraceCommon.glsl
@@ -79,8 +79,10 @@ void storeAccumulation(in vec3 color, in uvec3 coord)
 }
 void storeAccumulation(in vec3 prev, in vec3 delta, in uvec3 coord)
 {
-	if (any(greaterThan(abs(delta),vec3(nbl_glsl_FLT_MIN*16.f))))
-		storeAccumulation(prev+delta,coord);
+	const vec3 newVal = prev+delta;
+	const uvec3 diff = floatBitsToUint(newVal)^floatBitsToUint(prev);
+	if (bool((diff.x|diff.y|diff.z)&0x7ffffff0u))
+		storeAccumulation(newVal,coord);
 }
 
 vec3 fetchAlbedo(in uvec3 coord)
@@ -152,7 +154,7 @@ bool has_world_transform(in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchIns
 
 #include <nbl/builtin/glsl/barycentric/utils.glsl>
 mat2x3 dPdBary;
-vec3 load_positions(out vec3 geomDenormal, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData, in uvec3 indices)
+vec3 load_positions(out vec3 geomNormal, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData, in uvec3 indices)
 {
 	mat3 positions = mat3(
 		nbl_glsl_fetchVtxPos(indices[0],batchInstanceData),
@@ -165,7 +167,7 @@ vec3 load_positions(out vec3 geomDenormal, in nbl_glsl_ext_Mitsuba_Loader_instan
 	//
 	for (int i=0; i<2; i++)
 		dPdBary[i] = positions[i]-positions[2];
-	geomDenormal = cross(dPdBary[0],dPdBary[1]);
+	geomNormal = normalize(cross(dPdBary[0],dPdBary[1]));
 	//
 	if (tform)
 		positions[2] += batchInstanceData.tform[3];
@@ -194,7 +196,7 @@ bool needs_texture_prefetch(in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batch
 
 vec3 load_normal_and_prefetch_textures(
 	in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData,
-	in uvec3 indices, in vec2 compactBary, in vec3 geomDenormal,
+	in uvec3 indices, in vec2 compactBary, in vec3 geomNormal,
 	in nbl_glsl_MC_oriented_material_t material
 #ifdef TEX_PREFETCH_STREAM
 	,in mat2 dBarydScreen
@@ -222,7 +224,6 @@ vec3 load_normal_and_prefetch_textures(
 	// the rest is always only needed for continuing rays
 
 
-	vec3 normal = geomDenormal;
 	// while waiting for the scramble state
 	// TODO: optimize, add loads more flags to control this
 	const bool needsSmoothNormals = true;
@@ -235,18 +236,20 @@ vec3 load_normal_and_prefetch_textures(
 		);
 
 		// not needed for NEE unless doing Area or Projected Solid Angle Sampling
-		const vec3 smoothNormal = normals*nbl_glsl_barycentric_expand(compactBary);
-		// TODO: first check wouldn't be needed if we had `needsSmoothNormals` implemented
-		if (!isnan(smoothNormal.x) && has_world_transform(batchInstanceData))
+		vec3 smoothNormal = normals*nbl_glsl_barycentric_expand(compactBary);
+		if (has_world_transform(batchInstanceData))
 		{
-			normal = vec3(
+			smoothNormal = vec3(
 				dot(batchInstanceData.normalMatrixRow0,smoothNormal),
 				dot(batchInstanceData.normalMatrixRow1,smoothNormal),
 				dot(batchInstanceData.normalMatrixRow2,smoothNormal)
 			);
 		}
+		// TODO: this check wouldn't be needed if we had `needsSmoothNormals` implemented
+		if (!isnan(smoothNormal.x))
+			return normalize(smoothNormal);
 	}
-	return normalize(normal);
+	return geomNormal;
 }
 
 #include <nbl/builtin/glsl/sampling/quantized_sequence.glsl>
@@ -324,8 +327,8 @@ void generate_next_rays(
 			worldspaceNormal += result.aov.normal/float(maxRaysToGen);
 
 			nextThroughput[i] = prevThroughput*result.quotient;
-			// do denormalized half floats flush to 0 ?
-			if (max(max(nextThroughput[i].x,nextThroughput[i].y),nextThroughput[i].z)>=exp2(-14.f))
+			// TODO: add some sort of factor to this inequality that could account for highest possible emission (direct or indirect) we could encounter
+			if (max(max(nextThroughput[i].x,nextThroughput[i].y),nextThroughput[i].z)>exp2(-19.f)) // match output mantissa (won't contribute anything afterwards)
 			{
 				maxT[i] = nbl_glsl_FLT_MAX;
 				nextAoVThroughputScale[i] = prevAoVThroughputScale*result.aov.throughputFactor;
@@ -392,11 +395,11 @@ vec2 SampleSphericalMap(vec3 v)
     return uv;
 }
 
-void Contribution_initMiss(out Contribution contrib)
+void Contribution_initMiss(out Contribution contrib, in float aovThroughputScale)
 {
 	vec2 uv = SampleSphericalMap(-normalizedV);
 	// funny little trick borrowed from things like Progressive Photon Mapping
-	const float bias = 0.25*sqrt(pc.cummon.rcpFramesDispatched);
+	const float bias = 0.0625f*(1.f-aovThroughputScale)*pow(pc.cummon.rcpFramesDispatched,0.08f);
 	contrib.albedo = contrib.color = textureGrad(envMap, uv, vec2(bias*0.5,0.f), vec2(0.f,bias)).rgb;
 	contrib.worldspaceNormal = normalizedV;
 }
diff --git a/examples_tests/39.DenoiserTonemapper/main.cpp b/examples_tests/39.DenoiserTonemapper/main.cpp
@@ -79,7 +79,9 @@ int main(int argc, char* argv[])
 	params.Vsync = true;
 	params.Doublebuffer = true;
 	params.Stencilbuffer = false;
-	params.StreamingDownloadBufferSize = 1024*1024*1024; // for 16k images
+	// TODO: this is a temporary fix for a problem solved in the Vulkan Branch
+	params.StreamingUploadBufferSize = 1024*1024*1024; // for Color + 2 AoV of 8k images
+	params.StreamingDownloadBufferSize = core::roundUp(params.StreamingUploadBufferSize/3u,256u); // for output image
 	auto device = createDeviceEx(params);
 
 	if (check_error(!device,"Could not create Irrlicht Device!"))
@@ -1270,8 +1272,17 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
 		uint32_t inImageByteOffset[EII_COUNT];
 		{
 			asset::ICPUBuffer* buffersToUpload[EII_COUNT];
+			size_t inputSize = 0u;
 			for (uint32_t j=0u; j<denoiserInputCount; j++)
+			{
 				buffersToUpload[j] = param.image[j]->getBuffer();
+				inputSize += buffersToUpload[j]->getSize();
+			}
+			if (inputSize>=params.StreamingUploadBufferSize)
+			{
+				printf("[ERROR] Denoiser Failed, input too large to fit in VRAM, Streaming Denoise not implemented yet!");
+				return -1;
+			}
 			auto gpubuffers = driver->getGPUObjectsFromAssets(buffersToUpload,buffersToUpload+denoiserInputCount,&assetConverter);
 
 			bool skip = false;
diff --git a/include/nbl/builtin/glsl/material_compiler/common.glsl b/include/nbl/builtin/glsl/material_compiler/common.glsl
@@ -746,9 +746,6 @@ nbl_glsl_MC_eval_pdf_aov_t nbl_glsl_MC_instr_bxdf_eval_and_pdf_common(
 	in uint op, in bool is_not_brdf,
 	in nbl_glsl_MC_params_t params,
 	in mat2x3 ior, in mat2x3 ior2,
-	#if GEN_CHOICE_STREAM>=GEN_CHOICE_WITH_AOV_EXTRACTION
-	in vec3 normal,
-	#endif
 	in float absOrMaxNdotV,
 	in float absOrMaxNdotL,
 	in nbl_glsl_LightSample s,
@@ -775,7 +772,7 @@ nbl_glsl_MC_eval_pdf_aov_t nbl_glsl_MC_instr_bxdf_eval_and_pdf_common(
 		const float a = nbl_glsl_MC_params_getAlpha(params);
 		const float a2 = a*a;
 		#if GEN_CHOICE_STREAM>=GEN_CHOICE_WITH_AOV_EXTRACTION
-		result.aov.normal = normal;
+		result.aov.normal = currInteraction.inner.isotropic.N;
 		#endif
 					
 		#if defined(OP_DIFFUSE) || defined(OP_DIFFTRANS)
@@ -961,9 +958,6 @@ void nbl_glsl_MC_instr_eval_and_pdf_execute(
 			{
 				result = nbl_glsl_MC_instr_bxdf_eval_and_pdf_common(
 					instr,op,is_not_brdf,params,ior,ior2,
-					#if GEN_CHOICE_STREAM>=GEN_CHOICE_WITH_AOV_EXTRACTION
-					precomp.N,
-					#endif
 					NdotV,NdotL,
 					s,microfacet,run
 				);
@@ -1235,7 +1229,7 @@ nbl_glsl_LightSample nbl_bsdf_cos_generate(
 				const float ax = nbl_glsl_MC_params_getAlpha(params);
 				const float ax2 = ax*ax;
 				#if GEN_CHOICE_STREAM>=GEN_CHOICE_WITH_AOV_EXTRACTION
-				out_values.aov.normal = precomp.N;
+				out_values.aov.normal = currInteraction.inner.isotropic.N;
 				#endif
 
 				// TODO: refactor
diff --git a/include/nbl/builtin/glsl/material_compiler/common_invariant_declarations.glsl b/include/nbl/builtin/glsl/material_compiler/common_invariant_declarations.glsl
@@ -37,6 +37,7 @@ struct nbl_glsl_MC_instr_stream_t
 // (in case of precomp.NdotV<0.0, currInteraction will be set with -precomp.N)
 struct nbl_glsl_MC_precomputed_t
 {
+	// TODO: shadingN and geomN
 	vec3 N;
 	vec3 V;
 	bool frontface;
@@ -82,6 +83,7 @@ void nbl_glsl_MC_finalizeMicrofacet(inout nbl_glsl_MC_microfacet_t mf)
 struct nbl_glsl_MC_oriented_material_t
 {
 	uvec2 emissive;
+	// TODO: derive/define upper bounds for instruction counts and bitpack them!
 	uint prefetch_offset;
 	uint prefetch_count;
 	uint instr_offset;
diff --git a/src/nbl/asset/interchange/CImageLoaderJPG.cpp b/src/nbl/asset/interchange/CImageLoaderJPG.cpp
@@ -156,12 +156,11 @@ bool CImageLoaderJPG::isALoadableFileFormat(system::IFile* _file, const system::
 	if (!_file)
 		return false;
 
-	int32_t jfif = 0;
-	
+	uint32_t header = 0;	
 	system::future<size_t> future;
-	_file->read(future, &jfif, 6, sizeof(uint32_t));
+	_file->read(future, &header, 6, sizeof(uint32_t));
 	future.get();
-	return (jfif == 0x4a464946 || jfif == 0x4649464a || jfif == 0x66697845u || jfif == 0x70747468u); // maybe 0x4a464946 can go
+	return (header&0x00FFD8FFu)==0x00FFD8FFu;
 #endif
 }
 
diff --git a/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp b/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp
@@ -725,23 +725,25 @@ bool CElementBSDF::processChildData(IElement* _child, const std::string& name)
 					//{"albedo",				processAlbedo}
 				};
 
-				auto found = SetChildMap.find(name);
-				if (found==SetChildMap.end())
+				switch (type)
 				{
-					switch (type)
-					{
-						case Type::BUMPMAP:
-							bumpmap.texture = _texture;
-							break;
-						default:
-							_NBL_DEBUG_BREAK_IF(true);
-							ParserLog::invalidXMLFileStructure("No BSDF can have such property set with name: " + name);
-							return false;
-							break;
-					}
+					case Type::BUMPMAP:
+						bumpmap.texture = _texture;
+						break;
+					default:
+						{
+							auto found = SetChildMap.find(name);
+							if (found!=SetChildMap.end())
+								found->second();
+							else
+							{
+								_NBL_DEBUG_BREAK_IF(true);
+								ParserLog::invalidXMLFileStructure("No BSDF can have such property set with name: " + name);
+								return false;
+							}
+						}
+						break;
 				}
-				else
-					found->second();
 
 				if (error)
 					return false;
diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp
@@ -967,10 +967,16 @@ void CMitsubaLoader::cacheTexture(SContext& ctx, uint32_t hierarchyLevel, const
 						// check if found
 						auto contentRange = bundle.getContents();
 						if (contentRange.empty())
+						{
+						    os::Printer::log("[ERROR] Could Not Find Texture: "+cacheKey,ELL_ERROR);
 							return;
+						}
 						auto asset = contentRange.begin()[0];
 						if (asset->getAssetType()!=asset::IAsset::ET_IMAGE)
+						{
+						    os::Printer::log("[ERROR] Loaded an Asset but it wasn't a texture, was E_ASSET_TYPE "+std::to_string(asset->getAssetType()),ELL_ERROR);
 							return;
+						}
 
 						viewParams.image = core::smart_refctd_ptr_static_cast<asset::ICPUImage>(asset);
 					}
@@ -1086,12 +1092,6 @@ auto CMitsubaLoader::genBSDFtreeTraversal(SContext& ctx, const CElementBSDF* _bs
 			if (const_or_tex.value.type==SPropertyElementData::INVALID)
 				cacheTexture(ctx,0u,const_or_tex.texture,semantic);
 		};
-		auto unrollScales = [](CElementTexture* tex)
-		{
-			while (tex->type == CElementTexture::SCALE)
-				tex = tex->scale.texture;
-			return tex;
-		};
 
 		core::stack<const CElementBSDF*> stack;
 		stack.push(_bsdf);
diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.cpp

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -36,5 +36,7 @@ add_custom_target(pack_artifact_ditt` |
| `36` | `36` | `COMMAND cmake -E copy ${EXAMPLES_TESTS_PATH}/media/kernels/physical_flare_256.exr ${CMAKE_CURRENT_BINARY_DIR}/Ditt/pack/media/kernels/physical_flare_256.exr` |
| `37` | `37` | `COMMAND cmake -E copy ${EXAMPLES_TESTS_PATH}/media/kernels/physical_flare_512.exr ${CMAKE_CURRENT_BINARY_DIR}/Ditt/pack/media/kernels/physical_flare_512.exr` |
| `38` | `38` | |
| | `39` | `+ COMMAND cmake -E copy ${EXAMPLES_TESTS_PATH}/media/mitsuba/staircase2.zip ${CMAKE_CURRENT_BINARY_DIR}/Ditt/pack/media/mitsuba/staircase2.zip` |
| | `40` | `+` |
| `39` | `41` | `COMMAND cd ${CMAKE_CURRENT_BINARY_DIR}/Ditt && cmake -E tar -cvj Ditt.tar.bz2 pack/` |
| `40` | `42` | `)` |