@@ -216,7 +216,8 @@ int CudaRasterizer::Rasterizer::forward(
216216 const float tan_fovx, float tan_fovy,
217217 const bool prefiltered,
218218 float* out_color,
219- int* radii)
219+ int* radii,
220+ bool debug)
220221{
221222 const float focal_y = height / (2 .0f * tan_fovy);
222223 const float focal_x = width / (2 .0f * tan_fovx);
@@ -244,7 +245,7 @@ int CudaRasterizer::Rasterizer::forward(
244245 }
245246
246247 // Run preprocessing per-Gaussian (transformation, bounding, conversion of SHs to RGB)
247- FORWARD::preprocess (
248+ CHECK_CUDA ( FORWARD::preprocess (
248249 P, D, M,
249250 means3D,
250251 (glm::vec3*)scales,
@@ -269,16 +270,15 @@ int CudaRasterizer::Rasterizer::forward(
269270 tile_grid,
270271 geomState.tiles_touched ,
271272 prefiltered
272- );
273+ ), debug)
273274
274275 // Compute prefix sum over full list of touched tile counts by Gaussians
275276 // E.g., [2, 3, 0, 2, 1] -> [2, 5, 5, 7, 8]
276- cub::DeviceScan::InclusiveSum (geomState.scanning_space , geomState.scan_size ,
277- geomState.tiles_touched , geomState.point_offsets , P);
277+ CHECK_CUDA (cub::DeviceScan::InclusiveSum (geomState.scanning_space , geomState.scan_size , geomState.tiles_touched , geomState.point_offsets , P), debug)
278278
279279 // Retrieve total number of Gaussian instances to launch and resize aux buffers
280280 int num_rendered;
281- cudaMemcpy (&num_rendered, geomState.point_offsets + P - 1 , sizeof (int ), cudaMemcpyDeviceToHost);
281+ CHECK_CUDA ( cudaMemcpy (&num_rendered, geomState.point_offsets + P - 1 , sizeof (int ), cudaMemcpyDeviceToHost), debug );
282282
283283 size_t binning_chunk_size = required<BinningState>(num_rendered);
284284 char * binning_chunkptr = binningBuffer (binning_chunk_size);
@@ -294,32 +294,32 @@ int CudaRasterizer::Rasterizer::forward(
294294 binningState.point_list_keys_unsorted ,
295295 binningState.point_list_unsorted ,
296296 radii,
297- tile_grid
298- );
297+ tile_grid)
298+ CHECK_CUDA (, debug)
299299
300300 int bit = getHigherMsb (tile_grid.x * tile_grid.y );
301301
302302 // Sort complete list of (duplicated) Gaussian indices by keys
303- cub::DeviceRadixSort::SortPairs (
303+ CHECK_CUDA ( cub::DeviceRadixSort::SortPairs (
304304 binningState.list_sorting_space ,
305305 binningState.sorting_size ,
306306 binningState.point_list_keys_unsorted , binningState.point_list_keys ,
307307 binningState.point_list_unsorted , binningState.point_list ,
308- num_rendered, 0 , 32 + bit);
308+ num_rendered, 0 , 32 + bit), debug)
309309
310- cudaMemset (imgState.ranges , 0 , tile_grid.x * tile_grid.y * sizeof (uint2 ));
310+ CHECK_CUDA ( cudaMemset (imgState.ranges , 0 , tile_grid.x * tile_grid.y * sizeof (uint2 )), debug );
311311
312312 // Identify start and end of per-tile workloads in sorted list
313313 if (num_rendered > 0 )
314314 identifyTileRanges << <(num_rendered + 255 ) / 256 , 256 >> > (
315315 num_rendered,
316316 binningState.point_list_keys ,
317- imgState.ranges
318- );
317+ imgState.ranges );
318+ CHECK_CUDA (, debug)
319319
320320 // Let each tile blend its range of Gaussians independently in parallel
321321 const float * feature_ptr = colors_precomp != nullptr ? colors_precomp : geomState.rgb ;
322- FORWARD::render (
322+ CHECK_CUDA ( FORWARD::render (
323323 tile_grid, block,
324324 imgState.ranges ,
325325 binningState.point_list ,
@@ -330,7 +330,7 @@ int CudaRasterizer::Rasterizer::forward(
330330 imgState.accum_alpha ,
331331 imgState.n_contrib ,
332332 background,
333- out_color);
333+ out_color), debug)
334334
335335 return num_rendered;
336336}
@@ -365,7 +365,8 @@ void CudaRasterizer::Rasterizer::backward(
365365 float * dL_dcov3D,
366366 float * dL_dsh,
367367 float * dL_dscale,
368- float * dL_drot)
368+ float * dL_drot,
369+ bool debug)
369370{
370371 GeometryState geomState = GeometryState::fromChunk (geom_buffer, P);
371372 BinningState binningState = BinningState::fromChunk (binning_buffer, R);
@@ -386,7 +387,7 @@ void CudaRasterizer::Rasterizer::backward(
386387 // opacity and RGB of Gaussians from per-pixel loss gradients.
387388 // If we were given precomputed colors and not SHs, use them.
388389 const float * color_ptr = (colors_precomp != nullptr ) ? colors_precomp : geomState.rgb ;
389- BACKWARD::render (
390+ CHECK_CUDA ( BACKWARD::render (
390391 tile_grid,
391392 block,
392393 imgState.ranges ,
@@ -402,13 +403,13 @@ void CudaRasterizer::Rasterizer::backward(
402403 (float3 *)dL_dmean2D,
403404 (float4 *)dL_dconic,
404405 dL_dopacity,
405- dL_dcolor);
406+ dL_dcolor), debug)
406407
407408 // Take care of the rest of preprocessing. Was the precomputed covariance
408409 // given to us or a scales/rot pair? If precomputed, pass that. If not,
409410 // use the one we computed ourselves.
410411 const float * cov3D_ptr = (cov3D_precomp != nullptr ) ? cov3D_precomp : geomState.cov3D ;
411- BACKWARD::preprocess (P, D, M,
412+ CHECK_CUDA ( BACKWARD::preprocess (P, D, M,
412413 (float3 *)means3D,
413414 radii,
414415 shs,
@@ -429,5 +430,5 @@ void CudaRasterizer::Rasterizer::backward(
429430 dL_dcov3D,
430431 dL_dsh,
431432 (glm::vec3*)dL_dscale,
432- (glm::vec4*)dL_drot);
433+ (glm::vec4*)dL_drot), debug)
433434}
0 commit comments