@@ -60,20 +60,25 @@ __device__ glm::vec3 computeColorFromSH(int idx, int deg, int max_coeffs, const
6060}
6161
6262// Forward version of 2D covariance matrix computation
63- __device__ float3 computeCov2D (const float3 & mean, float focal_x, float focal_y, const float * cov3D, const float * viewmatrix)
63+ __device__ float3 computeCov2D (const float3 & mean, float focal_x, float focal_y, float tan_fovx, float tan_fovy, const float * cov3D, const float * viewmatrix)
6464{
6565 // The following models the steps outlined by equations 29
6666 // and 31 in "EWA Splatting" (Zwicker et al., 2002).
6767 // Additionally considers aspect / scaling of viewport.
6868 // Transposes used to account for row-/column-major conventions.
6969 float3 t = transformPoint4x3 (mean, viewmatrix);
7070
71- float t_inv_norm = 1 .f / sqrt (t.x * t.x + t.y * t.y + t.z * t.z );
71+ const float limx = 1 .3f * tan_fovx;
72+ const float limy = 1 .3f * tan_fovy;
73+ const float txtz = t.x / t.z ;
74+ const float tytz = t.y / t.z ;
75+ t.x = min (limx, max (-limx, txtz)) * t.z ;
76+ t.y = min (limy, max (-limy, tytz)) * t.z ;
7277
7378 glm::mat3 J = glm::mat3 (
7479 focal_x / t.z , 0 .0f , -(focal_x * t.x ) / (t.z * t.z ),
7580 0 .0f , focal_y / t.z , -(focal_y * t.y ) / (t.z * t.z ),
76- t. x * t_inv_norm, t. y * t_inv_norm, t. z * t_inv_norm );
81+ 0 , 0 , 0 );
7782
7883 glm::mat3 W = glm::mat3 (
7984 viewmatrix[0 ], viewmatrix[4 ], viewmatrix[8 ],
@@ -98,17 +103,17 @@ __device__ float3 computeCov2D(const float3& mean, float focal_x, float focal_y,
98103
99104// Forward method for converting scale and rotation properties of each
100105// Gaussian to a 3D covariance matrix in world space. Also takes care
101- // of quaternion normalization and scale activation via exp .
106+ // to use the quaternion as given; it is no longer normalized here.
102107__device__ void computeCov3D (const glm::vec3 scale, float mod, const glm::vec4 rot, float * cov3D)
103108{
104109 // Create scaling matrix
105110 glm::mat3 S = glm::mat3 (1 .0f );
106- S[0 ][0 ] = mod * exp ( scale.x ) ;
107- S[1 ][1 ] = mod * exp ( scale.y ) ;
108- S[2 ][2 ] = mod * exp ( scale.z ) ;
111+ S[0 ][0 ] = mod * scale.x ;
112+ S[1 ][1 ] = mod * scale.y ;
113+ S[2 ][2 ] = mod * scale.z ;
109114
110115 // Quaternion is used as given; normalization is disabled below
111- glm::vec4 q = rot / glm::length (rot);
116+ glm::vec4 q = rot; // / glm::length(rot);
112117 float r = q.x ;
113118 float x = q.y ;
114119 float y = q.z ;
@@ -172,7 +177,7 @@ __global__ void preprocessCUDA(int P, int D, int M,
172177 radii[idx] = 0 ;
173178 tiles_touched[idx] = 0 ;
174179
175- // Perform near and frustum culling with guardband , quit if outside.
180+ // Perform near culling, quit if outside.
176181 float3 p_view;
177182 if (!in_frustum (idx, orig_points, viewmatrix, projmatrix, prefiltered, p_view))
178183 return ;
@@ -196,11 +201,8 @@ __global__ void preprocessCUDA(int P, int D, int M,
196201 cov3D = cov3Ds + idx * 6 ;
197202 }
198203
199- // Compute max extent of Gaussian for fine-grained fustum culling
200- float max_dist2 = 9 .f * max (cov3D[0 ], max (cov3D[3 ], cov3D[5 ]));
201-
202204 // Compute 2D screen-space covariance matrix
203- float3 cov = computeCov2D (p_orig, focal_x, focal_y, cov3D, viewmatrix);
205+ float3 cov = computeCov2D (p_orig, focal_x, focal_y, tan_fovx, tan_fovy, cov3D, viewmatrix);
204206
205207 // Invert covariance (EWA algorithm)
206208 float det = (cov.x * cov.z - cov.y * cov.y );
@@ -209,14 +211,6 @@ __global__ void preprocessCUDA(int P, int D, int M,
209211 float det_inv = 1 .f / det;
210212 float3 conic = { cov.z * det_inv, -cov.y * det_inv, cov.x * det_inv };
211213
212- // Fine-grained frustum culling against ellipsoid
213- float z_at_point = p_view.z + sqrt (max_dist2);
214- float x_to_border = z_at_point * tan_fovx;
215- float y_to_border = z_at_point * tan_fovy;
216- float D2_point = p_view.x * p_view.x + p_view.y * p_view.y ;
217- if (D2_point - (x_to_border * x_to_border + y_to_border * y_to_border) > max_dist2)
218- return ;
219-
220214 // Compute extent in screen space (by finding eigenvalues of
221215 // 2D covariance matrix). Use extent to compute a bounding rectangle
222216 // of screen-space tiles that this Gaussian overlaps with. Quit if
@@ -254,7 +248,8 @@ __global__ void preprocessCUDA(int P, int D, int M,
254248// block, each thread treats one pixel. Alternates between fetching
255249// and rasterizing data.
256250template <uint32_t CHANNELS>
257- __global__ void renderCUDA (
251+ __global__ void __launch_bounds__ (BLOCK_X * BLOCK_Y)
252+ renderCUDA(
258253 const uint2 * __restrict__ ranges,
259254 const uint32_t * __restrict__ point_list,
260255 int W, int H,
@@ -407,8 +402,8 @@ void FORWARD::preprocess(int P, int D, int M,
407402 const float * projmatrix,
408403 const glm::vec3* cam_pos,
409404 const int W, int H,
410- const float tan_fovx, float tan_fovy,
411405 const float focal_x, float focal_y,
406+ const float tan_fovx, float tan_fovy,
412407 int * radii,
413408 float2 * means2D,
414409 float * depths,
0 commit comments