Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
D
diff-gaussian-rasterization
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Alan de Oliveira
diff-gaussian-rasterization
Commits
3a07ac2e
Commit
3a07ac2e
authored
Jul 02, 2023
by
bkerbl
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
No more activation inside, guardbands removed
parent
feecabda
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
53 additions
and
44 deletions
+53
-44
auxiliary.h
cuda_rasterizer/auxiliary.h
+1
-1
backward.cu
cuda_rasterizer/backward.cu
+27
-14
backward.h
cuda_rasterizer/backward.h
+1
-0
forward.cu
cuda_rasterizer/forward.cu
+18
-23
forward.h
cuda_rasterizer/forward.h
+1
-1
rasterizer_impl.cu
cuda_rasterizer/rasterizer_impl.cu
+2
-1
rasterize_points.cu
rasterize_points.cu
+3
-4
No files found.
cuda_rasterizer/auxiliary.h
View file @
3a07ac2e
...
...
@@ -140,7 +140,7 @@ __forceinline__ __device__ bool in_frustum(int idx,
float3
p_proj
=
{
p_hom
.
x
*
p_w
,
p_hom
.
y
*
p_w
,
p_hom
.
z
*
p_w
};
p_view
=
transformPoint4x3
(
p_orig
,
viewmatrix
);
if
(
p_view
.
z
<=
0
.
2
f
||
((
p_proj
.
x
<
-
1
.
3
||
p_proj
.
x
>
1
.
3
||
p_proj
.
y
<
-
1
.
3
||
p_proj
.
y
>
1
.
3
)))
if
(
p_view
.
z
<=
0
.
2
f
)
//
|| ((p_proj.x < -1.3 || p_proj.x > 1.3 || p_proj.y < -1.3 || p_proj.y > 1.3)))
{
if
(
prefiltered
)
{
...
...
cuda_rasterizer/backward.cu
View file @
3a07ac2e
...
...
@@ -134,8 +134,8 @@ __global__ void computeCov2DCUDA(int P,
const float3* means,
const int* radii,
const float* cov3Ds,
float h_x
,
float h_
y,
const float h_x, float h_y
,
const float tan_fovx, float tan_fov
y,
const float* view_matrix,
const float* dL_dconics,
float3* dL_dmeans,
...
...
@@ -153,11 +153,20 @@ __global__ void computeCov2DCUDA(int P,
float3 mean = means[idx];
float3 dL_dconic = { dL_dconics[4 * idx], dL_dconics[4 * idx + 1], dL_dconics[4 * idx + 3] };
float3 t = transformPoint4x3(mean, view_matrix);
float t_inv_norm = 1.f / sqrt(t.x * t.x + t.y * t.y + t.z * t.z);
const float limx = 1.3f * tan_fovx;
const float limy = 1.3f * tan_fovy;
const float txtz = t.x / t.z;
const float tytz = t.y / t.z;
t.x = min(limx, max(-limx, txtz)) * t.z;
t.y = min(limy, max(-limy, tytz)) * t.z;
const float x_grad_mul = txtz < -limx || txtz > limx ? 0 : 1;
const float y_grad_mul = tytz < -limy || tytz > limy ? 0 : 1;
glm::mat3 J = glm::mat3(h_x / t.z, 0.0f, -(h_x * t.x) / (t.z * t.z),
0.0f, h_y / t.z, -(h_y * t.y) / (t.z * t.z),
t.x * t_inv_norm, t.y * t_inv_norm, t.z * t_inv_norm
);
0, 0, 0
);
glm::mat3 W = glm::mat3(
view_matrix[0], view_matrix[4], view_matrix[8],
...
...
@@ -239,8 +248,8 @@ __global__ void computeCov2DCUDA(int P,
float tz3 = tz2 * tz;
// Gradients of loss w.r.t. transformed Gaussian mean t
float dL_dtx = -h_x * tz2 * dL_dJ02;
float dL_dty = -h_y * tz2 * dL_dJ12;
float dL_dtx =
x_grad_mul *
-h_x * tz2 * dL_dJ02;
float dL_dty =
y_grad_mul *
-h_y * tz2 * dL_dJ12;
float dL_dtz = -h_x * tz2 * dL_dJ00 - h_y * tz2 * dL_dJ11 + (2 * h_x * t.x) * tz3 * dL_dJ02 + (2 * h_y * t.y) * tz3 * dL_dJ12;
// Account for transformation of mean to t
...
...
@@ -258,7 +267,7 @@ __global__ void computeCov2DCUDA(int P,
__device__ void computeCov3D(int idx, const glm::vec3 scale, float mod, const glm::vec4 rot, const float* dL_dcov3Ds, glm::vec3* dL_dscales, glm::vec4* dL_drots)
{
// Recompute (intermediate) results for the 3D covariance computation.
glm::vec4 q = rot / glm::length(rot);
glm::vec4 q = rot
;//
/ glm::length(rot);
float r = q.x;
float x = q.y;
float y = q.z;
...
...
@@ -272,7 +281,7 @@ __device__ void computeCov3D(int idx, const glm::vec3 scale, float mod, const gl
glm::mat3 S = glm::mat3(1.0f);
glm::vec3 s = mod *
exp(scale)
;
glm::vec3 s = mod *
scale
;
S[0][0] = s.x;
S[1][1] = s.y;
S[2][2] = s.z;
...
...
@@ -298,16 +307,16 @@ __device__ void computeCov3D(int idx, const glm::vec3 scale, float mod, const gl
glm::mat3 Rt = glm::transpose(R);
glm::mat3 dL_dMt = glm::transpose(dL_dM);
dL_dMt[0] *= s.x;
dL_dMt[1] *= s.y;
dL_dMt[2] *= s.z;
// Gradients of loss w.r.t. scale
glm::vec3* dL_dscale = dL_dscales + idx;
dL_dscale->x = glm::dot(Rt[0], dL_dMt[0]);
dL_dscale->y = glm::dot(Rt[1], dL_dMt[1]);
dL_dscale->z = glm::dot(Rt[2], dL_dMt[2]);
dL_dMt[0] *= s.x;
dL_dMt[1] *= s.y;
dL_dMt[2] *= s.z;
// Gradients of loss w.r.t. normalized quaternion
glm::vec4 dL_dq;
dL_dq.x = 2 * z * (dL_dMt[0][1] - dL_dMt[1][0]) + 2 * y * (dL_dMt[2][0] - dL_dMt[0][2]) + 2 * x * (dL_dMt[1][2] - dL_dMt[2][1]);
...
...
@@ -317,7 +326,7 @@ __device__ void computeCov3D(int idx, const glm::vec3 scale, float mod, const gl
// Gradients of loss w.r.t. unnormalized quaternion
float4* dL_drot = (float4*)(dL_drots + idx);
*dL_drot = dnormvdv(float4{ rot.x, rot.y, rot.z, rot.w }, float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w });
*dL_drot =
float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w };//
dnormvdv(float4{ rot.x, rot.y, rot.z, rot.w }, float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w });
}
// Backward pass of the preprocessing steps, except
...
...
@@ -377,7 +386,8 @@ __global__ void preprocessCUDA(
// Backward version of the rendering procedure.
template <uint32_t C>
__global__ void renderCUDA(
__global__ void __launch_bounds__(BLOCK_X * BLOCK_Y)
renderCUDA(
const uint2* __restrict__ ranges,
const uint32_t* __restrict__ point_list,
int W, int H,
...
...
@@ -548,6 +558,7 @@ void BACKWARD::preprocess(
const float* viewmatrix,
const float* projmatrix,
const float focal_x, float focal_y,
const float tan_fovx, float tan_fovy,
const glm::vec3* campos,
const float3* dL_dmean2D,
const float* dL_dconic,
...
...
@@ -569,6 +580,8 @@ void BACKWARD::preprocess(
cov3Ds,
focal_x,
focal_y,
tan_fovx,
tan_fovy,
viewmatrix,
dL_dconic,
(float3*)dL_dmean3D,
...
...
cuda_rasterizer/backward.h
View file @
3a07ac2e
...
...
@@ -39,6 +39,7 @@ namespace BACKWARD
const
float
*
view
,
const
float
*
proj
,
const
float
focal_x
,
float
focal_y
,
const
float
tan_fovx
,
float
tan_fovy
,
const
glm
::
vec3
*
campos
,
const
float3
*
dL_dmean2D
,
const
float
*
dL_dconics
,
...
...
cuda_rasterizer/forward.cu
View file @
3a07ac2e
...
...
@@ -60,7 +60,7 @@ __device__ glm::vec3 computeColorFromSH(int idx, int deg, int max_coeffs, const
}
// Forward version of 2D covariance matrix computation
__device__ float3 computeCov2D(const float3& mean, float focal_x, float focal_y, const float* cov3D, const float* viewmatrix)
__device__ float3 computeCov2D(const float3& mean, float focal_x, float focal_y,
float tan_fovx, float tan_fovy,
const float* cov3D, const float* viewmatrix)
{
// The following models the steps outlined by equations 29
// and 31 in "EWA Splatting" (Zwicker et al., 2002).
...
...
@@ -68,12 +68,17 @@ __device__ float3 computeCov2D(const float3& mean, float focal_x, float focal_y,
// Transposes used to account for row-/column-major conventions.
float3 t = transformPoint4x3(mean, viewmatrix);
float t_inv_norm = 1.f / sqrt(t.x * t.x + t.y * t.y + t.z * t.z);
const float limx = 1.3f * tan_fovx;
const float limy = 1.3f * tan_fovy;
const float txtz = t.x / t.z;
const float tytz = t.y / t.z;
t.x = min(limx, max(-limx, txtz)) * t.z;
t.y = min(limy, max(-limy, tytz)) * t.z;
glm::mat3 J = glm::mat3(
focal_x / t.z, 0.0f, -(focal_x * t.x) / (t.z * t.z),
0.0f, focal_y / t.z, -(focal_y * t.y) / (t.z * t.z),
t.x * t_inv_norm, t.y * t_inv_norm, t.z * t_inv_norm
);
0, 0, 0
);
glm::mat3 W = glm::mat3(
viewmatrix[0], viewmatrix[4], viewmatrix[8],
...
...
@@ -98,17 +103,17 @@ __device__ float3 computeCov2D(const float3& mean, float focal_x, float focal_y,
// Forward method for converting scale and rotation properties of each
// Gaussian to a 3D covariance matrix in world space. Also takes care
// of quaternion normalization
and scale activation via exp
.
// of quaternion normalization.
__device__ void computeCov3D(const glm::vec3 scale, float mod, const glm::vec4 rot, float* cov3D)
{
// Create scaling matrix
glm::mat3 S = glm::mat3(1.0f);
S[0][0] = mod *
exp(scale.x)
;
S[1][1] = mod *
exp(scale.y)
;
S[2][2] = mod *
exp(scale.z)
;
S[0][0] = mod *
scale.x
;
S[1][1] = mod *
scale.y
;
S[2][2] = mod *
scale.z
;
// Normalize quaternion to get valid rotation
glm::vec4 q = rot / glm::length(rot);
glm::vec4 q = rot
;//
/ glm::length(rot);
float r = q.x;
float x = q.y;
float y = q.z;
...
...
@@ -172,7 +177,7 @@ __global__ void preprocessCUDA(int P, int D, int M,
radii[idx] = 0;
tiles_touched[idx] = 0;
// Perform near
and frustum culling with guardband
, quit if outside.
// Perform near
culling
, quit if outside.
float3 p_view;
if (!in_frustum(idx, orig_points, viewmatrix, projmatrix, prefiltered, p_view))
return;
...
...
@@ -196,11 +201,8 @@ __global__ void preprocessCUDA(int P, int D, int M,
cov3D = cov3Ds + idx * 6;
}
// Compute max extent of Gaussian for fine-grained fustum culling
float max_dist2 = 9.f * max(cov3D[0], max(cov3D[3], cov3D[5]));
// Compute 2D screen-space covariance matrix
float3 cov = computeCov2D(p_orig, focal_x, focal_y, cov3D, viewmatrix);
float3 cov = computeCov2D(p_orig, focal_x, focal_y,
tan_fovx, tan_fovy,
cov3D, viewmatrix);
// Invert covariance (EWA algorithm)
float det = (cov.x * cov.z - cov.y * cov.y);
...
...
@@ -209,14 +211,6 @@ __global__ void preprocessCUDA(int P, int D, int M,
float det_inv = 1.f / det;
float3 conic = { cov.z * det_inv, -cov.y * det_inv, cov.x * det_inv };
// Fine-grained frustum culling against ellipsoid
float z_at_point = p_view.z + sqrt(max_dist2);
float x_to_border = z_at_point * tan_fovx;
float y_to_border = z_at_point * tan_fovy;
float D2_point = p_view.x * p_view.x + p_view.y * p_view.y;
if (D2_point - (x_to_border * x_to_border + y_to_border * y_to_border) > max_dist2)
return;
// Compute extent in screen space (by finding eigenvalues of
// 2D covariance matrix). Use extent to compute a bounding rectangle
// of screen-space tiles that this Gaussian overlaps with. Quit if
...
...
@@ -254,7 +248,8 @@ __global__ void preprocessCUDA(int P, int D, int M,
// block, each thread treats one pixel. Alternates between fetching
// and rasterizing data.
template <uint32_t CHANNELS>
__global__ void renderCUDA(
__global__ void __launch_bounds__(BLOCK_X * BLOCK_Y)
renderCUDA(
const uint2* __restrict__ ranges,
const uint32_t* __restrict__ point_list,
int W, int H,
...
...
@@ -407,8 +402,8 @@ void FORWARD::preprocess(int P, int D, int M,
const float* projmatrix,
const glm::vec3* cam_pos,
const int W, int H,
const float tan_fovx, float tan_fovy,
const float focal_x, float focal_y,
const float tan_fovx, float tan_fovy,
int* radii,
float2* means2D,
float* depths,
...
...
cuda_rasterizer/forward.h
View file @
3a07ac2e
...
...
@@ -24,8 +24,8 @@ namespace FORWARD
const
float
*
projmatrix
,
const
glm
::
vec3
*
cam_pos
,
const
int
W
,
int
H
,
const
float
tan_fovx
,
float
tan_fovy
,
const
float
focal_x
,
float
focal_y
,
const
float
tan_fovx
,
float
tan_fovy
,
int
*
radii
,
float2
*
points_xy_image
,
float
*
depths
,
...
...
cuda_rasterizer/rasterizer_impl.cu
View file @
3a07ac2e
...
...
@@ -247,8 +247,8 @@ int CudaRasterizer::Rasterizer::forward(
viewmatrix, projmatrix,
(glm::vec3*)cam_pos,
width, height,
tan_fovx, tan_fovy,
focal_x, focal_y,
tan_fovx, tan_fovy,
radii,
geomState.means2D,
geomState.depths,
...
...
@@ -408,6 +408,7 @@ void CudaRasterizer::Rasterizer::backward(
viewmatrix,
projmatrix,
focal_x, focal_y,
tan_fovx, tan_fovy,
(glm::vec3*)campos,
(float3*)dL_dmean2D,
dL_dconic,
...
...
rasterize_points.cu
View file @
3a07ac2e
...
...
@@ -47,14 +47,13 @@ RasterizeGaussiansCUDA(
}
const int P = means3D.size(0);
const int N = 1; // batch size hard-coded
const int H = image_height;
const int W = image_width;
auto int_opts = means3D.options().dtype(torch::kInt32);
auto float_opts = means3D.options().dtype(torch::kFloat32);
torch::Tensor out_color = torch::full({N
, N
UM_CHANNELS, H, W}, 0.0, float_opts);
torch::Tensor out_color = torch::full({NUM_CHANNELS, H, W}, 0.0, float_opts);
torch::Tensor radii = torch::full({P}, 0, means3D.options().dtype(torch::kInt32));
torch::Device device(torch::kCUDA);
...
...
@@ -126,8 +125,8 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Te
const torch::Tensor& imageBuffer)
{
const int P = means3D.size(0);
const int H = dL_dout_color.size(
2
);
const int W = dL_dout_color.size(
3
);
const int H = dL_dout_color.size(
1
);
const int W = dL_dout_color.size(
2
);
int M = 0;
if(sh.size(0) != 0)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment