Commit 30215244 by Bernhard Kerbl

Initial commit

parents
# Build script for the CudaRasterizer library (CUDA + C++ sources under
# cuda_rasterizer/).
cmake_minimum_required(VERSION 3.20)

project(DiffRast LANGUAGES CUDA CXX)

# Request C++17 for both host and device code and make it a hard requirement,
# so an older toolchain fails at configure time instead of silently decaying
# to an earlier standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# Headers are listed next to the sources so IDE generators show them.
add_library(CudaRasterizer
  cuda_rasterizer/backward.h
  cuda_rasterizer/backward.cu
  cuda_rasterizer/forward.h
  cuda_rasterizer/forward.cu
  cuda_rasterizer/auxiliary.h
  cuda_rasterizer/config.h
  cuda_rasterizer/rasterizer_impl.cu
  cuda_rasterizer/rasterizer_impl.h
  cuda_rasterizer/rasterizer.h
)

# Device code is generated for compute capabilities 7.5 and 8.6.
set_target_properties(CudaRasterizer PROPERTIES CUDA_ARCHITECTURES "75;86")

# Consumers only need the public rasterizer headers; GLM and the CUDA toolkit
# headers are implementation details of this target.
target_include_directories(CudaRasterizer
  PUBLIC
    "${CMAKE_CURRENT_SOURCE_DIR}/cuda_rasterizer"
)
target_include_directories(CudaRasterizer
  PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}/third_party/glm"
    ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
)
#ifndef CUDA_RASTERIZER_AUXILIARY_H_INCLUDED
#define CUDA_RASTERIZER_AUXILIARY_H_INCLUDED
#include "config.h"
// Product of the two block dimensions defined in config.h
// (presumably the number of threads/pixels per tile — confirm against the kernels).
#define BLOCK_SIZE (BLOCK_X * BLOCK_Y)
// BLOCK_SIZE divided by 32; 32 is presumably the warp size, making this warps per block.
#define NUM_WARPS (BLOCK_SIZE/32)
// Spherical harmonics coefficients
// Constant factors, grouped by band: SH_C0 (1 value), SH_C1 (1 value),
// SH_C2 (5 values) and SH_C3 (7 values). SH_C0 numerically matches
// 1 / (2 * sqrt(pi)) and SH_C1 matches sqrt(3 / (4 * pi)).
// NOTE(review): the order of the entries in SH_C2 / SH_C3 must match the
// order in which the consuming kernels index them — confirm against forward.cu.
__device__ constexpr float SH_C0 = 0.28209479177387814f;
__device__ constexpr float SH_C1 = 0.4886025119029199f;
__device__ constexpr float SH_C2[] = {
1.0925484305920792f,
-1.0925484305920792f,
0.31539156525252005f,
-1.0925484305920792f,
0.5462742152960396f
};
__device__ constexpr float SH_C3[] = {
-0.5900435899266435f,
2.890611442640554f,
-0.4570457994644658f,
0.3731763325901154f,
-0.4570457994644658f,
1.445305721320277f,
-0.5900435899266435f
};
__forceinline __host__ __device__ float ndc2Pix(float v, int S)
{
return ((v + 1.0) * S - 1.0) * 0.5;
}
__forceinline __device__ void getRect(const float2 p, int max_radius, uint2& rect_min, uint2& rect_max, dim3 grid)
{
rect_min = {
min(grid.x, max((int)0, (int)((p.x - max_radius) / BLOCK_X))),
min(grid.y, max((int)0, (int)((p.y - max_radius) / BLOCK_Y)))
};
rect_max = {
min(grid.x, max((int)0, (int)((p.x + max_radius + BLOCK_X - 1) / BLOCK_X))),
min(grid.y, max((int)0, (int)((p.y + max_radius + BLOCK_Y - 1) / BLOCK_Y)))
};
}
__forceinline __device__ float3 transformPoint4x3(const float3& p, const float* matrix)
{
float3 transformed = {
matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12],
matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13],
matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14],
};
return transformed;
}
__forceinline __device__ float4 transformPoint4x4(const float3& p, const float* matrix)
{
float4 transformed = {
matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12],
matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13],
matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14],
matrix[3] * p.x + matrix[7] * p.y + matrix[11] * p.z + matrix[15]
};
return transformed;
}
__forceinline __device__ float3 transformVec4x3(const float3& p, const float* matrix)
{
float3 transformed = {
matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z,
matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z,
matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z,
};
return transformed;
}
__forceinline __device__ float3 transformVec4x3Transpose(const float3& p, const float* matrix)
{
float3 transformed = {
matrix[0] * p.x + matrix[1] * p.y + matrix[2] * p.z,
matrix[4] * p.x + matrix[5] * p.y + matrix[6] * p.z,
matrix[8] * p.x + matrix[9] * p.y + matrix[10] * p.z,
};
return transformed;
}
__forceinline __device__ float dnormvdz(float3 v, float3 dv)
{
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z;
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);
float dnormvdz = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32;
return dnormvdz;
}
__forceinline __device__ float3 dnormvdv(float3 v, float3 dv)
{
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z;
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);
float3 dnormvdv;
dnormvdv.x = ((+sum2 - v.x * v.x) * dv.x - v.y * v.x * dv.y - v.z * v.x * dv.z) * invsum32;
dnormvdv.y = (-v.x * v.y * dv.x + (sum2 - v.y * v.y) * dv.y - v.z * v.y * dv.z) * invsum32;
dnormvdv.z = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32;
return dnormvdv;
}
__forceinline __device__ float4 dnormvdv(float4 v, float4 dv)
{
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w;
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);
float4 vdv = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w };
float vdv_sum = vdv.x + vdv.y + vdv.z + vdv.w;
float4 dnormvdv;
dnormvdv.x = ((sum2 - v.x * v.x) * dv.x - v.x * (vdv_sum - vdv.x)) * invsum32;
dnormvdv.y = ((sum2 - v.y * v.y) * dv.y - v.y * (vdv_sum - vdv.y)) * invsum32;
dnormvdv.z = ((sum2 - v.z * v.z) * dv.z - v.z * (vdv_sum - vdv.z)) * invsum32;
dnormvdv.w = ((sum2 - v.w * v.w) * dv.w - v.w * (vdv_sum - vdv.w)) * invsum32;
return dnormvdv;
}
__forceinline __device__ float sigmoid(float x)
{
return 1.0f / (1.0f + expf(-x));
}
__forceinline __device__ bool in_frustum(int idx,
const float* orig_points,
const float* viewmatrix,
const float* projmatrix,
bool prefiltered,
float3& p_view)
{
float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] };
// Bring points to screen space
float4 p_hom = transformPoint4x4(p_orig, projmatrix);
float p_w = 1.0f / (p_hom.w + 0.0000001f);
float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w };
p_view = transformPoint4x3(p_orig, viewmatrix);
if (p_view.z <= 0.2f || ((p_proj.x < -1.3 || p_proj.x > 1.3 || p_proj.y < -1.3 || p_proj.y > 1.3)))
{
if (prefiltered)
{
printf("Point is filtered although prefiltered is set. This shouldn't happen!");
__trap();
}
return false;
}
return true;
}
#endif
\ No newline at end of file
#ifndef CUDA_RASTERIZER_BACKWARD_H_INCLUDED
#define CUDA_RASTERIZER_BACKWARD_H_INCLUDED
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#define GLM_FORCE_CUDA
#include <glm/glm.hpp>
// Declarations of the backward-pass entry points. The definitions are not in
// this header (presumably in backward.cu — confirm).
namespace BACKWARD
{
// Backward counterpart of FORWARD::render.
// Inputs are const pointers; the four trailing non-const pointers
// (dL_dmean2D, dL_dconic2D, dL_dopacity, dL_dcolors) receive the results.
// NOTE(review): grid/block are presumably the launch configuration and
// ranges/point_list the per-tile sorted lists built by the forward pass — confirm.
void render(
const dim3 grid, dim3 block,
const uint2* ranges,
const uint32_t* point_list,
int W, int H,
const float* bg_color,
const float2* means2D,
const float4* conic_opacity,
const float* colors,
const float* final_Ts,
const uint32_t* n_contrib,
const float* dL_dpixels,
float3* dL_dmean2D,
float4* dL_dconic2D,
float* dL_dopacity,
float* dL_dcolors);
// Backward counterpart of FORWARD::preprocess.
// Takes dL_dmean2D and dL_dconics as const inputs; the non-const pointers
// (dL_dmeans, dL_dcolor, dL_dcov3D, dL_dsh, dL_dscale, dL_drot) are outputs.
// NOTE(review): P, D, M presumably mean point count, SH degree and SH
// coefficients per point, as at the call site in rasterize_points.cu — confirm.
void preprocess(
int P, int D, int M,
const float3* means,
const int* radii,
const float* shs,
const bool* clamped,
const glm::vec3* scales,
const glm::vec4* rotations,
const float scale_modifier,
const float* cov3Ds,
const float* view,
const float* proj,
const float focal_x, float focal_y,
const glm::vec3* campos,
const float3* dL_dmean2D,
const float* dL_dconics,
glm::vec3* dL_dmeans,
float* dL_dcolor,
float* dL_dcov3D,
float* dL_dsh,
glm::vec3* dL_dscale,
glm::vec4* dL_drot);
}
#endif
\ No newline at end of file
#ifndef CUDA_RASTERIZER_CONFIG_H_INCLUDED
#define CUDA_RASTERIZER_CONFIG_H_INCLUDED
// Number of colour channels per pixel; also used to size the output image
// and colour-gradient tensors in rasterize_points.cu.
#define NUM_CHANNELS 3 // Default 3, RGB
// Tile dimensions in pixels: auxiliary.h divides pixel coordinates by these
// to obtain tile indices and multiplies them to get BLOCK_SIZE.
#define BLOCK_X 16
#define BLOCK_Y 16
#endif
\ No newline at end of file
#ifndef CUDA_RASTERIZER_FORWARD_H_INCLUDED
#define CUDA_RASTERIZER_FORWARD_H_INCLUDED
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#define GLM_FORCE_CUDA
#include <glm/glm.hpp>
// Declarations of the forward-pass entry points. The definitions are not in
// this header (presumably in forward.cu — confirm).
namespace FORWARD
{
// Perform initial steps for each Gaussian prior to rasterization.
// Const pointers are inputs; the non-const pointers (clamped, radii,
// points_xy_image, depths, cov3Ds, colors, conic_opacity, tiles_touched)
// are written by this step.
// NOTE(review): P, D, M presumably mean point count, SH degree and SH
// coefficients per point, as at the call site in rasterize_points.cu — confirm.
void preprocess(int P, int D, int M,
const float* orig_points,
const glm::vec3* scales,
const float scale_modifier,
const glm::vec4* rotations,
const float* opacities,
const float* shs,
bool* clamped,
const float* cov3D_precomp,
const float* colors_precomp,
const float* viewmatrix,
const float* projmatrix,
const glm::vec3* cam_pos,
const int W, int H,
const float tan_fovx, float tan_fovy,
const float focal_x, float focal_y,
int* radii,
float2* points_xy_image,
float* depths,
float* cov3Ds,
float* colors,
float4* conic_opacity,
const dim3 grid,
uint32_t* tiles_touched,
bool prefiltered);
// Main rasterization method.
// Outputs are final_T, n_contrib and out_color; the same-named quantities
// (final_Ts, n_contrib) are inputs of BACKWARD::render.
void render(
const dim3 grid, dim3 block,
const uint2* ranges,
const uint32_t* point_list,
int W, int H,
const float2* points_xy_image,
const float* features,
const float4* conic_opacity,
float* final_T,
uint32_t* n_contrib,
const float* bg_color,
float* out_color);
}
#endif
\ No newline at end of file
#ifndef CUDA_RASTERIZER_H_INCLUDED
#define CUDA_RASTERIZER_H_INCLUDED
#include <vector>
namespace CudaRasterizer
{
	// Abstract interface of the rasterizer. Callers obtain an instance through
	// make() and own the returned pointer (rasterize_points.cu wraps it in a
	// std::unique_ptr). All pointer arguments are raw buffers supplied by the
	// caller. NOTE(review): they are presumably device pointers — confirm.
	class Rasterizer
	{
	public:
		// Writes one flag per point into `present`, for P points.
		// NOTE(review): the flag presumably reflects the frustum test against
		// viewmatrix/projmatrix — confirm in the implementation.
		virtual void markVisible(
			int P,
			float* means3D,
			float* viewmatrix,
			float* projmatrix,
			bool* present) = 0;

		// Forward rendering pass.
		//   P, D, M     point count, SH degree and SH coefficients per point,
		//               as passed by rasterize_points.cu
		//   out_color   [out] rendered image buffer
		//   radii       [out, optional] per-point radii; may be nullptr
		virtual void forward(
			const int P, int D, int M,
			const float* background,
			const int width, int height,
			const float* means3D,
			const float* shs,
			const float* colors_precomp,
			const float* opacities,
			const float* scales,
			const float scale_modifier,
			const float* rotations,
			const float* cov3D_precomp,
			const float* viewmatrix,
			const float* projmatrix,
			const float* cam_pos,
			const float tan_fovx, float tan_fovy,
			const bool prefiltered,
			float* out_color,
			int* radii = nullptr) = 0;

		// Backward pass. dL_dpix is the incoming gradient; every non-const
		// dL_* pointer is an output buffer for one gradient.
		virtual void backward(
			const int P, int D, int M,
			const float* background,
			const int width, int height,
			const float* means3D,
			const float* shs,
			const float* colors_precomp,
			const float* scales,
			const float scale_modifier,
			const float* rotations,
			const float* cov3D_precomp,
			const float* viewmatrix,
			const float* projmatrix,
			const float* campos,
			const float tan_fovx, float tan_fovy,
			const int* radii,
			const float* dL_dpix,
			float* dL_dmean2D,
			float* dL_dconic,
			float* dL_dopacity,
			float* dL_dcolor,
			float* dL_dmean3D,
			float* dL_dcov3D,
			float* dL_dsh,
			float* dL_dscale,
			float* dL_drot) = 0;

		// Virtual so implementations are destroyed correctly through a base pointer.
		virtual ~Rasterizer() {}

		// Factory for the concrete implementation; the caller owns the result.
		// NOTE(review): resizeMultiplier is presumably the growth factor for
		// internal buffers — confirm in rasterizer_impl.cu.
		static Rasterizer* make(int resizeMultiplier = 2);
	};
}
#endif
\ No newline at end of file
#pragma once
#include <iostream>
#include <vector>
#include "rasterizer.h"
#include <cuda_runtime_api.h>
#include <thrust/device_vector.h>
namespace CudaRasterizer
{
class RasterizerImpl : public Rasterizer
{
private:
int maxP = 0;
int maxPixels = 0;
int resizeMultiplier = 2;
// Initial aux structs
size_t sorting_size;
size_t list_sorting_size;
size_t scan_size;
thrust::device_vector<float> depths;
thrust::device_vector<uint32_t> tiles_touched;
thrust::device_vector<uint32_t> point_offsets;
thrust::device_vector<uint64_t> point_list_keys_unsorted;
thrust::device_vector<uint64_t> point_list_keys;
thrust::device_vector<uint32_t> point_list_unsorted;
thrust::device_vector<uint32_t> point_list;
thrust::device_vector<char> scanning_space;
thrust::device_vector<char> list_sorting_space;
thrust::device_vector<bool> clamped;
thrust::device_vector<int> internal_radii;
// Internal state kept across forward / backward
thrust::device_vector<uint2> ranges;
thrust::device_vector<uint32_t> n_contrib;
thrust::device_vector<float> accum_alpha;
thrust::device_vector<float2> means2D;
thrust::device_vector<float> cov3D;
thrust::device_vector<float4> conic_opacity;
thrust::device_vector<float> rgb;
public:
virtual void markVisible(
int P,
float* means3D,
float* viewmatrix,
float* projmatrix,
bool* present) override;
virtual void forward(
const int P, int D, int M,
const float* background,
const int width, int height,
const float* means3D,
const float* shs,
const float* colors_precomp,
const float* opacities,
const float* scales,
const float scale_modifier,
const float* rotations,
const float* cov3D_precomp,
const float* viewmatrix,
const float* projmatrix,
const float* cam_pos,
const float tan_fovx, float tan_fovy,
const bool prefiltered,
float* out_color,
int* radii) override;
virtual void backward(
const int P, int D, int M,
const float* background,
const int width, int height,
const float* means3D,
const float* shs,
const float* colors_precomp,
const float* scales,
const float scale_modifier,
const float* rotations,
const float* cov3D_precomp,
const float* viewmatrix,
const float* projmatrix,
const float* campos,
const float tan_fovx, float tan_fovy,
const int* radii,
const float* dL_dpix,
float* dL_dmean2D,
float* dL_dconic,
float* dL_dopacity,
float* dL_dcolor,
float* dL_dmean3D,
float* dL_dcov3D,
float* dL_dsh,
float* dL_dscale,
float* dL_drot) override;
RasterizerImpl(int resizeMultiplier);
virtual ~RasterizerImpl() override;
};
};
\ No newline at end of file
#include <torch/extension.h>
#include "rasterize_points.h"
// Python bindings: exposes the three C++ entry points declared in
// rasterize_points.h under the Python names used by the autograd wrapper
// (_C.rasterize_gaussians, _C.rasterize_gaussians_backward, _C.mark_visible).
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("rasterize_gaussians", &RasterizeGaussiansCUDA);
m.def("rasterize_gaussians_backward", &RasterizeGaussiansBackwardCUDA);
m.def("mark_visible", &markVisible);
}
\ No newline at end of file
// Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#include <math.h>
#include <torch/extension.h>
#include <cstdio>
#include <sstream>
#include <iostream>
#include <tuple>
#include <stdio.h>
#include <cuda_runtime_api.h>
#include <memory>
#include "cuda_rasterizer/config.h"
#include "cuda_rasterizer/rasterizer.h"
#include <fstream>
#include <string>
static std::unique_ptr<CudaRasterizer::Rasterizer> cudaRenderer = nullptr;
// Forward entry point exposed to Python as `rasterize_gaussians`.
//
// Validates that means3D is (num_points, 3), lazily creates the shared
// rasterizer instance and runs its forward pass.
//
// Returns (out_color, radii):
//   out_color  float32 tensor of shape (1, NUM_CHANNELS, image_height, image_width)
//   radii      int32 tensor of shape (num_points), zero-initialised
// Both are allocated with means3D's tensor options (so on its device). When
// num_points is 0 the rasterizer is not invoked and both stay zero-filled.
//
// Throws c10::Error if means3D does not have the expected shape.
//
// Every input is made contiguous before its raw pointer is taken; the
// temporaries created by contiguous() live until the forward() call returns.
std::tuple<torch::Tensor, torch::Tensor>
RasterizeGaussiansCUDA(
	const torch::Tensor& background,
	const torch::Tensor& means3D,
	const torch::Tensor& colors,
	const torch::Tensor& opacity,
	const torch::Tensor& scales,
	const torch::Tensor& rotations,
	const float scale_modifier,
	const torch::Tensor& cov3D_precomp,
	const torch::Tensor& viewmatrix,
	const torch::Tensor& projmatrix,
	const float tan_fovx,
	const float tan_fovy,
	const int image_height,
	const int image_width,
	const torch::Tensor& sh,
	const int degree,
	const torch::Tensor& campos,
	const bool prefiltered)
{
	TORCH_CHECK(means3D.ndimension() == 2 && means3D.size(1) == 3,
		"means3D must have dimensions (num_points, 3)");

	if (cudaRenderer == nullptr)
	{
		cudaRenderer.reset(CudaRasterizer::Rasterizer::make());
	}

	const int P = means3D.size(0);
	const int N = 1; // batch size hard-coded
	const int H = image_height;
	const int W = image_width;

	auto int_opts = means3D.options().dtype(torch::kInt32);
	auto float_opts = means3D.options().dtype(torch::kFloat32);

	// Freshly allocated, hence already contiguous: take their pointers directly.
	torch::Tensor out_color = torch::full({N, NUM_CHANNELS, H, W}, 0.0, float_opts);
	torch::Tensor radii = torch::full({P}, 0, int_opts);

	if (P != 0)
	{
		// Number of SH coefficients per point; 0 when no SH tensor was given.
		int M = 0;
		if (sh.size(0) != 0)
		{
			M = sh.size(1);
		}

		cudaRenderer->forward(P, degree, M,
			background.contiguous().data_ptr<float>(),
			W, H,
			means3D.contiguous().data_ptr<float>(),
			sh.contiguous().data_ptr<float>(),
			colors.contiguous().data_ptr<float>(),
			opacity.contiguous().data_ptr<float>(),
			scales.contiguous().data_ptr<float>(),
			scale_modifier,
			rotations.contiguous().data_ptr<float>(),
			cov3D_precomp.contiguous().data_ptr<float>(),
			viewmatrix.contiguous().data_ptr<float>(),
			projmatrix.contiguous().data_ptr<float>(),
			campos.contiguous().data_ptr<float>(),
			tan_fovx,
			tan_fovy,
			prefiltered,
			out_color.data_ptr<float>(),
			radii.data_ptr<int>());
	}
	return std::make_tuple(out_color, radii);
}
// Backward entry point exposed to Python as `rasterize_gaussians_backward`.
//
// Allocates zero-filled gradient tensors with means3D's options and lets the
// shared rasterizer fill them. Image size is taken from dL_dout_color
// (dimension 2 = height, dimension 3 = width).
//
// Returns, in this order:
//   dL_dmeans2D (P,3), dL_dcolors (P,NUM_CHANNELS), dL_dopacity (P,1),
//   dL_dmeans3D (P,3), dL_dcov3D (P,6), dL_dsh (P,M,3), dL_dscales (P,3),
//   dL_drotations (P,4)
// dL_dconic (P,2,2) is computed as an intermediate and not returned.
//
// Throws c10::Error if no forward pass has created the rasterizer yet: the
// rasterizer declares internal state "kept across forward / backward".
//
// All inputs, including scales and rotations, are made contiguous before their
// raw pointers are taken, so strided tensors are not read with the wrong layout.
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
RasterizeGaussiansBackwardCUDA(
	const torch::Tensor& background,
	const torch::Tensor& means3D,
	const torch::Tensor& radii,
	const torch::Tensor& colors,
	const torch::Tensor& scales,
	const torch::Tensor& rotations,
	const float scale_modifier,
	const torch::Tensor& cov3D_precomp,
	const torch::Tensor& viewmatrix,
	const torch::Tensor& projmatrix,
	const float tan_fovx,
	const float tan_fovy,
	const torch::Tensor& dL_dout_color,
	const torch::Tensor& sh,
	const int degree,
	const torch::Tensor& campos)
{
	const int P = means3D.size(0);
	const int H = dL_dout_color.size(2);
	const int W = dL_dout_color.size(3);

	// Number of SH coefficients per point; 0 when no SH tensor was given.
	int M = 0;
	if (sh.size(0) != 0)
	{
		M = sh.size(1);
	}

	// Gradient buffers: freshly allocated, hence contiguous, so the rasterizer
	// writes straight into the tensors that are returned.
	torch::Tensor dL_dmeans3D = torch::zeros({P, 3}, means3D.options());
	torch::Tensor dL_dmeans2D = torch::zeros({P, 3}, means3D.options());
	torch::Tensor dL_dcolors = torch::zeros({P, NUM_CHANNELS}, means3D.options());
	torch::Tensor dL_dconic = torch::zeros({P, 2, 2}, means3D.options());
	torch::Tensor dL_dopacity = torch::zeros({P, 1}, means3D.options());
	torch::Tensor dL_dcov3D = torch::zeros({P, 6}, means3D.options());
	torch::Tensor dL_dsh = torch::zeros({P, M, 3}, means3D.options());
	torch::Tensor dL_dscales = torch::zeros({P, 3}, means3D.options());
	torch::Tensor dL_drotations = torch::zeros({P, 4}, means3D.options());

	if (P != 0)
	{
		TORCH_CHECK(cudaRenderer != nullptr,
			"rasterize_gaussians_backward called before any forward pass created the rasterizer");

		cudaRenderer->backward(P, degree, M,
			background.contiguous().data_ptr<float>(),
			W, H,
			means3D.contiguous().data_ptr<float>(),
			sh.contiguous().data_ptr<float>(),
			colors.contiguous().data_ptr<float>(),
			scales.contiguous().data_ptr<float>(),
			scale_modifier,
			rotations.contiguous().data_ptr<float>(),
			cov3D_precomp.contiguous().data_ptr<float>(),
			viewmatrix.contiguous().data_ptr<float>(),
			projmatrix.contiguous().data_ptr<float>(),
			campos.contiguous().data_ptr<float>(),
			tan_fovx,
			tan_fovy,
			radii.contiguous().data_ptr<int>(),
			dL_dout_color.contiguous().data_ptr<float>(),
			dL_dmeans2D.data_ptr<float>(),
			dL_dconic.data_ptr<float>(),
			dL_dopacity.data_ptr<float>(),
			dL_dcolors.data_ptr<float>(),
			dL_dmeans3D.data_ptr<float>(),
			dL_dcov3D.data_ptr<float>(),
			dL_dsh.data_ptr<float>(),
			dL_dscales.data_ptr<float>(),
			dL_drotations.data_ptr<float>());
	}

	return std::make_tuple(dL_dmeans2D, dL_dcolors, dL_dopacity, dL_dmeans3D, dL_dcov3D, dL_dsh, dL_dscales, dL_drotations);
}
// Entry point exposed to Python as `mark_visible`.
//
// Lazily creates the shared rasterizer instance and asks it to flag points.
// Returns a bool tensor with one entry per row of means3D, allocated with
// means3D's options and initialised to false; when means3D has no rows the
// rasterizer is not invoked.
torch::Tensor markVisible(
	torch::Tensor& means3D,
	torch::Tensor& viewmatrix,
	torch::Tensor& projmatrix)
{
	if (cudaRenderer == nullptr)
	{
		cudaRenderer.reset(CudaRasterizer::Rasterizer::make());
	}

	const int P = means3D.size(0);

	// Freshly allocated, hence contiguous: the rasterizer writes into it directly.
	torch::Tensor present = torch::full({P}, false, means3D.options().dtype(at::kBool));

	if (P != 0)
	{
		cudaRenderer->markVisible(P,
			means3D.contiguous().data_ptr<float>(),
			viewmatrix.contiguous().data_ptr<float>(),
			projmatrix.contiguous().data_ptr<float>(),
			present.data_ptr<bool>());
	}

	return present;
}
\ No newline at end of file
// Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#pragma once
#include <torch/extension.h>
#include <cstdio>
#include <tuple>
#include <string>
// Forward rasterization entry point (bound to Python in ext.cpp).
// Returns (out_color, radii) as built by the definition in rasterize_points.cu:
// the rendered image and one int32 value per point.
// means3D must have shape (num_points, 3); the definition raises an error otherwise.
std::tuple<torch::Tensor, torch::Tensor>
RasterizeGaussiansCUDA(
const torch::Tensor& background,
const torch::Tensor& means3D,
const torch::Tensor& colors,
const torch::Tensor& opacity,
const torch::Tensor& scales,
const torch::Tensor& rotations,
const float scale_modifier,
const torch::Tensor& cov3D_precomp,
const torch::Tensor& viewmatrix,
const torch::Tensor& projmatrix,
const float tan_fovx,
const float tan_fovy,
const int image_height,
const int image_width,
const torch::Tensor& sh,
const int degree,
const torch::Tensor& campos,
const bool prefiltered);
// Backward rasterization entry point (bound to Python in ext.cpp).
// Returns eight gradient tensors in the order used by the definition in
// rasterize_points.cu: dL_dmeans2D, dL_dcolors, dL_dopacity, dL_dmeans3D,
// dL_dcov3D, dL_dsh, dL_dscales, dL_drotations.
// dL_dout_color supplies the image size through its dimensions 2 and 3.
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
RasterizeGaussiansBackwardCUDA(
const torch::Tensor& background,
const torch::Tensor& means3D,
const torch::Tensor& radii,
const torch::Tensor& colors,
const torch::Tensor& scales,
const torch::Tensor& rotations,
const float scale_modifier,
const torch::Tensor& cov3D_precomp,
const torch::Tensor& viewmatrix,
const torch::Tensor& projmatrix,
const float tan_fovx,
const float tan_fovy,
const torch::Tensor& dL_dout_color,
const torch::Tensor& sh,
const int degree,
const torch::Tensor& campos);
// Visibility query (bound to Python as `mark_visible` in ext.cpp).
// Returns a bool tensor with one entry per row of means3D, as built by the
// definition in rasterize_points.cu.
torch::Tensor markVisible(
torch::Tensor& means3D,
torch::Tensor& viewmatrix,
torch::Tensor& projmatrix);
\ No newline at end of file
from typing import NamedTuple
import torch.nn as nn
import torch
import utils.profiling_utils as profiling_utils
from diff_rasterizationCUDA import _C
def rasterize_gaussians(
    means3D,
    means2D,
    sh,
    colors_precomp,
    opacities,
    scales,
    rotations,
    cov3Ds_precomp,
    raster_settings,
):
    """Run the differentiable rasterizer through its autograd function.

    The arguments are forwarded positionally, in the order listed, to
    ``_RasterizeGaussians.apply``; the result of that call (the ``color`` and
    ``radii`` tensors produced by ``_RasterizeGaussians.forward``) is returned
    unchanged.
    """
    forward_inputs = (
        means3D,
        means2D,
        sh,
        colors_precomp,
        opacities,
        scales,
        rotations,
        cov3Ds_precomp,
        raster_settings,
    )
    return _RasterizeGaussians.apply(*forward_inputs)
class _RasterizeGaussians(torch.autograd.Function):
    """Autograd bridge between PyTorch and the C++/CUDA rasterizer in ``_C``."""

    @staticmethod
    def forward(
        ctx,
        means3D,
        means2D,
        sh,
        colors_precomp,
        opacities,
        scales,
        rotations,
        cov3Ds_precomp,
        raster_settings,
    ):
        """Render and stash what the backward pass needs.

        ``means2D`` is accepted but not forwarded to the extension; it only
        receives a gradient in ``backward``. Returns ``(color, radii)`` exactly
        as produced by ``_C.rasterize_gaussians``.
        """
        cfg = raster_settings

        # Positional order dictated by the C++ signature of rasterize_gaussians.
        color, radii = _C.rasterize_gaussians(
            cfg.bg,
            means3D,
            colors_precomp,
            opacities,
            scales,
            rotations,
            cfg.scale_modifier,
            cov3Ds_precomp,
            cfg.viewmatrix,
            cfg.projmatrix,
            cfg.tanfovx,
            cfg.tanfovy,
            cfg.image_height,
            cfg.image_width,
            sh,
            cfg.sh_degree,
            cfg.campos,
            cfg.prefiltered,
        )

        # Settings are not tensors, so they travel on ctx directly.
        ctx.raster_settings = cfg
        ctx.save_for_backward(colors_precomp, means3D, scales, rotations, cov3Ds_precomp, radii, sh)
        return color, radii

    @staticmethod
    def backward(ctx, grad_out_color, _):
        """Compute input gradients from the gradient of the rendered image.

        The second incoming gradient (for ``radii``) is ignored. Returns one
        entry per ``forward`` input, in the same order; ``raster_settings``
        gets ``None``.
        """
        cfg = ctx.raster_settings
        (colors_precomp, means3D, scales, rotations,
         cov3Ds_precomp, radii, sh) = ctx.saved_tensors

        # Positional order dictated by the C++ signature of
        # rasterize_gaussians_backward.
        backward_inputs = (
            cfg.bg,
            means3D,
            radii,
            colors_precomp,
            scales,
            rotations,
            cfg.scale_modifier,
            cov3Ds_precomp,
            cfg.viewmatrix,
            cfg.projmatrix,
            cfg.tanfovx,
            cfg.tanfovy,
            grad_out_color,
            sh,
            cfg.sh_degree,
            cfg.campos,
        )

        # The extension call is wrapped in a profiling range.
        prof_range = profiling_utils.start("rasterize", "yellow", "render" )
        (
            grad_means2D,
            grad_colors_precomp,
            grad_opacities,
            grad_means3D,
            grad_cov3Ds_precomp,
            grad_sh,
            grad_scales,
            grad_rotations,
        ) = _C.rasterize_gaussians_backward(*backward_inputs)
        profiling_utils.stop(prof_range)

        # Reordered from the extension's output order to forward()'s input order.
        return (
            grad_means3D,
            grad_means2D,
            grad_sh,
            grad_colors_precomp,
            grad_opacities,
            grad_scales,
            grad_rotations,
            grad_cov3Ds_precomp,
            None,
        )
class GaussianRasterizationSettings(NamedTuple):
    """Immutable bundle of per-render settings handed to the rasterizer.

    The autograd function reads every field by name and forwards it to the
    C++ extension; field order is part of the tuple's positional interface.
    """

    # Output image size in pixels.
    image_height: int
    image_width: int
    # Forwarded to the extension as tan_fovx / tan_fovy.
    tanfovx: float
    tanfovy: float
    # Forwarded as the `background` tensor.
    bg: torch.Tensor
    scale_modifier: float
    # Forwarded as the view and projection matrices.
    viewmatrix: torch.Tensor
    projmatrix: torch.Tensor
    # Forwarded as the `degree` argument.
    sh_degree: int
    campos: torch.Tensor
    prefiltered: bool
class GaussianRasterizer(nn.Module):
    """Module wrapper around the differentiable Gaussian rasterizer.

    Holds one ``raster_settings`` object and uses it for every call.
    """

    def __init__(self, raster_settings):
        super().__init__()
        self.raster_settings = raster_settings

    def markVisible(self, positions):
        """Return the boolean tensor produced by ``_C.mark_visible``.

        The call receives ``positions`` and the stored view and projection
        matrices, and runs under ``torch.no_grad()``.
        """
        # Mark visible points (based on frustum culling for camera) with a boolean
        with torch.no_grad():
            raster_settings = self.raster_settings
            visible = _C.mark_visible(
                positions,
                raster_settings.viewmatrix,
                raster_settings.projmatrix)

        return visible

    def forward(self, means3D, means2D, opacities, shs = None, colors_precomp = None, scales = None, rotations = None, cov3D_precomp = None):
        """Rasterize the given Gaussians and return ``rasterize_gaussians``' result.

        Exactly one of ``shs`` / ``colors_precomp`` must be given, and exactly
        one of the ``scales`` + ``rotations`` pair / ``cov3D_precomp``.

        Raises:
            ValueError: if either "exactly one of" rule is violated.
        """
        raster_settings = self.raster_settings

        # Colour source: SH coefficients XOR precomputed colours.
        if (shs is None and colors_precomp is None) or (shs is not None and colors_precomp is not None):
            raise ValueError('Please provide exactly one of either SHs or precomputed colors!')

        # Covariance source: (scales and rotations) XOR precomputed covariance.
        if ((scales is None or rotations is None) and cov3D_precomp is None) or ((scales is not None or rotations is not None) and cov3D_precomp is not None):
            raise ValueError('Please provide exactly one of either scale/rotation pair or precomputed 3D covariance!')

        # The extension takes tensors for every argument, so absent optional
        # inputs are replaced with empty tensors.
        if shs is None:
            shs = torch.Tensor([])
        if colors_precomp is None:
            colors_precomp = torch.Tensor([])

        if scales is None:
            scales = torch.Tensor([])
        if rotations is None:
            rotations = torch.Tensor([])
        if cov3D_precomp is None:
            cov3D_precomp = torch.Tensor([])

        # Invoke C++/CUDA rasterization routine
        return rasterize_gaussians(
            means3D,
            means2D,
            shs,
            colors_precomp,
            opacities,
            scales,
            rotations,
            cov3D_precomp,
            raster_settings,
        )
from setuptools import setup
from torch.utils.cpp_extension import CUDAExtension, BuildExtension
import os
# Directory containing this setup script; used to build an absolute include
# path so the build works regardless of the current working directory.
_here = os.path.dirname(os.path.abspath(__file__))

# /wd4624 is an MSVC switch (it silences warning C4624). Other host compilers
# do not understand it, so it is only passed on Windows.
_cxx_flags = ["/wd4624"] if os.name == "nt" else []

setup(
    name="diff_rasterizationCUDA",
    ext_modules=[
        CUDAExtension(
            name="diff_rasterizationCUDA._C",
            sources=[
                "rasterize_points.cu",
                "cuda_rasterizer/rasterizer_impl.cu",
                "cuda_rasterizer/forward.cu",
                "cuda_rasterizer/backward.cu",
                "ext.cpp"],
            # nvcc needs the bundled GLM headers on its include path.
            extra_compile_args={"nvcc": ["-I" + os.path.join(_here, "third_party/glm/")],
                                "cxx": _cxx_flags})
        ],
    cmdclass={
        'build_ext': BuildExtension
    }
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment