Commit a42fd590 by Bernhard Kerbl

Merge branch 'static_only'

parents 55b0c1b0 681c0f9e
......@@ -2,6 +2,7 @@
#define CUDA_RASTERIZER_H_INCLUDED
#include <vector>
#include <functional>
namespace CudaRasterizer
{
......@@ -9,14 +10,17 @@ namespace CudaRasterizer
{
public:
virtual void markVisible(
static void markVisible(
int P,
float* means3D,
float* viewmatrix,
float* projmatrix,
bool* present) = 0;
bool* present);
virtual void forward(
static int forward(
std::function<char* (size_t)> geometryBuffer,
std::function<char* (size_t)> binningBuffer,
std::function<char* (size_t)> imageBuffer,
const int P, int D, int M,
const float* background,
const int width, int height,
......@@ -34,10 +38,10 @@ namespace CudaRasterizer
const float tan_fovx, float tan_fovy,
const bool prefiltered,
float* out_color,
int* radii = nullptr) = 0;
int* radii);
virtual void backward(
const int P, int D, int M,
static void backward(
const int P, int D, int M, int R,
const float* background,
const int width, int height,
const float* means3D,
......@@ -47,11 +51,14 @@ namespace CudaRasterizer
const float scale_modifier,
const float* rotations,
const float* cov3D_precomp,
const float* viewmatrix,
const float* viewmatrix,
const float* projmatrix,
const float* campos,
const float tan_fovx, float tan_fovy,
const int* radii,
char* geom_buffer,
char* binning_buffer,
char* image_buffer,
const float* dL_dpix,
float* dL_dmean2D,
float* dL_dconic,
......@@ -61,11 +68,7 @@ namespace CudaRasterizer
float* dL_dcov3D,
float* dL_dsh,
float* dL_dscale,
float* dL_drot) = 0;
virtual ~Rasterizer() {};
static Rasterizer* make(int resizeMultipliyer = 2);
float* dL_drot);
};
};
......
......@@ -4,101 +4,60 @@
#include <vector>
#include "rasterizer.h"
#include <cuda_runtime_api.h>
#include <thrust/device_vector.h>
namespace CudaRasterizer
{
class RasterizerImpl : public Rasterizer
template <typename T>
static void obtain(char*& chunk, T*& ptr, std::size_t count, std::size_t alignment)
{
private:
int maxP = 0;
int maxPixels = 0;
int resizeMultiplier = 2;
std::size_t offset = (reinterpret_cast<std::uintptr_t>(chunk) + alignment - 1) & ~(alignment - 1);
ptr = reinterpret_cast<T*>(offset);
chunk = reinterpret_cast<char*>(ptr + count);
}
// Initial aux structs
size_t sorting_size;
size_t list_sorting_size;
struct GeometryState
{
size_t scan_size;
thrust::device_vector<float> depths;
thrust::device_vector<uint32_t> tiles_touched;
thrust::device_vector<uint32_t> point_offsets;
thrust::device_vector<uint64_t> point_list_keys_unsorted;
thrust::device_vector<uint64_t> point_list_keys;
thrust::device_vector<uint32_t> point_list_unsorted;
thrust::device_vector<uint32_t> point_list;
thrust::device_vector<char> scanning_space;
thrust::device_vector<char> list_sorting_space;
thrust::device_vector<bool> clamped;
thrust::device_vector<int> internal_radii;
// Internal state kept across forward / backward
thrust::device_vector<uint2> ranges;
thrust::device_vector<uint32_t> n_contrib;
thrust::device_vector<float> accum_alpha;
thrust::device_vector<float2> means2D;
thrust::device_vector<float> cov3D;
thrust::device_vector<float4> conic_opacity;
thrust::device_vector<float> rgb;
public:
virtual void markVisible(
int P,
float* means3D,
float* viewmatrix,
float* projmatrix,
bool* present) override;
float* depths;
char* scanning_space;
bool* clamped;
int* internal_radii;
float2* means2D;
float* cov3D;
float4* conic_opacity;
float* rgb;
uint32_t* point_offsets;
uint32_t* tiles_touched;
static GeometryState fromChunk(char*& chunk, size_t P);
};
virtual void forward(
const int P, int D, int M,
const float* background,
const int width, int height,
const float* means3D,
const float* shs,
const float* colors_precomp,
const float* opacities,
const float* scales,
const float scale_modifier,
const float* rotations,
const float* cov3D_precomp,
const float* viewmatrix,
const float* projmatrix,
const float* cam_pos,
const float tan_fovx, float tan_fovy,
const bool prefiltered,
float* out_color,
int* radii) override;
struct ImageState
{
uint2* ranges;
uint32_t* n_contrib;
float* accum_alpha;
virtual void backward(
const int P, int D, int M,
const float* background,
const int width, int height,
const float* means3D,
const float* shs,
const float* colors_precomp,
const float* scales,
const float scale_modifier,
const float* rotations,
const float* cov3D_precomp,
const float* viewmatrix,
const float* projmatrix,
const float* campos,
const float tan_fovx, float tan_fovy,
const int* radii,
const float* dL_dpix,
float* dL_dmean2D,
float* dL_dconic,
float* dL_dopacity,
float* dL_dcolor,
float* dL_dmean3D,
float* dL_dcov3D,
float* dL_dsh,
float* dL_dscale,
float* dL_drot) override;
static ImageState fromChunk(char*& chunk, size_t N);
};
RasterizerImpl(int resizeMultiplier);
struct BinningState
{
size_t sorting_size;
uint64_t* point_list_keys_unsorted;
uint64_t* point_list_keys;
uint32_t* point_list_unsorted;
uint32_t* point_list;
char* list_sorting_space;
virtual ~RasterizerImpl() override;
static BinningState fromChunk(char*& chunk, size_t P);
};
template<typename T>
size_t required(size_t P)
{
char* size = nullptr;
T::fromChunk(size, P);
return ((size_t)size) + 128;
}
};
\ No newline at end of file
......@@ -64,19 +64,21 @@ class _RasterizeGaussians(torch.autograd.Function):
)
# Invoke C++/CUDA rasterizer
color, radii = _C.rasterize_gaussians(*args)
num_rendered, color, radii, geomBuffer, binningBuffer, imgBuffer = _C.rasterize_gaussians(*args)
# Keep relevant tensors for backward
ctx.raster_settings = raster_settings
ctx.save_for_backward(colors_precomp, means3D, scales, rotations, cov3Ds_precomp, radii, sh)
ctx.num_rendered = num_rendered
ctx.save_for_backward(colors_precomp, means3D, scales, rotations, cov3Ds_precomp, radii, sh, geomBuffer, binningBuffer, imgBuffer)
return color, radii
@staticmethod
def backward(ctx, grad_out_color, _):
# Restore necessary values from context
num_rendered = ctx.num_rendered
raster_settings = ctx.raster_settings
colors_precomp, means3D, scales, rotations, cov3Ds_precomp, radii, sh = ctx.saved_tensors
colors_precomp, means3D, scales, rotations, cov3Ds_precomp, radii, sh, geomBuffer, binningBuffer, imgBuffer = ctx.saved_tensors
# Restructure args as C++ method expects them
args = (raster_settings.bg,
......@@ -94,7 +96,11 @@ class _RasterizeGaussians(torch.autograd.Function):
grad_out_color,
sh,
raster_settings.sh_degree,
raster_settings.campos)
raster_settings.campos,
geomBuffer,
num_rendered,
binningBuffer,
imgBuffer)
# Compute gradients for relevant tensors by invoking backward method
grad_means2D, grad_colors_precomp, grad_opacities, grad_means3D, grad_cov3Ds_precomp, grad_sh, grad_scales, grad_rotations = _C.rasterize_gaussians_backward(*args)
......
......@@ -13,10 +13,17 @@
#include "cuda_rasterizer/rasterizer.h"
#include <fstream>
#include <string>
#include <functional>
static std::unique_ptr<CudaRasterizer::Rasterizer> cudaRenderer = nullptr;
std::function<char*(size_t N)> resizeFunctional(torch::Tensor& t) {
auto lambda = [&t](size_t N) {
t.resize_({(long long)N});
return reinterpret_cast<char*>(t.contiguous().data_ptr());
};
return lambda;
}
std::tuple<torch::Tensor, torch::Tensor>
std::tuple<int, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
RasterizeGaussiansCUDA(
const torch::Tensor& background,
const torch::Tensor& means3D,
......@@ -37,16 +44,10 @@ RasterizeGaussiansCUDA(
const torch::Tensor& campos,
const bool prefiltered)
{
if (means3D.ndimension() != 2 || means3D.size(1) != 3) {
AT_ERROR("means3D must have dimensions (num_points, 3)");
}
if (cudaRenderer == nullptr)
{
cudaRenderer = std::unique_ptr<CudaRasterizer::Rasterizer>(CudaRasterizer::Rasterizer::make());
}
const int P = means3D.size(0);
const int N = 1; // batch size hard-coded
const int H = image_height;
......@@ -57,7 +58,17 @@ RasterizeGaussiansCUDA(
torch::Tensor out_color = torch::full({N, NUM_CHANNELS, H, W}, 0.0, float_opts);
torch::Tensor radii = torch::full({P}, 0, means3D.options().dtype(torch::kInt32));
torch::Device device(torch::kCUDA);
torch::TensorOptions options(torch::kByte);
torch::Tensor geomBuffer = torch::empty({0}, options.device(device));
torch::Tensor binningBuffer = torch::empty({0}, options.device(device));
torch::Tensor imgBuffer = torch::empty({0}, options.device(device));
std::function<char*(size_t)> geomFunc = resizeFunctional(geomBuffer);
std::function<char*(size_t)> binningFunc = resizeFunctional(binningBuffer);
std::function<char*(size_t)> imgFunc = resizeFunctional(imgBuffer);
int rendered = 0;
if(P != 0)
{
int M = 0;
......@@ -66,7 +77,11 @@ RasterizeGaussiansCUDA(
M = sh.size(1);
}
cudaRenderer->forward(P, degree, M,
rendered = CudaRasterizer::Rasterizer::forward(
geomFunc,
binningFunc,
imgFunc,
P, degree, M,
background.contiguous().data<float>(),
W, H,
means3D.contiguous().data<float>(),
......@@ -86,7 +101,7 @@ RasterizeGaussiansCUDA(
out_color.contiguous().data<float>(),
radii.contiguous().data<int>());
}
return std::make_tuple(out_color, radii);
return std::make_tuple(rendered, out_color, radii, geomBuffer, binningBuffer, imgBuffer);
}
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
......@@ -106,7 +121,11 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Te
const torch::Tensor& dL_dout_color,
const torch::Tensor& sh,
const int degree,
const torch::Tensor& campos)
const torch::Tensor& campos,
const torch::Tensor& geomBuffer,
const int R,
const torch::Tensor& binningBuffer,
const torch::Tensor& imageBuffer)
{
const int P = means3D.size(0);
const int H = dL_dout_color.size(2);
......@@ -130,7 +149,7 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Te
if(P != 0)
{
cudaRenderer->backward(P, degree, M,
CudaRasterizer::Rasterizer::backward(P, degree, M, R,
background.contiguous().data<float>(),
W, H,
means3D.contiguous().data<float>(),
......@@ -146,6 +165,9 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Te
tan_fovx,
tan_fovy,
radii.contiguous().data<int>(),
reinterpret_cast<char*>(geomBuffer.contiguous().data_ptr()),
reinterpret_cast<char*>(binningBuffer.contiguous().data_ptr()),
reinterpret_cast<char*>(imageBuffer.contiguous().data_ptr()),
dL_dout_color.contiguous().data<float>(),
dL_dmeans2D.contiguous().data<float>(),
dL_dconic.contiguous().data<float>(),
......@@ -166,18 +188,13 @@ torch::Tensor markVisible(
torch::Tensor& viewmatrix,
torch::Tensor& projmatrix)
{
if (cudaRenderer == nullptr)
{
cudaRenderer = std::unique_ptr<CudaRasterizer::Rasterizer>(CudaRasterizer::Rasterizer::make());
}
const int P = means3D.size(0);
torch::Tensor present = torch::full({P}, false, means3D.options().dtype(at::kBool));
if(P != 0)
{
cudaRenderer->markVisible(P,
CudaRasterizer::Rasterizer::markVisible(P,
means3D.contiguous().data<float>(),
viewmatrix.contiguous().data<float>(),
projmatrix.contiguous().data<float>(),
......
......@@ -6,7 +6,7 @@
#include <tuple>
#include <string>
std::tuple<torch::Tensor, torch::Tensor>
std::tuple<int, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
RasterizeGaussiansCUDA(
const torch::Tensor& background,
const torch::Tensor& means3D,
......@@ -44,7 +44,11 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Te
const torch::Tensor& dL_dout_color,
const torch::Tensor& sh,
const int degree,
const torch::Tensor& campos);
const torch::Tensor& campos,
const torch::Tensor& geomBuffer,
const int R,
const torch::Tensor& binningBuffer,
const torch::Tensor& imageBuffer);
torch::Tensor markVisible(
torch::Tensor& means3D,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment