summaryrefslogtreecommitdiffstats
path: root/cuda
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2021-11-16 10:21:35 +0100
committerWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2021-11-16 10:21:35 +0100
commit231dd3e5e28319aa16155efd9ec7fdc69834666b (patch)
tree5b061e199bc3b7b5e39e48c8fbfa79eb5c5b906d /cuda
parent295ad96290c78a57ef982100014b1242a89d915c (diff)
downloadastra-231dd3e5e28319aa16155efd9ec7fdc69834666b.tar.gz
astra-231dd3e5e28319aa16155efd9ec7fdc69834666b.tar.bz2
astra-231dd3e5e28319aa16155efd9ec7fdc69834666b.tar.xz
astra-231dd3e5e28319aa16155efd9ec7fdc69834666b.zip
Add specialization to BP3D kernels for single slice volumes
Diffstat (limited to 'cuda')
-rw-r--r--cuda/3d/cone_bp.cu23
-rw-r--r--cuda/3d/par3d_bp.cu14
2 files changed, 24 insertions, 13 deletions
diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu
index 4f01d25..7c3fc8d 100644
--- a/cuda/3d/cone_bp.cu
+++ b/cuda/3d/cone_bp.cu
@@ -41,8 +41,7 @@ static texture3D gT_coneProjTexture;
namespace astraCUDA3d {
-#define ZSIZE 6
-static const unsigned int g_volBlockZ = ZSIZE;
+static const unsigned int g_volBlockZ = 6;
static const unsigned int g_anglesPerBlock = 32;
static const unsigned int g_volBlockX = 16;
@@ -77,7 +76,7 @@ bool bindProjDataTexture(const cudaArray* array)
//__launch_bounds__(32*16, 4)
-template<bool FDKWEIGHT>
+template<bool FDKWEIGHT, unsigned int ZSIZE>
__global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle,
int angleOffset, const astraCUDA3d::SDimensions3D dims,
float fOutputScale)
@@ -342,11 +341,19 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,
for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {
// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);
- if (params.bFDKWeighting)
- dev_cone_BP<true><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
- else if (params.iRaysPerVoxelDim == 1)
- dev_cone_BP<false><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
- else
+ if (params.bFDKWeighting) {
+ if (dims.iVolZ == 1) {
+ dev_cone_BP<true, 1><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ } else {
+ dev_cone_BP<true, g_volBlockZ><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ }
+ } else if (params.iRaysPerVoxelDim == 1) {
+ if (dims.iVolZ == 1) {
+ dev_cone_BP<false, 1><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ } else {
+ dev_cone_BP<false, g_volBlockZ><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ }
+ } else
dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale);
}
diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu
index 004fc99..d356b9f 100644
--- a/cuda/3d/par3d_bp.cu
+++ b/cuda/3d/par3d_bp.cu
@@ -41,8 +41,7 @@ static texture3D gT_par3DProjTexture;
namespace astraCUDA3d {
-#define ZSIZE 6
-static const unsigned int g_volBlockZ = ZSIZE;
+static const unsigned int g_volBlockZ = 6;
static const unsigned int g_anglesPerBlock = 32;
static const unsigned int g_volBlockX = 16;
@@ -77,6 +76,7 @@ static bool bindProjDataTexture(const cudaArray* array)
}
+template<unsigned int ZSIZE>
__global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
{
float* volData = (float*)D_volData;
@@ -281,9 +281,13 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {
// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);
- if (params.iRaysPerVoxelDim == 1)
- dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
- else
+ if (params.iRaysPerVoxelDim == 1) {
+ if (dims.iVolZ == 1) {
+ dev_par3D_BP<1><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ } else {
+ dev_par3D_BP<g_volBlockZ><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ }
+ } else
dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale);
}