summaryrefslogtreecommitdiffstats
path: root/cuda
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2018-10-23 10:19:32 +0200
committerWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2018-11-01 12:09:35 +0100
commit9c4222b4cab810815a0adee0302501471da177aa (patch)
tree6a8876055f5edbe37d220653f7fca85b9a1df8d1 /cuda
parentf84f0c623cd2652fcf80ec47cb287cc29fd430c5 (diff)
downloadastra-9c4222b4cab810815a0adee0302501471da177aa.tar.gz
astra-9c4222b4cab810815a0adee0302501471da177aa.tar.bz2
astra-9c4222b4cab810815a0adee0302501471da177aa.tar.xz
astra-9c4222b4cab810815a0adee0302501471da177aa.zip
Add minimal GPU Array interface
This extension (only) allows creating a CUDA 3D array, copying projection data into it, performing a BP from the array, and freeing the array.
Diffstat (limited to 'cuda')
-rw-r--r--cuda/3d/astra3d.cu45
-rw-r--r--cuda/3d/mem3d.cu95
-rw-r--r--cuda/3d/util3d.cu35
3 files changed, 167 insertions, 8 deletions
diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 23af36a..be258be 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -35,12 +35,15 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include "astra/cuda/3d/fdk.h"
#include "astra/cuda/3d/arith3d.h"
#include "astra/cuda/3d/astra3d.h"
+#include "astra/cuda/3d/mem3d.h"
#include "astra/ParallelProjectionGeometry3D.h"
#include "astra/ParallelVecProjectionGeometry3D.h"
#include "astra/ConeProjectionGeometry3D.h"
#include "astra/ConeVecProjectionGeometry3D.h"
#include "astra/VolumeGeometry3D.h"
+#include "astra/Float32ProjectionData3DGPU.h"
+#include "astra/Logging.h"
#include <iostream>
#include <cstdio>
@@ -1317,4 +1320,46 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
}
+_AstraExport void uploadMultipleProjections(CFloat32ProjectionData3DGPU *proj,
+ const float *data,
+ unsigned int y_min, unsigned int y_max)
+{
+ astraCUDA3d::MemHandle3D hnd = proj->getHandle();
+
+ astraCUDA3d::SDimensions3D dims1;
+ dims1.iProjU = proj->getDetectorColCount();
+ dims1.iProjV = proj->getDetectorRowCount();
+ dims1.iProjAngles = y_max - y_min + 1;
+
+ cudaPitchedPtr D_proj = allocateProjectionData(dims1);
+ bool ok = copyProjectionsToDevice(data, D_proj, dims1);
+ if (!ok)
+ ASTRA_ERROR("Failed to upload projection to GPU");
+
+ astraCUDA3d::MemHandle3D hnd1 = astraCUDA3d::wrapHandle(
+ (float *)D_proj.ptr,
+ dims1.iProjU, dims1.iProjAngles, dims1.iProjV,
+ D_proj.pitch / sizeof(float));
+
+ astraCUDA3d::SSubDimensions3D subdims;
+ subdims.nx = dims1.iProjU;
+ subdims.ny = proj->getAngleCount();
+ subdims.nz = dims1.iProjV;
+ subdims.pitch = D_proj.pitch / sizeof(float); // FIXME: Pitch for wrong obj!
+ subdims.subnx = dims1.iProjU;
+ subdims.subny = dims1.iProjAngles;
+ subdims.subnz = dims1.iProjV;
+ subdims.subx = 0;
+ subdims.suby = y_min;
+ subdims.subz = 0;
+
+ ok = astraCUDA3d::copyIntoArray(hnd, hnd1, subdims);
+ if (!ok)
+ ASTRA_ERROR("Failed to copy projection into 3d data");
+
+ cudaFree(D_proj.ptr);
+
+}
+
+
}
diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu
index fe3e723..697d2d2 100644
--- a/cuda/3d/mem3d.cu
+++ b/cuda/3d/mem3d.cu
@@ -49,6 +49,7 @@ namespace astraCUDA3d {
struct SMemHandle3D_internal
{
cudaPitchedPtr ptr;
+ cudaArray *arr;
unsigned int nx;
unsigned int ny;
unsigned int nz;
@@ -79,6 +80,7 @@ MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Me
hnd.nx = x;
hnd.ny = y;
hnd.nz = z;
+ hnd.arr = 0;
size_t free = astraCUDA::availableGPUMemory();
@@ -113,6 +115,7 @@ MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Me
bool zeroGPUMemory(MemHandle3D handle, unsigned int x, unsigned int y, unsigned int z)
{
SMemHandle3D_internal& hnd = *handle.d.get();
+ assert(!hnd.arr);
cudaError_t err = cudaMemset3D(hnd.ptr, 0, make_cudaExtent(sizeof(float)*x, y, z));
return err == cudaSuccess;
}
@@ -120,7 +123,11 @@ bool zeroGPUMemory(MemHandle3D handle, unsigned int x, unsigned int y, unsigned
bool freeGPUMemory(MemHandle3D handle)
{
size_t free = astraCUDA::availableGPUMemory();
- cudaError_t err = cudaFree(handle.d->ptr.ptr);
+ cudaError_t err;
+ if (handle.d->arr)
+ err = cudaFreeArray(handle.d->arr);
+ else
+ err = cudaFree(handle.d->ptr.ptr);
size_t free2 = astraCUDA::availableGPUMemory();
ASTRA_DEBUG("Freeing memory. (Pre: %lu, post: %lu)", free, free2);
@@ -132,6 +139,7 @@ bool copyToGPUMemory(const float *src, MemHandle3D dst, const SSubDimensions3D &
{
ASTRA_DEBUG("Copying %d x %d x %d to GPU", pos.subnx, pos.subny, pos.subnz);
ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz);
+ assert(!dst.d->arr);
cudaPitchedPtr s;
s.ptr = (void*)src; // const cast away
s.pitch = pos.pitch * sizeof(float);
@@ -162,6 +170,7 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos)
{
ASTRA_DEBUG("Copying %d x %d x %d from GPU", pos.subnx, pos.subny, pos.subnz);
ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz);
+ assert(!src.d->arr);
cudaPitchedPtr d;
d.ptr = (void*)dst;
d.pitch = pos.pitch * sizeof(float);
@@ -170,15 +179,21 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos)
ASTRA_DEBUG("Pitch %d, xsize %d, ysize %d", d.pitch, d.xsize, d.ysize);
cudaMemcpy3DParms p;
- p.srcArray = 0;
p.srcPos = make_cudaPos(0, 0, 0);
- p.srcPtr = src.d->ptr;
p.dstArray = 0;
p.dstPos = make_cudaPos(pos.subx * sizeof(float), pos.suby, pos.subz);
p.dstPtr = d;
- p.extent = make_cudaExtent(pos.subnx * sizeof(float), pos.subny, pos.subnz);
+ if (src.d->ptr.ptr) {
+ p.srcArray = 0;
+ p.srcPtr = src.d->ptr;
+ p.extent = make_cudaExtent(pos.subnx * sizeof(float), pos.subny, pos.subnz);
+ } else {
+ p.srcArray = src.d->arr;
+ p.srcPtr.ptr = 0;
+ p.extent = make_cudaExtent(pos.subnx, pos.subny, pos.subnz);
+ }
p.kind = cudaMemcpyDeviceToHost;
@@ -191,6 +206,8 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos)
bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel)
{
+ assert(!projData.d->arr);
+ assert(!volData.d->arr);
SDimensions3D dims;
SProjectorParams3D params;
@@ -253,6 +270,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling, bool bFDKWeighting)
{
+ assert(!volData.d->arr);
SDimensions3D dims;
SProjectorParams3D params;
@@ -273,10 +291,17 @@ bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
params.bFDKWeighting = bFDKWeighting;
- if (pParProjs)
- ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params);
- else
- ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params);
+ if (pParProjs) {
+ if (projData.d->arr)
+ ok &= Par3DBP_Array(volData.d->ptr, projData.d->arr, dims, pParProjs, params);
+ else
+ ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params);
+ } else {
+ if (projData.d->arr)
+ ok &= ConeBP_Array(volData.d->ptr, projData.d->arr, dims, pConeProjs, params);
+ else
+ ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params);
+ }
delete[] pParProjs;
delete[] pConeProjs;
@@ -287,6 +312,8 @@ bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan, const float *pfFilter)
{
+ assert(!projData.d->arr);
+ assert(!volData.d->arr);
SDimensions3D dims;
SProjectorParams3D params;
@@ -328,6 +355,7 @@ _AstraExport MemHandle3D wrapHandle(float *D_ptr, unsigned int x, unsigned int y
SMemHandle3D_internal h;
h.ptr = ptr;
+ h.arr = 0;
MemHandle3D hnd;
hnd.d = boost::shared_ptr<SMemHandle3D_internal>(new SMemHandle3D_internal);
@@ -336,6 +364,57 @@ _AstraExport MemHandle3D wrapHandle(float *D_ptr, unsigned int x, unsigned int y
return hnd;
}
+MemHandle3D createProjectionArrayHandle(const float *ptr, unsigned int x, unsigned int y, unsigned int z)
+{
+ SDimensions3D dims;
+ dims.iProjU = x;
+ dims.iProjAngles = y;
+ dims.iProjV = z;
+
+ cudaArray* cuArray = allocateProjectionArray(dims);
+ transferHostProjectionsToArray(ptr, cuArray, dims);
+
+ SMemHandle3D_internal h;
+ h.arr = cuArray;
+ h.ptr.ptr = 0;
+
+ MemHandle3D hnd;
+ hnd.d = boost::shared_ptr<SMemHandle3D_internal>(new SMemHandle3D_internal);
+ *hnd.d = h;
+
+ return hnd;
+}
+
+bool copyIntoArray(MemHandle3D handle, MemHandle3D subdata, const SSubDimensions3D &pos)
+{
+ assert(handle.d->arr);
+ assert(!handle.d->ptr.ptr);
+ assert(!subdata.d->arr);
+ assert(subdata.d->ptr.ptr);
+
+ ASTRA_DEBUG("Copying %d x %d x %d into GPU array", pos.subnx, pos.subny, pos.subnz);
+ ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz);
+ ASTRA_DEBUG("Pitch %d, xsize %d, ysize %d", subdata.d->ptr.pitch, subdata.d->ptr.xsize, subdata.d->ptr.ysize);
+
+ cudaMemcpy3DParms p;
+ p.srcArray = 0;
+ p.srcPos = make_cudaPos(0, 0, 0);
+ p.srcPtr = subdata.d->ptr;
+
+ p.dstArray = handle.d->arr;
+ p.dstPos = make_cudaPos(pos.subx, pos.suby, pos.subz);
+ p.dstPtr.ptr = 0;
+
+ p.extent = make_cudaExtent(pos.subnx, pos.subny, pos.subnz);
+
+ p.kind = cudaMemcpyHostToDevice;
+
+ cudaError_t err = cudaMemcpy3D(&p);
+
+ return err == cudaSuccess;
+
+}
+
}
diff --git a/cuda/3d/util3d.cu b/cuda/3d/util3d.cu
index 90aa5ea..41eb9d2 100644
--- a/cuda/3d/util3d.cu
+++ b/cuda/3d/util3d.cu
@@ -386,6 +386,41 @@ bool transferProjectionsToArray(cudaPitchedPtr D_projData, cudaArray* array, con
return true;
}
+bool transferHostProjectionsToArray(const float *projData, cudaArray* array, const SDimensions3D& dims)
+{
+ cudaExtent extentA;
+ extentA.width = dims.iProjU;
+ extentA.height = dims.iProjAngles;
+ extentA.depth = dims.iProjV;
+
+ cudaPitchedPtr ptr;
+ ptr.ptr = (void*)projData; // const cast away
+ ptr.pitch = dims.iProjU*sizeof(float);
+ ptr.xsize = dims.iProjU*sizeof(float);
+ ptr.ysize = dims.iProjAngles;
+
+ cudaMemcpy3DParms p;
+ cudaPos zp = {0, 0, 0};
+ p.srcArray = 0;
+ p.srcPos = zp;
+ p.srcPtr = ptr;
+ p.dstArray = array;
+ p.dstPtr.ptr = 0;
+ p.dstPtr.pitch = 0;
+ p.dstPtr.xsize = 0;
+ p.dstPtr.ysize = 0;
+ p.dstPos = zp;
+ p.extent = extentA;
+ p.kind = cudaMemcpyHostToDevice;
+
+ cudaError err = cudaMemcpy3D(&p);
+ ASTRA_CUDA_ASSERT(err);
+
+ // TODO: check errors
+
+ return true;
+}
+
float dotProduct3D(cudaPitchedPtr data, unsigned int x, unsigned int y,