summaryrefslogtreecommitdiffstats
path: root/cuda
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <wjp@usecode.org>2019-03-29 21:21:29 +0100
committerWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2019-09-25 14:10:08 +0200
commit3cf63d335ebe392a8c77f0c90395c18150647aeb (patch)
tree60a96bfe45c62aa9e57c1c5751e72d3794934845 /cuda
parent87715885f3b4a80693493e37aa8293899a6b987e (diff)
downloadastra-3cf63d335ebe392a8c77f0c90395c18150647aeb.tar.gz
astra-3cf63d335ebe392a8c77f0c90395c18150647aeb.tar.bz2
astra-3cf63d335ebe392a8c77f0c90395c18150647aeb.tar.xz
astra-3cf63d335ebe392a8c77f0c90395c18150647aeb.zip
Adjust adjoint to line integral scaling
Diffstat (limited to 'cuda')
-rw-r--r--cuda/2d/algo.cu12
-rw-r--r--cuda/2d/fan_bp.cu62
-rw-r--r--cuda/2d/par_bp.cu20
-rw-r--r--cuda/3d/cone_bp.cu6
4 files changed, 72 insertions, 28 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu
index b4c2864..11422ff 100644
--- a/cuda/2d/algo.cu
+++ b/cuda/2d/algo.cu
@@ -258,10 +258,10 @@ bool ReconAlgo::copyDataToGPU(const float* pfSinogram, unsigned int iSinogramPit
if (!ok)
return false;
- // rescale sinogram to adjust for pixel size
- processSino<opMul>(D_sinoData, fSinogramScale,
- //1.0f/(fPixelSize*fPixelSize),
- sinoPitch, dims);
+ // rescale sinogram
+ if (fSinogramScale != 1.0f)
+ processSino<opMul>(D_sinoData, fSinogramScale,
+ sinoPitch, dims);
ok = copyVolumeToDevice(pfReconstruction, iReconstructionPitch,
dims,
@@ -331,11 +331,11 @@ bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch,
if (parProjs) {
assert(!fanProjs);
return BP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, parProjs, outputScale);
+ dims, parProjs, fOutputScale * outputScale);
} else {
assert(fanProjs);
return FanBP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, fanProjs, outputScale);
+ dims, fanProjs, fOutputScale * outputScale);
}
}
diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu
index dac3ac2..2072cd4 100644
--- a/cuda/2d/fan_bp.cu
+++ b/cuda/2d/fan_bp.cu
@@ -48,7 +48,7 @@ const unsigned int g_anglesPerBlock = 16;
const unsigned int g_blockSliceSize = 32;
const unsigned int g_blockSlices = 16;
-const unsigned int g_MaxAngles = 2560;
+const unsigned int g_MaxAngles = 2240;
__constant__ float gC_SrcX[g_MaxAngles];
__constant__ float gC_SrcY[g_MaxAngles];
@@ -56,6 +56,7 @@ __constant__ float gC_DetSX[g_MaxAngles];
__constant__ float gC_DetSY[g_MaxAngles];
__constant__ float gC_DetUX[g_MaxAngles];
__constant__ float gC_DetUY[g_MaxAngles];
+__constant__ float gC_Scale[g_MaxAngles];
static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder)
@@ -96,8 +97,6 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s
float fVal = 0.0f;
float fA = startAngle + 0.5f;
- // TODO: Distance correction?
-
for (int angle = startAngle; angle < endAngle; ++angle)
{
const float fSrcX = gC_SrcX[angle];
@@ -106,15 +105,24 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s
const float fDetSY = gC_DetSY[angle];
const float fDetUX = gC_DetUX[angle];
const float fDetUY = gC_DetUY[angle];
+ const float fScale = gC_Scale[angle];
const float fXD = fSrcX - fX;
const float fYD = fSrcY - fY;
const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;
const float fDen = fDetUX * fYD - fDetUY * fXD;
-
- const float fT = fNum / fDen;
- fVal += tex2D(gT_FanProjTexture, fT, fA);
+
+ // fDen = || u (x-s) || (2x2 determinant)
+
+ // Scale factor is the approximate number of rays traversing this pixel,
+ // given by the inverse size of a detector pixel scaled by the magnification
+ // factor of this pixel.
+ // Magnification factor is || u (d-s) || / || u (x-s) ||
+
+ const float fr = __fdividef(1.0f, fDen);
+ const float fT = fNum * fr;
+ fVal += tex2D(gT_FanProjTexture, fT, fA) * fScale * fr;
fA += 1.0f;
}
@@ -148,8 +156,6 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
float fVal = 0.0f;
float fA = startAngle + 0.5f;
- // TODO: Distance correction?
-
for (int angle = startAngle; angle < endAngle; ++angle)
{
const float fSrcX = gC_SrcX[angle];
@@ -158,6 +164,7 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
const float fDetSY = gC_DetSY[angle];
const float fDetUX = gC_DetUX[angle];
const float fDetUY = gC_DetUY[angle];
+ const float fScale = gC_Scale[angle];
// TODO: Optimize these loops...
float fX = fXb;
@@ -169,9 +176,10 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;
const float fDen = fDetUX * fYD - fDetUY * fXD;
-
- const float fT = fNum / fDen;
- fVal += tex2D(gT_FanProjTexture, fT, fA);
+ const float fr = __fdividef(1.0f, fDen);
+
+ const float fT = fNum * fr;
+ fVal += tex2D(gT_FanProjTexture, fT, fA) * fScale * fr;
fY -= fSubStep;
}
fX += fSubStep;
@@ -202,8 +210,6 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi
float* volData = (float*)D_volData;
- // TODO: Distance correction?
-
// TODO: Constant memory vs parameters.
const float fSrcX = gC_SrcX[0];
const float fSrcY = gC_SrcY[0];
@@ -211,15 +217,17 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi
const float fDetSY = gC_DetSY[0];
const float fDetUX = gC_DetUX[0];
const float fDetUY = gC_DetUY[0];
+ const float fScale = gC_Scale[0];
const float fXD = fSrcX - fX;
const float fYD = fSrcY - fY;
const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;
const float fDen = fDetUX * fYD - fDetUY * fXD;
-
- const float fT = fNum / fDen;
- const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f);
+ const float fr = __fdividef(1.0f, fDen);
+
+ const float fT = fNum * fr;
+ const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f) * fScale * fr;
volData[Y*volPitch+X] += fVal * fOutputScale;
}
@@ -248,7 +256,7 @@ __global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, un
float fVal = 0.0f;
float fA = startAngle + 0.5f;
- // TODO: Distance correction?
+ // TODO: Update for new projection scaling
for (int angle = startAngle; angle < endAngle; ++angle)
{
@@ -299,6 +307,13 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
#undef TRANSFER_TO_CONSTANT
+ for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
+ double detsize = sqrt(angles[i].fDetUX * angles[i].fDetUX + angles[i].fDetUY * angles[i].fDetUY);
+ double scale = (angles[i].fDetUX * (angles[i].fSrcY - angles[i].fDetSY) - angles[i].fDetUY * (angles[i].fSrcX - angles[i].fDetSX)) / detsize;
+ tmp[i] = (float)scale;
+ }
+ cudaMemcpyToSymbol(gC_Scale, tmp, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+
delete[] tmp;
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
@@ -346,6 +361,14 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
#undef TRANSFER_TO_CONSTANT
+ for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
+ double detsize = sqrt(angles[i].fDetUX * angles[i].fDetUX + angles[i].fDetUY * angles[i].fDetUY);
+ double scale = (angles[i].fDetUX * (angles[i].fSrcY - angles[i].fDetSY) - angles[i].fDetUY * (angles[i].fSrcX - angles[i].fDetSX)) / detsize;
+ tmp[i] = (float)scale;
+ }
+ cudaMemcpyToSymbol(gC_Scale, tmp, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+
+
delete[] tmp;
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
@@ -389,6 +412,11 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
#undef TRANSFER_TO_CONSTANT
+ double detsize = sqrt(angles[angle].fDetUX * angles[angle].fDetUX + angles[angle].fDetUY * angles[angle].fDetUY);
+ double scale = (angles[angle].fDetUX * (angles[angle].fSrcY - angles[angle].fDetSY) - angles[angle].fDetUY * (angles[angle].fSrcX - angles[angle].fDetSX)) / detsize;
+ float tmp = (float)scale;
+ cudaMemcpyToSymbol(gC_Scale, &tmp, sizeof(float), 0, cudaMemcpyHostToDevice);
+
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
(dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu
index 09a6554..21484b1 100644
--- a/cuda/2d/par_bp.cu
+++ b/cuda/2d/par_bp.cu
@@ -53,6 +53,7 @@ const unsigned int g_MaxAngles = 2560;
__constant__ float gC_angle_scaled_sin[g_MaxAngles];
__constant__ float gC_angle_scaled_cos[g_MaxAngles];
__constant__ float gC_angle_offset[g_MaxAngles];
+__constant__ float gC_angle_scale[g_MaxAngles];
static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder)
{
@@ -70,6 +71,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
return true;
}
+// TODO: Templated version with/without scale? (Or only the global outputscale)
__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
@@ -97,9 +99,10 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star
const float scaled_cos_theta = gC_angle_scaled_cos[angle];
const float scaled_sin_theta = gC_angle_scaled_sin[angle];
const float TOffset = gC_angle_offset[angle];
+ const float scale = gC_angle_scale[angle];
const float fT = fX * scaled_cos_theta - fY * scaled_sin_theta + TOffset;
- fVal += tex2D(gT_projTexture, fT, fA);
+ fVal += tex2D(gT_projTexture, fT, fA) * scale;
fA += 1.0f;
}
@@ -138,6 +141,7 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
const float cos_theta = gC_angle_scaled_cos[angle];
const float sin_theta = gC_angle_scaled_sin[angle];
const float TOffset = gC_angle_offset[angle];
+ const float scale = gC_angle_scale[angle];
float fT = fX * cos_theta - fY * sin_theta + TOffset;
@@ -145,7 +149,7 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
float fTy = fT;
fT += fSubStep * cos_theta;
for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) {
- fVal += tex2D(gT_projTexture, fTy, fA);
+ fVal += tex2D(gT_projTexture, fTy, fA) * scale;
fTy -= fSubStep * sin_theta;
}
}
@@ -172,6 +176,8 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset
const float fT = fX * angle_cos - fY * angle_sin + offset;
const float fVal = tex2D(gT_projTexture, fT, 0.5f);
+ // NB: The 'scale' constant in devBP has been folded into fOutputScale by the caller here
+
D_volData[Y*volPitch+X] += fVal * fOutputScale;
}
@@ -186,27 +192,34 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
float* angle_scaled_sin = new float[dims.iProjAngles];
float* angle_scaled_cos = new float[dims.iProjAngles];
float* angle_offset = new float[dims.iProjAngles];
+ float* angle_scale = new float[dims.iProjAngles];
bindProjDataTexture(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
double d = angles[i].fDetUX * angles[i].fRayY - angles[i].fDetUY * angles[i].fRayX;
angle_scaled_cos[i] = angles[i].fRayY / d;
- angle_scaled_sin[i] = -angles[i].fRayX / d; // TODO: Check signs
+ angle_scaled_sin[i] = -angles[i].fRayX / d;
angle_offset[i] = (angles[i].fDetSY * angles[i].fRayX - angles[i].fDetSX * angles[i].fRayY) / d;
+ angle_scale[i] = sqrt(angles[i].fRayX * angles[i].fRayX + angles[i].fRayY * angles[i].fRayY) / abs(d);
}
+ //fprintf(stderr, "outputscale in BP_internal: %f, %f\n", fOutputScale, angle_scale[0]);
+ //fprintf(stderr, "ray in BP_internal: %f,%f (length %f)\n", angles[0].fRayX, angles[0].fRayY, sqrt(angles[0].fRayX * angles[0].fRayX + angles[0].fRayY * angles[0].fRayY));
cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_scaled_sin, angle_scaled_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_scaled_cos, angle_scaled_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, angle_offset, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+ cudaError_t e4 = cudaMemcpyToSymbol(gC_angle_scale, angle_scale, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
assert(e1 == cudaSuccess);
assert(e2 == cudaSuccess);
assert(e3 == cudaSuccess);
+ assert(e4 == cudaSuccess);
delete[] angle_scaled_sin;
delete[] angle_scaled_cos;
delete[] angle_offset;
+ delete[] angle_scale;
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
@@ -267,6 +280,7 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch,
float angle_scaled_cos = angles[angle].fRayY / d;
float angle_scaled_sin = -angles[angle].fRayX / d; // TODO: Check signs
float angle_offset = (angles[angle].fDetSY * angles[angle].fRayX - angles[angle].fDetSX * angles[angle].fRayY) / d;
+ fOutputScale *= sqrt(angles[angle].fRayX * angles[angle].fRayX + angles[angle].fRayY * angles[angle].fRayY) / abs(d);
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu
index feebda2..2a7ec80 100644
--- a/cuda/3d/cone_bp.cu
+++ b/cuda/3d/cone_bp.cu
@@ -140,8 +140,10 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng
if (FDKWEIGHT) {
// The correct factor here is this one:
// Z[idx] += (fr*fCd.w)*(fr*fCd.w)*fVal;
- // This is the square of the inverse magnification factor
- // from fX,fY,fZ to the detector.
+ // This is the square of the magnification factor
+ // from fX,fY,fZ to the virtual detector, where the
+ // virtual detector is the plane through the origin
+ // parallel to the detector, so spanned by u,v.
// Since we are assuming we have a circular cone
// beam trajectory, fCd.w is constant, and we instead