Adjust adjoint to line integral scaling

author: Willem Jan Palenstijn <wjp@usecode.org> 2019-03-29 21:21:29 +0100
committer: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> 2019-09-25 14:10:08 +0200
commit: 3cf63d335ebe392a8c77f0c90395c18150647aeb (patch)
tree: 60a96bfe45c62aa9e57c1c5751e72d3794934845 /cuda
parent: 87715885f3b4a80693493e37aa8293899a6b987e (diff)
download: astra-3cf63d335ebe392a8c77f0c90395c18150647aeb.tar.gz
astra-3cf63d335ebe392a8c77f0c90395c18150647aeb.tar.bz2
astra-3cf63d335ebe392a8c77f0c90395c18150647aeb.tar.xz
astra-3cf63d335ebe392a8c77f0c90395c18150647aeb.zip
4 files changed, 72 insertions, 28 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu
index b4c2864..11422ff 100644
--- a/cuda/2d/algo.cu
+++ b/cuda/2d/algo.cu
@@ -258,10 +258,10 @@ bool ReconAlgo::copyDataToGPU(const float* pfSinogram, unsigned int iSinogramPit
 	if (!ok)
 		return false;
 
-	// rescale sinogram to adjust for pixel size
-	processSino<opMul>(D_sinoData, fSinogramScale,
-	                       //1.0f/(fPixelSize*fPixelSize),
-	                       sinoPitch, dims);
+	// rescale sinogram
+	if (fSinogramScale != 1.0f)
+		processSino<opMul>(D_sinoData, fSinogramScale,
+		                   sinoPitch, dims);
 
 	ok = copyVolumeToDevice(pfReconstruction, iReconstructionPitch,
 	                        dims,
@@ -331,11 +331,11 @@ bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch,
 	if (parProjs) {
 		assert(!fanProjs);
 		return BP(D_volumeData, volumePitch, D_projData, projPitch,
-		          dims, parProjs, outputScale);
+		          dims, parProjs, fOutputScale * outputScale);
 	} else {
 		assert(fanProjs);
 		return FanBP(D_volumeData, volumePitch, D_projData, projPitch,
-		             dims, fanProjs, outputScale);
+		             dims, fanProjs, fOutputScale * outputScale);
 	}
 
 }
diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu
index dac3ac2..2072cd4 100644
--- a/cuda/2d/fan_bp.cu
+++ b/cuda/2d/fan_bp.cu
@@ -48,7 +48,7 @@ const unsigned int g_anglesPerBlock = 16;
 const unsigned int g_blockSliceSize = 32;
 const unsigned int g_blockSlices = 16;
 
-const unsigned int g_MaxAngles = 2560;
+const unsigned int g_MaxAngles = 2240;
 
 __constant__ float gC_SrcX[g_MaxAngles];
 __constant__ float gC_SrcY[g_MaxAngles];
@@ -56,6 +56,7 @@ __constant__ float gC_DetSX[g_MaxAngles];
 __constant__ float gC_DetSY[g_MaxAngles];
 __constant__ float gC_DetUX[g_MaxAngles];
 __constant__ float gC_DetUY[g_MaxAngles];
+__constant__ float gC_Scale[g_MaxAngles];
 
 
 static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder)
@@ -96,8 +97,6 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s
 	float fVal = 0.0f;
 	float fA = startAngle + 0.5f;
 
-	// TODO: Distance correction?
-
 	for (int angle = startAngle; angle < endAngle; ++angle)
 	{
 		const float fSrcX = gC_SrcX[angle];
@@ -106,15 +105,24 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s
 		const float fDetSY = gC_DetSY[angle];
 		const float fDetUX = gC_DetUX[angle];
 		const float fDetUY = gC_DetUY[angle];
+		const float fScale = gC_Scale[angle];
 
 		const float fXD = fSrcX - fX;
 		const float fYD = fSrcY - fY;
 
 		const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;
 		const float fDen = fDetUX * fYD - fDetUY * fXD;
-		
-		const float fT = fNum / fDen;
-		fVal += tex2D(gT_FanProjTexture, fT, fA);
+
+		// fDen = || u (x-s) ||     (2x2 determinant)
+
+		// Scale factor is the approximate number of rays traversing this pixel,
+		// given by the inverse size of a detector pixel scaled by the magnification
+		// factor of this pixel.
+		// Magnification factor is || u (d-s) || / || u (x-s) ||
+
+		const float fr = __fdividef(1.0f, fDen);
+		const float fT = fNum * fr;
+		fVal += tex2D(gT_FanProjTexture, fT, fA) * fScale * fr;
 		fA += 1.0f;
 	}
 
@@ -148,8 +156,6 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
 	float fVal = 0.0f;
 	float fA = startAngle + 0.5f;
 
-	// TODO: Distance correction?
-
 	for (int angle = startAngle; angle < endAngle; ++angle)
 	{
 		const float fSrcX = gC_SrcX[angle];
@@ -158,6 +164,7 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
 		const float fDetSY = gC_DetSY[angle];
 		const float fDetUX = gC_DetUX[angle];
 		const float fDetUY = gC_DetUY[angle];
+		const float fScale = gC_Scale[angle];
 
 		// TODO: Optimize these loops...
 		float fX = fXb;
@@ -169,9 +176,10 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
 
 				const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;
 				const float fDen = fDetUX * fYD - fDetUY * fXD;
-		
-				const float fT = fNum / fDen;
-				fVal += tex2D(gT_FanProjTexture, fT, fA);
+				const float fr = __fdividef(1.0f, fDen);
+
+				const float fT = fNum * fr;
+				fVal += tex2D(gT_FanProjTexture, fT, fA) * fScale * fr;
 				fY -= fSubStep;
 			}
 			fX += fSubStep;
@@ -202,8 +210,6 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi
 
 	float* volData = (float*)D_volData;
 
-	// TODO: Distance correction?
-
 	// TODO: Constant memory vs parameters.
 	const float fSrcX = gC_SrcX[0];
 	const float fSrcY = gC_SrcY[0];
@@ -211,15 +217,17 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi
 	const float fDetSY = gC_DetSY[0];
 	const float fDetUX = gC_DetUX[0];
 	const float fDetUY = gC_DetUY[0];
+	const float fScale = gC_Scale[0];
 
 	const float fXD = fSrcX - fX;
 	const float fYD = fSrcY - fY;
 
 	const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;
 	const float fDen = fDetUX * fYD - fDetUY * fXD;
-		
-	const float fT = fNum / fDen;
-	const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f);
+	const float fr = __fdividef(1.0f, fDen);
+
+	const float fT = fNum * fr;
+	const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f) * fScale * fr;
 
 	volData[Y*volPitch+X] += fVal * fOutputScale;
 }
@@ -248,7 +256,7 @@ __global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, un
 	float fVal = 0.0f;
 	float fA = startAngle + 0.5f;
 
-	// TODO: Distance correction?
+	// TODO: Update for new projection scaling
 
 	for (int angle = startAngle; angle < endAngle; ++angle)
 	{
@@ -299,6 +307,13 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
 
 #undef TRANSFER_TO_CONSTANT
 
+	for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
+		double detsize = sqrt(angles[i].fDetUX * angles[i].fDetUX + angles[i].fDetUY * angles[i].fDetUY);
+		double scale = (angles[i].fDetUX * (angles[i].fSrcY - angles[i].fDetSY) - angles[i].fDetUY * (angles[i].fSrcX - angles[i].fDetSX)) / detsize;
+		tmp[i] = (float)scale;
+	}
+	cudaMemcpyToSymbol(gC_Scale, tmp, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+
 	delete[] tmp;
 
 	dim3 dimBlock(g_blockSlices, g_blockSliceSize);
@@ -346,6 +361,14 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
 
 #undef TRANSFER_TO_CONSTANT
 
+	for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
+		double detsize = sqrt(angles[i].fDetUX * angles[i].fDetUX + angles[i].fDetUY * angles[i].fDetUY);
+		double scale = (angles[i].fDetUX * (angles[i].fSrcY - angles[i].fDetSY) - angles[i].fDetUY * (angles[i].fSrcX - angles[i].fDetSX)) / detsize;
+		tmp[i] = (float)scale;
+	}
+	cudaMemcpyToSymbol(gC_Scale, tmp, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+
+
 	delete[] tmp;
 
 	dim3 dimBlock(g_blockSlices, g_blockSliceSize);
@@ -389,6 +412,11 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
 
 #undef TRANSFER_TO_CONSTANT
 
+	double detsize = sqrt(angles[angle].fDetUX * angles[angle].fDetUX + angles[angle].fDetUY * angles[angle].fDetUY);
+	double scale = (angles[angle].fDetUX * (angles[angle].fSrcY - angles[angle].fDetSY) - angles[angle].fDetUY * (angles[angle].fSrcX - angles[angle].fDetSX)) / detsize;
+	float tmp = (float)scale;
+	cudaMemcpyToSymbol(gC_Scale, &tmp, sizeof(float), 0, cudaMemcpyHostToDevice);
+
 	dim3 dimBlock(g_blockSlices, g_blockSliceSize);
 	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
 	             (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu
index 09a6554..21484b1 100644
--- a/cuda/2d/par_bp.cu
+++ b/cuda/2d/par_bp.cu
@@ -53,6 +53,7 @@ const unsigned int g_MaxAngles = 2560;
 __constant__ float gC_angle_scaled_sin[g_MaxAngles];
 __constant__ float gC_angle_scaled_cos[g_MaxAngles];
 __constant__ float gC_angle_offset[g_MaxAngles];
+__constant__ float gC_angle_scale[g_MaxAngles];
 
 static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder)
 {
@@ -70,6 +71,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
 	return true;
 }
 
+// TODO: Templated version with/without scale? (Or only the global outputscale)
 __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
 {
 	const int relX = threadIdx.x;
@@ -97,9 +99,10 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star
 		const float scaled_cos_theta = gC_angle_scaled_cos[angle];
 		const float scaled_sin_theta = gC_angle_scaled_sin[angle];
 		const float TOffset = gC_angle_offset[angle];
+		const float scale = gC_angle_scale[angle];
 
 		const float fT = fX * scaled_cos_theta - fY * scaled_sin_theta + TOffset;
-		fVal += tex2D(gT_projTexture, fT, fA);
+		fVal += tex2D(gT_projTexture, fT, fA) * scale;
 		fA += 1.0f;
 	}
 
@@ -138,6 +141,7 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
 		const float cos_theta = gC_angle_scaled_cos[angle];
 		const float sin_theta = gC_angle_scaled_sin[angle];
 		const float TOffset = gC_angle_offset[angle];
+		const float scale = gC_angle_scale[angle];
 
 		float fT = fX * cos_theta - fY * sin_theta + TOffset;
 
@@ -145,7 +149,7 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
 			float fTy = fT;
 			fT += fSubStep * cos_theta;
 			for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) {
-				fVal += tex2D(gT_projTexture, fTy, fA);
+				fVal += tex2D(gT_projTexture, fTy, fA) * scale;
 				fTy -= fSubStep * sin_theta;
 			}
 		}
@@ -172,6 +176,8 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset
 	const float fT = fX * angle_cos - fY * angle_sin + offset;
 	const float fVal = tex2D(gT_projTexture, fT, 0.5f);
 
+	// NB: The 'scale' constant in devBP has been folded into fOutputScale by the caller here
+
 	D_volData[Y*volPitch+X] += fVal * fOutputScale;
 }
 
@@ -186,27 +192,34 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
 	float* angle_scaled_sin = new float[dims.iProjAngles];
 	float* angle_scaled_cos = new float[dims.iProjAngles];
 	float* angle_offset = new float[dims.iProjAngles];
+	float* angle_scale = new float[dims.iProjAngles];
 
 	bindProjDataTexture(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
 
 	for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
 		double d = angles[i].fDetUX * angles[i].fRayY - angles[i].fDetUY * angles[i].fRayX;
 		angle_scaled_cos[i] = angles[i].fRayY / d;
-		angle_scaled_sin[i] = -angles[i].fRayX / d; // TODO: Check signs
+		angle_scaled_sin[i] = -angles[i].fRayX / d;
 		angle_offset[i] = (angles[i].fDetSY * angles[i].fRayX - angles[i].fDetSX * angles[i].fRayY) / d;
+		angle_scale[i] = sqrt(angles[i].fRayX * angles[i].fRayX + angles[i].fRayY * angles[i].fRayY) / abs(d);
 	}
+	//fprintf(stderr, "outputscale in BP_internal: %f, %f\n", fOutputScale, angle_scale[0]);
+	//fprintf(stderr, "ray in BP_internal: %f,%f (length %f)\n", angles[0].fRayX, angles[0].fRayY, sqrt(angles[0].fRayX * angles[0].fRayX + angles[0].fRayY * angles[0].fRayY));
 
 	cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_scaled_sin, angle_scaled_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
 	cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_scaled_cos, angle_scaled_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
 	cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, angle_offset, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+	cudaError_t e4 = cudaMemcpyToSymbol(gC_angle_scale, angle_scale, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
 	assert(e1 == cudaSuccess);
 	assert(e2 == cudaSuccess);
 	assert(e3 == cudaSuccess);
+	assert(e4 == cudaSuccess);
 
 
 	delete[] angle_scaled_sin;
 	delete[] angle_scaled_cos;
 	delete[] angle_offset;
+	delete[] angle_scale;
 
 	dim3 dimBlock(g_blockSlices, g_blockSliceSize);
 	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
@@ -267,6 +280,7 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch,
 	float angle_scaled_cos = angles[angle].fRayY / d;
 	float angle_scaled_sin = -angles[angle].fRayX / d; // TODO: Check signs
 	float angle_offset = (angles[angle].fDetSY * angles[angle].fRayX - angles[angle].fDetSX * angles[angle].fRayY) / d;
+	fOutputScale *= sqrt(angles[angle].fRayX * angles[angle].fRayX + angles[angle].fRayY * angles[angle].fRayY) / abs(d);
 
 	dim3 dimBlock(g_blockSlices, g_blockSliceSize);
 	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu
index feebda2..2a7ec80 100644
--- a/cuda/3d/cone_bp.cu
+++ b/cuda/3d/cone_bp.cu
@@ -140,8 +140,10 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng
 				if (FDKWEIGHT) {
 					// The correct factor here is this one:
 					// Z[idx] += (fr*fCd.w)*(fr*fCd.w)*fVal;
-					// This is the square of the inverse magnification factor
-					// from fX,fY,fZ to the detector.
+					// This is the square of the magnification factor
+					// from fX,fY,fZ to the virtual detector, where the
+					// virtual detector is the plane through the origin
+					// parallel to the detector, so spanned by u,v.
 
 					// Since we are assuming we have a circular cone
 					// beam trajectory, fCd.w is constant, and we instead
author	Willem Jan Palenstijn <wjp@usecode.org>	2019-03-29 21:21:29 +0100
committer	Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>	2019-09-25 14:10:08 +0200
commit	3cf63d335ebe392a8c77f0c90395c18150647aeb (patch)
tree	60a96bfe45c62aa9e57c1c5751e72d3794934845 /cuda
parent	87715885f3b4a80693493e37aa8293899a6b987e (diff)
download	astra-3cf63d335ebe392a8c77f0c90395c18150647aeb.tar.gz astra-3cf63d335ebe392a8c77f0c90395c18150647aeb.tar.bz2 astra-3cf63d335ebe392a8c77f0c90395c18150647aeb.tar.xz astra-3cf63d335ebe392a8c77f0c90395c18150647aeb.zip