summary refs log tree commit diff stats
path: root/cuda/3d/par3d_fp.cu
diff options
context:
space:
mode:
Diffstat (limited to 'cuda/3d/par3d_fp.cu')
-rw-r--r-- cuda/3d/par3d_fp.cu 22
1 file changed, 21 insertions, 1 deletion
diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu
index fda6f93..075784b 100644
--- a/cuda/3d/par3d_fp.cu
+++ b/cuda/3d/par3d_fp.cu
@@ -146,6 +146,7 @@ bool transferConstants(const SPar3DProjection* angles, unsigned int iProjAngles)
// blockIdx: x = u/v detector
// y = angle block
+#include "rounding.h"
template<class COORD, class SCALE>
__global__ void par3D_FP_t(float* D_projData, unsigned int projPitch,
@@ -212,10 +213,28 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch,
float f0 = startSlice + 0.5f;
float f1 = a1 * (startSlice - 0.5f*c.nSlices(dims) + 0.5f) + b1 + 0.5f*c.nDim1(dims) - 0.5f + 0.5f;
float f2 = a2 * (startSlice - 0.5f*c.nSlices(dims) + 0.5f) + b2 + 0.5f*c.nDim2(dims) - 0.5f + 0.5f;
+ //printf("%f, %f (%f), %f (%f)\n", f0, f1, a1, f2, a2); // Only f1 non linear
for (int s = startSlice; s < endSlice; ++s)
{
- fVal += c.tex(tex, f0, f1, f2);
+
+ textype h5 = texto(0.5f);
+ textype f1_ = texto(f1);
+ textype f1f_ = texto(floor(f1));
+ float f1f = floor(f1);
+
+ if ((f1 - f1f) < 0.5f) {
+ textype fVal1 = texto(c.tex(tex, f0, f1f - 0.5f, f2));
+ textype fVal2 = texto(c.tex(tex, f0, f1f + 0.5f, f2));
+ fVal += texfrom(fVal1 + (f1_ + h5 - f1f_) * (fVal2 - fVal1));
+// fVal += texfrom(__hfma(__hadd(h5,__hsub(f1_, f1f_)), __hsub(fVal2, fVal1), fVal1));
+ } else {
+ textype fVal1 = texto(c.tex(tex, f0, f1f + 0.5f, f2));
+ textype fVal2 = texto(c.tex(tex, f0, f1f + 1.5f, f2));
+ fVal += texfrom(fVal1 + (f1_ - h5 - f1f_) * (fVal2 - fVal1));
+ }
+
+// fVal += c.tex(tex, f0, f1, f2);
f0 += 1.0f;
f1 += a1;
f2 += a2;
@@ -308,6 +327,7 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
for (int s = startSlice; s < endSlice; ++s)
{
fVal += c.tex(tex, f0, f1, f2);
+
f0 += 1.0f;
f1 += a1;
f2 += a2;