1 files changed, 478 insertions, 0 deletions
diff --git a/src/kernels/roll_kernel.cl b/src/kernels/roll_kernel.cl
new file mode 100644
index 0000000..129e14f
--- /dev/null
+++ b/src/kernels/roll_kernel.cl
@@ -0,0 +1,478 @@
+#define rotate()    pixel.x -= x_center.x; \
+                    pixel.y -= y_center; \
+                    pixel.x = pixel.x * cos_roll + pixel.y * sin_roll; \
+                    pixel.y = -pixel.x * sin_roll + pixel.y * cos_roll; \
+                    pixel.x += x_center.x; \
+                    pixel.y += y_center;
+
+/*
+ * Copyright (C) 2015-2016 Karlsruhe Institute of Technology
+ *
+ * This file is part of Ufo.
+ *
+ * This library is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+kernel void backproject_burst_1 (
+read_only image2d_t projection_0,
+                                    global float *volume,
+                                    const sampler_t sampler,
+                                    const int3 real_size,
+                                    const float2 x_center,
+                                    const float y_center,
+                                    const float2 x_region,
+                                    const float2 y_region,
+                                    const float2 z_region,
+                                    const float2 lamino_region,
+                                    const float2 roll_region,
+                                    float sin_lamino,
+                                    float cos_lamino,
+                                    const float sines,
+                                    const float cosines,
+                                    const float norm_factor,
+                                    float sin_roll,
+                                    float cos_roll,
+                                    const int cumulate)
+{
+    int idx = get_global_id (0);
+    int idy = get_global_id (1);
+    int idz = get_global_id (2);
+    float result, tmp, tmp_x, tmp_y;
+    float2 pixel;
+    float3 voxel;
+
+    if (idx < real_size.x && idy < real_size.y && idz < real_size.z) {
+        voxel.x = mad((float) idx, x_region.y, x_region.x);
+        voxel.y = mad((float) idy, y_region.y, y_region.x);
+        sin_roll = sincos (mad((float) idz, -roll_region.y, -roll_region.x), &cos_roll);
+        tmp = mad(z_region.x, sin_lamino, y_center);
+        tmp_x = voxel.x * cos_lamino;
+        tmp_y = -voxel.y * cos_lamino;
+
+        pixel.x = mad(voxel.x, cosines, mad(voxel.y, sines, x_center.x));
+        pixel.y = mad(tmp_x, sines, mad(tmp_y, cosines, tmp));
+        rotate ();
+        result = read_imagef (projection_0, sampler, pixel).x;
+
+
+        if (cumulate) {
+            volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor;
+        } else {
+            volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor;
+        }
+    }
+}
+/*
+ * Copyright (C) 2015-2016 Karlsruhe Institute of Technology
+ *
+ * This file is part of Ufo.
+ *
+ * This library is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+kernel void backproject_burst_2 (
+read_only image2d_t projection_0,
+read_only image2d_t projection_1,
+                                    global float *volume,
+                                    const sampler_t sampler,
+                                    const int3 real_size,
+                                    const float2 x_center,
+                                    const float y_center,
+                                    const float2 x_region,
+                                    const float2 y_region,
+                                    const float2 z_region,
+                                    const float2 lamino_region,
+                                    const float2 roll_region,
+                                    float sin_lamino,
+                                    float cos_lamino,
+                                    const float2 sines,
+                                    const float2 cosines,
+                                    const float norm_factor,
+                                    float sin_roll,
+                                    float cos_roll,
+                                    const int cumulate)
+{
+    int idx = get_global_id (0);
+    int idy = get_global_id (1);
+    int idz = get_global_id (2);
+    float result, tmp, tmp_x, tmp_y;
+    float2 pixel;
+    float3 voxel;
+
+    if (idx < real_size.x && idy < real_size.y && idz < real_size.z) {
+        voxel.x = mad((float) idx, x_region.y, x_region.x);
+        voxel.y = mad((float) idy, y_region.y, y_region.x);
+        sin_roll = sincos (mad((float) idz, -roll_region.y, -roll_region.x), &cos_roll);
+        tmp = mad(z_region.x, sin_lamino, y_center);
+        tmp_x = voxel.x * cos_lamino;
+        tmp_y = -voxel.y * cos_lamino;
+
+        pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x));
+        pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp));
+        rotate ();
+        result = read_imagef (projection_0, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x));
+        pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp));
+        rotate ();
+        result += read_imagef (projection_1, sampler, pixel).x;
+
+
+        if (cumulate) {
+            volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor;
+        } else {
+            volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor;
+        }
+    }
+}
+/*
+ * Copyright (C) 2015-2016 Karlsruhe Institute of Technology
+ *
+ * This file is part of Ufo.
+ *
+ * This library is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+kernel void backproject_burst_4 (
+read_only image2d_t projection_0,
+read_only image2d_t projection_1,
+read_only image2d_t projection_2,
+read_only image2d_t projection_3,
+                                    global float *volume,
+                                    const sampler_t sampler,
+                                    const int3 real_size,
+                                    const float2 x_center,
+                                    const float y_center,
+                                    const float2 x_region,
+                                    const float2 y_region,
+                                    const float2 z_region,
+                                    const float2 lamino_region,
+                                    const float2 roll_region,
+                                    float sin_lamino,
+                                    float cos_lamino,
+                                    const float4 sines,
+                                    const float4 cosines,
+                                    const float norm_factor,
+                                    float sin_roll,
+                                    float cos_roll,
+                                    const int cumulate)
+{
+    int idx = get_global_id (0);
+    int idy = get_global_id (1);
+    int idz = get_global_id (2);
+    float result, tmp, tmp_x, tmp_y;
+    float2 pixel;
+    float3 voxel;
+
+    if (idx < real_size.x && idy < real_size.y && idz < real_size.z) {
+        voxel.x = mad((float) idx, x_region.y, x_region.x);
+        voxel.y = mad((float) idy, y_region.y, y_region.x);
+        sin_roll = sincos (mad((float) idz, -roll_region.y, -roll_region.x), &cos_roll);
+        tmp = mad(z_region.x, sin_lamino, y_center);
+        tmp_x = voxel.x * cos_lamino;
+        tmp_y = -voxel.y * cos_lamino;
+
+        pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x));
+        pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp));
+        rotate ();
+        result = read_imagef (projection_0, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x));
+        pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp));
+        rotate ();
+        result += read_imagef (projection_1, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center.x));
+        pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp));
+        rotate ();
+        result += read_imagef (projection_2, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center.x));
+        pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp));
+        rotate ();
+        result += read_imagef (projection_3, sampler, pixel).x;
+
+
+        if (cumulate) {
+            volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor;
+        } else {
+            volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor;
+        }
+    }
+}
+/*
+ * Copyright (C) 2015-2016 Karlsruhe Institute of Technology
+ *
+ * This file is part of Ufo.
+ *
+ * This library is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+kernel void backproject_burst_8 (
+read_only image2d_t projection_0,
+read_only image2d_t projection_1,
+read_only image2d_t projection_2,
+read_only image2d_t projection_3,
+read_only image2d_t projection_4,
+read_only image2d_t projection_5,
+read_only image2d_t projection_6,
+read_only image2d_t projection_7,
+                                    global float *volume,
+                                    const sampler_t sampler,
+                                    const int3 real_size,
+                                    const float2 x_center,
+                                    const float y_center,
+                                    const float2 x_region,
+                                    const float2 y_region,
+                                    const float2 z_region,
+                                    const float2 lamino_region,
+                                    const float2 roll_region,
+                                    float sin_lamino,
+                                    float cos_lamino,
+                                    const float8 sines,
+                                    const float8 cosines,
+                                    const float norm_factor,
+                                    float sin_roll,
+                                    float cos_roll,
+                                    const int cumulate)
+{
+    int idx = get_global_id (0);
+    int idy = get_global_id (1);
+    int idz = get_global_id (2);
+    float result, tmp, tmp_x, tmp_y;
+    float2 pixel;
+    float3 voxel;
+
+    if (idx < real_size.x && idy < real_size.y && idz < real_size.z) {
+        voxel.x = mad((float) idx, x_region.y, x_region.x);
+        voxel.y = mad((float) idy, y_region.y, y_region.x);
+        sin_roll = sincos (mad((float) idz, -roll_region.y, -roll_region.x), &cos_roll);
+        tmp = mad(z_region.x, sin_lamino, y_center);
+        tmp_x = voxel.x * cos_lamino;
+        tmp_y = -voxel.y * cos_lamino;
+
+        pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x));
+        pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp));
+        rotate ();
+        result = read_imagef (projection_0, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x));
+        pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp));
+        rotate ();
+        result += read_imagef (projection_1, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center.x));
+        pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp));
+        rotate ();
+        result += read_imagef (projection_2, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center.x));
+        pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp));
+        rotate ();
+        result += read_imagef (projection_3, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s4, mad(voxel.y, sines.s4, x_center.x));
+        pixel.y = mad(tmp_x, sines.s4, mad(tmp_y, cosines.s4, tmp));
+        rotate ();
+        result += read_imagef (projection_4, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s5, mad(voxel.y, sines.s5, x_center.x));
+        pixel.y = mad(tmp_x, sines.s5, mad(tmp_y, cosines.s5, tmp));
+        rotate ();
+        result += read_imagef (projection_5, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s6, mad(voxel.y, sines.s6, x_center.x));
+        pixel.y = mad(tmp_x, sines.s6, mad(tmp_y, cosines.s6, tmp));
+        rotate ();
+        result += read_imagef (projection_6, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s7, mad(voxel.y, sines.s7, x_center.x));
+        pixel.y = mad(tmp_x, sines.s7, mad(tmp_y, cosines.s7, tmp));
+        rotate ();
+        result += read_imagef (projection_7, sampler, pixel).x;
+
+
+        if (cumulate) {
+            volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor;
+        } else {
+            volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor;
+        }
+    }
+}
+/*
+ * Copyright (C) 2015-2016 Karlsruhe Institute of Technology
+ *
+ * This file is part of Ufo.
+ *
+ * This library is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+kernel void backproject_burst_16 (
+read_only image2d_t projection_0,
+read_only image2d_t projection_1,
+read_only image2d_t projection_2,
+read_only image2d_t projection_3,
+read_only image2d_t projection_4,
+read_only image2d_t projection_5,
+read_only image2d_t projection_6,
+read_only image2d_t projection_7,
+read_only image2d_t projection_8,
+read_only image2d_t projection_9,
+read_only image2d_t projection_10,
+read_only image2d_t projection_11,
+read_only image2d_t projection_12,
+read_only image2d_t projection_13,
+read_only image2d_t projection_14,
+read_only image2d_t projection_15,
+                                    global float *volume,
+                                    const sampler_t sampler,
+                                    const int3 real_size,
+                                    const float2 x_center,
+                                    const float y_center,
+                                    const float2 x_region,
+                                    const float2 y_region,
+                                    const float2 z_region,
+                                    const float2 lamino_region,
+                                    const float2 roll_region,
+                                    float sin_lamino,
+                                    float cos_lamino,
+                                    const float16 sines,
+                                    const float16 cosines,
+                                    const float norm_factor,
+                                    float sin_roll,
+                                    float cos_roll,
+                                    const int cumulate)
+{
+    int idx = get_global_id (0);
+    int idy = get_global_id (1);
+    int idz = get_global_id (2);
+    float result, tmp, tmp_x, tmp_y;
+    float2 pixel;
+    float3 voxel;
+
+    if (idx < real_size.x && idy < real_size.y && idz < real_size.z) {
+        voxel.x = mad((float) idx, x_region.y, x_region.x);
+        voxel.y = mad((float) idy, y_region.y, y_region.x);
+        sin_roll = sincos (mad((float) idz, -roll_region.y, -roll_region.x), &cos_roll);
+        tmp = mad(z_region.x, sin_lamino, y_center);
+        tmp_x = voxel.x * cos_lamino;
+        tmp_y = -voxel.y * cos_lamino;
+
+        pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x));
+        pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp));
+        rotate ();
+        result = read_imagef (projection_0, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x));
+        pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp));
+        rotate ();
+        result += read_imagef (projection_1, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center.x));
+        pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp));
+        rotate ();
+        result += read_imagef (projection_2, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center.x));
+        pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp));
+        rotate ();
+        result += read_imagef (projection_3, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s4, mad(voxel.y, sines.s4, x_center.x));
+        pixel.y = mad(tmp_x, sines.s4, mad(tmp_y, cosines.s4, tmp));
+        rotate ();
+        result += read_imagef (projection_4, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s5, mad(voxel.y, sines.s5, x_center.x));
+        pixel.y = mad(tmp_x, sines.s5, mad(tmp_y, cosines.s5, tmp));
+        rotate ();
+        result += read_imagef (projection_5, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s6, mad(voxel.y, sines.s6, x_center.x));
+        pixel.y = mad(tmp_x, sines.s6, mad(tmp_y, cosines.s6, tmp));
+        rotate ();
+        result += read_imagef (projection_6, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s7, mad(voxel.y, sines.s7, x_center.x));
+        pixel.y = mad(tmp_x, sines.s7, mad(tmp_y, cosines.s7, tmp));
+        rotate ();
+        result += read_imagef (projection_7, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s8, mad(voxel.y, sines.s8, x_center.x));
+        pixel.y = mad(tmp_x, sines.s8, mad(tmp_y, cosines.s8, tmp));
+        rotate ();
+        result += read_imagef (projection_8, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.s9, mad(voxel.y, sines.s9, x_center.x));
+        pixel.y = mad(tmp_x, sines.s9, mad(tmp_y, cosines.s9, tmp));
+        rotate ();
+        result += read_imagef (projection_9, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.sa, mad(voxel.y, sines.sa, x_center.x));
+        pixel.y = mad(tmp_x, sines.sa, mad(tmp_y, cosines.sa, tmp));
+        rotate ();
+        result += read_imagef (projection_10, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.sb, mad(voxel.y, sines.sb, x_center.x));
+        pixel.y = mad(tmp_x, sines.sb, mad(tmp_y, cosines.sb, tmp));
+        rotate ();
+        result += read_imagef (projection_11, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.sc, mad(voxel.y, sines.sc, x_center.x));
+        pixel.y = mad(tmp_x, sines.sc, mad(tmp_y, cosines.sc, tmp));
+        rotate ();
+        result += read_imagef (projection_12, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.sd, mad(voxel.y, sines.sd, x_center.x));
+        pixel.y = mad(tmp_x, sines.sd, mad(tmp_y, cosines.sd, tmp));
+        rotate ();
+        result += read_imagef (projection_13, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.se, mad(voxel.y, sines.se, x_center.x));
+        pixel.y = mad(tmp_x, sines.se, mad(tmp_y, cosines.se, tmp));
+        rotate ();
+        result += read_imagef (projection_14, sampler, pixel).x;
+        pixel.x = mad(voxel.x, cosines.sf, mad(voxel.y, sines.sf, x_center.x));
+        pixel.y = mad(tmp_x, sines.sf, mad(tmp_y, cosines.sf, tmp));
+        rotate ();
+        result += read_imagef (projection_15, sampler, pixel).x;
+
+
+        if (cumulate) {
+            volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor;
+        } else {
+            volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor;
+        }
+    }
+}
+