summaryrefslogtreecommitdiffstats
path: root/src/kernels/stacked-forwardproject.cl
diff options
context:
space:
mode:
Diffstat (limited to 'src/kernels/stacked-forwardproject.cl')
-rw-r--r--src/kernels/stacked-forwardproject.cl274
1 files changed, 274 insertions, 0 deletions
diff --git a/src/kernels/stacked-forwardproject.cl b/src/kernels/stacked-forwardproject.cl
new file mode 100644
index 0000000..c37eb53
--- /dev/null
+++ b/src/kernels/stacked-forwardproject.cl
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) 2011-2013 Karlsruhe Institute of Technology
+ *
+ * This file is part of Ufo.
+ *
+ * This library is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+ constant sampler_t volumeSampler = CLK_NORMALIZED_COORDS_FALSE |
+ CLK_ADDRESS_CLAMP |
+ CLK_FILTER_NEAREST;
+
+ kernel void
+ interleave_single ( global float *slices,
+ write_only image2d_array_t interleaved_slices)
+ {
+ const int idx = get_global_id(0);
+ const int idy = get_global_id(1);
+ const int idz = get_global_id(2);
+ const int sizex = get_global_size(0);
+ const int sizey = get_global_size(1);
+
+ int slice_offset = idz*2;
+
+ float x = slices[idx + idy * sizex + (slice_offset) * sizex * sizey];
+ float y = slices[idx + idy * sizex + (slice_offset+1) * sizex * sizey];
+
+ write_imagef(interleaved_slices, (int4)(idx, idy, idz, 0),(float4)(x,y,0.0f,0.0f));
+ }
+
+ kernel void
+ texture_single (
+ read_only image2d_array_t slices,
+ global float2 *reconstructed_buffer,
+ float axis_pos,
+ float angle_step,
+ unsigned long size){
+
+ const int idx = get_global_id(0);
+ const int idy = get_global_id(1);
+ const int idz = get_global_id(2);
+
+ float angle = idy * angle_step;
+ float r = fmin (axis_pos, size - axis_pos);
+ float d = idx - axis_pos + 0.5f;
+ float l = sqrt(8.0f*r*r - 4.0f*d*d);
+
+ float2 D = (float2) (cos(angle), -sin(angle));
+
+ D = normalize(D);
+
+ float2 N = (float2) (D.y, -D.x);
+
+ float2 sample = d * D - l/2.0f * N + ((float2) (axis_pos, axis_pos));
+
+ float2 sum = {0.0f,0.0f};
+
+ for (int i = 0; i < l; i++) {
+ sum += read_imagef(slices, volumeSampler, (float4)((float2)sample,idz,0.0f)).xy;
+ sample += N;
+ }
+
+ reconstructed_buffer[idx + idy*size + idz*size*size] = sum;
+ }
+
+
+ kernel void
+ uninterleave_single (global float2 *reconstructed_buffer,
+ global float *output)
+ {
+ const int idx = get_global_id(0);
+ const int idy = get_global_id(1);
+ const int idz = get_global_id(2);
+ const int sizex = get_global_size(0);
+ const int sizey = get_global_size(1);
+ int output_offset = idz*2;
+
+ output[idx + idy*sizex + (output_offset)*sizex*sizey] = reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].x;
+ output[idx + idy*sizex + (output_offset+1)*sizex*sizey] = reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].y;
+ }
+
+ kernel void
+ interleave_half ( global float *slices,
+ write_only image2d_array_t interleaved_slices)
+ {
+ const int idx = get_global_id(0);
+ const int idy = get_global_id(1);
+ const int idz = get_global_id(2);
+
+ const int sizex = get_global_size(0);
+ const int sizey = get_global_size(1);
+
+ int slice_offset = idz*4;
+
+ float4 b = {slices[idx + idy * sizex + (slice_offset) * sizex * sizey],
+ slices[idx + idy * sizex + (slice_offset+1) * sizex * sizey],
+ slices[idx + idy * sizex + (slice_offset+2) * sizex * sizey],
+ slices[idx + idy * sizex + (slice_offset+3) * sizex * sizey]};
+
+ write_imagef(interleaved_slices, (int4)(idx, idy, idz, 0),(float4)(b));
+ }
+
+kernel void
+texture_half (
+ read_only image2d_array_t slices,
+ global float4 *reconstructed_buffer,
+ float axis_pos,
+ float angle_step,
+ unsigned long size){
+
+ const int idx = get_global_id(0);
+ const int idy = get_global_id(1);
+ const int idz = get_global_id(2);
+
+ float angle = idy * angle_step;
+ float r = fmin (axis_pos, size - axis_pos);
+ float d = idx - axis_pos + 0.5f;
+ float l = sqrt(8.0f*r*r - 4.0f*d*d);
+
+ float2 D = (float2) (cos(angle), -sin(angle));
+
+ D = normalize(D);
+
+ float2 N = (float2) (D.y, -D.x);
+
+ float2 sample = d * D - l/2.0f * N + ((float2) (axis_pos, axis_pos));
+
+ float4 sum = {0.0f,0.0f,0.0f,0.0f};
+
+ for (int i = 0; i < l; i++) {
+ sum += read_imagef(slices, volumeSampler, (float4)((float2)sample,idz,0.0f));
+ sample += N;
+ }
+
+ reconstructed_buffer[idx + idy*size + idz*size*size] = sum;
+}
+
+ kernel void
+ uninterleave_half (global float4 *reconstructed_buffer,
+ global float *output)
+ {
+ const int idx = get_global_id(0);
+ const int idy = get_global_id(1);
+ const int idz = get_global_id(2);
+
+ const int sizex = get_global_size(0);
+ const int sizey = get_global_size(1);
+
+ int output_offset = idz*4;
+
+ output[idx + idy*sizex + (output_offset)*sizex*sizey] = reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].x;
+ output[idx + idy*sizex + (output_offset+1)*sizex*sizey] = reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].y;
+ output[idx + idy*sizex + (output_offset+2)*sizex*sizey] = reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].z;
+ output[idx + idy*sizex + (output_offset+3)*sizex*sizey] = reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].w;
+ }
+
+ union converter {
+ uint2 storage;
+ uchar8 a;
+ };
+
+ kernel void
+ interleave_uint ( global float *slices,
+ write_only image2d_array_t interleaved_slices,
+ const float min,
+ const float max)
+ {
+ const int idx = get_global_id(0);
+ const int idy = get_global_id(1);
+ const int idz = get_global_id(2);
+
+ const int sizex = get_global_size(0);
+ const int sizey = get_global_size(1);
+
+ int slice_offset = idz*8;
+
+ const float scale = 255.0f / (max - min);
+
+ union converter il;
+ il.a.s0 = (slices[idx + idy * sizex + (slice_offset) * sizex * sizey] - min)*scale;
+ il.a.s1 = (slices[idx + idy * sizex + (slice_offset+1) * sizex * sizey] - min)*scale;
+ il.a.s2 = (slices[idx + idy * sizex + (slice_offset+2) * sizex * sizey] - min)*scale;
+ il.a.s3 = (slices[idx + idy * sizex + (slice_offset+3) * sizex * sizey] - min)*scale;
+ il.a.s4 = (slices[idx + idy * sizex + (slice_offset+4) * sizex * sizey] - min)*scale;
+ il.a.s5 = (slices[idx + idy * sizex + (slice_offset+5) * sizex * sizey] - min)*scale;
+ il.a.s6 = (slices[idx + idy * sizex + (slice_offset+6) * sizex * sizey] - min)*scale;
+ il.a.s7 = (slices[idx + idy * sizex + (slice_offset+7) * sizex * sizey] - min)*scale;
+
+ write_imageui(interleaved_slices, (int4)(idx, idy, idz, 0),(uint4)((uint)il.storage.x,(uint)il.storage.y,0,0));
+ }
+
+kernel void
+texture_uint (
+ read_only image2d_array_t slices,
+ global uint8 *reconstructed_buffer,
+ float axis_pos,
+ float angle_step,
+ unsigned long size){
+
+ const int idx = get_global_id(0);
+ const int idy = get_global_id(1);
+ const int idz = get_global_id(2);
+
+ float angle = idy * angle_step;
+ float r = fmin (axis_pos, size - axis_pos);
+ float d = idx - axis_pos + 0.5f;
+ float l = sqrt(8.0f*r*r - 4.0f*d*d);
+
+ float2 D = (float2) (cos(angle), -sin(angle));
+
+ D = normalize(D);
+
+ float2 N = (float2) (D.y, -D.x);
+
+ float2 sample = d * D - l/2.0f * N + ((float2) (axis_pos, axis_pos));
+
+ uint8 sum = {0,0,0,0,0,0,0,0};
+
+ union converter tex;
+ for (int i = 0; i < l; i++) {
+ tex.storage = read_imageui(slices, volumeSampler, (float4)((float2)sample,idz,0.0f)).xy;
+
+ sum.s0 += (uint)tex.a.s0;
+ sum.s1 += (uint)tex.a.s1;
+ sum.s2 += (uint)tex.a.s2;
+ sum.s3 += (uint)tex.a.s3;
+ sum.s4 += (uint)tex.a.s4;
+ sum.s5 += (uint)tex.a.s5;
+ sum.s6 += (uint)tex.a.s6;
+ sum.s7 += (uint)tex.a.s7;
+
+ sample += N;
+ }
+
+ reconstructed_buffer[idx + idy*size + idz*size*size] = sum;
+}
+
+ kernel void
+ uninterleave_uint (global uint8 *reconstructed_buffer,
+ global float *output,
+ const float min,
+ const float max)
+ {
+ const int idx = get_global_id(0);
+ const int idy = get_global_id(1);
+ const int idz = get_global_id(2);
+
+ const int sizex = get_global_size(0);
+ const int sizey = get_global_size(1);
+
+ int output_offset = idz*8;
+ float scale = (max-min)/255.0f;
+
+ output[idx + idy*sizex + (output_offset)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s0)*scale+min;
+ output[idx + idy*sizex + (output_offset+1)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s1)*scale+min;
+ output[idx + idy*sizex + (output_offset+2)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s2)*scale+min;
+ output[idx + idy*sizex + (output_offset+3)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s3)*scale+min;
+ output[idx + idy*sizex + (output_offset+4)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s4)*scale+min;
+ output[idx + idy*sizex + (output_offset+5)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s5)*scale+min;
+ output[idx + idy*sizex + (output_offset+6)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s6)*scale+min;
+ output[idx + idy*sizex + (output_offset+7)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s7)*scale+min;
+ }
+