/* * Copyright (C) 2011-2013 Karlsruhe Institute of Technology * * This file is part of Ufo. * * This library is free software: you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either * version 3 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . */ constant sampler_t volumeSampler_single = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR; constant sampler_t volumeSampler_half = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR; constant sampler_t volumeSampler_int8 = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; kernel void interleave_single ( global float *sinogram, write_only image2d_array_t interleaved_sinograms) { const int idx = get_global_id(0); const int idy = get_global_id(1); const int idz = get_global_id(2); const int sizex = get_global_size(0); const int sizey = get_global_size(1); int sinogram_offset = idz*2; float x = sinogram[idx + idy * sizex + (sinogram_offset) * sizex * sizey]; float y = sinogram[idx + idy * sizex + (sinogram_offset+1) * sizex * sizey]; write_imagef(interleaved_sinograms, (int4)(idx, idy, idz, 0),(float4)(x,y,0.0f,0.0f)); } /*kernel void texture_single (read_only image2d_array_t sinogram, global float2 *reconstructed_buffer, constant float *sin_lut, constant float *cos_lut, const unsigned int x_offset, const unsigned int y_offset, const unsigned int angle_offset, const unsigned int n_projections, const float axis_pos, unsigned long size) { const int idx = get_global_id(0); const int idy = get_global_id(1); const int idz = get_global_id(2); const float bx = idx - axis_pos + x_offset + 0.5f; const float by = idy - axis_pos + y_offset + 0.5f; float2 sum = {0.0f, 0.0f}; for(int proj = 0; proj < n_projections; proj++) { float h = -by * sin_lut[angle_offset + proj] + bx * cos_lut[angle_offset + proj] + axis_pos; sum += read_imagef (sinogram, volumeSampler_single, (float4)(h, proj + 0.5f,idz,0.0f)).xy; } reconstructed_buffer[idx + idy*size + idz*size*size] = sum; }*/ kernel void texture_single ( read_only image2d_array_t sinogram, global float2 *reconstructed_buffer, constant float *sin_lut, constant float *cos_lut, const unsigned int x_offset, const unsigned int y_offset, const unsigned int angle_offset, const unsigned int n_projections, const float axis_pos, unsigned long size){ const int local_idx = get_local_id(0); const int local_idy = get_local_id(1); const int global_idx = get_global_id(0); const int global_idy = get_global_id(1); const int idz = get_global_id(2); int local_sizex = get_local_size(0); int local_sizey = get_local_size(1); int global_sizex = get_global_size(0); int global_sizey = get_global_size(1); // Computing sequential numbers of 4x4 square, quadrant, and pixel within quadrant int square = local_idy%4; int quadrant = local_idx/4; int pixel = local_idx%4; // Computing projection and pixel offsets int projection_index = local_idy/4; int2 remapped_index_local = {(4*square + 2*(quadrant%2) + (pixel%2)), (2* (quadrant/2) + (pixel/2))}; int2 remapped_index_global = {(get_group_id(0)*get_local_size(0)+remapped_index_local.x), (get_group_id(1)*get_local_size(1)+remapped_index_local.y)}; float2 pixel_coord = {(remapped_index_global.x - axis_pos + x_offset + 0.5f), (remapped_index_global.y - axis_pos + y_offset+0.5f)}; //bx and by float2 sum[4] = {0.0f,0.0f}; __local float2 shared_mem[64][4]; __local float2 reconstructed_cache[16][16]; /*#ifdef DEVICE_TESLA_K20XM #pragma unroll 4 #endif #ifdef DEVICE_TESLA_P100_PCIE_16GB #pragma unroll 2 #endif #ifdef DEVICE_GEFORCE_GTX_TITAN_BLACK #pragma unroll 8 #endif #ifdef DEVICE_GEFORCE_GTX_TITAN #pragma unroll 14 #endif #ifdef DEVICE_GEFORCE_GTX_1080_TI #pragma unroll 10 #endif #ifdef DEVICE_QUADRO_M6000 #pragma unroll 2 #endif #ifdef DEVICE_GFX1010 #pragma unroll 4 #endif*/ for(int proj = projection_index; proj < n_projections; proj+=4) { float sine_value = sin_lut[angle_offset + proj]; float h = pixel_coord.x * cos_lut[angle_offset + proj] - pixel_coord.y * sin_lut[angle_offset + proj] + axis_pos; for(int q=0; q<4; q+=1){ sum[q] += read_imagef(sinogram, volumeSampler_single, (float4)(h-4*q*sine_value, proj + 0.5f,idz, 0.0)).xy; } } int2 remapped_index = {(local_idx%4), (4*local_idy + (local_idx/4))}; for(int q=0; q<4;q+=1){ // Moving partial sums to shared memory shared_mem[(local_sizex*remapped_index_local.y + remapped_index_local.x)][projection_index] = sum[q]; barrier(CLK_LOCAL_MEM_FENCE); // syncthreads for(int i=2; i>=1; i/=2){ if(remapped_index.x =1; i/=2){ if(remapped_index.x =1; i/=2){ if(remapped_index.x