diff options
author | Matthias Vogelgesang <matthias.vogelgesang@gmail.com> | 2020-05-23 20:04:38 +0200 |
---|---|---|
committer | Matthias Vogelgesang <matthias.vogelgesang@gmail.com> | 2020-05-23 20:04:38 +0200 |
commit | db3cd886b8303a8a5aff20c76660ed49c2365f94 (patch) | |
tree | 21b77da289051fbb7c5fd65591b0af1c4b797b24 | |
parent | 4442f11d1fd509cea68c8d794fe48b884ef7caaa (diff) | |
download | ufo-filters-db3cd886b8303a8a5aff20c76660ed49c2365f94.tar.gz ufo-filters-db3cd886b8303a8a5aff20c76660ed49c2365f94.tar.bz2 ufo-filters-db3cd886b8303a8a5aff20c76660ed49c2365f94.tar.xz ufo-filters-db3cd886b8303a8a5aff20c76660ed49c2365f94.zip |
backproject: unroll for navi10
With this change, an RX 5600 XT achieves ~22 slices/s according to the benchmark
protocol from https://tomopedia.github.io/software/ufo.
-rw-r--r-- | src/kernels/backproject.cl | 3 |
1 file changed, 3 insertions, 0 deletions
diff --git a/src/kernels/backproject.cl b/src/kernels/backproject.cl index 4ee84de..8a81790 100644 --- a/src/kernels/backproject.cl +++ b/src/kernels/backproject.cl @@ -82,6 +82,9 @@ backproject_tex (read_only image2d_t sinogram, #ifdef DEVICE_QUADRO_M6000 #pragma unroll 2 #endif +#ifdef DEVICE_GFX1010 +#pragma unroll 4 +#endif for(int proj = 0; proj < n_projections; proj++) { float h = by * sin_lut[angle_offset + proj] + bx * cos_lut[angle_offset + proj] + axis_pos; sum += read_imagef (sinogram, volumeSampler, (float2)(h, proj + 0.5f)).x; |