summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matthias Vogelgesang <matthias.vogelgesang@gmail.com> 2020-05-23 20:04:38 +0200
committer: Matthias Vogelgesang <matthias.vogelgesang@gmail.com> 2020-05-23 20:04:38 +0200
commit: db3cd886b8303a8a5aff20c76660ed49c2365f94 (patch)
tree: 21b77da289051fbb7c5fd65591b0af1c4b797b24
parent: 4442f11d1fd509cea68c8d794fe48b884ef7caaa (diff)
downloadufo-filters-db3cd886b8303a8a5aff20c76660ed49c2365f94.tar.gz
ufo-filters-db3cd886b8303a8a5aff20c76660ed49c2365f94.tar.bz2
ufo-filters-db3cd886b8303a8a5aff20c76660ed49c2365f94.tar.xz
ufo-filters-db3cd886b8303a8a5aff20c76660ed49c2365f94.zip
backproject: unroll for navi10
With that, an RX 5600 XT achieves ~22 slices/s according to the benchmark protocol from https://tomopedia.github.io/software/ufo.
-rw-r--r-- src/kernels/backproject.cl | 3
1 file changed, 3 insertions, 0 deletions
diff --git a/src/kernels/backproject.cl b/src/kernels/backproject.cl
index 4ee84de..8a81790 100644
--- a/src/kernels/backproject.cl
+++ b/src/kernels/backproject.cl
@@ -82,6 +82,9 @@ backproject_tex (read_only image2d_t sinogram,
#ifdef DEVICE_QUADRO_M6000
#pragma unroll 2
#endif
+#ifdef DEVICE_GFX1010
+#pragma unroll 4
+#endif
for(int proj = 0; proj < n_projections; proj++) {
float h = by * sin_lut[angle_offset + proj] + bx * cos_lut[angle_offset + proj] + axis_pos;
sum += read_imagef (sinogram, volumeSampler, (float2)(h, proj + 0.5f)).x;