summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tomas Farago <sensej007@email.cz> 2019-08-05 08:58:37 +0200
committer Tomas Farago <sensej007@email.cz> 2020-02-05 10:16:26 +0100
commit 1028504583fe0ba72ecce004610e50a6eca42474 (patch)
tree fc819d83facf1730df2292d039d36e888b2b6b84
parent 861c41d886ccb4feec36a9387b05e2f5630fba33 (diff)
download ufo-filters-1028504583fe0ba72ecce004610e50a6eca42474.tar.gz
ufo-filters-1028504583fe0ba72ecce004610e50a6eca42474.tar.bz2
ufo-filters-1028504583fe0ba72ecce004610e50a6eca42474.tar.xz
ufo-filters-1028504583fe0ba72ecce004610e50a6eca42474.zip
NLM: Put common things to src/common
-rw-r--r-- src/CMakeLists.txt 4
-rw-r--r-- src/common/ufo-common.c 104
-rw-r--r-- src/common/ufo-common.h 41
-rw-r--r-- src/common/ufo-math.c 9
-rw-r--r-- src/common/ufo-math.h 3
-rw-r--r-- src/meson.build 15
-rw-r--r-- src/ufo-non-local-means-task.c 105
7 files changed, 191 insertions, 90 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d4bc70c..6f69f19 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -127,6 +127,10 @@ set(general_backproject_aux_SRCS
common/ufo-scarray.c
common/ufo-ctgeometry.c)
+# Additional sources for the non-local-means task, which uses helpers from
+# common/ufo-math.c and common/ufo-common.c.
+# NOTE(review): presumably picked up through the <task>_aux_SRCS naming
+# convention, like general_backproject_aux_SRCS above -- confirm.
+set(non_local_means_aux_SRCS
+ common/ufo-math.c
+ common/ufo-common.c)
+
file(GLOB ufofilter_KERNELS "kernels/*.cl")
#}}}
#{{{ Variables
diff --git a/src/common/ufo-common.c b/src/common/ufo-common.c
new file mode 100644
index 0000000..f157cf4
--- /dev/null
+++ b/src/common/ufo-common.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2015-2019 Karlsruhe Institute of Technology
+ *
+ * This file is part of Ufo.
+ *
+ * This library is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <math.h>
+#include <glib.h>
+#include "ufo-math.h"
+#include "ufo-common.h"
+
+/**
+ * ufo_common_estimate_sigma:
+ * @convolution_kernel: kernel which receives @input_image, @sampler and
+ *  @out_mem as arguments 0-2 and is launched over the 2D @global_size
+ * @sum_kernel: kernel which reduces @out_mem to one partial sum per work group
+ * @cmd_queue: command queue used for both kernel launches and for reading the
+ *  partial sums back; its context is used to create the temporary buffer
+ * @sampler: sampler handed to @convolution_kernel
+ * @profiler: profiler through which both kernels are launched
+ * @input_image: input of the convolution
+ * @out_mem: output of the convolution and input of the summation, its contents
+ *  are overwritten
+ * @max_work_group_size: upper bound of the summation work group size, it is
+ *  rounded down to a power of two
+ * @global_size: array of two elements with the 2D work size of the convolution
+ *
+ * Estimate sigma from @input_image by convolving it and summing the result up.
+ * The summation happens in two stages: work groups compute partial sums on the
+ * device, which are then read back (blocking) and added up on the host.
+ *
+ * Returns: sqrt (pi / 2) / (6 * (w - 2) * (h - 2)) * sum, where w and h are the
+ *  two elements of @global_size and sum is the total of all partial sums.
+ */
+gfloat
+ufo_common_estimate_sigma (cl_kernel convolution_kernel,
+                           cl_kernel sum_kernel,
+                           cl_command_queue cmd_queue,
+                           cl_sampler sampler,
+                           UfoProfiler *profiler,
+                           cl_mem input_image,
+                           cl_mem out_mem,
+                           const gsize max_work_group_size,
+                           const gsize *global_size)
+{
+    gsize n = global_size[0] * global_size[1];
+    gsize local_size, num_groups, global_size_1D;
+    gint num_group_iterations;
+    gfloat *result, sum = 0.0f;
+    cl_int err;
+    cl_mem group_sums;
+    /* Stays NULL if the query below fails, so that clCreateBuffer reports an
+     * error instead of working with an indeterminate handle. */
+    cl_context context = NULL;
+
+    err = clGetCommandQueueInfo (cmd_queue, CL_QUEUE_CONTEXT, sizeof (cl_context), &context, NULL);
+    UFO_RESOURCES_CHECK_CLERR (err);
+
+    /* First compute the convolution of the input with the difference of
+     * laplacians.
+     */
+    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (convolution_kernel, 0, sizeof (cl_mem), &input_image));
+    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (convolution_kernel, 1, sizeof (cl_sampler), &sampler));
+    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (convolution_kernel, 2, sizeof (cl_mem), &out_mem));
+    ufo_profiler_call (profiler, cmd_queue, convolution_kernel, 2, global_size, NULL);
+
+    /* Now compute partial sums of the convolved image. */
+    /* Compute global and local dimensions for the cumsum kernel */
+    /* Make sure local_size is a power of 2 */
+    local_size = ufo_math_compute_closest_smaller_power_of_2 (max_work_group_size);
+    /* Number of iterations of every group is given by the number of pixels
+     * divided by the number of pixels *num_groups* can process. */
+    num_groups = MIN (local_size, UFO_MATH_NUM_CHUNKS (n, local_size));
+    num_group_iterations = UFO_MATH_NUM_CHUNKS (n, local_size * num_groups);
+    /* The real number of groups is given by the number of pixels
+     * divided by the group size and the number of group iterations. */
+    num_groups = UFO_MATH_NUM_CHUNKS (n, num_group_iterations * local_size);
+    global_size_1D = num_groups * local_size;
+
+    /* gsize is not unsigned long on every platform, hence G_GSIZE_FORMAT */
+    g_debug (" n: %" G_GSIZE_FORMAT, n);
+    g_debug (" num groups: %" G_GSIZE_FORMAT, num_groups);
+    g_debug (" group iterations: %d", num_group_iterations);
+    g_debug ("kernel global size: %" G_GSIZE_FORMAT, global_size_1D);
+    g_debug (" kernel local size: %" G_GSIZE_FORMAT, local_size);
+
+    /* Zero-initialized, so a failed read-back contributes 0 to the sum
+     * instead of garbage. */
+    result = g_malloc0 (sizeof (cl_float) * num_groups);
+    group_sums = clCreateBuffer (context,
+                                 CL_MEM_READ_WRITE,
+                                 sizeof (cl_float) * num_groups,
+                                 NULL,
+                                 &err);
+    UFO_RESOURCES_CHECK_CLERR (err);
+
+    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 0, sizeof (cl_mem), &out_mem));
+    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 1, sizeof (cl_mem), &group_sums));
+    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 2, sizeof (cl_mem), &out_mem));
+    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 3, sizeof (cl_float) * local_size, NULL));
+    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 4, sizeof (gsize), &n));
+    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 5, sizeof (gint), &num_group_iterations));
+    ufo_profiler_call (profiler, cmd_queue, sum_kernel, 1, &global_size_1D, &local_size);
+
+    /* Blocking read, the partial sums are complete on the host afterwards. */
+    err = clEnqueueReadBuffer (cmd_queue,
+                               group_sums,
+                               CL_TRUE,
+                               0, sizeof (cl_float) * num_groups,
+                               result,
+                               0, NULL, NULL);
+    UFO_RESOURCES_CHECK_CLERR (err);
+    UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (group_sums));
+
+    /* Sum partial sums computed by the groups. */
+    for (gsize i = 0; i < num_groups; i++) {
+        sum += result[i];
+    }
+    g_free (result);
+
+    return sqrt (G_PI_2) / (6 * (global_size[0] - 2.0f) * (global_size[1] - 2.0f)) * sum;
+}
diff --git a/src/common/ufo-common.h b/src/common/ufo-common.h
new file mode 100644
index 0000000..f6e2349
--- /dev/null
+++ b/src/common/ufo-common.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2015-2019 Karlsruhe Institute of Technology
+ *
+ * This file is part of Ufo.
+ *
+ * This library is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef UFO_COMMON_H
+#define UFO_COMMON_H
+
+#ifdef __APPLE__
+#include <OpenCL/cl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#include <ufo/ufo.h>
+
+/*
+ * Estimate sigma from input_image.
+ *
+ * convolution_kernel is run over the 2D global_size (two elements) with
+ * input_image, sampler and out_mem as its arguments; sum_kernel then reduces
+ * out_mem to per-work-group partial sums which are read back through
+ * cmd_queue and added up on the host. Both kernels are launched via profiler.
+ * max_work_group_size is rounded down to a power of two to obtain the work
+ * group size of the summation. out_mem is overwritten by the convolution.
+ *
+ * Returns sqrt (pi / 2) / (6 * (w - 2) * (h - 2)) * sum with w, h being the
+ * two elements of global_size.
+ */
+gfloat ufo_common_estimate_sigma (cl_kernel convolution_kernel,
+ cl_kernel sum_kernel,
+ cl_command_queue cmd_queue,
+ cl_sampler sampler,
+ UfoProfiler *profiler,
+ cl_mem input_image,
+ cl_mem out_mem,
+ const gsize max_work_group_size,
+ const gsize *global_size);
+
+#endif
diff --git a/src/common/ufo-math.c b/src/common/ufo-math.c
index 78b7dda..725b7e6 100644
--- a/src/common/ufo-math.c
+++ b/src/common/ufo-math.c
@@ -140,3 +140,12 @@ ufo_array_minimum (gdouble *array, gint num_values)
{
return find_extremum (array, num_values, 1);
}
+
+/**
+ * ufo_math_compute_closest_smaller_power_of_2:
+ * @value: a number
+ *
+ * Returns: the largest power of two which is not greater than @value (@value
+ *  itself if it already is a power of two), or 0 if @value is 0.
+ */
+gsize
+ufo_math_compute_closest_smaller_power_of_2 (gsize value)
+{
+    gsize result = 1;
+
+    if (value == 0) {
+        return 0;
+    }
+
+    /* Pure integer arithmetic is exact over the whole gsize range, unlike a
+     * round trip through double. Comparing against value / 2 ensures that
+     * doubling result can never overflow. */
+    while (result <= value / 2) {
+        result *= 2;
+    }
+
+    return result;
+}
diff --git a/src/common/ufo-math.h b/src/common/ufo-math.h
index 70084ce..8b283e1 100644
--- a/src/common/ufo-math.h
+++ b/src/common/ufo-math.h
@@ -24,6 +24,8 @@
#define UFO_MATH_EPSILON 1e-7
#define UFO_MATH_ARE_ALMOST_EQUAL(a, b) (ABS ((a) - (b)) < UFO_MATH_EPSILON)
+#define UFO_MATH_NUM_CHUNKS(n, k) (((n) - 1) / (k) + 1)
+
typedef struct {
gdouble x, y, z;
@@ -54,5 +56,6 @@ gdouble ufo_array_minimum (gdouble *array,
gdouble ufo_clip_value (gdouble value,
gdouble minimum,
gdouble maximum);
+gsize ufo_math_compute_closest_smaller_power_of_2 (gsize value);
#endif
diff --git a/src/meson.build b/src/meson.build
index eb981b2..4cd4bdc 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -48,7 +48,6 @@ plugins = [
'merge',
'metaballs',
'monitor',
- 'non-local-means',
'null',
'opencl',
'opencl-reduce',
@@ -171,6 +170,20 @@ shared_module('conebeamprojectionweight',
install_dir: plugin_install_dir,
)
+# non-local means: defined as its own module because it is built together with
+# the shared helpers in common/ufo-math.c and common/ufo-common.c
+
+shared_module('nonlocalmeans',
+ sources: [
+ 'ufo-non-local-means-task.c',
+ 'common/ufo-math.c',
+ 'common/ufo-common.c',
+ ],
+ dependencies: deps,
+ name_prefix: 'libufofilter',
+ install: true,
+ install_dir: plugin_install_dir,
+)
+
# fft plugins
have_clfft = clfft_dep.found()
diff --git a/src/ufo-non-local-means-task.c b/src/ufo-non-local-means-task.c
index 3a9b1a7..a0eff2c 100644
--- a/src/ufo-non-local-means-task.c
+++ b/src/ufo-non-local-means-task.c
@@ -25,10 +25,11 @@
#include <math.h>
#include "ufo-non-local-means-task.h"
+#include "common/ufo-math.h"
#include "common/ufo-addressing.h"
+#include "common/ufo-common.h"
#define PIXELS_PER_THREAD 4
-#define NUM_CHUNKS(n, k) (((n) - 1) / (k) + 1)
struct _UfoNonLocalMeansTaskPrivate {
guint search_radius;
@@ -71,15 +72,6 @@ enum {
static GParamSpec *properties[N_PROPERTIES] = { NULL, };
-static gsize
-compute_closest_smaller_power_of_2 (gsize value)
-{
- gdouble integer;
- modf (log2 (value), &integer);
-
- return (gsize) pow (2, integer);
-}
-
static gint
compute_cumsum_local_width (UfoNonLocalMeansTaskPrivate *priv)
{
@@ -88,7 +80,7 @@ compute_cumsum_local_width (UfoNonLocalMeansTaskPrivate *priv)
/* Compute global and local dimensions for the cumsum kernel */
/* First make sure local_width is a power of 2 */
- local_width = (gint) compute_closest_smaller_power_of_2 (priv->max_work_group_size);
+ local_width = (gint) ufo_math_compute_closest_smaller_power_of_2 (priv->max_work_group_size);
if (local_width > 4) {
/* Empirically determined value on NVIDIA cards */
local_width /= 4;
@@ -253,19 +245,19 @@ compute_cumsum (UfoNonLocalMeansTaskPrivate *priv,
* This is not be the final number of groups, it's just used to compute the
* number of iterations of every group.
*/
- num_groups = MIN (local_width, NUM_CHUNKS (width, local_width));
+ num_groups = MIN (local_width, UFO_MATH_NUM_CHUNKS (width, local_width));
/* Number of iterations of every group is given by the number of pixels
* divided by the number of pixels *num_groups* can process. */
- num_group_iterations = NUM_CHUNKS (width, local_width * num_groups);
+ num_group_iterations = UFO_MATH_NUM_CHUNKS (width, local_width * num_groups);
/* Finally, the real number of groups is given by the number of pixels
* divided by the group size and the number of group iterations. */
- num_groups = NUM_CHUNKS (width, num_group_iterations * local_width);
+ num_groups = UFO_MATH_NUM_CHUNKS (width, num_group_iterations * local_width);
/* Cache size must be larger by *local_size* / 16 because of the bank
* conflicts avoidance. Additionally, +1 is needed because of the shifted
* access to the local memory.
*/
- cache_size = sizeof (cl_float) * (local_width + NUM_CHUNKS (local_width, 16) + 1);
+ cache_size = sizeof (cl_float) * (local_width + UFO_MATH_NUM_CHUNKS (local_width, 16) + 1);
cumsum_global_size[0] = num_groups * local_width / 2;
cumsum_global_size[1] = height;
block_sums_global_size[0] = local_width / 2;
@@ -559,79 +551,6 @@ ufo_non_local_means_task_get_mode (UfoTask *task)
return UFO_TASK_MODE_PROCESSOR | UFO_TASK_MODE_GPU;
}
-static gfloat
-compute_sigma (UfoNonLocalMeansTaskPrivate *priv,
- cl_command_queue cmd_queue,
- UfoProfiler *profiler,
- cl_mem input_image,
- cl_mem out_mem)
-{
- gsize n = priv->cropped_size[0] * priv->cropped_size[1];
- gsize local_size, num_groups, num_group_iterations, global_size;
- gfloat *result, sum = 0.0f;
- cl_int err;
- cl_mem group_sums;
-
- /* First compute the convolution of the input with the difference of
- * laplacians.
- */
- UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->convolution_kernel, 0, sizeof (cl_mem), &input_image));
- UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->convolution_kernel, 1, sizeof (cl_sampler), &priv->sampler));
- UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->convolution_kernel, 2, sizeof (cl_mem), &out_mem));
- ufo_profiler_call (profiler, cmd_queue, priv->convolution_kernel, 2, priv->cropped_size, NULL);
-
- /* Now compute partial sums of the convolved image. */
- /* Compute global and local dimensions for the cumsum kernel */
- /* Make sure local_size is a power of 2 */
- local_size = compute_closest_smaller_power_of_2 (priv->max_work_group_size);
- /* Number of iterations of every group is given by the number of pixels
- * divided by the number of pixels *num_groups* can process. */
- num_groups = MIN (local_size, NUM_CHUNKS (n, local_size));
- num_group_iterations = NUM_CHUNKS (n, local_size * num_groups);
- /* The real number of groups is given by the number of pixels
- * divided by the group size and the number of group iterations. */
- num_groups = NUM_CHUNKS (n, num_group_iterations * local_size);
- global_size = num_groups * local_size;
-
- g_debug (" n: %lu", n);
- g_debug (" num groups: %lu", num_groups);
- g_debug (" group iterations: %lu", num_group_iterations);
- g_debug ("kernel global size: %lu", global_size);
- g_debug (" kernel local size: %lu", local_size);
-
- result = g_malloc0 (sizeof (cl_float) * num_groups);
- group_sums = clCreateBuffer (priv->context,
- CL_MEM_READ_WRITE,
- sizeof (cl_float) * num_groups,
- NULL,
- &err);
- UFO_RESOURCES_CHECK_CLERR (err);
-
- UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 0, sizeof (cl_mem), &out_mem));
- UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 1, sizeof (cl_mem), &group_sums));
- UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 2, sizeof (cl_mem), &out_mem));
- UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 3, sizeof (cl_float) * local_size, NULL));
- UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 4, sizeof (gsize), &n));
- UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 5, sizeof (gint), &num_group_iterations));
- ufo_profiler_call (profiler, cmd_queue, priv->sum_kernel, 1, &global_size, &local_size);
-
- clEnqueueReadBuffer (cmd_queue,
- group_sums,
- CL_TRUE,
- 0, sizeof (cl_float) * num_groups,
- result,
- 0, NULL, NULL);
- UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (group_sums));
-
- /* Sum partial sums computed by the groups. */
- for (gsize i = 0; i < num_groups; i++) {
- sum += result[i];
- }
- g_free (result);
-
- return sqrt (G_PI_2) / (6 * (priv->cropped_size[0] - 2.0f) * (priv->cropped_size[1] - 2.0f)) * sum;
-}
-
static gboolean
ufo_non_local_means_task_process (UfoTask *task,
UfoBuffer **inputs,
@@ -665,7 +584,15 @@ ufo_non_local_means_task_process (UfoTask *task,
/* Use out_mem for the convolution, it's not necessary after the
* computation and can be re-used by the de-noising itself.
*/
- estimated_sigma = compute_sigma (priv, cmd_queue, profiler, in_mem, out_mem);
+ estimated_sigma = ufo_common_estimate_sigma (priv->convolution_kernel,
+ priv->sum_kernel,
+ cmd_queue,
+ priv->sampler,
+ profiler,
+ in_mem,
+ out_mem,
+ priv->max_work_group_size,
+ priv->cropped_size);
g_debug ("Estimated sigma: %g", estimated_sigma);
if (priv->h <= 0.0f) {
priv->h = estimated_sigma;