summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTomas Farago <sensej007@email.cz>2019-07-31 17:03:46 +0200
committerTomas Farago <sensej007@email.cz>2020-02-05 10:16:26 +0100
commit2009b00e1d086e6ccef2d3fab84da2eba7b41c60 (patch)
tree06d87da96c97ce11d021803dd26a22b57713aabc
parent692afa64b6e508653d091f641d8a85fc95f7733a (diff)
downloadufo-filters-2009b00e1d086e6ccef2d3fab84da2eba7b41c60.tar.gz
ufo-filters-2009b00e1d086e6ccef2d3fab84da2eba7b41c60.tar.bz2
ufo-filters-2009b00e1d086e6ccef2d3fab84da2eba7b41c60.tar.xz
ufo-filters-2009b00e1d086e6ccef2d3fab84da2eba7b41c60.zip
NLM: Add estimate-sigma
-rw-r--r--docs/filters.rst22
-rw-r--r--src/kernels/estimate-noise.cl42
-rw-r--r--src/kernels/meson.build1
-rw-r--r--src/ufo-non-local-means-task.c160
4 files changed, 202 insertions, 23 deletions
diff --git a/docs/filters.rst b/docs/filters.rst
index 2abe2ea..8afd0cb 100644
--- a/docs/filters.rst
+++ b/docs/filters.rst
@@ -542,16 +542,20 @@ Non-local-means denoising
Smoothing control parameter, should be around noise standard deviation
or slightly less. Higher h results in a smoother image but with blurred
- features.
+ features. If it is 0, estimate noise standard deviation and use it as
+ the parameter value.
.. gobj:prop:: sigma:float
- Noise standard deviation, improves weights computation.
+ Noise standard deviation, improves weights computation. If it is zero,
+ it is *not* automatically estimated as opposed to :gobj:prop:`h`.
+ :gobj:prop:`estimate-sigma` has to be specified in order to override
+ :gobj:prop:`sigma` value.
.. gobj:prop:: window:boolean
Apply Gaussian profile with :math:`\sigma = \frac{P}{2}`, where :math:`P`
- is the gobj:prop:`patch-radius` parameter to the weight computation
+ is the :gobj:prop:`patch-radius` parameter to the weight computation
which decreases the influence of pixels towards the corners of the
patches.
@@ -562,6 +566,13 @@ Non-local-means denoising
no Gaussian profile used and from the nature of the fast algorithm,
floating point precision errors might occur for large images.
+ .. gobj:prop:: estimate-sigma:boolean
+
+ Estimate sigma based on [#]_, which overrides :gobj:prop:`sigma`
+ parameter value. Only the first image in a sequence is used for
+ estimation and the estimated sigma is re-used for every consequent
+ image.
+
.. gobj:prop:: addressing-mode:enum
Addressing mode specifies the behavior for pixels falling outside the
@@ -573,6 +584,11 @@ Non-local-means denoising
International Symposium on Biomedical Imaging: From Nano to Macro,
2008, pp. 1331-1334. DOI:10.1109/ISBI.2008.4541250
+ .. [#]
+ J. Immerkaer, *Fast noise variance estimation* in Computer vision and image
+ understanding 64.2 (1996): 300-302. DOI:10.1006/cviu.1996.0060
+
+
Horizontal interpolation
------------------------
diff --git a/src/kernels/estimate-noise.cl b/src/kernels/estimate-noise.cl
new file mode 100644
index 0000000..31815d3
--- /dev/null
+++ b/src/kernels/estimate-noise.cl
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2011-2019 Karlsruhe Institute of Technology
+ *
+ * This file is part of Ufo.
+ *
+ * This library is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+kernel void
+convolve_abs_laplacian_diff (read_only image2d_t input,
+ const sampler_t sampler,
+ global float *output)
+{
+ int idx = get_global_id (0);
+ int idy = get_global_id (1);
+ int width = get_global_size (0);
+ int height = get_global_size (1);
+ float sum = 0.0f;
+ float coeffs[3][3] = {{ 1.0f, -2.0f, 1.0f},
+ {-2.0f, 4.0f, -2.0f},
+ { 1.0f, -2.0f, 1.0f}};
+
+ for (int dy = -1; dy < 2; dy++) {
+ for (int dx = -1; dx < 2; dx++) {
+ sum += coeffs[dy + 1][dx + 1] * read_imagef (input, sampler, (float2) ((idx + dx + 0.5f) / width,
+ (idy + dy + 0.5f) / height)).x;
+ }
+ }
+
+ output[idy * width + idx] = fabs (sum);
+}
diff --git a/src/kernels/meson.build b/src/kernels/meson.build
index 03cdac5..4126c27 100644
--- a/src/kernels/meson.build
+++ b/src/kernels/meson.build
@@ -13,6 +13,7 @@ kernel_files = [
'denoise.cl',
'dfi.cl',
'edge.cl',
+ 'estimate-noise.cl',
'ffc.cl',
'fft.cl',
'fftmult.cl',
diff --git a/src/ufo-non-local-means-task.c b/src/ufo-non-local-means-task.c
index 4c14c67..5ca2e2e 100644
--- a/src/ufo-non-local-means-task.c
+++ b/src/ufo-non-local-means-task.c
@@ -39,7 +39,9 @@ struct _UfoNonLocalMeansTaskPrivate {
gfloat sigma;
gboolean use_window;
gboolean fast;
- cl_kernel kernel, mse_kernel, cumsum_kernel, spread_kernel, transpose_kernel, weight_kernel, divide_kernel;
+ gboolean estimate_sigma;
+ cl_kernel kernel, mse_kernel, cumsum_kernel, spread_kernel, transpose_kernel, weight_kernel, divide_kernel,
+ convolution_kernel, sum_kernel;
cl_sampler sampler;
cl_context context;
cl_mem window_mem, group_sums, aux_mem[2], weights_mem;
@@ -62,6 +64,7 @@ enum {
PROP_SIGMA,
PROP_WINDOW,
PROP_FAST,
+ PROP_ESTIMATE_SIGMA,
PROP_ADDRESSING_MODE,
N_PROPERTIES
};
@@ -456,6 +459,9 @@ ufo_non_local_means_task_setup (UfoTask *task,
} else {
priv->kernel = ufo_resources_get_kernel (resources, "nlm.cl", "nlm_noise_reduction", NULL, error);
}
+ priv->convolution_kernel = ufo_resources_get_kernel (resources, "estimate-noise.cl", "convolve_abs_laplacian_diff", NULL, error);
+ priv->sum_kernel = ufo_resources_get_kernel (resources, "reductor.cl", "reduce_M_SUM", NULL, error);
+
priv->window_mem = NULL;
priv->group_sums = NULL;
priv->weights_mem = NULL;
@@ -484,6 +490,12 @@ ufo_non_local_means_task_setup (UfoTask *task,
if (priv->divide_kernel) {
UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->divide_kernel), error);
}
+ if (priv->convolution_kernel) {
+ UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->convolution_kernel), error);
+ }
+ if (priv->sum_kernel) {
+ UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->sum_kernel), error);
+ }
priv->context = ufo_resources_get_context (resources);
UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainContext (priv->context), error);
@@ -508,27 +520,26 @@ ufo_non_local_means_task_get_requisition (UfoTask *task,
ufo_buffer_get_requisition (inputs[0], requisition);
+ if (priv->max_work_group_size == 0) {
+ node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
+ max_work_group_size_gvalue = ufo_gpu_node_get_info (node, UFO_GPU_NODE_INFO_MAX_WORK_GROUP_SIZE);
+ priv->max_work_group_size = g_value_get_ulong (max_work_group_size_gvalue);
+ g_value_unset (max_work_group_size_gvalue);
+ }
if (priv->use_window && !priv->window_mem) {
create_coefficients (priv);
}
- if (priv->fast) {
- if (priv->max_work_group_size == 0) {
- node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
- max_work_group_size_gvalue = ufo_gpu_node_get_info (node, UFO_GPU_NODE_INFO_MAX_WORK_GROUP_SIZE);
- priv->max_work_group_size = g_value_get_ulong (max_work_group_size_gvalue);
- g_value_unset (max_work_group_size_gvalue);
- }
- if (priv->cropped_size[0] == 0 || priv->cropped_size[0] != requisition->dims[0] ||
- priv->cropped_size[1] != requisition->dims[1]) {
- if (priv->cropped_size[0] != 0) {
- /* Size changed, release first. */
- release_fast_buffers (priv);
- }
- priv->cropped_size[0] = requisition->dims[0];
- priv->cropped_size[1] = requisition->dims[1];
+ if (priv->cropped_size[0] != requisition->dims[0] || priv->cropped_size[1] != requisition->dims[1]) {
+ priv->cropped_size[0] = requisition->dims[0];
+ priv->cropped_size[1] = requisition->dims[1];
+ if (priv->fast) {
priv->padding = priv->search_radius + priv->patch_radius + 1;
priv->padded_size[0] = priv->cropped_size[0] + 2 * priv->padding;
priv->padded_size[1] = priv->cropped_size[1] + 2 * priv->padding;
+ if (priv->group_sums) {
+ /* Buffers exist, which means size changed, release first. */
+ release_fast_buffers (priv);
+ }
create_fast_buffers (priv);
}
}
@@ -553,6 +564,75 @@ ufo_non_local_means_task_get_mode (UfoTask *task)
return UFO_TASK_MODE_PROCESSOR | UFO_TASK_MODE_GPU;
}
+static gfloat
+compute_sigma (UfoNonLocalMeansTaskPrivate *priv,
+ cl_command_queue cmd_queue,
+ UfoProfiler *profiler,
+ cl_mem input_image,
+ cl_mem out_mem)
+{
+ gsize n = priv->cropped_size[0] * priv->cropped_size[1];
+ gsize num_groups = MIN (priv->max_work_group_size, NUM_CHUNKS (n, priv->max_work_group_size));
+ gsize num_group_iterations, global_size;
+ gfloat *result, sum = 0.0f;
+ cl_int err;
+ cl_mem group_sums;
+
+ /* First compute the convolution of the input with the difference of
+ * laplacians.
+ */
+ UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->convolution_kernel, 0, sizeof (cl_mem), &input_image));
+ UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->convolution_kernel, 1, sizeof (cl_sampler), &priv->sampler));
+ UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->convolution_kernel, 2, sizeof (cl_mem), &out_mem));
+ ufo_profiler_call (profiler, cmd_queue, priv->convolution_kernel, 2, priv->cropped_size, NULL);
+
+ /* Now compute partial sums of the convolved image. */
+ /* Number of iterations of every group is given by the number of pixels
+ * divided by the number of pixels *num_groups* can process. */
+ num_group_iterations = NUM_CHUNKS (n, priv->max_work_group_size * num_groups);
+ /* The real number of groups is given by the number of pixels
+ * divided by the group size and the number of group iterations. */
+ num_groups = NUM_CHUNKS (n, num_group_iterations * priv->max_work_group_size);
+ global_size = num_groups * priv->max_work_group_size;
+
+ g_debug (" n: %lu", n);
+ g_debug (" num groups: %lu", num_groups);
+ g_debug ("group iterations: %lu", num_group_iterations);
+ g_debug (" kernel dims: %lu", global_size);
+
+ result = g_malloc0 (sizeof (cl_float) * num_groups);
+ group_sums = clCreateBuffer (priv->context,
+ CL_MEM_READ_WRITE,
+ sizeof (cl_float) * num_groups,
+ NULL,
+ &err);
+ UFO_RESOURCES_CHECK_CLERR (err);
+
+ UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 0, sizeof (cl_mem), &out_mem));
+ UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 1, sizeof (cl_mem), &group_sums));
+ UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 2, sizeof (cl_mem), &out_mem));
+ UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 3, sizeof (cl_float) * priv->max_work_group_size, NULL));
+ UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 4, sizeof (gsize), &n));
+ UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 5, sizeof (gint), &num_group_iterations));
+ ufo_profiler_call (profiler, cmd_queue, priv->sum_kernel, 1, &global_size, &priv->max_work_group_size);
+
+ clEnqueueReadBuffer (cmd_queue,
+ group_sums,
+ CL_TRUE,
+ 0, sizeof (cl_float) * num_groups,
+ result,
+ 0, NULL, NULL);
+ UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (group_sums));
+
+ /* Sum partial sums computed by the groups. */
+ for (gsize i = 0; i < num_groups; i++) {
+ sum += result[i];
+ }
+ g_free (result);
+
+ return sqrt (G_PI_2) / (6 * (priv->cropped_size[0] - 2.0f) * (priv->cropped_size[1] - 2.0f)) * sum;
+}
+
static gboolean
ufo_non_local_means_task_process (UfoTask *task,
UfoBuffer **inputs,
@@ -566,20 +646,39 @@ ufo_non_local_means_task_process (UfoTask *task,
cl_command_queue cmd_queue;
cl_mem in_mem;
cl_mem out_mem;
- gfloat h, var;
+ gfloat h, var, estimated_sigma;
gfloat fill_pattern = 0.0f;
+ guint num_processed;
priv = UFO_NON_LOCAL_MEANS_TASK_GET_PRIVATE (task);
node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
cmd_queue = ufo_gpu_node_get_cmd_queue (node);
+ in_mem = ufo_buffer_get_device_image (inputs[0], cmd_queue);
out_mem = ufo_buffer_get_device_array (output, cmd_queue);
profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task));
ufo_buffer_get_requisition (inputs[0], &in_req);
+ g_object_get (task, "num_processed", &num_processed, NULL);
+
+ /* Estimate sigma only from the first image in the stream in order to
+ * keep the results consistent.
+ */
+ if (num_processed == 0 && (priv->h <= 0.0f || priv->estimate_sigma)) {
+ /* Use out_mem for the convolution, it's not necessary after the
+ * computation and can be re-used by the de-noising itself.
+ */
+ estimated_sigma = compute_sigma (priv, cmd_queue, profiler, in_mem, out_mem);
+ g_debug ("Estimated sigma: %g", estimated_sigma);
+ if (priv->h <= 0.0f) {
+ priv->h = estimated_sigma;
+ }
+ if (priv->estimate_sigma) {
+ priv->sigma = estimated_sigma;
+ }
+ }
h = 1 / priv->h / priv->h;
var = priv->sigma * priv->sigma;
if (priv->fast) {
- in_mem = ufo_buffer_get_device_image (inputs[0], cmd_queue);
UFO_RESOURCES_CHECK_CLERR (clEnqueueFillBuffer (cmd_queue, out_mem, &fill_pattern, sizeof (gfloat), 0,
sizeof (gfloat) * priv->cropped_size[0] * priv->cropped_size[1],
0, NULL, NULL));
@@ -588,7 +687,6 @@ ufo_non_local_means_task_process (UfoTask *task,
0, NULL, NULL));
compute_nlm_fast (priv, cmd_queue, profiler, in_mem, out_mem);
} else {
- in_mem = ufo_buffer_get_device_image (inputs[0], cmd_queue);
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 0, sizeof (cl_mem), &in_mem));
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 1, sizeof (cl_mem), &out_mem));
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 2, sizeof (cl_sampler), &priv->sampler));
@@ -631,6 +729,9 @@ ufo_non_local_means_task_set_property (GObject *object,
case PROP_FAST:
priv->fast = g_value_get_boolean (value);
break;
+ case PROP_ESTIMATE_SIGMA:
+ priv->estimate_sigma = g_value_get_boolean (value);
+ break;
case PROP_ADDRESSING_MODE:
priv->addressing_mode = g_value_get_enum (value);
break;
@@ -667,6 +768,9 @@ ufo_non_local_means_task_get_property (GObject *object,
case PROP_FAST:
g_value_set_boolean (value, priv->fast);
break;
+ case PROP_ESTIMATE_SIGMA:
+ g_value_set_boolean (value, priv->estimate_sigma);
+ break;
case PROP_ADDRESSING_MODE:
g_value_set_enum (value, priv->addressing_mode);
break;
@@ -711,6 +815,14 @@ ufo_non_local_means_task_finalize (GObject *object)
UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->divide_kernel));
priv->divide_kernel = NULL;
}
+ if (priv->convolution_kernel) {
+ UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->convolution_kernel));
+ priv->convolution_kernel = NULL;
+ }
+ if (priv->sum_kernel) {
+ UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->sum_kernel));
+ priv->sum_kernel = NULL;
+ }
if (priv->sampler) {
UFO_RESOURCES_CHECK_CLERR (clReleaseSampler (priv->sampler));
priv->sampler = NULL;
@@ -787,6 +899,13 @@ ufo_non_local_means_task_class_init (UfoNonLocalMeansTaskClass *klass)
TRUE,
G_PARAM_READWRITE);
+ properties[PROP_ESTIMATE_SIGMA] =
+ g_param_spec_boolean ("estimate-sigma",
+ "Estimate noise standard deviation",
+ "Estimate noise standard deviation",
+ FALSE,
+ G_PARAM_READWRITE);
+
properties[PROP_ADDRESSING_MODE] =
g_param_spec_enum ("addressing-mode",
"Outlier treatment (\"none\", \"clamp\", \"clamp_to_edge\", \"repeat\", \"mirrored_repeat\")",
@@ -813,10 +932,11 @@ ufo_non_local_means_task_init(UfoNonLocalMeansTask *self)
self->priv->padded_size[0] = 0;
self->priv->padded_size[1] = 0;
self->priv->padding = -1;
- self->priv->h = 0.1f;
+ self->priv->h = 0.0f;
self->priv->sigma = 0.0f;
self->priv->max_work_group_size = 0;
self->priv->use_window = TRUE;
self->priv->addressing_mode = CL_ADDRESS_MIRRORED_REPEAT;
self->priv->fast = FALSE;
+ self->priv->estimate_sigma = FALSE;
}