From a10789e252b6dd849f7d8fee016d57132562afad Mon Sep 17 00:00:00 2001
From: Tomas Farago
Date: Fri, 12 Jul 2019 12:19:22 +0200
Subject: NLM: use texture memory

Read the input of the non-local-means kernel through an OpenCL image
(image2d_t) and a sampler instead of a flat global buffer. Pixels falling
outside the image are resolved by the sampler's addressing mode, so the
manual clamping of the search window and of the patch radius in nlm.cl is
removed.

The sampler is created in setup with normalized coordinates and nearest
filtering, which is why the kernel addresses pixel centers as
(index + 0.5) / size. The task gains an "addressing-mode" property
(default: mirrored repeat) that selects the sampler's addressing mode. The
sampler and the retained context are released in finalize.

Review amendments:
- the search loops include x + search_radius and y + search_radius, so the
  search window is symmetric around the pixel like the patch loops in
  dist ()
- a failing clCreateSampler is reported through the GError of setup, like
  the other OpenCL calls in that function

---
 docs/filters.rst               |  5 +++++
 src/kernels/nlm.cl             | 49 +++++++++++++++++-------------------------
 src/ufo-non-local-means-task.c | 48 +++++++++++++++++++++++++++++++++++++----
 3 files changed, 69 insertions(+), 33 deletions(-)

diff --git a/docs/filters.rst b/docs/filters.rst
index c04b536..530ca67 100644
--- a/docs/filters.rst
+++ b/docs/filters.rst
@@ -542,6 +542,11 @@ Non-local-means denoising
 
         Sigma influencing the Gaussian weighting.
 
+    .. gobj:prop:: addressing-mode:enum
+
+        Addressing mode specifies the behavior for pixels falling outside the
+        original image. See OpenCL ``sampler_t`` documentation for more information.
+
 
 Horizontal interpolation
 ------------------------
diff --git a/src/kernels/nlm.cl b/src/kernels/nlm.cl
index 8b1bcb8..f904ac7 100644
--- a/src/kernels/nlm.cl
+++ b/src/kernels/nlm.cl
@@ -17,35 +17,34 @@
  * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#define flatten(x,y,r,w) ((y-r)*w + (x-r))
-
-/* Compute the distance of two neighbourhood vectors _starting_ from index i
-   and j and edge length radius */
 float
-dist (global float *input,
-      int i,
-      int j,
+dist (read_only image2d_t input,
+      sampler_t sampler,
+      float2 p,
+      float2 q,
       int radius,
-      int image_width)
+      int width,
+      int height)
 {
     float dist = 0.0f, tmp;
     float wsize = (2.0f * radius + 1.0f);
     wsize *= wsize;
 
-    const int nb_width = 2 * radius + 1;
-    const int stride = image_width - nb_width;
-    for (int k = 0; k < nb_width; k++, i += stride, j += stride) {
-        for (int l = 0; l < nb_width; l++, i++, j++) {
-            tmp = input[i] - input[j];
+    for (int i = -radius; i < radius + 1; i++) {
+        for (int j = -radius; j < radius + 1; j++) {
+            tmp = read_imagef (input, sampler, (float2) ((p.x + i) / width, (p.y + j) / height)).x -
+                  read_imagef (input, sampler, (float2) ((q.x + i) / width, (q.y + j) / height)).x;
             dist += tmp * tmp;
         }
     }
+
     return dist / wsize;
 }
 
 kernel void
-nlm_noise_reduction (global float *input,
+nlm_noise_reduction (read_only image2d_t input,
                      global float *output,
+                     sampler_t sampler,
                      const int search_radius,
                      const int patch_radius,
                      const float sigma)
@@ -55,25 +54,17 @@ nlm_noise_reduction (global float *input,
     const int width = get_global_size (0);
     const int height = get_global_size (1);
     const float sigma_2 = sigma * sigma;
+    float d, weight;
 
     float total_weight = 0.0f;
     float pixel_value = 0.0f;
 
-    /*
-     * Compute the upper left (sx,sy) and lower right (tx, ty) corner points of
-     * our search window.
-     */
-    int r = min (patch_radius, min(width - 1 - x, min (height - 1 - y, min (x, y))));
-    int sx = max (x - search_radius, r);
-    int sy = max (y - search_radius, r);
-    int tx = min (x + search_radius, width - 1 - r);
-    int ty = min (y + search_radius, height - 1 - r);
-
-    for (int i = sx; i < tx; i++) {
-        for (int j = sy; j < ty; j++) {
-            float d = dist (input, flatten(x, y, r, width), flatten (i,j,r,width), r, width);
-            float weight = exp (- sigma_2 * d);
-            pixel_value += weight * input[j * width + i];
+    for (int i = x - search_radius; i < x + search_radius + 1; i++) {
+        for (int j = y - search_radius; j < y + search_radius + 1; j++) {
+            d = dist (input, sampler, (float2) (x + 0.5f, y + 0.5f), (float2) (i + 0.5f, j + 0.5f),
+                      patch_radius, width, height);
+            weight = exp (- sigma_2 * d);
+            pixel_value += weight * read_imagef (input, sampler, (float2) ((i + 0.5f) / width, (j + 0.5f) / height)).x;
             total_weight += weight;
         }
     }
diff --git a/src/ufo-non-local-means-task.c b/src/ufo-non-local-means-task.c
index 650cecc..76a07ac 100644
--- a/src/ufo-non-local-means-task.c
+++ b/src/ufo-non-local-means-task.c
@@ -24,6 +24,7 @@
 #endif
 
 #include "ufo-non-local-means-task.h"
+#include "common/ufo-addressing.h"
 
 
 struct _UfoNonLocalMeansTaskPrivate {
@@ -31,6 +32,9 @@ struct _UfoNonLocalMeansTaskPrivate {
     guint patch_radius;
     gfloat sigma;
     cl_kernel kernel;
+    cl_sampler sampler;
+    cl_context context;
+    AddressingMode addressing_mode;
 };
 
 static void ufo_task_interface_init (UfoTaskIface *iface);
@@ -46,6 +50,7 @@ enum {
     PROP_SEARCH_RADIUS,
     PROP_PATCH_RADIUS,
     PROP_SIGMA,
+    PROP_ADDRESSING_MODE,
     N_PROPERTIES
 };
 
@@ -63,12 +68,23 @@ ufo_non_local_means_task_setup (UfoTask *task,
                                 GError **error)
 {
     UfoNonLocalMeansTaskPrivate *priv;
+    cl_int err;
 
     priv = UFO_NON_LOCAL_MEANS_TASK_GET_PRIVATE (task);
     priv->kernel = ufo_resources_get_kernel (resources, "nlm.cl", "nlm_noise_reduction", NULL, error);
 
     if (priv->kernel)
         UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->kernel), error);
+
+    priv->context = ufo_resources_get_context (resources);
+    UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainContext (priv->context), error);
+
+    priv->sampler = clCreateSampler (priv->context,
+                                     (cl_bool) TRUE,
+                                     priv->addressing_mode,
+                                     CL_FILTER_NEAREST,
+                                     &err);
+    UFO_RESOURCES_CHECK_SET_AND_RETURN (err, error);
 }
 
 static void
@@ -115,14 +131,15 @@ ufo_non_local_means_task_process (UfoTask *task,
     priv = UFO_NON_LOCAL_MEANS_TASK_GET_PRIVATE (task);
     node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
     cmd_queue = ufo_gpu_node_get_cmd_queue (node);
-    in_mem = ufo_buffer_get_device_array (inputs[0], cmd_queue);
+    in_mem = ufo_buffer_get_device_image (inputs[0], cmd_queue);
     out_mem = ufo_buffer_get_device_array (output, cmd_queue);
 
     UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 0, sizeof (cl_mem), &in_mem));
     UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 1, sizeof (cl_mem), &out_mem));
-    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 2, sizeof (guint), &priv->search_radius));
-    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 3, sizeof (guint), &priv->patch_radius));
-    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 4, sizeof (gfloat), &priv->sigma));
+    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 2, sizeof (cl_sampler), &priv->sampler));
+    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 3, sizeof (guint), &priv->search_radius));
+    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 4, sizeof (guint), &priv->patch_radius));
+    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 5, sizeof (gfloat), &priv->sigma));
 
     profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task));
     ufo_profiler_call (profiler, cmd_queue, priv->kernel, 2, requisition->dims, NULL);
@@ -158,6 +175,9 @@ ufo_non_local_means_task_set_property (GObject *object,
         case PROP_SIGMA:
             priv->sigma = g_value_get_float (value);
             break;
+        case PROP_ADDRESSING_MODE:
+            priv->addressing_mode = g_value_get_enum (value);
+            break;
         default:
             G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
             break;
@@ -182,6 +202,9 @@ ufo_non_local_means_task_get_property (GObject *object,
         case PROP_SIGMA:
             g_value_set_float (value, priv->sigma);
             break;
+        case PROP_ADDRESSING_MODE:
+            g_value_set_enum (value, priv->addressing_mode);
+            break;
         default:
             G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
             break;
@@ -199,6 +222,14 @@ ufo_non_local_means_task_finalize (GObject *object)
         UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->kernel));
         priv->kernel = NULL;
     }
+    if (priv->sampler) {
+        UFO_RESOURCES_CHECK_CLERR (clReleaseSampler (priv->sampler));
+        priv->sampler = NULL;
+    }
+    if (priv->context) {
+        UFO_RESOURCES_CHECK_CLERR (clReleaseContext (priv->context));
+        priv->context = NULL;
+    }
 
     G_OBJECT_CLASS (ufo_non_local_means_task_parent_class)->finalize (object);
 }
@@ -244,6 +275,14 @@ ufo_non_local_means_task_class_init (UfoNonLocalMeansTaskClass *klass)
             0.0f, G_MAXFLOAT, 0.1f,
             G_PARAM_READWRITE);
 
+    properties[PROP_ADDRESSING_MODE] =
+        g_param_spec_enum ("addressing-mode",
+            "Outlier treatment (\"none\", \"clamp\", \"clamp_to_edge\", \"repeat\", \"mirrored_repeat\")",
+            "Outlier treatment (\"none\", \"clamp\", \"clamp_to_edge\", \"repeat\", \"mirrored_repeat\")",
+            g_enum_register_static ("nlm_addressing_mode", addressing_values),
+            CL_ADDRESS_MIRRORED_REPEAT,
+            G_PARAM_READWRITE);
+
     for (guint i = PROP_0 + 1; i < N_PROPERTIES; i++)
         g_object_class_install_property (oclass, i, properties[i]);
 
@@ -258,4 +297,5 @@ ufo_non_local_means_task_init(UfoNonLocalMeansTask *self)
     self->priv->search_radius = 10;
     self->priv->patch_radius = 3;
     self->priv->sigma = 0.1f;
+    self->priv->addressing_mode = CL_ADDRESS_MIRRORED_REPEAT;
 }
-- 
cgit v1.2.1