summaryrefslogtreecommitdiffstats
path: root/src/kernels/nlm.cl
diff options
context:
space:
mode:
Diffstat (limited to 'src/kernels/nlm.cl')
-rw-r--r--src/kernels/nlm.cl161
1 files changed, 124 insertions, 37 deletions
diff --git a/src/kernels/nlm.cl b/src/kernels/nlm.cl
index 8b1bcb8..a094f34 100644
--- a/src/kernels/nlm.cl
+++ b/src/kernels/nlm.cl
@@ -17,63 +17,150 @@
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*/
-#define flatten(x,y,r,w) ((y-r)*w + (x-r))
-
-/* Compute the distance of two neighbourhood vectors _starting_ from index i
- and j and edge length radius */
float
-dist (global float *input,
- int i,
- int j,
- int radius,
- int image_width)
+compute_dist (read_only image2d_t input,
+ sampler_t sampler,
+ float2 p,
+ float2 q,
+ int radius,
+ int width,
+ int height,
+ float h_2,
+ float variance,
+ constant float *window_coeffs)
{
float dist = 0.0f, tmp;
- float wsize = (2.0f * radius + 1.0f);
- wsize *= wsize;
-
- const int nb_width = 2 * radius + 1;
- const int stride = image_width - nb_width;
- for (int k = 0; k < nb_width; k++, i += stride, j += stride) {
- for (int l = 0; l < nb_width; l++, i++, j++) {
- tmp = input[i] - input[j];
- dist += tmp * tmp;
+ int wsize = (2 * radius + 1);
+ float coeff = h_2;
+
+ if (!window_coeffs) {
+ /* Gaussian window is normalized to sum=1, so if it is used, we are done
+ * with just summation. If it is not, we need to compute the mean. */
+ coeff /= wsize * wsize;
+ }
+
+ for (int j = -radius; j < radius + 1; j++) {
+ for (int i = -radius; i < radius + 1; i++) {
+ tmp = read_imagef (input, sampler, (float2) ((p.x + i) / width, (p.y + j) / height)).x -
+ read_imagef (input, sampler, (float2) ((q.x + i) / width, (q.y + j) / height)).x;
+ if (window_coeffs) {
+ dist += window_coeffs[(j + radius) * wsize + (i + radius)] * (tmp * tmp - 2 * variance);
+ } else {
+ dist += tmp * tmp - 2 * variance;
+ }
}
}
- return dist / wsize;
+
+ return fmax (0.0f, dist) * coeff;
+}
+
+kernel void
+compute_shifted_mse (read_only image2d_t input,
+ global float *output,
+ sampler_t sampler,
+ const int dx,
+ const int dy,
+ const int padding,
+ const float variance)
+{
+ /* Global dimensions are for the padded output image */
+ int idx = get_global_id (0);
+ int idy = get_global_id (1);
+ int padded_width = get_global_size (0);
+ /* Image is padded by *padding* on both sides of both dimensions. */
+ float cropped_width = (float) (padded_width - 2 * padding);
+ float cropped_height = (float) (get_global_size (1) - 2 * padding);
+ float x = ((float) (idx - padding)) + 0.5f;
+ float y = ((float) (idy - padding)) + 0.5f;
+ float a, b;
+
+ a = read_imagef (input, sampler, (float2) (x / cropped_width, y / cropped_height)).x;
+ b = read_imagef (input, sampler, (float2) ((x + dx) / cropped_width, (y + dy) / cropped_height)).x;
+
+ output[idy * padded_width + idx] = (a - b) * (a - b) - 2 * variance;
+}
+
+kernel void
+process_shift (read_only image2d_t input,
+ global float *integral_input,
+ global float *weights,
+ global float *output,
+ const sampler_t sampler,
+ const int dx,
+ const int dy,
+ const int patch_radius,
+ const int padding,
+ const float coeff,
+ const int is_conjugate)
+{
+ /* Global dimensions are for the cropped output image */
+ int idx = get_global_id (0);
+ int idy = get_global_id (1);
+ /* Image is padded by *padding* on both sides of both dimensions. */
+ int x = idx + padding;
+ int y = idy + padding;
+ float x_im = idx + dx + 0.5f;
+ float y_im = idy + dy + 0.5f;
+ int cropped_width = get_global_size (0);
+ int cropped_height = get_global_size (1);
+ int padded_width = cropped_width + 2 * padding;
+ float dist, weight;
+ if (is_conjugate) {
+ /* direct computation: g(x) = w(x) + f(x + dx)
+ * conjugate: g(x + dx) = w(x) + f(x), where idx = x + dx, so
+ * g(idx) = w(idx - dx) * f(idx - x) = w(idx - dx) * f(x_im - 2dx)
+ */
+ x -= dx;
+ y -= dy;
+ x_im -= 2 * dx;
+ y_im -= 2 * dy;
+ }
+
+ dist = integral_input[(y + patch_radius) * padded_width + x + patch_radius] -
+ integral_input[(y - patch_radius - 1) * padded_width + x + patch_radius] -
+ integral_input[(y + patch_radius) * padded_width + x - patch_radius - 1] +
+ integral_input[(y - patch_radius - 1) * padded_width + x - patch_radius - 1];
+ dist = fmax (0.0f, dist) * coeff;
+ weight = exp (-dist);
+
+ weights[idy * cropped_width + idx] += weight;
+ output[idy * cropped_width + idx] += weight * read_imagef (input, sampler, (float2) (x_im / cropped_width,
+ y_im / cropped_height)).x;
+}
+
+kernel void divide_inplace (global float *coefficients,
+ global float *output)
+{
+ int index = get_global_size (0) * get_global_id (1) + get_global_id (0);
+
+ output[index] = native_divide (output[index], coefficients[index]);
}
kernel void
-nlm_noise_reduction (global float *input,
+nlm_noise_reduction (read_only image2d_t input,
global float *output,
+ sampler_t sampler,
const int search_radius,
const int patch_radius,
- const float sigma)
+ const float h_2,
+ const float variance,
+ constant float *window_coeffs)
{
const int x = get_global_id (0);
const int y = get_global_id (1);
const int width = get_global_size (0);
const int height = get_global_size (1);
- const float sigma_2 = sigma * sigma;
+ float d, weight;
float total_weight = 0.0f;
float pixel_value = 0.0f;
- /*
- * Compute the upper left (sx,sy) and lower right (tx, ty) corner points of
- * our search window.
- */
- int r = min (patch_radius, min(width - 1 - x, min (height - 1 - y, min (x, y))));
- int sx = max (x - search_radius, r);
- int sy = max (y - search_radius, r);
- int tx = min (x + search_radius, width - 1 - r);
- int ty = min (y + search_radius, height - 1 - r);
-
- for (int i = sx; i < tx; i++) {
- for (int j = sy; j < ty; j++) {
- float d = dist (input, flatten(x, y, r, width), flatten (i,j,r,width), r, width);
- float weight = exp (- sigma_2 * d);
- pixel_value += weight * input[j * width + i];
+ for (int j = y - search_radius; j < y + search_radius + 1; j++) {
+ for (int i = x - search_radius; i < x + search_radius + 1; i++) {
+ d = compute_dist (input, sampler, (float2) (x + 0.5f, y + 0.5f), (float2) (i + 0.5f, j + 0.5f),
+ patch_radius, width, height, h_2, variance, window_coeffs);
+ weight = exp (-d);
+ pixel_value += weight * read_imagef (input, sampler, (float2) ((i + 0.5f) / width, (j + 0.5f) / height)).x;
total_weight += weight;
}
}