diff options
Diffstat (limited to 'src/kernels/nlm.cl')
-rw-r--r-- | src/kernels/nlm.cl | 161 |
1 files changed, 124 insertions, 37 deletions
diff --git a/src/kernels/nlm.cl b/src/kernels/nlm.cl index 8b1bcb8..a094f34 100644 --- a/src/kernels/nlm.cl +++ b/src/kernels/nlm.cl @@ -17,63 +17,150 @@ * License along with this library. If not, see <http://www.gnu.org/licenses/>. */ -#define flatten(x,y,r,w) ((y-r)*w + (x-r)) - -/* Compute the distance of two neighbourhood vectors _starting_ from index i - and j and edge length radius */ float -dist (global float *input, - int i, - int j, - int radius, - int image_width) +compute_dist (read_only image2d_t input, + sampler_t sampler, + float2 p, + float2 q, + int radius, + int width, + int height, + float h_2, + float variance, + constant float *window_coeffs) { float dist = 0.0f, tmp; - float wsize = (2.0f * radius + 1.0f); - wsize *= wsize; - - const int nb_width = 2 * radius + 1; - const int stride = image_width - nb_width; - for (int k = 0; k < nb_width; k++, i += stride, j += stride) { - for (int l = 0; l < nb_width; l++, i++, j++) { - tmp = input[i] - input[j]; - dist += tmp * tmp; + int wsize = (2 * radius + 1); + float coeff = h_2; + + if (!window_coeffs) { + /* Gaussian window is normalized to sum=1, so if it is used, we are done + * with just summation. If it is not, we need to compute the mean. */ + coeff /= wsize * wsize; + } + + for (int j = -radius; j < radius + 1; j++) { + for (int i = -radius; i < radius + 1; i++) { + tmp = read_imagef (input, sampler, (float2) ((p.x + i) / width, (p.y + j) / height)).x - + read_imagef (input, sampler, (float2) ((q.x + i) / width, (q.y + j) / height)).x; + if (window_coeffs) { + dist += window_coeffs[(j + radius) * wsize + (i + radius)] * (tmp * tmp - 2 * variance); + } else { + dist += tmp * tmp - 2 * variance; + } } } - return dist / wsize; + + return fmax (0.0f, dist) * coeff; +} + +kernel void +compute_shifted_mse (read_only image2d_t input, + global float *output, + sampler_t sampler, + const int dx, + const int dy, + const int padding, + const float variance) +{ + /* Global dimensions are for the padded output image */ + int idx = get_global_id (0); + int idy = get_global_id (1); + int padded_width = get_global_size (0); + /* Image is padded by *padding* on both sides of both dimensions. */ + float cropped_width = (float) (padded_width - 2 * padding); + float cropped_height = (float) (get_global_size (1) - 2 * padding); + float x = ((float) (idx - padding)) + 0.5f; + float y = ((float) (idy - padding)) + 0.5f; + float a, b; + + a = read_imagef (input, sampler, (float2) (x / cropped_width, y / cropped_height)).x; + b = read_imagef (input, sampler, (float2) ((x + dx) / cropped_width, (y + dy) / cropped_height)).x; + + output[idy * padded_width + idx] = (a - b) * (a - b) - 2 * variance; +} + +kernel void +process_shift (read_only image2d_t input, + global float *integral_input, + global float *weights, + global float *output, + const sampler_t sampler, + const int dx, + const int dy, + const int patch_radius, + const int padding, + const float coeff, + const int is_conjugate) +{ + /* Global dimensions are for the cropped output image */ + int idx = get_global_id (0); + int idy = get_global_id (1); + /* Image is padded by *padding* on both sides of both dimensions. */ + int x = idx + padding; + int y = idy + padding; + float x_im = idx + dx + 0.5f; + float y_im = idy + dy + 0.5f; + int cropped_width = get_global_size (0); + int cropped_height = get_global_size (1); + int padded_width = cropped_width + 2 * padding; + float dist, weight; + if (is_conjugate) { + /* direct computation: g(x) = w(x) + f(x + dx) + * conjugate: g(x + dx) = w(x) + f(x), where idx = x + dx, so + * g(idx) = w(idx - dx) * f(idx - x) = w(idx - dx) * f(x_im - 2dx) + */ + x -= dx; + y -= dy; + x_im -= 2 * dx; + y_im -= 2 * dy; + } + + dist = integral_input[(y + patch_radius) * padded_width + x + patch_radius] - + integral_input[(y - patch_radius - 1) * padded_width + x + patch_radius] - + integral_input[(y + patch_radius) * padded_width + x - patch_radius - 1] + + integral_input[(y - patch_radius - 1) * padded_width + x - patch_radius - 1]; + dist = fmax (0.0f, dist) * coeff; + weight = exp (-dist); + + weights[idy * cropped_width + idx] += weight; + output[idy * cropped_width + idx] += weight * read_imagef (input, sampler, (float2) (x_im / cropped_width, + y_im / cropped_height)).x; +} + +kernel void divide_inplace (global float *coefficients, + global float *output) +{ + int index = get_global_size (0) * get_global_id (1) + get_global_id (0); + + output[index] = native_divide (output[index], coefficients[index]); } kernel void -nlm_noise_reduction (global float *input, +nlm_noise_reduction (read_only image2d_t input, global float *output, + sampler_t sampler, const int search_radius, const int patch_radius, - const float sigma) + const float h_2, + const float variance, + constant float *window_coeffs) { const int x = get_global_id (0); const int y = get_global_id (1); const int width = get_global_size (0); const int height = get_global_size (1); - const float sigma_2 = sigma * sigma; + float d, weight; float total_weight = 0.0f; float pixel_value = 0.0f; - /* - * Compute the upper left (sx,sy) and lower right (tx, ty) corner points of - * our search window. - */ - int r = min (patch_radius, min(width - 1 - x, min (height - 1 - y, min (x, y)))); - int sx = max (x - search_radius, r); - int sy = max (y - search_radius, r); - int tx = min (x + search_radius, width - 1 - r); - int ty = min (y + search_radius, height - 1 - r); - - for (int i = sx; i < tx; i++) { - for (int j = sy; j < ty; j++) { - float d = dist (input, flatten(x, y, r, width), flatten (i,j,r,width), r, width); - float weight = exp (- sigma_2 * d); - pixel_value += weight * input[j * width + i]; + for (int j = y - search_radius; j < y + search_radius + 1; j++) { + for (int i = x - search_radius; i < x + search_radius + 1; i++) { + d = compute_dist (input, sampler, (float2) (x + 0.5f, y + 0.5f), (float2) (i + 0.5f, j + 0.5f), + patch_radius, width, height, h_2, variance, window_coeffs); + weight = exp (-d); + pixel_value += weight * read_imagef (input, sampler, (float2) ((i + 0.5f) / width, (j + 0.5f) / height)).x; total_weight += weight; } } |