summaryrefslogtreecommitdiffstats
path: root/src/common/ufo-common.c
blob: f157cf44d94530a5a8ef2df6901e0ebeac7cf86b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
/*
 * Copyright (C) 2015-2019 Karlsruhe Institute of Technology
 *
 * This file is part of Ufo.
 *
 * This library is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either
 * version 3 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <math.h>
#include <glib.h>
#include "ufo-math.h"
#include "ufo-common.h"

gfloat
ufo_common_estimate_sigma (cl_kernel convolution_kernel,
                           cl_kernel sum_kernel,
                           cl_command_queue cmd_queue,
                           cl_sampler sampler,
                           UfoProfiler *profiler,
                           cl_mem input_image,
                           cl_mem out_mem,
                           const gsize max_work_group_size,
                           const gsize *global_size)
{
    gsize n = global_size[0] * global_size[1];
    gsize local_size, num_groups, global_size_1D;
    gint num_group_iterations;
    gfloat *result, sum = 0.0f;
    cl_int err;
    cl_mem group_sums;
    cl_context context;
    
    clGetCommandQueueInfo (cmd_queue, CL_QUEUE_CONTEXT, sizeof (cl_context), &context, NULL);

    /* First compute the convolution of the input with the difference of
     * laplacians.
     */
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (convolution_kernel, 0, sizeof (cl_mem), &input_image));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (convolution_kernel, 1, sizeof (cl_sampler), &sampler));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (convolution_kernel, 2, sizeof (cl_mem), &out_mem));
    ufo_profiler_call (profiler, cmd_queue, convolution_kernel, 2, global_size, NULL);

    /* Now compute partial sums of the convolved image. */
    /* Compute global and local dimensions for the cumsum kernel */
    /* Make sure local_size is a power of 2 */
    local_size = ufo_math_compute_closest_smaller_power_of_2 (max_work_group_size);
    /* Number of iterations of every group is given by the number of pixels
     * divided by the number of pixels *num_groups* can process. */
    num_groups = MIN (local_size, UFO_MATH_NUM_CHUNKS (n, local_size));
    num_group_iterations = UFO_MATH_NUM_CHUNKS (n, local_size * num_groups);
    /* The real number of groups is given by the number of pixels
     * divided by the group size and the number of group iterations. */
    num_groups = UFO_MATH_NUM_CHUNKS (n, num_group_iterations * local_size);
    global_size_1D = num_groups * local_size;

    g_debug ("                 n: %lu", n);
    g_debug ("        num groups: %lu", num_groups);
    g_debug ("  group iterations: %d", num_group_iterations);
    g_debug ("kernel global size: %lu", global_size_1D);
    g_debug (" kernel local size: %lu", local_size);

    result = g_malloc0 (sizeof (cl_float) * num_groups);
    group_sums = clCreateBuffer (context,
                                 CL_MEM_READ_WRITE,
                                 sizeof (cl_float) * num_groups,
                                 NULL,
                                 &err);
    UFO_RESOURCES_CHECK_CLERR (err);

    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 0, sizeof (cl_mem), &out_mem));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 1, sizeof (cl_mem), &group_sums));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 2, sizeof (cl_mem), &out_mem));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 3, sizeof (cl_float) * local_size, NULL));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 4, sizeof (gsize), &n));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 5, sizeof (gint), &num_group_iterations));
    ufo_profiler_call (profiler, cmd_queue, sum_kernel, 1, &global_size_1D, &local_size);

    clEnqueueReadBuffer (cmd_queue,
                         group_sums,
                         CL_TRUE,
                         0, sizeof (cl_float) * num_groups,
                         result,
                         0, NULL, NULL);
    UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (group_sums));

    /* Sum partial sums computed by the groups. */
    for (gsize i = 0; i < num_groups; i++) {
        sum += result[i];
    }
    g_free (result);

    return sqrt (G_PI_2) / (6 * (global_size[0] - 2.0f) * (global_size[1] - 2.0f)) * sum;
}