/*
* Copyright (C) 2011-2013 Karlsruhe Institute of Technology
*
* This file is part of Ufo.
*
* This library is free software: you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either
* version 3 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see .
*/
#include "config.h"
#include
#include
#include
#ifdef __APPLE__
#include
#else
#include
#endif
#include "ufo-measure-task.h"
/**
* SECTION:ufo-measure-task
* @Short_description: Measure basic image properties with GSL
* @Title: measure
*
*/
typedef enum {
M_MIN,
M_MAX,
M_SUM,
M_MEAN,
M_VAR,
M_STD,
M_SKEW,
M_KURTOSIS,
M_LAST
} Metric;
static GEnumValue metric_values[] = {
{ M_MIN, "M_MIN", "min" },
{ M_MAX, "M_MAX", "max" },
{ M_SUM, "M_SUM", "sum" },
{ M_MEAN, "M_SUM", "mean" },
{ M_VAR, "M_SQUARE", "var" },
{ M_STD, "M_SQUARE", "std" },
{ M_SKEW, "M_CUBE", "skew" },
{ M_KURTOSIS, "M_QUADRATE", "kurtosis" },
{0, NULL, NULL}
};
/* Statistics often need some posprocessing (normalization). This table maps metric */
/* to the respective postprocessing OpenCL code */
static gchar *postproc_codes[] = {
NULL,
NULL,
NULL,
"array[x] * param_scalar",
"array[x] * param_scalar",
"sqrt (array[x] * param_scalar)",
"param[x] != 0.0f ? array[x] * param_scalar / pow (param[x], 1.5f) : 0.0f",
"(param[x] != 0.0f ? array[x] * param_scalar / (param[x] * param[x]) : 0.0f) - 3.0f",
};
struct _UfoMeasureTaskPrivate {
size_t local_shape[2];
cl_context context;
/* Metrics often iteract, e.g. kurtosis needs mean and variance, thus use */
/* separate kernels and memories so that they can be easily shared between */
/* metrics. */
cl_kernel kernels[M_LAST], postproc_kernels[M_LAST];
cl_mem mems[M_LAST];
Metric metric;
gint axis;
};
static void ufo_task_interface_init (UfoTaskIface *iface);
G_DEFINE_TYPE_WITH_CODE (UfoMeasureTask, ufo_measure_task, UFO_TYPE_TASK_NODE,
G_IMPLEMENT_INTERFACE (UFO_TYPE_TASK,
ufo_task_interface_init))
#define UFO_MEASURE_TASK_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE((obj), UFO_TYPE_MEASURE_TASK, UfoMeasureTaskPrivate))
enum {
PROP_0,
PROP_METRIC,
PROP_AXIS,
N_PROPERTIES
};
static GParamSpec *properties[N_PROPERTIES] = { NULL, };
UfoNode *
ufo_measure_task_new (void)
{
return UFO_NODE (g_object_new (UFO_TYPE_MEASURE_TASK, NULL));
}
static gchar *
create_postprocessing_kernel (const gchar *exec_code)
{
const gchar *template = "kernel void calculate (global float *array, "\
"global float *param, const float param_scalar) "\
"{int x = get_global_id (0); array[x] = ";
return g_strconcat (template, exec_code, ";}", NULL);
}
static gint
next_power_of_two (gdouble value)
{
return pow (2, (gint) ceil (log2 (value)));
}
static void
ufo_measure_task_setup (UfoTask *task,
UfoResources *resources,
GError **error)
{
size_t local_size;
GValue *local_size_gvalue;
UfoMeasureTaskPrivate *priv = UFO_MEASURE_TASK_GET_PRIVATE (task);
UfoGpuNode *node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
local_size_gvalue = ufo_gpu_node_get_info (node, UFO_GPU_NODE_INFO_MAX_WORK_GROUP_SIZE);
local_size = g_value_get_ulong (local_size_gvalue);
g_value_unset (local_size_gvalue);
gchar *axis, *kernel_name, *postproc_source;
gint i;
if (priv->axis == -1) {
priv->local_shape[0] = local_size;
priv->local_shape[1] = 1;
axis = g_strdup("");
}
else {
if (priv->axis == 0) {
priv->local_shape[0] = 128;
priv->local_shape[1] = 1;
} else {
priv->local_shape[0] = 32;
priv->local_shape[1] = 8;
}
axis = g_strdup_printf ("%d", priv->axis);
}
while (priv->local_shape[0] * priv->local_shape[1] > local_size) {
priv->local_shape[0] /= 2;
}
g_debug ("Measure local work group size: %lu %lu", priv->local_shape[0], priv->local_shape[1]);
for (i = 0; i < M_LAST; i++) {
/* Reduction kernels */
kernel_name = g_strconcat ("reduce_", axis, metric_values[i].value_name, NULL),
priv->kernels[i] = ufo_resources_get_kernel (resources, "reductor.cl", kernel_name, NULL, error);
UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->kernels[i]), error);
/* Postprocessing (normalization) kernels */
priv->postproc_kernels[i] = NULL;
if (postproc_codes[i]) {
postproc_source = create_postprocessing_kernel (postproc_codes[i]);
priv->postproc_kernels[i] = ufo_resources_get_kernel_from_source (resources, postproc_source, "calculate", NULL, error);
UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->postproc_kernels[i]), error);
g_free (postproc_source);
}
priv->mems[i] = NULL;
}
priv->context = ufo_resources_get_context (resources);
UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainContext (priv->context), error);
g_free (axis);
}
static gsize
get_input_size (UfoMeasureTaskPrivate *priv,
UfoBuffer *input)
{
UfoRequisition requisition;
ufo_buffer_get_requisition (input, &requisition);
return priv->axis < 0 ? requisition.dims[0] * requisition.dims[1] : requisition.dims[priv->axis];
}
static gsize
get_output_size (UfoMeasureTaskPrivate *priv,
UfoBuffer *input)
{
UfoRequisition requisition;
ufo_buffer_get_requisition (input, &requisition);
return priv->axis < 0 ? 1 : requisition.dims[1 - priv->axis];
}
static void
ufo_measure_task_get_requisition (UfoTask *task,
UfoBuffer **inputs,
UfoRequisition *requisition,
GError **error)
{
UfoMeasureTaskPrivate *priv;
UfoRequisition in_req;
priv = UFO_MEASURE_TASK_GET_PRIVATE (task);
ufo_buffer_get_requisition (inputs[0], &in_req);
requisition->n_dims = in_req.n_dims - 1;
requisition->dims[0] = get_output_size (priv, inputs[0]);
}
static guint
ufo_measure_task_get_num_inputs (UfoTask *task)
{
return 1;
}
static guint
ufo_measure_task_get_num_dimensions (UfoTask *task,
guint input)
{
return 2;
}
static UfoTaskMode
ufo_measure_task_get_mode (UfoTask *task)
{
return UFO_TASK_MODE_PROCESSOR | UFO_TASK_MODE_GPU;
}
static cl_mem
create_output (UfoMeasureTaskPrivate *priv,
UfoBuffer *input)
{
cl_int error;
cl_mem output;
UfoRequisition requisition;
gsize pixels_per_thread;
size_t other_dim = 1;
cl_ulong num_groups;
ufo_buffer_get_requisition (input, &requisition);
if (priv->axis >= 0) {
other_dim = requisition.dims[1 - priv->axis];
num_groups = requisition.dims[priv->axis];
} else {
num_groups = requisition.dims[0] * requisition.dims[1];
}
num_groups = (num_groups - 1) / priv->local_shape[priv->axis < 0 ? 0 : priv->axis] + 1;
pixels_per_thread = MAX (32, next_power_of_two (sqrt (num_groups)));
num_groups = (num_groups - 1) / pixels_per_thread + 1;
g_debug ("Measure result memory size (dimensions order arbitrary): (%lu, %lu)", num_groups, other_dim);
output = clCreateBuffer (priv->context,
CL_MEM_READ_WRITE,
num_groups * other_dim * sizeof (cl_float),
NULL,
&error);
UFO_RESOURCES_CHECK_CLERR (error);
return output;
}
static void copy_result (UfoTask *task,
cl_mem input,
UfoBuffer *output)
{
UfoGpuNode *node;
cl_command_queue cmd_queue;
cl_mem output_mem;
node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
cmd_queue = ufo_gpu_node_get_cmd_queue (node);
output_mem = ufo_buffer_get_device_array (output, cmd_queue);
UFO_RESOURCES_CHECK_CLERR (clEnqueueCopyBuffer (cmd_queue,
input, output_mem,
0, 0,
ufo_buffer_get_size (output),
0, NULL, NULL));
}
static void
reduce (UfoTask *task,
cl_kernel op_kernel,
cl_kernel sum_kernel,
UfoBuffer *input,
cl_mem output,
cl_mem param)
{
UfoMeasureTaskPrivate *priv;
UfoGpuNode *node;
UfoProfiler *profiler;
UfoRequisition requisition;
cl_command_queue cmd_queue;
cl_kernel kernel;
cl_ulong num_groups, real_size;
cl_int pixels_per_thread, input_width, other_dim, real_shape[2], result_width;
cl_mem in_mem;
size_t exec_shape[2];
gint axis;
priv = UFO_MEASURE_TASK_GET_PRIVATE (task);
node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
cmd_queue = ufo_gpu_node_get_cmd_queue (node);
profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task));
in_mem = ufo_buffer_get_device_array (input, cmd_queue);
ufo_buffer_get_requisition (input, &requisition);
/* Balance the load and process multiple times until the global reduction
* result is stored in the first pixel. One work item in the kernel
* processes more pixels (global work size is thus less than the input
* size). At the same time, we try to have many groups in order to have good
* occupancy. */
kernel = op_kernel;
num_groups = priv->axis == -1 ? requisition.dims[0] * requisition.dims[1] : requisition.dims[priv->axis];
input_width = requisition.dims[0];
if (priv->axis < 0) {
/* From now on if the axis is -1, treat the computation as horizontal */
axis = 0;
other_dim = 1;
} else {
axis = priv->axis;
other_dim = requisition.dims[1 - axis];
}
while (TRUE) {
/* Real shape is the previous number of groups and the other dimension */
real_shape[axis] = num_groups;
real_shape[1 - axis] = other_dim;
/* Make sure global work size divides the local work size */
/* Number of groups processing real size data */
num_groups = (real_shape[axis] - 1) / priv->local_shape[axis] + 1;
pixels_per_thread = MAX (32, next_power_of_two (sqrt (num_groups)));
num_groups = (num_groups - 1) / pixels_per_thread + 1;
exec_shape[axis] = num_groups * priv->local_shape[axis];
/* Make *exec_shape* dividable by *local_shape* */
exec_shape[1 - axis] = (real_shape[1 - axis] - 1) / priv->local_shape[1 - axis] + 1;
exec_shape[1 - axis] *= priv->local_shape[1 - axis];
/* Result width is only needed for the horizontal kernel and in that
* case it is the number of groups */
result_width = num_groups;
g_debug ("Measure real size: (%d, %d) global size: (%lu, %lu) in_width: %d res_width: %d G: %lu PPT: %d",
real_shape[0], real_shape[1], exec_shape[0], exec_shape[1], input_width, result_width,
num_groups, pixels_per_thread);
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 0, sizeof (cl_mem), &in_mem));
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 1, sizeof (cl_mem), &output));
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 2, sizeof (cl_mem), ¶m));
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 3, priv->local_shape[0] * priv->local_shape[1] *
sizeof (cl_float), NULL));
if (priv->axis < 0) {
real_size = real_shape[0];
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 4, sizeof (cl_ulong), &real_size));
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 5, sizeof (cl_int), &pixels_per_thread));
ufo_profiler_call (profiler, cmd_queue, kernel, 1, exec_shape, priv->local_shape);
} else {
/* We need real shape because the global kernel dimensions need to
* be able to divide the chosen local shape, then we need to know
* input width because input can be either the original image or an
* intermediate result from the second iteration on. Result width is
* the number of horizontal groups in case of horizontal reduction
* and changes every iteration. */
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 4, sizeof (cl_int), &real_shape[0]));
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 5, sizeof (cl_int), &real_shape[1]));
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 6, sizeof (cl_int), &input_width));
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 7, sizeof (cl_int), &result_width));
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 8, sizeof (cl_int), &pixels_per_thread));
ufo_profiler_call (profiler, cmd_queue, kernel, 2, exec_shape, priv->local_shape);
}
if (num_groups == 1) {
break;
}
/* Result becomes input, we need only the summing kernel from now on and
* we need to adjust the input_width as well. */
in_mem = output;
kernel = sum_kernel;
if (priv->axis == 0 && input_width == (gint) requisition.dims[0]) {
input_width = result_width;
}
}
}
static void
execute_postproc_kernel (UfoTask *task,
cl_kernel kernel,
cl_mem mem,
cl_mem param_mem,
cl_float param_scalar)
{
UfoGpuNode *node;
UfoProfiler *profiler;
cl_command_queue cmd_queue;
gsize global_work_size;
node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
cmd_queue = ufo_gpu_node_get_cmd_queue (node);
profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task));
UFO_RESOURCES_CHECK_CLERR (clGetMemObjectInfo (mem, CL_MEM_SIZE, sizeof (size_t),
&global_work_size, NULL));
global_work_size /= sizeof (cl_float);
if (!param_mem) {
/* If *param_scalar* is NULL, the parameter is not needed. Assign the
* output memory as a placeholder. */
param_mem = mem;
}
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 0, sizeof (cl_mem), &mem));
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 1, sizeof (cl_mem), ¶m_mem));
UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 2, sizeof (cl_float), ¶m_scalar));
ufo_profiler_call (profiler, cmd_queue, kernel, 1, &global_work_size, NULL);
}
/* Generic reduction. Data is reduced based on reduction operation defined by
* *metric*, *sum_kernel* defines how to proceed with values in the shared
* memory. For trivial reductions, like min or sum, this is the same operation.
* For reductions which apply some operation this is typically sum.
* *reduction_param_mem* is the parameter which is applied during the reduction
* process (e.g. mean sutraction by var). *postproc_param_mem* is the parameter
* memory used for postprocessing (e.g. var by kurtosis).
*/
static void
_compute_default (UfoTask *task,
UfoBuffer *input,
UfoBuffer *output,
Metric metric,
cl_kernel sum_kernel,
cl_mem reduction_param_mem,
cl_mem postproc_param_mem)
{
UfoMeasureTaskPrivate *priv = UFO_MEASURE_TASK_GET_PRIVATE (task);
if (!priv->mems[metric]) {
priv->mems[metric] = create_output (priv, input);
}
if (!reduction_param_mem) {
/* param not needed, use the result memory for uniform computations. */
reduction_param_mem = priv->mems[metric];
}
if (!postproc_param_mem) {
/* Same for the postprocessing parameter */
postproc_param_mem = priv->mems[metric];
}
if (!sum_kernel) {
sum_kernel = priv->kernels[metric];
}
reduce (task, priv->kernels[metric], sum_kernel, input, priv->mems[metric], reduction_param_mem);
if (priv->postproc_kernels[metric]) {
execute_postproc_kernel (task, priv->postproc_kernels[metric], priv->mems[metric],
postproc_param_mem, 1.0f / get_input_size (priv, input));
}
if (output) {
copy_result (task, priv->mems[metric], output);
}
}
static void
compute_default (UfoTask *task,
UfoBuffer *input,
UfoBuffer *output)
{
UfoMeasureTaskPrivate *priv = UFO_MEASURE_TASK_GET_PRIVATE (task);
_compute_default (task, input, output, priv->metric, NULL, NULL, NULL);
}
static void
compute_variance (UfoTask *task,
UfoBuffer *input,
UfoBuffer *output)
{
UfoMeasureTaskPrivate *priv = UFO_MEASURE_TASK_GET_PRIVATE (task);
_compute_default (task, input, NULL, M_MEAN, NULL, NULL, NULL);
_compute_default (task, input, output, M_VAR, priv->kernels[M_SUM], priv->mems[M_MEAN], NULL);
}
static void
compute_std (UfoTask *task,
UfoBuffer *input,
UfoBuffer *output)
{
UfoMeasureTaskPrivate *priv = UFO_MEASURE_TASK_GET_PRIVATE (task);
_compute_default (task, input, NULL, M_MEAN, NULL, NULL, NULL);
_compute_default (task, input, output, M_STD, priv->kernels[M_SUM], priv->mems[M_MEAN], NULL);
}
/* Skew and kurtosis both use priv->metric for postprocessing, thus the correct
* function is invoked and there is no need to have separate functions for both
* metrics
*/
static void
compute_skew_kurtosis (UfoTask *task,
UfoBuffer *input,
UfoBuffer *output)
{
UfoMeasureTaskPrivate *priv = UFO_MEASURE_TASK_GET_PRIVATE (task);
compute_variance (task, input, NULL);
_compute_default (task, input, output, priv->metric, priv->kernels[M_SUM],
priv->mems[M_MEAN], priv->mems[M_VAR]);
}
static gboolean
ufo_measure_task_process (UfoTask *task,
UfoBuffer **inputs,
UfoBuffer *output,
UfoRequisition *requisition)
{
UfoMeasureTaskPrivate *priv;
typedef void (*MetricFunc) (UfoTask *, UfoBuffer *, UfoBuffer *);
MetricFunc metric_func[] = {compute_default, compute_default, compute_default,
compute_default, compute_variance, compute_std,
compute_skew_kurtosis, compute_skew_kurtosis};
priv = UFO_MEASURE_TASK_GET_PRIVATE (task);
metric_func[priv->metric] (task, inputs[0], output);
return TRUE;
}
static void
ufo_measure_task_set_property (GObject *object,
guint property_id,
const GValue *value,
GParamSpec *pspec)
{
UfoMeasureTaskPrivate *priv;
priv = UFO_MEASURE_TASK_GET_PRIVATE (object);
switch (property_id) {
case PROP_AXIS:
priv->axis = g_value_get_int (value);
break;
case PROP_METRIC:
priv->metric = g_value_get_enum (value);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
break;
}
}
static void
ufo_measure_task_get_property (GObject *object,
guint property_id,
GValue *value,
GParamSpec *pspec)
{
UfoMeasureTaskPrivate *priv = UFO_MEASURE_TASK_GET_PRIVATE (object);
switch (property_id) {
case PROP_AXIS:
g_value_set_int (value, priv->axis);
break;
case PROP_METRIC:
g_value_set_enum (value, priv->metric);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
break;
}
}
static void
ufo_measure_task_finalize (GObject *object)
{
UfoMeasureTaskPrivate *priv = UFO_MEASURE_TASK_GET_PRIVATE (object);
gint i;
for (i = 0; i < M_LAST; i++) {
if (priv->kernels[i]) {
UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->kernels[i]));
priv->kernels[i] = NULL;
}
if (priv->postproc_kernels[i]) {
UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->postproc_kernels[i]));
priv->postproc_kernels[i] = NULL;
}
if (priv->mems[i]) {
UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (priv->mems[i]));
priv->mems[i] = NULL;
}
}
if (priv->context) {
UFO_RESOURCES_CHECK_CLERR (clReleaseContext (priv->context));
priv->context = NULL;
}
G_OBJECT_CLASS (ufo_measure_task_parent_class)->finalize (object);
}
static void
ufo_task_interface_init (UfoTaskIface *iface)
{
iface->setup = ufo_measure_task_setup;
iface->get_num_inputs = ufo_measure_task_get_num_inputs;
iface->get_num_dimensions = ufo_measure_task_get_num_dimensions;
iface->get_mode = ufo_measure_task_get_mode;
iface->get_requisition = ufo_measure_task_get_requisition;
iface->process = ufo_measure_task_process;
}
static void
ufo_measure_task_class_init (UfoMeasureTaskClass *klass)
{
GObjectClass *oclass = G_OBJECT_CLASS (klass);
oclass->set_property = ufo_measure_task_set_property;
oclass->get_property = ufo_measure_task_get_property;
oclass->finalize = ufo_measure_task_finalize;
properties[PROP_METRIC] =
g_param_spec_enum ("metric",
"Metric (min, max, sum, mean, var, std, skew, kurtosis)",
"Metric (min, max, sum, mean, var, std, skew, kurtosis)",
g_enum_register_static ("ufo_measure_metric", metric_values),
M_STD, G_PARAM_READWRITE);
properties[PROP_AXIS] =
g_param_spec_int ("axis",
"Along which axis to measure (-1, all)",
"Along which axis to measure (-1, all)",
-1, UFO_BUFFER_MAX_NDIMS, -1,
G_PARAM_READWRITE);
for (guint i = PROP_0 + 1; i < N_PROPERTIES; i++)
g_object_class_install_property (oclass, i, properties[i]);
g_type_class_add_private (oclass, sizeof(UfoMeasureTaskPrivate));
}
static void
ufo_measure_task_init(UfoMeasureTask *self)
{
self->priv = UFO_MEASURE_TASK_GET_PRIVATE(self);
self->priv->axis = -1;
self->priv->metric = M_STD;
}