/*
* Copyright (C) 2011-2017 Karlsruhe Institute of Technology
*
* This file is part of Ufo.
*
* This library is free software: you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either
* version 3 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "config.h"
#include
#ifdef __APPLE__
#include
#else
#include
#endif
#include "ufo-stitch-task.h"
/* Number of input pixels processed by one work item in the parallel sum.
 * compute_mean() derives the second dimension of the global work size from
 * the image height and this value.
 * NOTE(review): presumably has to agree with what the parallel_sum_2D kernel
 * in reductor.cl assumes -- confirm against the kernel source. */
#define GLOBAL_SUM_HEIGHT 128
/* Per-instance private state of the stitch task. */
struct _UfoStitchTaskPrivate {
/* "adjust-mean" property: when set (and the overlap is non-zero), process()
 * computes the means of the overlapping regions and passes their ratio as a
 * weight to the kernels. */
gboolean adjust_mean;
/* "blend" property: when set (and the overlap is non-zero), process() runs
 * the horizontal interpolation kernel over the overlapping region. */
gboolean blend;
/* "shift" property: horizontal shift of the second image; a negative value
 * makes process() exchange the roles of the two inputs. */
gint shift;
/* Width of the overlapping region in pixels, recomputed from the shift and
 * the input widths in get_requisition(). */
gint overlap;
/* OpenCL context obtained and retained in setup(), released in finalize(). */
cl_context context;
/* kernel: "interpolate_horizontally" (interpolator.cl),
 * sum_kernel: "parallel_sum_2D" (reductor.cl),
 * pad_kernel: "pad_with_image" (pad.cl). */
cl_kernel kernel, sum_kernel, pad_kernel;
/* Device buffer receiving the partial sums; created lazily in compute_mean()
 * and released in finalize(). */
cl_mem sum_mem;
};
/* Forward declaration: referenced by the type definition below and defined
 * further down in this file. */
static void ufo_task_interface_init (UfoTaskIface *iface);
/* Define the UfoStitchTask type as a UFO_TYPE_TASK_NODE subtype implementing
 * the UFO_TYPE_TASK interface through ufo_task_interface_init(). */
G_DEFINE_TYPE_WITH_CODE (UfoStitchTask, ufo_stitch_task, UFO_TYPE_TASK_NODE,
G_IMPLEMENT_INTERFACE (UFO_TYPE_TASK,
ufo_task_interface_init))
/* Fetch the UfoStitchTaskPrivate structure belonging to instance obj. */
#define UFO_STITCH_TASK_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE((obj), UFO_TYPE_STITCH_TASK, UfoStitchTaskPrivate))
/* Property identifiers. PROP_0 is a placeholder that is never installed (the
 * installation loop in class_init starts at PROP_0 + 1); N_PROPERTIES sizes
 * the GParamSpec table below. */
enum {
PROP_0,
PROP_SHIFT,
PROP_ADJUST_MEAN,
PROP_BLEND,
N_PROPERTIES
};
/* GParamSpec table indexed by the identifiers above; filled in class_init. */
static GParamSpec *properties[N_PROPERTIES] = { NULL, };
UfoNode *
ufo_stitch_task_new (void)
{
return UFO_NODE (g_object_new (UFO_TYPE_STITCH_TASK, NULL));
}
/*
 * UfoTask::setup implementation.
 *
 * Stores and retains the OpenCL context and looks up the three kernels used
 * by this task: "interpolate_horizontally" (interpolator.cl),
 * "parallel_sum_2D" (reductor.cl) and "pad_with_image" (pad.cl). Every kernel
 * that was obtained is retained; finalize() releases it again.
 *
 * @task: the stitch task being set up
 * @resources: resource manager providing the context and the kernels
 * @error: return location for an error; set by the kernel lookup or by the
 *         UFO_RESOURCES_CHECK_SET_AND_RETURN macro
 *
 * The function returns as soon as one kernel lookup fails. A GError location
 * that already holds an error must not be handed to another function that
 * may set it, so the remaining lookups are skipped in that case.
 */
static void
ufo_stitch_task_setup (UfoTask *task,
                       UfoResources *resources,
                       GError **error)
{
    UfoStitchTaskPrivate *priv;

    priv = UFO_STITCH_TASK_GET_PRIVATE (task);

    /* The sum buffer is created on demand by compute_mean() */
    priv->sum_mem = NULL;

    /* Retain the context before anything else so that an early return below
     * never leaves finalize() releasing kernels that were fetched but not
     * yet retained. */
    priv->context = ufo_resources_get_context (resources);
    UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainContext (priv->context), error);

    priv->kernel = ufo_resources_get_kernel (resources, "interpolator.cl", "interpolate_horizontally", NULL, error);

    if (priv->kernel == NULL)
        return;

    UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->kernel), error);

    priv->sum_kernel = ufo_resources_get_kernel (resources, "reductor.cl", "parallel_sum_2D", NULL, error);

    if (priv->sum_kernel == NULL)
        return;

    UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->sum_kernel), error);

    priv->pad_kernel = ufo_resources_get_kernel (resources, "pad.cl", "pad_with_image", NULL, error);

    if (priv->pad_kernel == NULL)
        return;

    UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->pad_kernel), error);
}
/*
 * UfoTask::get_requisition implementation.
 *
 * Computes the width of the overlapping region from the "shift" property and
 * the input widths, stores it in the private data and requests a 2D output
 * whose width is the sum of both input widths minus the overlap and whose
 * height equals the input height.
 *
 * @task: the stitch task
 * @inputs: the two input buffers (index 0 and 1)
 * @requisition: filled with the output size on success
 * @error: set to UFO_TASK_ERROR_GET_REQUISITION if the inputs differ in
 *         height or if the shift does not describe a valid overlap
 *
 * For a non-negative shift the overlap is first_width - shift, otherwise it
 * is second_width + shift. The overlap has to lie within
 * [0, MIN (first_width, second_width)]: a negative value or one exceeding
 * the other image's width would make process() derive negative (wrapped)
 * copy regions and work sizes from it.
 */
static void
ufo_stitch_task_get_requisition (UfoTask *task,
                                 UfoBuffer **inputs,
                                 UfoRequisition *requisition,
                                 GError **error)
{
    UfoStitchTaskPrivate *priv;
    UfoRequisition left_req, right_req;
    gint64 overlap;

    priv = UFO_STITCH_TASK_GET_PRIVATE (task);
    ufo_buffer_get_requisition (inputs[0], &left_req);
    ufo_buffer_get_requisition (inputs[1], &right_req);

    if (left_req.dims[1] != right_req.dims[1]) {
        g_set_error_literal (error, UFO_TASK_ERROR, UFO_TASK_ERROR_GET_REQUISITION,
                             "Both stitch inputs must have the same height");
        return;
    }

    /* Compute in 64 bit signed arithmetic so that an out-of-range shift shows
     * up as a negative number instead of wrapping around. */
    if (priv->shift >= 0)
        overlap = (gint64) left_req.dims[0] - priv->shift;
    else
        overlap = (gint64) right_req.dims[0] + priv->shift;

    if (overlap < 0 || (guint64) overlap > MIN (left_req.dims[0], right_req.dims[0])) {
        g_set_error (error, UFO_TASK_ERROR, UFO_TASK_ERROR_GET_REQUISITION,
                     "Shift %d does not yield a valid overlap for input widths %" G_GSIZE_FORMAT " and %" G_GSIZE_FORMAT,
                     priv->shift, left_req.dims[0], right_req.dims[0]);
        return;
    }

    priv->overlap = (gint) overlap;

    requisition->n_dims = 2;
    requisition->dims[0] = left_req.dims[0] + right_req.dims[0] - (gsize) priv->overlap;
    requisition->dims[1] = left_req.dims[1];
}
/*
 * UfoTask::get_num_inputs implementation.
 *
 * Returns: 2 -- get_requisition() and process() read inputs[0] and inputs[1].
 */
static guint
ufo_stitch_task_get_num_inputs (UfoTask *task)
{
    const guint n_inputs = 2;

    return n_inputs;
}
/*
 * UfoTask::get_num_dimensions implementation.
 *
 * Returns: 2 for every input index; the @input argument is not inspected.
 */
static guint
ufo_stitch_task_get_num_dimensions (UfoTask *task,
                                    guint input)
{
    const guint n_dims = 2;

    return n_dims;
}
/*
 * UfoTask::get_mode implementation.
 *
 * Returns: the combination of the UFO_TASK_MODE_PROCESSOR and
 * UFO_TASK_MODE_GPU flags.
 */
static UfoTaskMode
ufo_stitch_task_get_mode (UfoTask *task)
{
    const UfoTaskMode mode = UFO_TASK_MODE_PROCESSOR | UFO_TASK_MODE_GPU;

    return mode;
}
/*
 * Compute the mean value of a region of interest which is priv->overlap
 * pixels wide and @height pixels high.
 *
 * The "parallel_sum_2D" kernel is launched with work groups of
 * @work_group_size x 1 items and writes one partial sum per work group into
 * priv->sum_mem; the partial sums are read back and accumulated on the host.
 *
 * @priv: private data providing the context, the sum kernel, the sum buffer
 *        and the overlap
 * @profiler: profiler used to launch the kernel
 * @cmd_queue: command queue for the kernel launch and the blocking read
 * @input: device buffer holding the image
 * @width: passed to the kernel as argument 4 (the callers pass the width of
 *         @input)
 * @height: height of @input in pixels
 * @work_group_size: number of work items per work group in the x-direction
 * @offset: passed to the kernel as argument 3 (the callers pass the column at
 *          which the overlapping region starts)
 *
 * Returns: the sum of all partial sums divided by the number of pixels in the
 * region, or 0 if the region is empty.
 */
static gfloat
compute_mean (UfoStitchTaskPrivate *priv,
              UfoProfiler *profiler,
              cl_command_queue cmd_queue,
              cl_mem input,
              gint width,
              gint height,
              gsize work_group_size,
              gint offset)
{
    gfloat sum = 0.0f, *summed_blocks;
    gsize global_work_size[2], local_work_size[2];
    gsize groups_per_roi_width, num_blocks, num_bytes, i;
    size_t allocated = 0;
    cl_int cl_error;

    /* Nothing to sum up; also avoids a zero-sized launch and a division by
     * zero below. */
    if (priv->overlap <= 0 || height <= 0)
        return 0.0f;

    /* Round the width up to a multiple of the work group size. */
    groups_per_roi_width = ((gsize) priv->overlap + work_group_size - 1) / work_group_size;
    global_work_size[0] = groups_per_roi_width * work_group_size;
    /* Round up so that heights which are not a multiple of GLOBAL_SUM_HEIGHT
     * (in particular heights below it) are covered completely.
     * NOTE(review): this relies on the kernel using its height argument to
     * stay within the image -- confirm in reductor.cl. */
    global_work_size[1] = ((gsize) height + GLOBAL_SUM_HEIGHT - 1) / GLOBAL_SUM_HEIGHT;

    /* The local scratch buffer (argument 2) and num_blocks below both assume
     * work groups of exactly work_group_size x 1 items, so request that shape
     * explicitly instead of leaving the choice to the OpenCL runtime. */
    local_work_size[0] = work_group_size;
    local_work_size[1] = 1;

    /* Number of output points (every work group produces 1 output value) */
    num_blocks = groups_per_roi_width * global_work_size[1];
    num_bytes = num_blocks * sizeof (gfloat);

    /* The buffer is cached across calls; drop it if it has become too small,
     * e.g. because the overlap or the image height changed. */
    if (priv->sum_mem) {
        UFO_RESOURCES_CHECK_CLERR (clGetMemObjectInfo (priv->sum_mem, CL_MEM_SIZE,
                                                       sizeof (allocated), &allocated, NULL));

        if (allocated < num_bytes) {
            UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (priv->sum_mem));
            priv->sum_mem = NULL;
        }
    }

    if (!priv->sum_mem) {
        priv->sum_mem = clCreateBuffer (priv->context,
                                        CL_MEM_WRITE_ONLY,
                                        num_bytes,
                                        NULL,
                                        &cl_error);
        UFO_RESOURCES_CHECK_CLERR (cl_error);
    }

    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 0, sizeof (cl_mem), &input));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 1, sizeof (cl_mem), &priv->sum_mem));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 2, sizeof (float) * work_group_size, NULL));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 3, sizeof (gint), &offset));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 4, sizeof (gint), &width));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 5, sizeof (gint), &priv->overlap));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 6, sizeof (gint), &height));
    ufo_profiler_call (profiler, cmd_queue, priv->sum_kernel, 2, global_work_size, local_work_size);

    summed_blocks = g_malloc (num_bytes);
    /* Blocking read: the data is complete when the call returns */
    UFO_RESOURCES_CHECK_CLERR (clEnqueueReadBuffer (cmd_queue,
                                                    priv->sum_mem,
                                                    CL_TRUE,
                                                    0,
                                                    num_bytes,
                                                    (void *) summed_blocks,
                                                    0, NULL, NULL));

    for (i = 0; i < num_blocks; i++)
        sum += summed_blocks[i];

    g_free (summed_blocks);

    /* Divide by the pixel count; the product is formed in floating point so
     * that it cannot overflow a gint. */
    return sum / ((gfloat) priv->overlap * (gfloat) height);
}
/*
 * UfoTask::process implementation: stitch the two inputs into @output.
 *
 * Steps:
 *  1. If "adjust-mean" is set and there is an overlap, compute the means of
 *     the overlapping region in both images and use their ratio as a weight.
 *  2. Copy the left image into the output (without the overlapping columns
 *     when blending, since step 4 writes them).
 *  3. Run the "pad_with_image" kernel over the columns of the right image
 *     that lie beyond the overlap.
 *  4. If "blend" is set and there is an overlap, run the
 *     "interpolate_horizontally" kernel over the overlapping columns.
 *
 * @task: the stitch task
 * @inputs: the two input buffers
 * @output: the buffer receiving the stitched image
 * @requisition: output size as computed by get_requisition()
 *
 * Returns: always TRUE.
 */
static gboolean
ufo_stitch_task_process (UfoTask *task,
                         UfoBuffer **inputs,
                         UfoBuffer *output,
                         UfoRequisition *requisition)
{
    UfoStitchTaskPrivate *priv;
    UfoProfiler *profiler;
    UfoGpuNode *node;
    UfoRequisition left_req, right_req;
    cl_command_queue cmd_queue;
    cl_mem left_mem;
    cl_mem right_mem;
    cl_mem out_mem;
    gint left, right, left_width, right_width, width, height, offset;
    gsize work_group_size;
    gsize global_work_size[2];
    size_t src_origin[3] = {0, 0, 0};
    size_t dst_origin[3] = {0, 0, 0};
    size_t region[3] = {0, 0, 1};
    size_t left_row_pitch;
    gfloat mean_left, mean_right, weight = 1.0f;

    priv = UFO_STITCH_TASK_GET_PRIVATE (task);

    /* If the shift is negative, it is the same as exchanging the left and right
     * image and changing the shift sign. */
    left = priv->shift >= 0 ? 0 : 1;
    right = priv->shift >= 0 ? 1 : 0;

    node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
    profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task));
    /* NOTE(review): the GValue returned by ufo_gpu_node_get_info() is not
     * released here -- confirm who owns it. */
    work_group_size = g_value_get_ulong (ufo_gpu_node_get_info (node, UFO_GPU_NODE_INFO_MAX_WORK_GROUP_SIZE));
    cmd_queue = ufo_gpu_node_get_cmd_queue (node);
    left_mem = ufo_buffer_get_device_array (inputs[left], cmd_queue);
    right_mem = ufo_buffer_get_device_array (inputs[right], cmd_queue);
    out_mem = ufo_buffer_get_device_array (output, cmd_queue);
    ufo_buffer_get_requisition (inputs[left], &left_req);
    ufo_buffer_get_requisition (inputs[right], &right_req);

    left_width = (gint) left_req.dims[0];
    right_width = (gint) right_req.dims[0];
    width = (gint) requisition->dims[0];
    height = (gint) requisition->dims[1];
    left_row_pitch = left_req.dims[0] * sizeof (float);
    /* First column of the overlapping region within the left image */
    offset = left_req.dims[0] - priv->overlap;

    if (priv->adjust_mean && priv->overlap) {
        /* Means of the overlapping region should match for both images to have
         * a nice transition, so compute the means and from that a weight which
         * will be used to adjust the right image in order to match the left
         * one. */
        mean_left = compute_mean (priv, profiler, cmd_queue, left_mem,
                                  left_width, height, work_group_size, offset);
        mean_right = compute_mean (priv, profiler, cmd_queue, right_mem,
                                   right_width, height, work_group_size, 0);

        /* An all-zero overlap in the right image cannot be rescaled; keep the
         * neutral weight instead of dividing by zero. */
        if (mean_right != 0.0f)
            weight = mean_left / mean_right;
    }

    /* Copy the left projection into the stitched one */
    region[0] = priv->blend ? (left_req.dims[0] - priv->overlap) * sizeof (float): left_row_pitch;
    region[1] = left_req.dims[1];

    /* When blending and the left image is overlapped completely there is
     * nothing to copy; OpenCL rejects a zero-sized region. */
    if (region[0] > 0) {
        UFO_RESOURCES_CHECK_CLERR (clEnqueueCopyBufferRect (cmd_queue,
                                                            left_mem, out_mem,
                                                            src_origin, dst_origin, region,
                                                            left_row_pitch, 0,
                                                            requisition->dims[0] * sizeof (float), 0,
                                                            0, NULL, NULL));
    }

    /* Launch only if the right image has columns beyond the overlap. Testing
     * the shift instead would launch a zero-sized kernel when the overlap
     * equals the right width and would skip the copy for a zero shift with a
     * wider right image. */
    if (right_width > priv->overlap) {
        /* Copy the weighted right projection into the stitched one */
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->pad_kernel, 0, sizeof (cl_mem), &right_mem));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->pad_kernel, 1, sizeof (cl_mem), &out_mem));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->pad_kernel, 2, sizeof (gint), &priv->overlap));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->pad_kernel, 3, sizeof (gint), &left_width));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->pad_kernel, 4, sizeof (gint), &right_width));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->pad_kernel, 5, sizeof (gint), &width));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->pad_kernel, 6, sizeof (gfloat), &weight));
        global_work_size[0] = right_width - priv->overlap;
        global_work_size[1] = height;
        ufo_profiler_call (profiler, cmd_queue, priv->pad_kernel, 2, global_work_size, NULL);
    }

    if (priv->blend && priv->overlap) {
        /* Blend the overlapping region by linear interpolation */
        global_work_size[0] = priv->overlap;
        global_work_size[1] = height;
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 0, sizeof (cl_mem), &left_mem));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 1, sizeof (cl_mem), &right_mem));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 2, sizeof (cl_mem), &out_mem));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 3, sizeof (gint), &width));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 4, sizeof (gint), &left_width));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 5, sizeof (gint), &right_width));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 6, sizeof (gint), &offset));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 7, sizeof (gfloat), &weight));
        ufo_profiler_call (profiler, cmd_queue, priv->kernel, 2, global_work_size, NULL);
    }

    return TRUE;
}
/*
 * GObject::set_property implementation.
 *
 * Stores the value of the "shift", "adjust-mean" or "blend" property in the
 * private data. Any other property id triggers the standard GObject
 * invalid-property warning.
 */
static void
ufo_stitch_task_set_property (GObject *object,
                              guint property_id,
                              const GValue *value,
                              GParamSpec *pspec)
{
    UfoStitchTaskPrivate *state = UFO_STITCH_TASK_GET_PRIVATE (object);

    if (property_id == PROP_SHIFT) {
        state->shift = g_value_get_int (value);
    }
    else if (property_id == PROP_ADJUST_MEAN) {
        state->adjust_mean = g_value_get_boolean (value);
    }
    else if (property_id == PROP_BLEND) {
        state->blend = g_value_get_boolean (value);
    }
    else {
        G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
    }
}
/*
 * GObject::get_property implementation.
 *
 * Copies the current value of the "shift", "adjust-mean" or "blend" property
 * from the private data into @value. Any other property id triggers the
 * standard GObject invalid-property warning.
 */
static void
ufo_stitch_task_get_property (GObject *object,
                              guint property_id,
                              GValue *value,
                              GParamSpec *pspec)
{
    UfoStitchTaskPrivate *state = UFO_STITCH_TASK_GET_PRIVATE (object);

    if (property_id == PROP_SHIFT) {
        g_value_set_int (value, state->shift);
    }
    else if (property_id == PROP_ADJUST_MEAN) {
        g_value_set_boolean (value, state->adjust_mean);
    }
    else if (property_id == PROP_BLEND) {
        g_value_set_boolean (value, state->blend);
    }
    else {
        G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
    }
}
/*
 * GObject::finalize implementation.
 *
 * Releases every OpenCL object that is still referenced by the private data
 * (the three kernels, the sum buffer and finally the context), resets the
 * corresponding pointers and chains up to the parent class.
 */
static void
ufo_stitch_task_finalize (GObject *object)
{
    UfoStitchTaskPrivate *state = UFO_STITCH_TASK_GET_PRIVATE (object);

    if (state->kernel != NULL) {
        UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (state->kernel));
        state->kernel = NULL;
    }

    if (state->sum_kernel != NULL) {
        UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (state->sum_kernel));
        state->sum_kernel = NULL;
    }

    if (state->pad_kernel != NULL) {
        UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (state->pad_kernel));
        state->pad_kernel = NULL;
    }

    if (state->sum_mem != NULL) {
        UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (state->sum_mem));
        state->sum_mem = NULL;
    }

    /* The context goes last, after the objects created from it */
    if (state->context != NULL) {
        UFO_RESOURCES_CHECK_CLERR (clReleaseContext (state->context));
        state->context = NULL;
    }

    G_OBJECT_CLASS (ufo_stitch_task_parent_class)->finalize (object);
}
/*
 * Fill the UfoTask interface vtable with the stitch task implementations.
 */
static void
ufo_task_interface_init (UfoTaskIface *iface)
{
    /* Static description of the task */
    iface->get_mode = ufo_stitch_task_get_mode;
    iface->get_num_inputs = ufo_stitch_task_get_num_inputs;
    iface->get_num_dimensions = ufo_stitch_task_get_num_dimensions;

    /* Run-time hooks */
    iface->setup = ufo_stitch_task_setup;
    iface->get_requisition = ufo_stitch_task_get_requisition;
    iface->process = ufo_stitch_task_process;
}
/*
 * Class initializer: hooks up the GObject property accessors and finalize,
 * creates and installs the "shift", "adjust-mean" and "blend" properties and
 * registers the size of the private instance data.
 */
static void
ufo_stitch_task_class_init (UfoStitchTaskClass *klass)
{
    GObjectClass *oclass = G_OBJECT_CLASS (klass);

    oclass->set_property = ufo_stitch_task_set_property;
    oclass->get_property = ufo_stitch_task_get_property;
    oclass->finalize = ufo_stitch_task_finalize;

    properties[PROP_SHIFT] =
        g_param_spec_int("shift",
            "If the second image is horizontally shifted by this value, the images will overlap (partially)",
            "If the second image is horizontally shifted by this value, the images will overlap (partially)",
            G_MININT, G_MAXINT, 0,
            G_PARAM_READWRITE);

    /* The declared default has to agree with the value assigned in the
     * instance initializer, which enables the mean adjustment (TRUE).
     * Declaring FALSE here made introspection report a default that a freshly
     * created task does not actually have. */
    properties[PROP_ADJUST_MEAN] =
        g_param_spec_boolean ("adjust-mean",
            "Adjust second image's mean value based on the overlapping region",
            "Adjust second image's mean value based on the overlapping region",
            TRUE,
            G_PARAM_READWRITE);

    properties[PROP_BLEND] =
        g_param_spec_boolean ("blend",
            "Linearly interpolate between the first and the second image in the overlapping region",
            "Linearly interpolate between the first and the second image in the overlapping region",
            FALSE,
            G_PARAM_READWRITE);

    /* PROP_0 is a placeholder and is not installed */
    for (guint i = PROP_0 + 1; i < N_PROPERTIES; i++)
        g_object_class_install_property (oclass, i, properties[i]);

    g_type_class_add_private (oclass, sizeof(UfoStitchTaskPrivate));
}
/*
 * Instance initializer: caches the private data pointer in the instance and
 * assigns the initial property values (shift 0, mean adjustment enabled,
 * blending disabled).
 */
static void
ufo_stitch_task_init(UfoStitchTask *self)
{
    UfoStitchTaskPrivate *state = UFO_STITCH_TASK_GET_PRIVATE(self);

    self->priv = state;
    state->shift = 0;
    state->adjust_mean = TRUE;
    state->blend = FALSE;
}