/*
* Copyright (C) 2011-2015 Karlsruhe Institute of Technology
*
* This file is part of Ufo.
*
* This library is free software: you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either
* version 3 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
* Authored by: Alexandre Lewkowicz (lewkow_a@epita.fr)
*/
#include "config.h"
#ifdef __APPLE__
# include <OpenCL/cl.h>
#else
# include <CL/cl.h>
#endif
#include <math.h>
#include "ufo-fftmult-task.h"
#include "ufo-priv.h"
/* Per-instance private state of the fftmult task. */
struct _UfoFftmultTaskPrivate {
/* OpenCL kernel "mult" looked up from "fftmult.cl" during setup. */
cl_kernel k_fftmult;
/* Resources handle stored during setup; used later to enumerate devices. */
UfoResources *resources;
};
/* Forward declaration: the interface initializer is referenced by the type
 * definition macro below and is defined further down in this file. */
static void ufo_task_interface_init (UfoTaskIface *iface);
/* Define the UfoFftmultTask type, deriving from UFO_TYPE_TASK_NODE and
 * implementing the UFO_TYPE_TASK interface through ufo_task_interface_init. */
G_DEFINE_TYPE_WITH_CODE (UfoFftmultTask, ufo_fftmult_task, UFO_TYPE_TASK_NODE,
G_IMPLEMENT_INTERFACE (UFO_TYPE_TASK,
ufo_task_interface_init))
/* Fetch the UfoFftmultTaskPrivate block that belongs to instance `obj`. */
#define UFO_FFTMULT_TASK_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE((obj), UFO_TYPE_FFTMULT_TASK, UfoFftmultTaskPrivate))
UfoNode *
ufo_fftmult_task_new (void)
{
return UFO_NODE (g_object_new (UFO_TYPE_FFTMULT_TASK, NULL));
}
/*
 * Task setup hook.
 *
 * Remembers the resources handle in the private data, looks up the "mult"
 * kernel from "fftmult.cl" and, when the lookup produced a kernel, retains it.
 *
 * task:      the task being set up
 * resources: resources handle, stored for later use
 * error:     error location handed to the kernel lookup and to the
 *            retain check
 */
static void
ufo_fftmult_task_setup (UfoTask *task,
                        UfoResources *resources,
                        GError **error)
{
    UfoFftmultTaskPrivate *priv = UFO_FFTMULT_TASK_GET_PRIVATE (task);

    priv->resources = resources;
    priv->k_fftmult = ufo_resources_get_kernel (resources, "fftmult.cl", "mult", NULL, error);

    /* Nothing to retain when the kernel lookup did not return a kernel. */
    if (priv->k_fftmult == NULL)
        return;

    UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->k_fftmult), error);
}
/*
 * Requisition hook: the output requisition is taken over unchanged from the
 * second input buffer (inputs[1]).
 *
 * task:        unused
 * inputs:      input buffers; only inputs[1] is consulted
 * requisition: filled in with the requisition of inputs[1]
 * error:       unused
 */
static void
ufo_fftmult_task_get_requisition (UfoTask *task,
                                  UfoBuffer **inputs,
                                  UfoRequisition *requisition,
                                  GError **error)
{
    UfoBuffer *second_input = inputs[1];

    ufo_buffer_get_requisition (second_input, requisition);
}
/*
 * Report how many input buffers this task consumes.
 *
 * Returns: always 2.
 */
static guint
ufo_fftmult_task_get_num_inputs (UfoTask *task)
{
    const guint n_inputs = 2;

    return n_inputs;
}
/*
 * Report the dimensionality expected for a given input.
 *
 * task:  unused
 * input: input index; ignored, every input gets the same answer
 *
 * Returns: always 2.
 */
static guint
ufo_fftmult_task_get_num_dimensions (UfoTask *task,
                                     guint input)
{
    const guint n_dims = 2;

    return n_dims;
}
/*
 * Report the task mode.
 *
 * Returns: the combination of the processor and GPU mode flags.
 */
static UfoTaskMode
ufo_fftmult_task_get_mode (UfoTask *task)
{
    return UFO_TASK_MODE_GPU | UFO_TASK_MODE_PROCESSOR;
}
/*
 * Read the unsigned integer metadata entries "radius" and "number_ones"
 * from a buffer.
 *
 * src:         buffer whose metadata is queried
 * number_ones: receives the value stored under "number_ones"
 * radius:      receives the value stored under "radius"
 *
 * The "radius" entry is looked up first, then "number_ones".
 */
static void
get_ring_metadata (UfoBuffer *src, unsigned *number_ones, unsigned *radius)
{
    GValue *radius_entry = ufo_buffer_get_metadata (src, "radius");
    *radius = g_value_get_uint (radius_entry);

    GValue *ones_entry = ufo_buffer_get_metadata (src, "number_ones");
    *number_ones = g_value_get_uint (ones_entry);
}
/*
 * Compute a square 2D work-group shape that every known device can run.
 *
 * The smallest CL_DEVICE_MAX_WORK_GROUP_SIZE over all devices returned by
 * ufo_resources_get_devices() is determined, and both outputs are set to the
 * integer square root of that value so that x * y never exceeds the limit.
 *
 * resources:      resources handle used to enumerate the devices
 * x_worker_count: receives the work-group extent in x (always >= 1)
 * y_worker_count: receives the work-group extent in y (same value as x)
 *
 * If no device can be queried successfully, both outputs are set to 1, which
 * is a valid work-group extent for any global size.
 */
static void
get_max_work_group_size (UfoResources *resources, size_t *x_worker_count,
                         size_t *y_worker_count)
{
    size_t smallest_limit = G_MAXSIZE;
    size_t side = 1;
    GList *devices = ufo_resources_get_devices (resources);
    GList *it;

    g_list_for (devices, it) {
        cl_device_id device = (cl_device_id) it->data;
        size_t max_group_size = 0;
        /* Issue the query once and keep the status, so that the checking
         * macro only ever sees a plain value. */
        cl_int err = clGetDeviceInfo (device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
                                      sizeof (max_group_size), &max_group_size,
                                      NULL);

        UFO_RESOURCES_CHECK_CLERR (err);

        /* A failed query leaves max_group_size meaningless; skip the device
         * instead of letting a bogus 0 become the minimum. */
        if (err != CL_SUCCESS)
            continue;

        if (max_group_size < smallest_limit)
            smallest_limit = max_group_size;
    }

    /* NOTE(review): the list container is assumed to be owned by the caller
     * (elements are not) -- confirm against the ufo-core documentation. */
    g_list_free (devices);

    /* smallest_limit is still G_MAXSIZE when no device answered; keep the
     * fallback of 1 in that case rather than converting a huge float. */
    if (smallest_limit != G_MAXSIZE) {
        side = (size_t) sqrtf ((float) smallest_limit);

        /* Guard against float rounding pushing side * side above the limit;
         * the division form cannot overflow. */
        while (side > 1 && side > smallest_limit / side)
            --side;

        /* Callers use the result as a divisor, so never hand out 0. */
        if (side == 0)
            side = 1;
    }

    *x_worker_count = side;
    *y_worker_count = side;
}
/*
 * Bind the three buffers to the fftmult kernel and enqueue it on a 2D grid.
 *
 * priv:      private task data providing the kernel and the resources handle
 * ufo_a:     buffer bound to kernel argument 0
 * ufo_b:     buffer bound to kernel argument 1
 * ufo_dst:   buffer bound to kernel argument 2; its requisition defines the
 *            launch grid
 * cmd_queue: command queue used for the device arrays and the launch
 *
 * The global size is (dims[0] / 2, dims[1]) of the destination requisition.
 * The local size starts from the square shape reported by
 * get_max_work_group_size() and is shrunk per dimension until it evenly
 * divides the global size. OpenCL errors are reported through
 * UFO_RESOURCES_CHECK_CLERR; nothing is returned to the caller.
 */
static void
launch_kernel_2D (UfoFftmultTaskPrivate *priv,
                  UfoBuffer *ufo_a, UfoBuffer *ufo_b,
                  UfoBuffer *ufo_dst, cl_command_queue cmd_queue)
{
    cl_kernel kernel = priv->k_fftmult;
    cl_mem a, b, dst;
    cl_int err;
    UfoRequisition requisition;
    size_t global_work_size[2];
    size_t local_work_size[2];
    size_t x_worker_count = 0;
    size_t y_worker_count = 0;

    dst = ufo_buffer_get_device_array (ufo_dst, cmd_queue);
    a = ufo_buffer_get_device_array (ufo_a, cmd_queue);
    b = ufo_buffer_get_device_array (ufo_b, cmd_queue);

    /* Each OpenCL call is made exactly once and only its status code is
     * handed to the checking macro, so the call cannot be repeated no matter
     * how the macro expands its argument. */
    err = clSetKernelArg (kernel, 0, sizeof (cl_mem), &a);
    UFO_RESOURCES_CHECK_CLERR (err);
    err = clSetKernelArg (kernel, 1, sizeof (cl_mem), &b);
    UFO_RESOURCES_CHECK_CLERR (err);
    err = clSetKernelArg (kernel, 2, sizeof (cl_mem), &dst);
    UFO_RESOURCES_CHECK_CLERR (err);

    /* Launch the kernel over a 2D grid, using the dst requisition, which
     * represents a crop of the image. */
    ufo_buffer_get_requisition (ufo_dst, &requisition);
    g_assert (requisition.dims[0] % 2 == 0 && "FFT images are multiples of 2\n");

    /* Half the row width in x -- presumably one work item per interleaved
     * complex value; TODO confirm against fftmult.cl. */
    global_work_size[0] = requisition.dims[0] / 2;
    /* The buffer may hold extra rows; only the rows reported by the
     * requisition are covered. */
    global_work_size[1] = requisition.dims[1];

    get_max_work_group_size (priv->resources, &x_worker_count, &y_worker_count);

    /* A zero extent would make the modulo below undefined; 1 always divides
     * the global size and guarantees the loops terminate. */
    if (x_worker_count == 0)
        x_worker_count = 1;
    if (y_worker_count == 0)
        y_worker_count = 1;

    /* Shrink each local extent to the nearest value that evenly divides the
     * matching global extent. */
    while (global_work_size[1] % y_worker_count)
        --y_worker_count;
    while (global_work_size[0] % x_worker_count)
        --x_worker_count;

    local_work_size[0] = x_worker_count; /* divisor of global_work_size[0] */
    local_work_size[1] = y_worker_count; /* divisor of global_work_size[1] */

    err = clEnqueueNDRangeKernel (cmd_queue,
                                  kernel,
                                  2, NULL, global_work_size,
                                  local_work_size,
                                  0, NULL, NULL);
    UFO_RESOURCES_CHECK_CLERR (err);
}
/*
 * Process hook: run the fftmult kernel on the two inputs and write the result
 * into the output buffer.
 *
 * task:        the task; supplies the GPU node and the private data
 * inputs:      inputs[0] and inputs[1] are passed to the kernel launch
 * output:      destination buffer of the kernel launch
 * requisition: unused
 *
 * Returns: always TRUE.
 */
static gboolean
ufo_fftmult_task_process (UfoTask *task,
                          UfoBuffer **inputs,
                          UfoBuffer *output,
                          UfoRequisition *requisition)
{
    UfoFftmultTaskPrivate *priv;
    UfoGpuNode *gpu;
    cl_command_queue queue;
    unsigned ring_radius, ring_ones;

    /* Read the ring metadata ("radius", "number_ones") of the second input.
     * The values are only read here; nothing in this function uses them
     * afterwards. */
    get_ring_metadata (inputs[1], &ring_ones, &ring_radius);

    gpu = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
    queue = ufo_gpu_node_get_cmd_queue (gpu);
    priv = UFO_FFTMULT_TASK_GET_PRIVATE (task);

    launch_kernel_2D (priv, inputs[0], inputs[1], output, queue);

    return TRUE;
}
/*
 * GObject finalize handler.
 *
 * Drops the kernel reference taken with clRetainKernel() during setup, then
 * chains up to the parent class. The kernel pointer is NULL when setup never
 * obtained a kernel, in which case there is nothing to release.
 *
 * object: the instance being finalized
 */
static void
ufo_fftmult_task_finalize (GObject *object)
{
    UfoFftmultTaskPrivate *priv = UFO_FFTMULT_TASK_GET_PRIVATE (object);

    if (priv->k_fftmult != NULL) {
        /* Call once and pass only the status code to the checking macro. */
        cl_int err = clReleaseKernel (priv->k_fftmult);

        UFO_RESOURCES_CHECK_CLERR (err);
        priv->k_fftmult = NULL;
    }

    G_OBJECT_CLASS (ufo_fftmult_task_parent_class)->finalize (object);
}
/*
 * Fill the UfoTask interface vtable with this task's implementations.
 *
 * iface: interface structure to populate
 */
static void
ufo_task_interface_init (UfoTaskIface *iface)
{
    /* Static properties of the task. */
    iface->get_mode = ufo_fftmult_task_get_mode;
    iface->get_num_inputs = ufo_fftmult_task_get_num_inputs;
    iface->get_num_dimensions = ufo_fftmult_task_get_num_dimensions;

    /* Hooks: setup, requisition, process. */
    iface->setup = ufo_fftmult_task_setup;
    iface->get_requisition = ufo_fftmult_task_get_requisition;
    iface->process = ufo_fftmult_task_process;
}
/*
 * Class initializer: installs the finalize handler and registers the size of
 * the private instance data.
 *
 * klass: the class structure being initialized
 */
static void
ufo_fftmult_task_class_init (UfoFftmultTaskClass *klass)
{
    GObjectClass *gobject_class = G_OBJECT_CLASS (klass);

    gobject_class->finalize = ufo_fftmult_task_finalize;

    g_type_class_add_private (gobject_class, sizeof (UfoFftmultTaskPrivate));
}
/*
 * Instance initializer: caches the pointer to the private data in the
 * instance structure.
 *
 * self: the instance being initialized
 */
static void
ufo_fftmult_task_init (UfoFftmultTask *self)
{
    UfoFftmultTaskPrivate *priv = UFO_FFTMULT_TASK_GET_PRIVATE (self);

    self->priv = priv;
}