/*
 * Copyright (C) 2011-2014 Karlsruhe Institute of Technology
 *
 * This file is part of Ufo.
 *
 * This library is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either
 * version 3 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "config.h"

#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

#include "ufo-transpose-task.h"

/*
 * Number of rows each work item covers.  It scales the second work group
 * dimension, the second global dimension and the local scratch buffer size
 * in ufo_transpose_task_process().
 */
#define PIXELS_PER_THREAD 4

struct _UfoTransposeTaskPrivate {
    cl_kernel kernel;   /* "transpose_shared" kernel, retained in setup */
};

static void ufo_task_interface_init (UfoTaskIface *iface);

G_DEFINE_TYPE_WITH_CODE (UfoTransposeTask, ufo_transpose_task, UFO_TYPE_TASK_NODE,
                         G_IMPLEMENT_INTERFACE (UFO_TYPE_TASK,
                                                ufo_task_interface_init))

#define UFO_TRANSPOSE_TASK_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE((obj), UFO_TYPE_TRANSPOSE_TASK, UfoTransposeTaskPrivate))

/*
 * Create a new transpose task and return it as a UfoNode.
 */
UfoNode *
ufo_transpose_task_new (void)
{
    return UFO_NODE (g_object_new (UFO_TYPE_TRANSPOSE_TASK, NULL));
}

/*
 * UfoTask::setup implementation.
 *
 * Looks up the "transpose_shared" kernel from "transpose.cl" and, if the
 * lookup succeeded, retains it.  Failures are reported through @error.
 */
static void
ufo_transpose_task_setup (UfoTask *task,
                          UfoResources *resources,
                          GError **error)
{
    UfoTransposeTaskPrivate *priv;

    priv = UFO_TRANSPOSE_TASK_GET_PRIVATE (task);
    priv->kernel = ufo_resources_get_kernel (resources, "transpose.cl", "transpose_shared", NULL, error);

    /* Braces keep the guard intact regardless of how the macro expands. */
    if (priv->kernel) {
        UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->kernel), error);
    }
}

/*
 * UfoTask::get_requisition implementation.
 *
 * The output is two-dimensional with the two dimensions of the first input
 * swapped.
 */
static void
ufo_transpose_task_get_requisition (UfoTask *task,
                                    UfoBuffer **inputs,
                                    UfoRequisition *requisition,
                                    GError **error)
{
    UfoRequisition in_req;

    ufo_buffer_get_requisition (inputs[0], &in_req);

    requisition->n_dims = 2;
    requisition->dims[0] = in_req.dims[1];
    requisition->dims[1] = in_req.dims[0];
}

/*
 * UfoTask::get_num_inputs implementation: the task has exactly one input.
 */
static guint
ufo_transpose_task_get_num_inputs (UfoTask *task)
{
    return 1;
}

/*
 * UfoTask::get_num_dimensions implementation.
 *
 * Returns 2 for input 0.  Any other input index fails the precondition
 * check and yields 0.
 */
static guint
ufo_transpose_task_get_num_dimensions (UfoTask *task,
                                       guint input)
{
    g_return_val_if_fail (input == 0, 0);
    return 2;
}

/*
 * UfoTask::get_mode implementation: processor mode combined with the GPU
 * flag.
 */
static UfoTaskMode
ufo_transpose_task_get_mode (UfoTask *task)
{
    return UFO_TASK_MODE_PROCESSOR | UFO_TASK_MODE_GPU;
}

/*
 * UfoTask::process implementation.
 *
 * Picks a work group size that fits the device limit, rounds the global
 * size up to whole work groups, sets the five kernel arguments (input,
 * output, local scratch buffer, width, height) and launches the kernel
 * through the task's profiler.  Always returns TRUE.
 */
static gboolean
ufo_transpose_task_process (UfoTask *task,
                            UfoBuffer **inputs,
                            UfoBuffer *output,
                            UfoRequisition *requisition)
{
    UfoTransposeTaskPrivate *priv;
    UfoGpuNode *node;
    UfoProfiler *profiler;
    UfoRequisition in_req;
    cl_command_queue cmd_queue;
    cl_mem in_mem;
    cl_mem out_mem;
    cl_int width, height;
    GValue *work_group_size_gvalue;
    size_t work_group_size;
    gsize global_size[2];
    gsize local_size[2] = {32, 32 / PIXELS_PER_THREAD};

    priv = UFO_TRANSPOSE_TASK_GET_PRIVATE (task);
    node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
    cmd_queue = ufo_gpu_node_get_cmd_queue (node);
    profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task));

    in_mem = ufo_buffer_get_device_array (inputs[0], cmd_queue);
    out_mem = ufo_buffer_get_device_array (output, cmd_queue);

    ufo_buffer_get_requisition (inputs[0], &in_req);
    width = (cl_int) in_req.dims[0];
    height = (cl_int) in_req.dims[1];

    /* NOTE(review): only g_value_unset() is called on the returned GValue;
     * confirm whether ufo_gpu_node_get_info() expects the caller to free it. */
    work_group_size_gvalue = ufo_gpu_node_get_info (node, UFO_GPU_NODE_INFO_MAX_WORK_GROUP_SIZE);
    work_group_size = g_value_get_ulong (work_group_size_gvalue);
    g_value_unset (work_group_size_gvalue);

    /*
     * Halve both dimensions until the work group fits the device limit.
     * Stop once the second dimension reaches 1: halving further would make
     * it 0, end the loop with a zero product and cause a division by zero
     * in the global size computation below.  If the group still does not
     * fit at that point, the kernel launch is left to report the error.
     */
    while (local_size[0] * local_size[1] > work_group_size && local_size[1] > 1) {
        local_size[0] /= 2;
        local_size[1] /= 2;
    }

    /* Round up to whole work groups; each work item in the second dimension
     * accounts for PIXELS_PER_THREAD rows. */
    global_size[0] = ((in_req.dims[0] - 1) / local_size[0] + 1) * local_size[0];
    global_size[1] = ((in_req.dims[1] - 1) / local_size[1] / PIXELS_PER_THREAD + 1) * local_size[1];

    g_debug ("Image size: %" G_GSIZE_FORMAT " x %" G_GSIZE_FORMAT,
             in_req.dims[0], in_req.dims[1]);
    g_debug ("Transpose global work group size: %" G_GSIZE_FORMAT " x %" G_GSIZE_FORMAT,
             global_size[0], global_size[1]);
    g_debug ("Transpose local work group size: %" G_GSIZE_FORMAT " x %" G_GSIZE_FORMAT,
             local_size[0], local_size[1]);

    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 0, sizeof (cl_mem), &in_mem));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 1, sizeof (cl_mem), &out_mem));
    /* (local_size[0] + 1) to prevent shared memory bank conflicts */
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 2,
                                               (local_size[0] + 1) * local_size[1] * PIXELS_PER_THREAD * sizeof (cl_float),
                                               NULL));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 3, sizeof (cl_int), &width));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 4, sizeof (cl_int), &height));

    ufo_profiler_call (profiler, cmd_queue, priv->kernel, 2, global_size, local_size);

    return TRUE;
}

/*
 * GObject::finalize implementation: releases the kernel retained in setup,
 * if any, then chains up to the parent class.
 */
static void
ufo_transpose_task_finalize (GObject *object)
{
    UfoTransposeTaskPrivate *priv;

    priv = UFO_TRANSPOSE_TASK_GET_PRIVATE (object);

    if (priv->kernel) {
        UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->kernel));
        priv->kernel = NULL;
    }

    G_OBJECT_CLASS (ufo_transpose_task_parent_class)->finalize (object);
}

/*
 * Fill in the UfoTask interface with this task's implementations.
 */
static void
ufo_task_interface_init (UfoTaskIface *iface)
{
    iface->setup = ufo_transpose_task_setup;
    iface->get_num_inputs = ufo_transpose_task_get_num_inputs;
    iface->get_num_dimensions = ufo_transpose_task_get_num_dimensions;
    iface->get_mode = ufo_transpose_task_get_mode;
    iface->get_requisition = ufo_transpose_task_get_requisition;
    iface->process = ufo_transpose_task_process;
}

/*
 * Class initializer: installs the finalizer and registers the private
 * instance data.
 */
static void
ufo_transpose_task_class_init (UfoTransposeTaskClass *klass)
{
    GObjectClass *oclass = G_OBJECT_CLASS (klass);

    oclass->finalize = ufo_transpose_task_finalize;

    g_type_class_add_private (oclass, sizeof(UfoTransposeTaskPrivate));
}

/*
 * Instance initializer: nothing to set up.
 */
static void
ufo_transpose_task_init(UfoTransposeTask *self)
{
}