/*
 * Copyright (C) 2011-2013 Karlsruhe Institute of Technology
 *
 * This file is part of Ufo.
 *
 * This library is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either
 * version 3 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "config.h"

/*
 * NOTE(review): the angle-bracket header names were missing from the text
 * under review. The OpenCL headers follow from the __APPLE__ switch; the two
 * standard headers are reconstructed -- confirm against the repository.
 */
#ifdef __APPLE__
#  include <OpenCL/cl.h>
#else
#  include <CL/cl.h>
#endif

#include <stdlib.h>
#include <math.h>
#include "ufo-ordfilt-task.h"
#include "ufo-priv.h"

/* Per-instance state of the order-filter task. */
struct _UfoOrdfiltTaskPrivate {
    /* Kernel "bitonic_ordfilt" from ordfilt.cl */
    cl_kernel k_bitonic_ordfilt;
    /* Kernel "load_elements_from_pattern" from ordfilt.cl */
    cl_kernel k_load_elements_from_patern;
    /* Smallest CL_DEVICE_MAX_MEM_ALLOC_SIZE over all listed devices */
    size_t max_alloc_size;
    /* Context handle obtained from ufo_resources_get_context() */
    gpointer context;
};

static void ufo_task_interface_init (UfoTaskIface *iface);

G_DEFINE_TYPE_WITH_CODE (UfoOrdfiltTask, ufo_ordfilt_task, UFO_TYPE_TASK_NODE,
                         G_IMPLEMENT_INTERFACE (UFO_TYPE_TASK,
                                                ufo_task_interface_init))

#define UFO_ORDFILT_TASK_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE((obj), UFO_TYPE_ORDFILT_TASK, UfoOrdfiltTaskPrivate))

/* Create a new order-filter task node. */
UfoNode *
ufo_ordfilt_task_new (void)
{
    return UFO_NODE (g_object_new (UFO_TYPE_ORDFILT_TASK, NULL));
}

/*
 * Store in priv->max_alloc_size the smallest CL_DEVICE_MAX_MEM_ALLOC_SIZE
 * reported by the devices of @resources. The value stays at G_MAXSIZE when
 * the device list is empty.
 */
static void
get_max_alloc_size (UfoResources *resources, UfoOrdfiltTaskPrivate *priv)
{
    GList *devices;
    GList *it;

    priv->max_alloc_size = G_MAXSIZE;

    /* NOTE(review): if this list is (transfer container) it should be
     * released with g_list_free() after the loop -- confirm ownership. */
    devices = ufo_resources_get_devices (resources);

    g_list_for (devices, it) {
        cl_device_id device = (cl_device_id) it->data;
        /* The OpenCL specification defines this query as cl_ulong, which is
         * not the same width as size_t on every host. */
        cl_ulong device_limit = 0;

        UFO_RESOURCES_CHECK_CLERR (clGetDeviceInfo (device,
                                                    CL_DEVICE_MAX_MEM_ALLOC_SIZE,
                                                    sizeof (cl_ulong),
                                                    &device_limit,
                                                    NULL));

        /* Only values below the current minimum are stored, so the narrowing
         * conversion to size_t cannot truncate. */
        if (device_limit < (cl_ulong) priv->max_alloc_size)
            priv->max_alloc_size = (size_t) device_limit;
    }
}

/*
 * UfoTask::setup: remember the context, determine the allocation limit and
 * load both kernels from ordfilt.cl. Each successfully loaded kernel is
 * retained and released again in finalize.
 */
static void
ufo_ordfilt_task_setup (UfoTask *task,
                        UfoResources *resources,
                        GError **error)
{
    UfoOrdfiltTaskPrivate *priv;

    priv = UFO_ORDFILT_TASK_GET_PRIVATE (task);
    priv->context = ufo_resources_get_context (resources);
    get_max_alloc_size (resources, priv);

    priv->k_bitonic_ordfilt = ufo_resources_get_kernel (resources, "ordfilt.cl", "bitonic_ordfilt", NULL, error);

    /* Stop here on failure so that @error is not handed to a second call
     * while it may already be set. */
    if (priv->k_bitonic_ordfilt == NULL)
        return;

    UFO_RESOURCES_CHECK_CLERR (clRetainKernel (priv->k_bitonic_ordfilt));

    priv->k_load_elements_from_patern = ufo_resources_get_kernel (resources, "ordfilt.cl", "load_elements_from_pattern", NULL, error);

    if (priv->k_load_elements_from_patern != NULL)
        UFO_RESOURCES_CHECK_CLERR (clRetainKernel (priv->k_load_elements_from_patern));
}

/* UfoTask::get_requisition: the output has the requisition of input 0. */
static void
ufo_ordfilt_task_get_requisition (UfoTask *task,
                                  UfoBuffer **inputs,
                                  UfoRequisition *requisition,
                                  GError **error)
{
    ufo_buffer_get_requisition (inputs[0], requisition);
}

/* UfoTask::get_num_inputs: input 0 is the image, input 1 the pattern. */
static guint
ufo_ordfilt_task_get_num_inputs (UfoTask *task)
{
    return 2;
}

/* UfoTask::get_num_dimensions: every input is two-dimensional. */
static guint
ufo_ordfilt_task_get_num_dimensions (UfoTask *task,
                                     guint input)
{
    return 2;
}

/* UfoTask::get_mode: a processor task that runs on a GPU node. */
static UfoTaskMode
ufo_ordfilt_task_get_mode (UfoTask *task)
{
    return UFO_TASK_MODE_PROCESSOR | UFO_TASK_MODE_GPU;
}

/*
 * Enqueue the ordfilt kernel @k over a 1D range.
 *
 * @ufo_src      buffer read by the kernel (argument 0); its requisition
 *               determines the work size
 * @ufo_dst      buffer written by the kernel (argument 1)
 * @num_elements number of elements per pixel (argument 2)
 * @idx_offset   offset forwarded to the kernel (argument 7)
 * @mod          number of trailing rows of @ufo_src to leave unprocessed
 */
static void
launch_kernel_1D (cl_kernel k,
                  UfoBuffer *ufo_src,
                  UfoBuffer *ufo_dst,
                  cl_command_queue cmd_queue,
                  size_t num_elements,
                  unsigned idx_offset,
                  unsigned mod)
{
    cl_mem dst;
    cl_mem src;
    UfoRequisition requisition;
    size_t global_work_size[1];
    size_t local_work_size[1];

    dst = ufo_buffer_get_device_array (ufo_dst, cmd_queue);
    src = ufo_buffer_get_device_array (ufo_src, cmd_queue);

    /* Power of 2 above num_elements */
    size_t array_length = (size_t) ceil_power_of_two (num_elements);

    /* clSetKernelArg copies exactly arg_size bytes from the given address, so
     * the integer arguments must live in objects that really are cl_int. */
    cl_int n_elements = (cl_int) num_elements;
    cl_int padded_length = (cl_int) array_length;
    cl_float low_threshold = 0.25;
    cl_float high_threshold = 0.50;

    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (k, 0, sizeof (cl_mem), &src));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (k, 1, sizeof (cl_mem), &dst));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (k, 2, sizeof (cl_int), &n_elements));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (k, 3, sizeof (cl_int), &padded_length));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (k, 4, sizeof (cl_float), &low_threshold));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (k, 5, sizeof (cl_float), &high_threshold));
    /* NULL value with a size: local memory for array_length floats */
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (k, 6, sizeof (cl_float) * array_length, NULL));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (k, 7, sizeof (cl_uint), &idx_offset));

    /* Launch the kernel over 1D grid */
    ufo_buffer_get_requisition (ufo_src, &requisition);

    /* The buffer may have mod extra rows, so do not do extra work for them */
    global_work_size[0] = requisition.dims[0] * (requisition.dims[1] - mod) * (array_length / 2);

    /* One work group of array_length / 2 items per pixel */
    local_work_size[0] = array_length / 2;

    UFO_RESOURCES_CHECK_CLERR (clEnqueueNDRangeKernel (cmd_queue, k,
                                                       1, NULL,
                                                       global_work_size,
                                                       local_work_size,
                                                       0, NULL, NULL));
}

/*
 * Enqueue the element-loading kernel @kernel over a 2D range.
 *
 * @ufo_src     source image (argument 0)
 * @ufo_pattern pattern buffer (argument 2)
 * @ufo_dst     destination buffer (argument 1); its requisition determines
 *              the work size
 * @dimension   forwarded as argument 3
 * @num_ones    forwarded as argument 4
 * @height      forwarded as argument 5
 * @y_offset    forwarded as argument 6
 * @mod         number of trailing rows of @ufo_dst to leave unprocessed
 */
static void
launch_kernel_2D (cl_kernel kernel,
                  UfoBuffer *ufo_src,
                  UfoBuffer *ufo_pattern,
                  UfoBuffer *ufo_dst,
                  cl_command_queue cmd_queue,
                  size_t dimension,
                  size_t num_ones,
                  unsigned height,
                  unsigned y_offset,
                  unsigned mod)
{
    cl_mem dst;
    cl_mem src;
    cl_mem pattern;
    UfoRequisition requisition;
    size_t global_work_size[2];
    size_t local_work_size[2];

    dst = ufo_buffer_get_device_array (ufo_dst, cmd_queue);
    src = ufo_buffer_get_device_array (ufo_src, cmd_queue);
    pattern = ufo_buffer_get_device_array (ufo_pattern, cmd_queue);

    /* Same reasoning as in launch_kernel_1D: pass genuine cl_int objects
     * rather than the first bytes of a size_t. */
    cl_int pattern_dimension = (cl_int) dimension;
    cl_int ones_count = (cl_int) num_ones;

    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 0, sizeof (cl_mem), &src));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 1, sizeof (cl_mem), &dst));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 2, sizeof (cl_mem), &pattern));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 3, sizeof (cl_int), &pattern_dimension));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 4, sizeof (cl_int), &ones_count));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 5, sizeof (cl_uint), &height));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 6, sizeof (cl_uint), &y_offset));

    /* Launch the kernel over 2D grid, using dst requisition which represents
     * a crop of the image */
    ufo_buffer_get_requisition (ufo_dst, &requisition);
    global_work_size[0] = requisition.dims[0];
    /* Buffer may have mod extra rows, don't take them into account */
    global_work_size[1] = requisition.dims[1] - mod;

    /* Pick, per axis, the largest work-group extent <= 32 that divides the
     * global size evenly; the loops stop at 1 at the latest. */
    unsigned y_worker_count = 32;
    unsigned x_worker_count = 32;

    while (global_work_size[1] % y_worker_count)
        --y_worker_count;

    while (global_work_size[0] % x_worker_count)
        --x_worker_count;

    local_work_size[0] = x_worker_count;
    local_work_size[1] = y_worker_count;

    UFO_RESOURCES_CHECK_CLERR (clEnqueueNDRangeKernel (cmd_queue, kernel,
                                                       2, NULL,
                                                       global_work_size,
                                                       local_work_size,
                                                       0, NULL, NULL));
}

/*
 * Read the unsigned metadata entries "number_ones" and "radius" attached to
 * @pattern into *number_ones and *radius.
 */
static void
get_ring_metadata (UfoBuffer *pattern, unsigned *number_ones, unsigned *radius)
{
    GValue *value;

    value = ufo_buffer_get_metadata (pattern, "number_ones");
    *number_ones = g_value_get_uint (value);

    value = ufo_buffer_get_metadata (pattern, "radius");
    *radius = g_value_get_uint (value);
}

/*
 * Run the order filter of @src with @pattern into @dst.
 *
 * A temporary 3D buffer holds number_ones values per pixel. Because that can
 * exceed the device allocation limit, the image is processed in iter_count
 * horizontal chunks that reuse one temporary buffer.
 */
static void
compute_ordfilt (UfoOrdfiltTaskPrivate *priv,
                 UfoBuffer *src,
                 UfoBuffer *pattern,
                 UfoBuffer *dst,
                 cl_command_queue cmd_queue)
{
    UfoRequisition image_requisition;
    UfoRequisition pattern_requisition;

    ufo_buffer_get_requisition (src, &image_requisition);
    ufo_buffer_get_requisition (pattern, &pattern_requisition);

    unsigned number_ones;
    unsigned radius;
    get_ring_metadata (pattern, &number_ones, &radius);

    unsigned height = (unsigned) image_requisition.dims[1];
    unsigned width = (unsigned) image_requisition.dims[0];

    /* Tells us in how many chunks to chop the image. max_alloc_size + 1 would
     * wrap to zero (division by zero) if the limit is still G_MAXSIZE, so
     * the divisor is capped instead. */
    size_t bytes_needed = sizeof (float) * height * width * number_ones;
    size_t chunk_divisor = priv->max_alloc_size < G_MAXSIZE
                           ? priv->max_alloc_size + 1
                           : G_MAXSIZE;
    unsigned iter_count = (unsigned) (1 + bytes_needed / chunk_divisor);
    unsigned y_offset = 0;

    /* On first iteration process mod rows more, this is needed when the
     * height of the image is not divisible by iter_count */
    unsigned mod = (unsigned) (image_requisition.dims[1] % iter_count);

    UfoRequisition requisition = {
        .n_dims = 3,
        .dims[0] = image_requisition.dims[0],
        .dims[1] = image_requisition.dims[1] / iter_count + mod,
        .dims[2] = number_ones,
    };

    UfoBuffer *ufo_buffer = ufo_buffer_new (&requisition, priv->context);

    /* Loads the surrounding number_ones pixels of each pixel in the image.
     * Result in buffer */
    launch_kernel_2D (priv->k_load_elements_from_patern,
                      src, pattern, ufo_buffer, cmd_queue,
                      pattern_requisition.dims[0], number_ones,
                      height, y_offset, 0);

    /* Create image of threshold telling how likely a pixel is a center of a
     * ring */
    launch_kernel_1D (priv->k_bitonic_ordfilt,
                      ufo_buffer, dst, cmd_queue,
                      number_ones, width * y_offset, 0);

    /* NOTE(review): the pass above already covers rows
     * [0, height / iter_count + mod); iteration 0 below covers a subset of
     * them again. Starting at iter = 1 looks sufficient -- confirm against
     * the kernels before changing. */
    for (unsigned iter = 0; iter < iter_count; ++iter) {
        /* Start at mod offset, since the first pass manipulated
         * height / iter_count + mod rows */
        y_offset = mod + iter * (height - mod) / iter_count;

        launch_kernel_2D (priv->k_load_elements_from_patern,
                          src, pattern, ufo_buffer, cmd_queue,
                          pattern_requisition.dims[0], number_ones,
                          height, y_offset, mod);

        launch_kernel_1D (priv->k_bitonic_ordfilt,
                          ufo_buffer, dst, cmd_queue,
                          number_ones, width * y_offset, mod);
    }

    g_object_unref (ufo_buffer);
}

/*
 * UfoTask::process: filter inputs[0] with the pattern in inputs[1] into
 * @output on the command queue of the GPU node this task runs on.
 */
static gboolean
ufo_ordfilt_task_process (UfoTask *task,
                          UfoBuffer **inputs,
                          UfoBuffer *output,
                          UfoRequisition *requisition)
{
    UfoOrdfiltTaskPrivate *priv;
    UfoGpuNode *node;
    cl_command_queue cmd_queue;

    priv = UFO_ORDFILT_TASK_GET_PRIVATE (task);
    node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
    cmd_queue = ufo_gpu_node_get_cmd_queue (node);

    compute_ordfilt (priv, inputs[0], inputs[1], output, cmd_queue);

    return TRUE;
}

/*
 * GObject::finalize: drop the kernel references taken in setup, then chain
 * up to the parent class.
 */
static void
ufo_ordfilt_task_finalize (GObject *object)
{
    UfoOrdfiltTaskPrivate *priv;

    priv = UFO_ORDFILT_TASK_GET_PRIVATE (object);

    /* Either kernel is NULL when setup never ran or its lookup failed. */
    if (priv->k_bitonic_ordfilt != NULL) {
        UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->k_bitonic_ordfilt));
        priv->k_bitonic_ordfilt = NULL;
    }

    if (priv->k_load_elements_from_patern != NULL) {
        UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->k_load_elements_from_patern));
        priv->k_load_elements_from_patern = NULL;
    }

    G_OBJECT_CLASS (ufo_ordfilt_task_parent_class)->finalize (object);
}

/* Wire the UfoTask interface to the implementations in this file. */
static void
ufo_task_interface_init (UfoTaskIface *iface)
{
    iface->setup = ufo_ordfilt_task_setup;
    iface->get_num_inputs = ufo_ordfilt_task_get_num_inputs;
    iface->get_num_dimensions = ufo_ordfilt_task_get_num_dimensions;
    iface->get_mode = ufo_ordfilt_task_get_mode;
    iface->get_requisition = ufo_ordfilt_task_get_requisition;
    iface->process = ufo_ordfilt_task_process;
}

/* Class initializer: install finalize and register the private struct. */
static void
ufo_ordfilt_task_class_init (UfoOrdfiltTaskClass *klass)
{
    GObjectClass *gobject_class = G_OBJECT_CLASS (klass);

    gobject_class->finalize = ufo_ordfilt_task_finalize;

    g_type_class_add_private (gobject_class, sizeof (UfoOrdfiltTaskPrivate));
}

/* Instance initializer: cache the private data pointer. */
static void
ufo_ordfilt_task_init (UfoOrdfiltTask *self)
{
    self->priv = UFO_ORDFILT_TASK_GET_PRIVATE (self);
}