/*
 * Copyright (C) 2016 Karlsruhe Institute of Technology
 *
 * This file is part of Ufo.
 *
 * This library is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either
 * version 3 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "config.h"

#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

#include "ufo-segment-task.h"

/* Upper bound on distinct segment ids (1..MAX_SEGMENTS) read from the label field. */
#define MAX_SEGMENTS        16
/* Upper bound on seed positions kept per segment; further seeds are dropped. */
#define MAX_LABELS          32768
/* Number of uniformly distributed floats uploaded for the `walk` kernel. */
#define NUM_RANDOM_VALUES   32768

/*
 * Data structures
 * ---------------
 *
 * accumulator: stores the number of hits for the current segment.
 *
 * bitmap: stores if the pixel belongs to a segment.
 *
 *
 * Algorithm
 * ---------
 *
 * 1. For each segment from the label field we generate arrays of labels,
 *    essentially the position.
 *
 * 2. For each segment
 *
 * 2.1. we use the label array to call the `walk` kernel which starts random
 *      walks from each label position. Going to a new position increases the
 *      accumulator array at that position.
 *
 * 2.2. we use the accumulator to fill out the bitmap. One entry contains 32
 *      successive bits (MSB) denoting if the corresponding pixel belongs to
 *      that particular segment. To avoid too many buffers we use a linearized
 *      array, i.e. it has four dimensions: width x height x slices x segment.
 *
 * 3. For the current slice, we look up the segment bitmaps and store the
 *    segment in the output.
 */

/* Pixel position of one seed inside the 2D label field. */
typedef struct {
    int x;
    int y;
} Label;

/* All seed positions collected for one segment id. */
typedef struct {
    Label *labels;      /* lazily allocated, MAX_LABELS entries */
    guint num_labels;   /* number of valid entries in `labels` */
} SegmentLabels;

struct _UfoSegmentTaskPrivate {
    cl_context context;
    cl_kernel walk;
    cl_kernel render;
    cl_kernel threshold;
    cl_mem bitmap;          /* per-segment membership bits, owned between process and generate */
    cl_mem label_map;       /* compact segment index -> original label value */
    guint num_slices;
    guint num_segments;     /* number of segments that have at least one seed */
    guint current;          /* slices still to be emitted by generate */
};

static void ufo_task_interface_init (UfoTaskIface *iface);

G_DEFINE_TYPE_WITH_CODE (UfoSegmentTask, ufo_segment_task, UFO_TYPE_TASK_NODE,
                         G_IMPLEMENT_INTERFACE (UFO_TYPE_TASK,
                                                ufo_task_interface_init))

#define UFO_SEGMENT_TASK_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE((obj), UFO_TYPE_SEGMENT_TASK, UfoSegmentTaskPrivate))

/*
 * Release the device buffers that carry the segmentation result from
 * process() to generate(). Safe to call repeatedly and when nothing has been
 * allocated yet: each handle is checked and reset to NULL after release.
 */
static void
release_segmentation_buffers (UfoSegmentTaskPrivate *priv)
{
    if (priv->bitmap != NULL) {
        UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (priv->bitmap));
        priv->bitmap = NULL;
    }

    if (priv->label_map != NULL) {
        UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (priv->label_map));
        priv->label_map = NULL;
    }
}

/* Create a new segment task node. */
UfoNode *
ufo_segment_task_new (void)
{
    return UFO_NODE (g_object_new (UFO_TYPE_SEGMENT_TASK, NULL));
}

/*
 * Fetch and retain the OpenCL context and the three kernels from segment.cl.
 *
 * The context is retained first so that finalize() always releases a
 * reference this task actually owns. Setup stops at the first kernel that
 * cannot be loaded so that `error` is never handed, already set, to another
 * call that might set it again.
 */
static void
ufo_segment_task_setup (UfoTask *task,
                        UfoResources *resources,
                        GError **error)
{
    UfoSegmentTaskPrivate *priv;

    priv = UFO_SEGMENT_TASK_GET_PRIVATE (task);

    priv->context = ufo_resources_get_context (resources);
    UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainContext (priv->context), error);

    priv->walk = ufo_resources_get_kernel (resources, "segment.cl", "walk", NULL, error);

    if (priv->walk == NULL)
        return;

    UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->walk), error);

    priv->render = ufo_resources_get_kernel (resources, "segment.cl", "render", NULL, error);

    if (priv->render == NULL)
        return;

    UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->render), error);

    priv->threshold = ufo_resources_get_kernel (resources, "segment.cl", "threshold", NULL, error);

    if (priv->threshold == NULL)
        return;

    UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->threshold), error);
}

/*
 * The output is a 2D image with the width and height of the first input.
 * An error is set if the label field (second input) differs in width or
 * height from the first input.
 */
static void
ufo_segment_task_get_requisition (UfoTask *task,
                                  UfoBuffer **inputs,
                                  UfoRequisition *requisition,
                                  GError **error)
{
    UfoRequisition label_req;

    ufo_buffer_get_requisition (inputs[0], requisition);
    requisition->n_dims = 2;

    /* ensure inputs match */
    ufo_buffer_get_requisition (inputs[1], &label_req);

    if ((label_req.dims[0] != requisition->dims[0]) ||
        (label_req.dims[1] != requisition->dims[1])) {
        g_set_error (error, UFO_TASK_ERROR, UFO_TASK_ERROR_GET_REQUISITION,
                     "Label field and input dimensions do not match ([%zu, %zu] != [%zu, %zu])",
                     label_req.dims[0], label_req.dims[1],
                     requisition->dims[0], requisition->dims[1]);
    }
}

/* Two inputs: the slice volume (index 0) and the label field (index 1). */
static guint
ufo_segment_task_get_num_inputs (UfoTask *task)
{
    return 2;
}

/* Input 0 is three-dimensional, every other input is two-dimensional. */
static guint
ufo_segment_task_get_num_dimensions (UfoTask *task,
                                     guint input)
{
    if (input == 0)
        return 3;

    return 2;
}

/* The task consumes its inputs in process() and emits results from generate() on a GPU. */
static UfoTaskMode
ufo_segment_task_get_mode (UfoTask *task)
{
    return UFO_TASK_MODE_REDUCTOR | UFO_TASK_MODE_GPU;
}

/*
 * Collect the seed positions of every segment from a 2D label field.
 *
 * A pixel whose value truncates to k with 1 <= k <= MAX_SEGMENTS is a seed of
 * segment k - 1. At most MAX_LABELS seeds are kept per segment; later ones
 * (in column-by-column scan order) are silently dropped.
 *
 * Returns a newly allocated array of MAX_SEGMENTS entries. The caller owns
 * the array and every non-NULL `labels` member and frees them with g_free().
 */
static SegmentLabels *
extract_labels (UfoBuffer *buffer)
{
    UfoRequisition requisition;
    gfloat *data;
    SegmentLabels *result;
    guint width;
    guint height;

    ufo_buffer_get_requisition (buffer, &requisition);
    data = ufo_buffer_get_host_array (buffer, NULL);
    result = g_malloc0 (sizeof (SegmentLabels) * MAX_SEGMENTS);
    width = requisition.dims[0];
    height = requisition.dims[1];

    for (guint x = 0; x < width; x++) {
        for (guint y = 0; y < height; y++) {
            gfloat raw = data[y * width + x];
            gint v;
            guint n;

            /*
             * Range-check on the float before converting: casting NaN,
             * infinity or a value outside the gint range is undefined. The
             * negated form also rejects NaN, for which both comparisons are
             * false.
             */
            if (!(raw >= 1.0f && raw < (gfloat) (MAX_SEGMENTS + 1)))
                continue;

            v = (gint) raw - 1;

            if (result[v].labels == NULL) {
                result[v].labels = g_malloc0 (sizeof (Label) * MAX_LABELS);
                result[v].num_labels = 0;
            }

            n = result[v].num_labels;

            if (n < MAX_LABELS) {
                result[v].labels[n].x = x;
                result[v].labels[n].y = y;
                result[v].num_labels++;
            }
        }
    }

    return result;
}

/*
 * Segment the slice volume in inputs[0] using the seeds in inputs[1].
 *
 * For every segment that has seeds, the `walk` kernel is run once per seed
 * and the `threshold` kernel then records the result in priv->bitmap under a
 * compact segment index. priv->label_map maps that compact index back to the
 * original label value. Both buffers stay alive until generate() has emitted
 * the last slice.
 *
 * Always returns TRUE; OpenCL failures are reported through
 * UFO_RESOURCES_CHECK_CLERR.
 */
static gboolean
ufo_segment_task_process (UfoTask *task,
                          UfoBuffer **inputs,
                          UfoBuffer *output,
                          UfoRequisition *requisition)
{
    UfoSegmentTaskPrivate *priv;
    UfoRequisition in_req;
    UfoGpuNode *node;
    UfoProfiler *profiler;
    cl_command_queue cmd_queue;
    SegmentLabels *prelabeled;
    gfloat *random_host;
    guint *label_map_host;
    cl_mem random_device;
    cl_mem prelabeled_device;
    cl_mem accumulator;
    cl_mem slices;
    cl_int error;
    guint width;
    guint height;
    guint segment;
    gsize work_size[3];
    guint16 fill_pattern = 0;

    priv = UFO_SEGMENT_TASK_GET_PRIVATE (task);
    node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
    cmd_queue = ufo_gpu_node_get_cmd_queue (node);

    /* drop buffers of an earlier run that generate() did not consume */
    release_segmentation_buffers (priv);

    /* extract labels */
    prelabeled = extract_labels (inputs[1]);
    priv->num_segments = 0;

    for (guint s = 0; s < MAX_SEGMENTS; s++) {
        if (prelabeled[s].num_labels > 0)
            priv->num_segments++;
    }

    /* create uniformly distributed data */
    random_host = g_malloc0 (NUM_RANDOM_VALUES * sizeof (gfloat));

    for (guint i = 0; i < NUM_RANDOM_VALUES; i++)
        random_host[i] = (gfloat) g_random_double ();

    random_device = clCreateBuffer (priv->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                    NUM_RANDOM_VALUES * sizeof (gfloat), random_host, &error);
    UFO_RESOURCES_CHECK_CLERR (error);

    ufo_buffer_get_requisition (inputs[0], &in_req);
    width = in_req.dims[0];
    height = in_req.dims[1];
    priv->current = in_req.dims[2];
    priv->num_slices = in_req.dims[2];

    profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task));
    slices = ufo_buffer_get_device_array (inputs[0], cmd_queue);

    /* create bitmap memory */
    priv->bitmap = clCreateBuffer (priv->context, CL_MEM_READ_WRITE,
                                   sizeof (guint32) * width / 32 * height * priv->num_slices * priv->num_segments,
                                   NULL, &error);
    UFO_RESOURCES_CHECK_CLERR (error);

    /* create and initialize accumulator memory */
    accumulator = clCreateBuffer (priv->context, CL_MEM_READ_WRITE,
                                  sizeof (guint16) * width * height * priv->num_slices,
                                  NULL, &error);
    UFO_RESOURCES_CHECK_CLERR (error);

    prelabeled_device = clCreateBuffer (priv->context, CL_MEM_READ_ONLY,
                                        MAX_LABELS * sizeof (Label), NULL, &error);
    UFO_RESOURCES_CHECK_CLERR (error);

    /*
     * Zero-initialised so that the entries beyond the used segment indices,
     * which are uploaded as well, hold defined values.
     */
    label_map_host = g_new0 (guint, MAX_LABELS);
    segment = 0;

    for (guint s = 0; s < MAX_SEGMENTS; s++) {
        if (prelabeled[s].num_labels == 0)
            continue;

        /* upload labels for this segment */
        UFO_RESOURCES_CHECK_CLERR (clEnqueueWriteBuffer (cmd_queue, prelabeled_device, CL_TRUE,
                                                         0, prelabeled[s].num_labels * sizeof (Label),
                                                         prelabeled[s].labels,
                                                         0, NULL, NULL));

        /* reset accumulator to zero */
        UFO_RESOURCES_CHECK_CLERR (clEnqueueFillBuffer (cmd_queue, accumulator,
                                                        &fill_pattern, sizeof (fill_pattern),
                                                        0, width * height * priv->num_slices * sizeof (guint16),
                                                        0, NULL, NULL));

        /* start random walk, one work item per seed */
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->walk, 0, sizeof (cl_mem), &slices));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->walk, 1, sizeof (cl_mem), &accumulator));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->walk, 2, sizeof (cl_mem), &prelabeled_device));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->walk, 3, sizeof (guint), &width));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->walk, 4, sizeof (guint), &height));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->walk, 5, sizeof (guint), &priv->num_slices));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->walk, 6, sizeof (cl_mem), &random_device));

        work_size[0] = prelabeled[s].num_labels;
        ufo_profiler_call (profiler, cmd_queue, priv->walk, 1, work_size, NULL);

        /* threshold and record hit in bitmap */
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->threshold, 0, sizeof (cl_mem), &accumulator));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->threshold, 1, sizeof (cl_mem), &priv->bitmap));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->threshold, 2, sizeof (guint), &segment));

        /* one work item per 32-pixel bitmap word */
        work_size[0] = width / 32;
        work_size[1] = height;
        work_size[2] = priv->num_slices;
        ufo_profiler_call (profiler, cmd_queue, priv->threshold, 3, work_size, NULL);

        /* remember which original label the compact index stands for */
        label_map_host[segment] = s + 1;
        segment++;
    }

    UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (accumulator));
    UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (prelabeled_device));
    UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (random_device));

    priv->label_map = clCreateBuffer (priv->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                      MAX_LABELS * sizeof (guint), label_map_host, &error);
    UFO_RESOURCES_CHECK_CLERR (error);

    for (guint s = 0; s < MAX_SEGMENTS; s++)
        g_free (prelabeled[s].labels);

    g_free (prelabeled);
    g_free (label_map_host);
    g_free (random_host);

    return TRUE;
}

/*
 * Emit one segmented slice per call by running the `render` kernel on the
 * bitmap produced by process().
 *
 * Returns TRUE while slices remain. Once all slices have been emitted the
 * segmentation buffers are released and FALSE is returned.
 */
static gboolean
ufo_segment_task_generate (UfoTask *task,
                           UfoBuffer *output,
                           UfoRequisition *requisition)
{
    UfoSegmentTaskPrivate *priv;
    UfoProfiler *profiler;
    UfoGpuNode *node;
    cl_command_queue cmd_queue;
    cl_mem out_mem;
    guint slice;
    gsize work_size[3];

    priv = UFO_SEGMENT_TASK_GET_PRIVATE (task);

    if (priv->current == 0) {
        /* NULL-safe: nothing happens if process() never ran or we were called twice */
        release_segmentation_buffers (priv);
        return FALSE;
    }

    node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
    cmd_queue = ufo_gpu_node_get_cmd_queue (node);
    out_mem = ufo_buffer_get_device_array (output, cmd_queue);

    /* `current` counts down, so slices are emitted in ascending order */
    slice = priv->num_slices - priv->current;

    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->render, 0, sizeof (cl_mem), &priv->bitmap));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->render, 1, sizeof (cl_mem), &out_mem));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->render, 2, sizeof (cl_mem), &priv->label_map));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->render, 3, sizeof (guint), &slice));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->render, 4, sizeof (guint), &priv->num_segments));
    UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->render, 5, sizeof (guint), &priv->num_slices));

    work_size[0] = requisition->dims[0] / 32;
    work_size[1] = requisition->dims[1];

    profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task));
    ufo_profiler_call (profiler, cmd_queue, priv->render, 2, work_size, NULL);

    priv->current--;

    return TRUE;
}

/*
 * Release every OpenCL object the task still holds, including segmentation
 * buffers left over when generate() was not run to completion.
 */
static void
ufo_segment_task_finalize (GObject *object)
{
    UfoSegmentTaskPrivate *priv;

    priv = UFO_SEGMENT_TASK_GET_PRIVATE (object);

    release_segmentation_buffers (priv);

    if (priv->walk != NULL) {
        UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->walk));
    }

    if (priv->render != NULL) {
        UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->render));
    }

    if (priv->threshold != NULL) {
        UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->threshold));
    }

    if (priv->context) {
        UFO_RESOURCES_CHECK_CLERR (clReleaseContext (priv->context));
        priv->context = NULL;
    }

    G_OBJECT_CLASS (ufo_segment_task_parent_class)->finalize (object);
}

/* Wire the UfoTask interface to this task's implementations. */
static void
ufo_task_interface_init (UfoTaskIface *iface)
{
    iface->setup = ufo_segment_task_setup;
    iface->get_num_inputs = ufo_segment_task_get_num_inputs;
    iface->get_num_dimensions = ufo_segment_task_get_num_dimensions;
    iface->get_mode = ufo_segment_task_get_mode;
    iface->get_requisition = ufo_segment_task_get_requisition;
    iface->process = ufo_segment_task_process;
    iface->generate = ufo_segment_task_generate;
}

/* Install the finalizer and reserve space for the private data. */
static void
ufo_segment_task_class_init (UfoSegmentTaskClass *klass)
{
    GObjectClass *oclass = G_OBJECT_CLASS (klass);

    oclass->finalize = ufo_segment_task_finalize;

    g_type_class_add_private (oclass, sizeof(UfoSegmentTaskPrivate));
}

/* Cache the pointer to the instance's private data. */
static void
ufo_segment_task_init(UfoSegmentTask *self)
{
    self->priv = UFO_SEGMENT_TASK_GET_PRIVATE(self);
}