/* * Copyright (C) 2011-2015 Karlsruhe Institute of Technology * * This file is part of Ufo. * * This library is free software: you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either * version 3 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . */ #ifdef __APPLE__ #include #else #include #endif #include #include "ufo-correlate-stacks-task.h" #define USE_GPU 0 struct _UfoCorrelateStacksTaskPrivate { guint number; gsize num_references; guint current; gboolean generated; #if USE_GPU cl_context context; cl_mem result; cl_kernel diff_kernel; cl_kernel sum_kernel; #endif }; static void ufo_task_interface_init (UfoTaskIface *iface); G_DEFINE_TYPE_WITH_CODE (UfoCorrelateStacksTask, ufo_correlate_stacks_task, UFO_TYPE_TASK_NODE, G_IMPLEMENT_INTERFACE (UFO_TYPE_TASK, ufo_task_interface_init)) #define UFO_CORRELATE_STACKS_TASK_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE((obj), UFO_TYPE_CORRELATE_STACKS_TASK, UfoCorrelateStacksTaskPrivate)) enum { PROP_0, PROP_NUMBER, N_PROPERTIES }; static GParamSpec *properties[N_PROPERTIES] = { NULL, }; UfoNode * ufo_correlate_stacks_task_new (void) { return UFO_NODE (g_object_new (UFO_TYPE_CORRELATE_STACKS_TASK, NULL)); } static void ufo_correlate_stacks_task_setup (UfoTask *task, UfoResources *resources, GError **error) { UfoCorrelateStacksTaskPrivate *priv; priv = UFO_CORRELATE_STACKS_TASK_GET_PRIVATE (task); if (priv->number == 0) { g_set_error (error, UFO_TASK_ERROR, UFO_TASK_ERROR_SETUP, "::number not set"); return; } #if USE_GPU priv->diff_kernel = ufo_resources_get_kernel (resources, "correlate.cl", "diff", error); priv->sum_kernel = ufo_resources_get_kernel (resources, "correlate.cl", "sum", error); if (priv->diff_kernel != NULL) UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->diff_kernel), error); if (priv->sum_kernel != NULL) UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->sum_kernel), error); priv->context = ufo_resources_get_context (resources); UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainContext (priv->context), error); #endif priv->current = 0; priv->generated = FALSE; } static void ufo_correlate_stacks_task_get_requisition (UfoTask *task, UfoBuffer **inputs, UfoRequisition *requisition, GError **error) { UfoCorrelateStacksTaskPrivate *priv; UfoRequisition ref_req; priv = UFO_CORRELATE_STACKS_TASK_GET_PRIVATE (task); ufo_buffer_get_requisition (inputs[0], &ref_req); priv->num_references = ref_req.dims[2]; #if USE_GPU if (priv->result == NULL) { cl_int error; priv->result = clCreateBuffer (priv->context, CL_MEM_READ_WRITE, ufo_buffer_get_size (inputs[0]), NULL, &error); UFO_RESOURCES_CHECK_CLERR (error); } #endif /* Output is a correlation matrix with rows being input and columns * references. */ requisition->n_dims = 2; requisition->dims[0] = priv->num_references; requisition->dims[1] = priv->number; } static guint ufo_correlate_stacks_task_get_num_inputs (UfoTask *task) { return 2; } static guint ufo_correlate_stacks_task_get_num_dimensions (UfoTask *task, guint input) { if (input == 0) return 3; return 2; } static UfoTaskMode ufo_correlate_stacks_task_get_mode (UfoTask *task) { return UFO_TASK_MODE_REDUCTOR | UFO_TASK_MODE_GPU; } static gboolean ufo_correlate_stacks_task_process (UfoTask *task, UfoBuffer **inputs, UfoBuffer *output, UfoRequisition *requisition) { UfoCorrelateStacksTaskPrivate *priv; UfoRequisition refs_req; #if USE_GPU UfoProfiler *profiler; UfoGpuNode *node; cl_command_queue queue; cl_mem in_mem; cl_mem ref_mem; cl_mem matrix_mem; gsize work_size[2]; guint width; guint height; #else UfoRequisition in_req; #endif priv = UFO_CORRELATE_STACKS_TASK_GET_PRIVATE (task); if (priv->current >= priv->number) { g_warning ("Received too many inputs"); return FALSE; } ufo_buffer_get_requisition (inputs[0], &refs_req); #if USE_GPU width = refs_req.dims[0]; height = refs_req.dims[1]; node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task))); queue = ufo_gpu_node_get_cmd_queue (node); ref_mem = ufo_buffer_get_device_array (inputs[0], queue); in_mem = ufo_buffer_get_device_array (inputs[1], queue); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->diff_kernel, 0, sizeof (cl_mem), &ref_mem)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->diff_kernel, 1, sizeof (cl_mem), &in_mem)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->diff_kernel, 2, sizeof (cl_mem), &priv->result)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->diff_kernel, 3, sizeof (guint), &height)); work_size[0] = refs_req.dims[0]; work_size[1] = refs_req.dims[1] * refs_req.dims[2]; profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task)); ufo_profiler_call (profiler, queue, priv->diff_kernel, 2, work_size, NULL); matrix_mem = ufo_buffer_get_device_array (output, queue); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 0, sizeof (cl_mem), &priv->result)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 1, sizeof (cl_mem), &matrix_mem)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 2, sizeof (guint), &width)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 3, sizeof (guint), &height)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 4, sizeof (guint), &priv->current)); work_size[0] = requisition->dims[0]; ufo_profiler_call (profiler, queue, priv->sum_kernel, 1, work_size, NULL); #else gfloat *refs; gfloat *in_mem; gfloat *out_mem; refs = ufo_buffer_get_host_array (inputs[0], NULL); in_mem = ufo_buffer_get_host_array (inputs[1], NULL); out_mem = ufo_buffer_get_host_array (output, NULL); ufo_buffer_get_requisition (inputs[1], &in_req); for (gsize i = 0; i < refs_req.dims[2]; i++) { gfloat *ref; gfloat sum = 0; ref = refs + i * refs_req.dims[0] * refs_req.dims[1]; for (gsize j = 0; j < in_req.dims[0] * in_req.dims[1]; j++) { sum += (ref[j] - in_mem[j]) * (ref[j] - in_mem[j]); } out_mem[i * priv->number + priv->current] = sum; } #endif priv->current++; return TRUE; } static gboolean ufo_correlate_stacks_task_generate (UfoTask *task, UfoBuffer *output, UfoRequisition *requisition) { UfoCorrelateStacksTaskPrivate *priv; priv = UFO_CORRELATE_STACKS_TASK_GET_PRIVATE (task); if (priv->generated) return FALSE; priv->generated = TRUE; return TRUE; } static void ufo_correlate_stacks_task_set_property (GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) { UfoCorrelateStacksTaskPrivate *priv = UFO_CORRELATE_STACKS_TASK_GET_PRIVATE (object); switch (property_id) { case PROP_NUMBER: priv->number = g_value_get_uint (value); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); break; } } static void ufo_correlate_stacks_task_get_property (GObject *object, guint property_id, GValue *value, GParamSpec *pspec) { UfoCorrelateStacksTaskPrivate *priv = UFO_CORRELATE_STACKS_TASK_GET_PRIVATE (object); switch (property_id) { case PROP_NUMBER: g_value_set_uint (value, priv->number); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); break; } } static void ufo_correlate_stacks_task_finalize (GObject *object) { #if USE_GPU UfoCorrelateStacksTaskPrivate *priv; priv = UFO_CORRELATE_STACKS_TASK_GET_PRIVATE (object); if (priv->result) { UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (priv->result)); priv->result = NULL; } if (priv->diff_kernel) { UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->diff_kernel)); priv->diff_kernel = NULL; } if (priv->sum_kernel) { UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->sum_kernel)); priv->sum_kernel = NULL; } if (priv->context) { UFO_RESOURCES_CHECK_CLERR (clReleaseContext (priv->context)); priv->context = NULL; } #endif G_OBJECT_CLASS (ufo_correlate_stacks_task_parent_class)->finalize (object); } static void ufo_task_interface_init (UfoTaskIface *iface) { iface->setup = ufo_correlate_stacks_task_setup; iface->get_num_inputs = ufo_correlate_stacks_task_get_num_inputs; iface->get_num_dimensions = ufo_correlate_stacks_task_get_num_dimensions; iface->get_mode = ufo_correlate_stacks_task_get_mode; iface->get_requisition = ufo_correlate_stacks_task_get_requisition; iface->process = ufo_correlate_stacks_task_process; iface->generate = ufo_correlate_stacks_task_generate; } static void ufo_correlate_stacks_task_class_init (UfoCorrelateStacksTaskClass *klass) { GObjectClass *oclass = G_OBJECT_CLASS (klass); oclass->set_property = ufo_correlate_stacks_task_set_property; oclass->get_property = ufo_correlate_stacks_task_get_property; oclass->finalize = ufo_correlate_stacks_task_finalize; properties[PROP_NUMBER] = g_param_spec_uint ("number", "Number of input items", "Number of input items", 0, G_MAXUINT, 0, G_PARAM_READWRITE); for (guint i = PROP_0 + 1; i < N_PROPERTIES; i++) g_object_class_install_property (oclass, i, properties[i]); g_type_class_add_private (oclass, sizeof(UfoCorrelateStacksTaskPrivate)); } static void ufo_correlate_stacks_task_init(UfoCorrelateStacksTask *self) { self->priv = UFO_CORRELATE_STACKS_TASK_GET_PRIVATE(self); self->priv->number = 0; self->priv->num_references = 0; #if USE_GPU self->priv->result = NULL; self->priv->diff_kernel = NULL; self->priv->sum_kernel = NULL; self->priv->context = NULL; #endif }