/*
 * Copyright (C) 2017 Karlsruhe Institute of Technology
 *
 * This file is part of Ufo.
 *
 * This library is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either
 * version 3 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "config.h"

#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

#include "ufo-opencl-reduce-task.h"

/*
 * Per-instance state of the OpenCL reduce task.
 */
struct _UfoOpenCLReduceTaskPrivate {
    cl_kernel kernel;       /* kernel run from process(), args: (input, output) */
    cl_kernel finish;       /* optional kernel run from generate(), args: (output, counter) */
    cl_uint n_inputs;       /* not referenced by any function in this file */
    gchar *filename;        /* value of the "filename" property, owned */
    gchar *kernel_name;     /* value of the "kernel" property, owned */
    gchar *finish_name;     /* value of the "finish" property, owned */
    gchar *source;          /* value of the "source" property, owned */
    guint n_dims;           /* value of the "dimensions" property */
    gboolean generated;     /* TRUE once generate() has produced its result */
    gboolean fold;          /* TRUE once "fold-value" has been set */
    gfloat fold_value;      /* value the output is filled with in fold mode */
    cl_uint counter;        /* number of process() calls since setup() */
};

static void ufo_task_interface_init (UfoTaskIface *iface);

G_DEFINE_TYPE_WITH_CODE (UfoOpenCLReduceTask, ufo_opencl_reduce_task, UFO_TYPE_TASK_NODE,
                         G_IMPLEMENT_INTERFACE (UFO_TYPE_TASK,
                                                ufo_task_interface_init))

#define UFO_OPENCL_REDUCE_TASK_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE((obj), UFO_TYPE_OPENCL_REDUCE_TASK, UfoOpenCLReduceTaskPrivate))

enum {
    PROP_0,
    PROP_FILENAME,
    PROP_SOURCE,
    PROP_KERNEL,
    PROP_FINISH,
    PROP_NUM_DIMS,
    PROP_FOLD_VALUE,
    N_PROPERTIES
};

static GParamSpec *properties[N_PROPERTIES] = { NULL, };

/*
 * Create a new, unconfigured OpenCL reduce task and return it as a UfoNode.
 */
UfoNode *
ufo_opencl_reduce_task_new (void)
{
    return UFO_NODE (g_object_new (UFO_TYPE_OPENCL_REDUCE_TASK, NULL));
}

/*
 * Look up the reduction kernel (and the optional finish kernel) either from
 * the ::source string or from a kernel file ("opencl-reduce.cl" unless
 * ::filename is set), validate the reduction kernel's argument count and
 * take a reference on every kernel that is kept.
 *
 * On failure @error is set and priv->kernel / priv->finish are left
 * untouched.
 */
static void
ufo_opencl_reduce_task_setup (UfoTask *task,
                              UfoResources *resources,
                              GError **error)
{
    UfoOpenCLReduceTaskPrivate *priv;
    cl_kernel kernel = NULL;
    cl_kernel finish = NULL;
    cl_uint n_args;

    priv = UFO_OPENCL_REDUCE_TASK_GET_PRIVATE (task);

    if (priv->kernel_name == NULL) {
        g_set_error (error, UFO_TASK_ERROR, UFO_TASK_ERROR_SETUP,
                     "Must specify a ::kernel name to use for operation");
        return;
    }

    if (priv->filename != NULL && priv->source != NULL) {
        g_set_error (error, UFO_TASK_ERROR, UFO_TASK_ERROR_SETUP,
                     "Cannot use ::filename and ::source at the same time");
        return;
    }

    if (priv->source != NULL) {
        kernel = ufo_resources_get_kernel_from_source (resources, priv->source, priv->kernel_name, NULL, error);

        /* Stop here so an already set GError is never handed to the next call. */
        if (kernel == NULL)
            return;

        if (priv->finish_name) {
            finish = ufo_resources_get_kernel_from_source (resources, priv->source, priv->finish_name, NULL, error);

            if (finish == NULL)
                return;
        }
    }
    else {
        const gchar *filename;

        filename = priv->filename != NULL ? priv->filename : "opencl-reduce.cl";
        kernel = ufo_resources_get_kernel (resources, filename, priv->kernel_name, NULL, error);

        /* Stop here so an already set GError is never handed to the next call. */
        if (kernel == NULL)
            return;

        if (priv->finish_name) {
            finish = ufo_resources_get_kernel (resources, filename, priv->finish_name, NULL, error);

            if (finish == NULL)
                return;
        }
    }

    UFO_RESOURCES_CHECK_SET_AND_RETURN (clGetKernelInfo (kernel, CL_KERNEL_NUM_ARGS, sizeof (cl_uint), &n_args, NULL), error);

    if (n_args != 2) {
        g_set_error (error, UFO_TASK_ERROR, UFO_TASK_ERROR_SETUP,
                     "Kernel `%s' must accept exactly two arguments", priv->kernel_name);
        return;
    }

    /*
     * finalize() releases whatever is stored in priv->kernel and
     * priv->finish, so a kernel is only stored once it has been retained.
     * Otherwise an early return above would lead to an unbalanced release.
     */
    UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (kernel), error);
    priv->kernel = kernel;

    if (finish != NULL) {
        UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (finish), error);
        priv->finish = finish;
    }

    priv->generated = FALSE;
    priv->counter = 0;
}

/*
 * The output has the same requisition as the (single) input buffer.
 */
static void
ufo_opencl_reduce_task_get_requisition (UfoTask *task,
                                        UfoBuffer **inputs,
                                        UfoRequisition *requisition,
                                        GError **error)
{
    ufo_buffer_get_requisition (inputs[0], requisition);
}

/*
 * This task consumes exactly one input stream.
 */
static guint
ufo_opencl_reduce_task_get_num_inputs (UfoTask *task)
{
    return 1;
}

/*
 * Report the configured ::dimensions value for every input.
 */
static guint
ufo_opencl_reduce_task_get_num_dimensions (UfoTask *task,
                                           guint input)
{
    return UFO_OPENCL_REDUCE_TASK_GET_PRIVATE (task)->n_dims;
}

/*
 * The task is a reductor that runs on the GPU.
 */
static UfoTaskMode
ufo_opencl_reduce_task_get_mode (UfoTask *task)
{
    return UFO_TASK_MODE_REDUCTOR | UFO_TASK_MODE_GPU;
}

/*
 * Copying is not supported: always sets @error and returns NULL.
 */
static UfoNode *
ufo_opencl_reduce_task_copy_real (UfoNode *node,
                                  GError **error)
{
    g_set_error (error, UFO_TASK_ERROR, UFO_TASK_ERROR_COPY,
                 "Cannot be copied (please disable graph expansion or limit the used devices to 1, e.g. UFO_DEVICES=0)");
    return NULL;
}

/*
 * Two reduce tasks compare equal when they hold the same kernel handle.
 */
static gboolean
ufo_opencl_reduce_task_equal_real (UfoNode *n1,
                                   UfoNode *n2)
{
    g_return_val_if_fail (UFO_IS_OPENCL_REDUCE_TASK (n1) && UFO_IS_OPENCL_REDUCE_TASK (n2), FALSE);
    return UFO_OPENCL_REDUCE_TASK (n1)->priv->kernel == UFO_OPENCL_REDUCE_TASK (n2)->priv->kernel;
}

/*
 * Accumulate one input into @output.
 *
 * On the first call the output is initialised: in fold mode it is filled
 * with the fold value, otherwise the input is copied into it. The kernel is
 * then run with (input, output) as arguments, except on the first call of a
 * non-fold reduction. Always returns TRUE.
 */
static gboolean
ufo_opencl_reduce_task_process (UfoTask *task,
                                UfoBuffer **inputs,
                                UfoBuffer *output,
                                UfoRequisition *requisition)
{
    UfoOpenCLReduceTaskPrivate *priv;
    UfoGpuNode *node;
    UfoProfiler *profiler;
    cl_command_queue cmd_queue;
    cl_mem out_mem;
    cl_mem in_mem;

    priv = UFO_OPENCL_REDUCE_TASK_GET_PRIVATE (task);
    node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
    cmd_queue = ufo_gpu_node_get_cmd_queue (node);
    out_mem = ufo_buffer_get_device_array (output, cmd_queue);
    in_mem = ufo_buffer_get_device_array (inputs[0], cmd_queue);

    if (priv->counter == 0) {
        if (priv->fold) {
            UFO_RESOURCES_CHECK_CLERR (clEnqueueFillBuffer (cmd_queue, out_mem, &priv->fold_value, sizeof (gfloat),
                                                            0, ufo_buffer_get_size (output), 0, NULL, NULL));
        }
        else {
            UFO_RESOURCES_CHECK_CLERR (clEnqueueCopyBuffer (cmd_queue, in_mem, out_mem,
                                                            0, 0, ufo_buffer_get_size (output), 0, NULL, NULL));
        }
    }

    /*
     * We have to skip the first iteration for reduce operations because we
     * already copied the data into the output buffer.
     */
    if (priv->fold || priv->counter > 0) {
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 0, sizeof (cl_mem), &in_mem));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 1, sizeof (cl_mem), &out_mem));

        profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task));
        ufo_profiler_call (profiler, cmd_queue, priv->kernel, priv->n_dims, requisition->dims, NULL);
    }

    priv->counter++;

    return TRUE;
}

/*
 * Emit the reduced result exactly once.
 *
 * Returns FALSE if the result has already been generated. Otherwise the
 * optional finish kernel is run with (output, number of processed inputs)
 * as arguments, the task is marked as generated and TRUE is returned.
 */
static gboolean
ufo_opencl_reduce_task_generate (UfoTask *task,
                                 UfoBuffer *output,
                                 UfoRequisition *requisition)
{
    UfoOpenCLReduceTaskPrivate *priv;

    priv = UFO_OPENCL_REDUCE_TASK_GET_PRIVATE (task);

    if (priv->generated)
        return FALSE;

    if (priv->finish) {
        UfoGpuNode *node;
        UfoProfiler *profiler;
        cl_command_queue cmd_queue;
        cl_mem out_mem;

        node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
        cmd_queue = ufo_gpu_node_get_cmd_queue (node);
        out_mem = ufo_buffer_get_device_array (output, cmd_queue);

        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->finish, 0, sizeof (cl_mem), &out_mem));
        UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->finish, 1, sizeof (cl_uint), &priv->counter));

        profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task));
        ufo_profiler_call (profiler, cmd_queue, priv->finish, priv->n_dims, requisition->dims, NULL);
    }

    priv->generated = TRUE;
    return TRUE;
}

/*
 * GObject property setter. String properties are duplicated and replace
 * any previously stored value; setting "fold-value" also switches the task
 * into fold mode.
 */
static void
ufo_opencl_reduce_task_set_property (GObject *object,
                                     guint property_id,
                                     const GValue *value,
                                     GParamSpec *pspec)
{
    UfoOpenCLReduceTaskPrivate *priv = UFO_OPENCL_REDUCE_TASK_GET_PRIVATE (object);

    switch (property_id) {
        case PROP_FILENAME:
            g_free (priv->filename);
            priv->filename = g_value_dup_string (value);
            break;
        case PROP_SOURCE:
            g_free (priv->source);
            priv->source = g_value_dup_string (value);
            break;
        case PROP_KERNEL:
            g_free (priv->kernel_name);
            priv->kernel_name = g_value_dup_string (value);
            break;
        case PROP_FINISH:
            g_free (priv->finish_name);
            priv->finish_name = g_value_dup_string (value);
            break;
        case PROP_NUM_DIMS:
            priv->n_dims = g_value_get_uint (value);
            break;
        case PROP_FOLD_VALUE:
            priv->fold = TRUE;
            priv->fold_value = g_value_get_float (value);
            break;
        default:
            G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
            break;
    }
}

/*
 * GObject property getter. Unset string properties are reported as "".
 */
static void
ufo_opencl_reduce_task_get_property (GObject *object,
                                     guint property_id,
                                     GValue *value,
                                     GParamSpec *pspec)
{
    UfoOpenCLReduceTaskPrivate *priv = UFO_OPENCL_REDUCE_TASK_GET_PRIVATE (object);

    switch (property_id) {
        case PROP_FILENAME:
            g_value_set_string (value, priv->filename ? priv->filename : "");
            break;
        case PROP_SOURCE:
            g_value_set_string (value, priv->source ? priv->source : "");
            break;
        case PROP_KERNEL:
            g_value_set_string (value, priv->kernel_name ? priv->kernel_name : "");
            break;
        case PROP_FINISH:
            g_value_set_string (value, priv->finish_name ? priv->finish_name : "");
            break;
        case PROP_NUM_DIMS:
            g_value_set_uint (value, priv->n_dims);
            break;
        case PROP_FOLD_VALUE:
            g_value_set_float (value, priv->fold_value);
            break;
        default:
            G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
            break;
    }
}

/*
 * Release the kernels retained in setup() and free every owned string,
 * then chain up to the parent class.
 */
static void
ufo_opencl_reduce_task_finalize (GObject *object)
{
    UfoOpenCLReduceTaskPrivate *priv;

    priv = UFO_OPENCL_REDUCE_TASK_GET_PRIVATE (object);

    if (priv->kernel) {
        clReleaseKernel (priv->kernel);
        priv->kernel = NULL;
    }

    if (priv->finish) {
        clReleaseKernel (priv->finish);
        priv->finish = NULL;
    }

    g_free (priv->filename);
    g_free (priv->kernel_name);
    g_free (priv->finish_name);
    /* The source string is duplicated in set_property() and must be freed too. */
    g_free (priv->source);

    priv->filename = NULL;
    priv->kernel_name = NULL;
    priv->finish_name = NULL;
    priv->source = NULL;

    G_OBJECT_CLASS (ufo_opencl_reduce_task_parent_class)->finalize (object);
}

/*
 * Wire this file's implementations into the UfoTask interface.
 */
static void
ufo_task_interface_init (UfoTaskIface *iface)
{
    iface->setup = ufo_opencl_reduce_task_setup;
    iface->get_num_inputs = ufo_opencl_reduce_task_get_num_inputs;
    iface->get_num_dimensions = ufo_opencl_reduce_task_get_num_dimensions;
    iface->get_mode = ufo_opencl_reduce_task_get_mode;
    iface->get_requisition = ufo_opencl_reduce_task_get_requisition;
    iface->process = ufo_opencl_reduce_task_process;
    iface->generate = ufo_opencl_reduce_task_generate;
}

/*
 * Install the GObject vfuncs, the node copy/equal vfuncs, all properties
 * and the private instance data.
 */
static void
ufo_opencl_reduce_task_class_init (UfoOpenCLReduceTaskClass *klass)
{
    GObjectClass *oclass = G_OBJECT_CLASS (klass);
    UfoNodeClass *node_class = UFO_NODE_CLASS (klass);

    oclass->set_property = ufo_opencl_reduce_task_set_property;
    oclass->get_property = ufo_opencl_reduce_task_get_property;
    oclass->finalize = ufo_opencl_reduce_task_finalize;

    properties[PROP_FILENAME] =
        g_param_spec_string ("filename",
            "OpenCL kernel filename",
            "OpenCL kernel filename",
            "",
            G_PARAM_READWRITE);

    properties[PROP_SOURCE] =
        g_param_spec_string ("source",
            "OpenCL kernel source",
            "OpenCL kernel source",
            "",
            G_PARAM_READWRITE);

    properties[PROP_KERNEL] =
        g_param_spec_string ("kernel",
            "Kernel name",
            "Name of the kernel that should be computed with this task",
            "",
            G_PARAM_READWRITE);

    properties[PROP_FINISH] =
        g_param_spec_string ("finish",
            "Optional finish kernel name",
            "Name of the kernel that should be run at the end",
            "",
            G_PARAM_READWRITE);

    properties[PROP_FOLD_VALUE] =
        g_param_spec_float ("fold-value",
            "Initial fold value",
            "Initial fold value, if not set the operation is a real reduction",
            -G_MAXFLOAT, G_MAXFLOAT, 0.0f,
            G_PARAM_READWRITE);

    properties[PROP_NUM_DIMS] =
        g_param_spec_uint ("dimensions",
            "Number of dimensions",
            "Number of dimensions that the kernel works on",
            1, 3, 2,
            G_PARAM_READWRITE);

    for (guint i = PROP_0 + 1; i < N_PROPERTIES; i++)
        g_object_class_install_property (oclass, i, properties[i]);

    node_class->copy = ufo_opencl_reduce_task_copy_real;
    node_class->equal = ufo_opencl_reduce_task_equal_real;

    g_type_class_add_private (oclass, sizeof(UfoOpenCLReduceTaskPrivate));
}

/*
 * Instance initialiser: no kernels, no names, two dimensions, reduction
 * (non-fold) mode.
 */
static void
ufo_opencl_reduce_task_init(UfoOpenCLReduceTask *self)
{
    self->priv = UFO_OPENCL_REDUCE_TASK_GET_PRIVATE(self);
    self->priv->kernel = NULL;
    self->priv->finish = NULL;
    self->priv->filename = NULL;
    self->priv->kernel_name = NULL;
    self->priv->finish_name = NULL;
    self->priv->source = NULL;
    self->priv->n_dims = 2;
    self->priv->generated = FALSE;
    self->priv->fold = FALSE;
    self->priv->fold_value = 0.0f;
    self->priv->counter = 0;
}