/*
* Copyright (C) 2011-2015 Karlsruhe Institute of Technology
*
* This file is part of Ufo.
*
* This library is free software: you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either
* version 3 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef __APPLE__
#include
#else
#include
#endif
#include
#include "ufo-gemm-task.h"
/* Private per-instance state of a UfoGemmTask. */
struct _UfoGemmTaskPrivate {
/* Scalar handed to CLBlastSgemm as alpha; backs the "alpha" property. */
gfloat alpha;
/* Scalar handed to CLBlastSgemm as beta; backs the "beta" property. */
gfloat beta;
/* First dimension of input A (r_A.dims[0]), cached by get_requisition. */
gsize m;
/* Second dimension of input A (r_A.dims[1]), cached by get_requisition. */
gsize k;
/* Second dimension of input B (r_B.dims[1]), cached by get_requisition. */
gsize n;
/* Set by get_requisition when the input sizes are incompatible; process
 * returns FALSE while it is TRUE. */
gboolean error;
};
/* Forward declaration: the type definition below refers to this function,
 * which is defined later in the file. */
static void ufo_task_interface_init (UfoTaskIface *iface);
/* Define the UfoGemmTask type with UFO_TYPE_TASK_NODE as parent and register
 * ufo_task_interface_init as its UFO_TYPE_TASK interface initialiser. */
G_DEFINE_TYPE_WITH_CODE (UfoGemmTask, ufo_gemm_task, UFO_TYPE_TASK_NODE,
G_IMPLEMENT_INTERFACE (UFO_TYPE_TASK,
ufo_task_interface_init))
/* Look up the UfoGemmTaskPrivate data attached to instance `obj`. */
#define UFO_GEMM_TASK_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE((obj), UFO_TYPE_GEMM_TASK, UfoGemmTaskPrivate))
/* Property identifiers. PROP_0 is a placeholder: class_init installs
 * properties starting at PROP_0 + 1. N_PROPERTIES sizes the table below. */
enum {
PROP_0,
PROP_ALPHA,
PROP_BETA,
N_PROPERTIES
};
/* Parameter specs indexed by property id; filled in by class_init. */
static GParamSpec *properties[N_PROPERTIES] = { NULL, };
UfoNode *
ufo_gemm_task_new (void)
{
return UFO_NODE (g_object_new (UFO_TYPE_GEMM_TASK, NULL));
}
/*
 * UfoTask "setup" hook (installed in ufo_task_interface_init).
 *
 * The body is empty: this task does nothing at setup time, never touches
 * `resources` and never sets `error`.
 */
static void
ufo_gemm_task_setup (UfoTask *task,
UfoResources *resources,
GError **error)
{
}
/*
 * UfoTask "get_requisition" hook: validate the input sizes and request the
 * output size.
 *
 * inputs[0], inputs[1] and inputs[2] are treated as the matrices A, B and C.
 * The sizes are accepted when
 *   - B.dims[0] == A.dims[1], and
 *   - C.dims[0] == A.dims[0] and C.dims[1] == B.dims[1].
 *
 * On a mismatch `error` is set (UFO_TASK_ERROR_GET_REQUISITION) and
 * priv->error is raised so that the process hook refuses to run. Only the
 * first failing check is reported, because a GError that is already set must
 * not be overwritten.
 *
 * m, k and n are cached in the private data and the requested output is
 * two-dimensional with dims <m, n>; this is done even when an error was
 * reported, so `requisition` is always initialised on return.
 */
static void
ufo_gemm_task_get_requisition (UfoTask *task,
                               UfoBuffer **inputs,
                               UfoRequisition *requisition,
                               GError **error)
{
    UfoGemmTaskPrivate *priv;
    UfoRequisition r_A;
    UfoRequisition r_B;
    UfoRequisition r_C;

    priv = UFO_GEMM_TASK_GET_PRIVATE (task);
    /* Reset the flag on every call so an earlier failure does not stick. */
    priv->error = FALSE;

    ufo_buffer_get_requisition (inputs[0], &r_A);
    ufo_buffer_get_requisition (inputs[1], &r_B);
    ufo_buffer_get_requisition (inputs[2], &r_C);

    if (r_B.dims[0] != r_A.dims[1]) {
        g_set_error (error, UFO_TASK_ERROR, UFO_TASK_ERROR_GET_REQUISITION,
                     "A = <%zu, %zu> not compatible with B = <%zu, %zu>",
                     r_A.dims[0], r_A.dims[1], r_B.dims[0], r_B.dims[1]);
        priv->error = TRUE;
    }
    else if ((r_C.dims[0] != r_A.dims[0]) || (r_C.dims[1] != r_B.dims[1])) {
        /* else-if: calling g_set_error on an already-set GError is invalid. */
        g_set_error (error, UFO_TASK_ERROR, UFO_TASK_ERROR_GET_REQUISITION,
                     "C = <%zu, %zu> not compatible with A = <%zu, %zu> and B = <%zu, %zu>",
                     r_C.dims[0], r_C.dims[1], r_A.dims[0], r_A.dims[1], r_B.dims[0], r_B.dims[1]);
        priv->error = TRUE;
    }

    priv->m = r_A.dims[0];
    priv->k = r_A.dims[1];
    priv->n = r_B.dims[1];

    requisition->n_dims = 2;
    requisition->dims[0] = priv->m;
    requisition->dims[1] = priv->n;
}
/*
 * UfoTask "get_num_inputs" hook.
 *
 * Returns: 3. The requisition and process hooks read inputs[0], inputs[1]
 * and inputs[2].
 */
static guint
ufo_gemm_task_get_num_inputs (UfoTask *task)
{
    const guint n_inputs = 3;

    return n_inputs;
}
/*
 * UfoTask "get_num_dimensions" hook.
 *
 * Returns: 2 for every input; the `input` index is not inspected.
 */
static guint
ufo_gemm_task_get_num_dimensions (UfoTask *task,
                                  guint input)
{
    const guint n_dims = 2;

    return n_dims;
}
/*
 * UfoTask "get_mode" hook.
 *
 * Returns: the UFO_TASK_MODE_PROCESSOR and UFO_TASK_MODE_GPU flags combined.
 */
static UfoTaskMode
ufo_gemm_task_get_mode (UfoTask *task)
{
    UfoTaskMode mode = UFO_TASK_MODE_PROCESSOR;

    mode |= UFO_TASK_MODE_GPU;

    return mode;
}
/*
 * UfoTask "process" hook: run SGEMM on the GPU and copy the result to
 * `output`.
 *
 * inputs[0], inputs[1] and inputs[2] are used as A, B and C. CLBlastSgemm is
 * called with the cached sizes m, n, k and the alpha/beta properties and
 * writes its result into the device buffer of C; that buffer is then copied
 * into the device buffer of `output` (ufo_buffer_get_size (output) bytes).
 * The copy waits on the GEMM event and the function blocks until the copy
 * has finished.
 *
 * `requisition` is not used.
 *
 * Returns: FALSE when get_requisition flagged incompatible inputs or when
 * CLBlastSgemm reports a failure (nothing is written to `output` in either
 * case); TRUE otherwise.
 */
static gboolean
ufo_gemm_task_process (UfoTask *task,
                       UfoBuffer **inputs,
                       UfoBuffer *output,
                       UfoRequisition *requisition)
{
    UfoGemmTaskPrivate *priv;
    UfoGpuNode *node;
    cl_command_queue cmd_queue;
    cl_mem a_mem;
    cl_mem b_mem;
    cl_mem c_mem;
    cl_mem out_mem;
    cl_event event;
    cl_event copy_event;
    CLBlastStatusCode code;

    priv = UFO_GEMM_TASK_GET_PRIVATE (task);

    /* Refuse to run before any device buffer is requested. */
    if (priv->error)
        return FALSE;

    node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task)));
    cmd_queue = ufo_gpu_node_get_cmd_queue (node);

    a_mem = ufo_buffer_get_device_array (inputs[0], cmd_queue);
    b_mem = ufo_buffer_get_device_array (inputs[1], cmd_queue);
    c_mem = ufo_buffer_get_device_array (inputs[2], cmd_queue);

    /*
     * NOTE(review): the leading dimensions m, k, m are those of column-major
     * m x k, k x n and m x n matrices, whereas the layout flag is row-major,
     * for which BLAS conventionally expects k, n, n. The two only agree for
     * square inputs -- confirm against the memory layout of UfoBuffer before
     * changing either the layout flag or the leading dimensions.
     */
    code = CLBlastSgemm (CLBlastLayoutRowMajor, CLBlastTransposeNo, CLBlastTransposeNo,
                         priv->m, priv->n, priv->k,
                         priv->alpha,
                         a_mem, 0, priv->m,
                         b_mem, 0, priv->k,
                         priv->beta,
                         c_mem, 0, priv->m,
                         &cmd_queue, &event);

    if (code != CLBlastSuccess) {
        /* `event` is not released here: it is only relied upon after a
         * successful call. */
        if (code > CLBlastNotImplemented) {
            /* Codes above CLBlastNotImplemented go through the regular
             * OpenCL error check, as before. */
            UFO_RESOURCES_CHECK_CLERR (code);
        }
        else {
            /* The remaining codes used to be dropped silently. */
            g_warning ("CLBlastSgemm failed with status code %d", (int) code);
        }

        return FALSE;
    }

    out_mem = ufo_buffer_get_device_array (output, cmd_queue);

    /* The copy is ordered after the GEMM through the event wait list. */
    UFO_RESOURCES_CHECK_CLERR (clEnqueueCopyBuffer (cmd_queue, c_mem, out_mem, 0, 0,
                                                    ufo_buffer_get_size (output),
                                                    1, &event, &copy_event));
    UFO_RESOURCES_CHECK_CLERR (clWaitForEvents (1, &copy_event));
    UFO_RESOURCES_CHECK_CLERR (clReleaseEvent (event));
    UFO_RESOURCES_CHECK_CLERR (clReleaseEvent (copy_event));

    return TRUE;
}
/*
 * GObject "set_property" implementation.
 *
 * Stores the float carried by `value` into the private alpha or beta field,
 * depending on `property_id`. Any other id is reported through
 * G_OBJECT_WARN_INVALID_PROPERTY_ID.
 */
static void
ufo_gemm_task_set_property (GObject *object,
                            guint property_id,
                            const GValue *value,
                            GParamSpec *pspec)
{
    UfoGemmTaskPrivate *state;

    state = UFO_GEMM_TASK_GET_PRIVATE (object);

    if (property_id == PROP_ALPHA) {
        state->alpha = g_value_get_float (value);
    }
    else if (property_id == PROP_BETA) {
        state->beta = g_value_get_float (value);
    }
    else {
        G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
    }
}
/*
 * GObject "get_property" implementation.
 *
 * Copies the private alpha or beta field into `value`, depending on
 * `property_id`. Any other id is reported through
 * G_OBJECT_WARN_INVALID_PROPERTY_ID.
 */
static void
ufo_gemm_task_get_property (GObject *object,
                            guint property_id,
                            GValue *value,
                            GParamSpec *pspec)
{
    UfoGemmTaskPrivate *state;

    state = UFO_GEMM_TASK_GET_PRIVATE (object);

    if (property_id == PROP_ALPHA) {
        g_value_set_float (value, state->alpha);
    }
    else if (property_id == PROP_BETA) {
        g_value_set_float (value, state->beta);
    }
    else {
        G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
    }
}
/*
 * GObject "finalize" implementation.
 *
 * The task has nothing of its own to release here; it only chains up to the
 * parent class.
 */
static void
ufo_gemm_task_finalize (GObject *object)
{
    GObjectClass *parent_class;

    parent_class = G_OBJECT_CLASS (ufo_gemm_task_parent_class);
    parent_class->finalize (object);
}
/*
 * UfoTask interface initialiser: plug this file's hook implementations into
 * the interface vtable.
 */
static void
ufo_task_interface_init (UfoTaskIface *iface)
{
    /* Static task description. */
    iface->get_mode = ufo_gemm_task_get_mode;
    iface->get_num_inputs = ufo_gemm_task_get_num_inputs;
    iface->get_num_dimensions = ufo_gemm_task_get_num_dimensions;

    /* Execution hooks. */
    iface->setup = ufo_gemm_task_setup;
    iface->get_requisition = ufo_gemm_task_get_requisition;
    iface->process = ufo_gemm_task_process;
}
/*
 * Class initialiser.
 *
 * Installs the GObject virtual functions, creates and installs the "alpha"
 * (default 1.0) and "beta" (default 0.0) float properties, and registers the
 * private instance structure.
 */
static void
ufo_gemm_task_class_init (UfoGemmTaskClass *klass)
{
    GObjectClass *gobject_class;
    guint prop_id;

    gobject_class = G_OBJECT_CLASS (klass);

    gobject_class->finalize = ufo_gemm_task_finalize;
    gobject_class->get_property = ufo_gemm_task_get_property;
    gobject_class->set_property = ufo_gemm_task_set_property;

    properties[PROP_ALPHA] = g_param_spec_float ("alpha",
                                                 "Scalar GEMM alpha value",
                                                 "Scalar GEMM alpha value",
                                                 -G_MAXFLOAT, G_MAXFLOAT, 1.0f,
                                                 G_PARAM_READWRITE);

    properties[PROP_BETA] = g_param_spec_float ("beta",
                                                "Scalar GEMM beta value",
                                                "Scalar GEMM beta value",
                                                -G_MAXFLOAT, G_MAXFLOAT, 0.0f,
                                                G_PARAM_READWRITE);

    /* Slot PROP_0 is a placeholder and is skipped. */
    prop_id = PROP_0 + 1;

    while (prop_id < N_PROPERTIES) {
        g_object_class_install_property (gobject_class, prop_id, properties[prop_id]);
        prop_id++;
    }

    g_type_class_add_private (gobject_class, sizeof(UfoGemmTaskPrivate));
}
/*
 * Instance initialiser: cache the private data pointer in self->priv and set
 * alpha to 1.0 and beta to 0.0, the same defaults the property specs declare.
 */
static void
ufo_gemm_task_init(UfoGemmTask *self)
{
    UfoGemmTaskPrivate *state;

    state = UFO_GEMM_TASK_GET_PRIVATE(self);
    self->priv = state;

    state->alpha = 1.0f;
    state->beta = 0.0f;
}