summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGemma Fardell <47746591+gfardell@users.noreply.github.com>2020-01-23 11:52:46 +0000
committerEdoardo Pasca <edo.paskino@gmail.com>2020-01-23 11:52:46 +0000
commit894f35c9be404bc2c13f90f4a6184a545029181a (patch)
tree17938025fe60afac7006b8fb6c4e7016bf687ccd
parentfecff8ded735d309aba43d30226c0bb51386c905 (diff)
downloadframework-894f35c9be404bc2c13f90f4a6184a545029181a.tar.gz
framework-894f35c9be404bc2c13f90f4a6184a545029181a.tar.bz2
framework-894f35c9be404bc2c13f90f4a6184a545029181a.tar.xz
framework-894f35c9be404bc2c13f90f4a6184a545029181a.zip
Allows user to set number of threads used by openMP in C library grad… (#476)
* Allows user to set number of threads used by openMP in C library gradient operator. Changed to release build flags * closes #477 * added test function for c lib thread deployment * improved thread scaling for neumann algoritims * removed unnecessary thread sync * reverts omp number of threads at the end of the c function call Co-authored-by: Edoardo Pasca <edo.paskino@gmail.com>
-rw-r--r--Wrappers/Python/CMakeLists.txt2
-rw-r--r--Wrappers/Python/ccpi/optimisation/operators/GradientOperator.py41
-rw-r--r--src/Core/CMakeLists.txt1
-rw-r--r--src/Core/FiniteDifferenceLibrary.c281
4 files changed, 111 insertions, 214 deletions
diff --git a/Wrappers/Python/CMakeLists.txt b/Wrappers/Python/CMakeLists.txt
index 9104afd..0c24540 100644
--- a/Wrappers/Python/CMakeLists.txt
+++ b/Wrappers/Python/CMakeLists.txt
@@ -76,7 +76,9 @@ if (BUILD_PYTHON_WRAPPER)
)
endif()
#set (PYTHON_DEST ${CMAKE_INSTALL_PREFIX}/python/)
+
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/build/lib/ccpi
+
DESTINATION ${PYTHON_DEST})
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/data/ DESTINATION ${CMAKE_INSTALL_PREFIX}/share/ccpi)
#file(TOUCH ${PYTHON_DEST}/edo/__init__.py)
diff --git a/Wrappers/Python/ccpi/optimisation/operators/GradientOperator.py b/Wrappers/Python/ccpi/optimisation/operators/GradientOperator.py
index 6391cf7..a45c3d2 100644
--- a/Wrappers/Python/ccpi/optimisation/operators/GradientOperator.py
+++ b/Wrappers/Python/ccpi/optimisation/operators/GradientOperator.py
@@ -25,6 +25,11 @@ from ccpi.framework import ImageData, ImageGeometry, BlockGeometry, BlockDataCon
import numpy
import warnings
+#default nThreads
+import multiprocessing
+cpus = multiprocessing.cpu_count()
+NUM_THREADS = max(int(cpus/2),1)
+
NEUMANN = 'Neumann'
PERIODIC = 'Periodic'
C = 'c'
@@ -51,10 +56,11 @@ class Gradient(LinearOperator):
'Space' or 'SpaceChannels', defaults to 'Space'
* *backend* (``str``) --
'c' or 'numpy', defaults to 'c' if correlation is 'SpaceChannels' or channels = 1
-
-
+ * *num_threads* (``int``) --
+ If backend is 'c' specify the number of threads to use. Default is number of cpus/2
+
+
Example (2D):
-
.. math::
\nabla : X -> Y \\
@@ -85,7 +91,7 @@ class Gradient(LinearOperator):
if backend == NUMPY:
self.operator = Gradient_numpy(gm_domain, bnd_cond=bnd_cond, **kwargs)
else:
- self.operator = Gradient_C(gm_domain, bnd_cond=bnd_cond)
+ self.operator = Gradient_C(gm_domain, bnd_cond=bnd_cond, **kwargs)
def direct(self, x, out=None):
@@ -183,7 +189,7 @@ class Gradient_numpy(LinearOperator):
# Call FiniteDiff operator
self.FD = FiniteDiff(self.gm_domain, direction = 0, bnd_cond = self.bnd_cond)
-
+ print("Initialised GradientOperator with numpy backend")
def direct(self, x, out=None):
@@ -275,7 +281,8 @@ class Gradient_numpy(LinearOperator):
spMat = SparseFiniteDiff(self.gm_domain, direction=self.ind[i], bnd_cond=self.bnd_cond)
res.append(spMat.sum_abs_col())
return BlockDataContainer(*res)
-
+
+
import ctypes, platform
# check for the extension
@@ -288,14 +295,13 @@ elif platform.system() == 'Darwin':
else:
raise ValueError('Not supported platform, ', platform.system())
-#print ("dll location", dll)
-cilacc = ctypes.cdll.LoadLibrary(dll)
-#FD = ctypes.CDLL(dll)
+cilacc = ctypes.cdll.LoadLibrary(dll)
c_float_p = ctypes.POINTER(ctypes.c_float)
cilacc.openMPtest.restypes = ctypes.c_int32
+cilacc.openMPtest.argtypes = [ctypes.c_int32]
cilacc.fdiff4D.argtypes = [ctypes.POINTER(ctypes.c_float),
ctypes.POINTER(ctypes.c_float),
@@ -307,6 +313,7 @@ cilacc.fdiff4D.argtypes = [ctypes.POINTER(ctypes.c_float),
ctypes.c_long,
ctypes.c_long,
ctypes.c_int32,
+ ctypes.c_int32,
ctypes.c_int32]
cilacc.fdiff3D.argtypes = [ctypes.POINTER(ctypes.c_float),
@@ -317,6 +324,7 @@ cilacc.fdiff3D.argtypes = [ctypes.POINTER(ctypes.c_float),
ctypes.c_long,
ctypes.c_long,
ctypes.c_int32,
+ ctypes.c_int32,
ctypes.c_int32]
cilacc.fdiff2D.argtypes = [ctypes.POINTER(ctypes.c_float),
@@ -325,6 +333,7 @@ cilacc.fdiff2D.argtypes = [ctypes.POINTER(ctypes.c_float),
ctypes.c_long,
ctypes.c_long,
ctypes.c_int32,
+ ctypes.c_int32,
ctypes.c_int32]
@@ -336,10 +345,12 @@ class Gradient_C(LinearOperator):
on 2D, 3D, 4D ImageData
under Neumann/Periodic boundary conditions'''
- def __init__(self, gm_domain, gm_range=None, bnd_cond = NEUMANN):
+ def __init__(self, gm_domain, gm_range=None, bnd_cond = NEUMANN, **kwargs):
super(Gradient_C, self).__init__()
+ self.num_threads = kwargs.get('num_threads',NUM_THREADS)
+
self.gm_domain = gm_domain
self.gm_range = gm_range
@@ -362,8 +373,9 @@ class Gradient_C(LinearOperator):
self.fd = cilacc.fdiff2D
else:
raise ValueError('Number of dimensions not supported, expected 2, 3 or 4, got {}'.format(len(gm_domain.shape)))
-
-
+ #self.num_threads
+ print("Initialised GradientOperator with C backend running with ", cilacc.openMPtest(self.num_threads)," threads")
+
@staticmethod
def datacontainer_as_c_pointer(x):
ndx = x.as_array()
@@ -377,9 +389,10 @@ class Gradient_C(LinearOperator):
out = self.gm_range.allocate(None)
return_val = True
+ #pass list of all arguments
arg1 = [Gradient_C.datacontainer_as_c_pointer(out.get_item(i))[1] for i in range(self.gm_range.shape[0])]
arg2 = [el for el in x.shape]
- args = arg1 + arg2 + [self.bnd_cond, 1]
+ args = arg1 + arg2 + [self.bnd_cond, 1, self.num_threads]
self.fd(x_p, *args)
if return_val is True:
@@ -397,7 +410,7 @@ class Gradient_C(LinearOperator):
arg1 = [Gradient_C.datacontainer_as_c_pointer(x.get_item(i))[1] for i in range(self.gm_range.shape[0])]
arg2 = [el for el in out.shape]
- args = arg1 + arg2 + [self.bnd_cond, 0]
+ args = arg1 + arg2 + [self.bnd_cond, 0, self.num_threads]
self.fd(out_p, *args)
diff --git a/src/Core/CMakeLists.txt b/src/Core/CMakeLists.txt
index 741b985..e828fe5 100644
--- a/src/Core/CMakeLists.txt
+++ b/src/Core/CMakeLists.txt
@@ -1,5 +1,6 @@
set (CMAKE_C_STANDARD 11)
+set (CMAKE_BUILD_TYPE Release)
if(APPLE)
if(NOT DEFINED OPENMP_INCLUDES OR NOT DEFINED OPENMP_LIBRARIES)
diff --git a/src/Core/FiniteDifferenceLibrary.c b/src/Core/FiniteDifferenceLibrary.c
index f5736e2..fbf2646 100644
--- a/src/Core/FiniteDifferenceLibrary.c
+++ b/src/Core/FiniteDifferenceLibrary.c
@@ -2,161 +2,66 @@
#include "FiniteDifferenceLibrary.h"
-DLL_EXPORT int openMPtest(void)
+DLL_EXPORT int openMPtest(int nThreads)
{
- int nThreads;
+ omp_set_num_threads(nThreads);
+
+ int nThreads_running;
#pragma omp parallel
{
if (omp_get_thread_num() == 0)
{
- nThreads = omp_get_num_threads();
+ nThreads_running = omp_get_num_threads();
}
}
- return nThreads;
+ return nThreads_running;
}
-int fdiff_direct_neumann(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc)
+void threads_setup(int nThreads_requested, int *nThreads_current)
{
- size_t volume = nx * ny * nz;
-
- const float * inimage = inimagefull;
- float * outimageX = outimageXfull;
- float * outimageY = outimageYfull;
- float * outimageZ = outimageZfull;
-
- int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice
- int offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row
-
- long c;
- for (c = 0; c < nc; c++)
- {
#pragma omp parallel
- {
- long ind, k, j, i;
- float pix0;
- //run over all and then fix boundaries
-#pragma omp for
- for (ind = 0; ind < nx * ny * (nz - 1); ind++)
- {
- pix0 = -inimage[ind];
-
- outimageX[ind] = pix0 + inimage[ind + 1];
- outimageY[ind] = pix0 + inimage[ind + nx];
- outimageZ[ind] = pix0 + inimage[ind + nx * ny];
- }
-
-#pragma omp for
- for (ind = 0; ind < nx * (ny - 1); ind++)
- {
- pix0 = -inimage[ind + offset1];
-
- outimageX[ind + offset1] = pix0 + inimage[ind + offset1 + 1];
- outimageY[ind + offset1] = pix0 + inimage[ind + offset1 + nx];
- }
-
-#pragma omp for
- for (ind = 0; ind < nx - 1; ind++)
- {
- pix0 = -inimage[ind + offset2];
-
- outimageX[ind + offset2] = pix0 + inimage[ind + offset2 + 1];
- }
-
- //boundaries
-#pragma omp for
- for (k = 0; k < nz; k++)
- {
- for (i = 0; i < nx; i++)
- {
- outimageY[(k * ny * nx) + (ny - 1) * nx + i] = 0;
- }
- }
-
-#pragma omp for
- for (k = 0; k < nz; k++)
- {
- for (j = 0; j < ny; j++)
- {
- outimageX[k * ny * nx + j * nx + nx - 1] = 0;
- }
- }
-
- if (nz > 1)
- {
-#pragma omp for
- for (ind = 0; ind < ny * nx; ind++)
- {
- outimageZ[nx * ny * (nz - 1) + ind] = 0;
- }
- }
- }
-
- inimage += volume;
- outimageX += volume;
- outimageY += volume;
- outimageZ += volume;
- }
-
-
- //now the rest of the channels
- if (nc > 1)
{
- long ind;
-
- for (c = 0; c < nc - 1; c++)
- {
- float * outimageC = outimageCfull + c * volume;
- const float * inimage = inimagefull + c * volume;
-
-#pragma omp parallel for
- for (ind = 0; ind < volume; ind++)
- {
- outimageC[ind] = -inimage[ind] + inimage[ind + volume];
- }
- }
-
-#pragma omp parallel for
- for (ind = 0; ind < volume; ind++)
+ if (omp_get_thread_num() == 0)
{
- outimageCfull[(nc - 1) * volume + ind] = 0;
+ *nThreads_current = omp_get_num_threads();
}
}
-
- return 0;
+ omp_set_num_threads(nThreads_requested);
}
-int fdiff_direct_neumann_L21(const float *inimagefull, float *outimageL21normfull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc)
+
+int fdiff_direct_neumann(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc)
{
size_t volume = nx * ny * nz;
- int z_dim = nz - 1;
-
- const float * inimage = inimagefull;
- float * outimageX = outimageXfull;
- float * outimageY = outimageYfull;
- float * outimageZ = outimageZfull;
- float * outimageL21norm = outimageL21normfull;
+ const float *inimage = inimagefull;
+ float *outimageX = outimageXfull;
+ float *outimageY = outimageYfull;
+ float *outimageZ = outimageZfull;
- int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice
+ int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice
int offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row
long c;
+
+ int z_dim = nz > 1 ? 1: 0;
+
for (c = 0; c < nc; c++)
{
#pragma omp parallel
{
- long ind, k;
+ long ind, k, j, i;
float pix0;
//run over all and then fix boundaries
-#pragma omp for
+
+#pragma omp for nowait
for (ind = 0; ind < nx * ny * (nz - 1); ind++)
{
pix0 = -inimage[ind];
-
outimageX[ind] = pix0 + inimage[ind + 1];
outimageY[ind] = pix0 + inimage[ind + nx];
outimageZ[ind] = pix0 + inimage[ind + nx * ny];
}
-#pragma omp for
+#pragma omp for nowait
for (ind = 0; ind < nx * (ny - 1); ind++)
{
pix0 = -inimage[ind + offset1];
@@ -169,29 +74,26 @@ int fdiff_direct_neumann_L21(const float *inimagefull, float *outimageL21normful
for (ind = 0; ind < nx - 1; ind++)
{
pix0 = -inimage[ind + offset2];
-
outimageX[ind + offset2] = pix0 + inimage[ind + offset2 + 1];
}
//boundaries
-#pragma omp for
+#pragma omp for nowait
for (k = 0; k < nz; k++)
{
- for (int i = 0; i < nx; i++)
+ for (i = 0; i < nx; i++)
{
outimageY[(k * ny * nx) + (ny - 1) * nx + i] = 0;
}
}
-
-#pragma omp for
+#pragma omp for nowait
for (k = 0; k < nz; k++)
{
- for (int j = 0; j < ny; j++)
+ for (j = 0; j < ny; j++)
{
outimageX[k * ny * nx + j * nx + nx - 1] = 0;
}
}
-
if (z_dim)
{
#pragma omp for
@@ -199,34 +101,15 @@ int fdiff_direct_neumann_L21(const float *inimagefull, float *outimageL21normful
{
outimageZ[nx * ny * (nz - 1) + ind] = 0;
}
-
-#pragma omp for
- for (ind = 0; ind < volume; ind++)
- {
- outimageL21norm[ind] = outimageX[ind] * outimageX[ind] + outimageY[ind] * outimageY[ind] + outimageZ[ind] * outimageZ[ind];
- }
-
- }
- else
- {
-
-#pragma omp for
- for (ind = 0; ind < volume; ind++)
- {
- outimageL21norm[ind] = outimageX[ind] * outimageX[ind] + outimageY[ind] * outimageY[ind];
- }
}
}
-
inimage += volume;
outimageX += volume;
outimageY += volume;
outimageZ += volume;
- outimageL21norm += volume;
}
-
//now the rest of the channels
if (nc > 1)
{
@@ -234,18 +117,13 @@ int fdiff_direct_neumann_L21(const float *inimagefull, float *outimageL21normful
for (c = 0; c < nc - 1; c++)
{
- float * outimageC = outimageCfull + c * volume;
- float * outimageL21norm = outimageL21normfull + c * volume;
- const float * inimage = inimagefull + c * volume;
+ float *outimageC = outimageCfull + c * volume;
+ const float *inimage = inimagefull + c * volume;
#pragma omp parallel for
for (ind = 0; ind < volume; ind++)
{
outimageC[ind] = -inimage[ind] + inimage[ind + volume];
- outimageL21norm[ind] += outimageC[ind] * outimageC[ind];
-
- //sqrt
- outimageL21norm[ind] = (float)sqrt(outimageL21norm[ind]);
}
}
@@ -262,15 +140,14 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
{
size_t volume = nx * ny * nz;
- const float * inimage = inimagefull;
- float * outimageX = outimageXfull;
- float * outimageY = outimageYfull;
- float * outimageZ = outimageZfull;
+ const float *inimage = inimagefull;
+ float *outimageX = outimageXfull;
+ float *outimageY = outimageYfull;
+ float *outimageZ = outimageZfull;
- int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice
+ int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice
int offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row
-
long c;
for (c = 0; c < nc; c++)
{
@@ -280,7 +157,7 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
long ind, k;
float pix0;
//run over all and then fix boundaries
-#pragma omp for
+#pragma omp for nowait
for (ind = 0; ind < nx * ny * (nz - 1); ind++)
{
pix0 = -inimage[ind];
@@ -290,7 +167,7 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
outimageZ[ind] = pix0 + inimage[ind + nx * ny];
}
-#pragma omp for
+#pragma omp for nowait
for (ind = 0; ind < nx * (ny - 1); ind++)
{
pix0 = -inimage[ind + offset1];
@@ -308,7 +185,7 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
}
//boundaries
-#pragma omp for
+#pragma omp for nowait
for (k = 0; k < nz; k++)
{
for (int i = 0; i < nx; i++)
@@ -320,7 +197,7 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
}
}
-#pragma omp for
+#pragma omp for nowait
for (k = 0; k < nz; k++)
{
for (int j = 0; j < ny; j++)
@@ -332,10 +209,9 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
}
}
-
if (nz > 1)
{
-#pragma omp for
+#pragma omp for nowait
for (ind = 0; ind < ny * nx; ind++)
{
outimageZ[nx * ny * (nz - 1) + ind] = -inimage[nx * ny * (nz - 1) + ind] + inimage[ind];
@@ -356,8 +232,8 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
for (c = 0; c < nc - 1; c++)
{
- float * outimageC = outimageCfull + c * volume;
- const float * inimage = inimagefull + c * volume;
+ float *outimageC = outimageCfull + c * volume;
+ const float *inimage = inimagefull + c * volume;
#pragma omp parallel for
for (ind = 0; ind < volume; ind++)
@@ -383,18 +259,18 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
//assumes nx and ny > 1
int z_dim = nz - 1;
- float * outimage = outimagefull;
- const float * inimageX = inimageXfull;
- const float * inimageY = inimageYfull;
- const float * inimageZ = inimageZfull;
+ float *outimage = outimagefull;
+ const float *inimageX = inimageXfull;
+ const float *inimageY = inimageYfull;
+ const float *inimageZ = inimageZfull;
- float * tempX = (float*)malloc(volume * sizeof(float));
- float * tempY = (float*)malloc(volume * sizeof(float));
- float * tempZ;
-
- if(z_dim)
+ float *tempX = (float *)malloc(volume * sizeof(float));
+ float *tempY = (float *)malloc(volume * sizeof(float));
+ float *tempZ;
+
+ if (z_dim)
{
- tempZ = (float*)malloc(volume * sizeof(float));
+ tempZ = (float *)malloc(volume * sizeof(float));
}
long c;
@@ -413,7 +289,6 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
for (ind = nx; ind < nx * ny * nz; ind++)
{
tempY[ind] = -inimageY[ind] + inimageY[ind - nx];
-
}
//boundaries
@@ -442,7 +317,7 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
for (ind = nx * ny; ind < nx * ny * nz; ind++)
{
tempZ[ind] = -inimageZ[ind] + inimageZ[ind - nx * ny];
- }
+ }
#pragma omp for
for (ind = 0; ind < ny * nx; ind++)
{
@@ -454,7 +329,6 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
{
outimage[ind] = tempX[ind] + tempY[ind] + tempZ[ind];
}
-
}
else
{
@@ -464,7 +338,6 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
outimage[ind] = tempX[ind] + tempY[ind];
}
}
-
}
outimage += volume;
@@ -475,10 +348,9 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
free(tempX);
free(tempY);
- if(z_dim)
+ if (z_dim)
free(tempZ);
-
// //now the rest of the channels
if (nc > 1)
{
@@ -500,7 +372,6 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
outimagefull[ind] += -inimageCfull[ind];
outimagefull[(nc - 1) * volume + ind] += inimageCfull[(nc - 2) * volume + ind];
}
-
}
return 0;
@@ -513,18 +384,18 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
//assumes nx and ny > 1
int z_dim = nz - 1;
- float * outimage = outimagefull;
- const float * inimageX = inimageXfull;
- const float * inimageY = inimageYfull;
- const float * inimageZ = inimageZfull;
+ float *outimage = outimagefull;
+ const float *inimageX = inimageXfull;
+ const float *inimageY = inimageYfull;
+ const float *inimageZ = inimageZfull;
- float * tempX = (float*)malloc(volume * sizeof(float));
- float * tempY = (float*)malloc(volume * sizeof(float));
- float * tempZ;
+ float *tempX = (float *)malloc(volume * sizeof(float));
+ float *tempY = (float *)malloc(volume * sizeof(float));
+ float *tempZ;
if (z_dim)
{
- tempZ = (float*)malloc(volume * sizeof(float));
+ tempZ = (float *)malloc(volume * sizeof(float));
}
long c;
@@ -583,7 +454,6 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
{
outimage[ind] = tempX[ind] + tempY[ind] + tempZ[ind];
}
-
}
else
{
@@ -593,7 +463,6 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
outimage[ind] = tempX[ind] + tempY[ind];
}
}
-
}
outimage += volume;
@@ -604,7 +473,7 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
free(tempX);
free(tempY);
- if(z_dim)
+ if (z_dim)
free(tempZ);
//now the rest of the channels
@@ -632,9 +501,11 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
return 0;
}
-
-DLL_EXPORT int fdiff4D(float *imagefull, float *gradCfull, float *gradZfull, float *gradYfull, float *gradXfull, long nc, long nz, long ny, long nx, int boundary, int direction)
+DLL_EXPORT int fdiff4D(float *imagefull, float *gradCfull, float *gradZfull, float *gradYfull, float *gradXfull, long nc, long nz, long ny, long nx, int boundary, int direction, int nThreads)
{
+ int nThreads_initial;
+ threads_setup(nThreads, &nThreads_initial);
+
if (boundary)
{
if (direction)
@@ -643,17 +514,21 @@ DLL_EXPORT int fdiff4D(float *imagefull, float *gradCfull, float *gradZfull, flo
fdiff_adjoint_periodic(imagefull, gradXfull, gradYfull, gradZfull, gradCfull, nx, ny, nz, nc);
}
else
- {
+ {
if (direction)
fdiff_direct_neumann(imagefull, gradXfull, gradYfull, gradZfull, gradCfull, nx, ny, nz, nc);
else
fdiff_adjoint_neumann(imagefull, gradXfull, gradYfull, gradZfull, gradCfull, nx, ny, nz, nc);
}
-
+
+ omp_set_num_threads(nThreads_initial);
return 0;
}
-DLL_EXPORT int fdiff3D(float *imagefull, float *gradZfull, float *gradYfull, float *gradXfull, long nz, long ny, long nx, int boundary, int direction)
+DLL_EXPORT int fdiff3D(float *imagefull, float *gradZfull, float *gradYfull, float *gradXfull, long nz, long ny, long nx, int boundary, int direction, int nThreads)
{
+ int nThreads_initial;
+ threads_setup(nThreads, &nThreads_initial);
+
if (boundary)
{
if (direction)
@@ -669,10 +544,14 @@ DLL_EXPORT int fdiff3D(float *imagefull, float *gradZfull, float *gradYfull, flo
fdiff_adjoint_neumann(imagefull, gradXfull, gradYfull, gradZfull, NULL, nx, ny, nz, 1);
}
+ omp_set_num_threads(nThreads_initial);
return 0;
}
-DLL_EXPORT int fdiff2D(float *imagefull, float *gradYfull, float *gradXfull, long ny, long nx, int boundary, int direction)
+DLL_EXPORT int fdiff2D(float *imagefull, float *gradYfull, float *gradXfull, long ny, long nx, int boundary, int direction, int nThreads)
{
+ int nThreads_initial;
+ threads_setup(nThreads, &nThreads_initial);
+
if (boundary)
{
if (direction)
@@ -688,5 +567,7 @@ DLL_EXPORT int fdiff2D(float *imagefull, float *gradYfull, float *gradXfull, lon
fdiff_adjoint_neumann(imagefull, gradXfull, gradYfull, NULL, NULL, nx, ny, 1, 1);
}
+ omp_set_num_threads(nThreads_initial);
return 0;
}
+