summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDaniil Kazantsev <dkazanc@hotmail.com>2019-09-26 22:02:42 +0100
committerDaniil Kazantsev <dkazanc@hotmail.com>2019-09-26 22:02:42 +0100
commit304db3ba12a12870f0d1d7cf94bc7d9aedca95c4 (patch)
treef6751d24c3c99d091fdb3d9a4bdba8c2af07ae5e /src
parent98e14b257b2e9f8d89e34a5ed01c17a1ab0d8652 (diff)
parent4fb89fcc4719f24b38a777ed6c7204f5761770ad (diff)
downloadregularization-304db3ba12a12870f0d1d7cf94bc7d9aedca95c4.tar.gz
regularization-304db3ba12a12870f0d1d7cf94bc7d9aedca95c4.tar.bz2
regularization-304db3ba12a12870f0d1d7cf94bc7d9aedca95c4.tar.xz
regularization-304db3ba12a12870f0d1d7cf94bc7d9aedca95c4.zip
Merge branch 'master' into fgp_cpu_resort_for
Diffstat (limited to 'src')
-rw-r--r--src/Core/regularisers_CPU/Nonlocal_TV_core.c73
-rw-r--r--src/Core/regularisers_CPU/Nonlocal_TV_core.h18
-rw-r--r--src/Core/regularisers_CPU/PatchSelect_core.c164
-rw-r--r--src/Core/regularisers_CPU/PatchSelect_core.h7
-rw-r--r--src/Core/regularisers_GPU/PatchSelect_GPU_core.cu368
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c30
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c24
-rw-r--r--src/Python/src/cpu_regularisers.pyx8
8 files changed, 329 insertions, 363 deletions
diff --git a/src/Core/regularisers_CPU/Nonlocal_TV_core.c b/src/Core/regularisers_CPU/Nonlocal_TV_core.c
index c4c9118..de1c173 100644
--- a/src/Core/regularisers_CPU/Nonlocal_TV_core.c
+++ b/src/Core/regularisers_CPU/Nonlocal_TV_core.c
@@ -1,11 +1,11 @@
/*
* This work is part of the Core Imaging Library developed by
* Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC and Diamond Light Source Ltd.
+ * Facilities Council, STFC and Diamond Light Source Ltd.
*
* Copyright 2017 Daniil Kazantsev
* Copyright 2017 Srikanth Nagella, Edoardo Pasca
- * Copyright 2018 Diamond Light Source Ltd.
+ * Copyright 2018 Diamond Light Source Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -31,50 +31,55 @@
* 2. AR_i - indeces of i neighbours
* 3. AR_j - indeces of j neighbours
* 4. AR_k - indeces of k neighbours (0 - for 2D case)
- * 5. Weights_ij(k) - associated weights
+ * 5. Weights_ij(k) - associated weights
* 6. regularisation parameter
- * 7. iterations number
-
+ * 7. iterations number
+
* Output:
- * 1. denoised image/volume
+ * 1. denoised image/volume
* Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060.
-
+
*/
/*****************************************************************************/
-float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb)
+float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb, int switchM)
{
long i, j, k;
int iter;
lambdaReg = 1.0f/lambdaReg;
-
+
/*****2D INPUT *****/
if (dimZ == 0) {
copyIm(A_orig, Output, (long)(dimX), (long)(dimY), 1l);
/* for each pixel store indeces of the most similar neighbours (patches) */
- for(iter=0; iter<IterNumb; iter++) {
+ for(iter=0; iter<IterNumb; iter++) {
#pragma omp parallel for shared (A_orig, Output, Weights, H_i, H_j, iter) private(i,j)
+ for(j=0; j<(long)(dimY); j++) {
for(i=0; i<(long)(dimX); i++) {
- for(j=0; j<(long)(dimY); j++) {
/*NLM_H1_2D(Output, A_orig, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambdaReg);*/ /* NLM - H1 penalty */
- NLM_TV_2D(Output, A_orig, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambdaReg); /* NLM - TV penalty */
+ if (switchM == 1) {
+ NLM_TV_2D(Output, A_orig, H_j, H_i, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambdaReg); /* NLM - TV penalty */
+ }
+ else {
+ NLM_TV_2D(Output, A_orig, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambdaReg); /* NLM - TV penalty */
+ }
}}
}
- }
+ }
else {
/*****3D INPUT *****/
copyIm(A_orig, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
/* for each pixel store indeces of the most similar neighbours (patches) */
- for(iter=0; iter<IterNumb; iter++) {
+ for(iter=0; iter<IterNumb; iter++) {
#pragma omp parallel for shared (A_orig, Output, Weights, H_i, H_j, H_k, iter) private(i,j,k)
for(i=0; i<(long)(dimX); i++) {
- for(j=0; j<(long)(dimY); j++) {
+ for(j=0; j<(long)(dimY); j++) {
for(k=0; k<(long)(dimZ); k++) {
/* NLM_H1_3D(Output, A_orig, H_i, H_j, H_k, Weights, i, j, k, dimX, dimY, dimZ, NumNeighb, lambdaReg); */ /* NLM - H1 penalty */
- NLM_TV_3D(Output, A_orig, H_i, H_j, H_k, Weights, i, j, k, (long)(dimX), (long)(dimY), (long)(dimZ), NumNeighb, lambdaReg); /* NLM - TV penalty */
- }}}
- }
+ NLM_TV_3D(Output, A_orig, H_i, H_j, H_k, Weights, i, j, k, (long)(dimX), (long)(dimY), (long)(dimZ), NumNeighb, lambdaReg); /* NLM - TV penalty */
+ }}}
+ }
}
return *Output;
}
@@ -82,9 +87,9 @@ float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, un
/***********<<<<Main Function for NLM - H1 penalty>>>>**********/
float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg)
{
- long x, i1, j1, index, index_m;
+ long x, i1, j1, index, index_m;
float value = 0.0f, normweight = 0.0f;
-
+
index_m = j*dimX+i;
for(x=0; x < NumNeighb; x++) {
index = (dimX*dimY*x) + j*dimX+i;
@@ -99,9 +104,9 @@ float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_
/*3D version*/
float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg)
{
- long x, i1, j1, k1, index;
+ long x, i1, j1, k1, index;
float value = 0.0f, normweight = 0.0f;
-
+
for(x=0; x < NumNeighb; x++) {
index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
i1 = H_i[index];
@@ -109,7 +114,7 @@ float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_
k1 = H_k[index];
value += A[(dimX*dimY*k1) + j1*dimX+i1]*Weights[index];
normweight += Weights[index];
- }
+ }
A[(dimX*dimY*k) + j*dimX+i] = (lambdaReg*A_orig[(dimX*dimY*k) + j*dimX+i] + value)/(lambdaReg + normweight);
return *A;
}
@@ -118,37 +123,37 @@ float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_
/***********<<<<Main Function for NLM - TV penalty>>>>**********/
float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg)
{
- long x, i1, j1, index, index_m;
+ long x, i1, j1, index, index_m;
float value = 0.0f, normweight = 0.0f, NLgrad_magn = 0.0f, NLCoeff;
-
+
index_m = j*dimX+i;
-
+
for(x=0; x < NumNeighb; x++) {
index = (dimX*dimY*x) + j*dimX+i; /*c*/
i1 = H_i[index];
j1 = H_j[index];
NLgrad_magn += powf((A[j1*dimX+i1] - A[index_m]),2)*Weights[index];
}
-
+
NLgrad_magn = sqrtf(NLgrad_magn); /*Non Local Gradients Magnitude */
NLCoeff = 2.0f*(1.0f/(NLgrad_magn + EPS));
-
+
for(x=0; x < NumNeighb; x++) {
index = (dimX*dimY*x) + j*dimX+i; /*c*/
i1 = H_i[index];
j1 = H_j[index];
value += A[j1*dimX+i1]*NLCoeff*Weights[index];
normweight += Weights[index]*NLCoeff;
- }
+ }
A[index_m] = (lambdaReg*A_orig[index_m] + value)/(lambdaReg + normweight);
return *A;
}
/*3D version*/
float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg)
{
- long x, i1, j1, k1, index;
+ long x, i1, j1, k1, index;
float value = 0.0f, normweight = 0.0f, NLgrad_magn = 0.0f, NLCoeff;
-
+
for(x=0; x < NumNeighb; x++) {
index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
i1 = H_i[index];
@@ -156,10 +161,10 @@ float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_
k1 = H_k[index];
NLgrad_magn += powf((A[(dimX*dimY*k1) + j1*dimX+i1] - A[(dimX*dimY*k1) + j*dimX+i]),2)*Weights[index];
}
-
+
NLgrad_magn = sqrtf(NLgrad_magn); /*Non Local Gradients Magnitude */
NLCoeff = 2.0f*(1.0f/(NLgrad_magn + EPS));
-
+
for(x=0; x < NumNeighb; x++) {
index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
i1 = H_i[index];
@@ -167,7 +172,7 @@ float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_
k1 = H_k[index];
value += A[(dimX*dimY*k1) + j1*dimX+i1]*NLCoeff*Weights[index];
normweight += Weights[index]*NLCoeff;
- }
+ }
A[(dimX*dimY*k) + j*dimX+i] = (lambdaReg*A_orig[(dimX*dimY*k) + j*dimX+i] + value)/(lambdaReg + normweight);
return *A;
}
diff --git a/src/Core/regularisers_CPU/Nonlocal_TV_core.h b/src/Core/regularisers_CPU/Nonlocal_TV_core.h
index 6d55101..18f6724 100644
--- a/src/Core/regularisers_CPU/Nonlocal_TV_core.h
+++ b/src/Core/regularisers_CPU/Nonlocal_TV_core.h
@@ -1,11 +1,11 @@
/*
* This work is part of the Core Imaging Library developed by
* Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC and Diamond Light Source Ltd.
+ * Facilities Council, STFC and Diamond Light Source Ltd.
*
* Copyright 2017 Daniil Kazantsev
* Copyright 2017 Srikanth Nagella, Edoardo Pasca
- * Copyright 2018 Diamond Light Source Ltd.
+ * Copyright 2018 Diamond Light Source Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -39,19 +39,19 @@
* 2. AR_i - indeces of i neighbours
* 3. AR_j - indeces of j neighbours
* 4. AR_k - indeces of k neighbours (0 - for 2D case)
- * 5. Weights_ij(k) - associated weights
+ * 5. Weights_ij(k) - associated weights
* 6. regularisation parameter
- * 7. iterations number
-
+ * 7. iterations number
+
* Output:
- * 1. denoised image/volume
- * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060.
+ * 1. denoised image/volume
+ * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060.
*/
-
+
#ifdef __cplusplus
extern "C" {
#endif
-CCPI_EXPORT float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb);
+CCPI_EXPORT float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb, int switchM);
CCPI_EXPORT float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg);
CCPI_EXPORT float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg);
CCPI_EXPORT float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg);
diff --git a/src/Core/regularisers_CPU/PatchSelect_core.c b/src/Core/regularisers_CPU/PatchSelect_core.c
index cf5cdc7..8581797 100644
--- a/src/Core/regularisers_CPU/PatchSelect_core.c
+++ b/src/Core/regularisers_CPU/PatchSelect_core.c
@@ -1,11 +1,11 @@
/*
* This work is part of the Core Imaging Library developed by
* Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC and Diamond Light Source Ltd.
+ * Facilities Council, STFC and Diamond Light Source Ltd.
*
* Copyright 2017 Daniil Kazantsev
* Copyright 2017 Srikanth Nagella, Edoardo Pasca
- * Copyright 2018 Diamond Light Source Ltd.
+ * Copyright 2018 Diamond Light Source Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -44,27 +44,27 @@
* 4. Weights_ijk - associated weights
*/
-void swap(float *xp, float *yp)
-{
- float temp = *xp;
- *xp = *yp;
- *yp = temp;
-}
+void swap(float *xp, float *yp)
+{
+ float temp = *xp;
+ *xp = *yp;
+ *yp = temp;
+}
-void swapUS(unsigned short *xp, unsigned short *yp)
-{
- unsigned short temp = *xp;
- *xp = *yp;
- *yp = temp;
-}
+void swapUS(unsigned short *xp, unsigned short *yp)
+{
+ unsigned short temp = *xp;
+ *xp = *yp;
+ *yp = temp;
+}
/**************************************************/
-float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM)
+float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h)
{
int counterG;
long i, j, k;
float *Eucl_Vec, h2;
- h2 = h*h;
+ h2 = h*h;
/****************2D INPUT ***************/
if (dimZ == 0) {
/* generate a 2D Gaussian kernel for NLM procedure */
@@ -76,23 +76,14 @@ float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, u
counterG++;
}} /*main neighb loop */
/* for each pixel store indeces of the most similar neighbours (patches) */
- if (switchM == 1) {
-#pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j)
- for(i=0; i<(long)(dimX); i++) {
- for(j=0; j<(long)(dimY); j++) {
- Indeces2D_p(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
- }}
- }
- else {
#pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j)
for(i=0; i<(long)(dimX); i++) {
for(j=0; j<(long)(dimY); j++) {
Indeces2D(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
}}
- }
}
else {
- /****************3D INPUT ***************/
+ /****************3D INPUT ***************/
/* generate a 3D Gaussian kernel for NLM procedure */
Eucl_Vec = (float*) calloc ((2*SimilarWin+1)*(2*SimilarWin+1)*(2*SimilarWin+1),sizeof(float));
counterG = 0;
@@ -101,25 +92,15 @@ float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, u
for(k=-SimilarWin; k<=SimilarWin; k++) {
Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2) + pow(((float) k), 2))/(2*SimilarWin*SimilarWin*SimilarWin));
counterG++;
- }}} /*main neighb loop */
-
+ }}} /*main neighb loop */
+
/* for each voxel store indeces of the most similar neighbours (patches) */
- if (switchM == 1) {
#pragma omp parallel for shared (A, Weights, H_i, H_j, H_k) private(i,j,k)
for(i=0; i<dimX; i++) {
for(j=0; j<dimY; j++) {
for(k=0; k<dimZ; k++) {
- Indeces3D(A, H_i, H_j, H_k, Weights, j, i, (k), (dimX), (dimY), (dimZ), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
+ Indeces3D(A, H_i, H_j, H_k, Weights, i, j, k, (long)(dimX), (long)(dimY), (long)(dimZ), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
}}}
- }
- else {
-#pragma omp parallel for shared (A, Weights, H_i, H_j, H_k) private(i,j,k)
- for(i=0; i<dimX; i++) {
- for(j=0; j<dimY; j++) {
- for(k=0; k<dimZ; k++) {
- Indeces3D(A, H_i, H_j, H_k, Weights, (i), (j), (k), (dimX), (dimY), (dimZ), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
- }}}
- }
}
free(Eucl_Vec);
return 1;
@@ -130,78 +111,13 @@ float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *W
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG;
float *Weight_Vec, normsum;
unsigned short *ind_i, *ind_j;
-
- sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1);
-
- Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
- ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
- ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-
- counter = 0;
- for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
- for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
- i1 = i+i_m;
- j1 = j+j_m;
- if (((i1 >= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY))) {
- normsum = 0.0f; counterG = 0;
- for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
- for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
- i2 = i1 + i_c;
- j2 = j1 + j_c;
- i3 = i + i_c;
- j3 = j + j_c;
- if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY))) {
- if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY))) {
- normsum += Eucl_Vec[counterG]*pow(Aorig[j3*dimX + (i3)] - Aorig[j2*dimX + (i2)], 2);
- counterG++;
- }}
-
- }}
- /* writing temporarily into vectors */
- if (normsum > EPS) {
- Weight_Vec[counter] = expf(-normsum/h2);
- ind_i[counter] = i1;
- ind_j[counter] = j1;
- counter++;
- }
- }
- }}
- /* do sorting to choose the most prominent weights [HIGH to LOW] */
- /* and re-arrange indeces accordingly */
- for (x = 0; x < counter-1; x++) {
- for (y = 0; y < counter-x-1; y++) {
- if (Weight_Vec[y] < Weight_Vec[y+1]) {
- swap(&Weight_Vec[y], &Weight_Vec[y+1]);
- swapUS(&ind_i[y], &ind_i[y+1]);
- swapUS(&ind_j[y], &ind_j[y+1]);
- }
- }
- }
- /*sorting loop finished*/
- /*now select the NumNeighb more prominent weights and store into pre-allocated arrays */
- for(x=0; x < NumNeighb; x++) {
- index = (dimX*dimY*x) + j*dimX+i;
- H_i[index] = ind_i[x];
- H_j[index] = ind_j[x];
- Weights[index] = Weight_Vec[x];
- }
- free(ind_i);
- free(ind_j);
- free(Weight_Vec);
- return 1;
-}
-float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
-{
- long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG;
- float *Weight_Vec, normsum;
- unsigned short *ind_i, *ind_j;
-
+
sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1);
-
+
Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-
+
counter = 0;
for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
@@ -217,11 +133,9 @@ float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float
j3 = j + j_c;
if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY))) {
if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY))) {
- //normsum += Eucl_Vec[counterG]*pow(Aorig[j3*dimX + (i3)] - Aorig[j2*dimX + (i2)], 2);
- normsum += Eucl_Vec[counterG]*pow(Aorig[i3*dimY + (j3)] - Aorig[i2*dimY + (j2)], 2);
+ normsum += Eucl_Vec[counterG]*powf(Aorig[j3*dimX + (i3)] - Aorig[j2*dimX + (i2)], 2);
counterG++;
}}
-
}}
/* writing temporarily into vectors */
if (normsum > EPS) {
@@ -232,26 +146,25 @@ float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float
}
}
}}
- /* do sorting to choose the most prominent weights [HIGH to LOW] */
+ /* do sorting to choose the most prominent weights [HIGH to LOW] */
/* and re-arrange indeces accordingly */
for (x = 0; x < counter-1; x++) {
for (y = 0; y < counter-x-1; y++) {
if (Weight_Vec[y] < Weight_Vec[y+1]) {
- swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+ swap(&Weight_Vec[y], &Weight_Vec[y+1]);
swapUS(&ind_i[y], &ind_i[y+1]);
- swapUS(&ind_j[y], &ind_j[y+1]);
+ swapUS(&ind_j[y], &ind_j[y+1]);
}
}
}
- /*sorting loop finished*/
-
- /*now select the NumNeighb more prominent weights and store into pre-allocated arrays */
+ /*sorting loop finished*/
+ /*now select the NumNeighb more prominent weights and store into pre-allocated arrays */
for(x=0; x < NumNeighb; x++) {
- index = (dimX*dimY*x) + i*dimY+j;
+ index = (dimX*dimY*x) + j*dimX+i;
H_i[index] = ind_i[x];
H_j[index] = ind_j[x];
Weights[index] = Weight_Vec[x];
- }
+ }
free(ind_i);
free(ind_j);
free(Weight_Vec);
@@ -263,14 +176,14 @@ float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned
long i1, j1, k1, i_m, j_m, k_m, i_c, j_c, k_c, i2, j2, k2, i3, j3, k3, counter, x, y, index, sizeWin_tot, counterG;
float *Weight_Vec, normsum, temp;
unsigned short *ind_i, *ind_j, *ind_k, temp_i, temp_j, temp_k;
-
+
sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1)*(2*SearchWindow + 1);
-
+
Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
ind_k = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-
+
counter = 0l;
for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
@@ -324,22 +237,21 @@ float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned
ind_k[y] = temp_k;
}}}
/*sorting loop finished*/
-
+
/*now select the NumNeighb more prominent weights and store into arrays */
for(x=0; x < NumNeighb; x++) {
index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
-
+
H_i[index] = ind_i[x];
H_j[index] = ind_j[x];
H_k[index] = ind_k[x];
-
+
Weights[index] = Weight_Vec[x];
}
-
+
free(ind_i);
free(ind_j);
free(ind_k);
free(Weight_Vec);
return 1;
}
-
diff --git a/src/Core/regularisers_CPU/PatchSelect_core.h b/src/Core/regularisers_CPU/PatchSelect_core.h
index ddaa428..ea18c26 100644
--- a/src/Core/regularisers_CPU/PatchSelect_core.h
+++ b/src/Core/regularisers_CPU/PatchSelect_core.h
@@ -1,11 +1,11 @@
/*
* This work is part of the Core Imaging Library developed by
* Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC and Diamond Light Source Ltd.
+ * Facilities Council, STFC and Diamond Light Source Ltd.
*
* Copyright 2017 Daniil Kazantsev
* Copyright 2017 Srikanth Nagella, Edoardo Pasca
- * Copyright 2018 Diamond Light Source Ltd.
+ * Copyright 2018 Diamond Light Source Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -54,9 +54,8 @@
#ifdef __cplusplus
extern "C" {
#endif
-CCPI_EXPORT float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM);
+CCPI_EXPORT float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h);
CCPI_EXPORT float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2);
-CCPI_EXPORT float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2);
CCPI_EXPORT float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimY, long dimX, long dimZ, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2);
#ifdef __cplusplus
}
diff --git a/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu
index 98c8488..fb6fa95 100644
--- a/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu
+++ b/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu
@@ -1,11 +1,11 @@
/*
* This work is part of the Core Imaging Library developed by
* Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC and Diamond Light Source Ltd.
+ * Facilities Council, STFC and Diamond Light Source Ltd.
*
* Copyright 2017 Daniil Kazantsev
* Copyright 2017 Srikanth Nagella, Edoardo Pasca
- * Copyright 2018 Diamond Light Source Ltd.
+ * Copyright 2018 Diamond Light Source Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -40,8 +40,8 @@
*/
-#define BLKXSIZE 16
-#define BLKYSIZE 16
+#define BLKXSIZE 8
+#define BLKYSIZE 4
#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
#define M_PI 3.14159265358979323846
#define EPS 1.0e-8
@@ -51,142 +51,162 @@
#define CONSTVECSIZE11 529
#define CONSTVECSIZE13 729
-__device__ void swap(float *xp, float *yp)
+__device__ void swap(float *xp, float *yp)
{
- float temp = *xp;
- *xp = *yp;
- *yp = temp;
+ float temp = *xp;
+ *xp = *yp;
+ *yp = temp;
}
-__device__ void swapUS(unsigned short *xp, unsigned short *yp)
-{
- unsigned short temp = *xp;
- *xp = *yp;
- *yp = temp;
+__device__ void swapUS(unsigned short *xp, unsigned short *yp)
+{
+ unsigned short temp = *xp;
+ *xp = *yp;
+ *yp = temp;
}
/********************************************************************************/
__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+{
- long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind;
float normsum;
-
+
float Weight_Vec[CONSTVECSIZE5];
unsigned short ind_i[CONSTVECSIZE5];
unsigned short ind_j[CONSTVECSIZE5];
+ for(ind=0; ind<CONSTVECSIZE5; ind++) {
+ Weight_Vec[ind] = 0.0;
+ ind_i[ind] = 0;
+ ind_j[ind] = 0; }
+
int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
-
- long index = i*M+j;
-
+
+ long index = i + N*j;
+
counter = 0;
for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+ i1 = i+i_m;
+ if ((i1 >= 0) && (i1 < N)) {
for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
- i1 = i+i_m;
j1 = j+j_m;
- if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+ if ((j1 >= 0) && (j1 < M)) {
normsum = 0.0f; counterG = 0;
for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+ i2 = i1 + i_c;
+ i3 = i + i_c;
+ if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) {
for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
- i2 = i1 + i_c;
j2 = j1 + j_c;
- i3 = i + i_c;
j3 = j + j_c;
- if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
- if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
- normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+ if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) {
+ normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2);
counterG++;
- }}
- }}
+ } /*if j2 j3*/
+ }
+ } /*if i2 i3*/
+ }
/* writing temporarily into vectors */
if (normsum > EPS) {
- Weight_Vec[counter] = __expf(-normsum/h2);
+ Weight_Vec[counter] = expf(-normsum/h2);
ind_i[counter] = i1;
ind_j[counter] = j1;
counter++;
}
- }
- }}
-
+ } /*if j1*/
+ }
+ } /*if i1*/
+ }
+
/* do sorting to choose the most prominent weights [HIGH to LOW] */
/* and re-arrange indeces accordingly */
for (x = 0; x < counter-1; x++) {
for (y = 0; y < counter-x-1; y++) {
if (Weight_Vec[y] < Weight_Vec[y+1]) {
- swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+ swap(&Weight_Vec[y], &Weight_Vec[y+1]);
swapUS(&ind_i[y], &ind_i[y+1]);
- swapUS(&ind_j[y], &ind_j[y+1]);
+ swapUS(&ind_j[y], &ind_j[y+1]);
}
}
- }
- /*sorting loop finished*/
- /*now select the NumNeighb more prominent weights and store into arrays */
+ }
+ /*sorting loop finished*/
+ /*now select the NumNeighb more prominent weights and store into arrays */
for(x=0; x < NumNeighb; x++) {
index2 = (N*M*x) + index;
H_i_d[index2] = ind_i[x];
H_j_d[index2] = ind_j[x];
Weights_d[index2] = Weight_Vec[x];
}
-}
+}
/********************************************************************************/
__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+{
- long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind;
float normsum;
-
+
float Weight_Vec[CONSTVECSIZE7];
unsigned short ind_i[CONSTVECSIZE7];
unsigned short ind_j[CONSTVECSIZE7];
+ for(ind=0; ind<CONSTVECSIZE7; ind++) {
+ Weight_Vec[ind] = 0.0;
+ ind_i[ind] = 0;
+ ind_j[ind] = 0; }
+
int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
-
- long index = i*M+j;
-
+
+ long index = i + N*j;
+
counter = 0;
for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+ i1 = i+i_m;
+ if ((i1 >= 0) && (i1 < N)) {
for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
- i1 = i+i_m;
j1 = j+j_m;
- if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+ if ((j1 >= 0) && (j1 < M)) {
normsum = 0.0f; counterG = 0;
for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+ i2 = i1 + i_c;
+ i3 = i + i_c;
+ if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) {
for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
- i2 = i1 + i_c;
j2 = j1 + j_c;
- i3 = i + i_c;
j3 = j + j_c;
- if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
- if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
- normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+ if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) {
+ normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2);
counterG++;
- }}
- }}
+ } /*if j2 j3*/
+ }
+ } /*if i2 i3*/
+ }
/* writing temporarily into vectors */
if (normsum > EPS) {
- Weight_Vec[counter] = __expf(-normsum/h2);
+ Weight_Vec[counter] = expf(-normsum/h2);
ind_i[counter] = i1;
ind_j[counter] = j1;
counter++;
}
- }
- }}
-
+ } /*if j1*/
+ }
+ } /*if i1*/
+ }
+
/* do sorting to choose the most prominent weights [HIGH to LOW] */
/* and re-arrange indeces accordingly */
for (x = 0; x < counter-1; x++) {
for (y = 0; y < counter-x-1; y++) {
if (Weight_Vec[y] < Weight_Vec[y+1]) {
- swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+ swap(&Weight_Vec[y], &Weight_Vec[y+1]);
swapUS(&ind_i[y], &ind_i[y+1]);
- swapUS(&ind_j[y], &ind_j[y+1]);
+ swapUS(&ind_j[y], &ind_j[y+1]);
}
}
- }
- /*sorting loop finished*/
- /*now select the NumNeighb more prominent weights and store into arrays */
+ }
+ /*sorting loop finished*/
+ /*now select the NumNeighb more prominent weights and store into arrays */
for(x=0; x < NumNeighb; x++) {
index2 = (N*M*x) + index;
H_i_d[index2] = ind_i[x];
@@ -195,39 +215,47 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne
}
}
__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+{
- long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind;
float normsum;
-
+
float Weight_Vec[CONSTVECSIZE9];
unsigned short ind_i[CONSTVECSIZE9];
unsigned short ind_j[CONSTVECSIZE9];
+ for(ind=0; ind<CONSTVECSIZE9; ind++) {
+ Weight_Vec[ind] = 0.0;
+ ind_i[ind] = 0;
+ ind_j[ind] = 0; }
+
int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
-
- long index = i*M+j;
-
+
+ long index = i + N*j;
+
counter = 0;
for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+ i1 = i+i_m;
+ if ((i1 >= 0) && (i1 < N)) {
for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
- i1 = i+i_m;
j1 = j+j_m;
- if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+ if ((j1 >= 0) && (j1 < M)) {
normsum = 0.0f; counterG = 0;
for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+ i2 = i1 + i_c;
+ i3 = i + i_c;
+ if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) {
for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
- i2 = i1 + i_c;
j2 = j1 + j_c;
- i3 = i + i_c;
j3 = j + j_c;
- if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
- if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
- normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+ if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) {
+ normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2);
counterG++;
- }}
- }}
+ } /*if j2 j3*/
+ }
+ } /*if i2 i3*/
+ }
/* writing temporarily into vectors */
if (normsum > EPS) {
Weight_Vec[counter] = expf(-normsum/h2);
@@ -235,159 +263,181 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne
ind_j[counter] = j1;
counter++;
}
+ } /*if j1*/
}
- }}
-
+ } /*if i1*/
+ }
+
/* do sorting to choose the most prominent weights [HIGH to LOW] */
/* and re-arrange indeces accordingly */
for (x = 0; x < counter-1; x++) {
for (y = 0; y < counter-x-1; y++) {
if (Weight_Vec[y] < Weight_Vec[y+1]) {
- swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+ swap(&Weight_Vec[y], &Weight_Vec[y+1]);
swapUS(&ind_i[y], &ind_i[y+1]);
- swapUS(&ind_j[y], &ind_j[y+1]);
+ swapUS(&ind_j[y], &ind_j[y+1]);
}
}
- }
- /*sorting loop finished*/
- /*now select the NumNeighb more prominent weights and store into arrays */
+ }
+ /*sorting loop finished*/
+ /*now select the NumNeighb more prominent weights and store into arrays */
for(x=0; x < NumNeighb; x++) {
index2 = (N*M*x) + index;
H_i_d[index2] = ind_i[x];
H_j_d[index2] = ind_j[x];
Weights_d[index2] = Weight_Vec[x];
- }
+ }
}
__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+{
- long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind;
float normsum;
-
+
float Weight_Vec[CONSTVECSIZE11];
unsigned short ind_i[CONSTVECSIZE11];
unsigned short ind_j[CONSTVECSIZE11];
+ for(ind=0; ind<CONSTVECSIZE11; ind++) {
+ Weight_Vec[ind] = 0.0;
+ ind_i[ind] = 0;
+ ind_j[ind] = 0; }
+
int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
-
- long index = i*M+j;
-
+
+ long index = i + N*j;
+
counter = 0;
for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+ i1 = i+i_m;
+ if ((i1 >= 0) && (i1 < N)) {
for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
- i1 = i+i_m;
j1 = j+j_m;
- if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+ if ((j1 >= 0) && (j1 < M)) {
normsum = 0.0f; counterG = 0;
for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+ i2 = i1 + i_c;
+ i3 = i + i_c;
+ if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) {
for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
- i2 = i1 + i_c;
j2 = j1 + j_c;
- i3 = i + i_c;
j3 = j + j_c;
- if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
- if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
- normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+ if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) {
+ normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2);
counterG++;
- }}
- }}
+ } /*if j2 j3*/
+ }
+ } /*if i2 i3*/
+ }
/* writing temporarily into vectors */
if (normsum > EPS) {
- Weight_Vec[counter] = __expf(-normsum/h2);
+ Weight_Vec[counter] = expf(-normsum/h2);
ind_i[counter] = i1;
ind_j[counter] = j1;
counter++;
}
- }
- }}
-
+ } /*if j1*/
+ }
+ } /*if i1*/
+ }
+
/* do sorting to choose the most prominent weights [HIGH to LOW] */
/* and re-arrange indeces accordingly */
for (x = 0; x < counter-1; x++) {
for (y = 0; y < counter-x-1; y++) {
if (Weight_Vec[y] < Weight_Vec[y+1]) {
- swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+ swap(&Weight_Vec[y], &Weight_Vec[y+1]);
swapUS(&ind_i[y], &ind_i[y+1]);
- swapUS(&ind_j[y], &ind_j[y+1]);
+ swapUS(&ind_j[y], &ind_j[y+1]);
}
}
- }
- /*sorting loop finished*/
- /*now select the NumNeighb more prominent weights and store into arrays */
+ }
+ /*sorting loop finished*/
+ /*now select the NumNeighb more prominent weights and store into arrays */
for(x=0; x < NumNeighb; x++) {
index2 = (N*M*x) + index;
H_i_d[index2] = ind_i[x];
H_j_d[index2] = ind_j[x];
Weights_d[index2] = Weight_Vec[x];
}
-}
+}
__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+{
- long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind;
float normsum;
-
+
float Weight_Vec[CONSTVECSIZE13];
unsigned short ind_i[CONSTVECSIZE13];
unsigned short ind_j[CONSTVECSIZE13];
+ for(ind=0; ind<CONSTVECSIZE13; ind++) {
+ Weight_Vec[ind] = 0.0;
+ ind_i[ind] = 0;
+ ind_j[ind] = 0; }
+
int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
-
- long index = i*M+j;
-
+
+ long index = i + N*j;
+
counter = 0;
for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+ i1 = i+i_m;
+ if ((i1 >= 0) && (i1 < N)) {
for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
- i1 = i+i_m;
j1 = j+j_m;
- if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+ if ((j1 >= 0) && (j1 < M)) {
normsum = 0.0f; counterG = 0;
for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+ i2 = i1 + i_c;
+ i3 = i + i_c;
+ if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) {
for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
- i2 = i1 + i_c;
j2 = j1 + j_c;
- i3 = i + i_c;
j3 = j + j_c;
- if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
- if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
- normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+ if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) {
+ normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2);
counterG++;
- }}
- }}
+ } /*if j2 j3*/
+ }
+ } /*if i2 i3*/
+ }
/* writing temporarily into vectors */
if (normsum > EPS) {
- Weight_Vec[counter] = __expf(-normsum/h2);
+ Weight_Vec[counter] = expf(-normsum/h2);
ind_i[counter] = i1;
ind_j[counter] = j1;
counter++;
}
- }
- }}
-
+ } /*if j1*/
+ }
+ } /*if i1*/
+ }
+
/* do sorting to choose the most prominent weights [HIGH to LOW] */
/* and re-arrange indeces accordingly */
for (x = 0; x < counter-1; x++) {
for (y = 0; y < counter-x-1; y++) {
if (Weight_Vec[y] < Weight_Vec[y+1]) {
- swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+ swap(&Weight_Vec[y], &Weight_Vec[y+1]);
swapUS(&ind_i[y], &ind_i[y+1]);
- swapUS(&ind_j[y], &ind_j[y+1]);
+ swapUS(&ind_j[y], &ind_j[y+1]);
}
}
- }
- /*sorting loop finished*/
- /*now select the NumNeighb more prominent weights and store into arrays */
+ }
+ /*sorting loop finished*/
+ /*now select the NumNeighb more prominent weights and store into arrays */
for(x=0; x < NumNeighb; x++) {
index2 = (N*M*x) + index;
H_i_d[index2] = ind_i[x];
H_j_d[index2] = ind_j[x];
Weights_d[index2] = Weight_Vec[x];
}
-}
+}
+
-
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/********************* MAIN HOST FUNCTION ******************/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
@@ -398,19 +448,19 @@ extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned shor
if (deviceCount == 0) {
fprintf(stderr, "No CUDA devices found\n");
return -1;
- }
-
+ }
+
int SearchW_full, SimilW_full, counterG, i, j;
- float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d;
+ float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d;
unsigned short *H_i_d, *H_j_d;
h2 = h*h;
-
+
dim3 dimBlock(BLKXSIZE,BLKYSIZE);
- dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
-
+ dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
+
SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */
SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */
-
+
/* generate a 2D Gaussian kernel for NLM procedure */
Eucl_Vec = (float*) calloc (SimilW_full,sizeof(float));
counterG = 0;
@@ -419,8 +469,8 @@ extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned shor
Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2))/(2.0*SimilarWin*SimilarWin));
counterG++;
}} /*main neighb loop */
-
-
+
+
/*allocate space on the device*/
checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );
checkCudaErrors( cudaMalloc((void**)&H_i_d, N*M*NumNeighb*sizeof(unsigned short)) );
@@ -430,8 +480,8 @@ extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned shor
/* copy data from the host to the device */
checkCudaErrors( cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice) );
- checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
-
+ checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
+
/********************** Run CUDA kernel here ********************/
if (SearchWindow == 5) IndexSelect2D_5_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
else if (SearchWindow == 7) IndexSelect2D_7_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
@@ -440,19 +490,19 @@ extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned shor
else if (SearchWindow == 13) IndexSelect2D_13_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
else {
fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");
- return -1;}
- checkCudaErrors(cudaPeekAtLastError() );
- checkCudaErrors(cudaDeviceSynchronize());
- /***************************************************************/
-
+ return -1;}
+ checkCudaErrors(cudaPeekAtLastError() );
+ checkCudaErrors(cudaDeviceSynchronize());
+ /***************************************************************/
+
checkCudaErrors(cudaMemcpy(H_i, H_i_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );
- checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );
- checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) );
-
-
- cudaFree(Ad);
- cudaFree(H_i_d);
- cudaFree(H_j_d);
+ checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );
+ checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) );
+
+
+ cudaFree(Ad);
+ cudaFree(H_i_d);
+ cudaFree(H_j_d);
cudaFree(Weights_d);
cudaFree(Eucl_Vec_d);
cudaDeviceReset();
diff --git a/src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c b/src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c
index 34b9915..a5fb1df 100644
--- a/src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c
+++ b/src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c
@@ -1,11 +1,11 @@
/*
* This work is part of the Core Imaging Library developed by
* Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC and Diamond Light Source Ltd.
+ * Facilities Council, STFC and Diamond Light Source Ltd.
*
* Copyright 2017 Daniil Kazantsev
* Copyright 2017 Srikanth Nagella, Edoardo Pasca
- * Copyright 2018 Diamond Light Source Ltd.
+ * Copyright 2018 Diamond Light Source Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -35,12 +35,12 @@
* 2. AR_i - indeces of i neighbours
* 3. AR_j - indeces of j neighbours
* 4. AR_k - indeces of k neighbours (0 - for 2D case)
- * 5. Weights_ij(k) - associated weights
+ * 5. Weights_ij(k) - associated weights
* 6. regularisation parameter
- * 7. iterations number
-
+ * 7. iterations number
+
* Output:
- * 1. denoised image/volume
+ * 1. denoised image/volume
* Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060.
*/
@@ -54,11 +54,11 @@ void mexFunction(
const mwSize *dim_array;
const mwSize *dim_array2;
float *A_orig, *Output=NULL, *Weights, lambda;
-
+
dim_array = mxGetDimensions(prhs[0]);
dim_array2 = mxGetDimensions(prhs[1]);
number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-
+
/*Handling Matlab input data*/
A_orig = (float *) mxGetData(prhs[0]); /* a 2D image or a set of 2D images (3D stack) */
H_i = (unsigned short *) mxGetData(prhs[1]); /* indeces of i neighbours */
@@ -67,14 +67,14 @@ void mexFunction(
Weights = (float *) mxGetData(prhs[4]); /* weights for patches */
lambda = (float) mxGetScalar(prhs[5]); /* regularisation parameter */
IterNumb = (int) mxGetScalar(prhs[6]); /* the number of iterations */
-
- dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-
+
+ dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
+
/*****2D INPUT *****/
if (number_of_dims == 2) {
- dimZ = 0;
+ dimZ = 0;
NumNeighb = dim_array2[2];
- Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+ Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
}
/*****3D INPUT *****/
/****************************************************/
@@ -82,7 +82,7 @@ void mexFunction(
NumNeighb = dim_array2[3];
Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
}
-
+
/* run the main function here */
- Nonlocal_TV_CPU_main(A_orig, Output, H_i, H_j, H_k, Weights, dimX, dimY, dimZ, NumNeighb, lambda, IterNumb);
+ Nonlocal_TV_CPU_main(A_orig, Output, H_i, H_j, H_k, Weights, dimX, dimY, dimZ, NumNeighb, lambda, IterNumb, 0);
}
diff --git a/src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c b/src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c
index 1acab29..84b08dd 100644
--- a/src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c
+++ b/src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c
@@ -1,11 +1,11 @@
/*
* This work is part of the Core Imaging Library developed by
* Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC and Diamond Light Source Ltd.
+ * Facilities Council, STFC and Diamond Light Source Ltd.
*
* Copyright 2017 Daniil Kazantsev
* Copyright 2017 Srikanth Nagella, Edoardo Pasca
- * Copyright 2018 Diamond Light Source Ltd.
+ * Copyright 2018 Diamond Light Source Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -53,14 +53,14 @@ void mexFunction(
int number_of_dims, SearchWindow, SimilarWin, NumNeighb;
mwSize dimX, dimY, dimZ;
const mwSize *dim_array;
- unsigned short *H_i=NULL, *H_j=NULL, *H_k=NULL;
+ unsigned short *H_i=NULL, *H_j=NULL, *H_k=NULL;
float *A, *Weights = NULL, h;
mwSize dim_array2[3]; /* for 2D data */
mwSize dim_array3[4]; /* for 3D data */
-
+
dim_array = mxGetDimensions(prhs[0]);
number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-
+
/*Handling Matlab input data*/
A = (float *) mxGetData(prhs[0]); /* a 2D or 3D image/volume */
SearchWindow = (int) mxGetScalar(prhs[1]); /* Large Searching window */
@@ -71,22 +71,22 @@ void mexFunction(
dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
dim_array2[0] = dimX; dim_array2[1] = dimY; dim_array2[2] = NumNeighb; /* 2D case */
dim_array3[0] = dimX; dim_array3[1] = dimY; dim_array3[2] = dimZ; dim_array3[3] = NumNeighb; /* 3D case */
-
+
/****************2D INPUT ***************/
if (number_of_dims == 2) {
- dimZ = 0;
+ dimZ = 0;
H_i = (unsigned short*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array2, mxUINT16_CLASS, mxREAL));
H_j = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(3, dim_array2, mxUINT16_CLASS, mxREAL));
Weights = (float*)mxGetPr(plhs[2] = mxCreateNumericArray(3, dim_array2, mxSINGLE_CLASS, mxREAL));
}
/****************3D INPUT ***************/
- if (number_of_dims == 3) {
+ if (number_of_dims == 3) {
H_i = (unsigned short*)mxGetPr(plhs[0] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL));
H_j = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL));
H_k = (unsigned short*)mxGetPr(plhs[2] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL));
- Weights = (float*)mxGetPr(plhs[3] = mxCreateNumericArray(4, dim_array3, mxSINGLE_CLASS, mxREAL));
+ Weights = (float*)mxGetPr(plhs[3] = mxCreateNumericArray(4, dim_array3, mxSINGLE_CLASS, mxREAL));
}
-
- PatchSelect_CPU_main(A, H_i, H_j, H_k, Weights, (long)(dimX), (long)(dimY), (long)(dimZ), SearchWindow, SimilarWin, NumNeighb, h, 0);
-
+
+ PatchSelect_CPU_main(A, H_i, H_j, H_k, Weights, (long)(dimX), (long)(dimY), (long)(dimZ), SearchWindow, SimilarWin, NumNeighb, h);
+
}
diff --git a/src/Python/src/cpu_regularisers.pyx b/src/Python/src/cpu_regularisers.pyx
index 904b4f5..4917d06 100644
--- a/src/Python/src/cpu_regularisers.pyx
+++ b/src/Python/src/cpu_regularisers.pyx
@@ -27,8 +27,8 @@ cdef extern float Diffusion_CPU_main(float *Input, float *Output, float *infovec
cdef extern float Diffus4th_CPU_main(float *Input, float *Output, float *infovector, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ);
cdef extern float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float *infovector, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int dimX, int dimY, int dimZ);
cdef extern float TNV_CPU_main(float *Input, float *u, float lambdaPar, int maxIter, float tol, int dimX, int dimY, int dimZ);
-cdef extern float PatchSelect_CPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM);
-cdef extern float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb);
+cdef extern float PatchSelect_CPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h);
+cdef extern float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb, int switchM);
cdef extern float Diffusion_Inpaint_CPU_main(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ);
cdef extern float NonlocalMarching_Inpaint_main(float *Input, unsigned char *M, float *Output, unsigned char *M_upd, int SW_increment, int iterationsNumb, int trigger, int dimX, int dimY, int dimZ);
@@ -570,7 +570,7 @@ def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
np.zeros([dims[0], dims[1],dims[2]], dtype='uint16')
# Run patch-based weight selection function
- PatchSelect_CPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], 0, searchwindow, patchwindow, neighbours, edge_parameter, 1)
+ PatchSelect_CPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], 0, searchwindow, patchwindow, neighbours, edge_parameter)
return H_i, H_j, Weights
"""
def PatchSel_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
@@ -625,7 +625,7 @@ def NLTV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
np.zeros([dims[0],dims[1]], dtype='float32')
# Run nonlocal TV regularisation
- Nonlocal_TV_CPU_main(&inputData[0,0], &outputData[0,0], &H_i[0,0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[1], dims[0], 0, neighbours, regularisation_parameter, iterations)
+ Nonlocal_TV_CPU_main(&inputData[0,0], &outputData[0,0], &H_i[0,0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[1], dims[0], 0, neighbours, regularisation_parameter, iterations, 1)
return outputData
#*********************Inpainting WITH****************************#