summaryrefslogtreecommitdiffstats
path: root/patches/ccpi-regularisation-toolkit-fast-fgptv/FGP_TV_core.c
diff options
context:
space:
mode:
Diffstat (limited to 'patches/ccpi-regularisation-toolkit-fast-fgptv/FGP_TV_core.c')
-rw-r--r--patches/ccpi-regularisation-toolkit-fast-fgptv/FGP_TV_core.c266
1 files changed, 266 insertions, 0 deletions
diff --git a/patches/ccpi-regularisation-toolkit-fast-fgptv/FGP_TV_core.c b/patches/ccpi-regularisation-toolkit-fast-fgptv/FGP_TV_core.c
new file mode 100644
index 0000000..91fd746
--- /dev/null
+++ b/patches/ccpi-regularisation-toolkit-fast-fgptv/FGP_TV_core.c
@@ -0,0 +1,266 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2019 Daniil Kazantsev
+ * Copyright 2019 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FGP_TV_core.h"
+
+/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambdaPar - regularization parameter
+ * 3. Number of iterations
+ * 4. eplsilon: tolerance constant
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: 'nonnegativity (0 is OFF by default)
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ * [2] Information vector which contains [iteration no., reached tolerance]
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ */
+
+float TV_FGP_CPU_main(float *Input, float *Output, float *infovector, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int dimX, int dimY, int dimZ)
+{
+ int ll;
+ long j, DimTotal;
+ float re, re1;
+ re = 0.0f; re1 = 0.0f;
+ float tk = 1.0f;
+ float tkp1 =1.0f;
+ int count = 0;
+
+ if (dimZ <= 1) {
+ /*2D case */
+ float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL;
+ DimTotal = (long)(dimX*dimY);
+
+ if (epsil != 0.0f) Output_prev = calloc(DimTotal, sizeof(float));
+ P1 = calloc(DimTotal, sizeof(float));
+ P2 = calloc(DimTotal, sizeof(float));
+ P1_prev = calloc(DimTotal, sizeof(float));
+ P2_prev = calloc(DimTotal, sizeof(float));
+ R1 = calloc(DimTotal, sizeof(float));
+ R2 = calloc(DimTotal, sizeof(float));
+
+ /* begin iterations */
+ for(ll=0; ll<iterationsNumb; ll++) {
+
+ if ((epsil != 0.0f) && (ll % 5 == 0)) copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
+ /* computing the gradient of the objective function */
+ Obj_func2D(Input, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
+
+ /* apply nonnegativity */
+ if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
+
+ /*Taking a step towards minus of the gradient*/
+ Grad_func2D(P1, P2, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
+
+ /* projection step */
+ Proj_func2D(P1, P2, methodTV, DimTotal);
+
+ /*updating R and t*/
+ tkp1 = (1.0f + sqrtf(1.0f + 4.0f*tk*tk))*0.5f;
+ Rupd_func2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal);
+
+ /*storing old values*/
+ copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), 1l);
+ copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), 1l);
+ tk = tkp1;
+
+ /* check early stopping criteria */
+ if ((epsil != 0.0f) && (ll % 5 == 0)) {
+ re = 0.0f; re1 = 0.0f;
+ for(j=0; j<DimTotal; j++)
+ {
+ re += powf(Output[j] - Output_prev[j],2);
+ re1 += powf(Output[j],2);
+ }
+ re = sqrtf(re)/sqrtf(re1);
+ if (re < epsil) count++;
+ if (count > 3) break;
+ }
+ }
+ if (epsil != 0.0f) free(Output_prev);
+ free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2);
+ }
+ else {
+ /*3D case*/
+ float *P1=NULL, *P2=NULL, *P3=NULL, *R1=NULL, *R2=NULL, *R3=NULL;
+ DimTotal = (long)dimX*(long)dimY*(long)dimZ;
+
+ P1 = calloc(DimTotal, sizeof(float));
+ P2 = calloc(DimTotal, sizeof(float));
+ P3 = calloc(DimTotal, sizeof(float));
+
+ R1 = calloc(DimTotal, sizeof(float));
+ R2 = calloc(DimTotal, sizeof(float));
+ R3 = calloc(DimTotal, sizeof(float));
+
+ /* begin iterations */
+ for(ll=0; ll<iterationsNumb; ll++) {
+ /* computing the gradient of the objective function */
+ re = Obj_func3D(Input, Output, R1, R2, R3, lambdaPar, nonneg, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+ tkp1 = (1.0f + sqrtf(1.0f + 4.0f*tk*tk))*0.5f;
+ All_func3D(P1, P2, P3, Output, R1, R2, R3, lambdaPar, tkp1, tk, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+ // calculate norm - stopping rules
+ if ((epsil != 0.0f) && (ll % 5 == 0)) {
+ // stop if the norm residual is less than the tolerance EPS
+ if (re < epsil) count++;
+ if (count > 3) break;
+ }
+
+ tk = tkp1;
+ }
+
+ free(P1); free(P2); free(P3); free(R1); free(R2); free(R3);
+// printf("TV iters %i (of %i)\n", ll, iterationsNumb);
+ }
+
+ /*adding info into info_vector */
+ infovector[0] = (float)(ll); /*iterations number (if stopped earlier based on tolerance)*/
+ infovector[1] = re; /* reached tolerance */
+
+ return 0;
+}
+
+float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY)
+{
+ float val1, val2;
+ long i,j,index;
+#pragma omp parallel for shared(A,D,R1,R2) private(index,i,j,val1,val2)
+ for(j=0; j<dimY; j++) {
+ for(i=0; i<dimX; i++) {
+ index = j*dimX+i;
+ /* boundary conditions */
+ if (i == 0) {val1 = 0.0f;} else {val1 = R1[j*dimX + (i-1)];}
+ if (j == 0) {val2 = 0.0f;} else {val2 = R2[(j-1)*dimX + i];}
+ D[index] = A[index] - lambda*(R1[index] + R2[index] - val1 - val2);
+ }}
+ return *D;
+}
+float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, long dimX, long dimY)
+{
+ float val1, val2, multip;
+ long i,j,index;
+ multip = (1.0f/(8.0f*lambda));
+#pragma omp parallel for shared(P1,P2,D,R1,R2,multip) private(index,i,j,val1,val2)
+ for(j=0; j<dimY; j++) {
+ for(i=0; i<dimX; i++) {
+ index = j*dimX+i;
+ /* boundary conditions */
+ if (i == dimX-1) val1 = 0.0f; else val1 = D[index] - D[j*dimX + (i+1)];
+ if (j == dimY-1) val2 = 0.0f; else val2 = D[index] - D[(j+1)*dimX + i];
+ P1[index] = R1[index] + multip*val1;
+ P2[index] = R2[index] + multip*val2;
+ }}
+ return 1;
+}
+float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, long DimTotal)
+{
+ long i;
+ float multip;
+ multip = ((tk-1.0f)/tkp1);
+#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i)
+ for(i=0; i<DimTotal; i++) {
+ R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
+ R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
+ }
+ return 1;
+}
+
+ // too much threads poisoning caches. there is a sweet spot
+#define n_threads 16
+
+/* 3D-case related Functions */
+/*****************************************************************/
+float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int nonneg, long dimX, long dimY, long dimZ)
+{
+ float D_new;
+ float val1, val2, val3;
+ float re = 0.0f, re1 = 0.0f;
+ long i,j,k,index;
+
+#pragma omp parallel for reduction(+ : re, re1) shared(A,D,R1,R2,R3) private(index,i,j,k,val1,val2,val3,D_new) num_threads(n_threads)
+ for(k=0; k<dimZ; k++) {
+ for(j=0; j<dimY; j++) {
+ for(i=0; i<dimX; i++) {
+ index = (dimX*dimY)*k + j*dimX+i;
+ /* boundary conditions */
+ val1 = (i == 0)?0.f:R1[(dimX*dimY)*k + j*dimX + (i-1)];
+ val2 = (j == 0)?0.f:R2[(dimX*dimY)*k + (j-1)*dimX + i];
+ val3 = (k == 0)?0.f:R3[(dimX*dimY)*(k-1) + j*dimX + i];
+
+ D_new = A[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3);
+ if ((nonneg == 1)&&(D_new < 0.0f)) D_new = 0.0f;
+
+ re += powf(D_new - D[index], 2);
+ re1 += powf(D_new, 2);
+ D[index] = D_new;
+ }}}
+
+ return sqrtf(re)/sqrtf(re1);
+}
+
+float All_func3D(float *P1_old, float *P2_old, float *P3_old, float *D, float *R1, float *R2, float *R3, float lambda, float tkp1, float tk, long dimX, long dimY, long dimZ)
+{
+ float val1, val2, val3, denom, sq_denom;
+ float P1_new, P2_new, P3_new;
+ long i,j,k, index;
+ float multip = (1.0f/(26.0f*lambda));
+ float multip2 = ((tk-1.0f)/tkp1);
+
+#pragma omp parallel for shared(P1_old,P2_old,P3_old,D,R1,R2,R3) private(index,i,j,k,val1,val2,val3,denom,sq_denom,multip,multip2,P1_new,P2_new,P3_new) num_threads(n_threads)
+ for(k=0; k<dimZ; k++) {
+ for(j=0; j<dimY; j++) {
+ for(i=0; i<dimX; i++) {
+ index = (dimX*dimY)*k + j*dimX+i;
+
+ /* boundary conditions */
+ val1 = (i == dimX-1)?0.0f: D[index] - D[(dimX*dimY)*k + j*dimX + (i+1)];
+ val2 = (j == dimY-1)?0.0f: D[index] - D[(dimX*dimY)*k + (j+1)*dimX + i];
+ val3 = (k == dimZ-1)?0.0f: D[index] - D[(dimX*dimY)*(k+1) + j*dimX + i];
+
+ P1_new = R1[index] + multip*val1;
+ P2_new = R2[index] + multip*val2;
+ P3_new = R3[index] + multip*val3;
+
+ denom = powf(P1_new,2) + powf(P2_new,2) + powf(P3_new,2);
+ if (denom > 1.0f) {
+ sq_denom = 1.0f/sqrtf(denom);
+ P1_new *= sq_denom;
+ P2_new *= sq_denom;
+ P3_new *= sq_denom;
+ }
+
+ R1[index] = P1_new + multip2*(P1_new - P1_old[index]);
+ R2[index] = P2_new + multip2*(P2_new - P2_old[index]);
+ R3[index] = P3_new + multip2*(P3_new - P3_old[index]);
+
+ P1_old[index] = P1_new;
+ P2_old[index] = P2_new;
+ P3_old[index] = P3_new;
+ }}}
+
+
+ return 1;
+}