1 files changed, 266 insertions, 0 deletions
diff --git a/patches/ccpi-regularisation-toolkit-fast-fgptv/FGP_TV_core.c b/patches/ccpi-regularisation-toolkit-fast-fgptv/FGP_TV_core.c
new file mode 100644
index 0000000..91fd746
--- /dev/null
+++ b/patches/ccpi-regularisation-toolkit-fast-fgptv/FGP_TV_core.c
@@ -0,0 +1,266 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2019 Daniil Kazantsev
+ * Copyright 2019 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FGP_TV_core.h"
+
+/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambdaPar - regularization parameter
+ * 3. Number of iterations
+ * 4. eplsilon: tolerance constant
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: 'nonnegativity (0 is OFF by default)
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ * [2] Information vector which contains [iteration no., reached tolerance]
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ */
+
+float TV_FGP_CPU_main(float *Input, float *Output, float *infovector, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int dimX, int dimY, int dimZ)
+{
+    int ll;
+    long j, DimTotal;
+    float re, re1;
+    re = 0.0f; re1 = 0.0f;
+    float tk = 1.0f;
+    float tkp1 =1.0f;
+    int count = 0;
+
+    if (dimZ <= 1) {
+        /*2D case */
+        float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL;
+        DimTotal = (long)(dimX*dimY);
+
+        if (epsil != 0.0f) Output_prev = calloc(DimTotal, sizeof(float));
+        P1 = calloc(DimTotal, sizeof(float));
+        P2 = calloc(DimTotal, sizeof(float));
+        P1_prev = calloc(DimTotal, sizeof(float));
+        P2_prev = calloc(DimTotal, sizeof(float));
+        R1 = calloc(DimTotal, sizeof(float));
+        R2 = calloc(DimTotal, sizeof(float));
+
+        /* begin iterations */
+        for(ll=0; ll<iterationsNumb; ll++) {
+
+            if ((epsil != 0.0f)  && (ll % 5 == 0)) copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
+            /* computing the gradient of the objective function */
+            Obj_func2D(Input, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
+
+            /* apply nonnegativity */
+            if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
+
+            /*Taking a step towards minus of the gradient*/
+            Grad_func2D(P1, P2, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
+
+            /* projection step */
+            Proj_func2D(P1, P2, methodTV, DimTotal);
+
+            /*updating R and t*/
+            tkp1 = (1.0f + sqrtf(1.0f + 4.0f*tk*tk))*0.5f;
+            Rupd_func2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal);
+
+            /*storing old values*/
+            copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), 1l);
+            copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), 1l);
+            tk = tkp1;
+
+            /* check early stopping criteria */
+            if ((epsil != 0.0f)  && (ll % 5 == 0)) {
+                re = 0.0f; re1 = 0.0f;
+                for(j=0; j<DimTotal; j++)
+                {
+                    re += powf(Output[j] - Output_prev[j],2);
+                    re1 += powf(Output[j],2);
+                }
+                re = sqrtf(re)/sqrtf(re1);
+                if (re < epsil)  count++;
+                if (count > 3) break;
+            }
+        }
+        if (epsil != 0.0f) free(Output_prev);
+        free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2);
+    }
+    else {
+        /*3D case*/
+        float *P1=NULL, *P2=NULL, *P3=NULL, *R1=NULL, *R2=NULL, *R3=NULL;
+        DimTotal = (long)dimX*(long)dimY*(long)dimZ;
+
+        P1 = calloc(DimTotal, sizeof(float));
+        P2 = calloc(DimTotal, sizeof(float));
+        P3 = calloc(DimTotal, sizeof(float));
+
+        R1 = calloc(DimTotal, sizeof(float));
+        R2 = calloc(DimTotal, sizeof(float));
+        R3 = calloc(DimTotal, sizeof(float));
+
+        /* begin iterations */
+        for(ll=0; ll<iterationsNumb; ll++) {
+            /* computing the gradient of the objective function */
+            re = Obj_func3D(Input, Output, R1, R2, R3, lambdaPar, nonneg, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+            tkp1 = (1.0f + sqrtf(1.0f + 4.0f*tk*tk))*0.5f;
+	    All_func3D(P1, P2, P3, Output, R1, R2, R3, lambdaPar, tkp1, tk, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+            // calculate norm - stopping rules
+            if ((epsil != 0.0f)  && (ll % 5 == 0)) {
+                // stop if the norm residual is less than the tolerance EPS
+                if (re < epsil)  count++;
+                if (count > 3) break;
+            }
+
+            tk = tkp1;
+        }
+
+        free(P1); free(P2); free(P3); free(R1); free(R2); free(R3);
+//	printf("TV iters %i (of %i)\n", ll, iterationsNumb);
+    }
+
+    /*adding info into info_vector */
+    infovector[0] = (float)(ll);  /*iterations number (if stopped earlier based on tolerance)*/
+    infovector[1] = re;  /* reached tolerance */
+
+    return 0;
+}
+
+float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY)
+{
+    float val1, val2;
+    long i,j,index;
+#pragma omp parallel for shared(A,D,R1,R2) private(index,i,j,val1,val2)
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
+            index = j*dimX+i;
+            /* boundary conditions  */
+            if (i == 0) {val1 = 0.0f;} else {val1 = R1[j*dimX + (i-1)];}
+            if (j == 0) {val2 = 0.0f;} else {val2 = R2[(j-1)*dimX + i];}
+            D[index] = A[index] - lambda*(R1[index] + R2[index] - val1 - val2);
+        }}
+    return *D;
+}
+float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda,  long dimX, long dimY)
+{
+    float val1, val2, multip;
+    long i,j,index;
+    multip = (1.0f/(8.0f*lambda));
+#pragma omp parallel for shared(P1,P2,D,R1,R2,multip) private(index,i,j,val1,val2)
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
+            index = j*dimX+i;
+            /* boundary conditions */
+            if (i == dimX-1) val1 = 0.0f; else val1 = D[index] - D[j*dimX + (i+1)];
+            if (j == dimY-1) val2 = 0.0f; else val2 = D[index] - D[(j+1)*dimX + i];
+            P1[index] = R1[index] + multip*val1;
+            P2[index] = R2[index] + multip*val2;
+        }}
+    return 1;
+}
+float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, long DimTotal)
+{
+    long i;
+    float multip;
+    multip = ((tk-1.0f)/tkp1);
+#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i)
+    for(i=0; i<DimTotal; i++) {
+        R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
+        R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
+    }
+    return 1;
+}
+
+    // too much threads poisoning caches. there is a sweet spot
+#define n_threads 16
+
+/* 3D-case related Functions */
+/*****************************************************************/
+float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int nonneg, long dimX, long dimY, long dimZ)
+{
+    float D_new;
+    float val1, val2, val3;
+    float re = 0.0f, re1 = 0.0f;
+    long i,j,k,index;
+
+#pragma omp parallel for reduction(+ : re, re1) shared(A,D,R1,R2,R3) private(index,i,j,k,val1,val2,val3,D_new) num_threads(n_threads)
+    for(k=0; k<dimZ; k++) {
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                index = (dimX*dimY)*k + j*dimX+i;
+                /* boundary conditions */
+                val1 = (i == 0)?0.f:R1[(dimX*dimY)*k + j*dimX + (i-1)];
+                val2 = (j == 0)?0.f:R2[(dimX*dimY)*k + (j-1)*dimX + i];
+                val3 = (k == 0)?0.f:R3[(dimX*dimY)*(k-1) + j*dimX + i];
+
+                D_new = A[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3);
+		if ((nonneg == 1)&&(D_new < 0.0f)) D_new = 0.0f;
+
+		re += powf(D_new - D[index], 2);
+		re1 += powf(D_new, 2);
+		D[index] = D_new;
+            }}}
+	
+    return sqrtf(re)/sqrtf(re1);
+}
+
+float All_func3D(float *P1_old, float *P2_old, float *P3_old, float *D, float *R1, float *R2, float *R3, float lambda, float tkp1, float tk, long dimX, long dimY, long dimZ)
+{
+    float val1, val2, val3, denom, sq_denom;
+    float P1_new, P2_new, P3_new;
+    long i,j,k, index;
+    float multip = (1.0f/(26.0f*lambda));
+    float multip2 = ((tk-1.0f)/tkp1);
+
+#pragma omp parallel for shared(P1_old,P2_old,P3_old,D,R1,R2,R3) private(index,i,j,k,val1,val2,val3,denom,sq_denom,multip,multip2,P1_new,P2_new,P3_new) num_threads(n_threads)
+    for(k=0; k<dimZ; k++) {
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                index = (dimX*dimY)*k + j*dimX+i;
+
+                /* boundary conditions */
+                val1 = (i == dimX-1)?0.0f: D[index] - D[(dimX*dimY)*k + j*dimX + (i+1)];
+                val2 = (j == dimY-1)?0.0f: D[index] - D[(dimX*dimY)*k + (j+1)*dimX + i];
+                val3 = (k == dimZ-1)?0.0f: D[index] - D[(dimX*dimY)*(k+1) + j*dimX + i];
+
+                P1_new = R1[index] + multip*val1;
+                P2_new = R2[index] + multip*val2;
+                P3_new = R3[index] + multip*val3;
+
+		denom = powf(P1_new,2) + powf(P2_new,2) + powf(P3_new,2);
+		if (denom > 1.0f) {
+		    sq_denom = 1.0f/sqrtf(denom);
+		    P1_new *= sq_denom;
+		    P2_new *= sq_denom;
+		    P3_new *= sq_denom;
+        	}
+
+		R1[index] = P1_new + multip2*(P1_new - P1_old[index]);
+		R2[index] = P2_new + multip2*(P2_new - P2_old[index]);
+		R3[index] = P3_new + multip2*(P3_new - P3_old[index]);
+		
+		P1_old[index] = P1_new;
+		P2_old[index] = P2_new;
+		P3_old[index] = P3_new;
+            }}}
+
+
+    return 1;
+}