diff options
Diffstat (limited to 'src/Core/regularisers_CPU/TNV_core_backtrack.c')
-rwxr-xr-x | src/Core/regularisers_CPU/TNV_core_backtrack.c | 189 |
1 files changed, 91 insertions, 98 deletions
diff --git a/src/Core/regularisers_CPU/TNV_core_backtrack.c b/src/Core/regularisers_CPU/TNV_core_backtrack.c index 7192475..9b19ed5 100755 --- a/src/Core/regularisers_CPU/TNV_core_backtrack.c +++ b/src/Core/regularisers_CPU/TNV_core_backtrack.c @@ -335,6 +335,7 @@ static int tnv_restore(HWThread thr, void *hwctx, int device_id, void *data) { static int tnv_step(HWThread thr, void *hwctx, int device_id, void *data) { long i, j, k, l; + long i1, i2, j1, j2; tnv_context_t *tnv_ctx = (tnv_context_t*)data; tnv_thread_t *ctx = tnv_ctx->thr_ctx + device_id; @@ -376,11 +377,12 @@ static int tnv_step(HWThread thr, void *hwctx, int device_id, void *data) { float theta1 = 1.0f + theta; float constant = 1.0f + taulambda; - float resprimal = 0.0; - float resdual = 0.0; - float product = 0.0; - float unorm = 0.0; - float qnorm = 0.0; + float resprimal = 0.0f; + float resdual1 = 0.0f; + float resdual2 = 0.0f; + float product = 0.0f; + float unorm = 0.0f; + float qnorm = 0.0f; float udiff[dimZ] __attribute__((aligned(64))); float qxdiff __attribute__((aligned(64))); @@ -389,104 +391,82 @@ static int tnv_step(HWThread thr, void *hwctx, int device_id, void *data) { float gradxdiff[dimZ] __attribute__((aligned(64))); float gradydiff[dimZ] __attribute__((aligned(64))); - for(int j1 = 0; j1 < stepY; j1 += BLOCK) { - for(int i1 = 0; i1 < dimX; i1 += BLOCK) { - for(int j2 = 0; j2 < BLOCK; j2++) { - j = j1 + j2; - for(int i2 = 0; i2 < BLOCK; i2++) { - float t[3]; - float M1 = 0.0f, M2 = 0.0f, M3 = 0.0f; - - i = i1 + i2; - if (i == dimX) break; - if (j == stepY) { j2 = BLOCK; break; } - l = (j * dimX + i) * padZ; - -//#pragma vector aligned -#pragma GCC ivdep - for(k = 0; k < dimZ; k++) { - u_upd[l + k] = (u[l + k] + tau * div[l + k] + taulambda * Input[l + k]) / constant; - udiff[k] = u[l + k] - u_upd[l + k]; - unorm += (udiff[k] * udiff[k]); - - gradx_upd[l + k] = (i == (dimX - 1))?0:((u[l + k + padZ] + tau * div[l + k + padZ] + taulambda * Input[l + k + padZ]) / constant - u_upd[l + k]); - grady_upd[l + k] = (j == (copY - 1))?0:((u[l + k + dimX*padZ] + tau * div[l + k + dimX*padZ] + taulambda * Input[l + k + dimX*padZ]) / constant - u_upd[l + k]); - gradxdiff[k] = gradx[l + k] - gradx_upd[l + k]; - gradydiff[k] = grady[l + k] - grady_upd[l + k]; - - float ubarx = theta1 * gradx_upd[l + k] - theta * gradx[l + k]; - float ubary = theta1 * grady_upd[l + k] - theta * grady[l + k]; -//#define TNV_NEW_STYLE -#ifdef TNV_NEW_STYLE - qx_upd[l + k] = qx[l + k] + sigma * ubarx; - qy_upd[l + k] = qy[l + k] + sigma * ubary; - - float vx = divsigma * qx_upd[l + k]; //+ ubarx - float vy = divsigma * qy_upd[l + k]; //+ ubary -#else - float vx = ubarx + divsigma * qx[l + k]; - float vy = ubary + divsigma * qy[l + k]; -#endif - - M1 += (vx * vx); M2 += (vx * vy); M3 += (vy * vy); - } - coefF(t, M1, M2, M3, sigma, p, q, r); - -//#pragma vector aligned -#pragma GCC ivdep - for(k = 0; k < dimZ; k++) { -#ifdef TNV_NEW_STYLE - float vx = divsigma * qx_upd[l + k]; - float vy = divsigma * qy_upd[l + k]; - - float gx_upd = vx * t[0] + vy * t[1]; - float gy_upd = vx * t[1] + vy * t[2]; - - qx_upd[l + k] -= sigma * gx_upd; - qy_upd[l + k] -= sigma * gy_upd; -#else - float ubarx = theta1 * gradx_upd[l + k] - theta * gradx[l + k]; - float ubary = theta1 * grady_upd[l + k] - theta * grady[l + k]; - float vx = ubarx + divsigma * qx[l + k]; - float vy = ubary + divsigma * qy[l + k]; - - float gx_upd = vx * t[0] + vy * t[1]; - float gy_upd = vx * t[1] + vy * t[2]; - - qx_upd[l + k] = qx[l + k] + sigma * (ubarx - gx_upd); - qy_upd[l + k] = qy[l + k] + sigma * (ubary - gy_upd); -#endif - - float div_upd_val = 0; - div_upd_val -= (i > 0)?qx_upd[l + k - padZ]:0; - div_upd_val -= (j > 0)?qy_upd[l + k - dimX * padZ]:0; - div_upd_val += (i < (dimX-1))?qx_upd[l + k]:0; - div_upd_val += (j < (copY-1))?qy_upd[l + k]:0; - div_upd[l + k] = div_upd_val; - - qxdiff = qx[l + k] - qx_upd[l + k]; - qydiff = qy[l + k] - qy_upd[l + k]; - qnorm += (qxdiff * qxdiff + qydiff * qydiff); - - resdual += fabs(divsigma * qxdiff - gradxdiff[k]); - resdual += fabs(divsigma * qydiff - gradydiff[k]); - product += (gradxdiff[k] * qxdiff + gradydiff[k] * qydiff); - - if ((offY == 0)||(j > 0)) { - divdiff = div[l + k] - div_upd[l + k]; // Multiple steps... How we compute without history? - resprimal += fabs(divtau * udiff[k] + divdiff); - } + j = 0; { +# define TNV_LOOP_FIRST_J + i = 0; { +# define TNV_LOOP_FIRST_I +# include "TNV_core_backtrack_loop.h" +# undef TNV_LOOP_FIRST_I + } + for(i = 1; i < (dimX - 1); i++) { +# include "TNV_core_backtrack_loop.h" + } + i = dimX - 1; { +# define TNV_LOOP_LAST_I +# include "TNV_core_backtrack_loop.h" +# undef TNV_LOOP_LAST_I + } +# undef TNV_LOOP_FIRST_J + } + + + + for(int j = 1; j < (copY - 1); j++) { + i = 0; { +# define TNV_LOOP_FIRST_I +# include "TNV_core_backtrack_loop.h" +# undef TNV_LOOP_FIRST_I + } + } + + for(int j1 = 1; j1 < (copY - 1); j1 += BLOCK) { + for(int i1 = 1; i1 < (dimX - 1); i1 += BLOCK) { + for(int j2 = 0; j2 < BLOCK; j2 ++) { + j = j1 + j2; + for(int i2 = 0; i2 < BLOCK; i2++) { + i = i1 + i2; + + if (i == (dimX - 1)) break; + if (j == (copY - 1)) { j2 = BLOCK; break; } +# include "TNV_core_backtrack_loop.h" + } } - } // i - } // j - } // i - } // j - + + } + + for(int j = 1; j < (copY - 1); j++) { + i = dimX - 1; { +# define TNV_LOOP_LAST_I +# include "TNV_core_backtrack_loop.h" +# undef TNV_LOOP_LAST_I + } + } + + + + for (j = copY - 1; j < stepY; j++) { +# define TNV_LOOP_LAST_J + i = 0; { +# define TNV_LOOP_FIRST_I +# include "TNV_core_backtrack_loop.h" +# undef TNV_LOOP_FIRST_I + } + for(i = 1; i < (dimX - 1); i++) { +# include "TNV_core_backtrack_loop.h" + } + i = dimX - 1; { +# define TNV_LOOP_LAST_I +# include "TNV_core_backtrack_loop.h" +# undef TNV_LOOP_LAST_I + } +# undef TNV_LOOP_LAST_J + } + ctx->resprimal = resprimal; - ctx->resdual = resdual; + ctx->resdual = resdual1 + resdual2; ctx->product = product; ctx->unorm = unorm; ctx->qnorm = qnorm; @@ -630,6 +610,19 @@ float TNV_CPU_main(float *InputT, float *uT, float lambda, int maxIter, float to } } + { + tnv_thread_t *ctx = tnv_ctx.thr_ctx + 0; + for(i = 0; i < dimX; i++) { + for(k = 0; k < dimZ; k++) { + int l = i * padZ + k; + + float divdiff = ctx->div[l] - ctx->div_upd[l]; + float udiff = ctx->u[l] - ctx->u_upd[l]; + resprimal += fabs(divtau * udiff + divdiff); + } + } + } + for (j = 0; j < tnv_ctx.threads; j++) { tnv_thread_t *ctx = tnv_ctx.thr_ctx + j; resprimal += ctx->resprimal; |