summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Suren A. Chilingaryan <csa@suren.me> 2020-03-29 23:13:21 +0200
committer: Suren A. Chilingaryan <csa@suren.me> 2020-03-29 23:13:21 +0200
commit: febfe9a6490052d4b8789fd8f7a0342115bfd55e (patch)
tree: e23a934918e6c8ab44007ea4b20570b748f8f4bd
parent: da30ce619bce168fc24ed0c17f04e411626ba18b (diff)
download: regularization-febfe9a6490052d4b8789fd8f7a0342115bfd55e.tar.gz
regularization-febfe9a6490052d4b8789fd8f7a0342115bfd55e.tar.bz2
regularization-febfe9a6490052d4b8789fd8f7a0342115bfd55e.tar.xz
regularization-febfe9a6490052d4b8789fd8f7a0342115bfd55e.zip
Enable padding over Z dimension
-rw-r--r-- src/Core/CMakeLists.txt 10
-rwxr-xr-x src/Core/regularisers_CPU/TNV_core.c 4
-rwxr-xr-x src/Core/regularisers_CPU/TNV_core_backtrack.c 4
-rw-r--r-- src/Core/regularisers_CPU/TNV_core_backtrack_loop.h 4
-rw-r--r-- src/Core/regularisers_CPU/TNV_core_loop.h 4
5 files changed, 14 insertions, 12 deletions
diff --git a/src/Core/CMakeLists.txt b/src/Core/CMakeLists.txt
index 76b0f3e..9a53b67 100644
--- a/src/Core/CMakeLists.txt
+++ b/src/Core/CMakeLists.txt
@@ -60,15 +60,17 @@ message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
message("Adding regularisers as a shared library")
#set(CMAKE_C_COMPILER /opt/intel/compilers_and_libraries/linux/bin/intel64/icc)
-#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -xSSE4.2 -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp -g")
-#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -axAVX2 -xAVX2 -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp -g")
-#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -mavx512f -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512vl -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp -g")
+#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -xSSE4.2 -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp")
+#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -axAVX2 -xAVX2 -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp")
+#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -mavx512f -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512vl -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp")
#set(CMAKE_C_COMPILER clang)
#set(CMAKE_C_FLAGS "-march=nocona -msse -msse2 -msse3 -mssse3 -msse4 -msse4.1 -msse4.2 -ftree-vectorize -fopenmp")
+#set(CMAKE_C_COMPILER gcc-9)
+set(CMAKE_C_FLAGS "-march=native -ftree-vectorize -fopt-info-vec-optimized -fopt-info-vec -fopenmp")
#set(CMAKE_C_FLAGS "-march=nocona -msse -msse2 -msse3 -mssse3 -msse4 -msse4.1 -msse4.2 -ftree-vectorize -fopt-info-vec-optimized -fopt-info-vec -mprefer-vector-width=128 -fopenmp")
-set(CMAKE_C_FLAGS "-march=native -mavx2 -ftree-vectorize -fopt-info-vec-optimized -fopt-info-vec -mprefer-vector-width=512 -fopenmp")
+#set(CMAKE_C_FLAGS "-march=native -mavx2 -ftree-vectorize -fopt-info-vec-optimized -fopt-info-vec -mprefer-vector-width=512 -fopenmp")
#set(CMAKE_C_FLAGS "-march=native -mavx512f -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512vl -ftree-vectorize -fopt-info-vec-optimized -fopt-info-vec -mprefer-vector-width=512 -fopenmp")
#set(CMAKE_C_FLAGS_RELEASE "-g -gdwarf-2 -g3 -fno-omit-frame-pointer")
diff --git a/src/Core/regularisers_CPU/TNV_core.c b/src/Core/regularisers_CPU/TNV_core.c
index 415c644..7b8023b 100755
--- a/src/Core/regularisers_CPU/TNV_core.c
+++ b/src/Core/regularisers_CPU/TNV_core.c
@@ -460,9 +460,9 @@ static void TNV_CPU_init(float *InputT, float *uT, int dimX, int dimY, int dimZ)
tnv_ctx.dimY = dimY;
tnv_ctx.dimZ = dimZ;
// Padding seems actually slower
- tnv_ctx.padZ = dimZ;
+// tnv_ctx.padZ = dimZ;
// tnv_ctx.padZ = 4 * ((dimZ / 4) + ((dimZ % 4)?1:0));
-// tnv_ctx.padZ = 16 * ((dimZ / 16) + ((dimZ % 16)?1:0));
+ tnv_ctx.padZ = 16 * ((dimZ / 16) + ((dimZ % 16)?1:0));
hw_sched_init();
diff --git a/src/Core/regularisers_CPU/TNV_core_backtrack.c b/src/Core/regularisers_CPU/TNV_core_backtrack.c
index 9b19ed5..7eb367e 100755
--- a/src/Core/regularisers_CPU/TNV_core_backtrack.c
+++ b/src/Core/regularisers_CPU/TNV_core_backtrack.c
@@ -483,8 +483,8 @@ static void TNV_CPU_init(float *InputT, float *uT, int dimX, int dimY, int dimZ)
tnv_ctx.dimY = dimY;
tnv_ctx.dimZ = dimZ;
// Padding seems actually slower
- tnv_ctx.padZ = dimZ;
-// tnv_ctx.padZ = 16 * ((dimZ / 16) + ((dimZ % 16)?1:0));
+// tnv_ctx.padZ = dimZ;
+ tnv_ctx.padZ = 16 * ((dimZ / 16) + ((dimZ % 16)?1:0));
hw_sched_init();
diff --git a/src/Core/regularisers_CPU/TNV_core_backtrack_loop.h b/src/Core/regularisers_CPU/TNV_core_backtrack_loop.h
index 3ec4250..2605d22 100644
--- a/src/Core/regularisers_CPU/TNV_core_backtrack_loop.h
+++ b/src/Core/regularisers_CPU/TNV_core_backtrack_loop.h
@@ -3,7 +3,7 @@
l = (j * dimX + i) * padZ;
-//#pragma vector aligned
+#pragma vector aligned
#pragma GCC ivdep
for(k = 0; k < dimZ; k++) {
u_upd[l + k] = (u[l + k] + tau * div[l + k] + taulambda * Input[l + k]) / constant;
@@ -44,7 +44,7 @@
coefF(t, M1, M2, M3, sigma, p, q, r);
-//#pragma vector aligned
+#pragma vector aligned
#pragma GCC ivdep
for(k = 0; k < dimZ; k++) {
#ifdef TNV_NEW_STYLE
diff --git a/src/Core/regularisers_CPU/TNV_core_loop.h b/src/Core/regularisers_CPU/TNV_core_loop.h
index 34e7139..3f6d9bc 100644
--- a/src/Core/regularisers_CPU/TNV_core_loop.h
+++ b/src/Core/regularisers_CPU/TNV_core_loop.h
@@ -17,7 +17,7 @@
// __assume(padZ%16==0);
-//#pragma vector aligned
+#pragma vector aligned
#pragma GCC ivdep
for(k = 0; k < dimZ; k++) {
float u_upd = (u[l + k] + tau * div[l + k] + taulambda * Input[l + k]) / constant; // 3 reads
@@ -59,7 +59,7 @@
coefF(t, M1, M2, M3, sigma, p, q, r);
-//#pragma vector aligned
+#pragma vector aligned
#pragma GCC ivdep
for(k = 0; k < padZ; k++) {
float vx = ubarx[k] + divsigma * qx_current[k]; // cache 2r