[test-suite] r334954 - [test-suite] Corrections for MiniGMG
Brian Homerding via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 18 08:56:18 PDT 2018
Author: homerdin
Date: Mon Jun 18 08:56:18 2018
New Revision: 334954
URL: http://llvm.org/viewvc/llvm-project?rev=334954&view=rev
Log:
[test-suite] Corrections for MiniGMG
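The OpenMP loops were being preprocessed out along with the pragmas, because
the #endif closing each "#ifdef OMP" guard was placed after the loop body
instead of directly after the pragma. This patch moves the guards so that
only the pragmas are conditional, which allows the application to run fully.
The problem size was decreased because the test's runtime increased
substantially. Additional verification output has been enabled
(-D__PRINT_NORM) and is compared against the reference using FP_TOLERANCE.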
Reviewers: hfinkel
Differential Revision: https://reviews.llvm.org/D47484
Modified:
test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeLists.txt
test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/apply_op.inc
test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/exchange_boundary.inc
test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/mg.h
test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/miniGMG.reference_output
test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/misc.inc
test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/residual.inc
test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/restriction.inc
Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeLists.txt?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeLists.txt (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeLists.txt Mon Jun 18 08:56:18 2018
@@ -1,4 +1,6 @@
set(PROG miniGMG)
+list(APPEND CXXFLAGS -D__PRINT_NORM)
list(APPEND LDFLAGS -lm)
-set(RUN_OPTIONS 6 2 2 2 1 1 1)
+set(FP_TOLERANCE 0.00001)
+set(RUN_OPTIONS 5 2 2 2 1 1 1)
llvm_multisource()
Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/apply_op.inc
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/apply_op.inc?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/apply_op.inc (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/apply_op.inc Mon Jun 18 08:56:18 2018
@@ -24,6 +24,7 @@ void apply_op(domain_type * domain, int
#ifdef OMP
#pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int i,j,k,s;
int pencil = domain->subdomains[box].levels[level].pencil;
@@ -41,7 +42,9 @@ void apply_op(domain_type * domain, int
double * __restrict__ beta_k = domain->subdomains[box].levels[level].grids[ __beta_k] + ghosts*(1+pencil+plane);
double * __restrict__ lambda = domain->subdomains[box].levels[level].grids[ __lambda] + ghosts*(1+pencil+plane);
+#ifdef OMP
#pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
for(k=0;k<dim_k;k++){
for(j=0;j<dim_j;j++){
for(i=0;i<dim_i;i++){
@@ -57,7 +60,7 @@ void apply_op(domain_type * domain, int
Ax[ijk] = helmholtz;
}}}
}
-#endif
+
domain->cycles.apply_op[level] += (uint64_t)(CycleTime()-_timeStart);
}
//------------------------------------------------------------------------------------------------------------------------------
Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/exchange_boundary.inc
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/exchange_boundary.inc?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/exchange_boundary.inc (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/exchange_boundary.inc Mon Jun 18 08:56:18 2018
@@ -141,7 +141,34 @@ void exchange_boundary(domain_type *doma
if( (domain->bufferCopies[level][buffer].isFace && exchange_faces ) ||
(domain->bufferCopies[level][buffer].isEdge && exchange_edges ) ||
(domain->bufferCopies[level][buffer].isCorner && exchange_corners) ){
- DoBufferCopy(domain,level,grid_id,buffer);
+ DoBufferCopy(domain,level,grid_id,buffer);
+ }}
+ _timeEnd = CycleTime();
+ domain->cycles.pack[level] += (_timeEnd-_timeStart);
+
+ // loop through MPI send buffers and post Isend's...
+ _timeStart = CycleTime();
+ #ifdef __MPI_THREAD_MULTIPLE
+ #pragma omp parallel for schedule(dynamic,1)
+ #endif
+ for(n=nMessages/2;n<nMessages;n++){
+ MPI_Isend(buffers_packed[n],sizes_packed[n],MPI_DOUBLE,ranks_packed[n],tags_packed[n],MPI_COMM_WORLD,&requests_packed[n]);
+ }
+ _timeEnd = CycleTime();
+ domain->cycles.send[level] += (_timeEnd-_timeStart);
+ #endif
+
+
+ // exchange locally... try and hide within Isend latency...
+ _timeStart = CycleTime();
+#ifdef OMP
+ #pragma omp parallel for schedule(static,1)
+#endif
+ for(buffer=domain->bufferCopy_Local_Start;buffer<domain->bufferCopy_Local_End;buffer++){
+ if( (domain->bufferCopies[level][buffer].isFace && exchange_faces ) ||
+ (domain->bufferCopies[level][buffer].isEdge && exchange_edges ) ||
+ (domain->bufferCopies[level][buffer].isCorner && exchange_corners) ){
+ DoBufferCopy(domain,level,grid_id,buffer);
}}
_timeEnd = CycleTime();
domain->cycles.grid2grid[level] += (_timeEnd-_timeStart);
@@ -153,7 +180,7 @@ void exchange_boundary(domain_type *doma
MPI_Waitall(nMessages,requests_packed,status_packed);
_timeEnd = CycleTime();
domain->cycles.wait[level] += (_timeEnd-_timeStart);
- #endif
+
// unpack MPI receive buffers
_timeStart = CycleTime();
#pragma omp parallel for schedule(static,1)
Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/mg.h
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/mg.h?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/mg.h (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/mg.h Mon Jun 18 08:56:18 2018
@@ -125,5 +125,6 @@ void destroy_domain(domain_type * domain
void MGBuild(domain_type * domain, double a, double b, double h0);
void MGSolve(domain_type * domain, int u_id, int F_id, double a, double b, double desired_mg_norm);
void print_timing(domain_type *domain);
+void DoBufferCopy(domain_type *domain, int level, int grid_id, int buffer);
//------------------------------------------------------------------------------------------------------------------------------
#endif
Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/miniGMG.reference_output
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/miniGMG.reference_output?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/miniGMG.reference_output (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/miniGMG.reference_output Mon Jun 18 08:56:18 2018
@@ -1,25 +1,46 @@
1 MPI Tasks of 1 threads
truncating the v-cycle at 2^3 subdomains
creating domain... done
- 64 x 64 x 64 (per subdomain)
- 128 x 128 x 128 (per process)
- 128 x 128 x 128 (overall)
+ 32 x 32 x 32 (per subdomain)
+ 64 x 64 x 64 (per process)
+ 64 x 64 x 64 (overall)
1-deep ghost zones
- allocated 246 MB
+ allocated 34 MB
initializing alpha, beta, RHS for the ``hard problem''...
- average value of f = 0.000000000000e+00
+ average value of f = -1.110223024625e-15
done
MGBuild...
- level= 0, eigenvalue_max ~= 1.000000e+00
- level= 1, eigenvalue_max ~= -1.000000e+00
- level= 2, eigenvalue_max ~= -1.000000e+00
- level= 3, eigenvalue_max ~= -1.000000e+00
- level= 4, eigenvalue_max ~= -1.000000e+00
- level= 5, eigenvalue_max ~= -1.000000e+00
+ level= 0, eigenvalue_max ~= 1.999996e+00
+ level= 1, eigenvalue_max ~= 1.999984e+00
+ level= 2, eigenvalue_max ~= 1.999935e+00
+ level= 3, eigenvalue_max ~= 1.999740e+00
+ level= 4, eigenvalue_max ~= 1.998958e+00
done
-MGSolve... done
-MGSolve... done
-Error test: h = 7.812500e-03, max = 0.000000e+00
-Error test: h = 7.812500e-03, L2 = 0.000000e+00
+MGSolve...
+v-cycle= 1, norm=0.00104306838695688285 (1.043068e-03)
+v-cycle= 2, norm=0.00008508164981969001 (8.508165e-05)
+v-cycle= 3, norm=0.00001867173484947213 (1.867173e-05)
+v-cycle= 4, norm=0.00000204150498158241 (2.041505e-06)
+v-cycle= 5, norm=0.00000036859010963302 (3.685901e-07)
+v-cycle= 6, norm=0.00000004500376124219 (4.500376e-08)
+v-cycle= 7, norm=0.00000000777275418452 (7.772754e-09)
+v-cycle= 8, norm=0.00000000096441180332 (9.644118e-10)
+v-cycle= 9, norm=0.00000000016688265714 (1.668827e-10)
+v-cycle=10, norm=0.00000000002055567632 (2.055568e-11)
+done
+MGSolve...
+v-cycle= 1, norm=0.00104306838695688285 (1.043068e-03)
+v-cycle= 2, norm=0.00008508164981969001 (8.508165e-05)
+v-cycle= 3, norm=0.00001867173484947213 (1.867173e-05)
+v-cycle= 4, norm=0.00000204150498158241 (2.041505e-06)
+v-cycle= 5, norm=0.00000036859010963302 (3.685901e-07)
+v-cycle= 6, norm=0.00000004500376124219 (4.500376e-08)
+v-cycle= 7, norm=0.00000000777275418452 (7.772754e-09)
+v-cycle= 8, norm=0.00000000096441180332 (9.644118e-10)
+v-cycle= 9, norm=0.00000000016688265714 (1.668827e-10)
+v-cycle=10, norm=0.00000000002055567632 (2.055568e-11)
+done
+Error test: h = 1.562500e-02, max = 3.529710e-04
+Error test: h = 1.562500e-02, L2 = 5.559935e-05
deallocating domain... done
exit 0
Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/misc.inc
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/misc.inc?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/misc.inc (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/misc.inc Mon Jun 18 08:56:18 2018
@@ -20,6 +20,7 @@ void zero_grid(domain_type *domain, int
#ifdef OMP
#pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int i,j,k;
int pencil = domain->subdomains[box].levels[level].pencil;
@@ -29,7 +30,9 @@ void zero_grid(domain_type *domain, int
int dim_j = domain->subdomains[box].levels[level].dim.j;
int dim_i = domain->subdomains[box].levels[level].dim.i;
double * __restrict__ grid = domain->subdomains[box].levels[level].grids[grid_id] + ghosts*(1+pencil+plane);
+#ifdef OMP
#pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
for(k=-ghosts;k<dim_k+ghosts;k++){
for(j=-ghosts;j<dim_j+ghosts;j++){
for(i=-ghosts;i<dim_i+ghosts;i++){
@@ -37,7 +40,6 @@ void zero_grid(domain_type *domain, int
grid[ijk] = 0.0;
}}}
}
-#endif
domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
}
@@ -56,6 +58,7 @@ void initialize_grid_to_scalar(domain_ty
int box;
#ifdef OMP
#pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int i,j,k;
int pencil = domain->subdomains[box].levels[level].pencil;
@@ -65,7 +68,9 @@ void initialize_grid_to_scalar(domain_ty
int dim_j = domain->subdomains[box].levels[level].dim.j;
int dim_i = domain->subdomains[box].levels[level].dim.i;
double * __restrict__ grid = domain->subdomains[box].levels[level].grids[grid_id] + ghosts*(1+pencil+plane);
+#ifdef OMP
#pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
for(k=-ghosts;k<dim_k+ghosts;k++){
for(j=-ghosts;j<dim_j+ghosts;j++){
for(i=-ghosts;i<dim_i+ghosts;i++){
@@ -74,7 +79,6 @@ void initialize_grid_to_scalar(domain_ty
grid[ijk] = ghostZone ? 0.0 : scalar;
}}}
}
-#endif
domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
}
@@ -92,6 +96,7 @@ void add_grids(domain_type *domain, int
int box;
#ifdef OMP
#pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int i,j,k;
int pencil = domain->subdomains[box].levels[level].pencil;
@@ -103,7 +108,9 @@ void add_grids(domain_type *domain, int
double * __restrict__ grid_c = domain->subdomains[box].levels[level].grids[id_c] + ghosts*(1+pencil+plane);
double * __restrict__ grid_a = domain->subdomains[box].levels[level].grids[id_a] + ghosts*(1+pencil+plane);
double * __restrict__ grid_b = domain->subdomains[box].levels[level].grids[id_b] + ghosts*(1+pencil+plane);
+#ifdef OMP
#pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
for(k=0;k<dim_k;k++){
for(j=0;j<dim_j;j++){
for(i=0;i<dim_i;i++){
@@ -111,7 +118,6 @@ void add_grids(domain_type *domain, int
grid_c[ijk] = scale_a*grid_a[ijk] + scale_b*grid_b[ijk];
}}}
}
-#endif
domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
}
@@ -129,6 +135,7 @@ void mul_grids(domain_type *domain, int
int box;
#ifdef OMP
#pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int i,j,k;
int pencil = domain->subdomains[box].levels[level].pencil;
@@ -140,7 +147,9 @@ void mul_grids(domain_type *domain, int
double * __restrict__ grid_c = domain->subdomains[box].levels[level].grids[id_c] + ghosts*(1+pencil+plane);
double * __restrict__ grid_a = domain->subdomains[box].levels[level].grids[id_a] + ghosts*(1+pencil+plane);
double * __restrict__ grid_b = domain->subdomains[box].levels[level].grids[id_b] + ghosts*(1+pencil+plane);
+#ifdef OMP
#pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
for(k=0;k<dim_k;k++){
for(j=0;j<dim_j;j++){
for(i=0;i<dim_i;i++){
@@ -148,7 +157,6 @@ void mul_grids(domain_type *domain, int
grid_c[ijk] = scale*grid_a[ijk]*grid_b[ijk];
}}}
}
-#endif
domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
}
@@ -167,6 +175,7 @@ void scale_grid(domain_type *domain, int
int box;
#ifdef OMP
#pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int i,j,k;
int pencil = domain->subdomains[box].levels[level].pencil;
@@ -177,7 +186,9 @@ void scale_grid(domain_type *domain, int
int dim_i = domain->subdomains[box].levels[level].dim.i;
double * __restrict__ grid_c = domain->subdomains[box].levels[level].grids[id_c] + ghosts*(1+pencil+plane);
double * __restrict__ grid_a = domain->subdomains[box].levels[level].grids[id_a] + ghosts*(1+pencil+plane);
+#ifdef OMP
#pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
for(k=0;k<dim_k;k++){
for(j=0;j<dim_j;j++){
for(i=0;i<dim_i;i++){
@@ -185,7 +196,6 @@ void scale_grid(domain_type *domain, int
grid_c[ijk] = scale_a*grid_a[ijk];
}}}
}
-#endif
domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
}
@@ -206,6 +216,7 @@ double dot(domain_type *domain, int leve
#ifdef OMP
// FIX, schedule(static) is a stand in to guarantee reproducibility...
#pragma omp parallel for private(box) if(omp_across_boxes) reduction(+:a_dot_b_domain) schedule(static)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int i,j,k;
int pencil = domain->subdomains[box].levels[level].pencil;
@@ -217,7 +228,9 @@ double dot(domain_type *domain, int leve
double * __restrict__ grid_a = domain->subdomains[box].levels[level].grids[id_a] + ghosts*(1+pencil+plane); // i.e. [0] = first non ghost zone point
double * __restrict__ grid_b = domain->subdomains[box].levels[level].grids[id_b] + ghosts*(1+pencil+plane);
double a_dot_b_box = 0.0;
+#ifdef OMP
#pragma omp parallel for private(i,j,k) if(omp_within_a_box) collapse(2) reduction(+:a_dot_b_box) schedule(static)
+#endif
for(k=0;k<dim_k;k++){
for(j=0;j<dim_j;j++){
for(i=0;i<dim_i;i++){
@@ -226,7 +239,6 @@ double dot(domain_type *domain, int leve
}}}
a_dot_b_domain+=a_dot_b_box;
}
-#endif
domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
#ifdef __MPI
@@ -256,6 +268,7 @@ double norm(domain_type *domain, int lev
#ifdef OMP
// FIX, schedule(static) is a stand in to guarantee reproducibility...
#pragma omp parallel for private(box) if(omp_across_boxes) reduction(max:max_norm) schedule(static)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int i,j,k;
int pencil = domain->subdomains[box].levels[level].pencil;
@@ -266,7 +279,9 @@ double norm(domain_type *domain, int lev
int dim_i = domain->subdomains[box].levels[level].dim.i;
double * __restrict__ grid = domain->subdomains[box].levels[level].grids[ grid_id] + ghosts*(1+pencil+plane); // i.e. [0] = first non ghost zone point
double box_norm = 0.0;
+#ifdef OMP
#pragma omp parallel for private(i,j,k) if(omp_within_a_box) collapse(2) reduction(max:box_norm) schedule(static)
+#endif
for(k=0;k<dim_k;k++){
for(j=0;j<dim_j;j++){
for(i=0;i<dim_i;i++){
@@ -276,7 +291,6 @@ double norm(domain_type *domain, int lev
}}}
if(box_norm>max_norm){max_norm = box_norm;}
} // box list
-#endif
domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
#ifdef __MPI
@@ -306,6 +320,7 @@ double mean(domain_type *domain, int lev
double sum_domain = 0.0;
#ifdef OMP
#pragma omp parallel for private(box) if(omp_across_boxes) reduction(+:sum_domain)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int i,j,k;
int pencil = domain->subdomains[box].levels[level].pencil;
@@ -316,7 +331,9 @@ double mean(domain_type *domain, int lev
int dim_i = domain->subdomains[box].levels[level].dim.i;
double * __restrict__ grid_a = domain->subdomains[box].levels[level].grids[id_a] + ghosts*(1+pencil+plane); // i.e. [0] = first non ghost zone point
double sum_box = 0.0;
+#ifdef OMP
#pragma omp parallel for private(i,j,k) if(omp_within_a_box) collapse(2) reduction(+:sum_box)
+#endif
for(k=0;k<dim_k;k++){
for(j=0;j<dim_j;j++){
for(i=0;i<dim_i;i++){
@@ -325,7 +342,6 @@ double mean(domain_type *domain, int lev
}}}
sum_domain+=sum_box;
}
-#endif
domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
double ncells_domain = (double)domain->dim.i*(double)domain->dim.j*(double)domain->dim.k;
@@ -356,6 +372,7 @@ void shift_grid(domain_type *domain, int
int box;
#ifdef OMP
#pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int i,j,k;
int pencil = domain->subdomains[box].levels[level].pencil;
@@ -367,7 +384,9 @@ void shift_grid(domain_type *domain, int
double * __restrict__ grid_c = domain->subdomains[box].levels[level].grids[id_c] + ghosts*(1+pencil+plane); // i.e. [0] = first non ghost zone point
double * __restrict__ grid_a = domain->subdomains[box].levels[level].grids[id_a] + ghosts*(1+pencil+plane); // i.e. [0] = first non ghost zone point
+#ifdef OMP
#pragma omp parallel for private(i,j,k) if(omp_within_a_box) collapse(2)
+#endif
for(k=0;k<dim_k;k++){
for(j=0;j<dim_j;j++){
for(i=0;i<dim_i;i++){
@@ -375,7 +394,6 @@ void shift_grid(domain_type *domain, int
grid_c[ijk] = grid_a[ijk] + shift_a;
}}}
}
-#endif
domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
}
@@ -391,6 +409,7 @@ void project_cell_to_face(domain_type *d
int box;
#ifdef OMP
#pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int i,j,k;
int pencil = domain->subdomains[box].levels[level].pencil;
@@ -407,7 +426,9 @@ void project_cell_to_face(domain_type *d
case 1: stride = pencil;break;//j-direction
case 2: stride = plane;break;//k-direction
}
+#ifdef OMP
#pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
for(k=0;k<=dim_k;k++){ // <= to ensure you do low and high faces
for(j=0;j<=dim_j;j++){
for(i=0;i<=dim_i;i++){
@@ -415,6 +436,5 @@ void project_cell_to_face(domain_type *d
grid_face[ijk] = 0.5*(grid_cell[ijk-stride] + grid_cell[ijk]); // simple linear interpolation
}}}
}
-#endif
domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
}
Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/residual.inc
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/residual.inc?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/residual.inc (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/residual.inc Mon Jun 18 08:56:18 2018
@@ -23,6 +23,7 @@ void residual(domain_type * domain, int
int box;
#ifdef OMP
#pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int i,j,k;
int pencil = domain->subdomains[box].levels[level].pencil;
@@ -40,7 +41,9 @@ void residual(domain_type * domain, int
double * __restrict__ beta_k = domain->subdomains[box].levels[level].grids[__beta_k] + ghosts*(1+pencil+plane);
double * __restrict__ res = domain->subdomains[box].levels[level].grids[ res_id] + ghosts*(1+pencil+plane);
+#ifdef OMP
#pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
for(k=0;k<dim_k;k++){
for(j=0;j<dim_j;j++){
for(i=0;i<dim_i;i++){
@@ -57,7 +60,6 @@ void residual(domain_type * domain, int
res[ijk] = rhs[ijk]-helmholtz;
}}}
}
-#endif
domain->cycles.residual[level] += (uint64_t)(CycleTime()-_timeStart);
}
@@ -85,6 +87,7 @@ void residual_and_restriction(domain_typ
int box;
#ifdef OMP
#pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int kk,jj;
int pencil_c = domain->subdomains[box].levels[level_c].pencil;
@@ -110,7 +113,9 @@ void residual_and_restriction(domain_typ
double * __restrict__ beta_k = domain->subdomains[box].levels[level_f].grids[__beta_k] + ghosts_f*(1+pencil_f+plane_f);
double * __restrict__ res = domain->subdomains[box].levels[level_c].grids[ res_id] + ghosts_c*(1+pencil_c+plane_c);
+#ifdef OMP
#pragma omp parallel for private(kk,jj) if(omp_within_a_box) collapse(2)
+#endif
for(kk=0;kk<dim_k_f;kk+=2){
for(jj=0;jj<dim_j_f;jj+=2){
int i,j,k;
@@ -136,7 +141,6 @@ void residual_and_restriction(domain_typ
}
}}}}
}
-#endif
domain->cycles.residual[level_f] += (uint64_t)(CycleTime()-_timeStart);
}
#else
@@ -158,7 +162,9 @@ void residual_and_restriction(domain_typ
int omp_within_a_box = (domain->subdomains[0].levels[level_f].dim.i >= CollaborativeThreadingBoxSize);
int box;
+#ifdef OMP
#pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int pencil_c = domain->subdomains[box].levels[level_c].pencil;
int plane_c = domain->subdomains[box].levels[level_c].plane;
@@ -184,7 +190,10 @@ void residual_and_restriction(domain_typ
double * __restrict__ res = domain->subdomains[box].levels[level_c].grids[ res_id] + ghosts_c*(1+pencil_c+plane_c);
int kk;
+#ifdef OMP
#pragma omp parallel for private(kk) if(omp_within_a_box)
+#endif
+
for(kk=0;kk<dim_k_f;kk+=2){
int i,j,k;
// zero out the next coarse grid plane
Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/restriction.inc
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/restriction.inc?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/restriction.inc (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/restriction.inc Mon Jun 18 08:56:18 2018
@@ -22,6 +22,7 @@ void restriction(domain_type *domain, in
int box;
#ifdef OMP
#pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int i,j,k;
int ghosts_c = domain->subdomains[box].levels[level_c].ghosts;
@@ -37,8 +38,10 @@ void restriction(domain_type *domain, in
double * __restrict__ grid_f = domain->subdomains[box].levels[level_f].grids[id_f] + ghosts_f*(1+pencil_f+plane_f);
double * __restrict__ grid_c = domain->subdomains[box].levels[level_c].grids[id_c] + ghosts_c*(1+pencil_c+plane_c);
-
+
+#ifdef OMP
#pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
for(k=0;k<dim_k_c;k++){
for(j=0;j<dim_j_c;j++){
for(i=0;i<dim_i_c;i++){
@@ -50,7 +53,6 @@ void restriction(domain_type *domain, in
grid_f[ijk_f +pencil_f+plane_f]+grid_f[ijk_f+1+pencil_f+plane_f] ) * 0.125;
}}}
}
-#endif
domain->cycles.restriction[level_f] += (uint64_t)(CycleTime()-_timeStart);
}
@@ -71,6 +73,7 @@ void restriction_betas(domain_type * dom
#ifdef OMP
#pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
for(box=0;box<domain->subdomains_per_rank;box++){
int i,j,k;
int ghosts_c = domain->subdomains[box].levels[level_c].ghosts;
@@ -103,7 +106,9 @@ void restriction_betas(domain_type * dom
// restrict beta_j (== face in ik)
beta_f = domain->subdomains[box].levels[level_f].grids[__beta_j] + ghosts_f*plane_f + ghosts_f*pencil_f + ghosts_f;
beta_c = domain->subdomains[box].levels[level_c].grids[__beta_j] + ghosts_c*plane_c + ghosts_c*pencil_c + ghosts_c;
+#ifdef OMP
#pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
for(k=0;k<dim_k_c;k++){
for(j=0;j<dim_j_c;j++){
for(i=0;i<dim_i_c;i++){
@@ -116,7 +121,9 @@ void restriction_betas(domain_type * dom
// restrict beta_k (== face in ij)
beta_f = domain->subdomains[box].levels[level_f].grids[__beta_k] + ghosts_f*plane_f + ghosts_f*pencil_f + ghosts_f;
beta_c = domain->subdomains[box].levels[level_c].grids[__beta_k] + ghosts_c*plane_c + ghosts_c*pencil_c + ghosts_c;
+#ifdef OMP
#pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
for(k=0;k<dim_k_c;k++){
for(j=0;j<dim_j_c;j++){
for(i=0;i<dim_i_c;i++){
@@ -126,6 +133,5 @@ void restriction_betas(domain_type * dom
beta_f[ijk_f+pencil_f]+beta_f[ijk_f+1+pencil_f] ) * 0.25;
}}}
}
-#endif
domain->cycles.restriction[level_f] += (uint64_t)(CycleTime()-_timeStart);
}
More information about the llvm-commits mailing list