[test-suite] r334954 - [test-suite] Corrections for MiniGMG

Brian Homerding via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 18 08:56:18 PDT 2018


Author: homerdin
Date: Mon Jun 18 08:56:18 2018
New Revision: 334954

URL: http://llvm.org/viewvc/llvm-project?rev=334954&view=rev
Log:
[test-suite] Corrections for MiniGMG

When built without OpenMP, the loops guarded by #ifdef OMP were compiled out
along with the pragmas.  This patch moves the #endif directives so that only
the pragmas are conditional, allowing the application to run fully.  The
problem size was decreased because the test's runtime increased substantially
once the loops actually executed.  Additional verification output has been
enabled (via -D__PRINT_NORM) and is checked against the reference output using
FP_TOLERANCE.
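
A minimal sketch of the guard pattern being fixed (simplified from
apply_op.inc; the "per-box work" body is elided here for illustration):

    /* Before: in a build without -DOMP, the entire loop disappeared,
       because the #endif was placed after the loop body. */
    #ifdef OMP
      #pragma omp parallel for private(box) if(omp_across_boxes)
      for(box=0;box<domain->subdomains_per_rank;box++){
        /* ... per-box work ... */
      }
    #endif

    /* After: only the pragma is conditional; the loop always compiles. */
    #ifdef OMP
      #pragma omp parallel for private(box) if(omp_across_boxes)
    #endif
      for(box=0;box<domain->subdomains_per_rank;box++){
        /* ... per-box work ... */
      }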

Reviewers: hfinkel

Differential Revision: https://reviews.llvm.org/D47484

Modified:
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeLists.txt
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/apply_op.inc
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/exchange_boundary.inc
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/mg.h
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/miniGMG.reference_output
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/misc.inc
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/residual.inc
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/restriction.inc

Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeLists.txt?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeLists.txt (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeLists.txt Mon Jun 18 08:56:18 2018
@@ -1,4 +1,6 @@
 set(PROG miniGMG)
+list(APPEND CXXFLAGS -D__PRINT_NORM)
 list(APPEND LDFLAGS -lm)
-set(RUN_OPTIONS 6  2 2 2  1 1 1)
+set(FP_TOLERANCE 0.00001)
+set(RUN_OPTIONS 5  2 2 2  1 1 1)
 llvm_multisource()

Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/apply_op.inc
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/apply_op.inc?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/apply_op.inc (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/apply_op.inc Mon Jun 18 08:56:18 2018
@@ -24,6 +24,7 @@ void apply_op(domain_type * domain, int
 
 #ifdef OMP
   #pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int i,j,k,s;
     int pencil = domain->subdomains[box].levels[level].pencil;
@@ -41,7 +42,9 @@ void apply_op(domain_type * domain, int
     double * __restrict__ beta_k = domain->subdomains[box].levels[level].grids[ __beta_k] + ghosts*(1+pencil+plane);
     double * __restrict__ lambda = domain->subdomains[box].levels[level].grids[ __lambda] + ghosts*(1+pencil+plane);
 
+#ifdef OMP
     #pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
     for(k=0;k<dim_k;k++){
     for(j=0;j<dim_j;j++){
     for(i=0;i<dim_i;i++){
@@ -57,7 +60,7 @@ void apply_op(domain_type * domain, int
       Ax[ijk] = helmholtz;
     }}}
   }
-#endif
+
   domain->cycles.apply_op[level] += (uint64_t)(CycleTime()-_timeStart);
 }
 //------------------------------------------------------------------------------------------------------------------------------

Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/exchange_boundary.inc
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/exchange_boundary.inc?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/exchange_boundary.inc (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/exchange_boundary.inc Mon Jun 18 08:56:18 2018
@@ -141,7 +141,34 @@ void exchange_boundary(domain_type *doma
     if( (domain->bufferCopies[level][buffer].isFace   && exchange_faces  ) || 
         (domain->bufferCopies[level][buffer].isEdge   && exchange_edges  ) ||
         (domain->bufferCopies[level][buffer].isCorner && exchange_corners) ){
-        DoBufferCopy(domain,level,grid_id,buffer);
+          DoBufferCopy(domain,level,grid_id,buffer);
+  }}
+  _timeEnd = CycleTime();
+  domain->cycles.pack[level] += (_timeEnd-_timeStart);
+ 
+  // loop through MPI send buffers and post Isend's...
+  _timeStart = CycleTime();
+  #ifdef __MPI_THREAD_MULTIPLE
+  #pragma omp parallel for schedule(dynamic,1)
+  #endif
+  for(n=nMessages/2;n<nMessages;n++){
+    MPI_Isend(buffers_packed[n],sizes_packed[n],MPI_DOUBLE,ranks_packed[n],tags_packed[n],MPI_COMM_WORLD,&requests_packed[n]);
+  }
+  _timeEnd = CycleTime();
+  domain->cycles.send[level] += (_timeEnd-_timeStart);
+  #endif
+
+
+  // exchange locally... try and hide within Isend latency... 
+  _timeStart = CycleTime();
+#ifdef OMP
+  #pragma omp parallel for schedule(static,1)
+#endif
+  for(buffer=domain->bufferCopy_Local_Start;buffer<domain->bufferCopy_Local_End;buffer++){
+    if( (domain->bufferCopies[level][buffer].isFace   && exchange_faces  ) || 
+        (domain->bufferCopies[level][buffer].isEdge   && exchange_edges  ) ||
+        (domain->bufferCopies[level][buffer].isCorner && exchange_corners) ){
+          DoBufferCopy(domain,level,grid_id,buffer);
   }}
   _timeEnd = CycleTime();
   domain->cycles.grid2grid[level] += (_timeEnd-_timeStart);
@@ -153,7 +180,7 @@ void exchange_boundary(domain_type *doma
   MPI_Waitall(nMessages,requests_packed,status_packed);
   _timeEnd = CycleTime();
   domain->cycles.wait[level] += (_timeEnd-_timeStart);
-  #endif
+
   // unpack MPI receive buffers 
   _timeStart = CycleTime();
   #pragma omp parallel for schedule(static,1)

Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/mg.h
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/mg.h?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/mg.h (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/mg.h Mon Jun 18 08:56:18 2018
@@ -125,5 +125,6 @@ void destroy_domain(domain_type * domain
 void      MGBuild(domain_type * domain, double a, double b, double h0);
 void      MGSolve(domain_type * domain, int u_id, int F_id, double a, double b, double desired_mg_norm);
 void print_timing(domain_type *domain);
+void DoBufferCopy(domain_type *domain, int level, int grid_id, int buffer);
 //------------------------------------------------------------------------------------------------------------------------------
 #endif

Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/miniGMG.reference_output
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/miniGMG.reference_output?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/miniGMG.reference_output (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/miniGMG.reference_output Mon Jun 18 08:56:18 2018
@@ -1,25 +1,46 @@
 1 MPI Tasks of 1 threads
 truncating the v-cycle at 2^3 subdomains
 creating domain...       done
-  64 x 64 x 64 (per subdomain)
-  128 x 128 x 128 (per process)
-  128 x 128 x 128 (overall)
+  32 x 32 x 32 (per subdomain)
+  64 x 64 x 64 (per process)
+  64 x 64 x 64 (overall)
   1-deep ghost zones
-  allocated 246 MB
+  allocated 34 MB
 initializing alpha, beta, RHS for the ``hard problem''...
-  average value of f =   0.000000000000e+00
+  average value of f =  -1.110223024625e-15
 done
 MGBuild...                      
-  level= 0, eigenvalue_max ~= 1.000000e+00
-  level= 1, eigenvalue_max ~= -1.000000e+00
-  level= 2, eigenvalue_max ~= -1.000000e+00
-  level= 3, eigenvalue_max ~= -1.000000e+00
-  level= 4, eigenvalue_max ~= -1.000000e+00
-  level= 5, eigenvalue_max ~= -1.000000e+00
+  level= 0, eigenvalue_max ~= 1.999996e+00
+  level= 1, eigenvalue_max ~= 1.999984e+00
+  level= 2, eigenvalue_max ~= 1.999935e+00
+  level= 3, eigenvalue_max ~= 1.999740e+00
+  level= 4, eigenvalue_max ~= 1.998958e+00
 done
-MGSolve...                      done
-MGSolve...                      done
-Error test: h = 7.812500e-03, max = 0.000000e+00
-Error test: h = 7.812500e-03, L2  = 0.000000e+00
+MGSolve...                      
+v-cycle= 1, norm=0.00104306838695688285 (1.043068e-03)
+v-cycle= 2, norm=0.00008508164981969001 (8.508165e-05)
+v-cycle= 3, norm=0.00001867173484947213 (1.867173e-05)
+v-cycle= 4, norm=0.00000204150498158241 (2.041505e-06)
+v-cycle= 5, norm=0.00000036859010963302 (3.685901e-07)
+v-cycle= 6, norm=0.00000004500376124219 (4.500376e-08)
+v-cycle= 7, norm=0.00000000777275418452 (7.772754e-09)
+v-cycle= 8, norm=0.00000000096441180332 (9.644118e-10)
+v-cycle= 9, norm=0.00000000016688265714 (1.668827e-10)
+v-cycle=10, norm=0.00000000002055567632 (2.055568e-11)
+done
+MGSolve...                      
+v-cycle= 1, norm=0.00104306838695688285 (1.043068e-03)
+v-cycle= 2, norm=0.00008508164981969001 (8.508165e-05)
+v-cycle= 3, norm=0.00001867173484947213 (1.867173e-05)
+v-cycle= 4, norm=0.00000204150498158241 (2.041505e-06)
+v-cycle= 5, norm=0.00000036859010963302 (3.685901e-07)
+v-cycle= 6, norm=0.00000004500376124219 (4.500376e-08)
+v-cycle= 7, norm=0.00000000777275418452 (7.772754e-09)
+v-cycle= 8, norm=0.00000000096441180332 (9.644118e-10)
+v-cycle= 9, norm=0.00000000016688265714 (1.668827e-10)
+v-cycle=10, norm=0.00000000002055567632 (2.055568e-11)
+done
+Error test: h = 1.562500e-02, max = 3.529710e-04
+Error test: h = 1.562500e-02, L2  = 5.559935e-05
 deallocating domain...   done
 exit 0

Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/misc.inc
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/misc.inc?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/misc.inc (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/misc.inc Mon Jun 18 08:56:18 2018
@@ -20,6 +20,7 @@ void zero_grid(domain_type *domain, int
 
 #ifdef OMP
   #pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int i,j,k;
     int pencil = domain->subdomains[box].levels[level].pencil;
@@ -29,7 +30,9 @@ void zero_grid(domain_type *domain, int
     int  dim_j = domain->subdomains[box].levels[level].dim.j;
     int  dim_i = domain->subdomains[box].levels[level].dim.i;
     double * __restrict__ grid = domain->subdomains[box].levels[level].grids[grid_id] + ghosts*(1+pencil+plane);
+#ifdef OMP
     #pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
     for(k=-ghosts;k<dim_k+ghosts;k++){
      for(j=-ghosts;j<dim_j+ghosts;j++){
       for(i=-ghosts;i<dim_i+ghosts;i++){
@@ -37,7 +40,6 @@ void zero_grid(domain_type *domain, int
         grid[ijk] = 0.0;
     }}}
   }
-#endif
   domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
 }
 
@@ -56,6 +58,7 @@ void initialize_grid_to_scalar(domain_ty
   int box;
 #ifdef OMP
   #pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int i,j,k;
     int pencil = domain->subdomains[box].levels[level].pencil;
@@ -65,7 +68,9 @@ void initialize_grid_to_scalar(domain_ty
     int  dim_j = domain->subdomains[box].levels[level].dim.j;
     int  dim_i = domain->subdomains[box].levels[level].dim.i;
     double * __restrict__ grid = domain->subdomains[box].levels[level].grids[grid_id] + ghosts*(1+pencil+plane);
+#ifdef OMP
     #pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
     for(k=-ghosts;k<dim_k+ghosts;k++){
      for(j=-ghosts;j<dim_j+ghosts;j++){
       for(i=-ghosts;i<dim_i+ghosts;i++){
@@ -74,7 +79,6 @@ void initialize_grid_to_scalar(domain_ty
         grid[ijk] = ghostZone ? 0.0 : scalar;
     }}}
   }
-#endif
   domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
 }
 
@@ -92,6 +96,7 @@ void add_grids(domain_type *domain, int
   int box;
 #ifdef OMP
   #pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int i,j,k;
     int pencil = domain->subdomains[box].levels[level].pencil;
@@ -103,7 +108,9 @@ void add_grids(domain_type *domain, int
     double * __restrict__ grid_c = domain->subdomains[box].levels[level].grids[id_c] + ghosts*(1+pencil+plane);
     double * __restrict__ grid_a = domain->subdomains[box].levels[level].grids[id_a] + ghosts*(1+pencil+plane);
     double * __restrict__ grid_b = domain->subdomains[box].levels[level].grids[id_b] + ghosts*(1+pencil+plane);
+#ifdef OMP
     #pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
     for(k=0;k<dim_k;k++){
      for(j=0;j<dim_j;j++){
       for(i=0;i<dim_i;i++){
@@ -111,7 +118,6 @@ void add_grids(domain_type *domain, int
         grid_c[ijk] = scale_a*grid_a[ijk] + scale_b*grid_b[ijk];
     }}}
   }
-#endif
   domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
 }
 
@@ -129,6 +135,7 @@ void mul_grids(domain_type *domain, int
   int box;
 #ifdef OMP
   #pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int i,j,k;
     int pencil = domain->subdomains[box].levels[level].pencil;
@@ -140,7 +147,9 @@ void mul_grids(domain_type *domain, int
     double * __restrict__ grid_c = domain->subdomains[box].levels[level].grids[id_c] + ghosts*(1+pencil+plane);
     double * __restrict__ grid_a = domain->subdomains[box].levels[level].grids[id_a] + ghosts*(1+pencil+plane);
     double * __restrict__ grid_b = domain->subdomains[box].levels[level].grids[id_b] + ghosts*(1+pencil+plane);
+#ifdef OMP
     #pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
     for(k=0;k<dim_k;k++){
      for(j=0;j<dim_j;j++){
       for(i=0;i<dim_i;i++){
@@ -148,7 +157,6 @@ void mul_grids(domain_type *domain, int
         grid_c[ijk] = scale*grid_a[ijk]*grid_b[ijk];
     }}}
   }
-#endif
   domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
 }
 
@@ -167,6 +175,7 @@ void scale_grid(domain_type *domain, int
   int box;
 #ifdef OMP
   #pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int i,j,k;
     int pencil = domain->subdomains[box].levels[level].pencil;
@@ -177,7 +186,9 @@ void scale_grid(domain_type *domain, int
     int  dim_i = domain->subdomains[box].levels[level].dim.i;
     double * __restrict__ grid_c = domain->subdomains[box].levels[level].grids[id_c] + ghosts*(1+pencil+plane);
     double * __restrict__ grid_a = domain->subdomains[box].levels[level].grids[id_a] + ghosts*(1+pencil+plane);
+#ifdef OMP
     #pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
     for(k=0;k<dim_k;k++){
      for(j=0;j<dim_j;j++){
       for(i=0;i<dim_i;i++){
@@ -185,7 +196,6 @@ void scale_grid(domain_type *domain, int
         grid_c[ijk] = scale_a*grid_a[ijk];
     }}}
   }
-#endif
   domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
 }
 
@@ -206,6 +216,7 @@ double dot(domain_type *domain, int leve
 #ifdef OMP
   // FIX, schedule(static) is a stand in to guarantee reproducibility...
   #pragma omp parallel for private(box) if(omp_across_boxes) reduction(+:a_dot_b_domain) schedule(static)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int i,j,k;
     int pencil = domain->subdomains[box].levels[level].pencil;
@@ -217,7 +228,9 @@ double dot(domain_type *domain, int leve
     double * __restrict__ grid_a = domain->subdomains[box].levels[level].grids[id_a] + ghosts*(1+pencil+plane); // i.e. [0] = first non ghost zone point
     double * __restrict__ grid_b = domain->subdomains[box].levels[level].grids[id_b] + ghosts*(1+pencil+plane);
     double a_dot_b_box = 0.0;
+#ifdef OMP
     #pragma omp parallel for private(i,j,k) if(omp_within_a_box) collapse(2) reduction(+:a_dot_b_box) schedule(static)
+#endif
     for(k=0;k<dim_k;k++){
     for(j=0;j<dim_j;j++){
     for(i=0;i<dim_i;i++){
@@ -226,7 +239,6 @@ double dot(domain_type *domain, int leve
     }}}
     a_dot_b_domain+=a_dot_b_box;
   }
-#endif
   domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
 
   #ifdef __MPI
@@ -256,6 +268,7 @@ double norm(domain_type *domain, int lev
 #ifdef OMP
   // FIX, schedule(static) is a stand in to guarantee reproducibility...
   #pragma omp parallel for private(box) if(omp_across_boxes) reduction(max:max_norm) schedule(static)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int i,j,k;
     int pencil = domain->subdomains[box].levels[level].pencil;
@@ -266,7 +279,9 @@ double norm(domain_type *domain, int lev
     int  dim_i = domain->subdomains[box].levels[level].dim.i;
     double * __restrict__ grid   = domain->subdomains[box].levels[level].grids[ grid_id] + ghosts*(1+pencil+plane); // i.e. [0] = first non ghost zone point
     double box_norm = 0.0;
+#ifdef OMP
     #pragma omp parallel for private(i,j,k) if(omp_within_a_box) collapse(2) reduction(max:box_norm) schedule(static)
+#endif
     for(k=0;k<dim_k;k++){
     for(j=0;j<dim_j;j++){
     for(i=0;i<dim_i;i++){
@@ -276,7 +291,6 @@ double norm(domain_type *domain, int lev
     }}}
     if(box_norm>max_norm){max_norm = box_norm;}
   } // box list
-#endif
   domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
 
   #ifdef __MPI
@@ -306,6 +320,7 @@ double mean(domain_type *domain, int lev
   double sum_domain =  0.0;
 #ifdef OMP
   #pragma omp parallel for private(box) if(omp_across_boxes) reduction(+:sum_domain)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int i,j,k;
     int pencil = domain->subdomains[box].levels[level].pencil;
@@ -316,7 +331,9 @@ double mean(domain_type *domain, int lev
     int  dim_i = domain->subdomains[box].levels[level].dim.i;
     double * __restrict__ grid_a = domain->subdomains[box].levels[level].grids[id_a] + ghosts*(1+pencil+plane); // i.e. [0] = first non ghost zone point
     double sum_box = 0.0;
+#ifdef OMP
     #pragma omp parallel for private(i,j,k) if(omp_within_a_box) collapse(2) reduction(+:sum_box)
+#endif
     for(k=0;k<dim_k;k++){
     for(j=0;j<dim_j;j++){
     for(i=0;i<dim_i;i++){
@@ -325,7 +342,6 @@ double mean(domain_type *domain, int lev
     }}}
     sum_domain+=sum_box;
   }
-#endif
   domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
   double ncells_domain = (double)domain->dim.i*(double)domain->dim.j*(double)domain->dim.k;
 
@@ -356,6 +372,7 @@ void shift_grid(domain_type *domain, int
   int box;
 #ifdef OMP
   #pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int i,j,k;
     int pencil = domain->subdomains[box].levels[level].pencil;
@@ -367,7 +384,9 @@ void shift_grid(domain_type *domain, int
     double * __restrict__ grid_c = domain->subdomains[box].levels[level].grids[id_c] + ghosts*(1+pencil+plane); // i.e. [0] = first non ghost zone point
     double * __restrict__ grid_a = domain->subdomains[box].levels[level].grids[id_a] + ghosts*(1+pencil+plane); // i.e. [0] = first non ghost zone point
 
+#ifdef OMP
     #pragma omp parallel for private(i,j,k) if(omp_within_a_box) collapse(2)
+#endif
     for(k=0;k<dim_k;k++){
     for(j=0;j<dim_j;j++){
     for(i=0;i<dim_i;i++){
@@ -375,7 +394,6 @@ void shift_grid(domain_type *domain, int
       grid_c[ijk] = grid_a[ijk] + shift_a;
     }}}
   }
-#endif
   domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
 }
 
@@ -391,6 +409,7 @@ void project_cell_to_face(domain_type *d
   int box;
 #ifdef OMP
   #pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int i,j,k;
     int pencil = domain->subdomains[box].levels[level].pencil;
@@ -407,7 +426,9 @@ void project_cell_to_face(domain_type *d
       case 1: stride = pencil;break;//j-direction
       case 2: stride =  plane;break;//k-direction
     }
+#ifdef OMP
     #pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
     for(k=0;k<=dim_k;k++){ // <= to ensure you do low and high faces
      for(j=0;j<=dim_j;j++){
       for(i=0;i<=dim_i;i++){
@@ -415,6 +436,5 @@ void project_cell_to_face(domain_type *d
         grid_face[ijk] = 0.5*(grid_cell[ijk-stride] + grid_cell[ijk]); // simple linear interpolation
     }}}
   }
-#endif
   domain->cycles.blas1[level] += (uint64_t)(CycleTime()-_timeStart);
 }

Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/residual.inc
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/residual.inc?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/residual.inc (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/residual.inc Mon Jun 18 08:56:18 2018
@@ -23,6 +23,7 @@ void residual(domain_type * domain, int
   int box;
 #ifdef OMP
   #pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int i,j,k;
     int pencil = domain->subdomains[box].levels[level].pencil;
@@ -40,7 +41,9 @@ void residual(domain_type * domain, int
     double * __restrict__ beta_k = domain->subdomains[box].levels[level].grids[__beta_k] + ghosts*(1+pencil+plane);
     double * __restrict__ res    = domain->subdomains[box].levels[level].grids[  res_id] + ghosts*(1+pencil+plane);
 
+#ifdef OMP
     #pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
     for(k=0;k<dim_k;k++){
     for(j=0;j<dim_j;j++){
     for(i=0;i<dim_i;i++){
@@ -57,7 +60,6 @@ void residual(domain_type * domain, int
       res[ijk] = rhs[ijk]-helmholtz;
     }}}
   }
-#endif
   domain->cycles.residual[level] += (uint64_t)(CycleTime()-_timeStart);
 }
 
@@ -85,6 +87,7 @@ void residual_and_restriction(domain_typ
   int box;
 #ifdef OMP
   #pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int kk,jj;
     int pencil_c = domain->subdomains[box].levels[level_c].pencil;
@@ -110,7 +113,9 @@ void residual_and_restriction(domain_typ
     double * __restrict__ beta_k = domain->subdomains[box].levels[level_f].grids[__beta_k] + ghosts_f*(1+pencil_f+plane_f);
     double * __restrict__ res    = domain->subdomains[box].levels[level_c].grids[  res_id] + ghosts_c*(1+pencil_c+plane_c);
 
+#ifdef OMP
     #pragma omp parallel for private(kk,jj) if(omp_within_a_box) collapse(2)
+#endif
     for(kk=0;kk<dim_k_f;kk+=2){
     for(jj=0;jj<dim_j_f;jj+=2){
       int i,j,k;
@@ -136,7 +141,6 @@ void residual_and_restriction(domain_typ
       }
     }}}}
   }
-#endif
   domain->cycles.residual[level_f] += (uint64_t)(CycleTime()-_timeStart);
 }
 #else
@@ -158,7 +162,9 @@ void residual_and_restriction(domain_typ
   int omp_within_a_box = (domain->subdomains[0].levels[level_f].dim.i >= CollaborativeThreadingBoxSize);
   int box;
 
+#ifdef OMP
   #pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int pencil_c = domain->subdomains[box].levels[level_c].pencil;
     int  plane_c = domain->subdomains[box].levels[level_c].plane;
@@ -184,7 +190,10 @@ void residual_and_restriction(domain_typ
     double * __restrict__ res    = domain->subdomains[box].levels[level_c].grids[  res_id] + ghosts_c*(1+pencil_c+plane_c);
 
     int kk;
+#ifdef OMP
     #pragma omp parallel for private(kk) if(omp_within_a_box)
+#endif
+
     for(kk=0;kk<dim_k_f;kk+=2){
       int i,j,k; 
       // zero out the next coarse grid plane

Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/restriction.inc
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/restriction.inc?rev=334954&r1=334953&r2=334954&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/restriction.inc (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/restriction.inc Mon Jun 18 08:56:18 2018
@@ -22,6 +22,7 @@ void restriction(domain_type *domain, in
   int box;
 #ifdef OMP
   #pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int i,j,k;
     int ghosts_c = domain->subdomains[box].levels[level_c].ghosts;
@@ -37,8 +38,10 @@ void restriction(domain_type *domain, in
   
     double * __restrict__ grid_f = domain->subdomains[box].levels[level_f].grids[id_f] + ghosts_f*(1+pencil_f+plane_f);
     double * __restrict__ grid_c = domain->subdomains[box].levels[level_c].grids[id_c] + ghosts_c*(1+pencil_c+plane_c);
-  
+
+#ifdef OMP  
     #pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
     for(k=0;k<dim_k_c;k++){
     for(j=0;j<dim_j_c;j++){
     for(i=0;i<dim_i_c;i++){
@@ -50,7 +53,6 @@ void restriction(domain_type *domain, in
                         grid_f[ijk_f  +pencil_f+plane_f]+grid_f[ijk_f+1+pencil_f+plane_f] ) * 0.125;
     }}}
   }
-#endif
   domain->cycles.restriction[level_f] += (uint64_t)(CycleTime()-_timeStart);
 }
 
@@ -71,6 +73,7 @@ void restriction_betas(domain_type * dom
 
 #ifdef OMP
   #pragma omp parallel for private(box) if(omp_across_boxes)
+#endif
   for(box=0;box<domain->subdomains_per_rank;box++){
     int i,j,k;
     int ghosts_c = domain->subdomains[box].levels[level_c].ghosts;
@@ -103,7 +106,9 @@ void restriction_betas(domain_type * dom
     // restrict beta_j (== face in ik)
     beta_f = domain->subdomains[box].levels[level_f].grids[__beta_j] + ghosts_f*plane_f + ghosts_f*pencil_f + ghosts_f;
     beta_c = domain->subdomains[box].levels[level_c].grids[__beta_j] + ghosts_c*plane_c + ghosts_c*pencil_c + ghosts_c;
+#ifdef OMP
     #pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
     for(k=0;k<dim_k_c;k++){
     for(j=0;j<dim_j_c;j++){
     for(i=0;i<dim_i_c;i++){
@@ -116,7 +121,9 @@ void restriction_betas(domain_type * dom
     // restrict beta_k (== face in ij)
     beta_f = domain->subdomains[box].levels[level_f].grids[__beta_k] + ghosts_f*plane_f + ghosts_f*pencil_f + ghosts_f;
     beta_c = domain->subdomains[box].levels[level_c].grids[__beta_k] + ghosts_c*plane_c + ghosts_c*pencil_c + ghosts_c;
+#ifdef OMP
     #pragma omp parallel for private(k,j,i) if(omp_within_a_box) collapse(2)
+#endif
     for(k=0;k<dim_k_c;k++){
     for(j=0;j<dim_j_c;j++){
     for(i=0;i<dim_i_c;i++){
@@ -126,6 +133,5 @@ void restriction_betas(domain_type * dom
                         beta_f[ijk_f+pencil_f]+beta_f[ijk_f+1+pencil_f] ) * 0.25;
     }}}
   }
-#endif
   domain->cycles.restriction[level_f] += (uint64_t)(CycleTime()-_timeStart);
 }



