[test-suite] r311411 - [test-suite] Adding the miniFE Benchmark

Mon Aug 21 16:54:29 PDT 2017

Author: hfinkel
Date: Mon Aug 21 16:54:28 2017
New Revision: 311411

URL: http://llvm.org/viewvc/llvm-project?rev=311411&view=rev
Log:
[test-suite] Adding the miniFE Benchmark

MiniFE is a proxy application for unstructured implicit finite element codes.
It is similar to HPCCG and pHPCCG but provides a much more complete vertical
covering of the steps in this class of applications. MiniFE also provides
support for computation on multicore nodes, including pthreads and Intel
Threading Building Blocks (TBB) for homogeneous multicore and CUDA for GPUs.

This is a serial build for the test suite.

Web: https://mantevo.org/packages/
Github: https://github.com/Mantevo/miniFE

When run on an Intel Xeon CPU E5-2699 v4 @ 2.20GHz:
compile_time: 65.8697
exec_time: 3.3827
Maximum resident set size (kbytes): 401472

Patch by Brian Homerding, thanks!

Differential Revision: https://reviews.llvm.org/D36683

Added:
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Box.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxIterator.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxPartition.cpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxPartition.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeLists.txt
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CSRMatrix.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/ELLMatrix.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/ElemData.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/GetNodesCoords.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_ElemData.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_box_utils.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_enums.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Makefile
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/MatrixCopyOp.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/MatrixInitOp.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Parameters.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/README
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/SparseMatrix_functions.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/TypeTraits.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Vector.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Vector_functions.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Doc.cpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Doc.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Element.cpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Element.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/analytic_soln.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/assemble_FE_data.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/box_utils.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/cg_solve.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/compute_matrix_stats.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/driver.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/exchange_externals.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/gauss_pts.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/generate_matrix_structure.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/imbalance.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/main.cpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/make_local_matrix.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/matrix_algebra_3x3.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE.reference_output
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE_no_info.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE_version.h
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/mytimer.cpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/mytimer.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/outstream.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/param_utils.cpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/param_utils.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/perform_element_loop.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/simple_mesh_description.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/time_kernels.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/utils.cpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/utils.hpp
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/verify_solution.hpp
Modified:
    test-suite/trunk/LICENSE.TXT
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CMakeLists.txt
    test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/Makefile

Modified: test-suite/trunk/LICENSE.TXT
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/LICENSE.TXT?rev=311411&r1=311410&r2=311411&view=diff
==============================================================================

--- test-suite/trunk/LICENSE.TXT (original)
+++ test-suite/trunk/LICENSE.TXT Mon Aug 21 16:54:28 2017
@@ -87,6 +87,7 @@ miniAMR:            llvm-test/MultiSourc
 XSBench:            llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench
 HPCCG:              llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C++/HPCCG
 PENNANT:            llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT
+miniFE:             llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE
 Fhourstones:        llvm-test/MultiSource/Benchmarks/Fhourstones
 Fhourstones-3.1:    llvm-test/MultiSource/Benchmarks/Fhourstones-3.1
 McCat:              llvm-test/MultiSource/Benchmarks/McCat

Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CMakeLists.txt?rev=311411&r1=311410&r2=311411&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CMakeLists.txt (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CMakeLists.txt Mon Aug 21 16:54:28 2017
@@ -1,2 +1,3 @@
 add_subdirectory(HPCCG)
 add_subdirectory(PENNANT)
+add_subdirectory(miniFE)

Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/Makefile
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/Makefile?rev=311411&r1=311410&r2=311411&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/Makefile (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/Makefile Mon Aug 21 16:54:28 2017
@@ -1,6 +1,6 @@
 # MultiSource/DOE-ProxyApps-C++ Makefile: Build all subdirectories automatically
 
 LEVEL = ../../..
-PARALLEL_DIRS = HPCCG PENNANT
+PARALLEL_DIRS = HPCCG PENNANT miniFE
 
 include $(LEVEL)/Makefile.programs

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Box.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/Box.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Box.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Box.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,55 @@
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#ifndef _Box_hpp_
+#define _Box_hpp_
+
+/**
+  * a 'Box' is 3 pairs of ints, where each pair specifies a lower
+  * and upper bound for one of the 3 spatial dimensions.
+  *
+  * This struct stores the 3 pairs as a simple array of 6 ints,
+  * but defines the bracket operator so that it can be referenced
+  * using 2-dimensional array notation like this:
+  * int xmin = box[0][0]; int xmax = box[0][1];
+  * int ymin = box[1][0]; int ymax = box[1][1];
+  * int zmin = box[2][0]; int zmax = box[2][1];
+ */
+struct Box {
+  int ranges[6];
+#ifdef __CUDACC__
+__host__ __device__ __inline__
+#endif
+  int* operator[](int xyz) { return &ranges[xyz*2]; }
+#ifdef __CUDACC__
+__host__ __device__ __inline__
+#endif
+  const int* operator[](int xyz) const { return &ranges[xyz*2]; }
+};
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxIterator.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/BoxIterator.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxIterator.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxIterator.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,142 @@
+#ifndef _BoxTraverser_hpp_
+#define _BoxTraverser_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+namespace miniFE {
+
+/** Class for traversing a 3-dimensional 'box' of indices.
+
+  //One way to traverse a 'box[3][2]' is to use a triply-nested for-loop:
+  for(int z=box[2][0]; z<box[2][1]; ++z) {
+    for(int y=box[1][0]; y<box[1][1]; ++y) {
+      for(int x=box[0][0]; x<box[0][1]; ++x) {
+        ...
+      }
+    }
+  }
+
+  //Another way is to use this BoxIterator class, like so:
+  BoxIterator iter = BoxIterator::begin(box);
+  BoxIterator end = BoxIterator::end(box);
+  for(; iter != end; ++iter) {
+    int x = iter.x;
+    int y = iter.y;
+    int z = iter.z;
+    ...
+  }
+*/
+class BoxIterator {
+public:
+  ~BoxIterator(){}
+
+  static BoxIterator begin(const Box& box)
+  {
+    return BoxIterator(box);
+  }
+
+  static BoxIterator end(const Box& box)
+  {
+    return BoxIterator(box, true/*at_end==true*/);
+  }
+
+  BoxIterator& operator=(const BoxIterator& src)
+  {
+    box_[0][0] = src.box_[0][0]; box_[0][1] = src.box_[0][1];
+    box_[1][0] = src.box_[1][0]; box_[1][1] = src.box_[1][1];
+    box_[2][0] = src.box_[2][0]; box_[2][1] = src.box_[2][1];
+    x = src.x;
+    y = src.y;
+    z = src.z;
+    return *this;
+  }
+
+  BoxIterator& operator++()
+  {
+    ++x;
+    if (x >= box_[0][1]) {
+      x = box_[0][0];
+      ++y;
+      if (y >= box_[1][1]) {
+        y = box_[1][0];
+        ++z;
+        if (z >= box_[2][1]) {
+          z = box_[2][1];
+          y = box_[1][1];
+          x = box_[0][1];
+        }
+      }
+    }
+    return *this;
+  }
+
+  BoxIterator operator++(int)
+  {
+    BoxIterator temp = *this;
+    ++(*this);
+    return temp;
+  }
+
+  bool operator==(const BoxIterator& rhs) const
+  {
+    return x == rhs.x && y == rhs.y && z == rhs.z;
+  }
+
+  bool operator!=(const BoxIterator& rhs) const
+  {
+    return !(this->operator==(rhs));
+  }
+
+  int x;
+  int y;
+  int z;
+
+private:
+  BoxIterator(const Box& box, bool at_end = false)
+   : x(box[0][0]),
+     y(box[1][0]),
+     z(box[2][0]),
+     box_()
+  {
+    box_[0][0] = box[0][0]; box_[0][1] = box[0][1];
+    box_[1][0] = box[1][0]; box_[1][1] = box[1][1];
+    box_[2][0] = box[2][0]; box_[2][1] = box[2][1];
+    if (at_end) {
+      x = box[0][1];
+      y = box[1][1];
+      z = box[2][1];
+    }
+  }
+
+  Box box_;
+};//class BoxTraverser
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxPartition.cpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/BoxPartition.cpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxPartition.cpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxPartition.cpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,503 @@
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <Box.hpp>
+#include <BoxPartition.hpp>
+
+/*--------------------------------------------------------------------*/
+
+static int box_map_local_entry( const Box& box ,
+                                const int ghost ,
+                                int local_x ,
+                                int local_y ,
+                                int local_z )
+{
+  const int nx = 2 * ghost + box[0][1] - box[0][0] ;
+  const int ny = 2 * ghost + box[1][1] - box[1][0] ;
+  const int nz = 2 * ghost + box[2][1] - box[2][0] ;
+  int result = -1 ;
+
+  local_x += ghost ;
+  local_y += ghost ;
+  local_z += ghost ;
+
+  if ( 0 <= local_x && local_x < nx &&
+       0 <= local_y && local_y < ny &&
+       0 <= local_z && local_z < nz ) {
+
+    result = local_z * ny * nx + local_y * nx + local_x ;
+  }
+  return result ;
+}
+
+int box_map_local( const Box& box_local,
+                   const int ghost ,
+                   const int box_local_map[] ,
+                   const int local_x ,
+                   const int local_y ,
+                   const int local_z )
+{
+  int result = box_map_local_entry(box_local,ghost,local_x,local_y,local_z);
+
+  if ( 0 <= result ) {
+    result = box_local_map[ result ];
+  }
+
+  return result ;
+}
+
+/*--------------------------------------------------------------------*/
+/* Recursively split a box into (up-ip) sub-boxes */
+
+void box_partition( int ip , int up , int axis ,
+                    const Box& box,
+                    Box* p_box )
+{
+  const int np = up - ip ;
+  if ( 1 == np ) {
+    p_box[ip][0][0] = box[0][0] ; p_box[ip][0][1] = box[0][1] ;
+    p_box[ip][1][0] = box[1][0] ; p_box[ip][1][1] = box[1][1] ;
+    p_box[ip][2][0] = box[2][0] ; p_box[ip][2][1] = box[2][1] ;
+  }
+  else {
+    const int n = box[ axis ][1] - box[ axis ][0] ;
+    const int np_low = np / 2 ;  /* Rounded down */
+    const int np_upp = np - np_low ;
+
+    const int n_upp = (int) (((double) n) * ( ((double)np_upp) / ((double)np)));
+    const int n_low = n - n_upp ;
+    const int next_axis = ( axis + 2 ) % 3 ;
+
+    if ( np_low ) { /* P = [ip,ip+np_low) */
+      Box dbox ;
+      dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ;
+      dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ;
+      dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ;
+
+      dbox[ axis ][1] = dbox[ axis ][0] + n_low ;
+
+      box_partition( ip, ip + np_low, next_axis, dbox, p_box );
+    }
+
+    if ( np_upp ) { /* P = [ip+np_low,ip+np_low+np_upp) */
+      Box dbox;
+      dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ;
+      dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ;
+      dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ;
+
+      ip += np_low ;
+      dbox[ axis ][0] += n_low ;
+      dbox[ axis ][1]  = dbox[ axis ][0] + n_upp ;
+
+      box_partition( ip, ip + np_upp, next_axis, dbox, p_box );
+    }
+  }
+}
+
+/*--------------------------------------------------------------------*/
+
+static int box_disjoint( const Box& a , const Box& b)
+{
+  return a[0][1] <= b[0][0] || b[0][1] <= a[0][0] ||
+         a[1][1] <= b[1][0] || b[1][1] <= a[1][0] ||
+         a[2][1] <= b[2][0] || b[2][1] <= a[2][0] ;
+}
+
+static void resize_int( int ** a , int * allocLen , int newLen )
+{
+  int k = 32;
+  while ( k < newLen ) { k <<= 1 ; }
+  if ( NULL == *a )
+    { *a = (int*)malloc( sizeof(int)*(*allocLen = k) ); }
+  else if ( *allocLen < k ) 
+    { *a = (int*)realloc(*a , sizeof(int)*(*allocLen = k)); }
+}
+
+static void box_partition_maps( 
+  const int np ,
+  const int my_p ,
+  const Box* pbox,
+  const int ghost ,
+  int ** map_local_id ,
+  int ** map_recv_pc ,
+  int ** map_send_pc ,
+  int ** map_send_id )
+{
+  const Box& my_box = pbox[my_p] ;
+
+  const int my_ix = my_box[0][0] ;
+  const int my_iy = my_box[1][0] ;
+  const int my_iz = my_box[2][0] ;
+  const int my_nx = my_box[0][1] - my_box[0][0] ;
+  const int my_ny = my_box[1][1] - my_box[1][0] ;
+  const int my_nz = my_box[2][1] - my_box[2][0] ;
+
+  const int my_use_nx = 2 * ghost + my_nx ;
+  const int my_use_ny = 2 * ghost + my_ny ;
+  const int my_use_nz = 2 * ghost + my_nz ;
+
+  const int id_length = my_use_nx * my_use_ny * my_use_nz ;
+
+  int * local_id  = (int *) malloc( id_length * sizeof(int) );
+  int * recv_pc   = (int *) malloc( ( np + 1 ) * sizeof(int) );
+  int * send_pc   = (int *) malloc( ( np + 1 ) * sizeof(int) );
+
+  int * send_id  = NULL ;
+  int   send_id_size = 0 ;
+
+  int iLocal , iSend ;
+  int i ;
+
+  Box my_use_box;
+
+  my_use_box[0][0] = my_box[0][0] - ghost ;
+  my_use_box[0][1] = my_box[0][1] + ghost ;
+  my_use_box[1][0] = my_box[1][0] - ghost ;
+  my_use_box[1][1] = my_box[1][1] + ghost ;
+  my_use_box[2][0] = my_box[2][0] - ghost ;
+  my_use_box[2][1] = my_box[2][1] + ghost ;
+
+  for ( i = 0 ; i < id_length ; ++i ) { local_id[i] = -1 ; }
+
+  iSend = 0 ;
+  iLocal = 0 ;
+
+  /* The vector space is partitioned by processors */
+
+  for ( i = 0 ; i < np ; ++i ) {
+    const int ip = ( i + my_p ) % np ;
+    recv_pc[i] = iLocal ;
+    send_pc[i] = iSend ;
+
+    if ( ! box_disjoint( my_use_box , pbox[ip] ) ) {
+      const int p_ix = pbox[ip][0][0] ;
+      const int p_iy = pbox[ip][1][0] ;
+      const int p_iz = pbox[ip][2][0] ;
+      const int p_ex = pbox[ip][0][1] ;
+      const int p_ey = pbox[ip][1][1] ;
+      const int p_ez = pbox[ip][2][1] ;
+
+      int local_x , local_y , local_z ;
+
+      /* Run the span of global cells that my processor uses */
+
+      for ( local_z = -ghost ; local_z < my_nz + ghost ; ++local_z ) {
+      for ( local_y = -ghost ; local_y < my_ny + ghost ; ++local_y ) {
+      for ( local_x = -ghost ; local_x < my_nx + ghost ; ++local_x ) {
+
+        const int global_z = local_z + my_iz ;
+        const int global_y = local_y + my_iy ;
+        const int global_x = local_x + my_ix ;
+
+        const int entry = 
+          box_map_local_entry(my_box,ghost,local_x,local_y,local_z);
+
+        if ( entry < 0 ) { abort(); }
+
+        if ( p_iz <= global_z && global_z < p_ez &&
+             p_iy <= global_y && global_y < p_ey &&
+             p_ix <= global_x && global_x < p_ex ) {
+
+          /* This ordinal is owned by processor 'ip' */
+
+          local_id[ entry ] = iLocal++ ;
+
+#if defined(DEBUG_PRINT)
+if ( my_p != ip ) {
+  fprintf(stdout,"  (%d,%d,%d) : P%d recv at local %d from P%d\n",
+                  global_x,global_y,global_z,my_p,local_id[entry],ip);
+  fflush(stdout);
+}
+#endif
+        }
+
+        /* If in my ownership and used by the other processor */
+        if ( my_p != ip &&
+             /* In my ownership: */
+             ( 0 <= local_z && local_z < my_nz &&
+               0 <= local_y && local_y < my_ny &&
+               0 <= local_x && local_x < my_nx ) &&
+             /* In other processors usage: */
+             ( p_iz - ghost <= global_z && global_z < p_ez + ghost &&
+               p_iy - ghost <= global_y && global_y < p_ey + ghost &&
+               p_ix - ghost <= global_x && global_x < p_ex + ghost ) ) {
+
+          resize_int( & send_id , & send_id_size , (iSend + 1) );
+          send_id[ iSend ] = local_id[ entry ] ;
+          ++iSend ;
+
+#if defined(DEBUG_PRINT)
+{
+  fprintf(stdout,"  (%d,%d,%d) : P%d send at local %d to P%d\n",
+                  global_x,global_y,global_z,my_p,local_id[entry],ip);
+  fflush(stdout);
+}
+#endif
+        }
+      }
+    }
+    }
+    }
+  }
+  recv_pc[np] = iLocal ;
+  send_pc[np] = iSend ;
+
+  *map_local_id  = local_id ;
+  *map_recv_pc   = recv_pc ;
+  *map_send_pc   = send_pc ;
+  *map_send_id   = send_id ;
+}
+
+void box_partition_rcb( const int np , 
+                        const int my_p ,
+                        const Box& root_box,
+                        const int ghost ,
+                        Box** pbox,
+                        int ** map_local_id ,
+                        int ** map_recv_pc ,
+                        int ** map_send_pc ,
+                        int ** map_send_id )
+{
+  *pbox = new Box[ np ];
+
+  box_partition( 0 , np , 2 , root_box , *pbox );
+
+  box_partition_maps( np , my_p , *pbox , ghost ,
+                      map_local_id , map_recv_pc , 
+                      map_send_pc , map_send_id );
+}
+
+/*--------------------------------------------------------------------*/
+
+#ifdef UNIT_TEST
+
+static int box_contain( const Box& a , const Box& b )
+{
+  return a[0][0] <= b[0][0] && b[0][1] <= a[0][1] &&
+         a[1][0] <= b[1][0] && b[1][1] <= a[1][1] &&
+         a[2][0] <= b[2][0] && b[2][1] <= a[2][1] ;
+}
+
+static void box_print( FILE * fp , const Box& a )
+{
+  fprintf(fp,"{ [ %d , %d ) , [ %d , %d ) , [ %d , %d ) }",
+                a[0][0] , a[0][1] ,  
+                a[1][0] , a[1][1] ,  
+                a[2][0] , a[2][1] );
+}
+
+static void test_box( const Box& box , const int np )
+{
+  const int ncell_box = box[0][1] * box[1][1] * box[2][1] ;
+  int ncell_total = 0 ;
+  int ncell_min = ncell_box ;
+  int ncell_max = 0 ;
+  std::vector<Box> pbox(np);
+  int i , j ;
+
+  box_partition( 0 , np , 2 , box , &pbox[0] );
+
+  for ( i = 0 ; i < np ; ++i ) {
+    const int ncell = ( pbox[i][0][1] - pbox[i][0][0] ) *
+                      ( pbox[i][1][1] - pbox[i][1][0] ) *
+                      ( pbox[i][2][1] - pbox[i][2][0] );
+
+    if ( ! box_contain( box , pbox[i] ) ) {
+      fprintf(stdout,"  OUT OF BOUNDS pbox[%d/%d] = ",i,np);
+      box_print(stdout,pbox[i]);
+      fprintf(stdout,"\n");
+      abort();
+    }
+
+    for ( j = i + 1 ; j < np ; ++j ) {
+      if ( ! box_disjoint( pbox[i] , pbox[j] ) ) {
+        fprintf(stdout,"  NOT DISJOINT pbox[%d/%d] = ",i,np);
+        box_print(stdout, pbox[i]);
+        fprintf(stdout,"\n");
+        fprintf(stdout,"               pbox[%d/%d] = ",j,np);
+        box_print(stdout, pbox[j]);
+        fprintf(stdout,"\n");
+        abort();
+      }
+    }
+    ncell_total += ncell ;
+
+    if ( ncell_max < ncell ) { ncell_max = ncell ; }
+    if ( ncell < ncell_min ) { ncell_min = ncell ; }
+  }
+
+  if ( ncell_total != ncell_box ) {
+    fprintf(stdout,"  WRONG CELL COUNT NP = %d\n",np);
+    abort();
+  }
+  fprintf(stdout,"NP = %d, total = %d, avg = %d, min = %d, max = %d\n",
+          np,ncell_box,ncell_box/np,ncell_min,ncell_max);
+}
+
+/*--------------------------------------------------------------------*/
+
+static void test_maps( const Box& root_box , const int np )
+{
+  const int ghost = 1 ;
+  const int nx_global = root_box[0][1] - root_box[0][0] ;
+  const int ny_global = root_box[1][1] - root_box[1][0] ;
+  int ieq , i , j ;
+  std::vector<Box> pbox(np);
+  int **local_values ;
+  int **map_local_id ;
+  int **map_recv_pc ;
+  int **map_send_pc ;
+  int **map_send_id ;
+
+  box_partition( 0 , np , 2 , root_box , &pbox[0] );
+
+  local_values = (int **) malloc( sizeof(int*) * np );
+  map_local_id = (int **) malloc( sizeof(int*) * np );
+  map_recv_pc  = (int **) malloc( sizeof(int*) * np );
+  map_send_pc  = (int **) malloc( sizeof(int*) * np );
+  map_send_id  = (int **) malloc( sizeof(int*) * np );
+
+  /* Set each local value to the global equation number */
+
+  for ( ieq = i = 0 ; i < np ; ++i ) {
+    const Box& mybox = pbox[i] ;
+    const int nx = mybox[0][1] - mybox[0][0] ;
+    const int ny = mybox[1][1] - mybox[1][0] ;
+    const int nz = mybox[2][1] - mybox[2][0] ;
+    int ix , iy , iz ;
+
+    /* Generate the partition maps for this rank */
+    box_partition_maps( np , i , &pbox[0] , ghost ,
+                        & map_local_id[i] , & map_recv_pc[i] , 
+                        & map_send_pc[i] , & map_send_id[i] );
+
+    local_values[i] = (int *) malloc( sizeof(int) * map_recv_pc[i][np] );
+
+    for ( iz = -ghost ; iz < nz + ghost ; ++iz ) {
+    for ( iy = -ghost ; iy < ny + ghost ; ++iy ) {
+    for ( ix = -ghost ; ix < nx + ghost ; ++ix ) {
+      const int ieq = box_map_local(mybox,ghost,map_local_id[i],ix,iy,iz);
+
+      if ( 0 <= ieq ) {
+        const int ix_global = ix + mybox[0][0] ;
+        const int iy_global = iy + mybox[1][0] ;
+        const int iz_global = iz + mybox[2][0] ;
+
+        if ( root_box[0][0] <= ix_global && ix_global < root_box[0][1] &&
+             root_box[1][0] <= iy_global && iy_global < root_box[1][1] &&
+             root_box[2][0] <= iz_global && iz_global < root_box[2][1] ) {
+
+          local_values[i][ ieq ] = ix_global +
+                                   iy_global * nx_global +
+                                   iz_global * nx_global * ny_global ;
+        }
+        else {
+          local_values[i][ ieq ] = -1 ;
+        }
+      }
+    }
+    }
+    }
+  }
+
+  /* Pair-wise compare the local values */
+  /* i  == receiving processor rank */
+  /* ip == sending   processor rank */
+  /* j  == receiving processor data entry for message from 'ip' */
+  /* jp == sending   processor data entry for message to   'i' */
+
+  for ( i = 0 ; i < np ; ++i ) {
+    for ( j = 1 ; j < np ; ++j ) {
+      const int ip = ( i + j ) % np ;
+      const int jp = ( i + np - ip ) % np ;
+      const int nrecv = map_recv_pc[i] [j+1]  - map_recv_pc[i] [j] ;
+      const int nsend = map_send_pc[ip][jp+1] - map_send_pc[ip][jp] ;
+      int k ;
+      if ( nrecv != nsend ) {
+        fprintf(stderr,"P%d recv %d from P%d\n",i,nrecv,ip);
+        fprintf(stderr,"P%d send %d to   P%d\n",ip,nsend,i);
+        abort();
+      }
+      for ( k = 0 ; k < nrecv ; ++k ) {
+        const int irecv = map_recv_pc[i][j] + k ;
+        const int isend = map_send_pc[ip][jp] + k ;
+        const int val_irecv = local_values[i][irecv] ;
+        const int val_isend = local_values[ip][ map_send_id[ip][isend] ] ;
+        if ( val_irecv != val_isend ) {
+          fprintf(stderr,"P%d recv[%d] = %d , from P%d\n",i,k,val_irecv,ip);
+          fprintf(stderr,"P%d send[%d] = %d , to   P%d\n",ip,k,val_isend,i);
+          abort();
+        }
+      }
+    }
+  }
+
+  for ( i = 0 ; i < np ; ++i ) {
+    free( map_local_id[i] );
+    free( map_recv_pc[i] );
+    free( map_send_pc[i] );
+    free( map_send_id[i] );
+    free( local_values[i] );
+  }
+  free( map_send_id );
+  free( map_send_pc );
+  free( map_recv_pc );
+  free( map_local_id );
+  free( local_values );
+}
+
+/*--------------------------------------------------------------------*/
+
+int main( int argc , char * argv[] )
+{
+  int np_max = 256 ;
+  Box box = { 0 , 64 , 0 , 64 , 0 , 64 };
+  int np = 0 ;
+
+  switch( argc ) {
+  case 3:
+    sscanf(argv[1],"%d",&np);
+    sscanf(argv[2],"%dx%dx%d",& box[0][1] , & box[1][1] , & box[2][1] );
+    if ( 0 < np ) { test_box( box , np ); }
+    if ( 0 < np ) { test_maps( box , np ); }
+    break ;
+  default:
+    for ( np = 1 ; np <= np_max ; ++np ) {
+      test_box( box , np );
+      test_maps( box , np );
+    }
+    break ;
+  }
+  return 0 ;
+}
+
+#endif
+
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxPartition.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/BoxPartition.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxPartition.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxPartition.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,103 @@
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#ifndef _BoxPartition_hpp_
+#define _BoxPartition_hpp_
+
+#include <Box.hpp>
+
+/** \brief Recursively split a box into (up-ip) sub-boxes
+ *
+ *  NOTE(review): the definition is not visible from this header.
+ *  Presumably the resulting sub-boxes are stored in p_box[ip] ..
+ *  p_box[up-1] and 'axis' selects the direction of the first
+ *  split -- confirm against BoxPartition.cpp.
+ */
+void box_partition( int ip , int up , int axis ,
+                    const Box& box ,
+                    Box* p_box );
+
+/** \brief  Partition a { [ix,jx) X [iy,jy) X [iz,jz) } box.
+ *
+ *  Use recursive coordinate bisection to partition a box
+ *  into np disjoint sub-boxes.  Allocate (via malloc) and
+ *  populate the sub-boxes, mapping the local (x,y,z) to
+ *  a local ordinal, and mappings for the send-recv messages
+ *  to update the ghost cells.
+ *
+ *  In the usage sketch below map_local_id, map_recv_pc, map_send_pc
+ *  and map_send_id are indexed as plain int arrays, i.e. presumably
+ *  the arrays returned through the corresponding int** output
+ *  arguments -- confirm against the definition.
+ *
+ *  usage:
+ *
+ *  my_nx = pbox[my_p][0][1] - pbox[my_p][0][0] ;
+ *  my_ny = pbox[my_p][1][1] - pbox[my_p][1][0] ;
+ *  my_nz = pbox[my_p][2][1] - pbox[my_p][2][0] ;
+ *
+ *  for ( x = -ghost ; x < my_nx + ghost ; ++x ) {
+ *  for ( y = -ghost ; y < my_ny + ghost ; ++y ) {
+ *  for ( z = -ghost ; z < my_nz + ghost ; ++z ) {
+ *    const int x_global = x + pbox[my_p][0][0] ;
+ *    const int y_global = y + pbox[my_p][1][0] ;
+ *    const int z_global = z + pbox[my_p][2][0] ;
+ *
+ *    const int local_ordinal =
+ *      box_map_local( pbox[my_p], ghost, map_local_id, x, y, z );
+ *
+ *    if ( 0 <= local_ordinal ) {
+ *    }
+ *  }}}
+ *
+ *  for ( i = 1 ; i < np ; ++i ) {
+ *    const int recv_processor = ( my_p + i ) % np ;
+ *    const int recv_ordinal_begin = map_recv_pc[i];
+ *    const int recv_ordinal_end   = map_recv_pc[i+1];
+ *  }
+ *
+ *  for ( i = 1 ; i < np ; ++i ) {
+ *    const int send_processor = ( my_p + i ) % np ;
+ *    const int send_map_begin = map_send_pc[i];
+ *    const int send_map_end   = map_send_pc[i+1];
+ *    for ( j = send_map_begin ; j < send_map_end ; ++j ) {
+ *      send_ordinal = map_send_id[j] ;
+ *    }
+ *  }
+ */
+void box_partition_rcb( 
+  const int np            /**< [in]  Number of partitions */ ,
+  const int my_p          /**< [in]  My partition rank    */ ,
+  const Box& root_box     /**< [in]  3D Box to partition  */ ,
+  const int ghost         /**< [in]  Ghost cell boundary  */ ,
+  Box* pbox               /**< [out] Partition's 3D boxes */ ,
+  int ** map_local_id     /**< [out] Map local cells */ ,
+  int ** map_recv_pc      /**< [out] Receive spans per processor */ ,
+  int ** map_send_pc      /**< [out] Send prefix counts per processor */ ,
+  int ** map_send_id      /**< [out] Send message ordinals */ );
+
+/** \brief  Map a local (x,y,z) to a local ordinal.
+ *
+ *  NOTE(review): the usage sketch in the box_partition_rcb comment
+ *  tests the result with ( 0 <= local_ordinal ), which suggests a
+ *  negative return marks a cell without a local ordinal -- confirm
+ *  against the definition.
+ */
+int box_map_local( const Box& box_local ,
+                   const int ghost ,
+                   const int map_local_id[] ,
+                   const int local_x ,
+                   const int local_y ,
+                   const int local_z );
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/CMakeLists.txt?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeLists.txt (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeLists.txt Mon Aug 21 16:54:28 2017
@@ -0,0 +1,5 @@
+# Build description for the miniFE benchmark executable.
+set(PROG miniFE)
+# Headers are found relative to this directory (-I.); the scalar and ordinal
+# types are fixed to double / int / int, and MINIFE_CSR_MATRIX is defined
+# (presumably selecting CSRMatrix rather than ELLMatrix -- confirm in the
+# sources that test this macro).
+list(APPEND CXXFLAGS -I. -DMINIFE_SCALAR=double  -DMINIFE_LOCAL_ORDINAL=int -DMINIFE_GLOBAL_ORDINAL=int -DMINIFE_CSR_MATRIX)
+list(APPEND LDFLAGS -lm)
+# Command-line arguments used when the benchmark is run.
+set(RUN_OPTIONS -nx 64 -ny 64 -nz 64)
+llvm_multisource()

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CSRMatrix.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/CSRMatrix.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CSRMatrix.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CSRMatrix.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,134 @@
+#ifndef _CSRMatrix_hpp_
+#define _CSRMatrix_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <cstddef>
+#include <vector>
+#include <algorithm>
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+namespace miniFE {
+
+/** \brief  Sparse matrix in compressed-sparse-row (CSR) layout.
+ *
+ *  rows[i] is the global id of the i-th stored row; the entries of
+ *  that row occupy positions [row_offsets[i], row_offsets[i+1]) of
+ *  packed_cols (column ids) and packed_coefs (values).  Row lookup
+ *  falls back to std::lower_bound, so rows is expected to be sorted.
+ */
+template<typename Scalar,
+         typename LocalOrdinal,
+         typename GlobalOrdinal>
+struct
+CSRMatrix {
+  CSRMatrix()
+   : has_local_indices(false),
+     rows(), row_offsets(), row_offsets_external(),
+     packed_cols(), packed_coefs(),
+     num_cols(0)
+#ifdef HAVE_MPI
+     ,external_index(), external_local_index(), elements_to_send(),
+      neighbors(), recv_length(), send_length(), send_buffer(), request()
+#endif
+  {
+  }
+
+  ~CSRMatrix()
+  {}
+
+  typedef Scalar        ScalarType;
+  typedef LocalOrdinal  LocalOrdinalType;
+  typedef GlobalOrdinal GlobalOrdinalType;
+
+  bool                       has_local_indices;
+  std::vector<GlobalOrdinal> rows;
+  std::vector<LocalOrdinal>  row_offsets;
+  std::vector<LocalOrdinal>  row_offsets_external;
+  std::vector<GlobalOrdinal> packed_cols;
+  std::vector<Scalar>        packed_coefs;
+  LocalOrdinal               num_cols;
+
+#ifdef HAVE_MPI
+  std::vector<GlobalOrdinal> external_index;
+  std::vector<GlobalOrdinal>  external_local_index;
+  std::vector<GlobalOrdinal> elements_to_send;
+  std::vector<int>           neighbors;
+  std::vector<LocalOrdinal>  recv_length;
+  std::vector<LocalOrdinal>  send_length;
+  std::vector<Scalar>        send_buffer;
+  std::vector<MPI_Request>   request;
+#endif
+
+  /** Number of stored entries, i.e. the last row offset.
+   *  Returns 0 while row_offsets is still empty (for example on a
+   *  default-constructed matrix) instead of indexing out of range.
+   */
+  size_t num_nonzeros() const
+  {
+    if (row_offsets.empty()) {
+      return 0;
+    }
+    return row_offsets[row_offsets.size()-1];
+  }
+
+  /** Size rows to nrows and row_offsets to nrows+1, and reserve room
+   *  for nrows*ncols_per_row entries in packed_cols / packed_coefs.
+   */
+  void reserve_space(unsigned nrows, unsigned ncols_per_row)
+  {
+    //widen before multiplying/adding so that large counts are not
+    //wrapped in unsigned arithmetic first:
+    const size_t num_rows = nrows;
+    const size_t max_entries = num_rows * ncols_per_row;
+
+    rows.resize(num_rows);
+    row_offsets.resize(num_rows+1);
+    packed_cols.reserve(max_entries);
+    packed_coefs.reserve(max_entries);
+  }
+
+  /** Look up global row 'row' and hand back pointers into the packed
+   *  storage for that row.
+   *
+   *  On success row_length is the number of entries and cols / coefs
+   *  point at the first column id / coefficient of the row.  If the
+   *  row is not stored here, row_length is set to 0 and cols / coefs
+   *  are left unmodified.
+   */
+  void get_row_pointers(GlobalOrdinalType row, size_t& row_length,
+                        GlobalOrdinalType*& cols,
+                        ScalarType*& coefs)
+  {
+    ptrdiff_t local_row = -1;
+    //first see if we can get the local-row index using fast direct lookup
+    //(this hits when the row ids are consecutive starting at rows[0]):
+    if (!rows.empty()) {
+      const ptrdiff_t idx = row - rows[0];
+      //reject negative offsets explicitly rather than relying on the
+      //signed-to-unsigned conversion in the size comparison:
+      if (idx >= 0 && static_cast<size_t>(idx) < rows.size() &&
+          rows[idx] == row) {
+        local_row = idx;
+      }
+    }
+
+    //if we didn't get the local-row index using direct lookup, try a
+    //more expensive binary-search:
+    if (local_row == -1) {
+      typename std::vector<GlobalOrdinal>::iterator row_iter =
+          std::lower_bound(rows.begin(), rows.end(), row);
+
+      //if we still haven't found row, it's not local so jump out:
+      if (row_iter == rows.end() || *row_iter != row) {
+        row_length = 0;
+        return;
+      }
+
+      local_row = row_iter - rows.begin();
+    }
+
+    LocalOrdinalType offset = row_offsets[local_row];
+    row_length = row_offsets[local_row+1] - offset;
+    cols = &packed_cols[offset];
+    coefs = &packed_coefs[offset];
+  }
+};
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/ELLMatrix.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/ELLMatrix.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/ELLMatrix.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/ELLMatrix.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,139 @@
+#ifndef _ELLMatrix_hpp_
+#define _ELLMatrix_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <cstddef>
+#include <vector>
+#include <algorithm>
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+namespace miniFE {
+
+/** \brief  Sparse matrix in ELLPACK-style layout: every row owns a
+ *          fixed-width slot of num_cols_per_row entries.
+ *
+ *  rows[i] is the global id of the i-th stored row; its slot starts at
+ *  index i*num_cols_per_row of cols (column ids) and coefs (values).
+ *  Row lookup falls back to std::lower_bound, so rows is expected to
+ *  be sorted.
+ */
+template<typename Scalar,
+         typename LocalOrdinal,
+         typename GlobalOrdinal>
+struct
+ELLMatrix {
+  ELLMatrix()
+   : has_local_indices(false),
+     rows(),
+     cols(), coefs(),
+     num_cols(0),
+     num_cols_per_row(0)
+#ifdef HAVE_MPI
+     ,external_index(), external_local_index(), elements_to_send(),
+      neighbors(), recv_length(), send_length(), send_buffer(), request()
+#endif
+  {
+  }
+
+  ~ELLMatrix()
+  {}
+
+  typedef Scalar        ScalarType;
+  typedef LocalOrdinal  LocalOrdinalType;
+  typedef GlobalOrdinal GlobalOrdinalType;
+
+  bool                       has_local_indices;
+  std::vector<GlobalOrdinal> rows;
+  std::vector<GlobalOrdinal> cols;
+  std::vector<Scalar>        coefs;
+  LocalOrdinal               num_cols;
+  LocalOrdinal               num_cols_per_row;
+
+#ifdef HAVE_MPI
+  std::vector<GlobalOrdinal> external_index;
+  std::vector<GlobalOrdinal>  external_local_index;
+  std::vector<GlobalOrdinal> elements_to_send;
+  std::vector<int>           neighbors;
+  std::vector<LocalOrdinal>  recv_length;
+  std::vector<LocalOrdinal>  send_length;
+  std::vector<Scalar>        send_buffer;
+  std::vector<MPI_Request>   request;
+#endif
+
+  /** Number of slots (rows times slot width), padding included. */
+  size_t num_nonzeros() const
+  {
+    return rows.size()*num_cols_per_row;
+  }
+
+  /** Size rows to nrows and cols / coefs to nrows*ncols_per_row
+   *  value-initialized (zero) slots, and record the slot width.
+   */
+  void reserve_space(unsigned nrows, unsigned ncols_per_row)
+  {
+    //widen before multiplying: a product wrapped in unsigned
+    //arithmetic would under-size cols/coefs for large problems.
+    const size_t num_rows = nrows;
+    const size_t num_slots = num_rows * ncols_per_row;
+
+    rows.resize(num_rows);
+    cols.resize(num_slots);
+    coefs.resize(num_slots);
+    num_cols_per_row = ncols_per_row;
+  }
+
+  /** Look up global row 'row' and hand back pointers to its slot.
+   *
+   *  On success cols_ptr / coefs_ptr point at the start of the slot
+   *  and row_length is the slot width minus the trailing entries whose
+   *  column id is 0.  Because 0 doubles as the padding value, a row
+   *  whose last real column id is 0 is reported one entry short.
+   *  If the row is not stored here, row_length is set to 0 and the
+   *  pointers are left unmodified.
+   */
+  void get_row_pointers(GlobalOrdinalType row, size_t& row_length,
+                        GlobalOrdinalType*& cols_ptr,
+                        ScalarType*& coefs_ptr)
+  {
+    ptrdiff_t local_row = -1;
+    //first see if we can get the local-row index using fast direct lookup
+    //(this hits when the row ids are consecutive starting at rows[0]):
+    if (!rows.empty()) {
+      const ptrdiff_t idx = row - rows[0];
+      //reject negative offsets explicitly rather than relying on the
+      //signed-to-unsigned conversion in the size comparison:
+      if (idx >= 0 && static_cast<size_t>(idx) < rows.size() &&
+          rows[idx] == row) {
+        local_row = idx;
+      }
+    }
+
+    //if we didn't get the local-row index using direct lookup, try a
+    //more expensive binary-search:
+    if (local_row == -1) {
+      typename std::vector<GlobalOrdinal>::iterator row_iter =
+          std::lower_bound(rows.begin(), rows.end(), row);
+
+      //if we still haven't found row, it's not local so jump out:
+      if (row_iter == rows.end() || *row_iter != row) {
+        row_length = 0;
+        return;
+      }
+
+      local_row = row_iter - rows.begin();
+    }
+
+    cols_ptr = &cols[local_row*num_cols_per_row];
+    coefs_ptr = &coefs[local_row*num_cols_per_row];
+
+    //scan backwards over the zero padding at the end of the slot:
+    int idx = num_cols_per_row-1;
+    while(idx>=0) {
+      if (cols_ptr[idx] != 0) break;
+      --idx;
+    }
+    row_length = idx+1;
+  }
+};
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/ElemData.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/ElemData.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/ElemData.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/ElemData.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,64 @@
+#ifndef _ElemData_hpp_
+#define _ElemData_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <Hex8_enums.hpp>
+
+namespace miniFE {
+
+/** \brief  Per-element work data for one Hex8 element, with the
+ *          element matrix and source vector stored inline.
+ */
+template<typename GlobalOrdinal, typename Scalar>
+struct ElemData {
+  ElemData()
+   : nodes_per_elem(Hex8::numNodesPerElem)
+  {
+  }
+
+  ~ElemData()
+  {}
+
+  const size_t nodes_per_elem;
+
+  //one id per element node:
+  GlobalOrdinal elem_node_ids[Hex8::numNodesPerElem];
+
+  //numNodesPerElem*spatialDim values for each point of the
+  //numGaussPointsPerDim^3 integration grid:
+  Scalar grad_vals[Hex8::numGaussPointsPerDim *
+                   Hex8::numGaussPointsPerDim *
+                   Hex8::numGaussPointsPerDim *
+                   Hex8::numNodesPerElem * Hex8::spatialDim];
+
+  //spatialDim coordinates per element node:
+  Scalar elem_node_coords[Hex8::numNodesPerElem*Hex8::spatialDim];
+
+  //n*(n+1)/2 entries with n = numNodesPerElem:
+  Scalar elem_diffusion_matrix[(Hex8::numNodesPerElem*(Hex8::numNodesPerElem+1))/2];
+
+  //one entry per element node:
+  Scalar elem_source_vector[Hex8::numNodesPerElem];
+};
+
+/** \brief  Per-element work data for one Hex8 element whose element
+ *          matrix and source vector live in storage referenced
+ *          through pointers instead of inline arrays.
+ *
+ *  The pointers are not initialized by the constructor; they must be
+ *  set before use.
+ */
+template<typename GlobalOrdinal, typename Scalar>
+struct ElemDataPtr {
+  ElemDataPtr() : nodes_per_elem(Hex8::numNodesPerElem) {}
+  ~ElemDataPtr(){}
+
+  const size_t nodes_per_elem;
+  GlobalOrdinal elem_node_ids[Hex8::numNodesPerElem];
+  Scalar grad_vals[Hex8::numGaussPointsPerDim * Hex8::numGaussPointsPerDim * Hex8::numGaussPointsPerDim * Hex8::numNodesPerElem * Hex8::spatialDim];
+  //spatialDim coordinates per node, the same extent ElemData uses and
+  //the Hex8 kernels index.  The former (numNodesPerElem*(spatialDim+1))/2
+  //was the packed-triangle formula applied to the wrong array and left
+  //room for only 16 of the 24 coordinates.
+  Scalar elem_node_coords[Hex8::numNodesPerElem*Hex8::spatialDim];
+  Scalar* elem_diffusion_matrix;
+  Scalar* elem_source_vector;
+};
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/GetNodesCoords.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/GetNodesCoords.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/GetNodesCoords.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/GetNodesCoords.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,51 @@
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#ifndef _GETNODESCOORDS_HPP_
+#define _GETNODESCOORDS_HPP_
+
+#include <Hex8_enums.hpp>
+#include <simple_mesh_description.hpp>
+
+/** \brief  Functor that fetches node ordinals and node coordinates
+ *          for the i-th element of a list of element ids.
+ *
+ *  Results for element i are written at
+ *  node_ordinals + i*numNodesPerElem and
+ *  elem_node_coords + i*numNodesPerElem*spatialDim.
+ */
+template<typename GlobalOrdinal,typename Scalar>
+struct GetNodesCoords {
+  const miniFE::simple_mesh_description<GlobalOrdinal>* mesh;
+  GlobalOrdinal* elemIDs;
+  GlobalOrdinal* node_ordinals;
+  Scalar* elem_node_coords;
+
+  inline void operator()(int i)
+  {
+    unsigned nodes_per_elem = miniFE::Hex8::numNodesPerElem;
+
+    //output slots belonging to element i:
+    GlobalOrdinal* ords_out = node_ordinals+i*nodes_per_elem;
+    Scalar* coords_out = elem_node_coords+i*nodes_per_elem*miniFE::Hex8::spatialDim;
+
+    GlobalOrdinal elem_id = elemIDs[i];
+    get_elem_nodes_and_coords(*mesh, elem_id, ords_out, coords_out);
+  }
+};
+
+#endif

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/Hex8.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,417 @@
+#ifndef _Hex8_hpp_
+#define _Hex8_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#ifndef KERNEL_PREFIX 
+#define KERNEL_PREFIX
+#endif
+
+#include <gauss_pts.hpp>
+#include <matrix_algebra_3x3.hpp>
+#include <Hex8_enums.hpp>
+
+namespace miniFE {
+
+namespace Hex8 {
+
+/** Evaluate the eight trilinear Hex8 shape functions at the point x.
+ *
+ *  x               : 3 coordinates (spatialDim is hard-coded to 3)
+ *  values_at_nodes : receives numNodesPerElem (8) values,
+ *                    0.125 * (1 +/- x) * (1 +/- y) * (1 +/- z)
+ */
+template<typename Scalar>
+KERNEL_PREFIX void shape_fns(const Scalar* x, Scalar* values_at_nodes)
+{
+  //"minus" factors (1 - coordinate):
+  const Scalar xm = 1.0 - x[0];
+  const Scalar ym = 1.0 - x[1];
+  const Scalar zm = 1.0 - x[2];
+
+  //"plus" factors (1 + coordinate):
+  const Scalar xp = 1.0 + x[0];
+  const Scalar yp = 1.0 + x[1];
+  const Scalar zp = 1.0 + x[2];
+
+  //nodes on the (1-z) face:
+  values_at_nodes[0] = 0.125 * xm * ym * zm;//(1-x)*(1-y)*(1-z)
+  values_at_nodes[1] = 0.125 * xp * ym * zm;//(1+x)*(1-y)*(1-z)
+  values_at_nodes[2] = 0.125 * xp * yp * zm;//(1+x)*(1+y)*(1-z)
+  values_at_nodes[3] = 0.125 * xm * yp * zm;//(1-x)*(1+y)*(1-z)
+
+  //nodes on the (1+z) face:
+  values_at_nodes[4] = 0.125 * xm * ym * zp;//(1-x)*(1-y)*(1+z)
+  values_at_nodes[5] = 0.125 * xp * ym * zp;//(1+x)*(1-y)*(1+z)
+  values_at_nodes[6] = 0.125 * xp * yp * zp;//(1+x)*(1+y)*(1+z)
+  values_at_nodes[7] = 0.125 * xm * yp * zp;//(1-x)*(1+y)*(1+z)
+}
+
+/** Evaluate the derivatives of the eight Hex8 shape functions at x.
+ *
+ *  x             : 3 coordinates (spatialDim is hard-coded to 3)
+ *  values_per_fn : receives 24 values (numNodesPerElem*spatialDim);
+ *                  entries 3*f+0, 3*f+1, 3*f+2 hold the derivative of
+ *                  function f with respect to x[0], x[1], x[2].
+ */
+template<typename Scalar>
+KERNEL_PREFIX void gradients(const Scalar* x, Scalar* values_per_fn)
+{
+  //"minus" factors (1 - coordinate):
+  const Scalar mx = 1.0 - x[0];
+  const Scalar my = 1.0 - x[1];
+  const Scalar mz = 1.0 - x[2];
+
+  //"plus" factors (1 + coordinate):
+  const Scalar px = 1.0 + x[0];
+  const Scalar py = 1.0 + x[1];
+  const Scalar pz = 1.0 + x[2];
+
+  //fn 0: (1-x)*(1-y)*(1-z)
+  values_per_fn[0]  = -0.125 * my * mz;
+  values_per_fn[1]  = -0.125 * mx * mz;
+  values_per_fn[2]  = -0.125 * mx * my;
+
+  //fn 1: (1+x)*(1-y)*(1-z)
+  values_per_fn[3]  =  0.125 * my * mz;
+  values_per_fn[4]  = -0.125 * px * mz;
+  values_per_fn[5]  = -0.125 * px * my;
+
+  //fn 2: (1+x)*(1+y)*(1-z)
+  values_per_fn[6]  =  0.125 * py * mz;
+  values_per_fn[7]  =  0.125 * px * mz;
+  values_per_fn[8]  = -0.125 * px * py;
+
+  //fn 3: (1-x)*(1+y)*(1-z)
+  values_per_fn[9]  = -0.125 * py * mz;
+  values_per_fn[10] =  0.125 * mx * mz;
+  values_per_fn[11] = -0.125 * mx * py;
+
+  //fn 4: (1-x)*(1-y)*(1+z)
+  values_per_fn[12] = -0.125 * my * pz;
+  values_per_fn[13] = -0.125 * mx * pz;
+  values_per_fn[14] =  0.125 * mx * my;
+
+  //fn 5: (1+x)*(1-y)*(1+z)
+  values_per_fn[15] =  0.125 * my * pz;
+  values_per_fn[16] = -0.125 * px * pz;
+  values_per_fn[17] =  0.125 * px * my;
+
+  //fn 6: (1+x)*(1+y)*(1+z)
+  values_per_fn[18] =  0.125 * py * pz;
+  values_per_fn[19] =  0.125 * px * pz;
+  values_per_fn[20] =  0.125 * px * py;
+
+  //fn 7: (1-x)*(1+y)*(1+z)
+  values_per_fn[21] = -0.125 * py * pz;
+  values_per_fn[22] =  0.125 * mx * pz;
+  values_per_fn[23] =  0.125 * mx * py;
+}
+
+/** Compute the determinant of the 3x3 Jacobian for one integration
+ *  point.
+ *
+ *  The Jacobian is accumulated over the element nodes as
+ *    J(a,b) = sum_i grad_vals[3*i+a] * elemNodeCoords[3*i+b]
+ *  and its determinant is stored in detJ.  Despite the name, no
+ *  gradients are produced here: grad_vals is read-only input and
+ *  detJ is the only output.
+ */
+template<typename Scalar>
+KERNEL_PREFIX void gradients_and_detJ(const Scalar* elemNodeCoords,
+                                          const Scalar* grad_vals,
+                                          Scalar& detJ)
+{
+/**
+  grad_vals holds the shape-function derivatives for the single
+  integration point at which the jacobian is to be computed.
+*/
+
+  //assumptions on the lengths of input arguments:
+  //elemNodeCoords has length numNodesPerElem*spatialDim,
+  //grad_vals has length numNodesPerElem*spatialDim
+
+  const Scalar zero = 0;
+
+  Scalar J00 = zero;
+  Scalar J01 = zero;
+  Scalar J02 = zero;
+
+  Scalar J10 = zero;
+  Scalar J11 = zero;
+  Scalar J12 = zero;
+
+  Scalar J20 = zero;
+  Scalar J21 = zero;
+  Scalar J22 = zero;
+
+  //running value of i*spatialDim, the offset of node i in both inputs:
+  size_t i_X_spatialDim = 0;
+  for(size_t i=0; i<numNodesPerElem; ++i) {
+//    size_t offset = 0;
+//    for(size_t gd=0; gd<spatialDim; ++gd) {
+//
+//      Scalar gval = grad_vals[i_X_spatialDim+gd];
+//
+//      for(size_t jd=0; jd<spatialDim; ++jd) {
+//        J[offset++] += gval*elemNodeCoords[i_X_spatialDim+jd];
+//      }
+//    }
+    //for optimization, unroll the above double-loop over spatialDim:
+    //(hard-coded assumption that spatialDim == 3)
+    J00 += grad_vals[i_X_spatialDim+0]*elemNodeCoords[i_X_spatialDim+0];
+    J01 += grad_vals[i_X_spatialDim+0]*elemNodeCoords[i_X_spatialDim+1];
+    J02 += grad_vals[i_X_spatialDim+0]*elemNodeCoords[i_X_spatialDim+2];
+
+    J10 += grad_vals[i_X_spatialDim+1]*elemNodeCoords[i_X_spatialDim+0];
+    J11 += grad_vals[i_X_spatialDim+1]*elemNodeCoords[i_X_spatialDim+1];
+    J12 += grad_vals[i_X_spatialDim+1]*elemNodeCoords[i_X_spatialDim+2];
+
+    J20 += grad_vals[i_X_spatialDim+2]*elemNodeCoords[i_X_spatialDim+0];
+    J21 += grad_vals[i_X_spatialDim+2]*elemNodeCoords[i_X_spatialDim+1];
+    J22 += grad_vals[i_X_spatialDim+2]*elemNodeCoords[i_X_spatialDim+2];
+
+    i_X_spatialDim += spatialDim;
+  }
+
+  //2x2 minors used by the expansion along the first column of J:
+  Scalar term0 = J22*J11 - J21*J12;
+  Scalar term1 = J22*J01 - J21*J02;
+  Scalar term2 = J12*J01 - J11*J02;
+
+  detJ = J00*term0 - J10*term1 + J20*term2;
+}
+
+/** Compute the 3x3 Jacobian for one integration point, its
+ *  determinant and its inverse.
+ *
+ *  The Jacobian is accumulated over the element nodes as
+ *    J(a,b) = sum_i grad_vals[3*i+a] * elemNodeCoords[3*i+b].
+ *  detJ receives its determinant and invJ the nine entries of its
+ *  inverse, stored row by row (invJ[3*r+c]).  Despite the name, no
+ *  gradients are produced here: grad_vals is read-only input.
+ *
+ *  NOTE(review): there is no guard against detJ == 0; in that case
+ *  the division below yields non-finite entries for floating-point
+ *  Scalar types.
+ */
+template<typename Scalar>
+KERNEL_PREFIX void gradients_and_invJ_and_detJ(const Scalar* elemNodeCoords,
+                                               const Scalar* grad_vals,
+                                               Scalar* invJ,
+                                               Scalar& detJ)
+{
+/**
+  grad_vals holds the shape-function derivatives for the single
+  integration point at which the jacobian is to be computed.
+*/
+
+  //assumptions on the lengths of input arguments:
+  //elemNodeCoords has length numNodesPerElem*spatialDim,
+  //grad_vals has length numNodesPerElem*spatialDim, and
+  //invJ has length spatialDim*spatialDim
+
+  const Scalar zero = 0;
+
+  //
+  //First we compute the jacobian J:
+  //
+  Scalar J00 = zero;
+  Scalar J01 = zero;
+  Scalar J02 = zero;
+
+  Scalar J10 = zero;
+  Scalar J11 = zero;
+  Scalar J12 = zero;
+
+  Scalar J20 = zero;
+  Scalar J21 = zero;
+  Scalar J22 = zero;
+
+  //running value of i*spatialDim, the offset of node i in both inputs:
+  size_t i_X_spatialDim = 0;
+  for(size_t i=0; i<numNodesPerElem; ++i) {
+//    size_t offset = 0;
+//    for(size_t gd=0; gd<spatialDim; ++gd) {
+//
+//      Scalar gval = grad_vals[i_X_spatialDim+gd];
+//
+//      for(size_t jd=0; jd<spatialDim; ++jd) {
+//        J[offset++] += gval*elemNodeCoords[i_X_spatialDim+jd];
+//      }
+//    }
+    //for optimization, unroll the above double-loop over spatialDim:
+    //(a hard-coded assumption that spatialDim == 3)
+    J00 += grad_vals[i_X_spatialDim+0]*elemNodeCoords[i_X_spatialDim+0];
+    J01 += grad_vals[i_X_spatialDim+0]*elemNodeCoords[i_X_spatialDim+1];
+    J02 += grad_vals[i_X_spatialDim+0]*elemNodeCoords[i_X_spatialDim+2];
+
+    J10 += grad_vals[i_X_spatialDim+1]*elemNodeCoords[i_X_spatialDim+0];
+    J11 += grad_vals[i_X_spatialDim+1]*elemNodeCoords[i_X_spatialDim+1];
+    J12 += grad_vals[i_X_spatialDim+1]*elemNodeCoords[i_X_spatialDim+2];
+
+    J20 += grad_vals[i_X_spatialDim+2]*elemNodeCoords[i_X_spatialDim+0];
+    J21 += grad_vals[i_X_spatialDim+2]*elemNodeCoords[i_X_spatialDim+1];
+    J22 += grad_vals[i_X_spatialDim+2]*elemNodeCoords[i_X_spatialDim+2];
+
+    i_X_spatialDim += spatialDim;
+  }
+
+  //2x2 minors shared by the determinant and the first row of invJ:
+  Scalar term0 = J22*J11 - J21*J12;
+  Scalar term1 = J22*J01 - J21*J02;
+  Scalar term2 = J12*J01 - J11*J02;
+
+  detJ = J00*term0 - J10*term1 + J20*term2;
+
+  Scalar inv_detJ = 1.0/detJ;
+
+  //inverse = signed 2x2 minors (adjugate of J) scaled by 1/detJ:
+  invJ[0] =  term0*inv_detJ;
+  invJ[1] = -term1*inv_detJ;
+  invJ[2] =  term2*inv_detJ;
+
+  invJ[3] = -(J22*J10 - J20*J12)*inv_detJ;
+  invJ[4] =  (J22*J00 - J20*J02)*inv_detJ;
+  invJ[5] = -(J12*J00 - J10*J02)*inv_detJ;
+
+  invJ[6] =  (J21*J10 - J20*J11)*inv_detJ;
+  invJ[7] = -(J21*J00 - J20*J01)*inv_detJ;
+  invJ[8] =  (J11*J00 - J10*J01)*inv_detJ;
+}
+
+/** Compute the symmetric element diffusion matrix of one Hex8 element.
+ *
+ *  elemNodeCoords : numNodesPerElem*spatialDim node coordinates
+ *  grad_vals      : one block of numNodesPerElem*spatialDim
+ *                   shape-function derivatives per Gauss point, blocks
+ *                   ordered as the ig/jg/kg loops below visit them
+ *  elem_mat       : receives (numNodesPerElem*(numNodesPerElem+1))/2
+ *                   values: for each m, the diagonal entry (m,m)
+ *                   followed by the entries (m,n) with n > m.
+ *                   It is zeroed here before accumulation.
+ */
+template<typename Scalar>
+KERNEL_PREFIX void diffusionMatrix_symm(const Scalar* elemNodeCoords,
+                        const Scalar* grad_vals,
+                        Scalar* elem_mat)
+{
+  int len = (numNodesPerElem * (numNodesPerElem+1))/2;
+  const Scalar zero = 0;
+  miniFE::fill(elem_mat, elem_mat+len, zero);
+
+  //only the weights are used below; gpts is filled because gauss_pts
+  //returns points and weights together.
+  Scalar gpts[numGaussPointsPerDim];
+  Scalar gwts[numGaussPointsPerDim];
+
+  gauss_pts(numGaussPointsPerDim, gpts, gwts);
+
+  const Scalar k = 1.0;
+  Scalar detJ = 0.0;
+
+  Scalar dpsidx[numNodesPerElem], dpsidy[numNodesPerElem], dpsidz[numNodesPerElem];
+
+  Scalar invJ[spatialDim*spatialDim];
+
+  //The following nested loop implements equations 3.4.5 and 3.4.7 on page 88
+  //of Reddy & Gartling, "The Finite Element Method in Heat Transfer and Fluid
+  //Dynamics", 2nd edition,
+  //to compute the element diffusion matrix for the steady conduction equation.
+
+#ifdef MINIFE_DEBUG
+  Scalar volume = zero;
+#endif
+
+  //offset of the current Gauss point's block inside grad_vals:
+  size_t gv_offset = 0;
+  for(size_t ig=0; ig<numGaussPointsPerDim; ++ig) {
+    Scalar wi = gwts[ig];
+
+    for(size_t jg=0; jg<numGaussPointsPerDim; ++jg) {
+      Scalar wi_wj = wi*gwts[jg];
+
+      for(size_t kg=0; kg<numGaussPointsPerDim; ++kg) {
+        Scalar wi_wj_wk = wi_wj*gwts[kg];
+        const Scalar* grad_vals_ptr = &grad_vals[gv_offset];
+        gv_offset += numNodesPerElem*spatialDim;
+        gradients_and_invJ_and_detJ(elemNodeCoords, grad_vals_ptr, invJ, detJ);
+
+#ifdef MINIFE_DEBUG
+        volume += detJ;
+#endif
+        Scalar k_detJ_wi_wj_wk = k*detJ*wi_wj_wk;
+
+        //apply invJ to each node's derivative triple:
+        const Scalar* gv = grad_vals_ptr;
+        for(int i=0; i<numNodesPerElem; ++i) {
+          Scalar gv0 = gv[0], gv1 = gv[1], gv2 = gv[2];
+          dpsidx[i] = gv0 * invJ[0] +
+                      gv1 * invJ[1] +
+                      gv2 * invJ[2];
+          dpsidy[i] = gv0 * invJ[3] +
+                      gv1 * invJ[4] +
+                      gv2 * invJ[5];
+          dpsidz[i] = gv0 * invJ[6] +
+                      gv1 * invJ[7] +
+                      gv2 * invJ[8];
+          gv += spatialDim;
+        }
+
+        //accumulate into the packed triangle: diagonal entry first,
+        //then the entries with n > m.
+        int offset = 0;
+        for(int m=0; m<numNodesPerElem; ++m) {
+          const Scalar dpsidx_m = dpsidx[m];
+          const Scalar dpsidy_m = dpsidy[m];
+          const Scalar dpsidz_m = dpsidz[m];
+
+          elem_mat[offset++] += k_detJ_wi_wj_wk *
+                              ((dpsidx_m*dpsidx_m) +
+                               (dpsidy_m*dpsidy_m) +
+                               (dpsidz_m*dpsidz_m));
+
+          for(int n=m+1; n<numNodesPerElem; ++n) {
+            elem_mat[offset++] += k_detJ_wi_wj_wk *
+                                  ((dpsidx_m * dpsidx[n]) +
+                                   (dpsidy_m * dpsidy[n]) +
+                                   (dpsidz_m * dpsidz[n]));
+          }
+        }
+
+      }//for kg
+    }//for jg
+  }//for ig
+
+//int offset = 0;
+//std::cout.precision(16);
+//for(int m=0; m<numNodesPerElem; ++m) {
+//  for(int n=m; n<numNodesPerElem; ++n) {
+//std::cout<<"elem_mat["<<offset<<"] = "<<elem_mat[offset]<<";"<<std::endl;
+//   ++offset;
+//  }
+//}
+#ifdef MINIFE_DEBUG
+//  std::cout << "element volume: " << volume << std::endl;
+//  if (std::abs(volume - 1) > 1.e-7) {
+//    std::cout << "element volume is "<<volume<<", expected 1.0."<<std::endl;
+//  }
+#endif
+}
+
+/** Compute the element source vector of one Hex8 element.
+ *
+ *  elemNodeCoords : numNodesPerElem*spatialDim node coordinates
+ *  grad_vals      : one block of numNodesPerElem*spatialDim
+ *                   shape-function derivatives per Gauss point, blocks
+ *                   ordered as the nested loops below visit them
+ *  elem_vec       : receives numNodesPerElem values; zeroed here, then
+ *                   each Gauss point adds psi[i]*Q*detJ*wi*wj*wk.
+ */
+template<typename Scalar>
+KERNEL_PREFIX void sourceVector(const Scalar* elemNodeCoords,
+                                const Scalar* grad_vals,
+                                Scalar* elem_vec)
+{
+  const Scalar zero = 0;
+  int len = numNodesPerElem;
+  miniFE::fill(elem_vec, elem_vec+len, zero);
+
+  Scalar points[numGaussPointsPerDim];
+  Scalar weights[numGaussPointsPerDim];
+  gauss_pts(numGaussPointsPerDim, points, weights);
+
+  Scalar shape_vals[numNodesPerElem];
+  Scalar xi[spatialDim];
+
+  //constant source strength:
+  Scalar Q = 1.0;
+
+  //start of the current Gauss point's block inside grad_vals:
+  const Scalar* point_grads = grad_vals;
+
+  for(size_t a=0; a<numGaussPointsPerDim; ++a) {
+    xi[0] = points[a];
+    Scalar weight_a = weights[a];
+
+    for(size_t b=0; b<numGaussPointsPerDim; ++b) {
+      xi[1] = points[b];
+      Scalar weight_b = weights[b];
+
+      for(size_t c=0; c<numGaussPointsPerDim; ++c) {
+        xi[2] = points[c];
+        Scalar weight_c = weights[c];
+
+        shape_fns(xi, shape_vals);
+
+        Scalar detJ;
+        gradients_and_detJ(elemNodeCoords, point_grads, detJ);
+        point_grads += numNodesPerElem*spatialDim;
+
+        Scalar scale = Q*detJ*weight_a*weight_b*weight_c;
+
+        for(int node=0; node<numNodesPerElem; ++node) {
+          elem_vec[node] += shape_vals[node]*scale;
+        }
+      }
+    }
+  }
+}
+
+}//namespace Hex8
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_ElemData.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/Hex8_ElemData.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_ElemData.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_ElemData.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,86 @@
+#ifndef _Hex8_ElemData_hpp_
+#define _Hex8_ElemData_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <Hex8_enums.hpp>
+#include <Hex8.hpp>
+#include <ElemData.hpp>
+
+namespace miniFE {
+
+//Evaluate Hex8::gradients at every point of the tensor-product Gauss rule
+//and store the results back to back in grad_vals: one chunk of
+//Hex8::numNodesPerElem*Hex8::spatialDim entries per Gauss point, visited
+//with the first coordinate varying slowest.
+template<typename Scalar>
+void compute_gradient_values(Scalar* grad_vals)
+{
+  Scalar points[Hex8::numGaussPointsPerDim];
+  Scalar weights[Hex8::numGaussPointsPerDim];
+  gauss_pts(Hex8::numGaussPointsPerDim, points, weights);
+
+  const int vals_per_point = Hex8::numNodesPerElem*Hex8::spatialDim;
+
+  Scalar pt[Hex8::spatialDim];
+  Scalar* cursor = grad_vals;
+
+  for(size_t gi=0; gi<Hex8::numGaussPointsPerDim; ++gi) {
+    pt[0] = points[gi];
+
+    for(size_t gj=0; gj<Hex8::numGaussPointsPerDim; ++gj) {
+      pt[1] = points[gj];
+
+      for(size_t gk=0; gk<Hex8::numGaussPointsPerDim; ++gk) {
+        pt[2] = points[gk];
+
+        Hex8::gradients(pt, cursor);
+        cursor += vals_per_point;
+      }
+    }
+  }
+}
+
+//Fill in the element diffusion matrix and the element source vector of
+//'elem_data' (ElemData overload). Both are computed from
+//elem_data.elem_node_coords and elem_data.grad_vals, which must already
+//be populated by the caller.
+template<typename GlobalOrdinal,typename Scalar>
+void
+compute_element_matrix_and_vector(ElemData<GlobalOrdinal,Scalar>& elem_data)
+{
+  Hex8::diffusionMatrix_symm(elem_data.elem_node_coords, elem_data.grad_vals,
+                             elem_data.elem_diffusion_matrix);
+  Hex8::sourceVector(elem_data.elem_node_coords, elem_data.grad_vals,
+                     elem_data.elem_source_vector);
+}
+
+//Same computation as the ElemData overload, for ElemDataPtr: fills
+//elem_data.elem_diffusion_matrix and elem_data.elem_source_vector from
+//elem_data.elem_node_coords and elem_data.grad_vals.
+template<typename GlobalOrdinal,typename Scalar>
+void
+compute_element_matrix_and_vector(ElemDataPtr<GlobalOrdinal,Scalar>& elem_data)
+{
+  Hex8::diffusionMatrix_symm(elem_data.elem_node_coords, elem_data.grad_vals,
+                             elem_data.elem_diffusion_matrix);
+  Hex8::sourceVector(elem_data.elem_node_coords, elem_data.grad_vals,
+                     elem_data.elem_source_vector);
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_box_utils.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/Hex8_box_utils.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_box_utils.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_box_utils.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,173 @@
+#ifndef _Hex8_box_utils_hpp_
+#define _Hex8_box_utils_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <stdexcept>
+
+#include <box_utils.hpp>
+#include <ElemData.hpp>
+#include <simple_mesh_description.hpp>
+#include <Hex8.hpp>
+
+namespace miniFE {
+
+
+//Given box dimensions nx and ny (node counts in x and y), and a starting
+//node (local-node-0 for a hex8), compute the other nodes of the hex8 using
+//the exodus ordering convention. elem_node_ids receives 8 entries.
+//
+//The strides are held in GlobalOrdinal so that nx*ny is not evaluated in
+//'int', which would overflow on large meshes when GlobalOrdinal is a
+//64-bit type.
+template<typename GlobalOrdinal>
+void get_hex8_node_ids(int nx, int ny,
+                       GlobalOrdinal node0,
+                       GlobalOrdinal* elem_node_ids)
+{
+  const GlobalOrdinal row_stride = nx;
+  const GlobalOrdinal plane_stride = row_stride*ny;
+
+  //Nodes 0-3: the face containing node0.
+  elem_node_ids[0] = node0;
+  elem_node_ids[1] = node0 + 1;
+  elem_node_ids[2] = node0 + row_stride + 1;
+  elem_node_ids[3] = node0 + row_stride;
+
+  //Nodes 4-7: the same pattern, one node-plane (nx*ny ids) further on.
+  elem_node_ids[4] = node0 +     plane_stride;
+  elem_node_ids[5] = node0 + 1 + plane_stride;
+  elem_node_ids[6] = node0 + row_stride + plane_stride + 1;
+  elem_node_ids[7] = node0 + row_stride + plane_stride;
+}
+
+//Write the x,y,z coordinates of the 8 nodes of a Hex8 into
+//elem_node_coords (24 entries, node after node).
+//
+//Input: x,y,z are the coordinates of local-node 0 for the Hex8.
+//'hx', 'hy', 'hz' are the lengths of the sides of the element
+//in each direction.
+template<typename Scalar>
+void get_hex8_node_coords_3d(Scalar x, Scalar y, Scalar z,
+                             Scalar hx, Scalar hy, Scalar hz,
+                             Scalar* elem_node_coords)
+{
+  //For each local node: 1 where the coordinate is moved by the element's
+  //side length, 0 where it keeps the local-node-0 value.
+  static const int shifted[8][3] = {
+    {0,0,0}, {1,0,0}, {1,1,0}, {0,1,0},
+    {0,0,1}, {1,0,1}, {1,1,1}, {0,1,1}
+  };
+
+  Scalar* out = elem_node_coords;
+  for(int node=0; node<8; ++node) {
+    *out++ = shifted[node][0] ? x + hx : x;
+    *out++ = shifted[node][1] ? y + hy : y;
+    *out++ = shifted[node][2] ? z + hz : z;
+  }
+}
+
+//Compute the node numbers and node coordinates of element 'elemID'.
+//
+//node_ords receives Hex8::numNodesPerElem entries: the hex8 node ids of
+//the element after translation through mesh.map_id_to_row.
+//node_coords receives the x,y,z triple of each of those nodes, as written
+//by get_hex8_node_coords_3d.
+//Throws std::runtime_error if elemID is negative.
+template<typename GlobalOrdinal, typename Scalar>
+void
+get_elem_nodes_and_coords(const simple_mesh_description<GlobalOrdinal>& mesh,
+                          GlobalOrdinal elemID,
+                          GlobalOrdinal* node_ords, Scalar* node_coords)
+{
+  //global_box[d][1] is used as the number of elements in direction d;
+  //there is one more node than elements in each direction.
+  int global_nodes_x = mesh.global_box[0][1]+1;
+  int global_nodes_y = mesh.global_box[1][1]+1;
+  int global_nodes_z = mesh.global_box[2][1]+1;
+
+  if (elemID < 0) {
+    //I don't think this can happen, but check for the sake of paranoia...
+    throw std::runtime_error("get_elem_nodes_and_coords ERROR, negative elemID");
+  }
+
+  //Integer (x,y,z) position of the element in the element grid; the node
+  //at the same position in the node grid is the element's local-node 0.
+  int elem_int_x, elem_int_y, elem_int_z;
+  get_int_coords(elemID, global_nodes_x-1, global_nodes_y-1, global_nodes_z-1,
+             elem_int_x, elem_int_y, elem_int_z);
+  GlobalOrdinal nodeID = get_id<GlobalOrdinal>(global_nodes_x, global_nodes_y, global_nodes_z, elem_int_x, elem_int_y, elem_int_z);
+
+#ifdef MINIFE_DEBUG_VERBOSE
+  std::cout<<"\nelemID: "<<elemID<<", nodeID: "<<nodeID<<std::endl;
+#endif
+  get_hex8_node_ids(global_nodes_x, global_nodes_y, nodeID, node_ords);
+
+  //Map node-IDs to rows because each processor may have a non-contiguous block of
+  //node-ids, but needs a contiguous block of row-numbers:
+#ifdef MINIFE_DEBUG_VERBOSE
+  std::cout<<"elem "<<elemID<<" nodes: ";
+#endif
+  for(int i=0; i<Hex8::numNodesPerElem; ++i) {
+#ifdef MINIFE_DEBUG_VERBOSE
+    std::cout<<node_ords[i]<<" ";
+#endif
+    node_ords[i] = mesh.map_id_to_row(node_ords[i]);
+  }
+#ifdef MINIFE_DEBUG_VERBOSE
+  std::cout << std::endl;
+#endif
+
+  int global_elems_x = mesh.global_box[0][1];
+  int global_elems_y = mesh.global_box[1][1];
+  int global_elems_z = mesh.global_box[2][1];
+
+  Scalar ix,iy,iz;
+  get_coords<GlobalOrdinal,Scalar>(nodeID, global_nodes_x,global_nodes_y,global_nodes_z,
+                            ix,iy,iz);
+  //Element side lengths: the side-length computation treats the global
+  //domain as having extent 1.0 in every direction.
+  Scalar hx = 1.0/global_elems_x;
+  Scalar hy = 1.0/global_elems_y;
+  Scalar hz = 1.0/global_elems_z;
+  get_hex8_node_coords_3d(ix, iy, iz, hx, hy, hz, node_coords);
+#ifdef MINIFE_DEBUG_VERBOSE
+  //Index each triple explicitly: chaining three 'offset++' in one stream
+  //expression leaves their evaluation order unsequenced before C++17.
+  for(int i=0; i<Hex8::numNodesPerElem; ++i) {
+    const Scalar* xyz = &node_coords[i*Hex8::spatialDim];
+    std::cout << "("<<xyz[0]<<","<<xyz[1]<<","<<xyz[2]<<")";
+  }
+  std::cout << std::endl;
+#endif
+}
+
+//Convenience overload: computes the node numbers and node coordinates of
+//element 'elemID' directly into elem_data.elem_node_ids and
+//elem_data.elem_node_coords by forwarding to the pointer-based overload.
+template<typename GlobalOrdinal, typename Scalar>
+void
+get_elem_nodes_and_coords(const simple_mesh_description<GlobalOrdinal>& mesh,
+                          GlobalOrdinal elemID,
+                          ElemData<GlobalOrdinal,Scalar>& elem_data)
+{
+  get_elem_nodes_and_coords(mesh, elemID, elem_data.elem_node_ids, elem_data.elem_node_coords);
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_enums.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/Hex8_enums.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_enums.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_enums.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,52 @@
+#ifndef _Hex8_enums_hpp_
+#define _Hex8_enums_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+namespace miniFE {
+
+namespace Hex8 {
+
+//Compile-time sizes used throughout the Hex8 element code.
+//
+//   !!!!!!!
+//Caution: some loops over spatialDim elsewhere in miniFE are unrolled
+//by hand, i.e. spatialDim is assumed to be 3. Editing the enum below is
+//therefore not sufficient to make miniFE work for any other spatialDim.
+//   !!!!!!!
+enum {
+  numGaussPointsPerDim = 2, //Gauss points along each coordinate direction
+  numNodesPerElem = 8,      //nodes of one element
+  spatialDim = 3            //coordinates per node
+};
+
+}//namespace Hex8
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Makefile
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/Makefile?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Makefile (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Makefile Mon Aug 21 16:54:28 2017
@@ -0,0 +1,7 @@
+# Build settings for the miniFE benchmark; the actual rules come from the
+# Makefile.multisrc include on the last line.
+LEVEL = ../../../..
+
+PROG     = miniFE
+# The defines fix the scalar type to double and both ordinal types to int.
+# MINIFE_CSR_MATRIX presumably selects the CSR matrix format -- confirm
+# against the sources that test this macro.
+CXXFLAGS = -I. -DMINIFE_SCALAR=double -DMINIFE_LOCAL_ORDINAL=int -DMINIFE_GLOBAL_ORDINAL=int -DMINIFE_CSR_MATRIX
+LDFLAGS  = -lm
+# Presumably the command-line arguments used when the benchmark is run.
+RUN_OPTIONS = -nx 64 -ny 64 -nz 64
+include $(LEVEL)/MultiSource/Makefile.multisrc

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/MatrixCopyOp.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/MatrixCopyOp.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/MatrixCopyOp.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/MatrixCopyOp.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,60 @@
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#ifndef _MatrixCopyOp_hpp_
+#define _MatrixCopyOp_hpp_
+
+//Functor that copies one row of a matrix stored as row ids, row offsets
+//and packed column/coefficient arrays from the src_* arrays into the
+//dest_* arrays. Calling it with row index i copies the row id, the row's
+//starting offset, and every packed entry in
+//[src_rowoffsets[i], src_rowoffsets[i+1]).
+//All pointers must be set by the user before the functor is invoked.
+template<typename MatrixType>
+struct MatrixCopyOp {
+  typedef typename MatrixType::ScalarType ScalarType;
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinalType;
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType;
+
+  //Source arrays (read only).
+  const GlobalOrdinalType* src_rows;
+  const LocalOrdinalType*  src_rowoffsets;
+  const GlobalOrdinalType* src_cols;
+  const ScalarType*        src_coefs;
+
+  //Destination arrays.
+  GlobalOrdinalType* dest_rows;
+  LocalOrdinalType*  dest_rowoffsets;
+  GlobalOrdinalType* dest_cols;
+  ScalarType*        dest_coefs;
+  int n;
+
+  inline void operator()(int i)
+  {
+    dest_rows[i] = src_rows[i];
+    dest_rowoffsets[i] = src_rowoffsets[i];
+
+    int entry = src_rowoffsets[i];
+    while (entry < src_rowoffsets[i+1]) {
+      dest_cols[entry] = src_cols[entry];
+      dest_coefs[entry] = src_coefs[entry];
+      ++entry;
+    }
+  }
+};
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/MatrixInitOp.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/MatrixInitOp.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/MatrixInitOp.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/MatrixInitOp.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,231 @@
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#ifndef _MatrixInitOp_hpp_
+#define _MatrixInitOp_hpp_
+
+#include <simple_mesh_description.hpp>
+#include <box_utils.hpp>
+
+#include <CSRMatrix.hpp>
+#include <ELLMatrix.hpp>
+
+#include <algorithm>
+
+//Sort list[0..list_len) into ascending order, but only if it is not
+//already ascending (the common case is expected to need no sort).
+//
+//The scan runs forward from index 1 so that a list_len of 0 is safe even
+//when GlobalOrdinal is an unsigned type; a backward scan starting at
+//list_len-1 would wrap around and read outside the array.
+template<typename GlobalOrdinal>
+void sort_if_needed(GlobalOrdinal* list,
+                    GlobalOrdinal list_len)
+{
+  for(GlobalOrdinal i=1; i<list_len; ++i) {
+    if (list[i] < list[i-1]) {
+      //Found an out-of-order neighbouring pair: sort everything and stop.
+      std::sort(list,list+list_len);
+      return;
+    }
+  }
+}
+
+//Primary template: intentionally empty. The usable implementations are the
+//explicit specializations for miniFE::CSRMatrix and miniFE::ELLMatrix
+//defined in this header.
+template<typename MatrixType>
+struct MatrixInitOp {};
+
+//MatrixInitOp specialization for CSRMatrix.
+//
+//Each call operator()(i) initializes row i of the matrix: it stores the
+//row id and row offset, then writes the column index (mapped through
+//mesh->map_id_to_row) and a zero coefficient for every valid neighbour in
+//the 3x3x3 block of nodes around the row's node, and finally makes sure
+//the row's column indices are in ascending order.
+template<>
+struct MatrixInitOp<miniFE::CSRMatrix<MINIFE_SCALAR,MINIFE_LOCAL_ORDINAL,MINIFE_GLOBAL_ORDINAL> > {
+  //Captures raw pointers into the input vectors and into 'matrix', resizes
+  //matrix.packed_cols / matrix.packed_coefs to row_offsets_vec[n] entries
+  //(n = matrix.rows.size()) and stores that count as the final row offset.
+  //
+  //rows_vec, row_offsets_vec, row_coords_vec, input_mesh and matrix must
+  //outlive this object, and the vectors must not be resized afterwards.
+  MatrixInitOp(const std::vector<MINIFE_GLOBAL_ORDINAL>& rows_vec,
+               const std::vector<MINIFE_LOCAL_ORDINAL>& row_offsets_vec,
+               const std::vector<int>& row_coords_vec,
+               int global_nx, int global_ny, int global_nz,
+               MINIFE_GLOBAL_ORDINAL global_n_rows,
+               const miniFE::simple_mesh_description<MINIFE_GLOBAL_ORDINAL>& input_mesh,
+               miniFE::CSRMatrix<MINIFE_SCALAR,MINIFE_LOCAL_ORDINAL,MINIFE_GLOBAL_ORDINAL>& matrix)
+   : rows(&rows_vec[0]),
+     row_offsets(&row_offsets_vec[0]),
+     row_coords(&row_coords_vec[0]),
+     global_nodes_x(global_nx),
+     global_nodes_y(global_ny),
+     global_nodes_z(global_nz),
+     global_nrows(global_n_rows),
+     dest_rows(&matrix.rows[0]),
+     dest_rowoffsets(&matrix.row_offsets[0]),
+     dest_cols(0),
+     dest_coefs(0),
+     n(matrix.rows.size()),
+     proc(0),
+     mesh(&input_mesh)
+  {
+    if (matrix.packed_cols.capacity() != matrix.packed_coefs.capacity()) {
+      std::cout<<"Warning, packed_cols.capacity ("<<matrix.packed_cols.capacity()<<") != "
+        << "packed_coefs.capacity ("<<matrix.packed_coefs.capacity()<<")"<<std::endl;
+    }
+
+    size_t nnz = row_offsets_vec[n];
+    if (matrix.packed_cols.capacity() < nnz) {
+      std::cout<<"Warning, packed_cols.capacity ("<<matrix.packed_cols.capacity()<<") < "
+        " nnz ("<<nnz<<")"<<std::endl;
+    }
+
+    matrix.packed_cols.resize(nnz);
+    matrix.packed_coefs.resize(nnz);
+
+    //Take the packed-array pointers only now: resize() may reallocate the
+    //storage (exactly the case the warning above reports), which would
+    //leave pointers taken earlier dangling. Also avoids indexing [0] on an
+    //empty array.
+    if (nnz > 0) {
+      dest_cols = &matrix.packed_cols[0];
+      dest_coefs = &matrix.packed_coefs[0];
+    }
+
+    dest_rowoffsets[n] = nnz;
+#ifdef HAVE_MPI 
+   MPI_Comm_rank(MPI_COMM_WORLD, &proc);
+#else
+   proc = 0;
+#endif
+  }
+
+  typedef MINIFE_GLOBAL_ORDINAL GlobalOrdinalType;
+  typedef MINIFE_LOCAL_ORDINAL LocalOrdinalType;
+  typedef MINIFE_SCALAR ScalarType;
+
+  //Inputs: row ids, row offsets, and 3 integer coordinates per row.
+  const GlobalOrdinalType* rows;
+  const LocalOrdinalType*  row_offsets;
+  const int*               row_coords;
+
+  int global_nodes_x;
+  int global_nodes_y;
+  int global_nodes_z;
+
+  GlobalOrdinalType global_nrows;
+
+  //Outputs: pointers into the matrix being initialized.
+  GlobalOrdinalType* dest_rows;
+  LocalOrdinalType*  dest_rowoffsets;
+  GlobalOrdinalType* dest_cols;
+  ScalarType*        dest_coefs;
+  int n;
+  int proc; //MPI rank when HAVE_MPI is defined, otherwise 0; used in diagnostics only
+
+  const miniFE::simple_mesh_description<GlobalOrdinalType>* mesh;
+
+  //Initialize row i (see the description at the top of the struct).
+  inline void operator()(int i)
+  {
+    dest_rows[i] = rows[i];
+    int offset = row_offsets[i];
+    dest_rowoffsets[i] = offset;
+    int ix = row_coords[i*3];
+    int iy = row_coords[i*3+1];
+    int iz = row_coords[i*3+2];
+    GlobalOrdinalType nnz = 0;
+    for(int sz=-1; sz<=1; ++sz) {
+      for(int sy=-1; sy<=1; ++sy) {
+        for(int sx=-1; sx<=1; ++sx) {
+          GlobalOrdinalType col_id =
+              miniFE::get_id<GlobalOrdinalType>(global_nodes_x, global_nodes_y, global_nodes_z,
+                                   ix+sx, iy+sy, iz+sz);
+          //Neighbours whose id falls outside [0, global_nrows) are skipped.
+          if (col_id >= 0 && col_id < global_nrows) {
+            GlobalOrdinalType col = mesh->map_id_to_row(col_id);
+            if (col >= global_nrows) {
+              std::cout << "mesh->map_id_to_row produced col="<<col<<" from col_id="<<col_id<<", but global_nrows="<<global_nrows<<", max_row_in_map="<<mesh->max_row_in_map()<<", proc="<<proc<<std::endl;
+            }
+            dest_cols[offset+nnz] = col;
+            dest_coefs[offset+nnz] = 0;
+            ++nnz;
+          }
+        }
+      }
+    }
+
+    sort_if_needed(&dest_cols[offset], nnz);
+  }
+};
+
+//MatrixInitOp specialization for ELLMatrix.
+//
+//Each call operator()(i) initializes row i: it stores the row id, then,
+//starting at position i*ncols_per_row of the column/coefficient arrays,
+//writes the column index (mapped through mesh->map_id_to_row) and a zero
+//coefficient for every valid neighbour in the 3x3x3 block of nodes around
+//the row's node, and finally makes sure those column indices are in
+//ascending order.
+template<>
+struct MatrixInitOp<miniFE::ELLMatrix<MINIFE_SCALAR,MINIFE_LOCAL_ORDINAL,MINIFE_GLOBAL_ORDINAL> > {
+  //Captures raw pointers into the input vectors and into 'matrix'; the
+  //row-offsets argument is accepted for signature parity but unused.
+  MatrixInitOp(const std::vector<MINIFE_GLOBAL_ORDINAL>& rows_vec,
+               const std::vector<MINIFE_LOCAL_ORDINAL>& /*row_offsets_vec*/,
+               const std::vector<int>& row_coords_vec,
+               int global_nx, int global_ny, int global_nz,
+               MINIFE_GLOBAL_ORDINAL global_n_rows,
+               const miniFE::simple_mesh_description<MINIFE_GLOBAL_ORDINAL>& input_mesh,
+               miniFE::ELLMatrix<MINIFE_SCALAR,MINIFE_LOCAL_ORDINAL,MINIFE_GLOBAL_ORDINAL>& matrix)
+   : rows(&rows_vec[0]),
+     row_coords(&row_coords_vec[0]),
+     global_nodes_x(global_nx),
+     global_nodes_y(global_ny),
+     global_nodes_z(global_nz),
+     global_nrows(global_n_rows),
+     dest_rows(&matrix.rows[0]),
+     dest_cols(&matrix.cols[0]),
+     dest_coefs(&matrix.coefs[0]),
+     n(matrix.rows.size()),
+     ncols_per_row(matrix.num_cols_per_row),
+     mesh(&input_mesh)
+  {
+  }
+
+  typedef MINIFE_SCALAR ScalarType;
+  typedef MINIFE_LOCAL_ORDINAL LocalOrdinalType;
+  typedef MINIFE_GLOBAL_ORDINAL GlobalOrdinalType;
+
+  //Inputs: row ids and 3 integer coordinates per row.
+  const GlobalOrdinalType* rows;
+  const int*               row_coords;
+
+  int global_nodes_x;
+  int global_nodes_y;
+  int global_nodes_z;
+
+  GlobalOrdinalType global_nrows;
+
+  //Outputs: pointers into the matrix being initialized.
+  GlobalOrdinalType* dest_rows;
+  GlobalOrdinalType* dest_cols;
+  ScalarType*        dest_coefs;
+  int n;
+  int ncols_per_row;
+
+  const miniFE::simple_mesh_description<GlobalOrdinalType>* mesh;
+
+  //Initialize row i (see the description at the top of the struct).
+  inline void operator()(int i)
+  {
+    dest_rows[i] = rows[i];
+
+    const int row_start = i*ncols_per_row;
+    const int node_x = row_coords[i*3];
+    const int node_y = row_coords[i*3+1];
+    const int node_z = row_coords[i*3+2];
+
+    GlobalOrdinalType filled = 0;
+    for(int dz=-1; dz<=1; ++dz) {
+      for(int dy=-1; dy<=1; ++dy) {
+        for(int dx=-1; dx<=1; ++dx) {
+          const GlobalOrdinalType col_id =
+              miniFE::get_id<GlobalOrdinalType>(global_nodes_x, global_nodes_y, global_nodes_z,
+                                   node_x+dx, node_y+dy, node_z+dz);
+          //Neighbours whose id falls outside [0, global_nrows) are skipped.
+          if (col_id < 0 || col_id >= global_nrows) {
+            continue;
+          }
+          const GlobalOrdinalType col = mesh->map_id_to_row(col_id);
+          dest_cols[row_start+filled] = col;
+          dest_coefs[row_start+filled] = 0;
+          ++filled;
+        }
+      }
+    }
+
+    sort_if_needed(&dest_cols[row_start], filled);
+  }
+};
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Parameters.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/Parameters.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Parameters.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Parameters.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,64 @@
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#ifndef _parameters_hpp_
+#define _parameters_hpp_
+
+#include <string>
+
+namespace miniFE {
+
+//Plain bundle of run settings. The default constructor supplies the
+//default value of every field; ny and nz default to the value of nx.
+struct Parameters {
+  int nx;
+  int ny;
+  int nz;
+  int numthreads;
+  int mv_overlap_comm_comp;
+  int use_locking;
+  float load_imbalance;
+  std::string name;
+  int elem_group_size;
+  int use_elem_mat_fields;
+  int verify_solution;
+  int device;
+  int num_devices;
+  int skip_device;
+  int numa;
+
+  Parameters()
+   : nx(5),
+     ny(nx),
+     nz(nx),
+     numthreads(1),
+     mv_overlap_comm_comp(0),
+     use_locking(0),
+     load_imbalance(0),
+     name(),
+     elem_group_size(1),
+     use_elem_mat_fields(1),
+     verify_solution(0),
+     device(0),
+     num_devices(2),
+     skip_device(9999),
+     numa(1)
+  {}
+};//struct Parameters
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/README
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/README?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/README (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/README Mon Aug 21 16:54:28 2017
@@ -0,0 +1,9 @@
+MiniFE is a proxy application for unstructured implicit 
+finite element codes. It is similar to HPCCG and pHPCCG 
+but provides a much more complete vertical covering of the 
+steps in this class of applications. MiniFE also provides 
+support for computation on multicore nodes, including 
+pthreads and Intel Threading Building Blocks (TBB) for 
+homogeneous multicore and CUDA for GPUs.
+
+This is a serial build for the LLVM test-suite.

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/SparseMatrix_functions.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/SparseMatrix_functions.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/SparseMatrix_functions.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/SparseMatrix_functions.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,680 @@
+#ifndef _SparseMatrix_functions_hpp_
+#define _SparseMatrix_functions_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <cstddef>
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <sstream>
+#include <fstream>
+
+#include <Vector.hpp>
+#include <Vector_functions.hpp>
+#include <ElemData.hpp>
+#include <MatrixInitOp.hpp>
+#include <MatrixCopyOp.hpp>
+#include <exchange_externals.hpp>
+#include <mytimer.hpp>
+
+#ifdef MINIFE_HAVE_TBB
+#include <LockingMatrix.hpp>
+#endif
+
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+namespace miniFE {
+
+template<typename MatrixType>
+void init_matrix(MatrixType& M,
+                 const std::vector<typename MatrixType::GlobalOrdinalType>& rows,
+                 const std::vector<typename MatrixType::LocalOrdinalType>& row_offsets,
+                 const std::vector<int>& row_coords,
+                 int global_nodes_x,
+                 int global_nodes_y,
+                 int global_nodes_z,
+                 typename MatrixType::GlobalOrdinalType global_nrows,
+                 const simple_mesh_description<typename MatrixType::GlobalOrdinalType>& mesh)
+{
+  // Serial driver: construct the MatrixInitOp functor once, then invoke it for
+  // every index in [0, n), where n is the count the functor itself exposes.
+  MatrixInitOp<MatrixType> init_op(rows, row_offsets, row_coords,
+                                   global_nodes_x, global_nodes_y, global_nodes_z,
+                                   global_nrows, mesh, M);
+
+  for(int idx=0; idx<init_op.n; ++idx) init_op(idx);
+}
+
+template<typename T,
+         typename U>
+void sort_with_companions(ptrdiff_t len, T* array, U* companions)
+{
+  // Stable insertion sort (ascending) of array[0..len); companions[] is permuted
+  // in lock-step. The key is held as T (not ptrdiff_t) so values are never narrowed.
+
+  for (ptrdiff_t i=1; i < len; i++) {
+    const T key = array[i];
+    const U companion = companions[i];
+    ptrdiff_t j = i;
+    while ((j > 0) && (array[j-1] > key))
+    {
+      array[j] = array[j-1];
+      companions[j] = companions[j-1];
+      j = j - 1;
+    }
+    array[j] = key;
+    companions[j] = companion;
+  }
+}
+
+template<typename MatrixType>
+void write_matrix(const std::string& filename, 
+                  MatrixType& mat)
+{
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinalType;
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType;
+  typedef typename MatrixType::ScalarType ScalarType;
+
+  // Without MPI this is a single-process run: one rank, numbered 0.
+  int numprocs = 1, myproc = 0;
+#ifdef HAVE_MPI
+  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
+  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+#endif
+
+  // Each rank writes its own file, named "<filename>.<numprocs>.<myproc>".
+  std::ostringstream name_builder;
+  name_builder << filename << "." << numprocs << "." << myproc;
+  std::ofstream out(name_builder.str().c_str());
+
+  const size_t num_rows = mat.rows.size();
+  const size_t num_nonzeros = mat.num_nonzeros();
+
+  // Ranks take turns (under MPI a barrier ends every turn); only rank 0 emits the header.
+  for(int turn=0; turn<numprocs; ++turn) {
+    if (turn == myproc) {
+      if (turn == 0) out << num_rows << " " << num_nonzeros << std::endl;
+      for(size_t r=0; r<num_rows; ++r) {
+        size_t entries = 0;
+        GlobalOrdinalType* col_ids = NULL;
+        ScalarType* values = NULL;
+        mat.get_row_pointers(mat.rows[r], entries, col_ids, values);
+        // One "row col coef" line per stored entry of this row.
+        for(size_t k=0; k<entries; ++k) {
+          out << mat.rows[r] << " " << col_ids[k] << " " << values[k] << std::endl;
+        }
+      }
+    }
+#ifdef HAVE_MPI
+    MPI_Barrier(MPI_COMM_WORLD);
+#endif
+  }
+}
+
+template<typename GlobalOrdinal,typename Scalar>
+void
+sum_into_row(int row_len,
+             GlobalOrdinal* row_indices,
+             Scalar* row_coefs,
+             int num_inputs,
+             const GlobalOrdinal* input_indices,
+             const Scalar* input_coefs)
+{
+  // Adds input_coefs[i] to the coefficient stored for column input_indices[i], for each
+  // i in [0,num_inputs). row_indices[0..row_len) must be sorted ascending (binary search);
+  // inputs whose column is not present in the row are silently dropped.
+  GlobalOrdinal* const row_end = row_indices + row_len;
+  for(int i=0; i<num_inputs; ++i) {  // int index: a count <= 0 now means "no inputs"
+    GlobalOrdinal* loc = std::lower_bound(row_indices, row_end, input_indices[i]);
+    if (loc != row_end && *loc == input_indices[i])
+      row_coefs[loc-row_indices] += input_coefs[i];
+  }
+}
+
+template<typename MatrixType>
+void
+sum_into_row(typename MatrixType::GlobalOrdinalType row,
+             size_t num_indices,
+             const typename MatrixType::GlobalOrdinalType* col_inds,
+             const typename MatrixType::ScalarType* coefs,
+             MatrixType& mat)
+{
+  // Matrix-level wrapper: fetches the storage of `row` from mat and forwards to the
+  // pointer-based sum_into_row overload. A row reported with zero length is left alone.
+  typedef typename MatrixType::GlobalOrdinalType Ordinal;
+  typedef typename MatrixType::ScalarType Value;
+
+  size_t stored_len = 0;
+  Ordinal* stored_cols = NULL;
+  Value* stored_coefs = NULL;
+  mat.get_row_pointers(row, stored_len, stored_cols, stored_coefs);
+
+  if (stored_len != 0) sum_into_row(stored_len, stored_cols, stored_coefs, num_indices, col_inds, coefs);
+}
+
+template<typename MatrixType>
+void
+sum_in_symm_elem_matrix(size_t num,
+                   const typename MatrixType::GlobalOrdinalType* indices,
+                   const typename MatrixType::ScalarType* coefs,
+                   MatrixType& mat)
+{
+  typedef typename MatrixType::ScalarType Scalar;
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal;
+
+//Sums a symmetric element matrix into mat.
+//indices is length num (which should be nodes-per-elem)
+//coefs is the upper triangle of the element diffusion matrix, packed row by row,
+//which should be length num*(num+1)/2
+
+  size_t row_offset = 0;  //start of packed row i within coefs
+  for(size_t i=0; i<num; ++i) {
+    GlobalOrdinal row = indices[i];
+ 
+    const Scalar* row_coefs = &coefs[row_offset];
+    const GlobalOrdinal* row_col_inds = &indices[i];
+    size_t row_len = num - i;
+    row_offset += row_len;
+
+    size_t mat_row_len = 0;
+    GlobalOrdinal* mat_row_cols = NULL;
+    Scalar* mat_row_coefs = NULL;
+  
+    mat.get_row_pointers(row, mat_row_len, mat_row_cols, mat_row_coefs);
+    if (mat_row_len == 0) continue;
+
+    //Diagonal and upper-triangle part of row i: entries (i,i)..(i,num-1).
+    sum_into_row(mat_row_len, mat_row_cols, mat_row_coefs,
+                 row_len, row_col_inds, row_coefs);
+
+    //Lower-triangle part of row i: entry (i,j), j<i, equals packed entry (j,i).
+    size_t offset = i;  //packed position of (0,i)
+    for(size_t j=0; j<i; ++j) {
+      Scalar coef = coefs[offset];
+      sum_into_row(mat_row_len, mat_row_cols, mat_row_coefs,
+                   1, &indices[j], &coef);
+      offset += num - (j+1);  //step from packed (j,i) to packed (j+1,i)
+    }
+  }
+}
+
+template<typename MatrixType>
+void
+sum_in_elem_matrix(size_t num,
+                   const typename MatrixType::GlobalOrdinalType* indices,
+                   const typename MatrixType::ScalarType* coefs,
+                   MatrixType& mat)
+{
+  // coefs holds a dense num x num element matrix, row-major; row r of it is summed
+  // into matrix row indices[r], with all num entries of indices as the column ids.
+  const typename MatrixType::ScalarType* elem_row = coefs;
+
+  for(size_t r=0; r<num; ++r, elem_row += num) {
+    sum_into_row(indices[r], num, indices, elem_row, mat);
+  }
+}
+
+template<typename GlobalOrdinal, typename Scalar,
+         typename MatrixType, typename VectorType>
+void  // Sums one element's matrix and source-vector contributions into A and b.
+sum_into_global_linear_system(ElemData<GlobalOrdinal,Scalar>& elem_data,
+                              MatrixType& A, VectorType& b)
+{  // elem_node_ids supplies the row ids and the column ids for both calls
+  sum_in_symm_elem_matrix(elem_data.nodes_per_elem, elem_data.elem_node_ids,
+                     elem_data.elem_diffusion_matrix, A);  // symmetric path: coefficients are read as a packed upper triangle
+  sum_into_vector(elem_data.nodes_per_elem, elem_data.elem_node_ids,
+                  elem_data.elem_source_vector, b);  // indices outside b's local range are skipped by sum_into_vector
+}
+
+#ifdef MINIFE_HAVE_TBB
+template<typename MatrixType>
+void
+sum_in_elem_matrix(size_t num,
+                   const typename MatrixType::GlobalOrdinalType* indices,
+                   const typename MatrixType::ScalarType* coefs,
+                   LockingMatrix<MatrixType>& mat)
+{
+  // LockingMatrix variant: row r of the dense, row-major num x num element matrix in
+  // coefs is handed to mat.sum_in for matrix row indices[r].
+  const typename MatrixType::ScalarType* elem_row = coefs;
+  for(size_t r=0; r<num; ++r, elem_row += num) {
+    mat.sum_in(indices[r], num, indices, elem_row);
+  }
+}
+
+template<typename GlobalOrdinal, typename Scalar,
+         typename MatrixType, typename VectorType>
+void  // Locking overload (compiled only with MINIFE_HAVE_TBB): sums one element into A and b.
+sum_into_global_linear_system(ElemData<GlobalOrdinal,Scalar>& elem_data,
+                              LockingMatrix<MatrixType>& A, LockingVector<VectorType>& b)
+{
+  sum_in_elem_matrix(elem_data.nodes_per_elem, elem_data.elem_node_ids,
+                     elem_data.elem_diffusion_matrix, A);  // NOTE(review): dense num x num path, whereas the non-locking overload uses the packed symmetric path -- confirm the layout of elem_diffusion_matrix
+  sum_into_vector(elem_data.nodes_per_elem, elem_data.elem_node_ids,
+                  elem_data.elem_source_vector, b);  // forwards to b.sum_in via the LockingVector overload
+}
+#endif
+
+template<typename MatrixType>
+void
+add_to_diagonal(typename MatrixType::ScalarType value, MatrixType& mat)
+{
+  // For every row id in mat.rows, sum `value` into the entry whose column id equals that row id.
+  size_t r = 0;
+  while (r < mat.rows.size()) { sum_into_row(mat.rows[r], 1, &mat.rows[r], &value, mat); ++r; }
+}
+
+template<typename MatrixType>
+double  // Returns the size, in MB, of A's communication bookkeeping arrays; 0 when built without HAVE_MPI.
+parallel_memory_overhead_MB(const MatrixType& A)
+{
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal;
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinal;
+  double mem_MB = 0;
+
+#ifdef HAVE_MPI
+  double invMB = 1.0/(1024*1024);  // bytes -> MB (1 MB = 1024*1024 bytes here)
+  mem_MB = invMB*A.external_index.size()*sizeof(GlobalOrdinal);
+  mem_MB += invMB*A.external_local_index.size()*sizeof(GlobalOrdinal);  // NOTE(review): element types of these arrays are not visible here -- confirm the sizeof choices
+  mem_MB += invMB*A.elements_to_send.size()*sizeof(GlobalOrdinal);
+  mem_MB += invMB*A.neighbors.size()*sizeof(int);
+  mem_MB += invMB*A.recv_length.size()*sizeof(LocalOrdinal);
+  mem_MB += invMB*A.send_length.size()*sizeof(LocalOrdinal);
+
+  double tmp = mem_MB;  // separate send buffer so mem_MB can receive the reduced value
+  MPI_Allreduce(&tmp, &mem_MB, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);  // total over all ranks, returned on every rank
+#endif
+
+  return mem_MB;
+}
+
+template<typename MatrixType>
+void rearrange_matrix_local_external(MatrixType& A)
+{
+  //This function will rearrange A so that local entries are contiguous at the front
+  //of A's memory, and external entries are contiguous at the back of A's memory.
+  //An entry is treated as local when its column id is < A.rows.size(), external otherwise.
+  //A.row_offsets will describe where the local entries occur, and
+  //A.row_offsets_external will describe where the external entries occur.
+
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal;
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinal;
+  typedef typename MatrixType::ScalarType Scalar;
+
+  size_t nrows = A.rows.size();
+  std::vector<LocalOrdinal> tmp_row_offsets(nrows*2);  //[2*i],[2*i+1] = begin,end of row i's local entries
+  std::vector<LocalOrdinal> tmp_row_offsets_external(nrows*2);  //same layout, for row i's external entries
+
+  LocalOrdinal num_local_nz = 0;  //accumulated below but not read afterwards
+  LocalOrdinal num_extern_nz = 0;  //sizes the temporary external arrays
+
+  //First sort within each row of A, so that local entries come
+  //before external entries within each row.
+  //tmp_row_offsets describe the locations of the local entries, and
+  //tmp_row_offsets_external describe the locations of the external entries.
+  //
+  for(size_t i=0; i<nrows; ++i) {
+    GlobalOrdinal* row_begin = &A.packed_cols[A.row_offsets[i]];
+    GlobalOrdinal* row_end = &A.packed_cols[A.row_offsets[i+1]];  //NOTE(review): for the last row this indexes packed_cols at row_offsets[nrows], presumably one past the last entry -- confirm that is valid for the container
+
+    Scalar* coef_row_begin = &A.packed_coefs[A.row_offsets[i]];
+
+    tmp_row_offsets[i*2] = A.row_offsets[i];
+    tmp_row_offsets[i*2+1] = A.row_offsets[i+1];  //provisional; replaced by the split point below
+    tmp_row_offsets_external[i*2] = A.row_offsets[i+1];  //provisional; replaced by the split point below
+    tmp_row_offsets_external[i*2+1] = A.row_offsets[i+1];
+
+    ptrdiff_t row_len = row_end - row_begin;
+
+    sort_with_companions(row_len, row_begin, coef_row_begin);  //ascending column ids; coefficients follow their columns
+
+    GlobalOrdinal* row_iter = std::lower_bound(row_begin, row_end, nrows);  //first column id >= nrows, i.e. first external entry
+
+    LocalOrdinal offset = A.row_offsets[i] + row_iter-row_begin;  //split point as an index into the packed arrays
+    tmp_row_offsets[i*2+1] = offset;
+    tmp_row_offsets_external[i*2] = offset;
+
+    num_local_nz += tmp_row_offsets[i*2+1]-tmp_row_offsets[i*2];
+    num_extern_nz += tmp_row_offsets_external[i*2+1]-tmp_row_offsets_external[i*2];
+  }
+
+  //Next, copy the external entries into separate arrays.
+
+  std::vector<GlobalOrdinal> ext_cols(num_extern_nz);
+  std::vector<Scalar> ext_coefs(num_extern_nz);
+  std::vector<LocalOrdinal> ext_offsets(nrows+1);  //row offsets relative to the start of the external arrays
+  LocalOrdinal offset = 0;
+  for(size_t i=0; i<nrows; ++i) {
+    ext_offsets[i] = offset;
+    for(LocalOrdinal j=tmp_row_offsets_external[i*2];
+                     j<tmp_row_offsets_external[i*2+1]; ++j) {
+      ext_cols[offset] = A.packed_cols[j];
+      ext_coefs[offset++] = A.packed_coefs[j];
+    }
+  }
+  ext_offsets[nrows] = offset;
+
+  //Now slide all local entries down to the beginning of A's packed arrays
+  //(the write index never passes the read index; external entries overwritten here were saved above)
+  A.row_offsets.resize(nrows+1);
+  offset = 0;
+  for(size_t i=0; i<nrows; ++i) {
+    A.row_offsets[i] = offset;
+    for(LocalOrdinal j=tmp_row_offsets[i*2]; j<tmp_row_offsets[i*2+1]; ++j) {
+      A.packed_cols[offset] = A.packed_cols[j];
+      A.packed_coefs[offset++] = A.packed_coefs[j];
+    }
+  }
+  A.row_offsets[nrows] = offset;  //offset now equals the total number of local entries
+
+  //Finally, copy the external entries back into A.packed_cols and
+  //A.packed_coefs, starting at the end of the local entries.
+
+  for(LocalOrdinal i=offset; i<offset+ext_cols.size(); ++i) {  //NOTE(review): bound mixes LocalOrdinal with size_t
+    A.packed_cols[i] = ext_cols[i-offset];
+    A.packed_coefs[i] = ext_coefs[i-offset];
+  }
+
+  A.row_offsets_external.resize(nrows+1);
+  for(size_t i=0; i<=nrows; ++i) A.row_offsets_external[i] = ext_offsets[i] + offset;  //shift past the local entries
+}
+
+//------------------------------------------------------------------------
+template<typename MatrixType>
+void
+zero_row_and_put_1_on_diagonal(MatrixType& A, typename MatrixType::GlobalOrdinalType row)
+{
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal;
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinal;
+  typedef typename MatrixType::ScalarType Scalar;
+
+  // Overwrite every stored coefficient of `row`: 1 where the column id equals
+  // `row`, 0 everywhere else.
+  size_t num_entries = 0;
+  GlobalOrdinal* col_ids = NULL;
+  Scalar* values = NULL;
+  A.get_row_pointers(row, num_entries, col_ids, values);
+  for(size_t k=0; k<num_entries; ++k) {
+    values[k] = (col_ids[k] == row) ? 1 : 0;
+  }
+}
+
+//------------------------------------------------------------------------
+template<typename MatrixType,
+         typename VectorType>
+void
+impose_dirichlet(typename MatrixType::ScalarType prescribed_value,
+                    MatrixType& A,
+                    VectorType& b,
+                    int global_nx,
+                    int global_ny,
+                    int global_nz,
+                    const std::set<typename MatrixType::GlobalOrdinalType>& bc_rows)
+{
+  // Two passes (global_nx/ny/nz are accepted but not used here):
+  //  1. each bc row between the first and last entry of A.rows gets b = prescribed_value
+  //     and its matrix row replaced by a unit diagonal;
+  //  2. each remaining row has its couplings to bc columns zeroed, with their sum
+  //     times prescribed_value subtracted from b.
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal;
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinal;
+  typedef typename MatrixType::ScalarType Scalar;
+
+  const bool have_rows = A.rows.size()>0;
+  GlobalOrdinal first_local_row = have_rows ? A.rows[0] : 0;
+  GlobalOrdinal last_local_row  = have_rows ? A.rows[A.rows.size()-1] : -1;
+
+  typedef typename std::set<GlobalOrdinal>::const_iterator bc_iterator;
+  for(bc_iterator it = bc_rows.begin(); it != bc_rows.end(); ++it) {
+    GlobalOrdinal row = *it;
+    if (row < first_local_row || row > last_local_row) continue;
+    b.coefs[static_cast<size_t>(row - first_local_row)] = prescribed_value;
+    zero_row_and_put_1_on_diagonal(A, row);
+  }
+
+  for(size_t i=0; i<A.rows.size(); ++i) {
+    GlobalOrdinal row = A.rows[i];
+    if (bc_rows.count(row) != 0) continue;
+
+    size_t row_length = 0;
+    GlobalOrdinal* cols = NULL;
+    Scalar* coefs = NULL;
+    A.get_row_pointers(row, row_length, cols, coefs);
+
+    Scalar bc_coupling = 0;
+    for(size_t j=0; j<row_length; ++j) {
+      if (bc_rows.count(cols[j]) == 0) continue;
+      bc_coupling += coefs[j];
+      coefs[j] = 0;
+    }
+    b.coefs[i] -= bc_coupling*prescribed_value;
+  }
+}
+
+static timer_type exchtime = 0;  // time accumulated in exchange_externals by matvec_and_dot (static: one copy per translation unit)
+
+//------------------------------------------------------------------------
+//Fused kernel: computes y = A*x and returns dot(x,y) in a single sweep.
+//
+// A - input matrix
+// x - input vector (exchange_externals(A, x) is applied to it first)
+// y - result vector
+//
+template<typename MatrixType,
+         typename VectorType>
+typename TypeTraits<typename VectorType::ScalarType>::magnitude_type
+matvec_and_dot(MatrixType& A,
+               VectorType& x,
+               VectorType& y)
+{
+  typedef typename TypeTraits<typename VectorType::ScalarType>::magnitude_type magnitude;
+  typedef typename MatrixType::ScalarType ScalarType;
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType;
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinalType;
+
+  const timer_type exch_start = mytimer();
+  exchange_externals(A, x);
+  exchtime += mytimer()-exch_start;
+
+  const int num_rows = A.rows.size();
+  const LocalOrdinalType* row_ptr     = &A.row_offsets[0];
+  const GlobalOrdinalType* col_idx    = &A.packed_cols[0];
+  const ScalarType* coef              = &A.packed_coefs[0];
+  const ScalarType* x_in  = &x.coefs[0];
+        ScalarType* y_out = &y.coefs[0];
+  const ScalarType beta = 0;
+
+  magnitude xy = 0;
+
+  for(int row=0; row<num_rows; ++row) {
+    ScalarType acc = beta*y_out[row];  // same form as y = A*x + beta*y, with beta fixed at 0
+    const LocalOrdinalType row_end = row_ptr[row+1];
+    for(LocalOrdinalType k=row_ptr[row]; k<row_end; ++k) {
+      acc += coef[k]*x_in[col_idx[k]];
+    }
+    y_out[row] = acc;
+    xy += x_in[row]*acc;
+  }
+
+#ifdef HAVE_MPI
+  // Sum the per-process partial dot products over MPI_COMM_WORLD.
+  magnitude local_dot = xy, global_dot = 0;
+  MPI_Datatype mpi_dtype = TypeTraits<magnitude>::mpi_type();  
+  MPI_Allreduce(&local_dot, &global_dot, 1, mpi_dtype, MPI_SUM, MPI_COMM_WORLD);
+  return global_dot;
+#else
+  return xy;
+#endif
+}
+
+//------------------------------------------------------------------------
+//Compute matrix vector product y = A*x where:
+//
+// A - input matrix
+// x - input vector
+// y - result vector
+//
+#if defined(MINIFE_CSR_MATRIX)
+template<typename MatrixType,
+         typename VectorType>
+struct matvec_std {
+// CSR kernel: y = A*x over A.row_offsets / A.packed_cols / A.packed_coefs.
+void operator()(MatrixType& A,
+            VectorType& x,
+            VectorType& y)
+{
+  typedef typename MatrixType::ScalarType ScalarType;
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType;
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinalType;
+
+  exchange_externals(A, x);  // applied to x before any entry of it is read
+
+  const int num_rows = A.rows.size();
+  const LocalOrdinalType* row_ptr  = &A.row_offsets[0];
+  const GlobalOrdinalType* col_idx = &A.packed_cols[0];
+  const ScalarType* coef           = &A.packed_coefs[0];
+  const ScalarType* x_in  = &x.coefs[0];
+        ScalarType* y_out = &y.coefs[0];
+  const ScalarType beta = 0;
+
+  for(int row=0; row<num_rows; ++row) {
+    ScalarType acc = beta*y_out[row];
+    const LocalOrdinalType row_end = row_ptr[row+1];
+    for(LocalOrdinalType k=row_ptr[row]; k<row_end; ++k) {
+      acc += coef[k]*x_in[col_idx[k]];
+    }
+
+    y_out[row] = acc;
+  }
+}
+};
+#elif defined(MINIFE_ELL_MATRIX)
+template<typename MatrixType,
+         typename VectorType>
+struct matvec_std {
+// ELL kernel: y = A*x where every row owns exactly A.num_cols_per_row slots of A.cols / A.coefs.
+void operator()(MatrixType& A,
+            VectorType& x,
+            VectorType& y)
+{
+  typedef typename MatrixType::ScalarType ScalarType;
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType;
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinalType;
+
+  exchange_externals(A, x);
+
+  const int slots_per_row = A.num_cols_per_row;
+  const int num_rows = A.rows.size();
+  const GlobalOrdinalType* col_idx = &A.cols[0];
+  const ScalarType* coef           = &A.coefs[0];
+  const ScalarType* x_in  = &x.coefs[0];
+        ScalarType* y_out = &y.coefs[0];
+  const ScalarType beta = 0;
+
+  for(int row=0; row<num_rows; ++row) {
+    ScalarType acc = beta*y_out[row];
+    // Row `row` occupies slots [row*slots_per_row, (row+1)*slots_per_row).
+    const int first_slot = row*slots_per_row;
+    const int end_slot   = first_slot+slots_per_row;
+    for(LocalOrdinalType k=first_slot; k<end_slot; ++k) {
+      acc += coef[k]*x_in[col_idx[k]];
+    }
+    y_out[row] = acc;
+  }
+}
+};
+#endif
+
+template<typename MatrixType,
+         typename VectorType>
+void matvec(MatrixType& A, VectorType& x, VectorType& y)
+{
+  // Convenience wrapper: y = A*x via a temporary matvec_std functor.
+  matvec_std<MatrixType,VectorType>()(A, x, y);
+}
+
+template<typename MatrixType,
+         typename VectorType>
+struct matvec_overlap {
+// y = A*x in two phases bracketed by begin_/finish_exchange_externals (MPI only):
+// entries addressed by A.row_offsets first, then (MPI only) those addressed by
+// A.row_offsets_external, accumulated on top of the first phase.
+void operator()(MatrixType& A,
+                    VectorType& x,
+                    VectorType& y)
+{
+#ifdef HAVE_MPI
+  begin_exchange_externals(A, x);
+#endif
+
+  typedef typename MatrixType::ScalarType ScalarType;
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType;
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinalType;
+
+  const int num_rows = A.rows.size();
+  const LocalOrdinalType* row_ptr  = &A.row_offsets[0];
+  const GlobalOrdinalType* col_idx = &A.packed_cols[0];
+  const ScalarType* coef           = &A.packed_coefs[0];
+  const ScalarType* x_in  = &x.coefs[0];
+        ScalarType* y_out = &y.coefs[0];
+  ScalarType beta = 0;
+
+  // Phase 1 (beta = 0): y is overwritten.
+  for(int row=0; row<num_rows; ++row) {
+    ScalarType acc = beta*y_out[row];
+    const LocalOrdinalType row_end = row_ptr[row+1];
+    for(LocalOrdinalType k=row_ptr[row]; k<row_end; ++k) {
+      acc += coef[k]*x_in[col_idx[k]];
+    }
+    y_out[row] = acc;
+  }
+
+#ifdef HAVE_MPI
+  finish_exchange_externals(A.neighbors.size());
+  // Phase 2 (beta = 1): the row_offsets_external entries are added to y.
+  row_ptr = &A.row_offsets_external[0];
+  beta = 1;
+  for(int row=0; row<num_rows; ++row) {
+    ScalarType acc = beta*y_out[row];
+    const LocalOrdinalType row_end = row_ptr[row+1];
+    for(LocalOrdinalType k=row_ptr[row]; k<row_end; ++k) {
+      acc += coef[k]*x_in[col_idx[k]];
+    }
+    y_out[row] = acc;
+  }
+#endif
+}
+};
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/TypeTraits.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/TypeTraits.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/TypeTraits.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/TypeTraits.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,136 @@
+#ifndef _TypeTraits_hpp_
+#define _TypeTraits_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <complex>
+
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+namespace miniFE {
+
+template<typename T> struct TypeTraits {};  // primary template is empty; only the specializations below provide magnitude_type, name() and (with HAVE_MPI) mpi_type()
+
+template<>
+struct TypeTraits<float> {
+  typedef float magnitude_type;  // type used for dot-product results of this scalar
+
+  static const char* name() {return "float";}  // printable type name
+
+#ifdef HAVE_MPI
+  static MPI_Datatype mpi_type() {return MPI_FLOAT;}  // MPI datatype handle used for reductions of this type
+#endif
+};
+
+template<>
+struct TypeTraits<double> {
+  typedef double magnitude_type;
+
+  static const char* name() {return "double";}
+
+#ifdef HAVE_MPI
+  static MPI_Datatype mpi_type() {return MPI_DOUBLE;}
+#endif
+};
+
+template<>
+struct TypeTraits<int> {
+  typedef int magnitude_type;
+
+  static const char* name() {return "int";}
+
+#ifdef HAVE_MPI
+  static MPI_Datatype mpi_type() {return MPI_INT;}
+#endif
+};
+
+template<>
+struct TypeTraits<long int> {
+  typedef long int magnitude_type;
+
+  static const char* name() {return "long int";}
+
+#ifdef HAVE_MPI
+  static MPI_Datatype mpi_type() {return MPI_LONG;}
+#endif
+};
+
+#ifndef MINIFE_NO_LONG_LONG
+
+template<>
+struct TypeTraits<long long> {  // compiled out when MINIFE_NO_LONG_LONG is defined
+  typedef long long magnitude_type;
+
+  static const char* name() {return "long long";}
+
+#ifdef HAVE_MPI
+  static MPI_Datatype mpi_type() {return MPI_LONG_LONG;}
+#endif
+};
+
+#endif
+
+template<>
+struct TypeTraits<unsigned> {
+  typedef unsigned magnitude_type;
+
+  static const char* name() {return "unsigned";}
+
+#ifdef HAVE_MPI
+  static MPI_Datatype mpi_type() {return MPI_UNSIGNED;}
+#endif
+};
+
+template<>
+struct TypeTraits<std::complex<float> > {
+  typedef float magnitude_type;  // real type, unlike the scalar itself
+
+  static const char* name() {return "std::complex<float>";}
+
+#ifdef HAVE_MPI
+  static MPI_Datatype mpi_type() {return MPI_COMPLEX;}  // NOTE(review): MPI_COMPLEX is the Fortran COMPLEX handle; confirm it is the intended match for std::complex<float>
+#endif
+};
+
+template<>
+struct TypeTraits<std::complex<double> > {
+  typedef double magnitude_type;  // real type, unlike the scalar itself
+
+  static const char* name() {return "std::complex<double>";}
+
+#ifdef HAVE_MPI
+  static MPI_Datatype mpi_type() {return MPI_DOUBLE_COMPLEX;}  // NOTE(review): Fortran DOUBLE COMPLEX handle; confirm it is the intended match for std::complex<double>
+#endif
+};
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Vector.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/Vector.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Vector.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Vector.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,67 @@
+#ifndef _Vector_hpp_
+#define _Vector_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <vector>
+
+namespace miniFE {
+
+
+template<typename Scalar,
+         typename LocalOrdinal,
+         typename GlobalOrdinal>
+struct Vector {
+  typedef Scalar ScalarType;
+  typedef LocalOrdinal LocalOrdinalType;
+  typedef GlobalOrdinal GlobalOrdinalType;
+
+  // Creates local_sz coefficients, all set to 0; startIdx is the global index of coefs[0].
+  Vector(GlobalOrdinal startIdx, LocalOrdinal local_sz)
+   : startIndex(startIdx),
+     local_size(local_sz),
+     coefs(local_size)
+  {
+    typedef typename std::vector<Scalar>::iterator coef_iterator;
+    for(coef_iterator it = coefs.begin(); it != coefs.end(); ++it) *it = 0;
+  }
+
+  ~Vector()
+  {
+  }
+
+  GlobalOrdinal startIndex;  // global index of the first local coefficient
+  LocalOrdinal local_size;   // number of local coefficients; declared before coefs, which is sized from it
+  std::vector<Scalar> coefs;
+};
+
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Vector_functions.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/Vector_functions.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Vector_functions.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Vector_functions.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,231 @@
+#ifndef _Vector_functions_hpp_
+#define _Vector_functions_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <vector>
+#include <sstream>
+#include <fstream>
+
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+#ifdef MINIFE_HAVE_TBB
+#include <LockingVector.hpp>
+#endif
+
+#include <TypeTraits.hpp>
+#include <Vector.hpp>
+
+namespace miniFE {
+
+
+template<typename VectorType>
+void write_vector(const std::string& filename,
+                  const VectorType& vec)
+{
+  // Dumps vec as text to "<filename>.<numprocs>.<myproc>": rank 0 first writes its
+  // local_size, then each rank writes one "<global index> <coefficient>" line per entry.
+  typedef typename VectorType::ScalarType ScalarType;
+
+  int numprocs = 1, myproc = 0;
+#ifdef HAVE_MPI
+  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
+  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+#endif
+
+  std::ostringstream name_builder;
+  name_builder << filename << "." << numprocs << "." << myproc;
+  std::ofstream out(name_builder.str().c_str());
+
+  const std::vector<ScalarType>& values = vec.coefs;
+  const typename VectorType::GlobalOrdinalType first = vec.startIndex;
+
+  // Ranks take turns; under MPI a barrier ends every turn.
+  for(int turn=0; turn<numprocs; ++turn) {
+    if (turn == myproc) {
+      if (turn == 0) out << vec.local_size << std::endl;
+      for(size_t i=0; i<vec.local_size; ++i) {
+        out << first+i << " " << values[i] << std::endl;
+      }
+    }
+#ifdef HAVE_MPI
+    MPI_Barrier(MPI_COMM_WORLD);
+#endif
+  }
+}
+
+template<typename VectorType>
+void sum_into_vector(size_t num_indices,
+                     const typename VectorType::GlobalOrdinalType* indices,
+                     const typename VectorType::ScalarType* coefs,
+                     VectorType& vec)
+{
+  // Adds coefs[k] to the local entry for global index indices[k]; indices outside
+  // [startIndex, startIndex + local_size - 1] are skipped.
+  typedef typename VectorType::GlobalOrdinalType GlobalOrdinal;
+  typedef typename VectorType::ScalarType Scalar;
+
+  const GlobalOrdinal lo = vec.startIndex;
+  const GlobalOrdinal hi = lo + vec.local_size - 1;
+  std::vector<Scalar>& target = vec.coefs;
+
+  for(size_t k=0; k<num_indices; ++k) {
+    const bool owned = !(indices[k] < lo || indices[k] > hi);
+    if (owned) target[static_cast<size_t>(indices[k] - lo)] += coefs[k];
+  }
+}
+
+#ifdef MINIFE_HAVE_TBB
+template<typename VectorType>
+void sum_into_vector(size_t num_indices,  // LockingVector overload, compiled only with MINIFE_HAVE_TBB
+                     const typename VectorType::GlobalOrdinalType* indices,
+                     const typename VectorType::ScalarType* coefs,
+                     LockingVector<VectorType>& vec)
+{
+  vec.sum_in(num_indices, indices, coefs);  // all work is delegated to LockingVector::sum_in (defined in LockingVector.hpp, not shown here)
+}
+#endif
+
+//------------------------------------------------------------
+//Compute the update of a vector with the sum of two scaled vectors where:
+//
+// w = alpha*x + beta*y
+//
+// x,y - input vectors
+//
+// alpha,beta - scalars applied to x and y respectively
+//
+// w - output vector
+//
+template<typename VectorType>
+void
+  waxpby(typename VectorType::ScalarType alpha, const VectorType& x,
+         typename VectorType::ScalarType beta, const VectorType& y,
+         VectorType& w)
+{
+  typedef typename VectorType::ScalarType ScalarType;
+
+#ifdef MINIFE_DEBUG
+  // Debug builds refuse to run when y or w is shorter than x.
+  const bool too_short = y.local_size < x.local_size || w.local_size < x.local_size;
+  if (too_short) {
+    std::cerr << "miniFE::waxpby ERROR, y and w must be at least as long as x." << std::endl;
+    return;
+  }
+#endif
+
+  // The element count comes from x alone.
+  const int count = x.coefs.size();
+  const ScalarType* x_in = &x.coefs[0];
+  const ScalarType* y_in = &y.coefs[0];
+  ScalarType* w_out = &w.coefs[0];
+  for(int k=0; k<count; ++k) w_out[k] = alpha*x_in[k] + beta*y_in[k];
+}
+
+//Like waxpby above, except operates on two sets of arguments.
+//In other words, performs two waxpby operations in one loop:
+//  w = alpha*x + beta*y   and   w2 = alpha2*x2 + beta2*y2
+template<typename VectorType>
+void
+  fused_waxpby(typename VectorType::ScalarType alpha, const VectorType& x,
+         typename VectorType::ScalarType beta, const VectorType& y,
+         VectorType& w,
+         typename VectorType::ScalarType alpha2, const VectorType& x2,
+         typename VectorType::ScalarType beta2, const VectorType& y2,
+         VectorType& w2)
+{
+  typedef typename VectorType::ScalarType ScalarType;
+
+#ifdef MINIFE_DEBUG
+  if (y.local_size < x.local_size || w.local_size < x.local_size ||
+      x2.local_size < x.local_size || y2.local_size < x.local_size || w2.local_size < x.local_size) {  //the loop length comes from x only
+    std::cerr << "miniFE::fused_waxpby ERROR, y, w, x2, y2 and w2 must be at least as long as x." << std::endl;
+    return;
+  }
+#endif
+
+  int n = x.coefs.size();  //both operand sets are processed for x's length
+  const ScalarType* xcoefs = &x.coefs[0];
+  const ScalarType* ycoefs = &y.coefs[0];
+        ScalarType* wcoefs = &w.coefs[0];
+  const ScalarType* x2coefs = &x2.coefs[0];
+  const ScalarType* y2coefs = &y2.coefs[0];
+        ScalarType* w2coefs = &w2.coefs[0];
+  for(int i=0; i<n; ++i) {
+    wcoefs[i] = alpha*xcoefs[i] + beta*ycoefs[i];
+    w2coefs[i] = alpha2*x2coefs[i] + beta2*y2coefs[i];
+  }
+}
+
+//-----------------------------------------------------------
+//Compute the dot product of two vectors where:
+//
+// x,y - input vectors
+//
+// result - return-value
+//
+template<typename Vector>
+typename TypeTraits<typename Vector::ScalarType>::magnitude_type
+  dot(const Vector& x,
+      const Vector& y)
+{
+  typedef typename Vector::ScalarType Scalar;
+  typedef typename TypeTraits<typename Vector::ScalarType>::magnitude_type magnitude;
+  // The length comes from x; debug builds clamp it (with a message) when y is shorter.
+  int count = x.coefs.size();
+#ifdef MINIFE_DEBUG
+  if (y.local_size < count) {
+    std::cerr << "miniFE::dot ERROR, y must be at least as long as x."<<std::endl;
+    count = y.local_size;
+  }
+#endif
+
+  const Scalar* x_in = &x.coefs[0];
+  const Scalar* y_in = &y.coefs[0];
+  magnitude partial = 0;
+  for(int k=0; k<count; ++k) {
+    partial += x_in[k]*y_in[k];
+  }
+
+#ifdef HAVE_MPI
+  // Sum the per-process partial results over MPI_COMM_WORLD.
+  magnitude local_dot = partial, global_dot = 0;
+  MPI_Datatype mpi_dtype = TypeTraits<magnitude>::mpi_type();  
+  MPI_Allreduce(&local_dot, &global_dot, 1, mpi_dtype, MPI_SUM, MPI_COMM_WORLD);
+  return global_dot;
+#else
+  return partial;
+#endif
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Doc.cpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/YAML_Doc.cpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Doc.cpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Doc.cpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,77 @@
+#include <ctime>
+#include <cstdlib>
+#include <ctime>
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#ifdef REDSTORM
+#include <time.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#endif
+#include "YAML_Doc.hpp"
+using namespace std;
+
+//Store the mini-app name and version (both become part of the YAML doc)
+//together with the destination directory and file name.
+YAML_Doc::YAML_Doc(const std::string& miniApp_Name, const std::string& miniApp_Version, const std::string& destination_Directory, const std::string& destination_FileName)
+  : miniAppName(miniApp_Name),
+    miniAppVersion(miniApp_Version),
+    destinationDirectory(destination_Directory),
+    destinationFileName(destination_FileName)
+{
+}
+
+//Nothing to release here; the YAML_Element base destructor runs afterwards.
+YAML_Doc::~YAML_Doc()
+{
+}
+
+/*
+* Builds the YAML text for this document and returns it.
+*
+* The text starts with the mini-application name and version lines,
+* followed by the YAML of every child element. A time-stamped output
+* file name is also assembled (creating the destination directory when
+* one was given); the text is written to that file only when
+* GENERATE_YAML is defined.
+*/
+string YAML_Doc::generateYAML(){
+  string yaml;
+  yaml += "Mini-Application Name: " + miniAppName + "\n";
+  yaml += "Mini-Application Version: " + miniAppVersion + "\n";
+  for(size_t i=0; i<children.size(); i++){
+    yaml += children[i]->printYAML("");
+  }
+
+  //Time stamp of the form YYYY:MM:DD-hh:mm:ss in local time.
+  //strftime never writes past the buffer and is declared by <ctime>.
+  time_t rawtime;
+  time ( &rawtime );
+  tm * ptm = localtime(&rawtime);
+  char sdate[25] = "";
+  if (ptm != 0) {
+    strftime(sdate, sizeof(sdate), "%Y:%m:%d-%H:%M:%S", ptm);
+  }
+
+  //File name: "<name>-<version>_" unless the caller supplied a root name,
+  //then the time stamp and the ".yaml" suffix.
+  string filename;
+  if (destinationFileName=="")
+    filename = miniAppName + "-" + miniAppVersion + "_";
+  else
+    filename = destinationFileName;
+  filename = filename + string(sdate) + ".yaml";
+
+  if (destinationDirectory!="" && destinationDirectory!=".") {
+    //Best effort: the result is ignored because the directory may already exist.
+#ifdef REDSTORM
+    mkdir(destinationDirectory.c_str(),0755);
+#else
+    //NOTE: destinationDirectory reaches the shell unquoted, so it must
+    //come from a trusted source.
+    string mkdir_cmd = "mkdir " + destinationDirectory;
+    system(mkdir_cmd.c_str());
+#endif
+    //Keep the file name assembled above; only prepend the directory.
+    filename = destinationDirectory + "/" + filename;
+  }
+  else
+    filename = "./" + filename;
+
+#ifdef GENERATE_YAML
+  ofstream myfile;
+  myfile.open(filename.c_str());
+  myfile << yaml;
+  myfile.close();
+#endif
+
+  return yaml;
+}
+
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Doc.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/YAML_Doc.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Doc.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Doc.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,122 @@
+//@HEADER
+// ************************************************************************
+// 
+//               Mantevo: A collection of mini-applications for HPC
+//                 Copyright (2008) Sandia Corporation
+// 
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+// 
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//  
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//  
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+// Questions? Contact Michael A. Heroux (maherou at sandia.gov) 
+// 
+// ************************************************************************
+//@HEADER
+
+// Changelog
+//
+// Version 0.1
+// - Initial version.
+//
+/////////////////////////////////////////////////////////////////////////
+
+#ifndef YAML_DOC_H
+#define YAML_DOC_H
+#include <string>
+#include <vector>
+#include "YAML_Element.hpp"
+
+//! The Mantevo YAML_Doc class for the uniform collecting and reporting of performance data for mini-applications
+
+/*!
+
+The YAML_Doc class works in conjunction with the YAML_Element class to facilitate easy collecting and reporting of YAML-formatted
+data that can be then registered with the Mantevo results collection website.
+
+\code
+
+//EXAMPLE CODE FOR GENERATING YAML
+
+  YAML_Doc doc("hpccg","1.0");
+  doc.add("final_residual",1.4523e-13);
+  doc.add("time","4.893"); 
+ 
+//note: the following line will remove the data (4.893) associated with "time"
+  doc.get("time")->add("total",4.243);
+
+//note:  the following line will likewise remove the data (4.243) associated with "total"
+  doc.get("time")->get("total")->add("time",2.457);
+  doc.get("time")->get("total")->add("flops",4.88e5);
+  doc.get("time")->add("ddot",1.243);
+  doc.get("time")->add("sparsemv","");
+  doc.get("time")->get("sparsemv")->add("time",0.3445);
+  doc.get("time")->get("sparsemv")->add("overhead","");
+  doc.get("time")->get("sparsemv")->get("overhead")->add("time",0.0123);
+  doc.get("time")->get("sparsemv")->get("overhead")->add("percentage",0.034);
+  cout << doc.generateYAML() << endl; 
+  return 0;
+
+\endcode
+
+Below is the output generated by the above code:
+
+\verbatim
+
+final_residual: 1.4523e-13
+time: 
+  total:
+    time: 2.457
+    flops: 4.88e5
+  ddot: 1.243
+  sparsemv:
+    time: 0.3445
+    overhead:
+      time: 0.0123
+      percentage: 0.034
+
+\endverbatim
+
+\note {No value is allowed to be attached to a key that has children.  If children are added to a key, the value is simply set to "".}
+
+*/
+class YAML_Doc: public YAML_Element {
+  public:
+  //! Constructor: accepts mini-application name and version as strings, optionally accepts directory and file name for printing results.
+  /*!
+    The sole constructor for this class accepts a name and version number for the mini-application as well as optional directory
+    and file name information for results that are generated by the generateYAML() method.
+    \param miniApp_Name (in) string containing name of the mini-application
+    \param miniApp_Version (in) string containing the version of the mini-application
+    \param destination_Directory (in, optional) path of directory where results file will be stored, relative to current working directory.
+           If this value is not supplied, the results file will be stored in the current working directory.  If the directory does not exist
+           it will be created.
+    \param destination_FileName (in, optional) root name of the results file.  A suffix of ".yaml" will be automatically appended.  If no
+           file name is specified the filename will be constructed by concatenating the miniAppName + miniAppVersion + ".yaml" strings.
+  */
+  YAML_Doc(const std::string& miniApp_Name, const std::string& miniApp_Version, const std::string& destination_Directory = "", const std::string& destination_FileName = "");
+  //! Destructor
+  ~YAML_Doc();
+  //! Generate the YAML text and return it as a string; when built with GENERATE_YAML the text is also written to a file using the specified directory and filename
+  std::string generateYAML();
+
+protected:
+  //! Name of the mini-application, emitted on the first line of the YAML text
+  std::string miniAppName;
+  //! Version of the mini-application, emitted on the second line of the YAML text
+  std::string miniAppVersion;
+  //! Directory for the results file ("" or "." means the current working directory)
+  std::string destinationDirectory;
+  //! Root name for the results file ("" selects a name built from the application name and version)
+  std::string destinationFileName;
+};
+#endif /* YAML_DOC_H */
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Element.cpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/YAML_Element.cpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Element.cpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Element.cpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,120 @@
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include "YAML_Element.hpp"
+using namespace std;
+//Builds an element that holds the given key/value pair and has no children.
+YAML_Element::YAML_Element(const std::string& key_arg, const std::string& value_arg)
+  : key(key_arg), value(value_arg)
+{
+}
+
+//Deletes every child element owned by this element, first to last.
+YAML_Element::~YAML_Element(){
+  for (std::vector<YAML_Element*>::iterator it = children.begin(); it != children.end(); ++it) {
+    delete *it;
+  }
+  children.clear();
+}
+
+/*
+* Appends a new child holding key_arg and the double value converted to
+* text, clears this element's own value, and returns the new child.
+* No check is made for an existing child with the same key.
+*/
+YAML_Element* YAML_Element::add(const std::string& key_arg, double value_arg) {
+  value = "";
+  YAML_Element* child = new YAML_Element(key_arg, convert_double_to_string(value_arg));
+  children.push_back(child);
+  return child;
+}
+
+//Same as the double overload, for an int value.
+YAML_Element* YAML_Element::add(const std::string& key_arg, int value_arg) {
+  value = "";
+  YAML_Element* child = new YAML_Element(key_arg, convert_int_to_string(value_arg));
+  children.push_back(child);
+  return child;
+}
+
+#ifndef MINIFE_NO_LONG_LONG
+
+//Same as the double overload, for a long long value.
+YAML_Element* YAML_Element::add(const std::string& key_arg, long long value_arg) {
+  value = "";
+  YAML_Element* child = new YAML_Element(key_arg, convert_long_long_to_string(value_arg));
+  children.push_back(child);
+  return child;
+}
+
+#endif
+
+//Same as the double overload, for a size_t value.
+YAML_Element* YAML_Element::add(const std::string& key_arg, size_t value_arg) {
+  value = "";
+  YAML_Element* child = new YAML_Element(key_arg, convert_size_t_to_string(value_arg));
+  children.push_back(child);
+  return child;
+}
+
+//Appends a new child holding key_arg and the string value as given,
+//clears this element's own value, and returns the new child.
+YAML_Element* YAML_Element::add(const std::string& key_arg, const std::string& value_arg) {
+  value = "";
+  YAML_Element* child = new YAML_Element(key_arg, value_arg);
+  children.push_back(child);
+  return child;
+}
+
+/*
+* Returns the first child whose key equals key_arg, or a null pointer
+* when no child has that key (nothing is thrown in that case, so
+* callers must check the result before dereferencing it).
+*/
+YAML_Element* YAML_Element::get(const std::string& key_arg) {
+  YAML_Element* found = 0;
+  for (size_t i=0; i<children.size() && found == 0; i++) {
+    if (children[i]->getKey() == key_arg) {
+      found = children[i];
+    }
+  }
+  return found;
+}
+
+/*
+* Returns the YAML text for this element and all of its descendants.
+* The element's own "key: value" line is prefixed with 'space'; each
+* child is printed with two more blanks of indentation, since correct
+* YAML depends on correct spacing.
+*/
+string YAML_Element::printYAML(std::string space){
+  string text = space + key + ": " + value + "\n";
+  const string child_space = space + "  ";
+  for(size_t c=0; c<children.size(); c++){
+    text += children[c]->printYAML(child_space);
+  }
+  return text;
+}
+
+//Formats a double with the default stream formatting.
+string YAML_Element::convert_double_to_string(double value_arg){
+  ostringstream out;
+  out << value_arg;
+  return out.str();
+}
+//Formats an int with the default stream formatting.
+string YAML_Element::convert_int_to_string(int value_arg){
+  ostringstream out;
+  out << value_arg;
+  return out.str();
+}
+
+#ifndef MINIFE_NO_LONG_LONG
+
+//Formats a long long with the default stream formatting.
+string YAML_Element::convert_long_long_to_string(long long value_arg){
+  ostringstream out;
+  out << value_arg;
+  return out.str();
+}
+
+#endif
+
+//Formats a size_t with the default stream formatting.
+string YAML_Element::convert_size_t_to_string(size_t value_arg){
+  ostringstream out;
+  out << value_arg;
+  return out.str();
+}

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Element.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/YAML_Element.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Element.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Element.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,86 @@
+//@HEADER
+// ************************************************************************
+// 
+//               Mantevo: A collection of mini-applications for HPC
+//                 Copyright (2008) Sandia Corporation
+// 
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+// 
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//  
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//  
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+// Questions? Contact Michael A. Heroux (maherou at sandia.gov) 
+// 
+// ************************************************************************
+//@HEADER
+
+// Changelog
+//
+// Version 0.1
+// - Initial version.
+//
+/////////////////////////////////////////////////////////////////////////
+
+#ifndef YAML_ELEMENT_H
+#define YAML_ELEMENT_H
+#include <string>
+#include <vector>
+//! The Mantevo YAML_Element class for registering key-value pairs of performance data
+
+/*!
+  Mantevo mini-applications generate a collection of performance data for each run of the executable.  YAML_Element, and
+  the related YAML_Doc class, provide a uniform facility for gathering and reporting this data using the YAML text format.
+*/
+class YAML_Element {
+  public:
+
+  //! Default constructor: empty key and value.
+  YAML_Element (){key="";value="";}
+  //! Construct with known key-value pair
+  YAML_Element (const std::string& key_arg, const std::string& value_arg);
+  //! Destructor
+  // NOTE(review): the destructor is not virtual and no copy operations are
+  // declared, although 'children' holds raw pointers; confirm that elements
+  // are never copied or deleted through a base pointer to a derived class.
+  ~YAML_Element ();
+  //! Key accessor method
+  std::string getKey(){return key;}
+  //! Add a child element to an element list associated with this element, value of type double
+  YAML_Element* add(const std::string& key_arg, double value_arg);
+  //! Add a child element to an element list associated with this element, value of type int
+  YAML_Element* add(const std::string& key_arg, int value_arg);
+#ifndef MINIFE_NO_LONG_LONG
+  //! Add a child element to an element list associated with this element, value of type long long
+  YAML_Element* add(const std::string& key_arg, long long value_arg);
+#endif
+  //! Add a child element to an element list associated with this element, value of type size_t
+  YAML_Element* add(const std::string& key_arg, size_t value_arg);
+  //! Add a child element to an element list associated with this element, value of type string
+  YAML_Element* add(const std::string& key_arg, const std::string& value_arg);
+  //! get the element in the list with the given key
+  YAML_Element* get(const std::string& key_arg);
+  //! Produce the YAML text for this element and its children; 'space' is the indentation prefix for this element's line
+  std::string printYAML(std::string space);
+  
+protected:
+  //! Key of this element
+  std::string key;
+  //! Value of this element
+  std::string value;
+  //! Child elements, in the order they were added
+  std::vector<YAML_Element*> children;
+
+private:
+  // Helpers that turn a numeric value into its text form.
+  std::string convert_double_to_string(double value_arg);
+  std::string convert_int_to_string(int value_arg);
+#ifndef MINIFE_NO_LONG_LONG
+  std::string convert_long_long_to_string(long long value_arg);
+#endif
+  std::string convert_size_t_to_string(size_t value_arg);
+};
+#endif /* YAML_ELEMENT_H */

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/analytic_soln.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/analytic_soln.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/analytic_soln.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/analytic_soln.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,116 @@
+#ifndef _analytic_soln_hpp_
+#define _analytic_soln_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <cmath>
+
+//Default the scalar type to double when the build does not choose one.
+//No trailing semicolon: the macro is expanded inside declarations such
+//as 'typedef MINIFE_SCALAR Scalar;'.
+#ifndef MINIFE_SCALAR
+#define MINIFE_SCALAR double
+#endif
+
+namespace miniFE {
+
+//Scalar type used by the functions in this header; selected by MINIFE_SCALAR.
+typedef MINIFE_SCALAR Scalar;
+
+// The 'soln' function below computes the analytic solution for
+// steady state temperature in a brick-shaped domain (formally called
+// a rectangular parallelepiped). The inputs to the function are
+// the x,y,z coordinates of the point at which temperature is to be
+// computed, and the number of terms p,q in the series expansion.
+//
+// The equations used for the temperature solution are equations 9 and 10
+// in section 6.2 of Carslaw & Jaeger, "Conduction of Heat in Solids".
+//
+// The parallelepiped being used is defined by this domain:
+// 0 <= x <= 1.0
+// 0 <= y <= 1.0
+// 0 <= z <= 1.0
+//
+// With boundary conditions prescribing the temperature to be 1.0 on
+// the x==1.0 face, and 0.0 on all other faces.
+//
+// Thus, in the equations from Carslaw & Jaeger, the following constants
+// are used:
+//
+// a == b == c == 1.0  (the extents of the domain)
+// v1 == 0.0           (temperature at x == 0.0)
+// v2 == 1.0           (temperature at x == 1.0)
+//
+
+//pi, pi squared, and the factor 16/pi^2 that 'soln' applies to the series sum.
+const Scalar PI = 3.141592653589793238462;
+const Scalar PI_SQR = PI*PI;
+const Scalar term0 = 16.0/PI_SQR;
+
+//Returns sqrt((2p+1)^2*pi^2 + (2q+1)^2*pi^2), the 'l' coefficient used by soln.
+inline Scalar fcn_l(int p, int q)
+{
+  const int odd_p = 2*p+1;
+  const int odd_q = 2*q+1;
+  return std::sqrt(odd_p*odd_p*PI_SQR + odd_q*odd_q*PI_SQR);
+}
+
+//Returns (2n+1)*pi*u, the argument of the sine terms in soln.
+inline Scalar fcn(int n, Scalar u)
+{
+  const int odd_n = 2*n+1;
+  return odd_n*PI*u;
+}
+
+//Evaluates the series solution at (x,y,z) using terms p = 0..max_p and
+//q = 0..max_q, and returns term0 times the accumulated sum.
+inline Scalar soln(Scalar x, Scalar y, Scalar z, int max_p, int max_q)
+{
+  Scalar series = 0;
+  for(int p=0; p<=max_p; ++p) {
+    const Scalar y_term = std::sin(fcn(p, y))/(2*p+1);
+    for(int q=0; q<=max_q; ++q) {
+      const Scalar z_term = std::sin(fcn(q, z))/(2*q+1);
+
+      const Scalar l = fcn_l(p, q);
+      const Scalar numer = std::sinh(l*x);
+      const Scalar denom = std::sinh(l);
+
+      const Scalar term = (numer*y_term)*(z_term/denom);
+
+      //When l gets too big, sinh(l) becomes inf and the term is a NaN.
+      //A NaN is the only value that differs from itself; on the first
+      //one, give up on this q-loop and move on to the next p.
+      if (term != term) {
+        break;
+      }
+      series += term;
+    }
+  }
+  return term0*series;
+}
+
+}//namespace miniFE
+
+#endif /* _analytic_soln_hpp_ */

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/assemble_FE_data.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/assemble_FE_data.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/assemble_FE_data.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/assemble_FE_data.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,78 @@
+#ifndef _assemble_FE_data_hpp_
+#define _assemble_FE_data_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <box_utils.hpp>
+#include <simple_mesh_description.hpp>
+
+#include <perform_element_loop.hpp>
+
+namespace miniFE {
+
+//Runs perform_element_loop on A and b over this processor's element box,
+//extended by a ghost layer where the box does not touch the global boundary.
+template<typename MatrixType,
+         typename VectorType>
+void
+assemble_FE_data(const simple_mesh_description<typename MatrixType::GlobalOrdinalType>& mesh,
+                 MatrixType& A,
+                 VectorType& b,
+                 Parameters& params)
+{
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal;
+
+  Box elem_box;
+  copy_box(mesh.local_box, elem_box);
+
+  //Nothing to do when the local box holds no ids.
+  if (get_num_ids<GlobalOrdinal>(elem_box) < 1) {
+    return;
+  }
+
+  //
+  //Grow the box by one ghost layer on every side that is not already at
+  //the global boundary, so the complete linear-system can be assembled
+  //without doing any communication.
+  //
+  const int ghost = 1;
+  for(int dim=0; dim<3; ++dim) {
+    const int global_elems = mesh.global_box[dim][1];
+    if (elem_box[dim][0] > 0) elem_box[dim][0] -= ghost;
+    if (elem_box[dim][1] < global_elems) elem_box[dim][1] += ghost;
+  }
+
+  perform_element_loop(mesh, elem_box, A, b, params);
+}
+                      
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/box_utils.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/box_utils.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/box_utils.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/box_utils.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,320 @@
+#ifndef _box_utils_hpp_
+#define _box_utils_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <vector>
+#include <set>
+#include <map>
+
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+#include <TypeTraits.hpp>
+#include <Box.hpp>
+
+namespace miniFE {
+
+//Copies the (min,max) pair of each of the three dimensions from from_box to to_box.
+inline void copy_box(const Box& from_box, Box& to_box)
+{
+  for(int dim=0; dim<3; ++dim) {
+    for(int end=0; end<2; ++end) {
+      to_box[dim][end] = from_box[dim][end];
+    }
+  }
+}
+
+//Splits a 0-based ID, laid out as x + nx*y + nx*ny*z, into its integer
+//x,y,z coordinates. nz is not used.
+template<typename GlobalOrdinal>
+#ifdef __CUDACC__
+__host__ __device__ __inline__
+#endif
+void get_int_coords(GlobalOrdinal ID, int nx, int ny, int nz,
+                int& x, int& y, int& z)
+{
+  //Form nx*ny in GlobalOrdinal so the product cannot overflow 'int'
+  //when GlobalOrdinal is a wider type.
+  GlobalOrdinal plane = nx;
+  plane *= ny;
+
+  z = ID/plane;
+  y = (ID%plane)/nx;
+  x = ID%nx;
+}
+
+//Converts a 0-based ID, laid out as x + nx*y + nx*ny*z, into x,y,z
+//coordinates that lie on (or in) the unit cube. nz only sets the z divisor.
+template<typename GlobalOrdinal,typename Scalar>
+#ifdef __CUDACC__
+__host__ __device__ __inline__
+#endif
+void get_coords(GlobalOrdinal ID, int nx, int ny, int nz,
+                Scalar& x, Scalar& y, Scalar& z)
+{
+  //Divide by (n-1) so the last index along an axis maps to 1.0; an axis
+  //with a single point divides by 1 instead.
+  const int xdiv = nx>1 ? nx-1 : 1;
+  const int ydiv = ny>1 ? ny-1 : 1;
+  const int zdiv = nz>1 ? nz-1 : 1;
+
+  //Form nx*ny in GlobalOrdinal so the product cannot overflow 'int'
+  //when GlobalOrdinal is a wider type.
+  GlobalOrdinal plane = nx;
+  plane *= ny;
+
+  z = (1.0*(ID/plane))/zdiv;
+  y = 1.0*((ID%plane)/nx)/ydiv;
+  x = 1.0*(ID%nx)/xdiv;
+}
+
+//Returns the number of ids in the box: the product of its three extents
+//(max - min along each dimension).
+template<typename GlobalOrdinal>
+GlobalOrdinal get_num_ids(const Box& box)
+{
+  int nx = box[0][1] - box[0][0];
+  int ny = box[1][1] - box[1][0];
+  int nz = box[2][1] - box[2][0];
+
+  //Multiply in GlobalOrdinal from the start so that nx*ny cannot
+  //overflow 'int' when GlobalOrdinal is a wider type.
+  GlobalOrdinal tmp = nx;
+  tmp *= ny;
+  tmp *= nz;
+  return tmp;
+}
+
+//Returns the 0-based id x + nx*y + nx*ny*z of the point (x,y,z) in an
+//nx-by-ny-by-nz grid, or -1 when the point lies outside the grid.
+template<typename GlobalOrdinal>
+#ifdef __CUDACC__
+__host__ __device__ __inline__
+#endif
+GlobalOrdinal get_id(int nx, int ny, int nz,
+                     int x, int y, int z)
+{
+  if (x<0 || y<0 || z<0) return -1;
+  if (x>=nx || y>=ny || z>=nz) return -1;
+
+  //form x + nx*y + nx*ny*z as (z*ny + y)*nx + x, entirely in
+  //GlobalOrdinal, so no intermediate product is computed in 'int':
+
+  GlobalOrdinal tmp = z;
+  tmp *= ny;
+  tmp += y;
+  tmp *= nx;
+  tmp += x;
+  return tmp;
+}
+
+//Fills 'ids' with the id of every point in the box, x fastest and z
+//slowest. With include_ghost_layer the box is first grown by one on each
+//side that is not already at the grid boundary (0 or nx/ny/nz).
+template<typename GlobalOrdinal>
+void get_ids(int nx, int ny, int nz,
+             const Box& box,
+             std::vector<GlobalOrdinal>& ids,
+             bool include_ghost_layer=false)
+{
+  ids.clear();
+  int minz = box[2][0];
+  int maxz = box[2][1];
+  int miny = box[1][0];
+  int maxy = box[1][1];
+  int minx = box[0][0];
+  int maxx = box[0][1];
+
+  if (include_ghost_layer) {
+    if (minz > 0) minz--;
+    if (miny > 0) miny--;
+    if (minx > 0) minx--;
+    if (maxz < nz) maxz++;
+    if (maxy < ny) maxy++;
+    if (maxx < nx) maxx++;
+  }
+
+  //Reserve only for a non-empty box, and form the count in size_t: a
+  //negative extent would otherwise turn into a huge request, and a large
+  //box could overflow the 'int' product.
+  if (maxz > minz && maxy > miny && maxx > minx) {
+    size_t ids_size = maxz - minz;
+    ids_size *= maxy - miny;
+    ids_size *= maxx - minx;
+    ids.reserve(ids_size);
+  }
+
+  for(int z=minz; z<maxz; ++z) {
+    for(int y=miny; y<maxy; ++y) {
+      for(int x=minx; x<maxx; ++x) {
+        ids.push_back(get_id<GlobalOrdinal>(nx, ny, nz, x, y, z));
+      }
+    }
+  }
+}
+
+//Fills 'ids' with the ids of the ghost layer only: the points of the box
+//grown by one on each side not already at the grid boundary, minus the
+//points of the box itself.
+template<typename GlobalOrdinal>
+void get_ghost_ids(int nx, int ny, int nz,
+             const Box& box,
+             std::vector<GlobalOrdinal>& ids)
+{
+  ids.clear();
+
+  //Extents of the box as given.
+  const int own_minx = box[0][0], own_maxx = box[0][1];
+  const int own_miny = box[1][0], own_maxy = box[1][1];
+  const int own_minz = box[2][0], own_maxz = box[2][1];
+
+  //Extents including the ghost layer.
+  const int minx = own_minx > 0 ? own_minx-1 : own_minx;
+  const int miny = own_miny > 0 ? own_miny-1 : own_miny;
+  const int minz = own_minz > 0 ? own_minz-1 : own_minz;
+  const int maxx = own_maxx < nx ? own_maxx+1 : own_maxx;
+  const int maxy = own_maxy < ny ? own_maxy+1 : own_maxy;
+  const int maxz = own_maxz < nz ? own_maxz+1 : own_maxz;
+
+  for(int z=minz; z<maxz; ++z) {
+    const bool z_owned = (z >= own_minz) && (z < own_maxz);
+    for(int y=miny; y<maxy; ++y) {
+      const bool y_owned = (y >= own_miny) && (y < own_maxy);
+      for(int x=minx; x<maxx; ++x) {
+        const bool x_owned = (x >= own_minx) && (x < own_maxx);
+        //a point is in the ghost layer unless all of x,y,z are inside the box
+        if (x_owned && y_owned && z_owned) continue;
+        ids.push_back(get_id<GlobalOrdinal>(nx, ny, nz, x, y, z));
+      }
+    }
+  }
+}
+
+//Prints one line to std::cout: the processor number, then each name
+//followed by the three (min,max) ranges of its box.
+inline void print_box(int myproc, const char* name, const Box& box,
+                      const char* name2, const Box& box2)
+{
+  std::cout << "proc " << myproc << " " << name;
+  for(int dim=0; dim<3; ++dim) {
+    std::cout << " (" << box[dim][0] << "," << box[dim][1] << ") ";
+  }
+  std::cout << name2;
+  for(int dim=0; dim<3; ++dim) {
+    std::cout << " (" << box2[dim][0] << "," << box2[dim][1] << ") ";
+  }
+  std::cout << std::endl;
+}
+
+//Returns true when the ranges [min1,max1] and [min2,max2] of one dimension
+//touch, overlap, or are separated by exactly one.
+inline bool ranges_are_neighbors(int min1, int max1, int min2, int max2)
+{
+  bool neighbor = (max1 == min2) || (min1 == max2) || // min matches max
+                  (min1 == min2) || (max1 == max2) || // mins or maxs match
+                  (min1 >  min2  &&  max1 <  max2) || // range contains other
+                  (min2 >  min1  &&  max2 <  max1) || // range contains other
+                  (min1 >  min2  &&  min1 <  max2) || // min contained in rng
+                  (min2 >  min1  &&  min2 <  max1);   // min contained in rng
+  if (!neighbor) {
+    neighbor = (max1 == min2-1) || (min1 == max2+1);  // one apart
+  }
+  return neighbor;
+}
+
+//Two boxes are neighbors when their ranges are neighbors in all three
+//dimensions. Declared inline because this header-defined function may be
+//included from more than one translation unit.
+inline bool is_neighbor(const Box& box1, const Box& box2)
+{
+  const bool x_neighbor = ranges_are_neighbors(box1[0][0], box1[0][1], box2[0][0], box2[0][1]);
+  const bool y_neighbor = ranges_are_neighbors(box1[1][0], box1[1][1], box2[1][0], box2[1][1]);
+  const bool z_neighbor = ranges_are_neighbors(box1[2][0], box1[2][1], box2[2][0], box2[2][1]);
+
+  return x_neighbor && y_neighbor && z_neighbor;
+}
+
+//Fills id_to_row with (id, row) pairs for this processor's box and, when
+//HAVE_MPI is defined, for the boxes of neighboring processors.
+//
+//Only the first id of each run of consecutive ids is stored: an entry is
+//added for the first id of a box and for every id that is not exactly one
+//more than the id before it. The row stored with it is the owning
+//processor's first row plus the position of the id in that box's id list.
+template<typename GlobalOrdinal>
+void create_map_id_to_row(int global_nx, int global_ny, int global_nz,
+                     const Box& box,
+                     std::map<GlobalOrdinal,GlobalOrdinal>& id_to_row)
+{
+  GlobalOrdinal num_my_ids = get_num_ids<GlobalOrdinal>(box);
+
+  //ids of the local box, without a ghost layer
+  typename std::vector<GlobalOrdinal> all_ids;
+  bool include_ghost_layer = false;
+  get_ids(global_nx, global_ny, global_nz, box, all_ids, include_ghost_layer);
+
+  //Without MPI there is a single processor: first row 0, no other boxes.
+  GlobalOrdinal my_first_row = 0;
+  typename std::vector<GlobalOrdinal> global_offsets;
+  std::vector<int> all_boxes;
+  int numprocs = 1, myproc = 0;
+#ifdef HAVE_MPI
+  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
+  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+
+  //Gather every processor's id count, then turn the counts into offsets:
+  //global_offsets[i] becomes the sum of the counts of processors 0..i-1.
+  GlobalOrdinal local_num_ids = num_my_ids;
+  global_offsets.resize(numprocs);
+  MPI_Datatype mpi_dtype = TypeTraits<GlobalOrdinal>::mpi_type();
+  MPI_Allgather(&local_num_ids, 1, mpi_dtype, &global_offsets[0], 1, mpi_dtype, MPI_COMM_WORLD);
+  GlobalOrdinal offset = 0;
+  for(int i=0; i<numprocs; ++i) {
+    GlobalOrdinal tmp = global_offsets[i];
+    global_offsets[i] = offset;
+    offset += tmp;
+  }
+
+  my_first_row = global_offsets[myproc];
+
+  //Gather the 6 range values of every processor's box.
+  //const_cast: the MPI call takes a non-const send buffer pointer here.
+  all_boxes.resize(6*numprocs);
+  int* local_box_ranges = const_cast<int*>(&box.ranges[0]);
+  MPI_Allgather(local_box_ranges, 6, MPI_INT, &all_boxes[0], 6, MPI_INT, MPI_COMM_WORLD);
+#endif
+
+  //Entries for the local box: the first id, then the start of each new run.
+  if (all_ids.size() > 0) {
+    id_to_row.insert(std::make_pair(all_ids[0], my_first_row));
+  }
+
+  for(size_t i=1; i<all_ids.size(); ++i) {
+    if (all_ids[i] != all_ids[i-1]+1) {
+      id_to_row.insert(std::make_pair(all_ids[i], my_first_row+i));
+    }
+  }
+
+  //Same for every other processor whose box is a neighbor of ours.
+  //(With numprocs == 1 the loop body is always skipped.)
+//  int num_neighbors = 0;
+  for(int i=0; i<numprocs; ++i) {
+    if (i == myproc) continue;
+    Box box_i;
+    for(int r=0; r<6; ++r) box_i.ranges[r] = all_boxes[i*6 + r];
+//    bool neighbor= is_neighbor(box, box_i);
+//if(myproc==2) {
+//  std::cout<<"i: "<<i<<" "<<neighbor<<" ";
+//  print_box(myproc, " ", box, " ", box_i);
+//}
+    if (!is_neighbor(box, box_i)) {
+//      if (myproc==50) {
+//        std::cout<<"box ("<<box[0][0]<<","<<box[0][1]<<" - "<<box[1][0]<<","<<box[1][1]<<" - "<<box[2][0]<<","<<box[2][1]<<")"<<std::endl<<" and ("<<box_i[0][0]<<","<<box_i[0][1]<<" - "<<box_i[1][0]<<","<<box_i[1][1]<<" - "<<box_i[2][0]<<","<<box_i[2][1]<<") not neighbors."<<std::endl;
+//      }
+      continue;
+    }
+//    ++num_neighbors;
+
+    //all_ids is reused for the ids of the neighbor's box
+    get_ids(global_nx, global_ny, global_nz, box_i, all_ids, include_ghost_layer);
+
+    GlobalOrdinal first_row = global_offsets[i];
+    if (all_ids.size() > 0) {
+      id_to_row.insert(std::make_pair(all_ids[0], first_row));
+    }
+    for(size_t j=1; j<all_ids.size(); ++j) {
+      if (all_ids[j] != all_ids[j-1]+1) {
+        id_to_row.insert(std::make_pair(all_ids[j], first_row+j));
+      }
+    }
+  }
+
+//std::cout<<"proc "<<myproc<<": num_neighbors: "<<num_neighbors<<", id_to_row.size(): "<<id_to_row.size()<<std::endl;
+//typename std::map<GlobalOrdinal,GlobalOrdinal>::iterator iter = id_to_row.begin(), end = id_to_row.end();
+//for(; iter!=end; ++iter) {
+//  std::cout<<"proc "<<myproc<<": "<<iter->first<<" :: "<<iter->second<<std::endl;
+//}
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/cg_solve.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/cg_solve.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/cg_solve.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/cg_solve.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,234 @@
+#ifndef _cg_solve_hpp_
+#define _cg_solve_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <cmath>
+#include <limits>
+
+#include <Vector_functions.hpp>
+#include <mytimer.hpp>
+
+#include <outstream.hpp>
+
+namespace miniFE {
+// Debug helper: writes every entry of 'vec' to std::cout, one per line, as "name[i]: value".
+template<typename Scalar>
+void print_vec(const std::vector<Scalar>& vec, const std::string& name)
+{
+  for(size_t i=0; i<vec.size(); ++i) {
+    std::cout << name << "["<<i<<"]: " << vec[i] << std::endl; // std::endl flushes after each entry
+  }
+}
+// Returns true when 'inner' is negligible relative to ||v||_2*||w||_2, i.e. v and w are numerically orthogonal.
+template<typename VectorType>
+bool breakdown(typename VectorType::ScalarType inner, // caller-supplied <v,w>; it is not recomputed here
+               const VectorType& v,
+               const VectorType& w)
+{
+  typedef typename VectorType::ScalarType Scalar;
+  typedef typename TypeTraits<Scalar>::magnitude_type magnitude;
+
+//This is code that was copied from Aztec, and originally written
+//by my hero, Ray Tuminaro.
+//
+//Assuming that inner = <v,w> (inner product of v and w),
+//v and w are considered orthogonal if
+//  |inner| <= 100 * ||v||_2 * ||w||_2 * epsilon
+
+  magnitude vnorm = std::sqrt(dot(v,v)); // 2-norm of v, assuming dot() (defined elsewhere) is the plain inner product
+  magnitude wnorm = std::sqrt(dot(w,w));
+  return std::abs(inner) <= 100*vnorm*wnorm*std::numeric_limits<magnitude>::epsilon();
+}
+// Conjugate-gradient solve of A*x = b; 'x' supplies the initial guess and receives the iterate, timings are reported through 'my_cg_times'.
+template<typename OperatorType,
+         typename VectorType,
+         typename Matvec>
+void
+cg_solve(OperatorType& A,
+         const VectorType& b,
+         VectorType& x,
+         Matvec matvec, // invoked as matvec(A, p, Ap)
+         typename OperatorType::LocalOrdinalType max_iter, // iteration cap
+         typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& tolerance, // iterate while normr > tolerance; only read here
+         typename OperatorType::LocalOrdinalType& num_iters, // out: index of the last completed iteration; untouched if none completes
+         typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& normr, // out: sqrt(dot(r,r)) as last evaluated
+         timer_type* my_cg_times) // out: slots WAXPY, DOT, MATVEC, MATVECDOT and TOTAL are written
+{
+  typedef typename OperatorType::ScalarType ScalarType;
+  typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType;
+  typedef typename OperatorType::LocalOrdinalType LocalOrdinalType;
+  typedef typename TypeTraits<ScalarType>::magnitude_type magnitude_type;
+
+  timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0; // t0 is presumably the scratch start time used by TICK()/TOCK() (defined elsewhere)
+  timer_type total_time = mytimer();
+
+  int myproc = 0;
+#ifdef HAVE_MPI
+  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+#ifdef USE_MPI_PCONTROL
+  MPI_Pcontrol(1);
+#endif
+#endif
+
+  if (!A.has_local_indices) {
+    std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. This probably means "
+       << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve."
+       << std::endl;
+    return; // NOTE: x, num_iters, normr and my_cg_times are all left untouched on this path
+  }
+
+  size_t nrows = A.rows.size();
+  LocalOrdinalType ncols = A.num_cols;
+
+  VectorType r(b.startIndex, nrows);
+  VectorType p(0, ncols); // sized by A.num_cols rather than nrows
+  VectorType Ap(b.startIndex, nrows);
+
+  normr = 0;
+  magnitude_type rtrans = 0;
+  magnitude_type oldrtrans = 0;
+
+  LocalOrdinalType print_freq = max_iter/10; // progress is printed every print_freq iterations, clamped to [1,50]
+  if (print_freq>50) print_freq = 50;
+  if (print_freq<1)  print_freq = 1;
+
+  ScalarType one = 1.0;
+  ScalarType zero = 0.0;
+
+  TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY); // p = x, assuming waxpby(a,x,b,y,w) computes w = a*x + b*y (defined elsewhere) - confirm
+
+//  print_vec(p.coefs, "p");
+
+  TICK();
+  matvec(A, p, Ap);
+  TOCK(tMATVEC);
+
+  TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY); // r = b - Ap under the same assumption
+
+  TICK(); rtrans = dot(r, r); TOCK(tDOT);
+
+//std::cout << "rtrans="<<rtrans<<std::endl;
+
+  normr = std::sqrt(rtrans);
+
+  if (myproc == 0) {
+    std::cout << "Initial Residual = "<< normr << std::endl;
+  }
+
+  magnitude_type brkdown_tol = std::numeric_limits<magnitude_type>::epsilon();
+
+#ifdef MINIFE_DEBUG
+  std::ostream& os = outstream();
+  os << "brkdown_tol = " << brkdown_tol << std::endl;
+#endif
+
+
+  for(LocalOrdinalType k=1; k <= max_iter && normr > tolerance; ++k) {
+    if (k == 1) {
+      TICK(); waxpby(one, r, zero, r, p); TOCK(tWAXPY); // first search direction is the residual itself
+    }
+    else {
+      oldrtrans = rtrans;
+      TICK(); rtrans = dot(r, r); TOCK(tDOT);
+      magnitude_type beta = rtrans/oldrtrans;
+      TICK(); waxpby(one, r, beta, p, p); TOCK(tWAXPY);
+    }
+
+    normr = std::sqrt(rtrans); // norm of r as of the start of this iteration; it is not refreshed after r is updated below
+
+    if (myproc == 0 && (k%print_freq==0 || k==max_iter)) {
+      std::cout << "Iteration = "<<k<<"   Residual = "<<normr<<std::endl;
+    }
+
+    magnitude_type alpha = 0;
+    magnitude_type p_ap_dot = 0;
+
+#ifdef MINIFE_FUSED
+    TICK();
+    p_ap_dot = matvec_and_dot(A, p, Ap);
+    TOCK(tMATVECDOT);
+#else
+    TICK(); matvec(A, p, Ap); TOCK(tMATVEC);
+
+    TICK(); p_ap_dot = dot(Ap, p); TOCK(tDOT);
+#endif
+
+#ifdef MINIFE_DEBUG
+    os << "iter " << k << ", p_ap_dot = " << p_ap_dot;
+    os.flush();
+#endif
+    if (p_ap_dot < brkdown_tol) { // suspiciously small <Ap,p>: either abort, or tighten the threshold and continue
+      if (p_ap_dot < 0 || breakdown(p_ap_dot, Ap, p)) {
+        std::cerr << "miniFE::cg_solve ERROR, numerical breakdown!"<<std::endl;
+#ifdef MINIFE_DEBUG
+        os << "ERROR, numerical breakdown!"<<std::endl;
+#endif
+        my_cg_times[WAXPY] = tWAXPY; //update the timers before jumping out.
+        my_cg_times[DOT] = tDOT;
+        my_cg_times[MATVEC] = tMATVEC;
+        my_cg_times[MATVECDOT] = tMATVECDOT; //was missing on this path, leaving the slot unset for the caller
+        my_cg_times[TOTAL] = mytimer() - total_time;
+        return;
+      }
+      else brkdown_tol = 0.1 * p_ap_dot;
+    }
+    alpha = rtrans/p_ap_dot;
+#ifdef MINIFE_DEBUG
+    os << ", rtrans = " << rtrans << ", alpha = " << alpha << std::endl;
+#endif
+
+#ifdef MINIFE_FUSED
+    TICK();
+    fused_waxpby(one, x, alpha, p, x, one, r, -alpha, Ap, r);
+    TOCK(tWAXPY);
+#else
+    TICK(); waxpby(one, x, alpha, p, x);
+            waxpby(one, r, -alpha, Ap, r); TOCK(tWAXPY);
+#endif
+
+    num_iters = k;
+  }
+
+#ifdef HAVE_MPI
+#ifdef USE_MPI_PCONTROL
+  MPI_Pcontrol(0);
+#endif
+#endif
+
+  my_cg_times[WAXPY] = tWAXPY;
+  my_cg_times[DOT] = tDOT;
+  my_cg_times[MATVEC] = tMATVEC;
+  my_cg_times[MATVECDOT] = tMATVECDOT;
+  my_cg_times[TOTAL] = mytimer() - total_time;
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/compute_matrix_stats.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/compute_matrix_stats.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/compute_matrix_stats.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/compute_matrix_stats.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,116 @@
+#ifndef _compute_matrix_stats_hpp_
+#define _compute_matrix_stats_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <cstddef>
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <iomanip>
+
+#include <outstream.hpp>
+#include <utils.hpp>
+#include <YAML_Doc.hpp>
+
+namespace miniFE {
+// Gathers row / nonzero statistics for A, records them under "Matrix attributes" in ydoc on proc 0, and returns the global nonzero count.
+template<typename MatrixType>
+size_t
+compute_matrix_stats(const MatrixType& A, int myproc, int numprocs, YAML_Doc& ydoc)
+{
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal;
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinal;
+  typedef typename MatrixType::ScalarType Scalar;
+
+  GlobalOrdinal min_nrows = 0, max_nrows = 0, global_nrows = 0;
+  int min_proc = 0, max_proc = 0; // passed to both get_global_min_max calls below; never read afterwards in this function
+
+  GlobalOrdinal local_nrows = A.rows.size();
+
+  get_global_min_max(local_nrows, global_nrows, min_nrows, min_proc,
+                     max_nrows, max_proc);
+
+  //Gather stats on global, min/max matrix num-nonzeros:
+
+  double local_nnz = A.num_nonzeros(); // nonzero counts are carried as double through the reduction
+  double dglobal_nnz = 0, dmin_nnz = 0, dmax_nnz = 0;
+
+  get_global_min_max(local_nnz, dglobal_nnz, dmin_nnz, min_proc,
+                     dmax_nnz, max_proc);
+
+  double avg_nrows = global_nrows;
+  avg_nrows /= numprocs;
+  double avg_nnz = dglobal_nnz;
+  avg_nnz /= numprocs;
+
+  double mem_overhead_MB = parallel_memory_overhead_MB(A);
+
+  size_t global_nnz = static_cast<size_t>(std::ceil(dglobal_nnz)); // back to integer counts for reporting
+  size_t min_nnz = static_cast<size_t>(std::ceil(dmin_nnz));
+  size_t max_nnz = static_cast<size_t>(std::ceil(dmax_nnz));
+  size_t global_num_rows = global_nrows;
+
+  if (myproc == 0) { // only proc 0 writes to the YAML document
+    ydoc.add("Matrix attributes","");
+    ydoc.get("Matrix attributes")->add("Global Nrows",global_num_rows);
+    ydoc.get("Matrix attributes")->add("Global NNZ",global_nnz);
+
+    //compute how much memory the matrix occupies:
+    //num-bytes = sizeof(GlobalOrdinal)*global_nrows   for A.rows
+    //          + sizeof(LocalOrdinal)*global_nrows    for A.rows_offsets
+    //          + sizeof(GlobalOrdinal)*global_nnz     for A.packed_cols
+    //          + sizeof(Scalar)*global_nnz            for A.packed_coefs
+
+    double invGB = 1.0/(1024*1024*1024); // bytes -> GiB
+    double memGB = invGB*global_nrows*sizeof(GlobalOrdinal);
+    memGB += invGB*global_nrows*sizeof(LocalOrdinal);
+    memGB += invGB*global_nnz*sizeof(GlobalOrdinal);
+    memGB += invGB*global_nnz*sizeof(Scalar);
+    ydoc.get("Matrix attributes")->add("Global Memory (GB)",memGB);
+
+    ydoc.get("Matrix attributes")->add("Pll Memory Overhead (MB)",mem_overhead_MB);
+
+    size_t min_num_rows = min_nrows;
+    size_t max_num_rows = max_nrows;
+    ydoc.get("Matrix attributes")->add("Rows per proc MIN",min_num_rows);
+    ydoc.get("Matrix attributes")->add("Rows per proc MAX",max_num_rows);
+    ydoc.get("Matrix attributes")->add("Rows per proc AVG",avg_nrows);
+    ydoc.get("Matrix attributes")->add("NNZ per proc MIN",min_nnz);
+    ydoc.get("Matrix attributes")->add("NNZ per proc MAX",max_nnz);
+    ydoc.get("Matrix attributes")->add("NNZ per proc AVG",avg_nnz);
+  }
+
+  return global_nnz; // returned on every proc, not just proc 0
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/driver.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/driver.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/driver.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/driver.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,412 @@
+#ifndef _driver_hpp_
+#define _driver_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <cstddef>
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <iomanip>
+
+#include <box_utils.hpp>
+#include <Vector.hpp>
+
+#ifdef MINIFE_CSR_MATRIX
+#include <CSRMatrix.hpp>
+#elif defined(MINIFE_ELL_MATRIX)
+#include <ELLMatrix.hpp>
+#else
+#include <CSRMatrix.hpp>
+#endif
+
+#include <simple_mesh_description.hpp>
+
+#include <SparseMatrix_functions.hpp>
+
+#include <generate_matrix_structure.hpp>
+#include <assemble_FE_data.hpp>
+
+#include <verify_solution.hpp>
+
+#include <compute_matrix_stats.hpp>
+#include <make_local_matrix.hpp>
+#include <imbalance.hpp>
+#include <cg_solve.hpp>
+#if MINIFE_KERNELS != 0
+#include <time_kernels.hpp>
+#endif
+#include <outstream.hpp>
+#include <utils.hpp>
+#include <mytimer.hpp>
+#include <YAML_Doc.hpp>
+
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+// RUN_TIMED_FUNCTION: runs 'fn', stores its elapsed mytimer() time in 'time_inc' and adds it to 'time_total'; 'msg' is only used by the commented-out printing.
+#define RUN_TIMED_FUNCTION(msg, fn, time_inc, time_total) \
+{                                   \
+  /*if (myproc==0) {                  \
+    std::cout.width(30);            \
+    std::cout << msg;               \
+    std::cout.flush();              \
+  }*/                                 \
+  timer_type rtf_t0 = mytimer();    \
+  fn;                               \
+  time_inc = mytimer() - rtf_t0;    \
+  time_total += time_inc;           \
+  /*if (myproc==0) {                  \
+    std::cout << time_inc << "s, total time: " << time_total << std::endl; \
+  }*/                                 \
+}
+
+//This program assembles finite-element matrices into a global matrix and
+//vector, then solves the linear-system using Conjugate Gradients.
+//Each finite-element is a hexahedron with 8 vertex-nodes.
+//
+//Notes:
+//- In finite-element terms, the box dimensions are in elements, not nodes.
+//  In other words, a 2x2x2 box describes 8 elements, each of which has 8 nodes,
+//  so it is a 3x3x3 node domain (27 nodes).
+//  The assembled linear system will have 1 equation for each finite element node.
+//
+//- The coordinate origin is at the corner of the global box where x=0,
+//  y=0, z=0, and the box extends along the positive x-axis, positive y-axis,
+//  and the positive z-axis.
+//
+//- Some aspects of matrix-structure generation and finite-element assembly
+//  are convenient to do using global node identifiers.
+//  A global identifier for each node is obtained from coordinates plus
+//  global box dimensions. See the function 'get_id' in box_utils.hpp.
+//
+//- Each node corresponds to a row in the matrix. The RCB partitioning method
+//  we use to split the global box among processors results in some
+//  processors owning non-contiguous blocks of global node identifiers.
+//  Since it is convenient for matrices and vectors to store contiguously-
+//  numbered blocks of rows, we map global node identifiers to a separate
+//  space of row numbers such that each processor's nodes correspond to a
+//  contiguous block of row numbers.
+//
+
+namespace miniFE {
+// Builds the mesh description, assembles A and b, imposes Dirichlet BCs, localizes indices, runs cg_solve (or the kernel timing loops) and records results in ydoc; returns verify_solution's result, or 0 if it was not run.
+template<typename Scalar,
+         typename LocalOrdinal,
+         typename GlobalOrdinal>
+int
+driver(const Box& global_box, Box& my_box,
+       Parameters& params, YAML_Doc& ydoc)
+{
+  int global_nx = global_box[0][1]; // box extents are in elements, per the notes above this namespace
+  int global_ny = global_box[1][1];
+  int global_nz = global_box[2][1];
+
+  int numprocs = 1, myproc = 0;
+#ifdef HAVE_MPI
+  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
+  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+#endif
+
+  if (params.load_imbalance > 0) {
+    add_imbalance<GlobalOrdinal>(global_box, my_box, params.load_imbalance, ydoc);
+  }
+
+  float largest_imbalance = 0, std_dev = 0;
+  compute_imbalance<GlobalOrdinal>(global_box, my_box, largest_imbalance,
+                                   std_dev, ydoc, true);
+
+
+  //Create a representation of the mesh:
+  //Note that 'simple_mesh_description' is a virtual or conceptual
+  //mesh that doesn't actually store mesh data.
+#ifdef TIME_IT
+  if (myproc==0) {
+    std::cout.width(30);
+    std::cout << "creating/filling mesh...";
+    std::cout.flush();
+  }
+#endif
+
+  timer_type t_start = mytimer();
+  timer_type t0 = mytimer();
+
+  simple_mesh_description<GlobalOrdinal> mesh(global_box, my_box);
+
+  timer_type mesh_fill = mytimer() - t0;
+  timer_type t_total = mytimer() - t_start;
+
+#ifdef TIME_IT
+  if (myproc==0) {
+    std::cout << mesh_fill << "s, total time: " << t_total << std::endl;
+  }
+#endif
+
+  //next we will generate the matrix structure.
+
+  //Declare matrix object:
+
+#if defined(MINIFE_ELL_MATRIX)
+  typedef ELLMatrix<Scalar,LocalOrdinal,GlobalOrdinal> MatrixType;
+#else
+  typedef CSRMatrix<Scalar,LocalOrdinal,GlobalOrdinal> MatrixType;
+#endif
+
+  MatrixType A;
+
+  timer_type gen_structure; // assigned by RUN_TIMED_FUNCTION below
+  RUN_TIMED_FUNCTION("generating matrix structure...",
+                     generate_matrix_structure(mesh, A),
+                     gen_structure, t_total);
+
+  GlobalOrdinal local_nrows = A.rows.size();
+  GlobalOrdinal my_first_row = local_nrows > 0 ? A.rows[0] : -1; // -1 when this proc owns no rows
+
+  Vector<Scalar,LocalOrdinal,GlobalOrdinal> b(my_first_row, local_nrows);
+  Vector<Scalar,LocalOrdinal,GlobalOrdinal> x(my_first_row, local_nrows);
+
+  //Assemble finite-element sub-matrices and sub-vectors into the global
+  //linear system:
+
+  timer_type fe_assembly;
+  RUN_TIMED_FUNCTION("assembling FE data...",
+                     assemble_FE_data(mesh, A, b, params),
+                     fe_assembly, t_total);
+
+  if (myproc == 0) {
+    ydoc.add("Matrix structure generation","");
+    ydoc.get("Matrix structure generation")->add("Mat-struc-gen Time",gen_structure);
+    ydoc.add("FE assembly","");
+    ydoc.get("FE assembly")->add("FE assembly Time",fe_assembly);
+  }
+
+#ifdef MINIFE_DEBUG
+  write_matrix("A_prebc.mtx", A);
+  write_vector("b_prebc.vec", b);
+#endif
+
+  //Now apply dirichlet boundary-conditions
+  //(Apply the 0-valued surfaces first, then the 1-valued surface last.)
+
+  timer_type dirbc_time; // the second call below overwrites it; t_total accumulates both calls
+  RUN_TIMED_FUNCTION("imposing Dirichlet BC...",
+            impose_dirichlet(0.0, A, b, global_nx+1, global_ny+1, global_nz+1, mesh.bc_rows_0), dirbc_time, t_total);
+  RUN_TIMED_FUNCTION("imposing Dirichlet BC...",
+            impose_dirichlet(1.0, A, b, global_nx+1, global_ny+1, global_nz+1, mesh.bc_rows_1), dirbc_time, t_total);
+
+#ifdef MINIFE_DEBUG
+  write_matrix("A.mtx", A);
+  write_vector("b.vec", b);
+#endif
+
+  //Transform global indices to local, set up communication information:
+
+  timer_type make_local_time;
+  RUN_TIMED_FUNCTION("making matrix indices local...",
+                     make_local_matrix(A),
+                     make_local_time, t_total);
+
+#ifdef MINIFE_DEBUG
+  write_matrix("A_local.mtx", A);
+  write_vector("b_local.vec", b);
+#endif
+
+  size_t global_nnz = compute_matrix_stats(A, myproc, numprocs, ydoc);
+
+  //Prepare to perform conjugate gradient solve:
+
+  LocalOrdinal max_iters = 200;
+  LocalOrdinal num_iters = 0;
+  typedef typename TypeTraits<Scalar>::magnitude_type magnitude;
+  magnitude rnorm = 0;
+  magnitude tol = std::numeric_limits<magnitude>::epsilon();
+
+  timer_type cg_times[NUM_TIMERS] = {0}; // zero-filled: read below even on paths where the solver never writes them
+
+  typedef Vector<Scalar,LocalOrdinal,GlobalOrdinal> VectorType;
+
+  t_total = mytimer() - t_start;
+
+  bool matvec_with_comm_overlap = params.mv_overlap_comm_comp==1;
+
+  int verify_result = 0;
+
+#if MINIFE_KERNELS != 0
+  if (myproc==0) {
+    std::cout.width(30);
+    std::cout << "Starting kernel timing loops ..." << std::endl;
+  }
+
+  max_iters = 500;
+  x.coefs[0] = 0.9;
+  if (matvec_with_comm_overlap) {
+    time_kernels(A, b, x, matvec_overlap<MatrixType,VectorType>(), max_iters, rnorm, cg_times);
+  }
+  else {
+    time_kernels(A, b, x, matvec_std<MatrixType,VectorType>(), max_iters, rnorm, cg_times);
+  }
+  num_iters = max_iters;
+  std::string title("Kernel timings");
+#else
+  if (myproc==0) {
+    std::cout << "Starting CG solver ... " << std::endl;
+  }
+
+  if (matvec_with_comm_overlap) {
+#ifdef MINIFE_CSR_MATRIX
+    rearrange_matrix_local_external(A);
+    cg_solve(A, b, x, matvec_overlap<MatrixType,VectorType>(), max_iters, tol,
+           num_iters, rnorm, cg_times);
+#else
+    std::cout << "ERROR, matvec with overlapping comm/comp only works with CSR matrix."<<std::endl;
+#endif
+  }
+  else {
+    cg_solve(A, b, x, matvec_std<MatrixType,VectorType>(), max_iters, tol,
+           num_iters, rnorm, cg_times);
+    if (myproc == 0) {
+      std::cout << "Final Resid Norm: " << rnorm << std::endl;
+    }
+
+    if (params.verify_solution > 0) { // verification only runs on the non-overlap path
+      double tolerance = 0.06;
+      bool verify_whole_domain = false;
+  #ifdef MINIFE_DEBUG
+      verify_whole_domain = true;
+  #endif
+      if (myproc == 0) {
+        if (verify_whole_domain) std::cout << "verifying solution..." << std::endl;
+        else std::cout << "verifying solution at ~ (0.5, 0.5, 0.5) ..." << std::endl;
+      }
+      verify_result = verify_solution(mesh, x, tolerance, verify_whole_domain);
+    }
+  }
+
+#ifdef MINIFE_DEBUG
+  write_vector("x.vec", x);
+#endif
+  std::string title("CG solve");
+#endif
+
+  if (myproc == 0) {
+    ydoc.get("Global Run Parameters")->add("ScalarType",TypeTraits<Scalar>::name());
+    ydoc.get("Global Run Parameters")->add("GlobalOrdinalType",TypeTraits<GlobalOrdinal>::name());
+    ydoc.get("Global Run Parameters")->add("LocalOrdinalType",TypeTraits<LocalOrdinal>::name());
+    ydoc.add(title,"");
+    ydoc.get(title)->add("Iterations",num_iters);
+    ydoc.get(title)->add("Final Resid Norm",rnorm);
+
+    GlobalOrdinal global_nrows = global_nx; // NOTE(review): nx*ny*nz is the element count; the BC calls above use nx+1 etc. for nodes - confirm intent
+    global_nrows = global_nrows*global_ny*global_nz; // multiply in GlobalOrdinal width; int*int could overflow first
+
+    //flops-per-mv, flops-per-dot, flops-per-waxpy:
+    double mv_flops = global_nnz*2.0;
+    double dot_flops = global_nrows*2.0;
+    double waxpy_flops = global_nrows*3.0;
+
+#if MINIFE_KERNELS == 0
+//if MINIFE_KERNELS == 0 then we did a CG solve, and in that case
+//there were num_iters+1 matvecs, num_iters*2 dots, and num_iters*3+2 waxpys.
+    mv_flops *= (num_iters+1);
+    dot_flops *= (2*num_iters);
+    waxpy_flops *= (3*num_iters+2);
+#else
+//if MINIFE_KERNELS then we did one of each operation per iteration.
+    mv_flops *= num_iters;
+    dot_flops *= num_iters;
+    waxpy_flops *= num_iters;
+#endif
+
+    double total_flops = mv_flops + dot_flops + waxpy_flops;
+
+    double mv_mflops = -1; // -1 marks "time too small to measure" and is reported as "inf" below
+    if (cg_times[MATVEC] > 1.e-4)
+      mv_mflops = 1.e-6 * (mv_flops/cg_times[MATVEC]);
+
+    double dot_mflops = -1;
+    if (cg_times[DOT] > 1.e-4)
+      dot_mflops = 1.e-6 * (dot_flops/cg_times[DOT]);
+
+    double waxpy_mflops = -1;
+    if (cg_times[WAXPY] > 1.e-4)
+      waxpy_mflops = 1.e-6 *  (waxpy_flops/cg_times[WAXPY]);
+
+    double total_mflops = -1;
+    if (cg_times[TOTAL] > 1.e-4)
+      total_mflops = 1.e-6 * (total_flops/cg_times[TOTAL]);
+
+    ydoc.get(title)->add("WAXPY Time",cg_times[WAXPY]);
+    ydoc.get(title)->add("WAXPY Flops",waxpy_flops);
+    if (waxpy_mflops >= 0)
+      ydoc.get(title)->add("WAXPY Mflops",waxpy_mflops);
+    else
+      ydoc.get(title)->add("WAXPY Mflops","inf");
+
+    ydoc.get(title)->add("DOT Time",cg_times[DOT]);
+    ydoc.get(title)->add("DOT Flops",dot_flops);
+    if (dot_mflops >= 0)
+      ydoc.get(title)->add("DOT Mflops",dot_mflops);
+    else
+      ydoc.get(title)->add("DOT Mflops","inf");
+
+    ydoc.get(title)->add("MATVEC Time",cg_times[MATVEC]);
+    ydoc.get(title)->add("MATVEC Flops",mv_flops);
+    if (mv_mflops >= 0)
+      ydoc.get(title)->add("MATVEC Mflops",mv_mflops);
+    else
+      ydoc.get(title)->add("MATVEC Mflops","inf");
+
+#ifdef MINIFE_FUSED
+    ydoc.get(title)->add("MATVECDOT Time",cg_times[MATVECDOT]);
+    ydoc.get(title)->add("MATVECDOT Flops",mv_flops);
+    if (mv_mflops >= 0)
+      ydoc.get(title)->add("MATVECDOT Mflops",mv_mflops);
+    else
+      ydoc.get(title)->add("MATVECDOT Mflops","inf");
+#endif
+
+#if MINIFE_KERNELS == 0
+    ydoc.get(title)->add("Total","");
+    ydoc.get(title)->get("Total")->add("Total CG Time",cg_times[TOTAL]);
+    ydoc.get(title)->get("Total")->add("Total CG Flops",total_flops);
+    if (total_mflops >= 0)
+      ydoc.get(title)->get("Total")->add("Total CG Mflops",total_mflops);
+    else
+      ydoc.get(title)->get("Total")->add("Total CG Mflops","inf");
+    ydoc.get(title)->add("Time per iteration",cg_times[TOTAL]/num_iters);
+#endif
+  }
+
+  return verify_result;
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/exchange_externals.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/exchange_externals.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/exchange_externals.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/exchange_externals.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,269 @@
+#ifndef _exchange_externals_hpp_
+#define _exchange_externals_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <cstdlib>
+#include <iostream>
+
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+#include <outstream.hpp>
+
+#include <TypeTraits.hpp>
+
+namespace miniFE {
+// Blocking halo exchange: posts MPI_Irecv into the tail of x.coefs, MPI_Sends the entries listed in A.elements_to_send, then waits; a no-op without HAVE_MPI or with fewer than 2 procs.
+template<typename MatrixType,
+         typename VectorType>
+void
+exchange_externals(MatrixType& A,
+                   VectorType& x)
+{
+#ifdef HAVE_MPI
+#ifdef MINIFE_DEBUG
+  std::ostream& os = outstream();
+  os << "entering exchange_externals\n";
+#endif
+
+  int numprocs = 1;
+  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
+
+  if (numprocs < 2) return;
+
+  typedef typename MatrixType::ScalarType Scalar;
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinal;
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal;
+
+  // Extract Matrix pieces
+
+  int local_nrow = A.rows.size();
+  int num_neighbors = A.neighbors.size();
+  const std::vector<LocalOrdinal>& recv_length = A.recv_length;
+  const std::vector<LocalOrdinal>& send_length = A.send_length;
+  const std::vector<int>& neighbors = A.neighbors;
+  const std::vector<GlobalOrdinal>& elements_to_send = A.elements_to_send;
+
+  std::vector<Scalar>& send_buffer = A.send_buffer; // reuses the matrix-owned buffer; presumably pre-sized to elements_to_send.size() elsewhere - confirm
+
+  //
+  // first post receives, these are immediate receives
+  // Do not wait for result to come, will do that at the
+  // wait call below.
+  //
+
+  int MPI_MY_TAG = 99;
+
+  std::vector<MPI_Request>& request = A.request; // indexed up to num_neighbors-1 below; not resized here
+
+  //
+  // Externals are at end of locals
+  //
+
+  std::vector<Scalar>& x_coefs = x.coefs;
+  Scalar* x_external = &(x_coefs[local_nrow]);
+
+  MPI_Datatype mpi_dtype = TypeTraits<Scalar>::mpi_type();
+
+  // Post receives first
+  for(int i=0; i<num_neighbors; ++i) {
+    int n_recv = recv_length[i];
+    MPI_Irecv(x_external, n_recv, mpi_dtype, neighbors[i], MPI_MY_TAG,
+              MPI_COMM_WORLD, &request[i]);
+    x_external += n_recv; // each neighbor's data lands contiguously after the previous one's
+  }
+
+#ifdef MINIFE_DEBUG
+  os << "launched recvs\n";
+#endif
+
+  //
+  // Fill up send buffer
+  //
+
+  size_t total_to_be_sent = elements_to_send.size();
+#ifdef MINIFE_DEBUG
+  os << "total_to_be_sent: " << total_to_be_sent << std::endl;
+#endif
+
+  for(size_t i=0; i<total_to_be_sent; ++i) {
+#ifdef MINIFE_DEBUG
+    //expensive index range-check (an index equal to size() is already out of range, hence >=):
+    if (elements_to_send[i] < 0 || elements_to_send[i] >= x.coefs.size()) {
+      os << "error, out-of-range. x.coefs.size()=="<<x.coefs.size()<<", elements_to_send[i]=="<<elements_to_send[i]<<std::endl;
+    }
+#endif
+    send_buffer[i] = x.coefs[elements_to_send[i]];
+  }
+
+  //
+  // Send to each neighbor
+  //
+
+  Scalar* s_buffer = &send_buffer[0];
+
+  for(int i=0; i<num_neighbors; ++i) {
+    int n_send = send_length[i];
+    MPI_Send(s_buffer, n_send, mpi_dtype, neighbors[i], MPI_MY_TAG,
+             MPI_COMM_WORLD);
+    s_buffer += n_send;
+  }
+
+#ifdef MINIFE_DEBUG
+  os << "send to " << num_neighbors << std::endl;
+#endif
+
+  //
+  // Complete the reads issued above
+  //
+
+  MPI_Status status;
+  for(int i=0; i<num_neighbors; ++i) {
+    if (MPI_Wait(&request[i], &status) != MPI_SUCCESS) {
+      std::cerr << "MPI_Wait error\n"<<std::endl;
+      MPI_Abort(MPI_COMM_WORLD, -1);
+    }
+  }
+
+#ifdef MINIFE_DEBUG
+  os << "leaving exchange_externals"<<std::endl;
+#endif
+
+//endif HAVE_MPI
+#endif
+}
+// Receive requests shared by begin_exchange_externals (which posts them) and finish_exchange_externals (which waits on them).
+#ifdef HAVE_MPI
+static std::vector<MPI_Request> exch_ext_requests; // 'static' in a header: each translation unit including this file gets its own copy
+#endif
+// First half of a split halo exchange: posts the MPI_Irecv calls into exch_ext_requests and performs the sends, but does not wait for the receives.
+template<typename MatrixType,
+         typename VectorType>
+void
+begin_exchange_externals(MatrixType& A,
+                         VectorType& x)
+{
+#ifdef HAVE_MPI
+
+  int numprocs = 1, myproc = 0; // myproc is queried below but not otherwise used in this function
+  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
+  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+
+  if (numprocs < 2) return;
+
+  typedef typename MatrixType::ScalarType Scalar;
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinal;
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal;
+
+  // Extract Matrix pieces
+
+  int local_nrow = A.rows.size();
+  int num_neighbors = A.neighbors.size();
+  const std::vector<LocalOrdinal>& recv_length = A.recv_length;
+  const std::vector<LocalOrdinal>& send_length = A.send_length;
+  const std::vector<int>& neighbors = A.neighbors;
+  const std::vector<GlobalOrdinal>& elements_to_send = A.elements_to_send;
+
+  std::vector<Scalar> send_buffer(elements_to_send.size(), 0); // local buffer allocated on every call; the sends below are blocking MPI_Send calls
+
+  //
+  // first post receives, these are immediate receives
+  // Do not wait for result to come; the matching wait is done
+  // in finish_exchange_externals.
+  //
+
+  int MPI_MY_TAG = 99;
+
+  exch_ext_requests.resize(num_neighbors);
+
+  //
+  // Externals are at end of locals
+  //
+
+  std::vector<Scalar>& x_coefs = x.coefs;
+  Scalar* x_external = &(x_coefs[local_nrow]);
+
+  MPI_Datatype mpi_dtype = TypeTraits<Scalar>::mpi_type();
+
+  // Post receives first
+  for(int i=0; i<num_neighbors; ++i) {
+    int n_recv = recv_length[i];
+    MPI_Irecv(x_external, n_recv, mpi_dtype, neighbors[i], MPI_MY_TAG,
+              MPI_COMM_WORLD, &exch_ext_requests[i]);
+    x_external += n_recv;
+  }
+
+  //
+  // Fill up send buffer
+  //
+
+  size_t total_to_be_sent = elements_to_send.size();
+  for(size_t i=0; i<total_to_be_sent; ++i) send_buffer[i] = x.coefs[elements_to_send[i]];
+
+  //
+  // Send to each neighbor
+  //
+
+  Scalar* s_buffer = &send_buffer[0];
+
+  for(int i=0; i<num_neighbors; ++i) {
+    int n_send = send_length[i];
+    MPI_Send(s_buffer, n_send, mpi_dtype, neighbors[i], MPI_MY_TAG,
+             MPI_COMM_WORLD);
+    s_buffer += n_send;
+  }
+#endif
+}
+// Second half of the split exchange: waits on the first 'num_neighbors' entries of exch_ext_requests; a no-op without HAVE_MPI.
+inline
+void
+finish_exchange_externals(int num_neighbors)
+{
+#ifdef HAVE_MPI
+  //
+  // Complete the reads issued above
+  //
+
+  MPI_Status status;
+  for(int i=0; i<num_neighbors; ++i) { // no bounds check: num_neighbors must not exceed exch_ext_requests.size()
+    if (MPI_Wait(&exch_ext_requests[i], &status) != MPI_SUCCESS) {
+      std::cerr << "MPI_Wait error\n"<<std::endl;
+      MPI_Abort(MPI_COMM_WORLD, -1);
+    }
+  }
+
+//endif HAVE_MPI
+#endif
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/gauss_pts.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/gauss_pts.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/gauss_pts.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/gauss_pts.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,67 @@
+#ifndef _gauss_pts_hpp_
+#define _gauss_pts_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#ifndef KERNEL_PREFIX
+#define KERNEL_PREFIX
+#endif
+
+namespace miniFE {
+
+// Writes the points and weights of the N-point Gauss quadrature rule
+// into pts[0..N-1] and wts[0..N-1] for N = 1, 2 or 3.
+// For any other N both arrays are left untouched.
+// The non-terminating values are stored as truncated decimal literals.
+template<typename Scalar>
+inline
+KERNEL_PREFIX void gauss_pts(int N, Scalar* pts, Scalar* wts)
+{
+  if (N == 1) {
+    pts[0] = 0.0;
+    wts[0] = 2.0;
+    return;
+  }
+
+  if (N == 2) {
+    const Scalar inv_sqrt3 = 0.577350269; // 1.0/sqrt(3.0)
+    pts[0] = -inv_sqrt3;
+    wts[0] = 1.0;
+    pts[1] = inv_sqrt3;
+    wts[1] = 1.0;
+    return;
+  }
+
+  if (N == 3) {
+    const Scalar sqrt_3_5 = 0.77459667;   // sqrt(3.0/5.0)
+    const Scalar w_outer  = 0.55555556;   // 5.0/9.0
+    const Scalar w_center = 0.88888889;   // 8.0/9.0
+    pts[0] = -sqrt_3_5;
+    wts[0] = w_outer;
+    pts[1] = 0.0;
+    wts[1] = w_center;
+    pts[2] = sqrt_3_5;
+    wts[2] = w_outer;
+  }
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/generate_matrix_structure.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/generate_matrix_structure.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/generate_matrix_structure.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/generate_matrix_structure.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,154 @@
+#ifndef _generate_matrix_structure_hpp_
+#define _generate_matrix_structure_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <sstream>
+#include <stdexcept>
+#include <map>
+#include <algorithm>
+
+#include <simple_mesh_description.hpp>
+#include <SparseMatrix_functions.hpp>
+#include <box_utils.hpp>
+#include <utils.hpp>
+
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+namespace miniFE {
+
+// Builds the row structure of A for the nodes owned by this process and
+// passes it to init_matrix.
+//
+// mesh  supplies global_box, local_box and map_id_to_row.
+// A     has space reserved (27 entries per row) and is then initialised
+//       from the generated rows / row_offsets / row_coords arrays.
+//
+// Returns 0 on success and 1 if an exception was thrown in here. With
+// HAVE_MPI the failure flag is summed over MPI_COMM_WORLD, so every
+// process returns 1 as soon as any single process failed.
+template<typename MatrixType>
+int
+generate_matrix_structure(const simple_mesh_description<typename MatrixType::GlobalOrdinalType>& mesh,
+                          MatrixType& A)
+{
+  int myproc = 0;
+#ifdef HAVE_MPI
+  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+#endif
+
+  int threw_exc = 0;
+  try {
+
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal;
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinal;
+
+  int global_nodes_x = mesh.global_box[0][1]+1;
+  int global_nodes_y = mesh.global_box[1][1]+1;
+  int global_nodes_z = mesh.global_box[2][1]+1;
+  Box box;
+  copy_box(mesh.local_box, box);
+
+  //num-owned-nodes in each dimension is num-elems+1
+  //only if num-elems > 0 in that dimension *and*
+  //we are at the high end of the global range in that dimension:
+  if (box[0][1] > box[0][0] && box[0][1] == mesh.global_box[0][1]) ++box[0][1];
+  if (box[1][1] > box[1][0] && box[1][1] == mesh.global_box[1][1]) ++box[1][1];
+  if (box[2][1] > box[2][0] && box[2][1] == mesh.global_box[2][1]) ++box[2][1];
+
+  //widen to GlobalOrdinal before multiplying the remaining two factors:
+  GlobalOrdinal global_nrows = global_nodes_x;
+  global_nrows *= global_nodes_y*global_nodes_z;
+
+  GlobalOrdinal nrows = get_num_ids<GlobalOrdinal>(box);
+  try {
+    A.reserve_space(nrows, 27);
+  }
+  catch(std::exception& exc) {
+    std::ostringstream osstr;
+    osstr << "One of A.rows.resize, A.row_offsets.resize, A.packed_cols.reserve or A.packed_coefs.reserve: nrows=" <<nrows<<": ";
+    osstr << exc.what();
+    std::string str1 = osstr.str();
+    throw std::runtime_error(str1);
+  }
+
+  std::vector<GlobalOrdinal> rows(nrows);
+  std::vector<LocalOrdinal> row_offsets(nrows+1);
+  std::vector<int> row_coords(nrows*3);
+
+  //same type as nrows: no signed/unsigned comparison below and no
+  //wrap-around when GlobalOrdinal is wider than unsigned.
+  GlobalOrdinal roffset = 0;
+  GlobalOrdinal nnz = 0;
+
+  for(int iz=box[2][0]; iz<box[2][1]; ++iz) {
+   for(int iy=box[1][0]; iy<box[1][1]; ++iy) {
+    for(int ix=box[0][0]; ix<box[0][1]; ++ix) {
+      GlobalOrdinal row_id =
+          get_id<GlobalOrdinal>(global_nodes_x, global_nodes_y, global_nodes_z,
+                                ix, iy, iz);
+      rows[roffset] = mesh.map_id_to_row(row_id);
+      row_coords[roffset*3] = ix;
+      row_coords[roffset*3+1] = iy;
+      row_coords[roffset*3+2] = iz;
+      row_offsets[roffset++] = nnz;
+
+      //count the members of the 3x3x3 neighbourhood of (ix,iy,iz) whose
+      //id lies in [0, global_nrows); each one is a nonzero of this row.
+      for(int sz=-1; sz<=1; ++sz) {
+       for(int sy=-1; sy<=1; ++sy) {
+        for(int sx=-1; sx<=1; ++sx) {
+          GlobalOrdinal col_id =
+              get_id<GlobalOrdinal>(global_nodes_x, global_nodes_y, global_nodes_z,
+                                   ix+sx, iy+sy, iz+sz);
+          if (col_id >= 0 && col_id < global_nrows) {
+            ++nnz;
+          }
+        }
+       }
+      }
+
+    }
+   }
+  }
+
+  if (roffset != nrows) {
+    throw std::runtime_error("ERROR in generate_matrix_structure, roffset != nrows.");
+  }
+  row_offsets[roffset] = nnz;
+
+  init_matrix(A, rows, row_offsets, row_coords,
+              global_nodes_x, global_nodes_y, global_nodes_z, global_nrows, mesh);
+  }
+  catch(std::exception& exc) {
+    //report the actual reason instead of guessing at memory exhaustion.
+    std::cout << "proc " << myproc << " threw an exception in generate_matrix_structure: " << exc.what() << std::endl;
+    threw_exc = 1;
+  }
+  catch(...) {
+    std::cout << "proc " << myproc << " threw an exception in generate_matrix_structure, probably due to running out of memory." << std::endl;
+    threw_exc = 1;
+  }
+#ifdef HAVE_MPI
+  //agree on failure collectively so that all processes return together.
+  int global_throw = 0;
+  MPI_Allreduce(&threw_exc, &global_throw, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+  threw_exc = global_throw;
+#endif
+  if (threw_exc) {
+    return 1;
+  }
+
+  return 0;
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/imbalance.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/imbalance.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/imbalance.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/imbalance.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,298 @@
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#ifndef _imbalance_hpp_
+#define _imbalance_hpp_
+
+#include <cmath>
+
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+#include <box_utils.hpp>
+#include <utils.hpp>
+#include <YAML_Doc.hpp>
+
+namespace miniFE {
+
+const int X = 0;     // axis selectors: used as the first index into a Box
+const int Y = 1;
+const int Z = 2;
+const int NONE = 3;  // "no axis": default result of decide_how_to_grow/shrink
+
+const int LOWER = 0; // end selectors: used as the second index into a Box
+const int UPPER = 1;
+
+// Measures how unevenly rows are spread over the processes.
+//
+// largest_imbalance  out: the larger of the percentage differences of the
+//                    minimum and the maximum rows-per-process from the average.
+// std_dev            out: result of compute_std_dev_as_percentage.
+// doc/record_in_doc  process 0 stores both values in doc when the flag is set.
+// global_box is accepted but not used.
+template<typename GlobalOrdinal>
+void
+compute_imbalance(const Box& global_box,
+                  const Box& local_box,
+                  float& largest_imbalance,
+                  float& std_dev,
+                  YAML_Doc& doc,
+                  bool record_in_doc)
+{
+  int myproc = 0;
+  int numprocs = 1;
+#ifdef HAVE_MPI
+  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
+  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+#endif
+
+  // Gather the global row count and the min/max rows per process.
+  GlobalOrdinal my_nrows = get_num_ids<GlobalOrdinal>(local_box);
+  GlobalOrdinal total_nrows = 0;
+  GlobalOrdinal fewest_nrows = 0;
+  GlobalOrdinal most_nrows = 0;
+  int fewest_proc = myproc;
+  int most_proc = myproc;
+  get_global_min_max(my_nrows, total_nrows, fewest_nrows, fewest_proc,
+                     most_nrows, most_proc);
+
+  float mean_nrows = total_nrows;
+  mean_nrows /= numprocs;
+
+  // Start from the deviation of the minimum, then keep whichever of the
+  // two deviations (minimum or maximum) is larger.
+  largest_imbalance = percentage_difference<float>(fewest_nrows, mean_nrows);
+
+  float max_side = percentage_difference<float>(most_nrows, mean_nrows);
+  if (largest_imbalance < max_side) {
+    largest_imbalance = max_side;
+  }
+
+  std_dev = compute_std_dev_as_percentage<float>(my_nrows, mean_nrows);
+
+  // Only process 0 writes to the document, and only on request.
+  if (myproc != 0 || !record_in_doc) {
+    return;
+  }
+
+  doc.add("Rows-per-proc Load Imbalance","");
+  doc.get("Rows-per-proc Load Imbalance")->add("Largest (from avg, %)",largest_imbalance);
+  doc.get("Rows-per-proc Load Imbalance")->add("Std Dev (%)",std_dev);
+}
+
+// Picks the box face along which local_box can be enlarged.
+//
+// Axes are tried in the order Z, Y, X; on each axis the UPPER end is tried
+// before the LOWER end. A face qualifies when it has not yet reached the
+// matching face of global_box.
+//
+// Returns (axis, end) for the first qualifying face, or (NONE, UPPER) when
+// local_box already touches global_box on every side.
+//
+// Declared inline because this non-template function is defined in a
+// header; without it, including the header from two translation units
+// produces duplicate definitions at link time.
+inline
+std::pair<int,int>
+decide_how_to_grow(const Box& global_box, const Box& local_box)
+{
+  const int axis_order[3] = { Z, Y, X };
+
+  for(int i=0; i<3; ++i) {
+    const int axis = axis_order[i];
+
+    if (local_box[axis][UPPER] < global_box[axis][UPPER]) {
+      return std::pair<int,int>(axis, UPPER);
+    }
+    if (local_box[axis][LOWER] > global_box[axis][LOWER]) {
+      return std::pair<int,int>(axis, LOWER);
+    }
+  }
+
+  return std::pair<int,int>(NONE, UPPER);
+}
+
+// Picks the box face along which local_box can be reduced.
+//
+// Axes are tried in the order Z, Y, X; on each axis the UPPER end is tried
+// before the LOWER end. A face qualifies when it lies strictly inside
+// global_box and the box extent on that axis (upper minus lower) exceeds 2.
+//
+// Returns (axis, end) for the first qualifying face, or (NONE, UPPER) when
+// no face qualifies.
+//
+// Declared inline because this non-template function is defined in a
+// header; without it, including the header from two translation units
+// produces duplicate definitions at link time.
+inline
+std::pair<int,int>
+decide_how_to_shrink(const Box& global_box, const Box& local_box)
+{
+  const int axis_order[3] = { Z, Y, X };
+
+  for(int i=0; i<3; ++i) {
+    const int axis = axis_order[i];
+
+    // Too thin on this axis: neither end may be pulled in.
+    if (!(local_box[axis][UPPER]-local_box[axis][LOWER] > 2)) {
+      continue;
+    }
+
+    if (local_box[axis][UPPER] < global_box[axis][UPPER]) {
+      return std::pair<int,int>(axis, UPPER);
+    }
+    if (local_box[axis][LOWER] > global_box[axis][LOWER]) {
+      return std::pair<int,int>(axis, LOWER);
+    }
+  }
+
+  return std::pair<int,int>(NONE, UPPER);
+}
+/* Moves faces of local_box, one unit per loop iteration, until the value
+ * reported by compute_imbalance is no longer below 'imbalance' or no
+ * further move is possible. local_box is modified in place.
+ *
+ * Per iteration, the rank max_proc chooses a face to grow and the rank
+ * min_proc chooses a face to shrink (both ranks come from
+ * get_global_min_max; presumably the owners of the most and the fewest
+ * rows -- confirm against that helper). The choices are broadcast, any
+ * rank may veto a move, and every rank having a bound on the moving
+ * plane shifts that bound.
+ *
+ * Returns immediately when there is a single process, which is always
+ * the case without HAVE_MPI. 'doc' is only forwarded to compute_imbalance
+ * with record_in_doc == false.
+ */
+template<typename GlobalOrdinal>
+void
+add_imbalance(const Box& global_box,
+              Box& local_box,
+              float imbalance,
+              YAML_Doc& doc)
+{
+  int numprocs = 1, myproc = 0;
+#ifdef HAVE_MPI
+  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
+  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+#endif
+  // A single process has no neighbour to trade rows with.
+  if (numprocs == 1) {
+    return;
+  }
+  // Baseline measurement; the trailing 'false' keeps it out of doc.
+  float cur_imbalance = 0, cur_std_dev = 0;
+  compute_imbalance<GlobalOrdinal>(global_box, local_box,
+                                  cur_imbalance, cur_std_dev, doc, false);
+  // Each pass attempts one grow step and one shrink step.
+  while (cur_imbalance < imbalance) {
+    GlobalOrdinal local_nrows = get_num_ids<GlobalOrdinal>(local_box);
+    GlobalOrdinal min_nrows = 0, max_nrows = 0, global_nrows = 0;
+    int min_proc = myproc, max_proc = myproc;
+    get_global_min_max(local_nrows, global_nrows, min_nrows, min_proc,
+                       max_nrows, max_proc);
+    // Defaults mean "no move"; only max_proc / min_proc replace them below.
+    std::pair<int,int> grow(NONE,UPPER);
+    int grow_axis_val = -1;
+    std::pair<int,int> shrink(NONE,UPPER);
+    int shrink_axis_val = -1;
+    // The chosen rank decides locally which of its faces to move.
+    if (myproc == max_proc) {
+      grow = decide_how_to_grow(global_box, local_box);
+      if (grow.first != NONE) {
+        grow_axis_val = local_box[grow.first][grow.second];
+      }
+    }
+    if (myproc == min_proc) {
+      shrink = decide_how_to_shrink(global_box, local_box);
+      if (shrink.first != NONE) {
+        shrink_axis_val = local_box[shrink.first][shrink.second];
+      }
+    }
+    // Message layout: {axis, end, x_lo, x_hi, y_lo, y_hi, z_lo, z_hi}.
+    int grow_info[8] = {grow.first, grow.second,
+                        local_box[X][0], local_box[X][1],
+                        local_box[Y][0], local_box[Y][1],
+                        local_box[Z][0], local_box[Z][1]};
+    // Same layout for the shrink decision.
+    int shrink_info[8] = {shrink.first, shrink.second,
+                        local_box[X][0], local_box[X][1],
+                        local_box[Y][0], local_box[Y][1],
+                        local_box[Z][0], local_box[Z][1]};
+#ifdef HAVE_MPI
+    MPI_Bcast(&grow_info[0], 8, MPI_INT, max_proc, MPI_COMM_WORLD);
+    MPI_Bcast(&shrink_info[0], 8, MPI_INT, min_proc, MPI_COMM_WORLD);
+#endif
+    // From here on every rank works with the root ranks' decisions and boxes.
+    int grow_axis = grow_info[0];
+    int grow_end = grow_info[1];
+    int shrink_axis = shrink_info[0];
+    int shrink_end = shrink_info[1];
+    int grow_incr = 1;                          // growing at UPPER moves the face up
+    if (grow_end == LOWER) grow_incr = -1;      // growing at LOWER moves it down
+    int shrink_incr = -1;                       // shrinking at UPPER moves the face down
+    if (shrink_end == LOWER) shrink_incr = 1;   // shrinking at LOWER moves it up
+    if (grow_axis != NONE) grow_axis_val = grow_info[2+grow_axis*2+grow_end];         // coordinate of the face to move
+    if (shrink_axis != NONE) shrink_axis_val = shrink_info[2+shrink_axis*2+shrink_end]; // coordinate of the face to move
+    // Neither root rank found a face to move: stop.
+    if (grow_axis == NONE && shrink_axis == NONE) break;
+    // Veto the grow if it would squeeze this rank's box while it is < 2 thick.
+    bool grow_status = grow_axis==NONE ? false : true;
+    if (grow_axis != NONE) {
+      if ((grow_incr ==  1 && local_box[grow_axis][0] == grow_axis_val) ||
+          (grow_incr == -1 && local_box[grow_axis][1] == grow_axis_val)) {
+        if (local_box[grow_axis][1] - local_box[grow_axis][0] < 2) {
+          grow_status = false;
+        }
+      }
+    }
+    // Same veto rule for the shrink move.
+    bool shrink_status = shrink_axis==NONE ? false : true;
+    if (shrink_axis != NONE) {
+      if ((shrink_incr ==  1 && local_box[shrink_axis][0] == shrink_axis_val) ||
+          (shrink_incr == -1 && local_box[shrink_axis][1] == shrink_axis_val)) {
+        if (local_box[shrink_axis][1] - local_box[shrink_axis][0] < 2) {
+          shrink_status = false;
+        }
+      }
+    }
+    // Sum the veto flags (1 = veto): a move survives only if no rank objected.
+#ifdef HAVE_MPI
+    int statusints[2] = { grow_status ? 0 : 1, shrink_status ? 0 : 1 };
+    int globalstatus[2] = { 0, 0 };
+    MPI_Allreduce(&statusints, &globalstatus, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+    grow_status = globalstatus[0]>0 ? false : true;
+    shrink_status = globalstatus[1]>0 ? false : true;
+#endif
+    // Both moves rejected: stop.
+    if (grow_status == false && shrink_status == false) break;
+    // Apply the grow: any bound of this rank lying on the moving plane shifts.
+    if (grow_status && grow_axis != NONE) {
+      if (local_box[grow_axis][0] == grow_axis_val) {
+        local_box[grow_axis][0] += grow_incr;
+      }
+      // Checked separately from the lower bound above.
+      if (local_box[grow_axis][1] == grow_axis_val) {
+        local_box[grow_axis][1] += grow_incr;
+      }
+    }
+    // Apply the shrink in the same way.
+    if (shrink_status && shrink_axis != NONE) {
+      if (local_box[shrink_axis][0] == shrink_axis_val) {
+        local_box[shrink_axis][0] += shrink_incr;
+      }
+      // Checked separately from the lower bound above.
+      if (local_box[shrink_axis][1] == shrink_axis_val) {
+        local_box[shrink_axis][1] += shrink_incr;
+      }
+    }
+    // Re-measure with the updated boxes for the loop condition.
+    compute_imbalance<GlobalOrdinal>(global_box, local_box,
+                                    cur_imbalance, cur_std_dev, doc, false);
+  }
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/main.cpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/main.cpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/main.cpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/main.cpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,248 @@
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <iostream>
+#include <ctime>
+#include <cstdlib>
+#include <vector>
+
+#ifdef MINIFE_REPORT_RUSAGE
+#include <sys/time.h>
+#include <sys/resource.h>
+#endif
+
+#include <miniFE_version.h>
+
+#include <outstream.hpp>
+
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+#include <Box.hpp>
+#include <BoxPartition.hpp>
+#include <box_utils.hpp>
+#include <Parameters.hpp>
+#include <utils.hpp>
+#include <driver.hpp>
+#include <YAML_Doc.hpp>
+
+#if MINIFE_INFO != 0
+#include <miniFE_info.hpp>
+#else
+#include <miniFE_no_info.hpp>
+#endif
+
+//The following macros should be specified as compile-macros in the
+//makefile. They are defaulted here just in case...
+#ifndef MINIFE_SCALAR
+#define MINIFE_SCALAR double
+#endif
+#ifndef MINIFE_LOCAL_ORDINAL
+#define MINIFE_LOCAL_ORDINAL int
+#endif
+#ifndef MINIFE_GLOBAL_ORDINAL
+#define MINIFE_GLOBAL_ORDINAL int
+#endif
+
+// ************************************************************************
+
+void add_params_to_yaml(YAML_Doc& doc, miniFE::Parameters& params);
+void add_configuration_to_yaml(YAML_Doc& doc, int numprocs, int numthreads);
+void add_timestring_to_yaml(YAML_Doc& doc);
+
+//
+//We will create a 'box' of size nx X ny X nz, partition it among processors,
+//then call miniFE::driver which will use the partitioned box as the domain
+//from which to assemble finite-element matrices into a global matrix and
+//vector, then solve the linear-system using Conjugate Gradients.
+//
+
+int main(int argc, char** argv) {
+  miniFE::Parameters params;
+  miniFE::get_parameters(argc, argv, params);  // populate params from argc/argv
+  // Serial defaults; presumably overwritten by initialize_mpi in an MPI build.
+  int numprocs = 1, myproc = 0;
+  miniFE::initialize_mpi(argc, argv, numprocs, myproc);
+  // Optional MPI_Pcontrol(0) call, compiled in only with USE_MPI_PCONTROL.
+#ifdef HAVE_MPI
+#ifdef USE_MPI_PCONTROL
+  MPI_Pcontrol(0);
+#endif
+#endif
+  // Reference point for the "Total Program Time" entry written at the end.
+  miniFE::timer_type start_time = miniFE::mytimer();
+
+#ifdef MINIFE_DEBUG
+  outstream(numprocs, myproc);
+#endif
+
+  //make sure each processor has the same parameters:
+  miniFE::broadcast_parameters(params);
+
+  // Global box: one {low, high} pair per axis, 0..nx, 0..ny, 0..nz.
+  Box global_box = { 0, params.nx, 0, params.ny, 0, params.nz };
+  std::vector<Box> local_boxes(numprocs);
+  // Fill one box per process. NOTE(review): meaning of the literal 0 and 2 is not visible here.
+  box_partition(0, numprocs, 2, global_box, &local_boxes[0]);
+
+  Box& my_box = local_boxes[myproc];  // the part of the domain this process works on
+  // Smallest id count over all processes (just the local count without MPI).
+  MINIFE_GLOBAL_ORDINAL num_my_ids = miniFE::get_num_ids<MINIFE_GLOBAL_ORDINAL>(my_box);
+  MINIFE_GLOBAL_ORDINAL min_ids = num_my_ids;
+
+#ifdef HAVE_MPI
+  MPI_Datatype mpi_dtype = miniFE::TypeTraits<MINIFE_GLOBAL_ORDINAL>::mpi_type();
+  MPI_Allreduce(&num_my_ids, &min_ids, 1, mpi_dtype, MPI_MIN, MPI_COMM_WORLD);
+#endif
+  // min_ids is the same on every process, so all of them take this exit together.
+  if (min_ids == 0) {
+    std::cout<<"One or more processors have 0 equations. Not currently supported. Exiting."<<std::endl;
+
+    miniFE::finalize_mpi();
+
+    return 1;
+  }
+  // Name passed to YAML_Doc: miniFE.<nx>x<ny>x<nz>[.P<numprocs>].[<name>.]
+  std::ostringstream osstr;
+  osstr << "miniFE." << params.nx << "x" << params.ny << "x" << params.nz;
+#ifdef HAVE_MPI
+  osstr << ".P"<<numprocs;
+#endif
+  osstr << ".";
+  if (params.name != "") osstr << params.name << ".";
+  // Every process builds a doc; only process 0 fills in the run description.
+  YAML_Doc doc("miniFE", MINIFE_VERSION, ".", osstr.str());
+  if (myproc == 0) {
+    add_params_to_yaml(doc, params);
+    add_configuration_to_yaml(doc, numprocs, params.numthreads);
+    add_timestring_to_yaml(doc);
+  }
+
+  //Most of the program is performed in the 'driver' function, which is
+  //templated on < Scalar, LocalOrdinal, GlobalOrdinal >.
+  //To run miniFE with float instead of double, or 'long long' instead of int,
+  //etc., change these template-parameters by changing the macro definitions in
+  //the makefile or on the make command-line.
+
+  int return_code =
+     miniFE::driver< MINIFE_SCALAR, MINIFE_LOCAL_ORDINAL, MINIFE_GLOBAL_ORDINAL>(global_box, my_box, params, doc);
+
+  miniFE::timer_type total_time = miniFE::mytimer() - start_time;
+  // Optional report of ru_maxrss: summed and maximised over processes with MPI.
+#ifdef MINIFE_REPORT_RUSAGE
+   struct rusage get_mem;
+   getrusage(RUSAGE_SELF, &get_mem);
+
+   long long int rank_rss = get_mem.ru_maxrss;
+   long long int global_rss = 0;
+   long long int max_rss = 0;
+
+#ifdef HAVE_MPI
+   MPI_Reduce(&rank_rss, &global_rss, 1,
+       	MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
+   MPI_Reduce(&rank_rss, &max_rss, 1,
+       	MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD);
+   if (myproc == 0) {
+        doc.add("Global All-RSS (kB)", global_rss);
+       	doc.add("Global Max-RSS (kB)", max_rss);
+   }
+#else
+   doc.add("RSS (kB)", rank_rss);
+#endif
+#endif
+  // Process 0 alone records the total time and emits the YAML report.
+  if (myproc == 0) {
+    doc.add("Total Program Time",total_time);
+    doc.generateYAML();
+  }
+
+  miniFE::finalize_mpi();
+  // The exit status is whatever miniFE::driver returned.
+  return return_code;
+}
+
+// Records the run parameters in doc under "Global Run Parameters":
+// the nx/ny/nz mesh extents (in a "dimensions" sub-entry), load_imbalance,
+// and mv_overlap_comm_comp rendered as "1 (yes)" or "0 (no)".
+void add_params_to_yaml(YAML_Doc& doc, miniFE::Parameters& params)
+{
+  doc.add("Global Run Parameters","");
+
+  // Mesh extents live in their own sub-entry.
+  doc.get("Global Run Parameters")->add("dimensions","");
+  doc.get("Global Run Parameters")->get("dimensions")->add("nx",params.nx);
+  doc.get("Global Run Parameters")->get("dimensions")->add("ny",params.ny);
+  doc.get("Global Run Parameters")->get("dimensions")->add("nz",params.nz);
+
+  doc.get("Global Run Parameters")->add("load_imbalance", params.load_imbalance);
+
+  // Any value other than 1 is reported as "0 (no)".
+  std::string overlap_text(params.mv_overlap_comm_comp == 1 ? "1 (yes)" : "0 (no)");
+  doc.get("Global Run Parameters")->add("mv_overlap_comm_comp", overlap_text);
+}
+
+// Records the process count plus "Platform" and "Build" sections in doc.
+// All platform/build values come from MINIFE_* macros; the compiler
+// version is only written when MINIFE_INFO != 0.
+// numthreads is accepted but not used.
+void add_configuration_to_yaml(YAML_Doc& doc, int numprocs, int numthreads)
+{
+  // Expects the "Global Run Parameters" entry to exist already.
+  doc.get("Global Run Parameters")->add("number of processors", numprocs);
+
+  // Where the binary was built.
+  doc.add("Platform","");
+  doc.get("Platform")->add("hostname",MINIFE_HOSTNAME);
+  doc.get("Platform")->add("kernel name",MINIFE_KERNEL_NAME);
+  doc.get("Platform")->add("kernel release",MINIFE_KERNEL_RELEASE);
+  doc.get("Platform")->add("processor",MINIFE_PROCESSOR);
+
+  // How the binary was built.
+  doc.add("Build","");
+  doc.get("Build")->add("CXX",MINIFE_CXX);
+#if MINIFE_INFO != 0
+  doc.get("Build")->add("compiler version",MINIFE_CXX_VERSION);
+#endif
+  doc.get("Build")->add("CXXFLAGS",MINIFE_CXXFLAGS);
+
+#ifdef HAVE_MPI
+  std::string mpi_flag("yes");
+#else
+  std::string mpi_flag("no");
+#endif
+  doc.get("Build")->add("using MPI",mpi_flag);
+}
+
+// Adds a "Run Date/Time" entry to doc holding the current local time
+// formatted as "YYYY-MM-DD, HH-MM-SS".
+//
+// std::localtime may return a null pointer when the time cannot be
+// converted; in that case (or if formatting fails) the entry is written
+// as "unknown" instead of dereferencing the null pointer.
+void add_timestring_to_yaml(YAML_Doc& doc)
+{
+  std::time_t rawtime;
+  std::time(&rawtime);
+  const struct tm * timeinfo = std::localtime(&rawtime);
+
+  std::string timestring("unknown");
+  if (timeinfo != 0) {
+    // Zero-padded two-digit fields, same layout as the former stream code.
+    char buffer[64];
+    if (std::strftime(buffer, sizeof(buffer), "%Y-%m-%d, %H-%M-%S", timeinfo) != 0) {
+      timestring = buffer;
+    }
+  }
+
+  doc.add("Run Date/Time",timestring);
+}
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/make_local_matrix.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/make_local_matrix.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/make_local_matrix.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/make_local_matrix.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,447 @@
+#ifndef _make_local_matrix_hpp_
+#define _make_local_matrix_hpp_
+#include <assert.h>
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <utils.hpp>
+
+#include <map>
+
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+namespace miniFE {
+
+// Converts the column indices of 'A' from global numbering to a
+// processor-local numbering and marks the matrix with
+// has_local_indices = true.
+//
+// Without HAVE_MPI, or when MPI reports fewer than two processes, only
+// A.num_cols (set to A.rows.size()) and A.has_local_indices are touched.
+//
+// With MPI and two or more processes the function additionally:
+//  - rewrites every column index: indices inside [A.rows[0], A.rows.back()]
+//    become (index - A.rows[0]); all other ("external") indices are numbered
+//    from local_nrow upwards, grouped by owning processor,
+//  - fills A.external_index and A.external_local_index,
+//  - fills A.neighbors, A.recv_length, A.send_length, A.elements_to_send,
+//    and sizes A.send_buffer and A.request,
+//  - sets A.num_cols to local_nrow + number of external indices.
+//
+// The MPI path uses MPI_Allreduce and point-to-point messages on
+// MPI_COMM_WORLD, so every rank has to call this function.
+// NOTE(review): the local/external test assumes the rows owned by this
+// rank form one contiguous global range -- confirm against the partitioner.
+template<typename MatrixType>
+void
+make_local_matrix(MatrixType& A)
+{
+#ifdef HAVE_MPI
+  int numprocs = 1, myproc = 0;
+  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
+  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+
+  if (numprocs < 2) {
+    A.num_cols = A.rows.size();
+    A.has_local_indices = true;
+    return;
+  }
+
+  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal;
+  typedef typename MatrixType::LocalOrdinalType LocalOrdinal;
+  typedef typename MatrixType::ScalarType Scalar;
+
+  std::map<GlobalOrdinal,GlobalOrdinal> externals;
+  LocalOrdinal num_external = 0;
+
+  //Extract Matrix pieces
+
+  size_t local_nrow = A.rows.size();
+  GlobalOrdinal start_row = local_nrow>0 ? A.rows[0] : -1;
+  GlobalOrdinal stop_row  = local_nrow>0 ? A.rows[local_nrow-1] : -1;
+
+  // We need to convert the index values for the rows on this processor
+  // to a local index space. We need to:
+  // - Determine if each index reaches to a local value or external value
+  // - If local, subtract start_row from index value to get local index
+  // - If external, find out if it is already accounted for.
+  //   - If so, then do nothing,
+  //   - otherwise
+  //     - add it to the list of external indices,
+  //     - find out which processor owns the value.
+  //     - Set up communication for sparse MV operation
+
+  ///////////////////////////////////////////
+  // Scan the indices and transform to local
+  ///////////////////////////////////////////
+
+  std::vector<GlobalOrdinal>& external_index = A.external_index;
+
+  for(size_t i=0; i<A.rows.size(); ++i) {
+    GlobalOrdinal* Acols = NULL;
+    Scalar* Acoefs = NULL;
+    size_t row_len = 0;
+    A.get_row_pointers(A.rows[i], row_len, Acols, Acoefs);
+
+    for(size_t j=0; j<row_len; ++j) {
+      GlobalOrdinal cur_ind = Acols[j];
+      if (start_row <= cur_ind && cur_ind <= stop_row) {
+        Acols[j] -= start_row;
+      }
+      else { // Must find out if we have already set up this point
+        if (externals.find(cur_ind) == externals.end()) {
+          externals[cur_ind] = num_external++;
+          external_index.push_back(cur_ind);
+        }
+        // Mark index as external by adding 1 and negating it
+        Acols[j] = -(Acols[j] + 1);
+      }
+    }
+  }
+
+  ////////////////////////////////////////////////////////////////////////
+  // Go through list of externals to find out which processors must be accessed.
+  ////////////////////////////////////////////////////////////////////////
+
+  std::vector<GlobalOrdinal> tmp_buffer(numprocs, 0); // Temp buffer space needed below
+
+  // Build list of global index offset
+
+  std::vector<GlobalOrdinal> global_index_offsets(numprocs, 0);
+
+  tmp_buffer[myproc] = start_row; // This is my start row
+
+  // This call sends the start_row of each ith processor to the ith
+  // entry of global_index_offsets on all processors.
+  // Thus, each processor knows the range of indices owned by all
+  // other processors.
+  // Note: There might be a better algorithm for doing this, but this
+  //       will work...
+
+  MPI_Datatype mpi_dtype = TypeTraits<GlobalOrdinal>::mpi_type();
+  MPI_Allreduce(&tmp_buffer[0], &global_index_offsets[0], numprocs, mpi_dtype,
+                MPI_SUM, MPI_COMM_WORLD);
+
+  // Go through list of externals and find the processor that owns each
+  std::vector<int> external_processor(num_external);
+
+  for(LocalOrdinal i=0; i<num_external; ++i) {
+    GlobalOrdinal cur_ind = external_index[i];
+    // Scan from the highest rank down; ranks with a negative offset
+    // (no local rows) are skipped.
+    for(int j=numprocs-1; j>=0; --j) {
+      if (global_index_offsets[j] <= cur_ind && global_index_offsets[j] >= 0) {
+        external_processor[i] = j;
+        break;
+      }
+    }
+  }
+
+  /////////////////////////////////////////////////////////////////////////
+  // Sift through the external elements. For each newly encountered external
+  // point assign it the next index in the sequence. Then look for other
+  // external elements who are updated by the same node and assign them the next
+  // set of index numbers in the sequence (ie. elements updated by the same node
+  // have consecutive indices).
+  /////////////////////////////////////////////////////////////////////////
+
+  size_t count = local_nrow;
+  std::vector<GlobalOrdinal>& external_local_index = A.external_local_index;
+  external_local_index.assign(num_external, -1);
+
+  for(LocalOrdinal i=0; i<num_external; ++i) {
+    if (external_local_index[i] == -1) {
+      external_local_index[i] = count++;
+
+      for(LocalOrdinal j=i+1; j<num_external; ++j) {
+        if (external_processor[j] == external_processor[i])
+          external_local_index[j] = count++;
+      }
+    }
+  }
+
+  for(size_t i=0; i<local_nrow; ++i) {
+    GlobalOrdinal* Acols = NULL;
+    Scalar* Acoefs = NULL;
+    size_t row_len = 0;
+    A.get_row_pointers(A.rows[i], row_len, Acols, Acoefs);
+
+    for(size_t j=0; j<row_len; ++j) {
+      if (Acols[j] < 0) { // Change index values of externals
+        GlobalOrdinal cur_ind = -Acols[j] - 1;
+        Acols[j] = external_local_index[externals[cur_ind]];
+      }
+    }
+  }
+
+  // Owner rank of each external, reordered to follow the new local numbering.
+  std::vector<int> new_external_processor(num_external, 0);
+
+  for(int i=0; i<num_external; ++i) {
+    new_external_processor[external_local_index[i]-local_nrow] =
+      external_processor[i];
+  }
+
+  ////////////////////////////////////////////////////////////////////////
+  ///
+  // Count the number of neighbors from which we receive information to update
+  // our external elements. Additionally, fill the array tmp_neighbors in the
+  // following way:
+  //      tmp_neighbors[i] = 0   ==>  No external elements are updated by
+  //                              processor i.
+  //      tmp_neighbors[i] = x   ==>  (x-1)/numprocs elements are updated from
+  //                              processor i.
+  ///
+  ////////////////////////////////////////////////////////////////////////
+
+  std::vector<GlobalOrdinal> tmp_neighbors(numprocs, 0);
+
+  int num_recv_neighbors = 0;
+  int length             = 1;
+
+  for(LocalOrdinal i=0; i<num_external; ++i) {
+    if (tmp_neighbors[new_external_processor[i]] == 0) {
+      ++num_recv_neighbors;
+      tmp_neighbors[new_external_processor[i]] = 1;
+    }
+    tmp_neighbors[new_external_processor[i]] += numprocs;
+  }
+
+  /// sum over all processor all the tmp_neighbors arrays ///
+
+  MPI_Allreduce(&tmp_neighbors[0], &tmp_buffer[0], numprocs, mpi_dtype,
+                MPI_SUM, MPI_COMM_WORLD);
+
+  // decode the combined 'tmp_neighbors' (stored in tmp_buffer)
+  // array from all the processors
+
+  GlobalOrdinal num_send_neighbors = tmp_buffer[myproc] % numprocs;
+
+  /// decode 'tmp_buffer[myproc] to deduce total number of elements
+  //  we must send
+
+  GlobalOrdinal total_to_be_sent = (tmp_buffer[myproc] - num_send_neighbors) / numprocs;
+
+  ///////////////////////////////////////////////////////////////////////
+  ///
+  // Make a list of the neighbors that will send information to update our
+  // external elements (in the order that we will receive this information).
+  ///
+  ///////////////////////////////////////////////////////////////////////
+
+  std::vector<int> recv_list;
+  // Only seed the list when there is at least one external element:
+  // reading new_external_processor[0] from an empty vector is out of
+  // bounds, and the stray entry would leave recv_list one element longer
+  // than num_recv_neighbors.
+  if (num_external > 0) {
+    recv_list.push_back(new_external_processor[0]);
+  }
+  for(LocalOrdinal i=1; i<num_external; ++i) {
+    if (new_external_processor[i-1] != new_external_processor[i]) {
+      recv_list.push_back(new_external_processor[i]);
+    }
+  }
+
+  //
+  // Send a 0 length message to each of our recv neighbors
+  //
+
+  std::vector<int> send_list(num_send_neighbors, 0);
+
+  //
+  // first post receives, these are immediate receives
+  // Do not wait for result to come, will do that at the
+  // wait call below.
+  //
+  int MPI_MY_TAG = 99;
+
+  std::vector<MPI_Request> request(num_send_neighbors);
+  for(int i=0; i<num_send_neighbors; ++i) {
+    MPI_Irecv(&tmp_buffer[i], 1, mpi_dtype, MPI_ANY_SOURCE, MPI_MY_TAG,
+              MPI_COMM_WORLD, &request[i]);
+  }
+
+  // send messages
+
+  for(int i=0; i<num_recv_neighbors; ++i) {
+    MPI_Send(&tmp_buffer[i], 1, mpi_dtype, recv_list[i], MPI_MY_TAG,
+             MPI_COMM_WORLD);
+  }
+
+  ///
+  // Receive message from each send neighbor to construct 'send_list'.
+  ///
+
+  MPI_Status status;
+  for(int i=0; i<num_send_neighbors; ++i) {
+    if (MPI_Wait(&request[i], &status) != MPI_SUCCESS) {
+      std::cerr << "MPI_Wait error\n"<<std::endl;
+      MPI_Abort(MPI_COMM_WORLD, -1);
+    }
+    send_list[i] = status.MPI_SOURCE;
+  }
+
+  //////////////////////////////////////////////////////////////////////
+  ///
+  // Compare the two lists. In most cases they should be the same.
+  // However, if they are not then add new entries to the recv list
+  // that are in the send list (but not already in the recv list).
+  ///
+  //////////////////////////////////////////////////////////////////////
+
+  for(int j=0; j<num_send_neighbors; ++j) {
+    int found = 0;
+    for(int i=0; i<num_recv_neighbors; ++i) {
+      if (recv_list[i] == send_list[j]) found = 1;
+    }
+
+    if (found == 0) {
+      recv_list.push_back(send_list[j]);
+      ++num_recv_neighbors;
+    }
+  }
+
+  num_send_neighbors = num_recv_neighbors;
+  request.resize(num_send_neighbors);
+
+  A.elements_to_send.assign(total_to_be_sent, 0);
+  A.send_buffer.assign(total_to_be_sent, 0);
+
+  //
+  // Create 'new_external' which explicitly put the external elements in the
+  // order given by 'external_local_index'
+  //
+
+  std::vector<GlobalOrdinal> new_external(num_external);
+  for(LocalOrdinal i=0; i<num_external; ++i) {
+    new_external[external_local_index[i] - local_nrow] = external_index[i];
+  }
+
+  /////////////////////////////////////////////////////////////////////////
+  //
+  // Send each processor the global index list of the external elements in the
+  // order that I will want to receive them when updating my external elements.
+  //
+  /////////////////////////////////////////////////////////////////////////
+
+  std::vector<int> lengths(num_recv_neighbors);
+
+  ++MPI_MY_TAG;
+
+  // First post receives
+
+  for(int i=0; i<num_recv_neighbors; ++i) {
+    int partner = recv_list[i];
+    MPI_Irecv(&lengths[i], 1, MPI_INT, partner, MPI_MY_TAG, MPI_COMM_WORLD,
+              &request[i]);
+  }
+
+  std::vector<int>& neighbors = A.neighbors;
+  std::vector<int>& recv_length = A.recv_length;
+  std::vector<int>& send_length = A.send_length;
+
+  neighbors.resize(num_recv_neighbors, 0);
+  A.request.resize(num_recv_neighbors);
+  recv_length.resize(num_recv_neighbors, 0);
+  send_length.resize(num_recv_neighbors, 0);
+
+  LocalOrdinal j = 0;
+  for(int i=0; i<num_recv_neighbors; ++i) {
+    int start = j;
+    int newlength = 0;
+
+    //go through list of external elements until updating
+    //processor changes
+
+    while((j < num_external) &&
+          (new_external_processor[j] == recv_list[i])) {
+      ++newlength;
+      ++j;
+      if (j == num_external) break;
+    }
+
+    recv_length[i] = newlength;
+    neighbors[i] = recv_list[i];
+
+    length = j - start;
+    MPI_Send(&length, 1, MPI_INT, recv_list[i], MPI_MY_TAG, MPI_COMM_WORLD);
+  }
+
+  // Complete the receives of the number of externals
+
+  for(int i=0; i<num_recv_neighbors; ++i) {
+    if (MPI_Wait(&request[i], &status) != MPI_SUCCESS) {
+      std::cerr << "MPI_Wait error\n"<<std::endl;
+      MPI_Abort(MPI_COMM_WORLD, -1);
+    }
+    send_length[i] = lengths[i];
+  }
+
+  ////////////////////////////////////////////////////////////////////////
+  // Build "elements_to_send" list. These are the x elements I own
+  // that need to be sent to other processors.
+  ////////////////////////////////////////////////////////////////////////
+
+  ++MPI_MY_TAG;
+
+  j = 0;
+  for(int i=0; i<num_recv_neighbors; ++i) {
+    MPI_Irecv(&A.elements_to_send[j], send_length[i], mpi_dtype, neighbors[i],
+              MPI_MY_TAG, MPI_COMM_WORLD, &request[i]);
+    j += send_length[i];
+  }
+
+  j = 0;
+  for(int i=0; i<num_recv_neighbors; ++i) {
+    LocalOrdinal start = j;
+    LocalOrdinal newlength = 0;
+
+    // Go through list of external elements
+    // until updating processor changes. This is redundant, but
+    // saves us from recording this information.
+
+    while((j < num_external) &&
+          (new_external_processor[j] == recv_list[i])) {
+      ++newlength;
+      ++j;
+      if (j == num_external) break;
+    }
+    MPI_Send(&new_external[start], j-start, mpi_dtype, recv_list[i],
+             MPI_MY_TAG, MPI_COMM_WORLD);
+  }
+
+  // receive from each neighbor the global index list of external elements
+
+  for(int i=0; i<num_recv_neighbors; ++i) {
+    if (MPI_Wait(&request[i], &status) != MPI_SUCCESS) {
+      std::cerr << "MPI_Wait error\n"<<std::endl;
+      MPI_Abort(MPI_COMM_WORLD, -1);
+    }
+  }
+
+  /// replace global indices by local indices ///
+
+  for(GlobalOrdinal i=0; i<total_to_be_sent; ++i) {
+    A.elements_to_send[i] -= start_row;
+    // Print the offending values first, then let the assert stop the run.
+    if (A.elements_to_send[i] >= A.rows.size()) {
+      std::cout<<"start_row: "<<start_row<<", A.elements_to_send[i]: "<<A.elements_to_send[i]<<", A.rows.size(): "<<A.rows.size()<<std::endl;
+      assert(A.elements_to_send[i] < A.rows.size());
+    }
+  }
+
+  //////////////////
+  // Finish up !!
+  //////////////////
+
+  A.num_cols = local_nrow + num_external;
+
+#else
+  A.num_cols = A.rows.size();
+#endif
+
+  A.has_local_indices = true;
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/matrix_algebra_3x3.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/matrix_algebra_3x3.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/matrix_algebra_3x3.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/matrix_algebra_3x3.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,166 @@
+#ifndef _matrix_algebra_3x3_hpp_
+#define _matrix_algebra_3x3_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#ifndef KERNEL_PREFIX
+#define KERNEL_PREFIX
+#endif
+
+namespace miniFE {
+
+// Assigns 'val' to every element of the half-open range [begin, end).
+template<typename Scalar>
+#ifdef __CUDACC__
+  __host__ __device__
+#endif
+KERNEL_PREFIX void fill(Scalar* begin, Scalar* end, const Scalar& val)
+{
+  for(Scalar* cursor = begin; cursor != end; ++cursor) {
+    *cursor = val;
+  }
+}
+
+// Computes the determinant and the inverse of the 3x3 matrix stored in the
+// nine entries of J. The determinant is written to detJ and the nine
+// entries of the inverse to invJ. No check is made for a zero determinant.
+template<typename Scalar>
+KERNEL_PREFIX void inverse_and_determinant3x3(const Scalar* J, Scalar* invJ, Scalar& detJ)
+{
+  //the "3x3" in the function-name means J and invJ hold 9 entries each.
+  //All of J is copied into locals before anything is written.
+
+  const Scalar j0 = J[0];
+  const Scalar j1 = J[1];
+  const Scalar j2 = J[2];
+  const Scalar j3 = J[3];
+  const Scalar j4 = J[4];
+  const Scalar j5 = J[5];
+  const Scalar j6 = J[6];
+  const Scalar j7 = J[7];
+  const Scalar j8 = J[8];
+
+  //2x2 sub-determinants shared by the determinant and the first
+  //three entries of the inverse:
+  const Scalar minor0 = j8*j4 - j7*j5;
+  const Scalar minor1 = j8*j1 - j7*j2;
+  const Scalar minor2 = j5*j1 - j4*j2;
+
+  detJ = j0*minor0 - j3*minor1 + j6*minor2;
+
+  const Scalar scale = 1.0/detJ;
+
+  invJ[0] =  minor0*scale;
+  invJ[1] = -minor1*scale;
+  invJ[2] =  minor2*scale;
+
+  invJ[3] = -(j8*j3 - j6*j5)*scale;
+  invJ[4] =  (j8*j0 - j6*j2)*scale;
+  invJ[5] = -(j5*j0 - j3*j2)*scale;
+
+  invJ[6] =  (j7*j3 - j6*j4)*scale;
+  invJ[7] = -(j7*j0 - j6*j1)*scale;
+  invJ[8] =  (j4*j0 - j3*j1)*scale;
+}
+
+// Computes the 3x3 product C = A*B. A, B and C each hold 9 entries and are
+// indexed as entry(row, col) = M[row + 3*col], i.e. columns are contiguous.
+template<typename Scalar>
+KERNEL_PREFIX void matmat3x3(const Scalar* A, const Scalar* B, Scalar* C)
+{
+  //C is cleared before the product is written into it.
+  const Scalar zero = 0;
+  miniFE::fill(C, C+9, zero);
+
+  for(int row=0; row<3; ++row) {
+    for(int col=0; col<3; ++col) {
+      const Scalar* Bcol = B + col*3;
+      C[row+col*3] = A[row+0]*Bcol[0]
+                   + A[row+3]*Bcol[1]
+                   + A[row+6]*Bcol[2];
+    }
+  }
+}
+
+// Returns the determinant of the 3x3 matrix stored in the nine entries of J.
+template<typename Scalar>
+KERNEL_PREFIX Scalar determinant3x3(const Scalar* J)
+{
+  //the "3x3" in the function-name means J holds 9 entries.
+
+  const Scalar j0 = J[0];
+  const Scalar j1 = J[1];
+  const Scalar j2 = J[2];
+  const Scalar j3 = J[3];
+  const Scalar j4 = J[4];
+  const Scalar j5 = J[5];
+  const Scalar j6 = J[6];
+  const Scalar j7 = J[7];
+  const Scalar j8 = J[8];
+
+  //2x2 sub-determinants used by the expansion below:
+  const Scalar minor0 = j8*j4 - j7*j5;
+  const Scalar minor1 = j8*j1 - j7*j2;
+  const Scalar minor2 = j5*j1 - j4*j2;
+
+  return j0*minor0 - j3*minor1 + j6*minor2;
+}
+
+// Computes C = A*B where A is 3x3 and B, C are 3xn, all stored with
+// contiguous columns. Column j of B and C occupies entries [3*j, 3*j+3).
+template<typename Scalar>
+KERNEL_PREFIX void matmat3x3_X_3xn(const Scalar* A, int n, const Scalar* B, Scalar* C)
+{
+  const Scalar* b = B;
+  Scalar* c = C;
+  //advance one 3-entry column of B and C per iteration
+  for(int col=0; col<n; ++col, b+=3, c+=3) {
+    c[0] = A[0]*b[0] + A[3]*b[1] + A[6]*b[2];
+    c[1] = A[1]*b[0] + A[4]*b[1] + A[7]*b[2];
+    c[2] = A[2]*b[0] + A[5]*b[1] + A[8]*b[2];
+  }
+}
+
+// Computes C = transpose(A)*B where A is 3x3 and B, C are 3xn, all stored
+// with contiguous columns. Column j of B and C occupies entries
+// [3*j, 3*j+3).
+template<typename Scalar>
+KERNEL_PREFIX void matTransMat3x3_X_3xn(const Scalar* A, int n, const Scalar* B, Scalar* C)
+{
+  const Scalar* b = B;
+  Scalar* c = C;
+  //advance one 3-entry column of B and C per iteration
+  for(int col=0; col<n; ++col, b+=3, c+=3) {
+    c[0] = A[0]*b[0] + A[1]*b[1] + A[2]*b[2];
+    c[1] = A[3]*b[0] + A[4]*b[1] + A[5]*b[2];
+    c[2] = A[6]*b[0] + A[7]*b[1] + A[8]*b[2];
+  }
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE.reference_output
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/miniFE.reference_output?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE.reference_output (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE.reference_output Mon Aug 21 16:54:28 2017
@@ -0,0 +1,14 @@
+Starting CG solver ... 
+Initial Residual = 65.0072
+Iteration = 20   Residual = 0.0409638
+Iteration = 40   Residual = 0.0116308
+Iteration = 60   Residual = 0.00330043
+Iteration = 80   Residual = 0.000152231
+Iteration = 100   Residual = 8.66762e-06
+Iteration = 120   Residual = 2.50799e-07
+Iteration = 140   Residual = 3.39847e-09
+Iteration = 160   Residual = 6.88585e-11
+Iteration = 180   Residual = 5.97875e-13
+Iteration = 200   Residual = 4.15881e-15
+Final Resid Norm: 4.15881e-15
+exit 0

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE_no_info.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/miniFE_no_info.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE_no_info.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE_no_info.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,39 @@
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#ifndef miniFE_no_info_hpp
+#define miniFE_no_info_hpp
+
+// Placeholder platform description: every field is the literal "unknown".
+#define MINIFE_HOSTNAME "unknown"
+#define MINIFE_KERNEL_NAME "unknown"
+#define MINIFE_KERNEL_RELEASE "unknown"
+#define MINIFE_PROCESSOR "unknown"
+
+// Placeholder compiler name and compiler flags.
+#define MINIFE_CXX "unknown"
+#define MINIFE_CXXFLAGS "unknown"
+
+#endif

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE_version.h
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/miniFE_version.h?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE_version.h (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE_version.h Mon Aug 21 16:54:28 2017
@@ -0,0 +1,35 @@
+#ifndef _minife_version_h_
+#define _minife_version_h_
+
+//@HEADER
+// ************************************************************************
+// 
+//               miniFE: simple finite-element assembly and linear-solve
+//                 Copyright (2006) Sandia Corporation
+// 
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+// 
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//  
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//  
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+// Questions? Contact Michael A. Heroux (maherou at sandia.gov) 
+// 
+// ************************************************************************
+//@HEADER
+
+// Version string of this miniFE source.
+#define MINIFE_VERSION "2.0"
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/mytimer.cpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/mytimer.cpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/mytimer.cpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/mytimer.cpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,132 @@
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <cstddef>
+#include <cstdlib>
+#include <mytimer.hpp>
+
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+namespace miniFE {
+
+/////////////////////////////////////////////////////////////////////////
+
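+// Function to return time in seconds.
+// If compiled with no flags, returns wall-clock time read with gettimeofday.
+// If compiled with -DWALL, returns elapsed time since the first call.
+// -DUseClock and -DUseTimes select CPU-time readings via clock() and times().
+// With HAVE_MPI and -DUSE_MPI_WTIME, MPI_Wtime() is used instead.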
+
+/////////////////////////////////////////////////////////////////////////
+
+#if defined(HAVE_MPI) && defined(USE_MPI_WTIME)
+
+// MPI variant: reports the value of MPI_Wtime() as a timer_type.
+timer_type mytimer()
+{
+  const double mpi_seconds = MPI_Wtime();
+  return static_cast<timer_type>(mpi_seconds);
+}
+
+
+#elif defined(UseClock)
+
+// clock(), clock_t and CLOCKS_PER_SEC come from the C library header
+// <time.h>; there is no header named <time.hpp>.
+#include <time.h>
+
+// UseClock variant: returns the processor time, in seconds, that clock()
+// reports as consumed since the first call to this function.
+timer_type mytimer(void)
+{
+   // Reference reading taken on the first call; 0 means "not taken yet".
+   static clock_t t0=0;
+   static timer_type CPS = CLOCKS_PER_SEC;
+
+   if (t0 == 0) t0 = clock();
+   const clock_t elapsed_ticks = clock() - t0;
+   return(elapsed_ticks / CPS);
+}
+
+#elif defined(WALL)
+
+#include <cstdlib>
+#include <sys/time.h>
+#include <sys/resource.h>
+// WALL variant: the first call stores the current time of day and returns
+// 0; every later call returns the seconds elapsed since that first call.
+timer_type mytimer(void)
+{
+   // start_sec == 0 doubles as the "reference not stored yet" marker.
+   static long start_sec=0, start_usec;
+
+   struct timeval now;
+   gettimeofday(&now, NULL);
+
+   if (!start_sec)
+   {
+      start_sec = now.tv_sec;
+      start_usec = now.tv_usec;
+      return(0.0);
+   }
+
+   const timer_type whole_seconds = (timer_type) (now.tv_sec - start_sec);
+   return( whole_seconds + (now.tv_usec-start_usec)/1000000.0 );
+}
+
+#elif defined(UseTimes)
+
+#include <cstdlib>
+#include <sys/times.h>
+#include <unistd.h>
+// UseTimes variant: returns the tms_utime value reported by times(),
+// divided by the clock-tick rate obtained from sysconf(_SC_CLK_TCK).
+timer_type mytimer(void)
+{
+   // The tick rate is queried once and cached; 0.0 means "not queried yet".
+   static timer_type ticks_per_second=0.0;
+   if (ticks_per_second == 0.0) {
+      ticks_per_second = (timer_type) sysconf(_SC_CLK_TCK);
+   }
+
+   struct tms usage;
+   times(&usage);
+   return( (timer_type) usage.tms_utime / ticks_per_second );
+}
+
+#else
+
+#include <cstdlib>
+#include <sys/time.h>
+#include <sys/resource.h>
+// Default variant: returns the current time of day, in seconds (with a
+// microsecond fractional part), as reported by gettimeofday.
+timer_type mytimer(void)
+{
+//This function now uses gettimeofday instead of getrusage. See note below.
+//
+  struct timeval tv;
+  // The timezone argument is passed as NULL: POSIX leaves the behavior
+  // unspecified for a non-null pointer, and the value was never used.
+  gettimeofday(&tv, NULL);
+  return ( (timer_type)tv.tv_sec + tv.tv_usec/1000000.0 );
+
+//The below use of 'getrusage' is not used because it doesn't do the right thing
+//for the case of using threads. It adds up the time spent in multiple threads,
+//rather than giving elapsed time.
+//
+//   struct rusage ruse;
+//   getrusage(RUSAGE_SELF, &ruse);
+//   return( (timer_type)(ruse.ru_utime.tv_sec+ruse.ru_utime.tv_usec / 1000000.0) );
+}
+
+#endif
+
+}//namespace miniFE
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/mytimer.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/mytimer.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/mytimer.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/mytimer.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,52 @@
+#ifndef _mytimer_hpp_
+#define _mytimer_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+namespace miniFE {
+
+// Type in which timer readings are expressed.
+typedef double timer_type;
+
+// Returns a timer reading.
+timer_type mytimer();
+
+// Names for the individual timers; the enumerators count up from 0, so
+// NUM_TIMERS (5) equals the number of timers listed before it.
+enum CG_TIMES {
+  WAXPY = 0,
+  DOT,
+  MATVEC,
+  MATVECDOT,
+  TOTAL,
+  NUM_TIMERS
+};
+
+//TICK stores a timer reading in a variable named t0, and TOCK(t) adds the
+//time since that reading to t. A variable t0 must be visible where the
+//macros are used. Both expansions already end with a semicolon.
+#define TICK() t0 = mytimer();
+#define TOCK(t) t += mytimer() - t0;
+
+}//namespace miniFE
+
+#endif

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/outstream.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/outstream.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/outstream.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/outstream.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,45 @@
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#ifndef _outstream_hpp_
+#define _outstream_hpp_
+
+#include <fstream>
+#include <sstream>
+
+inline std::ostream& outstream(int np=1, int p=0) {
+  static std::ofstream debug_file; //one stream shared by every call
+  static bool opened = false;
+  if (opened) return debug_file;   //np and p only matter on the first call
+  opened = true;
+  std::ostringstream name;
+  name << "minife_debug." << np << "." << p;
+  debug_file.open(name.str().c_str(), std::ios::out);
+  return debug_file;
+}
+
+#endif

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/param_utils.cpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/param_utils.cpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/param_utils.cpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/param_utils.cpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,58 @@
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <param_utils.hpp>
+
+#include <sstream>
+#include <fstream>
+
+namespace Mantevo {
+
+//-------------------------------------------------------------
+void read_args_into_string(int argc, char** argv, std::string& arg_string)
+{
+  arg_string.clear(); //argc==0 yields "" (argv[0] is never read in that case)
+  for(int i=0; i<argc; ++i) {
+    arg_string.append(i == 0 ? "" : " ").append(argv[i]); //single-space separated
+  }
+}
+
+//-------------------------------------------------------------
+void read_file_into_string(const std::string& filename,
+                           std::string& file_contents)
+{
+  file_contents.clear(); //previous contents are discarded, not extended
+  std::ifstream ifs(filename.c_str());
+  std::string line; //std::getline grows it as needed: no line-length limit
+  while(ifs.good()) { //unlike !eof(), also stops on an unopenable file or read error
+    std::getline(ifs, line);
+    file_contents += " " + line; //each line is prefixed by one space
+  }
+}
+
+}//namespace Mantevo
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/param_utils.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/param_utils.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/param_utils.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/param_utils.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,160 @@
+#ifndef _param_utils_hpp_
+#define _param_utils_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <string>
+#include <sstream>
+
+//Parameter-parsing Utilities:
+//
+//The functions declared below are intended to assist with parsing
+//input-parameters which may be command-line arguments and/or lines in a
+//text file.
+//
+// Scenario: You want your program to accept parameters that are specified
+// as command-line arguments and/or as lines in a text file (such
+// as a YAML output file). i.e., your program can be run like this:
+// % program.exe foo=3.14159 bar: 42
+// or
+// % program.exe input_file=params.txt
+// or
+// % program.exe foo=3.14159 input_file = params.txt
+//
+//Example:
+// Here is example code to obtain parameters using the 3 functions
+// 'read_args_into_string', 'read_file_into_string' and 'parse_parameter':
+//
+//   std::string arg_string;
+//
+//   //put command-line-arguments into 'arg_string':
+//   read_args_into_string(argc, argv, arg_string);
+//
+//   //do the command-line-arguments specify an 'input_file'?
+//   std::string filename =
+//      parse_parameter<std::string>(arg_string,"input_file","none-specified");
+//
+//   if (filename != "none-specified") {
+//     std::string tmp;
+//     read_file_into_string(filename, tmp);
+//     arg_string += tmp;
+//   }
+//
+//  //now parse the parameters:
+//  float foo = parse_parameter<float>(arg_string, "foo", -9.9);
+//  int bar   = parse_parameter<int>(arg_string, "bar", -1);
+//
+//See the comments below for parse_parameter, for formatting requirements of
+//named parameter-value pairs.
+//
+
+namespace Mantevo {
+
+/**
+ * Concatenate command-line arguments into a single string.
+ *
+ * Note: this function is purely serial. If argc and argv have different
+ * values on different MPI processes, then you need to resolve that by
+ * broadcasting arg_string's contents.
+ */
+void read_args_into_string(int argc, char** argv, std::string& arg_string);
+
+/**
+ * Read the contents of a text-file into a single string.
+ *
+ * Note: this function is purely serial. If you want file_contents on multiple
+ * MPI processes, you need to broadcast it (or call this function on each
+ * MPI process...).
+ */
+void read_file_into_string(const std::string& filename,
+                           std::string& file_contents);
+
+/**
+ * Parse a named parameter value from input 'arg_string'.
+ *
+ * Search 'arg_string' for an occurrence of param_name and attempt to parse
+ * a value into the return-type. If param_name is not found, then default_value
+ * is returned.
+ *
+ * Example:
+ * arg_string = "foo = 3.14159";
+ * float foo = parse_parameter<float>(arg_string, "foo", -999.9);
+ * //foo should now contain the value 3.14159; if 'foo' was not found in
+ * //arg_string, then -999.9 would have been returned.
+ *
+ * Other legal name-value separators are ':' and ' '. Extra spaces are also ok,
+ * e.g. "foo : 3.14159".
+ *
+ * Note that if a YAML file is read into a string, that would be a valid input
+ * string for this function.
+ */
+template<typename T>
+T parse_parameter(const std::string& arg_string,
+                const std::string& param_name,
+                const T& default_value)
+{
+  //Only the first occurrence of param_name is used, and it is matched as a
+  //plain substring: it need not start a word in arg_string. Every path that
+  //cannot produce a value returns default_value.
+  const std::string::size_type name_pos = arg_string.find(param_name);
+  if (name_pos == std::string::npos) {
+    //param_name does not appear in arg_string at all:
+    return default_value;
+  }
+
+  //Step over the run of ' ', '=' and ':' between the name and its value.
+  //find_first_not_of yields npos when the name ends the string or when only
+  //separators follow it; there is no value to parse in either case.
+  const std::string::size_type value_pos =
+      arg_string.find_first_not_of(" =:", name_pos + param_name.size());
+  if (value_pos == std::string::npos) {
+    return default_value;
+  }
+
+  //Parse from the first value character on; operator>> for T decides how
+  //much of the remaining text it consumes.
+  std::istringstream value_stream(arg_string.substr(value_pos));
+
+  //Start from the default so T never needs a default constructor.
+  T parsed_value = default_value;
+  value_stream >> parsed_value;
+
+  //A failed extraction may still have modified parsed_value, so return the
+  //caller's default itself rather than whatever is left in the local.
+  if (value_stream.fail()) {
+    return default_value;
+  }
+
+  //Extraction succeeded:
+  return parsed_value;
+}
+
+}//namespace Mantevo
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/perform_element_loop.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/perform_element_loop.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/perform_element_loop.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/perform_element_loop.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,109 @@
+#ifndef _perform_element_loop_hpp_
+#define _perform_element_loop_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <BoxIterator.hpp>
+#include <simple_mesh_description.hpp>
+#include <SparseMatrix_functions.hpp>
+#include <box_utils.hpp>
+#include <Hex8_box_utils.hpp>
+#include <Hex8_ElemData.hpp>
+
+namespace miniFE {
+
+template<typename GlobalOrdinal,
+         typename MatrixType, typename VectorType>
+void
+perform_element_loop(const simple_mesh_description<GlobalOrdinal>& mesh,
+                     const Box& local_elem_box,
+                     MatrixType& A, VectorType& b,
+                     Parameters& /*params*/)
+{
+  typedef typename MatrixType::ScalarType scalar_t;
+
+  //Upper bounds of the global box in x, y and z; they are handed to get_id
+  //together with each cell's position to obtain that cell's element-ID.
+  int nx_global = mesh.global_box[0][1];
+  int ny_global = mesh.global_box[1][1];
+  int nz_global = mesh.global_box[2][1];
+
+  //Pass 1: walk the local-element-box (local portion of the mesh) and
+  //record the ID of every element in it, for the assembly pass below.
+  GlobalOrdinal local_count = get_num_ids<GlobalOrdinal>(local_elem_box);
+  std::vector<GlobalOrdinal> elem_ids(local_count);
+
+  BoxIterator cell = BoxIterator::begin(local_elem_box);
+  BoxIterator stop = BoxIterator::end(local_elem_box);
+  size_t slot = 0;
+  while(cell != stop) {
+    elem_ids[slot] = get_id<GlobalOrdinal>(nx_global, ny_global, nz_global,
+                                           cell.x, cell.y, cell.z);
+//#ifdef MINIFE_DEBUG
+//std::cout << "elem ID " << elem_ids[slot] << " ("<<cell.x<<","<<cell.y<<","<<cell.z<<")"<<std::endl;
+//#endif
+    ++cell;
+    ++slot;
+  }
+
+  //Pass 2: the actual finite-element assembly loop. One ElemData object is
+  //reused for every element; its gradient values are computed once, up front.
+  ElemData<GlobalOrdinal,scalar_t> elem_data;
+  compute_gradient_values(elem_data.grad_vals);
+
+  //Accumulated time per phase; TICK/TOCK expect the start mark to be named t0.
+  timer_type t0 = 0;
+  timer_type time_get_nodes = 0, time_compute = 0, time_sum_in = 0;
+
+  const size_t elem_total = elem_ids.size();
+  for(size_t e=0; e != elem_total; ++e) {
+    //(a) given the element-id, fill elem_data with node_ids and nodal-coords:
+    TICK();
+    get_elem_nodes_and_coords(mesh, elem_ids[e], elem_data);
+    TOCK(time_get_nodes);
+
+    //(b) compute the element-diffusion-matrix and element-source-vector:
+    TICK();
+    compute_element_matrix_and_vector(elem_data);
+    TOCK(time_compute);
+
+    //(c) assemble the (dense) element-matrix and element-vector into the
+    //global sparse linear system:
+    TICK();
+    sum_into_global_linear_system(elem_data, A, b);
+    TOCK(time_sum_in);
+  }
+//std::cout << std::endl<<"get-nodes: " << time_get_nodes << std::endl;
+//std::cout << "compute-elems: " << time_compute << std::endl;
+//std::cout << "sum-in: " << time_sum_in << std::endl;
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/simple_mesh_description.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/simple_mesh_description.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/simple_mesh_description.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/simple_mesh_description.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,248 @@
+
+#ifndef _simple_mesh_description_hpp_
+#define _simple_mesh_description_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <utils.hpp>
+#include <set>
+#include <map>
+
+namespace miniFE {
+
+template<typename GlobalOrdinal>
+class simple_mesh_description { //holds the global/local boxes, a node-ID-to-row map and the boundary-condition row sets
+public:
+  simple_mesh_description(const Box& global_box_in, const Box& local_box_in)
+  {
+   Box local_node_box; //starts as a copy of local_box_in, then grows by one at the global high end
+    for(int i=0; i<3; ++i) {
+      global_box[i][0] = global_box_in[i][0];
+      global_box[i][1] = global_box_in[i][1];
+      local_box[i][0] = local_box_in[i][0];
+      local_box[i][1] = local_box_in[i][1];
+      local_node_box[i][0] = local_box_in[i][0];
+      local_node_box[i][1] = local_box_in[i][1];
+      //num-owned-nodes == num-elems+1 in this dimension if the elem box is not empty
+      //and we are at the high end of the global range in that dimension:
+      if (local_box_in[i][1] > local_box_in[i][0] && local_box_in[i][1] == global_box[i][1]) local_node_box[i][1] += 1;
+    }
+
+    int max_node_x = global_box[0][1]+1; //global upper bound plus one, per direction
+    int max_node_y = global_box[1][1]+1;
+    int max_node_z = global_box[2][1]+1;
+    create_map_id_to_row(max_node_x, max_node_y, max_node_z, local_node_box,
+                         map_ids_to_rows); //NOTE(review): presumably fills map_ids_to_rows -- confirm in its definition
+
+    //As described in analytic_soln.hpp,
+    //we will impose a 0 boundary-condition on faces x=0, y=0, z=0, y=1, z=1
+    //we will impose a 1 boundary-condition on face x=1
+
+#ifdef MINIFE_DEBUG
+std::cout<<std::endl;
+#endif
+    const int X=0;
+    const int Y=1;
+    const int Z=2;
+
+    const int x1 = max_node_x - 1; //node coordinate used for the "x=1" face below
+    const int y1 = max_node_y - 1; //node coordinate used for the "y=1" face below
+    const int z1 = max_node_z - 1; //node coordinate used for the "z=1" face below
+
+    //if we're on the x=0 face (rows go into bc_rows_0):
+    if (global_box[X][0] == local_box[X][0]) {
+      int miny = local_node_box[Y][0];
+      int minz = local_node_box[Z][0];
+      int maxy = local_node_box[Y][1];
+      int maxz = local_node_box[Z][1];
+      //expand y and z dimensions to include ghost layer
+      if (local_node_box[Y][0] > 0) --miny;
+      if (local_node_box[Z][0] > 0) --minz;
+      if (local_node_box[Y][1] < max_node_y) ++maxy;
+      if (local_node_box[Z][1] < max_node_z) ++maxz;
+
+      for(int iz=minz; iz<maxz; ++iz) {
+        for(int iy=miny; iy<maxy; ++iy) {
+          GlobalOrdinal nodeID = get_id<GlobalOrdinal>(max_node_x, max_node_y, max_node_z,
+             0, iy, iz);
+#ifdef MINIFE_DEBUG
+std::cout<<"x=0 BC, node "<<nodeID<<", (0,"<<iy<<","<<iz<<")"<<std::endl;
+#endif
+          bc_rows_0.insert(map_id_to_row(nodeID));
+        }
+      }
+    }
+
+    //if we're on the y=0 face (rows go into bc_rows_0):
+    if (global_box[Y][0] == local_box[Y][0]) {
+      int minx = local_node_box[X][0];
+      int minz = local_node_box[Z][0];
+      int maxx = local_node_box[X][1];
+      int maxz = local_node_box[Z][1];
+      //expand x and z dimensions to include ghost layer
+      if (local_node_box[X][0] > 0) --minx;
+      if (local_node_box[Z][0] > 0) --minz;
+      if (local_node_box[X][1] < max_node_x) ++maxx;
+      if (local_node_box[Z][1] < max_node_z) ++maxz;
+
+      for(int iz=minz; iz<maxz; ++iz) {
+        for(int ix=minx; ix<maxx; ++ix) {
+          GlobalOrdinal nodeID = get_id<GlobalOrdinal>(max_node_x, max_node_y, max_node_z,
+             ix, 0, iz);
+#ifdef MINIFE_DEBUG
+std::cout<<"y=0 BC, node "<<nodeID<<", ("<<ix<<",0,"<<iz<<")"<<std::endl;
+#endif
+          GlobalOrdinal row = map_id_to_row(nodeID);
+          if (row < 0) {
+            std::cout<<"on the y==0 face (ix="<<ix<<", iz="<<iz<<"), ERROR: found negative row ("<<row<<") for nodeID="<<nodeID<<std::endl;
+          }
+          bc_rows_0.insert(row); //inserted even when negative: the message above is only a diagnostic
+        }
+      }
+    }
+
+    //if we're on the z=0 face (rows go into bc_rows_0):
+    if (global_box[Z][0] == local_box[Z][0]) {
+      int minx = local_node_box[X][0];
+      int miny = local_node_box[Y][0];
+      int maxx = local_node_box[X][1];
+      int maxy = local_node_box[Y][1];
+      //expand x and y dimensions to include ghost layer
+      if (local_node_box[X][0] > 0) --minx;
+      if (local_node_box[Y][0] > 0) --miny;
+      if (local_node_box[X][1] < max_node_x) ++maxx;
+      if (local_node_box[Y][1] < max_node_y) ++maxy;
+
+      for(int iy=miny; iy<maxy; ++iy) {
+        for(int ix=minx; ix<maxx; ++ix) {
+          GlobalOrdinal nodeID = get_id<GlobalOrdinal>(max_node_x, max_node_y, max_node_z,
+             ix, iy, 0);
+#ifdef MINIFE_DEBUG
+std::cout<<"z=0 BC, node "<<nodeID<<", ("<<ix<<","<<iy<<",0)"<<std::endl;
+#endif
+          bc_rows_0.insert(map_id_to_row(nodeID));
+        }
+      }
+    }
+
+    //if we're on the x=1 face (the only face whose rows go into bc_rows_1):
+    if (global_box[X][1] == local_box[X][1]) {
+      int minz = local_node_box[Z][0];
+      int miny = local_node_box[Y][0];
+      int maxz = local_node_box[Z][1];
+      int maxy = local_node_box[Y][1];
+      //expand z and y dimensions to include ghost layer
+      if (local_node_box[Z][0] > 0) --minz;
+      if (local_node_box[Y][0] > 0) --miny;
+      if (local_node_box[Z][1] < max_node_z) ++maxz;
+      if (local_node_box[Y][1] < max_node_y) ++maxy;
+
+      for(int iy=miny; iy<maxy; ++iy) {
+        for(int iz=minz; iz<maxz; ++iz) {
+          GlobalOrdinal nodeID = get_id<GlobalOrdinal>(max_node_x, max_node_y, max_node_z,
+             x1, iy, iz);
+          GlobalOrdinal row = map_id_to_row(nodeID);
+#ifdef MINIFE_DEBUG
+std::cout<<"x=1 BC, node "<<nodeID<<", row "<<row<<", ("<<x1<<","<<iy<<","<<iz<<")"<<std::endl;
+#endif
+          bc_rows_1.insert(row);
+        }
+      }
+    }
+
+    //if we're on the y=1 face (rows go into bc_rows_0):
+    if (global_box[Y][1] == local_box[Y][1]) {
+      int minz = local_node_box[Z][0];
+      int minx = local_node_box[X][0];
+      int maxz = local_node_box[Z][1];
+      int maxx = local_node_box[X][1];
+      //expand z and x dimensions to include ghost layer
+      if (local_node_box[Z][0] > 0) --minz;
+      if (local_node_box[X][0] > 0) --minx;
+      if (local_node_box[Z][1] < max_node_z) ++maxz;
+      if (local_node_box[X][1] < max_node_x) ++maxx;
+
+      for(int ix=minx; ix<maxx; ++ix) {
+        for(int iz=minz; iz<maxz; ++iz) {
+          GlobalOrdinal nodeID = get_id<GlobalOrdinal>(max_node_x, max_node_y, max_node_z,
+             ix, y1, iz);
+#ifdef MINIFE_DEBUG
+std::cout<<"y=1 BC, node "<<nodeID<<", ("<<ix<<","<<y1<<","<<iz<<")"<<std::endl;
+#endif
+          bc_rows_0.insert(map_id_to_row(nodeID));
+        }
+      }
+    }
+
+    //if we're on the z=1 face (rows go into bc_rows_0):
+    if (global_box[Z][1] == local_box[Z][1]) {
+      int miny = local_node_box[Y][0];
+      int minx = local_node_box[X][0];
+      int maxy = local_node_box[Y][1];
+      int maxx = local_node_box[X][1];
+      //expand x and y dimensions to include ghost layer
+      if (local_node_box[Y][0] > 0) --miny;
+      if (local_node_box[X][0] > 0) --minx;
+      if (local_node_box[Y][1] < max_node_y) ++maxy;
+      if (local_node_box[X][1] < max_node_x) ++maxx;
+
+      for(int ix=minx; ix<maxx; ++ix) {
+        for(int iy=miny; iy<maxy; ++iy) {
+          GlobalOrdinal nodeID = get_id<GlobalOrdinal>(max_node_x, max_node_y, max_node_z,
+             ix, iy, z1);
+#ifdef MINIFE_DEBUG
+std::cout<<"z=1 BC, node "<<nodeID<<", ("<<ix<<","<<iy<<","<<z1<<")"<<std::endl;
+#endif
+          bc_rows_0.insert(map_id_to_row(nodeID));
+        }
+      }
+    }
+
+  }
+
+  GlobalOrdinal map_id_to_row(const GlobalOrdinal& id) const
+  {
+    return find_row_for_id(id, map_ids_to_rows); //the constructor's y=0 code reports a negative result as an error
+  }
+
+  GlobalOrdinal max_row_in_map() const {
+    if (map_ids_to_rows.empty()) return 0; //empty map: report row 0
+    typename std::map<GlobalOrdinal,GlobalOrdinal>::const_iterator mend = map_ids_to_rows.end();
+    --mend; //step back to the entry with the largest key
+    return mend->second; //the row stored under that largest key
+  }
+  std::set<GlobalOrdinal> bc_rows_0; //rows gathered from faces x=0, y=0, z=0, y=1 and z=1
+  std::set<GlobalOrdinal> bc_rows_1; //rows gathered from face x=1
+  std::map<GlobalOrdinal,GlobalOrdinal> map_ids_to_rows; //passed to create_map_id_to_row and find_row_for_id
+  Box global_box; //copy of the constructor's global_box_in
+  Box local_box; //copy of the constructor's local_box_in
+};//class simple_mesh_description
+
+}//namespace miniFE
+
+#endif

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/time_kernels.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/time_kernels.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/time_kernels.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/time_kernels.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,139 @@
+#ifndef _time_kernels_hpp_
+#define _time_kernels_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <cmath>
+
+#include <Vector_functions.hpp>
+#include <mytimer.hpp>
+
+#ifdef MINIFE_HAVE_CUDA
+#include <cuda.h>
+#endif
+
+namespace miniFE {
+
+template<typename OperatorType,
+         typename VectorType,
+         typename Matvec>
+void
+time_kernels(OperatorType& A, //times max_iter runs each of waxpby, matvec and dot
+             const VectorType& b,
+             VectorType& x,
+             Matvec matvec, //callable invoked as matvec(A, p, x)
+             typename OperatorType::LocalOrdinalType max_iter, //repetitions of each kernel
+             typename OperatorType::ScalarType& xdotp, //out: sum of the max_iter dot(x, p) results
+             timer_type* my_kern_times) //out: slots WAXPY, DOT, MATVEC and TOTAL are written
+{
+  typedef typename OperatorType::ScalarType ScalarType;
+  typedef typename OperatorType::LocalOrdinalType OrdinalType;
+  typedef typename TypeTraits<ScalarType>::magnitude_type magnitude_type;
+
+  timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0; //t0 is the start mark that TICK/TOCK use
+
+  int myproc = 0; //not read again in this function
+#ifdef HAVE_MPI
+  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+#endif
+
+  if (!A.has_local_indices) {
+    std::cerr << "miniFE::time_kernels ERROR, A.has_local_indices is false, needs to be true. This probably means "
+       << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::time_kernels."
+       << std::endl;
+    return; //nothing is timed and the outputs are left untouched
+  }
+
+  OrdinalType nrows = A.rows.size(); //not read again in this function
+  OrdinalType ncols = A.num_cols;
+
+  VectorType p(0, ncols, b.compute_node); //work vector: written by waxpby, read by matvec and dot
+
+  ScalarType one = 1.0;
+  ScalarType zero = 0.0;
+
+  typedef typename VectorType::ComputeNodeType ComputeNodeType;
+  ComputeNodeType& compute_node = x.compute_node;
+
+  //The following lines that create and initialize buffers are no-ops in many
+  //cases, but perform actual allocations and copies if an off-cpu device such as
+  //a GPU is being used by compute_node.
+
+  //Do any required allocations for buffers that will be needed during CG:
+  ScalarType* d_x = compute_node.get_buffer(&x.coefs[0], x.coefs.size());
+  ScalarType* d_p = compute_node.get_buffer(&p.coefs[0], p.coefs.size());
+  ScalarType* d_b = compute_node.get_buffer(&b.coefs[0], b.coefs.size());
+  OrdinalType* d_Arowoff = compute_node.get_buffer(&A.row_offsets[0], A.row_offsets.size());
+  OrdinalType* d_Acols   = compute_node.get_buffer(&A.packed_cols[0], A.packed_cols.size());
+  ScalarType* d_Acoefs  = compute_node.get_buffer(&A.packed_coefs[0], A.packed_coefs.size());
+
+  //Copy data to buffers that need to be initialized from input data (d_p is not copied):
+  compute_node.copy_to_buffer(&x.coefs[0], x.coefs.size(), d_x);
+  compute_node.copy_to_buffer(&b.coefs[0], b.coefs.size(), d_b);
+  compute_node.copy_to_buffer(&A.row_offsets[0], A.row_offsets.size(), d_Arowoff);
+  compute_node.copy_to_buffer(&A.packed_cols[0], A.packed_cols.size(), d_Acols);
+  compute_node.copy_to_buffer(&A.packed_coefs[0], A.packed_coefs.size(), d_Acoefs);
+
+  TICK();
+  for(OrdinalType i=0; i<max_iter; ++i) {
+    waxpby(one, x, zero, x, p); //NOTE(review): presumably p = one*x + zero*x -- confirm in Vector_functions.hpp
+  }
+#ifdef MINIFE_HAVE_CUDA
+  cudaThreadSynchronize();
+#endif
+  TOCK(tWAXPY);
+
+  TICK();
+  for(OrdinalType i=0; i<max_iter; ++i) {
+    matvec(A, p, x);
+  }
+#ifdef MINIFE_HAVE_CUDA
+  cudaThreadSynchronize();
+#endif
+  TOCK(tMATVEC);
+
+  TICK();
+  xdotp = 0; //the reset is inside the timed section
+  for(OrdinalType i=0; i<max_iter; ++i) {
+    xdotp += dot(x, p);
+  }
+#ifdef MINIFE_HAVE_CUDA
+  cudaThreadSynchronize();
+#endif
+  TOCK(tDOT);
+
+  my_kern_times[WAXPY] = tWAXPY;
+  my_kern_times[DOT] = tDOT;
+  my_kern_times[MATVEC] = tMATVEC;
+  my_kern_times[TOTAL] = 0; //no total is measured here; the slot is just zeroed
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/utils.cpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/utils.cpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/utils.cpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/utils.cpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,136 @@
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <fstream>
+
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+#ifdef MINIFE_HAVE_TPI
+#include <TPI.h>
+#endif
+
+#ifdef MINIFE_HAVE_TBB
+#include <tbb/task_scheduler_init.h>
+#endif
+
+#include <param_utils.hpp>
+#include <Parameters.hpp>
+#include <utils.hpp>
+
+namespace miniFE {
+
+//-------------------------------------------------------------
+void get_parameters(int argc, char** argv, Parameters& params)
+{
+  //Start from the command line, flattened into one searchable string.
+  std::string param_text;
+  Mantevo::read_args_into_string(argc, argv, param_text);
+
+  //"garbage" stands for "no input_file was given". When a file is named, its
+  //contents replace (not extend) the command-line text.
+  const std::string no_file("garbage");
+  const std::string input_file =
+      Mantevo::parse_parameter<std::string>(param_text, "input_file", no_file);
+  if (input_file != no_file) Mantevo::read_file_into_string(input_file, param_text);
+
+  //ny defaults to nx and nz to ny, so these three must be parsed in this order.
+  params.nx = Mantevo::parse_parameter<int>(param_text, "nx", 10);
+  params.ny = Mantevo::parse_parameter<int>(param_text, "ny", params.nx);
+  params.nz = Mantevo::parse_parameter<int>(param_text, "nz", params.ny);
+  params.load_imbalance = Mantevo::parse_parameter<float>(param_text, "load_imbalance", 0);
+  params.numthreads = Mantevo::parse_parameter<int>(param_text, "numthreads", 1);
+  params.mv_overlap_comm_comp = Mantevo::parse_parameter<int>(param_text, "mv_overlap_comm_comp", 0);
+  params.use_locking = Mantevo::parse_parameter<int>(param_text, "use_locking", 0);
+  params.name = Mantevo::parse_parameter<std::string>(param_text, "name","");
+  params.elem_group_size = Mantevo::parse_parameter<int>(param_text, "elem_group_size", 1);
+  params.use_elem_mat_fields = Mantevo::parse_parameter<int>(param_text, "use_elem_mat_fields", 1);
+  params.verify_solution = Mantevo::parse_parameter<int>(param_text, "verify_solution", 0);
+  params.device = Mantevo::parse_parameter<int>(param_text, "device", 0);
+  params.num_devices = Mantevo::parse_parameter<int>(param_text, "num_devices", 2);
+  params.skip_device = Mantevo::parse_parameter<int>(param_text, "skip_device", 9999);
+  params.numa = Mantevo::parse_parameter<int>(param_text, "numa", 1);
+}
+
+//-------------------------------------------------------------
+void broadcast_parameters(Parameters& params)
+{
+#ifdef HAVE_MPI
+  //Pack the int settings into one buffer; unpacking below mirrors this order.
+  const int int_count = 13;
+  int packed[int_count] = {params.nx, params.ny, params.nz, params.numthreads,
+      params.mv_overlap_comm_comp, params.use_locking, params.elem_group_size,
+      params.use_elem_mat_fields, params.verify_solution, params.device,
+      params.num_devices, params.skip_device, params.numa};
+  MPI_Bcast(packed, int_count, MPI_INT, 0, MPI_COMM_WORLD);
+  params.nx = packed[0];
+  params.ny = packed[1];
+  params.nz = packed[2];
+  params.numthreads = packed[3];
+  params.mv_overlap_comm_comp = packed[4];
+  params.use_locking = packed[5];
+  params.elem_group_size = packed[6];
+  params.use_elem_mat_fields = packed[7];
+  params.verify_solution = packed[8];
+  params.device = packed[9];
+  params.num_devices = packed[10];
+  params.skip_device = packed[11];
+  params.numa = packed[12];
+  float imbalance = params.load_imbalance; //the one float setting gets its own broadcast
+  MPI_Bcast(&imbalance, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
+  params.load_imbalance = imbalance;
+#endif
+}
+
+//-------------------------------------------------------------
+void initialize_mpi(int argc, char** argv, int& numprocs, int& myproc)
+{
+#ifndef HAVE_MPI
+  numprocs = 1; //serial build: argc and argv are not used
+  myproc = 0;   //and the single process is reported as rank 0
+#else
+  MPI_Init(&argc, &argv);
+  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
+  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+#endif
+}
+
+//-------------------------------------------------------------
+void finalize_mpi()
+{
+#if defined(HAVE_MPI)
+  MPI_Finalize(); //the body is empty when HAVE_MPI is not defined
+#endif
+}
+
+}//namespace miniFE
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/utils.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/utils.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/utils.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/utils.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,204 @@
+#ifndef _utils_hpp_
+#define _utils_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <map>
+#include <vector>
+
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+#include <TypeTraits.hpp>
+#include <Parameters.hpp>
+
+namespace miniFE {
+
+// Defined outside this header.
+// NOTE(review): presumably fills params from the command-line arguments;
+// confirm against the definition.
+void get_parameters(int argc, char** argv, Parameters& params);
+
+// In an MPI build, broadcasts the integer parameters and load_imbalance from
+// rank 0 of MPI_COMM_WORLD and stores the received values back into params.
+void broadcast_parameters(Parameters& params);
+
+// Calls MPI_Init and reports communicator size and rank through numprocs and
+// myproc; without HAVE_MPI it sets numprocs to 1 and myproc to 0.
+void initialize_mpi(int argc, char** argv, int& numprocs, int& myproc);
+
+// Calls MPI_Finalize in an MPI build; does nothing otherwise.
+void finalize_mpi();
+
+template<typename Scalar>
+Scalar percentage_difference(Scalar value, Scalar average)
+{
+  //Returns the absolute difference between value and average, expressed as
+  //a percentage of the magnitude of average.
+  //Examples:
+  //  if value=100 and average=50, result is 100%
+  //  if value=500 and average=400, result is 25%
+  //  if value=-50 and average=-100, result is 50%
+
+  //Note: if |average| is not greater than 1.e-5 the result is undefined and
+  //-1 is returned. The divisor is |average| rather than average so that a
+  //negative average can never produce a negative percentage that would be
+  //mistaken for that -1 sentinel.
+
+  const Scalar magnitude = std::abs(average);
+  if (!(magnitude > 1.e-5)) {
+    return -1;
+  }
+
+  Scalar result = std::abs(value-average);
+  result /= magnitude;
+  result *= 100;
+
+  return result;
+}
+
+template<typename GlobalOrdinal>
+void get_global_min_max(GlobalOrdinal local_n,
+                        GlobalOrdinal& global_n,
+                        GlobalOrdinal& min_n,
+                        int& min_proc,
+                        GlobalOrdinal& max_n,
+                        int& max_proc)
+{
+//Given a local_n, compute the sum over all processors (global_n), the
+//smallest and largest per-processor value (min_n, max_n) and the ranks that
+//hold them (min_proc, max_proc). All computed results are returned on all
+//processors.
+//
+//In an MPI build the per-processor values are collected with an MPI_MAX
+//reduction over a zero-filled array, so local_n is expected to be
+//non-negative there.
+//
+  int numprocs = 1, myproc = 0;
+#ifdef HAVE_MPI
+  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
+  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+#endif
+
+  std::vector<GlobalOrdinal> all_n(numprocs, 0);
+  all_n[myproc] = local_n;
+#ifdef HAVE_MPI
+  std::vector<GlobalOrdinal> tmp(all_n);
+  MPI_Datatype mpi_dtype = TypeTraits<GlobalOrdinal>::mpi_type();
+  MPI_Allreduce(&tmp[0], &all_n[0], numprocs, mpi_dtype, MPI_MAX, MPI_COMM_WORLD);
+#endif
+
+  //Seed the running min/max from processor 0's entry rather than from a value
+  //derived from local_n: a seed such as 5*local_n is 0 on a processor whose
+  //local_n is 0, which would leave min_proc stuck at 0 on that processor only,
+  //and it can overflow for large local_n.
+  global_n = all_n[0];
+  min_n = all_n[0];
+  min_proc = 0;
+  max_n = all_n[0];
+  max_proc = 0;
+
+  for(int i=1; i<numprocs; ++i) {
+    global_n += all_n[i];
+    //min_proc will be the lowest-numbered proc with n = min_n
+    if (all_n[i] < min_n) {
+      min_n = all_n[i];
+      min_proc = i;
+    }
+    //max_proc will be the highest-numbered proc with n = max_n
+    if (all_n[i] >= max_n) {
+      max_n = all_n[i];
+      max_proc = i;
+    }
+  }
+}
+
+template<typename Scalar>
+Scalar compute_std_dev_as_percentage(Scalar local_nrows,
+                                     Scalar avg_nrows)
+{
+//Returns the standard deviation of the per-processor row counts about
+//avg_nrows, expressed as a percentage of avg_nrows. A build without MPI
+//always returns 0.
+//
+//local_nrows is really an integer count; accepting it as a floating-point
+//scalar is harmless.
+//
+#ifndef HAVE_MPI
+  return 0;
+#else
+  int numprocs = 1, myproc = 0;
+  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
+  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+  MPI_Datatype mpi_dtype = TypeTraits<Scalar>::mpi_type();
+
+//MPI_Gather might be cheaper than MPI_Allgather, since the std.dev. is really
+//only needed on proc 0. For now every proc gathers and computes.
+
+  std::vector<Scalar> all_nrows(numprocs, 0);
+  MPI_Allgather(&local_nrows, 1, mpi_dtype, &all_nrows[0], 1, mpi_dtype, MPI_COMM_WORLD);
+
+  //accumulate the squared deviation of every processor's count from the average:
+  Scalar sum_sqr_dev = 0;
+  for(size_t p=0; p<all_nrows.size(); ++p) {
+    const Scalar deviation = all_nrows[p] - avg_nrows;
+    sum_sqr_dev += deviation*deviation;
+  }
+
+  //sample standard deviation (divide by numprocs-1); 0 for a single processor.
+  Scalar std_dev = 0;
+  if (numprocs > 1) {
+    std_dev = std::sqrt(sum_sqr_dev/(numprocs-1));
+  }
+
+  //express the result as a percentage of avg_nrows:
+  std_dev /= avg_nrows;
+  std_dev *= 100;
+  return std_dev;
+#endif
+}
+
+template<typename GlobalOrdinal>
+GlobalOrdinal find_row_for_id(GlobalOrdinal id,
+                              const std::map<GlobalOrdinal,GlobalOrdinal>& ids_to_rows)
+{
+  //Maps id to a row number using ids_to_rows, whose entries are (id, row)
+  //pairs. An id that is a key maps to the stored row. Any other id maps to
+  //the row of the nearest smaller key plus the distance from that key.
+  //
+  //Returns -99 and prints a message to std::cout when the map is empty or
+  //when id is smaller than every key.
+
+  if (ids_to_rows.empty()) {
+    std::cout << "ERROR, failed to map id to row."<<std::endl;
+    return -99;
+  }
+
+  //first entry whose key is >= id (end() if every key is smaller):
+  typename std::map<GlobalOrdinal,GlobalOrdinal>::const_iterator
+    iter = ids_to_rows.lower_bound(id);
+
+  if (iter != ids_to_rows.end() && iter->first == id) {
+    return iter->second;
+  }
+
+  //No exact match. If the first key is already larger than id there is no
+  //preceding entry; this must be checked before stepping back, because
+  //decrementing begin() is undefined behaviour.
+  if (iter == ids_to_rows.begin()) {
+    std::cout << "ERROR, id:" << id << ", ids_to_rows.begin(): " << iter->first<<std::endl;
+    return -99;
+  }
+
+  //step back to the entry with the largest key below id; the offset from
+  //that key is therefore always positive.
+  --iter;
+  GlobalOrdinal offset = id - iter->first;
+
+  return iter->second + offset;
+}
+
+}//namespace miniFE
+
+#endif
+

Added: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/verify_solution.hpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/miniFE/verify_solution.hpp?rev=311411&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/verify_solution.hpp (added)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/verify_solution.hpp Mon Aug 21 16:54:28 2017
@@ -0,0 +1,179 @@
+#ifndef _verify_solution_hpp_
+#define _verify_solution_hpp_
+
+//@HEADER
+// ************************************************************************
+//
+// MiniFE: Simple Finite Element Assembly and Solve
+// Copyright (2006-2013) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// This library is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; either version 2.1 of the
+// License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+// USA
+//
+// ************************************************************************
+//@HEADER
+
+#include <sstream>
+#include <stdexcept>
+#include <map>
+#include <algorithm>
+
+#include <simple_mesh_description.hpp>
+#include <analytic_soln.hpp>
+#include <box_utils.hpp>
+#include <utils.hpp>
+
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+namespace miniFE {
+
+//Record of a single comparison between the computed and the analytic
+//solution; verify_solution uses one instance to remember the largest
+//error it has seen.
+template<typename Scalar>
+struct err_info {
+  Scalar err;        //absolute difference between analytic and computed
+  Scalar computed;   //value taken from the computed solution vector
+  Scalar analytic;   //value of the analytic solution at the same point
+  Scalar coords[3];  //x, y, z coordinates of the point
+};
+
+//Compares the computed solution in x against the analytic solution at the
+//nodes of the local box of mesh.
+//
+//  mesh                 supplies global_box and local_box
+//  x                    computed solution; x.coefs is indexed by the position
+//                       of the node in the z/y/x traversal of the local box
+//  tolerance            largest acceptable absolute error
+//  verify_whole_domain  when false (the default) only nodes whose three
+//                       coordinates are each within 0.05 of 0.5 are checked
+//
+//Returns 0 when the largest absolute error over all processes does not exceed
+//tolerance, and 1 otherwise. Every process whose local maximum equals the
+//global maximum prints a summary to std::cout.
+template<typename VectorType>
+int
+verify_solution(const simple_mesh_description<typename VectorType::GlobalOrdinalType>& mesh,
+                const VectorType& x, double tolerance, bool verify_whole_domain = false)
+{
+  typedef typename VectorType::GlobalOrdinalType GlobalOrdinal;
+  typedef typename VectorType::ScalarType Scalar;
+
+  int global_nodes_x = mesh.global_box[0][1]+1;
+  int global_nodes_y = mesh.global_box[1][1]+1;
+  int global_nodes_z = mesh.global_box[2][1]+1;
+  Box box;
+  copy_box(mesh.local_box, box);
+
+  //num-owned-nodes in each dimension is num-elems+1
+  //only if num-elems > 0 in that dimension *and*
+  //we are at the high end of the global range in that dimension:
+  if (box[0][1] > box[0][0] && box[0][1] == mesh.global_box[0][1]) ++box[0][1];
+  if (box[1][1] > box[1][0] && box[1][1] == mesh.global_box[1][1]) ++box[1][1];
+  if (box[2][1] > box[2][0] && box[2][1] == mesh.global_box[2][1]) ++box[2][1];
+
+  //rows holds the local offsets of the nodes to check; row_coords holds their
+  //coordinates as consecutive (x, y, z) triples.
+  std::vector<GlobalOrdinal> rows;
+  std::vector<Scalar> row_coords;
+
+  int roffset = 0;
+  for(int iz=box[2][0]; iz<box[2][1]; ++iz) {
+   for(int iy=box[1][0]; iy<box[1][1]; ++iy) {
+    for(int ix=box[0][0]; ix<box[0][1]; ++ix) {
+      GlobalOrdinal row_id =
+          get_id<GlobalOrdinal>(global_nodes_x, global_nodes_y, global_nodes_z,
+                                ix, iy, iz);
+      //named cx/cy/cz so that the solution vector parameter x is not shadowed
+      Scalar cx, cy, cz;
+      get_coords(row_id, global_nodes_x, global_nodes_y, global_nodes_z, cx, cy, cz);
+
+      bool verify_this_point = false;
+      if (verify_whole_domain) verify_this_point = true;
+      else if (std::abs(cx - 0.5) < 0.05 && std::abs(cy - 0.5) < 0.05 && std::abs(cz - 0.5) < 0.05) {
+        verify_this_point = true;
+      }
+
+      if (verify_this_point) {
+        rows.push_back(roffset);
+        row_coords.push_back(cx);
+        row_coords.push_back(cy);
+        row_coords.push_back(cz);
+      }
+
+      ++roffset;
+    }
+   }
+  }
+
+  int return_code = 0;
+
+  //number of terms passed (twice) to soln() when evaluating the analytic solution
+  const int num_terms = 300;
+
+  err_info<Scalar> max_error;
+  max_error.err = 0.0;
+
+  for(size_t i=0; i<rows.size(); ++i) {
+    Scalar computed_soln = x.coefs[rows[i]];
+    Scalar px = row_coords[i*3];
+    Scalar py = row_coords[i*3+1];
+    Scalar pz = row_coords[i*3+2];
+    Scalar analytic_soln = 0.0;
+    //set exact boundary-conditions:
+    if (px == 1.0) {
+      //x==1 is first, we want soln to be 1 even around the edges
+      //of the x==1 plane where y and/or z may be 0 or 1...
+      analytic_soln = 1;
+    }
+    else if (px == 0.0 || py == 0.0 || pz == 0.0) {
+      analytic_soln = 0;
+    }
+    else if (py == 1.0 || pz == 1.0) {
+      analytic_soln = 0;
+    }
+    else {
+      analytic_soln = soln(px, py, pz, num_terms, num_terms);
+    }
+
+#ifdef MINIFE_DEBUG_VERBOSE
+std::cout<<"("<<px<<","<<py<<","<<pz<<") row "<<rows[i]<<": computed: "<<computed_soln<<",  analytic: "<<analytic_soln<<std::endl;
+#endif
+    Scalar err = std::abs(analytic_soln - computed_soln);
+    if (err > max_error.err) {
+      max_error.err = err;
+      max_error.computed = computed_soln;
+      max_error.analytic = analytic_soln;
+      max_error.coords[0] = px;
+      max_error.coords[1] = py;
+      max_error.coords[2] = pz;
+    }
+  }
+
+  Scalar local_max_err = max_error.err;
+  Scalar global_max_err = 0;
+#ifdef HAVE_MPI
+  //The buffers are of type Scalar, so reduce with the MPI datatype that
+  //matches Scalar; a hard-coded MPI_DOUBLE is wrong when Scalar is float.
+  MPI_Datatype mpi_dtype = TypeTraits<Scalar>::mpi_type();
+  MPI_Allreduce(&local_max_err, &global_max_err, 1, mpi_dtype, MPI_MAX, MPI_COMM_WORLD);
+#else
+  global_max_err = local_max_err;
+#endif
+
+  //only a process that holds the global maximum reports it
+  if (local_max_err == global_max_err) {
+    if (max_error.err > tolerance) {
+      std::cout << "max absolute error is "<<max_error.err<<":"<<std::endl;
+      std::cout << "   at position ("<<max_error.coords[0]<<","<<max_error.coords[1]<<","<<max_error.coords[2]<<"), "<<std::endl;
+      std::cout << "   computed solution: "<<max_error.computed<<",  analytic solution: "<<max_error.analytic<<std::endl;
+    }
+    else {
+      std::cout << "solution matches analytic solution to within "<<tolerance<<" or better."<<std::endl;
+    }
+  }
+
+  if (global_max_err > tolerance) {
+    return_code = 1;
+  }
+
+  return return_code;
+}
+
+}//namespace miniFE
+
+#endif
+