[Mlir-commits] [mlir] a68235d - [mlir] Add padding to 1-D Vector in CRunnerUtils.h

Mon Mar 2 19:46:26 PST 2020

Author: Nicolas Vasilache
Date: 2020-03-02T22:45:50-05:00
New Revision: a68235d583fccb465694fdfd72eeb8d45d3369c0

URL: https://github.com/llvm/llvm-project/commit/a68235d583fccb465694fdfd72eeb8d45d3369c0
DIFF: https://github.com/llvm/llvm-project/commit/a68235d583fccb465694fdfd72eeb8d45d3369c0.diff

LOG: [mlir] Add padding to 1-D Vector in CRunnerUtils.h

Summary:
This revision fixes a -Wzero-length-array compile error that
caused e459596917a72f11bd8251a81bdb521401abdd3d, which reverted
78f9e5d098af95610f4542ee41479d7931261066.

This revision adds padding for 1-D Vector in the common case of x86
execution with a standard data layout. This supports properly interfacing
codegen with arrays of e.g. `vector<9xf32>`.

Such vectors are already assumed padded to the next power of 2 by LLVM
codegen with the default x86 data layout:
```
define void @test_vector_add_1d_2_3(<3 x float>* nocapture readnone %0,
<3 x float>* nocapture readonly %1, i64 %2, i64 %3, i64 %4, <3 x float>*
nocapture readnone %5, <3 x float>* nocapture readonly %6, i64 %7, i64
%8, i64 %9, <3 x float>* nocapture readnone %10, <3 x float>* nocapture
%11, i64 %12, i64 %13, i64 %14) local_unnamed_addr {
  %16 = getelementptr <3 x float>, <3 x float>* %6, i64 1
  %17 = load <3 x float>, <3 x float>* %16, align 16
  %18 = getelementptr <3 x float>, <3 x float>* %1, i64 1
  %19 = load <3 x float>, <3 x float>* %18, align 16
  %20 = fadd <3 x float> %17, %19
  %21 = getelementptr <3 x float>, <3 x float>* %11, i64 1
```

The pointer addressing a `vector<3xf32>` is assumed aligned `@16`.
Similarly, the pointer addressing a `vector<65xf32>` is assumed aligned
`@512`.

This revision allows using objects such as `vector<3xf32>` properly with
the standard x86 data layout used in the JitRunner. Integration testing
is done out of tree; at the moment, such testing fails without this
change.

Differential Revision: https://reviews.llvm.org/D75459

Added: 
    

Modified: 
    mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
    mlir/include/mlir/ExecutionEngine/RunnerUtils.h

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
index d82963b1aa03..28ef8e560667 100644

--- a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
+++ b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
@@ -39,15 +39,78 @@ template <int N> void dropFront(int64_t arr[N], int64_t *res) {
 //===----------------------------------------------------------------------===//
 // Codegen-compatible structures for Vector type.
 //===----------------------------------------------------------------------===//
+namespace detail {
+template <unsigned N>
+constexpr bool isPowerOf2() {
+  return (!(N & (N - 1)));
+}
+  
+template <unsigned N>
+constexpr unsigned nextPowerOf2();
+template <>
+constexpr unsigned nextPowerOf2<0>() {
+  return 1;
+}
+template <>
+constexpr unsigned nextPowerOf2<1>() {
+  return 1;
+}
+template <unsigned N> constexpr unsigned nextPowerOf2() {
+  return isPowerOf2<N>() ? N : 2 * nextPowerOf2<(N + 1) / 2>();
+}
+
+template <typename T, int Dim, bool IsPowerOf2>
+class Vector1D;
+
+template <typename T, int Dim>
+struct Vector1D<T, Dim, /*IsPowerOf2=*/true> {
+  Vector1D() {
+    static_assert(detail::nextPowerOf2<sizeof(T[Dim])>() == sizeof(T[Dim]),
+                  "size error");
+  }
+  constexpr T &operator[](unsigned i) { return vector[i]; }
+  constexpr const T &operator[](unsigned i) const { return vector[i]; }
+
+private:
+  T vector[Dim];
+};
+
+// 1-D vector, padded to the next power of 2 allocation.
+// Specialization occurs to avoid zero size arrays (which fail in -Werror).
+template <typename T, int Dim>
+struct Vector1D<T, Dim, /*IsPowerOf2=*/false> {
+  Vector1D() {
+    static_assert(detail::nextPowerOf2<sizeof(T[Dim])>() > sizeof(T[Dim]),
+                  "size error");
+    static_assert(detail::nextPowerOf2<sizeof(T[Dim])>() < 2 * sizeof(T[Dim]),
+                  "size error");
+  }
+  constexpr T &operator[](unsigned i) { return vector[i]; }
+  constexpr const T &operator[](unsigned i) const { return vector[i]; }
+
+private:
+  T vector[Dim];
+  char padding[detail::nextPowerOf2<sizeof(T[Dim])>() - sizeof(T[Dim])];
+};
+} // end namespace detail
+
+// N-D vectors recurse down to 1-D.
 template <typename T, int Dim, int... Dims>
 struct Vector {
+  constexpr Vector<T, Dims...> &operator[](unsigned i) { return vector[i]; }
+  constexpr const Vector<T, Dims...> &operator[](unsigned i) const {
+    return vector[i];
+  }
+
+private:
   Vector<T, Dims...> vector[Dim];
 };
 
+// 1-D vectors in LLVM are automatically padded to the next power of 2.
+// We insert explicit padding in to account for this.
 template <typename T, int Dim>
-struct Vector<T, Dim> {
-  T vector[Dim];
-};
+struct Vector<T, Dim>
+  : public detail::Vector1D<T, Dim, detail::isPowerOf2<sizeof(T[Dim])>()> {};
 
 template <int D1, typename T>
 using Vector1D = Vector<T, D1>;

diff  --git a/mlir/include/mlir/ExecutionEngine/RunnerUtils.h b/mlir/include/mlir/ExecutionEngine/RunnerUtils.h
index 2676e648930c..16dc54f20da7 100644
--- a/mlir/include/mlir/ExecutionEngine/RunnerUtils.h
+++ b/mlir/include/mlir/ExecutionEngine/RunnerUtils.h
@@ -92,7 +92,7 @@ void VectorDataPrinter<T, M, Dims...>::print(std::ostream &os,
   static_assert(sizeof(val) == M * StaticSizeMult<Dims...>::value * sizeof(T),
                 "Incorrect vector size!");
   // First
-  os << "(" << val.vector[0];
+  os << "(" << val[0];
   if (M > 1)
     os << ", ";
   if (sizeof...(Dims) > 1)
@@ -100,14 +100,14 @@ void VectorDataPrinter<T, M, Dims...>::print(std::ostream &os,
   // Kernel
   for (unsigned i = 1; i + 1 < M; ++i) {
     printSpace(os, 2 * sizeof...(Dims));
-    os << val.vector[i] << ", ";
+    os << val[i] << ", ";
     if (sizeof...(Dims) > 1)
       os << "\n";
   }
   // Last
   if (M > 1) {
     printSpace(os, sizeof...(Dims));
-    os << val.vector[M - 1];
+    os << val[M - 1];
   }
   os << ")";
 }