[Mlir-commits] [mlir] b4bc72a - [mlir] refactor Linalg LoopNestBuilder to use common infra

Tue Jun 16 11:51:42 PDT 2020

Author: Alex Zinenko
Date: 2020-06-16T20:51:32+02:00
New Revision: b4bc72afb78477dca7ba06b98b1b0002716297b7

URL: https://github.com/llvm/llvm-project/commit/b4bc72afb78477dca7ba06b98b1b0002716297b7
DIFF: https://github.com/llvm/llvm-project/commit/b4bc72afb78477dca7ba06b98b1b0002716297b7.diff

LOG: [mlir] refactor Linalg LoopNestBuilder to use common infra

Recent work has introduced support for constructing loops via `::build` with
callbacks that construct loop bodies using only the core OpBuilder. This is now
supported on all loop types that Linalg lowers to. Refactor LoopNestBuilder in
Linalg to rely on this functionality instead of using a custom EDSC-based
approach to creating loop nests.

The specialization targeting parallel loops is also simplified by factoring out
the recursive call into a separate static function and considering only two
alternatives: top-level loop is parallel or sequential.

This removes the last remaining in-tree use of edsc::LoopBuilder, which is now
deprecated and will be removed soon.

Differential Revision: https://reviews.llvm.org/D81873

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
    mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
    mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
    mlir/lib/Dialect/Linalg/Utils/Utils.cpp

Removed: 
    


################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index 235dedd60401..ab85cebee178 100644

--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -136,18 +136,18 @@ void applyPermutationToVector(SmallVector<T, N> &inVec,
 }
 
 /// Utility class used to generate nested loops with ranges described by
-/// `loopRanges` and loop type described by the `iteratorTypes`. `allIvs` is
-/// populated with induction variables for all generated loops on return, with
-/// `fun` used to generate the body of the innermost loop.
+/// `loopRanges` and loop type described by the `iteratorTypes`. `bodyBuilderFn`
+/// is used to generate the body of the innermost loop. It is passed a range
+/// of loop induction variables.
 template <typename LoopTy>
 struct GenerateLoopNest {
   using IndexedValueTy =
       typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
                                 AffineIndexedValue, StdIndexedValue>::type;
-  static void doit(MutableArrayRef<Value> allIvs,
-                   ArrayRef<SubViewOp::Range> loopRanges,
+
+  static void doit(ArrayRef<SubViewOp::Range> loopRanges,
                    ArrayRef<Attribute> iteratorTypes,
-                   std::function<void(void)> fun);
+                   function_ref<void(ValueRange)> bodyBuilderFn);
 };
 
 } // namespace linalg

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
index 98baef105763..5ccf2a469dce 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
@@ -459,10 +459,6 @@ Optional<LinalgLoops> linalgOpToLoopsImpl(Operation *op, OpBuilder &builder) {
   auto linalgOp = cast<ConcreteOpTy>(op);
   assert(linalgOp.hasBufferSemantics() &&
          "expected linalg op with buffer semantics");
-  auto nPar = linalgOp.getNumParallelLoops();
-  auto nRed = linalgOp.getNumReductionLoops();
-  auto nWin = linalgOp.getNumWindowLoops();
-  auto nLoops = nPar + nRed + nWin;
   auto mapsRange =
       linalgOp.indexing_maps().template getAsRange<AffineMapAttr>();
   auto maps = llvm::to_vector<8>(
@@ -475,15 +471,14 @@ Optional<LinalgLoops> linalgOpToLoopsImpl(Operation *op, OpBuilder &builder) {
     return LinalgLoops();
   }
 
-  SmallVector<Value, 4> allIvs(nLoops);
+  SmallVector<Value, 4> allIvs;
   auto loopRanges =
       emitLoopRanges(scope.getBuilderRef(), scope.getLocation(), invertedMap,
                      getViewSizes(builder, linalgOp));
-  assert(loopRanges.size() == allIvs.size());
   GenerateLoopNest<LoopTy>::doit(
-      allIvs, loopRanges, linalgOp.iterator_types().getValue(), [&] {
-        SmallVector<Value, 4> allIvValues(allIvs.begin(), allIvs.end());
-        emitScalarImplementation<IndexedValueTy>(allIvValues, linalgOp);
+      loopRanges, linalgOp.iterator_types().getValue(), [&](ValueRange ivs) {
+        allIvs.append(ivs.begin(), ivs.end());
+        emitScalarImplementation<IndexedValueTy>(allIvs, linalgOp);
       });
   // Number of loop ops might be different from the number of ivs since some
   // loops like affine.parallel and scf.parallel have multiple ivs.

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index 0dac95739679..ac6903b4bd88 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -375,29 +375,31 @@ Optional<TiledLinalgOp> static tileLinalgOpImpl(
 
   // 3. Create the tiled loops.
   LinalgOp res = op;
-  SmallVector<Value, 4> ivs(loopRanges.size());
+  SmallVector<Value, 4> ivs;
   SmallVector<Attribute, 4> iteratorTypes =
       llvm::to_vector<4>(op.iterator_types().cast<ArrayAttr>().getValue());
   if (!options.interchangeVector.empty())
     applyPermutationToVector(iteratorTypes, options.interchangeVector);
-  GenerateLoopNest<LoopTy>::doit(ivs, loopRanges, iteratorTypes, [&] {
-    auto &b = ScopedContext::getBuilderRef();
-    auto loc = ScopedContext::getLocation();
-    SmallVector<Value, 4> ivValues(ivs.begin(), ivs.end());
-
-    // If we have to apply a permutation to the tiled loop nest, we have to
-    // reorder the induction variables This permutation is the right one
-    // assuming that loopRanges have previously been permuted by
-    // (i,j,k)->(k,i,j) So this permutation should be the inversePermutation
-    // of that one: (d0,d1,d2)->(d2,d0,d1)
-    if (!options.interchangeVector.empty())
-      ivValues = applyMapToValues(b, loc, invPermutationMap, ivValues);
-
-    auto views = makeTiledViews(b, loc, op, ivValues, tileSizes, viewSizes);
-    auto operands = getAssumedNonViewOperands(op);
-    views.append(operands.begin(), operands.end());
-    res = op.clone(b, loc, views);
-  });
+  GenerateLoopNest<LoopTy>::doit(
+      loopRanges, iteratorTypes, [&](ValueRange localIvs) {
+        auto &b = ScopedContext::getBuilderRef();
+        auto loc = ScopedContext::getLocation();
+        ivs.assign(localIvs.begin(), localIvs.end());
+        SmallVector<Value, 4> ivValues(ivs.begin(), ivs.end());
+
+        // If we have to apply a permutation to the tiled loop nest, we have to
+        // reorder the induction variables This permutation is the right one
+        // assuming that loopRanges have previously been permuted by
+        // (i,j,k)->(k,i,j) So this permutation should be the inversePermutation
+        // of that one: (d0,d1,d2)->(d2,d0,d1)
+        if (!options.interchangeVector.empty())
+          ivValues = applyMapToValues(b, loc, invPermutationMap, ivValues);
+
+        auto views = makeTiledViews(b, loc, op, ivValues, tileSizes, viewSizes);
+        auto operands = getAssumedNonViewOperands(op);
+        views.append(operands.begin(), operands.end());
+        res = op.clone(b, loc, views);
+      });
 
   // 4. Transforms index arguments of `linalg.generic` w.r.t. to the tiling.
   transformIndexedGenericOpIndices(b, res, ivs, loopIndexToRangeIndex);

diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index c48b87aaa4e4..5bba11420d08 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -129,70 +129,125 @@ template struct mlir::linalg::GenerateLoopNest<scf::ForOp>;
 template struct mlir::linalg::GenerateLoopNest<scf::ParallelOp>;
 template struct mlir::linalg::GenerateLoopNest<AffineForOp>;
 
+/// Given a list of subview ranges, extract individual values for lower, upper
+/// bounds and steps and put them into the corresponding vectors.
+static void unpackRanges(ArrayRef<SubViewOp::Range> ranges,
+                         SmallVectorImpl<Value> &lbs,
+                         SmallVectorImpl<Value> &ubs,
+                         SmallVectorImpl<Value> &steps) {
+  for (SubViewOp::Range range : ranges) {
+    lbs.emplace_back(range.offset);
+    ubs.emplace_back(range.size);
+    steps.emplace_back(range.stride);
+  }
+}
+
 namespace mlir {
 namespace linalg {
-/// Specialization of loop nest generator for scf.parallel loops to handle
-/// iterator types that are not parallel. These are generated as sequential
-/// loops.
+
+/// Specialization to build an scf "for" nest.
 template <>
-void GenerateLoopNest<scf::ForOp>::doit(MutableArrayRef<Value> allIvs,
-                                        ArrayRef<SubViewOp::Range> loopRanges,
-                                        ArrayRef<Attribute> iteratorTypes,
-                                        std::function<void(void)> fun) {
-  edsc::GenericLoopNestRangeBuilder<scf::ForOp>(allIvs, loopRanges)(fun);
+void GenerateLoopNest<scf::ForOp>::doit(
+    ArrayRef<SubViewOp::Range> loopRanges, ArrayRef<Attribute> iteratorTypes,
+    function_ref<void(ValueRange)> bodyBuilderFn) {
+  SmallVector<Value, 4> lbs, ubs, steps;
+  unpackRanges(loopRanges, lbs, ubs, steps);
+  edsc::loopNestBuilder(lbs, ubs, steps, bodyBuilderFn);
 }
 
+/// Specialization to build affine "for" nest.
 template <>
-void GenerateLoopNest<AffineForOp>::doit(MutableArrayRef<Value> allIvs,
-                                         ArrayRef<SubViewOp::Range> loopRanges,
-                                         ArrayRef<Attribute> iteratorTypes,
-                                         std::function<void(void)> fun) {
-  edsc::GenericLoopNestRangeBuilder<AffineForOp>(allIvs, loopRanges)(fun);
+void GenerateLoopNest<AffineForOp>::doit(
+    ArrayRef<SubViewOp::Range> loopRanges, ArrayRef<Attribute> iteratorTypes,
+    function_ref<void(ValueRange)> bodyBuilderFn) {
+  SmallVector<Value, 4> lbs, ubs, steps;
+  unpackRanges(loopRanges, lbs, ubs, steps);
+
+  // Affine loops require constant steps.
+  SmallVector<int64_t, 4> constantSteps;
+  constantSteps.reserve(steps.size());
+  for (Value v : steps) {
+    auto op = v.getDefiningOp<ConstantIndexOp>();
+    assert(op && "Affine loops require constant steps");
+    constantSteps.push_back(op.getValue());
+  }
+
+  edsc::affineLoopNestBuilder(lbs, ubs, constantSteps, bodyBuilderFn);
 }
 
-template <>
-void GenerateLoopNest<scf::ParallelOp>::doit(
-    MutableArrayRef<Value> allIvs, ArrayRef<SubViewOp::Range> loopRanges,
-    ArrayRef<Attribute> iteratorTypes, std::function<void(void)> fun) {
-  // Check if there is nothing to do here. This is also the recursion
-  // termination.
-  if (loopRanges.empty())
-    return;
-  size_t nOuterPar = iteratorTypes.take_front(loopRanges.size())
-                         .take_while(isParallelIteratorType)
-                         .size();
-  if (nOuterPar == 0 && loopRanges.size() == 1)
-    // Generate the sequential for loop for the remaining non-parallel loop.
-    return GenerateLoopNest<scf::ForOp>::doit(allIvs, loopRanges, iteratorTypes,
-                                              fun);
+/// Generates a loop nest consisting of scf.parallel and scf.for, depending on
+/// the `iteratorTypes.` Consecutive parallel loops create a single scf.parallel
+/// operation; each sequential loop creates a new scf.for operation. The body
+/// of the innermost loop is populated by `bodyBuilderFn` that accepts a range
+/// of induction variables for all loops. `ivStorage` is used to store the
+/// partial list of induction variables.
+// TODO(zinenko,ntv): this function can be made iterative instead. However, it
+// will have at most as many recursive calls as nested loops, which rarely
+// exceeds 10.
+static void
+generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
+                         ArrayRef<Attribute> iteratorTypes,
+                         function_ref<void(ValueRange)> bodyBuilderFn,
+                         SmallVectorImpl<Value> &ivStorage) {
+  assert(lbs.size() == ubs.size());
+  assert(lbs.size() == steps.size());
+  assert(lbs.size() == iteratorTypes.size());
+
+  // If there are no (more) loops to be generated, generate the body and be
+  // done with it.
+  if (iteratorTypes.empty())
+    return bodyBuilderFn(ivStorage);
+
+  // Find the outermost parallel loops and drop their types from the list.
+  unsigned nLoops = iteratorTypes.size();
+  iteratorTypes = iteratorTypes.drop_while(isParallelIteratorType);
+  unsigned nOuterPar = nLoops - iteratorTypes.size();
+
+  // If there are no outer parallel loops, generate one sequential loop and
+  // recurse. Note that we wouldn't have dropped anything from `iteratorTypes`
+  // in this case.
   if (nOuterPar == 0) {
-    // The immediate outer loop is not parallel. Generate a scf.for op for this
-    // loop, but there might be subsequent loops that are parallel. Use
-    // recursion to find those.
-    auto nestedFn = [&]() {
-      GenerateLoopNest<scf::ParallelOp>::doit(allIvs.drop_front(),
-                                              loopRanges.drop_front(),
-                                              iteratorTypes.drop_front(), fun);
-    };
-    return GenerateLoopNest<scf::ForOp>::doit(allIvs[0], loopRanges[0],
-                                              iteratorTypes[0], nestedFn);
-  }
-  if (nOuterPar == loopRanges.size()) {
-    // All loops are parallel, so generate the scf.parallel op.
-    return edsc::GenericLoopNestRangeBuilder<scf::ParallelOp>(allIvs,
-                                                              loopRanges)(fun);
+    edsc::loopNestBuilder(lbs[0], ubs[0], steps[0], [&](Value iv) {
+      ivStorage.push_back(iv);
+      generateParallelLoopNest(lbs.drop_front(), ubs.drop_front(),
+                               steps.drop_front(), iteratorTypes.drop_front(),
+                               bodyBuilderFn, ivStorage);
+    });
+    return;
   }
-  // Generate scf.parallel for the outer parallel loops. The next inner loop is
-  // sequential, but there might be more parallel loops after that. So recurse
-  // into the same method.
-  auto nestedFn = [&]() {
-    GenerateLoopNest<scf::ParallelOp>::doit(
-        allIvs.drop_front(nOuterPar), loopRanges.drop_front(nOuterPar),
-        iteratorTypes.drop_front(nOuterPar), fun);
-  };
-  return GenerateLoopNest<scf::ParallelOp>::doit(
-      allIvs.take_front(nOuterPar), loopRanges.take_front(nOuterPar),
-      iteratorTypes.take_front(nOuterPar), nestedFn);
+
+  // Generate a single parallel loop-nest operation for all outermost parallel
+  // loops and recurse.
+  edsc::OperationBuilder<scf::ParallelOp>(
+      lbs.take_front(nOuterPar), ubs.take_front(nOuterPar),
+      steps.take_front(nOuterPar),
+      [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
+        edsc::ScopedContext context(nestedBuilder, nestedLoc);
+        ivStorage.append(localIvs.begin(), localIvs.end());
+        generateParallelLoopNest(lbs.drop_front(nOuterPar),
+                                 ubs.drop_front(nOuterPar),
+                                 steps.drop_front(nOuterPar), iteratorTypes,
+                                 bodyBuilderFn, ivStorage);
+      });
+}
+
+/// Specialization for generating a mix of parallel and sequential scf loops.
+template <>
+void GenerateLoopNest<scf::ParallelOp>::doit(
+    ArrayRef<SubViewOp::Range> loopRanges, ArrayRef<Attribute> iteratorTypes,
+    function_ref<void(ValueRange)> bodyBuilderFn) {
+  SmallVector<Value, 8> lbsStorage, ubsStorage, stepsStorage, ivs;
+  unpackRanges(loopRanges, lbsStorage, ubsStorage, stepsStorage);
+  ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
+
+  // This function may be passed more iterator types than ranges.
+  assert(iteratorTypes.size() >= loopRanges.size() &&
+         "expected iterator type for all ranges");
+  iteratorTypes = iteratorTypes.take_front(loopRanges.size());
+  ivs.reserve(iteratorTypes.size());
+  generateParallelLoopNest(lbs, ubs, steps, iteratorTypes, bodyBuilderFn, ivs);
+  assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
 }
+
 } // namespace linalg
 } // namespace mlir