[Mlir-commits] [mlir] 70e99f3 - [mlir] Make ViewLikeInterface Range work with attributes

Wed Jul 27 01:52:25 PDT 2022

Author: Alex Zinenko
Date: 2022-07-27T08:52:13Z
New Revision: 70e99f387a482bfe0ad2b7accc88e9e8b2a4a59d

URL: https://github.com/llvm/llvm-project/commit/70e99f387a482bfe0ad2b7accc88e9e8b2a4a59d
DIFF: https://github.com/llvm/llvm-project/commit/70e99f387a482bfe0ad2b7accc88e9e8b2a4a59d.diff

LOG: [mlir] Make ViewLikeInterface Range work with attributes

While most methods in ViewLikeInterface accept an `OpFoldResult` for
the offset/size/stride, which may be static (represented as an
`Attribute`) or dynamic (represented as a `Value`), the `Range`
abstraction only accepted `Value`s. This can often lead to known-constant
offsets/sizes/strides being materialized into constant operations, which
hinders further constant propagation unless the constant folding pass is
run explicitly. As a result, the emitted addressing code is often more
complicated than necessary. Switch `Range` to use `OpFoldResult`. Code
that uses `Range` currently keeps materializing the constants to minimize
the effect of this change on the IR. Further commits will make use of
this.

Reviewed By: nicolasvasilache, mravishankar

Differential Revision: https://reviews.llvm.org/D129633

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/Linalg/IR/Linalg.h
    mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
    mlir/include/mlir/Interfaces/ViewLikeInterface.h
    mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
    mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
    mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp
    mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
    mlir/lib/Dialect/Linalg/Transforms/Split.cpp
    mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
    mlir/lib/Dialect/Linalg/Utils/Utils.cpp
    mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp

Removed: 
    


################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h b/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h
index 9f5699f08fe4e..d31a416fd99fb 100644

--- a/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h
+++ b/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h
@@ -30,20 +30,6 @@ namespace linalg {
 
 class LinalgOp;
 
-// TOFO: allow an extra ValueRange to specify an indexing and allow
-// non-hyperrectangular shapes.
-using LoopRangeBuilder =
-    std::function<SmallVector<Range, 4>(ImplicitLocOpBuilder)>;
-
-/// Provide a very simple inference procedure to build the loop ranges from the
-/// op and its operands. This only works with permutation affine maps and
-/// patterns of the form `(m, n)[s] -> (m + n - s floordiv 2)`.
-/// A more advanced Tensor-Comprehension like inference is possible but has
-/// proven to be ambiguous in unfavorable case.
-/// As a consequence, we relax the default behavior very conservatively and
-/// provide an op-specified hook so that Linalg ops may override the behavior.
-LoopRangeBuilder defaultLoopRangesBuilder(LinalgOp op);
-
 /// Returns the name mangled library call name to disambiguate between different
 /// overloads at the C level. The name mangling scheme is basic and uses MLIR
 /// type names:

diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index beee22c493f56..1a792da3c278c 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -208,6 +208,8 @@ SmallVector<Value> insertSlicesBack(OpBuilder &builder, Location loc,
 /// necessary.
 Value materializeOpFoldResult(ImplicitLocOpBuilder &builder,
                               OpFoldResult opFoldResult);
+Value materializeOpFoldResult(OpBuilder &b, Location loc,
+                              OpFoldResult opFoldResult);
 
 /// Creates an extract_slice/subview op for a single `valueToTile` with
 /// `builder`. This new operation extracts a tile of `valueToTile`, starting

diff --git a/mlir/include/mlir/Interfaces/ViewLikeInterface.h b/mlir/include/mlir/Interfaces/ViewLikeInterface.h
index 18b3d4caba776..e974b0bdf8065 100644
--- a/mlir/include/mlir/Interfaces/ViewLikeInterface.h
+++ b/mlir/include/mlir/Interfaces/ViewLikeInterface.h
@@ -24,9 +24,9 @@ namespace mlir {
 /// operands into a list of triples. Such a list can be more convenient to
 /// manipulate.
 struct Range {
-  Value offset;
-  Value size;
-  Value stride;
+  OpFoldResult offset;
+  OpFoldResult size;
+  OpFoldResult stride;
 };
 
 class OffsetSizeAndStrideOpInterface;

diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
index e0a0ec4cd3747..a6d5dd49b33ac 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
@@ -1346,6 +1346,26 @@ static FailureOr<SmallVector<Value>> collapseGenericOpIterationDims(
         genericOp, "illegal to collapse specified dimensions");
   }
 
+  // Bail on non-canonical ranges.
+  SmallVector<Range> loopRanges =
+      cast<LinalgOp>(genericOp.getOperation())
+          .createLoopRanges(rewriter, genericOp.getLoc());
+  auto opFoldIsConstantValue = [](OpFoldResult ofr, int64_t value) {
+    if (auto attr = ofr.dyn_cast<Attribute>())
+      return attr.cast<IntegerAttr>().getInt() == value;
+    llvm::APInt actual;
+    return matchPattern(ofr.get<Value>(), m_ConstantInt(&actual)) &&
+           actual.getSExtValue() == value;
+  };
+  if (!llvm::all_of(loopRanges, [&](Range range) {
+        return opFoldIsConstantValue(range.offset, 0) &&
+               opFoldIsConstantValue(range.stride, 1);
+      })) {
+    return rewriter.notifyMatchFailure(
+        genericOp,
+        "expected all loop ranges to have zero start and unit stride");
+  }
+
   // Get the iterator types for the operand.
   SmallVector<StringRef> iteratorTypes = getCollapsedOpIteratorTypes(
       genericOp.iterator_types().getValue(), collapsingInfo);
@@ -1390,17 +1410,10 @@ static FailureOr<SmallVector<Value>> collapseGenericOpIterationDims(
     // Collect the loop range of the generic op.
     OpBuilder::InsertionGuard g(rewriter);
     rewriter.setInsertionPoint(collapsedGenericOp);
-    SmallVector<Range> loopRanges =
-        cast<LinalgOp>(genericOp.getOperation())
-            .createLoopRanges(rewriter, genericOp.getLoc());
-    assert(llvm::all_of(loopRanges,
-                        [](Range range) {
-                          return matchPattern(range.offset, m_Zero()) &&
-                                 matchPattern(range.stride, m_One());
-                        }) &&
-           "expected all loop ranges to have zero start and unit stride");
-    SmallVector<Value> loopBound = llvm::to_vector(
-        llvm::map_range(loopRanges, [](Range range) { return range.size; }));
+    SmallVector<Value> loopBound =
+        llvm::to_vector(llvm::map_range(loopRanges, [&](Range range) {
+          return materializeOpFoldResult(rewriter, loc, range.size);
+        }));
     generateCollapsedIndexingRegion(loc,
                                     &collapsedGenericOp->getRegion(0).front(),
                                     collapsingInfo, loopBound, rewriter);

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
index 91089a3821321..266c71027b7b5 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
@@ -117,7 +117,6 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
   SmallVector<Range, 8> loopRanges;
   Location loc = producer.getLoc();
   auto zero = b.create<arith::ConstantIndexOp>(loc, 0);
-  auto one = b.create<arith::ConstantIndexOp>(loc, 1);
 
   for (unsigned i = 0, e = producer.getNumLoops(); i < e; ++i) {
     auto shapeDim = getShapeDefiningLoopRange(producer, i);
@@ -125,14 +124,14 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
     sizeBounds.push_back(dim);
     auto it = fusedLoopsAndRanges.find(i);
     if (it != fusedLoopsAndRanges.end()) {
-      ivs.push_back(it->second.offset);
-      tileSizes.push_back(it->second.size);
+      ivs.push_back(materializeOpFoldResult(b, loc, it->second.offset));
+      tileSizes.push_back(materializeOpFoldResult(b, loc, it->second.size));
       loopRanges.push_back(it->second);
       LLVM_DEBUG(llvm::dbgs() << "tiled loop#" << i << " with LoopRange "
                               << loopRanges.back() << "\n");
     } else {
       tileSizes.push_back(zero);
-      loopRanges.push_back(Range{zero, dim, one});
+      loopRanges.push_back(Range{b.getIndexAttr(0), dim, b.getIndexAttr(1)});
       LLVM_DEBUG(llvm::dbgs() << "full loop#" << i << " with LoopRange "
                               << loopRanges.back() << "\n");
     }
@@ -168,8 +167,9 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
 
   // Shift all IndexOp results by the tile offset.
   SmallVector<Value> allIvs;
-  llvm::transform(loopRanges, std::back_inserter(allIvs),
-                  [](Range range) { return range.offset; });
+  llvm::transform(loopRanges, std::back_inserter(allIvs), [&](Range range) {
+    return materializeOpFoldResult(b, loc, range.offset);
+  });
   offsetIndices(b, clonedOp, allIvs);
 
   return clonedOp;

diff --git a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp
index 66a558ce8cfaf..df8e9f87f0587 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp
@@ -143,8 +143,9 @@ static LinalgOp getTiledProducer(OpBuilder &b, OpResult producerResult,
   // Obtain the `producerOp` loop bounds and the `sliceOp` ranges.
   SmallVector<Value> producerLoopBounds;
   llvm::transform(producerOp.createLoopRanges(b, loc),
-                  std::back_inserter(producerLoopBounds),
-                  [](Range range) { return range.size; });
+                  std::back_inserter(producerLoopBounds), [&](Range range) {
+                    return materializeOpFoldResult(b, loc, range.size);
+                  });
   SmallVector<Range> sliceOpRanges = sliceOp.getOrCreateRanges(b, loc);
 
   // Tile the producer operands given the `sliceOp` ranges. Iterate the
@@ -157,8 +158,10 @@ static LinalgOp getTiledProducer(OpBuilder &b, OpResult producerResult,
   for (auto it : zip(tiledSliceDimIndices, tiledProducerLoopIndices)) {
     int64_t tiledSliceDim = std::get<0>(it);
     int64_t tiledProducerLoop = std::get<1>(it);
-    tileIvs[tiledProducerLoop] = sliceOpRanges[tiledSliceDim].offset;
-    tileSizes[tiledProducerLoop] = sliceOpRanges[tiledSliceDim].size;
+    tileIvs[tiledProducerLoop] =
+        materializeOpFoldResult(b, loc, sliceOpRanges[tiledSliceDim].offset);
+    tileSizes[tiledProducerLoop] =
+        materializeOpFoldResult(b, loc, sliceOpRanges[tiledSliceDim].size);
     allIvs[tiledProducerLoop] = tileIvs[tiledProducerLoop];
   }
   erase_value(tileIvs, nullptr);

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
index 8ce2a348c951c..56ce6b85ba879 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
@@ -223,14 +223,20 @@ FailureOr<PromotionInfo> mlir::linalg::promoteSubviewAsNewBuffer(
     if (droppedDims[en.index()])
       continue;
     auto rangeValue = en.value();
-    // Try to extract a tight constant.
+    // Try to extract a tight constant. If the size is known statically, no need
+    // to look for the bound.
     LLVM_DEBUG(llvm::dbgs() << "Extract tightest: " << rangeValue.size << "\n");
-    FailureOr<int64_t> upperBound =
-        getConstantUpperBoundForIndex(rangeValue.size);
-    Value size =
-        failed(upperBound)
-            ? rangeValue.size
-            : b.create<arith::ConstantIndexOp>(loc, upperBound.value());
+    Value size;
+    if (auto attr = rangeValue.size.dyn_cast<Attribute>()) {
+      size = materializeOpFoldResult(b, loc, rangeValue.size);
+    } else {
+      Value materializedSize = materializeOpFoldResult(b, loc, rangeValue.size);
+      FailureOr<int64_t> upperBound =
+          getConstantUpperBoundForIndex(materializedSize);
+      size = failed(upperBound)
+                 ? materializedSize
+                 : b.create<arith::ConstantIndexOp>(loc, upperBound.getValue());
+    }
     LLVM_DEBUG(llvm::dbgs() << "Extracted tightest: " << size << "\n");
     fullSizes.push_back(size);
     partialSizes.push_back(

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Split.cpp b/mlir/lib/Dialect/Linalg/Transforms/Split.cpp
index d735c671ab49c..c00a1f8568506 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Split.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Split.cpp
@@ -74,12 +74,10 @@ linalg::splitOp(RewriterBase &rewriter, TilingInterface op, unsigned dimension,
   if (dimension >= iterationSpace.size())
     return std::make_pair(op, TilingInterface());
 
-  SmallVector<OpFoldResult> offsets =
-      getAsOpFoldResult(llvm::to_vector(llvm::map_range(
-          iterationSpace, [](const Range &range) { return range.offset; })));
-  SmallVector<OpFoldResult> sizes =
-      getAsOpFoldResult(llvm::to_vector(llvm::map_range(
-          iterationSpace, [](const Range &range) { return range.size; })));
+  SmallVector<OpFoldResult> offsets = llvm::to_vector(llvm::map_range(
+      iterationSpace, [](const Range &range) { return range.offset; }));
+  SmallVector<OpFoldResult> sizes = llvm::to_vector(llvm::map_range(
+      iterationSpace, [](const Range &range) { return range.size; }));
 
   // Adjust the split point so that it doesn't overflow the size.
   AffineExpr d0, d1, d2;
@@ -105,7 +103,7 @@ linalg::splitOp(RewriterBase &rewriter, TilingInterface op, unsigned dimension,
   TilingInterface firstPart = createSplitPart(
       rewriter, op.getLoc(), op, offsets, sizes,
       op.getDestinationOperands(rewriter), dimension, minSplitPoint,
-      getAsOpFoldResult(iterationSpace[dimension].offset), firstResults);
+      iterationSpace[dimension].offset, firstResults);
 
   // Need to pretend that the original op now takes as operands firstResults,
   // otherwise tiling interface implementation will take the wrong value to

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index 98d5393b6026b..9066bb83410ea 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -66,8 +66,7 @@ mlir::linalg::makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map,
   // Create a new range with the applied tile sizes.
   SmallVector<Range, 4> res;
   for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx)
-    res.push_back(Range{b.create<arith::ConstantIndexOp>(loc, 0),
-                        shapeSizes[idx], tileSizes[idx]});
+    res.push_back(Range{b.getIndexAttr(0), shapeSizes[idx], tileSizes[idx]});
   return std::make_tuple(res, loopIndexToRangeIndex);
 }
 
@@ -567,10 +566,12 @@ static LogicalResult tilePadOp(RewriterBase &builder, tensor::PadOp op,
   SmallVector<Range> ranges = tilingInterface.getIterationDomain(builder);
   SmallVector<Value> lbs, dims, allDims, steps;
   for (int64_t i = 0; i < rank; ++i) {
-    allDims.push_back(ranges[i].size);
+    Value materializedSize =
+        materializeOpFoldResult(builder, loc, ranges[i].size);
+    allDims.push_back(materializedSize);
     if (!isZero(tileSizes[i])) {
-      lbs.push_back(ranges[i].offset);
-      dims.push_back(ranges[i].size);
+      lbs.push_back(materializeOpFoldResult(builder, loc, ranges[i].offset));
+      dims.push_back(materializedSize);
       steps.push_back(tileSizes[i]);
     }
   }

diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index d26eb484d0955..59db977095d3b 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -129,13 +129,14 @@ template struct mlir::linalg::GenerateLoopNest<AffineForOp>;
 
 /// Given a list of subview ranges, extract individual values for lower, upper
 /// bounds and steps and put them into the corresponding vectors.
-static void unpackRanges(ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
+static void unpackRanges(OpBuilder &builder, Location loc,
+                         ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
                          SmallVectorImpl<Value> &ubs,
                          SmallVectorImpl<Value> &steps) {
   for (Range range : ranges) {
-    lbs.emplace_back(range.offset);
-    ubs.emplace_back(range.size);
-    steps.emplace_back(range.stride);
+    lbs.emplace_back(materializeOpFoldResult(builder, loc, range.offset));
+    ubs.emplace_back(materializeOpFoldResult(builder, loc, range.size));
+    steps.emplace_back(materializeOpFoldResult(builder, loc, range.stride));
   }
 }
 
@@ -524,7 +525,7 @@ void GenerateLoopNest<scf::ForOp>::doit(
   }
 
   SmallVector<Value, 4> lbs, ubs, steps;
-  unpackRanges(loopRanges, lbs, ubs, steps);
+  unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
   LoopNest loopNest = mlir::scf::buildLoopNest(
       b, loc, lbs, ubs, steps, iterArgInitValues,
       [&](OpBuilder &b, Location loc, ValueRange ivs, ValueRange iterArgs) {
@@ -567,7 +568,7 @@ void GenerateLoopNest<AffineForOp>::doit(
   SmallVector<Value> iterArgInitValues = linalgOp.getOutputTensorOperands();
   assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
   SmallVector<Value, 4> lbs, ubs, steps;
-  unpackRanges(loopRanges, lbs, ubs, steps);
+  unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
 
   // Affine loops require constant steps.
   SmallVector<int64_t, 4> constantSteps;
@@ -744,7 +745,7 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
   stepsStorage.reserve(numLoops);
 
   // Get the loop lb, ub, and step.
-  unpackRanges(loopRanges, lbsStorage, ubsStorage, stepsStorage);
+  unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
 
   // Modify the lb, ub, and step based on the distribution options.
   SmallVector<DistributionMethod, 0> distributionMethod;
@@ -986,6 +987,12 @@ Value materializeOpFoldResult(ImplicitLocOpBuilder &builder,
   return builder.create<arith::ConstantIndexOp>(attr.getValue().getSExtValue());
 }
 
+Value materializeOpFoldResult(OpBuilder &builder, Location loc,
+                              OpFoldResult opFoldResult) {
+  ImplicitLocOpBuilder b(loc, builder);
+  return materializeOpFoldResult(b, opFoldResult);
+}
+
 SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
                                       LinalgOp linalgOp,
                                       ArrayRef<Value> valuesToTile,

diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
index c62f27d8a22f4..e2ca296616644 100644
--- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
@@ -14,6 +14,7 @@
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/Arithmetic/Utils/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/SCF/Utils/Utils.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
@@ -117,23 +118,25 @@ generateTileLoopNest(OpBuilder &builder, Location loc,
   AffineMap minMap = AffineMap::get(1, 2, {s0, s1 - d0}, builder.getContext());
 
   for (auto loopRange : llvm::enumerate(loopRanges)) {
+    Value offset =
+        getValueOrCreateConstantIndexOp(builder, loc, loopRange.value().offset);
+    Value size =
+        getValueOrCreateConstantIndexOp(builder, loc, loopRange.value().size);
     // No loops if tile size is zero. Set offset and size to the loop
     // offset and size.
     if (matchPattern(tileSizeVals[loopRange.index()], m_Zero())) {
-      offsets[loopRange.index()] = loopRange.value().offset;
-      sizes[loopRange.index()] = loopRange.value().size;
+      offsets[loopRange.index()] = offset;
+      sizes[loopRange.index()] = size;
       continue;
     }
 
     auto loop = builder.create<scf::ForOp>(
-        loc, loopRange.value().offset, loopRange.value().size,
-        tileSizeVals[loopRange.index()], ValueRange{},
+        loc, offset, size, tileSizeVals[loopRange.index()], ValueRange{},
         [&](OpBuilder &bodyBuilder, Location bodyLoc, Value iv,
             ValueRange /*iterArgs*/) {
           Value boundedTileSize = builder.create<AffineMinOp>(
               bodyLoc, minMap,
-              ValueRange{iv, tileSizeVals[loopRange.index()],
-                         loopRange.value().size});
+              ValueRange{iv, tileSizeVals[loopRange.index()], size});
           sizes[loopRange.index()] = boundedTileSize;
           builder.create<scf::YieldOp>(loc);
         });