[Mlir-commits] [mlir] efc290c - [mlir][affine] More efficient `makeComposedFolded...` helpers

Thu Jun 22 01:47:46 PDT 2023

Author: Matthias Springer
Date: 2023-06-22T10:47:38+02:00
New Revision: efc290ce9c2b85bbd66ebc9ea43986fe89cbff15

URL: https://github.com/llvm/llvm-project/commit/efc290ce9c2b85bbd66ebc9ea43986fe89cbff15
DIFF: https://github.com/llvm/llvm-project/commit/efc290ce9c2b85bbd66ebc9ea43986fe89cbff15.diff

LOG: [mlir][affine] More efficient `makeComposedFolded...` helpers

The old code used to materialize constants as ops, immediately folded them into the resulting affine map and then deleted the constant ops again. Instead, directly fold the attributes into the affine map. Furthermore, all helpers accept `OpFoldResult` instead of `Value` now. This makes the code at call sites more efficient, because it is no longer necessary to materialize a `Value`, just to be able to use these helper functions.

Note: The API has changed (accepts OpFoldResult instead of Value), otherwise this change is NFC.

Differential Revision: https://reviews.llvm.org/D153324

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
    mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
    mlir/lib/Dialect/Affine/IR/AffineOps.cpp
    mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
    mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp
    mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
    mlir/lib/Dialect/Tensor/Transforms/ExtractSliceFromReshapeUtils.cpp
    mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
index 1409d52d64a6f..778c3b3593236 100644

--- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
+++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
@@ -26,6 +26,8 @@ namespace affine {
 
 class AffineApplyOp;
 class AffineBound;
+class AffineMaxOp;
+class AffineMinOp;
 class AffineValueMap;
 
 /// A utility function to check if a value is defined at the top level of an
@@ -382,10 +384,9 @@ void canonicalizeSetAndOperands(IntegerSet *set,
 /// other AffineApplyOps supplying those operands. The operands of the resulting
 /// AffineApplyOp do not change the length of  AffineApplyOp chains.
 AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map,
-                                      ValueRange operands);
-/// Variant of `makeComposedAffineApply` which infers the AffineMap from `e`.
+                                      ArrayRef<OpFoldResult> operands);
 AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineExpr e,
-                                      ValueRange values);
+                                      ArrayRef<OpFoldResult> operands);
 
 /// Constructs an AffineApplyOp that applies `map` to `operands` after composing
 /// the map with the maps of any other AffineApplyOp supplying the operands,
@@ -407,8 +408,8 @@ SmallVector<OpFoldResult> makeComposedFoldedMultiResultAffineApply(
 
 /// Returns an AffineMinOp obtained by composing `map` and `operands` with
 /// AffineApplyOps supplying those operands.
-Value makeComposedAffineMin(OpBuilder &b, Location loc, AffineMap map,
-                            ValueRange operands);
+AffineMinOp makeComposedAffineMin(OpBuilder &b, Location loc, AffineMap map,
+                                  ArrayRef<OpFoldResult> operands);
 
 /// Constructs an AffineMinOp that computes a minimum across the results of
 /// applying `map` to `operands`, then immediately attempts to fold it. If

diff  --git a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
index 08c5214aae3db..da57368696797 100644
--- a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
+++ b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
@@ -63,7 +63,7 @@ static void getXferIndices(RewriterBase &rewriter, TransferOpType xferOp,
   for (auto expr : xferOp.getPermutationMap().getResults()) {
     if (auto dim = expr.template dyn_cast<AffineDimExpr>()) {
       Value prevIdx = indices[dim.getPosition()];
-      SmallVector<Value, 3> dims(dimValues.begin(), dimValues.end());
+      SmallVector<OpFoldResult, 3> dims(dimValues.begin(), dimValues.end());
       dims.push_back(prevIdx);
       AffineExpr d0 = rewriter.getAffineDimExpr(offsetMap.getNumDims());
       indices[dim.getPosition()] = affine::makeComposedAffineApply(

diff  --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index c4c1aaba638d1..ae93942bb8f0b 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -1193,111 +1193,53 @@ void mlir::affine::fullyComposeAffineMapAndOperands(
   }
 }
 
-/// Given a list of `OpFoldResult`, build the necessary operations to populate
-/// `actualValues` with values produced by operations. In particular, for any
-/// attribute-typed element in `values`, call the constant materializer
-/// associated with the Affine dialect to produce an operation. Do NOT notify
-/// the builder listener about the constant ops being created as they are
-/// intended to be removed after being folded into affine constructs; this is
-/// not suitable for use beyond the Affine dialect.
-static void materializeConstants(OpBuilder &b, Location loc,
-                                 ArrayRef<OpFoldResult> values,
-                                 SmallVectorImpl<Operation *> &constants,
-                                 SmallVectorImpl<Value> &actualValues) {
-  OpBuilder::Listener *listener = b.getListener();
-  b.setListener(nullptr);
-  auto listenerResetter =
-      llvm::make_scope_exit([listener, &b] { b.setListener(listener); });
-
-  actualValues.reserve(values.size());
-  auto *dialect = b.getContext()->getLoadedDialect<AffineDialect>();
-  for (OpFoldResult ofr : values) {
-    if (auto value = llvm::dyn_cast_if_present<Value>(ofr)) {
-      actualValues.push_back(value);
-      continue;
+/// Fold all attributes among the given operands into the affine map and return
+/// all remaining values. The affine map is modified in-place.
+static SmallVector<Value>
+foldAttributesIntoMap(Builder &b, AffineMap *map,
+                      ArrayRef<OpFoldResult> operands) {
+  SmallVector<AffineExpr> dimReplacements, symReplacements;
+  SmallVector<Value, 8> valueOperands;
+  int64_t numDims = 0;
+  for (int64_t i = 0; i < map->getNumDims(); ++i) {
+    if (auto attr = operands[i].dyn_cast<Attribute>()) {
+      dimReplacements.push_back(
+          b.getAffineConstantExpr(attr.cast<IntegerAttr>().getInt()));
+    } else {
+      dimReplacements.push_back(b.getAffineDimExpr(numDims++));
+      valueOperands.push_back(operands[i].get<Value>());
     }
-    // Since we are directly specifying `index` as the result type, we need to
-    // ensure the provided attribute is also an index type. Otherwise, the
-    // AffineDialect materializer will create invalid `arith.constant`
-    // operations if the provided Attribute is any other kind of integer.
-    constants.push_back(dialect->materializeConstant(
-        b,
-        b.getIndexAttr(llvm::cast<IntegerAttr>(ofr.get<Attribute>()).getInt()),
-        b.getIndexType(), loc));
-    actualValues.push_back(constants.back()->getResult(0));
   }
-}
-
-/// Create an operation of the type provided as template argument and attempt to
-/// fold it immediately. The operation is expected to have a builder taking
-/// arbitrary `leadingArguments`, followed by a list of Value-typed `operands`.
-/// The operation is also expected to always produce a single result. Return an
-/// `OpFoldResult` containing the Attribute representing the folded constant if
-/// complete folding was possible and a Value produced by the created operation
-/// otherwise.
-template <typename OpTy, typename... Args>
-static std::enable_if_t<OpTy::template hasTrait<OpTrait::OneResult>(),
-                        OpFoldResult>
-createOrFold(OpBuilder &b, Location loc, ValueRange operands,
-             Args &&...leadingArguments) {
-  // Identify the constant operands and extract their values as attributes.
-  // Note that we cannot use the original values directly because the list of
-  // operands may have changed due to canonicalization and composition.
-  SmallVector<Attribute> constantOperands;
-  constantOperands.reserve(operands.size());
-  for (Value operand : operands) {
-    IntegerAttr attr;
-    if (matchPattern(operand, m_Constant(&attr)))
-      constantOperands.push_back(attr);
-    else
-      constantOperands.push_back(nullptr);
-  }
-
-  // Create the operation and immediately attempt to fold it. On success,
-  // delete the operation and prepare the (unmaterialized) value for being
-  // returned. On failure, return the operation result value. Temporarily remove
-  // the listener to avoid notifying it when the op is created as it may be
-  // removed immediately and there is no way of notifying the caller about that
-  // without resorting to RewriterBase.
-  //
-  // TODO: arguably, the main folder (createOrFold) API should support this use
-  // case instead of indiscriminately materializing constants.
-  OpBuilder::Listener *listener = b.getListener();
-  b.setListener(nullptr);
-  auto listenerResetter =
-      llvm::make_scope_exit([listener, &b] { b.setListener(listener); });
-  OpTy op =
-      b.create<OpTy>(loc, std::forward<Args>(leadingArguments)..., operands);
-  SmallVector<OpFoldResult, 1> foldResults;
-  if (succeeded(op->fold(constantOperands, foldResults)) &&
-      !foldResults.empty()) {
-    op->erase();
-    return foldResults.front();
+  int64_t numSymbols = 0;
+  for (int64_t i = 0; i < map->getNumSymbols(); ++i) {
+    if (auto attr = operands[i + map->getNumDims()].dyn_cast<Attribute>()) {
+      symReplacements.push_back(
+          b.getAffineConstantExpr(attr.cast<IntegerAttr>().getInt()));
+    } else {
+      symReplacements.push_back(b.getAffineSymbolExpr(numSymbols++));
+      valueOperands.push_back(operands[i + map->getNumDims()].get<Value>());
+    }
   }
-
-  // Notify the listener now that we definitely know that the operation will
-  // persist. Use the original listener stored in the variable.
-  if (listener)
-    listener->notifyOperationInserted(op);
-  return op->getResult(0);
+  *map = map->replaceDimsAndSymbols(dimReplacements, symReplacements, numDims,
+                                    numSymbols);
+  return valueOperands;
 }
 
-AffineApplyOp mlir::affine::makeComposedAffineApply(OpBuilder &b, Location loc,
-                                                    AffineMap map,
-                                                    ValueRange operands) {
-  AffineMap normalizedMap = map;
-  SmallVector<Value, 8> normalizedOperands(operands.begin(), operands.end());
-  composeAffineMapAndOperands(&normalizedMap, &normalizedOperands);
-  assert(normalizedMap);
-  return b.create<AffineApplyOp>(loc, normalizedMap, normalizedOperands);
+AffineApplyOp
+mlir::affine::makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map,
+                                      ArrayRef<OpFoldResult> operands) {
+  SmallVector<Value> valueOperands = foldAttributesIntoMap(b, &map, operands);
+  composeAffineMapAndOperands(&map, &valueOperands);
+  assert(map);
+  return b.create<AffineApplyOp>(loc, map, valueOperands);
 }
 
-AffineApplyOp mlir::affine::makeComposedAffineApply(OpBuilder &b, Location loc,
-                                                    AffineExpr e,
-                                                    ValueRange values) {
+AffineApplyOp
+mlir::affine::makeComposedAffineApply(OpBuilder &b, Location loc, AffineExpr e,
+                                      ArrayRef<OpFoldResult> operands) {
   return makeComposedAffineApply(
       b, loc, AffineMap::inferFromExprList(ArrayRef<AffineExpr>{e}).front(),
-      values);
+      operands);
 }
 
 /// Composes the given affine map with the given list of operands, pulling in
@@ -1336,17 +1278,35 @@ mlir::affine::makeComposedFoldedAffineApply(OpBuilder &b, Location loc,
                                             ArrayRef<OpFoldResult> operands) {
   assert(map.getNumResults() == 1 && "building affine.apply with !=1 result");
 
-  SmallVector<Operation *> constants;
-  SmallVector<Value> actualValues;
-  materializeConstants(b, loc, operands, constants, actualValues);
-  composeAffineMapAndOperands(&map, &actualValues);
-  OpFoldResult result = createOrFold<AffineApplyOp>(b, loc, actualValues, map);
+  // Temporarily disconnect the listener, so that no notification is triggered
+  // if the op is folded.
+  // TODO: OpBuilder::createOrFold should return OpFoldResults, then this
+  // workaround is no longer needed.
+  OpBuilder::Listener *listener = b.getListener();
+  b.setListener(nullptr);
+  auto listenerResetter =
+      llvm::make_scope_exit([listener, &b] { b.setListener(listener); });
+
+  // Create op.
+  AffineApplyOp applyOp = makeComposedAffineApply(b, loc, map, operands);
+
+  // Get constant operands.
+  SmallVector<Attribute> constOperands(applyOp->getNumOperands());
+  for (unsigned i = 0, e = constOperands.size(); i != e; ++i)
+    matchPattern(applyOp->getOperand(i), m_Constant(&constOperands[i]));
+
+  // Try to fold the operation.
+  SmallVector<OpFoldResult> foldResults;
+  if (failed(applyOp->fold(constOperands, foldResults)) ||
+      foldResults.empty()) {
+    if (listener)
+      listener->notifyOperationInserted(applyOp);
+    return applyOp.getResult();
+  }
 
-  // Constants are always folded into affine min/max because they can be
-  // represented as constant expressions, so delete them.
-  for (Operation *op : constants)
-    op->erase();
-  return result;
+  applyOp->erase();
+  assert(foldResults.size() == 1 && "expected 1 folded result");
+  return foldResults.front();
 }
 
 OpFoldResult
@@ -1369,29 +1329,53 @@ mlir::affine::makeComposedFoldedMultiResultAffineApply(
                              });
 }
 
-Value mlir::affine::makeComposedAffineMin(OpBuilder &b, Location loc,
-                                          AffineMap map, ValueRange operands) {
-  SmallVector<Value> allOperands = llvm::to_vector(operands);
-  composeMultiResultAffineMap(map, allOperands);
-  return b.createOrFold<AffineMinOp>(loc, b.getIndexType(), map, allOperands);
+template <typename OpTy>
+static OpTy makeComposedMinMax(OpBuilder &b, Location loc, AffineMap map,
+                               ArrayRef<OpFoldResult> operands) {
+  SmallVector<Value> valueOperands = foldAttributesIntoMap(b, &map, operands);
+  composeMultiResultAffineMap(map, valueOperands);
+  return b.create<OpTy>(loc, b.getIndexType(), map, valueOperands);
+}
+
+AffineMinOp
+mlir::affine::makeComposedAffineMin(OpBuilder &b, Location loc, AffineMap map,
+                                    ArrayRef<OpFoldResult> operands) {
+  return makeComposedMinMax<AffineMinOp>(b, loc, map, operands);
 }
 
 template <typename OpTy>
 static OpFoldResult makeComposedFoldedMinMax(OpBuilder &b, Location loc,
                                              AffineMap map,
                                              ArrayRef<OpFoldResult> operands) {
-  SmallVector<Operation *> constants;
-  SmallVector<Value> actualValues;
-  materializeConstants(b, loc, operands, constants, actualValues);
-  composeMultiResultAffineMap(map, actualValues);
-  OpFoldResult result =
-      createOrFold<OpTy>(b, loc, actualValues, b.getIndexType(), map);
-
-  // Constants are always folded into affine min/max because they can be
-  // represented as constant expressions, so delete them.
-  for (Operation *op : constants)
-    op->erase();
-  return result;
+  // Temporarily disconnect the listener, so that no notification is triggered
+  // if the op is folded.
+  // TODO: OpBuilder::createOrFold should return OpFoldResults, then this
+  // workaround is no longer needed.
+  OpBuilder::Listener *listener = b.getListener();
+  b.setListener(nullptr);
+  auto listenerResetter =
+      llvm::make_scope_exit([listener, &b] { b.setListener(listener); });
+
+  // Create op.
+  auto minMaxOp = makeComposedMinMax<OpTy>(b, loc, map, operands);
+
+  // Get constant operands.
+  SmallVector<Attribute> constOperands(minMaxOp->getNumOperands());
+  for (unsigned i = 0, e = constOperands.size(); i != e; ++i)
+    matchPattern(minMaxOp->getOperand(i), m_Constant(&constOperands[i]));
+
+  // Try to fold the operation.
+  SmallVector<OpFoldResult> foldResults;
+  if (failed(minMaxOp->fold(constOperands, foldResults)) ||
+      foldResults.empty()) {
+    if (listener)
+      listener->notifyOperationInserted(minMaxOp);
+    return minMaxOp.getResult();
+  }
+
+  minMaxOp->erase();
+  assert(foldResults.size() == 1 && "expected 1 folded result");
+  return foldResults.front();
 }
 
 OpFoldResult

diff  --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
index 28b645e0ebc68..3916cda50a7bb 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
@@ -149,7 +149,7 @@ struct GpuWarpIdBuilder : public GpuIdBuilder {
       // Reverse back to be in [x, y, z] order.
       for (AffineExpr e : llvm::reverse(delinearizingExprs))
         ids.push_back(
-            affine::makeComposedAffineApply(rewriter, loc, e, warpId));
+            affine::makeComposedAffineApply(rewriter, loc, e, {warpId}));
 
       // clang-format off
       LDBG("----linearId: " << linearId);
@@ -205,7 +205,7 @@ struct GpuLinearIdBuilder : public GpuIdBuilder {
       // Reverse back to be in [x, y, z] order.
       for (AffineExpr e : llvm::reverse(delinearizingExprs))
         ids.push_back(
-            affine::makeComposedAffineApply(rewriter, loc, e, linearId));
+            affine::makeComposedAffineApply(rewriter, loc, e, {linearId}));
 
       // clang-format off
       LLVM_DEBUG(llvm::interleaveComma(reverseBasisSizes,

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp
index 48c24598f628f..036d76c12eba6 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp
@@ -68,8 +68,7 @@ static Value getConvolvedIndex(OpBuilder &b, Location loc, Value oIndex,
   AffineExpr oExpr, fExpr;
   bindSymbols(b.getContext(), oExpr, fExpr);
   AffineMap convMap = AffineMap::get(0, 2, stride * oExpr + fExpr);
-  return affine::makeComposedAffineApply(b, loc, convMap,
-                                         ValueRange{oIndex, fIndex});
+  return affine::makeComposedAffineApply(b, loc, convMap, {oIndex, fIndex});
 }
 
 FailureOr<std::pair<Operation *, Operation *>>

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index 273cd797bb39a..f4e9c24a08618 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -178,8 +178,8 @@ mlir::linalg::computeMultiTileSizes(OpBuilder &builder, LinalgOp op,
   AffineExpr s0 = b.getAffineSymbolExpr(0);
   AffineExpr s1 = b.getAffineSymbolExpr(1);
   AffineExpr s2 = b.getAffineSymbolExpr(2);
-  auto apply = [&](AffineExpr expr, ValueRange values) -> Value {
-    return affine::makeComposedAffineApply(b, b.getLoc(), expr, values);
+  auto apply = [&](AffineExpr expr, ArrayRef<OpFoldResult> ofrs) -> Value {
+    return affine::makeComposedAffineApply(b, b.getLoc(), expr, ofrs);
   };
   Value a = apply(s0.floorDiv(s1), {tripCount, divisorValue});
   Value t = apply((s0 + s1 - 1).floorDiv(s1), {targetSizeValue, divisorValue});

diff  --git a/mlir/lib/Dialect/Tensor/Transforms/ExtractSliceFromReshapeUtils.cpp b/mlir/lib/Dialect/Tensor/Transforms/ExtractSliceFromReshapeUtils.cpp
index 968d68e143fe1..b7f5218e8f6a9 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/ExtractSliceFromReshapeUtils.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/ExtractSliceFromReshapeUtils.cpp
@@ -61,12 +61,9 @@ static DimAndIndex invertSliceIndexing(OpBuilder &b, Location loc,
   auto [dim, indexValue] = dimAndIndex;
   assert(dim < sliceParams.size() && "slice should be non rank-reducing");
   return std::make_pair(
-      dim,
-      affine::makeComposedAffineApply(
-          b, loc, s0 + d0 * s1,
-          {indexValue,
-           getValueOrCreateConstantIndexOp(b, loc, sliceParams[dim].offset),
-           getValueOrCreateConstantIndexOp(b, loc, sliceParams[dim].stride)}));
+      dim, affine::makeComposedAffineApply(
+               b, loc, s0 + d0 * s1,
+               {indexValue, sliceParams[dim].offset, sliceParams[dim].stride}));
 }
 
 /// Transform `dimAndIndex` from the result tensor index space of a

diff  --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp
index 6dacb1e199f3f..3aa5cd01928d7 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp
@@ -77,7 +77,7 @@ static Value createInBoundsCond(RewriterBase &b,
     auto d0 = getAffineDimExpr(0, xferOp.getContext());
     auto vs = getAffineConstantExpr(vectorSize, xferOp.getContext());
     Value sum = affine::makeComposedAffineApply(b, loc, d0 + vs,
-                                                xferOp.indices()[indicesIdx]);
+                                                {xferOp.indices()[indicesIdx]});
     Value cond = createFoldedSLE(
         b, sum, vector::createOrFoldDimOp(b, loc, xferOp.source(), indicesIdx));
     if (!cond)