[Mlir-commits] [mlir] [mlir][tensor] Implement constant folder for tensor.pad (PR #92691)

Wed Jun 5 12:05:50 PDT 2024

https://github.com/sabauma updated https://github.com/llvm/llvm-project/pull/92691

>From 8c2e9c5b82d86dda22fc029d94770d4b08a01f9e Mon Sep 17 00:00:00 2001
From: Spenser Bauman <sabauma at fastmail>
Date: Sat, 18 May 2024 19:43:05 -0400
Subject: [PATCH] [mlir][tensor] Implement constant folder for tensor.pad

Extend the folding ability of the RewriteAsConstant patterns to include
tensor.pad operations on constants. The new pattern will constant fold
tensor.pad operations which operate on tensor constants and have
statically resolvable padding sizes/values.

  %init = arith.constant dense<[[6, 7], [8, 9]]> : tensor<2x2xi32>
  %pad_value = arith.constant 0 : i32

  %0 = tensor.pad %init low[1, 1] high[1, 1] {
    ^bb0(%arg1: index, %arg2: index):
      tensor.yield %pad_value : i32
  } : tensor<2x2xi32> to tensor<4x4xi32>

becomes

  %cst = arith.constant dense<[[0, 0, 0, 0],
                               [0, 6, 7, 0],
                               [0, 8, 9, 0],
                               [0, 0, 0, 0]]> : tensor<4x4xi32>
---
 .../Tensor/TransformOps/TensorTransformOps.td |   4 +-
 .../Dialect/Tensor/Transforms/Transforms.h    |   5 +-
 .../TransformOps/TensorTransformOps.cpp       |  15 +-
 .../Tensor/Transforms/RewriteAsConstant.cpp   | 165 +++++++++++++++++-
 mlir/lib/Dialect/Utils/IndexingUtils.cpp      |   2 +-
 .../Dialect/Tensor/rewrite-as-constant.mlir   | 135 ++++++++++++++
 6 files changed, 321 insertions(+), 5 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
index fea5afa0b7bb3..81bab1b0c82f7 100644
--- a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
@@ -114,12 +114,14 @@ def ApplyReassociativeReshapeFoldingPatternsOp : Op<Transform_Dialect,
 def ApplyRewriteTensorOpsAsConstantPatternsOp : Op<Transform_Dialect,
     "apply_patterns.tensor.rewrite_as_constant",
     [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
+  let arguments = (ins UnitAttr:$aggressive);
   let description = [{
     Indicates that tensor ops (such as tensor.generate) should be replaced with
     constants (arith.constant) when possible.
   }];
 
-  let assemblyFormat = "attr-dict";
+  let assemblyFormat =
+      "(`aggressive` $aggressive^)? attr-dict";
 }
 
 def Transform_TensorPadOp : Transform_ConcreteOpType<"tensor.pad">;
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
index 7dabc266c023b..7f983b8b3cfd0 100644
--- a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
@@ -91,9 +91,12 @@ void populateSimplifyPackAndUnpackPatterns(RewritePatternSet &patterns);
 /// respectively.
 void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns);
 
+using ControlFoldFn = std::function<bool(OpOperand *)>;
+
 /// Populates `patterns` with patterns that replace tensor ops (such as
 /// tensor.generate) with constants when possible.
-void populateRewriteAsConstantPatterns(RewritePatternSet &patterns);
+void populateRewriteAsConstantPatterns(RewritePatternSet &patterns,
+                                       const ControlFoldFn &controlFn);
 
 //===----------------------------------------------------------------------===//
 // Transform helpers
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
index 5c6a32ce9a68d..33016f84056e9 100644
--- a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
+++ b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
@@ -127,7 +127,20 @@ void transform::ApplyReassociativeReshapeFoldingPatternsOp::populatePatterns(
 
 void transform::ApplyRewriteTensorOpsAsConstantPatternsOp::populatePatterns(
     RewritePatternSet &patterns) {
-  tensor::populateRewriteAsConstantPatterns(patterns);
+  ControlFoldFn defaultControlFn = [](OpOperand *fusedOperand) {
+    Operation *producer = fusedOperand->get().getDefiningOp();
+    return producer && producer->hasOneUse();
+  };
+
+  ControlFoldFn aggressiveControlFn = [](OpOperand *fusedOperand) {
+    return true;
+  };
+
+  // Use the always-true control function only when `aggressive` is set.
+  if (getAggressive())
+    tensor::populateRewriteAsConstantPatterns(patterns, aggressiveControlFn);
+  else
+    tensor::populateRewriteAsConstantPatterns(patterns, defaultControlFn);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp b/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp
index 11e1de543ac91..7c9fced540adb 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp
@@ -8,9 +8,12 @@
 //
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/PatternMatch.h"
 
+#include "llvm/ADT/TypeSwitch.h"
+
 using namespace mlir;
 using namespace mlir::tensor;
 
@@ -45,9 +48,169 @@ struct GenerateToConstant : public OpRewritePattern<GenerateOp> {
   }
 };
 
+/// Transform a linear index from one indexing space to another given:
+///
+/// - the shape of the source indexing space,
+/// - the strides of the target indexing space,
+/// - a linear index into the source indexing space.
+///
+/// This function is logically a sequence of linearize/delinearize over
+/// different bases but avoids allocating intermediate SmallVectors.
+int64_t transformIndexSpace(ArrayRef<int64_t> inputShape,
+                            ArrayRef<int64_t> outputStrides,
+                            int64_t srcLinearIndex) {
+  assert(inputShape.size() == outputStrides.size());
+
+  int64_t dstLinearIndex = 0;
+
+  for (int64_t dim = inputShape.size() - 1; dim >= 0; --dim) {
+    // Compute the index into the current dimension of the source tensor.
+    // `quotient` is the remaining linear index after accounting for the
+    // current dimension.
+    //
+    // `remainder` is the index into the source tensor for the current
+    // dimension.
+    auto [quotient, remainder] = std::div(srcLinearIndex, inputShape[dim]);
+
+    srcLinearIndex = quotient;
+
+    // Add the contribution of the current dimension to the output using the
+    // stride of the same dimension in the target indexing space.
+    dstLinearIndex += outputStrides[dim] * remainder;
+  }
+
+  return dstLinearIndex;
+}
+
+template <typename ElemType, typename AttrType>
+Value constantFoldPadOp(PatternRewriter &rewriter, Location loc,
+                        DenseElementsAttr input, AttrType padValue,
+                        ArrayRef<int64_t> padLow, ArrayRef<int64_t> padHigh) {
+  auto inputValues = input.tryGetValues<ElemType>();
+  if (failed(inputValues))
+    return nullptr;
+
+  auto oldShape = input.getType().getShape();
+
+  // Compute the output shape of the new value.
+  auto newShape =
+      llvm::map_to_vector(llvm::zip(oldShape, padLow, padHigh),
+                          [](std::tuple<int64_t, int64_t, int64_t> pack) {
+                            auto [old, low, high] = pack;
+                            return old + low + high;
+                          });
+
+  int64_t outputSize = computeProduct(newShape);
+
+  // Fully initialize the vector with the padding value.
+  // The non-padded area will then be copied.
+  SmallVector<ElemType> values(outputSize, padValue.getValue());
+
+  // Strides of the output tensor are used to transform from the indexing
+  // space of the input tensor to that of the output tensor.
+  SmallVector<int64_t> outputStrides = computeStrides(newShape);
+
+  // The contribution of the low padding to the offset in the output tensor.
+  // This is the starting position of the source tensor within the padded
+  // output tensor.
+  int64_t startingOffset = linearize(padLow, outputStrides);
+
+  // Copy values from the input tensor to the corresponding sub-region
+  // of the output tensor.
+  for (auto [inputIndex, inputValue] : llvm::enumerate(*inputValues)) {
+    auto outputIndex = transformIndexSpace(oldShape, outputStrides, inputIndex);
+    values[outputIndex + startingOffset] = inputValue;
+  }
+
+  // Create an attribute for the folded value.
+  auto newType = input.getType().clone(newShape);
+  auto newAttr = DenseElementsAttr::get(newType, values);
+
+  Operation *constantOp =
+      rewriter.getContext()
+          ->getLoadedDialect<TensorDialect>()
+          ->materializeConstant(rewriter, newAttr, newType, loc);
+
+  return constantOp ? constantOp->getResult(0) : nullptr;
+}
+
+struct PadOpToConstant final : public OpRewritePattern<PadOp> {
+  /// `controlFn` is queried on the pad's source operand and may veto a fold.
+  PadOpToConstant(MLIRContext *context, const ControlFoldFn &controlFn,
+                  PatternBenefit benefit = 1)
+      : OpRewritePattern<PadOp>(context, benefit), controlFn{controlFn} {}
+
+  LogicalResult matchAndRewrite(PadOp padTensorOp,
+                                PatternRewriter &rewriter) const override {
+    if (padTensorOp.getNofold())
+      return rewriter.notifyMatchFailure(
+          padTensorOp, "refusing to fold nofold pad operation");
+
+    TypedValue<RankedTensorType> input = padTensorOp.getSource();
+    RankedTensorType resultType = padTensorOp.getResult().getType();
+
+    DenseElementsAttr inputAttr = nullptr;
+    if (!matchPattern(input, m_Constant(&inputAttr)))
+      return failure();
+
+    Value paddingValue = padTensorOp.getConstantPaddingValue();
+
+    // Extract the constant value used for padding or bail out.
+    Attribute paddingAttr = nullptr;
+    if (!paddingValue || !matchPattern(paddingValue, m_Constant(&paddingAttr)))
+      return rewriter.notifyMatchFailure(padTensorOp,
+                                         "unable to get constant value");
+
+    // Try to extract the constant values of the low and high padding.
+    auto lowPad = getConstantIntValues(padTensorOp.getMixedLowPad());
+    auto highPad = getConstantIntValues(padTensorOp.getMixedHighPad());
+
+    // If the padding cannot be extracted, bail out.
+    if (!lowPad || !highPad)
+      return rewriter.notifyMatchFailure(padTensorOp,
+                                         "unable to extract constant padding");
+
+    // We have a potential candidate, consult the control function to
+    // determine if the op should fold.
+    if (!controlFn(&padTensorOp.getSourceMutable()))
+      return rewriter.notifyMatchFailure(padTensorOp,
+                                         "not folding due to cost function");
+
+    Location loc = padTensorOp.getLoc();
+
+    // Dispatch on the padding attribute kind; only float and integer fold.
+    Value newOp =
+        llvm::TypeSwitch<Attribute, Value>(paddingAttr)
+            .Case([&](FloatAttr floatAttr) {
+              return constantFoldPadOp<llvm::APFloat>(
+                  rewriter, loc, inputAttr, floatAttr, *lowPad, *highPad);
+            })
+            .Case([&](IntegerAttr integerAttr) {
+              return constantFoldPadOp<llvm::APInt>(
+                  rewriter, loc, inputAttr, integerAttr, *lowPad, *highPad);
+            })
+            .Default(Value());
+
+    if (!newOp)
+      return rewriter.notifyMatchFailure(padTensorOp,
+                                         "tensor type not supported");
+
+    if (newOp.getType() != resultType)
+      newOp = rewriter.create<tensor::CastOp>(loc, resultType, newOp);
+
+    rewriter.replaceOp(padTensorOp, newOp);
+    return success();
+  }
+
+private:
+  ControlFoldFn controlFn;
+};
+
 } // namespace
 
 void mlir::tensor::populateRewriteAsConstantPatterns(
-    RewritePatternSet &patterns) {
+    RewritePatternSet &patterns, const ControlFoldFn &controlFn) {
   patterns.add<GenerateToConstant>(patterns.getContext());
+
+  patterns.add<PadOpToConstant>(patterns.getContext(), controlFn);
 }
diff --git a/mlir/lib/Dialect/Utils/IndexingUtils.cpp b/mlir/lib/Dialect/Utils/IndexingUtils.cpp
index 4c960659d80cb..aba225be720c3 100644
--- a/mlir/lib/Dialect/Utils/IndexingUtils.cpp
+++ b/mlir/lib/Dialect/Utils/IndexingUtils.cpp
@@ -92,7 +92,7 @@ int64_t mlir::computeProduct(ArrayRef<int64_t> basis) {
   assert(llvm::all_of(basis, [](int64_t s) { return s > 0; }) &&
          "basis must be nonnegative");
   if (basis.empty())
-    return 0;
+    return 1;
   return std::accumulate(basis.begin(), basis.end(), 1,
                          std::multiplies<int64_t>());
 }
diff --git a/mlir/test/Dialect/Tensor/rewrite-as-constant.mlir b/mlir/test/Dialect/Tensor/rewrite-as-constant.mlir
index 1a1cf9e407d80..35ee6f1caf0d9 100644
--- a/mlir/test/Dialect/Tensor/rewrite-as-constant.mlir
+++ b/mlir/test/Dialect/Tensor/rewrite-as-constant.mlir
@@ -21,3 +21,138 @@ func.func @tensor_generate_constant() -> tensor<2x3x5xf32> {
   } : tensor<2x3x5xf32>
   return %0 : tensor<2x3x5xf32>
 }
+
+//         CHECK-LABEL: func @pad_of_ints(
+//               CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}:     [0, 0, 0, 0],
+// CHECK-SAME{LITERAL}:     [0, 6, 7, 0],
+// CHECK-SAME{LITERAL}:     [0, 8, 9, 0],
+// CHECK-SAME{LITERAL}:     [0, 0, 0, 0]
+// CHECK-SAME{LITERAL}:     ]> : tensor<4x4xi32>
+//               CHECK: %[[cast:.*]] = tensor.cast %[[cst]] : tensor<4x4xi32> to tensor<?x?xi32>
+//               CHECK: return %[[cast]]
+func.func @pad_of_ints() -> tensor<?x?xi32> {
+  %init = arith.constant dense<[[6, 7], [8, 9]]> : tensor<2x2xi32>
+  %pad_value = arith.constant 0 : i32
+
+  %c1 = arith.constant 1 : index
+
+  %0 = tensor.pad %init low[%c1, %c1] high[%c1, %c1] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %pad_value : i32
+  } : tensor<2x2xi32> to tensor<?x?xi32>
+
+  return %0 : tensor<?x?xi32>
+}
+
+//         CHECK-LABEL: func @pad_of_floats(
+//               CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}:     [0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00],
+// CHECK-SAME{LITERAL}:     [0.000000e+00, 6.000000e+00, 7.000000e+00, 0.000000e+00],
+// CHECK-SAME{LITERAL}:     [0.000000e+00, 8.000000e+00, 9.000000e+00, 0.000000e+00],
+// CHECK-SAME{LITERAL}:     [0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00]
+// CHECK-SAME{LITERAL}:     ]> : tensor<4x4xf32>
+//               CHECK: return %[[cst]]
+
+func.func @pad_of_floats() -> tensor<4x4xf32> {
+  %init = arith.constant dense<[[6.0, 7.0], [8.0, 9.0]]> : tensor<2x2xf32>
+  %pad_value = arith.constant 0.0 : f32
+
+  %0 = tensor.pad %init low[1, 1] high[1, 1] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %pad_value : f32
+  } : tensor<2x2xf32> to tensor<4x4xf32>
+
+  return %0 : tensor<4x4xf32>
+}
+
+//         CHECK-LABEL: func @pad_of_ints_no_low_dims(
+//               CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}:     [6, 7, 0],
+// CHECK-SAME{LITERAL}:     [8, 9, 0],
+// CHECK-SAME{LITERAL}:     [0, 0, 0]
+// CHECK-SAME{LITERAL}:     ]> : tensor<3x3xi32>
+//               CHECK: return %[[cst]]
+func.func @pad_of_ints_no_low_dims() -> tensor<3x3xi32> {
+  %init = arith.constant dense<[[6, 7], [8, 9]]> : tensor<2x2xi32>
+  %pad_value = arith.constant 0 : i32
+
+  %0 = tensor.pad %init low[0, 0] high[1, 1] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %pad_value : i32
+  } : tensor<2x2xi32> to tensor<3x3xi32>
+
+  return %0 : tensor<3x3xi32>
+}
+
+//         CHECK-LABEL: func @pad_of_ints_no_high_dims(
+//               CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}:     [0, 0, 0],
+// CHECK-SAME{LITERAL}:     [0, 6, 7],
+// CHECK-SAME{LITERAL}:     [0, 8, 9]
+// CHECK-SAME{LITERAL}:     ]> : tensor<3x3xi32>
+//               CHECK: return %[[cst]]
+func.func @pad_of_ints_no_high_dims() -> tensor<3x3xi32> {
+  %init = arith.constant dense<[[6, 7], [8, 9]]> : tensor<2x2xi32>
+  %pad_value = arith.constant 0 : i32
+
+  %0 = tensor.pad %init low[1, 1] high[0, 0] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %pad_value : i32
+  } : tensor<2x2xi32> to tensor<3x3xi32>
+
+  return %0 : tensor<3x3xi32>
+}
+
+//         CHECK-LABEL: func @pad_multi_use_do_not_fold(
+//               CHECK: %[[pad:.+]] = tensor.pad
+//               CHECK: return %[[pad]]
+func.func @pad_multi_use_do_not_fold() -> (tensor<?x?xi32>, tensor<2x2xi32>) {
+  %init = arith.constant dense<[[6, 7], [8, 9]]> : tensor<2x2xi32>
+  %pad_value = arith.constant 0 : i32
+
+  %c1 = arith.constant 1 : index
+
+  %0 = tensor.pad %init low[%c1, %c1] high[%c1, %c1] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %pad_value : i32
+  } : tensor<2x2xi32> to tensor<?x?xi32>
+
+  return %0, %init : tensor<?x?xi32>, tensor<2x2xi32>
+}
+
+// -----
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+    %func_op = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.op<"func.func">
+    transform.apply_patterns to %func_op {
+      transform.apply_patterns.tensor.rewrite_as_constant aggressive
+    } : !transform.op<"func.func">
+    transform.yield
+  }
+}
+
+//         CHECK-LABEL: func @pad_aggressive_fold(
+//               CHECK: %[[init:.*]] = arith.constant dense<7> : tensor<2x2xi32>
+//               CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}:     [0, 0, 0, 0],
+// CHECK-SAME{LITERAL}:     [0, 7, 7, 0],
+// CHECK-SAME{LITERAL}:     [0, 7, 7, 0],
+// CHECK-SAME{LITERAL}:     [0, 0, 0, 0]
+// CHECK-SAME{LITERAL}:     ]> : tensor<4x4xi32>
+//               CHECK: %[[cast:.*]] = tensor.cast %[[cst]] : tensor<4x4xi32> to tensor<?x?xi32>
+//               CHECK: return %[[cast]]
+func.func @pad_aggressive_fold() -> (tensor<?x?xi32>, tensor<2x2xi32>) {
+  %init = arith.constant dense<7> : tensor<2x2xi32>
+  %pad_value = arith.constant 0 : i32
+
+  %c1 = arith.constant 1 : index
+
+  %0 = tensor.pad %init low[%c1, %c1] high[%c1, %c1] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %pad_value : i32
+  } : tensor<2x2xi32> to tensor<?x?xi32>
+
+  return %0, %init : tensor<?x?xi32>, tensor<2x2xi32>
+}