[Mlir-commits] [mlir] d26c42a - [mlir][linalg] Control dimensions to pad.
llvmlistbot at llvm.org
Mon Mar 28 07:41:10 PDT 2022
Author: gysit
Date: 2022-03-28T14:39:57Z
New Revision: d26c42af57fc23557202006e362ba6ba1e3e57dc
URL: https://github.com/llvm/llvm-project/commit/d26c42af57fc23557202006e362ba6ba1e3e57dc
DIFF: https://github.com/llvm/llvm-project/commit/d26c42af57fc23557202006e362ba6ba1e3e57dc.diff
LOG: [mlir][linalg] Control dimensions to pad.
This revision supports padding only a subset of the iteration dimensions via an additional padding-dimensions parameter. This control allows us to pad an operation in multiple steps. For example, one may want to pad only the output dimensions of a producer matmul fused into a consumer loop nest, before tiling and padding its reduction dimension.
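As a hedged illustration (shapes, names, and the tile size below are hypothetical), padding only the parallel dimensions d0 and d1 of such a matmul pads dimension 0 of the LHS tile to a static size while leaving its reduction dimension dynamic:

  %lhs = tensor.extract_slice %A[%i, %k] [%sz0, %sz1] [1, 1]
      : tensor<24x12xf32> to tensor<?x?xf32>
  // Only dimension 0 of the LHS depends on d0 or d1, so only its
  // high padding is nonzero; the reduction dimension stays dynamic.
  %lhs_padded = tensor.pad %lhs low[0, 0] high[%h0, 0] {
  ^bb0(%arg0: index, %arg1: index):
    tensor.yield %cst : f32
  } : tensor<?x?xf32> to tensor<4x?xf32>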
Depends On D122309
Reviewed By: nicolasvasilache
Differential Revision: https://reviews.llvm.org/D122560
Added:
Modified:
mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
mlir/lib/Dialect/Linalg/Utils/Utils.cpp
mlir/lib/Dialect/Tensor/Utils/Utils.cpp
mlir/test/Dialect/Linalg/codegen-strategy.mlir
mlir/test/Dialect/Linalg/pad.mlir
mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index c6a4335b551d3..ab892fd1fcb85 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -560,6 +560,12 @@ struct LinalgPaddingOptions {
paddingValues.assign(pv.begin(), pv.end());
return *this;
}
+ /// A list of iterator dimensions to pad.
+ SmallVector<int64_t> paddingDimensions;
+ LinalgPaddingOptions &setPaddingDimensions(ArrayRef<int64_t> pd) {
+ paddingDimensions.assign(pd.begin(), pd.end());
+ return *this;
+ }
/// A flag for every operand to mark the PadOp as nofold which enables packing
/// for statically shaped operands.
SmallVector<bool> packPaddings;
@@ -1217,13 +1223,15 @@ struct PadOpTransformationPattern : public OpRewritePattern<tensor::PadOp> {
PatternRewriter &rewriter) const override;
};
-/// Pad the operands of `opToPad` to a static bounding box. Use `paddingValues`
-/// and `packPaddings` to set the padding value and the nofold attribute of the
-/// introduced tensor::PadOps, respectively. Update `paddedOp` to the cloned
-/// statically shaped operation and return the extracted dynamically shaped
-/// results. If padding fails, return failure.
+/// Pad the iterator dimensions `paddingDimensions` of all `opToPad` operands to
+/// a static bounding box. Use `paddingValues` and `packPaddings` to set padding
+/// value and nofold attribute of the created tensor::PadOps, respectively.
+/// Update `paddedOp` to the cloned operation with statically shaped
+/// `paddingDimensions` and return the extracted dynamically shaped results. If
+/// padding fails, return failure.
FailureOr<SmallVector<Value>>
rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
+ ArrayRef<int64_t> paddingDimensions,
ArrayRef<Attribute> paddingValues,
ArrayRef<bool> packPaddings, LinalgOp &paddedOp);
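A minimal usage sketch (the surrounding pattern setup and the paddingValueAttrs list are assumed to exist elsewhere); the new setPaddingDimensions control composes with the existing per-operand options:

  LinalgPaddingOptions options;
  // Pad only iterator dimensions d0 and d1; d2 is left untouched.
  options.setPaddingDimensions({0, 1});
  // One padding value attribute per operand (hypothetical list).
  options.setPaddingValues(paddingValueAttrs);
  // Mark the input pads nofold to enable packing of static operands.
  options.setPackPaddings({true, true, false});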
diff --git a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
index 4b4c53896b7d8..f8c8d3dd357b4 100644
--- a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
@@ -15,11 +15,11 @@ namespace mlir {
namespace tensor {
// Return a PadOp that pads `source` to `type` size where the static
-// sizes are assumed to be greater than the dynamic sizes. The op performs
-// "high" padding (i.e. it adds trailing padding values until the desired
-// size is met).
-PadOp createPadHighOp(Type type, Value source, Value pad, bool nofold,
- Location loc, OpBuilder &builder);
+// sizes are assumed to be greater than the dynamic sizes. If `type` has dynamic
+// dimensions, the padding width is set to zero. The op performs "high" padding
+// (i.e. it adds trailing padding values until the desired size is met).
+PadOp createPadHighOp(RankedTensorType type, Value source, Value pad,
+ bool nofold, Location loc, OpBuilder &builder);
// Return a PadOp that pads `source` to `type` size with `pad` value.
// I.e., a block will be created and the `pad` value will be yielded
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 6a59bf32f718b..9caac73ea1cf0 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -158,29 +158,41 @@ LinalgTilingOptions &mlir::linalg::LinalgTilingOptions::scalarizeDynamicDims() {
return *this;
}
-/// Pad `opOperand` using the provided `paddingValues`. Exit early for scalar
-/// operands, if `paddingValues` contains no value for the `opOperand`, or if
-/// `opOperand` is not defined by an ExtractSliceOp. Otherwise, try to pad the
-/// operand even if it already has a static shape. Set `result` to the result of
-/// the created tensor::PadOp and return success if the operand either has
-/// been padded to a static shape or already had a static shape and failure
-/// otherwise.
-static LogicalResult padOperandToSmallestStaticBoundingBox(
+/// Pad the `opOperand` in the `paddingDimensions` using the padding value and
+/// the nofold flag found in `paddingValues` and `packPaddings`, respectively.
+/// Exit early and return the `opOperand` value if the shape dimensions that
+/// match `paddingDimensions` have a static size and the nofold flag is not set.
+/// Otherwise, try to pad the shape dimensions that match the iterator
+/// dimensions `paddingDimensions` and return the tensor::PadOp result if
+/// padding succeeds or failure otherwise.
+static FailureOr<Value> padOperandToSmallestStaticBoundingBox(
OpBuilder &b, linalg::LinalgOp opToPad, OpOperand *opOperand,
- ArrayRef<Attribute> paddingValues, ArrayRef<bool> packPaddings,
- Value &result) {
- // Get the shape of the operand and check if it has a dynamic shape. Only
- // return failure if the operand is not a scalar and has a dynamic shape.
+ ArrayRef<int64_t> paddingDimensions, ArrayRef<Attribute> paddingValues,
+ ArrayRef<bool> packPaddings) {
+ AffineMap indexingMap = opToPad.getTiedIndexingMap(opOperand);
ArrayRef<int64_t> shape = opToPad.getShape(opOperand);
- bool hasDynamicShape = llvm::is_contained(shape, ShapedType::kDynamicSize);
- // Cannot pad scalar operands.
- if (shape.empty())
- return success();
+ // Collect the shape dimensions that are a function of the `paddingDimensions`.
+ llvm::SmallDenseSet<int64_t> shapeDimsToPad;
+ for (int64_t dim : paddingDimensions)
+ for (const auto &en : enumerate(indexingMap.getResults()))
+ if (en.value().isFunctionOfDim(dim))
+ shapeDimsToPad.insert(en.index());
- // Cannot pad if the padding value is unknown.
+ // Return the unpadded operand if padding to a static shape is not needed and
+ // if the nofold flag is not set.
+ bool nofold = opOperand->getOperandNumber() < packPaddings.size()
+ ? packPaddings[opOperand->getOperandNumber()]
+ : false;
+ bool hasStaticShape = llvm::none_of(shapeDimsToPad, [&](int64_t dim) {
+ return ShapedType::isDynamic(shape[dim]);
+ });
+ if (!nofold && hasStaticShape)
+ return opOperand->get();
+
+ // Fail if `paddingValues` specifies no padding value.
if (opOperand->getOperandNumber() >= paddingValues.size())
- return failure(hasDynamicShape);
+ return failure();
Attribute paddingAttr = paddingValues[opOperand->getOperandNumber()];
Value paddingValue = b.create<arith::ConstantOp>(
opToPad.getLoc(), paddingAttr.getType(), paddingAttr);
@@ -192,27 +204,31 @@ static LogicalResult padOperandToSmallestStaticBoundingBox(
currOpOperand = linalgOp.getOutputOperand(result.getResultNumber());
}
- // Cannot construct a static bounding box if the `currOpOperand` is not
- // defined by an ExtractSliceOp.
+ // Fail if `currOpOperand` is not defined by an ExtractSliceOp.
auto sliceOp = currOpOperand->get().getDefiningOp<tensor::ExtractSliceOp>();
if (!sliceOp)
- return failure(hasDynamicShape);
+ return failure();
// Compute the dropped dimensions if `sliceOp` is rank-reducing.
llvm::SmallBitVector droppedDims = sliceOp.getDroppedDims();
+ OffsetSizeAndStrideOpInterface shapedOp = sliceOp;
// Upper bound the `sliceOp` sizes to obtain a static bounding box.
- SmallVector<int64_t> staticSizes;
- staticSizes.reserve(shape.size());
- auto shapedOp = cast<OffsetSizeAndStrideOpInterface>(sliceOp.getOperation());
+ SmallVector<int64_t> paddedShape(shape.begin(), shape.end());
+ int64_t shapeIdx = 0;
for (const auto &en : enumerate(shapedOp.getMixedSizes())) {
// Skip dropped dimensions.
if (droppedDims.test(en.index()))
continue;
- // If the size is an attribute add it directly to `staticSizes`.
+ // Skip dimensions that do not require padding.
+ if (!shapeDimsToPad.contains(shapeIdx)) {
+ shapeIdx++;
+ continue;
+ }
+ // If the size is an attribute, add it directly to `paddedShape`.
if (en.value().is<Attribute>()) {
- staticSizes.push_back(
- en.value().get<Attribute>().dyn_cast<IntegerAttr>().getInt());
+ paddedShape[shapeIdx++] =
+ en.value().get<Attribute>().dyn_cast<IntegerAttr>().getInt();
continue;
}
// Otherwise, try to compute a constant upper bound for the size value.
@@ -222,24 +238,21 @@ static LogicalResult padOperandToSmallestStaticBoundingBox(
LLVM_DEBUG(DBGS() << "No constant bounding box can be found for padding");
return failure();
}
- staticSizes.push_back(upperBound.getValue());
+ paddedShape[shapeIdx++] = upperBound.getValue();
}
- assert(staticSizes.size() == shape.size() &&
+ assert(shapeIdx == static_cast<int64_t>(shape.size()) &&
"expect the dynamic and static ranks to match");
- // Pad the operand to the bounding box defined by `staticSizes`.
- auto staticTensorType = RankedTensorType::get(
- staticSizes, getElementTypeOrSelf(opOperand->get()));
- bool nofold = opOperand->getOperandNumber() < packPaddings.size()
- ? packPaddings[opOperand->getOperandNumber()]
- : false;
- result = makeComposedPadHighOp(b, opToPad->getLoc(), staticTensorType,
- opOperand->get(), paddingValue, nofold);
- return success();
+ // Pad the operand to the bounding box defined by `paddedShape`.
+ auto paddedTensorType = RankedTensorType::get(
+ paddedShape, getElementTypeOrSelf(opOperand->get()));
+ return makeComposedPadHighOp(b, opToPad->getLoc(), paddedTensorType,
+ opOperand->get(), paddingValue, nofold);
}
FailureOr<SmallVector<Value>>
linalg::rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
+ ArrayRef<int64_t> paddingDimensions,
ArrayRef<Attribute> paddingValues,
ArrayRef<bool> packPaddings, LinalgOp &paddedOp) {
Location loc = opToPad->getLoc();
@@ -255,13 +268,12 @@ linalg::rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
SmallVector<Value> newOperands;
newOperands.reserve(opToPad.getNumInputsAndOutputs());
for (OpOperand *opOperand : opToPad.getInputAndOutputOperands()) {
- Value paddedOperand;
- // If padding was requested but the shape cannot be bounded statically then
- // the pattern fails to apply.
- if (failed(padOperandToSmallestStaticBoundingBox(
- b, opToPad, opOperand, paddingValues, packPaddings, paddedOperand)))
+ FailureOr<Value> paddedOperand = padOperandToSmallestStaticBoundingBox(
+ b, opToPad, opOperand, paddingDimensions, paddingValues, packPaddings);
+ // Exit if `paddingDimensions` cannot be bounded statically.
+ if (failed(paddedOperand))
return failure();
- newOperands.push_back(paddedOperand ? paddedOperand : opOperand->get());
+ newOperands.push_back(*paddedOperand);
}
SmallVector<SmallVector<Value>> reifiedResultShapes;
@@ -502,8 +514,8 @@ mlir::linalg::LinalgPaddingPattern::returningMatchAndRewrite(
// Pad the operation.
LinalgOp paddedOp;
FailureOr<SmallVector<Value>> newResults =
- rewriteAsPaddedOp(rewriter, linalgOp, options.paddingValues,
- options.packPaddings, paddedOp);
+ rewriteAsPaddedOp(rewriter, linalgOp, options.paddingDimensions,
+ options.paddingValues, options.packPaddings, paddedOp);
if (failed(newResults))
return failure();
@@ -511,10 +523,16 @@ mlir::linalg::LinalgPaddingPattern::returningMatchAndRewrite(
for (const auto &en : enumerate(options.hoistPaddings)) {
if (static_cast<int64_t>(en.index()) >= paddedOp.getNumInputsAndOutputs())
break;
- OpOperand &opOperand = paddedOp->getOpOperand(en.index());
- auto padOp = opOperand.get().getDefiningOp<tensor::PadOp>();
+ OpOperand *opOperand = &paddedOp->getOpOperand(en.index());
+ auto padOp = opOperand->get().getDefiningOp<tensor::PadOp>();
if (!padOp || en.value() == 0)
continue;
+
+ // Fail hoisting if the operand shape is not fully static.
+ if (llvm::any_of(paddedOp.getShape(opOperand),
+ [](int64_t size) { return ShapedType::isDynamic(size); }))
+ return failure();
+
tensor::PadOp hoistedOp;
SmallVector<GenericOp> transposeOps;
SmallVector<int64_t> transposeVector =
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 5a566c5c30cab..0d2a61713a5ea 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -322,8 +322,6 @@ tensor::ExtractSliceOp makeComposedExtractSliceOp(
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
Value source, Value pad, bool nofold) {
- assert(type.hasStaticShape() && "expect tensor type to have static shape");
-
// Exit if `source` is not defined by an ExtractSliceOp.
auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
if (!sliceOp)
diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
index c7054cf50d060..b35f554ca1a52 100644
--- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
@@ -25,8 +25,8 @@ PadOp mlir::tensor::createPadScalarOp(Type type, Value source, Value pad,
auto padTensorOp =
builder.create<PadOp>(loc, type, source, low, high, nofold);
int rank = padTensorOp.getResultType().getRank();
- SmallVector<Type, 4> blockArgTypes(rank, builder.getIndexType());
- SmallVector<Location, 4> blockArgLocs(rank, loc);
+ SmallVector<Type> blockArgTypes(rank, builder.getIndexType());
+ SmallVector<Location> blockArgLocs(rank, loc);
auto &region = padTensorOp.region();
// `builder.createBlock` changes the insertion point within the block. Create
// a guard to reset the insertion point of the builder after it is destroyed.
@@ -36,19 +36,22 @@ PadOp mlir::tensor::createPadScalarOp(Type type, Value source, Value pad,
return padTensorOp;
}
-PadOp mlir::tensor::createPadHighOp(Type type, Value source, Value pad,
- bool nofold, Location loc, OpBuilder &b) {
- SmallVector<OpFoldResult, 4> low, high;
- auto rankedTensorType = type.cast<RankedTensorType>();
- assert(rankedTensorType.hasStaticShape());
- for (const auto &en : enumerate(rankedTensorType.getShape())) {
+PadOp mlir::tensor::createPadHighOp(RankedTensorType type, Value source,
+ Value pad, bool nofold, Location loc,
+ OpBuilder &b) {
+ auto zero = b.createOrFold<arith::ConstantIndexOp>(loc, 0);
+ SmallVector<OpFoldResult> low(type.getRank(), zero);
+ SmallVector<OpFoldResult> high(type.getRank(), zero);
+ for (const auto &en : enumerate(type.getShape())) {
+ // Pad only the static dimensions of the result tensor type.
+ if (ShapedType::isDynamic(en.value()))
+ continue;
+ // Compute the padding width.
AffineExpr d0;
bindDims(b.getContext(), d0);
auto dimOp = b.createOrFold<tensor::DimOp>(loc, source, en.index());
- Value paddingWidth =
- makeComposedAffineApply(b, loc, en.value() - d0, {dimOp});
- high.push_back(paddingWidth);
- low.push_back(b.createOrFold<arith::ConstantIndexOp>(loc, 0));
+ high[en.index()] =
+ makeComposedAffineApply(b, loc, en.value() - d0, {dimOp}).getResult();
}
return createPadScalarOp(type, source, pad, low, high, nofold, loc, b);
}
diff --git a/mlir/test/Dialect/Linalg/codegen-strategy.mlir b/mlir/test/Dialect/Linalg/codegen-strategy.mlir
index 2e997b5d00689..05f99635e85c6 100644
--- a/mlir/test/Dialect/Linalg/codegen-strategy.mlir
+++ b/mlir/test/Dialect/Linalg/codegen-strategy.mlir
@@ -1,9 +1,9 @@
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=2,4,8 vectorize vectorize-contraction-to=matrixintrinsics unroll-vector-transfers=true" -split-input-file | FileCheck %s --check-prefix=CHECK-INTRINSIC
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 promote promote-full-tile-pad register-tile-sizes=2,4,8 vectorize vectorize-contraction-to=outerproduct split-transfers=true unroll-vector-transfers=false" -split-input-file | FileCheck %s --check-prefix=CHECK-OUTER
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 tile-interchange=1,2,0 generalize iterator-interchange=0,2,1" -split-input-file | FileCheck %s --check-prefix=CHECK-INTERCHANGE
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 pad padding-values=0.:f32,0.:f32,0.:f32 pack-paddings=1,1,0 hoist-paddings=3,3,0" -split-input-file | FileCheck %s --check-prefix=CHECK-PAD
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 fuse pad padding-values=0.:f32,0.:f32,0.:f32 vectorize" -split-input-file | FileCheck %s --check-prefix=CHECK-FUSE
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=conv anchor-op=linalg.conv_2d_nhwc_hwcf tile-sizes=1,1,8,32,1,1,8 fuse pad padding-values=0.:f32,0.:f32,0.:f32 decompose vectorize vectorize-padding" -split-input-file | FileCheck %s --check-prefix=CHECK-DECOMP
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1,2 pack-paddings=1,1,0 hoist-paddings=3,3,0" -split-input-file | FileCheck %s --check-prefix=CHECK-PAD
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 fuse pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1,2 vectorize" -split-input-file | FileCheck %s --check-prefix=CHECK-FUSE
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=conv anchor-op=linalg.conv_2d_nhwc_hwcf tile-sizes=1,1,8,32,1,1,8 fuse pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1,2 decompose vectorize vectorize-padding" -split-input-file | FileCheck %s --check-prefix=CHECK-DECOMP
// CHECK-INTRINSIC: func @matmul(
// CHECK-OUTER: func @matmul(
diff --git a/mlir/test/Dialect/Linalg/pad.mlir b/mlir/test/Dialect/Linalg/pad.mlir
index 7dff3fd82c6a5..e37f0fc06f2d1 100644
--- a/mlir/test/Dialect/Linalg/pad.mlir
+++ b/mlir/test/Dialect/Linalg/pad.mlir
@@ -1,7 +1,8 @@
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 pack-paddings=1,1,0 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=MATMUL
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad padding-values=0.:f32,1.:f32 pack-paddings=1,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad padding-values=0.:f32,0.:f32 pack-paddings=1,0 run-enable-pass=false" -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 pack-paddings=1,1,0 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL-MATMUL
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32 pack-paddings=1,1,0 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=INPUTS-ONLY
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1,2 pack-paddings=1,1,0 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=MATMUL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad padding-values=0.:f32,1.:f32 pack-paddings=0,1 padding-dimensions=0,1,2 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad padding-values=0.:f32,0.:f32 pack-paddings=0,1 padding-dimensions=0,1,2 run-enable-pass=false" -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1,2 pack-paddings=0,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL-MATMUL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32 pack-paddings=1,1,0 padding-dimensions=0,1,2 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=INPUTS-ONLY
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1 pack-paddings=1,1,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=PARTIAL
// MATMUL-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 12, 7)>
// MATMUL-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 7)>
@@ -503,3 +504,34 @@ func.func @rank_reducing(%arg0: tensor<1x64x1x64xf32>,
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x?x?xf32>) -> tensor<1x?x?xf32>
func.return %1 : tensor<1x?x?xf32>
}
+
+// -----
+
+#map0 = affine_map<()[s0] -> (7, s0)>
+
+// PARTIAL: padding_the_output_dims_only
+func.func @padding_the_output_dims_only(%arg0: tensor<24x12xf32>,
+ %arg1: tensor<12x25xf32>,
+ %arg2: tensor<24x25xf32>,
+ %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
+ // PARTIAL-DAG: %[[C0:.*]] = arith.constant 0 : index
+ // PARTIAL-DAG: %[[TS:.*]] = affine.apply
+ %0 = affine.min #map0()[%iv2]
+
+ // Check only the output dimensions of the matmul are padded.
+ // PARTIAL: %[[T0:.*]] = tensor.pad
+ // PARTIAL-SAME: [%[[TS]], %[[C0]]
+ // PARTIAL: %[[T1:.*]] = tensor.pad
+ // PARTIAL-SAME: [%[[C0]], %[[TS]]
+ // PARTIAL: %[[T2:.*]] = tensor.pad
+ // PARTIAL-SAME: [%[[TS]], %[[TS]]
+ %1 = tensor.extract_slice %arg0[%iv0, %iv2] [%0, %0] [1, 1] : tensor<24x12xf32> to tensor<?x?xf32>
+ %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, %0] [1, 1] : tensor<12x25xf32> to tensor<?x?xf32>
+ %3 = tensor.extract_slice %arg2[%iv0, %iv1] [%0, %0] [1, 1] : tensor<24x25xf32> to tensor<?x?xf32>
+
+ // PARTIAL: = linalg.matmul ins(%[[T0]], %[[T1]]
+ // PARTIAL-SAME: outs(%[[T2]]
+ %4 = linalg.matmul ins(%1, %2 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%3 : tensor<?x?xf32>) -> tensor<?x?xf32>
+ %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [%0, %0] [1, 1] : tensor<?x?xf32> into tensor<24x25xf32>
+ func.return %5 : tensor<24x25xf32>
+}
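To spell out the dimension selection in this test: linalg.matmul has the indexing maps (d0, d1, d2) -> (d0, d2), (d0, d1, d2) -> (d2, d1), and (d0, d1, d2) -> (d0, d1) for the LHS, RHS, and output, respectively. With padding-dimensions=0,1, the shape dimensions that are functions of d0 or d1 are dimension 0 of the LHS, dimension 1 of the RHS, and both output dimensions, which matches the [%[[TS]], %[[C0]]], [%[[C0]], %[[TS]]], and [%[[TS]], %[[TS]]] padding widths checked above.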
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
index cb5cd5cb27ab6..09e93d60116ef 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
@@ -101,6 +101,10 @@ struct TestLinalgCodegenStrategy
*this, "padding-values",
llvm::cl::desc("Operand padding values parsed by the attribute parser."),
llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
+ ListOption<int64_t> paddingDimensions{
+ *this, "padding-dimensions",
+ llvm::cl::desc("Operation iterator dimensions to pad."),
+ llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
ListOption<int64_t> packPaddings{
*this, "pack-paddings", llvm::cl::desc("Operand packing flags."),
llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
@@ -267,6 +271,8 @@ void TestLinalgCodegenStrategy::runOnOperation() {
LinalgPaddingOptions paddingOptions;
paddingOptions.setPaddingValues(paddingValueAttributes);
+ paddingOptions.setPaddingDimensions(
+ SmallVector<int64_t>{paddingDimensions.begin(), paddingDimensions.end()});
paddingOptions.setPackPaddings(
SmallVector<bool>{packPaddings.begin(), packPaddings.end()});
paddingOptions.setHoistPaddings(
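For reference, a sample invocation exercising the new option (mirroring the PARTIAL RUN line in pad.mlir above):

  mlir-opt pad.mlir -cse -split-input-file \
    -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1 pack-paddings=1,1,1 run-enable-pass=false"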