[Mlir-commits] [mlir] [mlir][tensor] Extend the logic to generalise tensor.pack (PR #109815)

Han-Chung Wang llvmlistbot at llvm.org
Mon Sep 30 14:57:00 PDT 2024


================
@@ -16,28 +16,49 @@
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Arith/Utils/Utils.h"
 #include "mlir/Dialect/Utils/IndexingUtils.h"
+#include "mlir/Dialect/Vector/IR//VectorOps.h"
 #include "mlir/Interfaces/ValueBoundsOpInterface.h"
 
 using namespace mlir;
 using namespace mlir::tensor;
 
-PadOp mlir::tensor::createPadHighOp(RankedTensorType type, Value source,
+PadOp mlir::tensor::createPadHighOp(RankedTensorType resType, Value source,
                                     Value pad, bool nofold, Location loc,
-                                    OpBuilder &b) {
-  SmallVector<OpFoldResult> low(type.getRank(), b.getIndexAttr(0));
-  SmallVector<OpFoldResult> high(type.getRank(), b.getIndexAttr(0));
-  for (const auto &en : enumerate(type.getShape())) {
-    // Pad only the static dimensions of the result tensor type.
-    if (ShapedType::isDynamic(en.value()))
+                                    OpBuilder &b,
+                                    SmallVector<Value> dynOutDims) {
+
+  assert((resType.getNumDynamicDims() == dynOutDims.size() ||
+          dynOutDims.empty()) &&
+         "Either none or all output dynamic dims must be specified!");
+
+  // Init "low" and "high" padding values ("low" is kept as is, "high" is
+  // computed below).
+  SmallVector<OpFoldResult> low(resType.getRank(), b.getIndexAttr(0));
+  SmallVector<OpFoldResult> high(resType.getRank(), b.getIndexAttr(0));
+
+  size_t outDimIdx = 0;
+
+  for (const auto [idx, val] : enumerate(resType.getShape())) {
+    bool isDimDynamic = ShapedType::isDynamic(val);
+    bool updatePadHigh = !isDimDynamic || !dynOutDims.empty();
+
+    // Keep the default padding width (i.e. "0") when the output dim is dynamic
+    // and no actual output sizes have been provided.
+    if (!updatePadHigh)
       continue;
-    // Compute the padding width.
-    AffineExpr d0;
-    bindDims(b.getContext(), d0);
-    OpFoldResult sz = tensor::getMixedSize(b, loc, source, en.index());
-    high[en.index()] =
-        affine::makeComposedFoldedAffineApply(b, loc, en.value() - d0, {sz});
+
+    // Compute the padding width: resDim - sourceDim.
+    AffineExpr d0, d1;
+    bindDims(b.getContext(), d0, d1);
+    OpFoldResult sourceDim = tensor::getMixedSize(b, loc, source, idx);
+    Value outDim = isDimDynamic
+                       ? dynOutDims[outDimIdx++]
+                       : b.create<arith::ConstantIndexOp>(loc, val).getResult();
----------------
hanhanW wrote:

I think it is easier if we use OpFoldResult here. That way you don't need to materialize a constant op when it is not needed; the idea is to avoid creating IR unless we have to. Can you give it a shot?

```cpp
    OpFoldResult outDim =
        isDimDynamic ? dynOutDims[outDimIdx++] : b.getIndexAttr(val);
```

or something like:

```cpp
    OpFoldResult outDim = b.getIndexAttr(val);
    if (isDimDynamic)
      outDim = dynOutDims[outDimIdx++];
```
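
For reference, a minimal sketch of what the loop body from the patch might look like with this suggestion applied (an illustration only, not part of the review; it reuses `affine::makeComposedFoldedAffineApply`, which the surrounding code already calls, since it accepts `OpFoldResult` operands and folds to an attribute when everything is static):

```cpp
    // Compute the padding width: outDim - sourceDim.
    AffineExpr d0, d1;
    bindDims(b.getContext(), d0, d1);
    OpFoldResult sourceDim = tensor::getMixedSize(b, loc, source, idx);
    // Use an index attribute for static dims so that no constant op is
    // materialized; only genuinely dynamic dims consume an SSA value.
    OpFoldResult outDim = b.getIndexAttr(val);
    if (isDimDynamic)
      outDim = dynOutDims[outDimIdx++];
    // makeComposedFoldedAffineApply folds to an attribute when both
    // operands are static, so no IR is created in the fully static case.
    high[idx] = affine::makeComposedFoldedAffineApply(b, loc, d0 - d1,
                                                      {outDim, sourceDim});
```

With `OpFoldResult` flowing straight into the folded affine apply, the fully static path stays IR-free end to end.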

https://github.com/llvm/llvm-project/pull/109815

