[Mlir-commits] [mlir] [mlir][vector] linearize vector.insert_strided_slice (flatten to vector.shuffle) (PR #138725)

Fri May 9 14:58:12 PDT 2025

================
@@ -109,17 +109,108 @@ struct LinearizeVectorizable final
   }
 };
 
-/// This pattern converts the ExtractStridedSliceOp into a ShuffleOp that works
-/// on a linearized vector.
-/// Following,
+template <typename TOp>
+static bool stridesAllOne(TOp op) {
+  static_assert(
+      std::is_same_v<TOp, vector::ExtractStridedSliceOp> ||
+          std::is_same_v<TOp, vector::InsertStridedSliceOp>,
+      "expected vector.extract_strided_slice or vector.insert_strided_slice");
+  ArrayAttr strides = op.getStrides();
+  return llvm::all_of(
+      strides, [](auto stride) { return isConstantIntValue(stride, 1); });
+}
+
+/// Convert an array of attributes into a vector of integers, if possible.
+static FailureOr<SmallVector<int64_t>> intsFromArrayAttr(ArrayAttr attrs) {
+  if (!attrs)
+    return failure();
+  SmallVector<int64_t> ints;
+  ints.reserve(attrs.size());
+  for (auto attr : attrs) {
+    if (auto intAttr = dyn_cast<IntegerAttr>(attr)) {
+      ints.push_back(intAttr.getInt());
+    } else {
+      return failure();
+    }
+  }
+  return ints;
+}
+
+/// Consider inserting a vector of shape `small` into a vector of shape `large`,
+/// at position `offsets`: this function enumeratates all the indices in `large`
+/// that are written to. The enumeration is with row-major ordering.
+///
+/// Example: insert a 1x2 vector into a 4x5 vector at position (1,3). The 2
+/// positions written to are (1,3) and (1,4), which have linearized indices 8
+/// and 9. So [8,9] is returned.
+///
+/// The length of the returned vector is equal to the number of elements in
+/// the shape `small` (i.e. the product of dimensions of `small`).
+SmallVector<int64_t> static getStridedSliceInsertionIndices(
+    ArrayRef<int64_t> small, ArrayRef<int64_t> large,
+    ArrayRef<int64_t> offsets) {
+
+  // Example of alignment between, `large`, `small` and `offsets`:
+  //    large  =  4, 5, 6, 7, 8
+  //    small  =     1, 6, 7, 8
+  //  offsets  =  2, 3, 0
+  //
+  // `offsets` has implicit trailing 0s, `small` has implicit leading 1s.
+  assert((large.size() >= small.size()) &&
+         "rank of 'large' cannot be lower than rank of 'small'");
+  assert((large.size() >= offsets.size()) &&
+         "rank of 'large' cannot be lower than the number of offsets");
+  unsigned delta = large.size() - small.size();
+  unsigned nOffsets = offsets.size();
+  auto getSmall = [&](int64_t i) { return i >= delta ? small[i - delta] : 1; };
+  auto getOffset = [&](int64_t i) { return i < nOffsets ? offsets[i] : 0; };
+
+  // Using 2 vectors of indices, at each iteration populate the updated set of
+  // indices based on the old set of indices, and the size of the small vector
+  // in the current iteration.
+  SmallVector<int64_t> indices{0};
+  SmallVector<int64_t> nextIndices;
+  int64_t stride = 1;
+  for (int i = large.size() - 1; i >= 0; --i) {
+    auto currentSize = indices.size();
+    auto smallSize = getSmall(i);
+    auto nextSize = currentSize * smallSize;
+    nextIndices.resize(nextSize);
+    int64_t *base = nextIndices.begin();
+    int64_t offset = getOffset(i) * stride;
+    for (int j = 0; j < smallSize; ++j) {
+      for (uint64_t k = 0; k < currentSize; ++k) {
+        base[k] = indices[k] + offset;
+      }
+      offset += stride;
+      base += currentSize;
+    }
+    stride *= large[i];
+    std::swap(indices, nextIndices);
+    nextIndices.clear();
----------------
dcaballe wrote:

wonder if it's better to `resize` + `clear` or just move `nextIndices` decl inside the loop... The latter at least would make clearer that `nextIndices` doesn't have valid information at the beginning of the iteration... It wasn't obvious to me with the `resize` and `clear` approach...

https://github.com/llvm/llvm-project/pull/138725