[Mlir-commits] [mlir] [mlir] Do not bufferize parallel_insert_slice dest to read for full slices (PR #112761)

Thu Oct 17 23:54:09 PDT 2024

================
@@ -636,6 +637,34 @@ struct InsertOpInterface
   }
 };
 
+template <typename InsertOpTy>
+static bool insertSliceOpRequiresRead(InsertOpTy insertSliceOp,
+                                      OpOperand &opOperand) {
+  RankedTensorType destType = insertSliceOp.getDestType();
+
+  // The source is always read.
+  if (opOperand == insertSliceOp.getSourceMutable())
+    return true;
+
+  // For the destination, it depends...
+  assert(opOperand == insertSliceOp.getDestMutable() && "expected dest");
+
+  // Dest is not read if it is entirely overwritten. E.g.:
+  // tensor.insert_slice %a into %t[0][10][1] : ... into tensor<10xf32>
+  bool allOffsetsZero =
+      llvm::all_of(insertSliceOp.getMixedOffsets(),
+                   [](OpFoldResult ofr) { return isConstantIntValue(ofr, 0); });
+  bool sizesMatchDestSizes = llvm::all_of(
+      llvm::enumerate(insertSliceOp.getMixedSizes()), [&](const auto &it) {
+        return getConstantIntValue(it.value()) ==
----------------
matthias-springer wrote:

`getConstantIntValue` works for both SSA values (e.g., `%c1`) and `OpFoldResult`s, so it is the helper I usually prefer.

https://github.com/llvm/llvm-project/pull/112761