[Mlir-commits] [mlir] [mlir][SCF] Remove `scf-bufferize` pass (PR #113840)

Sun Oct 27 17:19:04 PDT 2024

https://github.com/matthias-springer updated https://github.com/llvm/llvm-project/pull/113840

>From df9727cc57a56e5087ac8efb2cfba7317afbcf2f Mon Sep 17 00:00:00 2001
From: Matthias Springer <mspringer at nvidia.com>
Date: Sun, 27 Oct 2024 23:10:27 +0100
Subject: [PATCH] [mlir][SCF] Remove `scf-bufferize` pass

---
 mlir/docs/Bufferization.md                    | 17 +------
 .../mlir/Dialect/SCF/Transforms/Passes.h      |  3 --
 .../mlir/Dialect/SCF/Transforms/Passes.td     |  7 ---
 .../BufferizableOpInterfaceImpl.cpp           |  6 ++-
 mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp | 47 -------------------
 .../lib/Dialect/SCF/Transforms/CMakeLists.txt |  1 -
 mlir/test/Dialect/SCF/bufferize.mlir          | 34 +++++++++++---
 7 files changed, 32 insertions(+), 83 deletions(-)
 delete mode 100644 mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp

diff --git a/mlir/docs/Bufferization.md b/mlir/docs/Bufferization.md
index d5a426e09e7ceb..7d38ebb38535c7 100644
--- a/mlir/docs/Bufferization.md
+++ b/mlir/docs/Bufferization.md
@@ -579,7 +579,6 @@ The code, slightly simplified and annotated, is reproduced here:
   // Partial bufferization passes.
   pm.addPass(createTensorConstantBufferizePass());
   pm.addNestedPass<func::FuncOp>(createTCPBufferizePass()); // Bufferizes the downstream `tcp` dialect.
-  pm.addNestedPass<func::FuncOp>(createSCFBufferizePass());
   pm.addNestedPass<func::FuncOp>(createLinalgBufferizePass());
   pm.addNestedPass<func::FuncOp>(createTensorBufferizePass());
   pm.addPass(createFuncBufferizePass());
@@ -596,7 +595,7 @@ must be module passes because they make changes to the top-level module.
 
 The bulk of the bufferization work is done by the function passes. Most of these
 passes are provided as part of the upstream MLIR distribution and bufferize
-their respective dialects (e.g. `scf-bufferize` bufferizes the `scf` dialect).
+their respective dialects (e.g. `abc-bufferize` bufferizes the `abc` dialect).
 The `tcp-bufferize` pass is an exception -- it is a partial bufferization pass
 used to bufferize the downstream `tcp` dialect, and fits in perfectly with all
 the other passes provided upstream.
@@ -694,20 +693,6 @@ which helps with this in general.
 
 ### Other partial bufferization examples
 
--   `scf-bufferize`
-    ([code](https://github.com/llvm/llvm-project/blob/bc8acf2ce8ad6e8c9b1d97b2e02d3f4ad26e1d9d/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp#L1),
-    [test](https://github.com/llvm/llvm-project/blob/bc8acf2ce8ad6e8c9b1d97b2e02d3f4ad26e1d9d/mlir/test/Dialect/SCF/bufferize.mlir#L1))
-
-    -   Bufferizes ops from the `scf` dialect.
-    -   This is an example of how to bufferize ops that implement
-        `RegionBranchOpInterface` (that is, they use regions to represent
-        control flow).
-    -   The bulk of the work is done by
-        `lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp`
-        ([code](https://github.com/llvm/llvm-project/blob/daaaed6bb89044ac58a23f1bb1ccdd12342a5a58/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp#L1)),
-        which is well-commented and covers how to correctly convert ops that
-        contain regions.
-
 -   `func-bufferize`
     ([code](https://github.com/llvm/llvm-project/blob/2f5715dc78328215d51d5664c72c632a6dac1046/mlir/lib/Dialect/Func/Transforms/FuncBufferize.cpp#L1),
     [test](https://github.com/llvm/llvm-project/blob/2f5715dc78328215d51d5664c72c632a6dac1046/mlir/test/Dialect/Func/func-bufferize.mlir#L1))
diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h
index fb8411418ff9a0..b70599df6f5033 100644
--- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h
@@ -20,9 +20,6 @@ namespace mlir {
 #define GEN_PASS_DECL
 #include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
 
-/// Creates a pass that bufferizes the SCF dialect.
-std::unique_ptr<Pass> createSCFBufferizePass();
-
 /// Creates a pass that specializes for loop for unrolling and
 /// vectorization.
 std::unique_ptr<Pass> createForLoopSpecializationPass();
diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
index 53d1ae10dc87d8..6e5ef96c450aa4 100644
--- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
@@ -11,13 +11,6 @@
 
 include "mlir/Pass/PassBase.td"
 
-def SCFBufferize : Pass<"scf-bufferize"> {
-  let summary = "Bufferize the scf dialect.";
-  let constructor = "mlir::createSCFBufferizePass()";
-  let dependentDialects = ["bufferization::BufferizationDialect",
-                           "memref::MemRefDialect"];
-}
-
 // Note: Making these canonicalization patterns would require a dependency
 // of the SCF dialect on the Affine/Tensor/MemRef dialects or vice versa.
 def SCFForLoopCanonicalization
diff --git a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
index cf40443ff38390..779c41a22e9ee2 100644
--- a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -649,7 +649,8 @@ struct ForOpInterface
     if (failed(bufferizableOp.resolveTensorOpOperandConflicts(rewriter, state)))
       return failure();
 
-    if (!state.getOptions().enforceAliasingInvariants)
+    if (!state.getOptions().enforceAliasingInvariants ||
+        state.getOptions().copyBeforeWrite)
       return success();
 
     // According to the `getAliasing...` implementations, a bufferized OpResult
@@ -889,7 +890,8 @@ struct WhileOpInterface
     if (failed(bufferizableOp.resolveTensorOpOperandConflicts(rewriter, state)))
       return failure();
 
-    if (!state.getOptions().enforceAliasingInvariants)
+    if (!state.getOptions().enforceAliasingInvariants ||
+        state.getOptions().copyBeforeWrite)
       return success();
 
     // According to the `getAliasing...` implementations, a bufferized OpResult
diff --git a/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp b/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp
deleted file mode 100644
index 21c618ab633f60..00000000000000
--- a/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-//===- Bufferize.cpp - scf bufferize pass ---------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/SCF/Transforms/Passes.h"
-
-#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
-#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/SCF/IR/SCF.h"
-#include "mlir/Dialect/SCF/Transforms/Patterns.h"
-#include "mlir/Transforms/DialectConversion.h"
-
-namespace mlir {
-#define GEN_PASS_DEF_SCFBUFFERIZE
-#include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
-} // namespace mlir
-
-using namespace mlir;
-using namespace mlir::scf;
-
-namespace {
-struct SCFBufferizePass : public impl::SCFBufferizeBase<SCFBufferizePass> {
-  void runOnOperation() override {
-    auto *func = getOperation();
-    auto *context = &getContext();
-
-    bufferization::BufferizeTypeConverter typeConverter;
-    RewritePatternSet patterns(context);
-    ConversionTarget target(*context);
-
-    bufferization::populateBufferizeMaterializationLegality(target);
-    populateSCFStructuralTypeConversionsAndLegality(typeConverter, patterns,
-                                                    target);
-    if (failed(applyPartialConversion(func, target, std::move(patterns))))
-      return signalPassFailure();
-  };
-};
-} // namespace
-
-std::unique_ptr<Pass> mlir::createSCFBufferizePass() {
-  return std::make_unique<SCFBufferizePass>();
-}
diff --git a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
index 5dc7c60792b9b6..e99b5d0cc26fc7 100644
--- a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
@@ -1,7 +1,6 @@
 add_mlir_dialect_library(MLIRSCFTransforms
   BufferDeallocationOpInterfaceImpl.cpp
   BufferizableOpInterfaceImpl.cpp
-  Bufferize.cpp
   ForallToFor.cpp
   ForallToParallel.cpp
   ForToWhile.cpp
diff --git a/mlir/test/Dialect/SCF/bufferize.mlir b/mlir/test/Dialect/SCF/bufferize.mlir
index ff1612310255a0..53fcee692226cb 100644
--- a/mlir/test/Dialect/SCF/bufferize.mlir
+++ b/mlir/test/Dialect/SCF/bufferize.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -scf-bufferize | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="dialect-filter=scf,bufferization copy-before-write unknown-type-conversion=identity-layout-map" -split-input-file | FileCheck %s
 
 // CHECK-LABEL:   func @if(
 // CHECK-SAME:             %[[PRED:.*]]: i1,
@@ -23,15 +23,21 @@ func.func @if(%pred: i1, %true_val: tensor<?xf32>, %false_val: tensor<?xf32>) ->
   return %0 : tensor<?xf32>
 }
 
+// -----
+
 // CHECK-LABEL:   func @for(
 // CHECK-SAME:              %[[TENSOR:.*]]: tensor<f32>,
 // CHECK-SAME:              %[[LB:.*]]: index, %[[UB:.*]]: index,
 // CHECK-SAME:              %[[STEP:.*]]: index) -> tensor<f32> {
 // CHECK:           %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<f32>
-// CHECK:           %[[RESULT_MEMREF:.*]] = scf.for %[[VAL_6:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[ITER:.*]] = %[[MEMREF]]) -> (memref<f32>) {
+// Note: scf.for iter_args always bufferize to a memory write. This could be
+// optimized by analyzing the loop body.
+// CHECK:           %[[MEMREF_COPY:.*]] = memref.alloc()
+// CHECK:           memref.copy %[[MEMREF]], %[[MEMREF_COPY]]
+// CHECK:           %[[RESULT_MEMREF:.*]] = scf.for %{{.*}} = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[ITER:.*]] = %[[MEMREF_COPY]]) -> (memref<f32>) {
 // CHECK:             scf.yield %[[ITER]] : memref<f32>
 // CHECK:           } {some_attr}
-// CHECK:           %[[VAL_8:.*]] = bufferization.to_tensor %[[VAL_9:.*]] : memref<f32>
+// CHECK:           %[[VAL_8:.*]] = bufferization.to_tensor %[[RESULT_MEMREF]] : memref<f32>
 // CHECK:           return %[[VAL_8]] : tensor<f32>
 // CHECK:         }
 func.func @for(%arg0: tensor<f32>, %lb: index, %ub: index, %step: index) -> tensor<f32> {
@@ -41,6 +47,8 @@ func.func @for(%arg0: tensor<f32>, %lb: index, %ub: index, %step: index) -> tens
   return %ret : tensor<f32>
 }
 
+// -----
+
 // Check whether this converts at all.
 //
 // It would previously fail altogether.
@@ -57,17 +65,23 @@ func.func @if_correct_recursive_legalization_behavior(%pred: i1, %tensor: tensor
   return %0 : tensor<f32>
 }
 
+// -----
+
 // CHECK-LABEL:   func @for_correct_recursive_legalization_behavior(
 // CHECK-SAME:                                                      %[[TENSOR:.*]]: tensor<f32>,
 // CHECK-SAME:                                                      %[[INDEX:.*]]: index) -> tensor<f32> {
 // CHECK:           %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<f32>
-// CHECK:           %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[INDEX]] to %[[INDEX]] step %[[INDEX]] iter_args(%[[MEMREF_ITER:.*]] = %[[MEMREF]]) -> (memref<f32>) {
+// Note: scf.for iter_args always bufferize to a memory write. This could be
+// optimized by analyzing the loop body.
+// CHECK:           %[[MEMREF_COPY:.*]] = memref.alloc()
+// CHECK:           memref.copy %[[MEMREF]], %[[MEMREF_COPY]]
+// CHECK:           %[[RESULT:.*]] = scf.for %{{.*}} = %[[INDEX]] to %[[INDEX]] step %[[INDEX]] iter_args(%[[MEMREF_ITER:.*]] = %[[MEMREF_COPY]]) -> (memref<f32>) {
 // CHECK:             %[[TENSOR_ITER:.*]] = bufferization.to_tensor %[[MEMREF_ITER]] : memref<f32>
 // CHECK:             %[[TENSOR_MUNGED:.*]] = "test.munge_tensor"(%[[TENSOR_ITER]]) : (tensor<f32>) -> tensor<f32>
 // CHECK:             %[[MEMREF_MUNGED:.*]] = bufferization.to_memref %[[TENSOR_MUNGED]] : memref<f32>
 // CHECK:             scf.yield %[[MEMREF_MUNGED]] : memref<f32>
 // CHECK:           }
-// CHECK:           %[[TENSOR:.*]] = bufferization.to_tensor %[[RESULT:.*]] : memref<f32>
+// CHECK:           %[[TENSOR:.*]] = bufferization.to_tensor %[[RESULT]] : memref<f32>
 // CHECK:           return %[[TENSOR]] : tensor<f32>
 // CHECK:         }
 func.func @for_correct_recursive_legalization_behavior(%arg0: tensor<f32>, %index: index) -> tensor<f32> {
@@ -78,11 +92,17 @@ func.func @for_correct_recursive_legalization_behavior(%arg0: tensor<f32>, %inde
   return %ret : tensor<f32>
 }
 
+// -----
+
 // CHECK-LABEL:   func @bufferize_while(
 // CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64, %[[ARG2:.*]]: tensor<f32>
 // CHECK: %[[M:.*]] = bufferization.to_memref %[[ARG2]] : memref<f32>
-// CHECK: %[[RES1:.*]]:3 = scf.while (%{{.*}} = %[[ARG0]], %{{.*}} = %[[M]]) : (i64, memref<f32>) -> (i64, i64, memref<f32>)
-// CHECK: scf.condition(%{{.*}}) %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, memref<f32>
+// Note: scf.while iter_args always bufferize to a memory write. This could be
+// optimized by analyzing the loop body.
+// CHECK:           %[[MEMREF_COPY:.*]] = memref.alloc()
+// CHECK:           memref.copy %[[M]], %[[MEMREF_COPY]]
+// CHECK: %[[RES1:.*]]:3 = scf.while (%{{.*}} = %[[ARG0]], %[[ITER:.*]] = %[[MEMREF_COPY]]) : (i64, memref<f32>) -> (i64, i64, memref<f32>)
+// CHECK: scf.condition(%{{.*}}) %{{.*}}, %{{.*}}, %[[ITER]] : i64, i64, memref<f32>
 // CHECK: ^bb0(%{{.*}}: i64, %{{.*}}: i64, %{{.*}}: memref<f32>):
 // CHECK: scf.yield %{{.*}}, %{{.*}} : i64, memref<f32>
 // CHECK:  %[[RES2:.*]] = bufferization.to_tensor %[[RES1]]#2 : memref<f32>