[Mlir-commits] [mlir] [mlir][sparse] Extract `StorageSpecifierToLLVMPass` from bufferization pipeline (PR #68635)

Matthias Springer llvmlistbot at llvm.org
Mon Oct 9 14:35:11 PDT 2023


https://github.com/matthias-springer created https://github.com/llvm/llvm-project/pull/68635

`StorageSpecifierToLLVMPass` does not have to be part of the bufferization mini pipeline. It can run after the bufferization pipeline. This is desirable because it keeps the bufferization pipeline smaller.

Also fix incorrect bufferization API usage: `bufferizeOp` was used instead of `bufferizeModuleOp`, even though function boundaries were bufferized.

Depends on #68632 and #68625. Only review the top commit.


>From 400d503f2040cf59fac1db59e8e508ff6a571785 Mon Sep 17 00:00:00 2001
From: Matthias Springer <me at m-sp.org>
Date: Mon, 9 Oct 2023 23:30:46 +0200
Subject: [PATCH 1/3] [mlir][bufferization][NFC] Simplify `bufferizeOp`
 function signature

Remove the `opFilter` and `copyBeforeWrite` function arguments. These options can already be configured in the `options` object.
---
 .../Bufferization/Transforms/Bufferize.h      | 18 +++++++----------
 .../Bufferization/Transforms/Bufferize.cpp    | 20 ++++++-------------
 .../Transforms/OneShotAnalysis.cpp            |  3 +--
 .../Transforms/OneShotModuleBufferize.cpp     | 19 ++++++++++++------
 .../SparsificationAndBufferizationPass.cpp    | 16 +++++++--------
 5 files changed, 35 insertions(+), 41 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
index 3d3316db6b09336..cab997e1aff2977 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
@@ -63,19 +63,12 @@ void populateEliminateBufferizeMaterializationsPatterns(
     BufferizeTypeConverter &typeConverter, RewritePatternSet &patterns);
 
 /// Bufferize `op` and its nested ops that implement `BufferizableOpInterface`.
-/// If `copyBeforeWrite`, buffers are duplicated and copied before any tensor
-/// use that bufferizes to a memory write.
 ///
-/// Note: In the general case, it unsafe to run with `copyBeforeWrite = false`
-/// because read-after-write conflicts may materialize during bufferization.
-/// `copyBeforeWrite = false` is safe only if the input IR is guaranteed to
-/// *not* require any out-of-place bufferization.
-///
-/// Note: This function bufferizes ops without utilizing analysis results. It
-/// can be used to implement partial bufferization passes.
+/// Note: This function does not resolve read-after-write conflicts. Use this
+/// function only if it is guaranteed that the input IR can bufferize without
+/// additional buffer copies or set "options.copyBeforeWrite = true". The
+/// general bufferization entry point is `runOneShotBufferize`.
 LogicalResult bufferizeOp(Operation *op, const BufferizationOptions &options,
-                          bool copyBeforeWrite = true,
-                          const OpFilter *opFilter = nullptr,
                           BufferizationStatistics *statistics = nullptr);
 
 /// Bufferize the signature of `block` and its callers (i.e., ops that have the
@@ -94,6 +87,9 @@ LogicalResult bufferizeOp(Operation *op, const BufferizationOptions &options,
 LogicalResult bufferizeBlockSignature(Block *block, RewriterBase &rewriter,
                                       const BufferizationOptions &options);
 
+/// Return `BufferizationOptions` such that the `bufferizeOp` behaves like the
+/// old (deprecated) partial, dialect conversion-based bufferization passes. A
+/// copy will be inserted before every buffer write.
 BufferizationOptions getPartialBufferizationOptions();
 
 } // namespace bufferization
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
index 045dea5d2b85f85..f2125feeda54159 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -383,11 +383,9 @@ class BufferizationRewriter : public IRRewriter, public RewriterBase::Listener {
                         DenseSet<Operation *> &toMemrefOps,
                         SmallVector<Operation *> &worklist,
                         const BufferizationOptions &options,
-                        const OpFilter *opFilter,
                         BufferizationStatistics *statistics)
       : IRRewriter(ctx), erasedOps(erasedOps), toMemrefOps(toMemrefOps),
-        worklist(worklist), analysisState(options), opFilter(opFilter),
-        statistics(statistics) {
+        worklist(worklist), analysisState(options), statistics(statistics) {
     setListener(this);
   }
 
@@ -424,7 +422,7 @@ class BufferizationRewriter : public IRRewriter, public RewriterBase::Listener {
 
     // Skip ops that are not allowed to be bufferized.
     auto const &options = analysisState.getOptions();
-    if (!options.isOpAllowed(op) || (opFilter && !opFilter->isOpAllowed(op)))
+    if (!options.isOpAllowed(op))
       return;
 
     // Add op to worklist.
@@ -445,9 +443,6 @@ class BufferizationRewriter : public IRRewriter, public RewriterBase::Listener {
   /// bufferization options.
   const AnalysisState analysisState;
 
-  /// An extra op filter for bufferization.
-  const OpFilter *opFilter;
-
   /// Bufferization statistics for debugging.
   BufferizationStatistics *statistics;
 };
@@ -455,10 +450,8 @@ class BufferizationRewriter : public IRRewriter, public RewriterBase::Listener {
 
 LogicalResult bufferization::bufferizeOp(Operation *op,
                                          const BufferizationOptions &options,
-                                         bool copyBeforeWrite,
-                                         const OpFilter *opFilter,
                                          BufferizationStatistics *statistics) {
-  if (copyBeforeWrite) {
+  if (options.copyBeforeWrite) {
     AnalysisState state(options);
     if (failed(insertTensorCopies(op, state)))
       return failure();
@@ -486,7 +479,7 @@ LogicalResult bufferization::bufferizeOp(Operation *op,
 
   // Bufferize all ops.
   BufferizationRewriter rewriter(op->getContext(), erasedOps, toMemrefOps,
-                                 worklist, options, opFilter, statistics);
+                                 worklist, options, statistics);
   for (unsigned i = 0; i < worklist.size(); ++i) {
     Operation *nextOp = worklist[i];
     // Skip ops that were erased.
@@ -496,7 +489,7 @@ LogicalResult bufferization::bufferizeOp(Operation *op,
     auto bufferizableOp = options.dynCastBufferizableOp(nextOp);
     if (!bufferizableOp)
       continue;
-    if (opFilter && !opFilter->isOpAllowed(nextOp))
+    if (!options.isOpAllowed(nextOp))
       continue;
     // Skip ops that no longer have tensor semantics.
     if (!hasTensorSemantics(nextOp))
@@ -558,8 +551,6 @@ LogicalResult bufferization::bufferizeOp(Operation *op,
     // Continue ops that are not allowed.
     if (!options.isOpAllowed(op))
       continue;
-    if (opFilter && !opFilter->isOpAllowed(op))
-      continue;
     // Ops without any uses and no side effects will fold away.
     if (op->getUses().empty() && isMemoryEffectFree(op))
       continue;
@@ -662,6 +653,7 @@ bufferization::bufferizeBlockSignature(Block *block, RewriterBase &rewriter,
 BufferizationOptions bufferization::getPartialBufferizationOptions() {
   BufferizationOptions options;
   options.allowUnknownOps = true;
+  options.copyBeforeWrite = true;
   options.enforceAliasingInvariants = false;
   options.unknownTypeConverterFn = [](Value value, Attribute memorySpace,
                                       const BufferizationOptions &options) {
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
index 1c85dbb5688be4b..95872368c436af6 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
@@ -1323,6 +1323,5 @@ bufferization::runOneShotBufferize(Operation *op,
   }
   if (options.testAnalysisOnly)
     return success();
-  return bufferizeOp(op, options, /*copyBeforeWrite=*/options.copyBeforeWrite,
-                     /*opFilter=*/nullptr, statistics);
+  return bufferizeOp(op, options, statistics);
 }
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp
index 417f457c8910ca9..1872bf205b65065 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp
@@ -426,12 +426,19 @@ LogicalResult mlir::bufferization::bufferizeModuleOp(
   for (func::FuncOp funcOp : orderedFuncOps) {
     // Note: It would be good to apply cleanups here but we cannot as aliasInfo
     // would be invalidated.
-    bool copyBeforeWrite =
-        options.copyBeforeWrite ||
-        llvm::is_contained(options.noAnalysisFuncFilter, funcOp.getSymName());
-    if (failed(bufferizeOp(funcOp, options, copyBeforeWrite,
-                           /*opFilter=*/nullptr, statistics)))
-      return failure();
+
+    if (llvm::is_contained(options.noAnalysisFuncFilter, funcOp.getSymName())) {
+      // This function was not analyzed and RaW conflicts were not resolved.
+      // Buffer copies must be inserted before every write.
+      OneShotBufferizationOptions updatedOptions = options;
+      updatedOptions.copyBeforeWrite = true;
+      if (failed(bufferizeOp(funcOp, updatedOptions, statistics)))
+        return failure();
+    } else {
+      if (failed(bufferizeOp(funcOp, options, statistics)))
+        return failure();
+    }
+
     // Change buffer return types to more precise layout maps.
     if (options.inferFunctionResultLayout)
       foldMemRefCasts(funcOp);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
index 9b5567814a75f32..6fca8f82e356626 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
@@ -81,23 +81,23 @@ class SparsificationAndBufferizationPass
   /// and that all required buffer copies were already inserted by
   /// `insertTensorCopies` in the form of `bufferization.alloc_tensor` ops.
   LogicalResult runDenseBufferization() {
-    bufferization::OpFilter denseOpFilter;
-    denseOpFilter.allowOperation([&](Operation *op) {
+    bufferization::OneShotBufferizationOptions updatedOptions =
+        bufferizationOptions;
+    // Skip all sparse ops.
+    updatedOptions.opFilter.denyOperation([&](Operation *op) {
       if (containsSparseTensor(TypeRange(op->getResults())) ||
           containsSparseTensor(TypeRange(op->getOperands())))
-        return false;
+        return true;
       if (auto funcOp = dyn_cast<func::FuncOp>(op)) {
         FunctionType funcType = funcOp.getFunctionType();
         if (containsSparseTensor(funcType.getInputs()) ||
             containsSparseTensor(funcType.getResults()))
-          return false;
+          return true;
       }
-      return true;
+      return false;
     });
 
-    if (failed(bufferization::bufferizeOp(getOperation(), bufferizationOptions,
-                                          /*copyBeforeWrite=*/false,
-                                          &denseOpFilter)))
+    if (failed(bufferization::bufferizeOp(getOperation(), updatedOptions)))
       return failure();
 
     bufferization::removeBufferizationAttributesInModule(getOperation());

>From f4d9f105da12e44065fef9231b34524fc72d8d94 Mon Sep 17 00:00:00 2001
From: Matthias Springer <me at m-sp.org>
Date: Mon, 9 Oct 2023 23:31:04 +0200
Subject: [PATCH 2/3] [mlir][bufferization] Allow cyclic function graphs
 without tensors

Cyclic function call graphs are generally not supported by One-Shot Bufferize. However, they can be allowed when a function does not have tensor arguments or results, because it is then no longer necessary to bufferize the callee before the caller.
---
 .../Transforms/OneShotModuleBufferize.cpp     | 15 ++++++++++++-
 .../one-shot-module-bufferize-invalid.mlir    | 12 +++++------
 .../Transforms/one-shot-module-bufferize.mlir | 21 +++++++++++++++++++
 3 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp
index 1872bf205b65065..f6470795c9e0ee3 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp
@@ -274,6 +274,13 @@ static void equivalenceAnalysis(func::FuncOp funcOp,
   });
 }
 
+/// Return "true" if the given function signature has tensor semantics.
+static bool hasTensorSignature(func::FuncOp funcOp) {
+  auto isaTensor = [](Type t) { return isa<TensorType>(t); };
+  return llvm::any_of(funcOp.getFunctionType().getInputs(), isaTensor) ||
+         llvm::any_of(funcOp.getFunctionType().getResults(), isaTensor);
+}
+
 /// Store all functions of the `moduleOp` in `orderedFuncOps`, sorted by
 /// callee-caller order (i.e. callees without callers first).
 /// Store the map of FuncOp to all its callers in `callerMap`.
@@ -297,10 +304,16 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp,
                   "without a unique ReturnOp";
     }
 
+    // Collect function calls and populate the caller map.
     numberCallOpsContainedInFuncOp[funcOp] = 0;
     return funcOp.walk([&](func::CallOp callOp) -> WalkResult {
       func::FuncOp calledFunction = getCalledFunction(callOp);
       assert(calledFunction && "could not retrieved called func::FuncOp");
+      // If the called function does not have any tensors in its signature, then
+      // it is not necessary to bufferize the callee before the caller.
+      if (!hasTensorSignature(calledFunction))
+        return WalkResult::skip();
+
       callerMap[calledFunction].insert(callOp);
       if (calledBy[calledFunction].insert(funcOp).second) {
         numberCallOpsContainedInFuncOp[funcOp]++;
@@ -310,7 +323,7 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp,
   });
   if (res.wasInterrupted())
     return failure();
-  // Iteratively remove function operation that do not call any of the
+  // Iteratively remove function operations that do not call any of the
   // functions remaining in the callCounter map and add them to the worklist.
   while (!numberCallOpsContainedInFuncOp.empty()) {
     auto it = llvm::find_if(numberCallOpsContainedInFuncOp,
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
index a25b57991baca7f..935b14933888e61 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
@@ -27,14 +27,14 @@ func.func @swappy(%cond1 : i1, %cond2 : i1, %t1 : tensor<f32>, %t2 : tensor<f32>
 
 // expected-error @-3 {{expected callgraph to be free of circular dependencies}}
 
-func.func @foo() {
-  call @bar() : () -> ()
-  return
+func.func @foo(%t: tensor<5xf32>) -> tensor<5xf32> {
+  %0 = call @bar(%t) : (tensor<5xf32>) -> (tensor<5xf32>)
+  return %0 : tensor<5xf32>
 }
 
-func.func @bar() {
-  call @foo() : () -> ()
-  return
+func.func @bar(%t: tensor<5xf32>) -> tensor<5xf32>{
+  %0 = call @foo(%t) : (tensor<5xf32>) -> (tensor<5xf32>)
+  return %0 : tensor<5xf32>
 }
 
 // -----
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
index b9de4ba34e0e6d3..39f4835b28ffeb2 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
@@ -662,3 +662,24 @@ func.func @br_in_func(%t: tensor<5xf32>) -> tensor<5xf32> {
 ^bb1(%arg1 : tensor<5xf32>):
   func.return %arg1 : tensor<5xf32>
 }
+
+// -----
+
+// Cyclic call graphs with tensors are not supported by One-Shot Bufferize.
+// However, if a function signature does not have any tensor arguments or
+// results, calls to that function are not seen as an "edge" in the fuction
+// call graph.
+
+// CHECK-LABEL: func.func @foo(%{{.*}}: memref<5xf32>) -> memref<5xf32>
+func.func @foo(%m: memref<5xf32>) -> memref<5xf32> {
+  %0 = tensor.empty() : tensor<5xf32>
+  %1 = func.call @bar(%0, %m)
+      : (tensor<5xf32>, memref<5xf32>) -> (memref<5xf32>)
+  return %1 : memref<5xf32>
+}
+
+// CHECK: func.func @bar(%{{.*}}: memref<5xf32, strided<[?], offset: ?>>, %arg1: memref<5xf32>) -> memref<5xf32>
+func.func @bar(%t: tensor<5xf32>, %m: memref<5xf32>) -> memref<5xf32> {
+  %0 = func.call @foo(%m) : (memref<5xf32>) -> (memref<5xf32>)
+  return %0 : memref<5xf32>
+}

>From 8f3041206f532b2ad6fae5cc7fea21354ccd51f7 Mon Sep 17 00:00:00 2001
From: Matthias Springer <me at m-sp.org>
Date: Mon, 9 Oct 2023 23:31:22 +0200
Subject: [PATCH 3/3] [mlir][sparse] Extract `StorageSpecifierToLLVMPass` from
 bufferization pipeline

`StorageSpecifierToLLVMPass` does not have to be part of the bufferization mini pipeline. It can run after the bufferization pipeline. This is desirable because it keeps the bufferization pipeline smaller.
---
 .../SparseTensor/Pipelines/SparseTensorPipelines.cpp        | 2 ++
 .../Transforms/SparsificationAndBufferizationPass.cpp       | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
index 54069064839eac3..7569413546c0a6e 100644
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
@@ -42,6 +42,8 @@ void mlir::sparse_tensor::buildSparseCompiler(
       /*enableSIMDIndex32=*/options.force32BitVectorIndices));
   if (options.testBufferizationAnalysisOnly)
     return;
+
+  pm.addPass(createStorageSpecifierToLLVMPass());
   pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
   pm.addNestedPass<func::FuncOp>(
       mlir::bufferization::createFinalizingBufferizePass());
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
index 6fca8f82e356626..480e18e257277de 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
@@ -39,7 +39,7 @@ namespace sparse_tensor {
 /// Return `true` if one of the given types is a sparse tensor type.
 static bool containsSparseTensor(TypeRange types) {
   for (Type t : types)
-    if (getSparseTensorEncoding(t))
+    if (isa<TensorType>(t) && getSparseTensorEncoding(t))
       return true;
   return false;
 }
@@ -97,7 +97,8 @@ class SparsificationAndBufferizationPass
       return false;
     });
 
-    if (failed(bufferization::bufferizeOp(getOperation(), updatedOptions)))
+    if (failed(bufferization::bufferizeModuleOp(cast<ModuleOp>(getOperation()),
+                                                updatedOptions)))
       return failure();
 
     bufferization::removeBufferizationAttributesInModule(getOperation());
@@ -154,7 +155,6 @@ class SparsificationAndBufferizationPass
         pm.addPass(createSparseTensorCodegenPass(createSparseDeallocs,
                                                  enableBufferInitialization));
         pm.addPass(createSparseBufferRewritePass(enableBufferInitialization));
-        pm.addPass(createStorageSpecifierToLLVMPass());
       }
       if (failed(runPipeline(pm, getOperation())))
         return signalPassFailure();



More information about the Mlir-commits mailing list