[Mlir-commits] [mlir] [mlir] Add bufferization option for parallel region check (PR #94645)

Mon Jun 10 13:25:42 PDT 2024

https://github.com/Max191 updated https://github.com/llvm/llvm-project/pull/94645

>From e8173c70bc16bae95fdfd26933ed3f59f4e05fec Mon Sep 17 00:00:00 2001
From: Max Dawkins <max.dawkins at gmail.com>
Date: Thu, 6 Jun 2024 13:27:43 -0400
Subject: [PATCH 1/2] [mlir] Add bufferization option for parallel region check

---
 .../mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h  | 5 +++++
 .../lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
index 2d8add82383be..2fda091e412ae 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -309,6 +309,11 @@ struct BufferizationOptions {
   /// bufferized or not.
   bool bufferizeFunctionBoundaries = false;
 
+  // Specifies whether to account for parallel regions in RaW analysis. If true,
+  // then writes inside of parallel regions that write to buffers defined
+  // outside of the parallel region will be given a new buffer.
+  bool checkParallelRegions = true;
+
   /// Certain ops have aliasing OpOperand/OpResult invariants (e.g., scf.for).
   /// If this flag is set to `false`, those invariants are no longer enforced
   /// with buffer copies.
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
index 2d329a1f3d889..d0b4e0dd4383e 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
@@ -611,7 +611,7 @@ hasReadAfterWriteInterference(const DenseSet<OpOperand *> &usesRead,
   // Before going through the main RaW analysis, find cases where a buffer must
   // be privatized due to parallelism. If the result of a write is never read,
   // privatization is not necessary (and large parts of the IR are likely dead).
-  if (!usesRead.empty()) {
+  if (options.checkParallelRegions && !usesRead.empty()) {
     for (OpOperand *uConflictingWrite : usesWrite) {
       // Find the allocation point or last write (definition) of the buffer.
       // Note: In contrast to `findDefinitions`, this also returns results of

>From 802c940d70e736982110b476529cf2790539167a Mon Sep 17 00:00:00 2001
From: Max Dawkins <max.dawkins at gmail.com>
Date: Mon, 10 Jun 2024 16:25:28 -0400
Subject: [PATCH 2/2] add tests

---
 .../TransformOps/BufferizationTransformOps.td |  1 +
 .../Bufferization/Transforms/Passes.td        |  2 ++
 .../Bufferization/Transforms/Bufferize.cpp    |  1 +
 .../SCF/one-shot-bufferize-analysis.mlir      | 21 ++++++++++++-------
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Bufferization/TransformOps/BufferizationTransformOps.td b/mlir/include/mlir/Dialect/Bufferization/TransformOps/BufferizationTransformOps.td
index 5ace9c390e146..53b3b0505b399 100644
--- a/mlir/include/mlir/Dialect/Bufferization/TransformOps/BufferizationTransformOps.td
+++ b/mlir/include/mlir/Dialect/Bufferization/TransformOps/BufferizationTransformOps.td
@@ -88,6 +88,7 @@ def OneShotBufferizeOp
       DefaultValuedAttr<BoolAttr, "false">:$dump_alias_sets,
       DefaultValuedAttr<BoolAttr, "false">:$test_analysis_only,
       DefaultValuedAttr<BoolAttr, "false">:$print_conflicts,
+      DefaultValuedAttr<BoolAttr, "true">:$check_parallel_regions,
       DefaultValuedAttr<StrAttr, "\"memref.copy\"">:$memcpy_op);
 
   let results = (outs TransformHandleTypeInterface:$transformed);
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index 8f8826b9ad56b..1cece818dbbbc 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -498,6 +498,8 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
     Option<"bufferizeFunctionBoundaries", "bufferize-function-boundaries",
            "bool", /*default=*/"0",
            "Bufferize function boundaries (experimental).">,
+    Option<"checkParallelRegions", "check-parallel-regions", "bool",
+           /*default=*/"true", "Account for parallel regions in RaW analysis.">,
     Option<"copyBeforeWrite", "copy-before-write", "bool", /*default=*/"false",
            "Skip the analysis. Make a buffer copy on every write.">,
     ListOption<"dialectFilter", "dialect-filter", "std::string",
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
index 0fddd60eb8140..e422086c9fde6 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -226,6 +226,7 @@ struct OneShotBufferizePass
       opt.printConflicts = printConflicts;
       opt.testAnalysisOnly = testAnalysisOnly;
       opt.bufferizeFunctionBoundaries = bufferizeFunctionBoundaries;
+      opt.checkParallelRegions = checkParallelRegions;
       opt.noAnalysisFuncFilter = noAnalysisFuncFilter;
 
       // Configure type converter.
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
index 4d82021e86f5b..9bb87ffbb2090 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
@@ -1,4 +1,5 @@
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s --check-prefixes=CHECK,PARALLEL-CHECK
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only check-parallel-regions=false" -split-input-file | FileCheck %s --check-prefixes=CHECK,NO-PARALLEL-CHECK
 
 // Run fuzzer with different seeds.
 // RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -split-input-file -o /dev/null
@@ -811,8 +812,10 @@ func.func @parallel_region() -> tensor<320xf32>
   %0 = scf.forall (%arg0) in (%c320) shared_outs(%arg1 = %alloc0) -> (tensor<320xf32>) {
     %val = "test.foo"() : () -> (f32)
     // linalg.fill must bufferize out-of-place because every thread needs a
-    // private copy of %alloc1.
-    // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
+    // private copy of %alloc1. If not accounting for parallel regions, the fill
+    // can bufferize in place.
+    // PARALLEL-CHECK:    linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
+    // NO-PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
     %fill = linalg.fill ins(%val : f32) outs(%alloc1 : tensor<1xf32>) -> tensor<1xf32>
     scf.forall.in_parallel {
       // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
@@ -841,8 +844,10 @@ func.func @parallel_region_mixed_def(%c: i1) -> tensor<320xf32>
     }
     %val = "test.foo"() : () -> (f32)
     // linalg.fill must bufferize out-of-place because every thread needs a
-    // private copy of %alloc1.
-    // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
+    // private copy of %alloc1. If not accounting for parallel regions, the fill
+    // can bufferize in place.
+    // PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
+    // NO-PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
     %fill = linalg.fill ins(%val : f32) outs(%selected : tensor<1xf32>) -> tensor<1xf32>
     scf.forall.in_parallel {
       // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
@@ -866,8 +871,10 @@ func.func @parallel_region_two_writes(%f: f32) -> tensor<320xf32>
   %0 = scf.forall (%arg0) in (%c320) shared_outs(%arg1 = %alloc0) -> (tensor<320xf32>) {
     %val = "test.foo"() : () -> (f32)
     // linalg.fill must bufferize out-of-place because every thread needs a
-    // private copy of %alloc1.
-    // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
+    // private copy of %alloc1. If not accounting for parallel regions, the fill
+    // can bufferize in place.
+    // PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
+    // NO-PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
     %fill = linalg.fill ins(%val : f32) outs(%alloc1 : tensor<1xf32>) -> tensor<1xf32>
     // CHECK: tensor.insert
     // CHECK-SAME: __inplace_operands_attr__ = ["none", "true", "none"]