[Mlir-commits] [mlir] 51af040 - [mlir][gpu] Eliminate redundant gpu.barrier ops (#71575)

Thu Nov 9 15:06:24 PST 2023

Author: spaceotter
Date: 2023-11-09T18:06:20-05:00
New Revision: 51af040b22c117e4f2429277c515299cd62c58ea

URL: https://github.com/llvm/llvm-project/commit/51af040b22c117e4f2429277c515299cd62c58ea
DIFF: https://github.com/llvm/llvm-project/commit/51af040b22c117e4f2429277c515299cd62c58ea.diff

LOG: [mlir][gpu] Eliminate redundant gpu.barrier ops (#71575)

Adds a canonicalization pattern for gpu.barrier that erases a barrier when it is immediately followed by another gpu.barrier.

Co-authored-by: Eric Eaton <eric at nod-labs.com>

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
    mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
    mlir/test/Dialect/GPU/canonicalize.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 6375d35f4311295..632cdd96c6d4c2b 100644

--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1010,6 +1010,7 @@ def GPU_BarrierOp : GPU_Op<"barrier"> {
     in convergence.
   }];
   let assemblyFormat = "attr-dict";
+  let hasCanonicalizer = 1;
 }
 
 def GPU_GPUModuleOp : GPU_Op<"module", [

diff  --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 5eb2cadc884e151..e0a2b93df3d1fd6 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -1139,6 +1139,29 @@ void ShuffleOp::build(OpBuilder &builder, OperationState &result, Value value,
         mode);
 }
 
+//===----------------------------------------------------------------------===//
+// BarrierOp
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// Erase a gpu.barrier directly followed by another one; the latter suffices.
+LogicalResult eraseRedundantGpuBarrierOps(BarrierOp op,
+                                          PatternRewriter &rewriter) {
+  bool followedByBarrier = isa_and_nonnull<BarrierOp>(op->getNextNode());
+  if (!followedByBarrier)
+    return failure(); // No next op in the block, or it is not a barrier.
+  rewriter.eraseOp(op);
+  return success();
+}
+
+} // end anonymous namespace
+
+void BarrierOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                            MLIRContext *context) {
+  results.add(eraseRedundantGpuBarrierOps); // `context` is not used here.
+}
+
 //===----------------------------------------------------------------------===//
 // GPUFuncOp
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/test/Dialect/GPU/canonicalize.mlir b/mlir/test/Dialect/GPU/canonicalize.mlir
index 1cf582e3822dae5..c2abb96d7d4fb8d 100644
--- a/mlir/test/Dialect/GPU/canonicalize.mlir
+++ b/mlir/test/Dialect/GPU/canonicalize.mlir
@@ -11,6 +11,16 @@ func.func @fold_wait_op_test1() {
 }
 // CHECK-NOT: gpu.wait
 
+// Two back-to-back gpu.barrier ops canonicalize to a single gpu.barrier.
+// CHECK-LABEL: func @erase_barriers
+//       CHECK-NEXT: gpu.barrier
+//       CHECK-NEXT: return
+func.func @erase_barriers() {
+  gpu.barrier
+  gpu.barrier
+  return
+}
+
 // Replace uses of gpu.wait op with its async dependency.
 // CHECK-LABEL: func @fold_wait_op_test2
 func.func @fold_wait_op_test2(%arg0: i1) -> (memref<5xf16>, memref<5xf16>) {