[Mlir-commits] [mlir] [mlir][nvvm] Introduce `nvvm.barrier` OP (PR #81487)

Guray Ozen llvmlistbot at llvm.org
Tue Feb 13 02:02:13 PST 2024


https://github.com/grypp updated https://github.com/llvm/llvm-project/pull/81487

>From 2124309e291c2deeff61f3d6d25fccea819b181e Mon Sep 17 00:00:00 2001
From: grypp <guray.ozen at gmail.com>
Date: Mon, 12 Feb 2024 14:52:22 +0000
Subject: [PATCH 1/3] [mlir][nvvm] Introduce `nvvm.barrier` OP

This PR introduces the `nvvm.barrier` OP to the NVVM dialect. Currently, NVVM only supports `nvvm.barrier0`, which synchronizes all threads using barrier resource 0.

The new `nvvm.barrier` has two essential arguments: the barrier resource and the number of threads. This added flexibility allows for selective synchronization of threads within a CTA, aligning with the capabilities provided by LLVM intrinsics or the PTX model.

IMHO, the goal is to deprecate `nvvm.barrier0` in favor of the more generic and powerful `nvvm.barrier`.

```
// Equivalent to nvvm.barrier0 or __syncthreads in CUDA.
nvvm.barrier

// Synchronize all threads using the 3rd barrier resource.
nvvm.barrier resource = 3

// Synchronize %numberOfThreads threads using the default (first) barrier resource.
nvvm.barrier number_of_threads = %numberOfThreads

// Synchronize %numberOfThreads threads using the 3rd barrier resource.
nvvm.barrier resource = 3 number_of_threads = %numberOfThreads
```
---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 20 ++++++++++++++++++++
 mlir/test/Dialect/LLVMIR/nvvm.mlir          | 14 ++++++++++++++
 mlir/test/Target/LLVMIR/nvvmir.mlir         | 14 ++++++++++++++
 3 files changed, 48 insertions(+)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 37e525a139d4ad..1369ff1988037c 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -377,6 +377,26 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
   let assemblyFormat = "attr-dict";
 }
 
+def NVVM_BarrierOp : NVVM_Op<"barrier"> {
+  let arguments = (ins     
+    DefaultValuedAttr<ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<15>]>, "0">:$barrierResource,
+    Optional<I32>:$numberOfThreads);
+  string llvmBuilder = [{
+    auto syncThreads = builder.getInt32($barrierResource);
+    if ($numberOfThreads) {
+      createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier,
+                { syncThreads, $numberOfThreads});
+    } else {
+      if($barrierResource == 0)
+        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier0);
+      else
+        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_n,
+                { syncThreads});
+    }
+  }];
+  let assemblyFormat = "(`resource` `=` $barrierResource^)? (`number_of_threads` `=` $numberOfThreads^)? attr-dict";
+}
+
 def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> {
   let arguments = (ins OptionalAttr<UnitAttr>:$aligned);
 
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index ce483ddab22a0e..297712a47e7830 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -43,6 +43,20 @@ func.func @llvm_nvvm_barrier0() {
   llvm.return
 }
 
+// CHECK-LABEL: llvm.func @llvm_nvvm_barrier
+// CHECK-SAME: (%[[barId:.*]]: i32)
+llvm.func @llvm_nvvm_barrier(%numberOfThreads : i32) {
+  // CHECK: nvvm.barrier
+  nvvm.barrier 
+  // CHECK: nvvm.barrier resource = 3
+  nvvm.barrier resource = 3
+  // CHECK: nvvm.barrier number_of_threads = %[[barId]]
+  nvvm.barrier number_of_threads = %numberOfThreads
+  // CHECK: nvvm.barrier resource = 4 number_of_threads = %[[barId]]
+  nvvm.barrier resource = 4 number_of_threads = %numberOfThreads
+  llvm.return
+}
+
 // CHECK-LABEL: @llvm_nvvm_cluster_arrive
 func.func @llvm_nvvm_cluster_arrive() {
   // CHECK: nvvm.cluster.arrive
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 49f9426daabc21..17eba33fdce71b 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -80,6 +80,20 @@ llvm.func @llvm_nvvm_barrier0() {
   llvm.return
 }
 
+// CHECK-LABEL: @llvm_nvvm_barrier(
+// CHECK-SAME: i32 %[[barId:.*]])
+llvm.func @llvm_nvvm_barrier(%numberOfThreads : i32) {
+  // CHECK: call void @llvm.nvvm.barrier0()
+  nvvm.barrier 
+  // CHECK: call void @llvm.nvvm.barrier.n(i32 3)
+  nvvm.barrier resource = 3
+  // CHECK: call void @llvm.nvvm.barrier(i32 0, i32 %[[barId]])
+  nvvm.barrier number_of_threads = %numberOfThreads
+  // CHECK: call void @llvm.nvvm.barrier(i32 4, i32 %[[barId]])
+  nvvm.barrier resource = 4 number_of_threads = %numberOfThreads
+  llvm.return
+}
+
 // CHECK-LABEL: @llvm_nvvm_cluster_arrive
 llvm.func @llvm_nvvm_cluster_arrive() {
   // CHECK: call void @llvm.nvvm.barrier.cluster.arrive()

>From 8d79471d1a5e20db88e7c15d567ad1823f75b7d6 Mon Sep 17 00:00:00 2001
From: grypp <guray.ozen at gmail.com>
Date: Tue, 13 Feb 2024 09:06:18 +0000
Subject: [PATCH 2/3] Make barrierId an operand

---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 21 ++++++++++-----------
 mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp  |  6 ++++++
 mlir/test/Dialect/LLVMIR/nvvm.mlir          | 18 ++++++++----------
 mlir/test/Target/LLVMIR/nvvmir.mlir         | 21 +++++++++++++--------
 4 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 1369ff1988037c..f38eb60f9f0c27 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -377,24 +377,23 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
   let assemblyFormat = "attr-dict";
 }
 
-def NVVM_BarrierOp : NVVM_Op<"barrier"> {
+def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
   let arguments = (ins     
-    DefaultValuedAttr<ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<15>]>, "0">:$barrierResource,
+    Optional<I32>:$barrierId,
     Optional<I32>:$numberOfThreads);
   string llvmBuilder = [{
-    auto syncThreads = builder.getInt32($barrierResource);
-    if ($numberOfThreads) {
+    if ($numberOfThreads && $barrierId) {
       createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier,
-                { syncThreads, $numberOfThreads});
+                {$barrierId, $numberOfThreads});
+    } else if($barrierId) {
+      createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_n,
+                {$barrierId});   
     } else {
-      if($barrierResource == 0)
-        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier0);
-      else
-        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_n,
-                { syncThreads});
+      createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier0);
     }
   }];
-  let assemblyFormat = "(`resource` `=` $barrierResource^)? (`number_of_threads` `=` $numberOfThreads^)? attr-dict";
+  let hasVerifier = 1;
+  let assemblyFormat = "(`id` `=` $barrierId^)? (`number_of_threads` `=` $numberOfThreads^)? attr-dict";
 }
 
 def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> {
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index a855e4b209ac5b..5fcedb03c28294 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1022,6 +1022,12 @@ LogicalResult NVVM::SetMaxRegisterOp::verify() {
   return success();
 }
 
+LogicalResult NVVM::BarrierOp::verify() {
+  if(getNumberOfThreads() && !getBarrierId())
+    return emitOpError("barrier id is missing, it should be set between 0 to 15");
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // NVVMDialect initialization, type parsing, and registration.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index 297712a47e7830..fe45c90236f826 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -43,17 +43,15 @@ func.func @llvm_nvvm_barrier0() {
   llvm.return
 }
 
-// CHECK-LABEL: llvm.func @llvm_nvvm_barrier
-// CHECK-SAME: (%[[barId:.*]]: i32)
-llvm.func @llvm_nvvm_barrier(%numberOfThreads : i32) {
-  // CHECK: nvvm.barrier
+// CHECK-LABEL: @llvm_nvvm_barrier
+// CHECK-SAME: (%[[barId:.*]]: i32, %[[numberOfThreads:.*]]: i32)
+llvm.func @llvm_nvvm_barrier(%barId : i32, %numberOfThreads : i32) {
+  // CHECK: nvvm.barrier 
   nvvm.barrier 
-  // CHECK: nvvm.barrier resource = 3
-  nvvm.barrier resource = 3
-  // CHECK: nvvm.barrier number_of_threads = %[[barId]]
-  nvvm.barrier number_of_threads = %numberOfThreads
-  // CHECK: nvvm.barrier resource = 4 number_of_threads = %[[barId]]
-  nvvm.barrier resource = 4 number_of_threads = %numberOfThreads
+  // CHECK: nvvm.barrier id = %[[barId]]
+  nvvm.barrier id = %barId
+  // CHECK: nvvm.barrier id = %[[barId]] number_of_threads = %[[numberOfThreads]]
+  nvvm.barrier id = %barId number_of_threads = %numberOfThreads
   llvm.return
 }
 
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 17eba33fdce71b..2adfe15e29ad4b 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -81,16 +81,14 @@ llvm.func @llvm_nvvm_barrier0() {
 }
 
 // CHECK-LABEL: @llvm_nvvm_barrier(
-// CHECK-SAME: i32 %[[barId:.*]])
-llvm.func @llvm_nvvm_barrier(%numberOfThreads : i32) {
+// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]])
+llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32) {
   // CHECK: call void @llvm.nvvm.barrier0()
   nvvm.barrier 
-  // CHECK: call void @llvm.nvvm.barrier.n(i32 3)
-  nvvm.barrier resource = 3
-  // CHECK: call void @llvm.nvvm.barrier(i32 0, i32 %[[barId]])
-  nvvm.barrier number_of_threads = %numberOfThreads
-  // CHECK: call void @llvm.nvvm.barrier(i32 4, i32 %[[barId]])
-  nvvm.barrier resource = 4 number_of_threads = %numberOfThreads
+  // CHECK: call void @llvm.nvvm.barrier.n(i32 %[[barId]])
+  nvvm.barrier id = %barID
+  // CHECK: call void @llvm.nvvm.barrier(i32 %[[barId]], i32 %[[numThreads]])
+  nvvm.barrier id = %barID number_of_threads = %numberOfThreads
   llvm.return
 }
 
@@ -526,6 +524,13 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = array<i32: 1, 2
 // CHECK:     {ptr @kernel_func, !"maxntidz", i32 32}
 // CHECK:     {ptr @kernel_func, !"minctasm", i32 16}
 
+// -----
+
+llvm.func @kernel_func(%numberOfThreads : i32) {
+  // expected-error @below {{'nvvm.barrier' op barrier id is missing, it should be set between 0 to 15}}
+  nvvm.barrier number_of_threads = %numberOfThreads
+}
+
 // -----
 // expected-error @below {{'"nvvm.minctasm"' attribute must be integer constant}}
 llvm.func @kernel_func() attributes {nvvm.kernel,

>From 58d72f2c00bf85876a07e9954c3cb9b6f4d44271 Mon Sep 17 00:00:00 2001
From: grypp <guray.ozen at gmail.com>
Date: Tue, 13 Feb 2024 10:01:50 +0000
Subject: [PATCH 3/3] fix formatting

---
 mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 5fcedb03c28294..137e8e167970c9 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1023,8 +1023,9 @@ LogicalResult NVVM::SetMaxRegisterOp::verify() {
 }
 
 LogicalResult NVVM::BarrierOp::verify() {
-  if(getNumberOfThreads() && !getBarrierId())
-    return emitOpError("barrier id is missing, it should be set between 0 to 15");
+  if (getNumberOfThreads() && !getBarrierId())
+    return emitOpError(
+        "barrier id is missing, it should be set between 0 to 15");
   return success();
 }
 



More information about the Mlir-commits mailing list