[Mlir-commits] [mlir] b5d694b - [mlir][nvvm] Introduce `nvvm.barrier` OP (#81487)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Tue Feb 13 23:28:48 PST 2024


Author: Guray Ozen
Date: 2024-02-14T08:28:45+01:00
New Revision: b5d694ba14524e0161421b13c875747d5fa917de

URL: https://github.com/llvm/llvm-project/commit/b5d694ba14524e0161421b13c875747d5fa917de
DIFF: https://github.com/llvm/llvm-project/commit/b5d694ba14524e0161421b13c875747d5fa917de.diff

LOG: [mlir][nvvm] Introduce `nvvm.barrier` OP (#81487)

This PR introduces the `nvvm.barrier` op to the NVVM dialect.
Currently, NVVM only supports `nvvm.barrier0`, which synchronizes
all threads using barrier resource 0.

The new `nvvm.barrier` takes two optional operands: the barrier resource
id and the number of threads. The number of threads can only be given
together with a barrier id. This added flexibility allows a subset of the
threads within a CTA to synchronize, matching the capabilities provided
by the LLVM intrinsics and the PTX model.

I think we can deprecate `nvvm.barrier0` in favor of the more generic
`nvvm.barrier`.

```
// Equivalent to nvvm.barrier0 (or __syncthreads() in CUDA)
nvvm.barrier

// Synchronize all threads using barrier resource 3.
nvvm.barrier id = 3

// Synchronize %numberOfThreads threads using barrier resource 3.
nvvm.barrier id = 3 number_of_threads = %numberOfThreads
```

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
    mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
    mlir/test/Dialect/LLVMIR/nvvm.mlir
    mlir/test/Target/LLVMIR/nvvmir.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 5a75944af0a4f1..8ec8e16f75c94b 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -390,6 +390,25 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
   let assemblyFormat = "attr-dict";
 }
 
+def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
+  let arguments = (ins     
+    Optional<I32>:$barrierId,
+    Optional<I32>:$numberOfThreads);
+  string llvmBuilder = [{
+    if ($numberOfThreads && $barrierId) {
+      createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier,
+                {$barrierId, $numberOfThreads});
+    } else if($barrierId) {
+      createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_n,
+                {$barrierId});   
+    } else {
+      createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier0);
+    }
+  }];
+  let hasVerifier = 1;
+  let assemblyFormat = "(`id` `=` $barrierId^)? (`number_of_threads` `=` $numberOfThreads^)? attr-dict";
+}
+
 def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> {
   let arguments = (ins OptionalAttr<UnitAttr>:$aligned);
 

diff  --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 9b1ef084ee7f16..4780ec09b81b9b 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1022,6 +1022,13 @@ LogicalResult NVVM::SetMaxRegisterOp::verify() {
   return success();
 }
 
+LogicalResult NVVM::BarrierOp::verify() {
+  if (getNumberOfThreads() && !getBarrierId())
+    return emitOpError(
+        "barrier id is missing, it should be set between 0 to 15");
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // NVVMDialect initialization, type parsing, and registration.
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index 0369f45ca6a015..f35393c5e95748 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -43,6 +43,18 @@ func.func @llvm_nvvm_barrier0() {
   llvm.return
 }
 
+// CHECK-LABEL: @llvm_nvvm_barrier
+// CHECK-SAME: (%[[barId:.*]]: i32, %[[numberOfThreads:.*]]: i32)
+llvm.func @llvm_nvvm_barrier(%barId : i32, %numberOfThreads : i32) {
+  // CHECK: nvvm.barrier 
+  nvvm.barrier 
+  // CHECK: nvvm.barrier id = %[[barId]]
+  nvvm.barrier id = %barId
+  // CHECK: nvvm.barrier id = %[[barId]] number_of_threads = %[[numberOfThreads]]
+  nvvm.barrier id = %barId number_of_threads = %numberOfThreads
+  llvm.return
+}
+
 // CHECK-LABEL: @llvm_nvvm_cluster_arrive
 func.func @llvm_nvvm_cluster_arrive() {
   // CHECK: nvvm.cluster.arrive

diff  --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 3a6a4544e20dc1..a8ae4d97888c90 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -80,6 +80,18 @@ llvm.func @llvm_nvvm_barrier0() {
   llvm.return
 }
 
+// CHECK-LABEL: @llvm_nvvm_barrier(
+// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]])
+llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32) {
+  // CHECK: call void @llvm.nvvm.barrier0()
+  nvvm.barrier 
+  // CHECK: call void @llvm.nvvm.barrier.n(i32 %[[barId]])
+  nvvm.barrier id = %barID
+  // CHECK: call void @llvm.nvvm.barrier(i32 %[[barId]], i32 %[[numThreads]])
+  nvvm.barrier id = %barID number_of_threads = %numberOfThreads
+  llvm.return
+}
+
 // CHECK-LABEL: @llvm_nvvm_cluster_arrive
 llvm.func @llvm_nvvm_cluster_arrive() {
   // CHECK: call void @llvm.nvvm.barrier.cluster.arrive()
@@ -512,6 +524,13 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = array<i32: 1, 2
 // CHECK:     {ptr @kernel_func, !"maxntidz", i32 32}
 // CHECK:     {ptr @kernel_func, !"minctasm", i32 16}
 
+// -----
+
+llvm.func @kernel_func(%numberOfThreads : i32) {
+  // expected-error @below {{'nvvm.barrier' op barrier id is missing, it should be set between 0 to 15}}
+  nvvm.barrier number_of_threads = %numberOfThreads
+}
+
 // -----
 // expected-error @below {{'"nvvm.minctasm"' attribute must be integer constant}}
 llvm.func @kernel_func() attributes {nvvm.kernel,


        


More information about the Mlir-commits mailing list