[Mlir-commits] [mlir] [mlir][nvvm] Introduce `nvvm.barrier` OP (PR #81487)
Guray Ozen
llvmlistbot at llvm.org
Mon Feb 12 06:53:31 PST 2024
https://github.com/grypp created https://github.com/llvm/llvm-project/pull/81487
This PR introduces the `nvvm.barrier` OP to the NVVM dialect. Currently, NVVM only supports `nvvm.barrier0`, which synchronizes all threads using barrier resource 0.
The new `nvvm.barrier` has two essential arguments: the barrier resource and the number of threads. This added flexibility allows for selective synchronization of threads within a CTA, aligning with the capabilities provided by LLVM intrinsics or the PTX model.
I think we can deprecate `nvvm.barrier0` in favor of the more generic `nvvm.barrier`.
```
// Equivalent to nvvm.barrier0 or __syncthreads in CUDA.
nvvm.barrier
// Synchronize all threads using barrier resource 3.
nvvm.barrier resource = 3
// Synchronize %numberOfThreads threads using the default (first) barrier resource.
nvvm.barrier number_of_threads = %numberOfThreads
// Synchronize %numberOfThreads threads using barrier resource 3.
nvvm.barrier resource = 3 number_of_threads = %numberOfThreads
```
>From 2124309e291c2deeff61f3d6d25fccea819b181e Mon Sep 17 00:00:00 2001
From: grypp <guray.ozen at gmail.com>
Date: Mon, 12 Feb 2024 14:52:22 +0000
Subject: [PATCH] [mlir][nvvm] Introduce `nvvm.barrier` OP
This PR introduces the `nvvm.barrier` OP to the NVVM dialect. Currently, NVVM only supports `nvvm.barrier0`, which synchronizes all threads using barrier resource 0.
The new `nvvm.barrier` has two essential arguments: the barrier resource and the number of threads. This added flexibility allows for selective synchronization of threads within a CTA, aligning with the capabilities provided by LLVM intrinsics or the PTX model.
IMHO, the goal is to deprecate `nvvm.barrier0` in favor of the more generic and powerful `nvvm.barrier`.
```
// Equivalent to nvvm.barrier0 or __syncthreads in CUDA.
nvvm.barrier
// Synchronize all threads using barrier resource 3.
nvvm.barrier resource = 3
// Synchronize %numberOfThreads threads using the default (first) barrier resource.
nvvm.barrier number_of_threads = %numberOfThreads
// Synchronize %numberOfThreads threads using barrier resource 3.
nvvm.barrier resource = 3 number_of_threads = %numberOfThreads
```
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 20 ++++++++++++++++++++
mlir/test/Dialect/LLVMIR/nvvm.mlir | 14 ++++++++++++++
mlir/test/Target/LLVMIR/nvvmir.mlir | 14 ++++++++++++++
3 files changed, 48 insertions(+)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 37e525a139d4ad..1369ff1988037c 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -377,6 +377,26 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
let assemblyFormat = "attr-dict";
}
+def NVVM_BarrierOp : NVVM_Op<"barrier"> {
+ let arguments = (ins
+ DefaultValuedAttr<ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<15>]>, "0">:$barrierResource,
+ Optional<I32>:$numberOfThreads);
+ string llvmBuilder = [{
+ auto syncThreads = builder.getInt32($barrierResource);
+ if ($numberOfThreads) {
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier,
+ { syncThreads, $numberOfThreads});
+ } else {
+ if($barrierResource == 0)
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier0);
+ else
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_n,
+ { syncThreads});
+ }
+ }];
+ let assemblyFormat = "(`resource` `=` $barrierResource^)? (`number_of_threads` `=` $numberOfThreads^)? attr-dict";
+}
+
def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> {
let arguments = (ins OptionalAttr<UnitAttr>:$aligned);
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index ce483ddab22a0e..297712a47e7830 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -43,6 +43,20 @@ func.func @llvm_nvvm_barrier0() {
llvm.return
}
+// CHECK-LABEL: llvm.func @llvm_nvvm_barrier
+// CHECK-SAME: (%[[barId:.*]]: i32)
+llvm.func @llvm_nvvm_barrier(%numberOfThreads : i32) {
+ // CHECK: nvvm.barrier
+ nvvm.barrier
+ // CHECK: nvvm.barrier resource = 3
+ nvvm.barrier resource = 3
+ // CHECK: nvvm.barrier number_of_threads = %[[barId]]
+ nvvm.barrier number_of_threads = %numberOfThreads
+ // CHECK: nvvm.barrier resource = 4 number_of_threads = %[[barId]]
+ nvvm.barrier resource = 4 number_of_threads = %numberOfThreads
+ llvm.return
+}
+
// CHECK-LABEL: @llvm_nvvm_cluster_arrive
func.func @llvm_nvvm_cluster_arrive() {
// CHECK: nvvm.cluster.arrive
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 49f9426daabc21..17eba33fdce71b 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -80,6 +80,20 @@ llvm.func @llvm_nvvm_barrier0() {
llvm.return
}
+// CHECK-LABEL: @llvm_nvvm_barrier(
+// CHECK-SAME: i32 %[[barId:.*]])
+llvm.func @llvm_nvvm_barrier(%numberOfThreads : i32) {
+ // CHECK: call void @llvm.nvvm.barrier0()
+ nvvm.barrier
+ // CHECK: call void @llvm.nvvm.barrier.n(i32 3)
+ nvvm.barrier resource = 3
+ // CHECK: call void @llvm.nvvm.barrier(i32 0, i32 %[[barId]])
+ nvvm.barrier number_of_threads = %numberOfThreads
+ // CHECK: call void @llvm.nvvm.barrier(i32 4, i32 %[[barId]])
+ nvvm.barrier resource = 4 number_of_threads = %numberOfThreads
+ llvm.return
+}
+
// CHECK-LABEL: @llvm_nvvm_cluster_arrive
llvm.func @llvm_nvvm_cluster_arrive() {
// CHECK: call void @llvm.nvvm.barrier.cluster.arrive()
More information about the Mlir-commits
mailing list