[Mlir-commits] [mlir] [mlir][nvvm] Introduce `nvvm.barrier` OP (PR #81487)
Guray Ozen
llvmlistbot at llvm.org
Tue Feb 13 02:02:13 PST 2024
https://github.com/grypp updated https://github.com/llvm/llvm-project/pull/81487
>From 2124309e291c2deeff61f3d6d25fccea819b181e Mon Sep 17 00:00:00 2001
From: grypp <guray.ozen at gmail.com>
Date: Mon, 12 Feb 2024 14:52:22 +0000
Subject: [PATCH 1/3] [mlir][nvvm] Introduce `nvvm.barrier` OP
This PR introduces the `nvvm.barrier` op to the NVVM dialect. Currently, NVVM only supports `nvvm.barrier0`, which synchronizes all threads using barrier resource 0.
The new `nvvm.barrier` has two essential arguments: the barrier resource and the number of threads. This added flexibility allows for selective synchronization of threads within a CTA, aligning with the capabilities provided by LLVM intrinsics or the PTX model.
IMHO, the goal is to deprecate `nvvm.barrier0` in favor of the more generic and powerful `nvvm.barrier`.
```
// Equivalent to nvvm.barrier0 or __syncthreads in CUDA.
nvvm.barrier
// Synchronize all threads using the 3rd barrier resource.
nvvm.barrier resource = 3
// Synchronize %numberOfThreads threads using the default (first) barrier resource.
nvvm.barrier number_of_threads = %numberOfThreads
// Synchronize %numberOfThreads threads using the 3rd barrier resource.
nvvm.barrier resource = 3 number_of_threads = %numberOfThreads
```
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 20 ++++++++++++++++++++
mlir/test/Dialect/LLVMIR/nvvm.mlir | 14 ++++++++++++++
mlir/test/Target/LLVMIR/nvvmir.mlir | 14 ++++++++++++++
3 files changed, 48 insertions(+)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 37e525a139d4ad..1369ff1988037c 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -377,6 +377,26 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
let assemblyFormat = "attr-dict";
}
+def NVVM_BarrierOp : NVVM_Op<"barrier"> {
+ let arguments = (ins
+ DefaultValuedAttr<ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<15>]>, "0">:$barrierResource,
+ Optional<I32>:$numberOfThreads);
+ string llvmBuilder = [{
+ auto syncThreads = builder.getInt32($barrierResource);
+ if ($numberOfThreads) {
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier,
+ { syncThreads, $numberOfThreads});
+ } else {
+ if($barrierResource == 0)
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier0);
+ else
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_n,
+ { syncThreads});
+ }
+ }];
+ let assemblyFormat = "(`resource` `=` $barrierResource^)? (`number_of_threads` `=` $numberOfThreads^)? attr-dict";
+}
+
def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> {
let arguments = (ins OptionalAttr<UnitAttr>:$aligned);
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index ce483ddab22a0e..297712a47e7830 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -43,6 +43,20 @@ func.func @llvm_nvvm_barrier0() {
llvm.return
}
+// CHECK-LABEL: llvm.func @llvm_nvvm_barrier
+// CHECK-SAME: (%[[barId:.*]]: i32)
+llvm.func @llvm_nvvm_barrier(%numberOfThreads : i32) {
+ // CHECK: nvvm.barrier
+ nvvm.barrier
+ // CHECK: nvvm.barrier resource = 3
+ nvvm.barrier resource = 3
+ // CHECK: nvvm.barrier number_of_threads = %[[barId]]
+ nvvm.barrier number_of_threads = %numberOfThreads
+ // CHECK: nvvm.barrier resource = 4 number_of_threads = %[[barId]]
+ nvvm.barrier resource = 4 number_of_threads = %numberOfThreads
+ llvm.return
+}
+
// CHECK-LABEL: @llvm_nvvm_cluster_arrive
func.func @llvm_nvvm_cluster_arrive() {
// CHECK: nvvm.cluster.arrive
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 49f9426daabc21..17eba33fdce71b 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -80,6 +80,20 @@ llvm.func @llvm_nvvm_barrier0() {
llvm.return
}
+// CHECK-LABEL: @llvm_nvvm_barrier(
+// CHECK-SAME: i32 %[[barId:.*]])
+llvm.func @llvm_nvvm_barrier(%numberOfThreads : i32) {
+ // CHECK: call void @llvm.nvvm.barrier0()
+ nvvm.barrier
+ // CHECK: call void @llvm.nvvm.barrier.n(i32 3)
+ nvvm.barrier resource = 3
+ // CHECK: call void @llvm.nvvm.barrier(i32 0, i32 %[[barId]])
+ nvvm.barrier number_of_threads = %numberOfThreads
+ // CHECK: call void @llvm.nvvm.barrier(i32 4, i32 %[[barId]])
+ nvvm.barrier resource = 4 number_of_threads = %numberOfThreads
+ llvm.return
+}
+
// CHECK-LABEL: @llvm_nvvm_cluster_arrive
llvm.func @llvm_nvvm_cluster_arrive() {
// CHECK: call void @llvm.nvvm.barrier.cluster.arrive()
>From 8d79471d1a5e20db88e7c15d567ad1823f75b7d6 Mon Sep 17 00:00:00 2001
From: grypp <guray.ozen at gmail.com>
Date: Tue, 13 Feb 2024 09:06:18 +0000
Subject: [PATCH 2/3] Make barrierId an operand
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 21 ++++++++++-----------
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 6 ++++++
mlir/test/Dialect/LLVMIR/nvvm.mlir | 18 ++++++++----------
mlir/test/Target/LLVMIR/nvvmir.mlir | 21 +++++++++++++--------
4 files changed, 37 insertions(+), 29 deletions(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 1369ff1988037c..f38eb60f9f0c27 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -377,24 +377,23 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
let assemblyFormat = "attr-dict";
}
-def NVVM_BarrierOp : NVVM_Op<"barrier"> {
+def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
let arguments = (ins
- DefaultValuedAttr<ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<15>]>, "0">:$barrierResource,
+ Optional<I32>:$barrierId,
Optional<I32>:$numberOfThreads);
string llvmBuilder = [{
- auto syncThreads = builder.getInt32($barrierResource);
- if ($numberOfThreads) {
+ if ($numberOfThreads && $barrierId) {
createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier,
- { syncThreads, $numberOfThreads});
+ {$barrierId, $numberOfThreads});
+ } else if($barrierId) {
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_n,
+ {$barrierId});
} else {
- if($barrierResource == 0)
- createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier0);
- else
- createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_n,
- { syncThreads});
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier0);
}
}];
- let assemblyFormat = "(`resource` `=` $barrierResource^)? (`number_of_threads` `=` $numberOfThreads^)? attr-dict";
+ let hasVerifier = 1;
+ let assemblyFormat = "(`id` `=` $barrierId^)? (`number_of_threads` `=` $numberOfThreads^)? attr-dict";
}
def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> {
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index a855e4b209ac5b..5fcedb03c28294 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1022,6 +1022,12 @@ LogicalResult NVVM::SetMaxRegisterOp::verify() {
return success();
}
+LogicalResult NVVM::BarrierOp::verify() {
+ if(getNumberOfThreads() && !getBarrierId())
+ return emitOpError("barrier id is missing, it should be set between 0 to 15");
+ return success();
+}
+
//===----------------------------------------------------------------------===//
// NVVMDialect initialization, type parsing, and registration.
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index 297712a47e7830..fe45c90236f826 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -43,17 +43,15 @@ func.func @llvm_nvvm_barrier0() {
llvm.return
}
-// CHECK-LABEL: llvm.func @llvm_nvvm_barrier
-// CHECK-SAME: (%[[barId:.*]]: i32)
-llvm.func @llvm_nvvm_barrier(%numberOfThreads : i32) {
- // CHECK: nvvm.barrier
+// CHECK-LABEL: @llvm_nvvm_barrier
+// CHECK-SAME: (%[[barId:.*]]: i32, %[[numberOfThreads:.*]]: i32)
+llvm.func @llvm_nvvm_barrier(%barId : i32, %numberOfThreads : i32) {
+ // CHECK: nvvm.barrier
nvvm.barrier
- // CHECK: nvvm.barrier resource = 3
- nvvm.barrier resource = 3
- // CHECK: nvvm.barrier number_of_threads = %[[barId]]
- nvvm.barrier number_of_threads = %numberOfThreads
- // CHECK: nvvm.barrier resource = 4 number_of_threads = %[[barId]]
- nvvm.barrier resource = 4 number_of_threads = %numberOfThreads
+ // CHECK: nvvm.barrier id = %[[barId]]
+ nvvm.barrier id = %barId
+ // CHECK: nvvm.barrier id = %[[barId]] number_of_threads = %[[numberOfThreads]]
+ nvvm.barrier id = %barId number_of_threads = %numberOfThreads
llvm.return
}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 17eba33fdce71b..2adfe15e29ad4b 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -81,16 +81,14 @@ llvm.func @llvm_nvvm_barrier0() {
}
// CHECK-LABEL: @llvm_nvvm_barrier(
-// CHECK-SAME: i32 %[[barId:.*]])
-llvm.func @llvm_nvvm_barrier(%numberOfThreads : i32) {
+// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]])
+llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32) {
// CHECK: call void @llvm.nvvm.barrier0()
nvvm.barrier
- // CHECK: call void @llvm.nvvm.barrier.n(i32 3)
- nvvm.barrier resource = 3
- // CHECK: call void @llvm.nvvm.barrier(i32 0, i32 %[[barId]])
- nvvm.barrier number_of_threads = %numberOfThreads
- // CHECK: call void @llvm.nvvm.barrier(i32 4, i32 %[[barId]])
- nvvm.barrier resource = 4 number_of_threads = %numberOfThreads
+ // CHECK: call void @llvm.nvvm.barrier.n(i32 %[[barId]])
+ nvvm.barrier id = %barID
+ // CHECK: call void @llvm.nvvm.barrier(i32 %[[barId]], i32 %[[numThreads]])
+ nvvm.barrier id = %barID number_of_threads = %numberOfThreads
llvm.return
}
@@ -526,6 +524,13 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = array<i32: 1, 2
// CHECK: {ptr @kernel_func, !"maxntidz", i32 32}
// CHECK: {ptr @kernel_func, !"minctasm", i32 16}
+// -----
+
+llvm.func @kernel_func(%numberOfThreads : i32) {
+ // expected-error @below {{'nvvm.barrier' op barrier id is missing, it should be set between 0 to 15}}
+ nvvm.barrier number_of_threads = %numberOfThreads
+}
+
// -----
// expected-error @below {{'"nvvm.minctasm"' attribute must be integer constant}}
llvm.func @kernel_func() attributes {nvvm.kernel,
>From 58d72f2c00bf85876a07e9954c3cb9b6f4d44271 Mon Sep 17 00:00:00 2001
From: grypp <guray.ozen at gmail.com>
Date: Tue, 13 Feb 2024 10:01:50 +0000
Subject: [PATCH 3/3] fix formatting
---
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 5fcedb03c28294..137e8e167970c9 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1023,8 +1023,9 @@ LogicalResult NVVM::SetMaxRegisterOp::verify() {
}
LogicalResult NVVM::BarrierOp::verify() {
- if(getNumberOfThreads() && !getBarrierId())
- return emitOpError("barrier id is missing, it should be set between 0 to 15");
+ if (getNumberOfThreads() && !getBarrierId())
+ return emitOpError(
+ "barrier id is missing, it should be set between 0 to 15");
return success();
}
More information about the Mlir-commits
mailing list