[Mlir-commits] [mlir] [MLIR][NVVM]: Add support for aligned variants of cluster barriers (PR #78142)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Mon Jan 15 03:04:09 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-llvm
Author: Durgadoss R (durga4github)
<details>
<summary>Changes</summary>
This patch adds:
* Support for the 'aligned' variants of the cluster barrier Ops, by extending the existing Op with an 'aligned' attribute.
* Docs for these Ops.
* Test cases to verify the lowering to the corresponding intrinsics.
---
Full diff: https://github.com/llvm/llvm-project/pull/78142.diff
3 Files Affected:
- (modified) mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td (+55-3)
- (modified) mlir/test/Dialect/LLVMIR/nvvm.mlir (+6)
- (modified) mlir/test/Target/LLVMIR/nvvmir.mlir (+27)
``````````diff
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index c5f68a2ebe3952..7140e614412f98 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -378,22 +378,74 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
}
def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> {
+ let arguments = (ins OptionalAttr<UnitAttr>:$aligned);
+
+ let summary = "Cluster Barrier Arrive Op";
+ let description = [{
+ The `cluster.arrive` can be used by the threads within the cluster for synchronization and
+ communication. The `cluster.arrive` instruction marks the warps' arrival at the barrier
+ without causing the executing thread to wait for other participating threads.
+
+ The `aligned` attribute, when provided, generates the .aligned version of the PTX instruction.
+
+ [For more information, see PTX ISA]
+ (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
+ }];
+
string llvmBuilder = [{
- createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive);
+ if ($aligned)
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_aligned);
+ else
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive);
}];
let assemblyFormat = "attr-dict";
}
def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed"> {
+ let arguments = (ins OptionalAttr<UnitAttr>:$aligned);
+
+ let summary = "Cluster Barrier Relaxed Arrive Op";
+ let description = [{
+ The `cluster.arrive.relaxed` can be used by the threads within the cluster for synchronization and
+ communication. The `cluster.arrive.relaxed` instruction marks the warps' arrival at the barrier
+ without causing the executing thread to wait for other participating threads.
+
+ The `aligned` attribute, when provided, generates the .aligned version of the PTX instruction.
+ The .relaxed qualifier on `cluster.arrive` specifies that there are no memory
+ ordering and visibility guarantees provided for the memory accesses performed prior to
+ `cluster.arrive`.
+
+ [For more information, see PTX ISA]
+ (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
+ }];
+
string llvmBuilder = [{
- createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_relaxed);
+ if ($aligned)
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_relaxed_aligned);
+ else
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_relaxed);
}];
let assemblyFormat = "attr-dict";
}
def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait"> {
+ let arguments = (ins OptionalAttr<UnitAttr>:$aligned);
+
+ let summary = "Cluster Barrier Wait Op";
+ let description = [{
+ The `cluster.wait` causes the executing thread to wait for all non-exited threads
+ of the cluster to perform `cluster.arrive`. The `aligned` attribute, when provided,
+ generates the .aligned version of the PTX instruction.
+
+ [For more information, see PTX ISA]
+ (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
+ }];
+
string llvmBuilder = [{
- createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_wait);
+ if ($aligned)
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_wait_aligned);
+ else
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_wait);
}];
let assemblyFormat = "attr-dict";
}
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index 39516b5090d07b..ce483ddab22a0e 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -47,6 +47,8 @@ func.func @llvm_nvvm_barrier0() {
func.func @llvm_nvvm_cluster_arrive() {
// CHECK: nvvm.cluster.arrive
nvvm.cluster.arrive
+ // CHECK: nvvm.cluster.arrive {aligned}
+ nvvm.cluster.arrive {aligned}
llvm.return
}
@@ -54,6 +56,8 @@ func.func @llvm_nvvm_cluster_arrive() {
func.func @llvm_nvvm_cluster_arrive_relaxed() {
// CHECK: nvvm.cluster.arrive.relaxed
nvvm.cluster.arrive.relaxed
+ // CHECK: nvvm.cluster.arrive.relaxed {aligned}
+ nvvm.cluster.arrive.relaxed {aligned}
llvm.return
}
@@ -61,6 +65,8 @@ func.func @llvm_nvvm_cluster_arrive_relaxed() {
func.func @llvm_nvvm_cluster_wait() {
// CHECK: nvvm.cluster.wait
nvvm.cluster.wait
+ // CHECK: nvvm.cluster.wait {aligned}
+ nvvm.cluster.wait {aligned}
llvm.return
}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 423b1a133a4ae2..8c5e3524a848f6 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -80,6 +80,33 @@ llvm.func @llvm_nvvm_barrier0() {
llvm.return
}
+// CHECK-LABEL: @llvm_nvvm_cluster_arrive
+llvm.func @llvm_nvvm_cluster_arrive() {
+ // CHECK: call void @llvm.nvvm.barrier.cluster.arrive()
+ nvvm.cluster.arrive
+ // CHECK: call void @llvm.nvvm.barrier.cluster.arrive.aligned()
+ nvvm.cluster.arrive {aligned}
+ llvm.return
+}
+
+// CHECK-LABEL: @llvm_nvvm_cluster_arrive_relaxed
+llvm.func @llvm_nvvm_cluster_arrive_relaxed() {
+ // CHECK: call void @llvm.nvvm.barrier.cluster.arrive.relaxed()
+ nvvm.cluster.arrive.relaxed
+ // CHECK: call void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()
+ nvvm.cluster.arrive.relaxed {aligned}
+ llvm.return
+}
+
+// CHECK-LABEL: @llvm_nvvm_cluster_wait
+llvm.func @llvm_nvvm_cluster_wait() {
+ // CHECK: call void @llvm.nvvm.barrier.cluster.wait()
+ nvvm.cluster.wait
+ // CHECK: call void @llvm.nvvm.barrier.cluster.wait.aligned()
+ nvvm.cluster.wait {aligned}
+ llvm.return
+}
+
// CHECK-LABEL: @nvvm_shfl
llvm.func @nvvm_shfl(
%0 : i32, %1 : i32, %2 : i32,
``````````
</details>
https://github.com/llvm/llvm-project/pull/78142
More information about the Mlir-commits mailing list