[Mlir-commits] [mlir] [MLIR][NVVM]: Add support for aligned variants of cluster barriers (PR #78142)
Durgadoss R
llvmlistbot at llvm.org
Mon Jan 15 03:03:39 PST 2024
https://github.com/durga4github created https://github.com/llvm/llvm-project/pull/78142
This patch adds:
* Support for the 'aligned' variants of the cluster barrier Ops, by extending the existing Op with an 'aligned' attribute.
* Docs for these Ops.
* Test cases to verify the lowering to the corresponding intrinsics.
From f6ef181637a67f26c890edc2c04d39341b505379 Mon Sep 17 00:00:00 2001
From: Durgadoss R <durgadossr at nvidia.com>
Date: Mon, 15 Jan 2024 16:19:29 +0530
Subject: [PATCH] [MLIR][NVVM]: Add support for aligned variants of cluster
barriers
This patch adds:
* Support for the 'aligned' variants of the cluster barrier Ops,
by extending the existing Op with an 'aligned' attribute.
* Docs for these Ops.
* Test cases to verify the lowering to the corresponding intrinsics.
Signed-off-by: Durgadoss R <durgadossr at nvidia.com>
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 58 +++++++++++++++++++--
mlir/test/Dialect/LLVMIR/nvvm.mlir | 6 +++
mlir/test/Target/LLVMIR/nvvmir.mlir | 27 ++++++++++
3 files changed, 88 insertions(+), 3 deletions(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index c5f68a2ebe3952..7140e614412f98 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -378,22 +378,74 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
}
def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> {
+ let arguments = (ins OptionalAttr<UnitAttr>:$aligned);
+
+ let summary = "Cluster Barrier Arrive Op";
+ let description = [{
+ The `cluster.arrive` can be used by the threads within the cluster for synchronization and
+ communication. The `cluster.arrive` instruction marks the warps' arrival at the barrier
+ without causing the executing thread to wait for other participating threads.
+
+ The `aligned` attribute, when provided, generates the .aligned version of the PTX instruction.
+
+ [For more information, see PTX ISA]
+ (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
+ }];
+
string llvmBuilder = [{
- createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive);
+ if ($aligned)
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_aligned);
+ else
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive);
}];
let assemblyFormat = "attr-dict";
}
def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed"> {
+ let arguments = (ins OptionalAttr<UnitAttr>:$aligned);
+
+ let summary = "Cluster Barrier Relaxed Arrive Op";
+ let description = [{
+ The `cluster.arrive` can be used by the threads within the cluster for synchronization and
+ communication. The `cluster.arrive` instruction marks the warps' arrival at the barrier
+ without causing the executing thread to wait for other participating threads.
+
+ The `aligned` attribute, when provided, generates the .aligned version of the PTX instruction.
+ The .relaxed qualifier on `cluster.arrive` specifies that there are no memory
+ ordering and visibility guarantees provided for the memory accesses performed prior to
+ `cluster.arrive`.
+
+ [For more information, see PTX ISA]
+ (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
+ }];
+
string llvmBuilder = [{
- createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_relaxed);
+ if ($aligned)
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_relaxed_aligned);
+ else
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_relaxed);
}];
let assemblyFormat = "attr-dict";
}
def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait"> {
+ let arguments = (ins OptionalAttr<UnitAttr>:$aligned);
+
+ let summary = "Cluster Barrier Wait Op";
+ let description = [{
+ The `cluster.wait` causes the executing thread to wait for all non-exited threads
+ of the cluster to perform `cluster.arrive`. The `aligned` attribute, when provided,
+ generates the .aligned version of the PTX instruction.
+
+ [For more information, see PTX ISA]
+ (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
+ }];
+
string llvmBuilder = [{
- createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_wait);
+ if ($aligned)
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_wait_aligned);
+ else
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_wait);
}];
let assemblyFormat = "attr-dict";
}
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index 39516b5090d07b..ce483ddab22a0e 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -47,6 +47,8 @@ func.func @llvm_nvvm_barrier0() {
func.func @llvm_nvvm_cluster_arrive() {
// CHECK: nvvm.cluster.arrive
nvvm.cluster.arrive
+ // CHECK: nvvm.cluster.arrive {aligned}
+ nvvm.cluster.arrive {aligned}
llvm.return
}
@@ -54,6 +56,8 @@ func.func @llvm_nvvm_cluster_arrive() {
func.func @llvm_nvvm_cluster_arrive_relaxed() {
// CHECK: nvvm.cluster.arrive.relaxed
nvvm.cluster.arrive.relaxed
+ // CHECK: nvvm.cluster.arrive.relaxed {aligned}
+ nvvm.cluster.arrive.relaxed {aligned}
llvm.return
}
@@ -61,6 +65,8 @@ func.func @llvm_nvvm_cluster_arrive_relaxed() {
func.func @llvm_nvvm_cluster_wait() {
// CHECK: nvvm.cluster.wait
nvvm.cluster.wait
+ // CHECK: nvvm.cluster.wait {aligned}
+ nvvm.cluster.wait {aligned}
llvm.return
}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 423b1a133a4ae2..8c5e3524a848f6 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -80,6 +80,33 @@ llvm.func @llvm_nvvm_barrier0() {
llvm.return
}
+// CHECK-LABEL: @llvm_nvvm_cluster_arrive
+llvm.func @llvm_nvvm_cluster_arrive() {
+ // CHECK: call void @llvm.nvvm.barrier.cluster.arrive()
+ nvvm.cluster.arrive
+ // CHECK: call void @llvm.nvvm.barrier.cluster.arrive.aligned()
+ nvvm.cluster.arrive {aligned}
+ llvm.return
+}
+
+// CHECK-LABEL: @llvm_nvvm_cluster_arrive_relaxed
+llvm.func @llvm_nvvm_cluster_arrive_relaxed() {
+ // CHECK: call void @llvm.nvvm.barrier.cluster.arrive.relaxed()
+ nvvm.cluster.arrive.relaxed
+ // CHECK: call void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()
+ nvvm.cluster.arrive.relaxed {aligned}
+ llvm.return
+}
+
+// CHECK-LABEL: @llvm_nvvm_cluster_wait
+llvm.func @llvm_nvvm_cluster_wait() {
+ // CHECK: call void @llvm.nvvm.barrier.cluster.wait()
+ nvvm.cluster.wait
+ // CHECK: call void @llvm.nvvm.barrier.cluster.wait.aligned()
+ nvvm.cluster.wait {aligned}
+ llvm.return
+}
+
// CHECK-LABEL: @nvvm_shfl
llvm.func @nvvm_shfl(
%0 : i32, %1 : i32, %2 : i32,
More information about the Mlir-commits
mailing list