[llvm] [LLVM][NVPTX]: Add aligned versions of cluster barriers (PR #77940)
Durgadoss R via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 12 07:38:53 PST 2024
https://github.com/durga4github created https://github.com/llvm/llvm-project/pull/77940
PTX Doc link for these intrinsics:
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster
This patch adds the '.aligned' variants of the
barrier.cluster intrinsics. lit tests are added
to verify the generated PTX.
From 8abef166a0fae73b9d4aa4cae5bca8561904d9dc Mon Sep 17 00:00:00 2001
From: Durgadoss R <durgadossr at nvidia.com>
Date: Fri, 12 Jan 2024 21:00:36 +0530
Subject: [PATCH] [LLVM][NVPTX]: Add aligned versions of cluster barriers
PTX Doc for these intrinsics:
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster
This patch adds the '.aligned' variants of the
barrier.cluster intrinsics. lit tests are added
to verify the generated PTX.
Signed-off-by: Durgadoss R <durgadossr at nvidia.com>
---
llvm/include/llvm/IR/IntrinsicsNVVM.td | 8 ++++++++
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 10 ++++++++++
llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll | 13 +++++++++++++
3 files changed, 31 insertions(+)
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index cf50f2a59f602f..4665a1169ef4ee 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1372,6 +1372,14 @@ let TargetPrefix = "nvvm" in {
def int_nvvm_barrier_cluster_wait :
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
+ // 'aligned' versions of the above barrier.cluster.* intrinsics
+ def int_nvvm_barrier_cluster_arrive_aligned :
+ Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
+ def int_nvvm_barrier_cluster_arrive_relaxed_aligned :
+ Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
+ def int_nvvm_barrier_cluster_wait_aligned :
+ Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
+
// Membar
def int_nvvm_membar_cta : ClangBuiltin<"__nvvm_membar_cta">,
Intrinsic<[], [], [IntrNoCallback]>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 6b062a7f39127f..c5dbe350e44472 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -132,6 +132,7 @@ def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
"barrier.sync \t$id, $cnt;",
[(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
Requires<[hasPTX<60>, hasSM<30>]>;
+
class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
NVPTXInst<(outs), (ins), "barrier.cluster."# variant #";", [(Intr)]>,
@@ -145,6 +146,15 @@ def barrier_cluster_arrive_relaxed:
def barrier_cluster_wait:
INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;
+// 'aligned' versions of the cluster barrier intrinsics
+def barrier_cluster_arrive_aligned:
+ INT_BARRIER_CLUSTER<"arrive.aligned", int_nvvm_barrier_cluster_arrive_aligned>;
+def barrier_cluster_arrive_relaxed_aligned:
+ INT_BARRIER_CLUSTER<"arrive.relaxed.aligned",
+ int_nvvm_barrier_cluster_arrive_relaxed_aligned, [hasPTX<80>, hasSM<90>]>;
+def barrier_cluster_wait_aligned:
+ INT_BARRIER_CLUSTER<"wait.aligned", int_nvvm_barrier_cluster_wait_aligned>;
+
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
bit offset_imm, bit mask_imm, bit threadmask_imm>
: NVPTXInst<(outs), (ins), "?", []> {
diff --git a/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll b/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
index a157616db9fb4f..181fbf21129102 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
@@ -133,6 +133,16 @@ define void @test_barrier_cluster() {
ret void
}
+; CHECK-LABEL: test_barrier_cluster_aligned(
+define void @test_barrier_cluster_aligned() {
+; CHECK: barrier.cluster.arrive.aligned;
+ call void @llvm.nvvm.barrier.cluster.arrive.aligned()
+; CHECK: barrier.cluster.arrive.relaxed.aligned;
+ call void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()
+; CHECK: barrier.cluster.wait.aligned;
+ call void @llvm.nvvm.barrier.cluster.wait.aligned()
+ ret void
+}
declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p);
declare ptr @llvm.nvvm.mapa(ptr %p, i32 %r);
@@ -153,4 +163,7 @@ declare i1 @llvm.nvvm.is_explicit_cluster()
declare void @llvm.nvvm.barrier.cluster.arrive()
declare void @llvm.nvvm.barrier.cluster.arrive.relaxed()
declare void @llvm.nvvm.barrier.cluster.wait()
+declare void @llvm.nvvm.barrier.cluster.arrive.aligned()
+declare void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()
+declare void @llvm.nvvm.barrier.cluster.wait.aligned()
declare void @llvm.nvvm.fence.sc.cluster()
More information about the llvm-commits
mailing list