[llvm] [LLVM][NVPTX]: Add aligned versions of cluster barriers (PR #77940)

Durgadoss R via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 12 07:38:53 PST 2024


https://github.com/durga4github created https://github.com/llvm/llvm-project/pull/77940

PTX Doc link for these intrinsics:
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster

This patch adds the '.aligned' variants of the
barrier.cluster intrinsics. lit tests are added
to verify the generated PTX.

From 8abef166a0fae73b9d4aa4cae5bca8561904d9dc Mon Sep 17 00:00:00 2001
From: Durgadoss R <durgadossr at nvidia.com>
Date: Fri, 12 Jan 2024 21:00:36 +0530
Subject: [PATCH] [LLVM][NVPTX]: Add aligned versions of cluster barriers

PTX Doc for these intrinsics:
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster

This patch adds the '.aligned' variants of the
barrier.cluster intrinsics. lit tests are added
to verify the generated PTX.

Signed-off-by: Durgadoss R <durgadossr at nvidia.com>
---
 llvm/include/llvm/IR/IntrinsicsNVVM.td     |  8 ++++++++
 llvm/lib/Target/NVPTX/NVPTXIntrinsics.td   | 10 ++++++++++
 llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll | 13 +++++++++++++
 3 files changed, 31 insertions(+)

diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index cf50f2a59f602f..4665a1169ef4ee 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1372,6 +1372,14 @@ let TargetPrefix = "nvvm" in {
   def int_nvvm_barrier_cluster_wait :
       Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
 
+  // 'aligned' versions of the above barrier.cluster.* intrinsics
+  def int_nvvm_barrier_cluster_arrive_aligned :
+      Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
+  def int_nvvm_barrier_cluster_arrive_relaxed_aligned :
+      Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
+  def int_nvvm_barrier_cluster_wait_aligned :
+      Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
+
   // Membar
   def int_nvvm_membar_cta : ClangBuiltin<"__nvvm_membar_cta">,
       Intrinsic<[], [], [IntrNoCallback]>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 6b062a7f39127f..c5dbe350e44472 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -132,6 +132,7 @@ def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                  "barrier.sync \t$id, $cnt;",
                  [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
         Requires<[hasPTX<60>, hasSM<30>]>;
+
 class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
                           list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
         NVPTXInst<(outs), (ins), "barrier.cluster."# variant #";", [(Intr)]>,
@@ -145,6 +146,15 @@ def barrier_cluster_arrive_relaxed:
 def barrier_cluster_wait:
         INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;
 
+// 'aligned' versions of the cluster barrier intrinsics
+def barrier_cluster_arrive_aligned:
+        INT_BARRIER_CLUSTER<"arrive.aligned", int_nvvm_barrier_cluster_arrive_aligned>;
+def barrier_cluster_arrive_relaxed_aligned:
+        INT_BARRIER_CLUSTER<"arrive.relaxed.aligned",
+        int_nvvm_barrier_cluster_arrive_relaxed_aligned, [hasPTX<80>, hasSM<90>]>;
+def barrier_cluster_wait_aligned:
+        INT_BARRIER_CLUSTER<"wait.aligned", int_nvvm_barrier_cluster_wait_aligned>;
+
 class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                  bit offset_imm, bit mask_imm, bit threadmask_imm>
       : NVPTXInst<(outs), (ins), "?", []> {
diff --git a/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll b/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
index a157616db9fb4f..181fbf21129102 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
@@ -133,6 +133,16 @@ define void @test_barrier_cluster() {
        ret void
 }
 
+; CHECK-LABEL: test_barrier_cluster_aligned(
+define void @test_barrier_cluster_aligned() {
+; CHECK: barrier.cluster.arrive.aligned;
+       call void @llvm.nvvm.barrier.cluster.arrive.aligned()
+; CHECK: barrier.cluster.arrive.relaxed.aligned;
+       call void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()
+; CHECK: barrier.cluster.wait.aligned;
+       call void @llvm.nvvm.barrier.cluster.wait.aligned()
+       ret void
+}
 
 declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p);
 declare ptr @llvm.nvvm.mapa(ptr %p, i32 %r);
@@ -153,4 +163,7 @@ declare i1 @llvm.nvvm.is_explicit_cluster()
 declare void @llvm.nvvm.barrier.cluster.arrive()
 declare void @llvm.nvvm.barrier.cluster.arrive.relaxed()
 declare void @llvm.nvvm.barrier.cluster.wait()
+declare void @llvm.nvvm.barrier.cluster.arrive.aligned()
+declare void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()
+declare void @llvm.nvvm.barrier.cluster.wait.aligned()
 declare void @llvm.nvvm.fence.sc.cluster()



More information about the llvm-commits mailing list