[llvm] [NVPTX] Support !"cluster_dim_{x,y,z}" metadata (PR #109548)

Alex MacLean via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 25 11:32:17 PDT 2024


https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/109548

>From 6e896ae1b899f53062c5f97cd8a5b3fb0d5635c7 Mon Sep 17 00:00:00 2001
From: Alex MacLean <amaclean at nvidia.com>
Date: Wed, 18 Sep 2024 01:50:00 +0000
Subject: [PATCH 1/3] [NVPTX] Support !"cluster_dim_{x,y,z}" metadata

---
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 23 ++++++++++++++++++++++-
 llvm/lib/Target/NVPTX/NVPTXUtilities.cpp  | 12 ++++++++++++
 llvm/lib/Target/NVPTX/NVPTXUtilities.h    |  4 ++++
 llvm/test/CodeGen/NVPTX/cluster-dim.ll    | 17 +++++++++++++++++
 4 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/NVPTX/cluster-dim.ll

diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 9bcc911b6c3451..0f07f2ad97e0d4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -573,9 +573,30 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
   // filter it out for lower SM versions, as it causes a hard ptxas crash.
   const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
   const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
-  if (STI->getSmVersion() >= 90)
+
+  if (STI->getSmVersion() >= 90) {
+    std::optional<unsigned> ClusterX = getClusterDimx(F);
+    std::optional<unsigned> ClusterY = getClusterDimy(F);
+    std::optional<unsigned> ClusterZ = getClusterDimz(F);
+
+    if (ClusterX || ClusterY || ClusterZ) {
+      O << ".explicitcluster\n";
+      if (ClusterX.value_or(1) != 0) {
+        assert(ClusterY.value_or(1) && ClusterZ.value_or(1) &&
+               "clusterx != 0 implies clustery and clusterz should be non-zero "
+               "as well");
+
+        O << ".reqnctapercluster " << ClusterX.value_or(1) << ", "
+          << ClusterY.value_or(1) << ", " << ClusterZ.value_or(1) << "\n";
+      } else {
+        assert(
+            !ClusterY.value_or(1) && !ClusterZ.value_or(1) &&
+            "clusterx == 0 implies clustery and clusterz should be 0 as well");
+      }
+    }
     if (const auto Maxclusterrank = getMaxClusterRank(F))
       O << ".maxclusterrank " << *Maxclusterrank << "\n";
+  }
 }
 
 std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index be1c87d07f4ded..2d62f34726e88c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -272,6 +272,18 @@ std::optional<unsigned> getMaxNTID(const Function &F) {
   return std::nullopt;
 }
 
+std::optional<unsigned> getClusterDimx(const Function &F) {
+  return findOneNVVMAnnotation(&F, "cluster_dim_x");
+}
+
+std::optional<unsigned> getClusterDimy(const Function &F) {
+  return findOneNVVMAnnotation(&F, "cluster_dim_y");
+}
+
+std::optional<unsigned> getClusterDimz(const Function &F) {
+  return findOneNVVMAnnotation(&F, "cluster_dim_z");
+}
+
 std::optional<unsigned> getMaxClusterRank(const Function &F) {
   return findOneNVVMAnnotation(&F, "maxclusterrank");
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index cf15dff85cbde0..36fc0e49153531 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -55,6 +55,10 @@ std::optional<unsigned> getReqNTIDy(const Function &);
 std::optional<unsigned> getReqNTIDz(const Function &);
 std::optional<unsigned> getReqNTID(const Function &);
 
+std::optional<unsigned> getClusterDimx(const Function &);
+std::optional<unsigned> getClusterDimy(const Function &);
+std::optional<unsigned> getClusterDimz(const Function &);
+
 std::optional<unsigned> getMaxClusterRank(const Function &);
 std::optional<unsigned> getMinCTASm(const Function &);
 std::optional<unsigned> getMaxNReg(const Function &);
diff --git a/llvm/test/CodeGen/NVPTX/cluster-dim.ll b/llvm/test/CodeGen/NVPTX/cluster-dim.ll
new file mode 100644
index 00000000000000..109c9891417c57
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/cluster-dim.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_90 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_90 | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_90 | %ptxas-verify %}
+
+; CHECK-LABEL: .entry kernel_func_clusterxyz
+define void @kernel_func_clusterxyz() {
+; CHECK: .explicitcluster
+; CHECK: .reqnctapercluster 3, 5, 7
+  ret void
+}
+
+
+!nvvm.annotations = !{!1, !2}
+
+!1 = !{ptr @kernel_func_clusterxyz, !"kernel", i32 1}
+!2 = !{ptr @kernel_func_clusterxyz, !"cluster_dim_x", i32 3, !"cluster_dim_y", i32 5, !"cluster_dim_z", i32 7}

>From b3994ae8f2e78f1766116938a2ff438b59715fb1 Mon Sep 17 00:00:00 2001
From: Alex MacLean <amaclean at nvidia.com>
Date: Mon, 23 Sep 2024 15:41:01 +0000
Subject: [PATCH 2/3] address comments

---
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 10 +++++-----
 llvm/test/CodeGen/NVPTX/cluster-dim.ll    |  4 +---
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 0f07f2ad97e0d4..fd69e483ae2007 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -583,15 +583,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
       O << ".explicitcluster\n";
       if (ClusterX.value_or(1) != 0) {
         assert(ClusterY.value_or(1) && ClusterZ.value_or(1) &&
-               "clusterx != 0 implies clustery and clusterz should be non-zero "
-               "as well");
+               "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
+               "should be non-zero as well");
 
         O << ".reqnctapercluster " << ClusterX.value_or(1) << ", "
           << ClusterY.value_or(1) << ", " << ClusterZ.value_or(1) << "\n";
       } else {
-        assert(
-            !ClusterY.value_or(1) && !ClusterZ.value_or(1) &&
-            "clusterx == 0 implies clustery and clusterz should be 0 as well");
+        assert(!ClusterY.value_or(1) && !ClusterZ.value_or(1) &&
+               "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z "
+               "should be 0 as well");
       }
     }
     if (const auto Maxclusterrank = getMaxClusterRank(F))
diff --git a/llvm/test/CodeGen/NVPTX/cluster-dim.ll b/llvm/test/CodeGen/NVPTX/cluster-dim.ll
index 109c9891417c57..9c11a10acccb14 100644
--- a/llvm/test/CodeGen/NVPTX/cluster-dim.ll
+++ b/llvm/test/CodeGen/NVPTX/cluster-dim.ll
@@ -1,7 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_90 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_90 | %ptxas-verify %}
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_90 | %ptxas-verify %}
+; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90 | %ptxas-verify -arch=sm_90 %}
 
 ; CHECK-LABEL: .entry kernel_func_clusterxyz
 define void @kernel_func_clusterxyz() {

>From e65d9c154af80ca525a7b447b4880631089cacaf Mon Sep 17 00:00:00 2001
From: Alex MacLean <amaclean at nvidia.com>
Date: Wed, 25 Sep 2024 18:31:44 +0000
Subject: [PATCH 3/3] address comments

---
 llvm/test/CodeGen/NVPTX/cluster-dim.ll | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/cluster-dim.ll b/llvm/test/CodeGen/NVPTX/cluster-dim.ll
index 9c11a10acccb14..42dff68fa6594d 100644
--- a/llvm/test/CodeGen/NVPTX/cluster-dim.ll
+++ b/llvm/test/CodeGen/NVPTX/cluster-dim.ll
@@ -1,10 +1,24 @@
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck -check-prefixes=CHECK80 %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck -check-prefixes=CHECK90 %s
 ; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90 | %ptxas-verify -arch=sm_90 %}
 
-; CHECK-LABEL: .entry kernel_func_clusterxyz
 define void @kernel_func_clusterxyz() {
-; CHECK: .explicitcluster
-; CHECK: .reqnctapercluster 3, 5, 7
+; CHECK80-LABEL: kernel_func_clusterxyz(
+; CHECK80:       {
+; CHECK80-EMPTY:
+; CHECK80-EMPTY:
+; CHECK80-NEXT:  // %bb.0:
+; CHECK80-NEXT:    ret;
+;
+; CHECK90-LABEL: kernel_func_clusterxyz(
+; CHECK90:       .explicitcluster
+; CHECK90-NEXT:  .reqnctapercluster 3, 5, 7
+; CHECK90-NEXT:  {
+; CHECK90-EMPTY:
+; CHECK90-EMPTY:
+; CHECK90-NEXT:  // %bb.0:
+; CHECK90-NEXT:    ret;
   ret void
 }
 



More information about the llvm-commits mailing list