[llvm] 9bc26e9 - [NVPTX] Support !"cluster_dim_{x, y, z}" metadata (#109548)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 25 16:49:06 PDT 2024
Author: Alex MacLean
Date: 2024-09-25T16:49:02-07:00
New Revision: 9bc26e9e8eb7b63c6a96f93e5644e0511bf3f735
URL: https://github.com/llvm/llvm-project/commit/9bc26e9e8eb7b63c6a96f93e5644e0511bf3f735
DIFF: https://github.com/llvm/llvm-project/commit/9bc26e9e8eb7b63c6a96f93e5644e0511bf3f735.diff
LOG: [NVPTX] Support !"cluster_dim_{x,y,z}" metadata (#109548)
Add support for !"cluster_dim_{x,y,z}" metadata to allow specifying
cluster dimensions on a kernel function in LLVM.
If any of these metadata entries are present, the `.explicitcluster` PTX
directive is used and the specified dimensions are lowered with the
`.reqnctapercluster` directive. For more details see:
[PTX ISA: 11.7. Cluster Dimension Directives](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cluster-dimension-directives)
Added:
llvm/test/CodeGen/NVPTX/cluster-dim.ll
Modified:
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
llvm/lib/Target/NVPTX/NVPTXUtilities.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 9bcc911b6c3451..fd69e483ae2007 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -573,9 +573,30 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
// filter it out for lower SM versions, as it causes a hard ptxas crash.
const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
- if (STI->getSmVersion() >= 90)
+
+ if (STI->getSmVersion() >= 90) {
+ std::optional<unsigned> ClusterX = getClusterDimx(F);
+ std::optional<unsigned> ClusterY = getClusterDimy(F);
+ std::optional<unsigned> ClusterZ = getClusterDimz(F);
+
+ if (ClusterX || ClusterY || ClusterZ) {
+ O << ".explicitcluster\n";
+ if (ClusterX.value_or(1) != 0) {
+ assert(ClusterY.value_or(1) && ClusterZ.value_or(1) &&
+ "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
+ "should be non-zero as well");
+
+ O << ".reqnctapercluster " << ClusterX.value_or(1) << ", "
+ << ClusterY.value_or(1) << ", " << ClusterZ.value_or(1) << "\n";
+ } else {
+ assert(!ClusterY.value_or(1) && !ClusterZ.value_or(1) &&
+ "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z "
+ "should be 0 as well");
+ }
+ }
if (const auto Maxclusterrank = getMaxClusterRank(F))
O << ".maxclusterrank " << *Maxclusterrank << "\n";
+ }
}
std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index be1c87d07f4ded..2d62f34726e88c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -272,6 +272,18 @@ std::optional<unsigned> getMaxNTID(const Function &F) {
return std::nullopt;
}
+std::optional<unsigned> getClusterDimx(const Function &F) {
+ return findOneNVVMAnnotation(&F, "cluster_dim_x");
+}
+
+std::optional<unsigned> getClusterDimy(const Function &F) {
+ return findOneNVVMAnnotation(&F, "cluster_dim_y");
+}
+
+std::optional<unsigned> getClusterDimz(const Function &F) {
+ return findOneNVVMAnnotation(&F, "cluster_dim_z");
+}
+
std::optional<unsigned> getMaxClusterRank(const Function &F) {
return findOneNVVMAnnotation(&F, "maxclusterrank");
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index cf15dff85cbde0..36fc0e49153531 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -55,6 +55,10 @@ std::optional<unsigned> getReqNTIDy(const Function &);
std::optional<unsigned> getReqNTIDz(const Function &);
std::optional<unsigned> getReqNTID(const Function &);
+std::optional<unsigned> getClusterDimx(const Function &);
+std::optional<unsigned> getClusterDimy(const Function &);
+std::optional<unsigned> getClusterDimz(const Function &);
+
std::optional<unsigned> getMaxClusterRank(const Function &);
std::optional<unsigned> getMinCTASm(const Function &);
std::optional<unsigned> getMaxNReg(const Function &);
diff --git a/llvm/test/CodeGen/NVPTX/cluster-dim.ll b/llvm/test/CodeGen/NVPTX/cluster-dim.ll
new file mode 100644
index 00000000000000..42dff68fa6594d
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/cluster-dim.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck -check-prefixes=CHECK80 %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck -check-prefixes=CHECK90 %s
+; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90 | %ptxas-verify -arch=sm_90 %}
+
+define void @kernel_func_clusterxyz() {
+; CHECK80-LABEL: kernel_func_clusterxyz(
+; CHECK80: {
+; CHECK80-EMPTY:
+; CHECK80-EMPTY:
+; CHECK80-NEXT: // %bb.0:
+; CHECK80-NEXT: ret;
+;
+; CHECK90-LABEL: kernel_func_clusterxyz(
+; CHECK90: .explicitcluster
+; CHECK90-NEXT: .reqnctapercluster 3, 5, 7
+; CHECK90-NEXT: {
+; CHECK90-EMPTY:
+; CHECK90-EMPTY:
+; CHECK90-NEXT: // %bb.0:
+; CHECK90-NEXT: ret;
+ ret void
+}
+
+
+!nvvm.annotations = !{!1, !2}
+
+!1 = !{ptr @kernel_func_clusterxyz, !"kernel", i32 1}
+!2 = !{ptr @kernel_func_clusterxyz, !"cluster_dim_x", i32 3, !"cluster_dim_y", i32 5, !"cluster_dim_z", i32 7}
More information about the llvm-commits mailing list