[llvm] [NVPTX] Auto-Upgrade some nvvm.annotations to attributes (PR #119261)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 27 16:52:42 PST 2025
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/119261
>From 6e1b7af045e1cca43d10f9359b72497aea3289f5 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 28 Jan 2025 00:30:16 +0000
Subject: [PATCH 1/2] auto-update cc ptx_kernel
---
llvm/include/llvm/IR/AutoUpgrade.h | 4 ++
llvm/lib/AsmParser/LLParser.cpp | 1 +
llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2 +
llvm/lib/IR/AutoUpgrade.cpp | 67 +++++++++++++++++++
llvm/lib/Linker/IRMover.cpp | 1 +
llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 27 +++-----
llvm/lib/Target/NVPTX/NVPTXUtilities.h | 7 +-
.../CodeGen/NVPTX/upgrade-nvvm-annotations.ll | 28 ++++++++
8 files changed, 118 insertions(+), 19 deletions(-)
create mode 100644 llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll
diff --git a/llvm/include/llvm/IR/AutoUpgrade.h b/llvm/include/llvm/IR/AutoUpgrade.h
index 97c3e4d7589d7b..8c093568a1e031 100644
--- a/llvm/include/llvm/IR/AutoUpgrade.h
+++ b/llvm/include/llvm/IR/AutoUpgrade.h
@@ -61,6 +61,10 @@ namespace llvm {
/// module is modified.
bool UpgradeModuleFlags(Module &M);
+ /// Convert legacy nvvm.annotations metadata to appropriate function
+ /// attributes.
+ void UpgradeNVVMAnnotations(Module &M);
+
/// Convert calls to ARC runtime functions to intrinsic calls and upgrade the
/// old retain release marker to new module flag format.
void UpgradeARCRuntime(Module &M);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index fa0079bac435c1..9936b4e9da5664 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -448,6 +448,7 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
llvm::UpgradeDebugInfo(*M);
UpgradeModuleFlags(*M);
+ UpgradeNVVMAnnotations(*M);
UpgradeSectionAttributes(*M);
if (PreserveInputDbgFormat != cl::boolOrDefault::BOU_TRUE)
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 551dfd4af88bb2..e038b2dc5658ca 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7148,6 +7148,8 @@ Error BitcodeReader::materializeModule() {
UpgradeModuleFlags(*TheModule);
+ UpgradeNVVMAnnotations(*TheModule);
+
UpgradeARCRuntime(*TheModule);
return Error::success();
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 3725f412b8930d..e886a6012b219a 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/AttributeMask.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -5019,6 +5020,72 @@ bool llvm::UpgradeDebugInfo(Module &M) {
return Modified;
}
+bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
+ const Metadata *V) {
+ if (K == "kernel") {
+ if (!mdconst::extract<ConstantInt>(V)->isZero())
+ cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
+ return true;
+ }
+ if (K == "align") {
+ // V is a bitfield specifying two 16-bit values. The alignment value is
+ // specified in the low 16 bits and the index in the high bits. For the
+ // index, 0 indicates the return value while higher values correspond to
+ // each parameter (idx = param + 1).
+ const uint64_t AlignIdxValuePair =
+ mdconst::extract<ConstantInt>(V)->getZExtValue();
+ const unsigned Idx = (AlignIdxValuePair >> 16);
+ const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
+ // TODO: Upgrade return-value alignment too; for now it stays as metadata.
+ if (!Idx)
+ return false;
+ cast<Function>(GV)->addAttributeAtIndex(
+ Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
+ return true;
+ }
+
+ return false;
+}
+
+void llvm::UpgradeNVVMAnnotations(Module &M) {
+ NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
+ if (!NamedMD)
+ return;
+
+ SmallVector<MDNode *, 8> NewNodes;
+ SmallSet<const MDNode *, 8> SeenNodes;
+ for (MDNode *MD : NamedMD->operands()) {
+ if (!SeenNodes.insert(MD).second)
+ continue;
+
+ auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
+ if (!GV)
+ continue;
+
+ assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
+
+ SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
+ // Each nvvm.annotations metadata entry will be of the following form:
+ // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
+ // start index = 1, to skip the global variable key
+ // increment = 2, to skip the value of each property-value pair
+ for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
+ MDString *K = cast<MDString>(MD->getOperand(j));
+ const MDOperand &V = MD->getOperand(j + 1);
+ bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
+ if (!Upgraded)
+ NewOperands.append({K, V});
+ }
+
+ if (NewOperands.size() > 1)
+ NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
+ }
+
+ NamedMD->clearOperands();
+ for (MDNode *N : NewNodes)
+ NamedMD->addOperand(N);
+}
+
/// This checks for objc retain release marker which should be upgraded. It
/// returns true if module is modified.
static bool upgradeRetainReleaseMarker(Module &M) {
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index 43fcfe75ba46b2..62e2af4da57bbf 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -1244,6 +1244,7 @@ Error IRLinker::linkModuleFlagsMetadata() {
// Check for module flag for updates before do anything.
UpgradeModuleFlags(*SrcM);
+ UpgradeNVVMAnnotations(*SrcM);
// If the destination module doesn't have module flags yet, then just copy
// over the source module's flags.
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 0f2bec711b249d..a41943880807c5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -310,30 +310,21 @@ std::optional<unsigned> getMaxNReg(const Function &F) {
return findOneNVVMAnnotation(&F, "maxnreg");
}
-bool isKernelFunction(const Function &F) {
- if (F.getCallingConv() == CallingConv::PTX_Kernel)
- return true;
-
- if (const auto X = findOneNVVMAnnotation(&F, "kernel"))
- return (*X == 1);
-
- return false;
-}
-
MaybeAlign getAlign(const Function &F, unsigned Index) {
// First check the alignstack metadata
if (MaybeAlign StackAlign =
F.getAttributes().getAttributes(Index).getStackAlignment())
return StackAlign;
- // If that is missing, check the legacy nvvm metadata
- std::vector<unsigned> Vs;
- bool retval = findAllNVVMAnnotation(&F, "align", Vs);
- if (!retval)
- return std::nullopt;
- for (unsigned V : Vs)
- if ((V >> 16) == Index)
- return Align(V & 0xFFFF);
+ // Check the legacy nvvm metadata only for the return value, since LLVM
+ // does not support the stackalign attribute there.
+ if (Index == 0) {
+ std::vector<unsigned> Vs;
+ if (findAllNVVMAnnotation(&F, "align", Vs))
+ for (unsigned V : Vs)
+ if ((V >> 16) == Index)
+ return Align(V & 0xFFFF);
+ }
return std::nullopt;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index 7ce00b9b5688d1..cf35eaf4cbae53 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -16,6 +16,7 @@
#include "NVPTX.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -63,7 +64,11 @@ std::optional<unsigned> getClusterDimz(const Function &);
std::optional<unsigned> getMaxClusterRank(const Function &);
std::optional<unsigned> getMinCTASm(const Function &);
std::optional<unsigned> getMaxNReg(const Function &);
-bool isKernelFunction(const Function &);
+
+inline bool isKernelFunction(const Function &F) {
+ return F.getCallingConv() == CallingConv::PTX_Kernel;
+}
+
bool isParamGridConstant(const Value &);
MaybeAlign getAlign(const Function &, unsigned);
diff --git a/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll b/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll
new file mode 100644
index 00000000000000..a9f370a12a945a
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -mtriple=nvptx64-unknown-unknown -O0 -S | FileCheck %s
+
+define i32 @foo(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: i32 alignstack(8) [[A:%.*]], i32 alignstack(16) [[B:%.*]]) {
+; CHECK-NEXT: ret i32 0
+;
+ ret i32 0
+}
+
+define i32 @bar(i32 %a, i32 %b) {
+; CHECK-LABEL: define ptx_kernel i32 @bar(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: ret i32 0
+;
+ ret i32 0
+}
+
+!nvvm.annotations = !{!0, !1, !2}
+
+!0 = !{ptr @foo, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020010}
+!1 = !{null, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020008}
+!2 = !{ptr @bar, !"kernel", i32 1}
+
+;.
+; CHECK: [[META0:![0-9]+]] = !{ptr @foo, !"align", i32 8}
+;.
>From 885d162bea406d41b92cad94b63b00a13a2a1de4 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 28 Jan 2025 00:52:28 +0000
Subject: [PATCH 2/2] remove dead omp code
---
llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 28 ++++-----------------------
1 file changed, 4 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 10008130016c3b..33d9f660b48d5f 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -5917,39 +5917,19 @@ static bool isKernelCC(Function &F) {
}
KernelSet llvm::omp::getDeviceKernels(Module &M) {
- // TODO: Create a more cross-platform way of determining device kernels.
KernelSet Kernels;
- DenseSet<const Function *> SeenKernels;
- auto ProcessKernel = [&](Function &KF) {
- if (SeenKernels.insert(&KF).second) {
+ for (Function &F : M)
+ if (isKernelCC(F)) {
// We are only interested in OpenMP target regions. Others, such as
// kernels generated by CUDA but linked together, are not interesting to
// this pass.
- if (isOpenMPKernel(KF)) {
+ if (isOpenMPKernel(F)) {
++NumOpenMPTargetRegionKernels;
- Kernels.insert(&KF);
+ Kernels.insert(&F);
} else
++NumNonOpenMPTargetRegionKernels;
}
- };
-
- if (NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations"))
- for (auto *Op : MD->operands()) {
- if (Op->getNumOperands() < 2)
- continue;
- MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
- if (!KindID || KindID->getString() != "kernel")
- continue;
-
- if (auto *KernelFn =
- mdconst::dyn_extract_or_null<Function>(Op->getOperand(0)))
- ProcessKernel(*KernelFn);
- }
-
- for (Function &F : M)
- if (isKernelCC(F))
- ProcessKernel(F);
return Kernels;
}
More information about the llvm-commits
mailing list