[llvm] [AMDGPU] Fix resource usage information for unnamed functions (PR #115320)
Janek van Oirschot via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 7 05:54:43 PST 2024
https://github.com/JanekvO created https://github.com/llvm/llvm-project/pull/115320
Resource usage information would try to overwrite unnamed functions if there are multiple within the same compilation unit. This aims to either use the `MCSymbol` assigned to the unnamed function (i.e., `CurrentFnSym`), or, rematerialize the `MCSymbol` for the unnamed function.
>From 37aaabf99fe4bc1317c813ce5b22475deababb8c Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Wed, 6 Nov 2024 19:42:12 +0000
Subject: [PATCH] [AMDGPU] Fix resource usage information for unnamed functions
---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 51 +++++++++++--------
.../Target/AMDGPU/AMDGPUMCResourceInfo.cpp | 36 +++++++++----
.../AMDGPU/unnamed-function-resource-info.ll | 48 +++++++++++++++++
3 files changed, 102 insertions(+), 33 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 93b6ba0595b70f..5ebe4a069569c2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -368,6 +368,7 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
using RIK = MCResourceInfo::ResourceInfoKind;
const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
+ MCSymbol *FnSym = TM.getSymbol(&F);
auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {
int64_t Val;
@@ -381,7 +382,7 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
const uint64_t MaxScratchPerWorkitem =
STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
MCSymbol *ScratchSizeSymbol =
- RI.getSymbol(F.getName(), RIK::RIK_PrivateSegSize, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_PrivateSegSize, OutContext);
uint64_t ScratchSize;
if (ScratchSizeSymbol->isVariable() &&
TryGetMCExprValue(ScratchSizeSymbol->getVariableValue(), ScratchSize) &&
@@ -394,7 +395,7 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
// Validate addressable scalar registers (i.e., prior to added implicit
// SGPRs).
MCSymbol *NumSGPRSymbol =
- RI.getSymbol(F.getName(), RIK::RIK_NumSGPR, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR, OutContext);
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
!STM.hasSGPRInitBug()) {
unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
@@ -411,9 +412,9 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
}
MCSymbol *VCCUsedSymbol =
- RI.getSymbol(F.getName(), RIK::RIK_UsesVCC, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC, OutContext);
MCSymbol *FlatUsedSymbol =
- RI.getSymbol(F.getName(), RIK::RIK_UsesFlatScratch, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_UsesFlatScratch, OutContext);
uint64_t VCCUsed, FlatUsed, NumSgpr;
if (NumSGPRSymbol->isVariable() && VCCUsedSymbol->isVariable() &&
@@ -440,9 +441,9 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
}
MCSymbol *NumVgprSymbol =
- RI.getSymbol(F.getName(), RIK::RIK_NumVGPR, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR, OutContext);
MCSymbol *NumAgprSymbol =
- RI.getSymbol(F.getName(), RIK::RIK_NumAGPR, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR, OutContext);
uint64_t NumVgpr, NumAgpr;
MachineModuleInfo &MMI =
@@ -705,15 +706,20 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
{
using RIK = MCResourceInfo::ResourceInfoKind;
getTargetStreamer()->EmitMCResourceInfo(
- RI.getSymbol(MF.getName(), RIK::RIK_NumVGPR, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_NumAGPR, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_NumSGPR, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_PrivateSegSize, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_UsesVCC, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_UsesFlatScratch, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_HasDynSizedStack, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_HasRecursion, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_HasIndirectCall, OutContext));
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR, OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumSGPR, OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
+ OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesVCC, OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesFlatScratch,
+ OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasDynSizedStack,
+ OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasRecursion,
+ OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasIndirectCall,
+ OutContext));
}
if (isVerbose()) {
@@ -726,18 +732,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(" Function info:", false);
emitCommonFunctionComments(
- RI.getSymbol(MF.getName(), RIK::RIK_NumVGPR, OutContext)
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext)
->getVariableValue(),
- STM.hasMAIInsts()
- ? RI.getSymbol(MF.getName(), RIK::RIK_NumAGPR, OutContext)
- ->getVariableValue()
- : nullptr,
+ STM.hasMAIInsts() ? RI.getSymbol(CurrentFnSym->getName(),
+ RIK::RIK_NumAGPR, OutContext)
+ ->getVariableValue()
+ : nullptr,
RI.createTotalNumVGPRs(MF, Ctx),
RI.createTotalNumSGPRs(
MF,
MF.getSubtarget<GCNSubtarget>().getTargetID().isXnackOnOrAny(),
Ctx),
- RI.getSymbol(MF.getName(), RIK::RIK_PrivateSegSize, OutContext)
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
+ OutContext)
->getVariableValue(),
getFunctionCodeSize(MF), MFI);
return false;
@@ -943,7 +950,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
auto GetSymRefExpr =
[&](MCResourceInfo::ResourceInfoKind RIK) -> const MCExpr * {
- MCSymbol *Sym = RI.getSymbol(MF.getName(), RIK, OutContext);
+ MCSymbol *Sym = RI.getSymbol(CurrentFnSym->getName(), RIK, OutContext);
return MCSymbolRefExpr::create(Sym, Ctx);
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index da0397fa20bd1b..85b359f18816a5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -95,6 +96,8 @@ void MCResourceInfo::assignResourceInfoExpr(
int64_t LocalValue, ResourceInfoKind RIK, AMDGPUMCExpr::VariantKind Kind,
const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees,
MCContext &OutContext) {
+ const LLVMTargetMachine &TM = MF.getTarget();
+ MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
const MCConstantExpr *LocalConstExpr =
MCConstantExpr::create(LocalValue, OutContext);
const MCExpr *SymVal = LocalConstExpr;
@@ -109,12 +112,14 @@ void MCResourceInfo::assignResourceInfoExpr(
for (const Function *Callee : Callees) {
if (!Seen.insert(Callee).second)
continue;
- MCSymbol *CalleeValSym = getSymbol(Callee->getName(), RIK, OutContext);
+ MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
+ MCSymbol *CalleeValSym =
+ getSymbol(CalleeFnSym->getName(), RIK, OutContext);
ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
}
SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
}
- MCSymbol *Sym = getSymbol(MF.getName(), RIK, OutContext);
+ MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
Sym->setVariableValue(SymVal);
}
@@ -133,6 +138,9 @@ void MCResourceInfo::gatherResourceInfo(
addMaxSGPRCandidate(FRI.NumExplicitSGPR);
}
+ const LLVMTargetMachine &TM = MF.getTarget();
+ MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
+
auto SetMaxReg = [&](MCSymbol *MaxSym, int32_t numRegs,
ResourceInfoKind RIK) {
if (!FRI.HasIndirectCall) {
@@ -140,7 +148,7 @@ void MCResourceInfo::gatherResourceInfo(
FRI.Callees, OutContext);
} else {
const MCExpr *SymRef = MCSymbolRefExpr::create(MaxSym, OutContext);
- MCSymbol *LocalNumSym = getSymbol(MF.getName(), RIK, OutContext);
+ MCSymbol *LocalNumSym = getSymbol(FnSym->getName(), RIK, OutContext);
const MCExpr *MaxWithLocal = AMDGPUMCExpr::createMax(
{MCConstantExpr::create(numRegs, OutContext), SymRef}, OutContext);
LocalNumSym->setVariableValue(MaxWithLocal);
@@ -165,8 +173,9 @@ void MCResourceInfo::gatherResourceInfo(
if (!Seen.insert(Callee).second)
continue;
if (!Callee->isDeclaration()) {
+ MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
MCSymbol *calleeValSym =
- getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
+ getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext);
ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));
}
}
@@ -178,12 +187,12 @@ void MCResourceInfo::gatherResourceInfo(
localConstExpr =
MCBinaryExpr::createAdd(localConstExpr, transitiveExpr, OutContext);
}
- getSymbol(MF.getName(), RIK_PrivateSegSize, OutContext)
+ getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext)
->setVariableValue(localConstExpr);
}
auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
- MCSymbol *Sym = getSymbol(MF.getName(), RIK, OutContext);
+ MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
Sym->setVariableValue(MCConstantExpr::create(LocalValue, OutContext));
};
@@ -213,18 +222,23 @@ void MCResourceInfo::gatherResourceInfo(
const MCExpr *MCResourceInfo::createTotalNumVGPRs(const MachineFunction &MF,
MCContext &Ctx) {
+ const LLVMTargetMachine &TM = MF.getTarget();
+ MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
return AMDGPUMCExpr::createTotalNumVGPR(
- getSymRefExpr(MF.getName(), RIK_NumAGPR, Ctx),
- getSymRefExpr(MF.getName(), RIK_NumVGPR, Ctx), Ctx);
+ getSymRefExpr(FnSym->getName(), RIK_NumAGPR, Ctx),
+ getSymRefExpr(FnSym->getName(), RIK_NumVGPR, Ctx), Ctx);
}
const MCExpr *MCResourceInfo::createTotalNumSGPRs(const MachineFunction &MF,
bool hasXnack,
MCContext &Ctx) {
+ const LLVMTargetMachine &TM = MF.getTarget();
+ MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
return MCBinaryExpr::createAdd(
- getSymRefExpr(MF.getName(), RIK_NumSGPR, Ctx),
+ getSymRefExpr(FnSym->getName(), RIK_NumSGPR, Ctx),
AMDGPUMCExpr::createExtraSGPRs(
- getSymRefExpr(MF.getName(), RIK_UsesVCC, Ctx),
- getSymRefExpr(MF.getName(), RIK_UsesFlatScratch, Ctx), hasXnack, Ctx),
+ getSymRefExpr(FnSym->getName(), RIK_UsesVCC, Ctx),
+ getSymRefExpr(FnSym->getName(), RIK_UsesFlatScratch, Ctx), hasXnack,
+ Ctx),
Ctx);
}
diff --git a/llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll b/llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll
new file mode 100644
index 00000000000000..c9fbd369e062d5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck %s
+
+; CHECK-LABEL: __unnamed_1:
+; CHECK: .set __unnamed_1.num_vgpr, 0
+; CHECK: .set __unnamed_1.num_agpr, 0
+; CHECK: .set __unnamed_1.numbered_sgpr, 32
+; CHECK: .set __unnamed_1.private_seg_size, 0
+; CHECK: .set __unnamed_1.uses_vcc, 0
+; CHECK: .set __unnamed_1.uses_flat_scratch, 0
+; CHECK: .set __unnamed_1.has_dyn_sized_stack, 0
+; CHECK: .set __unnamed_1.has_recursion, 0
+; CHECK: .set __unnamed_1.has_indirect_call, 0
+define void @1() {
+entry:
+ ret void
+}
+
+; CHECK-LABEL: __unnamed_2:
+; CHECK: .set __unnamed_2.num_vgpr, max(32, __unnamed_1.num_vgpr)
+; CHECK: .set __unnamed_2.num_agpr, max(0, __unnamed_1.num_agpr)
+; CHECK: .set __unnamed_2.numbered_sgpr, max(34, __unnamed_1.numbered_sgpr)
+; CHECK: .set __unnamed_2.private_seg_size, 16+(max(__unnamed_1.private_seg_size))
+; CHECK: .set __unnamed_2.uses_vcc, or(0, __unnamed_1.uses_vcc)
+; CHECK: .set __unnamed_2.uses_flat_scratch, or(0, __unnamed_1.uses_flat_scratch)
+; CHECK: .set __unnamed_2.has_dyn_sized_stack, or(0, __unnamed_1.has_dyn_sized_stack)
+; CHECK: .set __unnamed_2.has_recursion, or(1, __unnamed_1.has_recursion)
+; CHECK: .set __unnamed_2.has_indirect_call, or(0, __unnamed_1.has_indirect_call)
+define void @2() {
+entry:
+ call void @1()
+ ret void
+}
+
+; CHECK-LABEL: {{^}}use
+; CHECK: .set use.num_vgpr, max(32, __unnamed_1.num_vgpr, __unnamed_2.num_vgpr)
+; CHECK: .set use.num_agpr, max(0, __unnamed_1.num_agpr, __unnamed_2.num_agpr)
+; CHECK: .set use.numbered_sgpr, max(33, __unnamed_1.numbered_sgpr, __unnamed_2.numbered_sgpr)
+; CHECK: .set use.private_seg_size, 0+(max(__unnamed_1.private_seg_size, __unnamed_2.private_seg_size))
+; CHECK: .set use.uses_vcc, or(0, __unnamed_1.uses_vcc, __unnamed_2.uses_vcc)
+; CHECK: .set use.uses_flat_scratch, or(1, __unnamed_1.uses_flat_scratch, __unnamed_2.uses_flat_scratch)
+; CHECK: .set use.has_dyn_sized_stack, or(0, __unnamed_1.has_dyn_sized_stack, __unnamed_2.has_dyn_sized_stack)
+; CHECK: .set use.has_recursion, or(1, __unnamed_1.has_recursion, __unnamed_2.has_recursion)
+; CHECK: .set use.has_indirect_call, or(0, __unnamed_1.has_indirect_call, __unnamed_2.has_indirect_call)
+define amdgpu_kernel void @use() {
+ call void @1()
+ call void @2()
+ ret void
+}
More information about the llvm-commits
mailing list