[llvm] bd9145c - Reapply [AMDGPU] Avoid resource propagation for recursion through multiple functions (#112251)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 15 10:40:10 PST 2024
Author: Janek van Oirschot
Date: 2024-11-15T18:40:05Z
New Revision: bd9145c8c21334e099d51b3e66f49d51d24931ee
URL: https://github.com/llvm/llvm-project/commit/bd9145c8c21334e099d51b3e66f49d51d24931ee
DIFF: https://github.com/llvm/llvm-project/commit/bd9145c8c21334e099d51b3e66f49d51d24931ee.diff
LOG: Reapply [AMDGPU] Avoid resource propagation for recursion through multiple functions (#112251)
I was wrong in the last patch: I viewed the `Visited` set purely as a
possible recursion deterrent, assuming that functions calling a callee
multiple times are handled elsewhere. That doesn't cover cases where a
function is called multiple times by different callers that are still part
of the same call graph. The new test shows this case.
Reapplies #111004, fixes #115562.
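To illustrate the distinction outside of MC, here is a minimal standalone
sketch (not part of the patch; the DefMap type, the usedInDefinition helper,
and the r1/r2 symbol names are made up for illustration): a per-function
`Seen` set only deduplicates a function's direct callees, while a transitive
walk in the spirit of the new isSymbolUsedInExpression catches a cycle that
spans several symbol definitions, which is what the AMDGPU resource-info
symbols form under mutual recursion.

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Each resource symbol's value is modeled as the list of symbols it
// references, analogous to the callee symbols folded into a .set expression.
// Assumes the map is acyclic, which holds because (as in the real code) the
// check runs before a new definition is recorded.
using DefMap = std::map<std::string, std::vector<std::string>>;

// In the spirit of MCExpr::isSymbolUsedInExpression: does Sym occur anywhere,
// transitively, in the definition of Def?
static bool usedInDefinition(const DefMap &Defs, const std::string &Def,
                             const std::string &Sym) {
  if (Def == Sym)
    return true;
  auto It = Defs.find(Def);
  if (It == Defs.end())
    return false; // Undefined symbol: only a direct match counts.
  for (const std::string &Ref : It->second)
    if (usedInDefinition(Defs, Ref, Sym))
      return true;
  return false;
}

int main() {
  // Mutual recursion: r2's resources already depend on r1.
  DefMap Defs;
  Defs["r2.num_vgpr"] = {"r1.num_vgpr"};

  // A Seen set over r1's direct callees ({r2}) never contains r1 itself, so
  // it would still emit ".set r1.num_vgpr, max(..., r2.num_vgpr)" and create
  // a cycle. The transitive check returns true here, so the patch falls back
  // to the conservative amdgpu.max_num_vgpr symbol instead.
  std::cout << usedInDefinition(Defs, "r2.num_vgpr", "r1.num_vgpr") << '\n';
  return 0;
}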
Added:
llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll
llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll
Modified:
llvm/include/llvm/MC/MCExpr.h
llvm/lib/MC/MCExpr.cpp
llvm/lib/MC/MCParser/AsmParser.cpp
llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index 10bc6ebd6fe506..ece51ebecdd9c1 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -86,6 +86,10 @@ class MCExpr {
bool InParens = false) const;
void dump() const;
+ /// Returns whether the given symbol is used anywhere in the expression or
+ /// subexpressions.
+ bool isSymbolUsedInExpression(const MCSymbol *Sym) const;
+
/// @}
/// \name Expression Evaluation
/// @{
@@ -663,6 +667,9 @@ class MCTargetExpr : public MCExpr {
const MCFixup *Fixup) const = 0;
// allow Target Expressions to be checked for equality
virtual bool isEqualTo(const MCExpr *x) const { return false; }
+ virtual bool isSymbolUsedInExpression(const MCSymbol *Sym) const {
+ return false;
+ }
// This should be set when assigned expressions are not valid ".set"
// expressions, e.g. registers, and must be inlined.
virtual bool inlineAssignedExpr() const { return false; }
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index c9d5f6580fda4c..ede7655733f253 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -177,6 +177,35 @@ LLVM_DUMP_METHOD void MCExpr::dump() const {
}
#endif
+bool MCExpr::isSymbolUsedInExpression(const MCSymbol *Sym) const {
+ switch (getKind()) {
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(this);
+ return BE->getLHS()->isSymbolUsedInExpression(Sym) ||
+ BE->getRHS()->isSymbolUsedInExpression(Sym);
+ }
+ case MCExpr::Target: {
+ const MCTargetExpr *TE = static_cast<const MCTargetExpr *>(this);
+ return TE->isSymbolUsedInExpression(Sym);
+ }
+ case MCExpr::Constant:
+ return false;
+ case MCExpr::SymbolRef: {
+ const MCSymbol &S = static_cast<const MCSymbolRefExpr *>(this)->getSymbol();
+ if (S.isVariable() && !S.isWeakExternal())
+ return S.getVariableValue()->isSymbolUsedInExpression(Sym);
+ return &S == Sym;
+ }
+ case MCExpr::Unary: {
+ const MCExpr *SubExpr =
+ static_cast<const MCUnaryExpr *>(this)->getSubExpr();
+ return SubExpr->isSymbolUsedInExpression(Sym);
+ }
+ }
+
+ llvm_unreachable("Unknown expr kind!");
+}
+
/* *** */
const MCBinaryExpr *MCBinaryExpr::create(Opcode Opc, const MCExpr *LHS,
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index de11b46c069bc9..3ce45f7d5d67e1 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -6417,33 +6417,6 @@ bool HLASMAsmParser::parseStatement(ParseStatementInfo &Info,
namespace llvm {
namespace MCParserUtils {
-/// Returns whether the given symbol is used anywhere in the given expression,
-/// or subexpressions.
-static bool isSymbolUsedInExpression(const MCSymbol *Sym, const MCExpr *Value) {
- switch (Value->getKind()) {
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Value);
- return isSymbolUsedInExpression(Sym, BE->getLHS()) ||
- isSymbolUsedInExpression(Sym, BE->getRHS());
- }
- case MCExpr::Target:
- case MCExpr::Constant:
- return false;
- case MCExpr::SymbolRef: {
- const MCSymbol &S =
- static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
- if (S.isVariable() && !S.isWeakExternal())
- return isSymbolUsedInExpression(Sym, S.getVariableValue());
- return &S == Sym;
- }
- case MCExpr::Unary:
- return isSymbolUsedInExpression(
- Sym, static_cast<const MCUnaryExpr *>(Value)->getSubExpr());
- }
-
- llvm_unreachable("Unknown expr kind!");
-}
-
bool parseAssignmentExpression(StringRef Name, bool allow_redef,
MCAsmParser &Parser, MCSymbol *&Sym,
const MCExpr *&Value) {
@@ -6468,7 +6441,7 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
//
// FIXME: Diagnostics. Note the location of the definition as a label.
// FIXME: Diagnose assignment to protected identifier (e.g., register name).
- if (isSymbolUsedInExpression(Sym, Value))
+ if (Value->isSymbolUsedInExpression(Sym))
return Parser.Error(EqualLoc, "Recursive use of '" + Name + "'");
else if (Sym->isUndefined(/*SetUsed*/ false) && !Sym->isUsed() &&
!Sym->isVariable())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index 413f2b1cdb7530..9511b6bb7de062 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -100,25 +100,50 @@ void MCResourceInfo::assignResourceInfoExpr(
const MCConstantExpr *LocalConstExpr =
MCConstantExpr::create(LocalValue, OutContext);
const MCExpr *SymVal = LocalConstExpr;
+ MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
if (!Callees.empty()) {
SmallVector<const MCExpr *, 8> ArgExprs;
- // Avoid recursive symbol assignment.
SmallPtrSet<const Function *, 8> Seen;
ArgExprs.push_back(LocalConstExpr);
- const Function &F = MF.getFunction();
- Seen.insert(&F);
for (const Function *Callee : Callees) {
if (!Seen.insert(Callee).second)
continue;
+
MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
MCSymbol *CalleeValSym =
getSymbol(CalleeFnSym->getName(), RIK, OutContext);
- ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+
+ // Avoid constructing recursive definitions by detecting whether `Sym` is
+ // found transitively within any of its `CalleeValSym`.
+ if (!CalleeValSym->isVariable() ||
+ !CalleeValSym->getVariableValue(/*isUsed=*/false)
+ ->isSymbolUsedInExpression(Sym)) {
+ ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+ } else {
+ // In case of recursion: make sure to use conservative register counts
+ // (i.e., specifically for VGPR/SGPR/AGPR).
+ switch (RIK) {
+ default:
+ break;
+ case RIK_NumVGPR:
+ ArgExprs.push_back(MCSymbolRefExpr::create(
+ getMaxVGPRSymbol(OutContext), OutContext));
+ break;
+ case RIK_NumSGPR:
+ ArgExprs.push_back(MCSymbolRefExpr::create(
+ getMaxSGPRSymbol(OutContext), OutContext));
+ break;
+ case RIK_NumAGPR:
+ ArgExprs.push_back(MCSymbolRefExpr::create(
+ getMaxAGPRSymbol(OutContext), OutContext));
+ break;
+ }
+ }
}
- SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
+ if (ArgExprs.size() > 1)
+ SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
}
- MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
Sym->setVariableValue(SymVal);
}
@@ -162,6 +187,7 @@ void MCResourceInfo::gatherResourceInfo(
// The expression for private segment size should be: FRI.PrivateSegmentSize
// + max(FRI.Callees, FRI.CalleeSegmentSize)
SmallVector<const MCExpr *, 8> ArgExprs;
+ MCSymbol *Sym = getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext);
if (FRI.CalleeSegmentSize)
ArgExprs.push_back(
MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
@@ -173,9 +199,16 @@ void MCResourceInfo::gatherResourceInfo(
continue;
if (!Callee->isDeclaration()) {
MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
- MCSymbol *calleeValSym =
+ MCSymbol *CalleeValSym =
getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext);
- ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));
+
+ // Avoid constructing recursive definitions by detecting whether `Sym`
+ // is found transitively within any of its `CalleeValSym`.
+ if (!CalleeValSym->isVariable() ||
+ !CalleeValSym->getVariableValue(/*isUsed=*/false)
+ ->isSymbolUsedInExpression(Sym)) {
+ ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+ }
}
}
const MCExpr *localConstExpr =
@@ -186,8 +219,7 @@ void MCResourceInfo::gatherResourceInfo(
localConstExpr =
MCBinaryExpr::createAdd(localConstExpr, transitiveExpr, OutContext);
}
- getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext)
- ->setVariableValue(localConstExpr);
+ Sym->setVariableValue(localConstExpr);
}
auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
index 5fd1295177e14a..fd2d6109ac23bc 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
@@ -305,6 +305,14 @@ const AMDGPUMCExpr *AMDGPUMCExpr::createOccupancy(unsigned InitOcc,
Ctx);
}
+bool AMDGPUMCExpr::isSymbolUsedInExpression(const MCSymbol *Sym) const {
+ for (const MCExpr *E : getArgs()) {
+ if (E->isSymbolUsedInExpression(Sym))
+ return true;
+ }
+ return false;
+}
+
static KnownBits fromOptionalToKnownBits(std::optional<bool> CompareResult) {
static constexpr unsigned BitWidth = 64;
const APInt True(BitWidth, 1);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
index a16843f404b8f6..75e676bb7d5081 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
@@ -97,6 +97,7 @@ class AMDGPUMCExpr : public MCTargetExpr {
void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
bool evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCFixup *Fixup) const override;
+ bool isSymbolUsedInExpression(const MCSymbol *Sym) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
MCFragment *findAssociatedFragment() const override;
void fixELFSymbolsInTLSFixups(MCAssembler &) const override{};
diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
index eda1e33cc4b9eb..278a2a0170ff33 100644
--- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
@@ -481,6 +481,136 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
ret void
}
+; GCN-LABEL: {{^}}multi_stage_recurse2:
+; GCN: .set multi_stage_recurse2.num_vgpr, max(43, multi_stage_recurse1.num_vgpr)
+; GCN: .set multi_stage_recurse2.num_agpr, max(0, multi_stage_recurse1.num_agpr)
+; GCN: .set multi_stage_recurse2.numbered_sgpr, max(34, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set multi_stage_recurse2.private_seg_size, 16+(max(multi_stage_recurse1.private_seg_size))
+; GCN: .set multi_stage_recurse2.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
+; GCN: .set multi_stage_recurse2.uses_flat_scratch, or(0, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set multi_stage_recurse2.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set multi_stage_recurse2.has_recursion, or(1, multi_stage_recurse1.has_recursion)
+; GCN: .set multi_stage_recurse2.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: multi_stage_recurse2.numbered_sgpr+(extrasgprs(multi_stage_recurse2.uses_vcc, multi_stage_recurse2.uses_flat_scratch, 1))
+; GCN: NumVgprs: max(43, multi_stage_recurse1.num_vgpr)
+; GCN: ScratchSize: 16+(max(multi_stage_recurse1.private_seg_size))
+; GCN-LABEL: {{^}}multi_stage_recurse1:
+; GCN: .set multi_stage_recurse1.num_vgpr, max(48, amdgpu.max_num_vgpr)
+; GCN: .set multi_stage_recurse1.num_agpr, max(0, amdgpu.max_num_agpr)
+; GCN: .set multi_stage_recurse1.numbered_sgpr, max(34, amdgpu.max_num_sgpr)
+; GCN: .set multi_stage_recurse1.private_seg_size, 16
+; GCN: .set multi_stage_recurse1.uses_vcc, 1
+; GCN: .set multi_stage_recurse1.uses_flat_scratch, 0
+; GCN: .set multi_stage_recurse1.has_dyn_sized_stack, 0
+; GCN: .set multi_stage_recurse1.has_recursion, 1
+; GCN: .set multi_stage_recurse1.has_indirect_call, 0
+; GCN: TotalNumSgprs: multi_stage_recurse1.numbered_sgpr+4
+; GCN: NumVgprs: max(48, amdgpu.max_num_vgpr)
+; GCN: ScratchSize: 16
+define void @multi_stage_recurse1(i32 %val) #2 {
+ call void @multi_stage_recurse2(i32 %val)
+ call void asm sideeffect "", "~{v47}"() #0
+ ret void
+}
+define void @multi_stage_recurse2(i32 %val) #2 {
+ call void @multi_stage_recurse1(i32 %val)
+ call void asm sideeffect "", "~{v42}"() #0
+ ret void
+}
+
+; GCN-LABEL: {{^}}usage_multi_stage_recurse:
+; GCN: .set usage_multi_stage_recurse.num_vgpr, max(32, multi_stage_recurse1.num_vgpr)
+; GCN: .set usage_multi_stage_recurse.num_agpr, max(0, multi_stage_recurse1.num_agpr)
+; GCN: .set usage_multi_stage_recurse.numbered_sgpr, max(33, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set usage_multi_stage_recurse.private_seg_size, 0+(max(multi_stage_recurse1.private_seg_size))
+; GCN: .set usage_multi_stage_recurse.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
+; GCN: .set usage_multi_stage_recurse.uses_flat_scratch, or(1, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set usage_multi_stage_recurse.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set usage_multi_stage_recurse.has_recursion, or(1, multi_stage_recurse1.has_recursion)
+; GCN: .set usage_multi_stage_recurse.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: usage_multi_stage_recurse.numbered_sgpr+6
+; GCN: NumVgprs: usage_multi_stage_recurse.num_vgpr
+; GCN: ScratchSize: 16
+define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 {
+ call void @multi_stage_recurse1(i32 %n)
+ ret void
+}
+
+; GCN-LABEL: {{^}}multi_stage_recurse_noattr2:
+; GCN: .set multi_stage_recurse_noattr2.num_vgpr, max(41, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: .set multi_stage_recurse_noattr2.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
+; GCN: .set multi_stage_recurse_noattr2.numbered_sgpr, max(54, multi_stage_recurse_noattr1.numbered_sgpr)
+; GCN: .set multi_stage_recurse_noattr2.private_seg_size, 16+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN: .set multi_stage_recurse_noattr2.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
+; GCN: .set multi_stage_recurse_noattr2.uses_flat_scratch, or(0, multi_stage_recurse_noattr1.uses_flat_scratch)
+; GCN: .set multi_stage_recurse_noattr2.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
+; GCN: .set multi_stage_recurse_noattr2.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
+; GCN: .set multi_stage_recurse_noattr2.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
+; GCN: TotalNumSgprs: multi_stage_recurse_noattr2.numbered_sgpr+(extrasgprs(multi_stage_recurse_noattr2.uses_vcc, multi_stage_recurse_noattr2.uses_flat_scratch, 1))
+; GCN: NumVgprs: max(41, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: ScratchSize: 16+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN-LABEL: {{^}}multi_stage_recurse_noattr1:
+; GCN: .set multi_stage_recurse_noattr1.num_vgpr, max(41, amdgpu.max_num_vgpr)
+; GCN: .set multi_stage_recurse_noattr1.num_agpr, max(0, amdgpu.max_num_agpr)
+; GCN: .set multi_stage_recurse_noattr1.numbered_sgpr, max(57, amdgpu.max_num_sgpr)
+; GCN: .set multi_stage_recurse_noattr1.private_seg_size, 16
+; GCN: .set multi_stage_recurse_noattr1.uses_vcc, 1
+; GCN: .set multi_stage_recurse_noattr1.uses_flat_scratch, 0
+; GCN: .set multi_stage_recurse_noattr1.has_dyn_sized_stack, 0
+; GCN: .set multi_stage_recurse_noattr1.has_recursion, 0
+; GCN: .set multi_stage_recurse_noattr1.has_indirect_call, 0
+; GCN: TotalNumSgprs: multi_stage_recurse_noattr1.numbered_sgpr+4
+; GCN: NumVgprs: max(41, amdgpu.max_num_vgpr)
+; GCN: ScratchSize: 16
+define void @multi_stage_recurse_noattr1(i32 %val) #0 {
+ call void @multi_stage_recurse_noattr2(i32 %val)
+ call void asm sideeffect "", "~{s56}"() #0
+ ret void
+}
+define void @multi_stage_recurse_noattr2(i32 %val) #0 {
+ call void @multi_stage_recurse_noattr1(i32 %val)
+ call void asm sideeffect "", "~{s53}"() #0
+ ret void
+}
+
+; GCN-LABEL: {{^}}usage_multi_stage_recurse_noattrs:
+; GCN: .set usage_multi_stage_recurse_noattrs.num_vgpr, max(32, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.numbered_sgpr, max(33, multi_stage_recurse_noattr1.numbered_sgpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.private_seg_size, 0+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN: .set usage_multi_stage_recurse_noattrs.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
+; GCN: .set usage_multi_stage_recurse_noattrs.uses_flat_scratch, or(1, multi_stage_recurse_noattr1.uses_flat_scratch)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
+; GCN: TotalNumSgprs: usage_multi_stage_recurse_noattrs.numbered_sgpr+6
+; GCN: NumVgprs: usage_multi_stage_recurse_noattrs.num_vgpr
+; GCN: ScratchSize: 16
+define amdgpu_kernel void @usage_multi_stage_recurse_noattrs(i32 %n) #0 {
+ call void @multi_stage_recurse_noattr1(i32 %n)
+ ret void
+}
+
+; GCN-LABEL: {{^}}multi_call_with_multi_stage_recurse:
+; GCN: .set multi_call_with_multi_stage_recurse.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr, multi_stage_recurse1.num_vgpr)
+; GCN: .set multi_call_with_multi_stage_recurse.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr, multi_stage_recurse1.num_agpr)
+; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(43, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set multi_call_with_multi_stage_recurse.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size, multi_stage_recurse1.private_seg_size))
+; GCN: .set multi_call_with_multi_stage_recurse.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc, multi_stage_recurse1.uses_vcc)
+; GCN: .set multi_call_with_multi_stage_recurse.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set multi_call_with_multi_stage_recurse.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set multi_call_with_multi_stage_recurse.has_recursion, or(1, use_stack0.has_recursion, use_stack1.has_recursion, multi_stage_recurse1.has_recursion)
+; GCN: .set multi_call_with_multi_stage_recurse.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: multi_call_with_multi_stage_recurse.numbered_sgpr+6
+; GCN: NumVgprs: multi_call_with_multi_stage_recurse.num_vgpr
+; GCN: ScratchSize: 2052
+define amdgpu_kernel void @multi_call_with_multi_stage_recurse(i32 %n) #0 {
+ call void @use_stack0()
+ call void @use_stack1()
+ call void @multi_stage_recurse1(i32 %n)
+ ret void
+}
+
; Make sure there's no assert when a sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
diff --git a/llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll b/llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll
new file mode 100644
index 00000000000000..e150231e3d9e1a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll
@@ -0,0 +1,86 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}qux
+; CHECK: .set qux.num_vgpr, 13
+; CHECK: .set qux.num_agpr, 0
+; CHECK: .set qux.numbered_sgpr, 32
+; CHECK: .set qux.private_seg_size, 0
+; CHECK: .set qux.uses_vcc, 0
+; CHECK: .set qux.uses_flat_scratch, 0
+; CHECK: .set qux.has_dyn_sized_stack, 0
+; CHECK: .set qux.has_recursion, 0
+; CHECK: .set qux.has_indirect_call, 0
+define void @qux() {
+entry:
+ call void asm sideeffect "", "~{v12}"()
+ ret void
+}
+
+; CHECK-LABEL: {{^}}baz
+; CHECK: .set baz.num_vgpr, max(49, qux.num_vgpr)
+; CHECK: .set baz.num_agpr, max(0, qux.num_agpr)
+; CHECK: .set baz.numbered_sgpr, max(34, qux.numbered_sgpr)
+; CHECK: .set baz.private_seg_size, 16+(max(qux.private_seg_size))
+; CHECK: .set baz.uses_vcc, or(0, qux.uses_vcc)
+; CHECK: .set baz.uses_flat_scratch, or(0, qux.uses_flat_scratch)
+; CHECK: .set baz.has_dyn_sized_stack, or(0, qux.has_dyn_sized_stack)
+; CHECK: .set baz.has_recursion, or(1, qux.has_recursion)
+; CHECK: .set baz.has_indirect_call, or(0, qux.has_indirect_call)
+define void @baz() {
+entry:
+ call void @qux()
+ call void asm sideeffect "", "~{v48}"()
+ ret void
+}
+
+; CHECK-LABEL: {{^}}bar
+; CHECK: .set bar.num_vgpr, max(65, baz.num_vgpr, qux.num_vgpr)
+; CHECK: .set bar.num_agpr, max(0, baz.num_agpr, qux.num_agpr)
+; CHECK: .set bar.numbered_sgpr, max(34, baz.numbered_sgpr, qux.numbered_sgpr)
+; CHECK: .set bar.private_seg_size, 16+(max(baz.private_seg_size, qux.private_seg_size))
+; CHECK: .set bar.uses_vcc, or(0, baz.uses_vcc, qux.uses_vcc)
+; CHECK: .set bar.uses_flat_scratch, or(0, baz.uses_flat_scratch, qux.uses_flat_scratch)
+; CHECK: .set bar.has_dyn_sized_stack, or(0, baz.has_dyn_sized_stack, qux.has_dyn_sized_stack)
+; CHECK: .set bar.has_recursion, or(1, baz.has_recursion, qux.has_recursion)
+; CHECK: .set bar.has_indirect_call, or(0, baz.has_indirect_call, qux.has_indirect_call)
+define void @bar() {
+entry:
+ call void @baz()
+ call void @qux()
+ call void @baz()
+ call void asm sideeffect "", "~{v64}"()
+ ret void
+}
+
+; CHECK-LABEL: {{^}}foo
+; CHECK: .set foo.num_vgpr, max(38, bar.num_vgpr)
+; CHECK: .set foo.num_agpr, max(0, bar.num_agpr)
+; CHECK: .set foo.numbered_sgpr, max(34, bar.numbered_sgpr)
+; CHECK: .set foo.private_seg_size, 16+(max(bar.private_seg_size))
+; CHECK: .set foo.uses_vcc, or(0, bar.uses_vcc)
+; CHECK: .set foo.uses_flat_scratch, or(0, bar.uses_flat_scratch)
+; CHECK: .set foo.has_dyn_sized_stack, or(0, bar.has_dyn_sized_stack)
+; CHECK: .set foo.has_recursion, or(1, bar.has_recursion)
+; CHECK: .set foo.has_indirect_call, or(0, bar.has_indirect_call)
+define void @foo() {
+entry:
+ call void @bar()
+ call void asm sideeffect "", "~{v37}"()
+ ret void
+}
+
+; CHECK-LABEL: {{^}}usefoo
+; CHECK: .set usefoo.num_vgpr, max(32, foo.num_vgpr)
+; CHECK: .set usefoo.num_agpr, max(0, foo.num_agpr)
+; CHECK: .set usefoo.numbered_sgpr, max(33, foo.numbered_sgpr)
+; CHECK: .set usefoo.private_seg_size, 0+(max(foo.private_seg_size))
+; CHECK: .set usefoo.uses_vcc, or(0, foo.uses_vcc)
+; CHECK: .set usefoo.uses_flat_scratch, or(1, foo.uses_flat_scratch)
+; CHECK: .set usefoo.has_dyn_sized_stack, or(0, foo.has_dyn_sized_stack)
+; CHECK: .set usefoo.has_recursion, or(1, foo.has_recursion)
+; CHECK: .set usefoo.has_indirect_call, or(0, foo.has_indirect_call)
+define amdgpu_kernel void @usefoo() {
+ call void @foo()
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll b/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll
new file mode 100644
index 00000000000000..ac6bd9a4ae8a6e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll
@@ -0,0 +1,93 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}qux
+; CHECK: .set qux.num_vgpr, max(71, foo.num_vgpr)
+; CHECK: .set qux.num_agpr, max(0, foo.num_agpr)
+; CHECK: .set qux.numbered_sgpr, max(46, foo.numbered_sgpr)
+; CHECK: .set qux.private_seg_size, 16
+; CHECK: .set qux.uses_vcc, or(1, foo.uses_vcc)
+; CHECK: .set qux.uses_flat_scratch, or(0, foo.uses_flat_scratch)
+; CHECK: .set qux.has_dyn_sized_stack, or(0, foo.has_dyn_sized_stack)
+; CHECK: .set qux.has_recursion, or(1, foo.has_recursion)
+; CHECK: .set qux.has_indirect_call, or(0, foo.has_indirect_call)
+
+; CHECK-LABEL: {{^}}baz
+; CHECK: .set baz.num_vgpr, max(61, qux.num_vgpr)
+; CHECK: .set baz.num_agpr, max(0, qux.num_agpr)
+; CHECK: .set baz.numbered_sgpr, max(51, qux.numbered_sgpr)
+; CHECK: .set baz.private_seg_size, 16+(max(qux.private_seg_size))
+; CHECK: .set baz.uses_vcc, or(1, qux.uses_vcc)
+; CHECK: .set baz.uses_flat_scratch, or(0, qux.uses_flat_scratch)
+; CHECK: .set baz.has_dyn_sized_stack, or(0, qux.has_dyn_sized_stack)
+; CHECK: .set baz.has_recursion, or(1, qux.has_recursion)
+; CHECK: .set baz.has_indirect_call, or(0, qux.has_indirect_call)
+
+; CHECK-LABEL: {{^}}bar
+; CHECK: .set bar.num_vgpr, max(51, baz.num_vgpr)
+; CHECK: .set bar.num_agpr, max(0, baz.num_agpr)
+; CHECK: .set bar.numbered_sgpr, max(61, baz.numbered_sgpr)
+; CHECK: .set bar.private_seg_size, 16+(max(baz.private_seg_size))
+; CHECK: .set bar.uses_vcc, or(1, baz.uses_vcc)
+; CHECK: .set bar.uses_flat_scratch, or(0, baz.uses_flat_scratch)
+; CHECK: .set bar.has_dyn_sized_stack, or(0, baz.has_dyn_sized_stack)
+; CHECK: .set bar.has_recursion, or(1, baz.has_recursion)
+; CHECK: .set bar.has_indirect_call, or(0, baz.has_indirect_call)
+
+; CHECK-LABEL: {{^}}foo
+; CHECK: .set foo.num_vgpr, max(46, amdgpu.max_num_vgpr)
+; CHECK: .set foo.num_agpr, max(0, amdgpu.max_num_agpr)
+; CHECK: .set foo.numbered_sgpr, max(71, amdgpu.max_num_sgpr)
+; CHECK: .set foo.private_seg_size, 16
+; CHECK: .set foo.uses_vcc, 1
+; CHECK: .set foo.uses_flat_scratch, 0
+; CHECK: .set foo.has_dyn_sized_stack, 0
+; CHECK: .set foo.has_recursion, 1
+; CHECK: .set foo.has_indirect_call, 0
+
+define void @foo() {
+entry:
+ call void @bar()
+ call void asm sideeffect "", "~{v45}"()
+ call void asm sideeffect "", "~{s70}"()
+ ret void
+}
+
+define void @bar() {
+entry:
+ call void @baz()
+ call void asm sideeffect "", "~{v50}"()
+ call void asm sideeffect "", "~{s60}"()
+ ret void
+}
+
+define void @baz() {
+entry:
+ call void @qux()
+ call void asm sideeffect "", "~{v60}"()
+ call void asm sideeffect "", "~{s50}"()
+ ret void
+}
+
+define void @qux() {
+entry:
+ call void @foo()
+ call void asm sideeffect "", "~{v70}"()
+ call void asm sideeffect "", "~{s45}"()
+ ret void
+}
+
+; CHECK-LABEL: {{^}}usefoo
+; CHECK: .set usefoo.num_vgpr, max(32, foo.num_vgpr)
+; CHECK: .set usefoo.num_agpr, max(0, foo.num_agpr)
+; CHECK: .set usefoo.numbered_sgpr, max(33, foo.numbered_sgpr)
+; CHECK: .set usefoo.private_seg_size, 0+(max(foo.private_seg_size))
+; CHECK: .set usefoo.uses_vcc, or(1, foo.uses_vcc)
+; CHECK: .set usefoo.uses_flat_scratch, or(1, foo.uses_flat_scratch)
+; CHECK: .set usefoo.has_dyn_sized_stack, or(0, foo.has_dyn_sized_stack)
+; CHECK: .set usefoo.has_recursion, or(1, foo.has_recursion)
+; CHECK: .set usefoo.has_indirect_call, or(0, foo.has_indirect_call)
+define amdgpu_kernel void @usefoo() {
+ call void @foo()
+ ret void
+}
+