[llvm-branch-commits] [llvm] [AMDGPU] Report only local per-function resource usage when object linking is enabled (PR #192594)
Shilei Tian via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Apr 17 07:28:15 PDT 2026
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/192594
>From 2ebfe9189176f50f92c818a1c48f436013adb2e7 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 17 Apr 2026 01:12:28 -0400
Subject: [PATCH] [AMDGPU] Report only local per-function resource usage when
object linking is enabled
With object linking the linker aggregates resource usage across TUs via
`.amdgpu.info`, so compile-time pessimism and call-graph propagation duplicate
the linker's work or pollute its inputs.
In this mode, skip the per-callsite conservative bumps in
`AMDGPUResourceUsageAnalysis` and assign each resource symbol in
`AMDGPUMCResourceInfo` a concrete local constant instead of building call-graph
max/or expressions.
---
.../Target/AMDGPU/AMDGPUMCResourceInfo.cpp | 34 ++++--
llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h | 4 +
.../AMDGPU/AMDGPUResourceUsageAnalysis.cpp | 11 +-
.../AMDGPU/lds-link-time-codegen-indirect.ll | 2 +-
.../AMDGPU/object-linking-local-resources.ll | 104 ++++++++++++++++++
5 files changed, 145 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/object-linking-local-resources.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index 83012c4cb72f2..e3a85dea934ae 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUMCResourceInfo.h"
+#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -267,6 +268,31 @@ void MCResourceInfo::gatherResourceInfo(
LLVM_DEBUG(dbgs() << "MCResUse: Gathering resource information for "
<< FnSym->getName() << '\n');
+
+ auto SetToLocal = [&](int64_t Value, ResourceInfoKind RIK) {
+ MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
+ Sym->setVariableValue(MCConstantExpr::create(Value, OutContext));
+ };
+
+ // When link-time object linking is enabled, set all resource symbols to
+ // concrete local values.
+ if (AMDGPUTargetMachine::EnableObjectLinking) {
+ LLVM_DEBUG(dbgs() << "MCResUse: object linking enabled, no call-graph "
+ "propagation; emitting local resource values only\n");
+ SetToLocal(FRI.NumVGPR, RIK_NumVGPR);
+ SetToLocal(FRI.NumAGPR, RIK_NumAGPR);
+ SetToLocal(FRI.NumExplicitSGPR, RIK_NumSGPR);
+ SetToLocal(FRI.NumNamedBarrier, RIK_NumNamedBarrier);
+ SetToLocal(FRI.PrivateSegmentSize, RIK_PrivateSegSize);
+ SetToLocal(FRI.UsesVCC, ResourceInfoKind::RIK_UsesVCC);
+ SetToLocal(FRI.UsesFlatScratch, ResourceInfoKind::RIK_UsesFlatScratch);
+ SetToLocal(FRI.HasDynamicallySizedStack,
+ ResourceInfoKind::RIK_HasDynSizedStack);
+ SetToLocal(FRI.HasRecursion, ResourceInfoKind::RIK_HasRecursion);
+ SetToLocal(FRI.HasIndirectCall, ResourceInfoKind::RIK_HasIndirectCall);
+ return;
+ }
+
LLVM_DEBUG({
if (!FRI.Callees.empty()) {
dbgs() << "MCResUse: Callees:\n";
@@ -347,14 +373,6 @@ void MCResourceInfo::gatherResourceInfo(
Sym->setVariableValue(localConstExpr);
}
- auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
- MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
- LLVM_DEBUG(
- dbgs() << "MCResUse: " << Sym->getName() << ": Adding " << LocalValue
- << ", no further propagation as indirect callee found within\n");
- Sym->setVariableValue(MCConstantExpr::create(LocalValue, OutContext));
- };
-
if (!FRI.HasIndirectCall) {
assignResourceInfoExpr(FRI.UsesVCC, ResourceInfoKind::RIK_UsesVCC,
AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h
index 5c1f59636446c..3cb063ef8e962 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h
@@ -102,6 +102,10 @@ class MCResourceInfo {
/// transitive maximum or accumulative. For example, if A calls B and B's VGPR
/// usage exceeds A's, A should be assigned B's VGPR usage. Furthermore,
/// functions with indirect calls should be assigned the module level maximum.
+ ///
+ /// When link-time object linking is enabled, skip all call-transitive
+ /// propagation and emit concrete per-function values for every resource
+ /// symbol. Cross-TU aggregation is then the linker's responsibility.
void gatherResourceInfo(
const MachineFunction &MF,
const AMDGPUResourceUsageAnalysisWrapperPass::FunctionResourceInfo &FRI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 4e664e084fb88..51bebefed5aa7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -17,6 +17,7 @@
#include "AMDGPUResourceUsageAnalysis.h"
#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -272,6 +273,15 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
Info.Callees.push_back(Callee);
bool IsIndirect = !Callee || Callee->isDeclaration();
+ Info.HasIndirectCall |= IsIndirect;
+
+ // In object linking mode the linker has the full cross-TU view. It
+ // propagates resource usage across both direct calls to external
+ // declarations and true indirect calls. Skip the compile-time
+ // conservative assumptions so that the locally emitted metadata
+ // describes this function's own usage only.
+ if (AMDGPUTargetMachine::EnableObjectLinking)
+ continue;
// FIXME: Call site could have norecurse on it
if (!Callee || !Callee->doesNotRecurse()) {
@@ -301,7 +311,6 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
Info.UsesVCC = true;
Info.UsesFlatScratch = ST.hasFlatAddressSpace();
Info.HasDynamicallySizedStack = true;
- Info.HasIndirectCall = true;
}
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/lds-link-time-codegen-indirect.ll b/llvm/test/CodeGen/AMDGPU/lds-link-time-codegen-indirect.ll
index 9e18b79e20814..fb23f4bd8e933 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-link-time-codegen-indirect.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-link-time-codegen-indirect.ll
@@ -45,7 +45,7 @@ define amdgpu_kernel void @my_kernel() {
; ASM-DAG: .amdgpu_typeid "vi"
; ASM-DAG: .end_amdgpu_info
; ASM-DAG: .amdgpu_info caller
-; ASM-DAG: .amdgpu_flags 14
+; ASM-DAG: .amdgpu_flags 2
; ASM-DAG: .amdgpu_num_vgpr {{[0-9]+}}
; ASM-DAG: .amdgpu_indirect_call "vi"
; ASM-DAG: .end_amdgpu_info
diff --git a/llvm/test/CodeGen/AMDGPU/object-linking-local-resources.ll b/llvm/test/CodeGen/AMDGPU/object-linking-local-resources.ll
new file mode 100644
index 0000000000000..a25f74c46e954
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/object-linking-local-resources.ll
@@ -0,0 +1,104 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=asm < %s | FileCheck %s --check-prefix=DEFAULT
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-enable-object-linking -filetype=asm < %s | FileCheck %s --check-prefix=OL
+
+declare void @extern_callee()
+
+define void @calls_extern() {
+ call void @extern_callee()
+ ret void
+}
+
+define void @calls_indirect(ptr %fptr) {
+ call void %fptr()
+ ret void
+}
+
+define void @calls_local() {
+ ret void
+}
+
+define amdgpu_kernel void @my_kernel(ptr %fptr) {
+ call void @calls_extern()
+ call void @calls_indirect(ptr %fptr)
+ call void @calls_local()
+ ret void
+}
+
+; COM: Default mode: direct-to-extern triggers the conservative "unknown
+; COM: callee" path. Register/stack-size symbols include the module-level
+; COM: sinks; boolean flags are all forced to 1; HasIndirectCall is set too
+; COM: (IsIndirect covers calls to declarations).
+; DEFAULT: .set .Lcalls_extern.num_vgpr, max({{[0-9]+}}, amdgpu.max_num_vgpr)
+; DEFAULT: .set .Lcalls_extern.num_agpr, max({{[0-9]+}}, amdgpu.max_num_agpr)
+; DEFAULT: .set .Lcalls_extern.numbered_sgpr, max({{[0-9]+}}, amdgpu.max_num_sgpr)
+; DEFAULT: .set .Lcalls_extern.num_named_barrier, max({{[0-9]+}}, amdgpu.max_num_named_barrier)
+; DEFAULT: .set .Lcalls_extern.uses_vcc, 1
+; DEFAULT: .set .Lcalls_extern.uses_flat_scratch, 1
+; DEFAULT: .set .Lcalls_extern.has_dyn_sized_stack, 1
+; DEFAULT: .set .Lcalls_extern.has_recursion, 1
+; DEFAULT: .set .Lcalls_extern.has_indirect_call, 1
+
+; COM: Object linking: the same function reports only its own local usage.
+; COM: The sinks drop out of the register/stack-size expressions and the
+; COM: pessimized boolean flags collapse to the true local values (UsesVCC is
+; COM: still 1 here because the call-site lowering on gfx900 genuinely uses
+; COM: VCC).
+; OL: .set .Lcalls_extern.num_vgpr, {{[0-9]+}}
+; OL: .set .Lcalls_extern.num_agpr, {{[0-9]+}}
+; OL: .set .Lcalls_extern.numbered_sgpr, {{[0-9]+}}
+; OL: .set .Lcalls_extern.num_named_barrier, {{[0-9]+}}
+; OL: .set .Lcalls_extern.uses_vcc, 1
+; OL: .set .Lcalls_extern.uses_flat_scratch, 0
+; OL: .set .Lcalls_extern.has_dyn_sized_stack, 0
+; OL: .set .Lcalls_extern.has_recursion, 0
+; OL: .set .Lcalls_extern.has_indirect_call, 1
+
+; COM: True indirect call: identical contrast to the direct-to-extern case.
+; COM: In default mode all the flags are pessimized; with object linking only
+; COM: HasIndirectCall is preserved (the linker sees the call site's typeid
+; COM: and address-taken set and handles propagation).
+; DEFAULT: .set .Lcalls_indirect.uses_vcc, 1
+; DEFAULT: .set .Lcalls_indirect.uses_flat_scratch, 1
+; DEFAULT: .set .Lcalls_indirect.has_dyn_sized_stack, 1
+; DEFAULT: .set .Lcalls_indirect.has_recursion, 1
+; DEFAULT: .set .Lcalls_indirect.has_indirect_call, 1
+
+; OL: .set .Lcalls_indirect.uses_vcc, 1
+; OL: .set .Lcalls_indirect.uses_flat_scratch, 0
+; OL: .set .Lcalls_indirect.has_dyn_sized_stack, 0
+; OL: .set .Lcalls_indirect.has_recursion, 0
+; OL: .set .Lcalls_indirect.has_indirect_call, 1
+
+; COM: Baseline: a function that makes no calls outside itself reports the
+; COM: same all-zero local flags in both modes.
+; DEFAULT: .set .Lcalls_local.uses_vcc, 0
+; DEFAULT: .set .Lcalls_local.uses_flat_scratch, 0
+; DEFAULT: .set .Lcalls_local.has_dyn_sized_stack, 0
+; DEFAULT: .set .Lcalls_local.has_recursion, 0
+; DEFAULT: .set .Lcalls_local.has_indirect_call, 0
+
+; OL: .set .Lcalls_local.uses_vcc, 0
+; OL: .set .Lcalls_local.uses_flat_scratch, 0
+; OL: .set .Lcalls_local.has_dyn_sized_stack, 0
+; OL: .set .Lcalls_local.has_recursion, 0
+; OL: .set .Lcalls_local.has_indirect_call, 0
+
+; COM: Kernel side of the contrast. Default mode emits call-graph-propagation
+; COM: expressions (max()/or() over every callee's symbols) so the kernel
+; COM: picks up its callees' pessimized values; object linking emits concrete
+; COM: literals and leaves cross-TU aggregation to the linker.
+; DEFAULT: .set .Lmy_kernel.num_vgpr, max({{[0-9]+}}, .Lcalls_extern.num_vgpr, .Lcalls_indirect.num_vgpr, .Lcalls_local.num_vgpr)
+; DEFAULT: .set .Lmy_kernel.private_seg_size, {{[0-9]+}}+max(.Lcalls_extern.private_seg_size, .Lcalls_indirect.private_seg_size, .Lcalls_local.private_seg_size)
+; DEFAULT: .set .Lmy_kernel.uses_vcc, or({{[0-9]+}}, .Lcalls_extern.uses_vcc, .Lcalls_indirect.uses_vcc, .Lcalls_local.uses_vcc)
+; DEFAULT: .set .Lmy_kernel.uses_flat_scratch, or({{[0-9]+}}, .Lcalls_extern.uses_flat_scratch, .Lcalls_indirect.uses_flat_scratch, .Lcalls_local.uses_flat_scratch)
+; DEFAULT: .set .Lmy_kernel.has_dyn_sized_stack, or({{[0-9]+}}, .Lcalls_extern.has_dyn_sized_stack, .Lcalls_indirect.has_dyn_sized_stack, .Lcalls_local.has_dyn_sized_stack)
+; DEFAULT: .set .Lmy_kernel.has_recursion, or({{[0-9]+}}, .Lcalls_extern.has_recursion, .Lcalls_indirect.has_recursion, .Lcalls_local.has_recursion)
+; DEFAULT: .set .Lmy_kernel.has_indirect_call, or({{[0-9]+}}, .Lcalls_extern.has_indirect_call, .Lcalls_indirect.has_indirect_call, .Lcalls_local.has_indirect_call)
+
+; OL: .set .Lmy_kernel.num_vgpr, {{[0-9]+}}
+; OL: .set .Lmy_kernel.private_seg_size, {{[0-9]+}}
+; OL: .set .Lmy_kernel.uses_vcc, {{[01]}}
+; OL: .set .Lmy_kernel.uses_flat_scratch, {{[01]}}
+; OL: .set .Lmy_kernel.has_dyn_sized_stack, 0
+; OL: .set .Lmy_kernel.has_recursion, 0
+; OL: .set .Lmy_kernel.has_indirect_call, 0
More information about the llvm-branch-commits
mailing list