[llvm] [AMDGPU][NPM] Port AMDGPUArgumentUsageInfo to NPM (PR #170886)
Dark Steve via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 01:42:39 PST 2025
https://github.com/PrasoonMishra updated https://github.com/llvm/llvm-project/pull/170886
>From 78319d9de1aedfd59229f48fbd23e07eced638f3 Mon Sep 17 00:00:00 2001
From: Prasoon Mishra <Prasoon.Mishra at amd.com>
Date: Fri, 5 Dec 2025 12:35:45 +0000
Subject: [PATCH 1/3] [AMDGPU][NPM] Port AMDGPUArgumentUsageInfo to NPM
Port AMDGPUArgumentUsageInfo analysis to the NPM to fix suboptimal
code generation when NPM is enabled by default.
Previously, DAG.getPass() returns nullptr when using NPM, causing the
argument usage info to be unavailable during ISel. This resulted in
fallback to FixedABIFunctionInfo which assumes all implicit arguments
are needed, generating unnecessary register setup code for entry
functions.
Fixes LLVM::CodeGen/AMDGPU/cc-entry.ll
Changes:
- Split AMDGPUArgumentUsageInfo into a data class and NPM analysis wrapper
- Update SIISelLowering to use DAG.getMFAM() for NPM path
- Add RequireAnalysisPass in addPreISel() to ensure analysis availability
This follows the same pattern used for PhysicalRegisterUsageInfo.
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 2 +-
.../Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp | 19 +++++-
.../Target/AMDGPU/AMDGPUArgumentUsageInfo.h | 62 ++++++++++++++++---
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 4 +-
llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 6 ++
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 6 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 30 ++++++---
llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 6 +-
8 files changed, 108 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 5af2a2755cec3..5df11a45b4889 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -536,7 +536,7 @@ void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
ImmutablePass *createAMDGPUExternalAAWrapperPass();
void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
-void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
+void initializeAMDGPUArgumentUsageInfoWrapperLegacyPass(PassRegistry &);
ModulePass *createAMDGPUExportKernelRuntimeHandlesLegacyPass();
void initializeAMDGPUExportKernelRuntimeHandlesLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index dda8033f47398..cd9763ef16334 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -19,7 +19,7 @@ using namespace llvm;
#define DEBUG_TYPE "amdgpu-argument-reg-usage-info"
-INITIALIZE_PASS(AMDGPUArgumentUsageInfo, DEBUG_TYPE,
+INITIALIZE_PASS(AMDGPUArgumentUsageInfoWrapperLegacy, DEBUG_TYPE,
"Argument Register Usage Information Storage", false, true)
void ArgDescriptor::print(raw_ostream &OS,
@@ -42,7 +42,7 @@ void ArgDescriptor::print(raw_ostream &OS,
OS << '\n';
}
-char AMDGPUArgumentUsageInfo::ID = 0;
+char AMDGPUArgumentUsageInfoWrapperLegacy::ID = 0;
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{};
@@ -86,6 +86,13 @@ void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
}
}
+bool AMDGPUArgumentUsageInfo::invalidate(
+ Module &M, const PreservedAnalyses &PA,
+ ModuleAnalysisManager::Invalidator &) {
+ auto PAC = PA.getChecker<AMDGPUArgumentUsageAnalysis>();
+ return !PAC.preservedWhenStateless();
+}
+
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
AMDGPUFunctionArgInfo::getPreloadedValue(
AMDGPUFunctionArgInfo::PreloadedValue Value) const {
@@ -191,3 +198,11 @@ AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const {
return FixedABIFunctionInfo;
return I->second;
}
+
+AnalysisKey AMDGPUArgumentUsageAnalysis::Key;
+AMDGPUArgumentUsageInfo
+AMDGPUArgumentUsageAnalysis::run(Module &M, ModuleAnalysisManager &) {
+ AMDGPUArgumentUsageInfo AUIP;
+ AUIP.doInitialization(M);
+ return AUIP;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index cb7f63639aee3..52af5e8dc8362 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -12,11 +12,15 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/Register.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include <variant>
namespace llvm {
+void initializeAMDGPUArgumentUsageInfoWrapperLegacyPass(PassRegistry &);
+
class Function;
class LLT;
class raw_ostream;
@@ -168,32 +172,70 @@ struct AMDGPUFunctionArgInfo {
static AMDGPUFunctionArgInfo fixedABILayout();
};
-class AMDGPUArgumentUsageInfo : public ImmutablePass {
+class AMDGPUArgumentUsageInfo {
private:
DenseMap<const Function *, AMDGPUFunctionArgInfo> ArgInfoMap;
public:
- static char ID;
-
static const AMDGPUFunctionArgInfo ExternFunctionInfo;
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo;
- AMDGPUArgumentUsageInfo() : ImmutablePass(ID) { }
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ void print(raw_ostream &OS, const Module *M = nullptr) const;
+
+ void setFuncArgInfo(const Function &F, const AMDGPUFunctionArgInfo &ArgInfo) {
+ ArgInfoMap[&F] = ArgInfo;
+ }
+
+ const AMDGPUFunctionArgInfo &lookupFuncArgInfo(const Function &F) const;
+
+ bool invalidate(Module &M, const PreservedAnalyses &PA,
+ ModuleAnalysisManager::Invalidator &Inv);
+};
+
+class AMDGPUArgumentUsageInfoWrapperLegacy : public ImmutablePass {
+ std::unique_ptr<AMDGPUArgumentUsageInfo> AUIP;
+
+public:
+ static char ID;
+
+ AMDGPUArgumentUsageInfoWrapperLegacy() : ImmutablePass(ID) {
+ initializeAMDGPUArgumentUsageInfoWrapperLegacyPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ AMDGPUArgumentUsageInfo &getArgUsageInfo() { return *AUIP; }
+ const AMDGPUArgumentUsageInfo &getArgUsageInfo() const { return *AUIP; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
- bool doInitialization(Module &M) override;
- bool doFinalization(Module &M) override;
+ bool doInitialization(Module &M) override {
+ AUIP.reset(new AMDGPUArgumentUsageInfo());
+ return AUIP->doInitialization(M);
+ }
- void print(raw_ostream &OS, const Module *M = nullptr) const override;
+ bool doFinalization(Module &M) override {
+ return AUIP->doFinalization(M);
+ }
- void setFuncArgInfo(const Function &F, const AMDGPUFunctionArgInfo &ArgInfo) {
- ArgInfoMap[&F] = ArgInfo;
+ void print(raw_ostream &OS, const Module *M = nullptr) const override {
+ AUIP->print(OS, M);
}
+};
- const AMDGPUFunctionArgInfo &lookupFuncArgInfo(const Function &F) const;
+class AMDGPUArgumentUsageAnalysis
+ : public AnalysisInfoMixin<AMDGPUArgumentUsageAnalysis> {
+ friend AnalysisInfoMixin<AMDGPUArgumentUsageAnalysis>;
+ static AnalysisKey Key;
+
+public:
+ using Result = AMDGPUArgumentUsageInfo;
+
+ AMDGPUArgumentUsageInfo run(Module &M, ModuleAnalysisManager &);
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 8698e816ddbb9..d0835a03a2495 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -134,7 +134,7 @@ static SDValue stripExtractLoElt(SDValue In) {
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false,
false)
-INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
+INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfoWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
#ifdef EXPENSIVE_CHECKS
@@ -238,7 +238,7 @@ bool AMDGPUDAGToDAGISelLegacy::runOnMachineFunction(MachineFunction &MF) {
}
void AMDGPUDAGToDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AMDGPUArgumentUsageInfo>();
+ AU.addRequired<AMDGPUArgumentUsageInfoWrapperLegacy>();
AU.addRequired<UniformityInfoWrapperPass>();
#ifdef EXPENSIVE_CHECKS
AU.addRequired<DominatorTreeWrapperPass>();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 46d70c257b75e..f464fbf31c754 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -13,6 +13,12 @@
// NOTE: NO INCLUDE GUARD DESIRED!
+#ifndef MODULE_ANALYSIS
+#define MODULE_ANALYSIS(NAME, CREATE_PASS)
+#endif
+MODULE_ANALYSIS("amdgpu-argument-usage", AMDGPUArgumentUsageAnalysis())
+#undef MODULE_ANALYSIS
+
#ifndef MODULE_PASS
#define MODULE_PASS(NAME, CREATE_PASS)
#endif
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e5a35abe6da6b..0894bb41d2aff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -17,6 +17,7 @@
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
+#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUBarrierLatency.h"
#include "AMDGPUCtorDtorLowering.h"
#include "AMDGPUExportClustering.h"
@@ -575,7 +576,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPULowerExecSyncLegacyPass(*PR);
initializeAMDGPUSwLowerLDSLegacyPass(*PR);
initializeAMDGPUAnnotateUniformValuesLegacyPass(*PR);
- initializeAMDGPUArgumentUsageInfoPass(*PR);
+ initializeAMDGPUArgumentUsageInfoWrapperLegacyPass(*PR);
initializeAMDGPUAtomicOptimizerPass(*PR);
initializeAMDGPULowerKernelArgumentsPass(*PR);
initializeAMDGPUPromoteKernelArgumentsPass(*PR);
@@ -2207,6 +2208,9 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const {
+ // Require AMDGPUArgumentUsageAnalysis so that it's available during ISel.
+ addPass(RequireAnalysisPass<AMDGPUArgumentUsageAnalysis, Module>());
+
if (TM.getOptLevel() > CodeGenOptLevel::None) {
addPass(FlattenCFGPass());
addPass(SinkingPass());
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ff67fd63ea75e..f206cee0222f9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/CodeGen/PseudoSourceValueManager.h"
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -3587,11 +3588,17 @@ SDValue SITargetLowering::LowerFormalArguments(
if (IsEntryFunc)
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsGraphics);
- // DAG.getPass() returns nullptr when using new pass manager.
- // TODO: Use DAG.getMFAM() to access analysis result.
if (DAG.getPass()) {
- auto &ArgUsageInfo = DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
- ArgUsageInfo.setFuncArgInfo(Fn, Info->getArgInfo());
+ auto &ArgUsageInfo =
+ DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfoWrapperLegacy>();
+ ArgUsageInfo.getArgUsageInfo().setFuncArgInfo(Fn, Info->getArgInfo());
+ } else if (auto *MFAM = DAG.getMFAM()) {
+ Module &M = *MF.getFunction().getParent();
+ auto *ArgUsageInfo =
+ MFAM->getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
+ .getCachedResult<AMDGPUArgumentUsageAnalysis>(M);
+ if (ArgUsageInfo)
+ ArgUsageInfo->setFuncArgInfo(Fn, Info->getArgInfo());
}
unsigned StackArgSize = CCInfo.getStackSize();
@@ -3806,12 +3813,19 @@ void SITargetLowering::passSpecialInputs(
const AMDGPUFunctionArgInfo *CalleeArgInfo =
&AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
if (const Function *CalleeFunc = CLI.CB->getCalledFunction()) {
- // DAG.getPass() returns nullptr when using new pass manager.
- // TODO: Use DAG.getMFAM() to access analysis result.
if (DAG.getPass()) {
auto &ArgUsageInfo =
- DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
- CalleeArgInfo = &ArgUsageInfo.lookupFuncArgInfo(*CalleeFunc);
+ DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfoWrapperLegacy>();
+ CalleeArgInfo =
+ &ArgUsageInfo.getArgUsageInfo().lookupFuncArgInfo(*CalleeFunc);
+ } else if (auto *MFAM = DAG.getMFAM()) {
+ Module &M = *DAG.getMachineFunction().getFunction().getParent();
+ auto *ArgUsageInfo =
+ MFAM->getResult<ModuleAnalysisManagerMachineFunctionProxy>(
+ DAG.getMachineFunction())
+ .getCachedResult<AMDGPUArgumentUsageAnalysis>(M);
+ if (ArgUsageInfo)
+ CalleeArgInfo = &ArgUsageInfo->lookupFuncArgInfo(*CalleeFunc);
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 69dfbc2a2ae51..187e2ceb1209b 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -9,11 +9,11 @@
; RUN: | FileCheck -check-prefix=GCN-O3 %s
-; GCN-O0: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp<O0>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
+; GCN-O0: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp<O0>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim)),require<amdgpu-argument-usage>,cgscc(function(amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
-; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp<O2>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm<allowspeculation>),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
+; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp<O2>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm<allowspeculation>),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim)),require<amdgpu-argument-usage>,cgscc(function(flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
-; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp<O3>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm<allowspeculation>),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
+; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp<O3>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm<allowspeculation>),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim)),require<amdgpu-argument-usage>,cgscc(function(flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
define void @empty() {
ret void
>From 356ac54d80b035c494c23f97c74cf7649f430e56 Mon Sep 17 00:00:00 2001
From: Prasoon Mishra <Prasoon.Mishra at amd.com>
Date: Fri, 5 Dec 2025 17:19:58 +0000
Subject: [PATCH 2/3] Apply clang-format.
---
llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp | 5 ++---
llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h | 4 +---
2 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index cd9763ef16334..baa32cf7db927 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -86,9 +86,8 @@ void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
}
}
-bool AMDGPUArgumentUsageInfo::invalidate(
- Module &M, const PreservedAnalyses &PA,
- ModuleAnalysisManager::Invalidator &) {
+bool AMDGPUArgumentUsageInfo::invalidate(Module &M, const PreservedAnalyses &PA,
+ ModuleAnalysisManager::Invalidator &) {
auto PAC = PA.getChecker<AMDGPUArgumentUsageAnalysis>();
return !PAC.preservedWhenStateless();
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index 52af5e8dc8362..ddd74210dae29 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -218,9 +218,7 @@ class AMDGPUArgumentUsageInfoWrapperLegacy : public ImmutablePass {
return AUIP->doInitialization(M);
}
- bool doFinalization(Module &M) override {
- return AUIP->doFinalization(M);
- }
+ bool doFinalization(Module &M) override { return AUIP->doFinalization(M); }
void print(raw_ostream &OS, const Module *M = nullptr) const override {
AUIP->print(OS, M);
>From cddaedc435798428a24516af6f8ee17783b06713 Mon Sep 17 00:00:00 2001
From: Prasoon Mishra <Prasoon.Mishra at amd.com>
Date: Mon, 8 Dec 2025 08:59:47 +0000
Subject: [PATCH 3/3] Clean up data class interface
Fix: Removed legacy PM method names (doInitialization/doFinalization)
from AMDGPUArgumentUsageInfo data class.
Simplified NPM run() as no special initialization is needed.
Simplify NPM run() to return default-constructed object since no
special initialization is needed.
---
llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp | 14 ++------------
llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h | 14 ++++++++------
2 files changed, 10 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index baa32cf7db927..346e257ea7291 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -50,15 +50,6 @@ const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{};
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::FixedABIFunctionInfo
= AMDGPUFunctionArgInfo::fixedABILayout();
-bool AMDGPUArgumentUsageInfo::doInitialization(Module &M) {
- return false;
-}
-
-bool AMDGPUArgumentUsageInfo::doFinalization(Module &M) {
- ArgInfoMap.clear();
- return false;
-}
-
// TODO: Print preload kernargs?
void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
for (const auto &FI : ArgInfoMap) {
@@ -199,9 +190,8 @@ AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const {
}
AnalysisKey AMDGPUArgumentUsageAnalysis::Key;
+
AMDGPUArgumentUsageInfo
AMDGPUArgumentUsageAnalysis::run(Module &M, ModuleAnalysisManager &) {
- AMDGPUArgumentUsageInfo AUIP;
- AUIP.doInitialization(M);
- return AUIP;
+ return AMDGPUArgumentUsageInfo();
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index ddd74210dae29..f41739ac6c169 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -180,11 +180,10 @@ class AMDGPUArgumentUsageInfo {
static const AMDGPUFunctionArgInfo ExternFunctionInfo;
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo;
- bool doInitialization(Module &M);
- bool doFinalization(Module &M);
-
void print(raw_ostream &OS, const Module *M = nullptr) const;
+ void clear() { ArgInfoMap.clear(); }
+
void setFuncArgInfo(const Function &F, const AMDGPUFunctionArgInfo &ArgInfo) {
ArgInfoMap[&F] = ArgInfo;
}
@@ -214,11 +213,14 @@ class AMDGPUArgumentUsageInfoWrapperLegacy : public ImmutablePass {
}
bool doInitialization(Module &M) override {
- AUIP.reset(new AMDGPUArgumentUsageInfo());
- return AUIP->doInitialization(M);
+ AUIP = std::make_unique<AMDGPUArgumentUsageInfo>();
+ return false;
}
- bool doFinalization(Module &M) override { return AUIP->doFinalization(M); }
+ bool doFinalization(Module &M) override {
+ AUIP->clear();
+ return false;
+ }
void print(raw_ostream &OS, const Module *M = nullptr) const override {
AUIP->print(OS, M);
More information about the llvm-commits
mailing list