[llvm] Revert "[AMDGPU] Skip register uses in AMDGPUResourceUsageAnalysis (#… (PR #144039)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 13 02:31:52 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-globalisel
Author: Diana Picus (rovka)
<details>
<summary>Changes</summary>
Revert "[AMDGPU] Skip register uses in AMDGPUResourceUsageAnalysis (#133242)"
This reverts commit 130080fab11cde5efcb338b77f5c3b31097df6e6 because it causes issues in test cases similar to coalescer_remat.ll [1], i.e. when we use a VGPR tuple but only write to its lower parts. The high VGPRs would then not be included in the vgpr_count, so accessing them would be an out-of-bounds violation.
[1] https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/AMDGPU/coalescer_remat.ll
---
Patch is 98.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144039.diff
48 Files Affected:
- (modified) llvm/docs/AMDGPUUsage.rst (+6-5)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (+10-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp (+266-17)
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (-15)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp (-14)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.h (-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll (+19-19)
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-no-agprs-violations.ll (+3-4)
- (modified) llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll (+14-14)
- (modified) llvm/test/CodeGen/AMDGPU/amdpal-callable.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/amdpal-elf.ll (+4-12)
- (modified) llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/coalescer_remat.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/code-object-v3.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/elf-notes.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll (+53-53)
- (modified) llvm/test/CodeGen/AMDGPU/function-resource-usage.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/hsa.ll (+1-1)
- (removed) llvm/test/CodeGen/AMDGPU/init-whole-wave-vgpr-count-large.ll (-72)
- (removed) llvm/test/CodeGen/AMDGPU/init-whole-wave-vgpr-count-leaf.ll (-46)
- (removed) llvm/test/CodeGen/AMDGPU/init-whole-wave-vgpr-count-use-inactive.ll (-74)
- (removed) llvm/test/CodeGen/AMDGPU/init-whole-wave-vgpr-count.ll (-71)
- (modified) llvm/test/CodeGen/AMDGPU/ipra.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/ps-shader-arg-count.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/register-count-comments.ll (+1-3)
- (modified) llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/tid-kd-xnack-any.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/tid-kd-xnack-off.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/tid-kd-xnack-on.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll (+2-2)
- (removed) llvm/test/CodeGen/AMDGPU/vgpr-count-compute.ll (-30)
- (removed) llvm/test/CodeGen/AMDGPU/vgpr-count-graphics.ll (-35)
``````````diff
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 3aa8773fa506b..e0a43225e81be 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -4263,9 +4263,10 @@ same *vendor-name*.
wavefront for
GFX6-GFX9. A register
is required if it is
- written to, or
+ used explicitly, or
if a higher numbered
- register is written to. This
+ register is used
+ explicitly. This
includes the special
SGPRs for VCC, Flat
Scratch (GFX7-GFX9)
@@ -4283,10 +4284,10 @@ same *vendor-name*.
each work-item for
GFX6-GFX9. A register
is required if it is
- written to, or
+ used explicitly, or
if a higher numbered
- register is
- written to.
+ register is used
+ explicitly.
".agpr_count" integer Required Number of accumulator
registers required by
each work-item for
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index d4fea30f21f45..491314daf2d81 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -989,7 +989,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// dispatch registers are function args.
unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
- if (isShader(F.getCallingConv()) && isEntryFunctionCC(F.getCallingConv())) {
+ if (isShader(F.getCallingConv())) {
bool IsPixelShader =
F.getCallingConv() == CallingConv::AMDGPU_PS && !STM.isAmdHsaOS();
@@ -1060,6 +1060,15 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.NumVGPR = AMDGPUMCExpr::createTotalNumVGPR(
ProgInfo.NumAccVGPR, ProgInfo.NumArchVGPR, Ctx);
+ } else if (isKernel(F.getCallingConv()) &&
+ MFI->getNumKernargPreloadedSGPRs()) {
+ // Consider cases where the total number of UserSGPRs with trailing
+ // allocated preload SGPRs, is greater than the number of explicitly
+ // referenced SGPRs.
+ const MCExpr *UserPlusExtraSGPRs = MCBinaryExpr::createAdd(
+ CreateExpr(MFI->getNumUserSGPRs()), ExtraSGPRs, Ctx);
+ ProgInfo.NumSGPR =
+ AMDGPUMCExpr::createMax({ProgInfo.NumSGPR, UserPlusExtraSGPRs}, Ctx);
}
// Adjust number of registers used to meet default/requested minimum/maximum
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 7bde59412d905..9a609a1752de0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -137,29 +137,274 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
if (MFI->isStackRealigned())
Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
- Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC);
-
- Info.NumVGPR = TRI.getNumDefinedPhysRegs(MRI, AMDGPU::VGPR_32RegClass);
- Info.NumExplicitSGPR =
- TRI.getNumDefinedPhysRegs(MRI, AMDGPU::SGPR_32RegClass);
- if (ST.hasMAIInsts())
- Info.NumAGPR = TRI.getNumDefinedPhysRegs(MRI, AMDGPU::AGPR_32RegClass);
-
- // Preloaded registers are written by the hardware, not defined in the
- // function body, so they need special handling.
- if (MFI->isEntryFunction()) {
- Info.NumExplicitSGPR =
- std::max<int32_t>(Info.NumExplicitSGPR, MFI->getNumPreloadedSGPRs());
- Info.NumVGPR = std::max<int32_t>(Info.NumVGPR, MFI->getNumPreloadedVGPRs());
- }
-
- if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall())
+ Info.UsesVCC =
+ MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
+
+ // If there are no calls, MachineRegisterInfo can tell us the used register
+ // count easily.
+ // A tail call isn't considered a call for MachineFrameInfo's purposes.
+ if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
+ Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass);
+ Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass);
+ if (ST.hasMAIInsts())
+ Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass);
return Info;
+ }
+ int32_t MaxVGPR = -1;
+ int32_t MaxAGPR = -1;
+ int32_t MaxSGPR = -1;
Info.CalleeSegmentSize = 0;
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
+ // TODO: Check regmasks? Do they occur anywhere except calls?
+ for (const MachineOperand &MO : MI.operands()) {
+ unsigned Width = 0;
+ bool IsSGPR = false;
+ bool IsAGPR = false;
+
+ if (!MO.isReg())
+ continue;
+
+ Register Reg = MO.getReg();
+ switch (Reg) {
+ case AMDGPU::EXEC:
+ case AMDGPU::EXEC_LO:
+ case AMDGPU::EXEC_HI:
+ case AMDGPU::SCC:
+ case AMDGPU::M0:
+ case AMDGPU::M0_LO16:
+ case AMDGPU::M0_HI16:
+ case AMDGPU::SRC_SHARED_BASE_LO:
+ case AMDGPU::SRC_SHARED_BASE:
+ case AMDGPU::SRC_SHARED_LIMIT_LO:
+ case AMDGPU::SRC_SHARED_LIMIT:
+ case AMDGPU::SRC_PRIVATE_BASE_LO:
+ case AMDGPU::SRC_PRIVATE_BASE:
+ case AMDGPU::SRC_PRIVATE_LIMIT_LO:
+ case AMDGPU::SRC_PRIVATE_LIMIT:
+ case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+ case AMDGPU::SGPR_NULL:
+ case AMDGPU::SGPR_NULL64:
+ case AMDGPU::MODE:
+ continue;
+
+ case AMDGPU::NoRegister:
+ assert(MI.isDebugInstr() &&
+ "Instruction uses invalid noreg register");
+ continue;
+
+ case AMDGPU::VCC:
+ case AMDGPU::VCC_LO:
+ case AMDGPU::VCC_HI:
+ case AMDGPU::VCC_LO_LO16:
+ case AMDGPU::VCC_LO_HI16:
+ case AMDGPU::VCC_HI_LO16:
+ case AMDGPU::VCC_HI_HI16:
+ Info.UsesVCC = true;
+ continue;
+
+ case AMDGPU::FLAT_SCR:
+ case AMDGPU::FLAT_SCR_LO:
+ case AMDGPU::FLAT_SCR_HI:
+ continue;
+
+ case AMDGPU::XNACK_MASK:
+ case AMDGPU::XNACK_MASK_LO:
+ case AMDGPU::XNACK_MASK_HI:
+ llvm_unreachable("xnack_mask registers should not be used");
+
+ case AMDGPU::LDS_DIRECT:
+ llvm_unreachable("lds_direct register should not be used");
+
+ case AMDGPU::TBA:
+ case AMDGPU::TBA_LO:
+ case AMDGPU::TBA_HI:
+ case AMDGPU::TMA:
+ case AMDGPU::TMA_LO:
+ case AMDGPU::TMA_HI:
+ llvm_unreachable("trap handler registers should not be used");
+
+ case AMDGPU::SRC_VCCZ:
+ llvm_unreachable("src_vccz register should not be used");
+
+ case AMDGPU::SRC_EXECZ:
+ llvm_unreachable("src_execz register should not be used");
+
+ case AMDGPU::SRC_SCC:
+ llvm_unreachable("src_scc register should not be used");
+
+ default:
+ break;
+ }
+
+ if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
+ AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
+ AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 1;
+ } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
+ AMDGPU::VGPR_16RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 1;
+ } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
+ AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 1;
+ } else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 2;
+ } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 2;
+ } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 2;
+ } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 3;
+ } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 3;
+ } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 3;
+ } else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 4;
+ } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 4;
+ } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 4;
+ } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 5;
+ } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 5;
+ } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 5;
+ } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 6;
+ } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 6;
+ } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 6;
+ } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 7;
+ } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 7;
+ } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 7;
+ } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 8;
+ } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 8;
+ } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 8;
+ } else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 9;
+ } else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 9;
+ } else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 9;
+ } else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 10;
+ } else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 10;
+ } else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 10;
+ } else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 11;
+ } else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 11;
+ } else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 11;
+ } else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 12;
+ } else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 12;
+ } else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 12;
+ } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 16;
+ } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 16;
+ } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 16;
+ } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 32;
+ } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 32;
+ } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 32;
+ } else {
+ // We only expect TTMP registers or registers that do not belong to
+ // any RC.
+ assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
+ AMDGPU::TTMP_64RegClass.contains(Reg) ||
+ AMDGPU::TTMP_128RegClass.contains(Reg) ||
+ AMDGPU::TTMP_256RegClass.contains(Reg) ||
+ AMDGPU::TTMP_512RegClass.contains(Reg) ||
+ !TRI.getPhysRegBaseClass(Reg)) &&
+ "Unknown register class");
+ }
+ unsigned HWReg = TRI.getHWRegIndex(Reg);
+ int MaxUsed = HWReg + Width - 1;
+ if (IsSGPR) {
+ MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
+ } else if (IsAGPR) {
+ MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
+ } else {
+ MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
+ }
+ }
+
if (MI.isCall()) {
// Pseudo used just to encode the underlying global. Is there a better
// way to track this?
@@ -219,5 +464,9 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
}
}
+ Info.NumExplicitSGPR = MaxSGPR + 1;
+ Info.NumVGPR = MaxVGPR + 1;
+ Info.NumAGPR = MaxAGPR + 1;
+
return Info;
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 01718faaf5c2e..0e7635a045588 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -970,25 +970,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
return NumUserSGPRs;
}
- // Get the number of preloaded SGPRs for compute kernels.
unsigned getNumPreloadedSGPRs() const {
return NumUserSGPRs + NumSystemSGPRs;
}
- // Get the number of preloaded VGPRs for compute kernels.
- unsigned getNumPreloadedVGPRs() const {
- if (hasWorkItemIDZ())
- return ArgInfo.WorkItemIDZ.getRegister() - AMDGPU::VGPR0 + 1;
-
- if (hasWorkItemIDY())
- return ArgInfo.WorkItemIDY.getRegister() - AMDGPU::VGPR0 + 1;
-
- if (hasWorkItemIDX())
- return ArgInfo.WorkItemIDX.getRegister() - AMDGPU::VGPR0 + 1;
-
- return 0;
- }
-
unsigned getNumKernargPreloadedSGPRs() const {
return UserSGPRInfo.getNumKernargPreloadSGPRs();
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index b76823a128e07..e41189adfb46f 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -4055,20 +4055,6 @@ SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
return 0;
}
-unsigned
-SIRegisterInfo::getNumDefinedPhysRegs(const MachineRegisterInfo &MRI,
- const TargetRegisterClass &RC) const {
- for (MCPhysReg Reg : reverse(RC.getRegisters())) {
- for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI) {
- if (llvm::any_of(MRI.def_instructions(*AI), [](const MachineInstr &MI) {
- return !MI.isImplicitDef();
- }))
- return getHWRegIndex(Reg) + 1;
- }
- }
- return 0;
-}
-
SmallVector<StringLiteral>
SIRegisterInfo::getVRegFlagsOfReg(Register Reg,
const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 7726762ad0e6d..a4b135d5e0b59 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -486,11 +486,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
const TargetRegisterClass &RC) const;
- // \returns the number of registers of a given \p RC defined in a function.
- // Does not go inside function calls.
- unsigned getNumDefinedPhysRegs(const MachineRegisterInfo &MRI,
- const TargetRegisterClass &RC) const;
-
std::optional<uint8_t> getVRegFlagValue(StringRef Name) const override {
return Name == "WWM_REG" ? AMDGPU::VirtRegFlag::WWM_REG
: std::optional<uint8_t>{};
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
index bdd86c1af6248..9b35920f8547a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -3059,7 +3059,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
; GPRIDX-NEXT: gds_segment_byte_size = 0
; GPRIDX-NEXT: kernarg_segment_byte_size = 28
; GPRIDX-NEXT: workgroup_fbarrier_count = 0
-; GPRIDX-NEXT: wavefront_sgpr_count = 24
+; GPRIDX-NEXT: wavefront_sgpr_count = 17
; GPRIDX-NEXT: workitem_vgpr_count = 3
; GPRIDX-NEXT: reserved_vgpr_first = 0
; GPRIDX-NEXT: reserved_vgpr_count = 0
@@ -3202,7 +3202,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
; GFX10-NEXT: kernel_code_entry_byte_offset = 256
; GFX10-NEXT: kernel_code_prefetch_byte_size = 0
; GFX10-NEXT: granulated_workitem_vgpr_count = 0
-; GFX10-NEXT: granulated_wavefront_sgpr_count = 2
+; GFX10-NEXT: granulated_wavefront_sgpr_count = 1
; GFX10-NEXT: priority = 0
; GFX10-NEXT: float_mode = 240
; GFX10-NEXT: priv = 0
@@ -3245,7 +3245,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
; GFX10-NEXT: gds_segment_byte_size = 0
; GFX10-NEXT: kernarg_segment_byte_size = 28
; GFX10-NEXT: workgroup_fbarrier_count = 0
-; GFX10-NEXT: wavefront_sgpr_count = 18
+; GFX10-NEXT: wavefront_sgpr_count = 10
; GFX10-NEXT: workitem_vgpr_count = 3
; GFX10-NEXT: reserved_vgpr_first = 0
; GFX10-NEXT: reserved_vgpr_count = 0
@@ -3294,7 +3294,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
; GFX11-NEXT: kernel_code_entry_byte_offset = 256
; GFX11-NEXT: kernel_code_prefetch_byte_size = 0
; GFX11-NEXT: granulated_workitem_vgpr_count = 0
-; GFX11-NEXT: granulated_wavefront_sgpr_count = 1
+; GFX11-NEXT: granulated_wavefront_sgpr_count = 0
; GFX11-NEXT: priority = 0
; GFX11-NEXT: float_mode = 240
; GFX11-NEXT: priv = 0
@@ -3337,7 +3337,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
; GFX11-NEXT: gds_segment_byte_size = 0
; GFX11-NEXT: kernarg_segment_byte_size = 28
; GFX11-NEXT: workgroup_fbarrier_count = 0
-; GFX11-NEXT: wavefront_sgpr_count = 16
+; GFX11-NEXT: wavefront_sgpr_count = 7
; GFX11-NEXT: workitem_vgpr_count = 3
; GFX11-NEXT: reserved_vgpr_first = 0
; GFX11-NEXT: reserved_vgpr_count = 0
@@ -4034,7 +4034,7 @@ define amdgpu_kernel void @dyn_e...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/144039
More information about the llvm-commits
mailing list