[llvm] Revert "[AMDGPU] Skip register uses in AMDGPUResourceUsageAnalysis (#… (PR #144039)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 13 02:31:52 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-globalisel

Author: Diana Picus (rovka)

<details>
<summary>Changes</summary>

…133242)"

This reverts commit 130080fab11cde5efcb338b77f5c3b31097df6e6 because it causes issues in testcases similar to coalescer_remat.ll [1], i.e. when we use a VGPR tuple but only write to its lower parts. The high VGPRs would then not be included in the vgpr_count, and accessing them would be an out of bounds violation.

[1] https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/AMDGPU/coalescer_remat.ll

---

Patch is 98.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144039.diff


48 Files Affected:

- (modified) llvm/docs/AMDGPUUsage.rst (+6-5) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (+10-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp (+266-17) 
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (-15) 
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp (-14) 
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.h (-5) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll (+19-19) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-no-agprs-violations.ll (+3-4) 
- (modified) llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll (+14-14) 
- (modified) llvm/test/CodeGen/AMDGPU/amdpal-callable.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/amdpal-elf.ll (+4-12) 
- (modified) llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll (+12-12) 
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll (+5-5) 
- (modified) llvm/test/CodeGen/AMDGPU/coalescer_remat.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/code-object-v3.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/elf-notes.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll (+53-53) 
- (modified) llvm/test/CodeGen/AMDGPU/function-resource-usage.ll (+12-12) 
- (modified) llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/hsa.ll (+1-1) 
- (removed) llvm/test/CodeGen/AMDGPU/init-whole-wave-vgpr-count-large.ll (-72) 
- (removed) llvm/test/CodeGen/AMDGPU/init-whole-wave-vgpr-count-leaf.ll (-46) 
- (removed) llvm/test/CodeGen/AMDGPU/init-whole-wave-vgpr-count-use-inactive.ll (-74) 
- (removed) llvm/test/CodeGen/AMDGPU/init-whole-wave-vgpr-count.ll (-71) 
- (modified) llvm/test/CodeGen/AMDGPU/ipra.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll (+12-12) 
- (modified) llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/ps-shader-arg-count.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/register-count-comments.ll (+1-3) 
- (modified) llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/tid-kd-xnack-any.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/tid-kd-xnack-off.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/tid-kd-xnack-on.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll (+2-2) 
- (removed) llvm/test/CodeGen/AMDGPU/vgpr-count-compute.ll (-30) 
- (removed) llvm/test/CodeGen/AMDGPU/vgpr-count-graphics.ll (-35) 


``````````diff
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 3aa8773fa506b..e0a43225e81be 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -4263,9 +4263,10 @@ same *vendor-name*.
                                                                   wavefront for
                                                                   GFX6-GFX9. A register
                                                                   is required if it is
-                                                                  written to, or
+                                                                  used explicitly, or
                                                                   if a higher numbered
-                                                                  register is written to. This
+                                                                  register is used
+                                                                  explicitly. This
                                                                   includes the special
                                                                   SGPRs for VCC, Flat
                                                                   Scratch (GFX7-GFX9)
@@ -4283,10 +4284,10 @@ same *vendor-name*.
                                                                   each work-item for
                                                                   GFX6-GFX9. A register
                                                                   is required if it is
-                                                                  written to, or
+                                                                  used explicitly, or
                                                                   if a higher numbered
-                                                                  register is
-                                                                  written to.
+                                                                  register is used
+                                                                  explicitly.
      ".agpr_count"                       integer        Required  Number of accumulator
                                                                   registers required by
                                                                   each work-item for
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index d4fea30f21f45..491314daf2d81 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -989,7 +989,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   // dispatch registers are function args.
   unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
 
-  if (isShader(F.getCallingConv()) && isEntryFunctionCC(F.getCallingConv())) {
+  if (isShader(F.getCallingConv())) {
     bool IsPixelShader =
         F.getCallingConv() == CallingConv::AMDGPU_PS && !STM.isAmdHsaOS();
 
@@ -1060,6 +1060,15 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
 
     ProgInfo.NumVGPR = AMDGPUMCExpr::createTotalNumVGPR(
         ProgInfo.NumAccVGPR, ProgInfo.NumArchVGPR, Ctx);
+  } else if (isKernel(F.getCallingConv()) &&
+             MFI->getNumKernargPreloadedSGPRs()) {
+    // Consider cases where the total number of UserSGPRs with trailing
+    // allocated preload SGPRs, is greater than the number of explicitly
+    // referenced SGPRs.
+    const MCExpr *UserPlusExtraSGPRs = MCBinaryExpr::createAdd(
+        CreateExpr(MFI->getNumUserSGPRs()), ExtraSGPRs, Ctx);
+    ProgInfo.NumSGPR =
+        AMDGPUMCExpr::createMax({ProgInfo.NumSGPR, UserPlusExtraSGPRs}, Ctx);
   }
 
   // Adjust number of registers used to meet default/requested minimum/maximum
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 7bde59412d905..9a609a1752de0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -137,29 +137,274 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
   if (MFI->isStackRealigned())
     Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
 
-  Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC);
-
-  Info.NumVGPR = TRI.getNumDefinedPhysRegs(MRI, AMDGPU::VGPR_32RegClass);
-  Info.NumExplicitSGPR =
-      TRI.getNumDefinedPhysRegs(MRI, AMDGPU::SGPR_32RegClass);
-  if (ST.hasMAIInsts())
-    Info.NumAGPR = TRI.getNumDefinedPhysRegs(MRI, AMDGPU::AGPR_32RegClass);
-
-  // Preloaded registers are written by the hardware, not defined in the
-  // function body, so they need special handling.
-  if (MFI->isEntryFunction()) {
-    Info.NumExplicitSGPR =
-        std::max<int32_t>(Info.NumExplicitSGPR, MFI->getNumPreloadedSGPRs());
-    Info.NumVGPR = std::max<int32_t>(Info.NumVGPR, MFI->getNumPreloadedVGPRs());
-  }
-
-  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall())
+  Info.UsesVCC =
+      MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
+
+  // If there are no calls, MachineRegisterInfo can tell us the used register
+  // count easily.
+  // A tail call isn't considered a call for MachineFrameInfo's purposes.
+  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
+    Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass);
+    Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass);
+    if (ST.hasMAIInsts())
+      Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass);
     return Info;
+  }
 
+  int32_t MaxVGPR = -1;
+  int32_t MaxAGPR = -1;
+  int32_t MaxSGPR = -1;
   Info.CalleeSegmentSize = 0;
 
   for (const MachineBasicBlock &MBB : MF) {
     for (const MachineInstr &MI : MBB) {
+      // TODO: Check regmasks? Do they occur anywhere except calls?
+      for (const MachineOperand &MO : MI.operands()) {
+        unsigned Width = 0;
+        bool IsSGPR = false;
+        bool IsAGPR = false;
+
+        if (!MO.isReg())
+          continue;
+
+        Register Reg = MO.getReg();
+        switch (Reg) {
+        case AMDGPU::EXEC:
+        case AMDGPU::EXEC_LO:
+        case AMDGPU::EXEC_HI:
+        case AMDGPU::SCC:
+        case AMDGPU::M0:
+        case AMDGPU::M0_LO16:
+        case AMDGPU::M0_HI16:
+        case AMDGPU::SRC_SHARED_BASE_LO:
+        case AMDGPU::SRC_SHARED_BASE:
+        case AMDGPU::SRC_SHARED_LIMIT_LO:
+        case AMDGPU::SRC_SHARED_LIMIT:
+        case AMDGPU::SRC_PRIVATE_BASE_LO:
+        case AMDGPU::SRC_PRIVATE_BASE:
+        case AMDGPU::SRC_PRIVATE_LIMIT_LO:
+        case AMDGPU::SRC_PRIVATE_LIMIT:
+        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+        case AMDGPU::SGPR_NULL:
+        case AMDGPU::SGPR_NULL64:
+        case AMDGPU::MODE:
+          continue;
+
+        case AMDGPU::NoRegister:
+          assert(MI.isDebugInstr() &&
+                 "Instruction uses invalid noreg register");
+          continue;
+
+        case AMDGPU::VCC:
+        case AMDGPU::VCC_LO:
+        case AMDGPU::VCC_HI:
+        case AMDGPU::VCC_LO_LO16:
+        case AMDGPU::VCC_LO_HI16:
+        case AMDGPU::VCC_HI_LO16:
+        case AMDGPU::VCC_HI_HI16:
+          Info.UsesVCC = true;
+          continue;
+
+        case AMDGPU::FLAT_SCR:
+        case AMDGPU::FLAT_SCR_LO:
+        case AMDGPU::FLAT_SCR_HI:
+          continue;
+
+        case AMDGPU::XNACK_MASK:
+        case AMDGPU::XNACK_MASK_LO:
+        case AMDGPU::XNACK_MASK_HI:
+          llvm_unreachable("xnack_mask registers should not be used");
+
+        case AMDGPU::LDS_DIRECT:
+          llvm_unreachable("lds_direct register should not be used");
+
+        case AMDGPU::TBA:
+        case AMDGPU::TBA_LO:
+        case AMDGPU::TBA_HI:
+        case AMDGPU::TMA:
+        case AMDGPU::TMA_LO:
+        case AMDGPU::TMA_HI:
+          llvm_unreachable("trap handler registers should not be used");
+
+        case AMDGPU::SRC_VCCZ:
+          llvm_unreachable("src_vccz register should not be used");
+
+        case AMDGPU::SRC_EXECZ:
+          llvm_unreachable("src_execz register should not be used");
+
+        case AMDGPU::SRC_SCC:
+          llvm_unreachable("src_scc register should not be used");
+
+        default:
+          break;
+        }
+
+        if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
+            AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
+            AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 1;
+        } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
+                   AMDGPU::VGPR_16RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 1;
+        } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
+                   AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 1;
+        } else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 2;
+        } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 2;
+        } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 2;
+        } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 3;
+        } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 3;
+        } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 3;
+        } else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 4;
+        } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 4;
+        } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 4;
+        } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 5;
+        } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 5;
+        } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 5;
+        } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 6;
+        } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 6;
+        } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 6;
+        } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 7;
+        } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 7;
+        } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 7;
+        } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 8;
+        } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 8;
+        } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 8;
+        } else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 9;
+        } else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 9;
+        } else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 9;
+        } else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 10;
+        } else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 10;
+        } else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 10;
+        } else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 11;
+        } else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 11;
+        } else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 11;
+        } else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 12;
+        } else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 12;
+        } else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 12;
+        } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 16;
+        } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 16;
+        } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 16;
+        } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 32;
+        } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 32;
+        } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 32;
+        } else {
+          // We only expect TTMP registers or registers that do not belong to
+          // any RC.
+          assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
+                  AMDGPU::TTMP_64RegClass.contains(Reg) ||
+                  AMDGPU::TTMP_128RegClass.contains(Reg) ||
+                  AMDGPU::TTMP_256RegClass.contains(Reg) ||
+                  AMDGPU::TTMP_512RegClass.contains(Reg) ||
+                  !TRI.getPhysRegBaseClass(Reg)) &&
+                 "Unknown register class");
+        }
+        unsigned HWReg = TRI.getHWRegIndex(Reg);
+        int MaxUsed = HWReg + Width - 1;
+        if (IsSGPR) {
+          MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
+        } else if (IsAGPR) {
+          MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
+        } else {
+          MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
+        }
+      }
+
       if (MI.isCall()) {
         // Pseudo used just to encode the underlying global. Is there a better
         // way to track this?
@@ -219,5 +464,9 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
     }
   }
 
+  Info.NumExplicitSGPR = MaxSGPR + 1;
+  Info.NumVGPR = MaxVGPR + 1;
+  Info.NumAGPR = MaxAGPR + 1;
+
   return Info;
 }
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 01718faaf5c2e..0e7635a045588 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -970,25 +970,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
     return NumUserSGPRs;
   }
 
-  // Get the number of preloaded SGPRs for compute kernels.
   unsigned getNumPreloadedSGPRs() const {
     return NumUserSGPRs + NumSystemSGPRs;
   }
 
-  // Get the number of preloaded VGPRs for compute kernels.
-  unsigned getNumPreloadedVGPRs() const {
-    if (hasWorkItemIDZ())
-      return ArgInfo.WorkItemIDZ.getRegister() - AMDGPU::VGPR0 + 1;
-
-    if (hasWorkItemIDY())
-      return ArgInfo.WorkItemIDY.getRegister() - AMDGPU::VGPR0 + 1;
-
-    if (hasWorkItemIDX())
-      return ArgInfo.WorkItemIDX.getRegister() - AMDGPU::VGPR0 + 1;
-
-    return 0;
-  }
-
   unsigned getNumKernargPreloadedSGPRs() const {
     return UserSGPRInfo.getNumKernargPreloadSGPRs();
   }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index b76823a128e07..e41189adfb46f 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -4055,20 +4055,6 @@ SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
   return 0;
 }
 
-unsigned
-SIRegisterInfo::getNumDefinedPhysRegs(const MachineRegisterInfo &MRI,
-                                      const TargetRegisterClass &RC) const {
-  for (MCPhysReg Reg : reverse(RC.getRegisters())) {
-    for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI) {
-      if (llvm::any_of(MRI.def_instructions(*AI), [](const MachineInstr &MI) {
-            return !MI.isImplicitDef();
-          }))
-        return getHWRegIndex(Reg) + 1;
-    }
-  }
-  return 0;
-}
-
 SmallVector<StringLiteral>
 SIRegisterInfo::getVRegFlagsOfReg(Register Reg,
                                   const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 7726762ad0e6d..a4b135d5e0b59 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -486,11 +486,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
   unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
                               const TargetRegisterClass &RC) const;
 
-  // \returns the number of registers of a given \p RC defined in a function.
-  // Does not go inside function calls.
-  unsigned getNumDefinedPhysRegs(const MachineRegisterInfo &MRI,
-                                 const TargetRegisterClass &RC) const;
-
   std::optional<uint8_t> getVRegFlagValue(StringRef Name) const override {
     return Name == "WWM_REG" ? AMDGPU::VirtRegFlag::WWM_REG
                              : std::optional<uint8_t>{};
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
index bdd86c1af6248..9b35920f8547a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -3059,7 +3059,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
 ; GPRIDX-NEXT:     gds_segment_byte_size = 0
 ; GPRIDX-NEXT:     kernarg_segment_byte_size = 28
 ; GPRIDX-NEXT:     workgroup_fbarrier_count = 0
-; GPRIDX-NEXT:     wavefront_sgpr_count = 24
+; GPRIDX-NEXT:     wavefront_sgpr_count = 17
 ; GPRIDX-NEXT:     workitem_vgpr_count = 3
 ; GPRIDX-NEXT:     reserved_vgpr_first = 0
 ; GPRIDX-NEXT:     reserved_vgpr_count = 0
@@ -3202,7 +3202,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
 ; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
 ; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
 ; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
-; GFX10-NEXT:     granulated_wavefront_sgpr_count = 2
+; GFX10-NEXT:     granulated_wavefront_sgpr_count = 1
 ; GFX10-NEXT:     priority = 0
 ; GFX10-NEXT:     float_mode = 240
 ; GFX10-NEXT:     priv = 0
@@ -3245,7 +3245,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
 ; GFX10-NEXT:     gds_segment_byte_size = 0
 ; GFX10-NEXT:     kernarg_segment_byte_size = 28
 ; GFX10-NEXT:     workgroup_fbarrier_count = 0
-; GFX10-NEXT:     wavefront_sgpr_count = 18
+; GFX10-NEXT:     wavefront_sgpr_count = 10
 ; GFX10-NEXT:     workitem_vgpr_count = 3
 ; GFX10-NEXT:     reserved_vgpr_first = 0
 ; GFX10-NEXT:     reserved_vgpr_count = 0
@@ -3294,7 +3294,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
 ; GFX11-NEXT:     kernel_code_entry_byte_offset = 256
 ; GFX11-NEXT:     kernel_code_prefetch_byte_size = 0
 ; GFX11-NEXT:     granulated_workitem_vgpr_count = 0
-; GFX11-NEXT:     granulated_wavefront_sgpr_count = 1
+; GFX11-NEXT:     granulated_wavefront_sgpr_count = 0
 ; GFX11-NEXT:     priority = 0
 ; GFX11-NEXT:     float_mode = 240
 ; GFX11-NEXT:     priv = 0
@@ -3337,7 +3337,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
 ; GFX11-NEXT:     gds_segment_byte_size = 0
 ; GFX11-NEXT:     kernarg_segment_byte_size = 28
 ; GFX11-NEXT:     workgroup_fbarrier_count = 0
-; GFX11-NEXT:     wavefront_sgpr_count = 16
+; GFX11-NEXT:     wavefront_sgpr_count = 7
 ; GFX11-NEXT:     workitem_vgpr_count = 3
 ; GFX11-NEXT:     reserved_vgpr_first = 0
 ; GFX11-NEXT:     reserved_vgpr_count = 0
@@ -4034,7 +4034,7 @@ define amdgpu_kernel void @dyn_e...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/144039


More information about the llvm-commits mailing list