[llvm] c27e814 - [AMDGPU][IndirectCalls] Fix register usage propagation for indirect/external calls

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 11 23:29:49 PDT 2021


Author: madhur13490
Date: 2021-06-12T11:59:34+05:30
New Revision: c27e8141b3d1265d2ab1cb951c4330b961fab9ee

URL: https://github.com/llvm/llvm-project/commit/c27e8141b3d1265d2ab1cb951c4330b961fab9ee
DIFF: https://github.com/llvm/llvm-project/commit/c27e8141b3d1265d2ab1cb951c4330b961fab9ee.diff

LOG: [AMDGPU][IndirectCalls] Fix register usage propagation for indirect/external calls

This patch computes the maximum number of SGPRs and VGPRs used by
the module in the presence of indirect calls and uses that
as the register requirement for functions/kernels
which make indirect calls.

This patch also refactors code in AMDGPUSubtarget.cpp, adding "base"
variants of getMaxNumSGPRs, getMaxNumVGPRs and getReservedNumSGPRs which
are shared by the MachineFunction versions and the new Function versions.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D103636

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/lib/Target/AMDGPU/GCNSubtarget.h
    llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
    llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
    llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
    llvm/test/CodeGen/AMDGPU/indirect-call.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 8a987f0288bee..d3ac254d7e83c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -627,6 +627,21 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   return false;
 }
 
+bool AMDGPUAsmPrinter::doInitialization(Module &M) {
+  NonKernelMaxSGPRs = 0;
+  NonKernelMaxVGPRs = 0;
+  // Compute upper bound on the number of SGPRs and VGPRs
+  // for non-kernel functions.
+  for (const Function &F : M) {
+    if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
+      const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
+      NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, STM.getMaxNumSGPRs(F));
+      NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, STM.getMaxNumVGPRs(F));
+    }
+  }
+  return AsmPrinter::doInitialization(M);
+}
+
 // TODO: Fold this into emitFunctionBodyStart.
 void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
   // In the beginning all features are either 'Any' or 'NotSupported',
@@ -1020,14 +1035,20 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
               AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
             report_fatal_error("invalid call to entry function");
 
-          // If this is a call to an external function, we can't do much. Make
-          // conservative guesses.
-
-          // 48 SGPRs - vcc, - flat_scr, -xnack
-          int MaxSGPRGuess =
-            47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
-          MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
-          MaxVGPR = std::max(MaxVGPR, 23);
+          unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
+              TM.getMCSubtargetInfo(), false, ST.hasFlatAddressSpace());
+          // If this is a call to an external function, we use the
+          // max values computed in doInitialization().
+          // For indirect calls, we take the max for the module
+          // and use that as the register budget for functions
+          // which make indirect calls.
+          // This max value includes the extra SGPRs too
+          // (e.g. flat scratch and vcc), which get added
+          // again later. Subtract them here so that they
+          // don't get added twice.
+          MaxSGPR = NonKernelMaxSGPRs - ExtraSGPRs - 1;
+          MaxVGPR = NonKernelMaxVGPRs - 1;
+          // TODO: handle AGPRs
           MaxAGPR = std::max(MaxAGPR, 23);
 
           CalleeFrameSize = std::max(CalleeFrameSize,

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index fc29f9f2051cf..ac7f5f29e5d25 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -58,6 +58,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
 
   void initializeTargetID(const Module &M);
 
+  bool doInitialization(Module &M) override;
+
   SIProgramInfo CurrentProgramInfo;
   DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
 
@@ -101,6 +103,11 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
   explicit AMDGPUAsmPrinter(TargetMachine &TM,
                             std::unique_ptr<MCStreamer> Streamer);
 
+  // To memoize max SGPR usage of non-kernel functions of the module.
+  unsigned NonKernelMaxSGPRs = 0;
+  // To memoize max VGPR usage of non-kernel functions of the module.
+  unsigned NonKernelMaxVGPRs = 0;
+
   StringRef getPassName() const override;
 
   const MCSubtargetInfo* getGlobalSTI() const;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 3540a10f2a040..1728f47258588 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -698,12 +698,12 @@ unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
   return std::min(std::max(getTotalNumVGPRs() / RoundedRegs, 1u), MaxWaves);
 }
 
-unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
-  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+unsigned
+GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const {
   if (getGeneration() >= AMDGPUSubtarget::GFX10)
     return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.
 
-  if (MFI.hasFlatScratchInit()) {
+  if (HasFlatScratchInit) {
     if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
       return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
     if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
@@ -715,6 +715,26 @@ unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
   return 2; // VCC.
 }
 
+unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
+  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+  return getBaseReservedNumSGPRs(MFI.hasFlatScratchInit());
+}
+
+unsigned GCNSubtarget::getReservedNumSGPRs(const Function &F) const {
+  // The logic to detect if the function has
+  // flat scratch init is same as how MachineFunctionInfo derives.
+  bool FunctionHasFlatScratchInit = false;
+  bool HasCalls = F.hasFnAttribute("amdgpu-calls");
+  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
+  if (hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(F.getCallingConv()) &&
+      (isAmdHsaOrMesa(F) || enableFlatScratch()) &&
+      !flatScratchIsArchitected()) {
+    if (HasCalls || HasStackObjects || enableFlatScratch())
+      FunctionHasFlatScratchInit = true;
+  }
+  return getBaseReservedNumSGPRs(FunctionHasFlatScratchInit);
+}
+
 unsigned GCNSubtarget::computeOccupancy(const Function &F, unsigned LDSSize,
                                         unsigned NumSGPRs,
                                         unsigned NumVGPRs) const {
@@ -728,13 +748,11 @@ unsigned GCNSubtarget::computeOccupancy(const Function &F, unsigned LDSSize,
   return Occupancy;
 }
 
-unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
-  const Function &F = MF.getFunction();
-  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
-
+unsigned GCNSubtarget::getBaseMaxNumSGPRs(
+    const Function &F, std::pair<unsigned, unsigned> WavesPerEU,
+    unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const {
   // Compute maximum number of SGPRs function can use using default/requested
   // minimum number of waves per execution unit.
-  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
   unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
   unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);
 
@@ -745,7 +763,7 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
       F, "amdgpu-num-sgpr", MaxNumSGPRs);
 
     // Make sure requested value does not violate subtarget's specifications.
-    if (Requested && (Requested <= getReservedNumSGPRs(MF)))
+    if (Requested && (Requested <= ReservedNumSGPRs))
       Requested = 0;
 
     // If more SGPRs are required to support the input user/system SGPRs,
@@ -755,7 +773,7 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
     // of reserved special registers in total. Theoretically you could re-use
     // the last input registers for these special registers, but this would
     // require a lot of complexity to deal with the weird aliasing.
-    unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
+    unsigned InputNumSGPRs = PreloadedSGPRs;
     if (Requested && Requested < InputNumSGPRs)
       Requested = InputNumSGPRs;
 
@@ -774,17 +792,43 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
   if (hasSGPRInitBug())
     MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
 
-  return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
-                  MaxAddressableNumSGPRs);
+  return std::min(MaxNumSGPRs - ReservedNumSGPRs, MaxAddressableNumSGPRs);
 }
 
-unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
+unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
   const Function &F = MF.getFunction();
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+  return getBaseMaxNumSGPRs(F, MFI.getWavesPerEU(), MFI.getNumPreloadedSGPRs(),
+                            getReservedNumSGPRs(MF));
+}
+
+static unsigned getMaxNumPreloadedSGPRs() {
+  // Max number of user SGPRs
+  unsigned MaxUserSGPRs = 4 + // private segment buffer
+                          2 + // Dispatch ptr
+                          2 + // queue ptr
+                          2 + // kernel segment ptr
+                          2 + // dispatch ID
+                          2 + // flat scratch init
+                          2;  // Implicit buffer ptr
+  // Max number of system SGPRs
+  unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
+                            1 + // WorkGroupIDY
+                            1 + // WorkGroupIDZ
+                            1 + // WorkGroupInfo
+                            1;  // private segment wave byte offset
+  return MaxUserSGPRs + MaxSystemSGPRs;
+}
+
+unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const {
+  return getBaseMaxNumSGPRs(F, getWavesPerEU(F), getMaxNumPreloadedSGPRs(),
+                            getReservedNumSGPRs(F));
+}
 
+unsigned GCNSubtarget::getBaseMaxNumVGPRs(
+    const Function &F, std::pair<unsigned, unsigned> WavesPerEU) const {
   // Compute maximum number of VGPRs function can use using default/requested
   // minimum number of waves per execution unit.
-  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
   unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
 
   // Check if maximum number of VGPRs was explicitly requested using
@@ -811,6 +855,16 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
   return MaxNumVGPRs;
 }
 
+unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F) const {
+  return getBaseMaxNumVGPRs(F, getWavesPerEU(F));
+}
+
+unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
+  const Function &F = MF.getFunction();
+  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+  return getBaseMaxNumVGPRs(F, MFI.getWavesPerEU());
+}
+
 void GCNSubtarget::adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use,
                                          int UseOpIdx, SDep &Dep) const {
   if (Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||

diff  --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 1cf4043e46745..39abb00680b3b 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1033,9 +1033,24 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
   }
 
-  /// \returns Reserved number of SGPRs for given function \p MF.
+  /// \returns Reserved number of SGPRs. This is common
+  /// utility function called by MachineFunction and
+  /// Function variants of getReservedNumSGPRs.
+  unsigned getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const;
+  /// \returns Reserved number of SGPRs for given machine function \p MF.
   unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
 
+  /// \returns Reserved number of SGPRs for given function \p F.
+  unsigned getReservedNumSGPRs(const Function &F) const;
+
+  /// \returns max num SGPRs. This is the common utility
+  /// function called by MachineFunction and Function
+  /// variants of getMaxNumSGPRs.
+  unsigned getBaseMaxNumSGPRs(const Function &F,
+                              std::pair<unsigned, unsigned> WavesPerEU,
+                              unsigned PreloadedSGPRs,
+                              unsigned ReservedNumSGPRs) const;
+
   /// \returns Maximum number of SGPRs that meets number of waves per execution
   /// unit requirement for function \p MF, or number of SGPRs explicitly
   /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
@@ -1046,6 +1061,16 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// unit requirement.
   unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
 
+  /// \returns Maximum number of SGPRs that meets number of waves per execution
+  /// unit requirement for function \p F, or number of SGPRs explicitly
+  /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
+  ///
+  /// \returns Value that meets number of waves per execution unit requirement
+  /// if explicitly requested value cannot be converted to integer, violates
+  /// subtarget's specifications, or does not meet number of waves per execution
+  /// unit requirement.
+  unsigned getMaxNumSGPRs(const Function &F) const;
+
   /// \returns VGPR allocation granularity supported by the subtarget.
   unsigned getVGPRAllocGranule() const {
     return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
@@ -1078,6 +1103,20 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
   }
 
+  /// \returns max num VGPRs. This is the common utility function
+  /// called by MachineFunction and Function variants of getMaxNumVGPRs.
+  unsigned getBaseMaxNumVGPRs(const Function &F,
+                              std::pair<unsigned, unsigned> WavesPerEU) const;
+  /// \returns Maximum number of VGPRs that meets number of waves per execution
+  /// unit requirement for function \p F, or number of VGPRs explicitly
+  /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
+  ///
+  /// \returns Value that meets number of waves per execution unit requirement
+  /// if explicitly requested value cannot be converted to integer, violates
+  /// subtarget's specifications, or does not meet number of waves per execution
+  /// unit requirement.
+  unsigned getMaxNumVGPRs(const Function &F) const;
+
   /// \returns Maximum number of VGPRs that meets number of waves per execution
   /// unit requirement for function \p MF, or number of VGPRs explicitly
   /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.

diff  --git a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
index fe031096479ad..3155013c7fbe9 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
@@ -154,21 +154,22 @@ bb:
 declare void @undef_func()
 
 ; GCN-LABEL: {{^}}kernel_call_undef_func:
-; GFX908: .amdhsa_next_free_vgpr 24
-; GFX90A: .amdhsa_next_free_vgpr 48
-; GFX90A: .amdhsa_accum_offset 24
-; GCN:    NumVgprs: 24
+; GFX908: .amdhsa_next_free_vgpr 128
+; GFX90A: .amdhsa_next_free_vgpr 280
+; GFX90A: .amdhsa_accum_offset 256
+; GCN908: NumVgprs: 128
+; GCN90A: NumVgprs: 256
 ; GCN:    NumAgprs: 24
-; GFX908: TotalNumVgprs: 24
-; GFX90A: TotalNumVgprs: 48
-; GFX908: VGPRBlocks: 5
-; GFX90A: VGPRBlocks: 5
-; GFX908: NumVGPRsForWavesPerEU: 24
-; GFX90A: NumVGPRsForWavesPerEU: 48
-; GFX90A: AccumOffset: 24
-; GFX908: Occupancy: 10
-; GFX90A: Occupancy: 8
-; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 5
+; GFX908: TotalNumVgprs: 128
+; GFX90A: TotalNumVgprs: 280
+; GFX908: VGPRBlocks: 31
+; GFX90A: VGPRBlocks: 34
+; GFX908: NumVGPRsForWavesPerEU: 128
+; GFX90A: NumVGPRsForWavesPerEU: 280
+; GFX90A: AccumOffset: 256
+; GFX908: Occupancy: 2
+; GFX90A: Occupancy: 1
+; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 63
 define amdgpu_kernel void @kernel_call_undef_func() #0 {
 bb:
   call void @undef_func()

diff  --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
index a1f4d67acb158..7ecc73406e1eb 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
@@ -147,9 +147,9 @@ attributes #0 = { nounwind }
 
 ; GCN: amdpal.pipelines:
 ; GCN-NEXT:  - .registers:
-; SDAG-NEXT:      0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
+; SDAG-NEXT:      0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
 ; SDAG-NEXT:      0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
-; GISEL-NEXT:      0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ce{{$}}
+; GISEL-NEXT:      0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
 ; GISEL-NEXT:      0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
 ; GCN-NEXT:    .shader_functions:
 ; GCN-NEXT:      dynamic_stack:

diff  --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
index 7b5329b8d9cbe..d81b628a9c13b 100644
--- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -227,10 +227,10 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
 ; Make sure there's no assert when a sgpr96 is used.
 ; GCN-LABEL: {{^}}count_use_sgpr96_external_call
 ; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}]
-; CI: NumSgprs: 48
-; VI-NOBUG: NumSgprs: 48
+; CI: NumSgprs: 102
+; VI-NOBUG: NumSgprs: 102
 ; VI-BUG: NumSgprs: 96
-; GCN: NumVgprs: 24
+; GCN: NumVgprs: 64
 define amdgpu_kernel void @count_use_sgpr96_external_call()  {
 entry:
   tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
@@ -241,10 +241,10 @@ entry:
 ; Make sure there's no assert when a sgpr160 is used.
 ; GCN-LABEL: {{^}}count_use_sgpr160_external_call
 ; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}]
-; CI: NumSgprs: 48
-; VI-NOBUG: NumSgprs: 48
+; CI: NumSgprs: 102
+; VI-NOBUG: NumSgprs: 102
 ; VI-BUG: NumSgprs: 96
-; GCN: NumVgprs: 24
+; GCN: NumVgprs: 64
 define amdgpu_kernel void @count_use_sgpr160_external_call()  {
 entry:
   tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
@@ -255,10 +255,10 @@ entry:
 ; Make sure there's no assert when a vgpr160 is used.
 ; GCN-LABEL: {{^}}count_use_vgpr160_external_call
 ; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}]
-; CI: NumSgprs: 48
-; VI-NOBUG: NumSgprs: 48
+; CI: NumSgprs: 102
+; VI-NOBUG: NumSgprs: 102
 ; VI-BUG: NumSgprs: 96
-; GCN: NumVgprs: 24
+; GCN: NumVgprs: 64
 define amdgpu_kernel void @count_use_vgpr160_external_call()  {
 entry:
   tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1

diff  --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
index 59039e5c650d2..707557b52398f 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -15,8 +15,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr() {
 ; GCN-NEXT:     amd_machine_version_stepping = 0
 ; GCN-NEXT:     kernel_code_entry_byte_offset = 256
 ; GCN-NEXT:     kernel_code_prefetch_byte_size = 0
-; GCN-NEXT:     granulated_workitem_vgpr_count = 7
-; GCN-NEXT:     granulated_wavefront_sgpr_count = 5
+; GCN-NEXT:     granulated_workitem_vgpr_count = 15
+; GCN-NEXT:     granulated_wavefront_sgpr_count = 12
 ; GCN-NEXT:     priority = 0
 ; GCN-NEXT:     float_mode = 240
 ; GCN-NEXT:     priv = 0
@@ -59,8 +59,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr() {
 ; GCN-NEXT:     gds_segment_byte_size = 0
 ; GCN-NEXT:     kernarg_segment_byte_size = 0
 ; GCN-NEXT:     workgroup_fbarrier_count = 0
-; GCN-NEXT:     wavefront_sgpr_count = 48
-; GCN-NEXT:     workitem_vgpr_count = 32
+; GCN-NEXT:     wavefront_sgpr_count = 102
+; GCN-NEXT:     workitem_vgpr_count = 64
 ; GCN-NEXT:     reserved_vgpr_first = 0
 ; GCN-NEXT:     reserved_vgpr_count = 0
 ; GCN-NEXT:     reserved_sgpr_first = 0
@@ -111,8 +111,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg() {
 ; GCN-NEXT:     amd_machine_version_stepping = 0
 ; GCN-NEXT:     kernel_code_entry_byte_offset = 256
 ; GCN-NEXT:     kernel_code_prefetch_byte_size = 0
-; GCN-NEXT:     granulated_workitem_vgpr_count = 7
-; GCN-NEXT:     granulated_wavefront_sgpr_count = 5
+; GCN-NEXT:     granulated_workitem_vgpr_count = 15
+; GCN-NEXT:     granulated_wavefront_sgpr_count = 12
 ; GCN-NEXT:     priority = 0
 ; GCN-NEXT:     float_mode = 240
 ; GCN-NEXT:     priv = 0
@@ -155,8 +155,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg() {
 ; GCN-NEXT:     gds_segment_byte_size = 0
 ; GCN-NEXT:     kernarg_segment_byte_size = 0
 ; GCN-NEXT:     workgroup_fbarrier_count = 0
-; GCN-NEXT:     wavefront_sgpr_count = 48
-; GCN-NEXT:     workitem_vgpr_count = 32
+; GCN-NEXT:     wavefront_sgpr_count = 102
+; GCN-NEXT:     workitem_vgpr_count = 64
 ; GCN-NEXT:     reserved_vgpr_first = 0
 ; GCN-NEXT:     reserved_vgpr_count = 0
 ; GCN-NEXT:     reserved_sgpr_first = 0


        


More information about the llvm-commits mailing list