[llvm] fd1d608 - [AMDGPU] Remove CC exception for Promote Alloca Limits
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 12 23:48:39 PDT 2023
Author: pvanhout
Date: 2023-04-13T08:48:34+02:00
New Revision: fd1d60873fdce6e908c9865ddf925f2616fccd55
URL: https://github.com/llvm/llvm-project/commit/fd1d60873fdce6e908c9865ddf925f2616fccd55
DIFF: https://github.com/llvm/llvm-project/commit/fd1d60873fdce6e908c9865ddf925f2616fccd55.diff
LOG: [AMDGPU] Remove CC exception for Promote Alloca Limits
The exception capped MaxVGPRs at 32 for non-entry functions; apparently it was used to work around some issue that has since been fixed.
Removing it helps with the high scratch usage observed in some cases due to failed alloca promotion.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D145586
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 2fe5fbebf7c19..27392aba20de1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -124,6 +124,14 @@ class AMDGPUPromoteAllocaToVector : public FunctionPass {
}
};
+unsigned getMaxVGPRs(const TargetMachine &TM, const Function &F) {
+ if (!TM.getTargetTriple().isAMDGCN())
+ return 128;
+
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ return ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
+}
+
} // end anonymous namespace
char AMDGPUPromoteAlloca::ID = 0;
@@ -176,16 +184,7 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F) {
if (!ST.isPromoteAllocaEnabled())
return false;
- if (IsAMDGCN) {
- const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
- MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
- // A non-entry function has only 32 caller preserved registers.
- // Do not promote alloca which will force spilling.
- if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
- MaxVGPRs = std::min(MaxVGPRs, 32u);
- } else {
- MaxVGPRs = 128;
- }
+ MaxVGPRs = getMaxVGPRs(TM, F);
bool SufficientLDS = hasSufficientLocalMem(F);
bool Changed = false;
@@ -1200,17 +1199,7 @@ bool promoteAllocasToVector(Function &F, TargetMachine &TM) {
if (!ST.isPromoteAllocaEnabled())
return false;
- unsigned MaxVGPRs;
- if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
- const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
- MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
- // A non-entry function has only 32 caller preserved registers.
- // Do not promote alloca which will force spilling.
- if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
- MaxVGPRs = std::min(MaxVGPRs, 32u);
- } else {
- MaxVGPRs = 128;
- }
+ const unsigned MaxVGPRs = getMaxVGPRs(TM, F);
bool Changed = false;
BasicBlock &EntryBB = *F.begin();
diff --git a/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll
index 3afc22a5fac2e..dccf1c7021a37 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll
@@ -139,8 +139,8 @@ entry:
}
; OPT-LABEL: @func_alloca_9xi64_max256(
-; OPT: alloca
-; OPT-NOT: <9 x i64>
+; OPT-NOT: alloca
+; OPT: <9 x i64>
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
define void @func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {
More information about the llvm-commits mailing list