[llvm] Limit Alloca->LDS promotion based on speculations as to eventual register pressure (PR #152814)
Patrick Simmons via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 8 16:43:14 PDT 2025
https://github.com/linuxrocks123 created https://github.com/llvm/llvm-project/pull/152814
This attempts to resolve SWDEV-547512 by inhibiting alloca to LDS promotion when register pressure is low. Draft because:
- There are no tests.
- The heuristic probably needs to be refined.
- This may not even be a good idea; performance testing must be completed to determine that.
From cecde92e51ffbba859851ad6dd58adf8cf88f909 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Fri, 8 Aug 2025 18:37:07 -0500
Subject: [PATCH] Limit Alloca->LDS promotion based on speculations as to
eventual register pressure
---
.../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 88 +++++++++++++++++++
1 file changed, 88 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index f226c7f381aa2..0dd327207fed6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -27,7 +27,9 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
+#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
@@ -36,6 +38,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
@@ -45,6 +48,9 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <algorithm>
+#include <unordered_set>
+
#define DEBUG_TYPE "amdgpu-promote-alloca"
using namespace llvm;
@@ -100,6 +106,14 @@ class AMDGPUPromoteAllocaImpl {
unsigned VGPRBudgetRatio;
unsigned MaxVectorRegs;
+ std::unordered_map<BasicBlock *, std::unordered_set<Instruction *>>
+ SGPRLiveIns;
+ size_t getSGPRPressureEstimate(AllocaInst &I);
+
+ std::unordered_map<BasicBlock *, std::unordered_set<Instruction *>>
+ VGPRLiveIns;
+ size_t getVGPRPressureEstimate(AllocaInst &I);
+
bool IsAMDGCN = false;
bool IsAMDHSA = false;
@@ -1471,9 +1485,83 @@ bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) {
return true;
}
+/// Rough estimate of how many non-vector-typed IR values are live around the
+/// alloca \p I.
+///
+/// Blocks of the enclosing function are visited in post order and each block
+/// is scanned bottom-up while a set of "currently live" instructions is
+/// maintained: an instruction enters the set when it appears as a non-vector
+/// operand and leaves it when its own (non-vector) definition is reached.
+/// Once the scan arrives at \p I, the result is the mean of the largest set
+/// size observed so far and the set size at that point.
+///
+/// NOTE(review): nothing visible in this patch inserts into SGPRLiveIns. If
+/// that holds for the whole file, both the early skip and the successor merge
+/// below never fire, so liveness is effectively tracked per block only --
+/// confirm.
+///
+/// \param I alloca whose surrounding pressure is being estimated.
+/// \returns (peak live count + live count at \p I) / 2. If the traversal
+/// never encounters \p I, llvm_unreachable is hit.
+size_t AMDGPUPromoteAllocaImpl::getSGPRPressureEstimate(AllocaInst &I) {
+  Function &F = *I.getParent()->getParent();
+  size_t PeakLive = 0;
+
+  for (BasicBlock *Block : post_order(&F)) {
+    // Blocks that already have recorded live-ins are not rescanned.
+    if (SGPRLiveIns.find(Block) != SGPRLiveIns.end())
+      continue;
+
+    // Start from the union of whatever live-ins are recorded for successors.
+    std::unordered_set<Instruction *> Live;
+    for (BasicBlock *Succ : successors(Block)) {
+      auto Recorded = SGPRLiveIns.find(Succ);
+      if (Recorded != SGPRLiveIns.end())
+        Live.insert(Recorded->second.begin(), Recorded->second.end());
+    }
+
+    // Bottom-up scan of the block.
+    for (Instruction &Inst : reverse(*Block)) {
+      if (&Inst == &I)
+        return (PeakLive + Live.size()) / 2;
+
+      // The peak is sampled before this instruction's operands are added.
+      PeakLive = std::max(PeakLive, Live.size());
+
+      // Every non-vector operand defined by an instruction becomes live.
+      for (Use &Operand : Inst.operands()) {
+        Value *V = Operand.get();
+        if (V->getType()->isVectorTy())
+          continue;
+        if (auto *Def = dyn_cast<Instruction>(V))
+          Live.insert(Def);
+      }
+
+      // A non-vector result stops being live above its definition.
+      if (!Inst.getType()->isVectorTy())
+        Live.erase(&Inst);
+    }
+  }
+
+  llvm_unreachable("Woops, we fell off the edge of the world. Bye bye.");
+}
+
+/// Rough estimate of how many vector-typed IR values are live around the
+/// alloca \p I.
+///
+/// Mirrors getSGPRPressureEstimate, but only values whose IR type is a vector
+/// are tracked. Blocks are visited in post order and scanned bottom-up; an
+/// instruction becomes live when it is used as a vector operand and stops
+/// being live once its own (vector) definition is reached.
+///
+/// NOTE(review): nothing visible in this patch inserts into VGPRLiveIns. If
+/// that holds for the whole file, the early skip and the successor merge
+/// below never fire -- confirm.
+///
+/// \param I alloca whose surrounding pressure is being estimated.
+/// \returns (peak live count + live count at \p I) / 2. If the traversal
+/// never encounters \p I, llvm_unreachable is hit.
+size_t AMDGPUPromoteAllocaImpl::getVGPRPressureEstimate(AllocaInst &I) {
+  Function &F = *I.getParent()->getParent();
+  size_t MaxLive = 0;
+  for (BasicBlock *BB : post_order(&F)) {
+    // Blocks that already have recorded live-ins are not rescanned.
+    if (VGPRLiveIns.find(BB) != VGPRLiveIns.end())
+      continue;
+
+    // Seed with the union of the live-ins recorded for the successors.
+    std::unordered_set<Instruction *> CurrentlyLive;
+    for (BasicBlock *SuccBB : successors(BB)) {
+      auto LiveInIt = VGPRLiveIns.find(SuccBB);
+      if (LiveInIt != VGPRLiveIns.end())
+        CurrentlyLive.insert(LiveInIt->second.begin(), LiveInIt->second.end());
+    }
+
+    for (auto RIt = BB->rbegin(); RIt != BB->rend(); ++RIt) {
+      // Average the peak and the current count. The division must apply to
+      // the whole sum, as in getSGPRPressureEstimate; previously only the
+      // current count was halved because of a misplaced parenthesis.
+      if (&*RIt == &I)
+        return (MaxLive + CurrentlyLive.size()) / 2;
+
+      // The peak is sampled before this instruction's operands are added.
+      MaxLive = std::max(MaxLive, CurrentlyLive.size());
+
+      // Every vector operand defined by an instruction becomes live.
+      for (auto &Op : RIt->operands())
+        if (Op.get()->getType()->isVectorTy())
+          if (Instruction *U = dyn_cast<Instruction>(Op))
+            CurrentlyLive.insert(U);
+
+      // A vector result stops being live above its definition.
+      if (RIt->getType()->isVectorTy())
+        CurrentlyLive.erase(&*RIt);
+    }
+  }
+
+  llvm_unreachable("Woops, we fell off the edge of the world. Bye bye.");
+}
+
// FIXME: Should try to pick the most likely to be profitable allocas first.
bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I,
bool SufficientLDS) {
+ const unsigned SGPRPressureLimit = AMDGPU::SGPR_32RegClass.getNumRegs();
+ const unsigned VGPRPressureLimit = AMDGPU::VGPR_32RegClass.getNumRegs();
+
+ if (getSGPRPressureEstimate(I) < SGPRPressureLimit &&
+ getVGPRPressureEstimate(I) < VGPRPressureLimit) {
+ LLVM_DEBUG(dbgs() << "Declining to promote " << I
+ << " to LDS since pressure is relatively low.\n");
+ return false;
+ }
+
LLVM_DEBUG(dbgs() << "Trying to promote to LDS: " << I << '\n');
if (DisablePromoteAllocaToLDS) {
More information about the llvm-commits
mailing list