[llvm] 0e0ec4c - Revert "AMDGPU/PromoteAlloca: Simplify how deferred loads work (#170510)"
Nicolai Hähnle via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 5 13:23:45 PST 2025
Author: Nicolai Hähnle
Date: 2025-12-05T13:23:05-08:00
New Revision: 0e0ec4c348670864e18224dc99dd615c0d7e72f8
URL: https://github.com/llvm/llvm-project/commit/0e0ec4c348670864e18224dc99dd615c0d7e72f8
DIFF: https://github.com/llvm/llvm-project/commit/0e0ec4c348670864e18224dc99dd615c0d7e72f8.diff
LOG: Revert "AMDGPU/PromoteAlloca: Simplify how deferred loads work (#170510)"
This reverts commit 22a2c27a0aa0d3aa5d4222f6e766646166450543.
Failure on clang-hip-vega20: https://lab.llvm.org/buildbot/#/builders/123/builds/31779
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 73ec607014d31..77db14513254f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -502,14 +502,27 @@ static Value *promoteAllocaUserToVector(
Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
unsigned VecStoreSize, unsigned ElementSize,
DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
- std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx,
- function_ref<Value *()> GetCurVal) {
+ std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx, Value *CurVal,
+ SmallVectorImpl<LoadInst *> &DeferredLoads) {
// Note: we use InstSimplifyFolder because it can leverage the DataLayout
// to do more folding, especially in the case of vector splats.
IRBuilder<InstSimplifyFolder> Builder(Inst->getContext(),
InstSimplifyFolder(DL));
Builder.SetInsertPoint(Inst);
+ const auto GetOrLoadCurrentVectorValue = [&]() -> Value * {
+ if (CurVal)
+ return CurVal;
+
+ // If the current value is not known, insert a dummy load and lower it on
+ // the second pass.
+ LoadInst *Dummy =
+ Builder.CreateLoad(VectorTy, PoisonValue::get(Builder.getPtrTy()),
+ "promotealloca.dummyload");
+ DeferredLoads.push_back(Dummy);
+ return Dummy;
+ };
+
const auto CreateTempPtrIntCast = [&Builder, DL](Value *Val,
Type *PtrTy) -> Value * {
assert(DL.getTypeStoreSize(Val->getType()) == DL.getTypeStoreSize(PtrTy));
@@ -529,7 +542,12 @@ static Value *promoteAllocaUserToVector(
switch (Inst->getOpcode()) {
case Instruction::Load: {
- Value *CurVal = GetCurVal();
+ // Loads can only be lowered if the value is known.
+ if (!CurVal) {
+ DeferredLoads.push_back(cast<LoadInst>(Inst));
+ return nullptr;
+ }
+
Value *Index = calculateVectorIndex(
cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);
@@ -619,7 +637,7 @@ static Value *promoteAllocaUserToVector(
Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);
- Value *CurVec = GetCurVal();
+ Value *CurVec = GetOrLoadCurrentVectorValue();
for (unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
K < NumElts; ++K) {
Value *CurIdx =
@@ -632,7 +650,8 @@ static Value *promoteAllocaUserToVector(
if (Val->getType() != VecEltTy)
Val = Builder.CreateBitOrPointerCast(Val, VecEltTy);
- return Builder.CreateInsertElement(GetCurVal(), Val, Index);
+ return Builder.CreateInsertElement(GetOrLoadCurrentVectorValue(), Val,
+ Index);
}
case Instruction::Call: {
if (auto *MTI = dyn_cast<MemTransferInst>(Inst)) {
@@ -654,7 +673,7 @@ static Value *promoteAllocaUserToVector(
}
}
- return Builder.CreateShuffleVector(GetCurVal(), Mask);
+ return Builder.CreateShuffleVector(GetOrLoadCurrentVectorValue(), Mask);
}
if (auto *MSI = dyn_cast<MemSetInst>(Inst)) {
@@ -1019,44 +1038,37 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
Updater.AddAvailableValue(EntryBB, AllocaInitValue);
- // First handle the initial worklist, in basic block order.
- //
- // Insert a placeholder whenever we need the vector value at the top of a
- // basic block.
- SmallVector<Instruction *> Placeholders;
+ // First handle the initial worklist.
+ SmallVector<LoadInst *, 4> DeferredLoads;
forEachWorkListItem(WorkList, [&](Instruction *I) {
BasicBlock *BB = I->getParent();
- auto GetCurVal = [&]() -> Value * {
- if (Value *CurVal = Updater.FindValueForBlock(BB))
- return CurVal;
-
- // If the current value in the basic block is not yet known, insert a
- // placeholder that we will replace later.
- IRBuilder<> Builder(I);
- auto *Placeholder = cast<Instruction>(Builder.CreateFreeze(
- PoisonValue::get(VectorTy), "promotealloca.placeholder"));
- Placeholders.push_back(Placeholder);
- Updater.AddAvailableValue(BB, Placeholder);
- return Placeholder;
- };
-
- Value *Result =
- promoteAllocaUserToVector(I, *DL, VectorTy, VecStoreSize, ElementSize,
- TransferInfo, GEPVectorIdx, GetCurVal);
+ // On the first pass, we only take values that are trivially known, i.e.
+ // where AddAvailableValue was already called in this block.
+ Value *Result = promoteAllocaUserToVector(
+ I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
+ Updater.FindValueForBlock(BB), DeferredLoads);
if (Result)
Updater.AddAvailableValue(BB, Result);
});
- // Now fixup the placeholders.
- for (Instruction *Placeholder : Placeholders) {
- Placeholder->replaceAllUsesWith(
- Updater.GetValueInMiddleOfBlock(Placeholder->getParent()));
- Placeholder->eraseFromParent();
- }
+ // Then handle deferred loads.
+ forEachWorkListItem(DeferredLoads, [&](Instruction *I) {
+ SmallVector<LoadInst *, 0> NewDLs;
+ BasicBlock *BB = I->getParent();
+ // On the second pass, we use GetValueInMiddleOfBlock to guarantee we always
+ // get a value, inserting PHIs as needed.
+ Value *Result = promoteAllocaUserToVector(
+ I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
+ Updater.GetValueInMiddleOfBlock(I->getParent()), NewDLs);
+ if (Result)
+ Updater.AddAvailableValue(BB, Result);
+ assert(NewDLs.empty() && "No more deferred loads should be queued!");
+ });
// Delete all instructions. On the first pass, new dummy loads may have been
// added so we need to collect them too.
DenseSet<Instruction *> InstsToDelete(WorkList.begin(), WorkList.end());
+ InstsToDelete.insert_range(DeferredLoads);
for (Instruction *I : InstsToDelete) {
assert(I->use_empty());
I->eraseFromParent();
More information about the llvm-commits mailing list