[llvm] 0e0ec4c - Revert "AMDGPU/PromoteAlloca: Simplify how deferred loads work (#170510)"

Fri Dec 5 13:23:45 PST 2025

Author: Nicolai Hähnle
Date: 2025-12-05T13:23:05-08:00
New Revision: 0e0ec4c348670864e18224dc99dd615c0d7e72f8

URL: https://github.com/llvm/llvm-project/commit/0e0ec4c348670864e18224dc99dd615c0d7e72f8
DIFF: https://github.com/llvm/llvm-project/commit/0e0ec4c348670864e18224dc99dd615c0d7e72f8.diff

LOG: Revert "AMDGPU/PromoteAlloca: Simplify how deferred loads work (#170510)"

This reverts commit 22a2c27a0aa0d3aa5d4222f6e766646166450543.

Reverted because of a failure on the clang-hip-vega20 buildbot: https://lab.llvm.org/buildbot/#/builders/123/builds/31779

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 73ec607014d31..77db14513254f 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -502,14 +502,27 @@ static Value *promoteAllocaUserToVector(
     Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
     unsigned VecStoreSize, unsigned ElementSize,
     DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
-    std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx,
-    function_ref<Value *()> GetCurVal) {
+    std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx, Value *CurVal,
+    SmallVectorImpl<LoadInst *> &DeferredLoads) {
   // Note: we use InstSimplifyFolder because it can leverage the DataLayout
   // to do more folding, especially in the case of vector splats.
   IRBuilder<InstSimplifyFolder> Builder(Inst->getContext(),
                                         InstSimplifyFolder(DL));
   Builder.SetInsertPoint(Inst);
 
+  const auto GetOrLoadCurrentVectorValue = [&]() -> Value * {
+    if (CurVal)
+      return CurVal;
+
+    // If the current value is not known, insert a dummy load and lower it on
+    // the second pass.
+    LoadInst *Dummy =
+        Builder.CreateLoad(VectorTy, PoisonValue::get(Builder.getPtrTy()),
+                           "promotealloca.dummyload");
+    DeferredLoads.push_back(Dummy);
+    return Dummy;
+  };
+
   const auto CreateTempPtrIntCast = [&Builder, DL](Value *Val,
                                                    Type *PtrTy) -> Value * {
     assert(DL.getTypeStoreSize(Val->getType()) == DL.getTypeStoreSize(PtrTy));
@@ -529,7 +542,12 @@ static Value *promoteAllocaUserToVector(
 
   switch (Inst->getOpcode()) {
   case Instruction::Load: {
-    Value *CurVal = GetCurVal();
+    // Loads can only be lowered if the value is known.
+    if (!CurVal) {
+      DeferredLoads.push_back(cast<LoadInst>(Inst));
+      return nullptr;
+    }
+
     Value *Index = calculateVectorIndex(
         cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);
 
@@ -619,7 +637,7 @@ static Value *promoteAllocaUserToVector(
 
       Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);
 
-      Value *CurVec = GetCurVal();
+      Value *CurVec = GetOrLoadCurrentVectorValue();
       for (unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
            K < NumElts; ++K) {
         Value *CurIdx =
@@ -632,7 +650,8 @@ static Value *promoteAllocaUserToVector(
 
     if (Val->getType() != VecEltTy)
       Val = Builder.CreateBitOrPointerCast(Val, VecEltTy);
-    return Builder.CreateInsertElement(GetCurVal(), Val, Index);
+    return Builder.CreateInsertElement(GetOrLoadCurrentVectorValue(), Val,
+                                       Index);
   }
   case Instruction::Call: {
     if (auto *MTI = dyn_cast<MemTransferInst>(Inst)) {
@@ -654,7 +673,7 @@ static Value *promoteAllocaUserToVector(
         }
       }
 
-      return Builder.CreateShuffleVector(GetCurVal(), Mask);
+      return Builder.CreateShuffleVector(GetOrLoadCurrentVectorValue(), Mask);
     }
 
     if (auto *MSI = dyn_cast<MemSetInst>(Inst)) {
@@ -1019,44 +1038,37 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
 
   Updater.AddAvailableValue(EntryBB, AllocaInitValue);
 
-  // First handle the initial worklist, in basic block order.
-  //
-  // Insert a placeholder whenever we need the vector value at the top of a
-  // basic block.
-  SmallVector<Instruction *> Placeholders;
+  // First handle the initial worklist.
+  SmallVector<LoadInst *, 4> DeferredLoads;
   forEachWorkListItem(WorkList, [&](Instruction *I) {
     BasicBlock *BB = I->getParent();
-    auto GetCurVal = [&]() -> Value * {
-      if (Value *CurVal = Updater.FindValueForBlock(BB))
-        return CurVal;
-
-      // If the current value in the basic block is not yet known, insert a
-      // placeholder that we will replace later.
-      IRBuilder<> Builder(I);
-      auto *Placeholder = cast<Instruction>(Builder.CreateFreeze(
-          PoisonValue::get(VectorTy), "promotealloca.placeholder"));
-      Placeholders.push_back(Placeholder);
-      Updater.AddAvailableValue(BB, Placeholder);
-      return Placeholder;
-    };
-
-    Value *Result =
-        promoteAllocaUserToVector(I, *DL, VectorTy, VecStoreSize, ElementSize,
-                                  TransferInfo, GEPVectorIdx, GetCurVal);
+    // On the first pass, we only take values that are trivially known, i.e.
+    // where AddAvailableValue was already called in this block.
+    Value *Result = promoteAllocaUserToVector(
+        I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
+        Updater.FindValueForBlock(BB), DeferredLoads);
     if (Result)
       Updater.AddAvailableValue(BB, Result);
   });
 
-  // Now fixup the placeholders.
-  for (Instruction *Placeholder : Placeholders) {
-    Placeholder->replaceAllUsesWith(
-        Updater.GetValueInMiddleOfBlock(Placeholder->getParent()));
-    Placeholder->eraseFromParent();
-  }
+  // Then handle deferred loads.
+  forEachWorkListItem(DeferredLoads, [&](Instruction *I) {
+    SmallVector<LoadInst *, 0> NewDLs;
+    BasicBlock *BB = I->getParent();
+    // On the second pass, we use GetValueInMiddleOfBlock to guarantee we always
+    // get a value, inserting PHIs as needed.
+    Value *Result = promoteAllocaUserToVector(
+        I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
+        Updater.GetValueInMiddleOfBlock(I->getParent()), NewDLs);
+    if (Result)
+      Updater.AddAvailableValue(BB, Result);
+    assert(NewDLs.empty() && "No more deferred loads should be queued!");
+  });
 
   // Delete all instructions. On the first pass, new dummy loads may have been
   // added so we need to collect them too.
   DenseSet<Instruction *> InstsToDelete(WorkList.begin(), WorkList.end());
+  InstsToDelete.insert_range(DeferredLoads);
   for (Instruction *I : InstsToDelete) {
     assert(I->use_empty());
     I->eraseFromParent();