[llvm] [SimplifyCFG] Introduce a heuristic code sinker to fold phi expressions (PR #128171)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 30 09:33:09 PDT 2025


================
@@ -2683,6 +2691,174 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
   return Changed;
 }
 
+using SinkCandidatesType = std::vector<std::vector<Instruction *>>;
+using HerusticGetCandidatesCallback = std::function<bool(
+    SinkCandidatesType &, bool &, BasicBlock *, DomTreeUpdater *)>;
+
+// There might be instruction like:
+// arr = alloca something
+// ...
+// phi (gep (arr ...), gep (arr ...), arr)
+// Transform the direct use of this arr to gep on this arr with zero offset and
+// of the same structure with previous phi. In the entry block of the function
+// we insert instruction like isomorphicGEP = gep arr 0, ...(following a set of
+// zero). And we only change the uses of the alloca in the specific PHINode.
+// Return true if there's any modification on the IR.
+static bool transformAllocaToIsomorphicGEP(PHINode &PN) {
+  // We need at least one GEP to construct a gep with zero offset for
+  // over-optimized alloca, if any.
+  GetElementPtrInst *oneGEP = nullptr;
+  for (auto &IncomingValue : PN.incoming_values())
+    if ((oneGEP = dyn_cast<GetElementPtrInst>(IncomingValue)))
+      break;
+
+  // If there is no GEP, we can't sink the instruction.
+  if (oneGEP == nullptr)
+    return false;
+
+  // There might be serveral identical alloca incoming from different blocks.
+  // We only need a same alloca.
+  AllocaInst *CandidateAlloca = nullptr;
+  // Get the function entry. We insert newly created isomorphicGEP there.
+  auto &EntryBB = PN.getParent()->getParent()->getEntryBlock();
+  auto NumOperands = oneGEP->getNumOperands();
+  for (auto &IncomingValue : PN.incoming_values()) {
+    if (auto AI = dyn_cast<AllocaInst>(IncomingValue)) {
+      // We only do this on static alloca. We fail the whole process.
+      if (!AI->isStaticAlloca())
+        return false;
+
+      if (CandidateAlloca != nullptr) {
+        if (CandidateAlloca == AI)
+          continue;
+        return false;
+      }
+      CandidateAlloca = AI;
+    }
+  }
+
+  if (CandidateAlloca == nullptr)
+    return false;
+
+  for (auto &IncomingValue : PN.incoming_values()) {
+    if (IncomingValue == CandidateAlloca)
+      continue;
+    if (auto GEPAsIncoming = dyn_cast<GetElementPtrInst>(IncomingValue)) {
+      if (GEPAsIncoming->getPointerOperand() != CandidateAlloca ||
+          GEPAsIncoming->getNumOperands() != NumOperands)
+        return false;
+    } else
+      return false;
+  }
+
+  // Change the alloca's use in this PHINode with corresonding gep.
+  GetElementPtrInst *IsomorphicGEP =
+      dyn_cast<GetElementPtrInst>(oneGEP->clone());
+  for (unsigned i = 1; i < NumOperands; ++i) {
+    auto IntTy = IsomorphicGEP->getOperand(i)->getType();
+    IsomorphicGEP->setOperand(i, ConstantInt::get(IntTy, 0));
+  }
+  PN.replaceUsesOfWith(CandidateAlloca, IsomorphicGEP);
+  IsomorphicGEP->insertBefore(EntryBB.getTerminator()->getIterator());
+
+  return true;
+}
+
+// Change phi (gep (same_ptr, ...), gep(same_ptr, ...), ...) to gep same_ptr
+// phi(...)
+static bool
+herusticGetPhiGEPCandidate(SinkCandidatesType &ResultsSinkCandidates,
+                           bool &Changed, BasicBlock *BB, DomTreeUpdater *DTU) {
+
+  // Collect all the predecessors.
+  auto Preds = std::vector<BasicBlock *>();
+  for (auto *PredBB : predecessors(BB))
+    Preds.push_back(PredBB);
+
+  // If it has less than 2 predecessor, just skip.
+  // TODO: Maybe we should only do for more predecessors.
+  if (Preds.size() < 2)
+    return false;
+
+  DenseMap<const Use *, SmallVector<Value *, 4>> PHIOperands;
+  for (PHINode &PN : BB->phis()) {
+    // If the same_ptr is an alloca, we try to transform it to gep with zero
+    // offset.
+    Changed |= transformAllocaToIsomorphicGEP(PN);
+    SmallDenseMap<BasicBlock *, const Use *, 4> IncomingVals;
+    for (const Use &U : PN.incoming_values())
+      IncomingVals.insert({PN.getIncomingBlock(U), &U});
+    auto &Ops = PHIOperands[IncomingVals[Preds[0]]];
+    for (BasicBlock *Pred : Preds)
+      Ops.push_back(*IncomingVals[Pred]);
----------------
sharkautarch wrote:

It seems like I've found a weird edge-case issue with the above line of code: when building llvm w/ a new enough llvm, this code started to indirectly cause segfaults.
This happens when there's a Pred BB that isn't one of the current PN's incoming values' BB
When that happens, `operator []()` creates a new (empty? not sure what constructor runs in this case) entry in `IncomingVals`, and appends an "empty" `Value` to a vector in `PHIOperands`

Simple patch I applied to fix this issue:
```
@@ -2789,8 +2789,9 @@
     for (const Use &U : PN.incoming_values())
       IncomingVals.insert({PN.getIncomingBlock(U), &U});
     auto &Ops = PHIOperands[IncomingVals[Preds[0]]];
-    for (BasicBlock *Pred : Preds)
-      Ops.push_back(*IncomingVals[Pred]);
+    for (BasicBlock *Pred : Preds)
+      if (auto&& iter = IncomingVals.find(Pred); iter != IncomingVals.end())
+        Ops.push_back( *(iter->getSecond()) );
   }
 
   SinkCandidatesType SinkCandidates;
``` 

https://github.com/llvm/llvm-project/pull/128171


More information about the llvm-commits mailing list