[llvm] [SimplifyCFG] Introduce a heuristic code sinker to fold phi expressions (PR #128171)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 10 07:03:51 PDT 2025
sharkautarch wrote:
@u4f3 Update: I had recently noticed crashes were still happening w/ this PR even w/ my previously mentioned patch applied
But now I think I've found the real root issue: things can break when `SinkCandidates` of type `std::vector<std::vector<Instruction*>>` has more than one vector.
This happens because running `sinkCandidatesImpl()` on the Nth group of Candidates can lead to `sinkCandidatesImpl()` replacing instructions that also happen to be part of the (N+1)th group of Candidates
patching this PR so that `herusticGetPhiGEPCandidate()` only returns one group of Candidates at a time, fetching the next after sinking the current group, seems to mostly fix things. Though for inexplicable reasons, I've still found a rare case where `PN->use_empty() == true` in `sinkCandidatesImpl()`, but that seems to be simply worked around by just adding a `if (PN->use_empty()) continue;` in the applicable loop in `sinkCandidatesImpl()`
Also, when building w/ asserts on, seems that `assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");` in `sinkCandidatesImpl()` will fail, but since this seems to only make sense when `sinkCandidatesImpl()` is called from `sinkLastInstruction()`, I just disabled that assert when called from `herusticPhiSinker()`
W/ patches applied, all llvm tests except for `llvm-project/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll` pass when run with asserts enabled
The one failing test fails the same assert previously mentioned : `assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");` but in that case `sinkCandidatesImpl()` wasn't called from `herusticPhiSinker()` when the assert failed. Not sure why that assert is failing in that case, but seems to be only happen w/ that test enabling the off-by-default `sink-common-insts` simplifycfg option
This is the current patch I've been applying, which currently has extra changes that weren't mentioned above, but I think the parts of the patch I haven't mentioned above are probably unnecessary, but not entirely sure yet:
```
--- llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp 2025-06-10 09:33:32.124525613 -0400
+++ SimplifyCFG.cpp 2025-06-10 09:33:05.422285255 -0400
@@ -2288,13 +2288,20 @@
static void sinkCandidatesImpl(BasicBlock *BBEnd,
ArrayRef<BasicBlock *> IncomingBBs,
- ArrayRef<Instruction *> Insts) {
+ ArrayRef<Instruction *> Insts, bool allowOtherInstsToHaveNonDbgUsers = false) {
assert(IncomingBBs.size() == Insts.size() &&
"We already guarentee they are the same size!");
// We don't need to do any more checking here; canSinkInstructions should
// have done it all for us.
SmallVector<Value*, 4> NewOperands;
Instruction *I0 = Insts.front();
+ enum LocalPhiWarningKinds : uint8_t {
+ WarningNone = 0b000,
+ ParentlessPhi = 0b001,
+ UselessPhi = 0b010,
+ ParentlessInst = 0b100
+ };
+ uint8_t warnings{LocalPhiWarningKinds::WarningNone};
for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
// This check is different to that in canSinkInstructions. There, we
// cared about the global view once simplifycfg (and instcombine) have
@@ -2316,14 +2323,19 @@
auto *PN =
PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
PN->insertBefore(BBEnd->begin());
- for (decltype(Insts.size()) i = 0, E = Insts.size(); i < E; i++)
+ for (decltype(Insts.size()) i = 0, E = Insts.size(); i < E; i++) {
+ if (PN->getType() != Insts[i]->getOperand(O)->getType()) {
+ LLVM_DEBUG(dbgs() << "PN: " << PN << " -- type: " << PN->getType());
+ LLVM_DEBUG(dbgs() << "Insts[" << i << "]->getOperand("<<O<<"): " << Insts[i]->getOperand(O) << " -- type: " << Insts[i]->getOperand(O)->getType());
+ }
PN->addIncoming(Insts[i]->getOperand(O), IncomingBBs[i]);
+ }
NewOperands.push_back(PN);
}
// Arbitrarily use I0 as the new "common" instruction; remap its operands
// and move it to the start of the successor block.
- for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
+ for (unsigned O = 0, E = std::min(I0->getNumOperands(), (unsigned)NewOperands.size()); O != E; ++O)
I0->getOperandUse(O).set(NewOperands[O]);
I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
@@ -2354,21 +2366,43 @@
// canSinkLastInstruction checked that all instructions are only used by
// phi nodes in a way that allows replacing the phi node with the common
// instruction.
- auto *PN = cast<PHINode>(U);
+ auto *PN = dyn_cast<PHINode>(U);
+ if (!PN) {
+ continue;
+ }
+
+ uint8_t warningLocal = PN->use_empty() ? LocalPhiWarningKinds::UselessPhi : LocalPhiWarningKinds::WarningNone;
+ static_assert(static_cast<LocalPhiWarningKinds>(true) == LocalPhiWarningKinds::ParentlessPhi);
+ warningLocal |= PN->getParent() == nullptr;
+ warnings |= warningLocal;
+
+ if (warningLocal) {
+ continue;
+ }
PN->replaceAllUsesWith(I0);
PN->eraseFromParent();
}
// Finally nuke all instructions apart from the common instruction.
for (auto *I : Insts) {
- if (I == I0)
+ bool isParentless = I->getParent() == nullptr;
+ warnings |= (isParentless ? LocalPhiWarningKinds::ParentlessInst : LocalPhiWarningKinds::WarningNone);
+ if (I == I0 || isParentless)
continue;
// The remaining uses are debug users, replace those with the common inst.
// In most (all?) cases this just introduces a use-before-def.
- assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
+ assert( (allowOtherInstsToHaveNonDbgUsers || I->user_empty()) && "Inst unexpectedly still has non-dbg users");
I->replaceAllUsesWith(I0);
I->eraseFromParent();
}
+
+ if (warnings != LocalPhiWarningKinds::WarningNone) {
+ fprintf(stderr, "sinkCandidatesImpl **Non-Fatal** Warning(s): %s%s%s\n",
+ (warnings & LocalPhiWarningKinds::ParentlessPhi) ? "PN->getParent() == nullptr\n" : "",
+ (warnings & LocalPhiWarningKinds::UselessPhi) ? "PN->use_empty() == true\n" : "",
+ (warnings & LocalPhiWarningKinds::ParentlessInst) ? "I->getParent() == nullptr\n" : ""
+ );
+ }
}
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
@@ -2652,9 +2686,6 @@
return Changed;
}
-using SinkCandidatesType = std::vector<std::vector<Instruction *>>;
-using HerusticGetCandidatesCallback = std::function<bool(
- SinkCandidatesType &, bool &, BasicBlock *, DomTreeUpdater *)>;
// There might be instruction like:
// arr = alloca something
@@ -2665,7 +2696,7 @@
// we insert instruction like isomorphicGEP = gep arr 0, ...(following a set of
// zero). And we only change the uses of the alloca in the specific PHINode.
// Return true if there's any modification on the IR.
-static bool transformAllocaToIsomorphicGEP(PHINode &PN) {
+static std::optional<std::pair<AllocaInst*, GetElementPtrInst*>> getTransformableAlloca(PHINode &PN) {
// We need at least one GEP to construct a gep with zero offset for
// over-optimized alloca, if any.
GetElementPtrInst *oneGEP = nullptr;
@@ -2675,7 +2706,7 @@
// If there is no GEP, we can't sink the instruction.
if (oneGEP == nullptr)
- return false;
+ return std::nullopt;
// There might be serveral identical alloca incoming from different blocks.
// We only need a same alloca.
@@ -2687,19 +2718,19 @@
if (auto AI = dyn_cast<AllocaInst>(IncomingValue)) {
// We only do this on static alloca. We fail the whole process.
if (!AI->isStaticAlloca())
- return false;
+ return std::nullopt;
if (CandidateAlloca != nullptr) {
if (CandidateAlloca == AI)
continue;
- return false;
+ return std::nullopt;
}
CandidateAlloca = AI;
}
}
if (CandidateAlloca == nullptr)
- return false;
+ return std::nullopt;
for (auto &IncomingValue : PN.incoming_values()) {
if (IncomingValue == CandidateAlloca)
@@ -2707,9 +2738,9 @@
if (auto GEPAsIncoming = dyn_cast<GetElementPtrInst>(IncomingValue)) {
if (GEPAsIncoming->getPointerOperand() != CandidateAlloca ||
GEPAsIncoming->getNumOperands() != NumOperands)
- return false;
+ return std::nullopt;
} else
- return false;
+ return std::nullopt;
}
// Change the alloca's use in this PHINode with corresonding gep.
@@ -2719,18 +2750,17 @@
auto IntTy = IsomorphicGEP->getOperand(i)->getType();
IsomorphicGEP->setOperand(i, ConstantInt::get(IntTy, 0));
}
- PN.replaceUsesOfWith(CandidateAlloca, IsomorphicGEP);
- IsomorphicGEP->insertBefore(EntryBB.getTerminator()->getIterator());
+ //PN.replaceUsesOfWith(CandidateAlloca, IsomorphicGEP);
+ //IsomorphicGEP->insertBefore(EntryBB.getTerminator()->getIterator());
- return true;
+ return std::optional<std::pair<AllocaInst*, GetElementPtrInst*>>{std::in_place_t{}, CandidateAlloca, IsomorphicGEP};
}
// Change phi (gep (same_ptr, ...), gep(same_ptr, ...), ...) to gep same_ptr
// phi(...)
static bool
-herusticGetPhiGEPCandidate(SinkCandidatesType &ResultsSinkCandidates,
- bool &Changed, BasicBlock *BB, DomTreeUpdater *DTU) {
-
+herusticGetPhiGEPCandidate(std::vector<Instruction *> &ResultsSinkCandidate,
+ bool &Changed, BasicBlock *BB, DomTreeUpdater *DTU, TinyPtrVector<Instruction*> BaseCandidatesToSkip = {}) {
// Collect all the predecessors.
auto Preds = std::vector<BasicBlock *>();
for (auto *PredBB : predecessors(BB))
@@ -2742,81 +2772,128 @@
return false;
DenseMap<const Use *, SmallVector<Value *, 4>> PHIOperands;
+ DenseMap<std::pair<PHINode*, AllocaInst*>, GetElementPtrInst*> transformableAllocas;
for (PHINode &PN : BB->phis()) {
// If the same_ptr is an alloca, we try to transform it to gep with zero
// offset.
- Changed |= transformAllocaToIsomorphicGEP(PN);
+ auto allocaGepPair = getTransformableAlloca(PN);
+ if (allocaGepPair.has_value()) {
+ transformableAllocas.try_emplace(std::pair<PHINode*, AllocaInst*>{&PN, allocaGepPair->first}, allocaGepPair->second);
+ }
SmallDenseMap<BasicBlock *, const Use *, 4> IncomingVals;
for (const Use &U : PN.incoming_values())
IncomingVals.insert({PN.getIncomingBlock(U), &U});
auto &Ops = PHIOperands[IncomingVals[Preds[0]]];
- for (BasicBlock *Pred : Preds)
- Ops.push_back(*IncomingVals[Pred]);
+ for (BasicBlock *Pred : Preds) {
+ if (auto&& iter = IncomingVals.find(Pred); iter != IncomingVals.end())
+ Ops.push_back( *(iter->getSecond()) );
+ }
}
- SinkCandidatesType SinkCandidates;
+ std::vector<std::vector<Instruction*>> SinkCandidates;
for (auto &[U, IncomingValues] : PHIOperands) {
auto CorrespondingPhiNode = dyn_cast<PHINode>(U->getUser());
assert(CorrespondingPhiNode &&
"PHIOperands should only contain PHINode's operands.");
std::vector<Instruction *> Candidates;
+ std::vector<std::pair<GetElementPtrInst*, AllocaInst*>> replacementList;
+
+ auto inspectCandidate = [&transformableAllocas,CorrespondingPhiNode](Value* IncomingValue) -> std::pair<GetElementPtrInst*,AllocaInst*> {
+ if (auto GEP = dyn_cast<GetElementPtrInst>(IncomingValue)) {
+ return {GEP, nullptr};
+ }
+
+ if (auto alloca = dyn_cast<AllocaInst>(IncomingValue)) {
+ if (auto&& iter = transformableAllocas.find(std::pair<PHINode*, AllocaInst*>{CorrespondingPhiNode, alloca}); iter != transformableAllocas.end()) {
+ return {iter->getSecond(), alloca};
+ }
+ }
- for (auto IncomingValue : IncomingValues) {
- if (auto GEP = dyn_cast<GetElementPtrInst>(IncomingValue)) {
- Candidates.push_back(GEP);
- } else {
- Candidates.clear();
- break;
+ return {nullptr, nullptr};
+ };
+
+ auto sameBase = [&Candidates](GetElementPtrInst* candidate) -> bool {
+ if (Candidates.empty()) {
+ return true;
}
- }
+ auto BaseCandidate = cast<GetElementPtrInst>(Candidates[0]);
+ auto BasePtr = BaseCandidate->getOperand(0);
+ return candidate->getOperand(0) == BasePtr && candidate->getResultElementType() == BaseCandidate->getResultElementType();
+ };
- if (Candidates.empty())
- continue;
-
- auto BasePtr = Candidates[0]->getOperand(0);
- for (auto Candidate : Candidates) {
- if (Candidate->getOperand(0) != BasePtr) {
- Candidates.clear();
+ for (auto IncomingValue : IncomingValues) {
+ auto val = inspectCandidate(IncomingValue);
+ if (val.first == nullptr || !sameBase(val.first)) {
+ Candidates.clear();
break;
- }
+ }
+
+ Candidates.push_back(val.first);
+ if (val.second != nullptr) {
+ replacementList.push_back(val);
+ }
}
- if (Candidates.empty())
+ if (Candidates.empty() ||
+ std::find(BaseCandidatesToSkip.begin(), BaseCandidatesToSkip.end(), Candidates[0]) != BaseCandidatesToSkip.end()) {
continue;
+ }
+ for (auto [gep, alloca] : replacementList) {
+ Changed = true;
+ if (gep->getParent() == nullptr) {
+ auto &EntryBB = CorrespondingPhiNode->getParent()->getParent()->getEntryBlock();
+ gep->insertBefore(EntryBB.getTerminator()->getIterator());
+ }
+ CorrespondingPhiNode->replaceUsesOfWith(alloca, gep);
+ }
SinkCandidates.emplace_back(std::move(Candidates));
}
- for (auto &Candidates : SinkCandidates)
- if (canSinkInstructions(Candidates, PHIOperands))
- ResultsSinkCandidates.emplace_back(std::move(Candidates));
-
- if (ResultsSinkCandidates.size() != 0)
+ for (auto &Candidates : SinkCandidates) {
+ if (canSinkInstructions(Candidates, PHIOperands)) {
+ ResultsSinkCandidate = std::move(Candidates);
+ }
+ }
+ if (ResultsSinkCandidate.size() != 0)
return true;
return false;
}
-static bool herusticPhiSinker(BasicBlock *BB, DomTreeUpdater *DTU,
- HerusticGetCandidatesCallback GetCandidates) {
- SinkCandidatesType SinkCandidates;
+static bool herusticPhiSinker(BasicBlock *BB, DomTreeUpdater *DTU) {
+ LLVM_DEBUG(dbgs() << "function IR before running herusticPhiSinker():\n");
+
+ LLVM_DEBUG(BB->getParent()->print(dbgs()));
+ LLVM_DEBUG(dbgs() << "\n");
+
+ std::vector<Instruction*> SinkCandidates;
bool Changed = false;
- if (!GetCandidates(SinkCandidates, Changed, BB, DTU) ||
- SinkCandidates.size() == 0)
+ if (!herusticGetPhiGEPCandidate(SinkCandidates, Changed, BB, DTU))
return Changed;
std::vector<BasicBlock *> Preds;
for (auto *PredBB : predecessors(BB))
Preds.push_back(PredBB);
-
- for (auto Candidates : SinkCandidates) {
- LLVM_DEBUG(dbgs() << "SINK: Sink: " << *Candidates[0] << "\n");
- sinkCandidatesImpl(BB, Preds, Candidates);
+
+ TinyPtrVector<Instruction*> BaseCandidatesToSkip;
+ do {
+ LLVM_DEBUG(dbgs() << "SINK: Sink: " << *SinkCandidates[0] << "\n");
+ if (SinkCandidates.size() != 1) {
+ LLVM_DEBUG(dbgs() << "SINK: Sink: other insts of" << *SinkCandidates[0] << ":\n");
+ for (size_t i = 1; i < SinkCandidates.size(); i++) {
+ LLVM_DEBUG(dbgs() << " " << i << ") " << *SinkCandidates[i] << "\n");
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+ }
+ sinkCandidatesImpl(BB, Preds, SinkCandidates, true);
NumSinkCommonInstrs++;
- Changed = true;
- }
- if (SinkCandidates.size() != 0)
- ++NumSinkCommonCode;
+
+ BaseCandidatesToSkip.push_back(SinkCandidates[0]);
+ SinkCandidates.clear();
+ } while ( herusticGetPhiGEPCandidate(SinkCandidates, Changed, BB, DTU, BaseCandidatesToSkip) ) ;
+
+ ++NumSinkCommonCode;
return Changed;
}
@@ -8518,7 +8595,7 @@
if (SinkCommon && Options.SinkCommonInsts)
if (sinkCommonCodeFromPredecessors(BB, DTU) ||
- herusticPhiSinker(BB, DTU, herusticGetPhiGEPCandidate) ||
+ herusticPhiSinker(BB, DTU) ||
mergeCompatibleInvokes(BB, DTU)) {
// sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
// so we may now how duplicate PHI's.
```
https://github.com/llvm/llvm-project/pull/128171
More information about the llvm-commits
mailing list