[llvm] [SimplifyCFG] Fix hoisting problem in SimplifyCFG (PR #78615)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 6 11:37:13 PST 2024
https://github.com/RouzbehPaktinat updated https://github.com/llvm/llvm-project/pull/78615
>From 895ee265d1b40721350f12eee38a97c536992614 Mon Sep 17 00:00:00 2001
From: RouzbehPaktinat <rouzbeh.paktinat1 at huawei.com>
Date: Mon, 29 Jan 2024 10:24:09 -0500
Subject: [PATCH 1/3] [SimplifyCFG] Add precommit for hoisting problem
---
.../SimplifyCFG/hoist-common-code.ll | 52 +++++++++++++++++++
1 file changed, 52 insertions(+)
diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll
index bfe31d8345d506..285062455e4f5f 100644
--- a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll
+++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll
@@ -24,6 +24,58 @@ F: ; preds = %0
ret void
}
+define void @test_unordered(ptr noalias %b, ptr noalias %c, ptr noalias %Q, ptr noalias %R, i32 %i ) {
+; CHECK-LABEL: @test_unordered(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ldR1:%.*]] = load i32, ptr [[R:%.*]], align 8
+; CHECK-NEXT: switch i32 %i, label %bb0 [
+; CHECK-NEXT: i32 2, label %bb1
+; CHECK-NEXT: i32 3, label %bb2
+; CHECK-NEXT: ]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb0:
+; CHECK-NEXT: [[ldQ:%.*]] = load i32, ptr [[Q:%.*]], align 8
+; CHECK-NEXT: [[mul:%.*]] = mul i32 [[ldQ:%.*]], 2
+; CHECK-NEXT: [[add:%.*]] = add i32 [[ldR1:%.*]], [[mul:%.*]]
+; CHECK-NEXT: store i32 [[add:%.*]], ptr [[c:%.*]], align 8
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: store i32 [[ldR1:%.*]], ptr [[c:%.*]], align 4
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[ldQ2:%.*]] = load i32, ptr [[Q:%.*]], align 8
+; CHECK-NEXT: [[sub:%.*]] = sub i32 [[ldR1:%.*]], [[ldQ2:%.*]]
+; CHECK-NEXT: store i32 [[sub:%.*]], ptr [[c:%.*]], align 8
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+
+entry:
+ switch i32 %i, label %bb0 [
+ i32 2, label %bb1
+ i32 3, label %bb2
+ ]
+
+bb0: ; preds = %entry
+ %ldQ1 = load i32, ptr %Q, align 8
+ %mul = mul i32 %ldQ1, 2
+ %ldR1 = load i32, ptr %R, align 8
+ %add = add i32 %ldR1, %mul
+ store i32 %add, ptr %c, align 8
+ ret void
+
+bb1: ; preds = entry
+ %ldR2 = load i32, ptr %R, align 8
+ store i32 %ldR2, ptr %c
+ ret void
+
+bb2: ; preds = entry
+ %ldQ2 = load i32, ptr %Q, align 8
+ %ldR3 = load i32, ptr %R, align 8
+ %sub = sub i32 %ldR3, %ldQ2
+ store i32 %sub, ptr %c, align 8
+ ret void
+}
+
define void @test_switch(i64 %i, ptr %Q) {
; CHECK-LABEL: @test_switch(
; CHECK-NEXT: common.ret:
>From 6e36f6345226e4bd74ea3fd80ac205b57e020236 Mon Sep 17 00:00:00 2001
From: RouzbehPaktinat <rouzbeh.paktinat1 at huawei.com>
Date: Thu, 18 Jan 2024 12:56:35 -0500
Subject: [PATCH 2/3] [SimplifyCFG] Fix hoisting problem in SimplifyCFG
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 192 +++++++++++++++-------
1 file changed, 129 insertions(+), 63 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 7424fe31945dc7..61aa2f61221235 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1582,6 +1582,17 @@ hoistLockstepIdenticalDPValues(Instruction *TI, Instruction *I1,
}
}
+// Hash instructions based on following factors:
+// 1- Instruction Opcode
+// 2- Instruction type
+// 3- Instruction operands
+llvm::hash_code getHash(Instruction *Instr) {
+ std::vector<Value *> operands(Instr->op_begin(), Instr->op_end());
+ return llvm::hash_combine(
+ Instr->getOpcode(), Instr->getType(),
+ hash_combine_range(operands.begin(), operands.end()));
+}
+
/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If EqTermsOnly is
/// given, only perform hoisting in case both blocks only contain a terminator.
@@ -1589,12 +1600,11 @@ hoistLockstepIdenticalDPValues(Instruction *TI, Instruction *I1,
/// added.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
bool EqTermsOnly) {
- // This does very trivial matching, with limited scanning, to find identical
- // instructions in the two blocks. In particular, we don't want to get into
- // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
- // such, we currently just scan for obviously identical instructions in an
- // identical order, possibly separated by the same number of non-identical
- // instructions.
+ // We first sort successors based on the number of instructions each block
+ // holds. Then for each successor we make a hashmap from its instructions,
+ // except for the first successor. After that, we iterate over the
+ // instructions of the first successor. If we find identical instructions from
+ // every other successor, we hoist all of them into the predeccessor.
unsigned int SuccSize = succ_size(BB);
if (SuccSize < 2)
return false;
@@ -1608,10 +1618,21 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
auto *TI = BB->getTerminator();
+ SmallVector<BasicBlock *> SuccessorBlocks;
+ for (auto *Succ : successors(BB))
+ SuccessorBlocks.push_back(Succ);
+
+ // Sort successor blocks based on the number of instructions.
+ // This is because we always want to iterate over instructions
+ // of the smallest block.
+ llvm::stable_sort(SuccessorBlocks, [](BasicBlock *BB1, BasicBlock *BB2) {
+ return BB1->sizeWithoutDebug() < BB2->sizeWithoutDebug();
+ });
+
// The second of pair is a SkipFlags bitmask.
using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
SmallVector<SuccIterPair, 8> SuccIterPairs;
- for (auto *Succ : successors(BB)) {
+ for (auto *Succ : SuccessorBlocks) {
BasicBlock::iterator SuccItr = Succ->begin();
if (isa<PHINode>(*SuccItr))
return false;
@@ -1645,77 +1666,124 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
}
bool Changed = false;
+ auto *SuccIterPairBegin = SuccIterPairs.begin();
+ SuccIterPairBegin++;
+ auto OtherSuccIterPairRange =
+ iterator_range(SuccIterPairBegin, SuccIterPairs.end());
+ auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
+ using InstrFlagPair = std::pair<Instruction *, unsigned>;
+ SmallVector<DenseMap<llvm::hash_code, InstrFlagPair>, 2> OtherSuccessorsHash;
+
+ for (auto BBItrPair : OtherSuccIterRange) {
+ // Fill the hashmap for every other successor
+ DenseMap<llvm::hash_code, InstrFlagPair> hashMap;
+ unsigned skipFlag = 0;
+ Instruction *I = nullptr;
+ do {
+ I = &*BBItrPair;
+ skipFlag |= skippedInstrFlags(I);
+ hashMap[getHash(I)] = InstrFlagPair(I, skipFlag);
+ BBItrPair++;
+ } while (!I->isTerminator());
+ OtherSuccessorsHash.push_back(hashMap);
+ }
+ // Keep track of instructions skipped in the first successor
+ unsigned SkipFlagsBB1 = 0;
+ bool SameLevelHoist = true;
for (;;) {
auto *SuccIterPairBegin = SuccIterPairs.begin();
auto &BB1ItrPair = *SuccIterPairBegin++;
auto OtherSuccIterPairRange =
iterator_range(SuccIterPairBegin, SuccIterPairs.end());
- auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
-
Instruction *I1 = &*BB1ItrPair.first;
-
- // Skip debug info if it is not identical.
- bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
- Instruction *I2 = &*Iter;
- return I1->isIdenticalToWhenDefined(I2);
- });
- if (!AllDbgInstsAreIdentical) {
- while (isa<DbgInfoIntrinsic>(I1))
- I1 = &*++BB1ItrPair.first;
- for (auto &SuccIter : OtherSuccIterRange) {
- Instruction *I2 = &*SuccIter;
- while (isa<DbgInfoIntrinsic>(I2))
- I2 = &*++SuccIter;
+ bool HasIdenticalInst = true;
+
+ // Check if there are identical instructions in all other successors
+ for (auto &map : OtherSuccessorsHash) {
+ Instruction *I2 = map[getHash(I1)].first;
+ // We might face with same hash values for different instructions.
+ // If that happens, ignore the instruction.
+ if (!I2 || !I1->isIdenticalTo(I2)) {
+ HasIdenticalInst = false;
+ break;
}
}
- bool AllInstsAreIdentical = true;
- bool HasTerminator = I1->isTerminator();
- for (auto &SuccIter : OtherSuccIterRange) {
- Instruction *I2 = &*SuccIter;
- HasTerminator |= I2->isTerminator();
- if (AllInstsAreIdentical && !I1->isIdenticalToWhenDefined(I2))
- AllInstsAreIdentical = false;
+ if (!HasIdenticalInst) {
+ if (NumSkipped >= HoistCommonSkipLimit)
+ return Changed;
+ SkipFlagsBB1 |= skippedInstrFlags(I1);
+ if (SameLevelHoist) {
+ for (auto &SuccIterPair : OtherSuccIterPairRange) {
+ Instruction *I = &*SuccIterPair.first++;
+ SuccIterPair.second |= skippedInstrFlags(I);
+ }
+ }
+ NumSkipped++;
+ if (I1->isTerminator())
+ return Changed;
+ ++BB1ItrPair.first;
+ continue;
}
SmallVector<Instruction *, 8> OtherInsts;
- for (auto &SuccIter : OtherSuccIterRange)
- OtherInsts.push_back(&*SuccIter);
+ if (SameLevelHoist) {
+ for (auto &SuccIterPair : OtherSuccIterPairRange)
+ OtherInsts.push_back(&*(SuccIterPair.first));
+ } else {
+ for (auto &map : OtherSuccessorsHash)
+ OtherInstrs.push_back(map[getHash(I1)].first);
+ }
// If we are hoisting the terminator instruction, don't move one (making a
// broken BB), instead clone it, and remove BI.
- if (HasTerminator) {
+ if (I1->isTerminator()) {
// Even if BB, which contains only one unreachable instruction, is ignored
// at the beginning of the loop, we can hoist the terminator instruction.
// If any instructions remain in the block, we cannot hoist terminators.
- if (NumSkipped || !AllInstsAreIdentical) {
+ if (NumSkipped) {
hoistLockstepIdenticalDPValues(TI, I1, OtherInsts);
return Changed;
}
-
- return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
- Changed;
+ SmallVector<Instruction *, 8> Insts;
+ for (auto &map : OtherSuccessorsHash) {
+ Instruction *I2 = map[getHash(I1)].first;
+ // BB holding I2 should only contain the branch instruction
+ auto itr = I2->getParent()->instructionsWithoutDebug();
+ if (&*itr.begin() != I2)
+ return Changed;
+ Insts.push_back(I2);
+ }
+ return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, Insts) || Changed;
}
- if (AllInstsAreIdentical) {
- unsigned SkipFlagsBB1 = BB1ItrPair.second;
- AllInstsAreIdentical =
- isSafeToHoistInstr(I1, SkipFlagsBB1) &&
- all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
- Instruction *I2 = &*Pair.first;
- unsigned SkipFlagsBB2 = Pair.second;
- // Even if the instructions are identical, it may not
- // be safe to hoist them if we have skipped over
- // instructions with side effects or their operands
- // weren't hoisted.
- return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
- shouldHoistCommonInstructions(I1, I2, TTI);
- });
+ bool SafeToHoist = isSafeToHoistInstr(I1, SkipFlagsBB1);
+ unsigned index = 0;
+ for (auto &SuccIterPair : OtherSuccIterPairRange) {
+ Instruction *I2 = OtherSuccessorsHash[index][getHash(I1)].first;
+ // If instructions of all successors are at the same level, use the
+ // skipFlag of its BB, i.e., SameLevelHoist. Otherwise, use the skipFlag
+ // that was calculated initially for this instruction in the hashmap
+ if (SameLevelHoist && I2 == (&*(SuccIterPair.first))) {
+ SafeToHoist = SafeToHoist &&
+ isSafeToHoistInstr(I2, SuccIterPair.second) &&
+ shouldHoistCommonInstructions(I1, I2, TTI);
+ } else {
+ unsigned skipFlag = OtherSuccessorsHash[index][getHash(I1)].second;
+ SafeToHoist = SafeToHoist && isSafeToHoistInstr(I2, skipFlag) &&
+ shouldHoistCommonInstructions(I1, I2, TTI);
+ SameLevelHoist = false;
+ }
+ index++;
}
- if (AllInstsAreIdentical) {
+ if (SafeToHoist) {
BB1ItrPair.first++;
+ if (SameLevelHoist) {
+ for (auto &SuccIterPair : OtherSuccIterPairRange)
+ SuccIterPair.first++;
+ }
if (isa<DbgInfoIntrinsic>(I1)) {
// The debug location is an integral part of a debug info intrinsic
// and can't be separated from it or replaced. Instead of attempting
@@ -1725,8 +1793,8 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
// leave any that were not hoisted behind (by calling moveBefore
// rather than moveBeforePreserving).
I1->moveBefore(TI);
- for (auto &SuccIter : OtherSuccIterRange) {
- auto *I2 = &*SuccIter++;
+ for (auto &map : OtherSuccessorsHash) {
+ Instruction *I2 = map[getHash(I1)].first;
assert(isa<DbgInfoIntrinsic>(I2));
I2->moveBefore(TI);
}
@@ -1739,8 +1807,8 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
// leave any that were not hoisted behind (by calling moveBefore
// rather than moveBeforePreserving).
I1->moveBefore(TI);
- for (auto &SuccIter : OtherSuccIterRange) {
- Instruction *I2 = &*SuccIter++;
+ for (auto &map : OtherSuccessorsHash) {
+ Instruction *I2 = map[getHash(I1)].first;
assert(I2 != I1);
if (!I2->use_empty())
I2->replaceAllUsesWith(I1);
@@ -1764,9 +1832,12 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
// We are about to skip over a pair of non-identical instructions. Record
// if any have characteristics that would prevent reordering instructions
// across them.
- for (auto &SuccIterPair : SuccIterPairs) {
- Instruction *I = &*SuccIterPair.first++;
- SuccIterPair.second |= skippedInstrFlags(I);
+ SkipFlagsBB1 |= skippedInstrFlags(I1);
+ if (SameLevelHoist) {
+ for (auto &SuccIterPair : OtherSuccIterPairRange) { // update flags
+ Instruction *I = &*SuccIterPair.first;
+ SuccIterPair.second |= skippedInstrFlags(I);
+ }
}
++NumSkipped;
}
@@ -1810,7 +1881,6 @@ bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
if (BB1V == BB2V)
continue;
-
// In the case of an if statement, check for
// passingValueIsAlwaysUndefined here because we would rather eliminate
// undefined control flow then converting it to a select.
@@ -1882,20 +1952,16 @@ bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
}
}
}
-
SmallVector<DominatorTree::UpdateType, 4> Updates;
-
// Update any PHI nodes in our new successors.
for (BasicBlock *Succ : successors(BB1)) {
AddPredecessorToBlock(Succ, TIParent, BB1);
if (DTU)
Updates.push_back({DominatorTree::Insert, TIParent, Succ});
}
-
if (DTU)
for (BasicBlock *Succ : successors(TI))
Updates.push_back({DominatorTree::Delete, TIParent, Succ});
-
EraseTerminatorAndDCECond(TI);
if (DTU)
DTU->applyUpdates(Updates);
>From 210ac6f19f5a48445c8cc1a989e08ab48204065e Mon Sep 17 00:00:00 2001
From: RouzbehPaktinat <rouzbeh.paktinat1 at huawei.com>
Date: Tue, 6 Feb 2024 14:35:07 -0500
Subject: [PATCH 3/3] Fix bugs
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 99 +++++++++++++------
llvm/test/CodeGen/ARM/aes-erratum-fix.ll | 24 ++---
llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll | 4 +-
.../PhaseOrdering/simplifyCFG-hoist.ll | 87 ++++++++++++++++
.../SimplifyCFG/dont-hoist-deoptimize.ll | 12 ++-
5 files changed, 176 insertions(+), 50 deletions(-)
create mode 100644 llvm/test/Transforms/PhaseOrdering/simplifyCFG-hoist.ll
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 61aa2f61221235..67c17e91740b3a 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1618,27 +1618,39 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
auto *TI = BB->getTerminator();
- SmallVector<BasicBlock *> SuccessorBlocks;
- for (auto *Succ : successors(BB))
- SuccessorBlocks.push_back(Succ);
+ SmallVector<BasicBlock *, 8> SuccessorBBs;
+ for (auto *Succ : successors(BB)) {
+ BasicBlock::iterator SuccItr = Succ->begin();
+ // If we find an unreachable instruction at the beginning of a basic block,
+ // we can still hoist instructions from the rest of the basic blocks.
+ if (isa<UnreachableInst>(*SuccItr))
+ continue;
+ SuccessorBBs.push_back(Succ);
+ }
- // Sort successor blocks based on the number of instructions.
- // This is because we always want to iterate over instructions
- // of the smallest block.
- llvm::stable_sort(SuccessorBlocks, [](BasicBlock *BB1, BasicBlock *BB2) {
- return BB1->sizeWithoutDebug() < BB2->sizeWithoutDebug();
- });
+ // Find the smallest BB because we always want to iterate over instructions
+ // of the smallest Successor.
+ auto *SmallestBB = *std::min_element(SuccessorBBs.begin(), SuccessorBBs.end(),
+ [](BasicBlock *BB1, BasicBlock *BB2) {
+ return BB1->size() < BB2->size();
+ });
+ std::iter_swap(
+ SuccessorBBs.begin(),
+ std::find(SuccessorBBs.begin(), SuccessorBBs.end(), SmallestBB));
// The second of pair is a SkipFlags bitmask.
using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
SmallVector<SuccIterPair, 8> SuccIterPairs;
- for (auto *Succ : SuccessorBlocks) {
+ for (auto *Succ : SuccessorBBs) {
BasicBlock::iterator SuccItr = Succ->begin();
if (isa<PHINode>(*SuccItr))
return false;
SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
}
+ if (SuccIterPairs.size() < 2)
+ return false;
+
// Check if only hoisting terminators is allowed. This does not add new
// instructions to the hoist location.
if (EqTermsOnly) {
@@ -1656,14 +1668,6 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
// many instructions we skip, serving as a compilation time control as well as
// preventing excessive increase of life ranges.
unsigned NumSkipped = 0;
- // If we find an unreachable instruction at the beginning of a basic block, we
- // can still hoist instructions from the rest of the basic blocks.
- if (SuccIterPairs.size() > 2) {
- erase_if(SuccIterPairs,
- [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
- if (SuccIterPairs.size() < 2)
- return false;
- }
bool Changed = false;
auto *SuccIterPairBegin = SuccIterPairs.begin();
@@ -1697,6 +1701,17 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
auto OtherSuccIterPairRange =
iterator_range(SuccIterPairBegin, SuccIterPairs.end());
Instruction *I1 = &*BB1ItrPair.first;
+
+ // Skip debug info if it is not identical.
+ bool IdenticalDebugs = all_of(OtherSuccIterRange, [I1](auto &Iter) {
+ Instruction *I2 = &*Iter;
+ return I1->isIdenticalToWhenDefined(I2);
+ });
+ if (!IdenticalDebugs) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = &*++BB1ItrPair.first;
+ }
+
bool HasIdenticalInst = true;
// Check if there are identical instructions in all other successors
@@ -1704,7 +1719,7 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
Instruction *I2 = map[getHash(I1)].first;
// We might face with same hash values for different instructions.
// If that happens, ignore the instruction.
- if (!I2 || !I1->isIdenticalTo(I2)) {
+ if (!I2 || !I1->isIdenticalToWhenDefined(I2)) {
HasIdenticalInst = false;
break;
}
@@ -1720,7 +1735,7 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
SuccIterPair.second |= skippedInstrFlags(I);
}
}
- NumSkipped++;
+ ++NumSkipped;
if (I1->isTerminator())
return Changed;
++BB1ItrPair.first;
@@ -1733,7 +1748,7 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
OtherInsts.push_back(&*(SuccIterPair.first));
} else {
for (auto &map : OtherSuccessorsHash)
- OtherInstrs.push_back(map[getHash(I1)].first);
+ OtherInsts.push_back(map[getHash(I1)].first);
}
// If we are hoisting the terminator instruction, don't move one (making a
@@ -1810,13 +1825,38 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
for (auto &map : OtherSuccessorsHash) {
Instruction *I2 = map[getHash(I1)].first;
assert(I2 != I1);
- if (!I2->use_empty())
+ // Update hashcode of all instructions using I2
+ if (!I2->use_empty()) {
+ SmallVector<llvm::hash_code, 8> PrevHashCodes;
+ SmallVector<llvm::Instruction *, 8> PrevUsers;
+ // Once the uses of I1 are replaced, the hash value computed for
+ // those users are not valid anymore so we gather users and then
+ // recompute the hash codes for them. We need to do this only for
+ // the instructions located in the same block as I2 because we
+ // initially only hashed those instructions.
+ for (auto *user : I2->users()) {
+ if (auto *I = dyn_cast<Instruction>(user)) {
+ if (I->getParent() != I2->getParent())
+ continue;
+ PrevHashCodes.push_back(getHash(I));
+ PrevUsers.push_back(I);
+ }
+ }
I2->replaceAllUsesWith(I1);
+ unsigned index = 0;
+ for (auto &PrevHash : PrevHashCodes) {
+ auto NewHash = getHash(PrevUsers[index]);
+ map.insert({NewHash, map[PrevHash]});
+ map.erase(PrevHash);
+ index++;
+ }
+ }
I1->andIRFlags(I2);
combineMetadataForCSE(I1, I2, true);
// I1 and I2 are being combined into a single instruction. Its debug
// location is the merged locations of the original instructions.
I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
+ map.erase(getHash(I1));
I2->eraseFromParent();
}
}
@@ -1832,10 +1872,11 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
// We are about to skip over a pair of non-identical instructions. Record
// if any have characteristics that would prevent reordering instructions
// across them.
+ BB1ItrPair.first++;
SkipFlagsBB1 |= skippedInstrFlags(I1);
if (SameLevelHoist) {
for (auto &SuccIterPair : OtherSuccIterPairRange) { // update flags
- Instruction *I = &*SuccIterPair.first;
+ Instruction *I = &*SuccIterPair.first++;
SuccIterPair.second |= skippedInstrFlags(I);
}
}
@@ -1857,11 +1898,8 @@ bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
// Use only for an if statement.
auto *I2 = *OtherSuccTIs.begin();
auto *BB2 = I2->getParent();
- if (BI) {
+ if (BI)
assert(OtherSuccTIs.size() == 1);
- assert(BI->getSuccessor(0) == I1->getParent());
- assert(BI->getSuccessor(1) == I2->getParent());
- }
// In the case of an if statement, we try to hoist an invoke.
// FIXME: Can we define a safety predicate for CallBr?
@@ -1881,6 +1919,7 @@ bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
if (BB1V == BB2V)
continue;
+
// In the case of an if statement, check for
// passingValueIsAlwaysUndefined here because we would rather eliminate
// undefined control flow then converting it to a select.
@@ -1952,16 +1991,20 @@ bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
}
}
}
+
SmallVector<DominatorTree::UpdateType, 4> Updates;
+
// Update any PHI nodes in our new successors.
for (BasicBlock *Succ : successors(BB1)) {
AddPredecessorToBlock(Succ, TIParent, BB1);
if (DTU)
Updates.push_back({DominatorTree::Insert, TIParent, Succ});
}
+
if (DTU)
for (BasicBlock *Succ : successors(TI))
Updates.push_back({DominatorTree::Delete, TIParent, Succ});
+
EraseTerminatorAndDCECond(TI);
if (DTU)
DTU->applyUpdates(Updates);
@@ -3713,7 +3756,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// Change the PHI node into a select instruction.
Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
-
+
Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
PN->replaceAllUsesWith(Sel);
Sel->takeName(PN);
diff --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
index 17d1ca65430aff..ba41ce30b98e5f 100644
--- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
+++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
@@ -314,16 +314,15 @@ define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x
; CHECK-FIX-LABEL: aese_set8_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
+; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB12_2
; CHECK-FIX-NEXT: @ %bb.1:
-; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.8 {d16[0]}, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB12_3
; CHECK-FIX-NEXT: b .LBB12_4
; CHECK-FIX-NEXT: .LBB12_2:
-; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB12_4
; CHECK-FIX-NEXT: .LBB12_3:
@@ -3205,15 +3204,10 @@ define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, ptr %2) no
define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_cond_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
-; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
-; CHECK-FIX-NOSCHED-NEXT: beq .LBB76_2
-; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
-; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
-; CHECK-FIX-NOSCHED-NEXT: vldr d16, [r1]
-; CHECK-FIX-NOSCHED-NEXT: b .LBB76_3
-; CHECK-FIX-NOSCHED-NEXT: .LBB76_2:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
-; CHECK-FIX-NOSCHED-NEXT: .LBB76_3:
+; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
+; CHECK-FIX-NOSCHED-NEXT: vldrne d16, [r1]
+; CHECK-FIX-NOSCHED-NEXT: vorr q8, q8, q8
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1]
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
@@ -3221,7 +3215,7 @@ define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, ptr %1, <16
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: bx lr
-;
+
; CHECK-CORTEX-FIX-LABEL: aesd_set64_cond_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
@@ -4096,19 +4090,15 @@ define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, ptr %1, <16
; CHECK-FIX-LABEL: aesd_setf32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
+; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB88_2
; CHECK-FIX-NEXT: @ %bb.1:
-; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
-; CHECK-FIX-NEXT: cmp r0, #0
-; CHECK-FIX-NEXT: bne .LBB88_3
-; CHECK-FIX-NEXT: b .LBB88_4
; CHECK-FIX-NEXT: .LBB88_2:
-; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB88_4
-; CHECK-FIX-NEXT: .LBB88_3:
+; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
; CHECK-FIX-NEXT: .LBB88_4:
; CHECK-FIX-NEXT: aesd.8 q8, q0
diff --git a/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll
index b5c9b903e18416..cafae03698257e 100644
--- a/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll
@@ -11,8 +11,8 @@ define i32 @a(i8 zeroext %b, ptr nocapture readonly %c, ptr nocapture readonly %
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: cmp r0, #2
-; CHECK-NEXT: bls.w .LBB0_12
+; CHECK-NEXT: cmp r0, #3
+; CHECK-NEXT: blo.w .LBB0_12
; CHECK-NEXT: @ %bb.1: @ %for.body.us.preheader
; CHECK-NEXT: movw r5, :lower16:arr_183
; CHECK-NEXT: movs r3, #0
diff --git a/llvm/test/Transforms/PhaseOrdering/simplifyCFG-hoist.ll b/llvm/test/Transforms/PhaseOrdering/simplifyCFG-hoist.ll
new file mode 100644
index 00000000000000..fc8ed95416a4e5
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/simplifyCFG-hoist.ll
@@ -0,0 +1,87 @@
+; opt -passes='default<O3>' -S --mtriple=aarch64-linux-gnu --mcpu=a64fx < %s | FileCheck %s
+
+; Hoist identical instructions from successor blocks even if
+; they are not located at the same level. This could help generate
+; more compact vectorized code.
+; More info can be found at https://github.com/llvm/llvm-project/issues/68395.
+
+
+define void @hoist_then_vectorize(ptr %a, ptr %b, ptr %c, ptr %d, i32 %N){
+; CHECK-LABEL: @hoist_then_vectorize(
+; CHECK-NEXT: iter.check:
+; CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[SHIFT:%.*]] = shl i64 [[VSCALE:%.*]], 1
+; CHECK-NEXT: [[MIN_ITR:%.*]] = icmp ugt i64 [[SHIFT:%.*]], 20
+; CHECK-NEXT: br i1 [[MIN_ITR:%.*]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MAIN_LOOP_ITR_CHECK:%.*]]
+; CHECK: vector.main.loop.iter.check:
+; CHECK-NEXT: [[VSCALE2:%.*]] = tail call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[SHIFT2:%.*]] = shl i64 [[VSCALE2:%.*]], 2
+; CHECK-NEXT: [[MIN_ITR2:%.*]] = icmp ugt i64 [[SHIFT2:%.*]], 20
+; CHECK-NEXT: br i1 [[MIN_ITR2:%.*]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[VSCALE3:%.*]] = tail call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[SHIFT3:%.*]] = shl i64 [[VSCALE3:%.*]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 20, [[SHIFT3:%.*]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 20, [[N_MOD_VF:%.*]]
+; CHECK-NEXT: [[VSCALE4:%.*]] = tail call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[SHIFT4:%.*]] = shl i64 [[VSCALE4:%.*]], 2
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
+; CHECK-NEXT: [[GEP_D:%.*]] = getelementptr inbounds i32, ptr [[D:%.*]], i64 [[INDEX:%.*]]
+; CHECK-NEXT: [[LOAD_D:%.*]] = load <vscale x 4 x i32>, ptr [[GEP_D:%.*]], align 4
+; CHECK-NEXT: [[MASK1:%.*]] = icmp slt <vscale x 4 x i32> [[LOAD_D:%.*]], zeroinitializer
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX:%.*]]
+; CHECK-NEXT: [[LOAD_A:%.*]] = load <vscale x 4 x i32>, ptr [[GEP_A:%.*]], align 4
+; CHECK-NEXT: [[MASK2:%.*]] = icmp eq <vscale x 4 x i32> [[LOAD_A:%.*]], zeroinitializer
+; CHECK-NEXT: [[SEL1:%.*]] = select <vscale x 4 x i1> [[MASK2:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[SEL2:%.*]] = select <vscale x 4 x i1> [[MASK1:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[SEL1:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD_A:%.*]], [[SEL2:%.*]]
+; CHECK-NEXT: store <vscale x 4 x i32> [[ADD:%.*]], ptr [[GEP_A:%.*]], align 4
+; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw i64 [[INDEX:%.*]], [[SHIFT4:%.*]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i64 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
+; CHECK-NEXT: br i1 [[LOOP_COND:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY:%.*]]
+
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.inc
+ ret void
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+ %arrayidx = getelementptr inbounds i32, ptr %d, i64 %indvars.iv
+ %ldr_d = load i32, ptr %arrayidx, align 4
+ %cmp1 = icmp slt i32 %ldr_d, 0
+ br i1 %cmp1, label %if.then, label %if.else
+
+if.then: ; preds = %for.body
+ %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ %ldr_a = load i32, ptr %arrayidx3, align 4
+ %add33 = add i32 %ldr_a, 1
+ store i32 %add33, ptr %arrayidx3, align 4
+ br label %for.inc
+
+if.else: ; preds = %for.body
+ %cmp7 = icmp eq i32 %ldr_d, 0
+ br i1 %cmp7, label %if.then9, label %if.else15
+
+if.then9: ; preds = %if.else
+ %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ %ldr_a2 = load i32, ptr %arrayidx11, align 4
+ %add1334 = add i32 %ldr_a2, 2
+ store i32 %add1334, ptr %arrayidx11, align 4
+ br label %for.inc
+
+if.else15: ; preds = %if.else
+ %arrayidx112 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ %ldr_a3 = load i32, ptr %arrayidx112, align 4
+ %add1935 = add i32 %ldr_a3, 3
+ store i32 %add1935, ptr %arrayidx112, align 4
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %if.else15, %if.then9
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, 20
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/SimplifyCFG/dont-hoist-deoptimize.ll b/llvm/test/Transforms/SimplifyCFG/dont-hoist-deoptimize.ll
index 6bfa967ff23e14..28a3cf50e347a1 100644
--- a/llvm/test/Transforms/SimplifyCFG/dont-hoist-deoptimize.ll
+++ b/llvm/test/Transforms/SimplifyCFG/dont-hoist-deoptimize.ll
@@ -3,21 +3,25 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
target triple = "x86_64-unknown-linux-gnu"
+; SimplifyCFG hoists %tmp and %tmp2 but after skipping %tmp3, we reach to the skipping threshold and
+; bail out, not hoisting %tmp4.
+
declare void @llvm.experimental.deoptimize.isVoid(...) #0
define void @widget(i1 %arg) {
; CHECK-LABEL: @widget(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = trunc i64 5 to i32
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 0 to i32
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[BB1:%.*]], label [[BB4:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 0 to i32
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 0 to i32
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 2 to i32
; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid(i32 13) #[[ATTR0:[0-9]+]] [ "deopt"() ]
; CHECK-NEXT: ret void
; CHECK: bb4:
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 1 to i32
-; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 0 to i32
+; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 1 to i32
+; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 2 to i32
; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid(i32 13) #[[ATTR0]] [ "deopt"() ]
; CHECK-NEXT: ret void
;
@@ -28,6 +32,7 @@ bb1: ; preds = %bb
%tmp = trunc i64 5 to i32
%tmp2 = trunc i64 0 to i32
%tmp3 = trunc i64 0 to i32
+ %tmp4 = trunc i64 2 to i32
call void (...) @llvm.experimental.deoptimize.isVoid(i32 13) #0 [ "deopt"() ]
ret void
@@ -35,6 +40,7 @@ bb4: ; preds = %bb
%tmp5 = trunc i64 5 to i32
%tmp6 = trunc i64 1 to i32
%tmp7 = trunc i64 0 to i32
+ %tmp8 = trunc i64 2 to i32
call void (...) @llvm.experimental.deoptimize.isVoid(i32 13) #0 [ "deopt"() ]
ret void
}
More information about the llvm-commits
mailing list