[llvm] a1b78fb - [JT][CT] Preserve exisiting BPI/BFI during JumpThreading
Evgeniy Brevnov via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 16 01:08:42 PST 2023
Author: Evgeniy Brevnov
Date: 2023-02-16T16:08:34+07:00
New Revision: a1b78fb929fccf96acaa0212cf68fee82298e747
URL: https://github.com/llvm/llvm-project/commit/a1b78fb929fccf96acaa0212cf68fee82298e747
DIFF: https://github.com/llvm/llvm-project/commit/a1b78fb929fccf96acaa0212cf68fee82298e747.diff
LOG: [JT][CT] Preserve exisiting BPI/BFI during JumpThreading
Currently, JT creates and updates local instances of BPI\BFI. As a result global ones have to be invalidated if JT made any changes.
In fact, JT doesn't use any information from BPI/BFI for the sake of the transformation itself. It only creates BPI/BFI to keep them up to date. But since it updates local copies (besides cases when it updates profile metadata) it just waste of time.
Current patch is a rework of D124439. D124439 makes one step and replaces local copies with global ones retrieved through AnalysisPassManager. Here we do one more step and don't create BPI/BFI if the only reason of creation is to keep BPI/BFI up to date. Overall logic is the following. If there is cached BPI/BFI then update it along the transformations. If there is no existing BPI/BFI, then create it only if it is required to update profile metadata.
Please note if BPI/BFI exists on exit from JT (either cached or created) it is always up to date and no reason to invalidate it.
Reviewed By: mkazantsev
Differential Revision: https://reviews.llvm.org/D136827
Added:
Modified:
llvm/include/llvm/Transforms/Scalar/JumpThreading.h
llvm/lib/Transforms/Scalar/JumpThreading.cpp
llvm/test/Transforms/JumpThreading/select.ll
llvm/test/Transforms/JumpThreading/thread-prob-2.ll
llvm/test/Transforms/JumpThreading/thread-prob-3.ll
llvm/test/Transforms/JumpThreading/thread-prob-4.ll
llvm/test/Transforms/JumpThreading/thread-prob-5.ll
llvm/test/Transforms/JumpThreading/thread-prob-6.ll
llvm/test/Transforms/JumpThreading/threading_prof1.ll
llvm/test/Transforms/JumpThreading/threading_prof2.ll
llvm/test/Transforms/JumpThreading/threading_prof3.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index 09d08bf423a6a..e1ff43a6093cf 100644
--- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -20,7 +20,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/ValueHandle.h"
+#include <optional>
#include <utility>
namespace llvm {
@@ -31,7 +33,6 @@ class BinaryOperator;
class BranchInst;
class CmpInst;
class Constant;
-class DomTreeUpdater;
class Function;
class Instruction;
class IntrinsicInst;
@@ -75,14 +76,16 @@ enum ConstantPreference { WantInteger, WantBlockAddress };
/// In this case, the unconditional branch at the end of the first if can be
/// revectored to the false side of the second if.
class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
- TargetLibraryInfo *TLI;
- TargetTransformInfo *TTI;
- LazyValueInfo *LVI;
- AAResults *AA;
- DomTreeUpdater *DTU;
- std::unique_ptr<BlockFrequencyInfo> BFI;
- std::unique_ptr<BranchProbabilityInfo> BPI;
- bool HasProfileData = false;
+ Function *F = nullptr;
+ FunctionAnalysisManager *FAM = nullptr;
+ TargetLibraryInfo *TLI = nullptr;
+ TargetTransformInfo *TTI = nullptr;
+ LazyValueInfo *LVI = nullptr;
+ AAResults *AA = nullptr;
+ std::unique_ptr<DomTreeUpdater> DTU;
+ std::optional<BlockFrequencyInfo *> BFI = std::nullopt;
+ std::optional<BranchProbabilityInfo *> BPI = std::nullopt;
+ bool ChangedSinceLastAnalysisUpdate = false;
bool HasGuards = false;
#ifndef LLVM_ENABLE_ABI_BREAKING_CHECKS
SmallPtrSet<const BasicBlock *, 16> LoopHeaders;
@@ -97,18 +100,16 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
JumpThreadingPass(int T = -1);
// Glue for old PM.
- bool runImpl(Function &F, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- LazyValueInfo *LVI, AAResults *AA, DomTreeUpdater *DTU,
- bool HasProfileData, std::unique_ptr<BlockFrequencyInfo> BFI,
- std::unique_ptr<BranchProbabilityInfo> BPI);
+ bool runImpl(Function &F, FunctionAnalysisManager *FAM,
+ TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ LazyValueInfo *LVI, AAResults *AA,
+ std::unique_ptr<DomTreeUpdater> DTU,
+ std::optional<BlockFrequencyInfo *> BFI,
+ std::optional<BranchProbabilityInfo *> BPI);
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
- void releaseMemory() {
- BFI.reset();
- BPI.reset();
- }
-
+ DomTreeUpdater *getDomTreeUpdater() const { return DTU.get(); }
void findLoopHeaders(Function &F);
bool processBlock(BasicBlock *BB);
bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB);
@@ -168,9 +169,41 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
BasicBlock *splitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
const char *Suffix);
void updateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB,
- BasicBlock *NewBB, BasicBlock *SuccBB);
+ BasicBlock *NewBB, BasicBlock *SuccBB,
+ BlockFrequencyInfo *BFI,
+ BranchProbabilityInfo *BPI,
+ bool HasProfile);
/// Check if the block has profile metadata for its outgoing edges.
bool doesBlockHaveProfileData(BasicBlock *BB);
+
+ /// Returns analysis preserved by the pass.
+ PreservedAnalyses getPreservedAnalysis() const;
+
+ /// Helper function to run "external" analysis in the middle of JumpThreading.
+ /// It takes care of updating/invalidating other existing analysis
+ /// before/after running the "external" one.
+ template <typename AnalysisT>
+ typename AnalysisT::Result *runExternalAnalysis();
+
+ /// Returns an existing instance of BPI if any, otherwise nullptr. By
+ /// "existing" we mean either cached result provided by FunctionAnalysisManger
+ /// or created by preceding call to 'getOrCreateBPI'.
+ BranchProbabilityInfo *getBPI();
+
+ /// Returns an existing instance of BFI if any, otherwise nullptr. By
+ /// "existing" we mean either cached result provided by FunctionAnalysisManger
+ /// or created by preceding call to 'getOrCreateBFI'.
+ BlockFrequencyInfo *getBFI();
+
+ /// Returns an existing instance of BPI if any, otherwise:
+ /// if 'HasProfile' is true creates new instance through
+ /// FunctionAnalysisManager, otherwise nullptr.
+ BranchProbabilityInfo *getOrCreateBPI(bool Force = false);
+
+ /// Returns an existing instance of BFI if any, otherwise:
+ /// if 'HasProfile' is true creates new instance through
+ /// FunctionAnalysisManager, otherwise nullptr.
+ BlockFrequencyInfo *getOrCreateBFI(bool Force = false);
};
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index bd5d9ef7f097e..ae9868ecdf7a5 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -23,7 +23,6 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -31,6 +30,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -134,12 +134,10 @@ namespace {
/// In this case, the unconditional branch at the end of the first if can be
/// revectored to the false side of the second if.
class JumpThreading : public FunctionPass {
- JumpThreadingPass Impl;
-
public:
static char ID; // Pass identification
- JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
+ JumpThreading(int T = -1) : FunctionPass(ID) {
initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
}
@@ -155,8 +153,6 @@ namespace {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
-
- void releaseMemory() override { Impl.releaseMemory(); }
};
} // end anonymous namespace
@@ -319,7 +315,6 @@ bool JumpThreading::runOnFunction(Function &F) {
auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
if (F.hasProfileData()) {
@@ -328,11 +323,14 @@ bool JumpThreading::runOnFunction(Function &F) {
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = Impl.runImpl(F, TLI, TTI, LVI, AA, &DTU, F.hasProfileData(),
- std::move(BFI), std::move(BPI));
+ JumpThreadingPass Impl;
+ bool Changed = Impl.runImpl(F, nullptr, TLI, TTI, LVI, AA,
+ std::make_unique<DomTreeUpdater>(
+ DT, DomTreeUpdater::UpdateStrategy::Lazy),
+ BFI.get(), BPI.get());
if (PrintLVIAfterJumpThreading) {
dbgs() << "LVI for function '" << F.getName() << "':\n";
- LVI->printLVI(F, DTU.getDomTree(), dbgs());
+ LVI->printLVI(F, Impl.getDomTreeUpdater()->getDomTree(), dbgs());
}
return Changed;
}
@@ -344,65 +342,74 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
if (TTI.hasBranchDivergence())
return PreservedAnalyses::all();
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LVI = AM.getResult<LazyValueAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
-
- std::unique_ptr<BlockFrequencyInfo> BFI;
- std::unique_ptr<BranchProbabilityInfo> BPI;
- if (F.hasProfileData()) {
- LoopInfo LI{DT};
- BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
- BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
- }
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- bool Changed = runImpl(F, &TLI, &TTI, &LVI, &AA, &DTU, F.hasProfileData(),
- std::move(BFI), std::move(BPI));
+ bool Changed =
+ runImpl(F, &AM, &TLI, &TTI, &LVI, &AA,
+ std::make_unique<DomTreeUpdater>(
+ &DT, nullptr, DomTreeUpdater::UpdateStrategy::Lazy),
+ std::nullopt, std::nullopt);
if (PrintLVIAfterJumpThreading) {
dbgs() << "LVI for function '" << F.getName() << "':\n";
- LVI.printLVI(F, DTU.getDomTree(), dbgs());
+ LVI.printLVI(F, getDomTreeUpdater()->getDomTree(), dbgs());
}
if (!Changed)
return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<LazyValueAnalysis>();
- return PA;
+
+
+ getDomTreeUpdater()->flush();
+
+#if defined(EXPENSIVE_CHECKS)
+ assert(getDomTreeUpdater()->getDomTree().verify(
+ DominatorTree::VerificationLevel::Full) &&
+ "DT broken after JumpThreading");
+ assert((!getDomTreeUpdater()->hasPostDomTree() ||
+ getDomTreeUpdater()->getPostDomTree().verify(
+ PostDominatorTree::VerificationLevel::Full)) &&
+ "PDT broken after JumpThreading");
+#else
+ assert(getDomTreeUpdater()->getDomTree().verify(
+ DominatorTree::VerificationLevel::Fast) &&
+ "DT broken after JumpThreading");
+ assert((!getDomTreeUpdater()->hasPostDomTree() ||
+ getDomTreeUpdater()->getPostDomTree().verify(
+ PostDominatorTree::VerificationLevel::Fast)) &&
+ "PDT broken after JumpThreading");
+#endif
+
+ return getPreservedAnalysis();
}
-bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
+bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_,
+ TargetLibraryInfo *TLI_,
TargetTransformInfo *TTI_, LazyValueInfo *LVI_,
- AliasAnalysis *AA_, DomTreeUpdater *DTU_,
- bool HasProfileData_,
- std::unique_ptr<BlockFrequencyInfo> BFI_,
- std::unique_ptr<BranchProbabilityInfo> BPI_) {
- LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
+ AliasAnalysis *AA_,
+ std::unique_ptr<DomTreeUpdater> DTU_,
+ std::optional<BlockFrequencyInfo *> BFI_,
+ std::optional<BranchProbabilityInfo *> BPI_) {
+ LLVM_DEBUG(dbgs() << "Jump threading on function '" << F_.getName() << "'\n");
+ F = &F_;
+ FAM = FAM_;
TLI = TLI_;
TTI = TTI_;
LVI = LVI_;
AA = AA_;
- DTU = DTU_;
- BFI.reset();
- BPI.reset();
- // When profile data is available, we need to update edge weights after
- // successful jump threading, which requires both BPI and BFI being available.
- HasProfileData = HasProfileData_;
- auto *GuardDecl = F.getParent()->getFunction(
+ DTU = std::move(DTU_);
+ BFI = BFI_;
+ BPI = BPI_;
+ auto *GuardDecl = F->getParent()->getFunction(
Intrinsic::getName(Intrinsic::experimental_guard));
HasGuards = GuardDecl && !GuardDecl->use_empty();
- if (HasProfileData) {
- BPI = std::move(BPI_);
- BFI = std::move(BFI_);
- }
// Reduce the number of instructions duplicated when optimizing strictly for
// size.
if (BBDuplicateThreshold.getNumOccurrences())
BBDupThreshold = BBDuplicateThreshold;
- else if (F.hasFnAttribute(Attribute::MinSize))
+ else if (F->hasFnAttribute(Attribute::MinSize))
BBDupThreshold = 3;
else
BBDupThreshold = DefaultBBDupThreshold;
@@ -413,22 +420,22 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
assert(DTU && "DTU isn't passed into JumpThreading before using it.");
assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
DominatorTree &DT = DTU->getDomTree();
- for (auto &BB : F)
+ for (auto &BB : *F)
if (!DT.isReachableFromEntry(&BB))
Unreachable.insert(&BB);
if (!ThreadAcrossLoopHeaders)
- findLoopHeaders(F);
+ findLoopHeaders(*F);
bool EverChanged = false;
bool Changed;
do {
Changed = false;
- for (auto &BB : F) {
+ for (auto &BB : *F) {
if (Unreachable.count(&BB))
continue;
while (processBlock(&BB)) // Thread all of the branches we can over BB.
- Changed = true;
+ Changed = ChangedSinceLastAnalysisUpdate = true;
// Jump threading may have introduced redundant debug values into BB
// which should be removed.
@@ -438,7 +445,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// Stop processing BB if it's the entry or is now deleted. The following
// routines attempt to eliminate BB and locating a suitable replacement
// for the entry is non-trivial.
- if (&BB == &F.getEntryBlock() || DTU->isBBPendingDeletion(&BB))
+ if (&BB == &F->getEntryBlock() || DTU->isBBPendingDeletion(&BB))
continue;
if (pred_empty(&BB)) {
@@ -449,8 +456,8 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
<< '\n');
LoopHeaders.erase(&BB);
LVI->eraseBlock(&BB);
- DeleteDeadBlock(&BB, DTU);
- Changed = true;
+ DeleteDeadBlock(&BB, DTU.get());
+ Changed = ChangedSinceLastAnalysisUpdate = true;
continue;
}
@@ -465,12 +472,12 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// Don't alter Loop headers and latches to ensure another pass can
// detect and transform nested loops later.
!LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
- TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
+ TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU.get())) {
RemoveRedundantDbgInstrs(Succ);
// BB is valid for cleanup here because we passed in DTU. F remains
// BB's parent until a DTU->getDomTree() event.
LVI->eraseBlock(&BB);
- Changed = true;
+ Changed = ChangedSinceLastAnalysisUpdate = true;
}
}
}
@@ -1141,8 +1148,8 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
<< "' folding terminator: " << *BB->getTerminator()
<< '\n');
++NumFolds;
- ConstantFoldTerminator(BB, true, nullptr, DTU);
- if (HasProfileData)
+ ConstantFoldTerminator(BB, true, nullptr, DTU.get());
+ if (auto *BPI = getBPI())
BPI->eraseBlock(BB);
return true;
}
@@ -1297,7 +1304,7 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
FICond->eraseFromParent();
DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
- if (HasProfileData)
+ if (auto *BPI = getBPI())
BPI->eraseBlock(BB);
return true;
}
@@ -1741,7 +1748,7 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
++NumFolds;
Term->eraseFromParent();
DTU->applyUpdatesPermissive(Updates);
- if (HasProfileData)
+ if (auto *BPI = getBPI())
BPI->eraseBlock(BB);
// If the condition is now dead due to the removal of the old terminator,
@@ -1994,7 +2001,7 @@ bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
LoopHeaders.insert(BB);
LVI->eraseBlock(SinglePred);
- MergeBasicBlockIntoOnlyPred(BB, DTU);
+ MergeBasicBlockIntoOnlyPred(BB, DTU.get());
// Now that BB is merged into SinglePred (i.e. SinglePred code followed by
// BB code within one basic block `BB`), we need to invalidate the LVI
@@ -2313,6 +2320,11 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
<< BB->getName() << "'\n");
+ // Build BPI/BFI before any changes are made to IR.
+ bool HasProfile = doesBlockHaveProfileData(BB);
+ auto *BFI = getOrCreateBFI(HasProfile);
+ auto *BPI = getOrCreateBPI(BFI != nullptr);
+
BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());
@@ -2322,7 +2334,8 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
NewBB->moveAfter(PredBB);
// Set the block frequency of NewBB.
- if (HasProfileData) {
+ if (BFI) {
+ assert(BPI && "It's expected BPI to exist along with BFI");
auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
BPI->getEdgeProbability(PredPredBB, PredBB);
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
@@ -2335,7 +2348,7 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
// Copy the edge probabilities from PredBB to NewBB.
- if (HasProfileData)
+ if (BPI)
BPI->copyEdgeProbabilities(PredBB, NewBB);
// Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
@@ -2419,6 +2432,11 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB,
assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
"Don't thread across loop headers");
+ // Build BPI/BFI before any changes are made to IR.
+ bool HasProfile = doesBlockHaveProfileData(BB);
+ auto *BFI = getOrCreateBFI(HasProfile);
+ auto *BPI = getOrCreateBPI(BFI != nullptr);
+
// And finally, do it! Start by factoring the predecessors if needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
@@ -2442,7 +2460,8 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB,
NewBB->moveAfter(PredBB);
// Set the block frequency of NewBB.
- if (HasProfileData) {
+ if (BFI) {
+ assert(BPI && "It's expected BPI to exist along with BFI");
auto NewBBFreq =
BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
@@ -2484,7 +2503,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB,
SimplifyInstructionsInBlock(NewBB, TLI);
// Update the edge weight from BB to SuccBB, which should be less than before.
- updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
+ updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB, BFI, BPI, HasProfile);
// Threaded an edge!
++NumThreads;
@@ -2501,10 +2520,13 @@ BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
// Collect the frequencies of all predecessors of BB, which will be used to
// update the edge weight of the result of splitting predecessors.
DenseMap<BasicBlock *, BlockFrequency> FreqMap;
- if (HasProfileData)
+ auto *BFI = getBFI();
+ if (BFI) {
+ auto *BPI = getOrCreateBPI(true);
for (auto *Pred : Preds)
FreqMap.insert(std::make_pair(
Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
+ }
// In the case when BB is a LandingPad block we create 2 new predecessors
// instead of just one.
@@ -2523,10 +2545,10 @@ BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
for (auto *Pred : predecessors(NewBB)) {
Updates.push_back({DominatorTree::Delete, Pred, BB});
Updates.push_back({DominatorTree::Insert, Pred, NewBB});
- if (HasProfileData) // Update frequencies between Pred -> NewBB.
+ if (BFI) // Update frequencies between Pred -> NewBB.
NewBBFreq += FreqMap.lookup(Pred);
}
- if (HasProfileData) // Apply the summed frequency to NewBB.
+ if (BFI) // Apply the summed frequency to NewBB.
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
}
@@ -2536,7 +2558,9 @@ BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
const Instruction *TI = BB->getTerminator();
- assert(TI->getNumSuccessors() > 1 && "not a split");
+ if (!TI || TI->getNumSuccessors() < 2)
+ return false;
+
return hasValidBranchWeightMD(*TI);
}
@@ -2546,11 +2570,18 @@ bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
BasicBlock *BB,
BasicBlock *NewBB,
- BasicBlock *SuccBB) {
- if (!HasProfileData)
+ BasicBlock *SuccBB,
+ BlockFrequencyInfo *BFI,
+ BranchProbabilityInfo *BPI,
+ bool HasProfile) {
+ assert(((BFI && BPI) || (!BFI && !BFI)) &&
+ "Both BFI & BPI should either be set or unset");
+
+ if (!BFI) {
+ assert(!HasProfile &&
+ "It's expected to have BFI/BPI when profile info exists");
return;
-
- assert(BFI && BPI && "BFI & BPI should have been created here");
+ }
// As the edge from PredBB to BB is deleted, we have to update the block
// frequency of BB.
@@ -2623,7 +2654,7 @@ void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
// FIXME this locally as well so that BPI and BFI are consistent as well. We
// shouldn't make edges extremely likely or unlikely based solely on static
// estimation.
- if (BBSuccProbs.size() >= 2 && doesBlockHaveProfileData(BB)) {
+ if (BBSuccProbs.size() >= 2 && HasProfile) {
SmallVector<uint32_t, 4> Weights;
for (auto Prob : BBSuccProbs)
Weights.push_back(Prob.getNumerator());
@@ -2755,7 +2786,7 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
- if (HasProfileData)
+ if (auto *BPI = getBPI())
BPI->copyEdgeProbabilities(BB, PredBB);
DTU->applyUpdatesPermissive(Updates);
@@ -2792,21 +2823,30 @@ void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
BI->copyMetadata(*SI, {LLVMContext::MD_prof});
SIUse->setIncomingValue(Idx, SI->getFalseValue());
SIUse->addIncoming(SI->getTrueValue(), NewBB);
- // Set the block frequency of NewBB.
- if (HasProfileData) {
- uint64_t TrueWeight, FalseWeight;
- if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
- (TrueWeight + FalseWeight) != 0) {
- SmallVector<BranchProbability, 2> BP;
- BP.emplace_back(BranchProbability::getBranchProbability(
- TrueWeight, TrueWeight + FalseWeight));
- BP.emplace_back(BranchProbability::getBranchProbability(
- FalseWeight, TrueWeight + FalseWeight));
+
+ uint64_t TrueWeight = 1;
+ uint64_t FalseWeight = 1;
+ // Copy probabilities from 'SI' to created conditional branch in 'Pred'.
+ if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
+ (TrueWeight + FalseWeight) != 0) {
+ SmallVector<BranchProbability, 2> BP;
+ BP.emplace_back(BranchProbability::getBranchProbability(
+ TrueWeight, TrueWeight + FalseWeight));
+ BP.emplace_back(BranchProbability::getBranchProbability(
+ FalseWeight, TrueWeight + FalseWeight));
+ // Update BPI if exists.
+ if (auto *BPI = getBPI())
BPI->setEdgeProbability(Pred, BP);
+ }
+ // Set the block frequency of NewBB.
+ if (auto *BFI = getBFI()) {
+ if ((TrueWeight + FalseWeight) == 0) {
+ TrueWeight = 1;
+ FalseWeight = 1;
}
-
- auto NewBBFreq =
- BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, NewBB);
+ BranchProbability PredToNewBBProb = BranchProbability::getBranchProbability(
+ TrueWeight, TrueWeight + FalseWeight);
+ auto NewBBFreq = BFI->getBlockFreq(Pred) * PredToNewBBProb;
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
}
@@ -3127,3 +3167,93 @@ bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
}
return true;
}
+
+PreservedAnalyses JumpThreadingPass::getPreservedAnalysis() const {
+ PreservedAnalyses PA;
+ PA.preserve<LazyValueAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+
+ // TODO: We would like to preserve BPI/BFI. Enable once all paths update them.
+ // TODO: Would be nice to verify BPI/BFI consistency as well.
+ return PA;
+}
+
+template <typename AnalysisT>
+typename AnalysisT::Result *JumpThreadingPass::runExternalAnalysis() {
+ assert(FAM && "Can't run external analysis without FunctionAnalysisManager");
+
+ // If there were no changes since last call to 'runExternalAnalysis' then all
+ // analysis is either up to date or explicitly invalidated. Just go ahead and
+ // run the "external" analysis.
+ if (!ChangedSinceLastAnalysisUpdate) {
+ assert(!DTU->hasPendingUpdates() &&
+ "Lost update of 'ChangedSinceLastAnalysisUpdate'?");
+ // Run the "external" analysis.
+ return &FAM->getResult<AnalysisT>(*F);
+ }
+ ChangedSinceLastAnalysisUpdate = false;
+
+ auto PA = getPreservedAnalysis();
+ // TODO: This shouldn't be needed once 'getPreservedAnalysis' reports BPI/BFI
+ // as preserved.
+ PA.preserve<BranchProbabilityAnalysis>();
+ PA.preserve<BlockFrequencyAnalysis>();
+ // Report everything except explicitly preserved as invalid.
+ FAM->invalidate(*F, PA);
+ // Update DT/PDT.
+ DTU->flush();
+ // Make sure DT/PDT are valid before running "external" analysis.
+ assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Fast));
+ assert((!DTU->hasPostDomTree() ||
+ DTU->getPostDomTree().verify(
+ PostDominatorTree::VerificationLevel::Fast)));
+ // Run the "external" analysis.
+ auto *Result = &FAM->getResult<AnalysisT>(*F);
+ // Update analysis JumpThreading depends on and not explicitly preserved.
+ TTI = &FAM->getResult<TargetIRAnalysis>(*F);
+ TLI = &FAM->getResult<TargetLibraryAnalysis>(*F);
+ AA = &FAM->getResult<AAManager>(*F);
+
+ return Result;
+}
+
+BranchProbabilityInfo *JumpThreadingPass::getBPI() {
+ if (!BPI) {
+ assert(FAM && "Can't create BPI without FunctionAnalysisManager");
+ BPI = FAM->getCachedResult<BranchProbabilityAnalysis>(*F);
+ }
+ return *BPI;
+}
+
+BlockFrequencyInfo *JumpThreadingPass::getBFI() {
+ if (!BFI) {
+ assert(FAM && "Can't create BFI without FunctionAnalysisManager");
+ BFI = FAM->getCachedResult<BlockFrequencyAnalysis>(*F);
+ }
+ return *BFI;
+}
+
+// Important note on validity of BPI/BFI. JumpThreading tries to preserve
+// BPI/BFI as it goes. Thus if cached instance exists it will be updated.
+// Otherwise, new instance of BPI/BFI is created (up to date by definition).
+BranchProbabilityInfo *JumpThreadingPass::getOrCreateBPI(bool Force) {
+ auto *Res = getBPI();
+ if (Res)
+ return Res;
+
+ if (Force)
+ BPI = runExternalAnalysis<BranchProbabilityAnalysis>();
+
+ return *BPI;
+}
+
+BlockFrequencyInfo *JumpThreadingPass::getOrCreateBFI(bool Force) {
+ auto *Res = getBFI();
+ if (Res)
+ return Res;
+
+ if (Force)
+ BFI = runExternalAnalysis<BlockFrequencyAnalysis>();
+
+ return *BFI;
+}
diff --git a/llvm/test/Transforms/JumpThreading/select.ll b/llvm/test/Transforms/JumpThreading/select.ll
index 67025b286953c..4ec55a66bb8ac 100644
--- a/llvm/test/Transforms/JumpThreading/select.ll
+++ b/llvm/test/Transforms/JumpThreading/select.ll
@@ -1,13 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
-; RUN: opt -S -passes=jump-threading -debug-only=branch-prob < %s 2>&1 | FileCheck %s
+; RUN: opt -S -passes="jump-threading" -debug-only=branch-prob < %s 2>&1 | FileCheck %s
+; RUN: opt -S -passes="require<branch-prob>,jump-threading" -debug-only=branch-prob < %s 2>&1 | FileCheck -check-prefixes=CHECK,CHECK-BPI %s
; REQUIRES: asserts
-; CHECK-LABEL: ---- Branch Probability Info : unfold1 ----
-; CHECK: set edge cond.false -> 0 successor probability to 0x20000000 / 0x80000000 = 25.00%
-; CHECK: set edge cond.false -> 1 successor probability to 0x60000000 / 0x80000000 = 75.00%
-; CHECK-LABEL: ---- Branch Probability Info : unfold2 ----
-; CHECK: set edge cond.false -> 0 successor probability to 0x20000000 / 0x80000000 = 25.00%
-; CHECK: set edge cond.false -> 1 successor probability to 0x60000000 / 0x80000000 = 75.00%
+; CHECK-BPI-LABEL: ---- Branch Probability Info : unfold1 ----
+; CHECK-BPI: set edge cond.false -> 0 successor probability to 0x20000000 / 0x80000000 = 25.00%
+; CHECK-BPI: set edge cond.false -> 1 successor probability to 0x60000000 / 0x80000000 = 75.00%
+; CHECK-BPI-LABEL: ---- Branch Probability Info : unfold2 ----
+; CHECK-BPI: set edge cond.false -> 0 successor probability to 0x20000000 / 0x80000000 = 25.00%
+; CHECK-BPI: set edge cond.false -> 1 successor probability to 0x60000000 / 0x80000000 = 75.00%
declare void @foo()
declare void @bar()
diff --git a/llvm/test/Transforms/JumpThreading/thread-prob-2.ll b/llvm/test/Transforms/JumpThreading/thread-prob-2.ll
index d2b5b9e0e2a12..cbb8339597df0 100644
--- a/llvm/test/Transforms/JumpThreading/thread-prob-2.ll
+++ b/llvm/test/Transforms/JumpThreading/thread-prob-2.ll
@@ -1,10 +1,12 @@
-; RUN: opt -debug-only=branch-prob -passes=jump-threading -S %s 2>&1 | FileCheck %s
+; RUN: opt -debug-only=branch-prob -passes="require<branch-prob>,jump-threading" -S %s 2>&1 | FileCheck %s
+; RUN: opt -debug-only=branch-prob -passes=jump-threading -S %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOBPI
; REQUIRES: asserts
; Make sure that we clear edge probabilities for bb.cond as we fold
; the conditional branch in it.
; CHECK: eraseBlock bb.cond
+; CHECK-NOBPI-NOT: eraseBlock bb.cond
define i32 @foo(i1 %cond) !prof !0 {
; CHECK-LABEL: @foo
diff --git a/llvm/test/Transforms/JumpThreading/thread-prob-3.ll b/llvm/test/Transforms/JumpThreading/thread-prob-3.ll
index 705bb475287c3..30afd8e6033b9 100644
--- a/llvm/test/Transforms/JumpThreading/thread-prob-3.ll
+++ b/llvm/test/Transforms/JumpThreading/thread-prob-3.ll
@@ -1,4 +1,5 @@
-; RUN: opt -debug-only=branch-prob -passes=jump-threading -S %s 2>&1 | FileCheck %s
+; RUN: opt -debug-only=branch-prob -passes="require<branch-prob>,jump-threading" -S %s 2>&1 | FileCheck %s
+; RUN: opt -debug-only=branch-prob -passes=jump-threading -S %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOBPI
; REQUIRES: asserts
; Make sure that we set edge probabilities for bb2 as we
@@ -7,6 +8,7 @@
; CHECK-LABEL: ---- Branch Probability Info : foo
; CHECK: set edge bb2 -> 0 successor probability to 0x80000000 / 0x80000000 = 100.00%
; CHECK-NEXT: set edge bb2 -> 1 successor probability to 0x00000000 / 0x80000000 = 0.00%
+; CHECK-NOBPI-NOT: ---- Branch Probability Info : foo
define void @foo(i1 %f0, i1 %f1, i1 %f2) !prof !{!"function_entry_count", i64 0} {
; CHECK-LABEL: @foo(
bb1:
diff --git a/llvm/test/Transforms/JumpThreading/thread-prob-4.ll b/llvm/test/Transforms/JumpThreading/thread-prob-4.ll
index e80985c7f8b07..fcdb51924b3e1 100644
--- a/llvm/test/Transforms/JumpThreading/thread-prob-4.ll
+++ b/llvm/test/Transforms/JumpThreading/thread-prob-4.ll
@@ -1,10 +1,12 @@
-; RUN: opt -debug-only=branch-prob -passes=jump-threading -S %s 2>&1 | FileCheck %s
+; RUN: opt -debug-only=branch-prob -passes="require<branch-prob>,jump-threading" -S %s 2>&1 | FileCheck %s
+; RUN: opt -debug-only=branch-prob -passes=jump-threading -S %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOBPI
; REQUIRES: asserts
; Make sure that we clear edge probabilities for bb1 as we fold
; the conditional branch in it.
; CHECK: eraseBlock bb1
+; CHECK-NOBPI-NOT: eraseBlock bb1
define i32 @foo(i32 %arg) !prof !0 {
; CHECK-LABEL: @foo
diff --git a/llvm/test/Transforms/JumpThreading/thread-prob-5.ll b/llvm/test/Transforms/JumpThreading/thread-prob-5.ll
index a3bcfdd87a286..95371e1e3b071 100644
--- a/llvm/test/Transforms/JumpThreading/thread-prob-5.ll
+++ b/llvm/test/Transforms/JumpThreading/thread-prob-5.ll
@@ -1,10 +1,12 @@
-; RUN: opt -debug-only=branch-prob -passes=jump-threading -S %s 2>&1 | FileCheck %s
+; RUN: opt -debug-only=branch-prob -passes="require<branch-prob>,jump-threading" -S %s 2>&1 | FileCheck %s
+; RUN: opt -debug-only=branch-prob -passes=jump-threading -S %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOBPI
; REQUIRES: asserts
; Make sure that we clear edge probabilities for bb1 as we fold
; the conditional branch in it.
; CHECK: eraseBlock bb1
+; CHECK-NOBPI-NOT: eraseBlock bb1
define void @foo(i32 %i, i32 %len) !prof !0 {
; CHECK-LABEL: @foo
diff --git a/llvm/test/Transforms/JumpThreading/thread-prob-6.ll b/llvm/test/Transforms/JumpThreading/thread-prob-6.ll
index 0384c73968b9a..6a0746a60e1e1 100644
--- a/llvm/test/Transforms/JumpThreading/thread-prob-6.ll
+++ b/llvm/test/Transforms/JumpThreading/thread-prob-6.ll
@@ -1,10 +1,12 @@
-; RUN: opt -debug-only=branch-prob -passes=jump-threading -S %s 2>&1 | FileCheck %s
+; RUN: opt -debug-only=branch-prob -passes="require<branch-prob>,jump-threading" -S %s 2>&1 | FileCheck %s
+; RUN: opt -debug-only=branch-prob -passes=jump-threading -S %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOBPI
; REQUIRES: asserts
; Make sure that we clear edge probabilities for bb1 as we fold
; the conditional branch in it.
; CHECK: eraseBlock bb1
+; CHECK-NOBPI-NOT: eraseBlock bb1
define i32 @foo() !prof !0 {
; CHECK-LABEL: @foo
diff --git a/llvm/test/Transforms/JumpThreading/threading_prof1.ll b/llvm/test/Transforms/JumpThreading/threading_prof1.ll
index 5abc89071d388..28f1abe2d0948 100644
--- a/llvm/test/Transforms/JumpThreading/threading_prof1.ll
+++ b/llvm/test/Transforms/JumpThreading/threading_prof1.ll
@@ -95,4 +95,4 @@ declare i32 @b()
!0 = !{!"branch_weights", i32 2146410443, i32 1073205}
;CHECK: ![[PROF1]] = !{!"branch_weights", i32 1073205, i32 2146410443}
-;CHECK: ![[PROF2]] = !{!"branch_weights", i32 2146410443, i32 1073205}
+;CHECK: ![[PROF2]] = !{!"branch_weights", i32 -2147483648, i32 0}
diff --git a/llvm/test/Transforms/JumpThreading/threading_prof2.ll b/llvm/test/Transforms/JumpThreading/threading_prof2.ll
index f81770285d6af..8b217411cd1da 100644
--- a/llvm/test/Transforms/JumpThreading/threading_prof2.ll
+++ b/llvm/test/Transforms/JumpThreading/threading_prof2.ll
@@ -38,4 +38,4 @@ declare i32 @b()
!0 = !{!"branch_weights", i32 2146410443, i32 1073205}
;CHECK: ![[PROF1]] = !{!"branch_weights", i32 1073205, i32 2146410443}
-;CHECK: ![[PROF2]] = !{!"branch_weights", i32 2146410443, i32 1073205}
+;CHECK: ![[PROF2]] = !{!"branch_weights", i32 -2147483648, i32 0}
diff --git a/llvm/test/Transforms/JumpThreading/threading_prof3.ll b/llvm/test/Transforms/JumpThreading/threading_prof3.ll
index 29a964d0ebff0..e7332018c4629 100644
--- a/llvm/test/Transforms/JumpThreading/threading_prof3.ll
+++ b/llvm/test/Transforms/JumpThreading/threading_prof3.ll
@@ -26,4 +26,4 @@ bb9:
ret void
}
-;CHECK: ![[PROF]] = !{!"branch_weights", i32 0, i32 0}
+;CHECK: ![[PROF]] = !{!"branch_weights", i32 -2147483648, i32 0}
More information about the llvm-commits
mailing list