[llvm] r341386 - [PGO] Control Height Reduction
Hiroshi Yamauchi via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 4 10:19:13 PDT 2018
Author: yamauchi
Date: Tue Sep 4 10:19:13 2018
New Revision: 341386
URL: http://llvm.org/viewvc/llvm-project?rev=341386&view=rev
Log:
[PGO] Control Height Reduction
Summary:
Control height reduction merges conditional blocks of code and reduces the
number of conditional branches in the hot path based on profiles.
if (hot_cond1) { // Likely true.
do_stg_hot1();
}
if (hot_cond2) { // Likely true.
do_stg_hot2();
}
->
if (hot_cond1 && hot_cond2) { // Hot path.
do_stg_hot1();
do_stg_hot2();
} else { // Cold path.
if (hot_cond1) {
do_stg_hot1();
}
if (hot_cond2) {
do_stg_hot2();
}
}
This speeds up some internal benchmarks up to ~30%.
Reviewers: davidxl
Reviewed By: davidxl
Subscribers: xbolva00, dmgreen, mehdi_amini, llvm-commits, mgorny
Differential Revision: https://reviews.llvm.org/D50591
Added:
llvm/trunk/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
llvm/trunk/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
llvm/trunk/test/Transforms/PGOProfile/chr.ll
Modified:
llvm/trunk/include/llvm/InitializePasses.h
llvm/trunk/include/llvm/LinkAllPasses.h
llvm/trunk/include/llvm/Transforms/Utils.h
llvm/trunk/lib/Passes/PassBuilder.cpp
llvm/trunk/lib/Passes/PassRegistry.def
llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
llvm/trunk/lib/Transforms/Instrumentation/CMakeLists.txt
llvm/trunk/lib/Transforms/Instrumentation/Instrumentation.cpp
Modified: llvm/trunk/include/llvm/InitializePasses.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/InitializePasses.h?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/include/llvm/InitializePasses.h (original)
+++ llvm/trunk/include/llvm/InitializePasses.h Tue Sep 4 10:19:13 2018
@@ -103,6 +103,7 @@ void initializeCodeGenPreparePass(PassRe
void initializeConstantHoistingLegacyPassPass(PassRegistry&);
void initializeConstantMergeLegacyPassPass(PassRegistry&);
void initializeConstantPropagationPass(PassRegistry&);
+void initializeControlHeightReductionLegacyPassPass(PassRegistry&);
void initializeCorrelatedValuePropagationPass(PassRegistry&);
void initializeCostModelAnalysisPass(PassRegistry&);
void initializeCrossDSOCFIPass(PassRegistry&);
Modified: llvm/trunk/include/llvm/LinkAllPasses.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/LinkAllPasses.h?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/include/llvm/LinkAllPasses.h (original)
+++ llvm/trunk/include/llvm/LinkAllPasses.h Tue Sep 4 10:19:13 2018
@@ -88,6 +88,7 @@ namespace {
(void) llvm::createCalledValuePropagationPass();
(void) llvm::createConstantMergePass();
(void) llvm::createConstantPropagationPass();
+ (void) llvm::createControlHeightReductionLegacyPass();
(void) llvm::createCostModelAnalysisPass();
(void) llvm::createDeadArgEliminationPass();
(void) llvm::createDeadCodeEliminationPass();
Added: llvm/trunk/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h?rev=341386&view=auto
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h (added)
+++ llvm/trunk/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h Tue Sep 4 10:19:13 2018
@@ -0,0 +1,32 @@
+//===- ControlHeightReduction.h - Control Height Reduction ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass merges conditional blocks of code and reduces the number of
+// conditional branches in the hot paths based on profiles.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+
+namespace llvm {
+
+class ControlHeightReductionPass :
+ public PassInfoMixin<ControlHeightReductionPass> {
+public:
+ ControlHeightReductionPass();
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
Modified: llvm/trunk/include/llvm/Transforms/Utils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils.h?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils.h Tue Sep 4 10:19:13 2018
@@ -113,6 +113,13 @@ extern char &LoopSimplifyID;
/// This function returns a new pass that downgrades the debug info in the
/// module to line tables only.
ModulePass *createStripNonLineTableDebugInfoPass();
+
+//===----------------------------------------------------------------------===//
+//
+// ControlHeightReudction - Merges conditional blocks of code and reduces the
+// number of conditional branches in the hot paths based on profiles.
+//
+FunctionPass *createControlHeightReductionLegacyPass();
}
#endif
Modified: llvm/trunk/lib/Passes/PassBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Passes/PassBuilder.cpp?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/lib/Passes/PassBuilder.cpp (original)
+++ llvm/trunk/lib/Passes/PassBuilder.cpp Tue Sep 4 10:19:13 2018
@@ -87,6 +87,7 @@
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
@@ -193,6 +194,10 @@ static cl::opt<bool> EnableSyntheticCoun
static Regex DefaultAliasRegex(
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
+static cl::opt<bool>
+ EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
+ cl::desc("Enable control height reduction optimization (CHR)"));
+
static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
switch (Level) {
case PassBuilder::O0:
@@ -486,6 +491,10 @@ PassBuilder::buildFunctionSimplification
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
+ if (EnableCHR && Level == O3 && PGOOpt &&
+ (!PGOOpt->ProfileUseFile.empty() || !PGOOpt->SampleProfileFile.empty()))
+ FPM.addPass(ControlHeightReductionPass());
+
return FPM;
}
Modified: llvm/trunk/lib/Passes/PassRegistry.def
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Passes/PassRegistry.def?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/lib/Passes/PassRegistry.def (original)
+++ llvm/trunk/lib/Passes/PassRegistry.def Tue Sep 4 10:19:13 2018
@@ -148,6 +148,7 @@ FUNCTION_PASS("bounds-checking", BoundsC
FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass())
FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass())
FUNCTION_PASS("consthoist", ConstantHoistingPass())
+FUNCTION_PASS("chr", ControlHeightReductionPass())
FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
FUNCTION_PASS("dce", DCEPass())
FUNCTION_PASS("div-rem-pairs", DivRemPairsPass())
Modified: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp Tue Sep 4 10:19:13 2018
@@ -152,6 +152,10 @@ static cl::opt<bool> EnableGVNSink(
"enable-gvn-sink", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN sinking pass (default = off)"));
+static cl::opt<bool>
+ EnableCHR("enable-chr", cl::init(true), cl::Hidden,
+ cl::desc("Enable control height reduction optimization (CHR)"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -411,6 +415,10 @@ void PassManagerBuilder::addFunctionSimp
// Clean up after everything.
addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
+
+ if (EnableCHR && OptLevel >= 3 &&
+ (!PGOInstrUse.empty() || !PGOSampleUse.empty()))
+ MPM.add(createControlHeightReductionLegacyPass());
}
void PassManagerBuilder::populateModulePassManager(
Modified: llvm/trunk/lib/Transforms/Instrumentation/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/CMakeLists.txt?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/CMakeLists.txt (original)
+++ llvm/trunk/lib/Transforms/Instrumentation/CMakeLists.txt Tue Sep 4 10:19:13 2018
@@ -2,6 +2,7 @@ add_llvm_library(LLVMInstrumentation
AddressSanitizer.cpp
BoundsChecking.cpp
CGProfile.cpp
+ ControlHeightReduction.cpp
DataFlowSanitizer.cpp
GCOVProfiling.cpp
MemorySanitizer.cpp
Added: llvm/trunk/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/ControlHeightReduction.cpp?rev=341386&view=auto
==============================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/ControlHeightReduction.cpp (added)
+++ llvm/trunk/lib/Transforms/Instrumentation/ControlHeightReduction.cpp Tue Sep 4 10:19:13 2018
@@ -0,0 +1,2010 @@
+//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass merges conditional blocks of code and reduces the number of
+// conditional branches in the hot paths based on profiles.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Transforms/Scalar.h"
+
+#include <cxxabi.h>
+#include <set>
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "chr"
+
+#define CHR_DEBUG(X) LLVM_DEBUG(X)
+
+static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
+ cl::desc("Apply CHR for all functions"));
+
+static cl::opt<double> CHRBiasThreshold(
+ "chr-bias-threshold", cl::init(0.99), cl::Hidden,
+ cl::desc("CHR considers a branch bias greater than this ratio as biased"));
+
+static cl::opt<unsigned> CHRMergeThreshold(
+ "chr-merge-threshold", cl::init(2), cl::Hidden,
+ cl::desc("CHR merges a group of N branches/selects where N >= this value"));
+
+static cl::opt<std::string> CHRModuleList(
+ "chr-module-list", cl::init(""), cl::Hidden,
+ cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
+
+static cl::opt<std::string> CHRFunctionList(
+ "chr-function-list", cl::init(""), cl::Hidden,
+ cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
+
+static StringSet<> CHRModules;
+static StringSet<> CHRFunctions;
+
+static void ParseCHRFilterFiles() {
+ if (!CHRModuleList.empty()) {
+ auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
+ if (!FileOrErr) {
+ errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
+ std::exit(1);
+ }
+ StringRef Buf = FileOrErr->get()->getBuffer();
+ SmallVector<StringRef, 0> Lines;
+ Buf.split(Lines, '\n');
+ for (StringRef Line : Lines) {
+ Line = Line.trim();
+ if (!Line.empty())
+ CHRModules.insert(Line);
+ }
+ }
+ if (!CHRFunctionList.empty()) {
+ auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
+ if (!FileOrErr) {
+ errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
+ std::exit(1);
+ }
+ StringRef Buf = FileOrErr->get()->getBuffer();
+ SmallVector<StringRef, 0> Lines;
+ Buf.split(Lines, '\n');
+ for (StringRef Line : Lines) {
+ Line = Line.trim();
+ if (!Line.empty())
+ CHRFunctions.insert(Line);
+ }
+ }
+}
+
+namespace {
+class ControlHeightReductionLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ ControlHeightReductionLegacyPass() : FunctionPass(ID) {
+ initializeControlHeightReductionLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ ParseCHRFilterFiles();
+ }
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<RegionInfoPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+} // end anonymous namespace
+
+char ControlHeightReductionLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
+ "chr",
+ "Reduce control height in the hot paths",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
+INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
+ "chr",
+ "Reduce control height in the hot paths",
+ false, false)
+
+FunctionPass *llvm::createControlHeightReductionLegacyPass() {
+ return new ControlHeightReductionLegacyPass();
+}
+
+namespace {
+
+struct CHRStats {
+ CHRStats() : NumBranches(0), NumBranchesDelta(0),
+ WeightedNumBranchesDelta(0) {}
+ void print(raw_ostream &OS) const {
+ OS << "CHRStats: NumBranches " << NumBranches
+ << " NumBranchesDelta " << NumBranchesDelta
+ << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
+ }
+ uint64_t NumBranches; // The original number of conditional branches /
+ // selects
+ uint64_t NumBranchesDelta; // The decrease of the number of conditional
+ // branches / selects in the hot paths due to CHR.
+ uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
+ // count at the scope entry.
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const CHRStats &Stats) {
+ Stats.print(OS);
+ return OS;
+}
+
+// RegInfo - some properties of a Region.
+struct RegInfo {
+ RegInfo() : R(nullptr), HasBranch(false) {}
+ RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
+ Region *R;
+ bool HasBranch;
+ SmallVector<SelectInst *, 8> Selects;
+};
+
+typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
+
+// CHRScope - a sequence of regions to CHR together. It corresponds to a
+// sequence of conditional blocks. It can have subscopes which correspond to
+// nested conditional blocks. Nested CHRScopes form a tree.
+class CHRScope {
+ public:
+ CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
+ assert(RI.R && "Null RegionIn");
+ RegInfos.push_back(RI);
+ }
+
+ Region *getParentRegion() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ Region *Parent = RegInfos[0].R->getParent();
+ assert(Parent && "Unexpected to call this on the top-level region");
+ return Parent;
+ }
+
+ BasicBlock *getEntryBlock() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ return RegInfos.front().R->getEntry();
+ }
+
+ BasicBlock *getExitBlock() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ return RegInfos.back().R->getExit();
+ }
+
+ bool appendable(CHRScope *Next) {
+ // The next scope is appendable only if this scope is directly connected to
+ // it (which implies it post-dominates this scope) and this scope dominates
+ // it (no edge to the next scope outside this scope).
+ BasicBlock *NextEntry = Next->getEntryBlock();
+ if (getExitBlock() != NextEntry)
+ // Not directly connected.
+ return false;
+ Region *LastRegion = RegInfos.back().R;
+ for (BasicBlock *Pred : predecessors(NextEntry))
+ if (!LastRegion->contains(Pred))
+ // There's an edge going into the entry of the next scope from outside
+ // of this scope.
+ return false;
+ return true;
+ }
+
+ void append(CHRScope *Next) {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
+ assert(getParentRegion() == Next->getParentRegion() &&
+ "Must be siblings");
+ assert(getExitBlock() == Next->getEntryBlock() &&
+ "Must be adjacent");
+ for (RegInfo &RI : Next->RegInfos)
+ RegInfos.push_back(RI);
+ for (CHRScope *Sub : Next->Subs)
+ Subs.push_back(Sub);
+ }
+
+ void addSub(CHRScope *SubIn) {
+#ifndef NDEBUG
+ bool is_child = false;
+ for (RegInfo &RI : RegInfos)
+ if (RI.R == SubIn->getParentRegion()) {
+ is_child = true;
+ break;
+ }
+ assert(is_child && "Must be a child");
+#endif
+ Subs.push_back(SubIn);
+ }
+
+ // Split this scope at the boundary region into two, which will belong to the
+ // tail and returns the tail.
+ CHRScope *split(Region *Boundary) {
+ assert(Boundary && "Boundary null");
+ assert(RegInfos.begin()->R != Boundary &&
+ "Can't be split at beginning");
+ auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(),
+ [&Boundary](const RegInfo& RI) {
+ return Boundary == RI.R;
+ });
+ if (BoundaryIt == RegInfos.end())
+ return nullptr;
+ SmallVector<RegInfo, 8> TailRegInfos;
+ SmallVector<CHRScope *, 8> TailSubs;
+ TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end());
+ RegInfos.resize(BoundaryIt - RegInfos.begin());
+ DenseSet<Region *> TailRegionSet;
+ for (RegInfo &RI : TailRegInfos)
+ TailRegionSet.insert(RI.R);
+ for (auto It = Subs.begin(); It != Subs.end(); ) {
+ CHRScope *Sub = *It;
+ assert(Sub && "null Sub");
+ Region *Parent = Sub->getParentRegion();
+ if (TailRegionSet.count(Parent)) {
+ TailSubs.push_back(Sub);
+ It = Subs.erase(It);
+ } else {
+ assert(std::find_if(RegInfos.begin(), RegInfos.end(),
+ [&Parent](const RegInfo& RI) {
+ return Parent == RI.R;
+ }) != RegInfos.end() &&
+ "Must be in head");
+ ++It;
+ }
+ }
+ assert(HoistStopMap.empty() && "MapHoistStops must be empty");
+ return new CHRScope(TailRegInfos, TailSubs);
+ }
+
+ bool contains(Instruction *I) const {
+ BasicBlock *Parent = I->getParent();
+ for (const RegInfo &RI : RegInfos)
+ if (RI.R->contains(Parent))
+ return true;
+ return false;
+ }
+
+ void print(raw_ostream &OS) const;
+
+ SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
+ SmallVector<CHRScope *, 8> Subs; // Subscopes.
+
+ // The instruction at which to insert the CHR conditional branch (and hoist
+ // the dependent condition values).
+ Instruction *BranchInsertPoint;
+
+ // True-biased and false-biased regions (conditional blocks),
+ // respectively. Used only for the outermost scope and includes regions in
+ // subscopes. The rest are unbiased.
+ DenseSet<Region *> TrueBiasedRegions;
+ DenseSet<Region *> FalseBiasedRegions;
+ // Among the biased regions, the regions that get CHRed.
+ SmallVector<RegInfo, 8> CHRRegions;
+
+ // True-biased and false-biased selects, respectively. Used only for the
+ // outermost scope and includes ones in subscopes.
+ DenseSet<SelectInst *> TrueBiasedSelects;
+ DenseSet<SelectInst *> FalseBiasedSelects;
+
+ // Map from one of the above regions to the instructions to stop
+ // hoisting instructions at through use-def chains.
+ HoistStopMapTy HoistStopMap;
+
+ private:
+ CHRScope(SmallVector<RegInfo, 8> &RegInfosIn,
+ SmallVector<CHRScope *, 8> &SubsIn)
+ : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
+ Scope.print(OS);
+ return OS;
+}
+
+class CHR {
+ public:
+ CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
+ ProfileSummaryInfo &PSIin, RegionInfo &RIin)
+ : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {}
+
+ ~CHR() {
+ for (CHRScope *Scope : Scopes) {
+ delete Scope;
+ }
+ }
+
+ bool run();
+
+ private:
+ // See the comments in CHR::run() for the high level flow of the algorithm and
+ // what the following functions do.
+
+ void findScopes(SmallVectorImpl<CHRScope *> &Output) {
+ Region *R = RI.getTopLevelRegion();
+ CHRScope *Scope = findScopes(R, nullptr, nullptr, Output);
+ if (Scope) {
+ Output.push_back(Scope);
+ }
+ }
+ CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
+ SmallVectorImpl<CHRScope *> &Scopes);
+ CHRScope *findScope(Region *R);
+ void checkScopeHoistable(CHRScope *Scope);
+
+ void splitScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+ SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
+ CHRScope *Outer,
+ DenseSet<Value *> *OuterConditionValues,
+ Instruction *OuterInsertPoint,
+ SmallVectorImpl<CHRScope *> &Output,
+ DenseSet<Instruction *> &Unhoistables);
+
+ void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
+ void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
+
+ void filterScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+
+ void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+ void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
+
+ void sortScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+
+ void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
+ void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
+ void cloneScopeBlocks(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BasicBlock *ExitBlock,
+ Region *LastRegion,
+ ValueToValueMapTy &VMap);
+ BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
+ BasicBlock *EntryBlock,
+ BasicBlock *NewEntryBlock,
+ ValueToValueMapTy &VMap);
+ void fixupBranchesAndSelects(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BranchInst *MergedBR,
+ uint64_t ProfileCount);
+ void fixupBranch(Region *R,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition, BranchProbability &CHRBranchBias);
+ void fixupSelect(SelectInst* SI,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition, BranchProbability &CHRBranchBias);
+ void addToMergedCondition(bool IsTrueBiased, Value *Cond,
+ Instruction *BranchOrSelect,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition);
+
+ Function &F;
+ BlockFrequencyInfo &BFI;
+ DominatorTree &DT;
+ ProfileSummaryInfo &PSI;
+ RegionInfo &RI;
+ CHRStats Stats;
+
+ // All the true-biased regions in the function
+ DenseSet<Region *> TrueBiasedRegionsGlobal;
+ // All the false-biased regions in the function
+ DenseSet<Region *> FalseBiasedRegionsGlobal;
+ // All the true-biased selects in the function
+ DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
+ // All the false-biased selects in the function
+ DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
+ // A map from biased regions to their branch bias
+ DenseMap<Region *, BranchProbability> BranchBiasMap;
+ // A map from biased selects to their branch bias
+ DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
+ // All the scopes.
+ DenseSet<CHRScope *> Scopes;
+};
+
+} // end anonymous namespace
+
+static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
+ if (ForceCHR)
+ return true;
+
+ if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
+ if (CHRModules.count(F.getParent()->getName()))
+ return true;
+ StringRef Name = F.getName();
+ if (CHRFunctions.count(Name))
+ return true;
+ const char* DemangledName = nullptr;
+ int Status = -1;
+ DemangledName = abi::__cxa_demangle(Name.str().c_str(),
+ nullptr, nullptr, &Status);
+ return DemangledName && CHRFunctions.count(DemangledName);
+ }
+
+ assert(PSI.hasProfileSummary() && "Empty PSI?");
+ return PSI.isFunctionEntryHot(&F);
+}
+
+static void dumpIR(Function &F, const char *Label, CHRStats *Stats) {
+ std::string Name = F.getName().str();
+ const char *DemangledName = nullptr;
+ int Status = -1;
+ DemangledName = abi::__cxa_demangle(Name.c_str(),
+ nullptr, nullptr, &Status);
+ if (DemangledName == nullptr) {
+ DemangledName = "<NOT-MANGLED>";
+ }
+ std::string ModuleName = F.getParent()->getName().str();
+ CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
+ << Name);
+ if (Stats)
+ CHR_DEBUG(dbgs() << " " << *Stats);
+ CHR_DEBUG(dbgs() << "\n");
+ CHR_DEBUG(F.dump());
+}
+
+
+void CHRScope::print(raw_ostream &OS) const {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ OS << "CHRScope[";
+ OS << RegInfos.size() << ", Regions[";
+ for (const RegInfo &RI : RegInfos) {
+ OS << RI.R->getNameStr();
+ if (RI.HasBranch)
+ OS << " B";
+ if (RI.Selects.size() > 0)
+ OS << " S" << RI.Selects.size();
+ OS << ", ";
+ }
+ if (RegInfos[0].R->getParent()) {
+ OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
+ } else {
+ // top level region
+ OS << "]";
+ }
+ OS << ", Subs[";
+ for (CHRScope *Sub : Subs) {
+ OS << *Sub << ", ";
+ }
+ OS << "]]";
+}
+
+// Return true if the given instruction type can be hoisted by CHR.
+static bool isHoistableInstructionType(Instruction *I) {
+ return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
+ isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
+ isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
+ isa<InsertValueInst>(I);
+}
+
+// Return true if the given instruction can be hoisted by CHR.
+static bool isHoistable(Instruction *I, DominatorTree &DT) {
+ if (!isHoistableInstructionType(I))
+ return false;
+ return isSafeToSpeculativelyExecute(I, nullptr, &DT);
+}
+
+// Recursively traverse the use-def chains of the given value and return a set
+// of the unhoistable base values defined within the scope (excluding the
+// first-region entry block) or the (hoistable or unhoistable) base values that
+// are defined outside (including the first-region entry block) of the
+// scope. The returned set doesn't include constants.
+static std::set<Value *> getBaseValues(Value *V,
+ DominatorTree &DT) {
+ std::set<Value *> Result;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // We don't stop at a block that's not in the Scope because we would miss some
+ // instructions that are based on the same base values if we stop there.
+ if (!isHoistable(I, DT)) {
+ Result.insert(I);
+ return Result;
+ }
+ // I is hoistable above the Scope.
+ for (Value *Op : I->operands()) {
+ std::set<Value *> OpResult = getBaseValues(Op, DT);
+ Result.insert(OpResult.begin(), OpResult.end());
+ }
+ return Result;
+ }
+ if (isa<Argument>(V)) {
+ Result.insert(V);
+ return Result;
+ }
+ // We don't include others like constants because those won't lead to any
+ // chance of folding of conditions (eg two bit checks merged into one check)
+ // after CHR.
+ return Result; // empty
+}
+
+// Return true if V is already hoisted or can be hoisted (along with its
+// operands) above the insert point. When it returns true and HoistStops is
+// non-null, the instructions to stop hoisting at through the use-def chains are
+// inserted into HoistStops.
+static bool
+checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
+ DenseSet<Instruction *> &Unhoistables,
+ DenseSet<Instruction *> *HoistStops) {
+ assert(InsertPoint && "Null InsertPoint");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
+ assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
+ if (Unhoistables.count(I)) {
+ // Don't hoist if they are not to be hoisted.
+ return false;
+ }
+ if (DT.dominates(I, InsertPoint)) {
+ // We are already above the insert point. Stop here.
+ if (HoistStops)
+ HoistStops->insert(I);
+ return true;
+ }
+ // We aren't not above the insert point, check if we can hoist it above the
+ // insert point.
+ if (isHoistable(I, DT)) {
+ // Check operands first.
+ DenseSet<Instruction *> OpsHoistStops;
+ bool AllOpsHoisted = true;
+ for (Value *Op : I->operands()) {
+ if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) {
+ AllOpsHoisted = false;
+ break;
+ }
+ }
+ if (AllOpsHoisted) {
+ CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
+ if (HoistStops)
+ HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
+ return true;
+ }
+ }
+ return false;
+ }
+ // Non-instructions are considered hoistable.
+ return true;
+}
+
+// Returns true and sets the true probability and false probability of an
+// MD_prof metadata if it's well-formed.
+static bool CheckMDProf(MDNode *MD, BranchProbability &TrueProb,
+ BranchProbability &FalseProb) {
+ if (!MD) return false;
+ MDString *MDName = cast<MDString>(MD->getOperand(0));
+ if (MDName->getString() != "branch_weights" ||
+ MD->getNumOperands() != 3)
+ return false;
+ ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
+ ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
+ if (!TrueWeight || !FalseWeight)
+ return false;
+ APInt TrueWt = TrueWeight->getValue();
+ APInt FalseWt = FalseWeight->getValue();
+ APInt SumWt = TrueWt + FalseWt;
+ TrueProb = BranchProbability::getBranchProbability(TrueWt.getZExtValue(),
+ SumWt.getZExtValue());
+ FalseProb = BranchProbability::getBranchProbability(FalseWt.getZExtValue(),
+ SumWt.getZExtValue());
+ return true;
+}
+
+static BranchProbability getCHRBiasThreshold() {
+ return BranchProbability::getBranchProbability(
+ static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
+}
+
+// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
+// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
+// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
+// false.
+template<typename K, typename S, typename M>
+bool CheckBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb,
+ S &TrueSet, S &FalseSet, M &BiasMap) {
+ BranchProbability Threshold = getCHRBiasThreshold();
+ if (TrueProb >= Threshold) {
+ TrueSet.insert(Key);
+ BiasMap[Key] = TrueProb;
+ return true;
+ } else if (FalseProb >= Threshold) {
+ FalseSet.insert(Key);
+ BiasMap[Key] = FalseProb;
+ return true;
+ }
+ return false;
+}
+
+// Returns true and insert a region into the right biased set and the map if the
+// branch of the region is biased.
+static bool CheckBiasedBranch(BranchInst *BI, Region *R,
+ DenseSet<Region *> &TrueBiasedRegionsGlobal,
+ DenseSet<Region *> &FalseBiasedRegionsGlobal,
+ DenseMap<Region *, BranchProbability> &BranchBiasMap) {
+ if (!BI->isConditional())
+ return false;
+ BranchProbability ThenProb, ElseProb;
+ if (!CheckMDProf(BI->getMetadata(LLVMContext::MD_prof),
+ ThenProb, ElseProb))
+ return false;
+ BasicBlock *IfThen = BI->getSuccessor(0);
+ BasicBlock *IfElse = BI->getSuccessor(1);
+ assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
+ IfThen != IfElse &&
+ "Invariant from findScopes");
+ if (IfThen == R->getExit()) {
+ // Swap them so that IfThen/ThenProb means going into the conditional code
+ // and IfElse/ElseProb means skipping it.
+ std::swap(IfThen, IfElse);
+ std::swap(ThenProb, ElseProb);
+ }
+ CHR_DEBUG(dbgs() << "BI " << *BI << " ");
+ CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
+ CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
+ return CheckBias(R, ThenProb, ElseProb,
+ TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
+ BranchBiasMap);
+}
+
+// Returns true and insert a select into the right biased set and the map if the
+// select is biased.
+static bool CheckBiasedSelect(
+ SelectInst *SI, Region *R,
+ DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
+ DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
+ DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
+ BranchProbability TrueProb, FalseProb;
+ if (!CheckMDProf(SI->getMetadata(LLVMContext::MD_prof),
+ TrueProb, FalseProb))
+ return false;
+ CHR_DEBUG(dbgs() << "SI " << *SI << " ");
+ CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
+ CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
+ return CheckBias(SI, TrueProb, FalseProb,
+ TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
+ SelectBiasMap);
+}
+
+// Returns the instruction at which to hoist the dependent condition values and
+// insert the CHR branch for a region. This is the terminator branch in the
+// entry block or the first select in the entry block, if any.
+static Instruction* getBranchInsertPoint(RegInfo &RI) {
+ Region *R = RI.R;
+ BasicBlock *EntryBB = R->getEntry();
+ // The hoist point is by default the terminator of the entry block, which is
+ // the same as the branch instruction if RI.HasBranch is true.
+ Instruction *HoistPoint = EntryBB->getTerminator();
+ for (SelectInst *SI : RI.Selects) {
+ if (SI->getParent() == EntryBB) {
+ // Pick the first select in Selects in the entry block. Note Selects is
+ // sorted in the instruction order within a block (asserted below).
+ HoistPoint = SI;
+ break;
+ }
+ }
+ assert(HoistPoint && "Null HoistPoint");
+#ifndef NDEBUG
+ // Check that HoistPoint is the first one in Selects in the entry block,
+ // if any.
+ DenseSet<Instruction *> EntryBlockSelectSet;
+ for (SelectInst *SI : RI.Selects) {
+ if (SI->getParent() == EntryBB) {
+ EntryBlockSelectSet.insert(SI);
+ }
+ }
+ for (Instruction &I : *EntryBB) {
+ if (EntryBlockSelectSet.count(&I) > 0) {
+ assert(&I == HoistPoint &&
+ "HoistPoint must be the first one in Selects");
+ break;
+ }
+ }
+#endif
+ return HoistPoint;
+}
+
+// Find a CHR scope in the given region.
+CHRScope * CHR::findScope(Region *R) {
+ CHRScope *Result = nullptr;
+ BasicBlock *Entry = R->getEntry();
+ BasicBlock *Exit = R->getExit(); // null if top level.
+ assert(Entry && "Entry must not be null");
+ assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
+ "Only top level region has a null exit");
+ if (Entry)
+ CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "Entry null\n");
+ if (Exit)
+ CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "Exit null\n");
+ // Exclude cases where Entry is part of a subregion (hence it doesn't belong
+ // to this region).
+ bool EntryInSubregion = RI.getRegionFor(Entry) != R;
+ if (EntryInSubregion)
+ return nullptr;
+ // Exclude loops
+ for (BasicBlock *Pred : predecessors(Entry))
+ if (R->contains(Pred))
+ return nullptr;
+ if (Exit) {
+ // Try to find an if-then block (check if R is an if-then).
+ // if (cond) {
+ // ...
+ // }
+ auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
+ if (BI)
+ CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "BI null\n");
+ if (BI && BI->isConditional()) {
+ BasicBlock *S0 = BI->getSuccessor(0);
+ BasicBlock *S1 = BI->getSuccessor(1);
+ CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
+ CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
+ if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
+ RegInfo RI(R);
+ RI.HasBranch = CheckBiasedBranch(
+ BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
+ BranchBiasMap);
+ Result = new CHRScope(RI);
+ Scopes.insert(Result);
+ CHR_DEBUG(dbgs() << "Found a region with a branch\n");
+ ++Stats.NumBranches;
+ }
+ }
+ }
+ {
+ // Try to look for selects in the direct child blocks (as opposed to in
+ // subregions) of R.
+ // ...
+ // if (..) { // Some subregion
+ // ...
+ // }
+ // if (..) { // Some subregion
+ // ...
+ // }
+ // ...
+ // a = cond ? b : c;
+ // ...
+ SmallVector<SelectInst *, 8> Selects;
+ for (RegionNode *E : R->elements()) {
+ if (E->isSubRegion())
+ continue;
+ // This returns the basic block of E if E is a direct child of R (not a
+ // subregion.)
+ BasicBlock *BB = E->getEntry();
+ // Need to push in the order to make it easier to find the first Select
+ // later.
+ for (Instruction &I : *BB) {
+ if (auto *SI = dyn_cast<SelectInst>(&I)) {
+ Selects.push_back(SI);
+ ++Stats.NumBranches;
+ }
+ }
+ }
+ if (Selects.size() > 0) {
+ auto AddSelects = [&](RegInfo &RI) {
+ for (auto *SI : Selects)
+ if (CheckBiasedSelect(SI, RI.R,
+ TrueBiasedSelectsGlobal,
+ FalseBiasedSelectsGlobal,
+ SelectBiasMap))
+ RI.Selects.push_back(SI);
+ };
+ if (!Result) {
+ CHR_DEBUG(dbgs() << "Found a select-only region\n");
+ RegInfo RI(R);
+ AddSelects(RI);
+ Result = new CHRScope(RI);
+ Scopes.insert(Result);
+ } else {
+ CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
+ AddSelects(Result->RegInfos[0]);
+ }
+ }
+ }
+
+ if (Result) {
+ checkScopeHoistable(Result);
+ }
+ return Result;
+}
+
+// Check that any of the branch and the selects in the region could be
+// hoisted above the the CHR branch insert point (the most dominating of
+// them, either the branch (at the end of the first block) or the first
+// select in the first block). If the branch can't be hoisted, drop the
+// selects in the first blocks.
+//
+// For example, for the following scope/region with selects, we want to insert
+// the merged branch right before the first select in the first/entry block by
+// hoisting c1, c2, c3, and c4.
+//
+// // Branch insert point here.
+// a = c1 ? b : c; // Select 1
+// d = c2 ? e : f; // Select 2
+// if (c3) { // Branch
+// ...
+// c4 = foo() // A call.
+// g = c4 ? h : i; // Select 3
+// }
+//
+// But suppose we can't hoist c4 because it's dependent on the preceding
+// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
+// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
+void CHR::checkScopeHoistable(CHRScope *Scope) {
+ RegInfo &RI = Scope->RegInfos[0];
+ Region *R = RI.R;
+ BasicBlock *EntryBB = R->getEntry();
+ auto *Branch = RI.HasBranch ?
+ cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
+ SmallVector<SelectInst *, 8> &Selects = RI.Selects;
+ if (RI.HasBranch || !Selects.empty()) {
+ Instruction *InsertPoint = getBranchInsertPoint(RI);
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+ // Avoid a data dependence from a select or a branch to a(nother)
+ // select. Note no instruction can't data-depend on a branch (a branch
+ // instruction doesn't produce a value).
+ DenseSet<Instruction *> Unhoistables;
+ // Initialize Unhoistables with the selects.
+ for (SelectInst *SI : Selects) {
+ Unhoistables.insert(SI);
+ }
+ // Remove Selects that can't be hoisted.
+ for (auto it = Selects.begin(); it != Selects.end(); ) {
+ SelectInst *SI = *it;
+ if (SI == InsertPoint) {
+ ++it;
+ continue;
+ }
+ bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr);
+ if (!IsHoistable) {
+ CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
+ it = Selects.erase(it);
+ // Since we are dropping the select here, we also drop it from
+ // Unhoistables.
+ Unhoistables.erase(SI);
+ } else
+ ++it;
+ }
+ // Update InsertPoint after potentially removing selects.
+ InsertPoint = getBranchInsertPoint(RI);
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+ if (RI.HasBranch && InsertPoint != Branch) {
+ bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr);
+ if (!IsHoistable) {
+ // If the branch isn't hoistable, drop the selects in the entry
+ // block, preferring the branch, which makes the branch the hoist
+ // point.
+ assert(InsertPoint != Branch && "Branch must not be the hoist point");
+ CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
+ CHR_DEBUG(
+ for (SelectInst *SI : Selects) {
+ dbgs() << "SI " << *SI << "\n";
+ });
+ Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
+ [EntryBB](SelectInst *SI) {
+ return SI->getParent() == EntryBB;
+ }), Selects.end());
+ Unhoistables.clear();
+ InsertPoint = Branch;
+ }
+ }
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+#ifndef NDEBUG
+ if (RI.HasBranch) {
+ assert(!DT.dominates(Branch, InsertPoint) &&
+ "Branch can't be already above the hoist point");
+ assert(checkHoistValue(Branch->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr) &&
+ "checkHoistValue for branch");
+ }
+ for (auto *SI : Selects) {
+ assert(!DT.dominates(SI, InsertPoint) &&
+ "SI can't be already above the hoist point");
+ assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
+ Unhoistables, nullptr) &&
+ "checkHoistValue for selects");
+ }
+ CHR_DEBUG(dbgs() << "Result\n");
+ if (RI.HasBranch) {
+ CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
+ }
+ for (auto *SI : Selects) {
+ CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
+ }
+#endif
+ }
+}
+
+// Traverse the region tree, find all nested scopes and merge them if possible.
+CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
+ SmallVectorImpl<CHRScope *> &Scopes) {
+ CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
+ CHRScope *Result = findScope(R);
+ // Visit subscopes.
+ CHRScope *ConsecutiveSubscope = nullptr;
+ SmallVector<CHRScope *, 8> Subscopes;
+ for (auto It = R->begin(); It != R->end(); ++It) {
+ const std::unique_ptr<Region> &SubR = *It;
+ auto Next_It = std::next(It);
+ Region *NextSubR = Next_It != R->end() ? Next_It->get() : nullptr;
+ CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
+ << "\n");
+ CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
+ if (SubCHRScope) {
+ CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
+ } else {
+ CHR_DEBUG(dbgs() << "Subregion Scope null\n");
+ }
+ if (SubCHRScope) {
+ if (!ConsecutiveSubscope)
+ ConsecutiveSubscope = SubCHRScope;
+ else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ ConsecutiveSubscope = SubCHRScope;
+ } else
+ ConsecutiveSubscope->append(SubCHRScope);
+ } else {
+ if (ConsecutiveSubscope) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ }
+ ConsecutiveSubscope = nullptr;
+ }
+ }
+ if (ConsecutiveSubscope) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ }
+ for (CHRScope *Sub : Subscopes) {
+ if (Result) {
+ // Combine it with the parent.
+ Result->addSub(Sub);
+ } else {
+ // Push Subscopes as they won't be combined with the parent.
+ Scopes.push_back(Sub);
+ }
+ }
+ return Result;
+}
+
+static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
+ DenseSet<Value *> ConditionValues;
+ if (RI.HasBranch) {
+ auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
+ ConditionValues.insert(BI->getCondition());
+ }
+ for (SelectInst *SI : RI.Selects) {
+ ConditionValues.insert(SI->getCondition());
+ }
+ return ConditionValues;
+}
+
+
+// Determine whether to split a scope depending on the sets of the branch
+// condition values of the previous region and the current region. We split
+// (return true) it if 1) the condition values of the inner/lower scope can't be
+// hoisted up to the outer/upper scope, or 2) the two sets of the condition
+// values have an empty intersection (because the combined branch conditions
+// won't probably lead to a simpler combined condition).
+static bool shouldSplit(Instruction *InsertPoint,
+ DenseSet<Value *> &PrevConditionValues,
+ DenseSet<Value *> &ConditionValues,
+ DominatorTree &DT,
+ DenseSet<Instruction *> &Unhoistables) {
+ CHR_DEBUG(
+ dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
+ for (Value *V : PrevConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << " ConditionValues ";
+ for (Value *V : ConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ assert(InsertPoint && "Null InsertPoint");
+ // If any of Bases isn't hoistable to the hoist point, split.
+ for (Value *V : ConditionValues) {
+ if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) {
+ CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
+ return true; // Not hoistable, split.
+ }
+ }
+ // If PrevConditionValues or ConditionValues is empty, don't split to avoid
+ // unnecessary splits at scopes with no branch/selects. If
+ // PrevConditionValues and ConditionValues don't intersect at all, split.
+ if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
+ // Use std::set as DenseSet doesn't work with set_intersection.
+ std::set<Value *> PrevBases, Bases;
+ for (Value *V : PrevConditionValues) {
+ std::set<Value *> BaseValues = getBaseValues(V, DT);
+ PrevBases.insert(BaseValues.begin(), BaseValues.end());
+ }
+ for (Value *V : ConditionValues) {
+ std::set<Value *> BaseValues = getBaseValues(V, DT);
+ Bases.insert(BaseValues.begin(), BaseValues.end());
+ }
+ CHR_DEBUG(
+ dbgs() << "PrevBases ";
+ for (Value *V : PrevBases) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << " Bases ";
+ for (Value *V : Bases) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ std::set<Value *> Intersection;
+ std::set_intersection(PrevBases.begin(), PrevBases.end(),
+ Bases.begin(), Bases.end(),
+ std::inserter(Intersection, Intersection.begin()));
+ if (Intersection.empty()) {
+ // Empty intersection, split.
+ CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
+ return true;
+ }
+ }
+ CHR_DEBUG(dbgs() << "No split\n");
+ return false; // Don't split.
+}
+
+static void GetSelectsInScope(CHRScope *Scope,
+ DenseSet<Instruction *> &Output) {
+ for (RegInfo &RI : Scope->RegInfos) {
+ for (SelectInst *SI : RI.Selects) {
+ Output.insert(SI);
+ }
+ }
+ for (CHRScope *Sub : Scope->Subs) {
+ GetSelectsInScope(Sub, Output);
+ }
+}
+
+void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ assert(!Scope->BranchInsertPoint &&
+ "BranchInsertPoint must not be set");
+ DenseSet<Instruction *> Unhoistables;
+ GetSelectsInScope(Scope, Unhoistables);
+ splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
+ }
+#ifndef NDEBUG
+ for (CHRScope *Scope : Output) {
+ assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
+ }
+#endif
+}
+
+SmallVector<CHRScope *, 8> CHR::splitScope(
+ CHRScope *Scope,
+ CHRScope *Outer,
+ DenseSet<Value *> *OuterConditionValues,
+ Instruction *OuterInsertPoint,
+ SmallVectorImpl<CHRScope *> &Output,
+ DenseSet<Instruction *> &Unhoistables) {
+ if (Outer) {
+ assert(OuterConditionValues && "Null OuterConditionValues");
+ assert(OuterInsertPoint && "Null OuterInsertPoint");
+ }
+ bool PrevSplitFromOuter = true;
+ DenseSet<Value *> PrevConditionValues;
+ Instruction *PrevInsertPoint = nullptr;
+ SmallVector<CHRScope *, 8> Splits;
+ SmallVector<bool, 8> SplitsSplitFromOuter;
+ SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
+ SmallVector<Instruction *, 8> SplitsInsertPoints;
+ SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
+ for (RegInfo &RI : RegInfos) {
+ Instruction *InsertPoint = getBranchInsertPoint(RI);
+ DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
+ CHR_DEBUG(
+ dbgs() << "ConditionValues ";
+ for (Value *V : ConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ if (RI.R == RegInfos[0].R) {
+ // First iteration. Check to see if we should split from the outer.
+ if (Outer) {
+ CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
+ CHR_DEBUG(dbgs() << "Should split from outer at "
+ << RI.R->getNameStr() << "\n");
+ if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
+ ConditionValues, DT, Unhoistables)) {
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ } else {
+ // Not splitting from the outer. Use the outer bases and insert
+ // point. Union the bases.
+ PrevSplitFromOuter = false;
+ PrevConditionValues = *OuterConditionValues;
+ PrevConditionValues.insert(ConditionValues.begin(),
+ ConditionValues.end());
+ PrevInsertPoint = OuterInsertPoint;
+ }
+ } else {
+ CHR_DEBUG(dbgs() << "Outer null\n");
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ }
+ } else {
+ CHR_DEBUG(dbgs() << "Should split from prev at "
+ << RI.R->getNameStr() << "\n");
+ if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
+ DT, Unhoistables)) {
+ CHRScope *Tail = Scope->split(RI.R);
+ Scopes.insert(Tail);
+ Splits.push_back(Scope);
+ SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
+ SplitsConditionValues.push_back(PrevConditionValues);
+ SplitsInsertPoints.push_back(PrevInsertPoint);
+ Scope = Tail;
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ PrevSplitFromOuter = true;
+ } else {
+ // Not splitting. Union the bases. Keep the hoist point.
+ PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
+ }
+ }
+ }
+ Splits.push_back(Scope);
+ SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
+ SplitsConditionValues.push_back(PrevConditionValues);
+ assert(PrevInsertPoint && "Null PrevInsertPoint");
+ SplitsInsertPoints.push_back(PrevInsertPoint);
+ assert(Splits.size() == SplitsConditionValues.size() &&
+ Splits.size() == SplitsSplitFromOuter.size() &&
+ Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
+ for (size_t I = 0; I < Splits.size(); ++I) {
+ CHRScope *Split = Splits[I];
+ DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
+ Instruction *SplitInsertPoint = SplitsInsertPoints[I];
+ SmallVector<CHRScope *, 8> NewSubs;
+ DenseSet<Instruction *> SplitUnhoistables;
+ GetSelectsInScope(Split, SplitUnhoistables);
+ for (CHRScope *Sub : Split->Subs) {
+ SmallVector<CHRScope *, 8> SubSplits = splitScope(
+ Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
+ SplitUnhoistables);
+ NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
+ }
+ Split->Subs = NewSubs;
+ }
+ SmallVector<CHRScope *, 8> Result;
+ for (size_t I = 0; I < Splits.size(); ++I) {
+ CHRScope *Split = Splits[I];
+ if (SplitsSplitFromOuter[I]) {
+ // Split from the outer.
+ Output.push_back(Split);
+ Split->BranchInsertPoint = SplitsInsertPoints[I];
+ CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
+ << "\n");
+ } else {
+ // Connected to the outer.
+ Result.push_back(Split);
+ }
+ }
+ if (!Outer)
+ assert(Result.empty() &&
+ "If no outer (top-level), must return no nested ones");
+ return Result;
+}
+
+void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
+ for (CHRScope *Scope : Scopes) {
+ assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
+ classifyBiasedScopes(Scope, Scope);
+ CHR_DEBUG(
+ dbgs() << "classifyBiasedScopes " << *Scope << "\n";
+ dbgs() << "TrueBiasedRegions ";
+ for (Region *R : Scope->TrueBiasedRegions) {
+ dbgs() << R->getNameStr() << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "FalseBiasedRegions ";
+ for (Region *R : Scope->FalseBiasedRegions) {
+ dbgs() << R->getNameStr() << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "TrueBiasedSelects ";
+ for (SelectInst *SI : Scope->TrueBiasedSelects) {
+ dbgs() << *SI << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "FalseBiasedSelects ";
+ for (SelectInst *SI : Scope->FalseBiasedSelects) {
+ dbgs() << *SI << ", ";
+ }
+ dbgs() << "\n";);
+ }
+}
+
+void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
+ for (RegInfo &RI : Scope->RegInfos) {
+ if (RI.HasBranch) {
+ Region *R = RI.R;
+ if (TrueBiasedRegionsGlobal.count(R) > 0)
+ OutermostScope->TrueBiasedRegions.insert(R);
+ else if (FalseBiasedRegionsGlobal.count(R) > 0)
+ OutermostScope->FalseBiasedRegions.insert(R);
+ else
+ llvm_unreachable("Must be biased");
+ }
+ for (SelectInst *SI : RI.Selects) {
+ if (TrueBiasedSelectsGlobal.count(SI) > 0)
+ OutermostScope->TrueBiasedSelects.insert(SI);
+ else if (FalseBiasedSelectsGlobal.count(SI) > 0)
+ OutermostScope->FalseBiasedSelects.insert(SI);
+ else
+ llvm_unreachable("Must be biased");
+ }
+ }
+ for (CHRScope *Sub : Scope->Subs) {
+ classifyBiasedScopes(Sub, OutermostScope);
+ }
+}
+
+static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
+ unsigned NumBiased = Scope->TrueBiasedRegions.size() +
+ Scope->FalseBiasedRegions.size() +
+ Scope->TrueBiasedSelects.size() +
+ Scope->FalseBiasedSelects.size();
+ return NumBiased >= CHRMergeThreshold;
+}
+
+void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ // Filter out the ones with only one region and no subs.
+ if (!hasAtLeastTwoBiasedBranches(Scope)) {
+ CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
+ << Scope->TrueBiasedRegions.size()
+ << " falsy-regions " << Scope->FalseBiasedRegions.size()
+ << " true-selects " << Scope->TrueBiasedSelects.size()
+ << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
+ continue;
+ }
+ Output.push_back(Scope);
+ }
+}
+
+void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
+ "Empty");
+ setCHRRegions(Scope, Scope);
+ Output.push_back(Scope);
+ CHR_DEBUG(
+ dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
+ for (auto pair : Scope->HoistStopMap) {
+ Region *R = pair.first;
+ dbgs() << "Region " << R->getNameStr() << "\n";
+ for (Instruction *I : pair.second) {
+ dbgs() << "HoistStop " << *I << "\n";
+ }
+ }
+ dbgs() << "CHRRegions" << "\n";
+ for (RegInfo &RI : Scope->CHRRegions) {
+ dbgs() << RI.R->getNameStr() << "\n";
+ });
+ }
+}
+
+void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
+ DenseSet<Instruction *> Unhoistables;
+ // Put the biased selects in Unhoistables because they should stay where they
+ // are and constant-folded after CHR (in case one biased select or a branch
+ // can depend on another biased select.)
+ for (RegInfo &RI : Scope->RegInfos) {
+ for (SelectInst *SI : RI.Selects) {
+ Unhoistables.insert(SI);
+ }
+ }
+ Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
+ for (RegInfo &RI : Scope->RegInfos) {
+ Region *R = RI.R;
+ DenseSet<Instruction *> HoistStops;
+ bool IsHoisted = false;
+ if (RI.HasBranch) {
+ assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
+ OutermostScope->FalseBiasedRegions.count(R) > 0) &&
+ "Must be truthy or falsy");
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ // Note checkHoistValue fills in HoistStops.
+ bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
+ Unhoistables, &HoistStops);
+ assert(IsHoistable && "Must be hoistable");
+ (void)(IsHoistable); // Unused in release build
+ IsHoisted = true;
+ }
+ for (SelectInst *SI : RI.Selects) {
+ assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
+ OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
+ "Must be true or false biased");
+ // Note checkHoistValue fills in HoistStops.
+ bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
+ Unhoistables, &HoistStops);
+ assert(IsHoistable && "Must be hoistable");
+ (void)(IsHoistable); // Unused in release build
+ IsHoisted = true;
+ }
+ if (IsHoisted) {
+ OutermostScope->CHRRegions.push_back(RI);
+ OutermostScope->HoistStopMap[R] = HoistStops;
+ }
+ }
+ for (CHRScope *Sub : Scope->Subs)
+ setCHRRegions(Sub, OutermostScope);
+}
+
+bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
+ return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
+}
+
+void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ Output.resize(Input.size());
+ std::copy(Input.begin(), Input.end(), Output.begin());
+ std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
+}
+
+// Return true if V is already hoisted or was hoisted (along with its operands)
+// to the insert point.
+static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
+ HoistStopMapTy &HoistStopMap,
+ DenseSet<Instruction *> &HoistedSet,
+ DenseSet<PHINode *> &TrivialPHIs) {
+ auto IT = HoistStopMap.find(R);
+ assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
+ DenseSet<Instruction *> &HoistStops = IT->second;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (I == HoistPoint)
+ return;
+ if (HoistStops.count(I))
+ return;
+ if (auto *PN = dyn_cast<PHINode>(I))
+ if (TrivialPHIs.count(PN))
+ // The trivial phi inserted by the previous CHR scope could replace a
+ // non-phi in HoistStops. Note that since this phi is at the exit of a
+ // previous CHR scope, which dominates this scope, it's safe to stop
+ // hoisting there.
+ return;
+ if (HoistedSet.count(I))
+ // Already hoisted, return.
+ return;
+ assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
+ for (Value *Op : I->operands()) {
+ hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
+ }
+ I->moveBefore(HoistPoint);
+ HoistedSet.insert(I);
+ CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
+ }
+}
+
+// Hoist the dependent condition values of the branches and the selects in the
+// scope to the insert point.
+static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
+ DenseSet<PHINode *> &TrivialPHIs) {
+ DenseSet<Instruction *> HoistedSet;
+ for (const RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
+ if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
+ HoistedSet, TrivialPHIs);
+ }
+ for (SelectInst *SI : RI.Selects) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
+ if (!(IsTrueBiased || IsFalseBiased))
+ continue;
+ hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
+ HoistedSet, TrivialPHIs);
+ }
+ }
+}
+
+// Negate the predicate if an ICmp if it's used only by branches or selects by
+// swapping the operands of the branches or the selects. Returns true if success.
+static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
+ Instruction *ExcludedUser,
+ CHRScope *Scope) {
+ for (User *U : ICmp->users()) {
+ if (U == ExcludedUser)
+ continue;
+ if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
+ continue;
+ if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
+ continue;
+ return false;
+ }
+ for (User *U : ICmp->users()) {
+ if (U == ExcludedUser)
+ continue;
+ if (auto *BI = dyn_cast<BranchInst>(U)) {
+ assert(BI->isConditional() && "Must be conditional");
+ BI->swapSuccessors();
+ // Don't need to swap this in terms of
+ // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
+ // mean whehter the branch is likely go into the if-then rather than
+ // successor0/successor1 and because we can tell which edge is the then or
+ // the else one by comparing the destination to the region exit block.
+ continue;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(U)) {
+ // Swap operands
+ Value *TrueValue = SI->getTrueValue();
+ Value *FalseValue = SI->getFalseValue();
+ SI->setTrueValue(FalseValue);
+ SI->setFalseValue(TrueValue);
+ SI->swapProfMetadata();
+ if (Scope->TrueBiasedSelects.count(SI)) {
+ assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
+ "Must not be already in");
+ Scope->FalseBiasedSelects.insert(SI);
+ } else if (Scope->FalseBiasedSelects.count(SI)) {
+ assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
+ "Must not be already in");
+ Scope->TrueBiasedSelects.insert(SI);
+ }
+ continue;
+ }
+ llvm_unreachable("Must be a branch or a select");
+ }
+ ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
+ return true;
+}
+
+// A helper for transformScopes. Insert a trivial phi at the scope exit block
+// for a value that's defined in the scope but used outside it (meaning it's
+// alive at the exit block).
+static void insertTrivialPHIs(CHRScope *Scope,
+ BasicBlock *EntryBlock, BasicBlock *ExitBlock,
+ DenseSet<PHINode *> &TrivialPHIs) {
+ DenseSet<BasicBlock *> BlocksInScopeSet;
+ SmallVector<BasicBlock *, 8> BlocksInScopeVec;
+ for (RegInfo &RI : Scope->RegInfos) {
+ for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
+ // sub-Scopes.
+ BlocksInScopeSet.insert(BB);
+ BlocksInScopeVec.push_back(BB);
+ }
+ }
+ CHR_DEBUG(
+ dbgs() << "Inserting redudant phis\n";
+ for (BasicBlock *BB : BlocksInScopeVec) {
+ dbgs() << "BlockInScope " << BB->getName() << "\n";
+ });
+ for (BasicBlock *BB : BlocksInScopeVec) {
+ for (Instruction &I : *BB) {
+ SmallVector<Instruction *, 8> Users;
+ for (User *U : I.users()) {
+ if (auto *UI = dyn_cast<Instruction>(U)) {
+ if (BlocksInScopeSet.count(UI->getParent()) == 0 &&
+ // Unless there's already a phi for I at the exit block.
+ !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
+ CHR_DEBUG(dbgs() << "V " << I << "\n");
+ CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
+ Users.push_back(UI);
+ } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
+ // There's a loop backedge from a block that's dominated by this
+ // scope to the entry block.
+ CHR_DEBUG(dbgs() << "V " << I << "\n");
+ CHR_DEBUG(dbgs()
+ << "Used at entry block (for a back edge) by a phi user "
+ << *UI << "\n");
+ Users.push_back(UI);
+ }
+ }
+ }
+ if (Users.size() > 0) {
+ // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
+ // ExitBlock. Replace I with the new phi in UI unless UI is another
+ // phi at ExitBlock.
+ unsigned PredCount = std::distance(pred_begin(ExitBlock),
+ pred_end(ExitBlock));
+ PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
+ &ExitBlock->front());
+ for (BasicBlock *Pred : predecessors(ExitBlock)) {
+ PN->addIncoming(&I, Pred);
+ }
+ TrivialPHIs.insert(PN);
+ CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
+ for (Instruction *UI : Users) {
+ for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
+ if (UI->getOperand(J) == &I) {
+ UI->setOperand(J, PN);
+ }
+ }
+ CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
+ }
+ }
+ }
+ }
+}
+
+// Assert that all the CHR regions of the scope have a biased branch or select.
+static void assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
+#ifndef NDEBUG
+ auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
+ if (Scope->TrueBiasedRegions.count(RI.R) ||
+ Scope->FalseBiasedRegions.count(RI.R))
+ return true;
+ for (SelectInst *SI : RI.Selects)
+ if (Scope->TrueBiasedSelects.count(SI) ||
+ Scope->FalseBiasedSelects.count(SI))
+ return true;
+ return false;
+ };
+ for (RegInfo &RI : Scope->CHRRegions) {
+ assert(HasBiasedBranchOrSelect(RI, Scope) &&
+ "Must have biased branch or select");
+ }
+#endif
+}
+
+// Assert that all the condition values of the biased branches and selects have
+// been hoisted to the pre-entry block or outside of the scope.
+static void assertBranchOrSelectConditionHoisted(CHRScope *Scope,
+ BasicBlock *PreEntryBlock) {
+ CHR_DEBUG(dbgs() << "Biased regions condition values \n");
+ for (RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
+ if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ Value *V = BI->getCondition();
+ CHR_DEBUG(dbgs() << *V << "\n");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ assert((I->getParent() == PreEntryBlock ||
+ !Scope->contains(I)) &&
+ "Must have been hoisted to PreEntryBlock or outside the scope");
+ }
+ }
+ for (SelectInst *SI : RI.Selects) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
+ if (!(IsTrueBiased || IsFalseBiased))
+ continue;
+ Value *V = SI->getCondition();
+ CHR_DEBUG(dbgs() << *V << "\n");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ assert((I->getParent() == PreEntryBlock ||
+ !Scope->contains(I)) &&
+ "Must have been hoisted to PreEntryBlock or outside the scope");
+ }
+ }
+ }
+}
+
+void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
+ CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
+
+ assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
+ Region *FirstRegion = Scope->RegInfos[0].R;
+ BasicBlock *EntryBlock = FirstRegion->getEntry();
+ Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
+ BasicBlock *ExitBlock = LastRegion->getExit();
+ Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
+
+ if (ExitBlock) {
+ // Insert a trivial phi at the exit block (where the CHR hot path and the
+ // cold path merges) for a value that's defined in the scope but used
+ // outside it (meaning it's alive at the exit block). We will add the
+ // incoming values for the CHR cold paths to it below. Without this, we'd
+ // miss updating phi's for such values unless there happens to already be a
+ // phi for that value there.
+ insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
+ }
+
+ // Split the entry block of the first region. The new block becomes the new
+ // entry block of the first region. The old entry block becomes the block to
+ // insert the CHR branch into. Note DT gets updated. Since DT gets updated
+ // through the split, we update the entry of the first region after the split,
+ // and Region only points to the entry and the exit blocks, rather than
+ // keeping everything in a list or set, the blocks membership and the
+ // entry/exit blocks of the region are still valid after the split.
+ CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
+ << " at " << *Scope->BranchInsertPoint << "\n");
+ BasicBlock *NewEntryBlock =
+ SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ FirstRegion->replaceEntryRecursive(NewEntryBlock);
+ BasicBlock *PreEntryBlock = EntryBlock;
+
+ ValueToValueMapTy VMap;
+ // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
+ // hot path (originals) and a cold path (clones) and update the PHIs at the
+ // exit block.
+ cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
+
+ // Replace the old (placeholder) branch with the new (merged) conditional
+ // branch.
+ BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
+ NewEntryBlock, VMap);
+
+#ifndef NDEBUG
+ assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
+#endif
+
+ // Hoist the conditional values of the branches/selects.
+ hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
+
+#ifndef NDEBUG
+ assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
+#endif
+
+ // Create the combined branch condition and constant-fold the branches/selects
+ // in the hot path.
+ fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
+ ProfileCount ? ProfileCount.getValue() : 0);
+}
+
+// A helper for transformScopes. Clone the blocks in the scope (excluding the
+// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
+// at the exit block.
+void CHR::cloneScopeBlocks(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BasicBlock *ExitBlock,
+ Region *LastRegion,
+ ValueToValueMapTy &VMap) {
+ // Clone all the blocks. The original blocks will be the hot-path
+ // CHR-optimized code and the cloned blocks will be the original unoptimized
+ // code. This is so that the block pointers from the
+ // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
+ // which CHR should apply to.
+ SmallVector<BasicBlock*, 8> NewBlocks;
+ for (RegInfo &RI : Scope->RegInfos)
+ for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
+ // sub-Scopes.
+ assert(BB != PreEntryBlock && "Don't copy the preetntry block");
+ BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
+ NewBlocks.push_back(NewBB);
+ VMap[BB] = NewBB;
+ }
+
+ // Place the cloned blocks right after the original blocks (right before the
+ // exit block of.)
+ if (ExitBlock)
+ F.getBasicBlockList().splice(ExitBlock->getIterator(),
+ F.getBasicBlockList(),
+ NewBlocks[0]->getIterator(), F.end());
+
+ // Update the cloned blocks/instructions to refer to themselves.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
+ for (Instruction &I : *NewBlocks[i])
+ RemapInstruction(&I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+ // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
+ // the top-level region but we don't need to add PHIs. The trivial PHIs
+ // inserted above will be updated here.
+ if (ExitBlock)
+ for (PHINode &PN : ExitBlock->phis())
+ for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
+ ++I) {
+ BasicBlock *Pred = PN.getIncomingBlock(I);
+ if (LastRegion->contains(Pred)) {
+ Value *V = PN.getIncomingValue(I);
+ auto It = VMap.find(V);
+ if (It != VMap.end()) V = It->second;
+ assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
+ PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
+ }
+ }
+}
+
+// A helper for transformScope. Replace the old (placeholder) branch with the
+// new (merged) conditional branch.
+BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
+ BasicBlock *EntryBlock,
+ BasicBlock *NewEntryBlock,
+ ValueToValueMapTy &VMap) {
+ BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
+ assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
+ "SplitBlock did not work correctly!");
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ assert(VMap.find(NewEntryBlock) != VMap.end() &&
+ "NewEntryBlock must have been copied");
+ OldBR->removeFromParent();
+ OldBR->dropAllReferences();
+ // The true predicate is a placeholder. It will be replaced later in
+ // fixupBranchesAndSelects().
+ BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
+ cast<BasicBlock>(VMap[NewEntryBlock]),
+ ConstantInt::getTrue(F.getContext()));
+ PreEntryBlock->getInstList().push_back(NewBR);
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ return NewBR;
+}
+
+// A helper for transformScopes. Create the combined branch condition and
+// constant-fold the branches/selects in the hot path.
+void CHR::fixupBranchesAndSelects(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BranchInst *MergedBR,
+ uint64_t ProfileCount) {
+ Value *MergedCondition = ConstantInt::getTrue(F.getContext());
+ BranchProbability CHRBranchBias(1, 1);
+ uint64_t NumCHRedBranches = 0;
+ IRBuilder<> IRB(PreEntryBlock->getTerminator());
+ for (RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ if (RI.HasBranch) {
+ fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
+ ++NumCHRedBranches;
+ }
+ for (SelectInst *SI : RI.Selects) {
+ fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
+ ++NumCHRedBranches;
+ }
+ }
+ Stats.NumBranchesDelta += NumCHRedBranches - 1;
+ Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
+ MergedBR->setCondition(MergedCondition);
+ SmallVector<uint32_t, 2> Weights;
+ Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000)));
+ Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)));
+ MDBuilder MDB(F.getContext());
+ MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
+ << "\n");
+}
+
+// A helper for fixupBranchesAndSelects. Add to the combined branch condition
+// and constant-fold a branch in the hot path.
+void CHR::fixupBranch(Region *R, CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition,
+ BranchProbability &CHRBranchBias) {
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
+ "Must be truthy or falsy");
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
+ "Must be in the bias map");
+ BranchProbability Bias = BranchBiasMap[R];
+ assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
+ // Take the min.
+ if (CHRBranchBias > Bias)
+ CHRBranchBias = Bias;
+ BasicBlock *IfThen = BI->getSuccessor(1);
+ BasicBlock *IfElse = BI->getSuccessor(0);
+ BasicBlock *RegionExitBlock = R->getExit();
+ assert(RegionExitBlock && "Null ExitBlock");
+ assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
+ IfThen != IfElse && "Invariant from findScopes");
+ if (IfThen == RegionExitBlock) {
+ // Swap them so that IfThen means going into it and IfElse means skipping
+ // it.
+ std::swap(IfThen, IfElse);
+ }
+ CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
+ << " IfElse " << IfElse->getName() << "\n");
+ Value *Cond = BI->getCondition();
+ BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
+ bool ConditionTrue = HotTarget == BI->getSuccessor(0);
+ addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
+ MergedCondition);
+ // Constant-fold the branch at ClonedEntryBlock.
+ assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
+ "The successor shouldn't change");
+ Value *NewCondition = ConditionTrue ?
+ ConstantInt::getTrue(F.getContext()) :
+ ConstantInt::getFalse(F.getContext());
+ BI->setCondition(NewCondition);
+}
+
+// A helper for fixupBranchesAndSelects. Add to the combined branch condition
+// and constant-fold a select in the hot path.
+void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition,
+ BranchProbability &CHRBranchBias) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ assert((IsTrueBiased ||
+ Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
+ assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
+ "Must be in the bias map");
+ BranchProbability Bias = SelectBiasMap[SI];
+ assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
+ // Take the min.
+ if (CHRBranchBias > Bias)
+ CHRBranchBias = Bias;
+ Value *Cond = SI->getCondition();
+ addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
+ MergedCondition);
+ Value *NewCondition = IsTrueBiased ?
+ ConstantInt::getTrue(F.getContext()) :
+ ConstantInt::getFalse(F.getContext());
+ SI->setCondition(NewCondition);
+}
+
+// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
+// condition.
+void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
+ Instruction *BranchOrSelect,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition) {
+ if (IsTrueBiased) {
+ MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
+ } else {
+ // If Cond is an icmp and all users of V except for BranchOrSelect is a
+ // branch, negate the icmp predicate and swap the branch targets and avoid
+ // inserting an Xor to negate Cond.
+ bool Done = false;
+ if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
+ if (NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
+ MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
+ Done = true;
+ }
+ if (!Done) {
+ Value *Negate = IRB.CreateXor(
+ ConstantInt::getTrue(F.getContext()), Cond);
+ MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
+ }
+ }
+}
+
+void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
+ unsigned i = 0;
+ (void)(i); // Unused in release build.
+ DenseSet<PHINode *> TrivialPHIs;
+ for (CHRScope *Scope : CHRScopes) {
+ transformScopes(Scope, TrivialPHIs);
+ CHR_DEBUG(
+ std::ostringstream oss;
+ oss << " after transformScopes " << i++;
+ dumpIR(F, oss.str().c_str(), nullptr));
+ }
+}
+
+static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char * Label) {
+ dbgs() << Label << " " << Scopes.size() << "\n";
+ for (CHRScope *Scope : Scopes) {
+ dbgs() << *Scope << "\n";
+ }
+}
+
+bool CHR::run() {
+ if (!shouldApply(F, PSI))
+ return false;
+
+ CHR_DEBUG(dumpIR(F, "before", nullptr));
+
+ bool Changed = false;
+ {
+ CHR_DEBUG(
+ dbgs() << "RegionInfo:\n";
+ RI.print(dbgs()));
+
+ // Recursively traverse the region tree and find regions that have biased
+ // branches and/or selects and create scopes.
+ SmallVector<CHRScope *, 8> AllScopes;
+ findScopes(AllScopes);
+ CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
+
+ // Split the scopes if 1) the conditiona values of the biased
+ // branches/selects of the inner/lower scope can't be hoisted up to the
+ // outermost/uppermost scope entry, or 2) the condition values of the biased
+ // branches/selects in a scope (including subscopes) don't share at least
+ // one common value.
+ SmallVector<CHRScope *, 8> SplitScopes;
+ splitScopes(AllScopes, SplitScopes);
+ CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
+
+ // After splitting, set the biased regions and selects of a scope (a tree
+ // root) that include those of the subscopes.
+ classifyBiasedScopes(SplitScopes);
+ CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
+
+ // Filter out the scopes that has only one biased region or select (CHR
+ // isn't useful in such a case).
+ SmallVector<CHRScope *, 8> FilteredScopes;
+ filterScopes(SplitScopes, FilteredScopes);
+ CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
+
+ // Set the regions to be CHR'ed and their hoist stops for each scope.
+ SmallVector<CHRScope *, 8> SetScopes;
+ setCHRRegions(FilteredScopes, SetScopes);
+ CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
+
+ // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
+ // ones. We need to apply CHR from outer to inner so that we apply CHR only
+ // to the hot path, rather than both hot and cold paths.
+ SmallVector<CHRScope *, 8> SortedScopes;
+ sortScopes(SetScopes, SortedScopes);
+ CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
+
+ CHR_DEBUG(
+ dbgs() << "RegionInfo:\n";
+ RI.print(dbgs()));
+
+ // Apply the CHR transformation.
+ if (!SortedScopes.empty()) {
+ transformScopes(SortedScopes);
+ Changed = true;
+ }
+ }
+
+ if (Changed)
+ CHR_DEBUG(dumpIR(F, "after", &Stats));
+
+ return Changed;
+}
+
+bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
+ BlockFrequencyInfo &BFI =
+ getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ ProfileSummaryInfo &PSI =
+ *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
+ return CHR(F, BFI, DT, PSI, RI).run();
+}
+
+namespace llvm {
+
+ControlHeightReductionPass::ControlHeightReductionPass() {
+ ParseCHRFilterFiles();
+}
+
+PreservedAnalyses ControlHeightReductionPass::run(
+ Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+ auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+ auto &MAM = MAMProxy.getManager();
+ auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
+ bool Changed = CHR(F, BFI, DT, PSI, RI).run();
+ if (!Changed)
+ return PreservedAnalyses::all();
+ auto PA = PreservedAnalyses();
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+
+} // namespace llvm
Modified: llvm/trunk/lib/Transforms/Instrumentation/Instrumentation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/Instrumentation.cpp?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/Instrumentation.cpp (original)
+++ llvm/trunk/lib/Transforms/Instrumentation/Instrumentation.cpp Tue Sep 4 10:19:13 2018
@@ -59,6 +59,7 @@ void llvm::initializeInstrumentation(Pas
initializeAddressSanitizerPass(Registry);
initializeAddressSanitizerModulePass(Registry);
initializeBoundsCheckingLegacyPassPass(Registry);
+ initializeControlHeightReductionLegacyPassPass(Registry);
initializeGCOVProfilerLegacyPassPass(Registry);
initializePGOInstrumentationGenLegacyPassPass(Registry);
initializePGOInstrumentationUseLegacyPassPass(Registry);
Added: llvm/trunk/test/Transforms/PGOProfile/chr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/PGOProfile/chr.ll?rev=341386&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/PGOProfile/chr.ll (added)
+++ llvm/trunk/test/Transforms/PGOProfile/chr.ll Tue Sep 4 10:19:13 2018
@@ -0,0 +1,1912 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -chr -instcombine -simplifycfg -S | FileCheck %s
+; RUN: opt < %s -passes='require<profile-summary>,function(chr,instcombine,simplify-cfg)' -S | FileCheck %s
+
+declare void @foo()
+declare void @bar()
+
+; Simple case.
+; Roughly,
+; t0 = *i
+; if ((t0 & 1) != 0) // Likely true
+; foo()
+; if ((t0 & 2) != 0) // Likely true
+; foo()
+; ->
+; t0 = *i
+; if ((t0 & 3) != 0) { // Likely true
+; foo()
+; foo()
+; } else {
+; if ((t0 & 1) != 0)
+; foo()
+; if ((t0 & 2) != 0)
+; foo()
+; }
+define void @test_chr_1(i32* %i) !prof !14 {
+; CHECK-LABEL: @test_chr_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb0:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1_NONCHR]]
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
+; CHECK: bb2.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load i32, i32* %i
+ %1 = and i32 %0, 1
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+ call void @foo()
+ br label %bb1
+
+bb1:
+ %3 = and i32 %0, 2
+ %4 = icmp eq i32 %3, 0
+ br i1 %4, label %bb3, label %bb2, !prof !15
+
+bb2:
+ call void @foo()
+ br label %bb3
+
+bb3:
+ ret void
+}
+
+; Simple case with a cold block.
+; Roughly,
+; t0 = *i
+; if ((t0 & 1) != 0) // Likely true
+; foo()
+; if ((t0 & 2) == 0) // Likely false
+; bar()
+; if ((t0 & 4) != 0) // Likely true
+; foo()
+; ->
+; t0 = *i
+; if ((t0 & 7) == 7) { // Likely true
+; foo()
+; foo()
+; } else {
+; if ((t0 & 1) != 0)
+; foo()
+; if ((t0 & 2) == 0)
+; bar()
+; if ((t0 & 4) != 0)
+; foo()
+; }
+define void @test_chr_1_1(i32* %i) !prof !14 {
+; CHECK-LABEL: @test_chr_1_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 7
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 7
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb0:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB5:%.*]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1_NONCHR]]
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[TMP6]], label [[BB2_NONCHR:%.*]], label [[BB3_NONCHR:%.*]], !prof !16
+; CHECK: bb2.nonchr:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label [[BB3_NONCHR]]
+; CHECK: bb3.nonchr:
+; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[TMP8]], label [[BB5]], label [[BB4_NONCHR:%.*]], !prof !16
+; CHECK: bb4.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB5]]
+; CHECK: bb5:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load i32, i32* %i
+ %1 = and i32 %0, 1
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+ call void @foo()
+ br label %bb1
+
+bb1:
+ %3 = and i32 %0, 2
+ %4 = icmp eq i32 %3, 0
+ br i1 %4, label %bb2, label %bb3, !prof !15
+
+bb2:
+ call void @bar()
+ br label %bb3
+
+bb3:
+ %5 = and i32 %0, 4
+ %6 = icmp eq i32 %5, 0
+ br i1 %6, label %bb5, label %bb4, !prof !15
+
+bb4:
+ call void @foo()
+ br label %bb5
+
+bb5:
+ ret void
+}
+
+; With an aggregate bit check.
+; Roughly,
+; t0 = *i
+; if ((t0 & 255) != 0) // Likely true
+; if ((t0 & 1) != 0) // Likely true
+; foo()
+; if ((t0 & 2) != 0) // Likely true
+; foo()
+; ->
+; t0 = *i
+; if ((t0 & 3) != 0) { // Likely true
+; foo()
+; foo()
+; } else if ((t0 & 255) != 0)
+; if ((t0 & 1) != 0)
+; foo()
+; if ((t0 & 2) != 0)
+; foo()
+; }
+define void @test_chr_2(i32* %i) !prof !14 {
+; CHECK-LABEL: @test_chr_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb1:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB4:%.*]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 255
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[BB4]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[TMP6]], label [[BB2_NONCHR:%.*]], label [[BB1_NONCHR:%.*]], !prof !16
+; CHECK: bb2.nonchr:
+; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[TMP8]], label [[BB4]], label [[BB3_NONCHR:%.*]], !prof !16
+; CHECK: bb3.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB4]]
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB2_NONCHR]]
+; CHECK: bb4:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load i32, i32* %i
+ %1 = and i32 %0, 255
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb4, label %bb0, !prof !15
+
+bb0:
+ %3 = and i32 %0, 1
+ %4 = icmp eq i32 %3, 0
+ br i1 %4, label %bb2, label %bb1, !prof !15
+
+bb1:
+ call void @foo()
+ br label %bb2
+
+bb2:
+ %5 = and i32 %0, 2
+ %6 = icmp eq i32 %5, 0
+ br i1 %6, label %bb4, label %bb3, !prof !15
+
+bb3:
+ call void @foo()
+ br label %bb4
+
+bb4:
+ ret void
+}
+
+; Split case.
+; Roughly,
+; t1 = *i
+; if ((t1 & 1) != 0) // Likely true
+; foo()
+; if ((t1 & 2) != 0) // Likely true
+; foo()
+; t2 = *i
+; if ((t2 & 4) != 0) // Likely true
+; foo()
+; if ((t2 & 8) != 0) // Likely true
+; foo()
+; ->
+; t1 = *i
+; if ((t1 & 3) != 0) { // Likely true
+; foo()
+; foo()
+; } else {
+; if ((t1 & 1) != 0)
+; foo()
+; if ((t1 & 2) != 0)
+; foo()
+; }
+; t2 = *i
+; if ((t2 & 12) != 0) { // Likely true
+; foo()
+; foo()
+; } else {
+; if ((t2 & 4) != 0)
+; foo()
+; if ((t2 & 8) != 0)
+; foo()
+; }
+define void @test_chr_3(i32* %i) !prof !14 {
+; CHECK-LABEL: @test_chr_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb0:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1_NONCHR]]
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
+; CHECK: bb2.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 12
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 12
+; CHECK-NEXT: br i1 [[TMP9]], label [[BB4:%.*]], label [[BB3_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb4:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB7:%.*]]
+; CHECK: bb3.split.nonchr:
+; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP7]], 4
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT: br i1 [[TMP11]], label [[BB5_NONCHR:%.*]], label [[BB4_NONCHR:%.*]], !prof !16
+; CHECK: bb4.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB5_NONCHR]]
+; CHECK: bb5.nonchr:
+; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP7]], 8
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[TMP13]], label [[BB7]], label [[BB6_NONCHR:%.*]], !prof !16
+; CHECK: bb6.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB7]]
+; CHECK: bb7:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load i32, i32* %i
+ %1 = and i32 %0, 1
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+ call void @foo()
+ br label %bb1
+
+bb1:
+ %3 = and i32 %0, 2
+ %4 = icmp eq i32 %3, 0
+ br i1 %4, label %bb3, label %bb2, !prof !15
+
+bb2:
+ call void @foo()
+ br label %bb3
+
+bb3:
+ %5 = load i32, i32* %i
+ %6 = and i32 %5, 4
+ %7 = icmp eq i32 %6, 0
+ br i1 %7, label %bb5, label %bb4, !prof !15
+
+bb4:
+ call void @foo()
+ br label %bb5
+
+bb5:
+ %8 = and i32 %5, 8
+ %9 = icmp eq i32 %8, 0
+ br i1 %9, label %bb7, label %bb6, !prof !15
+
+bb6:
+ call void @foo()
+ br label %bb7
+
+bb7:
+ ret void
+}
+
+; Selects.
+; Roughly,
+; t0 = *i
+; sum1 = (t0 & 1) ? sum0 : (sum0 + 42) // Likely false
+; sum2 = (t0 & 2) ? sum1 : (sum1 + 43) // Likely false
+; return sum2
+; ->
+; t0 = *i
+; if ((t0 & 3) == 3)
+; return sum0 + 85
+; else {
+; sum1 = (t0 & 1) ? sum0 : (sum0 + 42)
+; sum2 = (t0 & 2) ? sum1 : (sum1 + 43)
+; return sum2
+; }
+define i32 @test_chr_4(i32* %i, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT: br i1 [[TMP2]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: entry.split:
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SUM0:%.*]], 85
+; CHECK-NEXT: ret i32 [[TMP3]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUM0]], 42
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[TMP6]], i32 [[SUM0]], i32 [[TMP4]], !prof !16
+; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[SUM1_NONCHR]], 43
+; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM1_NONCHR]], i32 [[TMP9]], !prof !16
+; CHECK-NEXT: ret i32 [[SUM2_NONCHR]]
+;
+entry:
+ %0 = load i32, i32* %i
+ %1 = and i32 %0, 1
+ %2 = icmp eq i32 %1, 0
+ %3 = add i32 %sum0, 42
+ %sum1 = select i1 %2, i32 %sum0, i32 %3, !prof !15
+ %4 = and i32 %0, 2
+ %5 = icmp eq i32 %4, 0
+ %6 = add i32 %sum1, 43
+ %sum2 = select i1 %5, i32 %sum1, i32 %6, !prof !15
+ ret i32 %sum2
+}
+
+; Selects + Brs
+; Roughly,
+; t0 = *i
+; if ((t0 & 255) != 0) { // Likely true
+; sum = (t0 & 1) ? sum0 : (sum0 + 42) // Likely false
+; sum = (t0 & 2) ? sum : (sum + 43) // Likely false
+; if ((t0 & 4) != 0) { // Likely true
+; sum3 = sum + 44
+; sum = (t0 & 8) ? sum3 : (sum3 + 44) // Likely false
+; }
+; }
+; return sum
+; ->
+; t0 = *i
+; if ((t0 & 15) != 15) { // Likely true
+; sum = sum0 + 173
+; } else if ((t0 & 255) != 0) {
+; sum = (t0 & 1) ? sum0 : (sum0 + 42)
+; sum = (t0 & 2) ? sum : (sum + 43)
+; if ((t0 & 4) != 0) {
+; sum3 = sum + 44
+; sum = (t0 & 8) ? sum3 : (sum3 + 44)
+; }
+; }
+; return sum
+define i32 @test_chr_5(i32* %i, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 15
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb0:
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SUM0:%.*]], 85
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUM0]], 173
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 255
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[SUM0]], 42
+; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM0]], i32 [[TMP9]], !prof !16
+; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[SUM1_NONCHR]], 43
+; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP11]], i32 [[SUM1_NONCHR]], i32 [[TMP12]], !prof !16
+; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP0]], 4
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[TMP14]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP0]], 8
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
+; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP16]], i32 44, i32 88, !prof !16
+; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT: ret i32 [[SUM6]]
+;
+entry:
+ %0 = load i32, i32* %i
+ %1 = and i32 %0, 255
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb3, label %bb0, !prof !15
+
+bb0:
+ %3 = and i32 %0, 1
+ %4 = icmp eq i32 %3, 0
+ %5 = add i32 %sum0, 42
+ %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15
+ %6 = and i32 %0, 2
+ %7 = icmp eq i32 %6, 0
+ %8 = add i32 %sum1, 43
+ %sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15
+ %9 = and i32 %0, 4
+ %10 = icmp eq i32 %9, 0
+ br i1 %10, label %bb2, label %bb1, !prof !15
+
+bb1:
+ %sum3 = add i32 %sum2, 44
+ %11 = and i32 %0, 8
+ %12 = icmp eq i32 %11, 0
+ %13 = add i32 %sum3, 44
+ %sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15
+ br label %bb2
+
+bb2:
+ %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
+ br label %bb3
+
+bb3:
+ %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
+ ret i32 %sum6
+}
+
+; Selects + Brs with a scope split in the middle
+; Roughly,
+; t0 = *i
+; if ((t0 & 255) != 0) { // Likely true
+; sum = (t0 & 1) ? sum0 : (sum0 + 42) // Likely false
+; sum = (t0 & 2) ? sum : (sum + 43) // Likely false
+; if ((sum0 & 4) != 0) { // Likely true. The condition doesn't use v.
+; sum3 = sum + 44
+; sum = (t0 & 8) ? sum3 : (sum3 + 44) // Likely false
+; }
+; }
+; return sum
+; ->
+; t0 = *i
+; if ((sum0 & 4) != 0 & (t0 & 11) != 11) { // Likely true
+; sum = sum0 + 173
+; } else if ((t0 & 255) != 0) {
+; sum = (t0 & 1) ? sum0 : (sum0 + 42)
+; sum = (t0 & 2) ? sum : (sum + 43)
+; if ((sum0 & 4) != 0) {
+; sum3 = sum + 44
+; sum = (t0 & 8) ? sum3 : (sum3 + 44)
+; }
+; }
+; return sum
+define i32 @test_chr_5_1(i32* %i, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_5_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SUM0:%.*]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 11
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 11
+; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[TMP2]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb0:
+; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[SUM0]], 85
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SUM0]], 173
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP0]], 255
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[TMP9]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[SUM0]], 42
+; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[TMP11]], i32 [[SUM0]], i32 [[TMP12]], !prof !16
+; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[SUM1_NONCHR]], 43
+; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP14]], i32 [[SUM1_NONCHR]], i32 [[TMP15]], !prof !16
+; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[SUM0]], 4
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0
+; CHECK-NEXT: br i1 [[TMP17]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP0]], 8
+; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0
+; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP19]], i32 44, i32 88, !prof !16
+; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP7]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT: ret i32 [[SUM6]]
+;
+entry:
+ %0 = load i32, i32* %i
+ %1 = and i32 %0, 255
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb3, label %bb0, !prof !15
+
+bb0:
+ %3 = and i32 %0, 1
+ %4 = icmp eq i32 %3, 0
+ %5 = add i32 %sum0, 42
+ %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15
+ %6 = and i32 %0, 2
+ %7 = icmp eq i32 %6, 0
+ %8 = add i32 %sum1, 43
+ %sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15
+ %9 = and i32 %sum0, 4 ; Split
+ %10 = icmp eq i32 %9, 0
+ br i1 %10, label %bb2, label %bb1, !prof !15
+
+bb1:
+ %sum3 = add i32 %sum2, 44
+ %11 = and i32 %0, 8
+ %12 = icmp eq i32 %11, 0
+ %13 = add i32 %sum3, 44
+ %sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15
+ br label %bb2
+
+bb2:
+ %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
+ br label %bb3
+
+bb3:
+ %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
+ ret i32 %sum6
+}
+
+; Selects + Brs, non-matching bases
+; Roughly,
+; i0 = *i
+; j0 = *j
+; if ((i0 & 255) != 0) { // Likely true
+; sum = (i0 & 2) ? sum0 : (sum0 + 43) // Likely false
+; if ((j0 & 4) != 0) { // Likely true. The condition uses j0, not i0.
+; sum3 = sum + 44
+; sum = (i0 & 8) ? sum3 : (sum3 + 44) // Likely false
+; }
+; }
+; return sum
+; ->
+; i0 = *i
+; j0 = *j
+; if ((j0 & 4) != 0 & (i0 & 10) != 10) { // Likely true
+; sum = sum0 + 131
+; } else if ((i0 & 255) != 0) {
+; sum = (i0 & 2) ? sum0 : (sum0 + 43)
+; if ((j0 & 4) != 0) {
+; sum3 = sum + 44
+; sum = (i0 & 8) ? sum3 : (sum3 + 44)
+; }
+; }
+; return sum
+define i32 @test_chr_6(i32* %i, i32* %j, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
+; CHECK-NEXT: [[V9:%.*]] = and i32 [[J0]], 4
+; CHECK-NEXT: [[V10:%.*]] = icmp ne i32 [[V9]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[I0]], 10
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 10
+; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[V10]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb0:
+; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
+; CHECK-NEXT: [[V13:%.*]] = add i32 [[SUM0]], 131
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[V1:%.*]] = and i32 [[I0]], 255
+; CHECK-NEXT: [[V2:%.*]] = icmp eq i32 [[V1]], 0
+; CHECK-NEXT: br i1 [[V2]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT: [[V4_NONCHR:%.*]] = icmp eq i32 [[V3_NONCHR]], 0
+; CHECK-NEXT: [[V8_NONCHR:%.*]] = add i32 [[SUM0]], 43
+; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[V4_NONCHR]], i32 [[SUM0]], i32 [[V8_NONCHR]], !prof !16
+; CHECK-NEXT: [[V9_NONCHR:%.*]] = and i32 [[J0]], 4
+; CHECK-NEXT: [[V10_NONCHR:%.*]] = icmp eq i32 [[V9_NONCHR]], 0
+; CHECK-NEXT: br i1 [[V10_NONCHR]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[V11_NONCHR:%.*]] = and i32 [[I0]], 8
+; CHECK-NEXT: [[V12_NONCHR:%.*]] = icmp eq i32 [[V11_NONCHR]], 0
+; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[V12_NONCHR]], i32 44, i32 88, !prof !16
+; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[V13]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT: ret i32 [[SUM6]]
+;
+entry:
+ %i0 = load i32, i32* %i
+ %j0 = load i32, i32* %j
+ %v1 = and i32 %i0, 255
+ %v2 = icmp eq i32 %v1, 0
+ br i1 %v2, label %bb3, label %bb0, !prof !15
+
+bb0:
+ %v3 = and i32 %i0, 2
+ %v4 = icmp eq i32 %v3, 0
+ %v8 = add i32 %sum0, 43
+ %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
+ %v9 = and i32 %j0, 4
+ %v10 = icmp eq i32 %v9, 0
+ br i1 %v10, label %bb2, label %bb1, !prof !15
+
+bb1:
+ %sum3 = add i32 %sum2, 44
+ %v11 = and i32 %i0, 8
+ %v12 = icmp eq i32 %v11, 0
+ %v13 = add i32 %sum3, 44
+ %sum4 = select i1 %v12, i32 %sum3, i32 %v13, !prof !15
+ br label %bb2
+
+bb2:
+ %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
+ br label %bb3
+
+bb3:
+ %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
+ ret i32 %sum6
+}
+
+; Selects + Brs, the branch condition can't be hoisted to be merged with a
+; select. No CHR happens.
+; Roughly,
+; i0 = *i
+; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
+; foo();
+; j0 = *j
+; if ((j0 & 4) != 0) { // Likely true
+; foo();
+; sum = sum + 44
+; }
+; return sum
+; ->
+; (no change)
+define i32 @test_chr_7(i32* %i, i32* %j, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_7(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0
+; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
+; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
+; CHECK-NEXT: [[V9:%.*]] = and i32 [[J0]], 4
+; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0
+; CHECK-NEXT: br i1 [[V10]], label [[BB2:%.*]], label [[BB1:%.*]], !prof !16
+; CHECK: bb1:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: [[SUM4:%.*]] = add i32 [[SUM2]], 44
+; CHECK-NEXT: br label [[BB2]]
+; CHECK: bb2:
+; CHECK-NEXT: [[SUM5:%.*]] = phi i32 [ [[SUM2]], [[ENTRY:%.*]] ], [ [[SUM4]], [[BB1]] ]
+; CHECK-NEXT: ret i32 [[SUM5]]
+;
+entry:
+ %i0 = load i32, i32* %i
+ %v3 = and i32 %i0, 2
+ %v4 = icmp eq i32 %v3, 0
+ %v8 = add i32 %sum0, 43
+ %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
+ call void @foo()
+ %j0 = load i32, i32* %j
+ %v9 = and i32 %j0, 4
+ %v10 = icmp eq i32 %v9, 0
+ br i1 %v10, label %bb2, label %bb1, !prof !15 ; %v10 can't be hoisted above the above select
+
+bb1:
+ call void @foo()
+ %sum4 = add i32 %sum2, 44
+ br label %bb2
+
+bb2:
+ %sum5 = phi i32 [ %sum2, %entry ], [ %sum4, %bb1 ]
+ ret i32 %sum5
+}
+
+; Selects + Brs, the branch condition can't be hoisted to be merged with the
+; selects. Dropping the select.
+; Roughly,
+; i0 = *i
+; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
+; foo();
+; j0 = *j
+; if ((j0 & 4) != 0) // Likely true
+; foo()
+; if ((j0 & 8) != 0) // Likely true
+; foo()
+; return sum
+; ->
+; i0 = *i
+; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
+; foo();
+; j0 = *j
+; if ((j0 & 12) != 12) { // Likely true
+; foo()
+; foo()
+; } else {
+; if ((j0 & 4) != 0)
+; foo()
+; if ((j0 & 8) != 0)
+; foo()
+; }
+; return sum
+define i32 @test_chr_7_1(i32* %i, i32* %j, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_7_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0
+; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
+; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[J0]], 12
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 12
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb0:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[V9:%.*]] = and i32 [[J0]], 4
+; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0
+; CHECK-NEXT: br i1 [[V10]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1_NONCHR]]
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[V11_NONCHR:%.*]] = and i32 [[J0]], 8
+; CHECK-NEXT: [[V12_NONCHR:%.*]] = icmp eq i32 [[V11_NONCHR]], 0
+; CHECK-NEXT: br i1 [[V12_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
+; CHECK: bb2.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: ret i32 [[SUM2]]
+;
+entry:
+ %i0 = load i32, i32* %i
+ %v3 = and i32 %i0, 2
+ %v4 = icmp eq i32 %v3, 0
+ %v8 = add i32 %sum0, 43
+ %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
+ call void @foo()
+ %j0 = load i32, i32* %j
+ %v9 = and i32 %j0, 4
+ %v10 = icmp eq i32 %v9, 0
+ br i1 %v10, label %bb1, label %bb0, !prof !15 ; %v10 can't be hoisted above the above select
+
+bb0:
+ call void @foo()
+ br label %bb1
+
+bb1:
+ %v11 = and i32 %j0, 8
+ %v12 = icmp eq i32 %v11, 0
+ br i1 %v12, label %bb3, label %bb2, !prof !15
+
+bb2:
+ call void @foo()
+ br label %bb3
+
+bb3:
+ ret i32 %sum2
+}
+
+; Branches aren't biased enough. No CHR happens.
+; Roughly,
+; t0 = *i
+; if ((t0 & 1) != 0) // Not biased
+; foo()
+; if ((t0 & 2) != 0) // Not biased
+; foo()
+; ->
+; (no change)
+define void @test_chr_8(i32* %i) !prof !14 {
+; CHECK-LABEL: @test_chr_8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !17
+; CHECK: bb0:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !17
+; CHECK: bb2:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load i32, i32* %i
+ %1 = and i32 %0, 1
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb1, label %bb0, !prof !16
+
+bb0:
+ call void @foo()
+ br label %bb1
+
+bb1:
+ %3 = and i32 %0, 2
+ %4 = icmp eq i32 %3, 0
+ br i1 %4, label %bb3, label %bb2, !prof !16
+
+bb2:
+ call void @foo()
+ br label %bb3
+
+bb3:
+ ret void
+}
+
+; With an existing phi at the exit.
+; Roughly,
+; t = *i
+; if ((t0 & 1) != 0) // Likely true
+; foo()
+; if ((t0 & 2) != 0) { // Likely true
+; t = *j
+; foo()
+; }
+; // There's a phi for t here.
+; return t
+; ->
+; t = *i
+; if ((t & 3) == 3) { // Likely true
+; foo()
+; t = *j
+; foo()
+; } else {
+; if ((t & 1) != 0)
+; foo()
+; if ((t & 2) != 0) {
+; t = *j
+; foo()
+; }
+; }
+; // There's a phi for t here.
+; return t
+define i32 @test_chr_9(i32* %i, i32* %j) !prof !14 {
+; CHECK-LABEL: @test_chr_9(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb0:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[J:%.*]], align 4
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1_NONCHR]]
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[TMP7]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
+; CHECK: bb2.nonchr:
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[J]], align 4
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[BB0]] ], [ [[TMP0]], [[BB1_NONCHR]] ], [ [[TMP8]], [[BB2_NONCHR]] ]
+; CHECK-NEXT: ret i32 [[TMP9]]
+;
+entry:
+ %0 = load i32, i32* %i
+ %1 = and i32 %0, 1
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+ call void @foo()
+ br label %bb1
+
+bb1:
+ %3 = and i32 %0, 2
+ %4 = icmp eq i32 %3, 0
+ br i1 %4, label %bb3, label %bb2, !prof !15
+
+bb2:
+ %5 = load i32, i32* %j
+ call void @foo()
+ br label %bb3
+
+bb3:
+ %6 = phi i32 [ %0, %bb1 ], [ %5, %bb2 ]
+ ret i32 %6
+}
+
+; With no phi at the exit, but the exit needs a phi inserted after CHR.
+; Roughly,
+; t0 = *i
+; if ((t0 & 1) != 0) // Likely true
+; foo()
+; t1 = *j
+; if ((t1 & 2) != 0) // Likely true
+; foo()
+; return (t1 * 42) - (t1 - 99)
+; ->
+; t0 = *i
+; if ((t0 & 3) == 3) { // Likely true
+; foo()
+; t1 = *j
+; foo()
+; } else {
+; if ((t0 & 1) != 0)
+; foo()
+; if ((t0 & 2) != 0) {
+; t1 = *j
+; foo()
+; }
+; }
+; // A new phi for t1 is inserted here.
+; return (t1 * 42) - (t1 - 99)
+define i32 @test_chr_10(i32* %i, i32* %j) !prof !14 {
+; CHECK-LABEL: @test_chr_10(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb0:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[J:%.*]], align 4
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1_NONCHR]]
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[J]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[TMP8]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
+; CHECK: bb2.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[BB0]] ], [ [[TMP6]], [[BB2_NONCHR]] ], [ [[TMP6]], [[BB1_NONCHR]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 42
+; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP9]], -99
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: ret i32 [[TMP12]]
+;
+entry:
+ %0 = load i32, i32* %i
+ %1 = and i32 %0, 1
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+ call void @foo()
+ br label %bb1
+
+bb1:
+ %3 = load i32, i32* %j
+ %4 = and i32 %0, 2
+ %5 = icmp eq i32 %4, 0
+ br i1 %5, label %bb3, label %bb2, !prof !15
+
+bb2:
+ call void @foo()
+ br label %bb3
+
+bb3:
+ %6 = mul i32 %3, 42
+ %7 = sub i32 %3, 99
+ %8 = add i32 %6, %7
+ ret i32 %8
+}
+
+; Test a case where there are two use-def chain paths to the same value (t0)
+; from the branch condition. This is a regression test for an old bug that
+; caused a bad hoisting that moves (hoists) a value (%conv) twice to the end of
+; the %entry block (once for %div and once for %mul16) and put a use ahead of
+; its definition like:
+; %entry:
+; ...
+; %div = fdiv double 1.000000e+00, %conv
+; %conv = sitofp i32 %0 to double
+; %mul16 = fmul double %div, %conv
+;
+; Roughly,
+; t0 = *i
+; if ((t0 & 1) != 0) // Likely true
+; foo()
+; // there are two use-def paths from the branch condition to t0.
+; if ((1.0 / t0) * t0 < 1) // Likely true
+; foo()
+; ->
+; t0 = *i
+; if ((t0 & 1) != 0 & (1.0 / t0) * t0 > 0) { // Likely true
+; foo()
+; foo()
+; } else {
+; if ((t0 & 1) != 0)
+; foo()
+; if ((1.0 / t0) * t0 < 1) // Likely true
+; foo()
+; }
+define void @test_chr_11(i32* %i, i32 %x) !prof !14 {
+; CHECK-LABEL: @test_chr_11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
+; CHECK-NEXT: [[DIV:%.*]] = fdiv double 1.000000e+00, [[CONV]]
+; CHECK-NEXT: [[MUL16:%.*]] = fmul double [[DIV]], [[CONV]]
+; CHECK-NEXT: [[CONV717:%.*]] = fptosi double [[MUL16]] to i32
+; CHECK-NEXT: [[CMP18:%.*]] = icmp sgt i32 [[CONV717]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[CMP18]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb0:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB0_NONCHR:%.*]], label [[BB1_NONCHR:%.*]], !prof !18
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1_NONCHR]]
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[CONV_NONCHR:%.*]] = sitofp i32 [[TMP0]] to double
+; CHECK-NEXT: [[DIV_NONCHR:%.*]] = fdiv double 1.000000e+00, [[CONV_NONCHR]]
+; CHECK-NEXT: [[MUL16_NONCHR:%.*]] = fmul double [[DIV_NONCHR]], [[CONV_NONCHR]]
+; CHECK-NEXT: [[CONV717_NONCHR:%.*]] = fptosi double [[MUL16_NONCHR]] to i32
+; CHECK-NEXT: [[CMP18_NONCHR:%.*]] = icmp slt i32 [[CONV717_NONCHR]], 1
+; CHECK-NEXT: br i1 [[CMP18_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
+; CHECK: bb2.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load i32, i32* %i
+ %1 = and i32 %0, 1
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+ call void @foo()
+ br label %bb1
+
+bb1:
+ %conv = sitofp i32 %0 to double
+ %div = fdiv double 1.000000e+00, %conv
+ %mul16 = fmul double %div, %conv
+ %conv717 = fptosi double %mul16 to i32
+ %cmp18 = icmp slt i32 %conv717, 1
+ br i1 %cmp18, label %bb3, label %bb2, !prof !15
+
+bb2:
+ call void @foo()
+ br label %bb3
+
+bb3:
+ ret void
+}
+
+; Selects + unrelated br only
+define i32 @test_chr_12(i32* %i, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_12(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 255
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB3:%.*]], label [[BB0:%.*]], !prof !16
+; CHECK: bb0:
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SUM0:%.*]], 42
+; CHECK-NEXT: [[SUM1:%.*]] = select i1 [[TMP4]], i32 [[SUM0]], i32 [[TMP5]], !prof !16
+; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SUM1]], 43
+; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[TMP7]], i32 [[SUM1]], i32 [[TMP8]], !prof !16
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP0]], 8
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP10]], [[TMP12]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[BB1:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[SUM2]], 88
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb0.split.nonchr:
+; CHECK-NEXT: br i1 [[TMP10]], label [[BB1_NONCHR:%.*]], label [[BB3]], !prof !18
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP0]], 8
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
+; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP16]], i32 44, i32 88, !prof !16
+; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2]], [[SUM4_NONCHR_V]]
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[SUM0]], [[ENTRY:%.*]] ], [ [[TMP14]], [[BB1]] ], [ [[SUM2]], [[BB0_SPLIT_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT: ret i32 [[SUM6]]
+;
+entry:
+ %0 = load i32, i32* %i
+ %1 = and i32 %0, 255
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb3, label %bb0, !prof !15
+
+bb0:
+ %3 = and i32 %0, 1
+ %4 = icmp eq i32 %3, 0
+ %5 = add i32 %sum0, 42
+ %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15
+ %6 = and i32 %0, 2
+ %7 = icmp eq i32 %6, 0
+ %8 = add i32 %sum1, 43
+ %sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15
+ %9 = load i32, i32* %i
+ %10 = icmp eq i32 %9, 0
+ br i1 %10, label %bb2, label %bb1, !prof !15
+
+bb1:
+ %sum3 = add i32 %sum2, 44
+ %11 = and i32 %0, 8
+ %12 = icmp eq i32 %11, 0
+ %13 = add i32 %sum3, 44
+ %sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15
+ br label %bb2
+
+bb2:
+ %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
+ br label %bb3
+
+bb3:
+ %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
+ ret i32 %sum6
+}
+
+; In the second CHR, a condition value depends on a trivial phi that's inserted
+; by the first CHR.
+; Roughly,
+; i0 = *i
+; v2 = (z != 1) ? pred : true // Likely false
+; if (z == 0 & pred) // Likely false
+; foo()
+; j0 = *j
+; sum2 = ((i0 & 2) == j0) ? sum0 : (sum0 + 43) // Likely false
+; sum3 = ((i0 == j0) ? sum0 : (sum0 + 43) // Likely false
+; foo()
+; if ((i0 & 4) == 0) // Unbiased
+; foo()
+; return i0 + sum3
+; ->
+; i0 = *i
+; if (z != 1 & (z == 0 & pred)) // First CHR
+; foo()
+; // A trivial phi for i0 is inserted here by the first CHR (which gets removed
+; // later) and the subsequent branch condition (for the second CHR) uses it.
+; j0 = *j
+; if ((i0 & 2) != j0 & i0 != j0) { // Second CHR
+; sum3 = sum0 + 43
+; foo()
+; if (i0 & 4) == 0)
+; foo()
+; } else {
+; sum3 = (i0 == j0) ? sum0 : (sum0 + 43)
+; foo()
+; if (i0 & 4) == 0)
+; foo()
+; }
+; return i0 + sum3
+define i32 @test_chr_14(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14 {
+; CHECK-LABEL: @test_chr_14(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[V1:%.*]] = icmp ne i32 [[Z:%.*]], 1
+; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z]], 0
+; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]]
+; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[V1]], [[V3_NONCHR]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[BB0_NONCHR:%.*]], label [[BB1:%.*]], !prof !19
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: bb1:
+; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
+; CHECK-NEXT: [[V6:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT: [[V4:%.*]] = icmp ne i32 [[V6]], [[J0]]
+; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
+; CHECK-NEXT: [[V5:%.*]] = icmp ne i32 [[I0]], [[J0]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[V4]], [[V5]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[BB1_SPLIT:%.*]], label [[BB1_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb1.split:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: [[V9:%.*]] = and i32 [[I0]], 4
+; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0
+; CHECK-NEXT: br i1 [[V10]], label [[BB3:%.*]], label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb1.split.nonchr:
+; CHECK-NEXT: [[V5_NONCHR:%.*]] = icmp eq i32 [[I0]], [[J0]]
+; CHECK-NEXT: [[SUM3_NONCHR:%.*]] = select i1 [[V5_NONCHR]], i32 [[SUM0]], i32 [[V8]], !prof !16
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: [[V9_NONCHR:%.*]] = and i32 [[I0]], 4
+; CHECK-NEXT: [[V10_NONCHR:%.*]] = icmp eq i32 [[V9_NONCHR]], 0
+; CHECK-NEXT: br i1 [[V10_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]]
+; CHECK: bb2.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[V8]], [[BB2]] ], [ [[V8]], [[BB1_SPLIT]] ], [ [[SUM3_NONCHR]], [[BB2_NONCHR]] ], [ [[SUM3_NONCHR]], [[BB1_SPLIT_NONCHR]] ]
+; CHECK-NEXT: [[V11:%.*]] = add i32 [[I0]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[V11]]
+;
+entry:
+ %i0 = load i32, i32* %i
+ %v0 = icmp eq i32 %z, 0
+ %v1 = icmp ne i32 %z, 1
+ %v2 = select i1 %v1, i1 %pred, i1 true, !prof !15
+ %v3 = and i1 %v0, %pred
+ br i1 %v3, label %bb0, label %bb1, !prof !15
+
+bb0:
+ call void @foo()
+ br label %bb1
+
+bb1:
+ %j0 = load i32, i32* %j
+ %v6 = and i32 %i0, 2
+ %v4 = icmp eq i32 %v6, %j0
+ %v8 = add i32 %sum0, 43
+ %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
+ %v5 = icmp eq i32 %i0, %j0
+ %sum3 = select i1 %v5, i32 %sum0, i32 %v8, !prof !15
+ call void @foo()
+ %v9 = and i32 %i0, 4
+ %v10 = icmp eq i32 %v9, 0
+ br i1 %v10, label %bb3, label %bb2
+
+bb2:
+ call void @foo()
+ br label %bb3
+
+bb3:
+ %v11 = add i32 %i0, %sum3
+ ret i32 %v11
+}
+
+; Branch or selects depends on another select. No CHR happens.
+; Roughly,
+; i0 = *i
+; if (z == 0 & ((z != 1) ? pred : true)) { // Likely false
+; foo()
+; j0 = *j
+; sum2 = ((i0 & 2) == j0) ? sum0 : (sum0 + 43) // Likely false
+; sum3 = (i0 == sum2) ? sum2 : (sum0 + 43) // Likely false. This depends on the
+; // previous select.
+; foo()
+; if ((i0 & 4) == 0) // Unbiased
+; foo()
+; return i0 + sum3
+; ->
+; (no change)
+define i32 @test_chr_15(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14 {
+; CHECK-LABEL: @test_chr_15(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z:%.*]], 0
+; CHECK-NEXT: [[V3:%.*]] = and i1 [[V0]], [[PRED:%.*]]
+; CHECK-NEXT: br i1 [[V3]], label [[BB0:%.*]], label [[BB1:%.*]], !prof !16
+; CHECK: bb0:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: bb1:
+; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
+; CHECK-NEXT: [[V6:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V6]], [[J0]]
+; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
+; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
+; CHECK-NEXT: [[V5:%.*]] = icmp eq i32 [[I0]], [[SUM2]]
+; CHECK-NEXT: [[SUM3:%.*]] = select i1 [[V5]], i32 [[SUM2]], i32 [[V8]], !prof !16
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: [[V9:%.*]] = and i32 [[I0]], 4
+; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0
+; CHECK-NEXT: br i1 [[V10]], label [[BB3:%.*]], label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[V11:%.*]] = add i32 [[I0]], [[SUM3]]
+; CHECK-NEXT: ret i32 [[V11]]
+;
+entry:
+ %i0 = load i32, i32* %i
+ %v0 = icmp eq i32 %z, 0
+ %v1 = icmp ne i32 %z, 1
+ %v2 = select i1 %v1, i1 %pred, i1 true, !prof !15
+ %v3 = and i1 %v0, %v2
+ br i1 %v3, label %bb0, label %bb1, !prof !15
+
+bb0:
+ call void @foo()
+ br label %bb1
+
+bb1:
+ %j0 = load i32, i32* %j
+ %v6 = and i32 %i0, 2
+ %v4 = icmp eq i32 %v6, %j0
+ %v8 = add i32 %sum0, 43
+ %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
+ %v5 = icmp eq i32 %i0, %sum2
+ %sum3 = select i1 %v5, i32 %sum2, i32 %v8, !prof !15
+ call void @foo()
+ %v9 = and i32 %i0, 4
+ %v10 = icmp eq i32 %v9, 0
+ br i1 %v10, label %bb3, label %bb2
+
+bb2:
+ call void @foo()
+ br label %bb3
+
+bb3:
+ %v11 = add i32 %i0, %sum3
+ ret i32 %v11
+}
+
+; With an existing phi at the exit but a value (%v40) is both alive and is an
+; operand to a phi at the exit block.
+; Roughly,
+; t0 = *i
+; if ((t0 & 1) != 0) // Likely true
+; foo()
+; v40 = t0 + 44
+; if ((t0 & 2) != 0) // Likely true
+; v41 = t0 + 99
+; foo()
+; }
+; v42 = phi v40, v41
+; return v42 + v40
+; ->
+; t0 = *i
+; if ((t0 & 3) == 3) // Likely true
+; foo()
+; v40 = t0 + 44
+; v41 = t0 + 99
+; foo()
+; } else {
+; if ((t0 & 1) != 0) // Likely true
+; foo()
+; v40_nc = t0 + 44
+; if ((t0 & 2) != 0) // Likely true
+; v41_nc = t0 + 99
+; foo()
+; }
+; }
+; t7 = phi v40, v40_nc
+; v42 = phi v41, v41_nc
+; v43 = v42 + t7
+; return v43
+define i32 @test_chr_16(i32* %i) !prof !14 {
+; CHECK-LABEL: @test_chr_16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb0:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: [[V40:%.*]] = add i32 [[TMP0]], 44
+; CHECK-NEXT: [[V41:%.*]] = add i32 [[TMP0]], 99
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1_NONCHR]]
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[V40_NONCHR:%.*]] = add i32 [[TMP0]], 44
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
+; CHECK: bb2.nonchr:
+; CHECK-NEXT: [[V41_NONCHR:%.*]] = add i32 [[TMP0]], 99
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ [[V40]], [[BB0]] ], [ [[V40_NONCHR]], [[BB2_NONCHR]] ], [ [[V40_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT: [[V42:%.*]] = phi i32 [ [[V41]], [[BB0]] ], [ [[V41_NONCHR]], [[BB2_NONCHR]] ], [ [[V40_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT: [[V43:%.*]] = add i32 [[V42]], [[TMP7]]
+; CHECK-NEXT: ret i32 [[V43]]
+;
+entry:
+ %0 = load i32, i32* %i
+ %1 = and i32 %0, 1
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+ call void @foo()
+ br label %bb1
+
+bb1:
+ %v40 = add i32 %0, 44
+ %3 = and i32 %0, 2
+ %4 = icmp eq i32 %3, 0
+ br i1 %4, label %bb3, label %bb2, !prof !15
+
+bb2:
+ %v41 = add i32 %0, 99
+ call void @foo()
+ br label %bb3
+
+bb3:
+ %v42 = phi i32 [ %v41, %bb2 ], [ %v40, %bb1 ]
+ %v43 = add i32 %v42, %v40
+ ret i32 %v43
+}
+
+; Two consecutive regions have an entry in the middle of them. No CHR happens.
+; Roughly,
+; if ((i & 4) == 0) {
+; if (!j)
+; goto bb1
+; } else {
+; t0 = (i & 1)
+; if (t0 != 0) // Likely true
+; foo()
+; s = (i & 1) + i
+; }
+; bb1:
+; p = phi i, t0, s
+; if ((i & 2) != 0) // Likely true
+; foo()
+; q = p + 2
+; }
+; r = phi p, q, i
+; return r
+; ->
+; (no change)
+define i32 @test_chr_17(i32 %i, i1 %j) !prof !14 {
+; CHECK-LABEL: @test_chr_17(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V0:%.*]] = and i32 [[I:%.*]], 4
+; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[V0]], 0
+; CHECK-NEXT: br i1 [[V1]], label [[BBE:%.*]], label [[BBQ:%.*]]
+; CHECK: bbq:
+; CHECK-NEXT: br i1 [[J:%.*]], label [[BB3:%.*]], label [[BB1:%.*]]
+; CHECK: bbe:
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[I]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB1]], label [[BB0:%.*]], !prof !16
+; CHECK: bb0:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: [[S:%.*]] = add i32 [[TMP0]], [[I]]
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: bb1:
+; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[I]], [[BBQ]] ], [ [[TMP0]], [[BBE]] ], [ [[S]], [[BB0]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[I]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[TMP3]], label [[BB3]], label [[BB2:%.*]], !prof !16
+; CHECK: bb2:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: [[Q:%.*]] = add i32 [[P]], [[TMP2]]
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[P]], [[BB1]] ], [ [[Q]], [[BB2]] ], [ [[I]], [[BBQ]] ]
+; CHECK-NEXT: ret i32 [[R]]
+;
+entry:
+ %v0 = and i32 %i, 4
+ %v1 = icmp eq i32 %v0, 0
+ br i1 %v1, label %bbe, label %bbq
+
+bbq:
+ br i1 %j, label %bb3, label %bb1
+
+bbe:
+ %0 = and i32 %i, 1
+ %1 = icmp eq i32 %0, 0
+ br i1 %1, label %bb1, label %bb0, !prof !15
+
+bb0:
+ call void @foo()
+ %s = add i32 %0, %i
+ br label %bb1
+
+bb1:
+ %p = phi i32 [ %i, %bbq ], [ %0, %bbe ], [ %s, %bb0 ]
+ %2 = and i32 %i, 2
+ %3 = icmp eq i32 %2, 0
+ br i1 %3, label %bb3, label %bb2, !prof !15
+
+bb2:
+ call void @foo()
+ %q = add i32 %p, %2
+ br label %bb3
+
+bb3:
+ %r = phi i32 [ %p, %bb1 ], [ %q, %bb2 ], [ %i, %bbq ]
+ ret i32 %r
+}
+
+; Select + br, there's a loop and we need to update the user of an inserted phi
+; at the entry block. This is a regression test for a bug that's fixed.
+; Roughly,
+; do {
+; inc1 = phi inc2, 0
+; li = *i
+; sum1 = sum0 + 42
+; sum2 = ((li & 1) == 0) ? sum0 : sum1 // Likely false
+; inc2 = inc1 + 1
+; if ((li & 4) != 0) // Likely true
+; sum3 = sum2 + 44
+; sum4 = phi sum1, sum3
+; } while (inc2 != 100) // Likely true (loop back)
+; return sum4
+; ->
+; do {
+; inc1 = phi tmp2, 0 // The first operand needed to be updated
+; li = *i
+; sum1 = sum0 + 42
+; if ((li & 5) == 5) { // Likely true
+; inc2 = inc1 + 1
+; sum3 = sum0 + 86
+; } else {
+; inc2_nc = inc1 + 1
+; if ((li & 4) == 0)
+; sum2_nc = ((li & 1) == 0) ? sum0 : sum1
+; sum3_nc = sum2_nc + 44
+; }
+; tmp2 = phi inc2, in2c_nc
+; sum4 = phi sum3, sum3_nc, sum1
+; } while (tmp2 != 100)
+; return sum4
+define i32 @test_chr_18(i32* %i, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_18(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[BB0:%.*]]
+; CHECK: bb0:
+; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[TMP2:%.*]], [[BB2:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[LI:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[SUM1:%.*]] = add i32 [[SUM0:%.*]], 42
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[LI]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 5
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_SPLIT:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb0.split:
+; CHECK-NEXT: [[INC2:%.*]] = add i32 [[INC1]], 1
+; CHECK-NEXT: [[SUM3:%.*]] = add i32 [[SUM0]], 86
+; CHECK-NEXT: br label [[BB2]]
+; CHECK: bb0.split.nonchr:
+; CHECK-NEXT: [[A4_NONCHR:%.*]] = and i32 [[LI]], 4
+; CHECK-NEXT: [[CMP4_NONCHR:%.*]] = icmp eq i32 [[A4_NONCHR]], 0
+; CHECK-NEXT: [[INC2_NONCHR:%.*]] = add i32 [[INC1]], 1
+; CHECK-NEXT: br i1 [[CMP4_NONCHR]], label [[BB2]], label [[BB1_NONCHR:%.*]], !prof !16
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[A1:%.*]] = and i32 [[LI]], 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A1]], 0
+; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[CMP1]], i32 [[SUM0]], i32 [[SUM1]], !prof !16
+; CHECK-NEXT: [[SUM3_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], 44
+; CHECK-NEXT: br label [[BB2]]
+; CHECK: bb2:
+; CHECK-NEXT: [[TMP2]] = phi i32 [ [[INC2]], [[BB0_SPLIT]] ], [ [[INC2_NONCHR]], [[BB1_NONCHR]] ], [ [[INC2_NONCHR]], [[BB0_SPLIT_NONCHR]] ]
+; CHECK-NEXT: [[SUM4:%.*]] = phi i32 [ [[SUM3]], [[BB0_SPLIT]] ], [ [[SUM3_NONCHR]], [[BB1_NONCHR]] ], [ [[SUM1]], [[BB0_SPLIT_NONCHR]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 100
+; CHECK-NEXT: br i1 [[CMP]], label [[BB3:%.*]], label [[BB0]], !prof !16
+; CHECK: bb3:
+; CHECK-NEXT: ret i32 [[SUM4]]
+;
+entry:
+ br label %bb0
+
+bb0:
+ %inc1 = phi i32 [ %inc2, %bb2 ], [ 0, %entry ]
+ %li = load i32, i32* %i
+ %a1 = and i32 %li, 1
+ %cmp1 = icmp eq i32 %a1, 0
+ %sum1 = add i32 %sum0, 42
+ %sum2 = select i1 %cmp1, i32 %sum0, i32 %sum1, !prof !15
+ %a4 = and i32 %li, 4
+ %cmp4 = icmp eq i32 %a4, 0
+ %inc2 = add i32 %inc1, 1
+ br i1 %cmp4, label %bb2, label %bb1, !prof !15
+
+bb1:
+ %sum3 = add i32 %sum2, 44
+ br label %bb2
+
+bb2:
+ %sum4 = phi i32 [ %sum1, %bb0 ], [ %sum3, %bb1 ]
+ %cmp = icmp eq i32 %inc2, 100
+ br i1 %cmp, label %bb3, label %bb0, !prof !15
+
+bb3:
+ ret i32 %sum4
+}
+
+
+; Selects + Brs. Those share the condition value, which causes the
+; targets/operands of the branch/select to be flipped.
+; Roughly,
+; t0 = *i
+; if ((t0 & 255) != 0) { // Likely true
+; sum1 = ((t0 & 1) == 0) ? sum0 : (sum0 + 42) // Likely false
+; sum2 = ((t0 & 1) == 0) ? sum1 : (sum1 + 42) // Likely false
+; if ((t0 & 1) != 0) { // Likely true
+; sum3 = sum2 + 44
+; sum4 = ((t0 & 8) == 0) ? sum3 : (sum3 + 44) // Likely false
+; }
+; sum5 = phi sum2, sum4
+; }
+; sum6 = phi sum0, sum5
+; return sum6
+; ->
+; t0 = *i
+; if ((t0 & 9) == 9) { // Likely true
+; tmp3 = sum0 + 85 // Dead
+; tmp4 = sum0 + 173
+; } else {
+; if ((t0 & 255) != 0) {
+; sum2_nc = ((t0 & 1) == 0) ? sum0 : (sum0 + 85)
+; sum4_nc_v = ((t0 & 8) == 0) ? 44 : 88
+; sum4_nc = add sum2_nc + sum4_nc_v
+; }
+; }
+; sum6 = phi tmp4, sum0, sum2_nc, sum4_nc
+; return sum6
+define i32 @test_chr_19(i32* %i, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_19(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 9
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 9
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: bb0:
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SUM0:%.*]], 85
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUM0]], 173
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 255
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[SUM0]], 85
+; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM0]], i32 [[TMP9]], !prof !16
+; CHECK-NEXT: br i1 [[TMP8]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP0]], 8
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP11]], i32 44, i32 88, !prof !16
+; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT: ret i32 [[SUM6]]
+;
+entry:
+ %0 = load i32, i32* %i
+ %1 = and i32 %0, 255
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb3, label %bb0, !prof !15
+
+bb0:
+ %3 = and i32 %0, 1
+ %4 = icmp eq i32 %3, 0
+ %5 = add i32 %sum0, 42
+ %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15
+ %6 = add i32 %sum1, 43
+ %sum2 = select i1 %4, i32 %sum1, i32 %6, !prof !15
+ br i1 %4, label %bb2, label %bb1, !prof !15
+
+bb1:
+ %sum3 = add i32 %sum2, 44
+ %7 = and i32 %0, 8
+ %8 = icmp eq i32 %7, 0
+ %9 = add i32 %sum3, 44
+ %sum4 = select i1 %8, i32 %sum3, i32 %9, !prof !15
+ br label %bb2
+
+bb2:
+ %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
+ br label %bb3
+
+bb3:
+ %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
+ ret i32 %sum6
+}
+
+; Selects. The exit block, which belongs to the top-level region, has a select
+; and causes the top-level region to be the outermost CHR scope with the
+; subscope that includes the entry block with two selects. The outermost CHR
+; scope doesn't see the selects in the entry block as the entry block is in the
+; subscope and incorrectly sets the CHR hoist point to the branch rather than
+; the first select in the entry block and causes the CHR'ed selects ("select i1
+; false...") to incorrectly position above the CHR branch. This is testing
+; against a quirk of how the region analysis handles the entry block.
+; Roughly,
+; i0 = *i
+; sum2 = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
+; sum3 = ((i0 & 4) == 0) ? sum2 : (sum2 + 44) // Likely false
+; if (j)
+; foo()
+; i5 = *i
+; v13 = (i5 == 44) ? i5 : sum3
+; return v13
+; ->
+; i0 = *i
+; if ((i0 & 6) != 6) { // Likely true
+; v9 = sum0 + 87
+; if (j)
+; foo()
+; } else {
+; sum2.nc = ((i0 & 2) == 0) ? sum0 : (sum0 + 43)
+; sum3.nc = ((i0 & 4) == 0) ? sum2.nc : (sum2.nc + 44)
+; if (j)
+; foo()
+; }
+; t2 = phi v9, sum3.nc
+; i5 = *i
+; v13 = (i5 == 44) ? 44 : t2
+; return v13
+define i32 @test_chr_20(i32* %i, i32 %sum0, i1 %j) !prof !14 {
+; CHECK-LABEL: @test_chr_20(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[I0]], 6
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 6
+; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK: entry.split:
+; CHECK-NEXT: [[V9:%.*]] = add i32 [[SUM0:%.*]], 87
+; CHECK-NEXT: br i1 [[J:%.*]], label [[BB1:%.*]], label [[BB4:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB4]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0]], 43
+; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0
+; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
+; CHECK-NEXT: [[V6_NONCHR:%.*]] = and i32 [[I0]], 4
+; CHECK-NEXT: [[V5_NONCHR:%.*]] = icmp eq i32 [[V6_NONCHR]], 0
+; CHECK-NEXT: [[V9_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], 44
+; CHECK-NEXT: [[SUM3_NONCHR:%.*]] = select i1 [[V5_NONCHR]], i32 [[SUM2_NONCHR]], i32 [[V9_NONCHR]], !prof !16
+; CHECK-NEXT: br i1 [[J]], label [[BB1_NONCHR:%.*]], label [[BB4]]
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB4]]
+; CHECK: bb4:
+; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[V9]], [[BB1]] ], [ [[V9]], [[ENTRY_SPLIT]] ], [ [[SUM3_NONCHR]], [[BB1_NONCHR]] ], [ [[SUM3_NONCHR]], [[ENTRY_SPLIT_NONCHR]] ]
+; CHECK-NEXT: [[I5:%.*]] = load i32, i32* [[I]], align 4
+; CHECK-NEXT: [[V12:%.*]] = icmp eq i32 [[I5]], 44
+; CHECK-NEXT: [[V13:%.*]] = select i1 [[V12]], i32 44, i32 [[TMP2]], !prof !16
+; CHECK-NEXT: ret i32 [[V13]]
+;
+entry:
+ %i0 = load i32, i32* %i
+ %v3 = and i32 %i0, 2
+ %v4 = icmp eq i32 %v3, 0
+ %v8 = add i32 %sum0, 43
+ %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
+ %v6 = and i32 %i0, 4
+ %v5 = icmp eq i32 %v6, 0
+ %v9 = add i32 %sum2, 44
+ %sum3 = select i1 %v5, i32 %sum2, i32 %v9, !prof !15
+ br i1 %j, label %bb1, label %bb4
+
+bb1:
+ call void @foo()
+ br label %bb4
+
+bb4:
+ %i5 = load i32, i32* %i
+ %v12 = icmp eq i32 %i5, 44
+ %v13 = select i1 %v12, i32 %i5, i32 %sum3, !prof !15
+ ret i32 %v13
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+
+!14 = !{!"function_entry_count", i64 100}
+!15 = !{!"branch_weights", i32 0, i32 1}
+!16 = !{!"branch_weights", i32 1, i32 1}
+; CHECK: !15 = !{!"branch_weights", i32 1000, i32 0}
+; CHECK: !16 = !{!"branch_weights", i32 0, i32 1}
+; CHECK: !17 = !{!"branch_weights", i32 1, i32 1}
+; CHECK: !18 = !{!"branch_weights", i32 1, i32 0}
+; CHECK: !19 = !{!"branch_weights", i32 0, i32 1000}
More information about the llvm-commits
mailing list