[llvm] r341386 - [PGO] Control Height Reduction

Hiroshi Yamauchi via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 4 10:19:13 PDT 2018


Author: yamauchi
Date: Tue Sep  4 10:19:13 2018
New Revision: 341386

URL: http://llvm.org/viewvc/llvm-project?rev=341386&view=rev
Log:
[PGO] Control Height Reduction

Summary:
Control height reduction merges conditional blocks of code and reduces the
number of conditional branches in the hot path based on profiles.

if (hot_cond1) { // Likely true.
  do_stg_hot1();
}
if (hot_cond2) { // Likely true.
  do_stg_hot2();
}

->

if (hot_cond1 && hot_cond2) { // Hot path.
  do_stg_hot1();
  do_stg_hot2();
} else { // Cold path.
  if (hot_cond1) {
    do_stg_hot1();
  }
  if (hot_cond2) {
    do_stg_hot2();
  }
}

This speeds up some internal benchmarks up to ~30%.

Reviewers: davidxl

Reviewed By: davidxl

Subscribers: xbolva00, dmgreen, mehdi_amini, llvm-commits, mgorny

Differential Revision: https://reviews.llvm.org/D50591

Added:
    llvm/trunk/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
    llvm/trunk/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
    llvm/trunk/test/Transforms/PGOProfile/chr.ll
Modified:
    llvm/trunk/include/llvm/InitializePasses.h
    llvm/trunk/include/llvm/LinkAllPasses.h
    llvm/trunk/include/llvm/Transforms/Utils.h
    llvm/trunk/lib/Passes/PassBuilder.cpp
    llvm/trunk/lib/Passes/PassRegistry.def
    llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
    llvm/trunk/lib/Transforms/Instrumentation/CMakeLists.txt
    llvm/trunk/lib/Transforms/Instrumentation/Instrumentation.cpp

Modified: llvm/trunk/include/llvm/InitializePasses.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/InitializePasses.h?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/include/llvm/InitializePasses.h (original)
+++ llvm/trunk/include/llvm/InitializePasses.h Tue Sep  4 10:19:13 2018
@@ -103,6 +103,7 @@ void initializeCodeGenPreparePass(PassRe
 void initializeConstantHoistingLegacyPassPass(PassRegistry&);
 void initializeConstantMergeLegacyPassPass(PassRegistry&);
 void initializeConstantPropagationPass(PassRegistry&);
+void initializeControlHeightReductionLegacyPassPass(PassRegistry&);
 void initializeCorrelatedValuePropagationPass(PassRegistry&);
 void initializeCostModelAnalysisPass(PassRegistry&);
 void initializeCrossDSOCFIPass(PassRegistry&);

Modified: llvm/trunk/include/llvm/LinkAllPasses.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/LinkAllPasses.h?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/include/llvm/LinkAllPasses.h (original)
+++ llvm/trunk/include/llvm/LinkAllPasses.h Tue Sep  4 10:19:13 2018
@@ -88,6 +88,7 @@ namespace {
       (void) llvm::createCalledValuePropagationPass();
       (void) llvm::createConstantMergePass();
       (void) llvm::createConstantPropagationPass();
+      (void) llvm::createControlHeightReductionLegacyPass();
       (void) llvm::createCostModelAnalysisPass();
       (void) llvm::createDeadArgEliminationPass();
       (void) llvm::createDeadCodeEliminationPass();

Added: llvm/trunk/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h?rev=341386&view=auto
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h (added)
+++ llvm/trunk/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h Tue Sep  4 10:19:13 2018
@@ -0,0 +1,32 @@
+//===- ControlHeightReduction.h - Control Height Reduction ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass merges conditional blocks of code and reduces the number of
+// conditional branches in the hot paths based on profiles.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+
+namespace llvm {
+
+class ControlHeightReductionPass :
+      public PassInfoMixin<ControlHeightReductionPass> {
+public:
+  ControlHeightReductionPass();
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H

Modified: llvm/trunk/include/llvm/Transforms/Utils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils.h?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils.h Tue Sep  4 10:19:13 2018
@@ -113,6 +113,13 @@ extern char &LoopSimplifyID;
 /// This function returns a new pass that downgrades the debug info in the
 /// module to line tables only.
 ModulePass *createStripNonLineTableDebugInfoPass();
+
+//===----------------------------------------------------------------------===//
+//
+// ControlHeightReduction - Merges conditional blocks of code and reduces the
+// number of conditional branches in the hot paths based on profiles.
+//
+FunctionPass *createControlHeightReductionLegacyPass();
 }
 
 #endif

Modified: llvm/trunk/lib/Passes/PassBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Passes/PassBuilder.cpp?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/lib/Passes/PassBuilder.cpp (original)
+++ llvm/trunk/lib/Passes/PassBuilder.cpp Tue Sep  4 10:19:13 2018
@@ -87,6 +87,7 @@
 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
 #include "llvm/Transforms/InstCombine/InstCombine.h"
 #include "llvm/Transforms/Instrumentation/BoundsChecking.h"
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
 #include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
@@ -193,6 +194,10 @@ static cl::opt<bool> EnableSyntheticCoun
 static Regex DefaultAliasRegex(
     "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
 
+static cl::opt<bool>
+    EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
+              cl::desc("Enable control height reduction optimization (CHR)"));
+
 static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
   switch (Level) {
   case PassBuilder::O0:
@@ -486,6 +491,10 @@ PassBuilder::buildFunctionSimplification
   FPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(FPM, Level);
 
+  if (EnableCHR && Level == O3 && PGOOpt &&
+      (!PGOOpt->ProfileUseFile.empty() || !PGOOpt->SampleProfileFile.empty()))
+    FPM.addPass(ControlHeightReductionPass());
+
   return FPM;
 }
 

Modified: llvm/trunk/lib/Passes/PassRegistry.def
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Passes/PassRegistry.def?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/lib/Passes/PassRegistry.def (original)
+++ llvm/trunk/lib/Passes/PassRegistry.def Tue Sep  4 10:19:13 2018
@@ -148,6 +148,7 @@ FUNCTION_PASS("bounds-checking", BoundsC
 FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass())
 FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass())
 FUNCTION_PASS("consthoist", ConstantHoistingPass())
+FUNCTION_PASS("chr", ControlHeightReductionPass())
 FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
 FUNCTION_PASS("dce", DCEPass())
 FUNCTION_PASS("div-rem-pairs", DivRemPairsPass())

Modified: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp Tue Sep  4 10:19:13 2018
@@ -152,6 +152,10 @@ static cl::opt<bool> EnableGVNSink(
     "enable-gvn-sink", cl::init(false), cl::Hidden,
     cl::desc("Enable the GVN sinking pass (default = off)"));
 
+static cl::opt<bool>
+    EnableCHR("enable-chr", cl::init(true), cl::Hidden,
+              cl::desc("Enable control height reduction optimization (CHR)"));
+
 PassManagerBuilder::PassManagerBuilder() {
     OptLevel = 2;
     SizeLevel = 0;
@@ -411,6 +415,10 @@ void PassManagerBuilder::addFunctionSimp
   // Clean up after everything.
   addInstructionCombiningPass(MPM);
   addExtensionsToPM(EP_Peephole, MPM);
+
+  if (EnableCHR && OptLevel >= 3 &&
+      (!PGOInstrUse.empty() || !PGOSampleUse.empty()))
+    MPM.add(createControlHeightReductionLegacyPass());
 }
 
 void PassManagerBuilder::populateModulePassManager(

Modified: llvm/trunk/lib/Transforms/Instrumentation/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/CMakeLists.txt?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/CMakeLists.txt (original)
+++ llvm/trunk/lib/Transforms/Instrumentation/CMakeLists.txt Tue Sep  4 10:19:13 2018
@@ -2,6 +2,7 @@ add_llvm_library(LLVMInstrumentation
   AddressSanitizer.cpp
   BoundsChecking.cpp
   CGProfile.cpp
+  ControlHeightReduction.cpp
   DataFlowSanitizer.cpp
   GCOVProfiling.cpp
   MemorySanitizer.cpp

Added: llvm/trunk/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/ControlHeightReduction.cpp?rev=341386&view=auto
==============================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/ControlHeightReduction.cpp (added)
+++ llvm/trunk/lib/Transforms/Instrumentation/ControlHeightReduction.cpp Tue Sep  4 10:19:13 2018
@@ -0,0 +1,2010 @@
+//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass merges conditional blocks of code and reduces the number of
+// conditional branches in the hot paths based on profiles.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Transforms/Scalar.h"
+
+#include <cxxabi.h>
+#include <set>
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "chr"
+
+#define CHR_DEBUG(X) LLVM_DEBUG(X)
+
+static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
+                              cl::desc("Apply CHR for all functions"));
+
+static cl::opt<double> CHRBiasThreshold(
+    "chr-bias-threshold", cl::init(0.99), cl::Hidden,
+    cl::desc("CHR considers a branch bias greater than this ratio as biased"));
+
+static cl::opt<unsigned> CHRMergeThreshold(
+    "chr-merge-threshold", cl::init(2), cl::Hidden,
+    cl::desc("CHR merges a group of N branches/selects where N >= this value"));
+
+static cl::opt<std::string> CHRModuleList(
+    "chr-module-list", cl::init(""), cl::Hidden,
+    cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
+
+static cl::opt<std::string> CHRFunctionList(
+    "chr-function-list", cl::init(""), cl::Hidden,
+    cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
+
+static StringSet<> CHRModules;
+static StringSet<> CHRFunctions;
+
+// Reads the file at Path and inserts every non-empty line, with surrounding
+// whitespace trimmed, into Set. If the file cannot be read, prints ErrPrefix
+// followed by Path to errs() and terminates the process with exit status 1.
+static void ReadCHRFilterFile(const std::string &Path, const char *ErrPrefix,
+                              StringSet<> &Set) {
+  auto FileOrErr = MemoryBuffer::getFile(Path);
+  if (!FileOrErr) {
+    errs() << ErrPrefix << Path << "\n";
+    std::exit(1);
+  }
+  SmallVector<StringRef, 0> Lines;
+  FileOrErr->get()->getBuffer().split(Lines, '\n');
+  for (StringRef Line : Lines) {
+    Line = Line.trim();
+    if (!Line.empty())
+      Set.insert(Line);
+  }
+}
+
+// Fills CHRModules and CHRFunctions from the files named by the
+// chr-module-list and chr-function-list options. An option left empty is
+// skipped, so its set is not touched.
+static void ParseCHRFilterFiles() {
+  if (!CHRModuleList.empty())
+    ReadCHRFilterFile(CHRModuleList,
+                      "Error: Couldn't read the chr-module-list file ",
+                      CHRModules);
+  if (!CHRFunctionList.empty())
+    ReadCHRFilterFile(CHRFunctionList,
+                      "Error: Couldn't read the chr-function-list file ",
+                      CHRFunctions);
+}
+
+namespace {
+class ControlHeightReductionLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  ControlHeightReductionLegacyPass() : FunctionPass(ID) {
+    initializeControlHeightReductionLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+    ParseCHRFilterFiles();
+  }
+
+  bool runOnFunction(Function &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<BlockFrequencyInfoWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.addRequired<RegionInfoPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+  }
+};
+} // end anonymous namespace
+
+char ControlHeightReductionLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
+                      "chr",
+                      "Reduce control height in the hot paths",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
+INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
+                    "chr",
+                    "Reduce control height in the hot paths",
+                    false, false)
+
+FunctionPass *llvm::createControlHeightReductionLegacyPass() {
+  return new ControlHeightReductionLegacyPass();
+}
+
+namespace {
+
+struct CHRStats {
+  // The original number of conditional branches / selects.
+  uint64_t NumBranches = 0;
+  // The decrease of the number of conditional branches / selects in the hot
+  // paths due to CHR.
+  uint64_t NumBranchesDelta = 0;
+  // NumBranchesDelta weighted by the profile count at the scope entry.
+  uint64_t WeightedNumBranchesDelta = 0;
+  // Writes the three counters to OS on one line, without a trailing newline.
+  void print(raw_ostream &OS) const {
+    OS << "CHRStats: NumBranches " << NumBranches;
+    OS << " NumBranchesDelta " << NumBranchesDelta;
+    OS << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
+  }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const CHRStats &Stats) {
+  Stats.print(OS);
+  return OS;
+}
+
+// RegInfo - properties recorded for one Region; stored by value in CHRScope.
+struct RegInfo {
+  RegInfo() : R(nullptr), HasBranch(false) {}
+  RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
+  Region *R; // The region described; null when default-constructed.
+  bool HasBranch; // Starts false; CHRScope::print shows it as " B" when set.
+  SmallVector<SelectInst *, 8> Selects; // Selects recorded for R.
+};
+
+typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
+
+// CHRScope - a sequence of regions to CHR together. It corresponds to a
+// sequence of conditional blocks. It can have subscopes which correspond to
+// nested conditional blocks. Nested CHRScopes form a tree.
+class CHRScope {
+ public:
+  CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
+    assert(RI.R && "Null RegionIn");
+    RegInfos.push_back(RI);
+  }
+
+  Region *getParentRegion() {
+    assert(RegInfos.size() > 0 && "Empty CHRScope");
+    Region *Parent = RegInfos[0].R->getParent();
+    assert(Parent && "Unexpected to call this on the top-level region");
+    return Parent;
+  }
+
+  BasicBlock *getEntryBlock() {
+    assert(RegInfos.size() > 0 && "Empty CHRScope");
+    return RegInfos.front().R->getEntry();
+  }
+
+  BasicBlock *getExitBlock() {
+    assert(RegInfos.size() > 0 && "Empty CHRScope");
+    return RegInfos.back().R->getExit();
+  }
+
+  bool appendable(CHRScope *Next) {
+    // The next scope is appendable only if this scope is directly connected to
+    // it (which implies it post-dominates this scope) and this scope dominates
+    // it (no edge to the next scope outside this scope).
+    BasicBlock *NextEntry = Next->getEntryBlock();
+    if (getExitBlock() != NextEntry)
+      // Not directly connected.
+      return false;
+    Region *LastRegion = RegInfos.back().R;
+    for (BasicBlock *Pred : predecessors(NextEntry))
+      if (!LastRegion->contains(Pred))
+        // There's an edge going into the entry of the next scope from outside
+        // of this scope.
+        return false;
+    return true;
+  }
+
+  void append(CHRScope *Next) {
+    assert(RegInfos.size() > 0 && "Empty CHRScope");
+    assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
+    assert(getParentRegion() == Next->getParentRegion() &&
+           "Must be siblings");
+    assert(getExitBlock() == Next->getEntryBlock() &&
+           "Must be adjacent");
+    for (RegInfo &RI : Next->RegInfos)
+      RegInfos.push_back(RI);
+    for (CHRScope *Sub : Next->Subs)
+      Subs.push_back(Sub);
+  }
+
+  void addSub(CHRScope *SubIn) {
+#ifndef NDEBUG
+    bool is_child = false;
+    for (RegInfo &RI : RegInfos)
+      if (RI.R == SubIn->getParentRegion()) {
+        is_child = true;
+        break;
+      }
+    assert(is_child && "Must be a child");
+#endif
+    Subs.push_back(SubIn);
+  }
+
+  // Split this scope in two at the boundary region, which becomes the first
+  // region of the tail. Returns the tail, or null if Boundary isn't found.
+  CHRScope *split(Region *Boundary) {
+    assert(Boundary && "Boundary null");
+    assert(RegInfos.begin()->R != Boundary &&
+           "Can't be split at beginning");
+    auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(),
+                                   [&Boundary](const RegInfo& RI) {
+                                     return Boundary == RI.R;
+                                   });
+    if (BoundaryIt == RegInfos.end())
+      return nullptr;
+    SmallVector<RegInfo, 8> TailRegInfos;
+    SmallVector<CHRScope *, 8> TailSubs;
+    TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end());
+    RegInfos.resize(BoundaryIt - RegInfos.begin());
+    DenseSet<Region *> TailRegionSet;
+    for (RegInfo &RI : TailRegInfos)
+      TailRegionSet.insert(RI.R);
+    for (auto It = Subs.begin(); It != Subs.end(); ) {
+      CHRScope *Sub = *It;
+      assert(Sub && "null Sub");
+      Region *Parent = Sub->getParentRegion();
+      if (TailRegionSet.count(Parent)) {
+        TailSubs.push_back(Sub);
+        It = Subs.erase(It);
+      } else {
+        assert(std::find_if(RegInfos.begin(), RegInfos.end(),
+                            [&Parent](const RegInfo& RI) {
+                              return Parent == RI.R;
+                            }) != RegInfos.end() &&
+               "Must be in head");
+        ++It;
+      }
+    }
+    assert(HoistStopMap.empty() && "MapHoistStops must be empty");
+    return new CHRScope(TailRegInfos, TailSubs);
+  }
+
+  bool contains(Instruction *I) const {
+    BasicBlock *Parent = I->getParent();
+    for (const RegInfo &RI : RegInfos)
+      if (RI.R->contains(Parent))
+        return true;
+    return false;
+  }
+
+  void print(raw_ostream &OS) const;
+
+  SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
+  SmallVector<CHRScope *, 8> Subs;  // Subscopes.
+
+  // The instruction at which to insert the CHR conditional branch (and hoist
+  // the dependent condition values).
+  Instruction *BranchInsertPoint;
+
+  // True-biased and false-biased regions (conditional blocks),
+  // respectively. Used only for the outermost scope and includes regions in
+  // subscopes. The rest are unbiased.
+  DenseSet<Region *> TrueBiasedRegions;
+  DenseSet<Region *> FalseBiasedRegions;
+  // Among the biased regions, the regions that get CHRed.
+  SmallVector<RegInfo, 8> CHRRegions;
+
+  // True-biased and false-biased selects, respectively. Used only for the
+  // outermost scope and includes ones in subscopes.
+  DenseSet<SelectInst *> TrueBiasedSelects;
+  DenseSet<SelectInst *> FalseBiasedSelects;
+
+  // Map from one of the above regions to the instructions at which hoisting
+  // through the use-def chains stops.
+  HoistStopMapTy HoistStopMap;
+
+ private:
+  CHRScope(SmallVector<RegInfo, 8> &RegInfosIn,
+           SmallVector<CHRScope *, 8> &SubsIn)
+    : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
+  Scope.print(OS);
+  return OS;
+}
+
+class CHR {
+ public:
+  CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
+      ProfileSummaryInfo &PSIin, RegionInfo &RIin)
+      : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {}
+
+  ~CHR() {
+    for (CHRScope *Scope : Scopes) {
+      delete Scope;
+    }
+  }
+
+  bool run();
+
+ private:
+  // See the comments in CHR::run() for the high level flow of the algorithm and
+  // what the following functions do.
+
+  void findScopes(SmallVectorImpl<CHRScope *> &Output) {
+    Region *R = RI.getTopLevelRegion();
+    CHRScope *Scope = findScopes(R, nullptr, nullptr, Output);
+    if (Scope) {
+      Output.push_back(Scope);
+    }
+  }
+  CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
+                        SmallVectorImpl<CHRScope *> &Scopes);
+  CHRScope *findScope(Region *R);
+  void checkScopeHoistable(CHRScope *Scope);
+
+  void splitScopes(SmallVectorImpl<CHRScope *> &Input,
+                   SmallVectorImpl<CHRScope *> &Output);
+  SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
+                                        CHRScope *Outer,
+                                        DenseSet<Value *> *OuterConditionValues,
+                                        Instruction *OuterInsertPoint,
+                                        SmallVectorImpl<CHRScope *> &Output,
+                                        DenseSet<Instruction *> &Unhoistables);
+
+  void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
+  void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
+
+  void filterScopes(SmallVectorImpl<CHRScope *> &Input,
+                    SmallVectorImpl<CHRScope *> &Output);
+
+  void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
+                     SmallVectorImpl<CHRScope *> &Output);
+  void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
+
+  void sortScopes(SmallVectorImpl<CHRScope *> &Input,
+                  SmallVectorImpl<CHRScope *> &Output);
+
+  void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
+  void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
+  void cloneScopeBlocks(CHRScope *Scope,
+                        BasicBlock *PreEntryBlock,
+                        BasicBlock *ExitBlock,
+                        Region *LastRegion,
+                        ValueToValueMapTy &VMap);
+  BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
+                                 BasicBlock *EntryBlock,
+                                 BasicBlock *NewEntryBlock,
+                                 ValueToValueMapTy &VMap);
+  void fixupBranchesAndSelects(CHRScope *Scope,
+                               BasicBlock *PreEntryBlock,
+                               BranchInst *MergedBR,
+                               uint64_t ProfileCount);
+  void fixupBranch(Region *R,
+                   CHRScope *Scope,
+                   IRBuilder<> &IRB,
+                   Value *&MergedCondition, BranchProbability &CHRBranchBias);
+  void fixupSelect(SelectInst* SI,
+                   CHRScope *Scope,
+                   IRBuilder<> &IRB,
+                   Value *&MergedCondition, BranchProbability &CHRBranchBias);
+  void addToMergedCondition(bool IsTrueBiased, Value *Cond,
+                            Instruction *BranchOrSelect,
+                            CHRScope *Scope,
+                            IRBuilder<> &IRB,
+                            Value *&MergedCondition);
+
+  Function &F;
+  BlockFrequencyInfo &BFI;
+  DominatorTree &DT;
+  ProfileSummaryInfo &PSI;
+  RegionInfo &RI;
+  CHRStats Stats;
+
+  // All the true-biased regions in the function
+  DenseSet<Region *> TrueBiasedRegionsGlobal;
+  // All the false-biased regions in the function
+  DenseSet<Region *> FalseBiasedRegionsGlobal;
+  // All the true-biased selects in the function
+  DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
+  // All the false-biased selects in the function
+  DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
+  // A map from biased regions to their branch bias
+  DenseMap<Region *, BranchProbability> BranchBiasMap;
+  // A map from biased selects to their branch bias
+  DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
+  // All the scopes.
+  DenseSet<CHRScope *> Scopes;
+};
+
+} // end anonymous namespace
+
+// Returns true if CHR should run on F: when forced, when a filter list names
+// F or its module, or (with no filter list given) when F's entry is hot.
+static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
+  if (ForceCHR)
+    return true;
+  if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
+    StringRef Name = F.getName();
+    if (CHRModules.count(F.getParent()->getName()) || CHRFunctions.count(Name))
+      return true;
+    // The demangled buffer is malloc()ed and owned by us; free it after use.
+    int Status = -1;
+    char *DemangledName = abi::__cxa_demangle(Name.str().c_str(),
+                                              nullptr, nullptr, &Status);
+    bool Listed = DemangledName && CHRFunctions.count(DemangledName);
+    std::free(DemangledName);
+    return Listed;
+  }
+  assert(PSI.hasProfileSummary() && "Empty PSI?");
+  return PSI.isFunctionEntryHot(&F);
+}
+
+// Debug-only helper: prints a "CHR IR dump" banner made of Label, the module
+// name and F's name as stored in the IR, followed by *Stats when Stats is
+// non-null, and then dumps F. All output is wrapped in CHR_DEBUG (LLVM_DEBUG
+// under DEBUG_TYPE "chr").
+static void dumpIR(Function &F, const char *Label, CHRStats *Stats) {
+  // The name is printed exactly as stored. Demangling it here would only
+  // allocate a buffer that nothing prints or frees, so no
+  // abi::__cxa_demangle call is made.
+  std::string Name = F.getName().str();
+  std::string ModuleName = F.getParent()->getName().str();
+  CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
+            << Name);
+  if (Stats)
+    CHR_DEBUG(dbgs() << " " << *Stats);
+  CHR_DEBUG(dbgs() << "\n");
+  CHR_DEBUG(F.dump());
+}
+
+
+// Prints this scope as "CHRScope[<#regions>, Regions[...], Parent <name>,
+// Subs[...]]", recursing into the subscopes. The "Parent" part is left out
+// when the first region has no parent region.
+void CHRScope::print(raw_ostream &OS) const {
+  assert(RegInfos.size() > 0 && "Empty CHRScope");
+  OS << "CHRScope[" << RegInfos.size() << ", Regions[";
+  for (const RegInfo &Info : RegInfos) {
+    OS << Info.R->getNameStr();
+    if (Info.HasBranch)
+      OS << " B";
+    if (Info.Selects.size() > 0)
+      OS << " S" << Info.Selects.size();
+    OS << ", ";
+  }
+  Region *Parent = RegInfos[0].R->getParent();
+  if (Parent)
+    OS << "], Parent " << Parent->getNameStr();
+  else
+    OS << "]";  // top level region
+  OS << ", Subs[";
+  for (CHRScope *Sub : Subs)
+    OS << *Sub << ", ";
+  OS << "]]";
+}
+
+// Return true if I is one of the instruction kinds that CHR may hoist.
+static bool isHoistableInstructionType(Instruction *I) {
+  return isa<BinaryOperator>(I) || isa<CmpInst>(I) || isa<CastInst>(I) ||
+         isa<GetElementPtrInst>(I) || isa<SelectInst>(I) ||
+         isa<ExtractElementInst>(I) || isa<InsertElementInst>(I) ||
+         isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
+         isa<InsertValueInst>(I);
+}
+
+// Return true if I has a hoistable instruction type and
+// isSafeToSpeculativelyExecute() accepts it.
+static bool isHoistable(Instruction *I, DominatorTree &DT) {
+  return isHoistableInstructionType(I) &&
+         isSafeToSpeculativelyExecute(I, nullptr, &DT);
+}
+
+// Walk the use-def chains of V and collect its "base values": each argument
+// reached, and each instruction reached that isHoistable() rejects. Hoistable
+// instructions are looked through to their operands. Anything else (e.g. a
+// constant) contributes nothing, since it gives no chance of folding the
+// merged conditions (eg two bit checks merged into one check) after CHR.
+static std::set<Value *> getBaseValues(Value *V,
+                                       DominatorTree &DT) {
+  std::set<Value *> Bases;
+  // An argument is always a base value.
+  if (isa<Argument>(V)) {
+    Bases.insert(V);
+    return Bases;
+  }
+  // Any other non-instruction is ignored.
+  auto *Inst = dyn_cast<Instruction>(V);
+  if (!Inst)
+    return Bases;  // empty
+  if (!isHoistable(Inst, DT)) {
+    // An unhoistable instruction is itself a base value. We don't stop at a
+    // block that's not in the Scope because we would miss some instructions
+    // that are based on the same base values if we stop there.
+    Bases.insert(Inst);
+    return Bases;
+  }
+  // Inst is hoistable above the Scope; its bases are those of its operands.
+  for (Value *Operand : Inst->operands()) {
+    std::set<Value *> OperandBases = getBaseValues(Operand, DT);
+    Bases.insert(OperandBases.begin(), OperandBases.end());
+  }
+  return Bases;
+}
+
+// Return true if V already dominates the insert point or can be hoisted (along
+// with its operands) above it. When it returns true and HoistStops is
+// non-null, the instructions at which hoisting stops along the use-def chains
+// are inserted into HoistStops.
+static bool
+checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
+                DenseSet<Instruction *> &Unhoistables,
+                DenseSet<Instruction *> *HoistStops) {
+  assert(InsertPoint && "Null InsertPoint");
+  if (auto *I = dyn_cast<Instruction>(V)) {
+    assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
+    assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
+    if (Unhoistables.count(I)) {
+      // The caller listed I as an instruction that must not be hoisted.
+      return false;
+    }
+    if (DT.dominates(I, InsertPoint)) {
+      // We are already above the insert point. Stop here.
+      if (HoistStops)
+        HoistStops->insert(I);
+      return true;
+    }
+    // We aren't above the insert point, check if we can hoist it above the
+    // insert point.
+    if (isHoistable(I, DT)) {
+      // Check operands first; their stops are committed only if all succeed.
+      DenseSet<Instruction *> OpsHoistStops;
+      bool AllOpsHoisted = true;
+      for (Value *Op : I->operands()) {
+        if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) {
+          AllOpsHoisted = false;
+          break;
+        }
+      }
+      if (AllOpsHoisted) {
+        CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
+        if (HoistStops)
+          HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
+        return true;
+      }
+    }
+    return false;
+  }
+  // Non-instructions are considered hoistable.
+  return true;
+}
+
+// Extracts the true/false probabilities from an MD_prof "branch_weights" node.
+// Returns true and sets TrueProb/FalseProb only if MD is well-formed: non-null,
+// exactly three operands, the name "branch_weights", two constant integer
+// weights, and a non-zero weight sum. Returns false (leaving the outputs
+// untouched) otherwise.
+static bool CheckMDProf(MDNode *MD, BranchProbability &TrueProb,
+                        BranchProbability &FalseProb) {
+  // Check the operand count before touching any operand.
+  if (!MD || MD->getNumOperands() != 3)
+    return false;
+  // Use the non-asserting casts so that a malformed node is rejected rather
+  // than tripping an assertion.
+  auto *MDName = dyn_cast_or_null<MDString>(MD->getOperand(0));
+  if (!MDName || MDName->getString() != "branch_weights")
+    return false;
+  auto *TrueWeight =
+      mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(1));
+  auto *FalseWeight =
+      mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(2));
+  if (!TrueWeight || !FalseWeight)
+    return false;
+  // Sum in 64 bits; adding the APInts directly would wrap at the bit width of
+  // the weights.
+  uint64_t TrueWt = TrueWeight->getZExtValue();
+  uint64_t FalseWt = FalseWeight->getZExtValue();
+  uint64_t SumWt = TrueWt + FalseWt;
+  // Reject 0-to-0 weights (the sum is used as a denominator below) and a
+  // wrapped 64-bit sum.
+  if (SumWt == 0 || SumWt < TrueWt)
+    return false;
+  TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
+  FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
+  return true;
+}
+
+// Converts the CHRBiasThreshold setting into a BranchProbability expressed in
+// parts per million.
+static BranchProbability getCHRBiasThreshold() {
+  const double ScaledThreshold = CHRBiasThreshold * 1000000;
+  return BranchProbability::getBranchProbability(
+      static_cast<uint64_t>(ScaledThreshold), 1000000);
+}
+
+// Shared by CheckBiasedBranch and CheckBiasedSelect. Classifies Key by
+// comparing its probabilities against the CHR bias threshold: the true side
+// is tested first, then the false side. A biased Key is added to the matching
+// set and the probability that met the threshold is stored in BiasMap.
+// Returns whether Key is biased either way.
+template<typename K, typename S, typename M>
+bool CheckBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb,
+               S &TrueSet, S &FalseSet, M &BiasMap) {
+  const BranchProbability MinBias = getCHRBiasThreshold();
+  const bool IsTrueBiased = TrueProb >= MinBias;
+  if (!IsTrueBiased && !(FalseProb >= MinBias))
+    return false;
+  S &BiasedSet = IsTrueBiased ? TrueSet : FalseSet;
+  BiasedSet.insert(Key);
+  BiasMap[Key] = IsTrueBiased ? TrueProb : FalseProb;
+  return true;
+}
+
+// Classifies the branch BI of region R. Returns true, recording R in the
+// true- or false-biased set and its bias in BranchBiasMap, if BI is
+// conditional, carries well-formed MD_prof metadata and is biased. "True"
+// here means going into the conditional code of R; "false" means skipping it
+// by jumping to R's exit.
+static bool CheckBiasedBranch(BranchInst *BI, Region *R,
+                              DenseSet<Region *> &TrueBiasedRegionsGlobal,
+                              DenseSet<Region *> &FalseBiasedRegionsGlobal,
+                              DenseMap<Region *, BranchProbability> &BranchBiasMap) {
+  if (!BI->isConditional())
+    return false;
+  BranchProbability ThenProb, ElseProb;
+  MDNode *ProfMD = BI->getMetadata(LLVMContext::MD_prof);
+  if (!CheckMDProf(ProfMD, ThenProb, ElseProb))
+    return false;
+  BasicBlock *Succ0 = BI->getSuccessor(0);
+  BasicBlock *Succ1 = BI->getSuccessor(1);
+  BasicBlock *ExitBB = R->getExit();
+  assert((Succ0 == ExitBB || Succ1 == ExitBB) &&
+         Succ0 != Succ1 &&
+         "Invariant from findScopes");
+  (void)Succ1;  // Only read by the assertion above.
+  // If successor 0 is the exit, the metadata's first weight describes
+  // skipping the conditional code, so exchange the two probabilities.
+  if (Succ0 == ExitBB)
+    std::swap(ThenProb, ElseProb);
+  CHR_DEBUG(dbgs() << "BI " << *BI << " ");
+  CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
+  CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
+  return CheckBias(R, ThenProb, ElseProb,
+                   TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
+                   BranchBiasMap);
+}
+
+// Classifies the select SI. Returns true, recording SI in the true- or
+// false-biased set and its bias in SelectBiasMap, if SI carries well-formed
+// MD_prof metadata and is biased. R is not consulted here.
+static bool CheckBiasedSelect(
+    SelectInst *SI, Region *R,
+    DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
+    DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
+    DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
+  BranchProbability TrueProb, FalseProb;
+  MDNode *ProfMD = SI->getMetadata(LLVMContext::MD_prof);
+  const bool HasProfile = CheckMDProf(ProfMD, TrueProb, FalseProb);
+  if (!HasProfile)
+    return false;
+  CHR_DEBUG(dbgs() << "SI " << *SI << " ");
+  CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
+  CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
+  return CheckBias(SI, TrueProb, FalseProb,
+                   TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
+                   SelectBiasMap);
+}
+
+// Returns the instruction before which the dependent condition values are
+// hoisted and the CHR branch is inserted for a region: the first element of
+// RI.Selects that lives in the entry block if there is one, otherwise the
+// terminator of the entry block.
+static Instruction* getBranchInsertPoint(RegInfo &RI) {
+  BasicBlock *EntryBB = RI.R->getEntry();
+  // Selects is expected to be sorted in instruction order within a block
+  // (asserted below), so the first match is the earliest entry-block select.
+  auto FirstEntrySelect =
+      std::find_if(RI.Selects.begin(), RI.Selects.end(),
+                   [EntryBB](SelectInst *SI) {
+                     return SI->getParent() == EntryBB;
+                   });
+  // Without such a select, fall back to the entry block's terminator, which
+  // is the same as the branch instruction if RI.HasBranch is true.
+  Instruction *HoistPoint = EntryBB->getTerminator();
+  if (FirstEntrySelect != RI.Selects.end())
+    HoistPoint = *FirstEntrySelect;
+  assert(HoistPoint && "Null HoistPoint");
+#ifndef NDEBUG
+  // Walk the entry block in instruction order and verify that the first
+  // instruction that is one of the entry-block selects is HoistPoint.
+  DenseSet<Instruction *> EntryBlockSelectSet;
+  for (SelectInst *SI : RI.Selects)
+    if (SI->getParent() == EntryBB)
+      EntryBlockSelectSet.insert(SI);
+  for (Instruction &I : *EntryBB) {
+    if (EntryBlockSelectSet.count(&I) == 0)
+      continue;
+    assert(&I == HoistPoint &&
+           "HoistPoint must be the first one in Selects");
+    break;
+  }
+#endif
+  return HoistPoint;
+}
+
+// Try to build a CHR scope for region R alone (subregions are not visited
+// here). Returns null if R's entry block belongs to a subregion, if R's entry
+// has a predecessor inside R (a loop), or if R has neither an if-then shaped
+// entry branch nor any select in its direct child blocks. A scope that is
+// found is registered in Scopes and passed through checkScopeHoistable before
+// being returned.
+CHRScope * CHR::findScope(Region *R) {
+  BasicBlock *Entry = R->getEntry();
+  BasicBlock *Exit = R->getExit();  // null if top level.
+  assert(Entry && "Entry must not be null");
+  assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
+         "Only top level region has a null exit");
+  if (Entry)
+    CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
+  else
+    CHR_DEBUG(dbgs() << "Entry null\n");
+  if (Exit)
+    CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
+  else
+    CHR_DEBUG(dbgs() << "Exit null\n");
+  // An Entry that is part of a subregion doesn't belong to this region.
+  if (RI.getRegionFor(Entry) != R)
+    return nullptr;
+  // Exclude loops: no predecessor of Entry may be inside R.
+  for (BasicBlock *Pred : predecessors(Entry))
+    if (R->contains(Pred))
+      return nullptr;
+
+  CHRScope *Found = nullptr;
+  if (Exit) {
+    // Check whether R is an if-then:
+    // if (cond) {
+    //  ...
+    // }
+    auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
+    if (BI)
+      CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
+    else
+      CHR_DEBUG(dbgs() << "BI null\n");
+    if (BI && BI->isConditional()) {
+      BasicBlock *S0 = BI->getSuccessor(0);
+      BasicBlock *S1 = BI->getSuccessor(1);
+      CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
+      CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
+      // Two distinct successors, one of which is the region exit.
+      const bool BranchesAroundBody = S0 != S1 && (S0 == Exit || S1 == Exit);
+      if (BranchesAroundBody) {
+        RegInfo Info(R);
+        Info.HasBranch = CheckBiasedBranch(
+            BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
+            BranchBiasMap);
+        Found = new CHRScope(Info);
+        Scopes.insert(Found);
+        CHR_DEBUG(dbgs() << "Found a region with a branch\n");
+        ++Stats.NumBranches;
+      }
+    }
+  }
+
+  // Look for selects in the direct child blocks (as opposed to in
+  // subregions) of R.
+  // ...
+  // if (..) { // Some subregion
+  //   ...
+  // }
+  // if (..) { // Some subregion
+  //   ...
+  // }
+  // ...
+  // a = cond ? b : c;
+  // ...
+  SmallVector<SelectInst *, 8> Selects;
+  for (RegionNode *E : R->elements()) {
+    if (E->isSubRegion())
+      continue;
+    // E is a direct child of R here, so its entry is the block it stands for.
+    // Selects are pushed in instruction order to make it easier to find the
+    // first one later.
+    for (Instruction &I : *E->getEntry()) {
+      auto *SI = dyn_cast<SelectInst>(&I);
+      if (!SI)
+        continue;
+      Selects.push_back(SI);
+      ++Stats.NumBranches;
+    }
+  }
+  if (!Selects.empty()) {
+    // Keeps only the biased selects, recording each bias globally.
+    auto AddSelects = [&](RegInfo &Info) {
+      for (SelectInst *SI : Selects) {
+        const bool IsBiased = CheckBiasedSelect(SI, Info.R,
+                                                TrueBiasedSelectsGlobal,
+                                                FalseBiasedSelectsGlobal,
+                                                SelectBiasMap);
+        if (IsBiased)
+          Info.Selects.push_back(SI);
+      }
+    };
+    if (Found) {
+      CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
+      AddSelects(Found->RegInfos[0]);
+    } else {
+      CHR_DEBUG(dbgs() << "Found a select-only region\n");
+      RegInfo Info(R);
+      AddSelects(Info);
+      Found = new CHRScope(Info);
+      Scopes.insert(Found);
+    }
+  }
+
+  if (Found)
+    checkScopeHoistable(Found);
+  return Found;
+}
+
+// Check that the conditions of the branch and the selects in the region can
+// be hoisted above the CHR branch insert point (the most dominating of them:
+// either the branch at the end of the first block or the first select in the
+// first block). Selects that can't be hoisted are dropped. If the branch
+// can't be hoisted, the selects in the first block are dropped instead.
+//
+// For example, for the following scope/region with selects, we want to insert
+// the merged branch right before the first select in the first/entry block by
+// hoisting c1, c2, c3, and c4.
+//
+// // Branch insert point here.
+// a = c1 ? b : c; // Select 1
+// d = c2 ? e : f; // Select 2
+// if (c3) { // Branch
+//   ...
+//   c4 = foo() // A call.
+//   g = c4 ? h : i; // Select 3
+// }
+//
+// But suppose we can't hoist c4 because it's dependent on the preceding
+// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
+// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
+void CHR::checkScopeHoistable(CHRScope *Scope) {
+  RegInfo &RI = Scope->RegInfos[0];  // Only the first region is examined.
+  Region *R = RI.R;
+  BasicBlock *EntryBB = R->getEntry();
+  auto *Branch = RI.HasBranch ?
+                 cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
+  SmallVector<SelectInst *, 8> &Selects = RI.Selects;  // Pruned in place.
+  if (RI.HasBranch || !Selects.empty()) {
+    Instruction *InsertPoint = getBranchInsertPoint(RI);
+    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+    // Avoid a data dependence from a select or a branch to a(nother)
+    // select. Note no instruction can data-depend on a branch (a branch
+    // instruction doesn't produce a value).
+    DenseSet<Instruction *> Unhoistables;
+    // Initialize Unhoistables with the selects.
+    for (SelectInst *SI : Selects) {
+      Unhoistables.insert(SI);
+    }
+    // Remove Selects whose condition can't be hoisted to InsertPoint.
+    for (auto it = Selects.begin(); it != Selects.end(); ) {
+      SelectInst *SI = *it;
+      if (SI == InsertPoint) {  // The insert point itself is kept unchecked.
+        ++it;
+        continue;
+      }
+      bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
+                                         DT, Unhoistables, nullptr);
+      if (!IsHoistable) {
+        CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
+        it = Selects.erase(it);  // erase() returns the next valid iterator.
+        // Since we are dropping the select here, we also drop it from
+        // Unhoistables.
+        Unhoistables.erase(SI);
+      } else
+        ++it;
+    }
+    // Update InsertPoint after potentially removing selects.
+    InsertPoint = getBranchInsertPoint(RI);
+    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+    if (RI.HasBranch && InsertPoint != Branch) {
+      bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
+                                         DT, Unhoistables, nullptr);
+      if (!IsHoistable) {
+        // If the branch isn't hoistable, drop the selects in the entry
+        // block, preferring the branch, which makes the branch the hoist
+        // point.
+        assert(InsertPoint != Branch && "Branch must not be the hoist point");
+        CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
+        CHR_DEBUG(
+            for (SelectInst *SI : Selects) {
+              dbgs() << "SI " << *SI << "\n";
+            });
+        Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
+                                     [EntryBB](SelectInst *SI) {
+                                       return SI->getParent() == EntryBB;
+                                     }), Selects.end());
+        Unhoistables.clear();
+        InsertPoint = Branch;
+      }
+    }
+    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+#ifndef NDEBUG
+    if (RI.HasBranch) {  // Debug-only re-check of the surviving branch.
+      assert(!DT.dominates(Branch, InsertPoint) &&
+             "Branch can't be already above the hoist point");
+      assert(checkHoistValue(Branch->getCondition(), InsertPoint,
+                             DT, Unhoistables, nullptr) &&
+             "checkHoistValue for branch");
+    }
+    for (auto *SI : Selects) {  // Debug-only re-check of the kept selects.
+      assert(!DT.dominates(SI, InsertPoint) &&
+             "SI can't be already above the hoist point");
+      assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
+                             Unhoistables, nullptr) &&
+             "checkHoistValue for selects");
+    }
+    CHR_DEBUG(dbgs() << "Result\n");
+    if (RI.HasBranch) {
+      CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
+    }
+    for (auto *SI : Selects) {
+      CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
+    }
+#endif
+  }
+}
+
+// Walk the region tree rooted at R. A scope is looked up for R itself and,
+// recursively, for each of its subregions. Scopes of consecutive subregions
+// are chained together with append() for as long as appendable() allows. The
+// resulting chains become subscopes of R's scope if R has one; otherwise they
+// are pushed onto Scopes. Returns R's own scope (possibly null).
+CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
+                           SmallVectorImpl<CHRScope *> &Scopes) {
+  CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
+  CHRScope *Result = findScope(R);
+  // The chain of consecutive subregion scopes currently being built, and the
+  // chains completed so far.
+  CHRScope *OpenChain = nullptr;
+  SmallVector<CHRScope *, 8> Chains;
+  for (auto It = R->begin(); It != R->end(); ++It) {
+    Region *SubR = It->get();
+    auto NextIt = std::next(It);
+    Region *NextSubR = NextIt == R->end() ? nullptr : NextIt->get();
+    CHR_DEBUG(dbgs() << "Looking at subregion " << SubR->getNameStr()
+              << "\n");
+    CHRScope *SubScope = findScopes(SubR, NextSubR, R, Scopes);
+    if (SubScope)
+      CHR_DEBUG(dbgs() << "Subregion Scope " << *SubScope << "\n");
+    else
+      CHR_DEBUG(dbgs() << "Subregion Scope null\n");
+    // Extend the open chain when possible.
+    if (SubScope && OpenChain && OpenChain->appendable(SubScope)) {
+      OpenChain->append(SubScope);
+      continue;
+    }
+    // Otherwise close the open chain (if any); a non-null SubScope starts a
+    // new one, a null SubScope leaves no chain open.
+    if (OpenChain)
+      Chains.push_back(OpenChain);
+    OpenChain = SubScope;
+  }
+  if (OpenChain)
+    Chains.push_back(OpenChain);
+
+  if (Result) {
+    // Combine the chains with the parent.
+    for (CHRScope *Sub : Chains)
+      Result->addSub(Sub);
+  } else {
+    // Push the chains out as they won't be combined with the parent.
+    Scopes.append(Chains.begin(), Chains.end());
+  }
+  return Result;
+}
+
+// Collects the condition values of region RI: the condition of the entry
+// block's terminating branch when RI.HasBranch is set, plus the condition of
+// every select in RI.Selects.
+static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
+  DenseSet<Value *> Conds;
+  if (RI.HasBranch) {
+    Instruction *Term = RI.R->getEntry()->getTerminator();
+    Conds.insert(cast<BranchInst>(Term)->getCondition());
+  }
+  for (SelectInst *SI : RI.Selects)
+    Conds.insert(SI->getCondition());
+  return Conds;
+}
+
+
+// Decide whether a scope should be split between the previous region(s) and
+// the current region. Returns true (split) if 1) some condition value of the
+// current region can't be hoisted to InsertPoint, or 2) both PrevConditionValues
+// and ConditionValues are non-empty but their base values have nothing in
+// common (the combined branch conditions then probably won't lead to a
+// simpler combined condition). Returns false (don't split) otherwise.
+static bool shouldSplit(Instruction *InsertPoint,
+                        DenseSet<Value *> &PrevConditionValues,
+                        DenseSet<Value *> &ConditionValues,
+                        DominatorTree &DT,
+                        DenseSet<Instruction *> &Unhoistables) {
+  CHR_DEBUG(
+      dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
+      for (Value *V : PrevConditionValues)
+        dbgs() << *V << ", ";
+      dbgs() << " ConditionValues ";
+      for (Value *V : ConditionValues)
+        dbgs() << *V << ", ";
+      dbgs() << "\n");
+  assert(InsertPoint && "Null InsertPoint");
+  // Every condition value must be hoistable to the hoist point.
+  for (Value *V : ConditionValues) {
+    if (checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr))
+      continue;
+    CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
+    return true; // Not hoistable, split.
+  }
+  // An empty side never causes a split; this avoids unnecessary splits at
+  // scopes with no branch/selects.
+  if (PrevConditionValues.empty() || ConditionValues.empty()) {
+    CHR_DEBUG(dbgs() << "No split\n");
+    return false;
+  }
+  // Gathers the union of the base values of a set of condition values.
+  auto CollectBases =
+      [&DT](const DenseSet<Value *> &Values) -> std::set<Value *> {
+    std::set<Value *> Union;
+    for (Value *V : Values) {
+      std::set<Value *> BaseValues = getBaseValues(V, DT);
+      Union.insert(BaseValues.begin(), BaseValues.end());
+    }
+    return Union;
+  };
+  std::set<Value *> PrevBases = CollectBases(PrevConditionValues);
+  std::set<Value *> Bases = CollectBases(ConditionValues);
+  CHR_DEBUG(
+      dbgs() << "PrevBases ";
+      for (Value *V : PrevBases)
+        dbgs() << *V << ", ";
+      dbgs() << " Bases ";
+      for (Value *V : Bases)
+        dbgs() << *V << ", ";
+      dbgs() << "\n");
+  // The two sides are related if they share at least one base value.
+  const bool SharesBase =
+      std::any_of(PrevBases.begin(), PrevBases.end(),
+                  [&Bases](Value *V) { return Bases.count(V) != 0; });
+  if (!SharesBase) {
+    // Empty intersection, split.
+    CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
+    return true;
+  }
+  CHR_DEBUG(dbgs() << "No split\n");
+  return false;  // Don't split.
+}
+
+// Adds to Output every select recorded in the regions of Scope and,
+// recursively, in the regions of all of its subscopes.
+static void GetSelectsInScope(CHRScope *Scope,
+                              DenseSet<Instruction *> &Output) {
+  for (RegInfo &RI : Scope->RegInfos)
+    Output.insert(RI.Selects.begin(), RI.Selects.end());
+  for (CHRScope *Sub : Scope->Subs)
+    GetSelectsInScope(Sub, Output);
+}
+
+// Splits every scope in Input with splitScope(), treating each as a top-level
+// scope (no outer scope). The selects of the scope are passed as the
+// unhoistable set. The resulting scopes are collected in Output, each with
+// its BranchInsertPoint set.
+void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
+                      SmallVectorImpl<CHRScope *> &Output) {
+  for (CHRScope *Top : Input) {
+    assert(!Top->BranchInsertPoint &&
+           "BranchInsertPoint must not be set");
+    DenseSet<Instruction *> ScopeSelects;
+    GetSelectsInScope(Top, ScopeSelects);
+    splitScope(Top, /*Outer=*/nullptr, /*OuterConditionValues=*/nullptr,
+               /*OuterInsertPoint=*/nullptr, Output, ScopeSelects);
+  }
+#ifndef NDEBUG
+  for (CHRScope *Split : Output)
+    assert(Split->BranchInsertPoint && "BranchInsertPoint must be set");
+#endif
+}
+
+SmallVector<CHRScope *, 8> CHR::splitScope(
+    CHRScope *Scope,
+    CHRScope *Outer,  // Enclosing scope, or null at the top level.
+    DenseSet<Value *> *OuterConditionValues,
+    Instruction *OuterInsertPoint,
+    SmallVectorImpl<CHRScope *> &Output,  // Receives scopes split from outer.
+    DenseSet<Instruction *> &Unhoistables) {
+  if (Outer) {
+    assert(OuterConditionValues && "Null OuterConditionValues");
+    assert(OuterInsertPoint && "Null OuterInsertPoint");
+  }
+  bool PrevSplitFromOuter = true;
+  DenseSet<Value *> PrevConditionValues;
+  Instruction *PrevInsertPoint = nullptr;
+  SmallVector<CHRScope *, 8> Splits;  // Parallel to the three vectors below.
+  SmallVector<bool, 8> SplitsSplitFromOuter;
+  SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
+  SmallVector<Instruction *, 8> SplitsInsertPoints;
+  SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos);  // Copy; Scope is reassigned below.
+  for (RegInfo &RI : RegInfos) {
+    Instruction *InsertPoint = getBranchInsertPoint(RI);
+    DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
+    CHR_DEBUG(
+        dbgs() << "ConditionValues ";
+        for (Value *V : ConditionValues) {
+          dbgs() << *V << ", ";
+        }
+        dbgs() << "\n");
+    if (RI.R == RegInfos[0].R) {
+      // First region of the scope. Check to see if we should split from the outer.
+      if (Outer) {
+        CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
+        CHR_DEBUG(dbgs() << "Should split from outer at "
+                  << RI.R->getNameStr() << "\n");
+        if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
+                        ConditionValues, DT, Unhoistables)) {
+          PrevConditionValues = ConditionValues;
+          PrevInsertPoint = InsertPoint;
+        } else {
+          // Not splitting from the outer. Use the outer insert point and the
+          // union of the outer and the current condition values.
+          PrevSplitFromOuter = false;
+          PrevConditionValues = *OuterConditionValues;
+          PrevConditionValues.insert(ConditionValues.begin(),
+                                     ConditionValues.end());
+          PrevInsertPoint = OuterInsertPoint;
+        }
+      } else {
+        CHR_DEBUG(dbgs() << "Outer null\n");
+        PrevConditionValues = ConditionValues;
+        PrevInsertPoint = InsertPoint;
+      }
+    } else {
+      CHR_DEBUG(dbgs() << "Should split from prev at "
+                << RI.R->getNameStr() << "\n");
+      if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
+                      DT, Unhoistables)) {
+        CHRScope *Tail = Scope->split(RI.R);  // Tail is registered in Scopes.
+        Scopes.insert(Tail);
+        Splits.push_back(Scope);  // Record the part before the split point.
+        SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
+        SplitsConditionValues.push_back(PrevConditionValues);
+        SplitsInsertPoints.push_back(PrevInsertPoint);
+        Scope = Tail;  // Continue with the tail as the current scope.
+        PrevConditionValues = ConditionValues;
+        PrevInsertPoint = InsertPoint;
+        PrevSplitFromOuter = true;
+      } else {
+        // Not splitting. Union the condition values. Keep the hoist point.
+        PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
+      }
+    }
+  }
+  Splits.push_back(Scope);  // Record the last (or only) piece.
+  SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
+  SplitsConditionValues.push_back(PrevConditionValues);
+  assert(PrevInsertPoint && "Null PrevInsertPoint");
+  SplitsInsertPoints.push_back(PrevInsertPoint);
+  assert(Splits.size() == SplitsConditionValues.size() &&
+         Splits.size() == SplitsSplitFromOuter.size() &&
+         Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
+  for (size_t I = 0; I < Splits.size(); ++I) {  // Recurse into the subscopes.
+    CHRScope *Split = Splits[I];
+    DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
+    Instruction *SplitInsertPoint = SplitsInsertPoints[I];
+    SmallVector<CHRScope *, 8> NewSubs;
+    DenseSet<Instruction *> SplitUnhoistables;
+    GetSelectsInScope(Split, SplitUnhoistables);
+    for (CHRScope *Sub : Split->Subs) {
+      SmallVector<CHRScope *, 8> SubSplits = splitScope(
+          Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
+          SplitUnhoistables);
+      NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
+    }
+    Split->Subs = NewSubs;  // Keep only the subscopes not split from Split.
+  }
+  SmallVector<CHRScope *, 8> Result;
+  for (size_t I = 0; I < Splits.size(); ++I) {
+    CHRScope *Split = Splits[I];
+    if (SplitsSplitFromOuter[I]) {
+      // Split from the outer: emitted to Output with its own insert point.
+      Output.push_back(Split);
+      Split->BranchInsertPoint = SplitsInsertPoints[I];
+      CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
+                << "\n");
+    } else {
+      // Connected to the outer: returned so the caller keeps it as a subscope.
+      Result.push_back(Split);
+    }
+  }
+  if (!Outer)
+    assert(Result.empty() &&
+           "If no outer (top-level), must return no nested ones");
+  return Result;
+}
+
+// For each scope in Scopes, fills in the scope's true/false biased region and
+// select sets (covering the scope and all of its subscopes) and dumps them
+// in debug output.
+void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
+  for (CHRScope *Top : Scopes) {
+    assert(Top->TrueBiasedRegions.empty() && Top->FalseBiasedRegions.empty() && "Empty");
+    classifyBiasedScopes(Top, Top);
+    CHR_DEBUG(dbgs() << "classifyBiasedScopes " << *Top << "\n");
+    CHR_DEBUG(
+        dbgs() << "TrueBiasedRegions ";
+        for (Region *R : Top->TrueBiasedRegions)
+          dbgs() << R->getNameStr() << ", ";
+        dbgs() << "\n");
+    CHR_DEBUG(
+        dbgs() << "FalseBiasedRegions ";
+        for (Region *R : Top->FalseBiasedRegions)
+          dbgs() << R->getNameStr() << ", ";
+        dbgs() << "\n");
+    CHR_DEBUG(
+        dbgs() << "TrueBiasedSelects ";
+        for (SelectInst *SI : Top->TrueBiasedSelects)
+          dbgs() << *SI << ", ";
+        dbgs() << "\n");
+    CHR_DEBUG(
+        dbgs() << "FalseBiasedSelects ";
+        for (SelectInst *SI : Top->FalseBiasedSelects)
+          dbgs() << *SI << ", ";
+        dbgs() << "\n");
+  }
+}
+
+// Records, on OutermostScope, the bias direction of every branch and select
+// in Scope's regions, as found in the global biased sets, then recurses into
+// Scope's subscopes. Every branch region and every select is expected to be
+// in one of the global sets.
+void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
+  for (RegInfo &RI : Scope->RegInfos) {
+    if (RI.HasBranch) {
+      Region *R = RI.R;
+      if (TrueBiasedRegionsGlobal.count(R) > 0) {
+        OutermostScope->TrueBiasedRegions.insert(R);
+      } else {
+        if (FalseBiasedRegionsGlobal.count(R) == 0)
+          llvm_unreachable("Must be biased");
+        OutermostScope->FalseBiasedRegions.insert(R);
+      }
+    }
+    for (SelectInst *SI : RI.Selects) {
+      if (TrueBiasedSelectsGlobal.count(SI) > 0) {
+        OutermostScope->TrueBiasedSelects.insert(SI);
+        continue;
+      }
+      if (FalseBiasedSelectsGlobal.count(SI) == 0)
+        llvm_unreachable("Must be biased");
+      OutermostScope->FalseBiasedSelects.insert(SI);
+    }
+  }
+  for (CHRScope *Sub : Scope->Subs)
+    classifyBiasedScopes(Sub, OutermostScope);
+}
+
+// Returns true if the number of biased regions plus biased selects recorded
+// on Scope reaches CHRMergeThreshold.
+static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
+  unsigned NumBiasedRegions = Scope->TrueBiasedRegions.size() +
+                              Scope->FalseBiasedRegions.size();
+  unsigned NumBiasedSelects = Scope->TrueBiasedSelects.size() +
+                              Scope->FalseBiasedSelects.size();
+  unsigned NumBiased = NumBiasedRegions + NumBiasedSelects;
+  return NumBiased >= CHRMergeThreshold;
+}
+
+// Copies to Output the scopes of Input that have enough biased branches and
+// selects (see hasAtLeastTwoBiasedBranches); the others are dropped with a
+// debug message.
+void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
+                       SmallVectorImpl<CHRScope *> &Output) {
+  for (CHRScope *Scope : Input) {
+    if (hasAtLeastTwoBiasedBranches(Scope)) {
+      Output.push_back(Scope);
+      continue;
+    }
+    // Too few biased branches/selects in this scope.
+    CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
+              << Scope->TrueBiasedRegions.size()
+              << " falsy-regions " << Scope->FalseBiasedRegions.size()
+              << " true-selects " << Scope->TrueBiasedSelects.size()
+              << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
+  }
+}
+
+// For each scope in Input, computes its CHRRegions and HoistStopMap (covering
+// the scope and all of its subscopes), appends the scope to Output and dumps
+// the result in debug output.
+void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
+                        SmallVectorImpl<CHRScope *> &Output) {
+  for (CHRScope *Top : Input) {
+    assert(Top->HoistStopMap.empty() && Top->CHRRegions.empty() &&
+           "Empty");
+    setCHRRegions(Top, Top);
+    Output.push_back(Top);
+    CHR_DEBUG(
+        dbgs() << "setCHRRegions HoistStopMap " << *Top << "\n";
+        for (const auto &Entry : Top->HoistStopMap) {
+          dbgs() << "Region " << Entry.first->getNameStr() << "\n";
+          for (Instruction *Stop : Entry.second)
+            dbgs() << "HoistStop " << *Stop << "\n";
+        }
+        dbgs() << "CHRRegions" << "\n";
+        for (RegInfo &RI : Top->CHRRegions)
+          dbgs() << RI.R->getNameStr() << "\n";);
+  }
+}
+
+// For every region of Scope that has a biased branch or at least one select,
+// appends the region to OutermostScope->CHRRegions and records in
+// OutermostScope->HoistStopMap the instructions at which hoisting its
+// conditions to OutermostScope's branch insert point stops. Then recurses
+// into Scope's subscopes.
+void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
+  // Put the biased selects in Unhoistables because they should stay where they
+  // are and constant-folded after CHR (in case one biased select or a branch
+  // can depend on another biased select.)
+  DenseSet<Instruction *> Unhoistables;
+  for (RegInfo &RI : Scope->RegInfos)
+    Unhoistables.insert(RI.Selects.begin(), RI.Selects.end());
+
+  Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
+  for (RegInfo &RI : Scope->RegInfos) {
+    // Regions with neither a branch nor a select contribute nothing.
+    if (!RI.HasBranch && RI.Selects.empty())
+      continue;
+    Region *R = RI.R;
+    DenseSet<Instruction *> HoistStops;
+    // Runs checkHoistValue on Cond, which fills in HoistStops. The condition
+    // is expected to be hoistable at this point.
+    auto CollectHoistStops = [&](Value *Cond) {
+      bool IsHoistable = checkHoistValue(Cond, InsertPoint, DT,
+                                         Unhoistables, &HoistStops);
+      assert(IsHoistable && "Must be hoistable");
+      (void)(IsHoistable);  // Unused in release build
+    };
+    if (RI.HasBranch) {
+      assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
+              OutermostScope->FalseBiasedRegions.count(R) > 0) &&
+             "Must be truthy or falsy");
+      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+      CollectHoistStops(BI->getCondition());
+    }
+    for (SelectInst *SI : RI.Selects) {
+      assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
+              OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
+             "Must be true or false biased");
+      CollectHoistStops(SI->getCondition());
+    }
+    OutermostScope->CHRRegions.push_back(RI);
+    OutermostScope->HoistStopMap[R] = HoistStops;
+  }
+  for (CHRScope *Sub : Scope->Subs)
+    setCHRRegions(Sub, OutermostScope);
+}
+
+// Strict weak ordering for sorting scopes: a scope orders before another if
+// the depth of its first region is smaller. Declared static, like the other
+// file-local helpers, so that it doesn't get external linkage.
+static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
+  return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
+}
+
+// Makes Output a copy of Input ordered by CHRScopeSorter. The sort is stable,
+// so scopes that compare equal keep their relative order from Input.
+void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
+                     SmallVectorImpl<CHRScope *> &Output) {
+  Output.clear();
+  Output.append(Input.begin(), Input.end());
+  std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
+}
+
+// Move V, and recursively the operands it depends on, to right before
+// HoistPoint. The walk up the use-def chains stops at HoistPoint itself, at
+// the hoist stops recorded for region R in HoistStopMap, at phis listed in
+// TrivialPHIs, and at instructions already in HoistedSet. Every instruction
+// that gets moved is added to HoistedSet. Non-instruction values are left
+// alone.
+static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
+                       HoistStopMapTy &HoistStopMap,
+                       DenseSet<Instruction *> &HoistedSet,
+                       DenseSet<PHINode *> &TrivialPHIs) {
+  auto StopsIt = HoistStopMap.find(R);
+  assert(StopsIt != HoistStopMap.end() && "Region must be in hoist stop map");
+  DenseSet<Instruction *> &HoistStops = StopsIt->second;
+  auto *I = dyn_cast<Instruction>(V);
+  if (!I)
+    return;
+  if (I == HoistPoint || HoistStops.count(I))
+    return;
+  // The trivial phi inserted by the previous CHR scope could replace a
+  // non-phi in HoistStops. Note that since this phi is at the exit of a
+  // previous CHR scope, which dominates this scope, it's safe to stop
+  // hoisting there.
+  auto *PN = dyn_cast<PHINode>(I);
+  if (PN && TrivialPHIs.count(PN))
+    return;
+  // Already hoisted, nothing left to do.
+  if (HoistedSet.count(I))
+    return;
+  assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
+  // Operands go first so that they end up before I at the hoist point.
+  for (Value *Op : I->operands())
+    hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
+  I->moveBefore(HoistPoint);
+  HoistedSet.insert(I);
+  CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
+}
+
+// For every CHR region of the scope, hoist to HoistPoint the condition of its
+// biased branch (if it has one), then the conditions of its biased selects.
+static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
+                                 DenseSet<PHINode *> &TrivialPHIs) {
+  // Shared by all the hoistValue calls below so a value is moved only once.
+  DenseSet<Instruction *> HoistedSet;
+  for (const RegInfo &RI : Scope->CHRRegions) {
+    Region *R = RI.R;
+    const bool BranchIsBiased = Scope->TrueBiasedRegions.count(R) ||
+                                Scope->FalseBiasedRegions.count(R);
+    if (RI.HasBranch && BranchIsBiased) {
+      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+      hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
+                 HoistedSet, TrivialPHIs);
+    }
+    for (SelectInst *SI : RI.Selects) {
+      const bool SelectIsBiased = Scope->TrueBiasedSelects.count(SI) ||
+                                  Scope->FalseBiasedSelects.count(SI);
+      // Unbiased selects are skipped.
+      if (SelectIsBiased)
+        hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
+                   HoistedSet, TrivialPHIs);
+    }
+  }
+}
+
+// Negate the predicate of an ICmp if, ExcludedUser aside, it's used only by
+// branches or selects, whose targets/operands get swapped. Returns true if so.
+static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
+                                                 Instruction *ExcludedUser,
+                                                 CHRScope *Scope) {
+  for (User *U : ICmp->users()) {  // Pass 1: check every user; change nothing.
+    if (U == ExcludedUser)
+      continue;
+    if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
+      continue;
+    if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
+      continue;
+    return false;  // Some other kind of user: give up without modifying IR.
+  }
+  for (User *U : ICmp->users()) {  // Pass 2: compensate in every other user.
+    if (U == ExcludedUser)
+      continue;
+    if (auto *BI = dyn_cast<BranchInst>(U)) {
+      assert(BI->isConditional() && "Must be conditional");
+      BI->swapSuccessors();
+      // Don't need to swap this in terms of
+      // TrueBiasedRegions/FalseBiasedRegions because true-biased/false-biased
+      // mean whether the branch is likely to go into the if-then rather than
+      // successor0/successor1 and because we can tell which edge is the then or
+      // the else one by comparing the destination to the region exit block.
+      continue;
+    }
+    if (auto *SI = dyn_cast<SelectInst>(U)) {
+      // Swap operands
+      Value *TrueValue = SI->getTrueValue();
+      Value *FalseValue = SI->getFalseValue();
+      SI->setTrueValue(FalseValue);
+      SI->setFalseValue(TrueValue);
+      SI->swapProfMetadata();
+      if (Scope->TrueBiasedSelects.count(SI)) {
+        assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
+               "Must not be already in");
+        Scope->FalseBiasedSelects.insert(SI);  // Not erased from the true set.
+      } else if (Scope->FalseBiasedSelects.count(SI)) {
+        assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
+               "Must not be already in");
+        Scope->TrueBiasedSelects.insert(SI);  // Not erased from the false set.
+      }
+      continue;
+    }
+    llvm_unreachable("Must be a branch or a select");
+  }
+  ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
+  return true;
+}
+
+// A helper for transformScopes. Insert a trivial phi at the scope exit block
+// for a value that's defined in the scope but used outside it (meaning it's
+// alive at the exit block). Each phi created is also added to TrivialPHIs.
+static void insertTrivialPHIs(CHRScope *Scope,
+                              BasicBlock *EntryBlock, BasicBlock *ExitBlock,
+                              DenseSet<PHINode *> &TrivialPHIs) {
+  DenseSet<BasicBlock *> BlocksInScopeSet;  // For membership tests.
+  SmallVector<BasicBlock *, 8> BlocksInScopeVec;  // For iteration.
+  for (RegInfo &RI : Scope->RegInfos) {
+    for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
+                                            // sub-Scopes.
+      BlocksInScopeSet.insert(BB);
+      BlocksInScopeVec.push_back(BB);
+    }
+  }
+  CHR_DEBUG(
+      dbgs() << "Inserting redudant phis\n";
+      for (BasicBlock *BB : BlocksInScopeVec) {
+        dbgs() << "BlockInScope " << BB->getName() << "\n";
+      });
+  for (BasicBlock *BB : BlocksInScopeVec) {
+    for (Instruction &I : *BB) {
+      SmallVector<Instruction *, 8> Users;  // Users of I to redirect to the phi.
+      for (User *U : I.users()) {
+        if (auto *UI = dyn_cast<Instruction>(U)) {
+          if (BlocksInScopeSet.count(UI->getParent()) == 0 &&
+              // Unless there's already a phi for I at the exit block.
+              !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
+            CHR_DEBUG(dbgs() << "V " << I << "\n");
+            CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
+            Users.push_back(UI);
+          } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
+            // There's a loop backedge from a block that's dominated by this
+            // scope to the entry block.
+            CHR_DEBUG(dbgs() << "V " << I << "\n");
+            CHR_DEBUG(dbgs()
+                      << "Used at entry block (for a back edge) by a phi user "
+                      << *UI << "\n");
+            Users.push_back(UI);
+          }
+        }
+      }
+      if (Users.size() > 0) {
+        // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
+        // ExitBlock. Replace I with the new phi in UI unless UI is another
+        // phi at ExitBlock.
+        unsigned PredCount = std::distance(pred_begin(ExitBlock),
+                                           pred_end(ExitBlock));
+        PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
+                                      &ExitBlock->front());
+        for (BasicBlock *Pred : predecessors(ExitBlock)) {
+          PN->addIncoming(&I, Pred);  // Same value from every predecessor.
+        }
+        TrivialPHIs.insert(PN);  // Record the new phi in the out-set.
+        CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
+        for (Instruction *UI : Users) {
+          for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
+            if (UI->getOperand(J) == &I) {
+              UI->setOperand(J, PN);
+            }
+          }
+          CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
+        }
+      }
+    }
+  }
+}
+
+// Debug-build check that each CHR region of the scope is either listed as a
+// true-/false-biased region or owns at least one true-/false-biased select.
+// Compiles to an empty function when NDEBUG is defined.
+static void assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
+#ifndef NDEBUG
+  for (RegInfo &RI : Scope->CHRRegions) {
+    bool FoundBias = Scope->TrueBiasedRegions.count(RI.R) ||
+                     Scope->FalseBiasedRegions.count(RI.R);
+    if (!FoundBias) {
+      // No biased branch; look for a biased select instead.
+      for (SelectInst *SI : RI.Selects) {
+        if (Scope->TrueBiasedSelects.count(SI) ||
+            Scope->FalseBiasedSelects.count(SI)) {
+          FoundBias = true;
+          break;
+        }
+      }
+    }
+    assert(FoundBias && "Must have biased branch or select");
+  }
+#endif
+}
+
+// Check that the condition of every biased branch and every biased select in
+// the scope's CHR regions, when it is an instruction, now sits either in
+// PreEntryBlock or outside of the scope. Each inspected condition is printed
+// through CHR_DEBUG before it is checked.
+static void assertBranchOrSelectConditionHoisted(CHRScope *Scope,
+                                                 BasicBlock *PreEntryBlock) {
+  CHR_DEBUG(dbgs() << "Biased regions condition values \n");
+  // Print one condition value and assert that it was hoisted.
+  auto CheckCondition = [&](Value *Cond) {
+    CHR_DEBUG(dbgs() << *Cond << "\n");
+    auto *CondInst = dyn_cast<Instruction>(Cond);
+    if (!CondInst)
+      return;
+    assert((CondInst->getParent() == PreEntryBlock ||
+            !Scope->contains(CondInst)) &&
+           "Must have been hoisted to PreEntryBlock or outside the scope");
+  };
+  for (RegInfo &RI : Scope->CHRRegions) {
+    Region *R = RI.R;
+    const bool BranchIsBiased = Scope->TrueBiasedRegions.count(R) ||
+                                Scope->FalseBiasedRegions.count(R);
+    if (RI.HasBranch && BranchIsBiased) {
+      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+      CheckCondition(BI->getCondition());
+    }
+    for (SelectInst *SI : RI.Selects) {
+      const bool SelectIsBiased = Scope->TrueBiasedSelects.count(SI) ||
+                                  Scope->FalseBiasedSelects.count(SI);
+      if (SelectIsBiased)
+        CheckCondition(SI->getCondition());
+    }
+  }
+}
+
+void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
+  CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
+
+  assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
+  Region *FirstRegion = Scope->RegInfos[0].R;
+  BasicBlock *EntryBlock = FirstRegion->getEntry();
+  Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
+  BasicBlock *ExitBlock = LastRegion->getExit();
+  Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
+
+  if (ExitBlock) {
+    // Insert a trivial phi at the exit block (where the CHR hot path and the
+    // cold path merge) for a value that's defined in the scope but used
+    // outside it (meaning it's alive at the exit block). We will add the
+    // incoming values for the CHR cold paths to it below. Without this, we'd
+    // miss updating phi's for such values unless there happens to already be a
+    // phi for that value there.
+    insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
+  }
+
+  // Split the entry block of the first region. The new block becomes the new
+  // entry block of the first region. The old entry block becomes the block to
+  // insert the CHR branch into. Note DT gets updated. Since DT gets updated
+  // through the split, we update the entry of the first region after the split,
+  // and Region only points to the entry and the exit blocks, rather than
+  // keeping everything in a list or set, the blocks membership and the
+  // entry/exit blocks of the region are still valid after the split.
+  CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
+            << " at " << *Scope->BranchInsertPoint << "\n");
+  BasicBlock *NewEntryBlock =
+      SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
+  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+         "NewEntryBlock's only pred must be EntryBlock");
+  FirstRegion->replaceEntryRecursive(NewEntryBlock);
+  BasicBlock *PreEntryBlock = EntryBlock;  // Old entry; gets the CHR branch.
+
+  ValueToValueMapTy VMap;  // Maps original blocks/values to their clones.
+  // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
+  // hot path (originals) and a cold path (clones) and update the PHIs at the
+  // exit block.
+  cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
+
+  // Replace the old (placeholder) branch with the new (merged) conditional
+  // branch.
+  BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
+                                            NewEntryBlock, VMap);
+
+#ifndef NDEBUG
+  assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
+#endif
+
+  // Hoist the conditional values of the branches/selects.
+  hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
+
+#ifndef NDEBUG
+  assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
+#endif
+
+  // Create the combined branch condition and constant-fold the branches/selects
+  // in the hot path.
+  fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
+                          ProfileCount ? ProfileCount.getValue() : 0);  // 0 if none.
+}
+
+// A helper for transformScopes. Clone the blocks in the scope (excluding the
+// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
+// at the exit block. VMap receives the original-to-clone mapping.
+void CHR::cloneScopeBlocks(CHRScope *Scope,
+                           BasicBlock *PreEntryBlock,
+                           BasicBlock *ExitBlock,
+                           Region *LastRegion,
+                           ValueToValueMapTy &VMap) {
+  // Clone all the blocks. The original blocks will be the hot-path
+  // CHR-optimized code and the cloned blocks will be the original unoptimized
+  // code. This is so that the block pointers from the
+  // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
+  // which CHR should apply to.
+  SmallVector<BasicBlock*, 8> NewBlocks;
+  for (RegInfo &RI : Scope->RegInfos)
+    for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
+                                            // sub-Scopes.
+      assert(BB != PreEntryBlock && "Don't copy the preetntry block");
+      BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
+      NewBlocks.push_back(NewBB);
+      VMap[BB] = NewBB;  // Lets the remapping below retarget block operands.
+    }
+
+  // Place the cloned blocks right after the original blocks (right before the
+  // exit block.)
+  if (ExitBlock)
+    F.getBasicBlockList().splice(ExitBlock->getIterator(),
+                                 F.getBasicBlockList(),
+                                 NewBlocks[0]->getIterator(), F.end());
+
+  // Update the cloned blocks/instructions to refer to themselves.
+  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
+    for (Instruction &I : *NewBlocks[i])
+      RemapInstruction(&I, VMap,
+                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+  // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
+  // the top-level region but we don't need to add PHIs. The trivial PHIs
+  // inserted above will be updated here.
+  if (ExitBlock)
+    for (PHINode &PN : ExitBlock->phis())
+      for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
+           ++I) {
+        BasicBlock *Pred = PN.getIncomingBlock(I);
+        if (LastRegion->contains(Pred)) {
+          Value *V = PN.getIncomingValue(I);
+          auto It = VMap.find(V);
+          if (It != VMap.end()) V = It->second;  // Else V is reused unchanged.
+          assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
+          PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
+        }
+      }
+}
+
+// A helper for transformScopes. Replace the old (placeholder) unconditional
+// branch that terminates PreEntryBlock with a new conditional branch whose
+// true successor is NewEntryBlock (the original blocks) and whose false
+// successor is the clone of NewEntryBlock recorded in VMap. Returns the new
+// branch; its condition is still a constant-true placeholder at this point.
+BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
+                                    BasicBlock *EntryBlock,
+                                    BasicBlock *NewEntryBlock,
+                                    ValueToValueMapTy &VMap) {
+  BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
+  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
+         "SplitBlock did not work correctly!");
+  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+         "NewEntryBlock's only pred must be EntryBlock");
+  assert(VMap.find(NewEntryBlock) != VMap.end() &&
+         "NewEntryBlock must have been copied");
+  // eraseFromParent() unlinks the placeholder branch and deletes it.
+  // removeFromParent() only unlinks, which would leak the old instruction.
+  OldBR->eraseFromParent();
+  // The true predicate is a placeholder. It will be replaced later in
+  // fixupBranchesAndSelects().
+  BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
+                                         cast<BasicBlock>(VMap[NewEntryBlock]),
+                                         ConstantInt::getTrue(F.getContext()));
+  PreEntryBlock->getInstList().push_back(NewBR);
+  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+         "NewEntryBlock's only pred must be EntryBlock");
+  return NewBR;
+}
+
+// A helper for transformScopes. Create the combined branch condition and
+// constant-fold the branches/selects in the hot path; also updates Stats.
+void CHR::fixupBranchesAndSelects(CHRScope *Scope,
+                                  BasicBlock *PreEntryBlock,
+                                  BranchInst *MergedBR,
+                                  uint64_t ProfileCount) {
+  Value *MergedCondition = ConstantInt::getTrue(F.getContext());
+  BranchProbability CHRBranchBias(1, 1);  // Lowered to the minimum bias below.
+  uint64_t NumCHRedBranches = 0;
+  IRBuilder<> IRB(PreEntryBlock->getTerminator());  // Emits before the branch.
+  for (RegInfo &RI : Scope->CHRRegions) {
+    Region *R = RI.R;
+    if (RI.HasBranch) {
+      fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
+      ++NumCHRedBranches;
+    }
+    for (SelectInst *SI : RI.Selects) {
+      fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
+      ++NumCHRedBranches;
+    }
+  }
+  Stats.NumBranchesDelta += NumCHRedBranches - 1;  // N folded, one added.
+  Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
+  MergedBR->setCondition(MergedCondition);  // Replaces the placeholder.
+  SmallVector<uint32_t, 2> Weights;
+  Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000)));
+  Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)));
+  MDBuilder MDB(F.getContext());
+  MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+  CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
+            << "\n");
+}
+
+// A helper for fixupBranchesAndSelects. AND the hot direction of R's entry
+// branch into MergedCondition and constant-fold that branch in the hot path.
+void CHR::fixupBranch(Region *R, CHRScope *Scope,
+                      IRBuilder<> &IRB,
+                      Value *&MergedCondition,
+                      BranchProbability &CHRBranchBias) {
+  bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+  assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
+         "Must be truthy or falsy");
+  auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+  assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
+         "Must be in the bias map");
+  BranchProbability Bias = BranchBiasMap[R];
+  assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
+  // Keep the smallest bias seen so far in CHRBranchBias.
+  if (CHRBranchBias > Bias)
+    CHRBranchBias = Bias;
+  BasicBlock *IfThen = BI->getSuccessor(1);  // Tentative; may be swapped below.
+  BasicBlock *IfElse = BI->getSuccessor(0);
+  BasicBlock *RegionExitBlock = R->getExit();
+  assert(RegionExitBlock && "Null ExitBlock");
+  assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
+         IfThen != IfElse && "Invariant from findScopes");
+  if (IfThen == RegionExitBlock) {
+    // Swap them so that IfThen means going into it and IfElse means skipping
+    // it.
+    std::swap(IfThen, IfElse);
+  }
+  CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
+            << " IfElse " << IfElse->getName() << "\n");
+  Value *Cond = BI->getCondition();
+  BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
+  bool ConditionTrue = HotTarget == BI->getSuccessor(0);
+  addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
+                       MergedCondition);
+  // Constant-fold the branch at the region's entry block.
+  assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
+         "The successor shouldn't change");
+  Value *NewCondition = ConditionTrue ?
+                        ConstantInt::getTrue(F.getContext()) :
+                        ConstantInt::getFalse(F.getContext());
+  BI->setCondition(NewCondition);
+}
+
+// A helper for fixupBranchesAndSelects. AND the select's biased condition into
+// MergedCondition and constant-fold the select's condition in the hot path.
+void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
+                      IRBuilder<> &IRB,
+                      Value *&MergedCondition,
+                      BranchProbability &CHRBranchBias) {
+  bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+  assert((IsTrueBiased ||
+          Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
+  assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
+         "Must be in the bias map");
+  BranchProbability Bias = SelectBiasMap[SI];
+  assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
+  // Keep the smallest bias seen so far in CHRBranchBias.
+  if (CHRBranchBias > Bias)
+    CHRBranchBias = Bias;
+  Value *Cond = SI->getCondition();  // Read before it is overwritten below.
+  addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
+                       MergedCondition);
+  Value *NewCondition = IsTrueBiased ?
+                        ConstantInt::getTrue(F.getContext()) :
+                        ConstantInt::getFalse(F.getContext());
+  SI->setCondition(NewCondition);  // The select now has a constant condition.
+}
+
+// A helper for fixupBranch/fixupSelect. AND one more term into
+// MergedCondition: Cond itself when IsTrueBiased, otherwise its negation.
+void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
+                               Instruction *BranchOrSelect,
+                               CHRScope *Scope,
+                               IRBuilder<> &IRB,
+                               Value *&MergedCondition) {
+  // The value that ends up being AND'ed into the merged condition.
+  Value *Term = Cond;
+  if (!IsTrueBiased) {
+    // Prefer flipping the icmp predicate in place (which also swaps the
+    // targets/operands of its other branch/select users) over emitting an Xor.
+    // After a successful flip, Cond itself is the negated value.
+    auto *ICmp = dyn_cast<ICmpInst>(Cond);
+    bool Flipped =
+        ICmp && NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope);
+    if (!Flipped)
+      Term = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond);
+  }
+  MergedCondition = IRB.CreateAnd(MergedCondition, Term);
+}
+
+// Apply the per-scope transformation to each scope in order, sharing a single
+// TrivialPHIs set across all of them, and dump the IR after each one through
+// CHR_DEBUG.
+void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
+  DenseSet<PHINode *> TrivialPHIs;
+  unsigned NumTransformed = 0;
+  (void)(NumTransformed); // Only touched by the debug dump below.
+  for (CHRScope *Scope : CHRScopes) {
+    transformScopes(Scope, TrivialPHIs);
+    CHR_DEBUG(
+        std::ostringstream Label;
+        Label << " after transformScopes " << NumTransformed++;
+        dumpIR(F, Label.str().c_str(), nullptr));
+  }
+}
+
+// Debug helper: print Label and the number of scopes on one line, then every
+// scope on a line of its own, all to dbgs().
+static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char * Label) {
+  auto &OS = dbgs();
+  OS << Label << " " << Scopes.size() << "\n";
+  for (unsigned Idx = 0, NumScopes = Scopes.size(); Idx != NumScopes; ++Idx)
+    OS << *Scopes[Idx] << "\n";
+}
+
+bool CHR::run() {
+  if (!shouldApply(F, PSI))  // Not eligible: leave the function untouched.
+    return false;
+
+  CHR_DEBUG(dumpIR(F, "before", nullptr));
+
+  bool Changed = false;
+  {
+    CHR_DEBUG(
+        dbgs() << "RegionInfo:\n";
+        RI.print(dbgs()));
+
+    // Recursively traverse the region tree and find regions that have biased
+    // branches and/or selects and create scopes.
+    SmallVector<CHRScope *, 8> AllScopes;
+    findScopes(AllScopes);
+    CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
+
+    // Split the scopes if 1) the condition values of the biased
+    // branches/selects of the inner/lower scope can't be hoisted up to the
+    // outermost/uppermost scope entry, or 2) the condition values of the biased
+    // branches/selects in a scope (including subscopes) don't share at least
+    // one common value.
+    SmallVector<CHRScope *, 8> SplitScopes;
+    splitScopes(AllScopes, SplitScopes);
+    CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
+
+    // After splitting, set the biased regions and selects of a scope (a tree
+    // root) that include those of the subscopes.
+    classifyBiasedScopes(SplitScopes);
+    CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
+
+    // Filter out the scopes that have only one biased region or select (CHR
+    // isn't useful in such a case).
+    SmallVector<CHRScope *, 8> FilteredScopes;
+    filterScopes(SplitScopes, FilteredScopes);
+    CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
+
+    // Set the regions to be CHR'ed and their hoist stops for each scope.
+    SmallVector<CHRScope *, 8> SetScopes;
+    setCHRRegions(FilteredScopes, SetScopes);
+    CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
+
+    // Sort CHRScopes by the depth so that outer CHRScopes come before inner
+    // ones. We need to apply CHR from outer to inner so that we apply CHR only
+    // to the hot path, rather than both hot and cold paths.
+    SmallVector<CHRScope *, 8> SortedScopes;
+    sortScopes(SetScopes, SortedScopes);
+    CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
+
+    CHR_DEBUG(
+        dbgs() << "RegionInfo:\n";
+        RI.print(dbgs()));
+
+    // Apply the CHR transformation.
+    if (!SortedScopes.empty()) {
+      transformScopes(SortedScopes);
+      Changed = true;  // Reported as soon as any scope was transformed.
+    }
+  }
+
+  if (Changed)
+    CHR_DEBUG(dumpIR(F, "after", &Stats));
+
+  return Changed;
+}
+
+// Legacy pass manager entry point: fetch the block frequency, dominator tree,
+// profile summary and region analyses, then run CHR on F. Returns whatever
+// CHR::run() reports, i.e. whether the function was changed.
+bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
+  auto &BFIWrapper = getAnalysis<BlockFrequencyInfoWrapperPass>();
+  auto &DTWrapper = getAnalysis<DominatorTreeWrapperPass>();
+  auto &PSIWrapper = getAnalysis<ProfileSummaryInfoWrapperPass>();
+  auto &RegionPass = getAnalysis<RegionInfoPass>();
+  CHR Transform(F, BFIWrapper.getBFI(), DTWrapper.getDomTree(),
+                *PSIWrapper.getPSI(), RegionPass.getRegionInfo());
+  return Transform.run();
+}
+
+namespace llvm {
+
+// Constructing the new-pass-manager CHR pass calls ParseCHRFilterFiles().
+ControlHeightReductionPass::ControlHeightReductionPass() {
+  ParseCHRFilterFiles();
+}
+
+// New pass manager entry point. Runs CHR on F using the function-level
+// analyses from FAM and the cached module-level profile summary. Returns
+// PreservedAnalyses::all() when nothing changed (or no profile summary is
+// available); otherwise only GlobalsAA is reported as preserved.
+PreservedAnalyses ControlHeightReductionPass::run(
+    Function &F,
+    FunctionAnalysisManager &FAM) {
+  // The profile summary is a module analysis, so it can only be looked up in
+  // the cache here; getCachedResult() returns null when it was never computed
+  // (e.g. the pipeline has no require<profile-summary>). CHR is driven by the
+  // profile, so bail out instead of dereferencing a null pointer.
+  auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+  auto &MAM = MAMProxy.getManager();
+  auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+  if (!PSI)
+    return PreservedAnalyses::all();
+  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+  auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
+  bool Changed = CHR(F, BFI, DT, *PSI, RI).run();
+  if (!Changed)
+    return PreservedAnalyses::all();
+  auto PA = PreservedAnalyses();
+  PA.preserve<GlobalsAA>();
+  return PA;
+}
+
+} // namespace llvm

Modified: llvm/trunk/lib/Transforms/Instrumentation/Instrumentation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/Instrumentation.cpp?rev=341386&r1=341385&r2=341386&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/Instrumentation.cpp (original)
+++ llvm/trunk/lib/Transforms/Instrumentation/Instrumentation.cpp Tue Sep  4 10:19:13 2018
@@ -59,6 +59,7 @@ void llvm::initializeInstrumentation(Pas
   initializeAddressSanitizerPass(Registry);
   initializeAddressSanitizerModulePass(Registry);
   initializeBoundsCheckingLegacyPassPass(Registry);
+  initializeControlHeightReductionLegacyPassPass(Registry);
   initializeGCOVProfilerLegacyPassPass(Registry);
   initializePGOInstrumentationGenLegacyPassPass(Registry);
   initializePGOInstrumentationUseLegacyPassPass(Registry);

Added: llvm/trunk/test/Transforms/PGOProfile/chr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/PGOProfile/chr.ll?rev=341386&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/PGOProfile/chr.ll (added)
+++ llvm/trunk/test/Transforms/PGOProfile/chr.ll Tue Sep  4 10:19:13 2018
@@ -0,0 +1,1912 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -chr -instcombine -simplifycfg -S | FileCheck %s
+; RUN: opt < %s -passes='require<profile-summary>,function(chr,instcombine,simplify-cfg)' -S | FileCheck %s
+
+declare void @foo()
+declare void @bar()
+
+; Simple case.
+; Roughly,
+; t0 = *i
+; if ((t0 & 1) != 0) // Likely true
+;   foo()
+; if ((t0 & 2) != 0) // Likely true
+;   foo()
+; ->
+; t0 = *i
+; if ((t0 & 3) == 3) { // Likely true
+;   foo()
+;   foo()
+; } else {
+;   if ((t0 & 1) != 0)
+;     foo()
+;   if ((t0 & 2) != 0)
+;     foo()
+; }
+define void @test_chr_1(i32* %i) !prof !14 {
+; CHECK-LABEL: @test_chr_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb0:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK:       bb0.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB1_NONCHR]]
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP6]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
+; CHECK:       bb2.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i32, i32* %i
+  %1 = and i32 %0, 1
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+  call void @foo()
+  br label %bb1
+
+bb1:
+  %3 = and i32 %0, 2
+  %4 = icmp eq i32 %3, 0
+  br i1 %4, label %bb3, label %bb2, !prof !15
+
+bb2:
+  call void @foo()
+  br label %bb3
+
+bb3:
+  ret void
+}
+
+; Simple case with a cold block.
+; Roughly,
+; t0 = *i
+; if ((t0 & 1) != 0) // Likely true
+;   foo()
+; if ((t0 & 2) == 0) // Likely false
+;   bar()
+; if ((t0 & 4) != 0) // Likely true
+;   foo()
+; ->
+; t0 = *i
+; if ((t0 & 7) == 7) { // Likely true
+;   foo()
+;   foo()
+; } else {
+;   if ((t0 & 1) != 0)
+;     foo()
+;   if ((t0 & 2) == 0)
+;     bar()
+;   if ((t0 & 4) != 0)
+;     foo()
+; }
+define void @test_chr_1_1(i32* %i) !prof !14 {
+; CHECK-LABEL: @test_chr_1_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 7
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 7
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb0:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB5:%.*]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK:       bb0.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB1_NONCHR]]
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP6]], label [[BB2_NONCHR:%.*]], label [[BB3_NONCHR:%.*]], !prof !16
+; CHECK:       bb2.nonchr:
+; CHECK-NEXT:    call void @bar()
+; CHECK-NEXT:    br label [[BB3_NONCHR]]
+; CHECK:       bb3.nonchr:
+; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP0]], 4
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[TMP8]], label [[BB5]], label [[BB4_NONCHR:%.*]], !prof !16
+; CHECK:       bb4.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB5]]
+; CHECK:       bb5:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i32, i32* %i
+  %1 = and i32 %0, 1
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+  call void @foo()
+  br label %bb1
+
+bb1:
+  %3 = and i32 %0, 2
+  %4 = icmp eq i32 %3, 0
+  br i1 %4, label %bb2, label %bb3, !prof !15
+
+bb2:
+  call void @bar()
+  br label %bb3
+
+bb3:
+  %5 = and i32 %0, 4
+  %6 = icmp eq i32 %5, 0
+  br i1 %6, label %bb5, label %bb4, !prof !15
+
+bb4:
+  call void @foo()
+  br label %bb5
+
+bb5:
+  ret void
+}
+
+; With an aggregate bit check.
+; Roughly,
+; t0 = *i
+; if ((t0 & 255) != 0) // Likely true
+;   if ((t0 & 1) != 0) // Likely true
+;     foo()
+;   if ((t0 & 2) != 0) // Likely true
+;     foo()
+; ->
+; t0 = *i
+; if ((t0 & 3) == 3) { // Likely true
+;   foo()
+;   foo()
+; } else if ((t0 & 255) != 0) {
+;   if ((t0 & 1) != 0)
+;     foo()
+;   if ((t0 & 2) != 0)
+;     foo()
+; }
+define void @test_chr_2(i32* %i) !prof !14 {
+; CHECK-LABEL: @test_chr_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb1:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB4:%.*]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP0]], 255
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[BB4]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK:       bb0.nonchr:
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP6]], label [[BB2_NONCHR:%.*]], label [[BB1_NONCHR:%.*]], !prof !16
+; CHECK:       bb2.nonchr:
+; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[TMP8]], label [[BB4]], label [[BB3_NONCHR:%.*]], !prof !16
+; CHECK:       bb3.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB2_NONCHR]]
+; CHECK:       bb4:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i32, i32* %i
+  %1 = and i32 %0, 255
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb4, label %bb0, !prof !15
+
+bb0:
+  %3 = and i32 %0, 1
+  %4 = icmp eq i32 %3, 0
+  br i1 %4, label %bb2, label %bb1, !prof !15
+
+bb1:
+  call void @foo()
+  br label %bb2
+
+bb2:
+  %5 = and i32 %0, 2
+  %6 = icmp eq i32 %5, 0
+  br i1 %6, label %bb4, label %bb3, !prof !15
+
+bb3:
+  call void @foo()
+  br label %bb4
+
+bb4:
+  ret void
+}
+
+; Split case.
+; Roughly,
+; t1 = *i
+; if ((t1 & 1) != 0) // Likely true
+;   foo()
+; if ((t1 & 2) != 0) // Likely true
+;   foo()
+; t2 = *i
+; if ((t2 & 4) != 0) // Likely true
+;   foo()
+; if ((t2 & 8) != 0) // Likely true
+;   foo()
+; ->
+; t1 = *i
+; if ((t1 & 3) == 3) { // Likely true
+;   foo()
+;   foo()
+; } else {
+;   if ((t1 & 1) != 0)
+;     foo()
+;   if ((t1 & 2) != 0)
+;     foo()
+; }
+; t2 = *i
+; if ((t2 & 12) == 12) { // Likely true
+;   foo()
+;   foo()
+; } else {
+;   if ((t2 & 4) != 0)
+;     foo()
+;   if ((t2 & 8) != 0)
+;     foo()
+; }
+define void @test_chr_3(i32* %i) !prof !14 {
+; CHECK-LABEL: @test_chr_3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb0:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK:       bb0.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB1_NONCHR]]
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP6]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
+; CHECK:       bb2.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[I]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = and i32 [[TMP7]], 12
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 12
+; CHECK-NEXT:    br i1 [[TMP9]], label [[BB4:%.*]], label [[BB3_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb4:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB7:%.*]]
+; CHECK:       bb3.split.nonchr:
+; CHECK-NEXT:    [[TMP10:%.*]] = and i32 [[TMP7]], 4
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT:    br i1 [[TMP11]], label [[BB5_NONCHR:%.*]], label [[BB4_NONCHR:%.*]], !prof !16
+; CHECK:       bb4.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB5_NONCHR]]
+; CHECK:       bb5.nonchr:
+; CHECK-NEXT:    [[TMP12:%.*]] = and i32 [[TMP7]], 8
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0
+; CHECK-NEXT:    br i1 [[TMP13]], label [[BB7]], label [[BB6_NONCHR:%.*]], !prof !16
+; CHECK:       bb6.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB7]]
+; CHECK:       bb7:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i32, i32* %i
+  %1 = and i32 %0, 1
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+  call void @foo()
+  br label %bb1
+
+bb1:
+  %3 = and i32 %0, 2
+  %4 = icmp eq i32 %3, 0
+  br i1 %4, label %bb3, label %bb2, !prof !15
+
+bb2:
+  call void @foo()
+  br label %bb3
+
+bb3:
+  %5 = load i32, i32* %i
+  %6 = and i32 %5, 4
+  %7 = icmp eq i32 %6, 0
+  br i1 %7, label %bb5, label %bb4, !prof !15
+
+bb4:
+  call void @foo()
+  br label %bb5
+
+bb5:
+  %8 = and i32 %5, 8
+  %9 = icmp eq i32 %8, 0
+  br i1 %9, label %bb7, label %bb6, !prof !15
+
+bb6:
+  call void @foo()
+  br label %bb7
+
+bb7:
+  ret void
+}
+
+; Selects.
+; Roughly,
+; t0 = *i
+; sum1 = ((t0 & 1) == 0) ? sum0 : (sum0 + 42) // Likely false
+; sum2 = ((t0 & 2) == 0) ? sum1 : (sum1 + 43) // Likely false
+; return sum2
+; ->
+; t0 = *i
+; if ((t0 & 3) == 3)
+;   return sum0 + 85
+; else {
+;   sum1 = ((t0 & 1) == 0) ? sum0 : (sum0 + 42)
+;   sum2 = ((t0 & 2) == 0) ? sum1 : (sum1 + 43)
+;   return sum2
+; }
+define i32 @test_chr_4(i32* %i, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT:    br i1 [[TMP2]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       entry.split:
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[SUM0:%.*]], 85
+; CHECK-NEXT:    ret i32 [[TMP3]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[SUM0]], 42
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    [[SUM1_NONCHR:%.*]] = select i1 [[TMP6]], i32 [[SUM0]], i32 [[TMP4]], !prof !16
+; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[SUM1_NONCHR]], 43
+; CHECK-NEXT:    [[SUM2_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM1_NONCHR]], i32 [[TMP9]], !prof !16
+; CHECK-NEXT:    ret i32 [[SUM2_NONCHR]]
+;
+entry:
+  %0 = load i32, i32* %i
+  %1 = and i32 %0, 1
+  %2 = icmp eq i32 %1, 0
+  %3 = add i32 %sum0, 42
+  %sum1 = select i1 %2, i32 %sum0, i32 %3, !prof !15
+  %4 = and i32 %0, 2
+  %5 = icmp eq i32 %4, 0
+  %6 = add i32 %sum1, 43
+  %sum2 = select i1 %5, i32 %sum1, i32 %6, !prof !15
+  ret i32 %sum2
+}
+
+; Selects + Brs
+; Roughly,
+; t0 = *i
+; if ((t0 & 255) != 0) { // Likely true
+;   sum = ((t0 & 1) == 0) ? sum0 : (sum0 + 42) // Likely false
+;   sum = ((t0 & 2) == 0) ? sum : (sum + 43) // Likely false
+;   if ((t0 & 4) != 0) { // Likely true
+;     sum3 = sum + 44
+;     sum = ((t0 & 8) == 0) ? sum3 : (sum3 + 44) // Likely false
+;   }
+; }
+; return sum
+; ->
+; t0 = *i
+; if ((t0 & 15) == 15) { // Likely true
+;   sum = sum0 + 173
+; } else if ((t0 & 255) != 0) {
+;   sum = ((t0 & 1) == 0) ? sum0 : (sum0 + 42)
+;   sum = ((t0 & 2) == 0) ? sum : (sum + 43)
+;   if ((t0 & 4) != 0) {
+;     sum3 = sum + 44
+;     sum = ((t0 & 8) == 0) ? sum3 : (sum3 + 44)
+;   }
+; }
+; return sum
+define i32 @test_chr_5(i32* %i, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 15
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 15
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb0:
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[SUM0:%.*]], 85
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[SUM0]], 173
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP0]], 255
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP6]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK:       bb0.nonchr:
+; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[SUM0]], 42
+; CHECK-NEXT:    [[SUM1_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM0]], i32 [[TMP9]], !prof !16
+; CHECK-NEXT:    [[TMP10:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[SUM1_NONCHR]], 43
+; CHECK-NEXT:    [[SUM2_NONCHR:%.*]] = select i1 [[TMP11]], i32 [[SUM1_NONCHR]], i32 [[TMP12]], !prof !16
+; CHECK-NEXT:    [[TMP13:%.*]] = and i32 [[TMP0]], 4
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT:    br i1 [[TMP14]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[TMP15:%.*]] = and i32 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
+; CHECK-NEXT:    [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP16]], i32 44, i32 88, !prof !16
+; CHECK-NEXT:    [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT:    ret i32 [[SUM6]]
+;
+entry:
+  %0 = load i32, i32* %i
+  %1 = and i32 %0, 255
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb3, label %bb0, !prof !15
+
+bb0:
+  %3 = and i32 %0, 1
+  %4 = icmp eq i32 %3, 0
+  %5 = add i32 %sum0, 42
+  %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15
+  %6 = and i32 %0, 2
+  %7 = icmp eq i32 %6, 0
+  %8 = add i32 %sum1, 43
+  %sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15
+  %9 = and i32 %0, 4
+  %10 = icmp eq i32 %9, 0
+  br i1 %10, label %bb2, label %bb1, !prof !15
+
+bb1:
+  %sum3 = add i32 %sum2, 44
+  %11 = and i32 %0, 8
+  %12 = icmp eq i32 %11, 0
+  %13 = add i32 %sum3, 44
+  %sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15
+  br label %bb2
+
+bb2:
+  %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
+  br label %bb3
+
+bb3:
+  %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
+  ret i32 %sum6
+}
+
+; Selects + Brs with a scope split in the middle
+; Roughly,
+; t0 = *i
+; if ((t0 & 255) != 0) { // Likely true
+;   sum = ((t0 & 1) == 0) ? sum0 : (sum0 + 42) // Likely false
+;   sum = ((t0 & 2) == 0) ? sum : (sum + 43) // Likely false
+;   if ((sum0 & 4) != 0) { // Likely true. The condition doesn't use t0.
+;     sum3 = sum + 44
+;     sum = ((t0 & 8) == 0) ? sum3 : (sum3 + 44) // Likely false
+;   }
+; }
+; return sum
+; ->
+; t0 = *i
+; if ((sum0 & 4) != 0 & (t0 & 11) == 11) { // Likely true
+;   sum = sum0 + 173
+; } else if ((t0 & 255) != 0) {
+;   sum = ((t0 & 1) == 0) ? sum0 : (sum0 + 42)
+;   sum = ((t0 & 2) == 0) ? sum : (sum + 43)
+;   if ((sum0 & 4) != 0) {
+;     sum3 = sum + 44
+;     sum = ((t0 & 8) == 0) ? sum3 : (sum3 + 44)
+;   }
+; }
+; return sum
+define i32 @test_chr_5_1(i32* %i, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_5_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[SUM0:%.*]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP0]], 11
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 11
+; CHECK-NEXT:    [[TMP5:%.*]] = and i1 [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb0:
+; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[SUM0]], 85
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[SUM0]], 173
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[TMP8:%.*]] = and i32 [[TMP0]], 255
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[TMP9]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK:       bb0.nonchr:
+; CHECK-NEXT:    [[TMP10:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[SUM0]], 42
+; CHECK-NEXT:    [[SUM1_NONCHR:%.*]] = select i1 [[TMP11]], i32 [[SUM0]], i32 [[TMP12]], !prof !16
+; CHECK-NEXT:    [[TMP13:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[SUM1_NONCHR]], 43
+; CHECK-NEXT:    [[SUM2_NONCHR:%.*]] = select i1 [[TMP14]], i32 [[SUM1_NONCHR]], i32 [[TMP15]], !prof !16
+; CHECK-NEXT:    [[TMP16:%.*]] = and i32 [[SUM0]], 4
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0
+; CHECK-NEXT:    br i1 [[TMP17]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[TMP18:%.*]] = and i32 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0
+; CHECK-NEXT:    [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP19]], i32 44, i32 88, !prof !16
+; CHECK-NEXT:    [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ [[TMP7]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT:    ret i32 [[SUM6]]
+;
+entry:
+  %0 = load i32, i32* %i
+  %1 = and i32 %0, 255
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb3, label %bb0, !prof !15
+
+bb0:
+  %3 = and i32 %0, 1
+  %4 = icmp eq i32 %3, 0
+  %5 = add i32 %sum0, 42
+  %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15
+  %6 = and i32 %0, 2
+  %7 = icmp eq i32 %6, 0
+  %8 = add i32 %sum1, 43
+  %sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15
+  %9 = and i32 %sum0, 4                              ; Split
+  %10 = icmp eq i32 %9, 0
+  br i1 %10, label %bb2, label %bb1, !prof !15
+
+bb1:
+  %sum3 = add i32 %sum2, 44
+  %11 = and i32 %0, 8
+  %12 = icmp eq i32 %11, 0
+  %13 = add i32 %sum3, 44
+  %sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15
+  br label %bb2
+
+bb2:
+  %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
+  br label %bb3
+
+bb3:
+  %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
+  ret i32 %sum6
+}
+
+; Selects + Brs, non-matching bases
+; Roughly,
+; i0 = *i
+; j0 = *j
+; if ((i0 & 255) != 0) { // Likely true
+;   sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
+;   if ((j0 & 4) != 0) { // Likely true. The condition uses j0, not i0.
+;     sum3 = sum + 44
+;     sum = ((i0 & 8) == 0) ? sum3 : (sum3 + 44) // Likely false
+;   }
+; }
+; return sum
+; ->
+; i0 = *i
+; j0 = *j
+; if ((j0 & 4) != 0 & (i0 & 10) == 10) { // Likely true
+;   sum = sum0 + 131
+; } else if ((i0 & 255) != 0) {
+;   sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43)
+;   if ((j0 & 4) != 0) {
+;     sum3 = sum + 44
+;     sum = ((i0 & 8) == 0) ? sum3 : (sum3 + 44)
+;   }
+; }
+; return sum
+define i32 @test_chr_6(i32* %i, i32* %j, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
+; CHECK-NEXT:    [[V9:%.*]] = and i32 [[J0]], 4
+; CHECK-NEXT:    [[V10:%.*]] = icmp ne i32 [[V9]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[I0]], 10
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 10
+; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[V10]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb0:
+; CHECK-NEXT:    [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
+; CHECK-NEXT:    [[V13:%.*]] = add i32 [[SUM0]], 131
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[V1:%.*]] = and i32 [[I0]], 255
+; CHECK-NEXT:    [[V2:%.*]] = icmp eq i32 [[V1]], 0
+; CHECK-NEXT:    br i1 [[V2]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK:       bb0.nonchr:
+; CHECK-NEXT:    [[V3_NONCHR:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT:    [[V4_NONCHR:%.*]] = icmp eq i32 [[V3_NONCHR]], 0
+; CHECK-NEXT:    [[V8_NONCHR:%.*]] = add i32 [[SUM0]], 43
+; CHECK-NEXT:    [[SUM2_NONCHR:%.*]] = select i1 [[V4_NONCHR]], i32 [[SUM0]], i32 [[V8_NONCHR]], !prof !16
+; CHECK-NEXT:    [[V9_NONCHR:%.*]] = and i32 [[J0]], 4
+; CHECK-NEXT:    [[V10_NONCHR:%.*]] = icmp eq i32 [[V9_NONCHR]], 0
+; CHECK-NEXT:    br i1 [[V10_NONCHR]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[V11_NONCHR:%.*]] = and i32 [[I0]], 8
+; CHECK-NEXT:    [[V12_NONCHR:%.*]] = icmp eq i32 [[V11_NONCHR]], 0
+; CHECK-NEXT:    [[SUM4_NONCHR_V:%.*]] = select i1 [[V12_NONCHR]], i32 44, i32 88, !prof !16
+; CHECK-NEXT:    [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ [[V13]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT:    ret i32 [[SUM6]]
+;
+entry:
+  %i0 = load i32, i32* %i
+  %j0 = load i32, i32* %j
+  %v1 = and i32 %i0, 255
+  %v2 = icmp eq i32 %v1, 0
+  br i1 %v2, label %bb3, label %bb0, !prof !15
+
+bb0:
+  %v3 = and i32 %i0, 2
+  %v4 = icmp eq i32 %v3, 0
+  %v8 = add i32 %sum0, 43
+  %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
+  %v9 = and i32 %j0, 4
+  %v10 = icmp eq i32 %v9, 0
+  br i1 %v10, label %bb2, label %bb1, !prof !15
+
+bb1:
+  %sum3 = add i32 %sum2, 44
+  %v11 = and i32 %i0, 8
+  %v12 = icmp eq i32 %v11, 0
+  %v13 = add i32 %sum3, 44
+  %sum4 = select i1 %v12, i32 %sum3, i32 %v13, !prof !15
+  br label %bb2
+
+bb2:
+  %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
+  br label %bb3
+
+bb3:
+  %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
+  ret i32 %sum6
+}
+
+; Selects + Brs, the branch condition can't be hoisted to be merged with a
+; select. No CHR happens.
+; Roughly,
+; i0 = *i
+; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43)  // Likely false
+; foo();
+; j0 = *j
+; if ((j0 & 4) != 0) { // Likely true
+;   foo();
+;   sum = sum + 44
+; }
+; return sum
+; ->
+; (no change)
+define i32 @test_chr_7(i32* %i, i32* %j, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[V3:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT:    [[V4:%.*]] = icmp eq i32 [[V3]], 0
+; CHECK-NEXT:    [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
+; CHECK-NEXT:    [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
+; CHECK-NEXT:    [[V9:%.*]] = and i32 [[J0]], 4
+; CHECK-NEXT:    [[V10:%.*]] = icmp eq i32 [[V9]], 0
+; CHECK-NEXT:    br i1 [[V10]], label [[BB2:%.*]], label [[BB1:%.*]], !prof !16
+; CHECK:       bb1:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    [[SUM4:%.*]] = add i32 [[SUM2]], 44
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[SUM5:%.*]] = phi i32 [ [[SUM2]], [[ENTRY:%.*]] ], [ [[SUM4]], [[BB1]] ]
+; CHECK-NEXT:    ret i32 [[SUM5]]
+;
+entry:
+  %i0 = load i32, i32* %i
+  %v3 = and i32 %i0, 2
+  %v4 = icmp eq i32 %v3, 0
+  %v8 = add i32 %sum0, 43
+  %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
+  call void @foo()
+  %j0 = load i32, i32* %j
+  %v9 = and i32 %j0, 4
+  %v10 = icmp eq i32 %v9, 0
+  br i1 %v10, label %bb2, label %bb1, !prof !15    ; %v10 can't be hoisted above the above select
+
+bb1:
+  call void @foo()
+  %sum4 = add i32 %sum2, 44
+  br label %bb2
+
+bb2:
+  %sum5 = phi i32 [ %sum2, %entry ], [ %sum4, %bb1 ]
+  ret i32 %sum5
+}
+
+; Selects + Brs, the branch condition can't be hoisted to be merged with the
+; selects. Dropping the select.
+; Roughly,
+; i0 = *i
+; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43)  // Likely false
+; foo();
+; j0 = *j
+; if ((j0 & 4) != 0) // Likely true
+;   foo()
+; if ((j0 & 8) != 0) // Likely true
+;   foo()
+; return sum
+; ->
+; i0 = *i
+; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43)  // Likely false
+; foo();
+; j0 = *j
+; if ((j0 & 12) == 12) { // Likely true
+;   foo()
+;   foo()
+; } else {
+;   if ((j0 & 4) != 0)
+;     foo()
+;   if ((j0 & 8) != 0)
+;     foo()
+; }
+; return sum
+define i32 @test_chr_7_1(i32* %i, i32* %j, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_7_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[V3:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT:    [[V4:%.*]] = icmp eq i32 [[V3]], 0
+; CHECK-NEXT:    [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
+; CHECK-NEXT:    [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[J0]], 12
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 12
+; CHECK-NEXT:    br i1 [[TMP1]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb0:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[V9:%.*]] = and i32 [[J0]], 4
+; CHECK-NEXT:    [[V10:%.*]] = icmp eq i32 [[V9]], 0
+; CHECK-NEXT:    br i1 [[V10]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK:       bb0.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB1_NONCHR]]
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[V11_NONCHR:%.*]] = and i32 [[J0]], 8
+; CHECK-NEXT:    [[V12_NONCHR:%.*]] = icmp eq i32 [[V11_NONCHR]], 0
+; CHECK-NEXT:    br i1 [[V12_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
+; CHECK:       bb2.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i32 [[SUM2]]
+;
+entry:
+  %i0 = load i32, i32* %i
+  %v3 = and i32 %i0, 2
+  %v4 = icmp eq i32 %v3, 0
+  %v8 = add i32 %sum0, 43
+  %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
+  call void @foo()
+  %j0 = load i32, i32* %j
+  %v9 = and i32 %j0, 4
+  %v10 = icmp eq i32 %v9, 0
+  br i1 %v10, label %bb1, label %bb0, !prof !15    ; %v10 can't be hoisted above the above select
+
+bb0:
+  call void @foo()
+  br label %bb1
+
+bb1:
+  %v11 = and i32 %j0, 8
+  %v12 = icmp eq i32 %v11, 0
+  br i1 %v12, label %bb3, label %bb2, !prof !15
+
+bb2:
+  call void @foo()
+  br label %bb3
+
+bb3:
+  ret i32 %sum2
+}
+
+; Branches aren't biased enough. No CHR happens.
+; Roughly,
+; t0 = *i
+; if ((t0 & 1) != 0) // Not biased
+;   foo()
+; if ((t0 & 2) != 0) // Not biased
+;   foo()
+; ->
+; (no change)
+define void @test_chr_8(i32* %i) !prof !14 {
+; CHECK-LABEL: @test_chr_8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !17
+; CHECK:       bb0:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB1]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !17
+; CHECK:       bb2:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i32, i32* %i
+  %1 = and i32 %0, 1
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb1, label %bb0, !prof !16
+
+bb0:
+  call void @foo()
+  br label %bb1
+
+bb1:
+  %3 = and i32 %0, 2
+  %4 = icmp eq i32 %3, 0
+  br i1 %4, label %bb3, label %bb2, !prof !16
+
+bb2:
+  call void @foo()
+  br label %bb3
+
+bb3:
+  ret void
+}
+
+; With an existing phi at the exit.
+; Roughly,
+; t = *i
+; if ((t & 1) != 0) // Likely true
+;   foo()
+; if ((t & 2) != 0) { // Likely true
+;   t = *j
+;   foo()
+; }
+; // There's a phi for t here.
+; return t
+; ->
+; t = *i
+; if ((t & 3) == 3) { // Likely true
+;   foo()
+;   t = *j
+;   foo()
+; } else {
+;   if ((t & 1) != 0)
+;     foo()
+;   if ((t & 2) != 0) {
+;     t = *j
+;     foo()
+;   }
+; }
+; // There's a phi for t here.
+; return t
+define i32 @test_chr_9(i32* %i, i32* %j) !prof !14 {
+; CHECK-LABEL: @test_chr_9(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb0:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[J:%.*]], align 4
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK:       bb0.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB1_NONCHR]]
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
+; CHECK:       bb2.nonchr:
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[J]], align 4
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[BB0]] ], [ [[TMP0]], [[BB1_NONCHR]] ], [ [[TMP8]], [[BB2_NONCHR]] ]
+; CHECK-NEXT:    ret i32 [[TMP9]]
+;
+entry:
+  %0 = load i32, i32* %i
+  %1 = and i32 %0, 1
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+  call void @foo()
+  br label %bb1
+
+bb1:
+  %3 = and i32 %0, 2
+  %4 = icmp eq i32 %3, 0
+  br i1 %4, label %bb3, label %bb2, !prof !15
+
+bb2:
+  %5 = load i32, i32* %j
+  call void @foo()
+  br label %bb3
+
+bb3:
+  %6 = phi i32 [ %0, %bb1 ], [ %5, %bb2 ]
+  ret i32 %6
+}
+
+; With no phi at the exit, but the exit needs a phi inserted after CHR.
+; Roughly,
+; t0 = *i
+; if ((t0 & 1) != 0) // Likely true
+;   foo()
+; t1 = *j
+; if ((t0 & 2) != 0) // Likely true
+;   foo()
+; return (t1 * 42) + (t1 - 99)
+; ->
+; t0 = *i
+; if ((t0 & 3) == 3) { // Likely true
+;   foo()
+;   t1 = *j
+;   foo()
+; } else {
+;   if ((t0 & 1) != 0)
+;     foo()
+;   if ((t0 & 2) != 0) {
+;     t1 = *j
+;     foo()
+;   }
+; }
+; // A new phi for t1 is inserted here.
+; return (t1 * 42) - (t1 - 99)
+define i32 @test_chr_10(i32* %i, i32* %j) !prof !14 {
+; CHECK-LABEL: @test_chr_10(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb0:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[J:%.*]], align 4
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK:       bb0.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB1_NONCHR]]
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[J]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[TMP8]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
+; CHECK:       bb2.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[BB0]] ], [ [[TMP6]], [[BB2_NONCHR]] ], [ [[TMP6]], [[BB1_NONCHR]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], 42
+; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP9]], -99
+; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    ret i32 [[TMP12]]
+;
+entry:
+  %0 = load i32, i32* %i
+  %1 = and i32 %0, 1
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+  call void @foo()
+  br label %bb1
+
+bb1:
+  %3 = load i32, i32* %j
+  %4 = and i32 %0, 2
+  %5 = icmp eq i32 %4, 0
+  br i1 %5, label %bb3, label %bb2, !prof !15
+
+bb2:
+  call void @foo()
+  br label %bb3
+
+bb3:
+  %6 = mul i32 %3, 42
+  %7 = sub i32 %3, 99
+  %8 = add i32 %6, %7
+  ret i32 %8
+}
+
+; Test a case where there are two use-def chain paths to the same value (t0)
+; from the branch condition. This is a regression test for an old bug that
+; caused a bad hoisting that moved (hoisted) a value (%conv) twice to the end
+; of the %entry block (once for %div and once for %mul16) and put a use ahead
+; of its definition like:
+; %entry:
+;   ...
+;   %div = fdiv double 1.000000e+00, %conv
+;   %conv = sitofp i32 %0 to double
+;   %mul16 = fmul double %div, %conv
+;
+; Roughly,
+; t0 = *i
+; if ((t0 & 1) != 0) // Likely true
+;   foo()
+; // there are two use-def paths from the branch condition to t0.
+; if ((1.0 / t0) * t0 > 0) // Likely true
+;   foo()
+; ->
+; t0 = *i
+; if ((t0 & 1) != 0 & (1.0 / t0) * t0 > 0) { // Likely true
+;   foo()
+;   foo()
+; } else {
+;   if ((t0 & 1) != 0)
+;     foo()
+;   if ((1.0 / t0) * t0 > 0) // Likely true
+;     foo()
+; }
+define void @test_chr_11(i32* %i, i32 %x) !prof !14 {
+; CHECK-LABEL: @test_chr_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv double 1.000000e+00, [[CONV]]
+; CHECK-NEXT:    [[MUL16:%.*]] = fmul double [[DIV]], [[CONV]]
+; CHECK-NEXT:    [[CONV717:%.*]] = fptosi double [[MUL16]] to i32
+; CHECK-NEXT:    [[CMP18:%.*]] = icmp sgt i32 [[CONV717]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = and i1 [[TMP2]], [[CMP18]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb0:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB0_NONCHR:%.*]], label [[BB1_NONCHR:%.*]], !prof !18
+; CHECK:       bb0.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB1_NONCHR]]
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[CONV_NONCHR:%.*]] = sitofp i32 [[TMP0]] to double
+; CHECK-NEXT:    [[DIV_NONCHR:%.*]] = fdiv double 1.000000e+00, [[CONV_NONCHR]]
+; CHECK-NEXT:    [[MUL16_NONCHR:%.*]] = fmul double [[DIV_NONCHR]], [[CONV_NONCHR]]
+; CHECK-NEXT:    [[CONV717_NONCHR:%.*]] = fptosi double [[MUL16_NONCHR]] to i32
+; CHECK-NEXT:    [[CMP18_NONCHR:%.*]] = icmp slt i32 [[CONV717_NONCHR]], 1
+; CHECK-NEXT:    br i1 [[CMP18_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
+; CHECK:       bb2.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i32, i32* %i
+  %1 = and i32 %0, 1
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+  call void @foo()
+  br label %bb1
+
+bb1:
+  %conv = sitofp i32 %0 to double
+  %div = fdiv double 1.000000e+00, %conv
+  %mul16 = fmul double %div, %conv
+  %conv717 = fptosi double %mul16 to i32
+  %cmp18 = icmp slt i32 %conv717, 1
+  br i1 %cmp18, label %bb3, label %bb2, !prof !15
+
+bb2:
+  call void @foo()
+  br label %bb3
+
+bb3:
+  ret void
+}
+
+; Selects + unrelated br only
+define i32 @test_chr_12(i32* %i, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_12(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 255
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB3:%.*]], label [[BB0:%.*]], !prof !16
+; CHECK:       bb0:
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[SUM0:%.*]], 42
+; CHECK-NEXT:    [[SUM1:%.*]] = select i1 [[TMP4]], i32 [[SUM0]], i32 [[TMP5]], !prof !16
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[SUM1]], 43
+; CHECK-NEXT:    [[SUM2:%.*]] = select i1 [[TMP7]], i32 [[SUM1]], i32 [[TMP8]], !prof !16
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[I]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = and i32 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; CHECK-NEXT:    [[TMP13:%.*]] = and i1 [[TMP10]], [[TMP12]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[BB1:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[SUM2]], 88
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb0.split.nonchr:
+; CHECK-NEXT:    br i1 [[TMP10]], label [[BB1_NONCHR:%.*]], label [[BB3]], !prof !18
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[TMP15:%.*]] = and i32 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
+; CHECK-NEXT:    [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP16]], i32 44, i32 88, !prof !16
+; CHECK-NEXT:    [[SUM4_NONCHR:%.*]] = add i32 [[SUM2]], [[SUM4_NONCHR_V]]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ [[SUM0]], [[ENTRY:%.*]] ], [ [[TMP14]], [[BB1]] ], [ [[SUM2]], [[BB0_SPLIT_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT:    ret i32 [[SUM6]]
+;
+entry:
+  %0 = load i32, i32* %i
+  %1 = and i32 %0, 255
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb3, label %bb0, !prof !15
+
+bb0:
+  %3 = and i32 %0, 1
+  %4 = icmp eq i32 %3, 0
+  %5 = add i32 %sum0, 42
+  %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15
+  %6 = and i32 %0, 2
+  %7 = icmp eq i32 %6, 0
+  %8 = add i32 %sum1, 43
+  %sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15
+  %9 = load i32, i32* %i
+  %10 = icmp eq i32 %9, 0
+  br i1 %10, label %bb2, label %bb1, !prof !15
+
+bb1:
+  %sum3 = add i32 %sum2, 44
+  %11 = and i32 %0, 8
+  %12 = icmp eq i32 %11, 0
+  %13 = add i32 %sum3, 44
+  %sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15
+  br label %bb2
+
+bb2:
+  %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
+  br label %bb3
+
+bb3:
+  %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
+  ret i32 %sum6
+}
+
+; In the second CHR, a condition value depends on a trivial phi that's inserted
+; by the first CHR.
+; Roughly,
+; i0 = *i
+; v2 = (z != 1) ? pred : true  // Likely false
+; if (z == 0 & pred)  // Likely false
+;   foo()
+; j0 = *j
+; sum2 = ((i0 & 2) == j0) ? sum0 : (sum0 + 43) // Likely false
+; sum3 = (i0 == j0) ? sum0 : (sum0 + 43) // Likely false
+; foo()
+; if ((i0 & 4) == 0) // Unbiased
+;   foo()
+; return i0 + sum3
+; ->
+; i0 = *i
+; if (z != 1 & (z == 0 & pred)) // First CHR
+;   foo()
+; // A trivial phi for i0 is inserted here by the first CHR (which gets removed
+; // later) and the subsequent branch condition (for the second CHR) uses it.
+; j0 = *j
+; if ((i0 & 2) != j0 & i0 != j0) {  // Second CHR
+;   sum3 = sum0 + 43
+;   foo()
+;   if ((i0 & 4) == 0)
+;     foo()
+; } else {
+;   sum3 = (i0 == j0) ? sum0 : (sum0 + 43)
+;   foo()
+;   if ((i0 & 4) == 0)
+;     foo()
+; }
+; return i0 + sum3
+define i32 @test_chr_14(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14 {
+; CHECK-LABEL: @test_chr_14(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[V1:%.*]] = icmp ne i32 [[Z:%.*]], 1
+; CHECK-NEXT:    [[V0:%.*]] = icmp eq i32 [[Z]], 0
+; CHECK-NEXT:    [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]]
+; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[V1]], [[V3_NONCHR]]
+; CHECK-NEXT:    br i1 [[OR_COND]], label [[BB0_NONCHR:%.*]], label [[BB1:%.*]], !prof !19
+; CHECK:       bb0.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB1]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
+; CHECK-NEXT:    [[V6:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT:    [[V4:%.*]] = icmp ne i32 [[V6]], [[J0]]
+; CHECK-NEXT:    [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
+; CHECK-NEXT:    [[V5:%.*]] = icmp ne i32 [[I0]], [[J0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = and i1 [[V4]], [[V5]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[BB1_SPLIT:%.*]], label [[BB1_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb1.split:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    [[V9:%.*]] = and i32 [[I0]], 4
+; CHECK-NEXT:    [[V10:%.*]] = icmp eq i32 [[V9]], 0
+; CHECK-NEXT:    br i1 [[V10]], label [[BB3:%.*]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb1.split.nonchr:
+; CHECK-NEXT:    [[V5_NONCHR:%.*]] = icmp eq i32 [[I0]], [[J0]]
+; CHECK-NEXT:    [[SUM3_NONCHR:%.*]] = select i1 [[V5_NONCHR]], i32 [[SUM0]], i32 [[V8]], !prof !16
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    [[V9_NONCHR:%.*]] = and i32 [[I0]], 4
+; CHECK-NEXT:    [[V10_NONCHR:%.*]] = icmp eq i32 [[V9_NONCHR]], 0
+; CHECK-NEXT:    br i1 [[V10_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]]
+; CHECK:       bb2.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[V8]], [[BB2]] ], [ [[V8]], [[BB1_SPLIT]] ], [ [[SUM3_NONCHR]], [[BB2_NONCHR]] ], [ [[SUM3_NONCHR]], [[BB1_SPLIT_NONCHR]] ]
+; CHECK-NEXT:    [[V11:%.*]] = add i32 [[I0]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[V11]]
+;
+entry:
+  %i0 = load i32, i32* %i
+  %v0 = icmp eq i32 %z, 0
+  %v1 = icmp ne i32 %z, 1
+  %v2 = select i1 %v1, i1 %pred, i1 true, !prof !15
+  %v3 = and i1 %v0, %pred
+  br i1 %v3, label %bb0, label %bb1, !prof !15
+
+bb0:
+  call void @foo()
+  br label %bb1
+
+bb1:
+  %j0 = load i32, i32* %j
+  %v6 = and i32 %i0, 2
+  %v4 = icmp eq i32 %v6, %j0
+  %v8 = add i32 %sum0, 43
+  %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
+  %v5 = icmp eq i32 %i0, %j0
+  %sum3 = select i1 %v5, i32 %sum0, i32 %v8, !prof !15
+  call void @foo()
+  %v9 = and i32 %i0, 4
+  %v10 = icmp eq i32 %v9, 0
+  br i1 %v10, label %bb3, label %bb2
+
+bb2:
+  call void @foo()
+  br label %bb3
+
+bb3:
+  %v11 = add i32 %i0, %sum3
+  ret i32 %v11
+}
+
+; A branch or select depends on another select. No CHR happens.
+; Roughly,
+; i0 = *i
+; if (z == 0 & ((z != 1) ? pred : true)) // Likely false
+;   foo()
+; j0 = *j
+; sum2 = ((i0 & 2) == j0) ? sum0 : (sum0 + 43) // Likely false
+; sum3 = (i0 == sum2) ? sum2 : (sum0 + 43) // Likely false. This depends on the
+;                                          // previous select.
+; foo()
+; if ((i0 & 4) == 0) // Unbiased
+;   foo()
+; return i0 + sum3
+; ->
+; (no change)
+define i32 @test_chr_15(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14 {
+; CHECK-LABEL: @test_chr_15(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[V0:%.*]] = icmp eq i32 [[Z:%.*]], 0
+; CHECK-NEXT:    [[V3:%.*]] = and i1 [[V0]], [[PRED:%.*]]
+; CHECK-NEXT:    br i1 [[V3]], label [[BB0:%.*]], label [[BB1:%.*]], !prof !16
+; CHECK:       bb0:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB1]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
+; CHECK-NEXT:    [[V6:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT:    [[V4:%.*]] = icmp eq i32 [[V6]], [[J0]]
+; CHECK-NEXT:    [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
+; CHECK-NEXT:    [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
+; CHECK-NEXT:    [[V5:%.*]] = icmp eq i32 [[I0]], [[SUM2]]
+; CHECK-NEXT:    [[SUM3:%.*]] = select i1 [[V5]], i32 [[SUM2]], i32 [[V8]], !prof !16
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    [[V9:%.*]] = and i32 [[I0]], 4
+; CHECK-NEXT:    [[V10:%.*]] = icmp eq i32 [[V9]], 0
+; CHECK-NEXT:    br i1 [[V10]], label [[BB3:%.*]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[V11:%.*]] = add i32 [[I0]], [[SUM3]]
+; CHECK-NEXT:    ret i32 [[V11]]
+;
+entry:
+  %i0 = load i32, i32* %i
+  %v0 = icmp eq i32 %z, 0
+  %v1 = icmp ne i32 %z, 1
+  %v2 = select i1 %v1, i1 %pred, i1 true, !prof !15
+  %v3 = and i1 %v0, %v2
+  br i1 %v3, label %bb0, label %bb1, !prof !15
+
+bb0:
+  call void @foo()
+  br label %bb1
+
+bb1:
+  %j0 = load i32, i32* %j
+  %v6 = and i32 %i0, 2
+  %v4 = icmp eq i32 %v6, %j0
+  %v8 = add i32 %sum0, 43
+  %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
+  %v5 = icmp eq i32 %i0, %sum2
+  %sum3 = select i1 %v5, i32 %sum2, i32 %v8, !prof !15
+  call void @foo()
+  %v9 = and i32 %i0, 4
+  %v10 = icmp eq i32 %v9, 0
+  br i1 %v10, label %bb3, label %bb2
+
+bb2:
+  call void @foo()
+  br label %bb3
+
+bb3:
+  %v11 = add i32 %i0, %sum3
+  ret i32 %v11
+}
+
+; With an existing phi at the exit, where a value (%v40) is both live and an
+; operand to a phi in the exit block.
+; Roughly,
+; t0 = *i
+; if ((t0 & 1) != 0) // Likely true
+;   foo()
+; v40 = t0 + 44
+; if ((t0 & 2) != 0) { // Likely true
+;   v41 = t0 + 99
+;   foo()
+; }
+; v42 = phi v40, v41
+; return v42 + v40
+; ->
+; t0 = *i
+; if ((t0 & 3) == 3) { // Likely true
+;   foo()
+;   v40 = t0 + 44
+;   v41 = t0 + 99
+;   foo()
+; } else {
+;   if ((t0 & 1) != 0) // Likely true
+;     foo()
+;   v40_nc = t0 + 44
+;   if ((t0 & 2) != 0) { // Likely true
+;     v41_nc = t0 + 99
+;     foo()
+;   }
+; }
+; t7 = phi v40, v40_nc
+; v42 = phi v41, v41_nc, v40_nc
+; v43 = v42 + t7
+; return v43
+define i32 @test_chr_16(i32* %i) !prof !14 {
+; CHECK-LABEL: @test_chr_16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb0:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    [[V40:%.*]] = add i32 [[TMP0]], 44
+; CHECK-NEXT:    [[V41:%.*]] = add i32 [[TMP0]], 99
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK:       bb0.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB1_NONCHR]]
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[V40_NONCHR:%.*]] = add i32 [[TMP0]], 44
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP6]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
+; CHECK:       bb2.nonchr:
+; CHECK-NEXT:    [[V41_NONCHR:%.*]] = add i32 [[TMP0]], 99
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[TMP7:%.*]] = phi i32 [ [[V40]], [[BB0]] ], [ [[V40_NONCHR]], [[BB2_NONCHR]] ], [ [[V40_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT:    [[V42:%.*]] = phi i32 [ [[V41]], [[BB0]] ], [ [[V41_NONCHR]], [[BB2_NONCHR]] ], [ [[V40_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT:    [[V43:%.*]] = add i32 [[V42]], [[TMP7]]
+; CHECK-NEXT:    ret i32 [[V43]]
+;
+entry:
+  %0 = load i32, i32* %i
+  %1 = and i32 %0, 1
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+  call void @foo()
+  br label %bb1
+
+bb1:
+  %v40 = add i32 %0, 44
+  %3 = and i32 %0, 2
+  %4 = icmp eq i32 %3, 0
+  br i1 %4, label %bb3, label %bb2, !prof !15
+
+bb2:
+  %v41 = add i32 %0, 99
+  call void @foo()
+  br label %bb3
+
+bb3:
+  %v42 = phi i32 [ %v41, %bb2 ], [ %v40, %bb1 ]
+  %v43 = add i32 %v42, %v40
+  ret i32 %v43
+}
+
+; Two consecutive regions have an entry in the middle of them. No CHR happens.
+; Roughly,
+; if ((i & 4) != 0) {
+;   if (!j)
+;     goto bb1
+; } else {
+;   t0 = (i & 1)
+;   if (t0 != 0) { // Likely true
+;     foo()
+;     s = (i & 1) + i
+;   }
+;  bb1:
+;   p = phi i, t0, s
+;   if ((i & 2) != 0) // Likely true
+;     foo()
+;     q = p + (i & 2)
+; }
+; r = phi p, q, i
+; return r
+; ->
+; (no change)
+define i32 @test_chr_17(i32 %i, i1 %j) !prof !14 {
+; CHECK-LABEL: @test_chr_17(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V0:%.*]] = and i32 [[I:%.*]], 4
+; CHECK-NEXT:    [[V1:%.*]] = icmp eq i32 [[V0]], 0
+; CHECK-NEXT:    br i1 [[V1]], label [[BBE:%.*]], label [[BBQ:%.*]]
+; CHECK:       bbq:
+; CHECK-NEXT:    br i1 [[J:%.*]], label [[BB3:%.*]], label [[BB1:%.*]]
+; CHECK:       bbe:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[I]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[BB1]], label [[BB0:%.*]], !prof !16
+; CHECK:       bb0:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    [[S:%.*]] = add i32 [[TMP0]], [[I]]
+; CHECK-NEXT:    br label [[BB1]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[P:%.*]] = phi i32 [ [[I]], [[BBQ]] ], [ [[TMP0]], [[BBE]] ], [ [[S]], [[BB0]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[I]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[BB3]], label [[BB2:%.*]], !prof !16
+; CHECK:       bb2:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    [[Q:%.*]] = add i32 [[P]], [[TMP2]]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[P]], [[BB1]] ], [ [[Q]], [[BB2]] ], [ [[I]], [[BBQ]] ]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  %v0 = and i32 %i, 4
+  %v1 = icmp eq i32 %v0, 0
+  br i1 %v1, label %bbe, label %bbq
+
+bbq:
+  br i1 %j, label %bb3, label %bb1
+
+bbe:
+  %0 = and i32 %i, 1
+  %1 = icmp eq i32 %0, 0
+  br i1 %1, label %bb1, label %bb0, !prof !15
+
+bb0:
+  call void @foo()
+  %s = add i32 %0, %i
+  br label %bb1
+
+bb1:
+  %p = phi i32 [ %i, %bbq ], [ %0, %bbe ], [ %s, %bb0 ]
+  %2 = and i32 %i, 2
+  %3 = icmp eq i32 %2, 0
+  br i1 %3, label %bb3, label %bb2, !prof !15
+
+bb2:
+  call void @foo()
+  %q = add i32 %p, %2
+  br label %bb3
+
+bb3:
+  %r = phi i32 [ %p, %bb1 ], [ %q, %bb2 ], [ %i, %bbq ]
+  ret i32 %r
+}
+
+; Select + br, there's a loop and we need to update the user of an inserted phi
+; at the entry block. This is a regression test for a bug that's fixed.
+; Roughly,
+; do {
+;   inc1 = phi inc2, 0
+;   li = *i
+;   sum1 = sum0 + 42
+;   sum2 = ((li & 1) == 0) ? sum0 : sum1  // Likely false
+;   inc2 = inc1 + 1
+;   if ((li & 4) != 0) // Likely true
+;     sum3 = sum2 + 44
+;   sum4 = phi sum1, sum3
+; } while (inc2 != 100)  // Likely true (loop back)
+; return sum4
+; ->
+; do {
+;   inc1 = phi tmp2, 0  // The first operand needed to be updated
+;   li = *i
+;   sum1 = sum0 + 42
+;   if ((li & 5) == 5) { // Likely true
+;     inc2 = inc1 + 1
+;     sum3 = sum0 + 86
+;   } else {
+;     inc2_nc = inc1 + 1
+;     if ((li & 4) != 0)
+;       sum2_nc = ((li & 1) == 0) ? sum0 : sum1
+;       sum3_nc = sum2_nc + 44
+;     }
+;   tmp2 = phi inc2, inc2_nc
+;   sum4 = phi sum3, sum3_nc, sum1
+; } while (tmp2 != 100)
+; return sum4
+define i32 @test_chr_18(i32* %i, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_18(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB0:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    [[INC1:%.*]] = phi i32 [ [[TMP2:%.*]], [[BB2:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LI:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[SUM1:%.*]] = add i32 [[SUM0:%.*]], 42
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[LI]], 5
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 5
+; CHECK-NEXT:    br i1 [[TMP1]], label [[BB0_SPLIT:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb0.split:
+; CHECK-NEXT:    [[INC2:%.*]] = add i32 [[INC1]], 1
+; CHECK-NEXT:    [[SUM3:%.*]] = add i32 [[SUM0]], 86
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb0.split.nonchr:
+; CHECK-NEXT:    [[A4_NONCHR:%.*]] = and i32 [[LI]], 4
+; CHECK-NEXT:    [[CMP4_NONCHR:%.*]] = icmp eq i32 [[A4_NONCHR]], 0
+; CHECK-NEXT:    [[INC2_NONCHR:%.*]] = add i32 [[INC1]], 1
+; CHECK-NEXT:    br i1 [[CMP4_NONCHR]], label [[BB2]], label [[BB1_NONCHR:%.*]], !prof !16
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[A1:%.*]] = and i32 [[LI]], 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[A1]], 0
+; CHECK-NEXT:    [[SUM2_NONCHR:%.*]] = select i1 [[CMP1]], i32 [[SUM0]], i32 [[SUM1]], !prof !16
+; CHECK-NEXT:    [[SUM3_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], 44
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[TMP2]] = phi i32 [ [[INC2]], [[BB0_SPLIT]] ], [ [[INC2_NONCHR]], [[BB1_NONCHR]] ], [ [[INC2_NONCHR]], [[BB0_SPLIT_NONCHR]] ]
+; CHECK-NEXT:    [[SUM4:%.*]] = phi i32 [ [[SUM3]], [[BB0_SPLIT]] ], [ [[SUM3_NONCHR]], [[BB1_NONCHR]] ], [ [[SUM1]], [[BB0_SPLIT_NONCHR]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP2]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label [[BB3:%.*]], label [[BB0]], !prof !16
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i32 [[SUM4]]
+;
+entry:
+  br label %bb0
+
+bb0:
+  %inc1 = phi i32 [ %inc2, %bb2 ], [ 0, %entry ]
+  %li = load i32, i32* %i
+  %a1 = and i32 %li, 1
+  %cmp1 = icmp eq i32 %a1, 0
+  %sum1 = add i32 %sum0, 42
+  %sum2 = select i1 %cmp1, i32 %sum0, i32 %sum1, !prof !15
+  %a4 = and i32 %li, 4
+  %cmp4 = icmp eq i32 %a4, 0
+  %inc2 = add i32 %inc1, 1
+  br i1 %cmp4, label %bb2, label %bb1, !prof !15
+
+bb1:
+  %sum3 = add i32 %sum2, 44
+  br label %bb2
+
+bb2:
+  %sum4 = phi i32 [ %sum1, %bb0 ], [ %sum3, %bb1 ]
+  %cmp = icmp eq i32 %inc2, 100
+  br i1 %cmp, label %bb3, label %bb0, !prof !15
+
+bb3:
+  ret i32 %sum4
+}
+
+
+; Selects + Brs. Those share the condition value, which causes the
+; targets/operands of the branch/select to be flipped.
+; Roughly,
+; t0 = *i
+; if ((t0 & 255) != 0) {  // Likely true
+;   sum1 = ((t0 & 1) == 0) ? sum0 : (sum0 + 42)  // Likely false
+;   sum2 = ((t0 & 1) == 0) ? sum1 : (sum1 + 43)  // Likely false
+;   if ((t0 & 1) != 0) { // Likely true
+;     sum3 = sum2 + 44
+;     sum4 = ((t0 & 8) == 0) ? sum3 : (sum3 + 44) // Likely false
+;   }
+;   sum5 = phi sum2, sum4
+; }
+; sum6 = phi sum0, sum5
+; return sum6
+; ->
+; t0 = *i
+; if ((t0 & 9) == 9) { // Likely true
+;   tmp3 = sum0 + 85  // Dead
+;   tmp4 = sum0 + 173
+; } else {
+;   if ((t0 & 255) != 0) {
+;     sum2_nc = ((t0 & 1) == 0) ? sum0 : (sum0 + 85)
+;     sum4_nc_v = ((t0 & 8) == 0) ? 44 : 88
+;     sum4_nc = sum2_nc + sum4_nc_v
+;   }
+; }
+; sum6 = phi tmp4, sum0, sum2_nc, sum4_nc
+; return sum6
+define i32 @test_chr_19(i32* %i, i32 %sum0) !prof !14 {
+; CHECK-LABEL: @test_chr_19(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 9
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 9
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb0:
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[SUM0:%.*]], 85
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[SUM0]], 173
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP0]], 255
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP6]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
+; CHECK:       bb0.nonchr:
+; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[SUM0]], 85
+; CHECK-NEXT:    [[SUM2_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM0]], i32 [[TMP9]], !prof !16
+; CHECK-NEXT:    br i1 [[TMP8]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[TMP10:%.*]] = and i32 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT:    [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP11]], i32 44, i32 88, !prof !16
+; CHECK-NEXT:    [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT:    ret i32 [[SUM6]]
+;
+entry:
+  %0 = load i32, i32* %i
+  %1 = and i32 %0, 255
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb3, label %bb0, !prof !15
+
+bb0:
+  %3 = and i32 %0, 1
+  %4 = icmp eq i32 %3, 0
+  %5 = add i32 %sum0, 42
+  %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15
+  %6 = add i32 %sum1, 43
+  %sum2 = select i1 %4, i32 %sum1, i32 %6, !prof !15
+  br i1 %4, label %bb2, label %bb1, !prof !15
+
+bb1:
+  %sum3 = add i32 %sum2, 44
+  %7 = and i32 %0, 8
+  %8 = icmp eq i32 %7, 0
+  %9 = add i32 %sum3, 44
+  %sum4 = select i1 %8, i32 %sum3, i32 %9, !prof !15
+  br label %bb2
+
+bb2:
+  %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
+  br label %bb3
+
+bb3:
+  %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
+  ret i32 %sum6
+}
+
+; Selects. The exit block, which belongs to the top-level region, has a select
+; and causes the top-level region to be the outermost CHR scope with the
+; subscope that includes the entry block with two selects. The outermost CHR
+; scope doesn't see the selects in the entry block as the entry block is in the
+; subscope and incorrectly sets the CHR hoist point to the branch rather than
+; the first select in the entry block and causes the CHR'ed selects ("select i1
+; false...") to be incorrectly positioned above the CHR branch. This is testing
+; against a quirk of how the region analysis handles the entry block.
+; Roughly,
+; i0 = *i
+; sum2 = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
+; sum3 = ((i0 & 4) == 0) ? sum2 : (sum2 + 44) // Likely false
+; if (j)
+;   foo()
+; i5 = *i
+; v13 = (i5 == 44) ? i5 : sum3
+; return v13
+; ->
+; i0 = *i
+; if ((i0 & 6) == 6) { // Likely true
+;   v9 = sum0 + 87
+;   if (j)
+;     foo()
+; } else {
+;   sum2.nc = ((i0 & 2) == 0) ? sum0 : (sum0 + 43)
+;   sum3.nc = ((i0 & 4) == 0) ? sum2.nc : (sum2.nc + 44)
+;   if (j)
+;     foo()
+; }
+; t2 = phi v9, sum3.nc
+; i5 = *i
+; v13 = (i5 == 44) ? 44 : t2
+; return v13
+define i32 @test_chr_20(i32* %i, i32 %sum0, i1 %j) !prof !14 {
+; CHECK-LABEL: @test_chr_20(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[I0]], 6
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 6
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       entry.split:
+; CHECK-NEXT:    [[V9:%.*]] = add i32 [[SUM0:%.*]], 87
+; CHECK-NEXT:    br i1 [[J:%.*]], label [[BB1:%.*]], label [[BB4:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[V8:%.*]] = add i32 [[SUM0]], 43
+; CHECK-NEXT:    [[V3:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT:    [[V4:%.*]] = icmp eq i32 [[V3]], 0
+; CHECK-NEXT:    [[SUM2_NONCHR:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
+; CHECK-NEXT:    [[V6_NONCHR:%.*]] = and i32 [[I0]], 4
+; CHECK-NEXT:    [[V5_NONCHR:%.*]] = icmp eq i32 [[V6_NONCHR]], 0
+; CHECK-NEXT:    [[V9_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], 44
+; CHECK-NEXT:    [[SUM3_NONCHR:%.*]] = select i1 [[V5_NONCHR]], i32 [[SUM2_NONCHR]], i32 [[V9_NONCHR]], !prof !16
+; CHECK-NEXT:    br i1 [[J]], label [[BB1_NONCHR:%.*]], label [[BB4]]
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       bb4:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[V9]], [[BB1]] ], [ [[V9]], [[ENTRY_SPLIT]] ], [ [[SUM3_NONCHR]], [[BB1_NONCHR]] ], [ [[SUM3_NONCHR]], [[ENTRY_SPLIT_NONCHR]] ]
+; CHECK-NEXT:    [[I5:%.*]] = load i32, i32* [[I]], align 4
+; CHECK-NEXT:    [[V12:%.*]] = icmp eq i32 [[I5]], 44
+; CHECK-NEXT:    [[V13:%.*]] = select i1 [[V12]], i32 44, i32 [[TMP2]], !prof !16
+; CHECK-NEXT:    ret i32 [[V13]]
+;
+entry:
+  %i0 = load i32, i32* %i
+  %v3 = and i32 %i0, 2
+  %v4 = icmp eq i32 %v3, 0
+  %v8 = add i32 %sum0, 43
+  %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
+  %v6 = and i32 %i0, 4
+  %v5 = icmp eq i32 %v6, 0
+  %v9 = add i32 %sum2, 44
+  %sum3 = select i1 %v5, i32 %sum2, i32 %v9, !prof !15
+  br i1 %j, label %bb1, label %bb4
+
+bb1:
+  call void @foo()
+  br label %bb4
+
+bb4:
+  %i5 = load i32, i32* %i
+  %v12 = icmp eq i32 %i5, 44
+  %v13 = select i1 %v12, i32 %i5, i32 %sum3, !prof !15
+  ret i32 %v13
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+
+!14 = !{!"function_entry_count", i64 100}
+!15 = !{!"branch_weights", i32 0, i32 1}
+!16 = !{!"branch_weights", i32 1, i32 1}
+; CHECK: !15 = !{!"branch_weights", i32 1000, i32 0}
+; CHECK: !16 = !{!"branch_weights", i32 0, i32 1}
+; CHECK: !17 = !{!"branch_weights", i32 1, i32 1}
+; CHECK: !18 = !{!"branch_weights", i32 1, i32 0}
+; CHECK: !19 = !{!"branch_weights", i32 0, i32 1000}




More information about the llvm-commits mailing list