[llvm] e8f4e41 - [IRSim][IROutliner] Add support for outlining PHINodes with the rest of the region.

Andrew Litteken via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 25 16:27:17 PST 2022


Author: Andrew Litteken
Date: 2022-01-25T18:25:50-06:00
New Revision: e8f4e41b6bf4627eb075eb7cad88246af5d6777e

URL: https://github.com/llvm/llvm-project/commit/e8f4e41b6bf4627eb075eb7cad88246af5d6777e
DIFF: https://github.com/llvm/llvm-project/commit/e8f4e41b6bf4627eb075eb7cad88246af5d6777e.diff

LOG: [IRSim][IROutliner] Add support for outlining PHINodes with the rest of the region.

We use the same similarity scheme we used for branch instructions for phi nodes, and allow them to be outlined. There is not a lot of special handling needed for these phi nodes when outlining, as they simply act as outputs. The code extractor does not currently allow for non entry blocks within the extracted region to have predecessors, so there are not conflicts to handle with respect to predecessors no longer contained in the function.

Recommit of 515eec3553b02533e9a88ee84bc245d5415163da

Reviewers: paquette

Differential Revision: https://reviews.llvm.org/D106997

Added: 
    llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
    llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
    llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
    llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
    llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
    llvm/test/Transforms/IROutliner/phi-nodes-simple.ll

Modified: 
    llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
    llvm/include/llvm/Transforms/IPO/IROutliner.h
    llvm/lib/Analysis/IRSimilarityIdentifier.cpp
    llvm/lib/Transforms/IPO/IROutliner.cpp
    llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
    llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
    llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
    llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index 966bf02d128ec..c7759e0bbf0f8 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -214,6 +214,16 @@ struct IRInstructionData
   /// function name as a 
diff erentiating parameter.
   void setCalleeName(bool MatchByName = true);
 
+  /// For an IRInstructionData containing a PHINode, finds the
+  /// relative distances from the incoming basic block to the current block by
+  /// taking the 
diff erence of the number assigned to the current basic block
+  /// and the incoming basic block of the branch.
+  ///
+  /// \param BasicBlockToInteger - The mapping of basic blocks to their location
+  /// in the module.
+  void
+  setPHIPredecessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
+
   /// Hashes \p Value based on its opcode, types, and operand types.
   /// Two IRInstructionData instances produce the same hash when they perform
   /// the same operation.
@@ -497,8 +507,11 @@ struct IRInstructionMapper {
         return Legal;
       return Illegal;
     }
-    // TODO: Determine a scheme to resolve when the labels are similar enough.
-    InstrType visitPHINode(PHINode &PN) { return Illegal; }
+    InstrType visitPHINode(PHINode &PN) { 
+      if (EnableBranches)
+        return Legal;
+      return Illegal;
+    }
     // TODO: Handle allocas.
     InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; }
     // We exclude variable argument instructions since variable arguments

diff  --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index 9799737a529e3..ed74c8ed0e96c 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -342,8 +342,7 @@ class IROutliner {
     bool visitBranchInst(BranchInst &BI) { 
       return EnableBranches;
     }
-    // TODO: Determine a scheme to resolve when the labels are similar enough.
-    bool visitPHINode(PHINode &PN) { return false; }
+    bool visitPHINode(PHINode &PN) { return EnableBranches; }
     // TODO: Handle allocas.
     bool visitAllocaInst(AllocaInst &AI) { return false; }
     // VAArg instructions are not allowed since this could cause 
diff iculty when

diff  --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index 8c3c0400d6015..0ce3d218eb33a 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -70,6 +70,12 @@ void IRInstructionData::initializeInstruction() {
 
     OperVals.push_back(OI.get());
   }
+
+  // We capture the incoming BasicBlocks as values as well as the incoming
+  // Values in order to check for structural similarity.
+  if (PHINode *PN = dyn_cast<PHINode>(Inst))
+    for (BasicBlock *BB : PN->blocks())
+      OperVals.push_back(BB);
 }
 
 IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
@@ -108,6 +114,34 @@ void IRInstructionData::setCalleeName(bool MatchByName) {
     CalleeName = CI->getCalledFunction()->getName().str();
 }
 
+void IRInstructionData::setPHIPredecessors(
+    DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
+  assert(isa<PHINode>(Inst) && "Instruction must be phi node");
+
+  PHINode *PN = cast<PHINode>(Inst);
+  DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
+
+  BBNumIt = BasicBlockToInteger.find(PN->getParent());
+  assert(BBNumIt != BasicBlockToInteger.end() &&
+         "Could not find location for BasicBlock!");
+
+  int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
+
+  // Convert the incoming blocks of the PHINode to an integer value, based on
+  // the relative distances between the current block and the incoming block.
+  for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
+    BasicBlock *Incoming = PN->getIncomingBlock(Idx);
+    BBNumIt = BasicBlockToInteger.find(Incoming);
+    assert(BBNumIt != BasicBlockToInteger.end() &&
+           "Could not find number for BasicBlock!");
+    int OtherBlockNumber = static_cast<int>(BBNumIt->second);
+
+    int Relative = OtherBlockNumber - CurrentBlockNumber;
+    RelativeBlockLocations.push_back(Relative);
+    RelativeBlockLocations.push_back(Relative);
+  }
+}
+
 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
   switch (CI->getPredicate()) {
   case CmpInst::FCMP_OGT:
@@ -270,6 +304,9 @@ unsigned IRInstructionMapper::mapToLegalUnsigned(
   if (isa<CallInst>(*It))
     ID->setCalleeName(EnableMatchCallsByName);
 
+  if (isa<PHINode>(*It))
+    ID->setPHIPredecessors(BasicBlockToInteger);
+
   // Add to the instruction list
   bool WasInserted;
   DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator

diff  --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index 9c79972443fe6..fd19b669c3c04 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -185,6 +185,44 @@ Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
   return FoundValueOpt.getValueOr(nullptr);
 }
 
+/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
+/// in \p Included to branch to BasicBlock \p Replace if they currently branch
+/// to the BasicBlock \p Find.  This is used to fix up the incoming basic blocks
+/// when PHINodes are included in outlined regions.
+///
+/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
+/// checked.
+/// \param Find - The successor block to be replaced.
+/// \param Replace - The new succesor block to branch to.
+/// \param Included - The set of blocks about to be outlined.
+static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find,
+                                      BasicBlock *Replace,
+                                      DenseSet<BasicBlock *> &Included) {
+  for (PHINode &PN : PHIBlock->phis()) {
+    for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd;
+         ++Idx) {
+      // Check if the incoming block is included in the set of blocks being
+      // outlined.
+      BasicBlock *Incoming = PN.getIncomingBlock(Idx);
+      if (!Included.contains(Incoming))
+        continue;
+
+      BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator());
+      assert(BI && "Not a branch instruction?");
+      // Look over the branching instructions into this block to see if we
+      // used to branch to Find in this outlined block.
+      for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End;
+           Succ++) {
+        // If we have found the block to replace, we do so here.
+        if (BI->getSuccessor(Succ) != Find)
+          continue;
+        BI->setSuccessor(Succ, Replace);
+      }
+    }
+  }
+}
+
+
 void OutlinableRegion::splitCandidate() {
   assert(!CandidateSplit && "Candidate already split!");
 
@@ -215,6 +253,39 @@ void OutlinableRegion::splitCandidate() {
   StartBB = StartInst->getParent();
   PrevBB = StartBB;
 
+  DenseSet<BasicBlock *> BBSet;
+  Candidate->getBasicBlocks(BBSet);
+
+  // We iterate over the instructions in the region, if we find a PHINode, we
+  // check if there are predecessors outside of the region, if there are,
+  // we ignore this region since we are unable to handle the severing of the
+  // phi node right now. 
+  BasicBlock::iterator It = StartInst->getIterator();
+  while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
+    unsigned NumPredsOutsideRegion = 0;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!BBSet.contains(PN->getIncomingBlock(i)))
+        ++NumPredsOutsideRegion;
+
+    if (NumPredsOutsideRegion > 1)
+      return;
+    
+    It++;
+  }
+
+  // If the region starts with a PHINode, but is not the initial instruction of
+  // the BasicBlock, we ignore this region for now.
+  if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
+    return;
+  
+  // If the region ends with a PHINode, but does not contain all of the phi node
+  // instructions of the region, we ignore it for now.
+  if (isa<PHINode>(BackInst)) {
+    EndBB = BackInst->getParent();
+    if (BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
+      return;
+  }
+
   // The basic block gets split like so:
   // block:                 block:
   //   inst1                  inst1
@@ -241,12 +312,20 @@ void OutlinableRegion::splitCandidate() {
     FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
     EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
     FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
-    return;
+  } else {
+    EndBB = BackInst->getParent();
+    EndsInBranch = true;
+    FollowBB = nullptr;
   }
 
-  EndBB = BackInst->getParent();
-  EndsInBranch = true;
-  FollowBB = nullptr;
+  // Refind the basic block set.
+  BBSet.clear();
+  Candidate->getBasicBlocks(BBSet);
+  // For the phi nodes in the new starting basic block of the region, we
+  // reassign the targets of the basic blocks branching instructions.
+  replaceTargetsFromPHINode(StartBB, PrevBB, StartBB, BBSet);
+  if (FollowBB)
+    replaceTargetsFromPHINode(FollowBB, EndBB, FollowBB, BBSet);
 }
 
 void OutlinableRegion::reattachCandidate() {
@@ -268,15 +347,21 @@ void OutlinableRegion::reattachCandidate() {
   //   inst4
   assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
 
-  // StartBB should only have one predecessor since we put an unconditional
-  // branch at the end of PrevBB when we split the BasicBlock.
-  PrevBB = StartBB->getSinglePredecessor();
-  assert(PrevBB != nullptr &&
-         "No Predecessor for the region start basic block!");
-
   assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
   PrevBB->getTerminator()->eraseFromParent();
 
+  // If we reattaching after outlining, we iterate over the phi nodes to
+  // the initial block, and reassign the branch instructions of the incoming
+  // blocks to the block we are remerging into.
+  if (!ExtractedFunction) {
+    DenseSet<BasicBlock *> BBSet;
+    Candidate->getBasicBlocks(BBSet);
+
+    replaceTargetsFromPHINode(StartBB, StartBB, PrevBB, BBSet);
+    if (!EndsInBranch)
+      replaceTargetsFromPHINode(FollowBB, FollowBB, EndBB, BBSet);
+  }
+
   moveBBContents(*StartBB, *PrevBB);
 
   BasicBlock *PlacementBB = PrevBB;
@@ -1622,7 +1707,8 @@ replaceArgumentUses(OutlinableRegion &Region,
 
       // If this is storing a PHINode, we must make sure it is included in the
       // overall function.
-      if (!isa<PHINode>(ValueOperand)) {
+      if (!isa<PHINode>(ValueOperand) ||
+          Region.Candidate->getGVN(ValueOperand).hasValue()) {
         if (FirstFunction)
           continue;
         Value *CorrVal =
@@ -2161,7 +2247,7 @@ void IROutliner::pruneIncompatibleRegions(
   if (FirstCandidate.getLength() == 2) {
     if (isa<CallInst>(FirstCandidate.front()->Inst) &&
         isa<BranchInst>(FirstCandidate.back()->Inst))
-        return;
+      return;
   }
 
   unsigned CurrentEndIdx = 0;

diff  --git a/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll b/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
new file mode 100644
index 0000000000000..e519fb318ba87
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to outline when all of the phi nodes in the starting
+; block are included in the region and there is no more than one predecessor
+; into those phi nodes from outside of the region.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = add i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %e, %test1 ], [ %y, %entry  ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = mul i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32 [[Y]], i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32 [[Y]], i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[TEST1_TO_OUTLINE:%.*]]
+; CHECK:       test1_to_outline:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[E:%.*]], [[TEST1_TO_OUTLINE]] ], [ [[TMP0:%.*]], [[NEWFUNCROOT:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[E]], [[TEST1_TO_OUTLINE]] ], [ [[TMP0]], [[NEWFUNCROOT]] ]
+; CHECK-NEXT:    [[E]] = load i32, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP2:%.*]], [[TMP2]]
+; CHECK-NEXT:    br i1 true, label [[TEST:%.*]], label [[TEST1_TO_OUTLINE]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    ret void
+;

diff  --git a/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll b/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
new file mode 100644
index 0000000000000..0f9750576139a
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to propogate inputs to the region into the split PHINode
+; outside of the region if necessary.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = add i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = mul i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[NEXT:%.*]], label [[ENTRY_AFTER_OUTLINE:%.*]]
+; CHECK:       entry_after_outline:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[NEXT:%.*]], label [[ENTRY_AFTER_OUTLINE:%.*]]
+; CHECK:       entry_after_outline:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1:%.*]], [[TMP1]]
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT_EXITSTUB:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[TMP1]], [[ENTRY_TO_OUTLINE]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[TMP1]], [[ENTRY_TO_OUTLINE]] ]
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       next.exitStub:
+; CHECK-NEXT:    ret i1 true
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret i1 false
+;

diff  --git a/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
new file mode 100644
index 0000000000000..debaef12687b7
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do not outline when all of the phi nodes in the beginning
+; block are included not in the region.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = add i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %e, %test1 ], [ %y, %entry  ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = mul i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %y, %entry ], [ %e, %test1 ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[E_RELOAD:%.*]], [[TEST1]] ], [ [[Y]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[E_RELOAD]], [[TEST1]] ], [ [[Y]], [[ENTRY]] ]
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[E_LOC]])
+; CHECK-NEXT:    [[E_RELOAD]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[TEST1]], label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[E_RELOAD:%.*]], [[TEST1]] ], [ [[Y]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[Y]], [[ENTRY]] ], [ [[E_RELOAD]], [[TEST1]] ]
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[E_LOC]])
+; CHECK-NEXT:    [[E_RELOAD]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[TEST1]], label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[TEST1_TO_OUTLINE:%.*]]
+; CHECK:       test1_to_outline:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1:%.*]], [[TMP1]]
+; CHECK-NEXT:    br i1 true, label [[TEST:%.*]], label [[TEST1_EXITSTUB:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       test1.exitStub:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    ret i1 true
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    ret i1 false
+;

diff  --git a/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
new file mode 100644
index 0000000000000..a5d0314f21359
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do not  outline when all of the phi nodes in the end
+; block are not included in the region.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = add i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = mul i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %c, %entry ], [ %e, %test1 ]
+  ret void
+next:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[C]], [[ENTRY]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;

diff  --git a/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
index 4e777e862543e..59b08422cec3a 100644
--- a/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
@@ -38,6 +38,8 @@ block_5:
   store i32 %add2, i32* %output, align 4
   store i32 %mul2, i32* %result, align 4
   br label %block_6
+dummy:
+  ret void
 block_6:
   %
diff  = phi i32 [%aval, %block_4], [%a2val, %block_5]
   ret void
@@ -76,6 +78,8 @@ block_5:
   store i32 %add2, i32* %output, align 4
   store i32 %mul2, i32* %result, align 4
   br label %block_6
+dummy:
+  ret void
 block_6:
   %
diff  = phi i32 [%aval, %block_4], [%a2val, %block_5]
   ret void
@@ -102,6 +106,8 @@ block_6:
 ; CHECK-NEXT:    [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[BLOCK_6:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       block_6:
 ; CHECK-NEXT:    [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ]
 ; CHECK-NEXT:    ret void
@@ -128,6 +134,8 @@ block_6:
 ; CHECK-NEXT:    [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[BLOCK_6:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       block_6:
 ; CHECK-NEXT:    [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ]
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
index 77cb4a5bc476f..397121f000cd9 100644
--- a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
@@ -15,6 +15,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br label %first
+dummy:
+  ret void
 first:
   %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
   ret void
@@ -31,6 +33,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br label %first
+dummy:
+  ret void
 first:
   %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
   ret void
@@ -45,6 +49,8 @@ first:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
@@ -60,6 +66,8 @@ first:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll b/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
new file mode 100644
index 0000000000000..d56ffa5730552
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do extract phi nodes from the regions.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    store i32 2, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;

diff  --git a/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll b/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll
new file mode 100644
index 0000000000000..e5afb89a26138
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to outline the simple phi node case of constants when
+; the corresponding labels match.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  br label %test
+test:
+  br label %first
+first:
+  %0 = phi i32 [ 0, %test ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  br label %test
+test:
+  br label %first
+first:
+  %0 = phi i32 [ 0, %test ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[TEST_TO_OUTLINE:%.*]]
+; CHECK:       test_to_outline:
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ 0, [[TEST_TO_OUTLINE]] ]
+; CHECK-NEXT:    store i32 2, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       test_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;

diff  --git a/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
index 258bbfe131e3e..62d936d68ff2f 100644
--- a/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
+++ b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
@@ -17,6 +17,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br i1 true, label %first, label %next
+dummy:
+  ret void
 first:
   %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
   ret void
@@ -37,6 +39,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br i1 true, label %first, label %next
+dummy:
+  ret void
 first:
   %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
   ret void
@@ -55,6 +59,8 @@ next:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
@@ -74,6 +80,8 @@ next:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
index a8781e8f78af7..97d9be3d8985a 100644
--- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
+++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
@@ -757,26 +757,41 @@ TEST(IRInstructionMapper, BranchLegal) {
   ASSERT_TRUE(UnsignedVec[1] < UnsignedVec[2]);
 }
 
-// In most cases, the illegal instructions we are collecting don't require any
-// sort of setup.  In these cases, we can just only have illegal instructions,
-// and the mapper will create 0 length vectors, and we can check that.
+// Checks that a PHINode is mapped to be legal.
+TEST(IRInstructionMapper, PhiLegal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ]
+                             %1 = add i32 %a, %b
+                             ret i32 0
+                          bb1:
+                             ret i32 1
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
 
-// In cases where we have legal instructions needed to set up the illegal
-// instruction, to check illegal instructions are assigned unsigned integers
-// from the maximum value decreasing to 0, it will be greater than a legal
-// instruction that comes after.  So to check that we have an illegal
-// instruction, we place a legal instruction after an illegal instruction, and
-// check that the illegal unsigned integer is greater than the unsigned integer
-// of the legal instruction.
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
+}
 
-// Checks that a PHINode is mapped to be illegal since there is extra checking
-// needed to ensure that a branch in one region is bin an isomorphic
-// location in a 
diff erent region.
+// Checks that a PHINode is mapped to be legal.
 TEST(IRInstructionMapper, PhiIllegal) {
   StringRef ModuleString = R"(
                           define i32 @f(i32 %a, i32 %b) {
                           bb0:
                              %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ]
+                             %1 = add i32 %a, %b
                              ret i32 0
                           bb1:
                              ret i32 1
@@ -790,12 +805,25 @@ TEST(IRInstructionMapper, PhiIllegal) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.initializeForBBs(*M);
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
   ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
 }
 
+// In most cases, the illegal instructions we are collecting don't require any
+// sort of setup.  In these cases, we can just only have illegal instructions,
+// and the mapper will create 0 length vectors, and we can check that.
+
+// In cases where we have legal instructions needed to set up the illegal
+// instruction, to check illegal instructions are assigned unsigned integers
+// from the maximum value decreasing to 0, it will be greater than a legal
+// instruction that comes after.  So to check that we have an illegal
+// instruction, we place a legal instruction after an illegal instruction, and
+// check that the illegal unsigned integer is greater than the unsigned integer
+// of the legal instruction.
+
 // Checks that an alloca instruction is mapped to be illegal.
 TEST(IRInstructionMapper, AllocaIllegal) {
   StringRef ModuleString = R"(
@@ -2346,6 +2374,108 @@ TEST(IRSimilarityCandidate, DifferentBranchStructureOutside) {
   ASSERT_TRUE(longSimCandCompare(InstrList, true, 3, 0, 6));
 }
 
+// Checks that the same structure is recognized between two candidates,
+// when the phi predecessor are other blocks inside the same region,
+// the relative distance between the blocks must be the same.
+TEST(IRSimilarityCandidate, SamePHIStructureInternal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          }
+                          
+                          define i32 @f2(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ]
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(11));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_TRUE(longSimCandCompare(InstrList, true, 4, 0, 6));
+}
+
+// Checks that the 
diff erent structure is recognized between two candidates,
+// when the phi predecessor are other blocks inside the same region,
+// the relative distance between the blocks must be the same.
+TEST(IRSimilarityCandidate, DifferentPHIStructureInternal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb3:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          }
+                          
+                          define i32 @f2(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb3:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb3 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(13));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_FALSE(longSimCandCompare(InstrList, true, 5, 0, 7));
+}
+
 // Checks that two sets of identical instructions are found to be the same.
 // Both sequences of adds have the same operand ordering, and the same
 // instructions, making them strcturally equivalent.


        


More information about the llvm-commits mailing list