[llvm] 515eec3 - [IRSim][IROutliner] Add support for outlining PHINodes with the rest of the region.

Andrew Litteken via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 25 16:21:13 PST 2022


Author: Andrew Litteken
Date: 2022-01-25T18:20:10-06:00
New Revision: 515eec3553b02533e9a88ee84bc245d5415163da

URL: https://github.com/llvm/llvm-project/commit/515eec3553b02533e9a88ee84bc245d5415163da
DIFF: https://github.com/llvm/llvm-project/commit/515eec3553b02533e9a88ee84bc245d5415163da.diff

LOG: [IRSim][IROutliner] Add support for outlining PHINodes with the rest of the region.

Added: 
    llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
    llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
    llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
    llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
    llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
    llvm/test/Transforms/IROutliner/phi-nodes-simple.ll

Modified: 
    llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
    llvm/include/llvm/Transforms/IPO/IROutliner.h
    llvm/lib/Analysis/IRSimilarityIdentifier.cpp
    llvm/lib/Transforms/IPO/IROutliner.cpp
    llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
    llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
    llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
    llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index 966bf02d128ec..c7759e0bbf0f8 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -214,6 +214,16 @@ struct IRInstructionData
   /// function name as a differentiating parameter.
   void setCalleeName(bool MatchByName = true);
 
+  /// For an IRInstructionData containing a PHINode, finds the
+  /// relative distances from the incoming basic block to the current block by
+  /// taking the difference of the number assigned to the current basic block
+  /// and the incoming basic block of the branch.
+  ///
+  /// \param BasicBlockToInteger - The mapping of basic blocks to their location
+  /// in the module.
+  void
+  setPHIPredecessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
+
   /// Hashes \p Value based on its opcode, types, and operand types.
   /// Two IRInstructionData instances produce the same hash when they perform
   /// the same operation.
@@ -497,8 +507,11 @@ struct IRInstructionMapper {
         return Legal;
       return Illegal;
     }
-    // TODO: Determine a scheme to resolve when the labels are similar enough.
-    InstrType visitPHINode(PHINode &PN) { return Illegal; }
+    InstrType visitPHINode(PHINode &PN) { 
+      if (EnableBranches)
+        return Legal;
+      return Illegal;
+    }
     // TODO: Handle allocas.
     InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; }
     // We exclude variable argument instructions since variable arguments

diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index 9799737a529e3..ed74c8ed0e96c 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -342,8 +342,7 @@ class IROutliner {
     bool visitBranchInst(BranchInst &BI) { 
       return EnableBranches;
     }
-    // TODO: Determine a scheme to resolve when the labels are similar enough.
-    bool visitPHINode(PHINode &PN) { return false; }
+    bool visitPHINode(PHINode &PN) { return EnableBranches; }
     // TODO: Handle allocas.
     bool visitAllocaInst(AllocaInst &AI) { return false; }
     // VAArg instructions are not allowed since this could cause difficulty when

diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index 8c3c0400d6015..0ce3d218eb33a 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -70,6 +70,12 @@ void IRInstructionData::initializeInstruction() {
 
     OperVals.push_back(OI.get());
   }
+
+  // We capture the incoming BasicBlocks as values as well as the incoming
+  // Values in order to check for structural similarity.
+  if (PHINode *PN = dyn_cast<PHINode>(Inst))
+    for (BasicBlock *BB : PN->blocks())
+      OperVals.push_back(BB);
 }
 
 IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
@@ -108,6 +114,34 @@ void IRInstructionData::setCalleeName(bool MatchByName) {
     CalleeName = CI->getCalledFunction()->getName().str();
 }
 
+void IRInstructionData::setPHIPredecessors(
+    DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
+  assert(isa<PHINode>(Inst) && "Instruction must be phi node");
+
+  PHINode *PN = cast<PHINode>(Inst);
+  DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
+
+  BBNumIt = BasicBlockToInteger.find(PN->getParent());
+  assert(BBNumIt != BasicBlockToInteger.end() &&
+         "Could not find location for BasicBlock!");
+
+  int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
+
+  // Convert the incoming blocks of the PHINode to an integer value, based on
+  // the relative distances between the current block and the incoming block.
+  for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
+    BasicBlock *Incoming = PN->getIncomingBlock(Idx);
+    BBNumIt = BasicBlockToInteger.find(Incoming);
+    assert(BBNumIt != BasicBlockToInteger.end() &&
+           "Could not find number for BasicBlock!");
+    int OtherBlockNumber = static_cast<int>(BBNumIt->second);
+
+    int Relative = OtherBlockNumber - CurrentBlockNumber;
+    RelativeBlockLocations.push_back(Relative);
+    RelativeBlockLocations.push_back(Relative);
+  }
+}
+
 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
   switch (CI->getPredicate()) {
   case CmpInst::FCMP_OGT:
@@ -270,6 +304,9 @@ unsigned IRInstructionMapper::mapToLegalUnsigned(
   if (isa<CallInst>(*It))
     ID->setCalleeName(EnableMatchCallsByName);
 
+  if (isa<PHINode>(*It))
+    ID->setPHIPredecessors(BasicBlockToInteger);
+
   // Add to the instruction list
   bool WasInserted;
   DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator

diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index 9c79972443fe6..fd19b669c3c04 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -185,6 +185,44 @@ Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
   return FoundValueOpt.getValueOr(nullptr);
 }
 
+/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
+/// in \p Included to branch to BasicBlock \p Replace if they currently branch
+/// to the BasicBlock \p Find.  This is used to fix up the incoming basic blocks
+/// when PHINodes are included in outlined regions.
+///
+/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
+/// checked.
+/// \param Find - The successor block to be replaced.
+/// \param Replace - The new successor block to branch to.
+/// \param Included - The set of blocks about to be outlined.
+static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find,
+                                      BasicBlock *Replace,
+                                      DenseSet<BasicBlock *> &Included) {
+  for (PHINode &PN : PHIBlock->phis()) {
+    for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd;
+         ++Idx) {
+      // Check if the incoming block is included in the set of blocks being
+      // outlined.
+      BasicBlock *Incoming = PN.getIncomingBlock(Idx);
+      if (!Included.contains(Incoming))
+        continue;
+
+      BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator());
+      assert(BI && "Not a branch instruction?");
+      // Look over the branching instructions into this block to see if we
+      // used to branch to Find in this outlined block.
+      for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End;
+           Succ++) {
+        // If we have found the block to replace, we do so here.
+        if (BI->getSuccessor(Succ) != Find)
+          continue;
+        BI->setSuccessor(Succ, Replace);
+      }
+    }
+  }
+}
+
+
 void OutlinableRegion::splitCandidate() {
   assert(!CandidateSplit && "Candidate already split!");
 
@@ -215,6 +253,39 @@ void OutlinableRegion::splitCandidate() {
   StartBB = StartInst->getParent();
   PrevBB = StartBB;
 
+  DenseSet<BasicBlock *> BBSet;
+  Candidate->getBasicBlocks(BBSet);
+
+  // We iterate over the PHINodes at the start of the region and count, for
+  // each one, the incoming blocks that lie outside of the region.  If any
+  // PHINode has more than one such predecessor, we ignore this region since
+  // we are unable to handle the severing of the phi node right now.
+  BasicBlock::iterator It = StartInst->getIterator();
+  while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
+    unsigned NumPredsOutsideRegion = 0;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!BBSet.contains(PN->getIncomingBlock(i)))
+        ++NumPredsOutsideRegion;
+
+    if (NumPredsOutsideRegion > 1)
+      return;
+    
+    It++;
+  }
+
+  // If the region starts with a PHINode, but is not the initial instruction of
+  // the BasicBlock, we ignore this region for now.
+  if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
+    return;
+  
+  // If the region ends with a PHINode, but does not contain all of the phi node
+  // instructions of the region, we ignore it for now.
+  if (isa<PHINode>(BackInst)) {
+    EndBB = BackInst->getParent();
+    if (BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
+      return;
+  }
+
   // The basic block gets split like so:
   // block:                 block:
   //   inst1                  inst1
@@ -241,12 +312,20 @@ void OutlinableRegion::splitCandidate() {
     FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
     EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
     FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
-    return;
+  } else {
+    EndBB = BackInst->getParent();
+    EndsInBranch = true;
+    FollowBB = nullptr;
   }
 
-  EndBB = BackInst->getParent();
-  EndsInBranch = true;
-  FollowBB = nullptr;
+  // Refind the basic block set.
+  BBSet.clear();
+  Candidate->getBasicBlocks(BBSet);
+  // For the phi nodes in the new starting basic block of the region, we
+  // reassign the targets of the basic blocks branching instructions.
+  replaceTargetsFromPHINode(StartBB, PrevBB, StartBB, BBSet);
+  if (FollowBB)
+    replaceTargetsFromPHINode(FollowBB, EndBB, FollowBB, BBSet);
 }
 
 void OutlinableRegion::reattachCandidate() {
@@ -268,15 +347,21 @@ void OutlinableRegion::reattachCandidate() {
   //   inst4
   assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
 
-  // StartBB should only have one predecessor since we put an unconditional
-  // branch at the end of PrevBB when we split the BasicBlock.
-  PrevBB = StartBB->getSinglePredecessor();
-  assert(PrevBB != nullptr &&
-         "No Predecessor for the region start basic block!");
-
   assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
   PrevBB->getTerminator()->eraseFromParent();
 
+  // If we are reattaching after outlining, we iterate over the phi nodes to
+  // the initial block, and reassign the branch instructions of the incoming
+  // blocks to the block we are remerging into.
+  if (!ExtractedFunction) {
+    DenseSet<BasicBlock *> BBSet;
+    Candidate->getBasicBlocks(BBSet);
+
+    replaceTargetsFromPHINode(StartBB, StartBB, PrevBB, BBSet);
+    if (!EndsInBranch)
+      replaceTargetsFromPHINode(FollowBB, FollowBB, EndBB, BBSet);
+  }
+
   moveBBContents(*StartBB, *PrevBB);
 
   BasicBlock *PlacementBB = PrevBB;
@@ -1622,7 +1707,8 @@ replaceArgumentUses(OutlinableRegion &Region,
 
       // If this is storing a PHINode, we must make sure it is included in the
       // overall function.
-      if (!isa<PHINode>(ValueOperand)) {
+      if (!isa<PHINode>(ValueOperand) ||
+          Region.Candidate->getGVN(ValueOperand).hasValue()) {
         if (FirstFunction)
           continue;
         Value *CorrVal =
@@ -2161,7 +2247,7 @@ void IROutliner::pruneIncompatibleRegions(
   if (FirstCandidate.getLength() == 2) {
     if (isa<CallInst>(FirstCandidate.front()->Inst) &&
         isa<BranchInst>(FirstCandidate.back()->Inst))
-        return;
+      return;
   }
 
   unsigned CurrentEndIdx = 0;

diff --git a/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll b/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
new file mode 100644
index 0000000000000..e519fb318ba87
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to outline when all of the phi nodes in the starting
+; block are included in the region and there is no more than one predecessor
+; into those phi nodes from outside of the region.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = add i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %e, %test1 ], [ %y, %entry  ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = mul i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32 [[Y]], i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32 [[Y]], i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[TEST1_TO_OUTLINE:%.*]]
+; CHECK:       test1_to_outline:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[E:%.*]], [[TEST1_TO_OUTLINE]] ], [ [[TMP0:%.*]], [[NEWFUNCROOT:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[E]], [[TEST1_TO_OUTLINE]] ], [ [[TMP0]], [[NEWFUNCROOT]] ]
+; CHECK-NEXT:    [[E]] = load i32, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP2:%.*]], [[TMP2]]
+; CHECK-NEXT:    br i1 true, label [[TEST:%.*]], label [[TEST1_TO_OUTLINE]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    ret void
+;

diff --git a/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll b/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
new file mode 100644
index 0000000000000..0f9750576139a
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to propagate inputs to the region into the split PHINode
+; outside of the region if necessary.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = add i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = mul i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[NEXT:%.*]], label [[ENTRY_AFTER_OUTLINE:%.*]]
+; CHECK:       entry_after_outline:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[NEXT:%.*]], label [[ENTRY_AFTER_OUTLINE:%.*]]
+; CHECK:       entry_after_outline:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1:%.*]], [[TMP1]]
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT_EXITSTUB:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[TMP1]], [[ENTRY_TO_OUTLINE]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[TMP1]], [[ENTRY_TO_OUTLINE]] ]
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       next.exitStub:
+; CHECK-NEXT:    ret i1 true
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret i1 false
+;

diff --git a/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
new file mode 100644
index 0000000000000..debaef12687b7
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do not outline when all of the phi nodes in the beginning
+; block are not included in the region.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = add i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %e, %test1 ], [ %y, %entry  ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = mul i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %y, %entry ], [ %e, %test1 ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[E_RELOAD:%.*]], [[TEST1]] ], [ [[Y]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[E_RELOAD]], [[TEST1]] ], [ [[Y]], [[ENTRY]] ]
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[E_LOC]])
+; CHECK-NEXT:    [[E_RELOAD]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[TEST1]], label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[E_RELOAD:%.*]], [[TEST1]] ], [ [[Y]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[Y]], [[ENTRY]] ], [ [[E_RELOAD]], [[TEST1]] ]
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[E_LOC]])
+; CHECK-NEXT:    [[E_RELOAD]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[TEST1]], label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[TEST1_TO_OUTLINE:%.*]]
+; CHECK:       test1_to_outline:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1:%.*]], [[TMP1]]
+; CHECK-NEXT:    br i1 true, label [[TEST:%.*]], label [[TEST1_EXITSTUB:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       test1.exitStub:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    ret i1 true
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    ret i1 false
+;

diff --git a/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
new file mode 100644
index 0000000000000..a5d0314f21359
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do not outline when all of the phi nodes in the end
+; block are not included in the region.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = add i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = mul i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %c, %entry ], [ %e, %test1 ]
+  ret void
+next:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[C]], [[ENTRY]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;

diff --git a/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
index 4e777e862543e..59b08422cec3a 100644
--- a/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
@@ -38,6 +38,8 @@ block_5:
   store i32 %add2, i32* %output, align 4
   store i32 %mul2, i32* %result, align 4
   br label %block_6
+dummy:
+  ret void
 block_6:
   %diff = phi i32 [%aval, %block_4], [%a2val, %block_5]
   ret void
@@ -76,6 +78,8 @@ block_5:
   store i32 %add2, i32* %output, align 4
   store i32 %mul2, i32* %result, align 4
   br label %block_6
+dummy:
+  ret void
 block_6:
   %diff = phi i32 [%aval, %block_4], [%a2val, %block_5]
   ret void
@@ -102,6 +106,8 @@ block_6:
 ; CHECK-NEXT:    [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[BLOCK_6:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       block_6:
 ; CHECK-NEXT:    [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ]
 ; CHECK-NEXT:    ret void
@@ -128,6 +134,8 @@ block_6:
 ; CHECK-NEXT:    [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[BLOCK_6:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       block_6:
 ; CHECK-NEXT:    [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ]
 ; CHECK-NEXT:    ret void

diff --git a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
index 77cb4a5bc476f..397121f000cd9 100644
--- a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
@@ -15,6 +15,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br label %first
+dummy:
+  ret void
 first:
   %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
   ret void
@@ -31,6 +33,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br label %first
+dummy:
+  ret void
 first:
   %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
   ret void
@@ -45,6 +49,8 @@ first:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
@@ -60,6 +66,8 @@ first:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void

diff --git a/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll b/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
new file mode 100644
index 0000000000000..d56ffa5730552
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do extract phi nodes from the regions.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    store i32 2, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;

diff  --git a/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll b/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll
new file mode 100644
index 0000000000000..e5afb89a26138
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to outline the simple phi node case of constants when
+; the corresponding labels match.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  br label %test
+test:
+  br label %first
+first:
+  %0 = phi i32 [ 0, %test ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  br label %test
+test:
+  br label %first
+first:
+  %0 = phi i32 [ 0, %test ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[TEST_TO_OUTLINE:%.*]]
+; CHECK:       test_to_outline:
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ 0, [[TEST_TO_OUTLINE]] ]
+; CHECK-NEXT:    store i32 2, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       test_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;

diff  --git a/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
index 258bbfe131e3e..62d936d68ff2f 100644
--- a/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
+++ b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
@@ -17,6 +17,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br i1 true, label %first, label %next
+dummy:
+  ret void
 first:
   %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
   ret void
@@ -37,6 +39,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br i1 true, label %first, label %next
+dummy:
+  ret void
 first:
   %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
   ret void
@@ -55,6 +59,8 @@ next:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
@@ -74,6 +80,8 @@ next:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
index a8781e8f78af7..97d9be3d8985a 100644
--- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
+++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
@@ -757,26 +757,41 @@ TEST(IRInstructionMapper, BranchLegal) {
   ASSERT_TRUE(UnsignedVec[1] < UnsignedVec[2]);
 }
 
-// In most cases, the illegal instructions we are collecting don't require any
-// sort of setup.  In these cases, we can just only have illegal instructions,
-// and the mapper will create 0 length vectors, and we can check that.
+// Checks that a PHINode is mapped to be legal.
+TEST(IRInstructionMapper, PhiLegal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ]
+                             %1 = add i32 %a, %b
+                             ret i32 0
+                          bb1:
+                             ret i32 1
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
 
-// In cases where we have legal instructions needed to set up the illegal
-// instruction, to check illegal instructions are assigned unsigned integers
-// from the maximum value decreasing to 0, it will be greater than a legal
-// instruction that comes after.  So to check that we have an illegal
-// instruction, we place a legal instruction after an illegal instruction, and
-// check that the illegal unsigned integer is greater than the unsigned integer
-// of the legal instruction.
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
+}
 
-// Checks that a PHINode is mapped to be illegal since there is extra checking
-// needed to ensure that a branch in one region is bin an isomorphic
-// location in a different region.
+// Checks that a PHINode is mapped to be legal.
 TEST(IRInstructionMapper, PhiIllegal) {
   StringRef ModuleString = R"(
                           define i32 @f(i32 %a, i32 %b) {
                           bb0:
                              %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ]
+                             %1 = add i32 %a, %b
                              ret i32 0
                           bb1:
                              ret i32 1
@@ -790,12 +805,25 @@ TEST(IRInstructionMapper, PhiIllegal) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.initializeForBBs(*M);
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
   ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
 }
 
+// In most cases, the illegal instructions we are collecting don't require any
+// sort of setup.  In these cases, we can just only have illegal instructions,
+// and the mapper will create 0 length vectors, and we can check that.
+
+// In cases where we have legal instructions needed to set up the illegal
+// instruction, to check illegal instructions are assigned unsigned integers
+// from the maximum value decreasing to 0, it will be greater than a legal
+// instruction that comes after.  So to check that we have an illegal
+// instruction, we place a legal instruction after an illegal instruction, and
+// check that the illegal unsigned integer is greater than the unsigned integer
+// of the legal instruction.
+
 // Checks that an alloca instruction is mapped to be illegal.
 TEST(IRInstructionMapper, AllocaIllegal) {
   StringRef ModuleString = R"(
@@ -2346,6 +2374,108 @@ TEST(IRSimilarityCandidate, DifferentBranchStructureOutside) {
   ASSERT_TRUE(longSimCandCompare(InstrList, true, 3, 0, 6));
 }
 
+// Checks that the same structure is recognized between two candidates,
+// when the phi predecessor are other blocks inside the same region,
+// the relative distance between the blocks must be the same.
+TEST(IRSimilarityCandidate, SamePHIStructureInternal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          }
+                          
+                          define i32 @f2(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ]
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(11));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_TRUE(longSimCandCompare(InstrList, true, 4, 0, 6));
+}
+
+// Checks that the different structure is recognized between two candidates,
+// when the phi predecessor are other blocks inside the same region,
+// the relative distance between the blocks must be the same.
+TEST(IRSimilarityCandidate, DifferentPHIStructureInternal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb3:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          }
+                          
+                          define i32 @f2(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb3:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb3 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(13));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_FALSE(longSimCandCompare(InstrList, true, 5, 0, 7));
+}
+
 // Checks that two sets of identical instructions are found to be the same.
 // Both sequences of adds have the same operand ordering, and the same
 // instructions, making them strcturally equivalent.


        


More information about the llvm-commits mailing list