[llvm-branch-commits] [llvm] dae3446 - [IRSim][IROutliner] Adding the extraction basics for the IROutliner.

Andrew Litteken via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Dec 17 09:32:48 PST 2020


Author: Andrew Litteken
Date: 2020-12-17T11:27:26-06:00
New Revision: dae34463e3e05a055899b65251efde887a24ec38

URL: https://github.com/llvm/llvm-project/commit/dae34463e3e05a055899b65251efde887a24ec38
DIFF: https://github.com/llvm/llvm-project/commit/dae34463e3e05a055899b65251efde887a24ec38.diff

LOG: [IRSim][IROutliner] Adding the extraction basics for the IROutliner.

Extracting the similar regions is the first step in the IROutliner.

Using the IRSimilarityIdentifier, we collect the SimilarityGroups and
sort them by how many instructions will be removed.  Each
IRSimilarityCandidate is used to define an OutlinableRegion.  Each
region is ordered by their occurrence in the Module and the regions that
are not compatible with previously outlined regions are discarded.

Each region is then extracted with the CodeExtractor into its own
function.

We test that correctly extract in:
test/Transforms/IROutliner/extraction.ll
test/Transforms/IROutliner/address-taken.ll
test/Transforms/IROutliner/outlining-same-globals.ll
test/Transforms/IROutliner/outlining-same-constants.ll
test/Transforms/IROutliner/outlining-different-structure.ll

Recommit of bf899e891387d07dfd12de195ce2a16f62afd5e0 fixing memory
leaks.

Reviewers: paquette, jroelofs, yroux

Differential Revision: https://reviews.llvm.org/D86975

Added: 
    llvm/include/llvm/Transforms/IPO/IROutliner.h
    llvm/lib/Transforms/IPO/IROutliner.cpp
    llvm/test/Transforms/IROutliner/extraction.ll
    llvm/test/Transforms/IROutliner/outlining-address-taken.ll
    llvm/test/Transforms/IROutliner/outlining-different-structure.ll
    llvm/test/Transforms/IROutliner/outlining-same-constants.ll
    llvm/test/Transforms/IROutliner/outlining-same-globals.ll

Modified: 
    llvm/include/llvm/InitializePasses.h
    llvm/include/llvm/Transforms/IPO.h
    llvm/lib/Passes/PassBuilder.cpp
    llvm/lib/Passes/PassRegistry.def
    llvm/lib/Transforms/IPO/CMakeLists.txt
    llvm/lib/Transforms/IPO/IPO.cpp
    llvm/lib/Transforms/IPO/PassManagerBuilder.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index e1b3a8dd3f3a..f77de64e2c64 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -184,6 +184,7 @@ void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
 void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &);
 void initializeIPSCCPLegacyPassPass(PassRegistry&);
 void initializeIRCELegacyPassPass(PassRegistry&);
+void initializeIROutlinerLegacyPassPass(PassRegistry&);
 void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry&);
 void initializeIRTranslatorPass(PassRegistry&);
 void initializeIVUsersWrapperPassPass(PassRegistry&);

diff  --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
index 1918ad76a270..af357181597a 100644
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -215,6 +215,11 @@ ModulePass *createMergeFunctionsPass();
 /// function(s).
 ModulePass *createHotColdSplittingPass();
 
+//===----------------------------------------------------------------------===//
+/// createIROutlinerPass - This pass finds similar code regions and factors
+/// those regions out into functions.
+ModulePass *createIROutlinerPass();
+
 //===----------------------------------------------------------------------===//
 /// createPartialInliningPass - This pass inlines parts of functions.
 ///

diff  --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
new file mode 100644
index 000000000000..80ba40d91ed8
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -0,0 +1,191 @@
+//===- IROutliner.h - Extract similar IR regions into functions ------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// The interface file for the IROutliner which is used by the IROutliner Pass.
+//
+// The outliner uses the IRSimilarityIdentifier to identify the similar regions
+// of code.  It evaluates each set of IRSimilarityCandidates with an estimate of
+// whether it will provide code size reduction.  Each region is extracted using
+// the code extractor.  These extracted functions are consolidated into a single
+// function and called from the extracted call site.
+//
+// For example:
+// \code
+//   %1 = add i32 %a, %b
+//   %2 = add i32 %b, %a
+//   %3 = add i32 %b, %a
+//   %4 = add i32 %a, %b
+// \endcode
+// would become function
+// \code
+// define internal void outlined_ir_function(i32 %0, i32 %1) {
+//   %1 = add i32 %0, %1
+//   %2 = add i32 %1, %0
+//   ret void
+// }
+// \endcode
+// with calls:
+// \code
+//   call void outlined_ir_function(i32 %a, i32 %b)
+//   call void outlined_ir_function(i32 %b, i32 %a)
+// \endcode
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H
+#define LLVM_TRANSFORMS_IPO_IROUTLINER_H
+
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include <set>
+
+struct OutlinableGroup;
+
+namespace llvm {
+using namespace IRSimilarity;
+
+class Module;
+class TargetTransformInfo;
+class OptimizationRemarkEmitter;
+
+/// The OutlinableRegion holds all the information for a specific region, or
+/// sequence of instructions. This includes what values need to be hoisted to
+/// arguments from the extracted function, inputs and outputs to the region, and
+/// mapping from the extracted function arguments to overall function arguments.
+struct OutlinableRegion {
+  /// Describes the region of code.
+  IRSimilarityCandidate *Candidate;
+
+  /// If this region is outlined, the front and back IRInstructionData could
+  /// potentially become invalidated if the only new instruction is a call.
+  /// This ensures that we replace in the instruction in the IRInstructionData.
+  IRInstructionData *NewFront = nullptr;
+  IRInstructionData *NewBack = nullptr;
+
+  /// Used to create an outlined function.
+  CodeExtractor *CE = nullptr;
+
+  /// The call site of the extracted region.
+  CallInst *Call = nullptr;
+
+  /// The function for the extracted region.
+  Function *ExtractedFunction = nullptr;
+
+  /// Flag for whether we have split out the IRSimilarityCanidate. That is,
+  /// make the region contained the IRSimilarityCandidate its own BasicBlock.
+  bool CandidateSplit = false;
+
+  /// Flag for whether we should not consider this region for extraction.
+  bool IgnoreRegion = false;
+
+  /// The BasicBlock that is before the start of the region BasicBlock,
+  /// only defined when the region has been split.
+  BasicBlock *PrevBB = nullptr;
+
+  /// The BasicBlock that contains the starting instruction of the region.
+  BasicBlock *StartBB = nullptr;
+
+  /// The BasicBlock that contains the ending instruction of the region.
+  BasicBlock *EndBB = nullptr;
+
+  /// The BasicBlock that is after the start of the region BasicBlock,
+  /// only defined when the region has been split.
+  BasicBlock *FollowBB = nullptr;
+
+  /// The Outlinable Group that contains this region and structurally similar
+  /// regions to this region.
+  OutlinableGroup *Parent = nullptr;
+
+  OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
+      : Candidate(&C), Parent(&Group) {
+    StartBB = C.getStartBB();
+    EndBB = C.getEndBB();
+  }
+
+  /// For the contained region, split the parent BasicBlock at the starting and
+  /// ending instructions of the contained IRSimilarityCandidate.
+  void splitCandidate();
+
+  /// For the contained region, reattach the BasicBlock at the starting and
+  /// ending instructions of the contained IRSimilarityCandidate, or if the
+  /// function has been extracted, the start and end of the BasicBlock
+  /// containing the called function.
+  void reattachCandidate();
+};
+
+/// This class is a pass that identifies similarity in a Module, extracts
+/// instances of the similarity, and then consolidating the similar regions
+/// in an effort to reduce code size.  It uses the IRSimilarityIdentifier pass
+/// to identify the similar regions of code, and then extracts the similar
+/// sections into a single function.  See the above for an example as to
+/// how code is extracted and consolidated into a single function.
+class IROutliner {
+public:
+  IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI,
+             function_ref<IRSimilarityIdentifier &(Module &)> GIRSI)
+      : getTTI(GTTI), getIRSI(GIRSI) {}
+  bool run(Module &M);
+
+private:
+  /// Find repeated similar code sequences in \p M and outline them into new
+  /// Functions.
+  ///
+  /// \param [in] M - The module to outline from.
+  /// \returns The number of Functions created.
+  unsigned doOutline(Module &M);
+
+  /// Remove all the IRSimilarityCandidates from \p CandidateVec that have
+  /// instructions contained in a previously outlined region and put the
+  /// remaining regions in \p CurrentGroup.
+  ///
+  /// \param [in] CandidateVec - List of similarity candidates for regions with
+  /// the same similarity structure.
+  /// \param [in,out] CurrentGroup - Contains the potential sections to
+  /// be outlined.
+  void
+  pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
+                           OutlinableGroup &CurrentGroup);
+
+  /// Extract \p Region into its own function.
+  ///
+  /// \param [in] Region - The region to be extracted into its own function.
+  /// \returns True if it was successfully outlined.
+  bool extractSection(OutlinableRegion &Region);
+
+  /// The set of outlined Instructions, identified by their location in the
+  /// sequential ordering of instructions in a Module.
+  DenseSet<unsigned> Outlined;
+
+  /// TargetTransformInfo lambda for target specific information.
+  function_ref<TargetTransformInfo &(Function &)> getTTI;
+
+  /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier.
+  function_ref<IRSimilarityIdentifier &(Module &)> getIRSI;
+
+  /// The memory allocator used to allocate the CodeExtractors.
+  SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator;
+
+  /// The memory allocator used to allocate the OutlinableRegions.
+  SpecificBumpPtrAllocator<OutlinableRegion> RegionAllocator;
+
+  /// The memory allocator used to allocate new IRInstructionData.
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+};
+
+/// Pass to outline similar regions.
+class IROutlinerPass : public PassInfoMixin<IROutlinerPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H

diff  --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index aef8db53cf6f..4e8062c6a789 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -105,6 +105,7 @@
 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
 #include "llvm/Transforms/IPO/Inliner.h"
 #include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/IPO/IROutliner.h"
 #include "llvm/Transforms/IPO/LoopExtractor.h"
 #include "llvm/Transforms/IPO/LowerTypeTests.h"
 #include "llvm/Transforms/IPO/MergeFunctions.h"
@@ -289,6 +290,7 @@ extern cl::opt<bool> EnableConstraintElimination;
 extern cl::opt<bool> EnableGVNHoist;
 extern cl::opt<bool> EnableGVNSink;
 extern cl::opt<bool> EnableHotColdSplit;
+extern cl::opt<bool> EnableIROutliner;
 extern cl::opt<bool> EnableOrderFileInstrumentation;
 extern cl::opt<bool> EnableCHR;
 extern cl::opt<bool> EnableUnrollAndJam;
@@ -1313,6 +1315,13 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   if (EnableHotColdSplit && !LTOPreLink)
     MPM.addPass(HotColdSplittingPass());
 
+  // Search the code for similar regions of code. If enough similar regions can
+  // be found where extracting the regions into their own function will decrease
+  // the size of the program, we extract the regions, a deduplicate the
+  // structurally similar regions.
+  if (EnableIROutliner)
+    MPM.addPass(IROutlinerPass());
+
   // Merge functions if requested.
   if (PTO.MergeFunctions)
     MPM.addPass(MergeFunctionsPass());

diff  --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index f971027299d4..977bd46b0633 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -72,6 +72,7 @@ MODULE_PASS("instrprof", InstrProfiling())
 MODULE_PASS("internalize", InternalizePass())
 MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
 MODULE_PASS("ipsccp", IPSCCPPass())
+MODULE_PASS("iroutliner", IROutlinerPass())
 MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs()))
 MODULE_PASS("loop-extract", LoopExtractorPass())
 MODULE_PASS("lowertypetests", LowerTypeTestsPass())

diff  --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index ccaeaff703f8..34788bc10f01 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_component_library(LLVMipo
   GlobalSplit.cpp
   HotColdSplitting.cpp
   IPO.cpp
+  IROutliner.cpp
   InferFunctionAttrs.cpp
   InlineSimple.cpp
   Inliner.cpp

diff  --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp
index 52c4d3229bb7..f4c12dd7f4cd 100644
--- a/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/llvm/lib/Transforms/IPO/IPO.cpp
@@ -36,6 +36,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
   initializeGlobalOptLegacyPassPass(Registry);
   initializeGlobalSplitPass(Registry);
   initializeHotColdSplittingLegacyPassPass(Registry);
+  initializeIROutlinerLegacyPassPass(Registry);
   initializeAlwaysInlinerLegacyPassPass(Registry);
   initializeSimpleInlinerPass(Registry);
   initializeInferFunctionAttrsLegacyPassPass(Registry);

diff  --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
new file mode 100644
index 000000000000..feefeb458bdc
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -0,0 +1,369 @@
+//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+// Implementation for the IROutliner which is used by the IROutliner Pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/IROutliner.h"
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO.h"
+#include <map>
+#include <set>
+#include <vector>
+
+#define DEBUG_TYPE "iroutliner"
+
+using namespace llvm;
+using namespace IRSimilarity;
+
+/// The OutlinableGroup holds all the overarching information for outlining
+/// a set of regions that are structurally similar to one another, such as the
+/// types of the overall function, the output blocks, the sets of stores needed
+/// and a list of the 
diff erent regions. This information is used in the
+/// deduplication of extracted regions with the same structure.
+struct OutlinableGroup {
+  /// The sections that could be outlined
+  std::vector<OutlinableRegion *> Regions;
+
+  /// Flag for whether we should not consider this group of OutlinableRegions
+  /// for extraction.
+  bool IgnoreGroup = false;
+};
+
+/// Move the contents of \p SourceBB to before the last instruction of \p
+/// TargetBB.
+/// \param SourceBB - the BasicBlock to pull Instructions from.
+/// \param TargetBB - the BasicBlock to put Instruction into.
+static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
+  BasicBlock::iterator BBCurr, BBEnd, BBNext;
+  for (BBCurr = SourceBB.begin(), BBEnd = SourceBB.end(); BBCurr != BBEnd;
+       BBCurr = BBNext) {
+    BBNext = std::next(BBCurr);
+    BBCurr->moveBefore(TargetBB, TargetBB.end());
+  }
+}
+
+void OutlinableRegion::splitCandidate() {
+  assert(!CandidateSplit && "Candidate already split!");
+
+  Instruction *StartInst = (*Candidate->begin()).Inst;
+  Instruction *EndInst = (*Candidate->end()).Inst;
+  assert(StartInst && EndInst && "Expected a start and end instruction?");
+  StartBB = StartInst->getParent();
+  PrevBB = StartBB;
+
+  // The basic block gets split like so:
+  // block:                 block:
+  //   inst1                  inst1
+  //   inst2                  inst2
+  //   region1               br block_to_outline
+  //   region2              block_to_outline:
+  //   region3          ->    region1
+  //   region4                region2
+  //   inst3                  region3
+  //   inst4                  region4
+  //                          br block_after_outline
+  //                        block_after_outline:
+  //                          inst3
+  //                          inst4
+
+  std::string OriginalName = PrevBB->getName().str();
+
+  StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
+
+  // This is the case for the inner block since we do not have to include
+  // multiple blocks.
+  EndBB = StartBB;
+  FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
+
+  CandidateSplit = true;
+}
+
+void OutlinableRegion::reattachCandidate() {
+  assert(CandidateSplit && "Candidate is not split!");
+
+  // The basic block gets reattached like so:
+  // block:                        block:
+  //   inst1                         inst1
+  //   inst2                         inst2
+  //   br block_to_outline           region1
+  // block_to_outline:        ->     region2
+  //   region1                       region3
+  //   region2                       region4
+  //   region3                       inst3
+  //   region4                       inst4
+  //   br block_after_outline
+  // block_after_outline:
+  //   inst3
+  //   inst4
+  assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
+  assert(FollowBB != nullptr && "StartBB for Candidate is not defined!");
+
+  // StartBB should only have one predecessor since we put an unconditional
+  // branch at the end of PrevBB when we split the BasicBlock.
+  PrevBB = StartBB->getSinglePredecessor();
+  assert(PrevBB != nullptr &&
+         "No Predecessor for the region start basic block!");
+
+  assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
+  assert(EndBB->getTerminator() && "Terminator removed from EndBB!");
+  PrevBB->getTerminator()->eraseFromParent();
+  EndBB->getTerminator()->eraseFromParent();
+
+  moveBBContents(*StartBB, *PrevBB);
+
+  BasicBlock *PlacementBB = PrevBB;
+  if (StartBB != EndBB)
+    PlacementBB = EndBB;
+  moveBBContents(*FollowBB, *PlacementBB);
+
+  PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
+  PrevBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
+  StartBB->eraseFromParent();
+  FollowBB->eraseFromParent();
+
+  // Make sure to save changes back to the StartBB.
+  StartBB = PrevBB;
+  EndBB = nullptr;
+  PrevBB = nullptr;
+  FollowBB = nullptr;
+
+  CandidateSplit = false;
+}
+
+void IROutliner::pruneIncompatibleRegions(
+    std::vector<IRSimilarityCandidate> &CandidateVec,
+    OutlinableGroup &CurrentGroup) {
+  bool PreviouslyOutlined;
+
+  // Sort from beginning to end, so the IRSimilarityCandidates are in order.
+  stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
+                               const IRSimilarityCandidate &RHS) {
+    return LHS.getStartIdx() < RHS.getStartIdx();
+  });
+
+  unsigned CurrentEndIdx = 0;
+  for (IRSimilarityCandidate &IRSC : CandidateVec) {
+    PreviouslyOutlined = false;
+    unsigned StartIdx = IRSC.getStartIdx();
+    unsigned EndIdx = IRSC.getEndIdx();
+
+    for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
+      if (Outlined.find(Idx) != Outlined.end()) {
+        PreviouslyOutlined = true;
+        break;
+      }
+
+    if (PreviouslyOutlined)
+      continue;
+
+    // TODO: If in the future we can outline across BasicBlocks, we will need to
+    // check all BasicBlocks contained in the region.
+    if (IRSC.getStartBB()->hasAddressTaken())
+      continue;
+
+    // Greedily prune out any regions that will overlap with already chosen
+    // regions.
+    if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
+      continue;
+
+    OutlinableRegion *OS = new (RegionAllocator.Allocate())
+        OutlinableRegion(IRSC, CurrentGroup);
+    CurrentGroup.Regions.push_back(OS);
+
+    CurrentEndIdx = EndIdx;
+  }
+}
+
+bool IROutliner::extractSection(OutlinableRegion &Region) {
+  assert(Region.StartBB != nullptr &&
+         "StartBB for the OutlinableRegion is nullptr!");
+  assert(Region.FollowBB != nullptr &&
+         "StartBB for the OutlinableRegion is nullptr!");
+  Function *OrigF = Region.StartBB->getParent();
+  CodeExtractorAnalysisCache CEAC(*OrigF);
+  Region.ExtractedFunction = Region.CE->extractCodeRegion(CEAC);
+
+  // If the extraction was successful, find the BasicBlock, and reassign the
+  // OutlinableRegion blocks
+  if (!Region.ExtractedFunction) {
+    LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
+                      << "\n");
+    Region.reattachCandidate();
+    return false;
+  }
+
+  BasicBlock *RewrittenBB = Region.FollowBB->getSinglePredecessor();
+  Region.StartBB = RewrittenBB;
+  Region.EndBB = RewrittenBB;
+
+  // The sequences of outlinable regions has now changed.  We must fix the
+  // IRInstructionDataList for consistency.  Although they may not be illegal
+  // instructions, they should not be compared with anything else as they
+  // should not be outlined in this round.  So marking these as illegal is
+  // allowed.
+  IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
+  Region.NewFront = new (InstDataAllocator.Allocate())
+      IRInstructionData(*RewrittenBB->begin(), false, *IDL);
+  Region.NewBack = new (InstDataAllocator.Allocate())
+      IRInstructionData(*std::prev(std::prev(RewrittenBB->end())), false, *IDL);
+
+  // Insert the first IRInstructionData of the new region in front of the
+  // first IRInstructionData of the IRSimilarityCandidate.
+  IDL->insert(Region.Candidate->begin(), *Region.NewFront);
+  // Insert the first IRInstructionData of the new region after the
+  // last IRInstructionData of the IRSimilarityCandidate.
+  IDL->insert(Region.Candidate->end(), *Region.NewBack);
+  // Remove the IRInstructionData from the IRSimilarityCandidate.
+  IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
+
+  assert(RewrittenBB != nullptr &&
+         "Could not find a predecessor after extraction!");
+
+  // Iterate over the new set of instructions to find the new call
+  // instruction.
+  for (Instruction &I : *RewrittenBB)
+    if (CallInst *CI = dyn_cast<CallInst>(&I))
+      if (Region.ExtractedFunction == CI->getCalledFunction()) {
+        Region.Call = CI;
+        break;
+      }
+  Region.reattachCandidate();
+  return true;
+}
+
+unsigned IROutliner::doOutline(Module &M) {
+  // Find the possibile similarity sections.
+  IRSimilarityIdentifier &Identifier = getIRSI(M);
+  SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
+
+  // Sort them by size of extracted sections
+  unsigned OutlinedFunctionNum = 0;
+  // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
+  // to sort them by the potential number of instructions to be outlined
+  if (SimilarityCandidates.size() > 1)
+    llvm::stable_sort(SimilarityCandidates,
+                      [](const std::vector<IRSimilarityCandidate> &LHS,
+                         const std::vector<IRSimilarityCandidate> &RHS) {
+                        return LHS[0].getLength() * LHS.size() >
+                               RHS[0].getLength() * RHS.size();
+                      });
+
+  // Iterate over the possible sets of similarity.
+  for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
+    OutlinableGroup CurrentGroup;
+
+    // Remove entries that were previously outlined
+    pruneIncompatibleRegions(CandidateVec, CurrentGroup);
+
+    // We pruned the number of regions to 0 to 1, meaning that it's not worth
+    // trying to outlined since there is no compatible similar instance of this
+    // code.
+    if (CurrentGroup.Regions.size() < 2)
+      continue;
+
+    // Create a CodeExtractor for each outlinable region.
+    for (OutlinableRegion *OS : CurrentGroup.Regions) {
+      // Break the outlinable region out of its parent BasicBlock into its own
+      // BasicBlocks (see function implementation).
+      OS->splitCandidate();
+      std::vector<BasicBlock *> BE = {OS->StartBB};
+      OS->CE = new (ExtractorAllocator.Allocate())
+          CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
+                        false, "outlined");
+    }
+
+    // Create functions out of all the sections, and mark them as outlined
+    std::vector<OutlinableRegion *> OutlinedRegions;
+    for (OutlinableRegion *OS : CurrentGroup.Regions) {
+      OutlinedFunctionNum++;
+      bool FunctionOutlined = extractSection(*OS);
+      if (FunctionOutlined) {
+        unsigned StartIdx = OS->Candidate->getStartIdx();
+        unsigned EndIdx = OS->Candidate->getEndIdx();
+        for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
+          Outlined.insert(Idx);
+
+        OutlinedRegions.push_back(OS);
+      }
+    }
+
+    CurrentGroup.Regions = std::move(OutlinedRegions);
+  }
+
+  return OutlinedFunctionNum;
+}
+
+bool IROutliner::run(Module &M) { return doOutline(M) > 0; }
+
+// Pass Manager Boilerplate
+class IROutlinerLegacyPass : public ModulePass {
+public:
+  static char ID;
+  IROutlinerLegacyPass() : ModulePass(ID) {
+    initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.addRequired<IRSimilarityIdentifierWrapperPass>();
+  }
+
+  bool runOnModule(Module &M) override;
+};
+
+bool IROutlinerLegacyPass::runOnModule(Module &M) {
+  if (skipModule(M))
+    return false;
+
+  auto GTTI = [this](Function &F) -> TargetTransformInfo & {
+    return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  };
+
+  auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
+    return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
+  };
+
+  return IROutliner(GTTI, GIRSI).run(M);
+}
+
+PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
+  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+  std::function<TargetTransformInfo &(Function &)> GTTI =
+      [&FAM](Function &F) -> TargetTransformInfo & {
+    return FAM.getResult<TargetIRAnalysis>(F);
+  };
+
+  std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
+      [&AM](Module &M) -> IRSimilarityIdentifier & {
+    return AM.getResult<IRSimilarityAnalysis>(M);
+  };
+
+  if (IROutliner(GTTI, GIRSI).run(M))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+char IROutlinerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
+                    false)
+
+ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); }

diff  --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 8dd30ef82bc3..282ce5132944 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -107,6 +107,9 @@ static cl::opt<bool>
 cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false),
     cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass"));
 
+cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden,
+    cl::desc("Enable ir outliner pass"));
+
 static cl::opt<bool> UseLoopVersioningLICM(
     "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
     cl::desc("Enable the experimental Loop Versioning LICM pass"));
@@ -880,6 +883,9 @@ void PassManagerBuilder::populateModulePassManager(
   if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO))
     MPM.add(createHotColdSplittingPass());
 
+  if (EnableIROutliner)
+    MPM.add(createIROutlinerPass());
+
   if (MergeFunctions)
     MPM.add(createMergeFunctionsPass());
 

diff  --git a/llvm/test/Transforms/IROutliner/extraction.ll b/llvm/test/Transforms/IROutliner/extraction.ll
new file mode 100644
index 000000000000..ec0f0ea4854e
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/extraction.ll
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
+
+; This test makes sure we are extracting the found similarity sections
+; correctly at the call site.
+
+define void @extract1() {
+; CHECK-LABEL: @extract1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @extract1.outlined(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+define void @extract2() {
+; CHECK-LABEL: @extract2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @extract2.outlined(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+define void @extract_outs1() #0 {
+; CHECK-LABEL: @extract_outs1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[ADD_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[OUTPUT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    call void @extract_outs1.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
+; CHECK-NEXT:    [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
+; CHECK-NEXT:    [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[OUTPUT]], align 4
+; CHECK-NEXT:    call void @extract_outs1.outlined.1(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %output = alloca i32, align 4
+  %result = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  %0 = load i32, i32* %a, align 4
+  %1 = load i32, i32* %b, align 4
+  %add = add i32 %0, %1
+  store i32 %add, i32* %output, align 4
+  %2 = load i32, i32* %output, align 4
+  %3 = load i32, i32* %output, align 4
+  %mul = mul i32 %2, %add
+  store i32 %mul, i32* %result, align 4
+  ret void
+}
+
+define void @extract_outs2() #0 {
+; CHECK-LABEL: @extract_outs2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[ADD_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[OUTPUT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    call void @extract_outs2.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
+; CHECK-NEXT:    [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
+; CHECK-NEXT:    [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    call void @extract_outs2.outlined.2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %output = alloca i32, align 4
+  %result = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  %0 = load i32, i32* %a, align 4
+  %1 = load i32, i32* %b, align 4
+  %add = add i32 %0, %1
+  store i32 %add, i32* %output, align 4
+  %2 = load i32, i32* %output, align 4
+  %mul = mul i32 %2, %add
+  store i32 %mul, i32* %result, align 4
+  ret void
+}

diff  --git a/llvm/test/Transforms/IROutliner/outlining-address-taken.ll b/llvm/test/Transforms/IROutliner/outlining-address-taken.ll
new file mode 100644
index 000000000000..dbae8e8230d6
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outlining-address-taken.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
+
+; This test shows that we do not outline from basic blocks with their address
+; taken.
+
+ at ba1 = constant i8* blockaddress (@dontoutline, %new_block)
+
+define void @outline_1() {
+; CHECK-LABEL: @outline_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+define void @outline_2() {
+; CHECK-LABEL: @outline_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @[[FUNCTION_2:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+define void @dontoutline() {
+; CHECK-LABEL: @dontoutline(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br label [[NEW_BLOCK:%.*]]
+; CHECK:       new_block:
+; CHECK-NEXT:    store i32 2, i32* [[A]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[B]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[C]], align 4
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  br label %new_block
+new_block:
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+; CHECK: define internal void @[[FUNCTION_2]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
+; CHECK: entry_to_outline:
+; CHECK-NEXT:    store i32 2, i32* [[ARG0]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[ARG1]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[ARG2]], align 4
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[ARG2]], align 4

diff  --git a/llvm/test/Transforms/IROutliner/outlining-
diff erent-structure.ll b/llvm/test/Transforms/IROutliner/outlining-
diff erent-structure.ll
new file mode 100644
index 000000000000..bb4af26bf990
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outlining-
diff erent-structure.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
+
+; This is a negative case to show that when we have the same set of
+; instructions, but in a 
diff erent order, they are not outlined in the same way.
+; In this case, the arguments passed into the function are in a 
diff erent order.
+
+define void @outline_constants1() {
+; CHECK-LABEL: @outline_constants1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 2, i32* [[A]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[B]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[C]], align 4
+; CHECK-NEXT:    call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[C]], i32* [[B]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %cl = load i32, i32* %c
+  %bl = load i32, i32* %b
+  ret void
+}
+
+define void @outline_constants2() {
+; CHECK-LABEL: @outline_constants2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 2, i32* [[A]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[B]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[C]], align 4
+; CHECK-NEXT:    call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
+; CHECK: entry_to_outline:
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
+
+; CHECK: define internal void @[[FUNCTION_1]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
+; CHECK: entry_to_outline:
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[ARG2]], align 4

diff  --git a/llvm/test/Transforms/IROutliner/outlining-same-constants.ll b/llvm/test/Transforms/IROutliner/outlining-same-constants.ll
new file mode 100644
index 000000000000..e6228abc3d28
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outlining-same-constants.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
+
+; This test looks at the constants in the regions, and if it they are the
+; same it outlines them as constants rather than elevating them to arguments.
+
+define void @outline_constants1() {
+; CHECK-LABEL: @outline_constants1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+define void @outline_constants2() {
+; CHECK-LABEL: @outline_constants2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
+; CHECK: entry_to_outline:
+; CHECK-NEXT:    store i32 2, i32* [[ARG0]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[ARG1]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[ARG2]], align 4
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
+
+; CHECK: define internal void @[[FUNCTION_1]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
+; CHECK: entry_to_outline:
+; CHECK-NEXT:    store i32 2, i32* [[ARG0]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[ARG1]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[ARG2]], align 4
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[ARG2]], align 4

diff  --git a/llvm/test/Transforms/IROutliner/outlining-same-globals.ll b/llvm/test/Transforms/IROutliner/outlining-same-globals.ll
new file mode 100644
index 000000000000..9e28cc919010
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outlining-same-globals.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
+
+ at global1 = global i32 1, align 4
+ at global2 = global i32 2, align 4
+
+; This test looks at the globals in the regions, and if it they are the
+; same it outlines the region without elevating the globals to arguments.
+
+define void @outline_globals1() {
+; CHECK-LABEL: @outline_globals1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @[[FUNCTION_0:.*]]()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i32, i32* @global1
+  %1 = load i32, i32* @global2
+  %2 = add i32 %0, %1
+  ret void
+}
+
+define void @outline_globals2() {
+; CHECK-LABEL: @outline_globals2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @[[FUNCTION_1:.*]]()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i32, i32* @global1
+  %1 = load i32, i32* @global2
+  %2 = add i32 %0, %1
+  ret void
+}
+
+; CHECK: define internal void @[[FUNCTION_0]]()
+; CHECK: entry_to_outline:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @global1, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @global2, align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
+
+
+; CHECK: define internal void @[[FUNCTION_1]]()
+; CHECK: entry_to_outline:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @global1, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @global2, align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
+


        


More information about the llvm-branch-commits mailing list