[llvm] r274305 - code hoisting pass based on GVN

Duncan P. N. Exon Smith via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 30 19:00:13 PDT 2016


Reverted in r274320 due to self-hosting failures:
 http://lab.llvm.org:8080/green/job/clang-stage1-configure-RA_build/22349/
 http://lab.llvm.org:8011/builders/clang-x86_64-linux-selfhost-modules/builds/17232

Note that the blamelist on lab.llvm.org:8011 is incorrect.  The previous
build was r274299, but somehow r274305 wasn't included in the blamelist:
 http://lab.llvm.org:8011/builders/clang-x86_64-linux-selfhost-modules

> On 2016-Jun-30, at 17:24, Sebastian Pop via llvm-commits <llvm-commits at lists.llvm.org> wrote:
> 
> Author: spop
> Date: Thu Jun 30 19:24:31 2016
> New Revision: 274305
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=274305&view=rev
> Log:
> code hoisting pass based on GVN
> 
> This pass hoists duplicated computations in the program. The primary goal of
> gvn-hoist is to reduce the size of functions before inline heuristics to reduce
> the total cost of function inlining.
> 
> Pass written by Sebastian Pop, Aditya Kumar, Xiaoyu Hu, and Brian Rzycki.
> Important algorithmic contributions by Daniel Berlin in the form of reviews.
> 
> Differential Revision: http://reviews.llvm.org/D19338
> 
> Added:
>    llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp
>    llvm/trunk/test/Transforms/GVN/hoist.ll
> Modified:
>    llvm/trunk/include/llvm/InitializePasses.h
>    llvm/trunk/include/llvm/LinkAllPasses.h
>    llvm/trunk/include/llvm/Transforms/Scalar.h
>    llvm/trunk/include/llvm/Transforms/Scalar/GVN.h
>    llvm/trunk/lib/Passes/PassRegistry.def
>    llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
>    llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt
>    llvm/trunk/lib/Transforms/Scalar/Scalar.cpp
> 
> Modified: llvm/trunk/include/llvm/InitializePasses.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/InitializePasses.h?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/InitializePasses.h (original)
> +++ llvm/trunk/include/llvm/InitializePasses.h Thu Jun 30 19:24:31 2016
> @@ -119,6 +119,7 @@ void initializeEarlyIfConverterPass(Pass
> void initializeEdgeBundlesPass(PassRegistry&);
> void initializeEfficiencySanitizerPass(PassRegistry&);
> void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry &);
> +void initializeGVNHoistLegacyPassPass(PassRegistry &);
> void initializeExpandISelPseudosPass(PassRegistry&);
> void initializeExpandPostRAPass(PassRegistry&);
> void initializeExternalAAWrapperPassPass(PassRegistry&);
> 
> Modified: llvm/trunk/include/llvm/LinkAllPasses.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/LinkAllPasses.h?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/LinkAllPasses.h (original)
> +++ llvm/trunk/include/llvm/LinkAllPasses.h Thu Jun 30 19:24:31 2016
> @@ -158,6 +158,7 @@ namespace {
>       (void) llvm::createConstantHoistingPass();
>       (void) llvm::createCodeGenPreparePass();
>       (void) llvm::createEarlyCSEPass();
> +      (void) llvm::createGVNHoistPass();
>       (void) llvm::createMergedLoadStoreMotionPass();
>       (void) llvm::createGVNPass();
>       (void) llvm::createMemCpyOptPass();
> 
> Modified: llvm/trunk/include/llvm/Transforms/Scalar.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar.h?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Transforms/Scalar.h (original)
> +++ llvm/trunk/include/llvm/Transforms/Scalar.h Thu Jun 30 19:24:31 2016
> @@ -326,6 +326,13 @@ FunctionPass *createEarlyCSEPass();
> 
> //===----------------------------------------------------------------------===//
> //
> +// GVNHoist - This pass performs a simple and fast GVN pass over the dominator
> +// tree to hoist common expressions from sibling branches.
> +//
> +FunctionPass *createGVNHoistPass();
> +
> +//===----------------------------------------------------------------------===//
> +//
> // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads
> // are hoisted into the header, while stores sink into the footer.
> //
> 
> Modified: llvm/trunk/include/llvm/Transforms/Scalar/GVN.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar/GVN.h?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Transforms/Scalar/GVN.h (original)
> +++ llvm/trunk/include/llvm/Transforms/Scalar/GVN.h Thu Jun 30 19:24:31 2016
> @@ -58,11 +58,7 @@ public:
>   AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
>   MemoryDependenceResults &getMemDep() const { return *MD; }
> 
> -private:
> -  friend class gvn::GVNLegacyPass;
> -
>   struct Expression;
> -  friend struct DenseMapInfo<Expression>;
> 
>   /// This class holds the mapping between values and value numbers.  It is used
>   /// as an efficient mechanism to determine the expression-wise equivalence of
> @@ -104,6 +100,10 @@ private:
>     void verifyRemoved(const Value *) const;
>   };
> 
> +private:
> +  friend class gvn::GVNLegacyPass;
> +  friend struct DenseMapInfo<Expression>;
> +
>   MemoryDependenceResults *MD;
>   DominatorTree *DT;
>   const TargetLibraryInfo *TLI;
> @@ -228,6 +228,13 @@ private:
> /// loads are eliminated by the pass.
> FunctionPass *createGVNPass(bool NoLoads = false);
> 
> +/// \brief A simple and fast domtree-based GVN pass to hoist common expressions
> +/// from sibling branches.
> +struct GVNHoistPass : PassInfoMixin<GVNHoistPass> {
> +  /// \brief Run the pass over the function.
> +  PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
> +};
> +
> }
> 
> #endif
> 
> Modified: llvm/trunk/lib/Passes/PassRegistry.def
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Passes/PassRegistry.def?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Passes/PassRegistry.def (original)
> +++ llvm/trunk/lib/Passes/PassRegistry.def Thu Jun 30 19:24:31 2016
> @@ -128,6 +128,7 @@ FUNCTION_PASS("bdce", BDCEPass())
> FUNCTION_PASS("dce", DCEPass())
> FUNCTION_PASS("dse", DSEPass())
> FUNCTION_PASS("early-cse", EarlyCSEPass())
> +FUNCTION_PASS("gvn-hoist", GVNHoistPass())
> FUNCTION_PASS("instcombine", InstCombinePass())
> FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())
> FUNCTION_PASS("float2int", Float2IntPass())
> 
> Modified: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp (original)
> +++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp Thu Jun 30 19:24:31 2016
> @@ -199,6 +199,7 @@ void PassManagerBuilder::populateFunctio
>   FPM.add(createCFGSimplificationPass());
>   FPM.add(createSROAPass());
>   FPM.add(createEarlyCSEPass());
> +  FPM.add(createGVNHoistPass());
>   FPM.add(createLowerExpectIntrinsicPass());
> }
> 
> 
> Modified: llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt (original)
> +++ llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt Thu Jun 30 19:24:31 2016
> @@ -12,6 +12,7 @@ add_llvm_library(LLVMScalarOpts
>   Float2Int.cpp
>   GuardWidening.cpp
>   GVN.cpp
> +  GVNHoist.cpp
>   InductiveRangeCheckElimination.cpp
>   IndVarSimplify.cpp
>   JumpThreading.cpp
> 
> Added: llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp?rev=274305&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp (added)
> +++ llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp Thu Jun 30 19:24:31 2016
> @@ -0,0 +1,740 @@
> +//===- GVNHoist.cpp - Hoist scalar and load expressions -------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This pass hoists expressions from branches to a common dominator. It uses
> +// GVN (global value numbering) to discover expressions computing the same
> +// values. The primary goal is to reduce the code size, and in some
> +// cases reduce critical path (by exposing more ILP).
> +// Hoisting may affect the performance in some cases. To mitigate that, hoisting
> +// is disabled in the following cases.
> +// 1. Scalars across calls.
> +// 2. geps when corresponding load/store cannot be hoisted.
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/ADT/SmallPtrSet.h"
> +#include "llvm/ADT/Statistic.h"
> +#include "llvm/ADT/DenseMap.h"
> +#include "llvm/Analysis/ValueTracking.h"
> +#include "llvm/Transforms/Scalar.h"
> +#include "llvm/Transforms/Scalar/GVN.h"
> +#include "llvm/Transforms/Utils/MemorySSA.h"
> +#include <functional>
> +#include <unordered_map>
> +#include <vector>
> +
> +using namespace llvm;
> +
> +#define DEBUG_TYPE "gvn-hoist"
> +
> +STATISTIC(NumHoisted, "Number of instructions hoisted");
> +STATISTIC(NumRemoved, "Number of instructions removed");
> +STATISTIC(NumLoadsHoisted, "Number of loads hoisted");
> +STATISTIC(NumLoadsRemoved, "Number of loads removed");
> +STATISTIC(NumStoresHoisted, "Number of stores hoisted");
> +STATISTIC(NumStoresRemoved, "Number of stores removed");
> +STATISTIC(NumCallsHoisted, "Number of calls hoisted");
> +STATISTIC(NumCallsRemoved, "Number of calls removed");
> +
> +static cl::opt<int>
> +    MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1),
> +                        cl::desc("Max number of instructions to hoist "
> +                                 "(default unlimited = -1)"));
> +static cl::opt<int> MaxNumberOfBBSInPath(
> +    "gvn-hoist-max-bbs", cl::Hidden, cl::init(4),
> +    cl::desc("Max number of basic blocks on the path between "
> +             "hoisting locations (default = 4, unlimited = -1)"));
> +
> +static int HoistedCtr = 0;
> +
> +namespace {
> +
> +// Provides a sorting function based on the execution order of two instructions.
> +struct SortByDFSIn {
> +private:
> +  DenseMap<const BasicBlock *, unsigned> &DFSNumber;
> +
> +public:
> +  SortByDFSIn(DenseMap<const BasicBlock *, unsigned> &D) : DFSNumber(D) {}
> +
> +  // Returns true when A executes before B.
> +  bool operator()(const Instruction *A, const Instruction *B) const {
> +    assert(A != B);
> +    const BasicBlock *BA = A->getParent();
> +    const BasicBlock *BB = B->getParent();
> +    unsigned NA = DFSNumber[BA];
> +    unsigned NB = DFSNumber[BB];
> +    if (NA < NB)
> +      return true;
> +    if (NA == NB) {
> +      // Sort them in the order they occur in the same basic block.
> +      BasicBlock::const_iterator AI(A), BI(B);
> +      return std::distance(AI, BI) < 0;
> +    }
> +    return false;
> +  }
> +};
> +
> +// A map from a VN (value number) to all the instructions with that VN.
> +typedef DenseMap<unsigned, SmallVector<Instruction *, 4>> VNtoInsns;
> +
> +// Records all scalar instructions that are candidates for code hoisting.
> +class InsnInfo {
> +  VNtoInsns VNtoScalars;
> +
> +public:
> +  // Inserts I and its value number in VNtoScalars.
> +  void insert(Instruction *I, GVN::ValueTable &VN) {
> +    // Scalar instruction.
> +    unsigned V = VN.lookupOrAdd(I);
> +    VNtoScalars[V].push_back(I);
> +  }
> +
> +  const VNtoInsns &getVNTable() const { return VNtoScalars; }
> +};
> +
> +// Records all load instructions that are candidates for code hoisting.
> +class LoadInfo {
> +  VNtoInsns VNtoLoads;
> +
> +public:
> +  // Insert Load and the value number of its memory address in VNtoLoads.
> +  void insert(LoadInst *Load, GVN::ValueTable &VN) {
> +    if (Load->isSimple()) {
> +      unsigned V = VN.lookupOrAdd(Load->getPointerOperand());
> +      VNtoLoads[V].push_back(Load);
> +    }
> +  }
> +
> +  const VNtoInsns &getVNTable() const { return VNtoLoads; }
> +};
> +
> +// Records all store instructions that are candidates for code hoisting.
> +class StoreInfo {
> +  VNtoInsns VNtoStores;
> +
> +public:
> +  // Insert the Store and a hash number of the store address and the stored
> +  // value in VNtoStores.
> +  void insert(StoreInst *Store, GVN::ValueTable &VN) {
> +    if (!Store->isSimple())
> +      return;
> +    // Hash the store address and the stored value.
> +    Value *Ptr = Store->getPointerOperand();
> +    Value *Val = Store->getValueOperand();
> +    VNtoStores[hash_combine(VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val))]
> +        .push_back(Store);
> +  }
> +
> +  const VNtoInsns &getVNTable() const { return VNtoStores; }
> +};
> +
> +// Records all call instructions that are candidates for code hoisting.
> +class CallInfo {
> +  VNtoInsns VNtoCallsScalars;
> +  VNtoInsns VNtoCallsLoads;
> +  VNtoInsns VNtoCallsStores;
> +
> +public:
> +  // Insert Call and its value numbering in one of the VNtoCalls* containers.
> +  void insert(CallInst *Call, GVN::ValueTable &VN) {
> +    // A call that doesNotAccessMemory is handled as a Scalar,
> +    // onlyReadsMemory will be handled as a Load instruction,
> +    // all other calls will be handled as stores.
> +    unsigned V = VN.lookupOrAdd(Call);
> +
> +    if (Call->doesNotAccessMemory())
> +      VNtoCallsScalars[V].push_back(Call);
> +    else if (Call->onlyReadsMemory())
> +      VNtoCallsLoads[V].push_back(Call);
> +    else
> +      VNtoCallsStores[V].push_back(Call);
> +  }
> +
> +  const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; }
> +
> +  const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; }
> +
> +  const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; }
> +};
> +
> +typedef DenseMap<const BasicBlock *, bool> BBSideEffectsSet;
> +typedef SmallVector<Instruction *, 4> SmallVecInsn;
> +typedef SmallVectorImpl<Instruction *> SmallVecImplInsn;
> +
> +// This pass hoists common computations across branches sharing common
> +// dominator. The primary goal is to reduce the code size, and in some
> +// cases reduce critical path (by exposing more ILP).
> +class GVNHoistLegacyPassImpl {
> +public:
> +  GVN::ValueTable VN;
> +  DominatorTree *DT;
> +  AliasAnalysis *AA;
> +  MemoryDependenceResults *MD;
> +  DenseMap<const BasicBlock *, unsigned> DFSNumber;
> +  BBSideEffectsSet BBSideEffects;
> +  MemorySSA *MSSA;
> +  enum InsKind { Unknown, Scalar, Load, Store };
> +
> +  GVNHoistLegacyPassImpl(DominatorTree *Dt, AliasAnalysis *Aa,
> +                         MemoryDependenceResults *Md)
> +      : DT(Dt), AA(Aa), MD(Md) {}
> +
> +  // Return true when there is exception handling in BB.
> +  bool hasEH(const BasicBlock *BB) {
> +    auto It = BBSideEffects.find(BB);
> +    if (It != BBSideEffects.end())
> +      return It->second;
> +
> +    if (BB->isEHPad() || BB->hasAddressTaken()) {
> +      BBSideEffects[BB] = true;
> +      return true;
> +    }
> +
> +    if (BB->getTerminator()->mayThrow()) {
> +      BBSideEffects[BB] = true;
> +      return true;
> +    }
> +
> +    BBSideEffects[BB] = false;
> +    return false;
> +  }
> +
> +  // Return true when all paths from A to the end of the function pass through
> +  // either B or C.
> +  bool hoistingFromAllPaths(const BasicBlock *A, const BasicBlock *B,
> +                            const BasicBlock *C) {
> +    // Seed the work list with B and C; blocks are erased from it when reached.
> +    SmallPtrSet<const BasicBlock *, 2> WL;
> +    WL.insert(B);
> +    WL.insert(C);
> +
> +    for (auto It = df_begin(A), E = df_end(A); It != E;) {
> +      // There exists a path from A to the exit of the function if we are still
> +      // iterating in DF traversal and we removed all basic blocks from the
> +      // work list.
> +      if (WL.empty())
> +        return false;
> +
> +      const BasicBlock *BB = *It;
> +      if (WL.erase(BB)) {
> +        // Stop DFS traversal when BB is in the work list.
> +        It.skipChildren();
> +        continue;
> +      }
> +
> +      // Check for end of function, calls that do not return, etc.
> +      if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator()))
> +        return false;
> +
> +      // Increment DFS traversal when not skipping children.
> +      ++It;
> +    }
> +
> +    return true;
> +  }
> +
> +  // Each element of a hoisting list contains the basic block where to hoist and
> +  // a list of instructions to be hoisted.
> +  typedef std::pair<BasicBlock *, SmallVecInsn> HoistingPointInfo;
> +  typedef SmallVector<HoistingPointInfo, 4> HoistingPointList;
> +
> +  // Return true when there are memory users of A in the basic block PBB.
> +  bool hasMemoryUse(MemoryAccess *A, const BasicBlock *PBB) {
> +    Value::user_iterator UI = A->user_begin();
> +    Value::user_iterator UE = A->user_end();
> +    const BasicBlock *BBA = A->getBlock();
> +    for (; UI != UE; ++UI)
> +      if (MemoryAccess *UM = dyn_cast<MemoryAccess>(*UI)) {
> +        if (PBB == BBA)
> +          if (MSSA->locallyDominates(UM, A))
> +            return true;
> +        if (PBB == UM->getBlock())
> +          return true;
> +      }
> +    return false;
> +  }
> +
> +  // Check whether it is possible to hoist in between NewHoistPt and BBInsn.
> +  bool safeToHoist(const BasicBlock *NewHoistPt, const BasicBlock *BBInsn,
> +                   InsKind K, int &NBBsOnAllPaths, MemoryAccess *MemdefInsn,
> +                   BasicBlock *BBMemdefInsn, MemoryAccess *MemdefFirst,
> +                   BasicBlock *BBMemdefFirst) {
> +    assert(DT->dominates(NewHoistPt, BBInsn) && "Invalid path");
> +
> +    // Walk all basic blocks reachable in depth-first iteration on the
> +    // inverse CFG from BBInsn to NewHoistPt. These blocks are all the
> +    // blocks that may be executed between the execution of NewHoistPt and
> +    // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe
> +    // on all execution paths.
> +    for (auto I = idf_begin(BBInsn), E = idf_end(BBInsn); I != E;) {
> +      if (*I == NewHoistPt) {
> +        // Stop traversal when reaching NewHoistPt.
> +        I.skipChildren();
> +        continue;
> +      }
> +
> +      // The safety checks for BBInsn will be handled separately.
> +      if (*I != BBInsn) {
> +        // Stop gathering blocks when it is not possible to hoist.
> +        if (hasEH(*I))
> +          return false;
> +
> +        // Check that we do not move a store past loads.
> +        if (K == InsKind::Store) {
> +          if (DT->dominates(BBMemdefInsn, NewHoistPt))
> +            if (hasMemoryUse(MemdefInsn, *I))
> +              return false;
> +
> +          if (DT->dominates(BBMemdefFirst, NewHoistPt))
> +            if (hasMemoryUse(MemdefFirst, *I))
> +              return false;
> +        }
> +      }
> +      ++NBBsOnAllPaths;
> +      ++I;
> +    }
> +
> +    // Check whether there are too many blocks on the hoisting path.
> +    if (MaxNumberOfBBSInPath != -1 && NBBsOnAllPaths >= MaxNumberOfBBSInPath)
> +      return false;
> +
> +    return true;
> +  }
> +
> +  // Return true when it is safe to hoist an instruction Insn to NewHoistPt and
> +  // move the insertion point from HoistPt to NewHoistPt.
> +  bool safeToHoist(const BasicBlock *NewHoistPt, const BasicBlock *HoistPt,
> +                   const Instruction *Insn, const Instruction *First, InsKind K,
> +                   int &NBBsOnAllPaths) {
> +    if (hasEH(HoistPt))
> +      return false;
> +
> +    const BasicBlock *BBInsn = Insn->getParent();
> +    // When HoistPt already contains an instruction to be hoisted, the
> +    // expression is needed on all paths.
> +
> +    // Check that the hoisted expression is needed on all paths: it is unsafe
> +    // to hoist loads to a place where there may be a path not loading from
> +    // the same address: for instance there may be a branch on which the
> +    // address of the load may not be initialized. FIXME: at -Oz we may want
> +    // to hoist scalars to a place where they are partially needed.
> +    if (BBInsn != NewHoistPt &&
> +        !hoistingFromAllPaths(NewHoistPt, HoistPt, BBInsn))
> +      return false;
> +
> +    MemoryAccess *MemdefInsn = nullptr;
> +    MemoryAccess *MemdefFirst = nullptr;
> +    BasicBlock *BBMemdefInsn = nullptr;
> +    BasicBlock *BBMemdefFirst = nullptr;
> +
> +    if (K != InsKind::Scalar) {
> +      // For loads and stores, we check for dependences on the Memory SSA.
> +      MemdefInsn = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(Insn))
> +                       ->getDefiningAccess();
> +      BBMemdefInsn = MemdefInsn->getBlock();
> +
> +      if (DT->properlyDominates(NewHoistPt, BBMemdefInsn))
> +        // Cannot move Insn past BBMemdefInsn to NewHoistPt.
> +        return false;
> +
> +      MemdefFirst = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(First))
> +                        ->getDefiningAccess();
> +      BBMemdefFirst = MemdefFirst->getBlock();
> +
> +      if (DT->properlyDominates(NewHoistPt, BBMemdefFirst))
> +        // Cannot move First past BBMemdefFirst to NewHoistPt.
> +        return false;
> +    }
> +
> +    // Check for unsafe hoistings due to side effects.
> +    if (!safeToHoist(NewHoistPt, HoistPt, K, NBBsOnAllPaths, MemdefInsn,
> +                     BBMemdefInsn, MemdefFirst, BBMemdefFirst) ||
> +        !safeToHoist(NewHoistPt, BBInsn, K, NBBsOnAllPaths, MemdefInsn,
> +                     BBMemdefInsn, MemdefFirst, BBMemdefFirst))
> +      return false;
> +
> +    // Safe to hoist scalars.
> +    if (K == InsKind::Scalar)
> +      return true;
> +
> +    if (DT->properlyDominates(BBMemdefInsn, NewHoistPt) &&
> +        DT->properlyDominates(BBMemdefFirst, NewHoistPt))
> +      return true;
> +
> +    const BasicBlock *BBFirst = First->getParent();
> +    if (BBInsn == BBFirst)
> +      return false;
> +
> +    assert(BBMemdefInsn == NewHoistPt || BBMemdefFirst == NewHoistPt);
> +
> +    if (BBInsn != NewHoistPt && BBFirst != NewHoistPt)
> +      return true;
> +
> +    if (BBInsn == NewHoistPt) {
> +      if (DT->properlyDominates(BBMemdefFirst, NewHoistPt))
> +        return true;
> +      assert(BBInsn == BBMemdefFirst);
> +      if (MSSA->locallyDominates(MSSA->getMemoryAccess(Insn), MemdefFirst))
> +        return false;
> +      return true;
> +    }
> +
> +    if (BBFirst == NewHoistPt) {
> +      if (DT->properlyDominates(BBMemdefInsn, NewHoistPt))
> +        return true;
> +      assert(BBFirst == BBMemdefInsn);
> +      if (MSSA->locallyDominates(MSSA->getMemoryAccess(First), MemdefInsn))
> +        return false;
> +      return true;
> +    }
> +
> +    // No side effects: it is safe to hoist.
> +    return true;
> +  }
> +
> +  // Partition InstructionsToHoist into a set of candidates which can share a
> +  // common hoisting point. The partitions are collected in HPL. K is the kind
> +  // of the instructions in InstructionsToHoist (Scalar, Load, or Store), as
> +  // given by the InsKind enumeration.
> +  void partitionCandidates(SmallVecImplInsn &InstructionsToHoist,
> +                           HoistingPointList &HPL, InsKind K) {
> +    // No need to sort for two instructions.
> +    if (InstructionsToHoist.size() > 2) {
> +      SortByDFSIn Pred(DFSNumber);
> +      std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred);
> +    }
> +
> +    // Create a work list of all the BB of the Insns to be hoisted.
> +    SmallPtrSet<BasicBlock *, 4> WL;
> +    SmallVecImplInsn::iterator II = InstructionsToHoist.begin();
> +    SmallVecImplInsn::iterator Start = II;
> +    BasicBlock *HoistPt = (*II)->getParent();
> +    WL.insert((*II)->getParent());
> +    int NBBsOnAllPaths = 0;
> +
> +    for (++II; II != InstructionsToHoist.end(); ++II) {
> +      Instruction *Insn = *II;
> +      BasicBlock *BB = Insn->getParent();
> +      BasicBlock *NewHoistPt = DT->findNearestCommonDominator(HoistPt, BB);
> +      WL.insert(BB);
> +      if (safeToHoist(NewHoistPt, HoistPt, Insn, *Start, K, NBBsOnAllPaths)) {
> +        // Extend HoistPt to NewHoistPt.
> +        HoistPt = NewHoistPt;
> +        continue;
> +      }
> +      // Not safe to hoist: save the previous work list and start over from BB.
> +      if (std::distance(Start, II) > 1)
> +        HPL.push_back(std::make_pair(HoistPt, SmallVecInsn(Start, II)));
> +      else
> +        WL.clear();
> +
> +      // We start over to compute HoistPt from BB.
> +      Start = II;
> +      HoistPt = BB;
> +      NBBsOnAllPaths = 0;
> +    }
> +
> +    // Save the last partition.
> +    if (std::distance(Start, II) > 1)
> +      HPL.push_back(std::make_pair(HoistPt, SmallVecInsn(Start, II)));
> +  }
> +
> +  // Initialize HPL from Map.
> +  void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL,
> +                              InsKind K) {
> +    for (VNtoInsns::const_iterator It = Map.begin(); It != Map.end(); ++It) {
> +      if (MaxHoistedThreshold != -1 && ++HoistedCtr > MaxHoistedThreshold)
> +        return;
> +
> +      const SmallVecInsn &V = It->second;
> +      if (V.size() < 2)
> +        continue;
> +
> +      // Compute the insertion point and the list of expressions to be hoisted.
> +      SmallVecInsn InstructionsToHoist;
> +      for (auto I : V)
> +        if (!hasEH(I->getParent()))
> +          InstructionsToHoist.push_back(I);
> +
> +      if (InstructionsToHoist.size())
> +        partitionCandidates(InstructionsToHoist, HPL, K);
> +    }
> +  }
> +
> +  // Return true when all operands of Instr are available at insertion point
> +  // HoistPt. When limiting the number of hoisted expressions, one could hoist
> +  // a load without hoisting its access function. So before hoisting any
> +  // expression, make sure that all its operands are available at insert point.
> +  bool allOperandsAvailable(const Instruction *I,
> +                            const BasicBlock *HoistPt) const {
> +    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
> +      const Value *Op = I->getOperand(i);
> +      const Instruction *Inst = dyn_cast<Instruction>(Op);
> +      if (Inst && !DT->dominates(Inst->getParent(), HoistPt))
> +        return false;
> +    }
> +
> +    return true;
> +  }
> +
> +  Instruction *firstOfTwo(Instruction *I, Instruction *J) const {
> +    for (Instruction &I1 : *I->getParent())
> +      if (&I1 == I || &I1 == J)
> +        return &I1;
> +    llvm_unreachable("Both I and J must be from same BB");
> +  }
> +
> +  // Replace the use of From with To in Insn.
> +  void replaceUseWith(Instruction *Insn, Value *From, Value *To) const {
> +    for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
> +         UI != UE;) {
> +      Use &U = *UI++;
> +      if (U.getUser() == Insn) {
> +        U.set(To);
> +        return;
> +      }
> +    }
> +    llvm_unreachable("should replace exactly once");
> +  }
> +
> +  bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt) const {
> +    // Check whether the GEP of a ld/st can be synthesized at HoistPt.
> +    Instruction *Gep = nullptr;
> +    Instruction *Val = nullptr;
> +    if (LoadInst *Ld = dyn_cast<LoadInst>(Repl))
> +      Gep = dyn_cast<Instruction>(Ld->getPointerOperand());
> +    if (StoreInst *St = dyn_cast<StoreInst>(Repl)) {
> +      Gep = dyn_cast<Instruction>(St->getPointerOperand());
> +      Val = dyn_cast<Instruction>(St->getValueOperand());
> +    }
> +
> +    if (!Gep || !isa<GetElementPtrInst>(Gep))
> +      return false;
> +
> +    // Check whether we can compute the Gep at HoistPt.
> +    if (!allOperandsAvailable(Gep, HoistPt))
> +      return false;
> +
> +    // Also check that the stored value is available.
> +    if (Val && !allOperandsAvailable(Val, HoistPt))
> +      return false;
> +
> +    // Copy the gep before moving the ld/st.
> +    Instruction *ClonedGep = Gep->clone();
> +    ClonedGep->insertBefore(HoistPt->getTerminator());
> +    replaceUseWith(Repl, Gep, ClonedGep);
> +
> +    // Also copy Val when it is a gep: geps are not hoisted by default.
> +    if (Val && isa<GetElementPtrInst>(Val)) {
> +      Instruction *ClonedVal = Val->clone();
> +      ClonedVal->insertBefore(HoistPt->getTerminator());
> +      replaceUseWith(Repl, Val, ClonedVal);
> +    }
> +
> +    return true;
> +  }
> +
> +  std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL) {
> +    unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0;
> +    for (const HoistingPointInfo &HP : HPL) {
> +      // Find out whether we already have one of the instructions in HoistPt,
> +      // in which case we do not have to move it.
> +      BasicBlock *HoistPt = HP.first;
> +      const SmallVecInsn &InstructionsToHoist = HP.second;
> +      Instruction *Repl = nullptr;
> +      for (Instruction *I : InstructionsToHoist)
> +        if (I->getParent() == HoistPt) {
> +          // If there are two instructions in HoistPt to be hoisted in place:
> +          // update Repl to be the first one, such that we can rename the uses
> +          // of the second based on the first.
> +          Repl = !Repl ? I : firstOfTwo(Repl, I);
> +        }
> +
> +      if (Repl) {
> +        // Repl is already in HoistPt: it remains in place.
> +        assert(allOperandsAvailable(Repl, HoistPt) &&
> +               "instruction depends on operands that are not available");
> +      } else {
> +        // When we do not find Repl in HoistPt, select the first in the list
> +        // and move it to HoistPt.
> +        Repl = InstructionsToHoist.front();
> +
> +        // We can move Repl in HoistPt only when all operands are available.
> +        // The order in which hoistings are done may influence the availability
> +        // of operands.
> +        if (!allOperandsAvailable(Repl, HoistPt) &&
> +            !makeOperandsAvailable(Repl, HoistPt))
> +          continue;
> +        Repl->moveBefore(HoistPt->getTerminator());
> +      }
> +
> +      if (isa<LoadInst>(Repl))
> +        ++NL;
> +      else if (isa<StoreInst>(Repl))
> +        ++NS;
> +      else if (isa<CallInst>(Repl))
> +        ++NC;
> +      else // Scalar
> +        ++NI;
> +
> +      // Remove and rename all other instructions.
> +      for (Instruction *I : InstructionsToHoist)
> +        if (I != Repl) {
> +          ++NR;
> +          if (isa<LoadInst>(Repl))
> +            ++NumLoadsRemoved;
> +          else if (isa<StoreInst>(Repl))
> +            ++NumStoresRemoved;
> +          else if (isa<CallInst>(Repl))
> +            ++NumCallsRemoved;
> +          I->replaceAllUsesWith(Repl);
> +          I->eraseFromParent();
> +        }
> +    }
> +
> +    NumHoisted += NL + NS + NC + NI;
> +    NumRemoved += NR;
> +    NumLoadsHoisted += NL;
> +    NumStoresHoisted += NS;
> +    NumCallsHoisted += NC;
> +    return {NI, NL + NC + NS};
> +  }
> +
> +  // Hoist all expressions. Returns the number of scalars hoisted
> +  // and the number of non-scalars hoisted.
> +  std::pair<unsigned, unsigned> hoistExpressions(Function &F) {
> +    InsnInfo II;
> +    LoadInfo LI;
> +    StoreInfo SI;
> +    CallInfo CI;
> +    const bool OptForMinSize = F.optForMinSize();
> +    for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
> +      for (Instruction &I1 : *BB) {
> +        if (LoadInst *Load = dyn_cast<LoadInst>(&I1))
> +          LI.insert(Load, VN);
> +        else if (StoreInst *Store = dyn_cast<StoreInst>(&I1))
> +          SI.insert(Store, VN);
> +        else if (CallInst *Call = dyn_cast<CallInst>(&I1)) {
> +          if (IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call)) {
> +            if (isa<DbgInfoIntrinsic>(Intr) ||
> +                Intr->getIntrinsicID() == Intrinsic::assume)
> +              continue;
> +          }
> +          if (Call->mayHaveSideEffects()) {
> +            if (!OptForMinSize)
> +              break;
> +            // We may continue hoisting across calls which write to memory.
> +            if (Call->mayThrow())
> +              break;
> +          }
> +          CI.insert(Call, VN);
> +        } else if (OptForMinSize || !isa<GetElementPtrInst>(&I1))
> +          // Do not hoist scalars past calls that may write to memory because
> +          // that could result in spills later. GEPs are handled separately.
> +          // TODO: We can relax this for targets like AArch64 as they have more
> +          // registers than X86.
> +          II.insert(&I1, VN);
> +      }
> +    }
> +
> +    HoistingPointList HPL;
> +    computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar);
> +    computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load);
> +    computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store);
> +    computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar);
> +    computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load);
> +    computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
> +    return hoist(HPL);
> +  }
> +
> +  bool run(Function &F) {
> +    VN.setDomTree(DT);
> +    VN.setAliasAnalysis(AA);
> +    VN.setMemDep(MD);
> +    bool Res = false;
> +
> +    unsigned I = 0;
> +    for (const BasicBlock *BB : depth_first(&F.getEntryBlock()))
> +      DFSNumber.insert(std::make_pair(BB, ++I));
> +
> +    // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
> +    while (1) {
> +      // FIXME: only compute MemorySSA once. We need to update the analysis at
> +      // the same time as transforming the code.
> +      MemorySSA M(F, AA, DT);
> +      MSSA = &M;
> +
> +      auto HoistStat = hoistExpressions(F);
> +      if (HoistStat.first + HoistStat.second == 0) {
> +        return Res;
> +      }
> +      if (HoistStat.second > 0) {
> +        // To address a limitation of the current GVN, we need to rerun the
> +        // hoisting after we hoisted loads in order to be able to hoist all
> +        // scalars dependent on the hoisted loads. Same for stores.
> +        VN.clear();
> +      }
> +      Res = true;
> +    }
> +
> +    return Res;
> +  }
> +};
> +
> +class GVNHoistLegacyPass : public FunctionPass {
> +public:
> +  static char ID;
> +
> +  GVNHoistLegacyPass() : FunctionPass(ID) {
> +    initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry());
> +  }
> +
> +  bool runOnFunction(Function &F) override {
> +    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
> +    auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
> +    auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
> +
> +    GVNHoistLegacyPassImpl G(&DT, &AA, &MD);
> +    return G.run(F);
> +  }
> +
> +  void getAnalysisUsage(AnalysisUsage &AU) const override {
> +    AU.addRequired<DominatorTreeWrapperPass>();
> +    AU.addRequired<AAResultsWrapperPass>();
> +    AU.addRequired<MemoryDependenceWrapperPass>();
> +    AU.addPreserved<DominatorTreeWrapperPass>();
> +  }
> +};
> +} // namespace
> +
> +PreservedAnalyses GVNHoistPass::run(Function &F,
> +                                    AnalysisManager<Function> &AM) {
> +  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
> +  AliasAnalysis &AA = AM.getResult<AAManager>(F);
> +  MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
> +
> +  GVNHoistLegacyPassImpl G(&DT, &AA, &MD);
> +  if (!G.run(F))
> +    return PreservedAnalyses::all();
> +
> +  PreservedAnalyses PA;
> +  PA.preserve<DominatorTreeAnalysis>();
> +  return PA;
> +}
> +
> +char GVNHoistLegacyPass::ID = 0;
> +INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist",
> +                      "Early GVN Hoisting of Expressions", false, false)
> +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
> +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
> +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
> +INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist",
> +                    "Early GVN Hoisting of Expressions", false, false)
> +
> +FunctionPass *llvm::createGVNHoistPass() { return new GVNHoistLegacyPass(); }
> 
> Modified: llvm/trunk/lib/Transforms/Scalar/Scalar.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/Scalar.cpp?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/Scalar.cpp (original)
> +++ llvm/trunk/lib/Transforms/Scalar/Scalar.cpp Thu Jun 30 19:24:31 2016
> @@ -44,6 +44,7 @@ void llvm::initializeScalarOpts(PassRegi
>   initializeGuardWideningLegacyPassPass(Registry);
>   initializeGVNLegacyPassPass(Registry);
>   initializeEarlyCSELegacyPassPass(Registry);
> +  initializeGVNHoistLegacyPassPass(Registry);
>   initializeFlattenCFGPassPass(Registry);
>   initializeInductiveRangeCheckEliminationPass(Registry);
>   initializeIndVarSimplifyLegacyPassPass(Registry);
> @@ -236,6 +237,10 @@ void LLVMAddEarlyCSEPass(LLVMPassManager
>   unwrap(PM)->add(createEarlyCSEPass());
> }
> 
> +void LLVMAddGVNHoistLegacyPass(LLVMPassManagerRef PM) {
> +  unwrap(PM)->add(createGVNHoistPass());
> +}
> +
> void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
>   unwrap(PM)->add(createTypeBasedAAWrapperPass());
> }
> 
> Added: llvm/trunk/test/Transforms/GVN/hoist.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/hoist.ll?rev=274305&view=auto
> ==============================================================================
> --- llvm/trunk/test/Transforms/GVN/hoist.ll (added)
> +++ llvm/trunk/test/Transforms/GVN/hoist.ll Thu Jun 30 19:24:31 2016
> @@ -0,0 +1,650 @@
> +; RUN: opt -gvn-hoist -S < %s | FileCheck %s
> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> +target triple = "x86_64-unknown-linux-gnu"
> +
> + at GlobalVar = internal global float 1.000000e+00
> +
> +; Check that all scalar expressions are hoisted.
> +;
> +; CHECK-LABEL: @scalarsHoisting
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK-NOT: fmul
> +; CHECK-NOT: fsub
> +define float @scalarsHoisting(float %d, float %min, float %max, float %a) {
> +entry:
> +  %div = fdiv float 1.000000e+00, %d
> +  %cmp = fcmp oge float %div, 0.000000e+00
> +  br i1 %cmp, label %if.then, label %if.else
> +
> +if.then:                                          ; preds = %entry
> +  %sub = fsub float %min, %a
> +  %mul = fmul float %sub, %div
> +  %sub1 = fsub float %max, %a
> +  %mul2 = fmul float %sub1, %div
> +  br label %if.end
> +
> +if.else:                                          ; preds = %entry
> +  %sub3 = fsub float %max, %a
> +  %mul4 = fmul float %sub3, %div
> +  %sub5 = fsub float %min, %a
> +  %mul6 = fmul float %sub5, %div
> +  br label %if.end
> +
> +if.end:                                           ; preds = %if.else, %if.then
> +  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
> +  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
> +  %add = fadd float %tmax.0, %tmin.0
> +  ret float %add
> +}
> +
> +; Check that all loads and scalars depending on the loads are hoisted.
> +; Check that getelementptr computation gets hoisted before the load.
> +;
> +; CHECK-LABEL: @readsAndScalarsHoisting
> +; CHECK: load
> +; CHECK: load
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK-NOT: load
> +; CHECK-NOT: fmul
> +; CHECK-NOT: fsub
> +define float @readsAndScalarsHoisting(float %d, float* %min, float* %max, float* %a) {
> +entry:
> +  %div = fdiv float 1.000000e+00, %d
> +  %cmp = fcmp oge float %div, 0.000000e+00
> +  br i1 %cmp, label %if.then, label %if.else
> +
> +if.then:                                          ; preds = %entry
> +  %A = getelementptr float, float* %min, i32 1
> +  %0 = load float, float* %A, align 4
> +  %1 = load float, float* %a, align 4
> +  %sub = fsub float %0, %1
> +  %mul = fmul float %sub, %div
> +  %2 = load float, float* %max, align 4
> +  %sub1 = fsub float %2, %1
> +  %mul2 = fmul float %sub1, %div
> +  br label %if.end
> +
> +if.else:                                          ; preds = %entry
> +  %3 = load float, float* %max, align 4
> +  %4 = load float, float* %a, align 4
> +  %sub3 = fsub float %3, %4
> +  %mul4 = fmul float %sub3, %div
> +  %B = getelementptr float, float* %min, i32 1
> +  %5 = load float, float* %B, align 4
> +  %sub5 = fsub float %5, %4
> +  %mul6 = fmul float %sub5, %div
> +  br label %if.end
> +
> +if.end:                                           ; preds = %if.else, %if.then
> +  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
> +  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
> +  %add = fadd float %tmax.0, %tmin.0
> +  ret float %add
> +}
> +
> +; Check that we do not hoist loads after a store: the first two loads will be
> +; hoisted, and then the third load will not be hoisted.
> +;
> +; CHECK-LABEL: @readsAndWrites
> +; CHECK: load
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: store
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK-NOT: load
> +; CHECK-NOT: fmul
> +; CHECK-NOT: fsub
> +define float @readsAndWrites(float %d, float* %min, float* %max, float* %a) {
> +entry:
> +  %div = fdiv float 1.000000e+00, %d
> +  %cmp = fcmp oge float %div, 0.000000e+00
> +  br i1 %cmp, label %if.then, label %if.else
> +
> +if.then:                                          ; preds = %entry
> +  %0 = load float, float* %min, align 4
> +  %1 = load float, float* %a, align 4
> +  store float %0, float* @GlobalVar
> +  %sub = fsub float %0, %1
> +  %mul = fmul float %sub, %div
> +  %2 = load float, float* %max, align 4
> +  %sub1 = fsub float %2, %1
> +  %mul2 = fmul float %sub1, %div
> +  br label %if.end
> +
> +if.else:                                          ; preds = %entry
> +  %3 = load float, float* %max, align 4
> +  %4 = load float, float* %a, align 4
> +  %sub3 = fsub float %3, %4
> +  %mul4 = fmul float %sub3, %div
> +  %5 = load float, float* %min, align 4
> +  %sub5 = fsub float %5, %4
> +  %mul6 = fmul float %sub5, %div
> +  br label %if.end
> +
> +if.end:                                           ; preds = %if.else, %if.then
> +  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
> +  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
> +  %add = fadd float %tmax.0, %tmin.0
> +  ret float %add
> +}
> +
> +; Check that we do hoist loads when the store is above the insertion point.
> +;
> +; CHECK-LABEL: @readsAndWriteAboveInsertPt
> +; CHECK: load
> +; CHECK: load
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: fmul
> +; CHECK-NOT: load
> +; CHECK-NOT: fmul
> +; CHECK-NOT: fsub
> +define float @readsAndWriteAboveInsertPt(float %d, float* %min, float* %max, float* %a) {
> +entry:
> +  %div = fdiv float 1.000000e+00, %d
> +  store float 0.000000e+00, float* @GlobalVar
> +  %cmp = fcmp oge float %div, 0.000000e+00
> +  br i1 %cmp, label %if.then, label %if.else
> +
> +if.then:                                          ; preds = %entry
> +  %0 = load float, float* %min, align 4
> +  %1 = load float, float* %a, align 4
> +  %sub = fsub float %0, %1
> +  %mul = fmul float %sub, %div
> +  %2 = load float, float* %max, align 4
> +  %sub1 = fsub float %2, %1
> +  %mul2 = fmul float %sub1, %div
> +  br label %if.end
> +
> +if.else:                                          ; preds = %entry
> +  %3 = load float, float* %max, align 4
> +  %4 = load float, float* %a, align 4
> +  %sub3 = fsub float %3, %4
> +  %mul4 = fmul float %sub3, %div
> +  %5 = load float, float* %min, align 4
> +  %sub5 = fsub float %5, %4
> +  %mul6 = fmul float %sub5, %div
> +  br label %if.end
> +
> +if.end:                                           ; preds = %if.else, %if.then
> +  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
> +  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
> +  %add = fadd float %tmax.0, %tmin.0
> +  ret float %add
> +}
> +
> +; Check that dependent expressions are hoisted.
> +; CHECK-LABEL: @dependentScalarsHoisting
> +; CHECK: fsub
> +; CHECK: fadd
> +; CHECK: fdiv
> +; CHECK: fmul
> +; CHECK-NOT: fsub
> +; CHECK-NOT: fadd
> +; CHECK-NOT: fdiv
> +; CHECK-NOT: fmul
> +define float @dependentScalarsHoisting(float %a, float %b, i1 %c) {
> +entry:
> +  br i1 %c, label %if.then, label %if.else
> +
> +if.then:
> +  %d = fsub float %b, %a
> +  %e = fadd float %d, %a
> +  %f = fdiv float %e, %a
> +  %g = fmul float %f, %a
> +  br label %if.end
> +
> +if.else:
> +  %h = fsub float %b, %a
> +  %i = fadd float %h, %a
> +  %j = fdiv float %i, %a
> +  %k = fmul float %j, %a
> +  br label %if.end
> +
> +if.end:
> +  %r = phi float [ %g, %if.then ], [ %k, %if.else ]
> +  ret float %r
> +}
> +
> +; Check that all independent expressions are hoisted.
> +; CHECK-LABEL: @independentScalarsHoisting
> +; CHECK: fmul
> +; CHECK: fadd
> +; CHECK: fdiv
> +; CHECK: fsub
> +; CHECK-NOT: fsub
> +; CHECK-NOT: fdiv
> +; CHECK-NOT: fmul
> +define float @independentScalarsHoisting(float %a, float %b, i1 %c) {
> +entry:
> +  br i1 %c, label %if.then, label %if.else
> +
> +if.then:
> +  %d = fadd float %b, %a
> +  %e = fsub float %b, %a
> +  %f = fdiv float %b, %a
> +  %g = fmul float %b, %a
> +  br label %if.end
> +
> +if.else:
> +  %i = fadd float %b, %a
> +  %h = fsub float %b, %a
> +  %j = fdiv float %b, %a
> +  %k = fmul float %b, %a
> +  br label %if.end
> +
> +if.end:
> +  %p = phi float [ %d, %if.then ], [ %i, %if.else ]
> +  %q = phi float [ %e, %if.then ], [ %h, %if.else ]
> +  %r = phi float [ %f, %if.then ], [ %j, %if.else ]
> +  %s = phi float [ %g, %if.then ], [ %k, %if.else ]
> +  %t = fadd float %p, %q
> +  %u = fadd float %r, %s
> +  %v = fadd float %t, %u
> +  ret float %v
> +}
> +
> +; Check that we hoist loads and scalar expressions in triangles.
> +; CHECK-LABEL: @triangleHoisting
> +; CHECK: load
> +; CHECK: load
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: fmul
> +; CHECK-NOT: load
> +; CHECK-NOT: fmul
> +; CHECK-NOT: fsub
> +define float @triangleHoisting(float %d, float* %min, float* %max, float* %a) {
> +entry:
> +  %div = fdiv float 1.000000e+00, %d
> +  %cmp = fcmp oge float %div, 0.000000e+00
> +  br i1 %cmp, label %if.then, label %if.end
> +
> +if.then:                                          ; preds = %entry
> +  %0 = load float, float* %min, align 4
> +  %1 = load float, float* %a, align 4
> +  %sub = fsub float %0, %1
> +  %mul = fmul float %sub, %div
> +  %2 = load float, float* %max, align 4
> +  %sub1 = fsub float %2, %1
> +  %mul2 = fmul float %sub1, %div
> +  br label %if.end
> +
> +if.end:                                          ; preds = %if.then, %entry
> +  %p1 = phi float [ %mul2, %if.then ], [ 0.000000e+00, %entry ]
> +  %p2 = phi float [ %mul, %if.then ], [ 0.000000e+00, %entry ]
> +  %3 = load float, float* %max, align 4
> +  %4 = load float, float* %a, align 4
> +  %sub3 = fsub float %3, %4
> +  %mul4 = fmul float %sub3, %div
> +  %5 = load float, float* %min, align 4
> +  %sub5 = fsub float %5, %4
> +  %mul6 = fmul float %sub5, %div
> +
> +  %x = fadd float %p1, %mul6
> +  %y = fadd float %p2, %mul4
> +  %z = fadd float %x, %y
> +  ret float %z
> +}
> +
> +; Check that we hoist loads and scalar expressions into the dominator of a triangle.
> +; CHECK-LABEL: @dominatorHoisting
> +; CHECK: load
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK-NOT: load
> +; CHECK-NOT: fmul
> +; CHECK-NOT: fsub
> +define float @dominatorHoisting(float %d, float* %min, float* %max, float* %a) {
> +entry:
> +  %div = fdiv float 1.000000e+00, %d
> +  %0 = load float, float* %min, align 4
> +  %1 = load float, float* %a, align 4
> +  %sub = fsub float %0, %1
> +  %mul = fmul float %sub, %div
> +  %2 = load float, float* %max, align 4
> +  %sub1 = fsub float %2, %1
> +  %mul2 = fmul float %sub1, %div
> +  %cmp = fcmp oge float %div, 0.000000e+00
> +  br i1 %cmp, label %if.then, label %if.end
> +
> +if.then:                                          ; preds = %entry
> +  %3 = load float, float* %max, align 4
> +  %4 = load float, float* %a, align 4
> +  %sub3 = fsub float %3, %4
> +  %mul4 = fmul float %sub3, %div
> +  %5 = load float, float* %min, align 4
> +  %sub5 = fsub float %5, %4
> +  %mul6 = fmul float %sub5, %div
> +  br label %if.end
> +
> +if.end:                                          ; preds = %if.then, %entry
> +  %p1 = phi float [ %mul4, %if.then ], [ 0.000000e+00, %entry ]
> +  %p2 = phi float [ %mul6, %if.then ], [ 0.000000e+00, %entry ]
> +
> +  %x = fadd float %p1, %mul2
> +  %y = fadd float %p2, %mul
> +  %z = fadd float %x, %y
> +  ret float %z
> +}
> +
> +; Check that we hoist loads and scalar expressions into the dominator of a diamond.
> +; CHECK-LABEL: @domHoisting
> +; CHECK: load
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK-NOT: load
> +; CHECK-NOT: fmul
> +; CHECK-NOT: fsub
> +define float @domHoisting(float %d, float* %min, float* %max, float* %a) {
> +entry:
> +  %div = fdiv float 1.000000e+00, %d
> +  %0 = load float, float* %min, align 4
> +  %1 = load float, float* %a, align 4
> +  %sub = fsub float %0, %1
> +  %mul = fmul float %sub, %div
> +  %2 = load float, float* %max, align 4
> +  %sub1 = fsub float %2, %1
> +  %mul2 = fmul float %sub1, %div
> +  %cmp = fcmp oge float %div, 0.000000e+00
> +  br i1 %cmp, label %if.then, label %if.else
> +
> +if.then:
> +  %3 = load float, float* %max, align 4
> +  %4 = load float, float* %a, align 4
> +  %sub3 = fsub float %3, %4
> +  %mul4 = fmul float %sub3, %div
> +  %5 = load float, float* %min, align 4
> +  %sub5 = fsub float %5, %4
> +  %mul6 = fmul float %sub5, %div
> +  br label %if.end
> +
> +if.else:
> +  %6 = load float, float* %max, align 4
> +  %7 = load float, float* %a, align 4
> +  %sub9 = fsub float %6, %7
> +  %mul10 = fmul float %sub9, %div
> +  %8 = load float, float* %min, align 4
> +  %sub12 = fsub float %8, %7
> +  %mul13 = fmul float %sub12, %div
> +  br label %if.end
> +
> +if.end:
> +  %p1 = phi float [ %mul4, %if.then ], [ %mul10, %if.else ]
> +  %p2 = phi float [ %mul6, %if.then ], [ %mul13, %if.else ]
> +
> +  %x = fadd float %p1, %mul2
> +  %y = fadd float %p2, %mul
> +  %z = fadd float %x, %y
> +  ret float %z
> +}
> +
> +; Check that we do not hoist loads past stores within the same basic block.
> +; CHECK-LABEL: @noHoistInSingleBBWithStore
> +; CHECK: load
> +; CHECK: store
> +; CHECK: load
> +; CHECK: store
> +define i32 @noHoistInSingleBBWithStore() {
> +entry:
> +  %D = alloca i32, align 4
> +  %0 = bitcast i32* %D to i8*
> +  %bf = load i8, i8* %0, align 4
> +  %bf.clear = and i8 %bf, -3
> +  store i8 %bf.clear, i8* %0, align 4
> +  %bf1 = load i8, i8* %0, align 4
> +  %bf.clear1 = and i8 %bf1, 1
> +  store i8 %bf.clear1, i8* %0, align 4
> +  ret i32 0
> +}
> +
> +; Check that we do not hoist loads past calls within the same basic block.
> +; CHECK-LABEL: @noHoistInSingleBBWithCall
> +; CHECK: load
> +; CHECK: call
> +; CHECK: load
> +declare void @foo()
> +define i32 @noHoistInSingleBBWithCall() {
> +entry:
> +  %D = alloca i32, align 4
> +  %0 = bitcast i32* %D to i8*
> +  %bf = load i8, i8* %0, align 4
> +  %bf.clear = and i8 %bf, -3
> +  call void @foo()
> +  %bf1 = load i8, i8* %0, align 4
> +  %bf.clear1 = and i8 %bf1, 1
> +  ret i32 0
> +}
> +
> +; Check that we do not hoist loads past stores in any branch of a diamond.
> +; CHECK-LABEL: @noHoistInDiamondWithOneStore1
> +; CHECK: fdiv
> +; CHECK: fcmp
> +; CHECK: br
> +define float @noHoistInDiamondWithOneStore1(float %d, float* %min, float* %max, float* %a) {
> +entry:
> +  %div = fdiv float 1.000000e+00, %d
> +  %cmp = fcmp oge float %div, 0.000000e+00
> +  br i1 %cmp, label %if.then, label %if.else
> +
> +if.then:                                          ; preds = %entry
> +  store float 0.000000e+00, float* @GlobalVar
> +  %0 = load float, float* %min, align 4
> +  %1 = load float, float* %a, align 4
> +  %sub = fsub float %0, %1
> +  %mul = fmul float %sub, %div
> +  %2 = load float, float* %max, align 4
> +  %sub1 = fsub float %2, %1
> +  %mul2 = fmul float %sub1, %div
> +  br label %if.end
> +
> +if.else:                                          ; preds = %entry
> +  ; There are no side effects on the if.else branch.
> +  %3 = load float, float* %max, align 4
> +  %4 = load float, float* %a, align 4
> +  %sub3 = fsub float %3, %4
> +  %mul4 = fmul float %sub3, %div
> +  %5 = load float, float* %min, align 4
> +  %sub5 = fsub float %5, %4
> +  %mul6 = fmul float %sub5, %div
> +  br label %if.end
> +
> +if.end:                                           ; preds = %if.else, %if.then
> +  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
> +  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
> +
> +  %6 = load float, float* %max, align 4
> +  %7 = load float, float* %a, align 4
> +  %sub6 = fsub float %6, %7
> +  %mul7 = fmul float %sub6, %div
> +  %8 = load float, float* %min, align 4
> +  %sub8 = fsub float %8, %7
> +  %mul9 = fmul float %sub8, %div
> +
> +  %add = fadd float %tmax.0, %tmin.0
> +  ret float %add
> +}
> +
> +; Check that we do not hoist loads past a store in any branch of a diamond.
> +; CHECK-LABEL: @noHoistInDiamondWithOneStore2
> +; CHECK: fdiv
> +; CHECK: fcmp
> +; CHECK: br
> +define float @noHoistInDiamondWithOneStore2(float %d, float* %min, float* %max, float* %a) {
> +entry:
> +  %div = fdiv float 1.000000e+00, %d
> +  %cmp = fcmp oge float %div, 0.000000e+00
> +  br i1 %cmp, label %if.then, label %if.else
> +
> +if.then:                                          ; preds = %entry
> +  ; There are no side effects on the if.then branch.
> +  %0 = load float, float* %min, align 4
> +  %1 = load float, float* %a, align 4
> +  %sub = fsub float %0, %1
> +  %mul = fmul float %sub, %div
> +  %2 = load float, float* %max, align 4
> +  %sub1 = fsub float %2, %1
> +  %mul2 = fmul float %sub1, %div
> +  br label %if.end
> +
> +if.else:                                          ; preds = %entry
> +  store float 0.000000e+00, float* @GlobalVar
> +  %3 = load float, float* %max, align 4
> +  %4 = load float, float* %a, align 4
> +  %sub3 = fsub float %3, %4
> +  %mul4 = fmul float %sub3, %div
> +  %5 = load float, float* %min, align 4
> +  %sub5 = fsub float %5, %4
> +  %mul6 = fmul float %sub5, %div
> +  br label %if.end
> +
> +if.end:                                           ; preds = %if.else, %if.then
> +  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
> +  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
> +
> +  %6 = load float, float* %max, align 4
> +  %7 = load float, float* %a, align 4
> +  %sub6 = fsub float %6, %7
> +  %mul7 = fmul float %sub6, %div
> +  %8 = load float, float* %min, align 4
> +  %sub8 = fsub float %8, %7
> +  %mul9 = fmul float %sub8, %div
> +
> +  %add = fadd float %tmax.0, %tmin.0
> +  ret float %add
> +}
> +
> +; Check that we do not hoist loads outside a loop containing stores.
> +; CHECK-LABEL: @noHoistInLoopsWithStores
> +; CHECK: fdiv
> +; CHECK: fcmp
> +; CHECK: br
> +define float @noHoistInLoopsWithStores(float %d, float* %min, float* %max, float* %a) {
> +entry:
> +  %div = fdiv float 1.000000e+00, %d
> +  %cmp = fcmp oge float %div, 0.000000e+00
> +  br i1 %cmp, label %do.body, label %if.else
> +
> +do.body:
> +  %0 = load float, float* %min, align 4
> +  %1 = load float, float* %a, align 4
> +
> +  ; It is unsafe to hoist the loads outside the loop because of the store.
> +  store float 0.000000e+00, float* @GlobalVar
> +
> +  %sub = fsub float %0, %1
> +  %mul = fmul float %sub, %div
> +  %2 = load float, float* %max, align 4
> +  %sub1 = fsub float %2, %1
> +  %mul2 = fmul float %sub1, %div
> +  br label %while.cond
> +
> +while.cond:
> +  %cmp1 = fcmp oge float %mul2, 0.000000e+00
> +  br i1 %cmp1, label %if.end, label %do.body
> +
> +if.else:
> +  %3 = load float, float* %max, align 4
> +  %4 = load float, float* %a, align 4
> +  %sub3 = fsub float %3, %4
> +  %mul4 = fmul float %sub3, %div
> +  %5 = load float, float* %min, align 4
> +  %sub5 = fsub float %5, %4
> +  %mul6 = fmul float %sub5, %div
> +  br label %if.end
> +
> +if.end:
> +  %tmax.0 = phi float [ %mul2, %while.cond ], [ %mul6, %if.else ]
> +  %tmin.0 = phi float [ %mul, %while.cond ], [ %mul4, %if.else ]
> +
> +  %add = fadd float %tmax.0, %tmin.0
> +  ret float %add
> +}
> +
> +; Check that we hoist stores: all the instructions from the then branch
> +; should be hoisted.
> +; CHECK-LABEL: @hoistStores
> +; CHECK: zext
> +; CHECK: trunc
> +; CHECK: getelementptr
> +; CHECK: load
> +; CHECK: getelementptr
> +; CHECK: store
> +; CHECK: load
> +; CHECK: load
> +; CHECK: zext
> +; CHECK: add
> +; CHECK: store
> +; CHECK: br
> +; CHECK: if.then
> +; CHECK: br
> +
> +%struct.foo = type { i16* }
> +
> +define void @hoistStores(%struct.foo* %s, i32* %coord, i1 zeroext %delta) {
> +entry:
> +  %frombool = zext i1 %delta to i8
> +  %tobool = trunc i8 %frombool to i1
> +  br i1 %tobool, label %if.then, label %if.else
> +
> +if.then:                                          ; preds = %entry
> +  %p = getelementptr inbounds %struct.foo, %struct.foo* %s, i32 0, i32 0
> +  %0 = load i16*, i16** %p, align 8
> +  %incdec.ptr = getelementptr inbounds i16, i16* %0, i32 1
> +  store i16* %incdec.ptr, i16** %p, align 8
> +  %1 = load i16, i16* %0, align 2
> +  %conv = zext i16 %1 to i32
> +  %2 = load i32, i32* %coord, align 4
> +  %add = add i32 %2, %conv
> +  store i32 %add, i32* %coord, align 4
> +  br label %if.end
> +
> +if.else:                                          ; preds = %entry
> +  %p1 = getelementptr inbounds %struct.foo, %struct.foo* %s, i32 0, i32 0
> +  %3 = load i16*, i16** %p1, align 8
> +  %incdec.ptr2 = getelementptr inbounds i16, i16* %3, i32 1
> +  store i16* %incdec.ptr2, i16** %p1, align 8
> +  %4 = load i16, i16* %3, align 2
> +  %conv3 = zext i16 %4 to i32
> +  %5 = load i32, i32* %coord, align 4
> +  %add4 = add i32 %5, %conv3
> +  store i32 %add4, i32* %coord, align 4
> +  %6 = load i16*, i16** %p1, align 8
> +  %incdec.ptr6 = getelementptr inbounds i16, i16* %6, i32 1
> +  store i16* %incdec.ptr6, i16** %p1, align 8
> +  %7 = load i16, i16* %6, align 2
> +  %conv7 = zext i16 %7 to i32
> +  %shl = shl i32 %conv7, 8
> +  %8 = load i32, i32* %coord, align 4
> +  %add8 = add i32 %8, %shl
> +  store i32 %add8, i32* %coord, align 4
> +  br label %if.end
> +
> +if.end:                                           ; preds = %if.else, %if.then
> +  ret void
> +}
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits



More information about the llvm-commits mailing list