[llvm] r274305 - code hoisting pass based on GVN
Duncan P. N. Exon Smith via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 30 19:00:13 PDT 2016
Reverted in r274320 due to self-hosting failures:
http://lab.llvm.org:8080/green/job/clang-stage1-configure-RA_build/22349/
http://lab.llvm.org:8011/builders/clang-x86_64-linux-selfhost-modules/builds/17232
Note that the blamelist on lab.llvm.org:8011 is incorrect. The previous
build was r274299, but somehow r274305 wasn't included in the blamelist:
http://lab.llvm.org:8011/builders/clang-x86_64-linux-selfhost-modules
> On 2016-Jun-30, at 17:24, Sebastian Pop via llvm-commits <llvm-commits at lists.llvm.org> wrote:
>
> Author: spop
> Date: Thu Jun 30 19:24:31 2016
> New Revision: 274305
>
> URL: http://llvm.org/viewvc/llvm-project?rev=274305&view=rev
> Log:
> code hoisting pass based on GVN
>
> This pass hoists computations that are duplicated across the program. The
> primary goal of gvn-hoist is to reduce the size of functions before the
> inline heuristics run, and thereby to reduce the total cost of function
> inlining.
>
> Pass written by Sebastian Pop, Aditya Kumar, Xiaoyu Hu, and Brian Rzycki.
> Important algorithmic contributions by Daniel Berlin in the form of reviews.
>
> Differential Revision: http://reviews.llvm.org/D19338
>
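A minimal sketch of the transformation (hand-written, not taken from the
patch): identical computations in sibling branches are unified at their
nearest common dominator.

    ; Before: %a + %b is computed in both branches.
    define i32 @f(i32 %a, i32 %b, i1 %c) {
    entry:
      br i1 %c, label %then, label %else
    then:
      %x = add i32 %a, %b
      br label %end
    else:
      %y = add i32 %a, %b
      br label %end
    end:
      %r = phi i32 [ %x, %then ], [ %y, %else ]
      ret i32 %r
    }
    ; After gvn-hoist (sketch): the add is hoisted into %entry and both
    ; %x and %y are replaced by the hoisted value.
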
> Added:
> llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp
> llvm/trunk/test/Transforms/GVN/hoist.ll
> Modified:
> llvm/trunk/include/llvm/InitializePasses.h
> llvm/trunk/include/llvm/LinkAllPasses.h
> llvm/trunk/include/llvm/Transforms/Scalar.h
> llvm/trunk/include/llvm/Transforms/Scalar/GVN.h
> llvm/trunk/lib/Passes/PassRegistry.def
> llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
> llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt
> llvm/trunk/lib/Transforms/Scalar/Scalar.cpp
>
> Modified: llvm/trunk/include/llvm/InitializePasses.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/InitializePasses.h?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/InitializePasses.h (original)
> +++ llvm/trunk/include/llvm/InitializePasses.h Thu Jun 30 19:24:31 2016
> @@ -119,6 +119,7 @@ void initializeEarlyIfConverterPass(Pass
> void initializeEdgeBundlesPass(PassRegistry&);
> void initializeEfficiencySanitizerPass(PassRegistry&);
> void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry &);
> +void initializeGVNHoistLegacyPassPass(PassRegistry &);
> void initializeExpandISelPseudosPass(PassRegistry&);
> void initializeExpandPostRAPass(PassRegistry&);
> void initializeExternalAAWrapperPassPass(PassRegistry&);
>
> Modified: llvm/trunk/include/llvm/LinkAllPasses.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/LinkAllPasses.h?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/LinkAllPasses.h (original)
> +++ llvm/trunk/include/llvm/LinkAllPasses.h Thu Jun 30 19:24:31 2016
> @@ -158,6 +158,7 @@ namespace {
> (void) llvm::createConstantHoistingPass();
> (void) llvm::createCodeGenPreparePass();
> (void) llvm::createEarlyCSEPass();
> + (void) llvm::createGVNHoistPass();
> (void) llvm::createMergedLoadStoreMotionPass();
> (void) llvm::createGVNPass();
> (void) llvm::createMemCpyOptPass();
>
> Modified: llvm/trunk/include/llvm/Transforms/Scalar.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar.h?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Transforms/Scalar.h (original)
> +++ llvm/trunk/include/llvm/Transforms/Scalar.h Thu Jun 30 19:24:31 2016
> @@ -326,6 +326,13 @@ FunctionPass *createEarlyCSEPass();
>
> //===----------------------------------------------------------------------===//
> //
> +// GVNHoist - This pass performs a simple and fast GVN pass over the dominator
> +// tree to hoist common expressions from sibling branches.
> +//
> +FunctionPass *createGVNHoistPass();
> +
> +//===----------------------------------------------------------------------===//
> +//
> // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads
> // are hoisted into the header, while stores sink into the footer.
> //
>
> Modified: llvm/trunk/include/llvm/Transforms/Scalar/GVN.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar/GVN.h?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Transforms/Scalar/GVN.h (original)
> +++ llvm/trunk/include/llvm/Transforms/Scalar/GVN.h Thu Jun 30 19:24:31 2016
> @@ -58,11 +58,7 @@ public:
> AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
> MemoryDependenceResults &getMemDep() const { return *MD; }
>
> -private:
> - friend class gvn::GVNLegacyPass;
> -
> struct Expression;
> - friend struct DenseMapInfo<Expression>;
>
> /// This class holds the mapping between values and value numbers. It is used
> /// as an efficient mechanism to determine the expression-wise equivalence of
> @@ -104,6 +100,10 @@ private:
> void verifyRemoved(const Value *) const;
> };
>
> +private:
> + friend class gvn::GVNLegacyPass;
> + friend struct DenseMapInfo<Expression>;
> +
> MemoryDependenceResults *MD;
> DominatorTree *DT;
> const TargetLibraryInfo *TLI;
> @@ -228,6 +228,13 @@ private:
> /// loads are eliminated by the pass.
> FunctionPass *createGVNPass(bool NoLoads = false);
>
> +/// \brief A simple and fast domtree-based GVN pass to hoist common expressions
> +/// from sibling branches.
> +struct GVNHoistPass : PassInfoMixin<GVNHoistPass> {
> + /// \brief Run the pass over the function.
> + PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
> +};
> +
> }
>
> #endif
>
> Modified: llvm/trunk/lib/Passes/PassRegistry.def
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Passes/PassRegistry.def?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Passes/PassRegistry.def (original)
> +++ llvm/trunk/lib/Passes/PassRegistry.def Thu Jun 30 19:24:31 2016
> @@ -128,6 +128,7 @@ FUNCTION_PASS("bdce", BDCEPass())
> FUNCTION_PASS("dce", DCEPass())
> FUNCTION_PASS("dse", DSEPass())
> FUNCTION_PASS("early-cse", EarlyCSEPass())
> +FUNCTION_PASS("gvn-hoist", GVNHoistPass())
> FUNCTION_PASS("instcombine", InstCombinePass())
> FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())
> FUNCTION_PASS("float2int", Float2IntPass())
>
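With this registration the pass can be driven directly from opt. As a quick
sanity check (the legacy-PM flag matches the RUN line in the new test below;
the new-PM spelling uses the string registered here):

    opt -gvn-hoist -S < input.ll
    opt -passes=gvn-hoist -S < input.ll
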
> Modified: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp (original)
> +++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp Thu Jun 30 19:24:31 2016
> @@ -199,6 +199,7 @@ void PassManagerBuilder::populateFunctio
> FPM.add(createCFGSimplificationPass());
> FPM.add(createSROAPass());
> FPM.add(createEarlyCSEPass());
> + FPM.add(createGVNHoistPass());
> FPM.add(createLowerExpectIntrinsicPass());
> }
>
>
> Modified: llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt (original)
> +++ llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt Thu Jun 30 19:24:31 2016
> @@ -12,6 +12,7 @@ add_llvm_library(LLVMScalarOpts
> Float2Int.cpp
> GuardWidening.cpp
> GVN.cpp
> + GVNHoist.cpp
> InductiveRangeCheckElimination.cpp
> IndVarSimplify.cpp
> JumpThreading.cpp
>
> Added: llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp?rev=274305&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp (added)
> +++ llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp Thu Jun 30 19:24:31 2016
> @@ -0,0 +1,740 @@
> +//===- GVNHoist.cpp - Hoist scalar and load expressions -------------------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This pass hoists expressions from branches to a common dominator. It uses
> +// GVN (global value numbering) to discover expressions that compute the same
> +// values. The primary goal is to reduce code size, and in some cases to
> +// shorten the critical path (by exposing more ILP).
> +// Hoisting may hurt performance in some cases. To mitigate that, hoisting is
> +// disabled in the following cases:
> +// 1. Scalars across calls.
> +// 2. GEPs when the corresponding load/store cannot be hoisted.
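> +//
> +// Illustrative example (not part of the original patch): given
> +//   if (c) { f(); u = a + b; } else { v = a + b; }
> +// both branches compute a + b, but hoisting it into the common dominator
> +// would move the first computation across the call to f(), so per rule 1
> +// it is not hoisted (unless optimizing for minimum size).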
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/ADT/SmallPtrSet.h"
> +#include "llvm/ADT/Statistic.h"
> +#include "llvm/ADT/DenseMap.h"
> +#include "llvm/Analysis/ValueTracking.h"
> +#include "llvm/Transforms/Scalar.h"
> +#include "llvm/Transforms/Scalar/GVN.h"
> +#include "llvm/Transforms/Utils/MemorySSA.h"
> +#include <functional>
> +#include <unordered_map>
> +#include <vector>
> +
> +using namespace llvm;
> +
> +#define DEBUG_TYPE "gvn-hoist"
> +
> +STATISTIC(NumHoisted, "Number of instructions hoisted");
> +STATISTIC(NumRemoved, "Number of instructions removed");
> +STATISTIC(NumLoadsHoisted, "Number of loads hoisted");
> +STATISTIC(NumLoadsRemoved, "Number of loads removed");
> +STATISTIC(NumStoresHoisted, "Number of stores hoisted");
> +STATISTIC(NumStoresRemoved, "Number of stores removed");
> +STATISTIC(NumCallsHoisted, "Number of calls hoisted");
> +STATISTIC(NumCallsRemoved, "Number of calls removed");
> +
> +static cl::opt<int>
> + MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1),
> + cl::desc("Max number of instructions to hoist "
> + "(default unlimited = -1)"));
> +static cl::opt<int> MaxNumberOfBBSInPath(
> + "gvn-hoist-max-bbs", cl::Hidden, cl::init(4),
> + cl::desc("Max number of basic blocks on the path between "
> + "hoisting locations (default = 4, unlimited = -1)"));
> +
> +static int HoistedCtr = 0;
> +
> +namespace {
> +
> +// Provides a sorting function based on the execution order of two instructions.
> +struct SortByDFSIn {
> +private:
> + DenseMap<const BasicBlock *, unsigned> &DFSNumber;
> +
> +public:
> + SortByDFSIn(DenseMap<const BasicBlock *, unsigned> &D) : DFSNumber(D) {}
> +
> + // Returns true when A executes before B.
> + bool operator()(const Instruction *A, const Instruction *B) const {
> + assert(A != B);
> + const BasicBlock *BA = A->getParent();
> + const BasicBlock *BB = B->getParent();
> + unsigned NA = DFSNumber[BA];
> + unsigned NB = DFSNumber[BB];
> + if (NA < NB)
> + return true;
> + if (NA == NB) {
> + // Sort them in the order they occur in the same basic block.
> + BasicBlock::const_iterator AI(A), BI(B);
> + return std::distance(AI, BI) < 0;
> + }
> + return false;
> + }
> +};
> +
> +// A map from a VN (value number) to all the instructions with that VN.
> +typedef DenseMap<unsigned, SmallVector<Instruction *, 4>> VNtoInsns;
> +
> +// Records all scalar instructions that are candidates for code hoisting.
> +class InsnInfo {
> + VNtoInsns VNtoScalars;
> +
> +public:
> + // Inserts I and its value number in VNtoScalars.
> + void insert(Instruction *I, GVN::ValueTable &VN) {
> + // Scalar instruction.
> + unsigned V = VN.lookupOrAdd(I);
> + VNtoScalars[V].push_back(I);
> + }
> +
> + const VNtoInsns &getVNTable() const { return VNtoScalars; }
> +};
> +
> +// Records all load instructions that are candidates for code hoisting.
> +class LoadInfo {
> + VNtoInsns VNtoLoads;
> +
> +public:
> + // Insert Load and the value number of its memory address in VNtoLoads.
> + void insert(LoadInst *Load, GVN::ValueTable &VN) {
> + if (Load->isSimple()) {
> + unsigned V = VN.lookupOrAdd(Load->getPointerOperand());
> + VNtoLoads[V].push_back(Load);
> + }
> + }
> +
> + const VNtoInsns &getVNTable() const { return VNtoLoads; }
> +};
> +
> +// Records all store instructions that are candidates for code hoisting.
> +class StoreInfo {
> + VNtoInsns VNtoStores;
> +
> +public:
> +  // Insert Store and a hash of its pointer operand and stored value in
> +  // VNtoStores.
> + void insert(StoreInst *Store, GVN::ValueTable &VN) {
> + if (!Store->isSimple())
> + return;
> + // Hash the store address and the stored value.
> + Value *Ptr = Store->getPointerOperand();
> + Value *Val = Store->getValueOperand();
> + VNtoStores[hash_combine(VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val))]
> + .push_back(Store);
> + }
> +
> + const VNtoInsns &getVNTable() const { return VNtoStores; }
> +};
> +
> +// Records all call instructions that are candidates for code hoisting.
> +class CallInfo {
> + VNtoInsns VNtoCallsScalars;
> + VNtoInsns VNtoCallsLoads;
> + VNtoInsns VNtoCallsStores;
> +
> +public:
> +  // Insert Call and its value number in one of the VNtoCalls* containers.
> + void insert(CallInst *Call, GVN::ValueTable &VN) {
> +    // A call that doesNotAccessMemory() is handled as a scalar, a call that
> +    // onlyReadsMemory() is handled as a load, and all other calls are
> +    // handled as stores.
> + unsigned V = VN.lookupOrAdd(Call);
> +
> + if (Call->doesNotAccessMemory())
> + VNtoCallsScalars[V].push_back(Call);
> + else if (Call->onlyReadsMemory())
> + VNtoCallsLoads[V].push_back(Call);
> + else
> + VNtoCallsStores[V].push_back(Call);
> + }
> +
> + const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; }
> +
> + const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; }
> +
> + const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; }
> +};
> +
> +typedef DenseMap<const BasicBlock *, bool> BBSideEffectsSet;
> +typedef SmallVector<Instruction *, 4> SmallVecInsn;
> +typedef SmallVectorImpl<Instruction *> SmallVecImplInsn;
> +
> +// This pass hoists common computations across branches sharing common
> +// dominator. The primary goal is to reduce the code size, and in some
> +// cases reduce critical path (by exposing more ILP).
> +class GVNHoistLegacyPassImpl {
> +public:
> + GVN::ValueTable VN;
> + DominatorTree *DT;
> + AliasAnalysis *AA;
> + MemoryDependenceResults *MD;
> + DenseMap<const BasicBlock *, unsigned> DFSNumber;
> + BBSideEffectsSet BBSideEffects;
> + MemorySSA *MSSA;
> + enum InsKind { Unknown, Scalar, Load, Store };
> +
> + GVNHoistLegacyPassImpl(DominatorTree *Dt, AliasAnalysis *Aa,
> + MemoryDependenceResults *Md)
> + : DT(Dt), AA(Aa), MD(Md) {}
> +
> +  // Return true when there is exception handling in BB.
> + bool hasEH(const BasicBlock *BB) {
> + auto It = BBSideEffects.find(BB);
> + if (It != BBSideEffects.end())
> + return It->second;
> +
> + if (BB->isEHPad() || BB->hasAddressTaken()) {
> + BBSideEffects[BB] = true;
> + return true;
> + }
> +
> + if (BB->getTerminator()->mayThrow()) {
> + BBSideEffects[BB] = true;
> + return true;
> + }
> +
> + BBSideEffects[BB] = false;
> + return false;
> + }
> +
> + // Return true when all paths from A to the end of the function pass through
> + // either B or C.
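> +  // Illustrative example (not part of the original patch): in a diamond
> +  //
> +  //        A
> +  //       / \
> +  //      B   C
> +  //       \ /
> +  //        D
> +  //
> +  // every path from A to the end of the function passes through B or C, so
> +  // this returns true; it returns false when A can reach the function exit
> +  // while bypassing both B and C.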
> + bool hoistingFromAllPaths(const BasicBlock *A, const BasicBlock *B,
> + const BasicBlock *C) {
> +    // Copy B and C into a work list so that they can be removed as the
> +    // depth-first traversal reaches them.
> + SmallPtrSet<const BasicBlock *, 2> WL;
> + WL.insert(B);
> + WL.insert(C);
> +
> + for (auto It = df_begin(A), E = df_end(A); It != E;) {
> +      // If we are still iterating in the depth-first traversal after both
> +      // blocks in the work list have been removed, there exists a path from
> +      // A to the exit of the function that bypasses B and C.
> + if (WL.empty())
> + return false;
> +
> + const BasicBlock *BB = *It;
> + if (WL.erase(BB)) {
> + // Stop DFS traversal when BB is in the work list.
> + It.skipChildren();
> + continue;
> + }
> +
> + // Check for end of function, calls that do not return, etc.
> + if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator()))
> + return false;
> +
> + // Increment DFS traversal when not skipping children.
> + ++It;
> + }
> +
> + return true;
> + }
> +
> +  // Each element of a hoisting list contains the basic block into which to
> +  // hoist and a list of instructions to be hoisted.
> + typedef std::pair<BasicBlock *, SmallVecInsn> HoistingPointInfo;
> + typedef SmallVector<HoistingPointInfo, 4> HoistingPointList;
> +
> +  // Return true when A has a memory use in the basic block PBB.
> + bool hasMemoryUse(MemoryAccess *A, const BasicBlock *PBB) {
> + Value::user_iterator UI = A->user_begin();
> + Value::user_iterator UE = A->user_end();
> + const BasicBlock *BBA = A->getBlock();
> + for (; UI != UE; ++UI)
> + if (MemoryAccess *UM = dyn_cast<MemoryAccess>(*UI)) {
> + if (PBB == BBA)
> + if (MSSA->locallyDominates(UM, A))
> + return true;
> + if (PBB == UM->getBlock())
> + return true;
> + }
> + return false;
> + }
> +
> + // Check whether it is possible to hoist in between NewHoistPt and BBInsn.
> + bool safeToHoist(const BasicBlock *NewHoistPt, const BasicBlock *BBInsn,
> + InsKind K, int &NBBsOnAllPaths, MemoryAccess *MemdefInsn,
> + BasicBlock *BBMemdefInsn, MemoryAccess *MemdefFirst,
> + BasicBlock *BBMemdefFirst) {
> + assert(DT->dominates(NewHoistPt, BBInsn) && "Invalid path");
> +
> + // Record in Paths all basic blocks reachable in depth-first iteration on
> + // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the
> + // blocks that may be executed between the execution of NewHoistPt and
> + // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe
> + // on all execution paths.
> + for (auto I = idf_begin(BBInsn), E = idf_end(BBInsn); I != E;) {
> + if (*I == NewHoistPt) {
> + // Stop traversal when reaching NewHoistPt.
> + I.skipChildren();
> + continue;
> + }
> +
> + // The safety checks for BBInsn will be handled separately.
> + if (*I != BBInsn) {
> + // Stop gathering blocks when it is not possible to hoist.
> + if (hasEH(*I))
> + return false;
> +
> + // Check that we do not move a store past loads.
> + if (K == InsKind::Store) {
> + if (DT->dominates(BBMemdefInsn, NewHoistPt))
> + if (hasMemoryUse(MemdefInsn, *I))
> + return false;
> +
> + if (DT->dominates(BBMemdefFirst, NewHoistPt))
> + if (hasMemoryUse(MemdefFirst, *I))
> + return false;
> + }
> + }
> + ++NBBsOnAllPaths;
> + ++I;
> + }
> +
> + // Check whether there are too many blocks on the hoisting path.
> + if (MaxNumberOfBBSInPath != -1 && NBBsOnAllPaths >= MaxNumberOfBBSInPath)
> + return false;
> +
> + return true;
> + }
> +
> + // Return true when it is safe to hoist an instruction Insn to NewHoistPt and
> + // move the insertion point from HoistPt to NewHoistPt.
> + bool safeToHoist(const BasicBlock *NewHoistPt, const BasicBlock *HoistPt,
> + const Instruction *Insn, const Instruction *First, InsKind K,
> + int &NBBsOnAllPaths) {
> + if (hasEH(HoistPt))
> + return false;
> +
> + const BasicBlock *BBInsn = Insn->getParent();
> + // When HoistPt already contains an instruction to be hoisted, the
> + // expression is needed on all paths.
> +
> + // Check that the hoisted expression is needed on all paths: it is unsafe
> + // to hoist loads to a place where there may be a path not loading from
> + // the same address: for instance there may be a branch on which the
> + // address of the load may not be initialized. FIXME: at -Oz we may want
> + // to hoist scalars to a place where they are partially needed.
> + if (BBInsn != NewHoistPt &&
> + !hoistingFromAllPaths(NewHoistPt, HoistPt, BBInsn))
> + return false;
> +
> + MemoryAccess *MemdefInsn = nullptr;
> + MemoryAccess *MemdefFirst = nullptr;
> + BasicBlock *BBMemdefInsn = nullptr;
> + BasicBlock *BBMemdefFirst = nullptr;
> +
> + if (K != InsKind::Scalar) {
> + // For loads and stores, we check for dependences on the Memory SSA.
> + MemdefInsn = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(Insn))
> + ->getDefiningAccess();
> + BBMemdefInsn = MemdefInsn->getBlock();
> +
> + if (DT->properlyDominates(NewHoistPt, BBMemdefInsn))
> + // Cannot move Insn past BBMemdefInsn to NewHoistPt.
> + return false;
> +
> + MemdefFirst = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(First))
> + ->getDefiningAccess();
> + BBMemdefFirst = MemdefFirst->getBlock();
> +
> + if (DT->properlyDominates(NewHoistPt, BBMemdefFirst))
> + // Cannot move First past BBMemdefFirst to NewHoistPt.
> + return false;
> + }
> +
> + // Check for unsafe hoistings due to side effects.
> + if (!safeToHoist(NewHoistPt, HoistPt, K, NBBsOnAllPaths, MemdefInsn,
> + BBMemdefInsn, MemdefFirst, BBMemdefFirst) ||
> + !safeToHoist(NewHoistPt, BBInsn, K, NBBsOnAllPaths, MemdefInsn,
> + BBMemdefInsn, MemdefFirst, BBMemdefFirst))
> + return false;
> +
> + // Safe to hoist scalars.
> + if (K == InsKind::Scalar)
> + return true;
> +
> + if (DT->properlyDominates(BBMemdefInsn, NewHoistPt) &&
> + DT->properlyDominates(BBMemdefFirst, NewHoistPt))
> + return true;
> +
> + const BasicBlock *BBFirst = First->getParent();
> + if (BBInsn == BBFirst)
> + return false;
> +
> + assert(BBMemdefInsn == NewHoistPt || BBMemdefFirst == NewHoistPt);
> +
> + if (BBInsn != NewHoistPt && BBFirst != NewHoistPt)
> + return true;
> +
> + if (BBInsn == NewHoistPt) {
> + if (DT->properlyDominates(BBMemdefFirst, NewHoistPt))
> + return true;
> + assert(BBInsn == BBMemdefFirst);
> + if (MSSA->locallyDominates(MSSA->getMemoryAccess(Insn), MemdefFirst))
> + return false;
> + return true;
> + }
> +
> + if (BBFirst == NewHoistPt) {
> + if (DT->properlyDominates(BBMemdefInsn, NewHoistPt))
> + return true;
> + assert(BBFirst == BBMemdefInsn);
> + if (MSSA->locallyDominates(MSSA->getMemoryAccess(First), MemdefInsn))
> + return false;
> + return true;
> + }
> +
> + // No side effects: it is safe to hoist.
> + return true;
> + }
> +
> +  // Partition InstructionsToHoist into a set of candidates which can share a
> +  // common hoisting point. The partitions are collected in HPL. K indicates
> +  // whether the instructions in InstructionsToHoist are scalars, loads, or
> +  // stores.
> + void partitionCandidates(SmallVecImplInsn &InstructionsToHoist,
> + HoistingPointList &HPL, InsKind K) {
> + // No need to sort for two instructions.
> + if (InstructionsToHoist.size() > 2) {
> + SortByDFSIn Pred(DFSNumber);
> + std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred);
> + }
> +
> +    // Create a work list of all the BBs of the instructions to be hoisted.
> + SmallPtrSet<BasicBlock *, 4> WL;
> + SmallVecImplInsn::iterator II = InstructionsToHoist.begin();
> + SmallVecImplInsn::iterator Start = II;
> + BasicBlock *HoistPt = (*II)->getParent();
> + WL.insert((*II)->getParent());
> + int NBBsOnAllPaths = 0;
> +
> + for (++II; II != InstructionsToHoist.end(); ++II) {
> + Instruction *Insn = *II;
> + BasicBlock *BB = Insn->getParent();
> + BasicBlock *NewHoistPt = DT->findNearestCommonDominator(HoistPt, BB);
> + WL.insert(BB);
> + if (safeToHoist(NewHoistPt, HoistPt, Insn, *Start, K, NBBsOnAllPaths)) {
> + // Extend HoistPt to NewHoistPt.
> + HoistPt = NewHoistPt;
> + continue;
> + }
> + // Not safe to hoist: save the previous work list and start over from BB.
> + if (std::distance(Start, II) > 1)
> + HPL.push_back(std::make_pair(HoistPt, SmallVecInsn(Start, II)));
> + else
> + WL.clear();
> +
> + // We start over to compute HoistPt from BB.
> + Start = II;
> + HoistPt = BB;
> + NBBsOnAllPaths = 0;
> + }
> +
> + // Save the last partition.
> + if (std::distance(Start, II) > 1)
> + HPL.push_back(std::make_pair(HoistPt, SmallVecInsn(Start, II)));
> + }
> +
> + // Initialize HPL from Map.
> + void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL,
> + InsKind K) {
> + for (VNtoInsns::const_iterator It = Map.begin(); It != Map.end(); ++It) {
> + if (MaxHoistedThreshold != -1 && ++HoistedCtr > MaxHoistedThreshold)
> + return;
> +
> + const SmallVecInsn &V = It->second;
> + if (V.size() < 2)
> + continue;
> +
> + // Compute the insertion point and the list of expressions to be hoisted.
> + SmallVecInsn InstructionsToHoist;
> + for (auto I : V)
> + if (!hasEH(I->getParent()))
> + InstructionsToHoist.push_back(I);
> +
> + if (InstructionsToHoist.size())
> + partitionCandidates(InstructionsToHoist, HPL, K);
> + }
> + }
> +
> +  // Return true when all operands of I are available at the insertion point
> +  // HoistPt. When limiting the number of hoisted expressions, one could hoist
> +  // a load without hoisting its access function. So before hoisting any
> +  // expression, make sure that all its operands are available at the
> +  // insertion point.
> + bool allOperandsAvailable(const Instruction *I,
> + const BasicBlock *HoistPt) const {
> + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
> + const Value *Op = I->getOperand(i);
> + const Instruction *Inst = dyn_cast<Instruction>(Op);
> + if (Inst && !DT->dominates(Inst->getParent(), HoistPt))
> + return false;
> + }
> +
> + return true;
> + }
> +
> +  // Return whichever of I and J occurs first in their common basic block.
> +  Instruction *firstOfTwo(Instruction *I, Instruction *J) const {
> + for (Instruction &I1 : *I->getParent())
> + if (&I1 == I || &I1 == J)
> + return &I1;
> + llvm_unreachable("Both I and J must be from same BB");
> + }
> +
> + // Replace the use of From with To in Insn.
> + void replaceUseWith(Instruction *Insn, Value *From, Value *To) const {
> + for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
> + UI != UE;) {
> + Use &U = *UI++;
> + if (U.getUser() == Insn) {
> + U.set(To);
> + return;
> + }
> + }
> + llvm_unreachable("should replace exactly once");
> + }
> +
> +  // Make the operands of the load/store Repl available at HoistPt by cloning
> +  // its address computation (and, for stores, a GEP stored value) there;
> +  // return false when that is not possible.
> +  bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt) const {
> + // Check whether the GEP of a ld/st can be synthesized at HoistPt.
> + Instruction *Gep = nullptr;
> + Instruction *Val = nullptr;
> + if (LoadInst *Ld = dyn_cast<LoadInst>(Repl))
> + Gep = dyn_cast<Instruction>(Ld->getPointerOperand());
> + if (StoreInst *St = dyn_cast<StoreInst>(Repl)) {
> + Gep = dyn_cast<Instruction>(St->getPointerOperand());
> + Val = dyn_cast<Instruction>(St->getValueOperand());
> + }
> +
> + if (!Gep || !isa<GetElementPtrInst>(Gep))
> + return false;
> +
> + // Check whether we can compute the Gep at HoistPt.
> + if (!allOperandsAvailable(Gep, HoistPt))
> + return false;
> +
> + // Also check that the stored value is available.
> + if (Val && !allOperandsAvailable(Val, HoistPt))
> + return false;
> +
> + // Copy the gep before moving the ld/st.
> + Instruction *ClonedGep = Gep->clone();
> + ClonedGep->insertBefore(HoistPt->getTerminator());
> + replaceUseWith(Repl, Gep, ClonedGep);
> +
> + // Also copy Val when it is a gep: geps are not hoisted by default.
> + if (Val && isa<GetElementPtrInst>(Val)) {
> + Instruction *ClonedVal = Val->clone();
> + ClonedVal->insertBefore(HoistPt->getTerminator());
> + replaceUseWith(Repl, Val, ClonedVal);
> + }
> +
> + return true;
> + }
> +
> +  // Hoist the instructions in each partition of HPL to its hoisting point;
> +  // return the number of hoisted scalars and non-scalars.
> +  std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL) {
> + unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0;
> + for (const HoistingPointInfo &HP : HPL) {
> + // Find out whether we already have one of the instructions in HoistPt,
> + // in which case we do not have to move it.
> + BasicBlock *HoistPt = HP.first;
> + const SmallVecInsn &InstructionsToHoist = HP.second;
> + Instruction *Repl = nullptr;
> + for (Instruction *I : InstructionsToHoist)
> + if (I->getParent() == HoistPt) {
> + // If there are two instructions in HoistPt to be hoisted in place:
> + // update Repl to be the first one, such that we can rename the uses
> + // of the second based on the first.
> + Repl = !Repl ? I : firstOfTwo(Repl, I);
> + }
> +
> + if (Repl) {
> + // Repl is already in HoistPt: it remains in place.
> + assert(allOperandsAvailable(Repl, HoistPt) &&
> + "instruction depends on operands that are not available");
> + } else {
> + // When we do not find Repl in HoistPt, select the first in the list
> + // and move it to HoistPt.
> + Repl = InstructionsToHoist.front();
> +
> + // We can move Repl in HoistPt only when all operands are available.
> + // The order in which hoistings are done may influence the availability
> + // of operands.
> + if (!allOperandsAvailable(Repl, HoistPt) &&
> + !makeOperandsAvailable(Repl, HoistPt))
> + continue;
> + Repl->moveBefore(HoistPt->getTerminator());
> + }
> +
> + if (isa<LoadInst>(Repl))
> + ++NL;
> + else if (isa<StoreInst>(Repl))
> + ++NS;
> + else if (isa<CallInst>(Repl))
> + ++NC;
> + else // Scalar
> + ++NI;
> +
> + // Remove and rename all other instructions.
> + for (Instruction *I : InstructionsToHoist)
> + if (I != Repl) {
> + ++NR;
> + if (isa<LoadInst>(Repl))
> + ++NumLoadsRemoved;
> + else if (isa<StoreInst>(Repl))
> + ++NumStoresRemoved;
> + else if (isa<CallInst>(Repl))
> + ++NumCallsRemoved;
> + I->replaceAllUsesWith(Repl);
> + I->eraseFromParent();
> + }
> + }
> +
> + NumHoisted += NL + NS + NC + NI;
> + NumRemoved += NR;
> + NumLoadsHoisted += NL;
> + NumStoresHoisted += NS;
> + NumCallsHoisted += NC;
> + return {NI, NL + NC + NS};
> + }
> +
> +  // Hoist all expressions. Returns the number of scalars hoisted and the
> +  // number of non-scalars (loads, stores, and calls) hoisted.
> + std::pair<unsigned, unsigned> hoistExpressions(Function &F) {
> + InsnInfo II;
> + LoadInfo LI;
> + StoreInfo SI;
> + CallInfo CI;
> + const bool OptForMinSize = F.optForMinSize();
> + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
> + for (Instruction &I1 : *BB) {
> + if (LoadInst *Load = dyn_cast<LoadInst>(&I1))
> + LI.insert(Load, VN);
> + else if (StoreInst *Store = dyn_cast<StoreInst>(&I1))
> + SI.insert(Store, VN);
> + else if (CallInst *Call = dyn_cast<CallInst>(&I1)) {
> + if (IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call)) {
> + if (isa<DbgInfoIntrinsic>(Intr) ||
> + Intr->getIntrinsicID() == Intrinsic::assume)
> + continue;
> + }
> + if (Call->mayHaveSideEffects()) {
> + if (!OptForMinSize)
> + break;
> + // We may continue hoisting across calls which write to memory.
> + if (Call->mayThrow())
> + break;
> + }
> + CI.insert(Call, VN);
> + } else if (OptForMinSize || !isa<GetElementPtrInst>(&I1))
> + // Do not hoist scalars past calls that may write to memory because
> + // that could result in spills later. geps are handled separately.
> + // TODO: We can relax this for targets like AArch64 as they have more
> + // registers than X86.
> + II.insert(&I1, VN);
> + }
> + }
> +
> + HoistingPointList HPL;
> + computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar);
> + computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load);
> + computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store);
> + computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar);
> + computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load);
> + computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
> + return hoist(HPL);
> + }
> +
> + bool run(Function &F) {
> + VN.setDomTree(DT);
> + VN.setAliasAnalysis(AA);
> + VN.setMemDep(MD);
> + bool Res = false;
> +
> + unsigned I = 0;
> + for (const BasicBlock *BB : depth_first(&F.getEntryBlock()))
> + DFSNumber.insert(std::make_pair(BB, ++I));
> +
> + // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
> + while (1) {
> +      // FIXME: only compute MemorySSA once. We need to update the analysis
> +      // at the same time as we transform the code.
> + MemorySSA M(F, AA, DT);
> + MSSA = &M;
> +
> + auto HoistStat = hoistExpressions(F);
> + if (HoistStat.first + HoistStat.second == 0) {
> + return Res;
> + }
> + if (HoistStat.second > 0) {
> +        // To address a limitation of the current GVN, we need to rerun
> +        // hoisting after loads have been hoisted so that all scalars
> +        // dependent on the hoisted loads can be hoisted as well. The same
> +        // holds for stores.
> + VN.clear();
> + }
> + Res = true;
> + }
> +
> + return Res;
> + }
> +};
> +
> +class GVNHoistLegacyPass : public FunctionPass {
> +public:
> + static char ID;
> +
> + GVNHoistLegacyPass() : FunctionPass(ID) {
> + initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry());
> + }
> +
> + bool runOnFunction(Function &F) override {
> + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
> + auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
> + auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
> +
> + GVNHoistLegacyPassImpl G(&DT, &AA, &MD);
> + return G.run(F);
> + }
> +
> + void getAnalysisUsage(AnalysisUsage &AU) const override {
> + AU.addRequired<DominatorTreeWrapperPass>();
> + AU.addRequired<AAResultsWrapperPass>();
> + AU.addRequired<MemoryDependenceWrapperPass>();
> + AU.addPreserved<DominatorTreeWrapperPass>();
> + }
> +};
> +} // namespace
> +
> +PreservedAnalyses GVNHoistPass::run(Function &F,
> + AnalysisManager<Function> &AM) {
> + DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
> + AliasAnalysis &AA = AM.getResult<AAManager>(F);
> + MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
> +
> + GVNHoistLegacyPassImpl G(&DT, &AA, &MD);
> + if (!G.run(F))
> + return PreservedAnalyses::all();
> +
> + PreservedAnalyses PA;
> + PA.preserve<DominatorTreeAnalysis>();
> + return PA;
> +}
> +
> +char GVNHoistLegacyPass::ID = 0;
> +INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist",
> + "Early GVN Hoisting of Expressions", false, false)
> +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
> +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
> +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
> +INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist",
> + "Early GVN Hoisting of Expressions", false, false)
> +
> +FunctionPass *llvm::createGVNHoistPass() { return new GVNHoistLegacyPass(); }
>
> Modified: llvm/trunk/lib/Transforms/Scalar/Scalar.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/Scalar.cpp?rev=274305&r1=274304&r2=274305&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/Scalar.cpp (original)
> +++ llvm/trunk/lib/Transforms/Scalar/Scalar.cpp Thu Jun 30 19:24:31 2016
> @@ -44,6 +44,7 @@ void llvm::initializeScalarOpts(PassRegi
> initializeGuardWideningLegacyPassPass(Registry);
> initializeGVNLegacyPassPass(Registry);
> initializeEarlyCSELegacyPassPass(Registry);
> + initializeGVNHoistLegacyPassPass(Registry);
> initializeFlattenCFGPassPass(Registry);
> initializeInductiveRangeCheckEliminationPass(Registry);
> initializeIndVarSimplifyLegacyPassPass(Registry);
> @@ -236,6 +237,10 @@ void LLVMAddEarlyCSEPass(LLVMPassManager
> unwrap(PM)->add(createEarlyCSEPass());
> }
>
> +void LLVMAddGVNHoistLegacyPass(LLVMPassManagerRef PM) {
> + unwrap(PM)->add(createGVNHoistPass());
> +}
> +
> void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
> unwrap(PM)->add(createTypeBasedAAWrapperPass());
> }
>
> Added: llvm/trunk/test/Transforms/GVN/hoist.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/hoist.ll?rev=274305&view=auto
> ==============================================================================
> --- llvm/trunk/test/Transforms/GVN/hoist.ll (added)
> +++ llvm/trunk/test/Transforms/GVN/hoist.ll Thu Jun 30 19:24:31 2016
> @@ -0,0 +1,650 @@
> +; RUN: opt -gvn-hoist -S < %s | FileCheck %s
> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> +target triple = "x86_64-unknown-linux-gnu"
> +
> +@GlobalVar = internal global float 1.000000e+00
> +
> +; Check that all scalar expressions are hoisted.
> +;
> +; CHECK-LABEL: @scalarsHoisting
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK-NOT: fmul
> +; CHECK-NOT: fsub
> +define float @scalarsHoisting(float %d, float %min, float %max, float %a) {
> +entry:
> + %div = fdiv float 1.000000e+00, %d
> + %cmp = fcmp oge float %div, 0.000000e+00
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + %sub = fsub float %min, %a
> + %mul = fmul float %sub, %div
> + %sub1 = fsub float %max, %a
> + %mul2 = fmul float %sub1, %div
> + br label %if.end
> +
> +if.else: ; preds = %entry
> + %sub3 = fsub float %max, %a
> + %mul4 = fmul float %sub3, %div
> + %sub5 = fsub float %min, %a
> + %mul6 = fmul float %sub5, %div
> + br label %if.end
> +
> +if.end: ; preds = %if.else, %if.then
> + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
> + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
> + %add = fadd float %tmax.0, %tmin.0
> + ret float %add
> +}
> +
> +; Check that all loads and scalars depending on the loads are hoisted.
> +; Check that getelementptr computation gets hoisted before the load.
> +;
> +; CHECK-LABEL: @readsAndScalarsHoisting
> +; CHECK: load
> +; CHECK: load
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK-NOT: load
> +; CHECK-NOT: fmul
> +; CHECK-NOT: fsub
> +define float @readsAndScalarsHoisting(float %d, float* %min, float* %max, float* %a) {
> +entry:
> + %div = fdiv float 1.000000e+00, %d
> + %cmp = fcmp oge float %div, 0.000000e+00
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + %A = getelementptr float, float* %min, i32 1
> + %0 = load float, float* %A, align 4
> + %1 = load float, float* %a, align 4
> + %sub = fsub float %0, %1
> + %mul = fmul float %sub, %div
> + %2 = load float, float* %max, align 4
> + %sub1 = fsub float %2, %1
> + %mul2 = fmul float %sub1, %div
> + br label %if.end
> +
> +if.else: ; preds = %entry
> + %3 = load float, float* %max, align 4
> + %4 = load float, float* %a, align 4
> + %sub3 = fsub float %3, %4
> + %mul4 = fmul float %sub3, %div
> + %B = getelementptr float, float* %min, i32 1
> + %5 = load float, float* %B, align 4
> + %sub5 = fsub float %5, %4
> + %mul6 = fmul float %sub5, %div
> + br label %if.end
> +
> +if.end: ; preds = %if.else, %if.then
> + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
> + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
> + %add = fadd float %tmax.0, %tmin.0
> + ret float %add
> +}
> +
> +; Check that we do not hoist loads after a store: the first two loads will be
> +; hoisted, and then the third load will not be hoisted.
> +;
> +; CHECK-LABEL: @readsAndWrites
> +; CHECK: load
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: store
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK-NOT: load
> +; CHECK-NOT: fmul
> +; CHECK-NOT: fsub
> +define float @readsAndWrites(float %d, float* %min, float* %max, float* %a) {
> +entry:
> + %div = fdiv float 1.000000e+00, %d
> + %cmp = fcmp oge float %div, 0.000000e+00
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + %0 = load float, float* %min, align 4
> + %1 = load float, float* %a, align 4
> + store float %0, float* @GlobalVar
> + %sub = fsub float %0, %1
> + %mul = fmul float %sub, %div
> + %2 = load float, float* %max, align 4
> + %sub1 = fsub float %2, %1
> + %mul2 = fmul float %sub1, %div
> + br label %if.end
> +
> +if.else: ; preds = %entry
> + %3 = load float, float* %max, align 4
> + %4 = load float, float* %a, align 4
> + %sub3 = fsub float %3, %4
> + %mul4 = fmul float %sub3, %div
> + %5 = load float, float* %min, align 4
> + %sub5 = fsub float %5, %4
> + %mul6 = fmul float %sub5, %div
> + br label %if.end
> +
> +if.end: ; preds = %if.else, %if.then
> + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
> + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
> + %add = fadd float %tmax.0, %tmin.0
> + ret float %add
> +}
> +
> +; Check that we do hoist loads when the store is above the insertion point.
> +;
> +; CHECK-LABEL: @readsAndWriteAboveInsertPt
> +; CHECK: load
> +; CHECK: load
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: fmul
> +; CHECK-NOT: load
> +; CHECK-NOT: fmul
> +; CHECK-NOT: fsub
> +define float @readsAndWriteAboveInsertPt(float %d, float* %min, float* %max, float* %a) {
> +entry:
> + %div = fdiv float 1.000000e+00, %d
> + store float 0.000000e+00, float* @GlobalVar
> + %cmp = fcmp oge float %div, 0.000000e+00
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + %0 = load float, float* %min, align 4
> + %1 = load float, float* %a, align 4
> + %sub = fsub float %0, %1
> + %mul = fmul float %sub, %div
> + %2 = load float, float* %max, align 4
> + %sub1 = fsub float %2, %1
> + %mul2 = fmul float %sub1, %div
> + br label %if.end
> +
> +if.else: ; preds = %entry
> + %3 = load float, float* %max, align 4
> + %4 = load float, float* %a, align 4
> + %sub3 = fsub float %3, %4
> + %mul4 = fmul float %sub3, %div
> + %5 = load float, float* %min, align 4
> + %sub5 = fsub float %5, %4
> + %mul6 = fmul float %sub5, %div
> + br label %if.end
> +
> +if.end: ; preds = %if.else, %if.then
> + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
> + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
> + %add = fadd float %tmax.0, %tmin.0
> + ret float %add
> +}
> +
> +; Check that dependent expressions are hoisted.
> +; CHECK-LABEL: @dependentScalarsHoisting
> +; CHECK: fsub
> +; CHECK: fadd
> +; CHECK: fdiv
> +; CHECK: fmul
> +; CHECK-NOT: fsub
> +; CHECK-NOT: fadd
> +; CHECK-NOT: fdiv
> +; CHECK-NOT: fmul
> +define float @dependentScalarsHoisting(float %a, float %b, i1 %c) {
> +entry:
> + br i1 %c, label %if.then, label %if.else
> +
> +if.then:
> + %d = fsub float %b, %a
> + %e = fadd float %d, %a
> + %f = fdiv float %e, %a
> + %g = fmul float %f, %a
> + br label %if.end
> +
> +if.else:
> + %h = fsub float %b, %a
> + %i = fadd float %h, %a
> + %j = fdiv float %i, %a
> + %k = fmul float %j, %a
> + br label %if.end
> +
> +if.end:
> + %r = phi float [ %g, %if.then ], [ %k, %if.else ]
> + ret float %r
> +}
> +
> +; Check that all independent expressions are hoisted.
> +; CHECK-LABEL: @independentScalarsHoisting
> +; CHECK: fmul
> +; CHECK: fadd
> +; CHECK: fdiv
> +; CHECK: fsub
> +; CHECK-NOT: fsub
> +; CHECK-NOT: fdiv
> +; CHECK-NOT: fmul
> +define float @independentScalarsHoisting(float %a, float %b, i1 %c) {
> +entry:
> + br i1 %c, label %if.then, label %if.else
> +
> +if.then:
> + %d = fadd float %b, %a
> + %e = fsub float %b, %a
> + %f = fdiv float %b, %a
> + %g = fmul float %b, %a
> + br label %if.end
> +
> +if.else:
> + %i = fadd float %b, %a
> + %h = fsub float %b, %a
> + %j = fdiv float %b, %a
> + %k = fmul float %b, %a
> + br label %if.end
> +
> +if.end:
> + %p = phi float [ %d, %if.then ], [ %i, %if.else ]
> + %q = phi float [ %e, %if.then ], [ %h, %if.else ]
> + %r = phi float [ %f, %if.then ], [ %j, %if.else ]
> + %s = phi float [ %g, %if.then ], [ %k, %if.else ]
> + %t = fadd float %p, %q
> + %u = fadd float %r, %s
> + %v = fadd float %t, %u
> + ret float %v
> +}
> +
> +; Check that we hoist load and scalar expressions in triangles.
> +; CHECK-LABEL: @triangleHoisting
> +; CHECK: load
> +; CHECK: load
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: fmul
> +; CHECK-NOT: load
> +; CHECK-NOT: fmul
> +; CHECK-NOT: fsub
> +define float @triangleHoisting(float %d, float* %min, float* %max, float* %a) {
> +entry:
> + %div = fdiv float 1.000000e+00, %d
> + %cmp = fcmp oge float %div, 0.000000e+00
> + br i1 %cmp, label %if.then, label %if.end
> +
> +if.then: ; preds = %entry
> + %0 = load float, float* %min, align 4
> + %1 = load float, float* %a, align 4
> + %sub = fsub float %0, %1
> + %mul = fmul float %sub, %div
> + %2 = load float, float* %max, align 4
> + %sub1 = fsub float %2, %1
> + %mul2 = fmul float %sub1, %div
> + br label %if.end
> +
> +if.end: ; preds = %entry
> + %p1 = phi float [ %mul2, %if.then ], [ 0.000000e+00, %entry ]
> + %p2 = phi float [ %mul, %if.then ], [ 0.000000e+00, %entry ]
> + %3 = load float, float* %max, align 4
> + %4 = load float, float* %a, align 4
> + %sub3 = fsub float %3, %4
> + %mul4 = fmul float %sub3, %div
> + %5 = load float, float* %min, align 4
> + %sub5 = fsub float %5, %4
> + %mul6 = fmul float %sub5, %div
> +
> + %x = fadd float %p1, %mul6
> + %y = fadd float %p2, %mul4
> + %z = fadd float %x, %y
> + ret float %z
> +}
> +
> +; Check that we hoist load and scalar expressions into the dominator.
> +; CHECK-LABEL: @dominatorHoisting
> +; CHECK: load
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK-NOT: load
> +; CHECK-NOT: fmul
> +; CHECK-NOT: fsub
> +define float @dominatorHoisting(float %d, float* %min, float* %max, float* %a) {
> +entry:
> + %div = fdiv float 1.000000e+00, %d
> + %0 = load float, float* %min, align 4
> + %1 = load float, float* %a, align 4
> + %sub = fsub float %0, %1
> + %mul = fmul float %sub, %div
> + %2 = load float, float* %max, align 4
> + %sub1 = fsub float %2, %1
> + %mul2 = fmul float %sub1, %div
> + %cmp = fcmp oge float %div, 0.000000e+00
> + br i1 %cmp, label %if.then, label %if.end
> +
> +if.then: ; preds = %entry
> + %3 = load float, float* %max, align 4
> + %4 = load float, float* %a, align 4
> + %sub3 = fsub float %3, %4
> + %mul4 = fmul float %sub3, %div
> + %5 = load float, float* %min, align 4
> + %sub5 = fsub float %5, %4
> + %mul6 = fmul float %sub5, %div
> + br label %if.end
> +
> +if.end: ; preds = %entry
> + %p1 = phi float [ %mul4, %if.then ], [ 0.000000e+00, %entry ]
> + %p2 = phi float [ %mul6, %if.then ], [ 0.000000e+00, %entry ]
> +
> + %x = fadd float %p1, %mul2
> + %y = fadd float %p2, %mul
> + %z = fadd float %x, %y
> + ret float %z
> +}
> +
> +; Check that we hoist load and scalar expressions into the dominator.
> +; CHECK-LABEL: @domHoisting
> +; CHECK: load
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK: load
> +; CHECK: fsub
> +; CHECK: fmul
> +; CHECK-NOT: load
> +; CHECK-NOT: fmul
> +; CHECK-NOT: fsub
> +define float @domHoisting(float %d, float* %min, float* %max, float* %a) {
> +entry:
> + %div = fdiv float 1.000000e+00, %d
> + %0 = load float, float* %min, align 4
> + %1 = load float, float* %a, align 4
> + %sub = fsub float %0, %1
> + %mul = fmul float %sub, %div
> + %2 = load float, float* %max, align 4
> + %sub1 = fsub float %2, %1
> + %mul2 = fmul float %sub1, %div
> + %cmp = fcmp oge float %div, 0.000000e+00
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then:
> + %3 = load float, float* %max, align 4
> + %4 = load float, float* %a, align 4
> + %sub3 = fsub float %3, %4
> + %mul4 = fmul float %sub3, %div
> + %5 = load float, float* %min, align 4
> + %sub5 = fsub float %5, %4
> + %mul6 = fmul float %sub5, %div
> + br label %if.end
> +
> +if.else:
> + %6 = load float, float* %max, align 4
> + %7 = load float, float* %a, align 4
> + %sub9 = fsub float %6, %7
> + %mul10 = fmul float %sub9, %div
> + %8 = load float, float* %min, align 4
> + %sub12 = fsub float %8, %7
> + %mul13 = fmul float %sub12, %div
> + br label %if.end
> +
> +if.end:
> + %p1 = phi float [ %mul4, %if.then ], [ %mul10, %if.else ]
> + %p2 = phi float [ %mul6, %if.then ], [ %mul13, %if.else ]
> +
> + %x = fadd float %p1, %mul2
> + %y = fadd float %p2, %mul
> + %z = fadd float %x, %y
> + ret float %z
> +}
> +
> +; Check that we do not hoist loads past stores within the same basic block.
> +; CHECK-LABEL: @noHoistInSingleBBWithStore
> +; CHECK: load
> +; CHECK: store
> +; CHECK: load
> +; CHECK: store
> +define i32 @noHoistInSingleBBWithStore() {
> +entry:
> + %D = alloca i32, align 4
> + %0 = bitcast i32* %D to i8*
> + %bf = load i8, i8* %0, align 4
> + %bf.clear = and i8 %bf, -3
> + store i8 %bf.clear, i8* %0, align 4
> + %bf1 = load i8, i8* %0, align 4
> + %bf.clear1 = and i8 %bf1, 1
> + store i8 %bf.clear1, i8* %0, align 4
> + ret i32 0
> +}
> +
> +; Check that we do not hoist loads past calls within the same basic block.
> +; CHECK-LABEL: @noHoistInSingleBBWithCall
> +; CHECK: load
> +; CHECK: call
> +; CHECK: load
> +declare void @foo()
> +define i32 @noHoistInSingleBBWithCall() {
> +entry:
> + %D = alloca i32, align 4
> + %0 = bitcast i32* %D to i8*
> + %bf = load i8, i8* %0, align 4
> + %bf.clear = and i8 %bf, -3
> + call void @foo()
> + %bf1 = load i8, i8* %0, align 4
> + %bf.clear1 = and i8 %bf1, 1
> + ret i32 0
> +}
> +
> +; Check that we do not hoist loads past stores in any branch of a diamond.
> +; CHECK-LABEL: @noHoistInDiamondWithOneStore1
> +; CHECK: fdiv
> +; CHECK: fcmp
> +; CHECK: br
> +define float @noHoistInDiamondWithOneStore1(float %d, float* %min, float* %max, float* %a) {
> +entry:
> + %div = fdiv float 1.000000e+00, %d
> + %cmp = fcmp oge float %div, 0.000000e+00
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + store float 0.000000e+00, float* @GlobalVar
> + %0 = load float, float* %min, align 4
> + %1 = load float, float* %a, align 4
> + %sub = fsub float %0, %1
> + %mul = fmul float %sub, %div
> + %2 = load float, float* %max, align 4
> + %sub1 = fsub float %2, %1
> + %mul2 = fmul float %sub1, %div
> + br label %if.end
> +
> +if.else: ; preds = %entry
> + ; There are no side effects on the if.else branch.
> + %3 = load float, float* %max, align 4
> + %4 = load float, float* %a, align 4
> + %sub3 = fsub float %3, %4
> + %mul4 = fmul float %sub3, %div
> + %5 = load float, float* %min, align 4
> + %sub5 = fsub float %5, %4
> + %mul6 = fmul float %sub5, %div
> + br label %if.end
> +
> +if.end: ; preds = %if.else, %if.then
> + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
> + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
> +
> + %6 = load float, float* %max, align 4
> + %7 = load float, float* %a, align 4
> + %sub6 = fsub float %6, %7
> + %mul7 = fmul float %sub6, %div
> + %8 = load float, float* %min, align 4
> + %sub8 = fsub float %8, %7
> + %mul9 = fmul float %sub8, %div
> +
> + %add = fadd float %tmax.0, %tmin.0
> + ret float %add
> +}
> +
> +; Check that we do not hoist loads past a store in any branch of a diamond.
> +; CHECK-LABEL: @noHoistInDiamondWithOneStore2
> +; CHECK: fdiv
> +; CHECK: fcmp
> +; CHECK: br
> +define float @noHoistInDiamondWithOneStore2(float %d, float* %min, float* %max, float* %a) {
> +entry:
> + %div = fdiv float 1.000000e+00, %d
> + %cmp = fcmp oge float %div, 0.000000e+00
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + ; There are no side effects on the if.then branch.
> + %0 = load float, float* %min, align 4
> + %1 = load float, float* %a, align 4
> + %sub = fsub float %0, %1
> + %mul = fmul float %sub, %div
> + %2 = load float, float* %max, align 4
> + %sub1 = fsub float %2, %1
> + %mul2 = fmul float %sub1, %div
> + br label %if.end
> +
> +if.else: ; preds = %entry
> + store float 0.000000e+00, float* @GlobalVar
> + %3 = load float, float* %max, align 4
> + %4 = load float, float* %a, align 4
> + %sub3 = fsub float %3, %4
> + %mul4 = fmul float %sub3, %div
> + %5 = load float, float* %min, align 4
> + %sub5 = fsub float %5, %4
> + %mul6 = fmul float %sub5, %div
> + br label %if.end
> +
> +if.end: ; preds = %if.else, %if.then
> + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
> + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
> +
> + %6 = load float, float* %max, align 4
> + %7 = load float, float* %a, align 4
> + %sub6 = fsub float %6, %7
> + %mul7 = fmul float %sub6, %div
> + %8 = load float, float* %min, align 4
> + %sub8 = fsub float %8, %7
> + %mul9 = fmul float %sub8, %div
> +
> + %add = fadd float %tmax.0, %tmin.0
> + ret float %add
> +}
> +
> +; Check that we do not hoist loads outside a loop containing stores.
> +; CHECK-LABEL: @noHoistInLoopsWithStores
> +; CHECK: fdiv
> +; CHECK: fcmp
> +; CHECK: br
> +define float @noHoistInLoopsWithStores(float %d, float* %min, float* %max, float* %a) {
> +entry:
> + %div = fdiv float 1.000000e+00, %d
> + %cmp = fcmp oge float %div, 0.000000e+00
> + br i1 %cmp, label %do.body, label %if.else
> +
> +do.body:
> + %0 = load float, float* %min, align 4
> + %1 = load float, float* %a, align 4
> +
> + ; It is unsafe to hoist the loads outside the loop because of the store.
> + store float 0.000000e+00, float* @GlobalVar
> +
> + %sub = fsub float %0, %1
> + %mul = fmul float %sub, %div
> + %2 = load float, float* %max, align 4
> + %sub1 = fsub float %2, %1
> + %mul2 = fmul float %sub1, %div
> + br label %while.cond
> +
> +while.cond:
> + %cmp1 = fcmp oge float %mul2, 0.000000e+00
> + br i1 %cmp1, label %if.end, label %do.body
> +
> +if.else:
> + %3 = load float, float* %max, align 4
> + %4 = load float, float* %a, align 4
> + %sub3 = fsub float %3, %4
> + %mul4 = fmul float %sub3, %div
> + %5 = load float, float* %min, align 4
> + %sub5 = fsub float %5, %4
> + %mul6 = fmul float %sub5, %div
> + br label %if.end
> +
> +if.end:
> + %tmax.0 = phi float [ %mul2, %while.cond ], [ %mul6, %if.else ]
> + %tmin.0 = phi float [ %mul, %while.cond ], [ %mul4, %if.else ]
> +
> + %add = fadd float %tmax.0, %tmin.0
> + ret float %add
> +}
> +
> +; Check that we hoist stores: all the instructions from the then branch
> +; should be hoisted.
> +; CHECK-LABEL: @hoistStores
> +; CHECK: zext
> +; CHECK: trunc
> +; CHECK: getelementptr
> +; CHECK: load
> +; CHECK: getelementptr
> +; CHECK: store
> +; CHECK: load
> +; CHECK: load
> +; CHECK: zext
> +; CHECK: add
> +; CHECK: store
> +; CHECK: br
> +; CHECK: if.then
> +; CHECK: br
> +
> +%struct.foo = type { i16* }
> +
> +define void @hoistStores(%struct.foo* %s, i32* %coord, i1 zeroext %delta) {
> +entry:
> + %frombool = zext i1 %delta to i8
> + %tobool = trunc i8 %frombool to i1
> + br i1 %tobool, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + %p = getelementptr inbounds %struct.foo, %struct.foo* %s, i32 0, i32 0
> + %0 = load i16*, i16** %p, align 8
> + %incdec.ptr = getelementptr inbounds i16, i16* %0, i32 1
> + store i16* %incdec.ptr, i16** %p, align 8
> + %1 = load i16, i16* %0, align 2
> + %conv = zext i16 %1 to i32
> + %2 = load i32, i32* %coord, align 4
> + %add = add i32 %2, %conv
> + store i32 %add, i32* %coord, align 4
> + br label %if.end
> +
> +if.else: ; preds = %entry
> + %p1 = getelementptr inbounds %struct.foo, %struct.foo* %s, i32 0, i32 0
> + %3 = load i16*, i16** %p1, align 8
> + %incdec.ptr2 = getelementptr inbounds i16, i16* %3, i32 1
> + store i16* %incdec.ptr2, i16** %p1, align 8
> + %4 = load i16, i16* %3, align 2
> + %conv3 = zext i16 %4 to i32
> + %5 = load i32, i32* %coord, align 4
> + %add4 = add i32 %5, %conv3
> + store i32 %add4, i32* %coord, align 4
> + %6 = load i16*, i16** %p1, align 8
> + %incdec.ptr6 = getelementptr inbounds i16, i16* %6, i32 1
> + store i16* %incdec.ptr6, i16** %p1, align 8
> + %7 = load i16, i16* %6, align 2
> + %conv7 = zext i16 %7 to i32
> + %shl = shl i32 %conv7, 8
> + %8 = load i32, i32* %coord, align 4
> + %add8 = add i32 %8, %shl
> + store i32 %add8, i32* %coord, align 4
> + br label %if.end
> +
> +if.end: ; preds = %if.else, %if.then
> + ret void
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits