[llvm] r184647 - SLP Vectorizer: Implement multi-block slp-vectorization.
Alexey Samsonov
samsonov at google.com
Sun Jun 23 08:43:42 PDT 2013
Hi Nadav!
This change breaks the ASan bootstrap bot with the following error report:
=================================================================
==27050==ERROR: AddressSanitizer: heap-use-after-free on address
0x60d00000c488 at pc 0x1592bfc bp 0x7ffffec9cd90 sp 0x7ffffec9cd88
READ of size 8 at 0x60d00000c488 thread T0
#0 0x1592bfb in getParent /build/llvm/include/llvm/IR/Instruction.h:53
#1 0x1592bfb in SetInsertPoint
/build/llvm/include/llvm/IR/IRBuilder.h:90
#2 0x1592bfb in ~BuilderLocGuard
/build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:61
#3 0x1592bfb in (anonymous
namespace)::FuncSLP::vectorizeTree_rec(llvm::ArrayRef<llvm::Value*>)
/build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1111
#4 0x158e563 in (anonymous
namespace)::FuncSLP::vectorizeTree(llvm::ArrayRef<llvm::Value*>)
/build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1115
#5 0x1588ba1 in vectorizeStoreChain
/build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:866
#6 0x1588ba1 in vectorizeStores
/build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:915
#7 0x1588ba1 in vectorizeStoreChains
/build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1468
#8 0x1588ba1 in (anonymous
namespace)::SLPVectorizer::runOnFunction(llvm::Function&)
/build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1230
#9 0x2b03fdc in llvm::FPPassManager::runOnFunction(llvm::Function&)
/build/llvm/lib/IR/PassManager.cpp:1530
#10 0x2b045a5 in llvm::FPPassManager::runOnModule(llvm::Module&)
/build/llvm/lib/IR/PassManager.cpp:1550
#11 0x2b04dbb in llvm::MPPassManager::runOnModule(llvm::Module&)
/build/llvm/lib/IR/PassManager.cpp:1608
#12 0x2b05fb3 in llvm::PassManagerImpl::run(llvm::Module&)
/build/llvm/lib/IR/PassManager.cpp:1703
#13 0x2b0642f in llvm::PassManager::run(llvm::Module&)
/build/llvm/lib/IR/PassManager.cpp:1738
#14 0x6199a3 in main /build/llvm/tools/opt/opt.cpp:823
#15 0x7fb44e87276c (/lib/x86_64-linux-gnu/libc.so.6+0x2176c)
#16 0x608ed4 in _start (/build/llvm_build_asan/bin/opt+0x608ed4)
0x60d00000c488 is located 120 bytes inside of 136-byte region
[0x60d00000c410,0x60d00000c498)
freed by thread T0 here:
#0 0x5f49c5 in operator delete(void*)
/build/llvm/projects/compiler-rt/lib/asan/asan_new_delete.cc:83
#1 0x1591a6e in (anonymous
namespace)::FuncSLP::vectorizeTree_rec(llvm::ArrayRef<llvm::Value*>)
/build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1105
#2 0x158e563 in (anonymous
namespace)::FuncSLP::vectorizeTree(llvm::ArrayRef<llvm::Value*>)
/build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1115
#3 0x1588ba1 in vectorizeStoreChain
/build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:866
#4 0x1588ba1 in vectorizeStores
/build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:915
#5 0x1588ba1 in vectorizeStoreChains
/build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1468
#6 0x1588ba1 in (anonymous
namespace)::SLPVectorizer::runOnFunction(llvm::Function&)
/build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1230
#7 0x2b03fdc in llvm::FPPassManager::runOnFunction(llvm::Function&)
/build/llvm/lib/IR/PassManager.cpp:1530
#8 0x2b045a5 in llvm::FPPassManager::runOnModule(llvm::Module&)
/build/llvm/lib/IR/PassManager.cpp:1550
#9 0x2b04dbb in llvm::MPPassManager::runOnModule(llvm::Module&)
/build/llvm/lib/IR/PassManager.cpp:1608
#10 0x2b05fb3 in llvm::PassManagerImpl::run(llvm::Module&)
/build/llvm/lib/IR/PassManager.cpp:1703
#11 0x2b0642f in llvm::PassManager::run(llvm::Module&)
/build/llvm/lib/IR/PassManager.cpp:1738
#12 0x6199a3 in main /build/llvm/tools/opt/opt.cpp:823
#13 0x7fb44e87276c (/lib/x86_64-linux-gnu/libc.so.6+0x2176c)
previously allocated by thread T0 here:
#0 0x5f4705 in operator new(unsigned long)
/build/llvm/projects/compiler-rt/lib/asan/asan_new_delete.cc:52
#1 0x2b30953 in llvm::User::operator new(unsigned long, unsigned int)
/build/llvm/lib/IR/User.cpp:60
#2 0x149304f in operator new
/build/llvm/include/llvm/IR/Instructions.h:265
#3 0x149304f in llvm::LLParser::ParseStore(llvm::Instruction*&,
llvm::LLParser::PerFunctionState&)
/build/llvm/lib/AsmParser/LLParser.cpp:4137
#4 0x14811e6 in llvm::LLParser::ParseInstruction(llvm::Instruction*&,
llvm::BasicBlock*, llvm::LLParser::PerFunctionState&)
/build/llvm/lib/AsmParser/LLParser.cpp:3312
#5 0x1480175 in
llvm::LLParser::ParseBasicBlock(llvm::LLParser::PerFunctionState&)
/build/llvm/lib/AsmParser/LLParser.cpp:3185
#6 0x145858f in llvm::LLParser::ParseFunctionBody(llvm::Function&)
/build/llvm/lib/AsmParser/LLParser.cpp:3138
#7 0x1445942 in ParseDefine /build/llvm/lib/AsmParser/LLParser.cpp:424
#8 0x1445942 in llvm::LLParser::ParseTopLevelEntities()
/build/llvm/lib/AsmParser/LLParser.cpp:226
#9 0x14455ce in llvm::LLParser::Run()
/build/llvm/lib/AsmParser/LLParser.cpp:41
#10 0x143706e in llvm::ParseAssembly(llvm::MemoryBuffer*,
llvm::Module*, llvm::SMDiagnostic&, llvm::LLVMContext&)
/build/llvm/lib/AsmParser/Parser.cpp:38
#11 0x11bf597 in llvm::ParseIR(llvm::MemoryBuffer*,
llvm::SMDiagnostic&, llvm::LLVMContext&)
/build/llvm/lib/IRReader/IRReader.cpp:76
#12 0x11bff04 in llvm::ParseIRFile(std::string const&,
llvm::SMDiagnostic&, llvm::LLVMContext&)
/build/llvm/lib/IRReader/IRReader.cpp:88
#13 0x61308c in main /build/llvm/tools/opt/opt.cpp:592
#14 0x7fb44e87276c (/lib/x86_64-linux-gnu/libc.so.6+0x2176c)
SUMMARY: AddressSanitizer: heap-use-after-free
/build/llvm/include/llvm/IR/Instruction.h:53 getParent
Can you please fix this?
On Sun, Jun 23, 2013 at 1:34 AM, Nadav Rotem <nrotem at apple.com> wrote:
> Author: nadav
> Date: Sat Jun 22 16:34:10 2013
> New Revision: 184647
>
> URL: http://llvm.org/viewvc/llvm-project?rev=184647&view=rev
> Log:
> SLP Vectorizer: Implement multi-block slp-vectorization.
>
> Rewrote the SLP-vectorization as a whole-function vectorization pass. It
> is now able to vectorize chains across multiple basic blocks.
> It still does not vectorize PHIs, but this should be easy to do now that
> we scan the entire function.
> I removed the support for extracting values from trees.
> We are now able to vectorize more programs, but there are some serious
> regressions in many workloads (such as flops-6 and mandel-2).
>
>
> Added:
> llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_block.ll
> Removed:
> llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp
> llvm/trunk/lib/Transforms/Vectorize/VecUtils.h
> Modified:
> llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt
> llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
> llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll
> llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt?rev=184647&r1=184646&r2=184647&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt Sat Jun 22 16:34:10
> 2013
> @@ -3,7 +3,6 @@ add_llvm_library(LLVMVectorize
> Vectorize.cpp
> LoopVectorize.cpp
> SLPVectorizer.cpp
> - VecUtils.cpp
> )
>
> add_dependencies(LLVMVectorize intrinsics_gen)
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=184647&r1=184646&r2=184647&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Sat Jun 22
> 16:34:10 2013
> @@ -18,17 +18,20 @@
> #define SV_NAME "slp-vectorizer"
> #define DEBUG_TYPE "SLP"
>
> -#include "VecUtils.h"
> #include "llvm/Transforms/Vectorize.h"
> #include "llvm/ADT/MapVector.h"
> +#include "llvm/ADT/SetVector.h"
> #include "llvm/Analysis/AliasAnalysis.h"
> #include "llvm/Analysis/ScalarEvolution.h"
> +#include "llvm/Analysis/ScalarEvolutionExpressions.h"
> +#include "llvm/Analysis/AliasAnalysis.h"
> #include "llvm/Analysis/TargetTransformInfo.h"
> #include "llvm/Analysis/Verifier.h"
> #include "llvm/Analysis/LoopInfo.h"
> #include "llvm/IR/DataLayout.h"
> #include "llvm/IR/Instructions.h"
> #include "llvm/IR/IntrinsicInst.h"
> +#include "llvm/IR/IRBuilder.h"
> #include "llvm/IR/Module.h"
> #include "llvm/IR/Type.h"
> #include "llvm/IR/Value.h"
> @@ -36,6 +39,7 @@
> #include "llvm/Support/CommandLine.h"
> #include "llvm/Support/Debug.h"
> #include "llvm/Support/raw_ostream.h"
> +#include <algorithm>
> #include <map>
>
> using namespace llvm;
> @@ -46,9 +50,1138 @@ static cl::opt<int>
> "number. (gain = -cost of vectorization)"));
> namespace {
>
> +static const unsigned MinVecRegSize = 128;
> +
> +static const unsigned RecursionMaxDepth = 6;
> +
> +/// RAII pattern to save the insertion point of the IR builder.
> +class BuilderLocGuard {
> +public:
> + BuilderLocGuard(IRBuilder<> &B) : Builder(B), Loc(B.GetInsertPoint()) {}
> + ~BuilderLocGuard() { Builder.SetInsertPoint(Loc); }
> +
> +private:
> + // Prevent copying.
> + BuilderLocGuard(const BuilderLocGuard &);
> + BuilderLocGuard &operator=(const BuilderLocGuard &);
> + IRBuilder<> &Builder;
> + BasicBlock::iterator Loc;
> +};
> +
> +/// A helper class for numbering instructions in multiple blocks.
> +/// Numbers start at zero for each basic block.
> +struct BlockNumbering {
> +
> + BlockNumbering(BasicBlock *Bb) : BB(Bb), Valid(false) {}
> +
> + BlockNumbering() : BB(0), Valid(false) {}
> +
> + void numberInstructions() {
> + unsigned Loc = 0;
> + InstrIdx.clear();
> + InstrVec.clear();
> + // Number the instructions in the block.
> + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;
> ++it) {
> + InstrIdx[it] = Loc++;
> + InstrVec.push_back(it);
> + assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
> + }
> + Valid = true;
> + }
> +
> + int getIndex(Instruction *I) {
> + if (!Valid)
> + numberInstructions();
> + assert(InstrIdx.count(I) && "Unknown instruction");
> + return InstrIdx[I];
> + }
> +
> + Instruction *getInstruction(unsigned loc) {
> + if (!Valid)
> + numberInstructions();
> + assert(InstrVec.size() > loc && "Invalid Index");
> + return InstrVec[loc];
> + }
> +
> + void forget() { Valid = false; }
> +
> +private:
> + /// The block we are numbering.
> + BasicBlock *BB;
> + /// Is the block numbered.
> + bool Valid;
> + /// Maps instructions to numbers and back.
> + SmallDenseMap<Instruction *, int> InstrIdx;
> + /// Maps integers to Instructions.
> + std::vector<Instruction *> InstrVec;
> +};
> +
> +class FuncSLP {
> + typedef SmallVector<Value *, 8> ValueList;
> + typedef SmallVector<Instruction *, 16> InstrList;
> + typedef SmallPtrSet<Value *, 16> ValueSet;
> + typedef SmallVector<StoreInst *, 8> StoreList;
> +
> +public:
> + static const int MAX_COST = INT_MIN;
> +
> + FuncSLP(Function *Func, ScalarEvolution *Se, DataLayout *Dl,
> + TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li)
> + : F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li),
> + Builder(Se->getContext()) {
> + for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it)
> {
> + BasicBlock *BB = it;
> + BlocksNumbers[BB] = BlockNumbering(BB);
> + }
> + }
> +
> + /// \brief Take the pointer operand from the Load/Store instruction.
> + /// \returns NULL if this is not a valid Load/Store instruction.
> + static Value *getPointerOperand(Value *I);
> +
> + /// \brief Take the address space operand from the Load/Store
> instruction.
> + /// \returns -1 if this is not a valid Load/Store instruction.
> + static unsigned getAddressSpaceOperand(Value *I);
> +
> + /// \returns true if the memory operations A and B are consecutive.
> + bool isConsecutiveAccess(Value *A, Value *B);
> +
> + /// \brief Vectorize the tree that starts with the elements in \p VL.
> + /// \returns the vectorized value.
> + Value *vectorizeTree(ArrayRef<Value *> VL);
> +
> + /// \returns the vectorization cost of the subtree that starts at \p VL.
> + /// A negative number means that this is profitable.
> + int getTreeCost(ArrayRef<Value *> VL);
> +
> + /// \returns the scalarization cost for this list of values. Assuming
> that
> + /// this subtree gets vectorized, we may need to extract the values
> from the
> + /// roots. This method calculates the cost of extracting the values.
> + int getGatherCost(ArrayRef<Value *> VL);
> +
> + /// \brief Attempts to order and vectorize a sequence of stores. This
> + /// function does a quadratic scan of the given stores.
> + /// \returns true if the basic block was modified.
> + bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold);
> +
> + /// \brief Vectorize a group of scalars into a vector tree.
> + /// \returns the vectorized value.
> + Value *vectorizeArith(ArrayRef<Value *> Operands);
> +
> + /// \brief This method contains the recursive part of getTreeCost.
> + int getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth);
> +
> + /// \brief This recursive method looks for vectorization hazards such as
> + /// values that are used by multiple users and checks that values are
> used
> + /// by only one vector lane. It updates the variables LaneMap,
> MultiUserVals.
> + void getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth);
> +
> + /// \brief This method contains the recursive part of vectorizeTree.
> + Value *vectorizeTree_rec(ArrayRef<Value *> VL);
> +
> + /// \brief Vectorize a sorted sequence of stores.
> + bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold);
> +
> + /// \returns the scalarization cost for this type. Scalarization in this
> + /// context means the creation of vectors from a group of scalars.
> + int getGatherCost(Type *Ty);
> +
> + /// \returns the AA location that is being accessed by the instruction.
> + AliasAnalysis::Location getLocation(Instruction *I);
> +
> + /// \brief Checks if it is possible to sink an instruction from
> + /// \p Src to \p Dst.
> + /// \returns the pointer to the barrier instruction if we can't sink.
> + Value *getSinkBarrier(Instruction *Src, Instruction *Dst);
> +
> + /// \returns the index of the last instruction in the BB from \p VL.
> + int getLastIndex(ArrayRef<Value *> VL);
> +
> + /// \returns the last Instruction in the bundle \p VL.
> + Instruction *getLastInstruction(ArrayRef<Value *> VL);
> +
> + /// \returns the Instruction at index \p Index which is in Block \p BB.
> + Instruction *getInstructionForIndex(unsigned Index, BasicBlock *BB);
> +
> + /// \returns the index of the first User of \p VL.
> + int getFirstUserIndex(ArrayRef<Value *> VL);
> +
> + /// \returns a vector from a collection of scalars in \p VL.
> + Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
> +
> + /// \brief Try to hoist gather sequences outside of the loop in cases
> where
> + /// all of the sources are loop invariant.
> + void hoistGatherSequence();
> +
> + bool needToGatherAny(ArrayRef<Value *> VL) {
> + for (int i = 0, e = VL.size(); i < e; ++i)
> + if (MustGather.count(VL[i]))
> + return true;
> + return false;
> + }
> +
> + /// -- Vectorization State --
> +
> + /// Maps values in the tree to the vector lanes that use them. This
> map must
> + /// be reset between runs of getCost.
> + std::map<Value *, int> LaneMap;
> + /// A list of instructions to ignore while sinking
> + /// memory instructions. This map must be reset between runs of getCost.
> + ValueSet MemBarrierIgnoreList;
> +
> + /// Maps between the first scalar to the vector. This map must be reset
> + /// between runs.
> + DenseMap<Value *, Value *> VectorizedValues;
> +
> + /// Contains values that must be gathered because they are used
> + /// by multiple lanes, or by users outside the tree.
> + /// NOTICE: The vectorization methods also use this set.
> + ValueSet MustGather;
> +
> + /// Contains a list of values that are used outside the current tree.
> This
> + /// set must be reset between runs.
> + SetVector<Value *> MultiUserVals;
> +
> + /// Holds all of the instructions that we gathered.
> + SetVector<Instruction *> GatherSeq;
> +
> + /// Numbers instructions in different blocks.
> + std::map<BasicBlock *, BlockNumbering> BlocksNumbers;
> +
> + // Analysis and block reference.
> + Function *F;
> + ScalarEvolution *SE;
> + DataLayout *DL;
> + TargetTransformInfo *TTI;
> + AliasAnalysis *AA;
> + LoopInfo *LI;
> + /// Instruction builder to construct the vectorized tree.
> + IRBuilder<> Builder;
> +};
> +
> +int FuncSLP::getGatherCost(Type *Ty) {
> + int Cost = 0;
> + for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e;
> ++i)
> + Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
> + return Cost;
> +}
> +
> +int FuncSLP::getGatherCost(ArrayRef<Value *> VL) {
> + // Find the type of the operands in VL.
> + Type *ScalarTy = VL[0]->getType();
> + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
> + ScalarTy = SI->getValueOperand()->getType();
> + VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
> + // Find the cost of inserting/extracting values from the vector.
> + return getGatherCost(VecTy);
> +}
> +
> +AliasAnalysis::Location FuncSLP::getLocation(Instruction *I) {
> + if (StoreInst *SI = dyn_cast<StoreInst>(I))
> + return AA->getLocation(SI);
> + if (LoadInst *LI = dyn_cast<LoadInst>(I))
> + return AA->getLocation(LI);
> + return AliasAnalysis::Location();
> +}
> +
> +Value *FuncSLP::getPointerOperand(Value *I) {
> + if (LoadInst *LI = dyn_cast<LoadInst>(I))
> + return LI->getPointerOperand();
> + if (StoreInst *SI = dyn_cast<StoreInst>(I))
> + return SI->getPointerOperand();
> + return 0;
> +}
> +
> +unsigned FuncSLP::getAddressSpaceOperand(Value *I) {
> + if (LoadInst *L = dyn_cast<LoadInst>(I))
> + return L->getPointerAddressSpace();
> + if (StoreInst *S = dyn_cast<StoreInst>(I))
> + return S->getPointerAddressSpace();
> + return -1;
> +}
> +
> +bool FuncSLP::isConsecutiveAccess(Value *A, Value *B) {
> + Value *PtrA = getPointerOperand(A);
> + Value *PtrB = getPointerOperand(B);
> + unsigned ASA = getAddressSpaceOperand(A);
> + unsigned ASB = getAddressSpaceOperand(B);
> +
> + // Check that the address spaces match and that the pointers are valid.
> + if (!PtrA || !PtrB || (ASA != ASB))
> + return false;
> +
> + // Check that A and B are of the same type.
> + if (PtrA->getType() != PtrB->getType())
> + return false;
> +
> + // Calculate the distance.
> + const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
> + const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
> + const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
> + const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
> +
> + // Non constant distance.
> + if (!ConstOffSCEV)
> + return false;
> +
> + int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
> + Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
> + // The Instructions are consecutive if the size of the first
> load/store is
> + // the same as the offset.
> + int64_t Sz = DL->getTypeStoreSize(Ty);
> + return ((-Offset) == Sz);
> +}
> +
> +Value *FuncSLP::getSinkBarrier(Instruction *Src, Instruction *Dst) {
> + assert(Src->getParent() == Dst->getParent() && "Not the same BB");
> + BasicBlock::iterator I = Src, E = Dst;
> + /// Scan all of the instruction from SRC to DST and check if
> + /// the source may alias.
> + for (++I; I != E; ++I) {
> + // Ignore store instructions that are marked as 'ignore'.
> + if (MemBarrierIgnoreList.count(I))
> + continue;
> + if (Src->mayWriteToMemory()) /* Write */ {
> + if (!I->mayReadOrWriteMemory())
> + continue;
> + } else /* Read */ {
> + if (!I->mayWriteToMemory())
> + continue;
> + }
> + AliasAnalysis::Location A = getLocation(&*I);
> + AliasAnalysis::Location B = getLocation(Src);
> +
> + if (!A.Ptr || !B.Ptr || AA->alias(A, B))
> + return I;
> + }
> + return 0;
> +}
> +
> +static BasicBlock *getSameBlock(ArrayRef<Value *> VL) {
> + BasicBlock *BB = 0;
> + for (int i = 0, e = VL.size(); i < e; i++) {
> + Instruction *I = dyn_cast<Instruction>(VL[i]);
> + if (!I)
> + return 0;
> +
> + if (!BB) {
> + BB = I->getParent();
> + continue;
> + }
> +
> + if (BB != I->getParent())
> + return 0;
> + }
> + return BB;
> +}
> +
> +static bool allConstant(ArrayRef<Value *> VL) {
> + for (unsigned i = 0, e = VL.size(); i < e; ++i)
> + if (!isa<Constant>(VL[i]))
> + return false;
> + return true;
> +}
> +
> +static bool isSplat(ArrayRef<Value *> VL) {
> + for (unsigned i = 1, e = VL.size(); i < e; ++i)
> + if (VL[i] != VL[0])
> + return false;
> + return true;
> +}
> +
> +static unsigned getSameOpcode(ArrayRef<Value *> VL) {
> + unsigned Opcode = 0;
> + for (int i = 0, e = VL.size(); i < e; i++) {
> + if (Instruction *I = dyn_cast<Instruction>(VL[i])) {
> + if (!Opcode) {
> + Opcode = I->getOpcode();
> + continue;
> + }
> + if (Opcode != I->getOpcode())
> + return 0;
> + }
> + }
> + return Opcode;
> +}
> +
> +static bool CanReuseExtract(ArrayRef<Value *> VL, unsigned VF,
> + VectorType *VecTy) {
> + assert(Instruction::ExtractElement == getSameOpcode(VL) && "Invalid
> opcode");
> + // Check if all of the extracts come from the same vector and from the
> + // correct offset.
> + Value *VL0 = VL[0];
> + ExtractElementInst *E0 = cast<ExtractElementInst>(VL0);
> + Value *Vec = E0->getOperand(0);
> +
> + // We have to extract from the same vector type.
> + if (Vec->getType() != VecTy)
> + return false;
> +
> + // Check that all of the indices extract from the correct offset.
> + ConstantInt *CI = dyn_cast<ConstantInt>(E0->getOperand(1));
> + if (!CI || CI->getZExtValue())
> + return false;
> +
> + for (unsigned i = 1, e = VF; i < e; ++i) {
> + ExtractElementInst *E = cast<ExtractElementInst>(VL[i]);
> + ConstantInt *CI = dyn_cast<ConstantInt>(E->getOperand(1));
> +
> + if (!CI || CI->getZExtValue() != i || E->getOperand(0) != Vec)
> + return false;
> + }
> +
> + return true;
> +}
> +
> +void FuncSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
> + if (Depth == RecursionMaxDepth)
> + return MustGather.insert(VL.begin(), VL.end());
> +
> + // Don't handle vectors.
> + if (VL[0]->getType()->isVectorTy())
> + return;
> +
> + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
> + if (SI->getValueOperand()->getType()->isVectorTy())
> + return;
> +
> + // If all of the operands are identical or constant we have a simple
> solution.
> + if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL))
> + return MustGather.insert(VL.begin(), VL.end());
> +
> + // Stop the scan at unknown IR.
> + Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
> + assert(VL0 && "Invalid instruction");
> +
> + // Mark instructions with multiple users.
> + for (unsigned i = 0, e = VL.size(); i < e; ++i) {
> + Instruction *I = dyn_cast<Instruction>(VL[i]);
> + // Remember to check if all of the users of this instruction are
> vectorized
> + // within our tree. At depth zero we have no local users, only
> external
> + // users that we don't care about.
> + if (Depth && I && I->getNumUses() > 1) {
> + DEBUG(dbgs() << "SLP: Adding to MultiUserVals "
> + "because it has multiple users:" << *I << " \n");
> + MultiUserVals.insert(I);
> + }
> + }
> +
> + // Check that the instruction is only used within one lane.
> + for (int i = 0, e = VL.size(); i < e; ++i) {
> + if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) {
> + DEBUG(dbgs() << "SLP: Value used by multiple lanes:" << *VL[i] <<
> "\n");
> + return MustGather.insert(VL.begin(), VL.end());
> + }
> + // Mark this instruction as 'seen' and remember the lane.
> + LaneMap[VL[i]] = i;
> + }
> +
> + unsigned Opcode = getSameOpcode(VL);
> + if (!Opcode)
> + return MustGather.insert(VL.begin(), VL.end());
> +
> + switch (Opcode) {
> + case Instruction::ExtractElement: {
> + VectorType *VecTy = VectorType::get(VL[0]->getType(), VL.size());
> + // No need to follow ExtractElements that are going to be optimized
> away.
> + if (CanReuseExtract(VL, VL.size(), VecTy))
> + return;
> + // Fall through.
> + }
> + case Instruction::Load:
> + return;
> + case Instruction::ZExt:
> + case Instruction::SExt:
> + case Instruction::FPToUI:
> + case Instruction::FPToSI:
> + case Instruction::FPExt:
> + case Instruction::PtrToInt:
> + case Instruction::IntToPtr:
> + case Instruction::SIToFP:
> + case Instruction::UIToFP:
> + case Instruction::Trunc:
> + case Instruction::FPTrunc:
> + case Instruction::BitCast:
> + case Instruction::Select:
> + case Instruction::ICmp:
> + case Instruction::FCmp:
> + case Instruction::Add:
> + case Instruction::FAdd:
> + case Instruction::Sub:
> + case Instruction::FSub:
> + case Instruction::Mul:
> + case Instruction::FMul:
> + case Instruction::UDiv:
> + case Instruction::SDiv:
> + case Instruction::FDiv:
> + case Instruction::URem:
> + case Instruction::SRem:
> + case Instruction::FRem:
> + case Instruction::Shl:
> + case Instruction::LShr:
> + case Instruction::AShr:
> + case Instruction::And:
> + case Instruction::Or:
> + case Instruction::Xor: {
> + for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
> + ValueList Operands;
> + // Prepare the operand vector.
> + for (unsigned j = 0; j < VL.size(); ++j)
> + Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
> +
> + getTreeUses_rec(Operands, Depth + 1);
> + }
> + return;
> + }
> + case Instruction::Store: {
> + ValueList Operands;
> + for (unsigned j = 0; j < VL.size(); ++j)
> + Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
> + getTreeUses_rec(Operands, Depth + 1);
> + return;
> + }
> + default:
> + return MustGather.insert(VL.begin(), VL.end());
> + }
> +}
> +
> +int FuncSLP::getLastIndex(ArrayRef<Value *> VL) {
> + BasicBlock *BB = cast<Instruction>(VL[0])->getParent();
> + assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid
> block");
> + BlockNumbering &BN = BlocksNumbers[BB];
> +
> + int MaxIdx = BN.getIndex(BB->getFirstNonPHI());
> + for (unsigned i = 0, e = VL.size(); i < e; ++i)
> + MaxIdx = std::max(MaxIdx, BN.getIndex(cast<Instruction>(VL[i])));
> + return MaxIdx;
> +}
> +
> +Instruction *FuncSLP::getLastInstruction(ArrayRef<Value *> VL) {
> + BasicBlock *BB = cast<Instruction>(VL[0])->getParent();
> + assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid
> block");
> + BlockNumbering &BN = BlocksNumbers[BB];
> +
> + int MaxIdx = BN.getIndex(cast<Instruction>(VL[0]));
> + for (unsigned i = 1, e = VL.size(); i < e; ++i)
> + MaxIdx = std::max(MaxIdx, BN.getIndex(cast<Instruction>(VL[i])));
> + return BN.getInstruction(MaxIdx);
> +}
> +
> +Instruction *FuncSLP::getInstructionForIndex(unsigned Index, BasicBlock
> *BB) {
> + BlockNumbering &BN = BlocksNumbers[BB];
> + return BN.getInstruction(Index);
> +}
> +
> +int FuncSLP::getFirstUserIndex(ArrayRef<Value *> VL) {
> + BasicBlock *BB = getSameBlock(VL);
> + BlockNumbering &BN = BlocksNumbers[BB];
> +
> + // Find the first user of the values.
> + int FirstUser = BN.getIndex(BB->getTerminator());
> + for (unsigned i = 0, e = VL.size(); i < e; ++i) {
> + for (Value::use_iterator U = VL[i]->use_begin(), UE =
> VL[i]->use_end();
> + U != UE; ++U) {
> + Instruction *Instr = dyn_cast<Instruction>(*U);
> +
> + if (!Instr || Instr->getParent() != BB)
> + continue;
> +
> + FirstUser = std::min(FirstUser, BN.getIndex(Instr));
> + }
> + }
> + return FirstUser;
> +}
> +
> +int FuncSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
> + Type *ScalarTy = VL[0]->getType();
> +
> + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
> + ScalarTy = SI->getValueOperand()->getType();
> +
> + /// Don't mess with vectors.
> + if (ScalarTy->isVectorTy())
> + return FuncSLP::MAX_COST;
> +
> + VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
> +
> + if (allConstant(VL))
> + return 0;
> +
> + if (isSplat(VL))
> + return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
> 0);
> +
> + if (Depth == RecursionMaxDepth || needToGatherAny(VL))
> + return getGatherCost(VecTy);
> +
> + BasicBlock *BB = getSameBlock(VL);
> + unsigned Opcode = getSameOpcode(VL);
> + assert(Opcode && BB && "Invalid Instruction Value");
> +
> + // Check if it is safe to sink the loads or the stores.
> + if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
> + int MaxIdx = getLastIndex(VL);
> + Instruction *Last = getInstructionForIndex(MaxIdx, BB);
> +
> + for (unsigned i = 0, e = VL.size(); i < e; ++i) {
> + if (VL[i] == Last)
> + continue;
> + Value *Barrier = getSinkBarrier(cast<Instruction>(VL[i]), Last);
> + if (Barrier) {
> + DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
> *Last
> + << "\n because of " << *Barrier << "\n");
> + return MAX_COST;
> + }
> + }
> + }
> +
> + Instruction *VL0 = cast<Instruction>(VL[0]);
> + switch (Opcode) {
> + case Instruction::ExtractElement: {
> + if (CanReuseExtract(VL, VL.size(), VecTy))
> + return 0;
> + return getGatherCost(VecTy);
> + }
> + case Instruction::ZExt:
> + case Instruction::SExt:
> + case Instruction::FPToUI:
> + case Instruction::FPToSI:
> + case Instruction::FPExt:
> + case Instruction::PtrToInt:
> + case Instruction::IntToPtr:
> + case Instruction::SIToFP:
> + case Instruction::UIToFP:
> + case Instruction::Trunc:
> + case Instruction::FPTrunc:
> + case Instruction::BitCast: {
> + ValueList Operands;
> + Type *SrcTy = VL0->getOperand(0)->getType();
> + // Prepare the operand vector.
> + for (unsigned j = 0; j < VL.size(); ++j) {
> + Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
> + // Check that the casted type is the same for all users.
> + if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
> + return getGatherCost(VecTy);
> + }
> +
> + int Cost = getTreeCost_rec(Operands, Depth + 1);
> + if (Cost == FuncSLP::MAX_COST)
> + return Cost;
> +
> + // Calculate the cost of this instruction.
> + int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
> + VL0->getType(),
> SrcTy);
> +
> + VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
> + int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy,
> SrcVecTy);
> + Cost += (VecCost - ScalarCost);
> + return Cost;
> + }
> + case Instruction::FCmp:
> + case Instruction::ICmp: {
> + // Check that all of the compares have the same predicate.
> + CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
> + for (unsigned i = 1, e = VL.size(); i < e; ++i) {
> + CmpInst *Cmp = cast<CmpInst>(VL[i]);
> + if (Cmp->getPredicate() != P0)
> + return getGatherCost(VecTy);
> + }
> + // Fall through.
> + }
> + case Instruction::Select:
> + case Instruction::Add:
> + case Instruction::FAdd:
> + case Instruction::Sub:
> + case Instruction::FSub:
> + case Instruction::Mul:
> + case Instruction::FMul:
> + case Instruction::UDiv:
> + case Instruction::SDiv:
> + case Instruction::FDiv:
> + case Instruction::URem:
> + case Instruction::SRem:
> + case Instruction::FRem:
> + case Instruction::Shl:
> + case Instruction::LShr:
> + case Instruction::AShr:
> + case Instruction::And:
> + case Instruction::Or:
> + case Instruction::Xor: {
> + int TotalCost = 0;
> + // Calculate the cost of all of the operands.
> + for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
> + ValueList Operands;
> + // Prepare the operand vector.
> + for (unsigned j = 0; j < VL.size(); ++j)
> + Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
> +
> + int Cost = getTreeCost_rec(Operands, Depth + 1);
> + if (Cost == MAX_COST)
> + return MAX_COST;
> + TotalCost += TotalCost;
> + }
> +
> + // Calculate the cost of this instruction.
> + int ScalarCost = 0;
> + int VecCost = 0;
> + if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp ||
> + Opcode == Instruction::Select) {
> + VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(),
> VL.size());
> + ScalarCost =
> + VecTy->getNumElements() *
> + TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());
> + VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);
> + } else {
> + ScalarCost = VecTy->getNumElements() *
> + TTI->getArithmeticInstrCost(Opcode, ScalarTy);
> + VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
> + }
> + TotalCost += (VecCost - ScalarCost);
> + return TotalCost;
> + }
> + case Instruction::Load: {
> + // If we scalarize the loads, add the cost of forming the vector.
> + for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
> + if (!isConsecutiveAccess(VL[i], VL[i + 1]))
> + return getGatherCost(VecTy);
> +
> + // Cost of wide load - cost of scalar loads.
> + int ScalarLdCost = VecTy->getNumElements() *
> + TTI->getMemoryOpCost(Instruction::Load, ScalarTy,
> 1, 0);
> + int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1,
> 0);
> + return VecLdCost - ScalarLdCost;
> + }
> + case Instruction::Store: {
> + // We know that we can merge the stores. Calculate the cost.
> + int ScalarStCost = VecTy->getNumElements() *
> + TTI->getMemoryOpCost(Instruction::Store, ScalarTy,
> 1, 0);
> + int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,
> 0);
> + int StoreCost = VecStCost - ScalarStCost;
> +
> + ValueList Operands;
> + for (unsigned j = 0; j < VL.size(); ++j) {
> + Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
> + MemBarrierIgnoreList.insert(VL[j]);
> + }
> +
> + int Cost = getTreeCost_rec(Operands, Depth + 1);
> + if (Cost == MAX_COST)
> + return MAX_COST;
> +
> + int TotalCost = StoreCost + Cost;
> + return TotalCost;
> + }
> + default:
> + // Unable to vectorize unknown instructions.
> + return getGatherCost(VecTy);
> + }
> +}
> +
> +int FuncSLP::getTreeCost(ArrayRef<Value *> VL) {
> + // Get rid of the list of stores that were removed, and from the
> + // lists of instructions with multiple users.
> + MemBarrierIgnoreList.clear();
> + LaneMap.clear();
> + MultiUserVals.clear();
> + MustGather.clear();
> +
> + if (!getSameBlock(VL))
> + return MAX_COST;
> +
> + // Find the location of the last root.
> + int LastRootIndex = getLastIndex(VL);
> + int FirstUserIndex = getFirstUserIndex(VL);
> +
> + // Don't vectorize if there are users of the tree roots inside the tree
> + // itself.
> + if (LastRootIndex > FirstUserIndex)
> + return MAX_COST;
> +
> + // Scan the tree and find which value is used by which lane, and which values
> + // must be scalarized.
> + getTreeUses_rec(VL, 0);
> +
> + // Check that instructions with multiple users can be vectorized. Mark unsafe
> + // instructions.
> + for (SetVector<Value *>::iterator it = MultiUserVals.begin(),
> + e = MultiUserVals.end();
> + it != e; ++it) {
> + // Check that all of the users of this instr are within the tree.
> + for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
> + I != E; ++I) {
> + if (LaneMap.find(*I) == LaneMap.end()) {
> + DEBUG(dbgs() << "SLP: Adding to MustExtract "
> + "because of an out of tree usage.\n");
> + MustGather.insert(*it);
> + continue;
> + }
> + }
> + }
> +
> + // Now calculate the cost of vectorizing the tree.
> + return getTreeCost_rec(VL, 0);
> +}
> +bool FuncSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int
> CostThreshold) {
> + unsigned ChainLen = Chain.size();
> + DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
> + << "\n");
> + Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
> + unsigned Sz = DL->getTypeSizeInBits(StoreTy);
> + unsigned VF = MinVecRegSize / Sz;
> +
> + if (!isPowerOf2_32(Sz) || VF < 2)
> + return false;
> +
> + bool Changed = false;
> + // Look for profitable vectorizable trees at all offsets, starting at zero.
> + for (unsigned i = 0, e = ChainLen; i < e; ++i) {
> + if (i + VF > e)
> + break;
> + DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
> + << "\n");
> + ArrayRef<Value *> Operands = Chain.slice(i, VF);
> +
> + int Cost = getTreeCost(Operands);
> + if (Cost == FuncSLP::MAX_COST)
> + continue;
> + DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF <<
> "\n");
> + if (Cost < CostThreshold) {
> + DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
> + vectorizeTree(Operands);
> + i += VF - 1;
> + Changed = true;
> + }
> + }
> +
> + if (Changed || ChainLen > VF)
> + return Changed;
> +
> + // Handle short chains. This helps us catch types such as <3 x float> that
> + // are smaller than vector size.
> + int Cost = getTreeCost(Chain);
> + if (Cost == FuncSLP::MAX_COST)
> + return false;
> + if (Cost < CostThreshold) {
> + DEBUG(dbgs() << "SLP: Found store chain cost = " << Cost
> + << " for size = " << ChainLen << "\n");
> + vectorizeTree(Chain);
> + return true;
> + }
> +
> + return false;
> +}
> +
> +bool FuncSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int
> costThreshold) {
> + SetVector<Value *> Heads, Tails;
> + SmallDenseMap<Value *, Value *> ConsecutiveChain;
> +
> + // We may run into multiple chains that merge into a single chain. We mark the
> + // stores that we vectorized so that we don't visit the same store twice.
> + ValueSet VectorizedStores;
> + bool Changed = false;
> +
> + // Do a quadratic search on all of the given stores and find
> + // all of the pairs of stores that follow each other.
> + for (unsigned i = 0, e = Stores.size(); i < e; ++i)
> + for (unsigned j = 0; j < e; ++j) {
> + if (i == j)
> + continue;
> +
> + if (isConsecutiveAccess(Stores[i], Stores[j])) {
> + Tails.insert(Stores[j]);
> + Heads.insert(Stores[i]);
> + ConsecutiveChain[Stores[i]] = Stores[j];
> + }
> + }
> +
> + // For stores that start but don't end a link in the chain:
> + for (SetVector<Value *>::iterator it = Heads.begin(), e = Heads.end();
> + it != e; ++it) {
> + if (Tails.count(*it))
> + continue;
> +
> + // We found a store instr that starts a chain. Now follow the chain and try
> + // to vectorize it.
> + ValueList Operands;
> + Value *I = *it;
> + // Collect the chain into a list.
> + while (Tails.count(I) || Heads.count(I)) {
> + if (VectorizedStores.count(I))
> + break;
> + Operands.push_back(I);
> + // Move to the next value in the chain.
> + I = ConsecutiveChain[I];
> + }
> +
> + bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
> +
> + // Mark the vectorized stores so that we don't vectorize them again.
> + if (Vectorized)
> + VectorizedStores.insert(Operands.begin(), Operands.end());
> + Changed |= Vectorized;
> + }
> +
> + return Changed;
> +}
> +
> +Value *FuncSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
> + Value *Vec = UndefValue::get(Ty);
> + // Generate the 'InsertElement' instruction.
> + for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
> + Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
> + if (Instruction *I = dyn_cast<Instruction>(Vec))
> + GatherSeq.insert(I);
> + }
> +
> + VectorizedValues[VL[0]] = Vec;
> + return Vec;
> +}
> +
> +Value *FuncSLP::vectorizeTree_rec(ArrayRef<Value *> VL) {
> + BuilderLocGuard Guard(Builder);
> +
> + Type *ScalarTy = VL[0]->getType();
> + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
> + ScalarTy = SI->getValueOperand()->getType();
> + VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
> +
> + if (needToGatherAny(VL))
> + return Gather(VL, VecTy);
> +
> + if (VectorizedValues.count(VL[0])) {
> + DEBUG(dbgs() << "SLP: Diamond merged at depth.\n");
> + return VectorizedValues[VL[0]];
> + }
> +
> + Instruction *VL0 = cast<Instruction>(VL[0]);
> + unsigned Opcode = VL0->getOpcode();
> + assert(Opcode == getSameOpcode(VL) && "Invalid opcode");
> +
> + switch (Opcode) {
> + case Instruction::ExtractElement: {
> + if (CanReuseExtract(VL, VL.size(), VecTy))
> + return VL0->getOperand(0);
> + return Gather(VL, VecTy);
> + }
> + case Instruction::ZExt:
> + case Instruction::SExt:
> + case Instruction::FPToUI:
> + case Instruction::FPToSI:
> + case Instruction::FPExt:
> + case Instruction::PtrToInt:
> + case Instruction::IntToPtr:
> + case Instruction::SIToFP:
> + case Instruction::UIToFP:
> + case Instruction::Trunc:
> + case Instruction::FPTrunc:
> + case Instruction::BitCast: {
> + ValueList INVL;
> + for (int i = 0, e = VL.size(); i < e; ++i)
> + INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
> +
> + Builder.SetInsertPoint(getLastInstruction(VL));
> + Value *InVec = vectorizeTree_rec(INVL);
> + CastInst *CI = dyn_cast<CastInst>(VL0);
> + Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
> + VectorizedValues[VL0] = V;
> + return V;
> + }
> + case Instruction::FCmp:
> + case Instruction::ICmp: {
> + // Check that all of the compares have the same predicate.
> + CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
> + for (unsigned i = 1, e = VL.size(); i < e; ++i) {
> + CmpInst *Cmp = cast<CmpInst>(VL[i]);
> + if (Cmp->getPredicate() != P0)
> + return Gather(VL, VecTy);
> + }
> +
> + ValueList LHSV, RHSV;
> + for (int i = 0, e = VL.size(); i < e; ++i) {
> + LHSV.push_back(cast<Instruction>(VL[i])->getOperand(0));
> + RHSV.push_back(cast<Instruction>(VL[i])->getOperand(1));
> + }
> +
> + Builder.SetInsertPoint(getLastInstruction(VL));
> + Value *L = vectorizeTree_rec(LHSV);
> + Value *R = vectorizeTree_rec(RHSV);
> + Value *V;
> +
> + if (Opcode == Instruction::FCmp)
> + V = Builder.CreateFCmp(P0, L, R);
> + else
> + V = Builder.CreateICmp(P0, L, R);
> +
> + VectorizedValues[VL0] = V;
> + return V;
> + }
> + case Instruction::Select: {
> + ValueList TrueVec, FalseVec, CondVec;
> + for (int i = 0, e = VL.size(); i < e; ++i) {
> + CondVec.push_back(cast<Instruction>(VL[i])->getOperand(0));
> + TrueVec.push_back(cast<Instruction>(VL[i])->getOperand(1));
> + FalseVec.push_back(cast<Instruction>(VL[i])->getOperand(2));
> + }
> +
> + Builder.SetInsertPoint(getLastInstruction(VL));
> + Value *True = vectorizeTree_rec(TrueVec);
> + Value *False = vectorizeTree_rec(FalseVec);
> + Value *Cond = vectorizeTree_rec(CondVec);
> + Value *V = Builder.CreateSelect(Cond, True, False);
> + VectorizedValues[VL0] = V;
> + return V;
> + }
> + case Instruction::Add:
> + case Instruction::FAdd:
> + case Instruction::Sub:
> + case Instruction::FSub:
> + case Instruction::Mul:
> + case Instruction::FMul:
> + case Instruction::UDiv:
> + case Instruction::SDiv:
> + case Instruction::FDiv:
> + case Instruction::URem:
> + case Instruction::SRem:
> + case Instruction::FRem:
> + case Instruction::Shl:
> + case Instruction::LShr:
> + case Instruction::AShr:
> + case Instruction::And:
> + case Instruction::Or:
> + case Instruction::Xor: {
> + ValueList LHSVL, RHSVL;
> + for (int i = 0, e = VL.size(); i < e; ++i) {
> + LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
> + RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
> + }
> +
> + Builder.SetInsertPoint(getLastInstruction(VL));
> + Value *LHS = vectorizeTree_rec(LHSVL);
> + Value *RHS = vectorizeTree_rec(RHSVL);
> +
> + if (LHS == RHS) {
> + assert((VL0->getOperand(0) == VL0->getOperand(1)) && "Invalid
> order");
> + }
> +
> + BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
> + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
> + VectorizedValues[VL0] = V;
> + return V;
> + }
> + case Instruction::Load: {
> + // Check if all of the loads are consecutive.
> + for (unsigned i = 1, e = VL.size(); i < e; ++i)
> + if (!isConsecutiveAccess(VL[i - 1], VL[i]))
> + return Gather(VL, VecTy);
> +
> + // Loads are inserted at the head of the tree because we don't want to
> + // sink them all the way down past store instructions.
> + Builder.SetInsertPoint(getLastInstruction(VL));
> + LoadInst *LI = cast<LoadInst>(VL0);
> + Value *VecPtr =
> + Builder.CreateBitCast(LI->getPointerOperand(),
> VecTy->getPointerTo());
> + unsigned Alignment = LI->getAlignment();
> + LI = Builder.CreateLoad(VecPtr);
> + LI->setAlignment(Alignment);
> +
> + VectorizedValues[VL0] = LI;
> + return LI;
> + }
> + case Instruction::Store: {
> + StoreInst *SI = cast<StoreInst>(VL0);
> + unsigned Alignment = SI->getAlignment();
> +
> + ValueList ValueOp;
> + for (int i = 0, e = VL.size(); i < e; ++i)
> + ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
> +
> + Value *VecValue = vectorizeTree_rec(ValueOp);
> +
> + Builder.SetInsertPoint(getLastInstruction(VL));
> + Value *VecPtr =
> + Builder.CreateBitCast(SI->getPointerOperand(),
> VecTy->getPointerTo());
> + Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
> +
> + for (int i = 0, e = VL.size(); i < e; ++i)
> + cast<Instruction>(VL[i])->eraseFromParent();
> + return 0;
> + }
> + default:
> + return Gather(VL, VecTy);
> + }
> +}
> +
> +Value *FuncSLP::vectorizeTree(ArrayRef<Value *> VL) {
> + Builder.SetInsertPoint(getLastInstruction(VL));
> + Value *V = vectorizeTree_rec(VL);
> +
> + // We moved some instructions around. We have to number them again
> + // before we can do any analysis.
> + MustGather.clear();
> + VectorizedValues.clear();
> + MemBarrierIgnoreList.clear();
> + for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it)
> + BlocksNumbers[it].forget();
> + return V;
> +}
> +
> +Value *FuncSLP::vectorizeArith(ArrayRef<Value *> Operands) {
> + Value *Vec = vectorizeTree(Operands);
> + // After vectorizing the operands we need to generate extractelement
> + // instructions and replace all of the uses of the scalar values with
> + // the values that we extracted from the vectorized tree.
> + for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
> + Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
> + Operands[i]->replaceAllUsesWith(S);
> + }
> +
> + return Vec;
> +}
> +
> +void FuncSLP::hoistGatherSequence() {
> + for (SetVector<Instruction *>::iterator it = GatherSeq.begin(),
> + e = GatherSeq.end();
> + it != e; ++it) {
> + InsertElementInst *Insert = dyn_cast_or_null<InsertElementInst>(*it);
> +
> + // The InsertElement sequence can be simplified into a constant.
> + // Also Ignore NULL pointers because they are only here to separate
> + // sequences.
> + if (!Insert)
> + continue;
> +
> + BasicBlock *BB = Insert->getParent();
> +
> + // Check if this block is inside a loop.
> + Loop *L = LI->getLoopFor(BB);
> + if (!L)
> + return;
> +
> + // Check if it has a preheader.
> + BasicBlock *PreHeader = L->getLoopPreheader();
> + if (!PreHeader)
> + return;
> +
> + // If the vector or the element that we insert into it are
> + // instructions that are defined in this basic block then we can't
> + // hoist this instruction.
> + Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
> + Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
> + if (CurrVec && L->contains(CurrVec))
> + continue;
> + if (NewElem && L->contains(NewElem))
> + continue;
> +
> + // Mark the insertion point for the block.
> + Instruction *Location = PreHeader->getTerminator();
> + // We can hoist this instruction. Move it to the pre-header.
> + Insert->moveBefore(Location);
> + }
> +}
> +
> /// The SLPVectorizer Pass.
> struct SLPVectorizer : public FunctionPass {
> - typedef MapVector<Value *, BoUpSLP::StoreList> StoreListMap;
> + typedef SmallVector<StoreInst *, 8> StoreList;
> + typedef MapVector<Value *, StoreList> StoreListMap;
>
> /// Pass identification, replacement for typeid
> static char ID;
> @@ -80,34 +1213,26 @@ struct SLPVectorizer : public FunctionPa
>
> DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
>
> + // Use the bottom-up SLP vectorizer to construct chains that start with
> + // the store instructions.
> + FuncSLP R(&F, SE, DL, TTI, AA, LI);
> +
> for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
> BasicBlock *BB = it;
> - bool BBChanged = false;
> -
> - // Use the bollom up slp vectorizer to construct chains that start
> with
> - // he store instructions.
> - BoUpSLP R(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));
>
> // Vectorize trees that end at reductions.
> - BBChanged |= vectorizeChainsInBlock(BB, R);
> + Changed |= vectorizeChainsInBlock(BB, R);
>
> // Vectorize trees that end at stores.
> if (unsigned count = collectStores(BB, R)) {
> (void)count;
> DEBUG(dbgs() << "SLP: Found " << count << " stores to
> vectorize.\n");
> - BBChanged |= vectorizeStoreChains(R);
> + Changed |= vectorizeStoreChains(R);
> }
> -
> - // Try to hoist some of the scalarization code to the preheader.
> - if (BBChanged) {
> - hoistGatherSequence(LI, BB, R);
> - Changed |=
> vectorizeUsingGatherHints(R.getGatherSeqInstructions());
> - }
> -
> - Changed |= BBChanged;
> }
>
> if (Changed) {
> + R.hoistGatherSequence();
> DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");
> DEBUG(verifyFunction(F));
> }
> @@ -128,42 +1253,31 @@ private:
> /// object. We sort the stores to their base objects to reduce the cost
> of the
> /// quadratic search on the stores. TODO: We can further reduce this
> cost
> /// if we flush the chain creation every time we run into a memory
> barrier.
> - unsigned collectStores(BasicBlock *BB, BoUpSLP &R);
> + unsigned collectStores(BasicBlock *BB, FuncSLP &R);
>
> /// \brief Try to vectorize a chain that starts at two arithmetic
> instrs.
> - bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
> + bool tryToVectorizePair(Value *A, Value *B, FuncSLP &R);
>
> /// \brief Try to vectorize a list of operands. If \p NeedExtracts is
> true
> /// then we calculate the cost of extracting the scalars from the
> vector.
> /// \returns true if a value was vectorized.
> - bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool
> NeedExtracts);
> + bool tryToVectorizeList(ArrayRef<Value *> VL, FuncSLP &R, bool
> NeedExtracts);
>
> /// \brief Try to vectorize a chain that may start at the operands of
> \V;
> - bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
> + bool tryToVectorize(BinaryOperator *V, FuncSLP &R);
>
> /// \brief Vectorize the stores that were collected in StoreRefs.
> - bool vectorizeStoreChains(BoUpSLP &R);
> -
> - /// \brief Try to hoist gather sequences outside of the loop in cases
> where
> - /// all of the sources are loop invariant.
> - void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R);
> -
> - /// \brief Try to vectorize additional sequences in different basic
> blocks
> - /// based on values that we gathered in previous blocks. The list \p
> Gathers
> - /// holds the gather InsertElement instructions that were generated
> during
> - /// vectorization.
> - /// \returns True if some code was vectorized.
> - bool vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers);
> + bool vectorizeStoreChains(FuncSLP &R);
>
> /// \brief Scan the basic block and look for patterns that are likely
> to start
> /// a vectorization chain.
> - bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R);
> + bool vectorizeChainsInBlock(BasicBlock *BB, FuncSLP &R);
>
> private:
> StoreListMap StoreRefs;
> };
>
> -unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
> +unsigned SLPVectorizer::collectStores(BasicBlock *BB, FuncSLP &R) {
> unsigned count = 0;
> StoreRefs.clear();
> for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;
> ++it) {
> @@ -188,14 +1302,14 @@ unsigned SLPVectorizer::collectStores(Ba
> return count;
> }
>
> -bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
> +bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, FuncSLP &R) {
> if (!A || !B)
> return false;
> Value *VL[] = { A, B };
> return tryToVectorizeList(VL, R, true);
> }
>
> -bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
> +bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, FuncSLP &R,
> bool NeedExtracts) {
> if (VL.size() < 2)
> return false;
> @@ -219,7 +1333,10 @@ bool SLPVectorizer::tryToVectorizeList(A
> }
>
> int Cost = R.getTreeCost(VL);
> - int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0;
> + if (Cost == FuncSLP::MAX_COST)
> + return false;
> +
> + int ExtrCost = NeedExtracts ? R.getGatherCost(VL) : 0;
> DEBUG(dbgs() << "SLP: Cost of pair:" << Cost
> << " Cost of extract:" << ExtrCost << ".\n");
> if ((Cost + ExtrCost) >= -SLPCostThreshold)
> @@ -229,10 +1346,10 @@ bool SLPVectorizer::tryToVectorizeList(A
> return true;
> }
>
> -bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
> +bool SLPVectorizer::tryToVectorize(BinaryOperator *V, FuncSLP &R) {
> if (!V)
> return false;
> -
> +
> // Try to vectorize V.
> if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
> return true;
> @@ -269,7 +1386,7 @@ bool SLPVectorizer::tryToVectorize(Binar
> return 0;
> }
>
> -bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
> +bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, FuncSLP &R) {
> bool Changed = false;
> for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;
> ++it) {
> if (isa<DbgInfoIntrinsic>(it))
> @@ -292,7 +1409,7 @@ bool SLPVectorizer::vectorizeChainsInBlo
> Value *Inst = BI->getOperand(0);
> if (Inst == P)
> Inst = BI->getOperand(1);
> -
> +
> Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
> continue;
> }
> @@ -337,7 +1454,7 @@ bool SLPVectorizer::vectorizeChainsInBlo
> return Changed;
> }
>
> -bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
> +bool SLPVectorizer::vectorizeStoreChains(FuncSLP &R) {
> bool Changed = false;
> // Attempt to sort and vectorize each of the store-groups.
> for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();
> @@ -353,92 +1470,6 @@ bool SLPVectorizer::vectorizeStoreChains
> return Changed;
> }
>
> -bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList
> &Gathers) {
> - SmallVector<Value *, 4> Seq;
> - bool Changed = false;
> - for (int i = 0, e = Gathers.size(); i < e; ++i) {
> - InsertElementInst *IEI =
> dyn_cast_or_null<InsertElementInst>(Gathers[i]);
> -
> - if (IEI) {
> - if (Instruction *I = dyn_cast<Instruction>(IEI->getOperand(1)))
> - Seq.push_back(I);
> - } else {
> -
> - if (!Seq.size())
> - continue;
> -
> - Instruction *I = cast<Instruction>(Seq[0]);
> - BasicBlock *BB = I->getParent();
> -
> - DEBUG(dbgs() << "SLP: Inspecting a gather list of size " <<
> Seq.size()
> - << " in " << BB->getName() << ".\n");
> -
> - // Check if the gathered values have multiple uses. If they only
> have one
> - // user then we know that the insert/extract pair will go away.
> - bool HasMultipleUsers = false;
> - for (int i = 0; e = Seq.size(), i < e; ++i) {
> - if (!Seq[i]->hasOneUse()) {
> - HasMultipleUsers = true;
> - break;
> - }
> - }
> -
> - BoUpSLP BO(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));
> -
> - if (tryToVectorizeList(Seq, BO, HasMultipleUsers)) {
> - DEBUG(dbgs() << "SLP: Vectorized a gather list of len " <<
> Seq.size()
> - << " in " << BB->getName() << ".\n");
> - Changed = true;
> - }
> -
> - Seq.clear();
> - }
> - }
> -
> - return Changed;
> -}
> -
> -void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
> - BoUpSLP &R) {
> - // Check if this block is inside a loop.
> - Loop *L = LI->getLoopFor(BB);
> - if (!L)
> - return;
> -
> - // Check if it has a preheader.
> - BasicBlock *PreHeader = L->getLoopPreheader();
> - if (!PreHeader)
> - return;
> -
> - // Mark the insertion point for the block.
> - Instruction *Location = PreHeader->getTerminator();
> -
> - BoUpSLP::InstrList &Gathers = R.getGatherSeqInstructions();
> - for (BoUpSLP::InstrList::iterator it = Gathers.begin(), e =
> Gathers.end();
> - it != e; ++it) {
> - InsertElementInst *Insert = dyn_cast_or_null<InsertElementInst>(*it);
> -
> - // The InsertElement sequence can be simplified into a constant.
> - // Also Ignore NULL pointers because they are only here to separate
> - // sequences.
> - if (!Insert)
> - continue;
> -
> - // If the vector or the element that we insert into it are
> - // instructions that are defined in this basic block then we can't
> - // hoist this instruction.
> - Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
> - Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
> - if (CurrVec && L->contains(CurrVec))
> - continue;
> - if (NewElem && L->contains(NewElem))
> - continue;
> -
> - // We can hoist this instruction. Move it to the pre-header.
> - Insert->moveBefore(Location);
> - }
> -}
> -
> } // end anonymous namespace
>
> char SLPVectorizer::ID = 0;
>
> Removed: llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp?rev=184646&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp (removed)
> @@ -1,1031 +0,0 @@
> -//===- VecUtils.cpp --- Vectorization Utilities
> ---------------------------===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
>
> -//===----------------------------------------------------------------------===//
> -#define DEBUG_TYPE "SLP"
> -
> -#include "VecUtils.h"
> -#include "llvm/ADT/DenseMap.h"
> -#include "llvm/ADT/SmallPtrSet.h"
> -#include "llvm/ADT/SmallSet.h"
> -#include "llvm/ADT/SmallVector.h"
> -#include "llvm/Analysis/AliasAnalysis.h"
> -#include "llvm/Analysis/ScalarEvolution.h"
> -#include "llvm/Analysis/ScalarEvolutionExpressions.h"
> -#include "llvm/Analysis/TargetTransformInfo.h"
> -#include "llvm/Analysis/Verifier.h"
> -#include "llvm/Analysis/LoopInfo.h"
> -#include "llvm/IR/Constants.h"
> -#include "llvm/IR/DataLayout.h"
> -#include "llvm/IR/Function.h"
> -#include "llvm/IR/Instructions.h"
> -#include "llvm/IR/Module.h"
> -#include "llvm/IR/Type.h"
> -#include "llvm/IR/Value.h"
> -#include "llvm/Pass.h"
> -#include "llvm/Support/CommandLine.h"
> -#include "llvm/Support/Debug.h"
> -#include "llvm/Support/raw_ostream.h"
> -#include "llvm/Target/TargetLibraryInfo.h"
> -#include "llvm/Transforms/Scalar.h"
> -#include "llvm/Transforms/Utils/Local.h"
> -#include <algorithm>
> -#include <map>
> -
> -using namespace llvm;
> -
> -static const unsigned MinVecRegSize = 128;
> -
> -static const unsigned RecursionMaxDepth = 6;
> -
> -namespace llvm {
> -
> -BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
> - TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp)
> - : Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa),
> L(Lp) {
> - numberInstructions();
> -}
> -
> -void BoUpSLP::numberInstructions() {
> - int Loc = 0;
> - InstrIdx.clear();
> - InstrVec.clear();
> - // Number the instructions in the block.
> - for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;
> ++it) {
> - InstrIdx[it] = Loc++;
> - InstrVec.push_back(it);
> - assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
> - }
> -}
> -
> -Value *BoUpSLP::getPointerOperand(Value *I) {
> - if (LoadInst *LI = dyn_cast<LoadInst>(I))
> - return LI->getPointerOperand();
> - if (StoreInst *SI = dyn_cast<StoreInst>(I))
> - return SI->getPointerOperand();
> - return 0;
> -}
> -
> -unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
> - if (LoadInst *L = dyn_cast<LoadInst>(I))
> - return L->getPointerAddressSpace();
> - if (StoreInst *S = dyn_cast<StoreInst>(I))
> - return S->getPointerAddressSpace();
> - return -1;
> -}
> -
> -bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
> - Value *PtrA = getPointerOperand(A);
> - Value *PtrB = getPointerOperand(B);
> - unsigned ASA = getAddressSpaceOperand(A);
> - unsigned ASB = getAddressSpaceOperand(B);
> -
> - // Check that the address spaces match and that the pointers are valid.
> - if (!PtrA || !PtrB || (ASA != ASB))
> - return false;
> -
> - // Check that A and B are of the same type.
> - if (PtrA->getType() != PtrB->getType())
> - return false;
> -
> - // Calculate the distance.
> - const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
> - const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
> - const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
> - const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
> -
> - // Non constant distance.
> - if (!ConstOffSCEV)
> - return false;
> -
> - int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
> - Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
> - // The Instructions are connsecutive if the size of the first
> load/store is
> - // the same as the offset.
> - int64_t Sz = DL->getTypeStoreSize(Ty);
> - return ((-Offset) == Sz);
> -}
> -
> -bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int
> CostThreshold) {
> - unsigned ChainLen = Chain.size();
> - DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
> - << "\n");
> - Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
> - unsigned Sz = DL->getTypeSizeInBits(StoreTy);
> - unsigned VF = MinVecRegSize / Sz;
> -
> - if (!isPowerOf2_32(Sz) || VF < 2)
> - return false;
> -
> - bool Changed = false;
> - // Look for profitable vectorizable trees at all offsets, starting at
> zero.
> - for (unsigned i = 0, e = ChainLen; i < e; ++i) {
> - if (i + VF > e)
> - break;
> - DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
> - << "\n");
> - ArrayRef<Value *> Operands = Chain.slice(i, VF);
> -
> - int Cost = getTreeCost(Operands);
> - DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF <<
> "\n");
> - if (Cost < CostThreshold) {
> - DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
> - Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands,
> VF)));
> - vectorizeTree(Operands, VF);
> - i += VF - 1;
> - Changed = true;
> - }
> - }
> -
> - if (Changed || ChainLen > VF)
> - return Changed;
> -
> - // Handle short chains. This helps us catch types such as <3 x float>
> that
> - // are smaller than vector size.
> - int Cost = getTreeCost(Chain);
> - if (Cost < CostThreshold) {
> - DEBUG(dbgs() << "SLP: Found store chain cost = " << Cost
> - << " for size = " << ChainLen << "\n");
> - Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Chain,
> ChainLen)));
> - vectorizeTree(Chain, ChainLen);
> - return true;
> - }
> -
> - return false;
> -}
> -
> -bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int
> costThreshold) {
> - SetVector<Value *> Heads, Tails;
> - SmallDenseMap<Value *, Value *> ConsecutiveChain;
> -
> - // We may run into multiple chains that merge into a single chain. We
> mark the
> - // stores that we vectorized so that we don't visit the same store
> twice.
> - ValueSet VectorizedStores;
> - bool Changed = false;
> -
> - // Do a quadratic search on all of the given stores and find
> - // all of the pairs of loads that follow each other.
> - for (unsigned i = 0, e = Stores.size(); i < e; ++i)
> - for (unsigned j = 0; j < e; ++j) {
> - if (i == j)
> - continue;
> -
> - if (isConsecutiveAccess(Stores[i], Stores[j])) {
> - Tails.insert(Stores[j]);
> - Heads.insert(Stores[i]);
> - ConsecutiveChain[Stores[i]] = Stores[j];
> - }
> - }
> -
> - // For stores that start but don't end a link in the chain:
> - for (SetVector<Value *>::iterator it = Heads.begin(), e = Heads.end();
> - it != e; ++it) {
> - if (Tails.count(*it))
> - continue;
> -
> - // We found a store instr that starts a chain. Now follow the chain
> and try
> - // to vectorize it.
> - ValueList Operands;
> - Value *I = *it;
> - // Collect the chain into a list.
> - while (Tails.count(I) || Heads.count(I)) {
> - if (VectorizedStores.count(I))
> - break;
> - Operands.push_back(I);
> - // Move to the next value in the chain.
> - I = ConsecutiveChain[I];
> - }
> -
> - bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
> -
> - // Mark the vectorized stores so that we don't vectorize them again.
> - if (Vectorized)
> - VectorizedStores.insert(Operands.begin(), Operands.end());
> - Changed |= Vectorized;
> - }
> -
> - return Changed;
> -}
> -
> -int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) {
> - // Find the type of the operands in VL.
> - Type *ScalarTy = VL[0]->getType();
> - if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
> - ScalarTy = SI->getValueOperand()->getType();
> - VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
> - // Find the cost of inserting/extracting values from the vector.
> - return getScalarizationCost(VecTy);
> -}
> -
> -int BoUpSLP::getScalarizationCost(Type *Ty) {
> - int Cost = 0;
> - for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
> - Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
> - return Cost;
> -}
> -
> -AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
> - if (StoreInst *SI = dyn_cast<StoreInst>(I))
> - return AA->getLocation(SI);
> - if (LoadInst *LI = dyn_cast<LoadInst>(I))
> - return AA->getLocation(LI);
> - return AliasAnalysis::Location();
> -}
> -
> -Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
> - assert(Src->getParent() == Dst->getParent() && "Not the same BB");
> - BasicBlock::iterator I = Src, E = Dst;
> - /// Scan all of the instruction from SRC to DST and check if
> - /// the source may alias.
> - for (++I; I != E; ++I) {
> - // Ignore store instructions that are marked as 'ignore'.
> - if (MemBarrierIgnoreList.count(I))
> - continue;
> - if (Src->mayWriteToMemory()) /* Write */ {
> - if (!I->mayReadOrWriteMemory())
> - continue;
> - } else /* Read */ {
> - if (!I->mayWriteToMemory())
> - continue;
> - }
> - AliasAnalysis::Location A = getLocation(&*I);
> - AliasAnalysis::Location B = getLocation(Src);
> -
> - if (!A.Ptr || !B.Ptr || AA->alias(A, B))
> - return I;
> - }
> - return 0;
> -}
> -
> -Value *BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
> - int LastIdx = getLastIndex(Operands, Operands.size());
> - Instruction *Loc = getInsertionPoint(LastIdx);
> - Builder.SetInsertPoint(Loc);
> -
> - assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx &&
> - "Vectorizing with in-tree users");
> -
> - Value *Vec = vectorizeTree(Operands, Operands.size());
> - // After vectorizing the operands we need to generate extractelement
> - // instructions and replace all of the uses of the scalar values with
> - // the values that we extracted from the vectorized tree.
> - for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
> - Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
> - Operands[i]->replaceAllUsesWith(S);
> - }
> -
> - return Vec;
> -}
> -
> -int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
> - // Get rid of the list of stores that were removed, and from the
> - // lists of instructions with multiple users.
> - MemBarrierIgnoreList.clear();
> - LaneMap.clear();
> - MultiUserVals.clear();
> - MustScalarize.clear();
> - MustExtract.clear();
> -
> - // Find the location of the last root.
> - int LastRootIndex = getLastIndex(VL, VL.size());
> - int FirstUserIndex = getFirstUserIndex(VL, VL.size());
> -
> - // Don't vectorize if there are users of the tree roots inside the tree
> - // itself.
> - if (LastRootIndex > FirstUserIndex)
> - return max_cost;
> -
> - // Scan the tree and find which value is used by which lane, and which values
> - // must be scalarized.
> - getTreeUses_rec(VL, 0);
> -
> - // Check that instructions with multiple users can be vectorized. Mark unsafe
> - // instructions.
> - for (SetVector<Value *>::iterator it = MultiUserVals.begin(),
> - e = MultiUserVals.end();
> - it != e; ++it) {
> - // Check that all of the users of this instr are within the tree
> - // and that they are all from the same lane.
> - int Lane = -1;
> - for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
> - I != E; ++I) {
> - if (LaneMap.find(*I) == LaneMap.end()) {
> - DEBUG(dbgs() << "SLP: Instr " << **it << " has multiple users.\n");
> -
> - // We don't have an ordering problem if the user is not in this basic
> - // block.
> - Instruction *Inst = cast<Instruction>(*I);
> - if (Inst->getParent() != BB) {
> - MustExtract.insert(*it);
> - continue;
> - }
> -
> - // We don't have an ordering problem if the user is after the last root.
> - int Idx = InstrIdx[Inst];
> - if (Idx < LastRootIndex) {
> - MustScalarize.insert(*it);
> - DEBUG(dbgs() << "SLP: Adding to MustScalarize "
> - "because of an unsafe out of tree usage.\n");
> - break;
> - }
> -
> - DEBUG(dbgs() << "SLP: Adding to MustExtract "
> - "because of a safe out of tree usage.\n");
> - MustExtract.insert(*it);
> - continue;
> - }
> - if (Lane == -1)
> - Lane = LaneMap[*I];
> - if (Lane != LaneMap[*I]) {
> - MustScalarize.insert(*it);
> - DEBUG(dbgs() << "SLP: Adding " << **it
> - << " to MustScalarize because multiple lane use it: "
> - << Lane << " and " << LaneMap[*I] << ".\n");
> - break;
> - }
> - }
> - }
> -
> - // Now calculate the cost of vectorizing the tree.
> - return getTreeCost_rec(VL, 0);
> -}
> -
> -static bool CanReuseExtract(ArrayRef<Value *> VL, unsigned VF,
> - VectorType *VecTy) {
> - // Check if all of the extracts come from the same vector and from the
> - // correct offset.
> - Value *VL0 = VL[0];
> - ExtractElementInst *E0 = cast<ExtractElementInst>(VL0);
> - Value *Vec = E0->getOperand(0);
> -
> - // We have to extract from the same vector type.
> - if (Vec->getType() != VecTy)
> - return false;
> -
> - // Check that all of the indices extract from the correct offset.
> - ConstantInt *CI = dyn_cast<ConstantInt>(E0->getOperand(1));
> - if (!CI || CI->getZExtValue())
> - return false;
> -
> - for (unsigned i = 1, e = VF; i < e; ++i) {
> - ExtractElementInst *E = cast<ExtractElementInst>(VL[i]);
> - ConstantInt *CI = dyn_cast<ConstantInt>(E->getOperand(1));
> -
> - if (!CI || CI->getZExtValue() != i || E->getOperand(0) != Vec)
> - return false;
> - }
> -
> - return true;
> -}
> -
> -void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
> - if (Depth == RecursionMaxDepth)
> - return;
> -
> - // Don't handle vectors.
> - if (VL[0]->getType()->isVectorTy())
> - return;
> -
> - if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
> - if (SI->getValueOperand()->getType()->isVectorTy())
> - return;
> -
> - // Check if all of the operands are constants.
> - bool AllConst = true;
> - bool AllSameScalar = true;
> - for (unsigned i = 0, e = VL.size(); i < e; ++i) {
> - AllConst &= isa<Constant>(VL[i]);
> - AllSameScalar &= (VL[0] == VL[i]);
> - Instruction *I = dyn_cast<Instruction>(VL[i]);
> - // If one of the instructions is out of this BB, we need to scalarize all.
> - if (I && I->getParent() != BB)
> - return;
> - }
> -
> - // If all of the operands are identical or constant we have a simple solution.
> - if (AllConst || AllSameScalar)
> - return;
> -
> - // Scalarize unknown structures.
> - Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
> - if (!VL0)
> - return;
> -
> - unsigned Opcode = VL0->getOpcode();
> - for (unsigned i = 0, e = VL.size(); i < e; ++i) {
> - Instruction *I = dyn_cast<Instruction>(VL[i]);
> - // If not all of the instructions are identical then we have to scalarize.
> - if (!I || Opcode != I->getOpcode())
> - return;
> - }
> -
> - for (int i = 0, e = VL.size(); i < e; ++i) {
> - // Check that the instruction is only used within
> - // one lane.
> - if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i)
> - return;
> - // Make this instruction as 'seen' and remember the lane.
> - LaneMap[VL[i]] = i;
> - }
> -
> - // Mark instructions with multiple users.
> - for (unsigned i = 0, e = VL.size(); i < e; ++i) {
> - Instruction *I = dyn_cast<Instruction>(VL[i]);
> - // Remember to check if all of the users of this instr are vectorized
> - // within our tree. At depth zero we have no local users, only external
> - // users that we don't care about.
> - if (Depth && I && I->getNumUses() > 1) {
> - DEBUG(dbgs() << "SLP: Adding to MultiUserVals "
> - "because it has multiple users:" << *I << " \n");
> - MultiUserVals.insert(I);
> - }
> - }
> -
> - switch (Opcode) {
> - case Instruction::ExtractElement: {
> - VectorType *VecTy = VectorType::get(VL[0]->getType(), VL.size());
> - // No need to follow ExtractElements that are going to be optimized away.
> - if (CanReuseExtract(VL, VL.size(), VecTy))
> - return;
> - // Fall through.
> - }
> - case Instruction::ZExt:
> - case Instruction::SExt:
> - case Instruction::FPToUI:
> - case Instruction::FPToSI:
> - case Instruction::FPExt:
> - case Instruction::PtrToInt:
> - case Instruction::IntToPtr:
> - case Instruction::SIToFP:
> - case Instruction::UIToFP:
> - case Instruction::Trunc:
> - case Instruction::FPTrunc:
> - case Instruction::BitCast:
> - case Instruction::Select:
> - case Instruction::ICmp:
> - case Instruction::FCmp:
> - case Instruction::Add:
> - case Instruction::FAdd:
> - case Instruction::Sub:
> - case Instruction::FSub:
> - case Instruction::Mul:
> - case Instruction::FMul:
> - case Instruction::UDiv:
> - case Instruction::SDiv:
> - case Instruction::FDiv:
> - case Instruction::URem:
> - case Instruction::SRem:
> - case Instruction::FRem:
> - case Instruction::Shl:
> - case Instruction::LShr:
> - case Instruction::AShr:
> - case Instruction::And:
> - case Instruction::Or:
> - case Instruction::Xor: {
> - for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
> - ValueList Operands;
> - // Prepare the operand vector.
> - for (unsigned j = 0; j < VL.size(); ++j)
> - Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
> -
> - getTreeUses_rec(Operands, Depth + 1);
> - }
> - return;
> - }
> - case Instruction::Store: {
> - ValueList Operands;
> - for (unsigned j = 0; j < VL.size(); ++j)
> - Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
> - getTreeUses_rec(Operands, Depth + 1);
> - return;
> - }
> - default:
> - return;
> - }
> -}
> -
> -int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
> - Type *ScalarTy = VL[0]->getType();
> -
> - if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
> - ScalarTy = SI->getValueOperand()->getType();
> -
> - /// Don't mess with vectors.
> - if (ScalarTy->isVectorTy())
> - return max_cost;
> -
> - VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
> -
> - if (Depth == RecursionMaxDepth)
> - return getScalarizationCost(VecTy);
> -
> - // Check if all of the operands are constants.
> - bool AllConst = true;
> - bool AllSameScalar = true;
> - bool MustScalarizeFlag = false;
> - for (unsigned i = 0, e = VL.size(); i < e; ++i) {
> - AllConst &= isa<Constant>(VL[i]);
> - AllSameScalar &= (VL[0] == VL[i]);
> - // Must have a single use.
> - Instruction *I = dyn_cast<Instruction>(VL[i]);
> - MustScalarizeFlag |= MustScalarize.count(VL[i]);
> - // This instruction is outside the basic block.
> - if (I && I->getParent() != BB)
> - return getScalarizationCost(VecTy);
> - }
> -
> - // Is this a simple vector constant.
> - if (AllConst)
> - return 0;
> -
> - // If all of the operands are identical we can broadcast them.
> - Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
> - if (AllSameScalar) {
> - // If we are in a loop, and this is not an instruction (e.g. constant or
> - // argument) or the instruction is defined outside the loop then assume
> - // that the cost is zero.
> - if (L && (!VL0 || !L->contains(VL0)))
> - return 0;
> -
> - // We need to broadcast the scalar.
> - return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
> - }
> -
> - // If this is not a constant, or a scalar from outside the loop then we
> - // need to scalarize it.
> - if (MustScalarizeFlag)
> - return getScalarizationCost(VecTy);
> -
> - if (!VL0)
> - return getScalarizationCost(VecTy);
> - assert(VL0->getParent() == BB && "Wrong BB");
> -
> - unsigned Opcode = VL0->getOpcode();
> - for (unsigned i = 0, e = VL.size(); i < e; ++i) {
> - Instruction *I = dyn_cast<Instruction>(VL[i]);
> - // If not all of the instructions are identical then we have to scalarize.
> - if (!I || Opcode != I->getOpcode())
> - return getScalarizationCost(VecTy);
> - }
> -
> - // Check if it is safe to sink the loads or the stores.
> - if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
> - int MaxIdx = getLastIndex(VL, VL.size());
> - Instruction *Last = InstrVec[MaxIdx];
> -
> - for (unsigned i = 0, e = VL.size(); i < e; ++i) {
> - if (VL[i] == Last)
> - continue;
> - Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
> - if (Barrier) {
> - DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << *Last
> - << "\n because of " << *Barrier << "\n");
> - return max_cost;
> - }
> - }
> - }
> -
> - // Calculate the extract cost.
> - unsigned ExternalUserExtractCost = 0;
> - for (unsigned i = 0, e = VL.size(); i < e; ++i)
> - if (MustExtract.count(VL[i]))
> - ExternalUserExtractCost +=
> - TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
> -
> - switch (Opcode) {
> - case Instruction::ExtractElement: {
> - if (CanReuseExtract(VL, VL.size(), VecTy))
> - return 0;
> - return getScalarizationCost(VecTy);
> - }
> - case Instruction::ZExt:
> - case Instruction::SExt:
> - case Instruction::FPToUI:
> - case Instruction::FPToSI:
> - case Instruction::FPExt:
> - case Instruction::PtrToInt:
> - case Instruction::IntToPtr:
> - case Instruction::SIToFP:
> - case Instruction::UIToFP:
> - case Instruction::Trunc:
> - case Instruction::FPTrunc:
> - case Instruction::BitCast: {
> - int Cost = ExternalUserExtractCost;
> - ValueList Operands;
> - Type *SrcTy = VL0->getOperand(0)->getType();
> - // Prepare the operand vector.
> - for (unsigned j = 0; j < VL.size(); ++j) {
> - Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
> - // Check that the casted type is the same for all users.
> - if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
> - return getScalarizationCost(VecTy);
> - }
> -
> - Cost += getTreeCost_rec(Operands, Depth + 1);
> - if (Cost >= max_cost)
> - return max_cost;
> -
> - // Calculate the cost of this instruction.
> - int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
> - VL0->getType(), SrcTy);
> -
> - VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
> - int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
> - Cost += (VecCost - ScalarCost);
> - return Cost;
> - }
> - case Instruction::FCmp:
> - case Instruction::ICmp: {
> - // Check that all of the compares have the same predicate.
> - CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
> - for (unsigned i = 1, e = VL.size(); i < e; ++i) {
> - CmpInst *Cmp = cast<CmpInst>(VL[i]);
> - if (Cmp->getPredicate() != P0)
> - return getScalarizationCost(VecTy);
> - }
> - // Fall through.
> - }
> - case Instruction::Select:
> - case Instruction::Add:
> - case Instruction::FAdd:
> - case Instruction::Sub:
> - case Instruction::FSub:
> - case Instruction::Mul:
> - case Instruction::FMul:
> - case Instruction::UDiv:
> - case Instruction::SDiv:
> - case Instruction::FDiv:
> - case Instruction::URem:
> - case Instruction::SRem:
> - case Instruction::FRem:
> - case Instruction::Shl:
> - case Instruction::LShr:
> - case Instruction::AShr:
> - case Instruction::And:
> - case Instruction::Or:
> - case Instruction::Xor: {
> - int Cost = ExternalUserExtractCost;
> - // Calculate the cost of all of the operands.
> - for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
> - ValueList Operands;
> - // Prepare the operand vector.
> - for (unsigned j = 0; j < VL.size(); ++j)
> - Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
> -
> - Cost += getTreeCost_rec(Operands, Depth + 1);
> - if (Cost >= max_cost)
> - return max_cost;
> - }
> -
> - // Calculate the cost of this instruction.
> - int ScalarCost = 0;
> - int VecCost = 0;
> - if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp ||
> - Opcode == Instruction::Select) {
> - VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
> - ScalarCost =
> - VecTy->getNumElements() *
> - TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());
> - VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);
> - } else {
> - ScalarCost = VecTy->getNumElements() *
> - TTI->getArithmeticInstrCost(Opcode, ScalarTy);
> - VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
> - }
> - Cost += (VecCost - ScalarCost);
> - return Cost;
> - }
> - case Instruction::Load: {
> - // If we are scalarize the loads, add the cost of forming the vector.
> - for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
> - if (!isConsecutiveAccess(VL[i], VL[i + 1]))
> - return getScalarizationCost(VecTy);
> -
> - // Cost of wide load - cost of scalar loads.
> - int ScalarLdCost = VecTy->getNumElements() *
> - TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
> - int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
> - return VecLdCost - ScalarLdCost + ExternalUserExtractCost;
> - }
> - case Instruction::Store: {
> - // We know that we can merge the stores. Calculate the cost.
> - int ScalarStCost = VecTy->getNumElements() *
> - TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
> - int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
> - int StoreCost = VecStCost - ScalarStCost;
> -
> - ValueList Operands;
> - for (unsigned j = 0; j < VL.size(); ++j) {
> - Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
> - MemBarrierIgnoreList.insert(VL[j]);
> - }
> -
> - int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
> - return TotalCost + ExternalUserExtractCost;
> - }
> - default:
> - // Unable to vectorize unknown instructions.
> - return getScalarizationCost(VecTy);
> - }
> -}
> -
> -int BoUpSLP::getLastIndex(ArrayRef<Value *> VL, unsigned VF) {
> - int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
> - for (unsigned i = 0; i < VF; ++i)
> - MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
> - return MaxIdx;
> -}
> -
> -int BoUpSLP::getFirstUserIndex(ArrayRef<Value *> VL, unsigned VF) {
> - // Find the first user of the values.
> - int FirstUser = InstrVec.size();
> - for (unsigned i = 0; i < VF; ++i) {
> - for (Value::use_iterator U = VL[i]->use_begin(), UE = VL[i]->use_end();
> - U != UE; ++U) {
> - Instruction *Instr = dyn_cast<Instruction>(*U);
> - if (!Instr || Instr->getParent() != BB)
> - continue;
> -
> - FirstUser = std::min(FirstUser, InstrIdx[Instr]);
> - }
> - }
> - return FirstUser;
> -}
> -
> -int BoUpSLP::getLastIndex(Instruction *I, Instruction *J) {
> - assert(I->getParent() == BB && "Invalid parent for instruction I");
> - assert(J->getParent() == BB && "Invalid parent for instruction J");
> - return std::max(InstrIdx[I], InstrIdx[J]);
> -}
> -
> -Instruction *BoUpSLP::getInsertionPoint(unsigned Index) {
> - return InstrVec[Index + 1];
> -}
> -
> -Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
> - Value *Vec = UndefValue::get(Ty);
> - for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
> - // Generate the 'InsertElement' instruction.
> - Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
> - // Remember that this instruction is used as part of a 'gather' sequence.
> - // The caller of the bottom-up slp vectorizer can try to hoist the sequence
> - // if the users are outside of the basic block.
> - if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Vec))
> - GatherInstructions.push_back(IEI);
> - }
> -
> - // Mark the end of the gather sequence.
> - GatherInstructions.push_back(0);
> -
> - for (unsigned i = 0; i < Ty->getNumElements(); ++i)
> - VectorizedValues[VL[i]] = Vec;
> -
> - return Vec;
> -}
> -
> -Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
> - Value *V = vectorizeTree_rec(VL, VF);
> -
> - int LastInstrIdx = getLastIndex(VL, VL.size());
> - for (SetVector<Value *>::iterator it = MustExtract.begin(),
> - e = MustExtract.end();
> - it != e; ++it) {
> - Instruction *I = cast<Instruction>(*it);
> -
> - // This is a scalarized value, so we can use the original value.
> - // No need to extract from the vector.
> - if (!LaneMap.count(I))
> - continue;
> -
> - Value *Vec = VectorizedValues[I];
> - // We decided not to vectorize I because one of its users was not
> - // vectorizerd. This is okay.
> - if (!Vec)
> - continue;
> -
> - Value *Idx = Builder.getInt32(LaneMap[I]);
> - Value *Extract = Builder.CreateExtractElement(Vec, Idx);
> - bool Replaced = false;
> - for (Value::use_iterator U = I->use_begin(), UE = I->use_end(); U != UE;
> - ++U) {
> - Instruction *UI = cast<Instruction>(*U);
> - if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx)
> - UI->replaceUsesOfWith(I, Extract);
> - Replaced = true;
> - }
> - assert(Replaced && "Must replace at least one outside user");
> - (void)Replaced;
> - }
> -
> - // We moved some instructions around. We have to number them again
> - // before we can do any analysis.
> - numberInstructions();
> - MustScalarize.clear();
> - MustExtract.clear();
> - VectorizedValues.clear();
> - return V;
> -}
> -
> -Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
> - Type *ScalarTy = VL[0]->getType();
> - if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
> - ScalarTy = SI->getValueOperand()->getType();
> - VectorType *VecTy = VectorType::get(ScalarTy, VF);
> -
> - // Check if all of the operands are constants or identical.
> - bool AllConst = true;
> - bool AllSameScalar = true;
> - for (unsigned i = 0, e = VF; i < e; ++i) {
> - AllConst &= isa<Constant>(VL[i]);
> - AllSameScalar &= (VL[0] == VL[i]);
> - // The instruction must be in the same BB, and it must be vectorizable.
> - Instruction *I = dyn_cast<Instruction>(VL[i]);
> - if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
> - return Scalarize(VL, VecTy);
> - }
> -
> - // Check that this is a simple vector constant.
> - if (AllConst || AllSameScalar)
> - return Scalarize(VL, VecTy);
> -
> - // Scalarize unknown structures.
> - Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
> - if (!VL0)
> - return Scalarize(VL, VecTy);
> -
> - if (VectorizedValues.count(VL0)) {
> - Value *Vec = VectorizedValues[VL0];
> - for (int i = 0; i < VF; ++i)
> - VectorizedValues[VL[i]] = Vec;
> - return Vec;
> - }
> -
> - unsigned Opcode = VL0->getOpcode();
> - for (unsigned i = 0, e = VF; i < e; ++i) {
> - Instruction *I = dyn_cast<Instruction>(VL[i]);
> - // If not all of the instructions are identical then we have to scalarize.
> - if (!I || Opcode != I->getOpcode())
> - return Scalarize(VL, VecTy);
> - }
> -
> - switch (Opcode) {
> - case Instruction::ExtractElement: {
> - if (CanReuseExtract(VL, VL.size(), VecTy))
> - return VL0->getOperand(0);
> - return Scalarize(VL, VecTy);
> - }
> - case Instruction::ZExt:
> - case Instruction::SExt:
> - case Instruction::FPToUI:
> - case Instruction::FPToSI:
> - case Instruction::FPExt:
> - case Instruction::PtrToInt:
> - case Instruction::IntToPtr:
> - case Instruction::SIToFP:
> - case Instruction::UIToFP:
> - case Instruction::Trunc:
> - case Instruction::FPTrunc:
> - case Instruction::BitCast: {
> - ValueList INVL;
> - for (int i = 0; i < VF; ++i)
> - INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
> - Value *InVec = vectorizeTree_rec(INVL, VF);
> - CastInst *CI = dyn_cast<CastInst>(VL0);
> - Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
> -
> - for (int i = 0; i < VF; ++i)
> - VectorizedValues[VL[i]] = V;
> -
> - return V;
> - }
> - case Instruction::FCmp:
> - case Instruction::ICmp: {
> - // Check that all of the compares have the same predicate.
> - CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
> - for (unsigned i = 1, e = VF; i < e; ++i) {
> - CmpInst *Cmp = cast<CmpInst>(VL[i]);
> - if (Cmp->getPredicate() != P0)
> - return Scalarize(VL, VecTy);
> - }
> -
> - ValueList LHSV, RHSV;
> - for (int i = 0; i < VF; ++i) {
> - LHSV.push_back(cast<Instruction>(VL[i])->getOperand(0));
> - RHSV.push_back(cast<Instruction>(VL[i])->getOperand(1));
> - }
> -
> - Value *L = vectorizeTree_rec(LHSV, VF);
> - Value *R = vectorizeTree_rec(RHSV, VF);
> - Value *V;
> - if (VL0->getOpcode() == Instruction::FCmp)
> - V = Builder.CreateFCmp(P0, L, R);
> - else
> - V = Builder.CreateICmp(P0, L, R);
> -
> - for (int i = 0; i < VF; ++i)
> - VectorizedValues[VL[i]] = V;
> -
> - return V;
> - }
> - case Instruction::Select: {
> - ValueList TrueVec, FalseVec, CondVec;
> - for (int i = 0; i < VF; ++i) {
> - CondVec.push_back(cast<Instruction>(VL[i])->getOperand(0));
> - TrueVec.push_back(cast<Instruction>(VL[i])->getOperand(1));
> - FalseVec.push_back(cast<Instruction>(VL[i])->getOperand(2));
> - }
> -
> - Value *True = vectorizeTree_rec(TrueVec, VF);
> - Value *False = vectorizeTree_rec(FalseVec, VF);
> - Value *Cond = vectorizeTree_rec(CondVec, VF);
> - Value *V = Builder.CreateSelect(Cond, True, False);
> -
> - for (int i = 0; i < VF; ++i)
> - VectorizedValues[VL[i]] = V;
> -
> - return V;
> - }
> - case Instruction::Add:
> - case Instruction::FAdd:
> - case Instruction::Sub:
> - case Instruction::FSub:
> - case Instruction::Mul:
> - case Instruction::FMul:
> - case Instruction::UDiv:
> - case Instruction::SDiv:
> - case Instruction::FDiv:
> - case Instruction::URem:
> - case Instruction::SRem:
> - case Instruction::FRem:
> - case Instruction::Shl:
> - case Instruction::LShr:
> - case Instruction::AShr:
> - case Instruction::And:
> - case Instruction::Or:
> - case Instruction::Xor: {
> - ValueList LHSVL, RHSVL;
> - for (int i = 0; i < VF; ++i) {
> - LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
> - RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
> - }
> -
> - Value *LHS = vectorizeTree_rec(LHSVL, VF);
> - Value *RHS = vectorizeTree_rec(RHSVL, VF);
> - BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
> - Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
> -
> - for (int i = 0; i < VF; ++i)
> - VectorizedValues[VL[i]] = V;
> -
> - return V;
> - }
> - case Instruction::Load: {
> - LoadInst *LI = cast<LoadInst>(VL0);
> - unsigned Alignment = LI->getAlignment();
> -
> - // Check if all of the loads are consecutive.
> - for (unsigned i = 1, e = VF; i < e; ++i)
> - if (!isConsecutiveAccess(VL[i - 1], VL[i]))
> - return Scalarize(VL, VecTy);
> -
> - // Loads are inserted at the head of the tree because we don't want to sink
> - // them all the way down past store instructions.
> - Instruction *Loc = getInsertionPoint(getLastIndex(VL, VL.size()));
> - IRBuilder<> LoadBuilder(Loc);
> - Value *VecPtr = LoadBuilder.CreateBitCast(LI->getPointerOperand(),
> - VecTy->getPointerTo());
> - LI = LoadBuilder.CreateLoad(VecPtr);
> - LI->setAlignment(Alignment);
> -
> - for (int i = 0; i < VF; ++i)
> - VectorizedValues[VL[i]] = LI;
> -
> - return LI;
> - }
> - case Instruction::Store: {
> - StoreInst *SI = cast<StoreInst>(VL0);
> - unsigned Alignment = SI->getAlignment();
> -
> - ValueList ValueOp;
> - for (int i = 0; i < VF; ++i)
> - ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
> -
> - Value *VecValue = vectorizeTree_rec(ValueOp, VF);
> - Value *VecPtr =
> - Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo());
> - Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
> -
> - for (int i = 0; i < VF; ++i)
> - cast<Instruction>(VL[i])->eraseFromParent();
> - return 0;
> - }
> - default:
> - return Scalarize(VL, VecTy);
> - }
> -}
> -
> -} // end of namespace
>
> Removed: llvm/trunk/lib/Transforms/Vectorize/VecUtils.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.h?rev=184646&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/VecUtils.h (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/VecUtils.h (removed)
> @@ -1,194 +0,0 @@
> -//===- VecUtils.h - Vectorization Utilities -------------------------------===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This family of classes and functions manipulate vectors and chains of
> -// vectors.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#ifndef LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
> -#define LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
> -
> -#include "llvm/ADT/DenseMap.h"
> -#include "llvm/ADT/SetVector.h"
> -#include "llvm/ADT/SmallPtrSet.h"
> -#include "llvm/ADT/SmallVector.h"
> -#include "llvm/Analysis/AliasAnalysis.h"
> -#include "llvm/IR/IRBuilder.h"
> -#include <vector>
> -
> -namespace llvm {
> -
> -class BasicBlock;
> -class Instruction;
> -class Type;
> -class VectorType;
> -class StoreInst;
> -class Value;
> -class ScalarEvolution;
> -class DataLayout;
> -class TargetTransformInfo;
> -class AliasAnalysis;
> -class Loop;
> -
> -/// Bottom Up SLP vectorization utility class.
> -struct BoUpSLP {
> - typedef SmallVector<Value *, 8> ValueList;
> - typedef SmallVector<Instruction *, 16> InstrList;
> - typedef SmallPtrSet<Value *, 16> ValueSet;
> - typedef SmallVector<StoreInst *, 8> StoreList;
> - static const int max_cost = 1 << 20;
> -
> - // \brief C'tor.
> - BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl,
> - TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp);
> -
> - /// \brief Take the pointer operand from the Load/Store instruction.
> - /// \returns NULL if this is not a valid Load/Store instruction.
> - static Value *getPointerOperand(Value *I);
> -
> - /// \brief Take the address space operand from the Load/Store instruction.
> - /// \returns -1 if this is not a valid Load/Store instruction.
> - static unsigned getAddressSpaceOperand(Value *I);
> -
> - /// \returns true if the memory operations A and B are consecutive.
> - bool isConsecutiveAccess(Value *A, Value *B);
> -
> - /// \brief Vectorize the tree that starts with the elements in \p VL.
> - /// \returns the vectorized value.
> - Value *vectorizeTree(ArrayRef<Value *> VL, int VF);
> -
> - /// \returns the vectorization cost of the subtree that starts at \p VL.
> - /// A negative number means that this is profitable.
> - int getTreeCost(ArrayRef<Value *> VL);
> -
> - /// \returns the scalarization cost for this list of values. Assuming that
> - /// this subtree gets vectorized, we may need to extract the values from the
> - /// roots. This method calculates the cost of extracting the values.
> - int getScalarizationCost(ArrayRef<Value *> VL);
> -
> - /// \brief Attempts to order and vectorize a sequence of stores. This
> - /// function does a quadratic scan of the given stores.
> - /// \returns true if the basic block was modified.
> - bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold);
> -
> - /// \brief Vectorize a group of scalars into a vector tree.
> - /// \returns the vectorized value.
> - Value *vectorizeArith(ArrayRef<Value *> Operands);
> -
> - /// \returns the list of new instructions that were added in order to collect
> - /// scalars into vectors. This list can be used to further optimize the gather
> - /// sequences.
> - InstrList &getGatherSeqInstructions() { return GatherInstructions; }
> -
> -private:
> - /// \brief This method contains the recursive part of getTreeCost.
> - int getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth);
> -
> - /// \brief This recursive method looks for vectorization hazards such as
> - /// values that are used by multiple users and checks that values are used
> - /// by only one vector lane. It updates the variables LaneMap, MultiUserVals.
> - void getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth);
> -
> - /// \brief This method contains the recursive part of vectorizeTree.
> - Value *vectorizeTree_rec(ArrayRef<Value *> VL, int VF);
> -
> - /// \brief Number all of the instructions in the block.
> - void numberInstructions();
> -
> - /// \brief Vectorize a sorted sequence of stores.
> - bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold);
> -
> - /// \returns the scalarization cost for this type. Scalarization in this
> - /// context means the creation of vectors from a group of scalars.
> - int getScalarizationCost(Type *Ty);
> -
> - /// \returns the AA location that is being access by the instruction.
> - AliasAnalysis::Location getLocation(Instruction *I);
> -
> - /// \brief Checks if it is possible to sink an instruction from
> - /// \p Src to \p Dst.
> - /// \returns the pointer to the barrier instruction if we can't sink.
> - Value *isUnsafeToSink(Instruction *Src, Instruction *Dst);
> -
> - /// \returns the index of the last instrucion in the BB from \p VL.
> - /// Only consider the first \p VF elements.
> - int getLastIndex(ArrayRef<Value *> VL, unsigned VF);
> -
> - /// \returns the index of the first User of \p VL.
> - /// Only consider the first \p VF elements.
> - int getFirstUserIndex(ArrayRef<Value *> VL, unsigned VF);
> -
> - /// \returns the instruction \p I or \p J that appears last in the BB .
> - int getLastIndex(Instruction *I, Instruction *J);
> -
> - /// \returns the insertion point for \p Index.
> - Instruction *getInsertionPoint(unsigned Index);
> -
> - /// \returns a vector from a collection of scalars in \p VL.
> - Value *Scalarize(ArrayRef<Value *> VL, VectorType *Ty);
> -
> -private:
> - /// Maps instructions to numbers and back.
> - SmallDenseMap<Value *, int> InstrIdx;
> - /// Maps integers to Instructions.
> - std::vector<Instruction *> InstrVec;
> -
> - // -- containers that are used during getTreeCost -- //
> -
> - /// Contains values that must be scalarized because they are used
> - /// by multiple lanes, or by users outside the tree.
> - /// NOTICE: The vectorization methods also use this set.
> - ValueSet MustScalarize;
> -
> - /// Contains values that have users outside of the vectorized graph.
> - /// We need to generate extract instructions for these values.
> - /// NOTICE: The vectorization methods also use this set.
> - SetVector<Value *> MustExtract;
> -
> - /// Contains a list of values that are used outside the current tree. This
> - /// set must be reset between runs.
> - SetVector<Value *> MultiUserVals;
> - /// Maps values in the tree to the vector lanes that uses them. This map must
> - /// be reset between runs of getCost.
> - std::map<Value *, int> LaneMap;
> - /// A list of instructions to ignore while sinking
> - /// memory instructions. This map must be reset between runs of getCost.
> - ValueSet MemBarrierIgnoreList;
> -
> - // -- Containers that are used during vectorizeTree -- //
> -
> - /// Maps between the first scalar to the vector. This map must be reset
> - /// between runs.
> - DenseMap<Value *, Value *> VectorizedValues;
> -
> - // -- Containers that are used after vectorization by the caller -- //
> -
> - /// A list of instructions that are used when gathering scalars into vectors.
> - /// In many cases these instructions can be hoisted outside of the BB.
> - /// Iterating over this list is faster than calling LICM.
> - /// Notice: We insert NULL ptrs to separate between the different gather
> - /// sequences.
> - InstrList GatherInstructions;
> -
> - /// Instruction builder to construct the vectorized tree.
> - IRBuilder<> Builder;
> -
> - // Analysis and block reference.
> - BasicBlock *BB;
> - ScalarEvolution *SE;
> - DataLayout *DL;
> - TargetTransformInfo *TTI;
> - AliasAnalysis *AA;
> - Loop *L;
> -};
> -
> -} // end of namespace
> -
> -#endif // LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll?rev=184647&r1=184646&r2=184647&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll Sat Jun 22 16:34:10 2013
> @@ -50,9 +50,9 @@ entry:
> ; }
>
> ; CHECK: @extr_user
> +; CHECK: load i32*
> ; CHECK: store <4 x i32>
> -; CHECK-NEXT: extractelement <4 x i32>
> -; CHECK: ret
> +; CHECK-NEXT: ret
> define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
> entry:
> %0 = load i32* %A, align 4
> @@ -79,9 +79,9 @@ entry:
>
> ; In this example we have an external user that is not the first element in the vector.
> ; CHECK: @extr_user1
> +; CHECK: load i32*
> ; CHECK: store <4 x i32>
> -; CHECK-NEXT: extractelement <4 x i32>
> -; CHECK: ret
> +; CHECK-NEXT: ret
> define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
> entry:
> %0 = load i32* %A, align 4
>
> Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_block.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_block.ll?rev=184647&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_block.ll (added)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_block.ll Sat Jun 22 16:34:10 2013
> @@ -0,0 +1,55 @@
> +; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
> +
> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
> +target triple = "x86_64-apple-macosx10.7.0"
> +
> +; int bar(double *A, int d) {
> +; double A0 = A[0];
> +; double A1 = A[1];
> +; float F0 = A0;
> +; float F1 = A1;
> +; if (d) foo(); <----- This splits the blocks
> +; F0+=4.0;
> +; F1+=5.0;
> +; A[8] = 9.0 + F0;
> +; A[9] = 5.0 + F1;
> +; }
> +
> +
> +;CHECK: @bar
> +;CHECK: load <2 x double>
> +;CHECK: fptrunc <2 x double>
> +;CHECK: call i32
> +;CHECK: fadd <2 x float>
> +;CHECK: fpext <2 x float>
> +;CHECK: store <2 x double>
> +;CHECK: ret
> +define i32 @bar(double* nocapture %A, i32 %d) {
> + %1 = load double* %A, align 8
> + %2 = getelementptr inbounds double* %A, i64 1
> + %3 = load double* %2, align 8
> + %4 = fptrunc double %1 to float
> + %5 = fptrunc double %3 to float
> + %6 = icmp eq i32 %d, 0
> + br i1 %6, label %9, label %7
> +
> +; <label>:7 ; preds = %0
> + %8 = tail call i32 (...)* @foo()
> + br label %9
> +
> +; <label>:9 ; preds = %0, %7
> + %10 = fadd float %4, 4.000000e+00
> + %11 = fadd float %5, 5.000000e+00
> + %12 = fpext float %10 to double
> + %13 = fadd double %12, 9.000000e+00
> + %14 = getelementptr inbounds double* %A, i64 8
> + store double %13, double* %14, align 8
> + %15 = fpext float %11 to double
> + %16 = fadd double %15, 5.000000e+00
> + %17 = getelementptr inbounds double* %A, i64 9
> + store double %16, double* %17, align 8
> + ret i32 undef
> +}
> +
> +declare i32 @foo(...)
> +
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll?rev=184647&r1=184646&r2=184647&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll Sat Jun 22 16:34:10 2013
> @@ -12,8 +12,8 @@ target triple = "x86_64-apple-macosx10.7
> ;}
>
> ;CHECK: @foo
> -;CHECK: load <4 x i32>
> ;CHECK: insertelement <4 x i32>
> +;CHECK: load <4 x i32>
> ;CHECK: add <4 x i32>
> ;CHECK: store <4 x i32>
> ;CHECK: ret
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
--
Alexey Samsonov, MSK
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130623/6dec61fa/attachment.html>
More information about the llvm-commits mailing list