<div dir="ltr">Hi Nadav!<div><br></div><div style>This change breaks the ASan bootstrap bot with the following error report:</div><div style><br></div><div style><div>=================================================================</div>
<div>==27050==ERROR: AddressSanitizer: heap-use-after-free on address 0x60d00000c488 at pc 0x1592bfc bp 0x7ffffec9cd90 sp 0x7ffffec9cd88</div><div>READ of size 8 at 0x60d00000c488 thread T0</div><div> #0 0x1592bfb in getParent /build/llvm/include/llvm/IR/Instruction.h:53</div>
<div> #1 0x1592bfb in SetInsertPoint /build/llvm/include/llvm/IR/IRBuilder.h:90</div><div> #2 0x1592bfb in ~BuilderLocGuard /build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:61</div><div> #3 0x1592bfb in (anonymous namespace)::FuncSLP::vectorizeTree_rec(llvm::ArrayRef&lt;llvm::Value*&gt;) /build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1111</div>
<div> #4 0x158e563 in (anonymous namespace)::FuncSLP::vectorizeTree(llvm::ArrayRef&lt;llvm::Value*&gt;) /build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1115</div><div> #5 0x1588ba1 in vectorizeStoreChain /build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:866</div>
<div> #6 0x1588ba1 in vectorizeStores /build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:915</div><div> #7 0x1588ba1 in vectorizeStoreChains /build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1468</div><div>
#8 0x1588ba1 in (anonymous namespace)::SLPVectorizer::runOnFunction(llvm::Function&) /build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1230</div><div> #9 0x2b03fdc in llvm::FPPassManager::runOnFunction(llvm::Function&) /build/llvm/lib/IR/PassManager.cpp:1530</div>
<div> #10 0x2b045a5 in llvm::FPPassManager::runOnModule(llvm::Module&) /build/llvm/lib/IR/PassManager.cpp:1550</div><div> #11 0x2b04dbb in llvm::MPPassManager::runOnModule(llvm::Module&) /build/llvm/lib/IR/PassManager.cpp:1608</div>
<div> #12 0x2b05fb3 in llvm::PassManagerImpl::run(llvm::Module&) /build/llvm/lib/IR/PassManager.cpp:1703</div><div> #13 0x2b0642f in llvm::PassManager::run(llvm::Module&) /build/llvm/lib/IR/PassManager.cpp:1738</div>
<div> #14 0x6199a3 in main /build/llvm/tools/opt/opt.cpp:823</div><div> #15 0x7fb44e87276c (/lib/x86_64-linux-gnu/libc.so.6+0x2176c)</div><div> #16 0x608ed4 in _start (/build/llvm_build_asan/bin/opt+0x608ed4)</div>
<div>0x60d00000c488 is located 120 bytes inside of 136-byte region [0x60d00000c410,0x60d00000c498)</div><div>freed by thread T0 here:</div><div> #0 0x5f49c5 in operator delete(void*) /build/llvm/projects/compiler-rt/lib/asan/asan_new_delete.cc:83</div>
<div> #1 0x1591a6e in (anonymous namespace)::FuncSLP::vectorizeTree_rec(llvm::ArrayRef&lt;llvm::Value*&gt;) /build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1105</div><div> #2 0x158e563 in (anonymous namespace)::FuncSLP::vectorizeTree(llvm::ArrayRef&lt;llvm::Value*&gt;) /build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1115</div>
<div> #3 0x1588ba1 in vectorizeStoreChain /build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:866</div><div> #4 0x1588ba1 in vectorizeStores /build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:915</div><div> #5 0x1588ba1 in vectorizeStoreChains /build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1468</div>
<div> #6 0x1588ba1 in (anonymous namespace)::SLPVectorizer::runOnFunction(llvm::Function&) /build/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:1230</div><div> #7 0x2b03fdc in llvm::FPPassManager::runOnFunction(llvm::Function&) /build/llvm/lib/IR/PassManager.cpp:1530</div>
<div> #8 0x2b045a5 in llvm::FPPassManager::runOnModule(llvm::Module&) /build/llvm/lib/IR/PassManager.cpp:1550</div><div> #9 0x2b04dbb in llvm::MPPassManager::runOnModule(llvm::Module&) /build/llvm/lib/IR/PassManager.cpp:1608</div>
<div> #10 0x2b05fb3 in llvm::PassManagerImpl::run(llvm::Module&) /build/llvm/lib/IR/PassManager.cpp:1703</div><div> #11 0x2b0642f in llvm::PassManager::run(llvm::Module&) /build/llvm/lib/IR/PassManager.cpp:1738</div>
<div> #12 0x6199a3 in main /build/llvm/tools/opt/opt.cpp:823</div><div> #13 0x7fb44e87276c (/lib/x86_64-linux-gnu/libc.so.6+0x2176c)</div><div>previously allocated by thread T0 here:</div><div> #0 0x5f4705 in operator new(unsigned long) /build/llvm/projects/compiler-rt/lib/asan/asan_new_delete.cc:52</div>
<div> #1 0x2b30953 in llvm::User::operator new(unsigned long, unsigned int) /build/llvm/lib/IR/User.cpp:60</div><div> #2 0x149304f in operator new /build/llvm/include/llvm/IR/Instructions.h:265</div><div> #3 0x149304f in llvm::LLParser::ParseStore(llvm::Instruction*&, llvm::LLParser::PerFunctionState&) /build/llvm/lib/AsmParser/LLParser.cpp:4137</div>
<div> #4 0x14811e6 in llvm::LLParser::ParseInstruction(llvm::Instruction*&, llvm::BasicBlock*, llvm::LLParser::PerFunctionState&) /build/llvm/lib/AsmParser/LLParser.cpp:3312</div><div> #5 0x1480175 in llvm::LLParser::ParseBasicBlock(llvm::LLParser::PerFunctionState&) /build/llvm/lib/AsmParser/LLParser.cpp:3185</div>
<div> #6 0x145858f in llvm::LLParser::ParseFunctionBody(llvm::Function&) /build/llvm/lib/AsmParser/LLParser.cpp:3138</div><div> #7 0x1445942 in ParseDefine /build/llvm/lib/AsmParser/LLParser.cpp:424</div><div> #8 0x1445942 in llvm::LLParser::ParseTopLevelEntities() /build/llvm/lib/AsmParser/LLParser.cpp:226</div>
<div> #9 0x14455ce in llvm::LLParser::Run() /build/llvm/lib/AsmParser/LLParser.cpp:41</div><div> #10 0x143706e in llvm::ParseAssembly(llvm::MemoryBuffer*, llvm::Module*, llvm::SMDiagnostic&, llvm::LLVMContext&) /build/llvm/lib/AsmParser/Parser.cpp:38</div>
<div> #11 0x11bf597 in llvm::ParseIR(llvm::MemoryBuffer*, llvm::SMDiagnostic&, llvm::LLVMContext&) /build/llvm/lib/IRReader/IRReader.cpp:76</div><div> #12 0x11bff04 in llvm::ParseIRFile(std::string const&, llvm::SMDiagnostic&, llvm::LLVMContext&) /build/llvm/lib/IRReader/IRReader.cpp:88</div>
<div> #13 0x61308c in main /build/llvm/tools/opt/opt.cpp:592</div><div> #14 0x7fb44e87276c (/lib/x86_64-linux-gnu/libc.so.6+0x2176c)</div><div>SUMMARY: AddressSanitizer: heap-use-after-free /build/llvm/include/llvm/IR/Instruction.h:53 getParent</div>
<div><br></div><div style>Can you please fix this?</div></div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Sun, Jun 23, 2013 at 1:34 AM, Nadav Rotem <span dir="ltr">&lt;<a href="mailto:nrotem@apple.com" target="_blank">nrotem@apple.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: nadav<br>
Date: Sat Jun 22 16:34:10 2013<br>
New Revision: 184647<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=184647&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=184647&view=rev</a><br>
Log:<br>
SLP Vectorizer: Implement multi-block slp-vectorization.<br>
<br>
Rewrote the SLP-vectorization as a whole-function vectorization pass. It is now able to vectorize chains across multiple basic blocks.<br>
It still does not vectorize PHIs, but this should be easy to do now that we scan the entire function.<br>
I removed the support for extracting values from trees.<br>
We are now able to vectorize more programs, but there are some serious regressions in many workloads (such as flops-6 and mandel-2).<br>
<br>
<br>
Added:<br>
llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_block.ll<br>
Removed:<br>
llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp<br>
llvm/trunk/lib/Transforms/Vectorize/VecUtils.h<br>
Modified:<br>
llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt<br>
llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>
llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll<br>
llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll<br>
<br>
Modified: llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt?rev=184647&r1=184646&r2=184647&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt?rev=184647&r1=184646&r2=184647&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt (original)<br>
+++ llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt Sat Jun 22 16:34:10 2013<br>
@@ -3,7 +3,6 @@ add_llvm_library(LLVMVectorize<br>
Vectorize.cpp<br>
LoopVectorize.cpp<br>
SLPVectorizer.cpp<br>
- VecUtils.cpp<br>
)<br>
<br>
add_dependencies(LLVMVectorize intrinsics_gen)<br>
<br>
Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=184647&r1=184646&r2=184647&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=184647&r1=184646&r2=184647&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Sat Jun 22 16:34:10 2013<br>
@@ -18,17 +18,20 @@<br>
#define SV_NAME "slp-vectorizer"<br>
#define DEBUG_TYPE "SLP"<br>
<br>
-#include "VecUtils.h"<br>
#include "llvm/Transforms/Vectorize.h"<br>
#include "llvm/ADT/MapVector.h"<br>
+#include "llvm/ADT/SetVector.h"<br>
#include "llvm/Analysis/AliasAnalysis.h"<br>
#include "llvm/Analysis/ScalarEvolution.h"<br>
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"<br>
+#include "llvm/Analysis/AliasAnalysis.h"<br>
#include "llvm/Analysis/TargetTransformInfo.h"<br>
#include "llvm/Analysis/Verifier.h"<br>
#include "llvm/Analysis/LoopInfo.h"<br>
#include "llvm/IR/DataLayout.h"<br>
#include "llvm/IR/Instructions.h"<br>
#include "llvm/IR/IntrinsicInst.h"<br>
+#include "llvm/IR/IRBuilder.h"<br>
#include "llvm/IR/Module.h"<br>
#include "llvm/IR/Type.h"<br>
#include "llvm/IR/Value.h"<br>
@@ -36,6 +39,7 @@<br>
#include "llvm/Support/CommandLine.h"<br>
#include "llvm/Support/Debug.h"<br>
#include "llvm/Support/raw_ostream.h"<br>
+#include &lt;algorithm&gt;<br>
#include &lt;map&gt;<br>
<br>
using namespace llvm;<br>
@@ -46,9 +50,1138 @@ static cl::opt&lt;int&gt;<br>
"number. (gain = -cost of vectorization)"));<br>
namespace {<br>
<br>
+static const unsigned MinVecRegSize = 128;<br>
+<br>
+static const unsigned RecursionMaxDepth = 6;<br>
+<br>
+/// RAII pattern to save the insertion point of the IR builder.<br>
+class BuilderLocGuard {<br>
+public:<br>
+ BuilderLocGuard(IRBuilder<> &B) : Builder(B), Loc(B.GetInsertPoint()) {}<br>
+ ~BuilderLocGuard() { Builder.SetInsertPoint(Loc); }<br>
+<br>
+private:<br>
+ // Prevent copying.<br>
+ BuilderLocGuard(const BuilderLocGuard &);<br>
+ BuilderLocGuard &operator=(const BuilderLocGuard &);<br>
+ IRBuilder<> &Builder;<br>
+ BasicBlock::iterator Loc;<br>
+};<br>
+<br>
+/// A helper class for numbering instructions in multible blocks.<br>
+/// Numbers starts at zero for each basic block.<br>
+struct BlockNumbering {<br>
+<br>
+ BlockNumbering(BasicBlock *Bb) : BB(Bb), Valid(false) {}<br>
+<br>
+ BlockNumbering() : BB(0), Valid(false) {}<br>
+<br>
+ void numberInstructions() {<br>
+ unsigned Loc = 0;<br>
+ InstrIdx.clear();<br>
+ InstrVec.clear();<br>
+ // Number the instructions in the block.<br>
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {<br>
+ InstrIdx[it] = Loc++;<br>
+ InstrVec.push_back(it);<br>
+ assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");<br>
+ }<br>
+ Valid = true;<br>
+ }<br>
+<br>
+ int getIndex(Instruction *I) {<br>
+ if (!Valid)<br>
+ numberInstructions();<br>
+ assert(InstrIdx.count(I) && "Unknown instruction");<br>
+ return InstrIdx[I];<br>
+ }<br>
+<br>
+ Instruction *getInstruction(unsigned loc) {<br>
+ if (!Valid)<br>
+ numberInstructions();<br>
+ assert(InstrVec.size() > loc && "Invalid Index");<br>
+ return InstrVec[loc];<br>
+ }<br>
+<br>
+ void forget() { Valid = false; }<br>
+<br>
+private:<br>
+ /// The block we are numbering.<br>
+ BasicBlock *BB;<br>
+ /// Is the block numbered.<br>
+ bool Valid;<br>
+ /// Maps instructions to numbers and back.<br>
+ SmallDenseMap<Instruction *, int> InstrIdx;<br>
+ /// Maps integers to Instructions.<br>
+ std::vector<Instruction *> InstrVec;<br>
+};<br>
+<br>
+class FuncSLP {<br>
+ typedef SmallVector<Value *, 8> ValueList;<br>
+ typedef SmallVector<Instruction *, 16> InstrList;<br>
+ typedef SmallPtrSet<Value *, 16> ValueSet;<br>
+ typedef SmallVector<StoreInst *, 8> StoreList;<br>
+<br>
+public:<br>
+ static const int MAX_COST = INT_MIN;<br>
+<br>
+ FuncSLP(Function *Func, ScalarEvolution *Se, DataLayout *Dl,<br>
+ TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li)<br>
+ : F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li),<br>
+ Builder(Se->getContext()) {<br>
+ for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) {<br>
+ BasicBlock *BB = it;<br>
+ BlocksNumbers[BB] = BlockNumbering(BB);<br>
+ }<br>
+ }<br>
+<br>
+ /// \brief Take the pointer operand from the Load/Store instruction.<br>
+ /// \returns NULL if this is not a valid Load/Store instruction.<br>
+ static Value *getPointerOperand(Value *I);<br>
+<br>
+ /// \brief Take the address space operand from the Load/Store instruction.<br>
+ /// \returns -1 if this is not a valid Load/Store instruction.<br>
+ static unsigned getAddressSpaceOperand(Value *I);<br>
+<br>
+ /// \returns true if the memory operations A and B are consecutive.<br>
+ bool isConsecutiveAccess(Value *A, Value *B);<br>
+<br>
+ /// \brief Vectorize the tree that starts with the elements in \p VL.<br>
+ /// \returns the vectorized value.<br>
+ Value *vectorizeTree(ArrayRef<Value *> VL);<br>
+<br>
+ /// \returns the vectorization cost of the subtree that starts at \p VL.<br>
+ /// A negative number means that this is profitable.<br>
+ int getTreeCost(ArrayRef<Value *> VL);<br>
+<br>
+ /// \returns the scalarization cost for this list of values. Assuming that<br>
+ /// this subtree gets vectorized, we may need to extract the values from the<br>
+ /// roots. This method calculates the cost of extracting the values.<br>
+ int getGatherCost(ArrayRef<Value *> VL);<br>
+<br>
+ /// \brief Attempts to order and vectorize a sequence of stores. This<br>
+ /// function does a quadratic scan of the given stores.<br>
+ /// \returns true if the basic block was modified.<br>
+ bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold);<br>
+<br>
+ /// \brief Vectorize a group of scalars into a vector tree.<br>
+ /// \returns the vectorized value.<br>
+ Value *vectorizeArith(ArrayRef<Value *> Operands);<br>
+<br>
+ /// \brief This method contains the recursive part of getTreeCost.<br>
+ int getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth);<br>
+<br>
+ /// \brief This recursive method looks for vectorization hazards such as<br>
+ /// values that are used by multiple users and checks that values are used<br>
+ /// by only one vector lane. It updates the variables LaneMap, MultiUserVals.<br>
+ void getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth);<br>
+<br>
+ /// \brief This method contains the recursive part of vectorizeTree.<br>
+ Value *vectorizeTree_rec(ArrayRef<Value *> VL);<br>
+<br>
+ /// \brief Vectorize a sorted sequence of stores.<br>
+ bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold);<br>
+<br>
+ /// \returns the scalarization cost for this type. Scalarization in this<br>
+ /// context means the creation of vectors from a group of scalars.<br>
+ int getGatherCost(Type *Ty);<br>
+<br>
+ /// \returns the AA location that is being access by the instruction.<br>
+ AliasAnalysis::Location getLocation(Instruction *I);<br>
+<br>
+ /// \brief Checks if it is possible to sink an instruction from<br>
+ /// \p Src to \p Dst.<br>
+ /// \returns the pointer to the barrier instruction if we can't sink.<br>
+ Value *getSinkBarrier(Instruction *Src, Instruction *Dst);<br>
+<br>
+ /// \returns the index of the last instrucion in the BB from \p VL.<br>
+ int getLastIndex(ArrayRef<Value *> VL);<br>
+<br>
+ /// \returns the Instrucion in the bundle \p VL.<br>
+ Instruction *getLastInstruction(ArrayRef<Value *> VL);<br>
+<br>
+ /// \returns the Instruction at index \p Index which is in Block \p BB.<br>
+ Instruction *getInstructionForIndex(unsigned Index, BasicBlock *BB);<br>
+<br>
+ /// \returns the index of the first User of \p VL.<br>
+ int getFirstUserIndex(ArrayRef<Value *> VL);<br>
+<br>
+ /// \returns a vector from a collection of scalars in \p VL.<br>
+ Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);<br>
+<br>
+ /// \brief Try to hoist gather sequences outside of the loop in cases where<br>
+ /// all of the sources are loop invariant.<br>
+ void hoistGatherSequence();<br>
+<br>
+ bool needToGatherAny(ArrayRef<Value *> VL) {<br>
+ for (int i = 0, e = VL.size(); i < e; ++i)<br>
+ if (MustGather.count(VL[i]))<br>
+ return true;<br>
+ return false;<br>
+ }<br>
+<br>
+ /// -- Vectorization State --<br>
+<br>
+ /// Maps values in the tree to the vector lanes that uses them. This map must<br>
+ /// be reset between runs of getCost.<br>
+ std::map<Value *, int> LaneMap;<br>
+ /// A list of instructions to ignore while sinking<br>
+ /// memory instructions. This map must be reset between runs of getCost.<br>
+ ValueSet MemBarrierIgnoreList;<br>
+<br>
+ /// Maps between the first scalar to the vector. This map must be reset<br>
+ /// between runs.<br>
+ DenseMap<Value *, Value *> VectorizedValues;<br>
+<br>
+ /// Contains values that must be gathered because they are used<br>
+ /// by multiple lanes, or by users outside the tree.<br>
+ /// NOTICE: The vectorization methods also use this set.<br>
+ ValueSet MustGather;<br>
+<br>
+ /// Contains a list of values that are used outside the current tree. This<br>
+ /// set must be reset between runs.<br>
+ SetVector<Value *> MultiUserVals;<br>
+<br>
+ /// Holds all of the instructions that we gathered.<br>
+ SetVector<Instruction *> GatherSeq;<br>
+<br>
+ /// Numbers instructions in different blocks.<br>
+ std::map<BasicBlock *, BlockNumbering> BlocksNumbers;<br>
+<br>
+ // Analysis and block reference.<br>
+ Function *F;<br>
+ ScalarEvolution *SE;<br>
+ DataLayout *DL;<br>
+ TargetTransformInfo *TTI;<br>
+ AliasAnalysis *AA;<br>
+ LoopInfo *LI;<br>
+ /// Instruction builder to construct the vectorized tree.<br>
+ IRBuilder<> Builder;<br>
+};<br>
+<br>
+int FuncSLP::getGatherCost(Type *Ty) {<br>
+ int Cost = 0;<br>
+ for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)<br>
+ Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);<br>
+ return Cost;<br>
+}<br>
+<br>
+int FuncSLP::getGatherCost(ArrayRef<Value *> VL) {<br>
+ // Find the type of the operands in VL.<br>
+ Type *ScalarTy = VL[0]->getType();<br>
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))<br>
+ ScalarTy = SI->getValueOperand()->getType();<br>
+ VectorType *VecTy = VectorType::get(ScalarTy, VL.size());<br>
+ // Find the cost of inserting/extracting values from the vector.<br>
+ return getGatherCost(VecTy);<br>
+}<br>
+<br>
+AliasAnalysis::Location FuncSLP::getLocation(Instruction *I) {<br>
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))<br>
+ return AA->getLocation(SI);<br>
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))<br>
+ return AA->getLocation(LI);<br>
+ return AliasAnalysis::Location();<br>
+}<br>
+<br>
+Value *FuncSLP::getPointerOperand(Value *I) {<br>
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))<br>
+ return LI->getPointerOperand();<br>
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))<br>
+ return SI->getPointerOperand();<br>
+ return 0;<br>
+}<br>
+<br>
+unsigned FuncSLP::getAddressSpaceOperand(Value *I) {<br>
+ if (LoadInst *L = dyn_cast<LoadInst>(I))<br>
+ return L->getPointerAddressSpace();<br>
+ if (StoreInst *S = dyn_cast<StoreInst>(I))<br>
+ return S->getPointerAddressSpace();<br>
+ return -1;<br>
+}<br>
+<br>
+bool FuncSLP::isConsecutiveAccess(Value *A, Value *B) {<br>
+ Value *PtrA = getPointerOperand(A);<br>
+ Value *PtrB = getPointerOperand(B);<br>
+ unsigned ASA = getAddressSpaceOperand(A);<br>
+ unsigned ASB = getAddressSpaceOperand(B);<br>
+<br>
+ // Check that the address spaces match and that the pointers are valid.<br>
+ if (!PtrA || !PtrB || (ASA != ASB))<br>
+ return false;<br>
+<br>
+ // Check that A and B are of the same type.<br>
+ if (PtrA->getType() != PtrB->getType())<br>
+ return false;<br>
+<br>
+ // Calculate the distance.<br>
+ const SCEV *PtrSCEVA = SE->getSCEV(PtrA);<br>
+ const SCEV *PtrSCEVB = SE->getSCEV(PtrB);<br>
+ const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);<br>
+ const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);<br>
+<br>
+ // Non constant distance.<br>
+ if (!ConstOffSCEV)<br>
+ return false;<br>
+<br>
+ int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();<br>
+ Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();<br>
+ // The Instructions are connsecutive if the size of the first load/store is<br>
+ // the same as the offset.<br>
+ int64_t Sz = DL->getTypeStoreSize(Ty);<br>
+ return ((-Offset) == Sz);<br>
+}<br>
+<br>
+Value *FuncSLP::getSinkBarrier(Instruction *Src, Instruction *Dst) {<br>
+ assert(Src->getParent() == Dst->getParent() && "Not the same BB");<br>
+ BasicBlock::iterator I = Src, E = Dst;<br>
+ /// Scan all of the instruction from SRC to DST and check if<br>
+ /// the source may alias.<br>
+ for (++I; I != E; ++I) {<br>
+ // Ignore store instructions that are marked as 'ignore'.<br>
+ if (MemBarrierIgnoreList.count(I))<br>
+ continue;<br>
+ if (Src->mayWriteToMemory()) /* Write */ {<br>
+ if (!I->mayReadOrWriteMemory())<br>
+ continue;<br>
+ } else /* Read */ {<br>
+ if (!I->mayWriteToMemory())<br>
+ continue;<br>
+ }<br>
+ AliasAnalysis::Location A = getLocation(&*I);<br>
+ AliasAnalysis::Location B = getLocation(Src);<br>
+<br>
+ if (!A.Ptr || !B.Ptr || AA->alias(A, B))<br>
+ return I;<br>
+ }<br>
+ return 0;<br>
+}<br>
+<br>
+static BasicBlock *getSameBlock(ArrayRef<Value *> VL) {<br>
+ BasicBlock *BB = 0;<br>
+ for (int i = 0, e = VL.size(); i < e; i++) {<br>
+ Instruction *I = dyn_cast<Instruction>(VL[i]);<br>
+ if (!I)<br>
+ return 0;<br>
+<br>
+ if (!BB) {<br>
+ BB = I->getParent();<br>
+ continue;<br>
+ }<br>
+<br>
+ if (BB != I->getParent())<br>
+ return 0;<br>
+ }<br>
+ return BB;<br>
+}<br>
+<br>
+static bool allConstant(ArrayRef<Value *> VL) {<br>
+ for (unsigned i = 0, e = VL.size(); i < e; ++i)<br>
+ if (!isa<Constant>(VL[i]))<br>
+ return false;<br>
+ return true;<br>
+}<br>
+<br>
+static bool isSplat(ArrayRef<Value *> VL) {<br>
+ for (unsigned i = 1, e = VL.size(); i < e; ++i)<br>
+ if (VL[i] != VL[0])<br>
+ return false;<br>
+ return true;<br>
+}<br>
+<br>
+static unsigned getSameOpcode(ArrayRef<Value *> VL) {<br>
+ unsigned Opcode = 0;<br>
+ for (int i = 0, e = VL.size(); i < e; i++) {<br>
+ if (Instruction *I = dyn_cast<Instruction>(VL[i])) {<br>
+ if (!Opcode) {<br>
+ Opcode = I->getOpcode();<br>
+ continue;<br>
+ }<br>
+ if (Opcode != I->getOpcode())<br>
+ return 0;<br>
+ }<br>
+ }<br>
+ return Opcode;<br>
+}<br>
+<br>
+static bool CanReuseExtract(ArrayRef<Value *> VL, unsigned VF,<br>
+ VectorType *VecTy) {<br>
+ assert(Instruction::ExtractElement == getSameOpcode(VL) && "Invalid opcode");<br>
+ // Check if all of the extracts come from the same vector and from the<br>
+ // correct offset.<br>
+ Value *VL0 = VL[0];<br>
+ ExtractElementInst *E0 = cast<ExtractElementInst>(VL0);<br>
+ Value *Vec = E0->getOperand(0);<br>
+<br>
+ // We have to extract from the same vector type.<br>
+ if (Vec->getType() != VecTy)<br>
+ return false;<br>
+<br>
+ // Check that all of the indices extract from the correct offset.<br>
+ ConstantInt *CI = dyn_cast<ConstantInt>(E0->getOperand(1));<br>
+ if (!CI || CI->getZExtValue())<br>
+ return false;<br>
+<br>
+ for (unsigned i = 1, e = VF; i < e; ++i) {<br>
+ ExtractElementInst *E = cast<ExtractElementInst>(VL[i]);<br>
+ ConstantInt *CI = dyn_cast<ConstantInt>(E->getOperand(1));<br>
+<br>
+ if (!CI || CI->getZExtValue() != i || E->getOperand(0) != Vec)<br>
+ return false;<br>
+ }<br>
+<br>
+ return true;<br>
+}<br>
+<br>
+void FuncSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {<br>
+ if (Depth == RecursionMaxDepth)<br>
+ return MustGather.insert(VL.begin(), VL.end());<br>
+<br>
+ // Don't handle vectors.<br>
+ if (VL[0]->getType()->isVectorTy())<br>
+ return;<br>
+<br>
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))<br>
+ if (SI->getValueOperand()->getType()->isVectorTy())<br>
+ return;<br>
+<br>
+ // If all of the operands are identical or constant we have a simple solution.<br>
+ if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL))<br>
+ return MustGather.insert(VL.begin(), VL.end());<br>
+<br>
+ // Stop the scan at unknown IR.<br>
+ Instruction *VL0 = dyn_cast<Instruction>(VL[0]);<br>
+ assert(VL0 && "Invalid instruction");<br>
+<br>
+ // Mark instructions with multiple users.<br>
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {<br>
+ Instruction *I = dyn_cast<Instruction>(VL[i]);<br>
+ // Remember to check if all of the users of this instruction are vectorized<br>
+ // within our tree. At depth zero we have no local users, only external<br>
+ // users that we don't care about.<br>
+ if (Depth && I && I->getNumUses() > 1) {<br>
+ DEBUG(dbgs() << "SLP: Adding to MultiUserVals "<br>
+ "because it has multiple users:" << *I << " \n");<br>
+ MultiUserVals.insert(I);<br>
+ }<br>
+ }<br>
+<br>
+ // Check that the instruction is only used within one lane.<br>
+ for (int i = 0, e = VL.size(); i < e; ++i) {<br>
+ if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) {<br>
+ DEBUG(dbgs() << "SLP: Value used by multiple lanes:" << *VL[i] << "\n");<br>
+ return MustGather.insert(VL.begin(), VL.end());<br>
+ }<br>
+ // Make this instruction as 'seen' and remember the lane.<br>
+ LaneMap[VL[i]] = i;<br>
+ }<br>
+<br>
+ unsigned Opcode = getSameOpcode(VL);<br>
+ if (!Opcode)<br>
+ return MustGather.insert(VL.begin(), VL.end());<br>
+<br>
+ switch (Opcode) {<br>
+ case Instruction::ExtractElement: {<br>
+ VectorType *VecTy = VectorType::get(VL[0]->getType(), VL.size());<br>
+ // No need to follow ExtractElements that are going to be optimized away.<br>
+ if (CanReuseExtract(VL, VL.size(), VecTy))<br>
+ return;<br>
+ // Fall through.<br>
+ }<br>
+ case Instruction::Load:<br>
+ return;<br>
+ case Instruction::ZExt:<br>
+ case Instruction::SExt:<br>
+ case Instruction::FPToUI:<br>
+ case Instruction::FPToSI:<br>
+ case Instruction::FPExt:<br>
+ case Instruction::PtrToInt:<br>
+ case Instruction::IntToPtr:<br>
+ case Instruction::SIToFP:<br>
+ case Instruction::UIToFP:<br>
+ case Instruction::Trunc:<br>
+ case Instruction::FPTrunc:<br>
+ case Instruction::BitCast:<br>
+ case Instruction::Select:<br>
+ case Instruction::ICmp:<br>
+ case Instruction::FCmp:<br>
+ case Instruction::Add:<br>
+ case Instruction::FAdd:<br>
+ case Instruction::Sub:<br>
+ case Instruction::FSub:<br>
+ case Instruction::Mul:<br>
+ case Instruction::FMul:<br>
+ case Instruction::UDiv:<br>
+ case Instruction::SDiv:<br>
+ case Instruction::FDiv:<br>
+ case Instruction::URem:<br>
+ case Instruction::SRem:<br>
+ case Instruction::FRem:<br>
+ case Instruction::Shl:<br>
+ case Instruction::LShr:<br>
+ case Instruction::AShr:<br>
+ case Instruction::And:<br>
+ case Instruction::Or:<br>
+ case Instruction::Xor: {<br>
+ for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {<br>
+ ValueList Operands;<br>
+ // Prepare the operand vector.<br>
+ for (unsigned j = 0; j < VL.size(); ++j)<br>
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));<br>
+<br>
+ getTreeUses_rec(Operands, Depth + 1);<br>
+ }<br>
+ return;<br>
+ }<br>
+ case Instruction::Store: {<br>
+ ValueList Operands;<br>
+ for (unsigned j = 0; j < VL.size(); ++j)<br>
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));<br>
+ getTreeUses_rec(Operands, Depth + 1);<br>
+ return;<br>
+ }<br>
+ default:<br>
+ return MustGather.insert(VL.begin(), VL.end());<br>
+ }<br>
+}<br>
+<br>
+int FuncSLP::getLastIndex(ArrayRef<Value *> VL) {<br>
+ BasicBlock *BB = cast<Instruction>(VL[0])->getParent();<br>
+ assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block");<br>
+ BlockNumbering &BN = BlocksNumbers[BB];<br>
+<br>
+ int MaxIdx = BN.getIndex(BB->getFirstNonPHI());<br>
+ for (unsigned i = 0, e = VL.size(); i < e; ++i)<br>
+ MaxIdx = std::max(MaxIdx, BN.getIndex(cast<Instruction>(VL[i])));<br>
+ return MaxIdx;<br>
+}<br>
+<br>
+Instruction *FuncSLP::getLastInstruction(ArrayRef<Value *> VL) {<br>
+ BasicBlock *BB = cast<Instruction>(VL[0])->getParent();<br>
+ assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block");<br>
+ BlockNumbering &BN = BlocksNumbers[BB];<br>
+<br>
+ int MaxIdx = BN.getIndex(cast<Instruction>(VL[0]));<br>
+ for (unsigned i = 1, e = VL.size(); i < e; ++i)<br>
+ MaxIdx = std::max(MaxIdx, BN.getIndex(cast<Instruction>(VL[i])));<br>
+ return BN.getInstruction(MaxIdx);<br>
+}<br>
+<br>
+Instruction *FuncSLP::getInstructionForIndex(unsigned Index, BasicBlock *BB) {<br>
+ BlockNumbering &BN = BlocksNumbers[BB];<br>
+ return BN.getInstruction(Index);<br>
+}<br>
+<br>
+int FuncSLP::getFirstUserIndex(ArrayRef<Value *> VL) {<br>
+ BasicBlock *BB = getSameBlock(VL);<br>
+ BlockNumbering &BN = BlocksNumbers[BB];<br>
+<br>
+ // Find the first user of the values.<br>
+ int FirstUser = BN.getIndex(BB->getTerminator());<br>
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {<br>
+ for (Value::use_iterator U = VL[i]->use_begin(), UE = VL[i]->use_end();<br>
+ U != UE; ++U) {<br>
+ Instruction *Instr = dyn_cast<Instruction>(*U);<br>
+<br>
+ if (!Instr || Instr->getParent() != BB)<br>
+ continue;<br>
+<br>
+ FirstUser = std::min(FirstUser, BN.getIndex(Instr));<br>
+ }<br>
+ }<br>
+ return FirstUser;<br>
+}<br>
+<br>
+int FuncSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {<br>
+ Type *ScalarTy = VL[0]->getType();<br>
+<br>
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))<br>
+ ScalarTy = SI->getValueOperand()->getType();<br>
+<br>
+ /// Don't mess with vectors.<br>
+ if (ScalarTy->isVectorTy())<br>
+ return FuncSLP::MAX_COST;<br>
+<br>
+ VectorType *VecTy = VectorType::get(ScalarTy, VL.size());<br>
+<br>
+ if (allConstant(VL))<br>
+ return 0;<br>
+<br>
+ if (isSplat(VL))<br>
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);<br>
+<br>
+ if (Depth == RecursionMaxDepth || needToGatherAny(VL))<br>
+ return getGatherCost(VecTy);<br>
+<br>
+ BasicBlock *BB = getSameBlock(VL);<br>
+ unsigned Opcode = getSameOpcode(VL);<br>
+ assert(Opcode && BB && "Invalid Instruction Value");<br>
+<br>
+ // Check if it is safe to sink the loads or the stores.<br>
+ if (Opcode == Instruction::Load || Opcode == Instruction::Store) {<br>
+ int MaxIdx = getLastIndex(VL);<br>
+ Instruction *Last = getInstructionForIndex(MaxIdx, BB);<br>
+<br>
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {<br>
+ if (VL[i] == Last)<br>
+ continue;<br>
+ Value *Barrier = getSinkBarrier(cast<Instruction>(VL[i]), Last);<br>
+ if (Barrier) {<br>
+ DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << *Last<br>
+ << "\n because of " << *Barrier << "\n");<br>
+ return MAX_COST;<br>
+ }<br>
+ }<br>
+ }<br>
+<br>
+ Instruction *VL0 = cast<Instruction>(VL[0]);<br>
+ switch (Opcode) {<br>
+ case Instruction::ExtractElement: {<br>
+ if (CanReuseExtract(VL, VL.size(), VecTy))<br>
+ return 0;<br>
+ return getGatherCost(VecTy);<br>
+ }<br>
+ case Instruction::ZExt:<br>
+ case Instruction::SExt:<br>
+ case Instruction::FPToUI:<br>
+ case Instruction::FPToSI:<br>
+ case Instruction::FPExt:<br>
+ case Instruction::PtrToInt:<br>
+ case Instruction::IntToPtr:<br>
+ case Instruction::SIToFP:<br>
+ case Instruction::UIToFP:<br>
+ case Instruction::Trunc:<br>
+ case Instruction::FPTrunc:<br>
+ case Instruction::BitCast: {<br>
+ ValueList Operands;<br>
+ Type *SrcTy = VL0->getOperand(0)->getType();<br>
+ // Prepare the operand vector.<br>
+ for (unsigned j = 0; j < VL.size(); ++j) {<br>
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));<br>
+ // Check that the casted type is the same for all users.<br>
+ if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)<br>
+ return getGatherCost(VecTy);<br>
+ }<br>
+<br>
+ int Cost = getTreeCost_rec(Operands, Depth + 1);<br>
+ if (Cost == FuncSLP::MAX_COST)<br>
+ return Cost;<br>
+<br>
+ // Calculate the cost of this instruction.<br>
+ int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),<br>
+ VL0->getType(), SrcTy);<br>
+<br>
+ VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());<br>
+ int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);<br>
+ Cost += (VecCost - ScalarCost);<br>
+ return Cost;<br>
+ }<br>
+ case Instruction::FCmp:<br>
+ case Instruction::ICmp: {<br>
+ // Check that all of the compares have the same predicate.<br>
+ CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();<br>
+ for (unsigned i = 1, e = VL.size(); i < e; ++i) {<br>
+ CmpInst *Cmp = cast<CmpInst>(VL[i]);<br>
+ if (Cmp->getPredicate() != P0)<br>
+ return getGatherCost(VecTy);<br>
+ }<br>
+ // Fall through.<br>
+ }<br>
+ case Instruction::Select:<br>
+ case Instruction::Add:<br>
+ case Instruction::FAdd:<br>
+ case Instruction::Sub:<br>
+ case Instruction::FSub:<br>
+ case Instruction::Mul:<br>
+ case Instruction::FMul:<br>
+ case Instruction::UDiv:<br>
+ case Instruction::SDiv:<br>
+ case Instruction::FDiv:<br>
+ case Instruction::URem:<br>
+ case Instruction::SRem:<br>
+ case Instruction::FRem:<br>
+ case Instruction::Shl:<br>
+ case Instruction::LShr:<br>
+ case Instruction::AShr:<br>
+ case Instruction::And:<br>
+ case Instruction::Or:<br>
+ case Instruction::Xor: {<br>
+ int TotalCost = 0;<br>
+ // Calculate the cost of all of the operands.<br>
+ for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {<br>
+ ValueList Operands;<br>
+ // Prepare the operand vector.<br>
+ for (unsigned j = 0; j < VL.size(); ++j)<br>
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));<br>
+<br>
+ int Cost = getTreeCost_rec(Operands, Depth + 1);<br>
+ if (Cost == MAX_COST)<br>
+ return MAX_COST;<br>
+ TotalCost += TotalCost;<br>
+ }<br>
+<br>
+ // Calculate the cost of this instruction.<br>
+ int ScalarCost = 0;<br>
+ int VecCost = 0;<br>
+ if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp ||<br>
+ Opcode == Instruction::Select) {<br>
+ VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());<br>
+ ScalarCost =<br>
+ VecTy->getNumElements() *<br>
+ TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());<br>
+ VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);<br>
+ } else {<br>
+ ScalarCost = VecTy->getNumElements() *<br>
+ TTI->getArithmeticInstrCost(Opcode, ScalarTy);<br>
+ VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);<br>
+ }<br>
+ TotalCost += (VecCost - ScalarCost);<br>
+ return TotalCost;<br>
+ }<br>
+ case Instruction::Load: {<br>
+ // If we are scalarize the loads, add the cost of forming the vector.<br>
+ for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)<br>
+ if (!isConsecutiveAccess(VL[i], VL[i + 1]))<br>
+ return getGatherCost(VecTy);<br>
+<br>
+ // Cost of wide load - cost of scalar loads.<br>
+ int ScalarLdCost = VecTy->getNumElements() *<br>
+ TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);<br>
+ int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);<br>
+ return VecLdCost - ScalarLdCost;<br>
+ }<br>
+ case Instruction::Store: {<br>
+ // We know that we can merge the stores. Calculate the cost.<br>
+ int ScalarStCost = VecTy->getNumElements() *<br>
+ TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);<br>
+ int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);<br>
+ int StoreCost = VecStCost - ScalarStCost;<br>
+<br>
+ ValueList Operands;<br>
+ for (unsigned j = 0; j < VL.size(); ++j) {<br>
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));<br>
+ MemBarrierIgnoreList.insert(VL[j]);<br>
+ }<br>
+<br>
+ int Cost = getTreeCost_rec(Operands, Depth + 1);<br>
+ if (Cost == MAX_COST)<br>
+ return MAX_COST;<br>
+<br>
+ int TotalCost = StoreCost + Cost;<br>
+ return TotalCost;<br>
+ }<br>
+ default:<br>
+ // Unable to vectorize unknown instructions.<br>
+ return getGatherCost(VecTy);<br>
+ }<br>
+}<br>
+<br>
+/// \brief Compute the cost of vectorizing the tree whose roots are \p VL.<br>
+///<br>
+/// Clears the per-tree state (MemBarrierIgnoreList, LaneMap, MultiUserVals and<br>
+/// MustGather), rebuilds it with getTreeUses_rec, adds every multi-user value<br>
+/// that has a user missing from LaneMap to MustGather, and finally asks<br>
+/// getTreeCost_rec for the cost.<br>
+/// \returns MAX_COST when getSameBlock rejects \p VL or when the last root<br>
+/// index is greater than the first user index; otherwise the value computed by<br>
+/// getTreeCost_rec.<br>
+int FuncSLP::getTreeCost(ArrayRef<Value *> VL) {<br>
+  // Get rid of the list of stores that were removed, and from the<br>
+  // lists of instructions with multiple users.<br>
+  MemBarrierIgnoreList.clear();<br>
+  LaneMap.clear();<br>
+  MultiUserVals.clear();<br>
+  MustGather.clear();<br>
+<br>
+  if (!getSameBlock(VL))<br>
+    return MAX_COST;<br>
+<br>
+  // Find the location of the last root.<br>
+  int LastRootIndex = getLastIndex(VL);<br>
+  int FirstUserIndex = getFirstUserIndex(VL);<br>
+<br>
+  // Don't vectorize if there are users of the tree roots inside the tree<br>
+  // itself.<br>
+  if (LastRootIndex > FirstUserIndex)<br>
+    return MAX_COST;<br>
+<br>
+  // Scan the tree and find which value is used by which lane, and which values<br>
+  // must be scalarized.<br>
+  getTreeUses_rec(VL, 0);<br>
+<br>
+  // Check that instructions with multiple users can be vectorized. Mark unsafe<br>
+  // instructions.<br>
+  for (SetVector<Value *>::iterator it = MultiUserVals.begin(),<br>
+                                    e = MultiUserVals.end();<br>
+       it != e; ++it) {<br>
+    // Check that all of the users of this instr are within the tree.<br>
+    for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();<br>
+         I != E; ++I) {<br>
+      if (LaneMap.find(*I) == LaneMap.end()) {<br>
+        DEBUG(dbgs() << "SLP: Adding to MustExtract "<br>
+                        "because of an out of tree usage.\n");<br>
+        MustGather.insert(*it);<br>
+        // One out-of-tree user is enough to decide; the remaining users<br>
+        // cannot change the outcome, so stop scanning this value.<br>
+        break;<br>
+      }<br>
+    }<br>
+  }<br>
+<br>
+  // Now calculate the cost of vectorizing the tree.<br>
+  return getTreeCost_rec(VL, 0);<br>
+}<br>
+/// \brief Try to vectorize the chain of stores \p Chain.<br>
+///<br>
+/// A window of VF = MinVecRegSize / (stored type size in bits) stores is slid<br>
+/// over the chain; every window whose tree cost is below \p CostThreshold is<br>
+/// vectorized and skipped over. If no window was vectorized and the chain is<br>
+/// not longer than VF, the whole chain is tried as a single tree.<br>
+/// \returns true if any part of the chain was vectorized.<br>
+bool FuncSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {<br>
+  unsigned ChainLen = Chain.size();<br>
+  DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen<br>
+               << "\n");<br>
+  // Chain[0] is read below, so an empty chain has nothing to offer.<br>
+  if (!ChainLen)<br>
+    return false;<br>
+<br>
+  Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();<br>
+  unsigned Sz = DL->getTypeSizeInBits(StoreTy);<br>
+<br>
+  // Validate the element size before dividing by it: a zero-sized type is not<br>
+  // a power of two and must not reach the division below.<br>
+  if (!isPowerOf2_32(Sz))<br>
+    return false;<br>
+<br>
+  unsigned VF = MinVecRegSize / Sz;<br>
+  if (VF < 2)<br>
+    return false;<br>
+<br>
+  bool Changed = false;<br>
+  // Look for profitable vectorizable trees at all offsets, starting at zero.<br>
+  for (unsigned i = 0, e = ChainLen; i < e; ++i) {<br>
+    if (i + VF > e)<br>
+      break;<br>
+    DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i<br>
+                 << "\n");<br>
+    ArrayRef<Value *> Operands = Chain.slice(i, VF);<br>
+<br>
+    int Cost = getTreeCost(Operands);<br>
+    if (Cost == FuncSLP::MAX_COST)<br>
+      continue;<br>
+    DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");<br>
+    if (Cost < CostThreshold) {<br>
+      DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");<br>
+      vectorizeTree(Operands);<br>
+      // Skip the stores that were just consumed by this window.<br>
+      i += VF - 1;<br>
+      Changed = true;<br>
+    }<br>
+  }<br>
+<br>
+  if (Changed || ChainLen > VF)<br>
+    return Changed;<br>
+<br>
+  // Handle short chains. This helps us catch types such as <3 x float> that<br>
+  // are smaller than vector size.<br>
+  int Cost = getTreeCost(Chain);<br>
+  if (Cost == FuncSLP::MAX_COST)<br>
+    return false;<br>
+  if (Cost < CostThreshold) {<br>
+    DEBUG(dbgs() << "SLP: Found store chain cost = " << Cost<br>
+                 << " for size = " << ChainLen << "\n");<br>
+    vectorizeTree(Chain);<br>
+    return true;<br>
+  }<br>
+<br>
+  return false;<br>
+}<br>
+<br>
+<br>
+/// \brief Link the given stores into chains of consecutive accesses and try to<br>
+/// vectorize each chain with vectorizeStoreChain.<br>
+/// \returns true if at least one chain was vectorized.<br>
+bool FuncSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {<br>
+  SetVector<Value *> Heads, Tails;<br>
+  SmallDenseMap<Value *, Value *> ConsecutiveChain;<br>
+<br>
+  // Several chains may merge into one. Stores that were already vectorized are<br>
+  // remembered here so that no store is visited twice.<br>
+  ValueSet VectorizedStores;<br>
+  bool Changed = false;<br>
+<br>
+  // Quadratic search over all ordered pairs of distinct stores: record every<br>
+  // pair in which the second store directly follows the first one.<br>
+  const unsigned NumStores = Stores.size();<br>
+  for (unsigned First = 0; First < NumStores; ++First) {<br>
+    for (unsigned Second = 0; Second < NumStores; ++Second) {<br>
+      if (First == Second)<br>
+        continue;<br>
+      if (!isConsecutiveAccess(Stores[First], Stores[Second]))<br>
+        continue;<br>
+<br>
+      Tails.insert(Stores[Second]);<br>
+      Heads.insert(Stores[First]);<br>
+      ConsecutiveChain[Stores[First]] = Stores[Second];<br>
+    }<br>
+  }<br>
+<br>
+  // Start from every store that begins a link but does not end one.<br>
+  for (SetVector<Value *>::iterator HI = Heads.begin(), HE = Heads.end();<br>
+       HI != HE; ++HI) {<br>
+    Value *Cur = *HI;<br>
+    if (Tails.count(Cur))<br>
+      continue;<br>
+<br>
+    // Walk the links and collect the chain, stopping at the end of the chain<br>
+    // or at the first store that was already vectorized.<br>
+    ValueList ChainStores;<br>
+    for (; (Tails.count(Cur) || Heads.count(Cur)) &&<br>
+           !VectorizedStores.count(Cur);<br>
+         Cur = ConsecutiveChain[Cur])<br>
+      ChainStores.push_back(Cur);<br>
+<br>
+    if (vectorizeStoreChain(ChainStores, costThreshold)) {<br>
+      // Mark the vectorized stores so that we don't vectorize them again.<br>
+      VectorizedStores.insert(ChainStores.begin(), ChainStores.end());<br>
+      Changed = true;<br>
+    }<br>
+  }<br>
+<br>
+  return Changed;<br>
+}<br>
+<br>
+/// \brief Build a vector of type \p Ty out of the scalars in \p VL with a<br>
+/// sequence of insertelement operations.<br>
+///<br>
+/// Every created insertelement instruction is recorded in GatherSeq and the<br>
+/// final vector is registered in VectorizedValues under VL[0].<br>
+/// \returns the assembled vector.<br>
+Value *FuncSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {<br>
+  Value *Result = UndefValue::get(Ty);<br>
+  for (unsigned Lane = 0; Lane < Ty->getNumElements(); ++Lane) {<br>
+    Result = Builder.CreateInsertElement(Result, VL[Lane],<br>
+                                         Builder.getInt32(Lane));<br>
+    // The builder may hand back something that is not an instruction; only<br>
+    // real instructions are tracked.<br>
+    Instruction *InsertI = dyn_cast<Instruction>(Result);<br>
+    if (InsertI)<br>
+      GatherSeq.insert(InsertI);<br>
+  }<br>
+<br>
+  VectorizedValues[VL[0]] = Result;<br>
+  return Result;<br>
+}<br>
+<br>
+/// \brief Recursively emit vector code for the bundle of scalars \p VL.<br>
+///<br>
+/// Bundles that must be gathered, that fail a per-opcode check, or whose<br>
+/// opcode is not handled are built with Gather. Otherwise the operand bundles<br>
+/// are vectorized recursively and a single vector instruction is created in<br>
+/// front of getLastInstruction(VL); most results are cached in<br>
+/// VectorizedValues under VL[0]. The builder's insertion point is restored<br>
+/// before returning.<br>
+/// \returns the vector value, or null for a bundle of stores, whose scalar<br>
+/// stores are erased.<br>
+Value *FuncSLP::vectorizeTree_rec(ArrayRef<Value *> VL) {<br>
+  Type *ScalarTy = VL[0]->getType();<br>
+  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))<br>
+    ScalarTy = SI->getValueOperand()->getType();<br>
+  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());<br>
+<br>
+  // Set only by the store case. The scalar stores are erased after the guard<br>
+  // below has gone out of scope: the insertion point saved by the guard is<br>
+  // normally one of these stores, and restoring it after the erase would touch<br>
+  // freed memory.<br>
+  StoreInst *VecStore = 0;<br>
+  {<br>
+    BuilderLocGuard Guard(Builder);<br>
+<br>
+    if (needToGatherAny(VL))<br>
+      return Gather(VL, VecTy);<br>
+<br>
+    if (VectorizedValues.count(VL[0])) {<br>
+      DEBUG(dbgs() << "SLP: Diamond merged at depth.\n");<br>
+      return VectorizedValues[VL[0]];<br>
+    }<br>
+<br>
+    Instruction *VL0 = cast<Instruction>(VL[0]);<br>
+    unsigned Opcode = VL0->getOpcode();<br>
+    assert(Opcode == getSameOpcode(VL) && "Invalid opcode");<br>
+<br>
+    switch (Opcode) {<br>
+    case Instruction::ExtractElement: {<br>
+      if (CanReuseExtract(VL, VL.size(), VecTy))<br>
+        return VL0->getOperand(0);<br>
+      return Gather(VL, VecTy);<br>
+    }<br>
+    case Instruction::ZExt:<br>
+    case Instruction::SExt:<br>
+    case Instruction::FPToUI:<br>
+    case Instruction::FPToSI:<br>
+    case Instruction::FPExt:<br>
+    case Instruction::PtrToInt:<br>
+    case Instruction::IntToPtr:<br>
+    case Instruction::SIToFP:<br>
+    case Instruction::UIToFP:<br>
+    case Instruction::Trunc:<br>
+    case Instruction::FPTrunc:<br>
+    case Instruction::BitCast: {<br>
+      ValueList INVL;<br>
+      for (int i = 0, e = VL.size(); i < e; ++i)<br>
+        INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));<br>
+<br>
+      Builder.SetInsertPoint(getLastInstruction(VL));<br>
+      Value *InVec = vectorizeTree_rec(INVL);<br>
+      // The opcode guarantees a cast instruction, so use the asserting cast<br>
+      // instead of dereferencing an unchecked dyn_cast.<br>
+      CastInst *CI = cast<CastInst>(VL0);<br>
+      Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);<br>
+      VectorizedValues[VL0] = V;<br>
+      return V;<br>
+    }<br>
+    case Instruction::FCmp:<br>
+    case Instruction::ICmp: {<br>
+      // Check that all of the compares have the same predicate.<br>
+      CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();<br>
+      for (unsigned i = 1, e = VL.size(); i < e; ++i) {<br>
+        CmpInst *Cmp = cast<CmpInst>(VL[i]);<br>
+        if (Cmp->getPredicate() != P0)<br>
+          return Gather(VL, VecTy);<br>
+      }<br>
+<br>
+      ValueList LHSV, RHSV;<br>
+      for (int i = 0, e = VL.size(); i < e; ++i) {<br>
+        LHSV.push_back(cast<Instruction>(VL[i])->getOperand(0));<br>
+        RHSV.push_back(cast<Instruction>(VL[i])->getOperand(1));<br>
+      }<br>
+<br>
+      Builder.SetInsertPoint(getLastInstruction(VL));<br>
+      Value *L = vectorizeTree_rec(LHSV);<br>
+      Value *R = vectorizeTree_rec(RHSV);<br>
+      Value *V;<br>
+<br>
+      if (Opcode == Instruction::FCmp)<br>
+        V = Builder.CreateFCmp(P0, L, R);<br>
+      else<br>
+        V = Builder.CreateICmp(P0, L, R);<br>
+<br>
+      VectorizedValues[VL0] = V;<br>
+      return V;<br>
+    }<br>
+    case Instruction::Select: {<br>
+      ValueList TrueVec, FalseVec, CondVec;<br>
+      for (int i = 0, e = VL.size(); i < e; ++i) {<br>
+        CondVec.push_back(cast<Instruction>(VL[i])->getOperand(0));<br>
+        TrueVec.push_back(cast<Instruction>(VL[i])->getOperand(1));<br>
+        FalseVec.push_back(cast<Instruction>(VL[i])->getOperand(2));<br>
+      }<br>
+<br>
+      Builder.SetInsertPoint(getLastInstruction(VL));<br>
+      Value *True = vectorizeTree_rec(TrueVec);<br>
+      Value *False = vectorizeTree_rec(FalseVec);<br>
+      Value *Cond = vectorizeTree_rec(CondVec);<br>
+      Value *V = Builder.CreateSelect(Cond, True, False);<br>
+      VectorizedValues[VL0] = V;<br>
+      return V;<br>
+    }<br>
+    case Instruction::Add:<br>
+    case Instruction::FAdd:<br>
+    case Instruction::Sub:<br>
+    case Instruction::FSub:<br>
+    case Instruction::Mul:<br>
+    case Instruction::FMul:<br>
+    case Instruction::UDiv:<br>
+    case Instruction::SDiv:<br>
+    case Instruction::FDiv:<br>
+    case Instruction::URem:<br>
+    case Instruction::SRem:<br>
+    case Instruction::FRem:<br>
+    case Instruction::Shl:<br>
+    case Instruction::LShr:<br>
+    case Instruction::AShr:<br>
+    case Instruction::And:<br>
+    case Instruction::Or:<br>
+    case Instruction::Xor: {<br>
+      ValueList LHSVL, RHSVL;<br>
+      for (int i = 0, e = VL.size(); i < e; ++i) {<br>
+        LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));<br>
+        RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));<br>
+      }<br>
+<br>
+      Builder.SetInsertPoint(getLastInstruction(VL));<br>
+      Value *LHS = vectorizeTree_rec(LHSVL);<br>
+      Value *RHS = vectorizeTree_rec(RHSVL);<br>
+<br>
+      // Both operand bundles may only map to the same vector when the scalar<br>
+      // uses the same value for both of its operands.<br>
+      assert((LHS != RHS || VL0->getOperand(0) == VL0->getOperand(1)) &&<br>
+             "Invalid order");<br>
+<br>
+      BinaryOperator *BinOp = cast<BinaryOperator>(VL0);<br>
+      Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);<br>
+      VectorizedValues[VL0] = V;<br>
+      return V;<br>
+    }<br>
+    case Instruction::Load: {<br>
+      // Check if all of the loads are consecutive.<br>
+      for (unsigned i = 1, e = VL.size(); i < e; ++i)<br>
+        if (!isConsecutiveAccess(VL[i - 1], VL[i]))<br>
+          return Gather(VL, VecTy);<br>
+<br>
+      // The wide load is created in front of the last scalar load of the<br>
+      // bundle and keeps the alignment of the first scalar load.<br>
+      Builder.SetInsertPoint(getLastInstruction(VL));<br>
+      LoadInst *LI = cast<LoadInst>(VL0);<br>
+      Value *VecPtr =<br>
+          Builder.CreateBitCast(LI->getPointerOperand(), VecTy->getPointerTo());<br>
+      unsigned Alignment = LI->getAlignment();<br>
+      LI = Builder.CreateLoad(VecPtr);<br>
+      LI->setAlignment(Alignment);<br>
+<br>
+      VectorizedValues[VL0] = LI;<br>
+      return LI;<br>
+    }<br>
+    case Instruction::Store: {<br>
+      StoreInst *SI = cast<StoreInst>(VL0);<br>
+      unsigned Alignment = SI->getAlignment();<br>
+<br>
+      ValueList ValueOp;<br>
+      for (int i = 0, e = VL.size(); i < e; ++i)<br>
+        ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());<br>
+<br>
+      Value *VecValue = vectorizeTree_rec(ValueOp);<br>
+<br>
+      Builder.SetInsertPoint(getLastInstruction(VL));<br>
+      Value *VecPtr =<br>
+          Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo());<br>
+      VecStore = Builder.CreateStore(VecValue, VecPtr);<br>
+      VecStore->setAlignment(Alignment);<br>
+      // Leave the guarded scope; the scalar stores are erased below.<br>
+      break;<br>
+    }<br>
+    default:<br>
+      return Gather(VL, VecTy);<br>
+    }<br>
+  }<br>
+<br>
+  // Only the store case falls out of the guarded scope.<br>
+  assert(VecStore && "Only store bundles leave the guarded scope");<br>
+<br>
+  // The guard has just restored the insertion point that was active on entry.<br>
+  // If that is one of the scalar stores, re-anchor the builder at the new<br>
+  // vector store so that it never refers to an erased instruction.<br>
+  for (int i = 0, e = VL.size(); i < e; ++i) {<br>
+    Instruction *Scalar = cast<Instruction>(VL[i]);<br>
+    if (Builder.GetInsertPoint() == BasicBlock::iterator(Scalar))<br>
+      Builder.SetInsertPoint(VecStore);<br>
+    Scalar->eraseFromParent();<br>
+  }<br>
+  return 0;<br>
+}<br>
+<br>
+/// \brief Vectorize the tree rooted at \p VL and reset the per-tree state.<br>
+///<br>
+/// Code generation starts in front of getLastInstruction(VL). Afterwards<br>
+/// MustGather, VectorizedValues and MemBarrierIgnoreList are cleared and the<br>
+/// numbering of every block in the function is dropped.<br>
+/// \returns whatever vectorizeTree_rec produced for \p VL.<br>
+Value *FuncSLP::vectorizeTree(ArrayRef<Value *> VL) {<br>
+  Builder.SetInsertPoint(getLastInstruction(VL));<br>
+  Value *Result = vectorizeTree_rec(VL);<br>
+<br>
+  // Instructions were moved around, so they have to be numbered again before<br>
+  // any further analysis can run.<br>
+  MustGather.clear();<br>
+  VectorizedValues.clear();<br>
+  MemBarrierIgnoreList.clear();<br>
+<br>
+  Function::iterator BlockIt = F->begin();<br>
+  const Function::iterator BlockEnd = F->end();<br>
+  while (BlockIt != BlockEnd) {<br>
+    BlocksNumbers[BlockIt].forget();<br>
+    ++BlockIt;<br>
+  }<br>
+  return Result;<br>
+}<br>
+<br>
+/// \brief Vectorize the tree rooted at \p Operands and rewire the scalar users.<br>
+///<br>
+/// For every lane an extractelement of the vectorized value is created, and<br>
+/// all uses of the corresponding scalar are replaced with it.<br>
+/// \returns the vectorized value.<br>
+Value *FuncSLP::vectorizeArith(ArrayRef<Value *> Operands) {<br>
+  Value *Vec = vectorizeTree(Operands);<br>
+<br>
+  // Hand each scalar's users the matching lane of the vector instead.<br>
+  const unsigned NumLanes = Operands.size();<br>
+  unsigned Lane = 0;<br>
+  while (Lane != NumLanes) {<br>
+    Value *Extracted = Builder.CreateExtractElement(Vec, Builder.getInt32(Lane));<br>
+    Operands[Lane]->replaceAllUsesWith(Extracted);<br>
+    ++Lane;<br>
+  }<br>
+<br>
+  return Vec;<br>
+}<br>
+<br>
+/// \brief Move the recorded gather (insertelement) instructions out of loops.<br>
+///<br>
+/// Each instruction in GatherSeq is handled independently: it is moved in<br>
+/// front of the terminator of its loop's preheader when its block is inside a<br>
+/// loop that has a preheader and neither the vector operand nor the inserted<br>
+/// element is an instruction contained in that loop.<br>
+void FuncSLP::hoistGatherSequence() {<br>
+  for (SetVector<Instruction *>::iterator it = GatherSeq.begin(),<br>
+                                          e = GatherSeq.end();<br>
+       it != e; ++it) {<br>
+    InsertElementInst *Insert = dyn_cast_or_null<InsertElementInst>(*it);<br>
+<br>
+    // The InsertElement sequence can be simplified into a constant.<br>
+    // Also Ignore NULL pointers because they are only here to separate<br>
+    // sequences.<br>
+    if (!Insert)<br>
+      continue;<br>
+<br>
+    BasicBlock *BB = Insert->getParent();<br>
+<br>
+    // Check if this block is inside a loop. The sequence holds instructions<br>
+    // from different blocks, so an instruction that cannot be hoisted must<br>
+    // only be skipped; it must not end the whole walk.<br>
+    Loop *L = LI->getLoopFor(BB);<br>
+    if (!L)<br>
+      continue;<br>
+<br>
+    // Check if it has a preheader.<br>
+    BasicBlock *PreHeader = L->getLoopPreheader();<br>
+    if (!PreHeader)<br>
+      continue;<br>
+<br>
+    // If the vector or the element that we insert into it are<br>
+    // instructions that are defined in this loop then we can't<br>
+    // hoist this instruction.<br>
+    Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));<br>
+    Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));<br>
+    if (CurrVec && L->contains(CurrVec))<br>
+      continue;<br>
+    if (NewElem && L->contains(NewElem))<br>
+      continue;<br>
+<br>
+    // Mark the insertion point for the block.<br>
+    Instruction *Location = PreHeader->getTerminator();<br>
+    // We can hoist this instruction. Move it to the pre-header.<br>
+    Insert->moveBefore(Location);<br>
+  }<br>
+}<br>
+<br>
/// The SLPVectorizer Pass.<br>
struct SLPVectorizer : public FunctionPass {<br>
- typedef MapVector<Value *, BoUpSLP::StoreList> StoreListMap;<br>
+ typedef SmallVector<StoreInst *, 8> StoreList;<br>
+ typedef MapVector<Value *, StoreList> StoreListMap;<br>
<br>
/// Pass identification, replacement for typeid<br>
static char ID;<br>
@@ -80,34 +1213,26 @@ struct SLPVectorizer : public FunctionPa<br>
<br>
DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");<br>
<br>
+ // Use the bottom-up SLP vectorizer to construct chains that start with<br>
+ // the store instructions.<br>
+ FuncSLP R(&F, SE, DL, TTI, AA, LI);<br>
+<br>
for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {<br>
BasicBlock *BB = it;<br>
- bool BBChanged = false;<br>
-<br>
- // Use the bollom up slp vectorizer to construct chains that start with<br>
- // he store instructions.<br>
- BoUpSLP R(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));<br>
<br>
// Vectorize trees that end at reductions.<br>
- BBChanged |= vectorizeChainsInBlock(BB, R);<br>
+ Changed |= vectorizeChainsInBlock(BB, R);<br>
<br>
// Vectorize trees that end at stores.<br>
if (unsigned count = collectStores(BB, R)) {<br>
(void)count;<br>
DEBUG(dbgs() << "SLP: Found " << count << " stores to vectorize.\n");<br>
- BBChanged |= vectorizeStoreChains(R);<br>
+ Changed |= vectorizeStoreChains(R);<br>
}<br>
-<br>
- // Try to hoist some of the scalarization code to the preheader.<br>
- if (BBChanged) {<br>
- hoistGatherSequence(LI, BB, R);<br>
- Changed |= vectorizeUsingGatherHints(R.getGatherSeqInstructions());<br>
- }<br>
-<br>
- Changed |= BBChanged;<br>
}<br>
<br>
if (Changed) {<br>
+ R.hoistGatherSequence();<br>
DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");<br>
DEBUG(verifyFunction(F));<br>
}<br>
@@ -128,42 +1253,31 @@ private:<br>
/// object. We sort the stores to their base objects to reduce the cost of the<br>
/// quadratic search on the stores. TODO: We can further reduce this cost<br>
/// if we flush the chain creation every time we run into a memory barrier.<br>
- unsigned collectStores(BasicBlock *BB, BoUpSLP &R);<br>
+ unsigned collectStores(BasicBlock *BB, FuncSLP &R);<br>
<br>
/// \brief Try to vectorize a chain that starts at two arithmetic instrs.<br>
- bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);<br>
+ bool tryToVectorizePair(Value *A, Value *B, FuncSLP &R);<br>
<br>
/// \brief Try to vectorize a list of operands. If \p NeedExtracts is true<br>
/// then we calculate the cost of extracting the scalars from the vector.<br>
/// \returns true if a value was vectorized.<br>
- bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool NeedExtracts);<br>
+ bool tryToVectorizeList(ArrayRef<Value *> VL, FuncSLP &R, bool NeedExtracts);<br>
<br>
/// \brief Try to vectorize a chain that may start at the operands of \V;<br>
- bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);<br>
+ bool tryToVectorize(BinaryOperator *V, FuncSLP &R);<br>
<br>
/// \brief Vectorize the stores that were collected in StoreRefs.<br>
- bool vectorizeStoreChains(BoUpSLP &R);<br>
-<br>
- /// \brief Try to hoist gather sequences outside of the loop in cases where<br>
- /// all of the sources are loop invariant.<br>
- void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R);<br>
-<br>
- /// \brief Try to vectorize additional sequences in different basic blocks<br>
- /// based on values that we gathered in previous blocks. The list \p Gathers<br>
- /// holds the gather InsertElement instructions that were generated during<br>
- /// vectorization.<br>
- /// \returns True if some code was vectorized.<br>
- bool vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers);<br>
+ bool vectorizeStoreChains(FuncSLP &R);<br>
<br>
/// \brief Scan the basic block and look for patterns that are likely to start<br>
/// a vectorization chain.<br>
- bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R);<br>
+ bool vectorizeChainsInBlock(BasicBlock *BB, FuncSLP &R);<br>
<br>
private:<br>
StoreListMap StoreRefs;<br>
};<br>
<br>
-unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {<br>
+unsigned SLPVectorizer::collectStores(BasicBlock *BB, FuncSLP &R) {<br>
unsigned count = 0;<br>
StoreRefs.clear();<br>
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {<br>
@@ -188,14 +1302,14 @@ unsigned SLPVectorizer::collectStores(Ba<br>
return count;<br>
}<br>
<br>
-bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {<br>
+bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, FuncSLP &R) {<br>
if (!A || !B)<br>
return false;<br>
Value *VL[] = { A, B };<br>
return tryToVectorizeList(VL, R, true);<br>
}<br>
<br>
-bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,<br>
+bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, FuncSLP &R,<br>
bool NeedExtracts) {<br>
if (VL.size() < 2)<br>
return false;<br>
@@ -219,7 +1333,10 @@ bool SLPVectorizer::tryToVectorizeList(A<br>
}<br>
<br>
int Cost = R.getTreeCost(VL);<br>
- int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0;<br>
+ if (Cost == FuncSLP::MAX_COST)<br>
+ return false;<br>
+<br>
+ int ExtrCost = NeedExtracts ? R.getGatherCost(VL) : 0;<br>
DEBUG(dbgs() << "SLP: Cost of pair:" << Cost<br>
<< " Cost of extract:" << ExtrCost << ".\n");<br>
if ((Cost + ExtrCost) >= -SLPCostThreshold)<br>
@@ -229,10 +1346,10 @@ bool SLPVectorizer::tryToVectorizeList(A<br>
return true;<br>
}<br>
<br>
-bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {<br>
+bool SLPVectorizer::tryToVectorize(BinaryOperator *V, FuncSLP &R) {<br>
if (!V)<br>
return false;<br>
-<br>
+<br>
// Try to vectorize V.<br>
if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))<br>
return true;<br>
@@ -269,7 +1386,7 @@ bool SLPVectorizer::tryToVectorize(Binar<br>
return 0;<br>
}<br>
<br>
-bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {<br>
+bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, FuncSLP &R) {<br>
bool Changed = false;<br>
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {<br>
if (isa<DbgInfoIntrinsic>(it))<br>
@@ -292,7 +1409,7 @@ bool SLPVectorizer::vectorizeChainsInBlo<br>
Value *Inst = BI->getOperand(0);<br>
if (Inst == P)<br>
Inst = BI->getOperand(1);<br>
-<br>
+<br>
Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);<br>
continue;<br>
}<br>
@@ -337,7 +1454,7 @@ bool SLPVectorizer::vectorizeChainsInBlo<br>
return Changed;<br>
}<br>
<br>
-bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {<br>
+bool SLPVectorizer::vectorizeStoreChains(FuncSLP &R) {<br>
bool Changed = false;<br>
// Attempt to sort and vectorize each of the store-groups.<br>
for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();<br>
@@ -353,92 +1470,6 @@ bool SLPVectorizer::vectorizeStoreChains<br>
return Changed;<br>
}<br>
<br>
-bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) {<br>
- SmallVector<Value *, 4> Seq;<br>
- bool Changed = false;<br>
- for (int i = 0, e = Gathers.size(); i < e; ++i) {<br>
- InsertElementInst *IEI = dyn_cast_or_null<InsertElementInst>(Gathers[i]);<br>
-<br>
- if (IEI) {<br>
- if (Instruction *I = dyn_cast<Instruction>(IEI->getOperand(1)))<br>
- Seq.push_back(I);<br>
- } else {<br>
-<br>
- if (!Seq.size())<br>
- continue;<br>
-<br>
- Instruction *I = cast<Instruction>(Seq[0]);<br>
- BasicBlock *BB = I->getParent();<br>
-<br>
- DEBUG(dbgs() << "SLP: Inspecting a gather list of size " << Seq.size()<br>
- << " in " << BB->getName() << ".\n");<br>
-<br>
- // Check if the gathered values have multiple uses. If they only have one<br>
- // user then we know that the insert/extract pair will go away.<br>
- bool HasMultipleUsers = false;<br>
- for (int i = 0; e = Seq.size(), i < e; ++i) {<br>
- if (!Seq[i]->hasOneUse()) {<br>
- HasMultipleUsers = true;<br>
- break;<br>
- }<br>
- }<br>
-<br>
- BoUpSLP BO(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));<br>
-<br>
- if (tryToVectorizeList(Seq, BO, HasMultipleUsers)) {<br>
- DEBUG(dbgs() << "SLP: Vectorized a gather list of len " << Seq.size()<br>
- << " in " << BB->getName() << ".\n");<br>
- Changed = true;<br>
- }<br>
-<br>
- Seq.clear();<br>
- }<br>
- }<br>
-<br>
- return Changed;<br>
-}<br>
-<br>
-void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,<br>
- BoUpSLP &R) {<br>
- // Check if this block is inside a loop.<br>
- Loop *L = LI->getLoopFor(BB);<br>
- if (!L)<br>
- return;<br>
-<br>
- // Check if it has a preheader.<br>
- BasicBlock *PreHeader = L->getLoopPreheader();<br>
- if (!PreHeader)<br>
- return;<br>
-<br>
- // Mark the insertion point for the block.<br>
- Instruction *Location = PreHeader->getTerminator();<br>
-<br>
- BoUpSLP::InstrList &Gathers = R.getGatherSeqInstructions();<br>
- for (BoUpSLP::InstrList::iterator it = Gathers.begin(), e = Gathers.end();<br>
- it != e; ++it) {<br>
- InsertElementInst *Insert = dyn_cast_or_null<InsertElementInst>(*it);<br>
-<br>
- // The InsertElement sequence can be simplified into a constant.<br>
- // Also Ignore NULL pointers because they are only here to separate<br>
- // sequences.<br>
- if (!Insert)<br>
- continue;<br>
-<br>
- // If the vector or the element that we insert into it are<br>
- // instructions that are defined in this basic block then we can't<br>
- // hoist this instruction.<br>
- Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));<br>
- Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));<br>
- if (CurrVec && L->contains(CurrVec))<br>
- continue;<br>
- if (NewElem && L->contains(NewElem))<br>
- continue;<br>
-<br>
- // We can hoist this instruction. Move it to the pre-header.<br>
- Insert->moveBefore(Location);<br>
- }<br>
-}<br>
-<br>
} // end anonymous namespace<br>
<br>
char SLPVectorizer::ID = 0;<br>
<br>
Removed: llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp?rev=184646&view=auto" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp?rev=184646&view=auto</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp (removed)<br>
@@ -1,1031 +0,0 @@<br>
-//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//<br>
-//<br>
-// The LLVM Compiler Infrastructure<br>
-//<br>
-// This file is distributed under the University of Illinois Open Source<br>
-// License. See LICENSE.TXT for details.<br>
-//<br>
-//===----------------------------------------------------------------------===//<br>
-#define DEBUG_TYPE "SLP"<br>
-<br>
-#include "VecUtils.h"<br>
-#include "llvm/ADT/DenseMap.h"<br>
-#include "llvm/ADT/SmallPtrSet.h"<br>
-#include "llvm/ADT/SmallSet.h"<br>
-#include "llvm/ADT/SmallVector.h"<br>
-#include "llvm/Analysis/AliasAnalysis.h"<br>
-#include "llvm/Analysis/ScalarEvolution.h"<br>
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"<br>
-#include "llvm/Analysis/TargetTransformInfo.h"<br>
-#include "llvm/Analysis/Verifier.h"<br>
-#include "llvm/Analysis/LoopInfo.h"<br>
-#include "llvm/IR/Constants.h"<br>
-#include "llvm/IR/DataLayout.h"<br>
-#include "llvm/IR/Function.h"<br>
-#include "llvm/IR/Instructions.h"<br>
-#include "llvm/IR/Module.h"<br>
-#include "llvm/IR/Type.h"<br>
-#include "llvm/IR/Value.h"<br>
-#include "llvm/Pass.h"<br>
-#include "llvm/Support/CommandLine.h"<br>
-#include "llvm/Support/Debug.h"<br>
-#include "llvm/Support/raw_ostream.h"<br>
-#include "llvm/Target/TargetLibraryInfo.h"<br>
-#include "llvm/Transforms/Scalar.h"<br>
-#include "llvm/Transforms/Utils/Local.h"<br>
-#include <algorithm><br>
-#include <map><br>
-<br>
-using namespace llvm;<br>
-<br>
-static const unsigned MinVecRegSize = 128;<br>
-<br>
-static const unsigned RecursionMaxDepth = 6;<br>
-<br>
-namespace llvm {<br>
-<br>
-BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,<br>
- TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp)<br>
- : Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {<br>
- numberInstructions();<br>
-}<br>
-<br>
-void BoUpSLP::numberInstructions() {<br>
- int Loc = 0;<br>
- InstrIdx.clear();<br>
- InstrVec.clear();<br>
- // Number the instructions in the block.<br>
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {<br>
- InstrIdx[it] = Loc++;<br>
- InstrVec.push_back(it);<br>
- assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");<br>
- }<br>
-}<br>
-<br>
-Value *BoUpSLP::getPointerOperand(Value *I) {<br>
- if (LoadInst *LI = dyn_cast<LoadInst>(I))<br>
- return LI->getPointerOperand();<br>
- if (StoreInst *SI = dyn_cast<StoreInst>(I))<br>
- return SI->getPointerOperand();<br>
- return 0;<br>
-}<br>
-<br>
-unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {<br>
- if (LoadInst *L = dyn_cast<LoadInst>(I))<br>
- return L->getPointerAddressSpace();<br>
- if (StoreInst *S = dyn_cast<StoreInst>(I))<br>
- return S->getPointerAddressSpace();<br>
- return -1;<br>
-}<br>
-<br>
-bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {<br>
- Value *PtrA = getPointerOperand(A);<br>
- Value *PtrB = getPointerOperand(B);<br>
- unsigned ASA = getAddressSpaceOperand(A);<br>
- unsigned ASB = getAddressSpaceOperand(B);<br>
-<br>
- // Check that the address spaces match and that the pointers are valid.<br>
- if (!PtrA || !PtrB || (ASA != ASB))<br>
- return false;<br>
-<br>
- // Check that A and B are of the same type.<br>
- if (PtrA->getType() != PtrB->getType())<br>
- return false;<br>
-<br>
- // Calculate the distance.<br>
- const SCEV *PtrSCEVA = SE->getSCEV(PtrA);<br>
- const SCEV *PtrSCEVB = SE->getSCEV(PtrB);<br>
- const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);<br>
- const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);<br>
-<br>
- // Non constant distance.<br>
- if (!ConstOffSCEV)<br>
- return false;<br>
-<br>
- int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();<br>
- Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();<br>
- // The Instructions are consecutive if the size of the first load/store is<br>
- // the same as the offset.<br>
- int64_t Sz = DL->getTypeStoreSize(Ty);<br>
- return ((-Offset) == Sz);<br>
-}<br>
-<br>
-bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {<br>
- unsigned ChainLen = Chain.size();<br>
- DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen<br>
- << "\n");<br>
- Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();<br>
- unsigned Sz = DL->getTypeSizeInBits(StoreTy);<br>
- unsigned VF = MinVecRegSize / Sz;<br>
-<br>
- if (!isPowerOf2_32(Sz) || VF < 2)<br>
- return false;<br>
-<br>
- bool Changed = false;<br>
- // Look for profitable vectorizable trees at all offsets, starting at zero.<br>
- for (unsigned i = 0, e = ChainLen; i < e; ++i) {<br>
- if (i + VF > e)<br>
- break;<br>
- DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i<br>
- << "\n");<br>
- ArrayRef<Value *> Operands = Chain.slice(i, VF);<br>
-<br>
- int Cost = getTreeCost(Operands);<br>
- DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");<br>
- if (Cost < CostThreshold) {<br>
- DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");<br>
- Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands, VF)));<br>
- vectorizeTree(Operands, VF);<br>
- i += VF - 1;<br>
- Changed = true;<br>
- }<br>
- }<br>
-<br>
- if (Changed || ChainLen > VF)<br>
- return Changed;<br>
-<br>
- // Handle short chains. This helps us catch types such as <3 x float> that<br>
- // are smaller than vector size.<br>
- int Cost = getTreeCost(Chain);<br>
- if (Cost < CostThreshold) {<br>
- DEBUG(dbgs() << "SLP: Found store chain cost = " << Cost<br>
- << " for size = " << ChainLen << "\n");<br>
- Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Chain, ChainLen)));<br>
- vectorizeTree(Chain, ChainLen);<br>
- return true;<br>
- }<br>
-<br>
- return false;<br>
-}<br>
-<br>
-bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {<br>
- SetVector<Value *> Heads, Tails;<br>
- SmallDenseMap<Value *, Value *> ConsecutiveChain;<br>
-<br>
- // We may run into multiple chains that merge into a single chain. We mark the<br>
- // stores that we vectorized so that we don't visit the same store twice.<br>
- ValueSet VectorizedStores;<br>
- bool Changed = false;<br>
-<br>
- // Do a quadratic search on all of the given stores and find<br>
- // all of the pairs of stores that follow each other.<br>
- for (unsigned i = 0, e = Stores.size(); i < e; ++i)<br>
- for (unsigned j = 0; j < e; ++j) {<br>
- if (i == j)<br>
- continue;<br>
-<br>
- if (isConsecutiveAccess(Stores[i], Stores[j])) {<br>
- Tails.insert(Stores[j]);<br>
- Heads.insert(Stores[i]);<br>
- ConsecutiveChain[Stores[i]] = Stores[j];<br>
- }<br>
- }<br>
-<br>
- // For stores that start but don't end a link in the chain:<br>
- for (SetVector<Value *>::iterator it = Heads.begin(), e = Heads.end();<br>
- it != e; ++it) {<br>
- if (Tails.count(*it))<br>
- continue;<br>
-<br>
- // We found a store instr that starts a chain. Now follow the chain and try<br>
- // to vectorize it.<br>
- ValueList Operands;<br>
- Value *I = *it;<br>
- // Collect the chain into a list.<br>
- while (Tails.count(I) || Heads.count(I)) {<br>
- if (VectorizedStores.count(I))<br>
- break;<br>
- Operands.push_back(I);<br>
- // Move to the next value in the chain.<br>
- I = ConsecutiveChain[I];<br>
- }<br>
-<br>
- bool Vectorized = vectorizeStoreChain(Operands, costThreshold);<br>
-<br>
- // Mark the vectorized stores so that we don't vectorize them again.<br>
- if (Vectorized)<br>
- VectorizedStores.insert(Operands.begin(), Operands.end());<br>
- Changed |= Vectorized;<br>
- }<br>
-<br>
- return Changed;<br>
-}<br>
-<br>
-int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) {<br>
- // Find the type of the operands in VL.<br>
- Type *ScalarTy = VL[0]->getType();<br>
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))<br>
- ScalarTy = SI->getValueOperand()->getType();<br>
- VectorType *VecTy = VectorType::get(ScalarTy, VL.size());<br>
- // Find the cost of inserting/extracting values from the vector.<br>
- return getScalarizationCost(VecTy);<br>
-}<br>
-<br>
-int BoUpSLP::getScalarizationCost(Type *Ty) {<br>
- int Cost = 0;<br>
- for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)<br>
- Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);<br>
- return Cost;<br>
-}<br>
-<br>
-AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {<br>
- if (StoreInst *SI = dyn_cast<StoreInst>(I))<br>
- return AA->getLocation(SI);<br>
- if (LoadInst *LI = dyn_cast<LoadInst>(I))<br>
- return AA->getLocation(LI);<br>
- return AliasAnalysis::Location();<br>
-}<br>
-<br>
-Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {<br>
- assert(Src->getParent() == Dst->getParent() && "Not the same BB");<br>
- BasicBlock::iterator I = Src, E = Dst;<br>
- /// Scan all of the instruction from SRC to DST and check if<br>
- /// the source may alias.<br>
- for (++I; I != E; ++I) {<br>
- // Ignore store instructions that are marked as 'ignore'.<br>
- if (MemBarrierIgnoreList.count(I))<br>
- continue;<br>
- if (Src->mayWriteToMemory()) /* Write */ {<br>
- if (!I->mayReadOrWriteMemory())<br>
- continue;<br>
- } else /* Read */ {<br>
- if (!I->mayWriteToMemory())<br>
- continue;<br>
- }<br>
- AliasAnalysis::Location A = getLocation(&*I);<br>
- AliasAnalysis::Location B = getLocation(Src);<br>
-<br>
- if (!A.Ptr || !B.Ptr || AA->alias(A, B))<br>
- return I;<br>
- }<br>
- return 0;<br>
-}<br>
-<br>
-Value *BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {<br>
- int LastIdx = getLastIndex(Operands, Operands.size());<br>
- Instruction *Loc = getInsertionPoint(LastIdx);<br>
- Builder.SetInsertPoint(Loc);<br>
-<br>
- assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx &&<br>
- "Vectorizing with in-tree users");<br>
-<br>
- Value *Vec = vectorizeTree(Operands, Operands.size());<br>
- // After vectorizing the operands we need to generate extractelement<br>
- // instructions and replace all of the uses of the scalar values with<br>
- // the values that we extracted from the vectorized tree.<br>
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {<br>
- Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));<br>
- Operands[i]->replaceAllUsesWith(S);<br>
- }<br>
-<br>
- return Vec;<br>
-}<br>
-<br>
-int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {<br>
- // Get rid of the list of stores that were removed, and from the<br>
- // lists of instructions with multiple users.<br>
- MemBarrierIgnoreList.clear();<br>
- LaneMap.clear();<br>
- MultiUserVals.clear();<br>
- MustScalarize.clear();<br>
- MustExtract.clear();<br>
-<br>
- // Find the location of the last root.<br>
- int LastRootIndex = getLastIndex(VL, VL.size());<br>
- int FirstUserIndex = getFirstUserIndex(VL, VL.size());<br>
-<br>
- // Don't vectorize if there are users of the tree roots inside the tree<br>
- // itself.<br>
- if (LastRootIndex > FirstUserIndex)<br>
- return max_cost;<br>
-<br>
- // Scan the tree and find which value is used by which lane, and which values<br>
- // must be scalarized.<br>
- getTreeUses_rec(VL, 0);<br>
-<br>
- // Check that instructions with multiple users can be vectorized. Mark unsafe<br>
- // instructions.<br>
- for (SetVector<Value *>::iterator it = MultiUserVals.begin(),<br>
- e = MultiUserVals.end();<br>
- it != e; ++it) {<br>
- // Check that all of the users of this instr are within the tree<br>
- // and that they are all from the same lane.<br>
- int Lane = -1;<br>
- for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();<br>
- I != E; ++I) {<br>
- if (LaneMap.find(*I) == LaneMap.end()) {<br>
- DEBUG(dbgs() << "SLP: Instr " << **it << " has multiple users.\n");<br>
-<br>
- // We don't have an ordering problem if the user is not in this basic<br>
- // block.<br>
- Instruction *Inst = cast<Instruction>(*I);<br>
- if (Inst->getParent() != BB) {<br>
- MustExtract.insert(*it);<br>
- continue;<br>
- }<br>
-<br>
- // We don't have an ordering problem if the user is after the last root.<br>
- int Idx = InstrIdx[Inst];<br>
- if (Idx < LastRootIndex) {<br>
- MustScalarize.insert(*it);<br>
- DEBUG(dbgs() << "SLP: Adding to MustScalarize "<br>
- "because of an unsafe out of tree usage.\n");<br>
- break;<br>
- }<br>
-<br>
- DEBUG(dbgs() << "SLP: Adding to MustExtract "<br>
- "because of a safe out of tree usage.\n");<br>
- MustExtract.insert(*it);<br>
- continue;<br>
- }<br>
- if (Lane == -1)<br>
- Lane = LaneMap[*I];<br>
- if (Lane != LaneMap[*I]) {<br>
- MustScalarize.insert(*it);<br>
- DEBUG(dbgs() << "SLP: Adding " << **it<br>
- << " to MustScalarize because multiple lane use it: "<br>
- << Lane << " and " << LaneMap[*I] << ".\n");<br>
- break;<br>
- }<br>
- }<br>
- }<br>
-<br>
- // Now calculate the cost of vectorizing the tree.<br>
- return getTreeCost_rec(VL, 0);<br>
-}<br>
-<br>
-static bool CanReuseExtract(ArrayRef<Value *> VL, unsigned VF,<br>
- VectorType *VecTy) {<br>
- // Check if all of the extracts come from the same vector and from the<br>
- // correct offset.<br>
- Value *VL0 = VL[0];<br>
- ExtractElementInst *E0 = cast<ExtractElementInst>(VL0);<br>
- Value *Vec = E0->getOperand(0);<br>
-<br>
- // We have to extract from the same vector type.<br>
- if (Vec->getType() != VecTy)<br>
- return false;<br>
-<br>
- // Check that all of the indices extract from the correct offset.<br>
- ConstantInt *CI = dyn_cast<ConstantInt>(E0->getOperand(1));<br>
- if (!CI || CI->getZExtValue())<br>
- return false;<br>
-<br>
- for (unsigned i = 1, e = VF; i < e; ++i) {<br>
- ExtractElementInst *E = cast<ExtractElementInst>(VL[i]);<br>
- ConstantInt *CI = dyn_cast<ConstantInt>(E->getOperand(1));<br>
-<br>
- if (!CI || CI->getZExtValue() != i || E->getOperand(0) != Vec)<br>
- return false;<br>
- }<br>
-<br>
- return true;<br>
-}<br>
-<br>
-void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {<br>
- if (Depth == RecursionMaxDepth)<br>
- return;<br>
-<br>
- // Don't handle vectors.<br>
- if (VL[0]->getType()->isVectorTy())<br>
- return;<br>
-<br>
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))<br>
- if (SI->getValueOperand()->getType()->isVectorTy())<br>
- return;<br>
-<br>
- // Check if all of the operands are constants.<br>
- bool AllConst = true;<br>
- bool AllSameScalar = true;<br>
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {<br>
- AllConst &= isa<Constant>(VL[i]);<br>
- AllSameScalar &= (VL[0] == VL[i]);<br>
- Instruction *I = dyn_cast<Instruction>(VL[i]);<br>
- // If one of the instructions is out of this BB, we need to scalarize all.<br>
- if (I && I->getParent() != BB)<br>
- return;<br>
- }<br>
-<br>
- // If all of the operands are identical or constant we have a simple solution.<br>
- if (AllConst || AllSameScalar)<br>
- return;<br>
-<br>
- // Scalarize unknown structures.<br>
- Instruction *VL0 = dyn_cast<Instruction>(VL[0]);<br>
- if (!VL0)<br>
- return;<br>
-<br>
- unsigned Opcode = VL0->getOpcode();<br>
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {<br>
- Instruction *I = dyn_cast<Instruction>(VL[i]);<br>
- // If not all of the instructions are identical then we have to scalarize.<br>
- if (!I || Opcode != I->getOpcode())<br>
- return;<br>
- }<br>
-<br>
- for (int i = 0, e = VL.size(); i < e; ++i) {<br>
- // Check that the instruction is only used within<br>
- // one lane.<br>
- if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i)<br>
- return;<br>
- // Mark this instruction as 'seen' and remember the lane.<br>
- LaneMap[VL[i]] = i;<br>
- }<br>
-<br>
- // Mark instructions with multiple users.<br>
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {<br>
- Instruction *I = dyn_cast<Instruction>(VL[i]);<br>
- // Remember to check if all of the users of this instr are vectorized<br>
- // within our tree. At depth zero we have no local users, only external<br>
- // users that we don't care about.<br>
- if (Depth && I && I->getNumUses() > 1) {<br>
- DEBUG(dbgs() << "SLP: Adding to MultiUserVals "<br>
- "because it has multiple users:" << *I << " \n");<br>
- MultiUserVals.insert(I);<br>
- }<br>
- }<br>
-<br>
- switch (Opcode) {<br>
- case Instruction::ExtractElement: {<br>
- VectorType *VecTy = VectorType::get(VL[0]->getType(), VL.size());<br>
- // No need to follow ExtractElements that are going to be optimized away.<br>
- if (CanReuseExtract(VL, VL.size(), VecTy))<br>
- return;<br>
- // Fall through.<br>
- }<br>
- case Instruction::ZExt:<br>
- case Instruction::SExt:<br>
- case Instruction::FPToUI:<br>
- case Instruction::FPToSI:<br>
- case Instruction::FPExt:<br>
- case Instruction::PtrToInt:<br>
- case Instruction::IntToPtr:<br>
- case Instruction::SIToFP:<br>
- case Instruction::UIToFP:<br>
- case Instruction::Trunc:<br>
- case Instruction::FPTrunc:<br>
- case Instruction::BitCast:<br>
- case Instruction::Select:<br>
- case Instruction::ICmp:<br>
- case Instruction::FCmp:<br>
- case Instruction::Add:<br>
- case Instruction::FAdd:<br>
- case Instruction::Sub:<br>
- case Instruction::FSub:<br>
- case Instruction::Mul:<br>
- case Instruction::FMul:<br>
- case Instruction::UDiv:<br>
- case Instruction::SDiv:<br>
- case Instruction::FDiv:<br>
- case Instruction::URem:<br>
- case Instruction::SRem:<br>
- case Instruction::FRem:<br>
- case Instruction::Shl:<br>
- case Instruction::LShr:<br>
- case Instruction::AShr:<br>
- case Instruction::And:<br>
- case Instruction::Or:<br>
- case Instruction::Xor: {<br>
- for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {<br>
- ValueList Operands;<br>
- // Prepare the operand vector.<br>
- for (unsigned j = 0; j < VL.size(); ++j)<br>
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));<br>
-<br>
- getTreeUses_rec(Operands, Depth + 1);<br>
- }<br>
- return;<br>
- }<br>
- case Instruction::Store: {<br>
- ValueList Operands;<br>
- for (unsigned j = 0; j < VL.size(); ++j)<br>
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));<br>
- getTreeUses_rec(Operands, Depth + 1);<br>
- return;<br>
- }<br>
- default:<br>
- return;<br>
- }<br>
-}<br>
-<br>
-int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {<br>
- Type *ScalarTy = VL[0]->getType();<br>
-<br>
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))<br>
- ScalarTy = SI->getValueOperand()->getType();<br>
-<br>
- /// Don't mess with vectors.<br>
- if (ScalarTy->isVectorTy())<br>
- return max_cost;<br>
-<br>
- VectorType *VecTy = VectorType::get(ScalarTy, VL.size());<br>
-<br>
- if (Depth == RecursionMaxDepth)<br>
- return getScalarizationCost(VecTy);<br>
-<br>
- // Check if all of the operands are constants.<br>
- bool AllConst = true;<br>
- bool AllSameScalar = true;<br>
- bool MustScalarizeFlag = false;<br>
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {<br>
- AllConst &= isa<Constant>(VL[i]);<br>
- AllSameScalar &= (VL[0] == VL[i]);<br>
- // Must have a single use.<br>
- Instruction *I = dyn_cast<Instruction>(VL[i]);<br>
- MustScalarizeFlag |= MustScalarize.count(VL[i]);<br>
- // This instruction is outside the basic block.<br>
- if (I && I->getParent() != BB)<br>
- return getScalarizationCost(VecTy);<br>
- }<br>
-<br>
- // Is this a simple vector constant.<br>
- if (AllConst)<br>
- return 0;<br>
-<br>
- // If all of the operands are identical we can broadcast them.<br>
- Instruction *VL0 = dyn_cast<Instruction>(VL[0]);<br>
- if (AllSameScalar) {<br>
- // If we are in a loop, and this is not an instruction (e.g. constant or<br>
- // argument) or the instruction is defined outside the loop then assume<br>
- // that the cost is zero.<br>
- if (L && (!VL0 || !L->contains(VL0)))<br>
- return 0;<br>
-<br>
- // We need to broadcast the scalar.<br>
- return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);<br>
- }<br>
-<br>
- // If this is not a constant, or a scalar from outside the loop then we<br>
- // need to scalarize it.<br>
- if (MustScalarizeFlag)<br>
- return getScalarizationCost(VecTy);<br>
-<br>
- if (!VL0)<br>
- return getScalarizationCost(VecTy);<br>
- assert(VL0->getParent() == BB && "Wrong BB");<br>
-<br>
- unsigned Opcode = VL0->getOpcode();<br>
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {<br>
- Instruction *I = dyn_cast<Instruction>(VL[i]);<br>
- // If not all of the instructions are identical then we have to scalarize.<br>
- if (!I || Opcode != I->getOpcode())<br>
- return getScalarizationCost(VecTy);<br>
- }<br>
-<br>
- // Check if it is safe to sink the loads or the stores.<br>
- if (Opcode == Instruction::Load || Opcode == Instruction::Store) {<br>
- int MaxIdx = getLastIndex(VL, VL.size());<br>
- Instruction *Last = InstrVec[MaxIdx];<br>
-<br>
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {<br>
- if (VL[i] == Last)<br>
- continue;<br>
- Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);<br>
- if (Barrier) {<br>
- DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << *Last<br>
- << "\n because of " << *Barrier << "\n");<br>
- return max_cost;<br>
- }<br>
- }<br>
- }<br>
-<br>
- // Calculate the extract cost.<br>
- unsigned ExternalUserExtractCost = 0;<br>
- for (unsigned i = 0, e = VL.size(); i < e; ++i)<br>
- if (MustExtract.count(VL[i]))<br>
- ExternalUserExtractCost +=<br>
- TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);<br>
-<br>
- switch (Opcode) {<br>
- case Instruction::ExtractElement: {<br>
- if (CanReuseExtract(VL, VL.size(), VecTy))<br>
- return 0;<br>
- return getScalarizationCost(VecTy);<br>
- }<br>
- case Instruction::ZExt:<br>
- case Instruction::SExt:<br>
- case Instruction::FPToUI:<br>
- case Instruction::FPToSI:<br>
- case Instruction::FPExt:<br>
- case Instruction::PtrToInt:<br>
- case Instruction::IntToPtr:<br>
- case Instruction::SIToFP:<br>
- case Instruction::UIToFP:<br>
- case Instruction::Trunc:<br>
- case Instruction::FPTrunc:<br>
- case Instruction::BitCast: {<br>
- int Cost = ExternalUserExtractCost;<br>
- ValueList Operands;<br>
- Type *SrcTy = VL0->getOperand(0)->getType();<br>
- // Prepare the operand vector.<br>
- for (unsigned j = 0; j < VL.size(); ++j) {<br>
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));<br>
- // Check that the casted type is the same for all users.<br>
- if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)<br>
- return getScalarizationCost(VecTy);<br>
- }<br>
-<br>
- Cost += getTreeCost_rec(Operands, Depth + 1);<br>
- if (Cost >= max_cost)<br>
- return max_cost;<br>
-<br>
- // Calculate the cost of this instruction.<br>
- int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),<br>
- VL0->getType(), SrcTy);<br>
-<br>
- VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());<br>
- int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);<br>
- Cost += (VecCost - ScalarCost);<br>
- return Cost;<br>
- }<br>
- case Instruction::FCmp:<br>
- case Instruction::ICmp: {<br>
- // Check that all of the compares have the same predicate.<br>
- CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();<br>
- for (unsigned i = 1, e = VL.size(); i < e; ++i) {<br>
- CmpInst *Cmp = cast<CmpInst>(VL[i]);<br>
- if (Cmp->getPredicate() != P0)<br>
- return getScalarizationCost(VecTy);<br>
- }<br>
- // Fall through.<br>
- }<br>
- case Instruction::Select:<br>
- case Instruction::Add:<br>
- case Instruction::FAdd:<br>
- case Instruction::Sub:<br>
- case Instruction::FSub:<br>
- case Instruction::Mul:<br>
- case Instruction::FMul:<br>
- case Instruction::UDiv:<br>
- case Instruction::SDiv:<br>
- case Instruction::FDiv:<br>
- case Instruction::URem:<br>
- case Instruction::SRem:<br>
- case Instruction::FRem:<br>
- case Instruction::Shl:<br>
- case Instruction::LShr:<br>
- case Instruction::AShr:<br>
- case Instruction::And:<br>
- case Instruction::Or:<br>
- case Instruction::Xor: {<br>
- int Cost = ExternalUserExtractCost;<br>
- // Calculate the cost of all of the operands.<br>
- for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {<br>
- ValueList Operands;<br>
- // Prepare the operand vector.<br>
- for (unsigned j = 0; j < VL.size(); ++j)<br>
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));<br>
-<br>
- Cost += getTreeCost_rec(Operands, Depth + 1);<br>
- if (Cost >= max_cost)<br>
- return max_cost;<br>
- }<br>
-<br>
- // Calculate the cost of this instruction.<br>
- int ScalarCost = 0;<br>
- int VecCost = 0;<br>
- if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp ||<br>
- Opcode == Instruction::Select) {<br>
- VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());<br>
- ScalarCost =<br>
- VecTy->getNumElements() *<br>
- TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());<br>
- VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);<br>
- } else {<br>
- ScalarCost = VecTy->getNumElements() *<br>
- TTI->getArithmeticInstrCost(Opcode, ScalarTy);<br>
- VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);<br>
- }<br>
- Cost += (VecCost - ScalarCost);<br>
- return Cost;<br>
- }<br>
- case Instruction::Load: {<br>
- // If we are scalarizing the loads, add the cost of forming the vector.<br>
- for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)<br>
- if (!isConsecutiveAccess(VL[i], VL[i + 1]))<br>
- return getScalarizationCost(VecTy);<br>
-<br>
- // Cost of wide load - cost of scalar loads.<br>
- int ScalarLdCost = VecTy->getNumElements() *<br>
- TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);<br>
- int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);<br>
- return VecLdCost - ScalarLdCost + ExternalUserExtractCost;<br>
- }<br>
- case Instruction::Store: {<br>
- // We know that we can merge the stores. Calculate the cost.<br>
- int ScalarStCost = VecTy->getNumElements() *<br>
- TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);<br>
- int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);<br>
- int StoreCost = VecStCost - ScalarStCost;<br>
-<br>
- ValueList Operands;<br>
- for (unsigned j = 0; j < VL.size(); ++j) {<br>
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));<br>
- MemBarrierIgnoreList.insert(VL[j]);<br>
- }<br>
-<br>
- int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);<br>
- return TotalCost + ExternalUserExtractCost;<br>
- }<br>
- default:<br>
- // Unable to vectorize unknown instructions.<br>
- return getScalarizationCost(VecTy);<br>
- }<br>
-}<br>
-<br>
-int BoUpSLP::getLastIndex(ArrayRef<Value *> VL, unsigned VF) {<br>
- int MaxIdx = InstrIdx[BB->getFirstNonPHI()];<br>
- for (unsigned i = 0; i < VF; ++i)<br>
- MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);<br>
- return MaxIdx;<br>
-}<br>
-<br>
-int BoUpSLP::getFirstUserIndex(ArrayRef<Value *> VL, unsigned VF) {<br>
- // Find the first user of the values.<br>
- int FirstUser = InstrVec.size();<br>
- for (unsigned i = 0; i < VF; ++i) {<br>
- for (Value::use_iterator U = VL[i]->use_begin(), UE = VL[i]->use_end();<br>
- U != UE; ++U) {<br>
- Instruction *Instr = dyn_cast<Instruction>(*U);<br>
- if (!Instr || Instr->getParent() != BB)<br>
- continue;<br>
-<br>
- FirstUser = std::min(FirstUser, InstrIdx[Instr]);<br>
- }<br>
- }<br>
- return FirstUser;<br>
-}<br>
-<br>
-int BoUpSLP::getLastIndex(Instruction *I, Instruction *J) {<br>
- assert(I->getParent() == BB && "Invalid parent for instruction I");<br>
- assert(J->getParent() == BB && "Invalid parent for instruction J");<br>
- return std::max(InstrIdx[I], InstrIdx[J]);<br>
-}<br>
-<br>
-Instruction *BoUpSLP::getInsertionPoint(unsigned Index) {<br>
- return InstrVec[Index + 1];<br>
-}<br>
-<br>
-Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {<br>
- Value *Vec = UndefValue::get(Ty);<br>
- for (unsigned i = 0; i < Ty->getNumElements(); ++i) {<br>
- // Generate the 'InsertElement' instruction.<br>
- Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));<br>
- // Remember that this instruction is used as part of a 'gather' sequence.<br>
- // The caller of the bottom-up slp vectorizer can try to hoist the sequence<br>
- // if the users are outside of the basic block.<br>
- if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Vec))<br>
- GatherInstructions.push_back(IEI);<br>
- }<br>
-<br>
- // Mark the end of the gather sequence.<br>
- GatherInstructions.push_back(0);<br>
-<br>
- for (unsigned i = 0; i < Ty->getNumElements(); ++i)<br>
- VectorizedValues[VL[i]] = Vec;<br>
-<br>
- return Vec;<br>
-}<br>
-<br>
-Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {<br>
- Value *V = vectorizeTree_rec(VL, VF);<br>
-<br>
- int LastInstrIdx = getLastIndex(VL, VL.size());<br>
- for (SetVector<Value *>::iterator it = MustExtract.begin(),<br>
- e = MustExtract.end();<br>
- it != e; ++it) {<br>
- Instruction *I = cast<Instruction>(*it);<br>
-<br>
- // This is a scalarized value, so we can use the original value.<br>
- // No need to extract from the vector.<br>
- if (!LaneMap.count(I))<br>
- continue;<br>
-<br>
- Value *Vec = VectorizedValues[I];<br>
- // We decided not to vectorize I because one of its users was not<br>
- // vectorized. This is okay.<br>
- if (!Vec)<br>
- continue;<br>
-<br>
- Value *Idx = Builder.getInt32(LaneMap[I]);<br>
- Value *Extract = Builder.CreateExtractElement(Vec, Idx);<br>
- bool Replaced = false;<br>
- for (Value::use_iterator U = I->use_begin(), UE = I->use_end(); U != UE;<br>
- ++U) {<br>
- Instruction *UI = cast<Instruction>(*U);<br>
- if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx)<br>
- UI->replaceUsesOfWith(I, Extract);<br>
- Replaced = true;<br>
- }<br>
- assert(Replaced && "Must replace at least one outside user");<br>
- (void)Replaced;<br>
- }<br>
-<br>
- // We moved some instructions around. We have to number them again<br>
- // before we can do any analysis.<br>
- numberInstructions();<br>
- MustScalarize.clear();<br>
- MustExtract.clear();<br>
- VectorizedValues.clear();<br>
- return V;<br>
-}<br>
-<br>
-Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {<br>
- Type *ScalarTy = VL[0]->getType();<br>
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))<br>
- ScalarTy = SI->getValueOperand()->getType();<br>
- VectorType *VecTy = VectorType::get(ScalarTy, VF);<br>
-<br>
- // Check if all of the operands are constants or identical.<br>
- bool AllConst = true;<br>
- bool AllSameScalar = true;<br>
- for (unsigned i = 0, e = VF; i < e; ++i) {<br>
- AllConst &= isa<Constant>(VL[i]);<br>
- AllSameScalar &= (VL[0] == VL[i]);<br>
- // The instruction must be in the same BB, and it must be vectorizable.<br>
- Instruction *I = dyn_cast<Instruction>(VL[i]);<br>
- if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))<br>
- return Scalarize(VL, VecTy);<br>
- }<br>
-<br>
- // Check that this is a simple vector constant.<br>
- if (AllConst || AllSameScalar)<br>
- return Scalarize(VL, VecTy);<br>
-<br>
- // Scalarize unknown structures.<br>
- Instruction *VL0 = dyn_cast<Instruction>(VL[0]);<br>
- if (!VL0)<br>
- return Scalarize(VL, VecTy);<br>
-<br>
- if (VectorizedValues.count(VL0)) {<br>
- Value *Vec = VectorizedValues[VL0];<br>
- for (int i = 0; i < VF; ++i)<br>
- VectorizedValues[VL[i]] = Vec;<br>
- return Vec;<br>
- }<br>
-<br>
- unsigned Opcode = VL0->getOpcode();<br>
- for (unsigned i = 0, e = VF; i < e; ++i) {<br>
- Instruction *I = dyn_cast<Instruction>(VL[i]);<br>
- // If not all of the instructions are identical then we have to scalarize.<br>
- if (!I || Opcode != I->getOpcode())<br>
- return Scalarize(VL, VecTy);<br>
- }<br>
-<br>
- switch (Opcode) {<br>
- case Instruction::ExtractElement: {<br>
- if (CanReuseExtract(VL, VL.size(), VecTy))<br>
- return VL0->getOperand(0);<br>
- return Scalarize(VL, VecTy);<br>
- }<br>
- case Instruction::ZExt:<br>
- case Instruction::SExt:<br>
- case Instruction::FPToUI:<br>
- case Instruction::FPToSI:<br>
- case Instruction::FPExt:<br>
- case Instruction::PtrToInt:<br>
- case Instruction::IntToPtr:<br>
- case Instruction::SIToFP:<br>
- case Instruction::UIToFP:<br>
- case Instruction::Trunc:<br>
- case Instruction::FPTrunc:<br>
- case Instruction::BitCast: {<br>
- ValueList INVL;<br>
- for (int i = 0; i < VF; ++i)<br>
- INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));<br>
- Value *InVec = vectorizeTree_rec(INVL, VF);<br>
- CastInst *CI = dyn_cast<CastInst>(VL0);<br>
- Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);<br>
-<br>
- for (int i = 0; i < VF; ++i)<br>
- VectorizedValues[VL[i]] = V;<br>
-<br>
- return V;<br>
- }<br>
- case Instruction::FCmp:<br>
- case Instruction::ICmp: {<br>
- // Check that all of the compares have the same predicate.<br>
- CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();<br>
- for (unsigned i = 1, e = VF; i < e; ++i) {<br>
- CmpInst *Cmp = cast<CmpInst>(VL[i]);<br>
- if (Cmp->getPredicate() != P0)<br>
- return Scalarize(VL, VecTy);<br>
- }<br>
-<br>
- ValueList LHSV, RHSV;<br>
- for (int i = 0; i < VF; ++i) {<br>
- LHSV.push_back(cast<Instruction>(VL[i])->getOperand(0));<br>
- RHSV.push_back(cast<Instruction>(VL[i])->getOperand(1));<br>
- }<br>
-<br>
- Value *L = vectorizeTree_rec(LHSV, VF);<br>
- Value *R = vectorizeTree_rec(RHSV, VF);<br>
- Value *V;<br>
- if (VL0->getOpcode() == Instruction::FCmp)<br>
- V = Builder.CreateFCmp(P0, L, R);<br>
- else<br>
- V = Builder.CreateICmp(P0, L, R);<br>
-<br>
- for (int i = 0; i < VF; ++i)<br>
- VectorizedValues[VL[i]] = V;<br>
-<br>
- return V;<br>
- }<br>
- case Instruction::Select: {<br>
- ValueList TrueVec, FalseVec, CondVec;<br>
- for (int i = 0; i < VF; ++i) {<br>
- CondVec.push_back(cast<Instruction>(VL[i])->getOperand(0));<br>
- TrueVec.push_back(cast<Instruction>(VL[i])->getOperand(1));<br>
- FalseVec.push_back(cast<Instruction>(VL[i])->getOperand(2));<br>
- }<br>
-<br>
- Value *True = vectorizeTree_rec(TrueVec, VF);<br>
- Value *False = vectorizeTree_rec(FalseVec, VF);<br>
- Value *Cond = vectorizeTree_rec(CondVec, VF);<br>
- Value *V = Builder.CreateSelect(Cond, True, False);<br>
-<br>
- for (int i = 0; i < VF; ++i)<br>
- VectorizedValues[VL[i]] = V;<br>
-<br>
- return V;<br>
- }<br>
- case Instruction::Add:<br>
- case Instruction::FAdd:<br>
- case Instruction::Sub:<br>
- case Instruction::FSub:<br>
- case Instruction::Mul:<br>
- case Instruction::FMul:<br>
- case Instruction::UDiv:<br>
- case Instruction::SDiv:<br>
- case Instruction::FDiv:<br>
- case Instruction::URem:<br>
- case Instruction::SRem:<br>
- case Instruction::FRem:<br>
- case Instruction::Shl:<br>
- case Instruction::LShr:<br>
- case Instruction::AShr:<br>
- case Instruction::And:<br>
- case Instruction::Or:<br>
- case Instruction::Xor: {<br>
- ValueList LHSVL, RHSVL;<br>
- for (int i = 0; i < VF; ++i) {<br>
- LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));<br>
- RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));<br>
- }<br>
-<br>
- Value *LHS = vectorizeTree_rec(LHSVL, VF);<br>
- Value *RHS = vectorizeTree_rec(RHSVL, VF);<br>
- BinaryOperator *BinOp = cast<BinaryOperator>(VL0);<br>
- Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);<br>
-<br>
- for (int i = 0; i < VF; ++i)<br>
- VectorizedValues[VL[i]] = V;<br>
-<br>
- return V;<br>
- }<br>
- case Instruction::Load: {<br>
- LoadInst *LI = cast<LoadInst>(VL0);<br>
- unsigned Alignment = LI->getAlignment();<br>
-<br>
- // Check if all of the loads are consecutive.<br>
- for (unsigned i = 1, e = VF; i < e; ++i)<br>
- if (!isConsecutiveAccess(VL[i - 1], VL[i]))<br>
- return Scalarize(VL, VecTy);<br>
-<br>
- // Loads are inserted at the head of the tree because we don't want to sink<br>
- // them all the way down past store instructions.<br>
- Instruction *Loc = getInsertionPoint(getLastIndex(VL, VL.size()));<br>
- IRBuilder<> LoadBuilder(Loc);<br>
- Value *VecPtr = LoadBuilder.CreateBitCast(LI->getPointerOperand(),<br>
- VecTy->getPointerTo());<br>
- LI = LoadBuilder.CreateLoad(VecPtr);<br>
- LI->setAlignment(Alignment);<br>
-<br>
- for (int i = 0; i < VF; ++i)<br>
- VectorizedValues[VL[i]] = LI;<br>
-<br>
- return LI;<br>
- }<br>
- case Instruction::Store: {<br>
- StoreInst *SI = cast<StoreInst>(VL0);<br>
- unsigned Alignment = SI->getAlignment();<br>
-<br>
- ValueList ValueOp;<br>
- for (int i = 0; i < VF; ++i)<br>
- ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());<br>
-<br>
- Value *VecValue = vectorizeTree_rec(ValueOp, VF);<br>
- Value *VecPtr =<br>
- Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo());<br>
- Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);<br>
-<br>
- for (int i = 0; i < VF; ++i)<br>
- cast<Instruction>(VL[i])->eraseFromParent();<br>
- return 0;<br>
- }<br>
- default:<br>
- return Scalarize(VL, VecTy);<br>
- }<br>
-}<br>
-<br>
-} // end of namespace<br>
<br>
Removed: llvm/trunk/lib/Transforms/Vectorize/VecUtils.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.h?rev=184646&view=auto" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.h?rev=184646&view=auto</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/Vectorize/VecUtils.h (original)<br>
+++ llvm/trunk/lib/Transforms/Vectorize/VecUtils.h (removed)<br>
@@ -1,194 +0,0 @@<br>
-//===- VecUtils.h - Vectorization Utilities -------------------------------===//<br>
-//<br>
-// The LLVM Compiler Infrastructure<br>
-//<br>
-// This file is distributed under the University of Illinois Open Source<br>
-// License. See LICENSE.TXT for details.<br>
-//<br>
-//===----------------------------------------------------------------------===//<br>
-//<br>
-// This family of classes and functions manipulate vectors and chains of<br>
-// vectors.<br>
-//<br>
-//===----------------------------------------------------------------------===//<br>
-<br>
-#ifndef LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H<br>
-#define LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H<br>
-<br>
-#include "llvm/ADT/DenseMap.h"<br>
-#include "llvm/ADT/SetVector.h"<br>
-#include "llvm/ADT/SmallPtrSet.h"<br>
-#include "llvm/ADT/SmallVector.h"<br>
-#include "llvm/Analysis/AliasAnalysis.h"<br>
-#include "llvm/IR/IRBuilder.h"<br>
-#include <vector><br>
-<br>
-namespace llvm {<br>
-<br>
-class BasicBlock;<br>
-class Instruction;<br>
-class Type;<br>
-class VectorType;<br>
-class StoreInst;<br>
-class Value;<br>
-class ScalarEvolution;<br>
-class DataLayout;<br>
-class TargetTransformInfo;<br>
-class AliasAnalysis;<br>
-class Loop;<br>
-<br>
-/// Bottom Up SLP vectorization utility class.<br>
-struct BoUpSLP {<br>
- typedef SmallVector<Value *, 8> ValueList;<br>
- typedef SmallVector<Instruction *, 16> InstrList;<br>
- typedef SmallPtrSet<Value *, 16> ValueSet;<br>
- typedef SmallVector<StoreInst *, 8> StoreList;<br>
- static const int max_cost = 1 << 20;<br>
-<br>
- // \brief C'tor.<br>
- BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl,<br>
- TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp);<br>
-<br>
- /// \brief Take the pointer operand from the Load/Store instruction.<br>
- /// \returns NULL if this is not a valid Load/Store instruction.<br>
- static Value *getPointerOperand(Value *I);<br>
-<br>
- /// \brief Take the address space operand from the Load/Store instruction.<br>
- /// \returns -1 if this is not a valid Load/Store instruction.<br>
- static unsigned getAddressSpaceOperand(Value *I);<br>
-<br>
- /// \returns true if the memory operations A and B are consecutive.<br>
- bool isConsecutiveAccess(Value *A, Value *B);<br>
-<br>
- /// \brief Vectorize the tree that starts with the elements in \p VL.<br>
- /// \returns the vectorized value.<br>
- Value *vectorizeTree(ArrayRef<Value *> VL, int VF);<br>
-<br>
- /// \returns the vectorization cost of the subtree that starts at \p VL.<br>
- /// A negative number means that this is profitable.<br>
- int getTreeCost(ArrayRef<Value *> VL);<br>
-<br>
- /// \returns the scalarization cost for this list of values. Assuming that<br>
- /// this subtree gets vectorized, we may need to extract the values from the<br>
- /// roots. This method calculates the cost of extracting the values.<br>
- int getScalarizationCost(ArrayRef<Value *> VL);<br>
-<br>
- /// \brief Attempts to order and vectorize a sequence of stores. This<br>
- /// function does a quadratic scan of the given stores.<br>
- /// \returns true if the basic block was modified.<br>
- bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold);<br>
-<br>
- /// \brief Vectorize a group of scalars into a vector tree.<br>
- /// \returns the vectorized value.<br>
- Value *vectorizeArith(ArrayRef<Value *> Operands);<br>
-<br>
- /// \returns the list of new instructions that were added in order to collect<br>
- /// scalars into vectors. This list can be used to further optimize the gather<br>
- /// sequences.<br>
- InstrList &getGatherSeqInstructions() { return GatherInstructions; }<br>
-<br>
-private:<br>
- /// \brief This method contains the recursive part of getTreeCost.<br>
- int getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth);<br>
-<br>
- /// \brief This recursive method looks for vectorization hazards such as<br>
- /// values that are used by multiple users and checks that values are used<br>
- /// by only one vector lane. It updates the variables LaneMap, MultiUserVals.<br>
- void getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth);<br>
-<br>
- /// \brief This method contains the recursive part of vectorizeTree.<br>
- Value *vectorizeTree_rec(ArrayRef<Value *> VL, int VF);<br>
-<br>
- /// \brief Number all of the instructions in the block.<br>
- void numberInstructions();<br>
-<br>
- /// \brief Vectorize a sorted sequence of stores.<br>
- bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold);<br>
-<br>
- /// \returns the scalarization cost for this type. Scalarization in this<br>
- /// context means the creation of vectors from a group of scalars.<br>
- int getScalarizationCost(Type *Ty);<br>
-<br>
- /// \returns the AA location that is being accessed by the instruction.<br>
- AliasAnalysis::Location getLocation(Instruction *I);<br>
-<br>
- /// \brief Checks if it is possible to sink an instruction from<br>
- /// \p Src to \p Dst.<br>
- /// \returns the pointer to the barrier instruction if we can't sink.<br>
- Value *isUnsafeToSink(Instruction *Src, Instruction *Dst);<br>
-<br>
- /// \returns the index of the last instruction in the BB from \p VL.<br>
- /// Only consider the first \p VF elements.<br>
- int getLastIndex(ArrayRef<Value *> VL, unsigned VF);<br>
-<br>
- /// \returns the index of the first User of \p VL.<br>
- /// Only consider the first \p VF elements.<br>
- int getFirstUserIndex(ArrayRef<Value *> VL, unsigned VF);<br>
-<br>
- /// \returns the instruction \p I or \p J that appears last in the BB.<br>
- int getLastIndex(Instruction *I, Instruction *J);<br>
-<br>
- /// \returns the insertion point for \p Index.<br>
- Instruction *getInsertionPoint(unsigned Index);<br>
-<br>
- /// \returns a vector from a collection of scalars in \p VL.<br>
- Value *Scalarize(ArrayRef<Value *> VL, VectorType *Ty);<br>
-<br>
-private:<br>
- /// Maps instructions to numbers and back.<br>
- SmallDenseMap<Value *, int> InstrIdx;<br>
- /// Maps integers to Instructions.<br>
- std::vector<Instruction *> InstrVec;<br>
-<br>
- // -- containers that are used during getTreeCost -- //<br>
-<br>
- /// Contains values that must be scalarized because they are used<br>
- /// by multiple lanes, or by users outside the tree.<br>
- /// NOTICE: The vectorization methods also use this set.<br>
- ValueSet MustScalarize;<br>
-<br>
- /// Contains values that have users outside of the vectorized graph.<br>
- /// We need to generate extract instructions for these values.<br>
- /// NOTICE: The vectorization methods also use this set.<br>
- SetVector<Value *> MustExtract;<br>
-<br>
- /// Contains a list of values that are used outside the current tree. This<br>
- /// set must be reset between runs.<br>
- SetVector<Value *> MultiUserVals;<br>
- /// Maps values in the tree to the vector lanes that uses them. This map must<br>
- /// be reset between runs of getCost.<br>
- std::map<Value *, int> LaneMap;<br>
- /// A list of instructions to ignore while sinking<br>
- /// memory instructions. This map must be reset between runs of getCost.<br>
- ValueSet MemBarrierIgnoreList;<br>
-<br>
- // -- Containers that are used during vectorizeTree -- //<br>
-<br>
- /// Maps between the first scalar to the vector. This map must be reset<br>
- /// between runs.<br>
- DenseMap<Value *, Value *> VectorizedValues;<br>
-<br>
- // -- Containers that are used after vectorization by the caller -- //<br>
-<br>
- /// A list of instructions that are used when gathering scalars into vectors.<br>
- /// In many cases these instructions can be hoisted outside of the BB.<br>
- /// Iterating over this list is faster than calling LICM.<br>
- /// Notice: We insert NULL ptrs to separate between the different gather<br>
- /// sequences.<br>
- InstrList GatherInstructions;<br>
-<br>
- /// Instruction builder to construct the vectorized tree.<br>
- IRBuilder<> Builder;<br>
-<br>
- // Analysis and block reference.<br>
- BasicBlock *BB;<br>
- ScalarEvolution *SE;<br>
- DataLayout *DL;<br>
- TargetTransformInfo *TTI;<br>
- AliasAnalysis *AA;<br>
- Loop *L;<br>
-};<br>
-<br>
-} // end of namespace<br>
-<br>
-#endif // LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H<br>
<br>
Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll?rev=184647&r1=184646&r2=184647&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll?rev=184647&r1=184646&r2=184647&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll (original)<br>
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll Sat Jun 22 16:34:10 2013<br>
@@ -50,9 +50,9 @@ entry:<br>
; }<br>
<br>
; CHECK: @extr_user<br>
+; CHECK: load i32*<br>
; CHECK: store <4 x i32><br>
-; CHECK-NEXT: extractelement <4 x i32><br>
-; CHECK: ret<br>
+; CHECK-NEXT: ret<br>
define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {<br>
entry:<br>
%0 = load i32* %A, align 4<br>
@@ -79,9 +79,9 @@ entry:<br>
<br>
; In this example we have an external user that is not the first element in the vector.<br>
; CHECK: @extr_user1<br>
+; CHECK: load i32*<br>
; CHECK: store <4 x i32><br>
-; CHECK-NEXT: extractelement <4 x i32><br>
-; CHECK: ret<br>
+; CHECK-NEXT: ret<br>
define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {<br>
entry:<br>
%0 = load i32* %A, align 4<br>
<br>
Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_block.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_block.ll?rev=184647&view=auto" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_block.ll?rev=184647&view=auto</a><br>
==============================================================================<br>
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_block.ll (added)<br>
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_block.ll Sat Jun 22 16:34:10 2013<br>
@@ -0,0 +1,55 @@<br>
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s<br>
+<br>
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"<br>
+target triple = "x86_64-apple-macosx10.7.0"<br>
+<br>
+; int bar(double *A, int d) {<br>
+; double A0 = A[0];<br>
+; double A1 = A[1];<br>
+; float F0 = A0;<br>
+; float F1 = A1;<br>
+; if (d) foo(); <----- This splits the blocks<br>
+; F0+=4.0;<br>
+; F1+=5.0;<br>
+; A[8] = 9.0 + F0;<br>
+; A[9] = 5.0 + F1;<br>
+; }<br>
+<br>
+<br>
+;CHECK: @bar<br>
+;CHECK: load <2 x double><br>
+;CHECK: fptrunc <2 x double><br>
+;CHECK: call i32<br>
+;CHECK: fadd <2 x float><br>
+;CHECK: fpext <2 x float><br>
+;CHECK: store <2 x double><br>
+;CHECK: ret<br>
+define i32 @bar(double* nocapture %A, i32 %d) {<br>
+ %1 = load double* %A, align 8<br>
+ %2 = getelementptr inbounds double* %A, i64 1<br>
+ %3 = load double* %2, align 8<br>
+ %4 = fptrunc double %1 to float<br>
+ %5 = fptrunc double %3 to float<br>
+ %6 = icmp eq i32 %d, 0<br>
+ br i1 %6, label %9, label %7<br>
+<br>
+; <label>:7 ; preds = %0<br>
+ %8 = tail call i32 (...)* @foo()<br>
+ br label %9<br>
+<br>
+; <label>:9 ; preds = %0, %7<br>
+ %10 = fadd float %4, 4.000000e+00<br>
+ %11 = fadd float %5, 5.000000e+00<br>
+ %12 = fpext float %10 to double<br>
+ %13 = fadd double %12, 9.000000e+00<br>
+ %14 = getelementptr inbounds double* %A, i64 8<br>
+ store double %13, double* %14, align 8<br>
+ %15 = fpext float %11 to double<br>
+ %16 = fadd double %15, 5.000000e+00<br>
+ %17 = getelementptr inbounds double* %A, i64 9<br>
+ store double %16, double* %17, align 8<br>
+ ret i32 undef<br>
+}<br>
+<br>
+declare i32 @foo(...)<br>
+<br>
<br>
Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll?rev=184647&r1=184646&r2=184647&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll?rev=184647&r1=184646&r2=184647&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll (original)<br>
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll Sat Jun 22 16:34:10 2013<br>
@@ -12,8 +12,8 @@ target triple = "x86_64-apple-macosx10.7<br>
;}<br>
<br>
;CHECK: @foo<br>
-;CHECK: load <4 x i32><br>
;CHECK: insertelement <4 x i32><br>
+;CHECK: load <4 x i32><br>
;CHECK: add <4 x i32><br>
;CHECK: store <4 x i32><br>
;CHECK: ret<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br><div>Alexey Samsonov, MSK</div>
</div>