On Thu, Oct 4, 2012 at 6:52 PM, Sean Silva <silvas@purdue.edu> wrote:
> protip: pass --patience or --histogram to git's diff-generating
> commands (git diff, git log -p, etc.) to select alternative diff
> algorithms.

I'm well aware of this, but that does nothing for the commit mailing
list, which is what my commit log was written for...
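(For anyone who wants to regenerate this diff locally with one of those
algorithms, the flags go directly on the diff-generating commands; a rough
sketch, assuming a git checkout of the LLVM tree with this change at HEAD:

    git log -p -1 --patience -- lib/Transforms/Scalar/SROA.cpp
    git diff --histogram HEAD~1 HEAD -- lib/Transforms/Scalar/SROA.cpp

Newer versions of git can also make this the default via
"git config --global diff.algorithm histogram", assuming a git new enough
to have the diff.algorithm setting.)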
> I just tried it out locally for this patch and the diff is
> dramatically better (the diff is a single big block of + and a single
> big block of -). FWIW, I find that patience and histogram are usually
> basically the same (histogram is an extension of patience), but either
> of them is usually significantly better than the default.
>
> More info about patience diff, for the curious:
> http://bramcohen.livejournal.com/73318.html
> More info about histogram diff:
> http://download.eclipse.org/jgit/docs/jgit-2.0.0.201206130900-r/apidocs/org/eclipse/jgit/diff/HistogramDiff.html
>
> -- Sean Silva
>
> On Thu, Oct 4, 2012 at 9:29 PM, Chandler Carruth <chandlerc@gmail.com> wrote:
> Author: chandlerc
> Date: Thu Oct 4 20:29:06 2012
> New Revision: 165284
>
> URL: http://llvm.org/viewvc/llvm-project?rev=165284&view=rev
> Log:
> Lift the speculation visitor above all the helpers that are targeted at
> the rewrite visitor to make it a bit clearer that the speculation is
> completely independent.
>
> I promise that this is just a cut/paste of the one visitor, plus the
> addition of the anonymous namespace wrappings. The diff may look
> completely preposterous; it does in git for some reason.
>
> Modified:
>     llvm/trunk/lib/Transforms/Scalar/SROA.cpp
>
> Modified: llvm/trunk/lib/Transforms/Scalar/SROA.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SROA.cpp?rev=165284&r1=165283&r2=165284&view=diff
> ==============================================================================<br>
> --- llvm/trunk/lib/Transforms/Scalar/SROA.cpp (original)<br>
> +++ llvm/trunk/lib/Transforms/Scalar/SROA.cpp Thu Oct 4 20:29:06 2012<br>
> @@ -1368,715 +1368,717 @@<br>
> INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",<br>
> false, false)<br>
><br>
> -/// \brief Accumulate the constant offsets in a GEP into a single APInt offset.<br>
> -///<br>
> -/// If the provided GEP is all-constant, the total byte offset formed by the<br>
> -/// GEP is computed and Offset is set to it. If the GEP has any non-constant<br>
> -/// operands, the function returns false and the value of Offset is unmodified.<br>
> -static bool accumulateGEPOffsets(const TargetData &TD, GEPOperator &GEP,<br>
> - APInt &Offset) {<br>
> - APInt GEPOffset(Offset.getBitWidth(), 0);<br>
> - for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);<br>
> - GTI != GTE; ++GTI) {<br>
> - ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());<br>
> - if (!OpC)<br>
> - return false;<br>
> - if (OpC->isZero()) continue;<br>
> +namespace {<br>
> +/// \brief Visitor to speculate PHIs and Selects where possible.<br>
> +class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {<br>
> + // Befriend the base class so it can delegate to private visit methods.<br>
> + friend class llvm::InstVisitor<PHIOrSelectSpeculator>;<br>
><br>
> - // Handle a struct index, which adds its field offset to the pointer.<br>
> - if (StructType *STy = dyn_cast<StructType>(*GTI)) {<br>
> - unsigned ElementIdx = OpC->getZExtValue();<br>
> - const StructLayout *SL = TD.getStructLayout(STy);<br>
> - GEPOffset += APInt(Offset.getBitWidth(),<br>
> - SL->getElementOffset(ElementIdx));<br>
> - continue;<br>
> - }<br>
> + const TargetData &TD;<br>
> + AllocaPartitioning &P;<br>
> + SROA &Pass;<br>
><br>
> - APInt TypeSize(Offset.getBitWidth(),<br>
> - TD.getTypeAllocSize(GTI.getIndexedType()));<br>
> - if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) {<br>
> - assert((VTy->getScalarSizeInBits() % 8) == 0 &&<br>
> - "vector element size is not a multiple of 8, cannot GEP over it");<br>
> - TypeSize = VTy->getScalarSizeInBits() / 8;<br>
> - }<br>
> +public:<br>
> + PHIOrSelectSpeculator(const TargetData &TD, AllocaPartitioning &P, SROA &Pass)<br>
> + : TD(TD), P(P), Pass(Pass) {}<br>
><br>
> - GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) * TypeSize;<br>
> + /// \brief Visit the users of an alloca partition and rewrite them.<br>
> + void visitUsers(AllocaPartitioning::const_iterator PI) {<br>
> + // Note that we need to use an index here as the underlying vector of uses<br>
> + // may be grown during speculation. However, we never need to re-visit the<br>
> + // new uses, and so we can use the initial size bound.<br>
> + for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {<br>
> + const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);<br>
> + if (!PU.U)<br>
> + continue; // Skip dead use.<br>
> +<br>
> + visit(cast<Instruction>(PU.U->getUser()));<br>
> + }<br>
> }<br>
> - Offset = GEPOffset;<br>
> - return true;<br>
> -}<br>
><br>
> -/// \brief Build a GEP out of a base pointer and indices.<br>
> -///<br>
> -/// This will return the BasePtr if that is valid, or build a new GEP<br>
> -/// instruction using the IRBuilder if GEP-ing is needed.<br>
> -static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,<br>
> - SmallVectorImpl<Value *> &Indices,<br>
> - const Twine &Prefix) {<br>
> - if (Indices.empty())<br>
> - return BasePtr;<br>
> +private:<br>
> + // By default, skip this instruction.<br>
> + void visitInstruction(Instruction &I) {}<br>
><br>
> - // A single zero index is a no-op, so check for this and avoid building a GEP<br>
> - // in that case.<br>
> - if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())<br>
> - return BasePtr;<br>
> + /// PHI instructions that use an alloca and are subsequently loaded can be<br>
> + /// rewritten to load both input pointers in the pred blocks and then PHI the<br>
> + /// results, allowing the load of the alloca to be promoted.<br>
> + /// From this:<br>
> + /// %P2 = phi [i32* %Alloca, i32* %Other]<br>
> + /// %V = load i32* %P2<br>
> + /// to:<br>
> + /// %V1 = load i32* %Alloca -> will be mem2reg'd<br>
> + /// ...<br>
> + /// %V2 = load i32* %Other<br>
> + /// ...<br>
> + /// %V = phi [i32 %V1, i32 %V2]<br>
> + ///<br>
> + /// We can do this to a select if its only uses are loads and if the operands<br>
> + /// to the select can be loaded unconditionally.<br>
> + ///<br>
> + /// FIXME: This should be hoisted into a generic utility, likely in<br>
> + /// Transforms/Util/Local.h<br>
> + bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *> &Loads) {<br>
> + // For now, we can only do this promotion if the load is in the same block<br>
> + // as the PHI, and if there are no stores between the phi and load.<br>
> + // TODO: Allow recursive phi users.<br>
> + // TODO: Allow stores.<br>
> + BasicBlock *BB = PN.getParent();<br>
> + unsigned MaxAlign = 0;<br>
> + for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end();<br>
> + UI != UE; ++UI) {<br>
> + LoadInst *LI = dyn_cast<LoadInst>(*UI);<br>
> + if (LI == 0 || !LI->isSimple()) return false;<br>
><br>
> - return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");<br>
> -}<br>
> + // For now we only allow loads in the same block as the PHI. This is<br>
> + // a common case that happens when instcombine merges two loads through<br>
> + // a PHI.<br>
> + if (LI->getParent() != BB) return false;<br>
><br>
> -/// \brief Get a natural GEP off of the BasePtr walking through Ty toward<br>
> -/// TargetTy without changing the offset of the pointer.<br>
> -///<br>
> -/// This routine assumes we've already established a properly offset GEP with<br>
> -/// Indices, and arrived at the Ty type. The goal is to continue to GEP with<br>
> -/// zero-indices down through type layers until we find one the same as<br>
> -/// TargetTy. If we can't find one with the same type, we at least try to use<br>
> -/// one with the same size. If none of that works, we just produce the GEP as<br>
> -/// indicated by Indices to have the correct offset.<br>
> -static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const TargetData &TD,<br>
> - Value *BasePtr, Type *Ty, Type *TargetTy,<br>
> - SmallVectorImpl<Value *> &Indices,<br>
> - const Twine &Prefix) {<br>
> - if (Ty == TargetTy)<br>
> - return buildGEP(IRB, BasePtr, Indices, Prefix);<br>
> + // Ensure that there are no instructions between the PHI and the load that<br>
> + // could store.<br>
> + for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)<br>
> + if (BBI->mayWriteToMemory())<br>
> + return false;<br>
><br>
> - // See if we can descend into a struct and locate a field with the correct<br>
> - // type.<br>
> - unsigned NumLayers = 0;<br>
> - Type *ElementTy = Ty;<br>
> - do {<br>
> - if (ElementTy->isPointerTy())<br>
> - break;<br>
> - if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {<br>
> - ElementTy = SeqTy->getElementType();<br>
> - Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(), 0)));<br>
> - } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {<br>
> - ElementTy = *STy->element_begin();<br>
> - Indices.push_back(IRB.getInt32(0));<br>
> - } else {<br>
> - break;<br>
> + MaxAlign = std::max(MaxAlign, LI->getAlignment());<br>
> + Loads.push_back(LI);<br>
> }<br>
> - ++NumLayers;<br>
> - } while (ElementTy != TargetTy);<br>
> - if (ElementTy != TargetTy)<br>
> - Indices.erase(Indices.end() - NumLayers, Indices.end());<br>
><br>
> - return buildGEP(IRB, BasePtr, Indices, Prefix);<br>
> -}<br>
> + // We can only transform this if it is safe to push the loads into the<br>
> + // predecessor blocks. The only thing to watch out for is that we can't put<br>
> + // a possibly trapping load in the predecessor if it is a critical edge.<br>
> + for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;<br>
> + ++Idx) {<br>
> + TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();<br>
> + Value *InVal = PN.getIncomingValue(Idx);<br>
><br>
> -/// \brief Recursively compute indices for a natural GEP.<br>
> -///<br>
> -/// This is the recursive step for getNaturalGEPWithOffset that walks down the<br>
> -/// element types adding appropriate indices for the GEP.<br>
> -static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const TargetData &TD,<br>
> - Value *Ptr, Type *Ty, APInt &Offset,<br>
> - Type *TargetTy,<br>
> - SmallVectorImpl<Value *> &Indices,<br>
> - const Twine &Prefix) {<br>
> - if (Offset == 0)<br>
> - return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix);<br>
> + // If the value is produced by the terminator of the predecessor (an<br>
> + // invoke) or it has side-effects, there is no valid place to put a load<br>
> + // in the predecessor.<br>
> + if (TI == InVal || TI->mayHaveSideEffects())<br>
> + return false;<br>
><br>
> - // We can't recurse through pointer types.<br>
> - if (Ty->isPointerTy())<br>
> - return 0;<br>
> + // If the predecessor has a single successor, then the edge isn't<br>
> + // critical.<br>
> + if (TI->getNumSuccessors() == 1)<br>
> + continue;<br>
><br>
> - // We try to analyze GEPs over vectors here, but note that these GEPs are<br>
> - // extremely poorly defined currently. The long-term goal is to remove GEPing<br>
> - // over a vector from the IR completely.<br>
> - if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {<br>
> - unsigned ElementSizeInBits = VecTy->getScalarSizeInBits();<br>
> - if (ElementSizeInBits % 8)<br>
> - return 0; // GEPs over non-multiple of 8 size vector elements are invalid.<br>
> - APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);<br>
> - APInt NumSkippedElements = Offset.udiv(ElementSize);<br>
> - if (NumSkippedElements.ugt(VecTy->getNumElements()))<br>
> - return 0;<br>
> - Offset -= NumSkippedElements * ElementSize;<br>
> - Indices.push_back(IRB.getInt(NumSkippedElements));<br>
> - return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),<br>
> - Offset, TargetTy, Indices, Prefix);<br>
> - }<br>
> + // If this pointer is always safe to load, or if we can prove that there<br>
> + // is already a load in the block, then we can move the load to the pred<br>
> + // block.<br>
> + if (InVal->isDereferenceablePointer() ||<br>
> + isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD))<br>
> + continue;<br>
><br>
> - if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {<br>
> - Type *ElementTy = ArrTy->getElementType();<br>
> - APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));<br>
> - APInt NumSkippedElements = Offset.udiv(ElementSize);<br>
> - if (NumSkippedElements.ugt(ArrTy->getNumElements()))<br>
> - return 0;<br>
> + return false;<br>
> + }<br>
><br>
> - Offset -= NumSkippedElements * ElementSize;<br>
> - Indices.push_back(IRB.getInt(NumSkippedElements));<br>
> - return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,<br>
> - Indices, Prefix);<br>
> + return true;<br>
> }<br>
><br>
> - StructType *STy = dyn_cast<StructType>(Ty);<br>
> - if (!STy)<br>
> - return 0;<br>
> + void visitPHINode(PHINode &PN) {<br>
> + DEBUG(dbgs() << " original: " << PN << "\n");<br>
><br>
> - const StructLayout *SL = TD.getStructLayout(STy);<br>
> - uint64_t StructOffset = Offset.getZExtValue();<br>
> - if (StructOffset >= SL->getSizeInBytes())<br>
> - return 0;<br>
> - unsigned Index = SL->getElementContainingOffset(StructOffset);<br>
> - Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));<br>
> - Type *ElementTy = STy->getElementType(Index);<br>
> - if (Offset.uge(TD.getTypeAllocSize(ElementTy)))<br>
> - return 0; // The offset points into alignment padding.<br>
> + SmallVector<LoadInst *, 4> Loads;<br>
> + if (!isSafePHIToSpeculate(PN, Loads))<br>
> + return;<br>
><br>
> - Indices.push_back(IRB.getInt32(Index));<br>
> - return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,<br>
> - Indices, Prefix);<br>
> -}<br>
> + assert(!Loads.empty());<br>
><br>
> -/// \brief Get a natural GEP from a base pointer to a particular offset and<br>
> -/// resulting in a particular type.<br>
> -///<br>
> -/// The goal is to produce a "natural" looking GEP that works with the existing<br>
> -/// composite types to arrive at the appropriate offset and element type for<br>
> -/// a pointer. TargetTy is the element type the returned GEP should point-to if<br>
> -/// possible. We recurse by decreasing Offset, adding the appropriate index to<br>
> -/// Indices, and setting Ty to the result subtype.<br>
> -///<br>
> -/// If no natural GEP can be constructed, this function returns null.<br>
> -static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const TargetData &TD,<br>
> - Value *Ptr, APInt Offset, Type *TargetTy,<br>
> - SmallVectorImpl<Value *> &Indices,<br>
> - const Twine &Prefix) {<br>
> - PointerType *Ty = cast<PointerType>(Ptr->getType());<br>
> + Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();<br>
> + IRBuilder<> PHIBuilder(&PN);<br>
> + PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),<br>
> + PN.getName() + ".sroa.speculated");<br>
><br>
> - // Don't consider any GEPs through an i8* as natural unless the TargetTy is<br>
> - // an i8.<br>
> - if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))<br>
> - return 0;<br>
> -<br>
> - Type *ElementTy = Ty->getElementType();<br>
> - if (!ElementTy->isSized())<br>
> - return 0; // We can't GEP through an unsized element.<br>
> - APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));<br>
> - if (ElementSize == 0)<br>
> - return 0; // Zero-length arrays can't help us build a natural GEP.<br>
> - APInt NumSkippedElements = Offset.udiv(ElementSize);<br>
> -<br>
> - Offset -= NumSkippedElements * ElementSize;<br>
> - Indices.push_back(IRB.getInt(NumSkippedElements));<br>
> - return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,<br>
> - Indices, Prefix);<br>
> -}<br>
> + // Get the TBAA tag and alignment to use from one of the loads. It doesn't<br>
> + // matter which one we get and if any differ, it doesn't matter.<br>
> + LoadInst *SomeLoad = cast<LoadInst>(Loads.back());<br>
> + MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);<br>
> + unsigned Align = SomeLoad->getAlignment();<br>
><br>
> -/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the<br>
> -/// resulting pointer has PointerTy.<br>
> -///<br>
> -/// This tries very hard to compute a "natural" GEP which arrives at the offset<br>
> -/// and produces the pointer type desired. Where it cannot, it will try to use<br>
> -/// the natural GEP to arrive at the offset and bitcast to the type. Where that<br>
> -/// fails, it will try to use an existing i8* and GEP to the byte offset and<br>
> -/// bitcast to the type.<br>
> -///<br>
> -/// The strategy for finding the more natural GEPs is to peel off layers of the<br>
> -/// pointer, walking back through bit casts and GEPs, searching for a base<br>
> -/// pointer from which we can compute a natural GEP with the desired<br>
> -/// properities. The algorithm tries to fold as many constant indices into<br>
> -/// a single GEP as possible, thus making each GEP more independent of the<br>
> -/// surrounding code.<br>
> -static Value *getAdjustedPtr(IRBuilder<> &IRB, const TargetData &TD,<br>
> - Value *Ptr, APInt Offset, Type *PointerTy,<br>
> - const Twine &Prefix) {<br>
> - // Even though we don't look through PHI nodes, we could be called on an<br>
> - // instruction in an unreachable block, which may be on a cycle.<br>
> - SmallPtrSet<Value *, 4> Visited;<br>
> - Visited.insert(Ptr);<br>
> - SmallVector<Value *, 4> Indices;<br>
> + // Rewrite all loads of the PN to use the new PHI.<br>
> + do {<br>
> + LoadInst *LI = Loads.pop_back_val();<br>
> + LI->replaceAllUsesWith(NewPN);<br>
> + Pass.DeadInsts.push_back(LI);<br>
> + } while (!Loads.empty());<br>
><br>
> - // We may end up computing an offset pointer that has the wrong type. If we<br>
> - // never are able to compute one directly that has the correct type, we'll<br>
> - // fall back to it, so keep it around here.<br>
> - Value *OffsetPtr = 0;<br>
> + // Inject loads into all of the pred blocks.<br>
> + for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {<br>
> + BasicBlock *Pred = PN.getIncomingBlock(Idx);<br>
> + TerminatorInst *TI = Pred->getTerminator();<br>
> + Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));<br>
> + Value *InVal = PN.getIncomingValue(Idx);<br>
> + IRBuilder<> PredBuilder(TI);<br>
><br>
> - // Remember any i8 pointer we come across to re-use if we need to do a raw<br>
> - // byte offset.<br>
> - Value *Int8Ptr = 0;<br>
> - APInt Int8PtrOffset(Offset.getBitWidth(), 0);<br>
> + LoadInst *Load<br>
> + = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +<br>
> + Pred->getName()));<br>
> + ++NumLoadsSpeculated;<br>
> + Load->setAlignment(Align);<br>
> + if (TBAATag)<br>
> + Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);<br>
> + NewPN->addIncoming(Load, Pred);<br>
><br>
> - Type *TargetTy = PointerTy->getPointerElementType();<br>
> + Instruction *Ptr = dyn_cast<Instruction>(InVal);<br>
> + if (!Ptr)<br>
> + // No uses to rewrite.<br>
> + continue;<br>
><br>
> - do {<br>
> - // First fold any existing GEPs into the offset.<br>
> - while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {<br>
> - APInt GEPOffset(Offset.getBitWidth(), 0);<br>
> - if (!accumulateGEPOffsets(TD, *GEP, GEPOffset))<br>
> - break;<br>
> - Offset += GEPOffset;<br>
> - Ptr = GEP->getPointerOperand();<br>
> - if (!Visited.insert(Ptr))<br>
> - break;<br>
> - }<br>
> + // Try to lookup and rewrite any partition uses corresponding to this phi<br>
> + // input.<br>
> + AllocaPartitioning::iterator PI<br>
> + = P.findPartitionForPHIOrSelectOperand(InUse);<br>
> + if (PI == P.end())<br>
> + continue;<br>
><br>
> - // See if we can perform a natural GEP here.<br>
> - Indices.clear();<br>
> - if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,<br>
> - Indices, Prefix)) {<br>
> - if (P->getType() == PointerTy) {<br>
> - // Zap any offset pointer that we ended up computing in previous rounds.<br>
> - if (OffsetPtr && OffsetPtr->use_empty())<br>
> - if (Instruction *I = dyn_cast<Instruction>(OffsetPtr))<br>
> - I->eraseFromParent();<br>
> - return P;<br>
> - }<br>
> - if (!OffsetPtr) {<br>
> - OffsetPtr = P;<br>
> - }<br>
> + // Replace the Use in the PartitionUse for this operand with the Use<br>
> + // inside the load.<br>
> + AllocaPartitioning::use_iterator UI<br>
> + = P.findPartitionUseForPHIOrSelectOperand(InUse);<br>
> + assert(isa<PHINode>(*UI->U->getUser()));<br>
> + UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());<br>
> }<br>
> + DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");<br>
> + }<br>
><br>
> - // Stash this pointer if we've found an i8*.<br>
> - if (Ptr->getType()->isIntegerTy(8)) {<br>
> - Int8Ptr = Ptr;<br>
> - Int8PtrOffset = Offset;<br>
> - }<br>
> + /// Select instructions that use an alloca and are subsequently loaded can be<br>
> + /// rewritten to load both input pointers and then select between the result,<br>
> + /// allowing the load of the alloca to be promoted.<br>
> + /// From this:<br>
> + /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other<br>
> + /// %V = load i32* %P2<br>
> + /// to:<br>
> + /// %V1 = load i32* %Alloca -> will be mem2reg'd<br>
> + /// %V2 = load i32* %Other<br>
> + /// %V = select i1 %cond, i32 %V1, i32 %V2<br>
> + ///<br>
> + /// We can do this to a select if its only uses are loads and if the operand<br>
> + /// to the select can be loaded unconditionally.<br>
> + bool isSafeSelectToSpeculate(SelectInst &SI,<br>
> + SmallVectorImpl<LoadInst *> &Loads) {<br>
> + Value *TValue = SI.getTrueValue();<br>
> + Value *FValue = SI.getFalseValue();<br>
> + bool TDerefable = TValue->isDereferenceablePointer();<br>
> + bool FDerefable = FValue->isDereferenceablePointer();<br>
><br>
> - // Peel off a layer of the pointer and update the offset appropriately.<br>
> - if (Operator::getOpcode(Ptr) == Instruction::BitCast) {<br>
> - Ptr = cast<Operator>(Ptr)->getOperand(0);<br>
> - } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {<br>
> - if (GA->mayBeOverridden())<br>
> - break;<br>
> - Ptr = GA->getAliasee();<br>
> - } else {<br>
> - break;<br>
> - }<br>
> - assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");<br>
> - } while (Visited.insert(Ptr));<br>
> + for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end();<br>
> + UI != UE; ++UI) {<br>
> + LoadInst *LI = dyn_cast<LoadInst>(*UI);<br>
> + if (LI == 0 || !LI->isSimple()) return false;<br>
><br>
> - if (!OffsetPtr) {<br>
> - if (!Int8Ptr) {<br>
> - Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),<br>
> - Prefix + ".raw_cast");<br>
> - Int8PtrOffset = Offset;<br>
> + // Both operands to the select need to be dereferencable, either<br>
> + // absolutely (e.g. allocas) or at this point because we can see other<br>
> + // accesses to it.<br>
> + if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI,<br>
> + LI->getAlignment(), &TD))<br>
> + return false;<br>
> + if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI,<br>
> + LI->getAlignment(), &TD))<br>
> + return false;<br>
> + Loads.push_back(LI);<br>
> }<br>
><br>
> - OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :<br>
> - IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),<br>
> - Prefix + ".raw_idx");<br>
> + return true;<br>
> }<br>
> - Ptr = OffsetPtr;<br>
><br>
> - // On the off chance we were targeting i8*, guard the bitcast here.<br>
> - if (Ptr->getType() != PointerTy)<br>
> - Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");<br>
> + void visitSelectInst(SelectInst &SI) {<br>
> + DEBUG(dbgs() << " original: " << SI << "\n");<br>
> + IRBuilder<> IRB(&SI);<br>
><br>
> - return Ptr;<br>
> -}<br>
> + // If the select isn't safe to speculate, just use simple logic to emit it.<br>
> + SmallVector<LoadInst *, 4> Loads;<br>
> + if (!isSafeSelectToSpeculate(SI, Loads))<br>
> + return;<br>
><br>
> -/// \brief Test whether the given alloca partition can be promoted to a vector.<br>
> -///<br>
> -/// This is a quick test to check whether we can rewrite a particular alloca<br>
> -/// partition (and its newly formed alloca) into a vector alloca with only<br>
> -/// whole-vector loads and stores such that it could be promoted to a vector<br>
> -/// SSA value. We only can ensure this for a limited set of operations, and we<br>
> -/// don't want to do the rewrites unless we are confident that the result will<br>
> -/// be promotable, so we have an early test here.<br>
> -static bool isVectorPromotionViable(const TargetData &TD,<br>
> - Type *AllocaTy,<br>
> - AllocaPartitioning &P,<br>
> - uint64_t PartitionBeginOffset,<br>
> - uint64_t PartitionEndOffset,<br>
> - AllocaPartitioning::const_use_iterator I,<br>
> - AllocaPartitioning::const_use_iterator E) {<br>
> - VectorType *Ty = dyn_cast<VectorType>(AllocaTy);<br>
> - if (!Ty)<br>
> - return false;<br>
> + Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };<br>
> + AllocaPartitioning::iterator PIs[2];<br>
> + AllocaPartitioning::PartitionUse PUs[2];<br>
> + for (unsigned i = 0, e = 2; i != e; ++i) {<br>
> + PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);<br>
> + if (PIs[i] != P.end()) {<br>
> + // If the pointer is within the partitioning, remove the select from<br>
> + // its uses. We'll add in the new loads below.<br>
> + AllocaPartitioning::use_iterator UI<br>
> + = P.findPartitionUseForPHIOrSelectOperand(Ops[i]);<br>
> + PUs[i] = *UI;<br>
> + // Clear out the use here so that the offsets into the use list remain<br>
> + // stable but this use is ignored when rewriting.<br>
> + UI->U = 0;<br>
> + }<br>
> + }<br>
><br>
> - uint64_t VecSize = TD.getTypeSizeInBits(Ty);<br>
> - uint64_t ElementSize = Ty->getScalarSizeInBits();<br>
> + Value *TV = SI.getTrueValue();<br>
> + Value *FV = SI.getFalseValue();<br>
> + // Replace the loads of the select with a select of two loads.<br>
> + while (!Loads.empty()) {<br>
> + LoadInst *LI = Loads.pop_back_val();<br>
><br>
> - // While the definition of LLVM vectors is bitpacked, we don't support sizes<br>
> - // that aren't byte sized.<br>
> - if (ElementSize % 8)<br>
> - return false;<br>
> - assert((VecSize % 8) == 0 && "vector size not a multiple of element size?");<br>
> - VecSize /= 8;<br>
> - ElementSize /= 8;<br>
> + IRB.SetInsertPoint(LI);<br>
> + LoadInst *TL =<br>
> + IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");<br>
> + LoadInst *FL =<br>
> + IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");<br>
> + NumLoadsSpeculated += 2;<br>
><br>
> - for (; I != E; ++I) {<br>
> - if (!I->U)<br>
> - continue; // Skip dead use.<br>
> + // Transfer alignment and TBAA info if present.<br>
> + TL->setAlignment(LI->getAlignment());<br>
> + FL->setAlignment(LI->getAlignment());<br>
> + if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {<br>
> + TL->setMetadata(LLVMContext::MD_tbaa, Tag);<br>
> + FL->setMetadata(LLVMContext::MD_tbaa, Tag);<br>
> + }<br>
><br>
> - uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;<br>
> - uint64_t BeginIndex = BeginOffset / ElementSize;<br>
> - if (BeginIndex * ElementSize != BeginOffset ||<br>
> - BeginIndex >= Ty->getNumElements())<br>
> - return false;<br>
> - uint64_t EndOffset = I->EndOffset - PartitionBeginOffset;<br>
> - uint64_t EndIndex = EndOffset / ElementSize;<br>
> - if (EndIndex * ElementSize != EndOffset ||<br>
> - EndIndex > Ty->getNumElements())<br>
> - return false;<br>
> -<br>
> - // FIXME: We should build shuffle vector instructions to handle<br>
> - // non-element-sized accesses.<br>
> - if ((EndOffset - BeginOffset) != ElementSize &&<br>
> - (EndOffset - BeginOffset) != VecSize)<br>
> - return false;<br>
> + Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,<br>
> + LI->getName() + ".sroa.speculated");<br>
><br>
> - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {<br>
> - if (MI->isVolatile())<br>
> - return false;<br>
> - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {<br>
> - const AllocaPartitioning::MemTransferOffsets &MTO<br>
> - = P.getMemTransferOffsets(*MTI);<br>
> - if (!MTO.IsSplittable)<br>
> - return false;<br>
> + LoadInst *Loads[2] = { TL, FL };<br>
> + for (unsigned i = 0, e = 2; i != e; ++i) {<br>
> + if (PIs[i] != P.end()) {<br>
> + Use *LoadUse = &Loads[i]->getOperandUse(0);<br>
> + assert(PUs[i].U->get() == LoadUse->get());<br>
> + PUs[i].U = LoadUse;<br>
> + P.use_push_back(PIs[i], PUs[i]);<br>
> + }<br>
> }<br>
> - } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {<br>
> - // Disable vector promotion when there are loads or stores of an FCA.<br>
> - return false;<br>
> - } else if (!isa<LoadInst>(I->U->getUser()) &&<br>
> - !isa<StoreInst>(I->U->getUser())) {<br>
> - return false;<br>
> +<br>
> + DEBUG(dbgs() << " speculated to: " << *V << "\n");<br>
> + LI->replaceAllUsesWith(V);<br>
> + Pass.DeadInsts.push_back(LI);<br>
> }<br>
> }<br>
> - return true;<br>
> +};<br>
> }<br>
><br>
> -/// \brief Test whether the given alloca partition can be promoted to an int.<br>
> +/// \brief Accumulate the constant offsets in a GEP into a single APInt offset.<br>
> ///<br>
> -/// This is a quick test to check whether we can rewrite a particular alloca<br>
> -/// partition (and its newly formed alloca) into an integer alloca suitable for<br>
> -/// promotion to an SSA value. We only can ensure this for a limited set of<br>
> -/// operations, and we don't want to do the rewrites unless we are confident<br>
> -/// that the result will be promotable, so we have an early test here.<br>
> -static bool isIntegerPromotionViable(const TargetData &TD,<br>
> - Type *AllocaTy,<br>
> - uint64_t AllocBeginOffset,<br>
> - AllocaPartitioning &P,<br>
> - AllocaPartitioning::const_use_iterator I,<br>
> - AllocaPartitioning::const_use_iterator E) {<br>
> - IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);<br>
> - if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth())<br>
> - return false;<br>
> -<br>
> - // Check the uses to ensure the uses are (likely) promoteable integer uses.<br>
> - // Also ensure that the alloca has a covering load or store. We don't want<br>
> - // promote because of some other unsplittable entry (which we may make<br>
> - // splittable later) and lose the ability to promote each element access.<br>
> - bool WholeAllocaOp = false;<br>
> - for (; I != E; ++I) {<br>
> - if (!I->U)<br>
> - continue; // Skip dead use.<br>
> -<br>
> - // We can't reasonably handle cases where the load or store extends past<br>
> - // the end of the aloca's type and into its padding.<br>
> - if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty))<br>
> +/// If the provided GEP is all-constant, the total byte offset formed by the<br>
> +/// GEP is computed and Offset is set to it. If the GEP has any non-constant<br>
> +/// operands, the function returns false and the value of Offset is unmodified.<br>
> +static bool accumulateGEPOffsets(const TargetData &TD, GEPOperator &GEP,<br>
> + APInt &Offset) {<br>
> + APInt GEPOffset(Offset.getBitWidth(), 0);<br>
> + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);<br>
> + GTI != GTE; ++GTI) {<br>
> + ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());<br>
> + if (!OpC)<br>
> return false;<br>
> + if (OpC->isZero()) continue;<br>
><br>
> - if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {<br>
> - if (LI->isVolatile() || !LI->getType()->isIntegerTy())<br>
> - return false;<br>
> - if (LI->getType() == Ty)<br>
> - WholeAllocaOp = true;<br>
> - } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {<br>
> - if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())<br>
> - return false;<br>
> - if (SI->getValueOperand()->getType() == Ty)<br>
> - WholeAllocaOp = true;<br>
> - } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {<br>
> - if (MI->isVolatile())<br>
> - return false;<br>
> - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {<br>
> - const AllocaPartitioning::MemTransferOffsets &MTO<br>
> - = P.getMemTransferOffsets(*MTI);<br>
> - if (!MTO.IsSplittable)<br>
> - return false;<br>
> - }<br>
> - } else {<br>
> - return false;<br>
> + // Handle a struct index, which adds its field offset to the pointer.<br>
> + if (StructType *STy = dyn_cast<StructType>(*GTI)) {<br>
> + unsigned ElementIdx = OpC->getZExtValue();<br>
> + const StructLayout *SL = TD.getStructLayout(STy);<br>
> + GEPOffset += APInt(Offset.getBitWidth(),<br>
> + SL->getElementOffset(ElementIdx));<br>
> + continue;<br>
> }<br>
> +<br>
> + APInt TypeSize(Offset.getBitWidth(),<br>
> + TD.getTypeAllocSize(GTI.getIndexedType()));<br>
> + if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) {<br>
> + assert((VTy->getScalarSizeInBits() % 8) == 0 &&<br>
> + "vector element size is not a multiple of 8, cannot GEP over it");<br>
> + TypeSize = VTy->getScalarSizeInBits() / 8;<br>
> + }<br>
> +<br>
> + GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) * TypeSize;<br>
> }<br>
> - return WholeAllocaOp;<br>
> + Offset = GEPOffset;<br>
> + return true;<br>
> }<br>
><br>
> -namespace {<br>
> -/// \brief Visitor to speculate PHIs and Selects where possible.<br>
> -class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {<br>
> - // Befriend the base class so it can delegate to private visit methods.<br>
> - friend class llvm::InstVisitor<PHIOrSelectSpeculator>;<br>
> +/// \brief Build a GEP out of a base pointer and indices.<br>
> +///<br>
> +/// This will return the BasePtr if that is valid, or build a new GEP<br>
> +/// instruction using the IRBuilder if GEP-ing is needed.<br>
> +static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,<br>
> + SmallVectorImpl<Value *> &Indices,<br>
> + const Twine &Prefix) {<br>
> + if (Indices.empty())<br>
> + return BasePtr;<br>
><br>
> - const TargetData &TD;<br>
> - AllocaPartitioning &P;<br>
> - SROA &Pass;<br>
> + // A single zero index is a no-op, so check for this and avoid building a GEP<br>
> + // in that case.<br>
> + if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())<br>
> + return BasePtr;<br>
><br>
> -public:<br>
> - PHIOrSelectSpeculator(const TargetData &TD, AllocaPartitioning &P, SROA &Pass)<br>
> - : TD(TD), P(P), Pass(Pass) {}<br>
> + return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");<br>
> +}<br>
><br>
> - /// \brief Visit the users of an alloca partition and rewrite them.<br>
> - void visitUsers(AllocaPartitioning::const_iterator PI) {<br>
> - // Note that we need to use an index here as the underlying vector of uses<br>
> - // may be grown during speculation. However, we never need to re-visit the<br>
> - // new uses, and so we can use the initial size bound.<br>
> - for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {<br>
> - const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);<br>
> - if (!PU.U)<br>
> - continue; // Skip dead use.<br>
> +/// \brief Get a natural GEP off of the BasePtr walking through Ty toward<br>
> +/// TargetTy without changing the offset of the pointer.<br>
> +///<br>
> +/// This routine assumes we've already established a properly offset GEP with<br>
> +/// Indices, and arrived at the Ty type. The goal is to continue to GEP with<br>
> +/// zero-indices down through type layers until we find one the same as<br>
> +/// TargetTy. If we can't find one with the same type, we at least try to use<br>
> +/// one with the same size. If none of that works, we just produce the GEP as<br>
> +/// indicated by Indices to have the correct offset.<br>
> +static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const TargetData &TD,<br>
> + Value *BasePtr, Type *Ty, Type *TargetTy,<br>
> + SmallVectorImpl<Value *> &Indices,<br>
> + const Twine &Prefix) {<br>
> + if (Ty == TargetTy)<br>
> + return buildGEP(IRB, BasePtr, Indices, Prefix);<br>
><br>
> - visit(cast<Instruction>(PU.U->getUser()));<br>
> + // See if we can descend into a struct and locate a field with the correct<br>
> + // type.<br>
> + unsigned NumLayers = 0;<br>
> + Type *ElementTy = Ty;<br>
> + do {<br>
> + if (ElementTy->isPointerTy())<br>
> + break;<br>
> + if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {<br>
> + ElementTy = SeqTy->getElementType();<br>
> + Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(), 0)));<br>
> + } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {<br>
> + ElementTy = *STy->element_begin();<br>
> + Indices.push_back(IRB.getInt32(0));<br>
> + } else {<br>
> + break;<br>
> }<br>
> - }<br>
> + ++NumLayers;<br>
> + } while (ElementTy != TargetTy);<br>
> + if (ElementTy != TargetTy)<br>
> + Indices.erase(Indices.end() - NumLayers, Indices.end());<br>
><br>
> -private:<br>
> - // By default, skip this instruction.<br>
> - void visitInstruction(Instruction &I) {}<br>
> + return buildGEP(IRB, BasePtr, Indices, Prefix);<br>
> +}<br>
><br>
> - /// PHI instructions that use an alloca and are subsequently loaded can be<br>
> - /// rewritten to load both input pointers in the pred blocks and then PHI the<br>
> - /// results, allowing the load of the alloca to be promoted.<br>
> - /// From this:<br>
> - /// %P2 = phi [i32* %Alloca, i32* %Other]<br>
> - /// %V = load i32* %P2<br>
> - /// to:<br>
> - /// %V1 = load i32* %Alloca -> will be mem2reg'd<br>
> - /// ...<br>
> - /// %V2 = load i32* %Other<br>
> - /// ...<br>
> - /// %V = phi [i32 %V1, i32 %V2]<br>
> - ///<br>
> - /// We can do this to a select if its only uses are loads and if the operands<br>
> - /// to the select can be loaded unconditionally.<br>
> - ///<br>
> - /// FIXME: This should be hoisted into a generic utility, likely in<br>
> - /// Transforms/Util/Local.h<br>
> - bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *> &Loads) {<br>
> - // For now, we can only do this promotion if the load is in the same block<br>
> - // as the PHI, and if there are no stores between the phi and load.<br>
> - // TODO: Allow recursive phi users.<br>
> - // TODO: Allow stores.<br>
> - BasicBlock *BB = PN.getParent();<br>
> - unsigned MaxAlign = 0;<br>
> - for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end();<br>
> - UI != UE; ++UI) {<br>
> - LoadInst *LI = dyn_cast<LoadInst>(*UI);<br>
> - if (LI == 0 || !LI->isSimple()) return false;<br>
> +/// \brief Recursively compute indices for a natural GEP.<br>
> +///<br>
> +/// This is the recursive step for getNaturalGEPWithOffset that walks down the<br>
> +/// element types adding appropriate indices for the GEP.<br>
> +static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const TargetData &TD,<br>
> + Value *Ptr, Type *Ty, APInt &Offset,<br>
> + Type *TargetTy,<br>
> + SmallVectorImpl<Value *> &Indices,<br>
> + const Twine &Prefix) {<br>
> + if (Offset == 0)<br>
> + return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix);<br>
><br>
> - // For now we only allow loads in the same block as the PHI. This is<br>
> - // a common case that happens when instcombine merges two loads through<br>
> - // a PHI.<br>
> - if (LI->getParent() != BB) return false;<br>
> + // We can't recurse through pointer types.<br>
> + if (Ty->isPointerTy())<br>
> + return 0;<br>
><br>
> - // Ensure that there are no instructions between the PHI and the load that<br>
> - // could store.<br>
> - for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)<br>
> - if (BBI->mayWriteToMemory())<br>
> - return false;<br>
> + // We try to analyze GEPs over vectors here, but note that these GEPs are<br>
> + // extremely poorly defined currently. The long-term goal is to remove GEPing<br>
> + // over a vector from the IR completely.<br>
> + if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {<br>
> + unsigned ElementSizeInBits = VecTy->getScalarSizeInBits();<br>
> + if (ElementSizeInBits % 8)<br>
> + return 0; // GEPs over non-multiple of 8 size vector elements are invalid.<br>
> + APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);<br>
> + APInt NumSkippedElements = Offset.udiv(ElementSize);<br>
> + if (NumSkippedElements.ugt(VecTy->getNumElements()))<br>
> + return 0;<br>
> + Offset -= NumSkippedElements * ElementSize;<br>
> + Indices.push_back(IRB.getInt(NumSkippedElements));<br>
> + return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),<br>
> + Offset, TargetTy, Indices, Prefix);<br>
> + }<br>
><br>
> - MaxAlign = std::max(MaxAlign, LI->getAlignment());<br>
> - Loads.push_back(LI);<br>
> - }<br>
> + if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {<br>
> + Type *ElementTy = ArrTy->getElementType();<br>
> + APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));<br>
> + APInt NumSkippedElements = Offset.udiv(ElementSize);<br>
> + if (NumSkippedElements.ugt(ArrTy->getNumElements()))<br>
> + return 0;<br>
><br>
> - // We can only transform this if it is safe to push the loads into the<br>
> - // predecessor blocks. The only thing to watch out for is that we can't put<br>
> - // a possibly trapping load in the predecessor if it is a critical edge.<br>
> - for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;<br>
> - ++Idx) {<br>
> - TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();<br>
> - Value *InVal = PN.getIncomingValue(Idx);<br>
> + Offset -= NumSkippedElements * ElementSize;<br>
> + Indices.push_back(IRB.getInt(NumSkippedElements));<br>
> + return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,<br>
> + Indices, Prefix);<br>
> + }<br>
><br>
> - // If the value is produced by the terminator of the predecessor (an<br>
> - // invoke) or it has side-effects, there is no valid place to put a load<br>
> - // in the predecessor.<br>
> - if (TI == InVal || TI->mayHaveSideEffects())<br>
> - return false;<br>
> + StructType *STy = dyn_cast<StructType>(Ty);<br>
> + if (!STy)<br>
> + return 0;<br>
><br>
> - // If the predecessor has a single successor, then the edge isn't<br>
> - // critical.<br>
> - if (TI->getNumSuccessors() == 1)<br>
> - continue;<br>
> + const StructLayout *SL = TD.getStructLayout(STy);<br>
> + uint64_t StructOffset = Offset.getZExtValue();<br>
> + if (StructOffset >= SL->getSizeInBytes())<br>
> + return 0;<br>
> + unsigned Index = SL->getElementContainingOffset(StructOffset);<br>
> + Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));<br>
> + Type *ElementTy = STy->getElementType(Index);<br>
> + if (Offset.uge(TD.getTypeAllocSize(ElementTy)))<br>
> + return 0; // The offset points into alignment padding.<br>
><br>
> - // If this pointer is always safe to load, or if we can prove that there<br>
> - // is already a load in the block, then we can move the load to the pred<br>
> - // block.<br>
> - if (InVal->isDereferenceablePointer() ||<br>
> - isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD))<br>
> - continue;<br>
> + Indices.push_back(IRB.getInt32(Index));<br>
> + return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,<br>
> + Indices, Prefix);<br>
> +}<br>
><br>
> - return false;<br>
> - }<br>
> +/// \brief Get a natural GEP from a base pointer to a particular offset and<br>
> +/// resulting in a particular type.<br>
> +///<br>
> +/// The goal is to produce a "natural" looking GEP that works with the existing<br>
> +/// composite types to arrive at the appropriate offset and element type for<br>
> +/// a pointer. TargetTy is the element type the returned GEP should point-to if<br>
> +/// possible. We recurse by decreasing Offset, adding the appropriate index to<br>
> +/// Indices, and setting Ty to the result subtype.<br>
> +///<br>
> +/// If no natural GEP can be constructed, this function returns null.<br>
> +static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const TargetData &TD,<br>
> + Value *Ptr, APInt Offset, Type *TargetTy,<br>
> + SmallVectorImpl<Value *> &Indices,<br>
> + const Twine &Prefix) {<br>
> + PointerType *Ty = cast<PointerType>(Ptr->getType());<br>
><br>
> - return true;<br>
> - }<br>
> + // Don't consider any GEPs through an i8* as natural unless the TargetTy is<br>
> + // an i8.<br>
> + if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))<br>
> + return 0;<br>
><br>
> - void visitPHINode(PHINode &PN) {<br>
> - DEBUG(dbgs() << " original: " << PN << "\n");<br>
> + Type *ElementTy = Ty->getElementType();<br>
> + if (!ElementTy->isSized())<br>
> + return 0; // We can't GEP through an unsized element.<br>
> + APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));<br>
> + if (ElementSize == 0)<br>
> + return 0; // Zero-length arrays can't help us build a natural GEP.<br>
> + APInt NumSkippedElements = Offset.udiv(ElementSize);<br>
><br>
> - SmallVector<LoadInst *, 4> Loads;<br>
> - if (!isSafePHIToSpeculate(PN, Loads))<br>
> - return;<br>
> + Offset -= NumSkippedElements * ElementSize;<br>
> + Indices.push_back(IRB.getInt(NumSkippedElements));<br>
> + return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,<br>
> + Indices, Prefix);<br>
> +}<br>
><br>
> - assert(!Loads.empty());<br>
> +/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the<br>
> +/// resulting pointer has PointerTy.<br>
> +///<br>
> +/// This tries very hard to compute a "natural" GEP which arrives at the offset<br>
> +/// and produces the pointer type desired. Where it cannot, it will try to use<br>
> +/// the natural GEP to arrive at the offset and bitcast to the type. Where that<br>
> +/// fails, it will try to use an existing i8* and GEP to the byte offset and<br>
> +/// bitcast to the type.<br>
> +///<br>
> +/// The strategy for finding the more natural GEPs is to peel off layers of the<br>
> +/// pointer, walking back through bit casts and GEPs, searching for a base<br>
> +/// pointer from which we can compute a natural GEP with the desired<br>
> +/// properities. The algorithm tries to fold as many constant indices into<br>
> +/// a single GEP as possible, thus making each GEP more independent of the<br>
> +/// surrounding code.<br>
> +static Value *getAdjustedPtr(IRBuilder<> &IRB, const TargetData &TD,<br>
> + Value *Ptr, APInt Offset, Type *PointerTy,<br>
> + const Twine &Prefix) {<br>
> + // Even though we don't look through PHI nodes, we could be called on an<br>
> + // instruction in an unreachable block, which may be on a cycle.<br>
> + SmallPtrSet<Value *, 4> Visited;<br>
> + Visited.insert(Ptr);<br>
> + SmallVector<Value *, 4> Indices;<br>
><br>
> - Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();<br>
> - IRBuilder<> PHIBuilder(&PN);<br>
> - PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),<br>
> - PN.getName() + ".sroa.speculated");<br>
> + // We may end up computing an offset pointer that has the wrong type. If we<br>
> + // never are able to compute one directly that has the correct type, we'll<br>
> + // fall back to it, so keep it around here.<br>
> + Value *OffsetPtr = 0;<br>
><br>
> - // Get the TBAA tag and alignment to use from one of the loads. It doesn't<br>
> - // matter which one we get and if any differ, it doesn't matter.<br>
> - LoadInst *SomeLoad = cast<LoadInst>(Loads.back());<br>
> - MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);<br>
> - unsigned Align = SomeLoad->getAlignment();<br>
> + // Remember any i8 pointer we come across to re-use if we need to do a raw<br>
> + // byte offset.<br>
> + Value *Int8Ptr = 0;<br>
> + APInt Int8PtrOffset(Offset.getBitWidth(), 0);<br>
><br>
> - // Rewrite all loads of the PN to use the new PHI.<br>
> - do {<br>
> - LoadInst *LI = Loads.pop_back_val();<br>
> - LI->replaceAllUsesWith(NewPN);<br>
> - Pass.DeadInsts.push_back(LI);<br>
> - } while (!Loads.empty());<br>
> + Type *TargetTy = PointerTy->getPointerElementType();<br>
><br>
> - // Inject loads into all of the pred blocks.<br>
> - for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {<br>
> - BasicBlock *Pred = PN.getIncomingBlock(Idx);<br>
> - TerminatorInst *TI = Pred->getTerminator();<br>
> - Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));<br>
> - Value *InVal = PN.getIncomingValue(Idx);<br>
> - IRBuilder<> PredBuilder(TI);<br>
> + do {<br>
> + // First fold any existing GEPs into the offset.<br>
> + while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {<br>
> + APInt GEPOffset(Offset.getBitWidth(), 0);<br>
> + if (!accumulateGEPOffsets(TD, *GEP, GEPOffset))<br>
> + break;<br>
> + Offset += GEPOffset;<br>
> + Ptr = GEP->getPointerOperand();<br>
> + if (!Visited.insert(Ptr))<br>
> + break;<br>
> + }<br>
><br>
> - LoadInst *Load<br>
> - = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +<br>
> - Pred->getName()));<br>
> - ++NumLoadsSpeculated;<br>
> - Load->setAlignment(Align);<br>
> - if (TBAATag)<br>
> - Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);<br>
> - NewPN->addIncoming(Load, Pred);<br>
> + // See if we can perform a natural GEP here.<br>
> + Indices.clear();<br>
> + if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,<br>
> + Indices, Prefix)) {<br>
> + if (P->getType() == PointerTy) {<br>
> + // Zap any offset pointer that we ended up computing in previous rounds.<br>
> + if (OffsetPtr && OffsetPtr->use_empty())<br>
> + if (Instruction *I = dyn_cast<Instruction>(OffsetPtr))<br>
> + I->eraseFromParent();<br>
> + return P;<br>
> + }<br>
> + if (!OffsetPtr) {<br>
> + OffsetPtr = P;<br>
> + }<br>
> + }<br>
><br>
> - Instruction *Ptr = dyn_cast<Instruction>(InVal);<br>
> - if (!Ptr)<br>
> - // No uses to rewrite.<br>
> - continue;<br>
> + // Stash this pointer if we've found an i8*.<br>
> + if (Ptr->getType()->isIntegerTy(8)) {<br>
> + Int8Ptr = Ptr;<br>
> + Int8PtrOffset = Offset;<br>
> + }<br>
><br>
> - // Try to lookup and rewrite any partition uses corresponding to this phi<br>
> - // input.<br>
> - AllocaPartitioning::iterator PI<br>
> - = P.findPartitionForPHIOrSelectOperand(InUse);<br>
> - if (PI == P.end())<br>
> - continue;<br>
> + // Peel off a layer of the pointer and update the offset appropriately.<br>
> + if (Operator::getOpcode(Ptr) == Instruction::BitCast) {<br>
> + Ptr = cast<Operator>(Ptr)->getOperand(0);<br>
> + } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {<br>
> + if (GA->mayBeOverridden())<br>
> + break;<br>
> + Ptr = GA->getAliasee();<br>
> + } else {<br>
> + break;<br>
> + }<br>
> + assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");<br>
> + } while (Visited.insert(Ptr));<br>
><br>
> - // Replace the Use in the PartitionUse for this operand with the Use<br>
> - // inside the load.<br>
> - AllocaPartitioning::use_iterator UI<br>
> - = P.findPartitionUseForPHIOrSelectOperand(InUse);<br>
> - assert(isa<PHINode>(*UI->U->getUser()));<br>
> - UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());<br>
> + if (!OffsetPtr) {<br>
> + if (!Int8Ptr) {<br>
> + Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),<br>
> + Prefix + ".raw_cast");<br>
> + Int8PtrOffset = Offset;<br>
> }<br>
> - DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");<br>
> +<br>
> + OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :<br>
> + IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),<br>
> + Prefix + ".raw_idx");<br>
> }<br>
> + Ptr = OffsetPtr;<br>
><br>
> - /// Select instructions that use an alloca and are subsequently loaded can be<br>
> - /// rewritten to load both input pointers and then select between the result,<br>
> - /// allowing the load of the alloca to be promoted.<br>
> - /// From this:<br>
> - /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other<br>
> - /// %V = load i32* %P2<br>
> - /// to:<br>
> - /// %V1 = load i32* %Alloca -> will be mem2reg'd<br>
> - /// %V2 = load i32* %Other<br>
> - /// %V = select i1 %cond, i32 %V1, i32 %V2<br>
> - ///<br>
> - /// We can do this to a select if its only uses are loads and if the operand<br>
> - /// to the select can be loaded unconditionally.<br>
> - bool isSafeSelectToSpeculate(SelectInst &SI,<br>
> - SmallVectorImpl<LoadInst *> &Loads) {<br>
> - Value *TValue = SI.getTrueValue();<br>
> - Value *FValue = SI.getFalseValue();<br>
> - bool TDerefable = TValue->isDereferenceablePointer();<br>
> - bool FDerefable = FValue->isDereferenceablePointer();<br>
> + // On the off chance we were targeting i8*, guard the bitcast here.<br>
> + if (Ptr->getType() != PointerTy)<br>
> + Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");<br>
><br>
> - for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end();<br>
> - UI != UE; ++UI) {<br>
> - LoadInst *LI = dyn_cast<LoadInst>(*UI);<br>
> - if (LI == 0 || !LI->isSimple()) return false;<br>
> + return Ptr;<br>
> +}<br>
><br>
> - // Both operands to the select need to be dereferencable, either<br>
> - // absolutely (e.g. allocas) or at this point because we can see other<br>
> - // accesses to it.<br>
> - if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI,<br>
> - LI->getAlignment(), &TD))<br>
> - return false;<br>
> - if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI,<br>
> - LI->getAlignment(), &TD))<br>
> - return false;<br>
> - Loads.push_back(LI);<br>
> - }<br>
> +/// \brief Test whether the given alloca partition can be promoted to a vector.<br>
> +///<br>
> +/// This is a quick test to check whether we can rewrite a particular alloca<br>
> +/// partition (and its newly formed alloca) into a vector alloca with only<br>
> +/// whole-vector loads and stores such that it could be promoted to a vector<br>
> +/// SSA value. We only can ensure this for a limited set of operations, and we<br>
> +/// don't want to do the rewrites unless we are confident that the result will<br>
> +/// be promotable, so we have an early test here.<br>
> +static bool isVectorPromotionViable(const TargetData &TD,<br>
> + Type *AllocaTy,<br>
> + AllocaPartitioning &P,<br>
> + uint64_t PartitionBeginOffset,<br>
> + uint64_t PartitionEndOffset,<br>
> + AllocaPartitioning::const_use_iterator I,<br>
> + AllocaPartitioning::const_use_iterator E) {<br>
> + VectorType *Ty = dyn_cast<VectorType>(AllocaTy);<br>
> + if (!Ty)<br>
> + return false;<br>
><br>
> - return true;<br>
> - }<br>
> + uint64_t VecSize = TD.getTypeSizeInBits(Ty);<br>
> + uint64_t ElementSize = Ty->getScalarSizeInBits();<br>
><br>
> - void visitSelectInst(SelectInst &SI) {<br>
> - DEBUG(dbgs() << " original: " << SI << "\n");<br>
> - IRBuilder<> IRB(&SI);<br>
> + // While the definition of LLVM vectors is bitpacked, we don't support sizes<br>
> + // that aren't byte sized.<br>
> + if (ElementSize % 8)<br>
> + return false;<br>
> + assert((VecSize % 8) == 0 && "vector size not a multiple of element size?");<br>
> + VecSize /= 8;<br>
> + ElementSize /= 8;<br>
><br>
> - // If the select isn't safe to speculate, just use simple logic to emit it.<br>
> - SmallVector<LoadInst *, 4> Loads;<br>
> - if (!isSafeSelectToSpeculate(SI, Loads))<br>
> - return;<br>
> + for (; I != E; ++I) {<br>
> + if (!I->U)<br>
> + continue; // Skip dead use.<br>
><br>
> - Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };<br>
> - AllocaPartitioning::iterator PIs[2];<br>
> - AllocaPartitioning::PartitionUse PUs[2];<br>
> - for (unsigned i = 0, e = 2; i != e; ++i) {<br>
> - PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);<br>
> - if (PIs[i] != P.end()) {<br>
> - // If the pointer is within the partitioning, remove the select from<br>
> - // its uses. We'll add in the new loads below.<br>
> - AllocaPartitioning::use_iterator UI<br>
> - = P.findPartitionUseForPHIOrSelectOperand(Ops[i]);<br>
> - PUs[i] = *UI;<br>
> - // Clear out the use here so that the offsets into the use list remain<br>
> - // stable but this use is ignored when rewriting.<br>
> - UI->U = 0;<br>
> + uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;<br>
> + uint64_t BeginIndex = BeginOffset / ElementSize;<br>
> + if (BeginIndex * ElementSize != BeginOffset ||<br>
> + BeginIndex >= Ty->getNumElements())<br>
> + return false;<br>
> + uint64_t EndOffset = I->EndOffset - PartitionBeginOffset;<br>
> + uint64_t EndIndex = EndOffset / ElementSize;<br>
> + if (EndIndex * ElementSize != EndOffset ||<br>
> + EndIndex > Ty->getNumElements())<br>
> + return false;<br>
> +<br>
> + // FIXME: We should build shuffle vector instructions to handle<br>
> + // non-element-sized accesses.<br>
> + if ((EndOffset - BeginOffset) != ElementSize &&<br>
> + (EndOffset - BeginOffset) != VecSize)<br>
> + return false;<br>
> +<br>
> + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {<br>
> + if (MI->isVolatile())<br>
> + return false;<br>
> + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {<br>
> + const AllocaPartitioning::MemTransferOffsets &MTO<br>
> + = P.getMemTransferOffsets(*MTI);<br>
> + if (!MTO.IsSplittable)<br>
> + return false;<br>
> }<br>
> + } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {<br>
> + // Disable vector promotion when there are loads or stores of an FCA.<br>
> + return false;<br>
> + } else if (!isa<LoadInst>(I->U->getUser()) &&<br>
> + !isa<StoreInst>(I->U->getUser())) {<br>
> + return false;<br>
> }<br>
> + }<br>
> + return true;<br>
> +}<br>
><br>
> - Value *TV = SI.getTrueValue();<br>
> - Value *FV = SI.getFalseValue();<br>
> - // Replace the loads of the select with a select of two loads.<br>
> - while (!Loads.empty()) {<br>
> - LoadInst *LI = Loads.pop_back_val();<br>
> -<br>
> - IRB.SetInsertPoint(LI);<br>
> - LoadInst *TL =<br>
> - IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");<br>
> - LoadInst *FL =<br>
> - IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");<br>
> - NumLoadsSpeculated += 2;<br>
> +/// \brief Test whether the given alloca partition can be promoted to an int.<br>
> +///<br>
> +/// This is a quick test to check whether we can rewrite a particular alloca<br>
> +/// partition (and its newly formed alloca) into an integer alloca suitable for<br>
> +/// promotion to an SSA value. We only can ensure this for a limited set of<br>
> +/// operations, and we don't want to do the rewrites unless we are confident<br>
> +/// that the result will be promotable, so we have an early test here.<br>
> +static bool isIntegerPromotionViable(const TargetData &TD,<br>
> + Type *AllocaTy,<br>
> + uint64_t AllocBeginOffset,<br>
> + AllocaPartitioning &P,<br>
> + AllocaPartitioning::const_use_iterator I,<br>
> + AllocaPartitioning::const_use_iterator E) {<br>
> + IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);<br>
> + if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth())<br>
> + return false;<br>
><br>
> - // Transfer alignment and TBAA info if present.<br>
> - TL->setAlignment(LI->getAlignment());<br>
> - FL->setAlignment(LI->getAlignment());<br>
> - if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {<br>
> - TL->setMetadata(LLVMContext::MD_tbaa, Tag);<br>
> - FL->setMetadata(LLVMContext::MD_tbaa, Tag);<br>
> - }<br>
> + // Check the uses to ensure the uses are (likely) promoteable integer uses.<br>
> + // Also ensure that the alloca has a covering load or store. We don't want<br>
> + // promote because of some other unsplittable entry (which we may make<br>
> + // splittable later) and lose the ability to promote each element access.<br>
> + bool WholeAllocaOp = false;<br>
> + for (; I != E; ++I) {<br>
> + if (!I->U)<br>
> + continue; // Skip dead use.<br>
><br>
> - Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,<br>
> - LI->getName() + ".sroa.speculated");<br>
> + // We can't reasonably handle cases where the load or store extends past<br>
> + // the end of the aloca's type and into its padding.<br>
> + if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty))<br>
> + return false;<br>
><br>
> - LoadInst *Loads[2] = { TL, FL };<br>
> - for (unsigned i = 0, e = 2; i != e; ++i) {<br>
> - if (PIs[i] != P.end()) {<br>
> - Use *LoadUse = &Loads[i]->getOperandUse(0);<br>
> - assert(PUs[i].U->get() == LoadUse->get());<br>
> - PUs[i].U = LoadUse;<br>
> - P.use_push_back(PIs[i], PUs[i]);<br>
> - }<br>
> + if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {<br>
> + if (LI->isVolatile() || !LI->getType()->isIntegerTy())<br>
> + return false;<br>
> + if (LI->getType() == Ty)<br>
> + WholeAllocaOp = true;<br>
> + } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {<br>
> + if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())<br>
> + return false;<br>
> + if (SI->getValueOperand()->getType() == Ty)<br>
> + WholeAllocaOp = true;<br>
> + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {<br>
> + if (MI->isVolatile())<br>
> + return false;<br>
> + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {<br>
> + const AllocaPartitioning::MemTransferOffsets &MTO<br>
> + = P.getMemTransferOffsets(*MTI);<br>
> + if (!MTO.IsSplittable)<br>
> + return false;<br>
> }<br>
> -<br>
> - DEBUG(dbgs() << " speculated to: " << *V << "\n");<br>
> - LI->replaceAllUsesWith(V);<br>
> - Pass.DeadInsts.push_back(LI);<br>
> + } else {<br>
> + return false;<br>
> }<br>
> }<br>
> -};<br>
> + return WholeAllocaOp;<br>
> +}<br>
><br>
> +namespace {<br>
> /// \brief Visitor to rewrite instructions using a partition of an alloca to<br>
> /// use a new alloca.<br>
> ///<br>
><br>
><br>
_______________________________________________
llvm-commits mailing list
llvm-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits