[llvm-commits] [llvm] r165284 - /llvm/trunk/lib/Transforms/Scalar/SROA.cpp
Chandler Carruth
chandlerc at gmail.com
Thu Oct 4 23:13:25 PDT 2012
On Thu, Oct 4, 2012 at 6:52 PM, Sean Silva <silvas at purdue.edu> wrote:
> protip: pass --patience or --histogram to git's diff-generating
> commands (git diff, git log -p, etc.) to select alternative diff
> algorithms.
I'm well aware of this, but it does nothing for the commit mailing list,
which is what my commit log was written for...
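
For anyone reading this later who does want to try it locally, the invocations
look roughly like this (the path below is just an example):

  # Pick an alternative diff algorithm for a single command.
  git diff --patience
  git log -p --histogram -- lib/Transforms/Scalar/SROA.cpp

  # Newer versions of git can also make one of them the default:
  git config --global diff.algorithm histogram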
> I just tried it out locally for this patch and the diff is
> dramatically better (the diff is a single big block of + and a single
> big block of -). FWIW, I find that patience and histogram are usually
> basically the same (histogram is an extension of patience), but either
> of them is usually significantly better than the default.
>
> More info about patience diff, for the curious:
> http://bramcohen.livejournal.com/73318.html
> More info about histogram diff:
>
> http://download.eclipse.org/jgit/docs/jgit-2.0.0.201206130900-r/apidocs/org/eclipse/jgit/diff/HistogramDiff.html
>
> -- Sean Silva
>
> On Thu, Oct 4, 2012 at 9:29 PM, Chandler Carruth <chandlerc at gmail.com>
> wrote:
> > Author: chandlerc
> > Date: Thu Oct 4 20:29:06 2012
> > New Revision: 165284
> >
> > URL: http://llvm.org/viewvc/llvm-project?rev=165284&view=rev
> > Log:
> > Lift the speculation visitor above all the helpers that are targeted at
> > the rewrite visitor, to make it clearer that the speculation is completely
> > independent.
> >
> > I promise that this is just a cut/paste of the one visitor plus the
> > anonymous namespace wrappings. The diff may look completely preposterous;
> > it does in git, for some reason.
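
In outline, the change amounts to nothing more than the following shape; this
is a sketch of the move, not the literal code:

  namespace {
  /// \brief Visitor to speculate PHIs and Selects where possible.
  class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {
    // ... body moved verbatim from its old position further down ...
  };
  } // end anonymous namespace

  // The free helpers (accumulateGEPOffsets, buildGEP, getAdjustedPtr,
  // isVectorPromotionViable, ...) now follow the visitor instead of
  // preceding it.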
> >
> > Modified:
> > llvm/trunk/lib/Transforms/Scalar/SROA.cpp
> >
> > Modified: llvm/trunk/lib/Transforms/Scalar/SROA.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SROA.cpp?rev=165284&r1=165283&r2=165284&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/lib/Transforms/Scalar/SROA.cpp (original)
> > +++ llvm/trunk/lib/Transforms/Scalar/SROA.cpp Thu Oct 4 20:29:06 2012
> > @@ -1368,715 +1368,717 @@
> > INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
> > false, false)
> >
> > -/// \brief Accumulate the constant offsets in a GEP into a single APInt
> offset.
> > -///
> > -/// If the provided GEP is all-constant, the total byte offset formed
> by the
> > -/// GEP is computed and Offset is set to it. If the GEP has any
> non-constant
> > -/// operands, the function returns false and the value of Offset is
> unmodified.
> > -static bool accumulateGEPOffsets(const TargetData &TD, GEPOperator &GEP,
> > - APInt &Offset) {
> > - APInt GEPOffset(Offset.getBitWidth(), 0);
> > - for (gep_type_iterator GTI = gep_type_begin(GEP), GTE =
> gep_type_end(GEP);
> > - GTI != GTE; ++GTI) {
> > - ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
> > - if (!OpC)
> > - return false;
> > - if (OpC->isZero()) continue;
> > +namespace {
> > +/// \brief Visitor to speculate PHIs and Selects where possible.
> > +class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator>
> {
> > + // Befriend the base class so it can delegate to private visit
> methods.
> > + friend class llvm::InstVisitor<PHIOrSelectSpeculator>;
> >
> > - // Handle a struct index, which adds its field offset to the
> pointer.
> > - if (StructType *STy = dyn_cast<StructType>(*GTI)) {
> > - unsigned ElementIdx = OpC->getZExtValue();
> > - const StructLayout *SL = TD.getStructLayout(STy);
> > - GEPOffset += APInt(Offset.getBitWidth(),
> > - SL->getElementOffset(ElementIdx));
> > - continue;
> > - }
> > + const TargetData &TD;
> > + AllocaPartitioning &P;
> > + SROA &Pass;
> >
> > - APInt TypeSize(Offset.getBitWidth(),
> > - TD.getTypeAllocSize(GTI.getIndexedType()));
> > - if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) {
> > - assert((VTy->getScalarSizeInBits() % 8) == 0 &&
> > - "vector element size is not a multiple of 8, cannot GEP
> over it");
> > - TypeSize = VTy->getScalarSizeInBits() / 8;
> > - }
> > +public:
> > + PHIOrSelectSpeculator(const TargetData &TD, AllocaPartitioning &P,
> SROA &Pass)
> > + : TD(TD), P(P), Pass(Pass) {}
> >
> > - GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) *
> TypeSize;
> > + /// \brief Visit the users of an alloca partition and rewrite them.
> > + void visitUsers(AllocaPartitioning::const_iterator PI) {
> > + // Note that we need to use an index here as the underlying vector
> of uses
> > + // may be grown during speculation. However, we never need to
> re-visit the
> > + // new uses, and so we can use the initial size bound.
> > + for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
> > + const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
> > + if (!PU.U)
> > + continue; // Skip dead use.
> > +
> > + visit(cast<Instruction>(PU.U->getUser()));
> > + }
> > }
> > - Offset = GEPOffset;
> > - return true;
> > -}
> >
> > -/// \brief Build a GEP out of a base pointer and indices.
> > -///
> > -/// This will return the BasePtr if that is valid, or build a new GEP
> > -/// instruction using the IRBuilder if GEP-ing is needed.
> > -static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
> > - SmallVectorImpl<Value *> &Indices,
> > - const Twine &Prefix) {
> > - if (Indices.empty())
> > - return BasePtr;
> > +private:
> > + // By default, skip this instruction.
> > + void visitInstruction(Instruction &I) {}
> >
> > - // A single zero index is a no-op, so check for this and avoid
> building a GEP
> > - // in that case.
> > - if (Indices.size() == 1 &&
> cast<ConstantInt>(Indices.back())->isZero())
> > - return BasePtr;
> > + /// PHI instructions that use an alloca and are subsequently loaded
> can be
> > + /// rewritten to load both input pointers in the pred blocks and then
> PHI the
> > + /// results, allowing the load of the alloca to be promoted.
> > + /// From this:
> > + /// %P2 = phi [i32* %Alloca, i32* %Other]
> > + /// %V = load i32* %P2
> > + /// to:
> > + /// %V1 = load i32* %Alloca -> will be mem2reg'd
> > + /// ...
> > + /// %V2 = load i32* %Other
> > + /// ...
> > + /// %V = phi [i32 %V1, i32 %V2]
> > + ///
> > + /// We can do this to a select if its only uses are loads and if the
> operands
> > + /// to the select can be loaded unconditionally.
> > + ///
> > + /// FIXME: This should be hoisted into a generic utility, likely in
> > + /// Transforms/Util/Local.h
> > + bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *>
> &Loads) {
> > + // For now, we can only do this promotion if the load is in the
> same block
> > + // as the PHI, and if there are no stores between the phi and load.
> > + // TODO: Allow recursive phi users.
> > + // TODO: Allow stores.
> > + BasicBlock *BB = PN.getParent();
> > + unsigned MaxAlign = 0;
> > + for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end();
> > + UI != UE; ++UI) {
> > + LoadInst *LI = dyn_cast<LoadInst>(*UI);
> > + if (LI == 0 || !LI->isSimple()) return false;
> >
> > - return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");
> > -}
> > + // For now we only allow loads in the same block as the PHI.
> This is
> > + // a common case that happens when instcombine merges two loads
> through
> > + // a PHI.
> > + if (LI->getParent() != BB) return false;
> >
> > -/// \brief Get a natural GEP off of the BasePtr walking through Ty
> toward
> > -/// TargetTy without changing the offset of the pointer.
> > -///
> > -/// This routine assumes we've already established a properly offset
> GEP with
> > -/// Indices, and arrived at the Ty type. The goal is to continue to GEP
> with
> > -/// zero-indices down through type layers until we find one the same as
> > -/// TargetTy. If we can't find one with the same type, we at least try
> to use
> > -/// one with the same size. If none of that works, we just produce the
> GEP as
> > -/// indicated by Indices to have the correct offset.
> > -static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const TargetData
> &TD,
> > - Value *BasePtr, Type *Ty, Type
> *TargetTy,
> > - SmallVectorImpl<Value *> &Indices,
> > - const Twine &Prefix) {
> > - if (Ty == TargetTy)
> > - return buildGEP(IRB, BasePtr, Indices, Prefix);
> > + // Ensure that there are no instructions between the PHI and the
> load that
> > + // could store.
> > + for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
> > + if (BBI->mayWriteToMemory())
> > + return false;
> >
> > - // See if we can descend into a struct and locate a field with the
> correct
> > - // type.
> > - unsigned NumLayers = 0;
> > - Type *ElementTy = Ty;
> > - do {
> > - if (ElementTy->isPointerTy())
> > - break;
> > - if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {
> > - ElementTy = SeqTy->getElementType();
> > - Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(),
> 0)));
> > - } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
> > - ElementTy = *STy->element_begin();
> > - Indices.push_back(IRB.getInt32(0));
> > - } else {
> > - break;
> > + MaxAlign = std::max(MaxAlign, LI->getAlignment());
> > + Loads.push_back(LI);
> > }
> > - ++NumLayers;
> > - } while (ElementTy != TargetTy);
> > - if (ElementTy != TargetTy)
> > - Indices.erase(Indices.end() - NumLayers, Indices.end());
> >
> > - return buildGEP(IRB, BasePtr, Indices, Prefix);
> > -}
> > + // We can only transform this if it is safe to push the loads into
> the
> > + // predecessor blocks. The only thing to watch out for is that we
> can't put
> > + // a possibly trapping load in the predecessor if it is a critical
> edge.
> > + for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
> > + ++Idx) {
> > + TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
> > + Value *InVal = PN.getIncomingValue(Idx);
> >
> > -/// \brief Recursively compute indices for a natural GEP.
> > -///
> > -/// This is the recursive step for getNaturalGEPWithOffset that walks
> down the
> > -/// element types adding appropriate indices for the GEP.
> > -static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const
> TargetData &TD,
> > - Value *Ptr, Type *Ty, APInt
> &Offset,
> > - Type *TargetTy,
> > - SmallVectorImpl<Value *>
> &Indices,
> > - const Twine &Prefix) {
> > - if (Offset == 0)
> > - return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices,
> Prefix);
> > + // If the value is produced by the terminator of the predecessor
> (an
> > + // invoke) or it has side-effects, there is no valid place to put
> a load
> > + // in the predecessor.
> > + if (TI == InVal || TI->mayHaveSideEffects())
> > + return false;
> >
> > - // We can't recurse through pointer types.
> > - if (Ty->isPointerTy())
> > - return 0;
> > + // If the predecessor has a single successor, then the edge isn't
> > + // critical.
> > + if (TI->getNumSuccessors() == 1)
> > + continue;
> >
> > - // We try to analyze GEPs over vectors here, but note that these GEPs
> are
> > - // extremely poorly defined currently. The long-term goal is to
> remove GEPing
> > - // over a vector from the IR completely.
> > - if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
> > - unsigned ElementSizeInBits = VecTy->getScalarSizeInBits();
> > - if (ElementSizeInBits % 8)
> > - return 0; // GEPs over non-multiple of 8 size vector elements are
> invalid.
> > - APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
> > - APInt NumSkippedElements = Offset.udiv(ElementSize);
> > - if (NumSkippedElements.ugt(VecTy->getNumElements()))
> > - return 0;
> > - Offset -= NumSkippedElements * ElementSize;
> > - Indices.push_back(IRB.getInt(NumSkippedElements));
> > - return getNaturalGEPRecursively(IRB, TD, Ptr,
> VecTy->getElementType(),
> > - Offset, TargetTy, Indices, Prefix);
> > - }
> > + // If this pointer is always safe to load, or if we can prove
> that there
> > + // is already a load in the block, then we can move the load to
> the pred
> > + // block.
> > + if (InVal->isDereferenceablePointer() ||
> > + isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD))
> > + continue;
> >
> > - if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
> > - Type *ElementTy = ArrTy->getElementType();
> > - APInt ElementSize(Offset.getBitWidth(),
> TD.getTypeAllocSize(ElementTy));
> > - APInt NumSkippedElements = Offset.udiv(ElementSize);
> > - if (NumSkippedElements.ugt(ArrTy->getNumElements()))
> > - return 0;
> > + return false;
> > + }
> >
> > - Offset -= NumSkippedElements * ElementSize;
> > - Indices.push_back(IRB.getInt(NumSkippedElements));
> > - return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset,
> TargetTy,
> > - Indices, Prefix);
> > + return true;
> > }
> >
> > - StructType *STy = dyn_cast<StructType>(Ty);
> > - if (!STy)
> > - return 0;
> > + void visitPHINode(PHINode &PN) {
> > + DEBUG(dbgs() << " original: " << PN << "\n");
> >
> > - const StructLayout *SL = TD.getStructLayout(STy);
> > - uint64_t StructOffset = Offset.getZExtValue();
> > - if (StructOffset >= SL->getSizeInBytes())
> > - return 0;
> > - unsigned Index = SL->getElementContainingOffset(StructOffset);
> > - Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
> > - Type *ElementTy = STy->getElementType(Index);
> > - if (Offset.uge(TD.getTypeAllocSize(ElementTy)))
> > - return 0; // The offset points into alignment padding.
> > + SmallVector<LoadInst *, 4> Loads;
> > + if (!isSafePHIToSpeculate(PN, Loads))
> > + return;
> >
> > - Indices.push_back(IRB.getInt32(Index));
> > - return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset,
> TargetTy,
> > - Indices, Prefix);
> > -}
> > + assert(!Loads.empty());
> >
> > -/// \brief Get a natural GEP from a base pointer to a particular offset
> and
> > -/// resulting in a particular type.
> > -///
> > -/// The goal is to produce a "natural" looking GEP that works with the
> existing
> > -/// composite types to arrive at the appropriate offset and element
> type for
> > -/// a pointer. TargetTy is the element type the returned GEP should
> point-to if
> > -/// possible. We recurse by decreasing Offset, adding the appropriate
> index to
> > -/// Indices, and setting Ty to the result subtype.
> > -///
> > -/// If no natural GEP can be constructed, this function returns null.
> > -static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const
> TargetData &TD,
> > - Value *Ptr, APInt Offset, Type
> *TargetTy,
> > - SmallVectorImpl<Value *> &Indices,
> > - const Twine &Prefix) {
> > - PointerType *Ty = cast<PointerType>(Ptr->getType());
> > + Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
> > + IRBuilder<> PHIBuilder(&PN);
> > + PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy,
> PN.getNumIncomingValues(),
> > + PN.getName() +
> ".sroa.speculated");
> >
> > - // Don't consider any GEPs through an i8* as natural unless the
> TargetTy is
> > - // an i8.
> > - if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))
> > - return 0;
> > -
> > - Type *ElementTy = Ty->getElementType();
> > - if (!ElementTy->isSized())
> > - return 0; // We can't GEP through an unsized element.
> > - APInt ElementSize(Offset.getBitWidth(),
> TD.getTypeAllocSize(ElementTy));
> > - if (ElementSize == 0)
> > - return 0; // Zero-length arrays can't help us build a natural GEP.
> > - APInt NumSkippedElements = Offset.udiv(ElementSize);
> > -
> > - Offset -= NumSkippedElements * ElementSize;
> > - Indices.push_back(IRB.getInt(NumSkippedElements));
> > - return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset,
> TargetTy,
> > - Indices, Prefix);
> > -}
> > + // Get the TBAA tag and alignment to use from one of the loads. It
> doesn't
> > + // matter which one we get and if any differ, it doesn't matter.
> > + LoadInst *SomeLoad = cast<LoadInst>(Loads.back());
> > + MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
> > + unsigned Align = SomeLoad->getAlignment();
> >
> > -/// \brief Compute an adjusted pointer from Ptr by Offset bytes where
> the
> > -/// resulting pointer has PointerTy.
> > -///
> > -/// This tries very hard to compute a "natural" GEP which arrives at
> the offset
> > -/// and produces the pointer type desired. Where it cannot, it will try
> to use
> > -/// the natural GEP to arrive at the offset and bitcast to the type.
> Where that
> > -/// fails, it will try to use an existing i8* and GEP to the byte
> offset and
> > -/// bitcast to the type.
> > -///
> > -/// The strategy for finding the more natural GEPs is to peel off
> layers of the
> > -/// pointer, walking back through bit casts and GEPs, searching for a
> base
> > -/// pointer from which we can compute a natural GEP with the desired
> > -/// properities. The algorithm tries to fold as many constant indices
> into
> > -/// a single GEP as possible, thus making each GEP more independent of
> the
> > -/// surrounding code.
> > -static Value *getAdjustedPtr(IRBuilder<> &IRB, const TargetData &TD,
> > - Value *Ptr, APInt Offset, Type *PointerTy,
> > - const Twine &Prefix) {
> > - // Even though we don't look through PHI nodes, we could be called on
> an
> > - // instruction in an unreachable block, which may be on a cycle.
> > - SmallPtrSet<Value *, 4> Visited;
> > - Visited.insert(Ptr);
> > - SmallVector<Value *, 4> Indices;
> > + // Rewrite all loads of the PN to use the new PHI.
> > + do {
> > + LoadInst *LI = Loads.pop_back_val();
> > + LI->replaceAllUsesWith(NewPN);
> > + Pass.DeadInsts.push_back(LI);
> > + } while (!Loads.empty());
> >
> > - // We may end up computing an offset pointer that has the wrong type.
> If we
> > - // never are able to compute one directly that has the correct type,
> we'll
> > - // fall back to it, so keep it around here.
> > - Value *OffsetPtr = 0;
> > + // Inject loads into all of the pred blocks.
> > + for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
> ++Idx) {
> > + BasicBlock *Pred = PN.getIncomingBlock(Idx);
> > + TerminatorInst *TI = Pred->getTerminator();
> > + Use *InUse =
> &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
> > + Value *InVal = PN.getIncomingValue(Idx);
> > + IRBuilder<> PredBuilder(TI);
> >
> > - // Remember any i8 pointer we come across to re-use if we need to do
> a raw
> > - // byte offset.
> > - Value *Int8Ptr = 0;
> > - APInt Int8PtrOffset(Offset.getBitWidth(), 0);
> > + LoadInst *Load
> > + = PredBuilder.CreateLoad(InVal, (PN.getName() +
> ".sroa.speculate.load." +
> > + Pred->getName()));
> > + ++NumLoadsSpeculated;
> > + Load->setAlignment(Align);
> > + if (TBAATag)
> > + Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
> > + NewPN->addIncoming(Load, Pred);
> >
> > - Type *TargetTy = PointerTy->getPointerElementType();
> > + Instruction *Ptr = dyn_cast<Instruction>(InVal);
> > + if (!Ptr)
> > + // No uses to rewrite.
> > + continue;
> >
> > - do {
> > - // First fold any existing GEPs into the offset.
> > - while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
> > - APInt GEPOffset(Offset.getBitWidth(), 0);
> > - if (!accumulateGEPOffsets(TD, *GEP, GEPOffset))
> > - break;
> > - Offset += GEPOffset;
> > - Ptr = GEP->getPointerOperand();
> > - if (!Visited.insert(Ptr))
> > - break;
> > - }
> > + // Try to lookup and rewrite any partition uses corresponding to
> this phi
> > + // input.
> > + AllocaPartitioning::iterator PI
> > + = P.findPartitionForPHIOrSelectOperand(InUse);
> > + if (PI == P.end())
> > + continue;
> >
> > - // See if we can perform a natural GEP here.
> > - Indices.clear();
> > - if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset,
> TargetTy,
> > - Indices, Prefix)) {
> > - if (P->getType() == PointerTy) {
> > - // Zap any offset pointer that we ended up computing in
> previous rounds.
> > - if (OffsetPtr && OffsetPtr->use_empty())
> > - if (Instruction *I = dyn_cast<Instruction>(OffsetPtr))
> > - I->eraseFromParent();
> > - return P;
> > - }
> > - if (!OffsetPtr) {
> > - OffsetPtr = P;
> > - }
> > + // Replace the Use in the PartitionUse for this operand with the
> Use
> > + // inside the load.
> > + AllocaPartitioning::use_iterator UI
> > + = P.findPartitionUseForPHIOrSelectOperand(InUse);
> > + assert(isa<PHINode>(*UI->U->getUser()));
> > + UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
> > }
> > + DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
> > + }
> >
> > - // Stash this pointer if we've found an i8*.
> > - if (Ptr->getType()->isIntegerTy(8)) {
> > - Int8Ptr = Ptr;
> > - Int8PtrOffset = Offset;
> > - }
> > + /// Select instructions that use an alloca and are subsequently
> loaded can be
> > + /// rewritten to load both input pointers and then select between the
> result,
> > + /// allowing the load of the alloca to be promoted.
> > + /// From this:
> > + /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
> > + /// %V = load i32* %P2
> > + /// to:
> > + /// %V1 = load i32* %Alloca -> will be mem2reg'd
> > + /// %V2 = load i32* %Other
> > + /// %V = select i1 %cond, i32 %V1, i32 %V2
> > + ///
> > + /// We can do this to a select if its only uses are loads and if the
> operand
> > + /// to the select can be loaded unconditionally.
> > + bool isSafeSelectToSpeculate(SelectInst &SI,
> > + SmallVectorImpl<LoadInst *> &Loads) {
> > + Value *TValue = SI.getTrueValue();
> > + Value *FValue = SI.getFalseValue();
> > + bool TDerefable = TValue->isDereferenceablePointer();
> > + bool FDerefable = FValue->isDereferenceablePointer();
> >
> > - // Peel off a layer of the pointer and update the offset
> appropriately.
> > - if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
> > - Ptr = cast<Operator>(Ptr)->getOperand(0);
> > - } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
> > - if (GA->mayBeOverridden())
> > - break;
> > - Ptr = GA->getAliasee();
> > - } else {
> > - break;
> > - }
> > - assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
> > - } while (Visited.insert(Ptr));
> > + for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end();
> > + UI != UE; ++UI) {
> > + LoadInst *LI = dyn_cast<LoadInst>(*UI);
> > + if (LI == 0 || !LI->isSimple()) return false;
> >
> > - if (!OffsetPtr) {
> > - if (!Int8Ptr) {
> > - Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
> > - Prefix + ".raw_cast");
> > - Int8PtrOffset = Offset;
> > + // Both operands to the select need to be dereferencable, either
> > + // absolutely (e.g. allocas) or at this point because we can see
> other
> > + // accesses to it.
> > + if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI,
> > +
> LI->getAlignment(), &TD))
> > + return false;
> > + if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI,
> > +
> LI->getAlignment(), &TD))
> > + return false;
> > + Loads.push_back(LI);
> > }
> >
> > - OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
> > - IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
> > - Prefix + ".raw_idx");
> > + return true;
> > }
> > - Ptr = OffsetPtr;
> >
> > - // On the off chance we were targeting i8*, guard the bitcast here.
> > - if (Ptr->getType() != PointerTy)
> > - Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");
> > + void visitSelectInst(SelectInst &SI) {
> > + DEBUG(dbgs() << " original: " << SI << "\n");
> > + IRBuilder<> IRB(&SI);
> >
> > - return Ptr;
> > -}
> > + // If the select isn't safe to speculate, just use simple logic to
> emit it.
> > + SmallVector<LoadInst *, 4> Loads;
> > + if (!isSafeSelectToSpeculate(SI, Loads))
> > + return;
> >
> > -/// \brief Test whether the given alloca partition can be promoted to a
> vector.
> > -///
> > -/// This is a quick test to check whether we can rewrite a particular
> alloca
> > -/// partition (and its newly formed alloca) into a vector alloca with
> only
> > -/// whole-vector loads and stores such that it could be promoted to a
> vector
> > -/// SSA value. We only can ensure this for a limited set of operations,
> and we
> > -/// don't want to do the rewrites unless we are confident that the
> result will
> > -/// be promotable, so we have an early test here.
> > -static bool isVectorPromotionViable(const TargetData &TD,
> > - Type *AllocaTy,
> > - AllocaPartitioning &P,
> > - uint64_t PartitionBeginOffset,
> > - uint64_t PartitionEndOffset,
> > -
> AllocaPartitioning::const_use_iterator I,
> > -
> AllocaPartitioning::const_use_iterator E) {
> > - VectorType *Ty = dyn_cast<VectorType>(AllocaTy);
> > - if (!Ty)
> > - return false;
> > + Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
> > + AllocaPartitioning::iterator PIs[2];
> > + AllocaPartitioning::PartitionUse PUs[2];
> > + for (unsigned i = 0, e = 2; i != e; ++i) {
> > + PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
> > + if (PIs[i] != P.end()) {
> > + // If the pointer is within the partitioning, remove the select
> from
> > + // its uses. We'll add in the new loads below.
> > + AllocaPartitioning::use_iterator UI
> > + = P.findPartitionUseForPHIOrSelectOperand(Ops[i]);
> > + PUs[i] = *UI;
> > + // Clear out the use here so that the offsets into the use list
> remain
> > + // stable but this use is ignored when rewriting.
> > + UI->U = 0;
> > + }
> > + }
> >
> > - uint64_t VecSize = TD.getTypeSizeInBits(Ty);
> > - uint64_t ElementSize = Ty->getScalarSizeInBits();
> > + Value *TV = SI.getTrueValue();
> > + Value *FV = SI.getFalseValue();
> > + // Replace the loads of the select with a select of two loads.
> > + while (!Loads.empty()) {
> > + LoadInst *LI = Loads.pop_back_val();
> >
> > - // While the definition of LLVM vectors is bitpacked, we don't
> support sizes
> > - // that aren't byte sized.
> > - if (ElementSize % 8)
> > - return false;
> > - assert((VecSize % 8) == 0 && "vector size not a multiple of element
> size?");
> > - VecSize /= 8;
> > - ElementSize /= 8;
> > + IRB.SetInsertPoint(LI);
> > + LoadInst *TL =
> > + IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
> > + LoadInst *FL =
> > + IRB.CreateLoad(FV, LI->getName() +
> ".sroa.speculate.load.false");
> > + NumLoadsSpeculated += 2;
> >
> > - for (; I != E; ++I) {
> > - if (!I->U)
> > - continue; // Skip dead use.
> > + // Transfer alignment and TBAA info if present.
> > + TL->setAlignment(LI->getAlignment());
> > + FL->setAlignment(LI->getAlignment());
> > + if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
> > + TL->setMetadata(LLVMContext::MD_tbaa, Tag);
> > + FL->setMetadata(LLVMContext::MD_tbaa, Tag);
> > + }
> >
> > - uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
> > - uint64_t BeginIndex = BeginOffset / ElementSize;
> > - if (BeginIndex * ElementSize != BeginOffset ||
> > - BeginIndex >= Ty->getNumElements())
> > - return false;
> > - uint64_t EndOffset = I->EndOffset - PartitionBeginOffset;
> > - uint64_t EndIndex = EndOffset / ElementSize;
> > - if (EndIndex * ElementSize != EndOffset ||
> > - EndIndex > Ty->getNumElements())
> > - return false;
> > -
> > - // FIXME: We should build shuffle vector instructions to handle
> > - // non-element-sized accesses.
> > - if ((EndOffset - BeginOffset) != ElementSize &&
> > - (EndOffset - BeginOffset) != VecSize)
> > - return false;
> > + Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
> > + LI->getName() + ".sroa.speculated");
> >
> > - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
> > - if (MI->isVolatile())
> > - return false;
> > - if (MemTransferInst *MTI =
> dyn_cast<MemTransferInst>(I->U->getUser())) {
> > - const AllocaPartitioning::MemTransferOffsets &MTO
> > - = P.getMemTransferOffsets(*MTI);
> > - if (!MTO.IsSplittable)
> > - return false;
> > + LoadInst *Loads[2] = { TL, FL };
> > + for (unsigned i = 0, e = 2; i != e; ++i) {
> > + if (PIs[i] != P.end()) {
> > + Use *LoadUse = &Loads[i]->getOperandUse(0);
> > + assert(PUs[i].U->get() == LoadUse->get());
> > + PUs[i].U = LoadUse;
> > + P.use_push_back(PIs[i], PUs[i]);
> > + }
> > }
> > - } else if
> (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
> > - // Disable vector promotion when there are loads or stores of an
> FCA.
> > - return false;
> > - } else if (!isa<LoadInst>(I->U->getUser()) &&
> > - !isa<StoreInst>(I->U->getUser())) {
> > - return false;
> > +
> > + DEBUG(dbgs() << " speculated to: " << *V << "\n");
> > + LI->replaceAllUsesWith(V);
> > + Pass.DeadInsts.push_back(LI);
> > }
> > }
> > - return true;
> > +};
> > }
> >
> > -/// \brief Test whether the given alloca partition can be promoted to
> an int.
> > +/// \brief Accumulate the constant offsets in a GEP into a single APInt
> offset.
> > ///
> > -/// This is a quick test to check whether we can rewrite a particular
> alloca
> > -/// partition (and its newly formed alloca) into an integer alloca
> suitable for
> > -/// promotion to an SSA value. We only can ensure this for a limited
> set of
> > -/// operations, and we don't want to do the rewrites unless we are
> confident
> > -/// that the result will be promotable, so we have an early test here.
> > -static bool isIntegerPromotionViable(const TargetData &TD,
> > - Type *AllocaTy,
> > - uint64_t AllocBeginOffset,
> > - AllocaPartitioning &P,
> > -
> AllocaPartitioning::const_use_iterator I,
> > -
> AllocaPartitioning::const_use_iterator E) {
> > - IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
> > - if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth())
> > - return false;
> > -
> > - // Check the uses to ensure the uses are (likely) promoteable integer
> uses.
> > - // Also ensure that the alloca has a covering load or store. We don't
> want
> > - // promote because of some other unsplittable entry (which we may make
> > - // splittable later) and lose the ability to promote each element
> access.
> > - bool WholeAllocaOp = false;
> > - for (; I != E; ++I) {
> > - if (!I->U)
> > - continue; // Skip dead use.
> > -
> > - // We can't reasonably handle cases where the load or store extends
> past
> > - // the end of the aloca's type and into its padding.
> > - if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty))
> > +/// If the provided GEP is all-constant, the total byte offset formed
> by the
> > +/// GEP is computed and Offset is set to it. If the GEP has any
> non-constant
> > +/// operands, the function returns false and the value of Offset is
> unmodified.
> > +static bool accumulateGEPOffsets(const TargetData &TD, GEPOperator &GEP,
> > + APInt &Offset) {
> > + APInt GEPOffset(Offset.getBitWidth(), 0);
> > + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE =
> gep_type_end(GEP);
> > + GTI != GTE; ++GTI) {
> > + ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
> > + if (!OpC)
> > return false;
> > + if (OpC->isZero()) continue;
> >
> > - if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
> > - if (LI->isVolatile() || !LI->getType()->isIntegerTy())
> > - return false;
> > - if (LI->getType() == Ty)
> > - WholeAllocaOp = true;
> > - } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
> > - if (SI->isVolatile() ||
> !SI->getValueOperand()->getType()->isIntegerTy())
> > - return false;
> > - if (SI->getValueOperand()->getType() == Ty)
> > - WholeAllocaOp = true;
> > - } else if (MemIntrinsic *MI =
> dyn_cast<MemIntrinsic>(I->U->getUser())) {
> > - if (MI->isVolatile())
> > - return false;
> > - if (MemTransferInst *MTI =
> dyn_cast<MemTransferInst>(I->U->getUser())) {
> > - const AllocaPartitioning::MemTransferOffsets &MTO
> > - = P.getMemTransferOffsets(*MTI);
> > - if (!MTO.IsSplittable)
> > - return false;
> > - }
> > - } else {
> > - return false;
> > + // Handle a struct index, which adds its field offset to the
> pointer.
> > + if (StructType *STy = dyn_cast<StructType>(*GTI)) {
> > + unsigned ElementIdx = OpC->getZExtValue();
> > + const StructLayout *SL = TD.getStructLayout(STy);
> > + GEPOffset += APInt(Offset.getBitWidth(),
> > + SL->getElementOffset(ElementIdx));
> > + continue;
> > }
> > +
> > + APInt TypeSize(Offset.getBitWidth(),
> > + TD.getTypeAllocSize(GTI.getIndexedType()));
> > + if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) {
> > + assert((VTy->getScalarSizeInBits() % 8) == 0 &&
> > + "vector element size is not a multiple of 8, cannot GEP
> over it");
> > + TypeSize = VTy->getScalarSizeInBits() / 8;
> > + }
> > +
> > + GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) *
> TypeSize;
> > }
> > - return WholeAllocaOp;
> > + Offset = GEPOffset;
> > + return true;
> > }
> >
> > -namespace {
> > -/// \brief Visitor to speculate PHIs and Selects where possible.
> > -class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator>
> {
> > - // Befriend the base class so it can delegate to private visit
> methods.
> > - friend class llvm::InstVisitor<PHIOrSelectSpeculator>;
> > +/// \brief Build a GEP out of a base pointer and indices.
> > +///
> > +/// This will return the BasePtr if that is valid, or build a new GEP
> > +/// instruction using the IRBuilder if GEP-ing is needed.
> > +static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
> > + SmallVectorImpl<Value *> &Indices,
> > + const Twine &Prefix) {
> > + if (Indices.empty())
> > + return BasePtr;
> >
> > - const TargetData &TD;
> > - AllocaPartitioning &P;
> > - SROA &Pass;
> > + // A single zero index is a no-op, so check for this and avoid
> building a GEP
> > + // in that case.
> > + if (Indices.size() == 1 &&
> cast<ConstantInt>(Indices.back())->isZero())
> > + return BasePtr;
> >
> > -public:
> > - PHIOrSelectSpeculator(const TargetData &TD, AllocaPartitioning &P,
> SROA &Pass)
> > - : TD(TD), P(P), Pass(Pass) {}
> > + return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");
> > +}
> >
> > - /// \brief Visit the users of an alloca partition and rewrite them.
> > - void visitUsers(AllocaPartitioning::const_iterator PI) {
> > - // Note that we need to use an index here as the underlying vector
> of uses
> > - // may be grown during speculation. However, we never need to
> re-visit the
> > - // new uses, and so we can use the initial size bound.
> > - for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
> > - const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
> > - if (!PU.U)
> > - continue; // Skip dead use.
> > +/// \brief Get a natural GEP off of the BasePtr walking through Ty
> toward
> > +/// TargetTy without changing the offset of the pointer.
> > +///
> > +/// This routine assumes we've already established a properly offset
> GEP with
> > +/// Indices, and arrived at the Ty type. The goal is to continue to GEP
> with
> > +/// zero-indices down through type layers until we find one the same as
> > +/// TargetTy. If we can't find one with the same type, we at least try
> to use
> > +/// one with the same size. If none of that works, we just produce the
> GEP as
> > +/// indicated by Indices to have the correct offset.
> > +static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const TargetData
> &TD,
> > + Value *BasePtr, Type *Ty, Type
> *TargetTy,
> > + SmallVectorImpl<Value *> &Indices,
> > + const Twine &Prefix) {
> > + if (Ty == TargetTy)
> > + return buildGEP(IRB, BasePtr, Indices, Prefix);
> >
> > - visit(cast<Instruction>(PU.U->getUser()));
> > + // See if we can descend into a struct and locate a field with the
> correct
> > + // type.
> > + unsigned NumLayers = 0;
> > + Type *ElementTy = Ty;
> > + do {
> > + if (ElementTy->isPointerTy())
> > + break;
> > + if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {
> > + ElementTy = SeqTy->getElementType();
> > + Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(),
> 0)));
> > + } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
> > + ElementTy = *STy->element_begin();
> > + Indices.push_back(IRB.getInt32(0));
> > + } else {
> > + break;
> > }
> > - }
> > + ++NumLayers;
> > + } while (ElementTy != TargetTy);
> > + if (ElementTy != TargetTy)
> > + Indices.erase(Indices.end() - NumLayers, Indices.end());
> >
> > -private:
> > - // By default, skip this instruction.
> > - void visitInstruction(Instruction &I) {}
> > + return buildGEP(IRB, BasePtr, Indices, Prefix);
> > +}
> >
> > - /// PHI instructions that use an alloca and are subsequently loaded
> can be
> > - /// rewritten to load both input pointers in the pred blocks and then
> PHI the
> > - /// results, allowing the load of the alloca to be promoted.
> > - /// From this:
> > - /// %P2 = phi [i32* %Alloca, i32* %Other]
> > - /// %V = load i32* %P2
> > - /// to:
> > - /// %V1 = load i32* %Alloca -> will be mem2reg'd
> > - /// ...
> > - /// %V2 = load i32* %Other
> > - /// ...
> > - /// %V = phi [i32 %V1, i32 %V2]
> > - ///
> > - /// We can do this to a select if its only uses are loads and if the
> operands
> > - /// to the select can be loaded unconditionally.
> > - ///
> > - /// FIXME: This should be hoisted into a generic utility, likely in
> > - /// Transforms/Util/Local.h
> > - bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *>
> &Loads) {
> > - // For now, we can only do this promotion if the load is in the
> same block
> > - // as the PHI, and if there are no stores between the phi and load.
> > - // TODO: Allow recursive phi users.
> > - // TODO: Allow stores.
> > - BasicBlock *BB = PN.getParent();
> > - unsigned MaxAlign = 0;
> > - for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end();
> > - UI != UE; ++UI) {
> > - LoadInst *LI = dyn_cast<LoadInst>(*UI);
> > - if (LI == 0 || !LI->isSimple()) return false;
> > +/// \brief Recursively compute indices for a natural GEP.
> > +///
> > +/// This is the recursive step for getNaturalGEPWithOffset that walks
> down the
> > +/// element types adding appropriate indices for the GEP.
> > +static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const
> TargetData &TD,
> > + Value *Ptr, Type *Ty, APInt
> &Offset,
> > + Type *TargetTy,
> > + SmallVectorImpl<Value *>
> &Indices,
> > + const Twine &Prefix) {
> > + if (Offset == 0)
> > + return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices,
> Prefix);
> >
> > - // For now we only allow loads in the same block as the PHI.
> This is
> > - // a common case that happens when instcombine merges two loads
> through
> > - // a PHI.
> > - if (LI->getParent() != BB) return false;
> > + // We can't recurse through pointer types.
> > + if (Ty->isPointerTy())
> > + return 0;
> >
> > - // Ensure that there are no instructions between the PHI and the
> load that
> > - // could store.
> > - for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
> > - if (BBI->mayWriteToMemory())
> > - return false;
> > + // We try to analyze GEPs over vectors here, but note that these GEPs
> are
> > + // extremely poorly defined currently. The long-term goal is to
> remove GEPing
> > + // over a vector from the IR completely.
> > + if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
> > + unsigned ElementSizeInBits = VecTy->getScalarSizeInBits();
> > + if (ElementSizeInBits % 8)
> > + return 0; // GEPs over non-multiple of 8 size vector elements are
> invalid.
> > + APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
> > + APInt NumSkippedElements = Offset.udiv(ElementSize);
> > + if (NumSkippedElements.ugt(VecTy->getNumElements()))
> > + return 0;
> > + Offset -= NumSkippedElements * ElementSize;
> > + Indices.push_back(IRB.getInt(NumSkippedElements));
> > + return getNaturalGEPRecursively(IRB, TD, Ptr,
> VecTy->getElementType(),
> > + Offset, TargetTy, Indices, Prefix);
> > + }
> >
> > - MaxAlign = std::max(MaxAlign, LI->getAlignment());
> > - Loads.push_back(LI);
> > - }
> > + if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
> > + Type *ElementTy = ArrTy->getElementType();
> > + APInt ElementSize(Offset.getBitWidth(),
> TD.getTypeAllocSize(ElementTy));
> > + APInt NumSkippedElements = Offset.udiv(ElementSize);
> > + if (NumSkippedElements.ugt(ArrTy->getNumElements()))
> > + return 0;
> >
> > - // We can only transform this if it is safe to push the loads into
> the
> > - // predecessor blocks. The only thing to watch out for is that we
> can't put
> > - // a possibly trapping load in the predecessor if it is a critical
> edge.
> > - for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
> > - ++Idx) {
> > - TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
> > - Value *InVal = PN.getIncomingValue(Idx);
> > + Offset -= NumSkippedElements * ElementSize;
> > + Indices.push_back(IRB.getInt(NumSkippedElements));
> > + return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset,
> TargetTy,
> > + Indices, Prefix);
> > + }
> >
> > - // If the value is produced by the terminator of the predecessor
> (an
> > - // invoke) or it has side-effects, there is no valid place to put
> a load
> > - // in the predecessor.
> > - if (TI == InVal || TI->mayHaveSideEffects())
> > - return false;
> > + StructType *STy = dyn_cast<StructType>(Ty);
> > + if (!STy)
> > + return 0;
> >
> > - // If the predecessor has a single successor, then the edge isn't
> > - // critical.
> > - if (TI->getNumSuccessors() == 1)
> > - continue;
> > + const StructLayout *SL = TD.getStructLayout(STy);
> > + uint64_t StructOffset = Offset.getZExtValue();
> > + if (StructOffset >= SL->getSizeInBytes())
> > + return 0;
> > + unsigned Index = SL->getElementContainingOffset(StructOffset);
> > + Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
> > + Type *ElementTy = STy->getElementType(Index);
> > + if (Offset.uge(TD.getTypeAllocSize(ElementTy)))
> > + return 0; // The offset points into alignment padding.
> >
> > - // If this pointer is always safe to load, or if we can prove
> that there
> > - // is already a load in the block, then we can move the load to
> the pred
> > - // block.
> > - if (InVal->isDereferenceablePointer() ||
> > - isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD))
> > - continue;
> > + Indices.push_back(IRB.getInt32(Index));
> > + return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset,
> TargetTy,
> > + Indices, Prefix);
> > +}
> >
> > - return false;
> > - }
> > +/// \brief Get a natural GEP from a base pointer to a particular offset
> and
> > +/// resulting in a particular type.
> > +///
> > +/// The goal is to produce a "natural" looking GEP that works with the
> existing
> > +/// composite types to arrive at the appropriate offset and element
> type for
> > +/// a pointer. TargetTy is the element type the returned GEP should
> point-to if
> > +/// possible. We recurse by decreasing Offset, adding the appropriate
> index to
> > +/// Indices, and setting Ty to the result subtype.
> > +///
> > +/// If no natural GEP can be constructed, this function returns null.
> > +static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const
> TargetData &TD,
> > + Value *Ptr, APInt Offset, Type
> *TargetTy,
> > + SmallVectorImpl<Value *> &Indices,
> > + const Twine &Prefix) {
> > + PointerType *Ty = cast<PointerType>(Ptr->getType());
> >
> > - return true;
> > - }
> > + // Don't consider any GEPs through an i8* as natural unless the
> TargetTy is
> > + // an i8.
> > + if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))
> > + return 0;
> >
> > - void visitPHINode(PHINode &PN) {
> > - DEBUG(dbgs() << " original: " << PN << "\n");
> > + Type *ElementTy = Ty->getElementType();
> > + if (!ElementTy->isSized())
> > + return 0; // We can't GEP through an unsized element.
> > + APInt ElementSize(Offset.getBitWidth(),
> TD.getTypeAllocSize(ElementTy));
> > + if (ElementSize == 0)
> > + return 0; // Zero-length arrays can't help us build a natural GEP.
> > + APInt NumSkippedElements = Offset.udiv(ElementSize);
> >
> > - SmallVector<LoadInst *, 4> Loads;
> > - if (!isSafePHIToSpeculate(PN, Loads))
> > - return;
> > + Offset -= NumSkippedElements * ElementSize;
> > + Indices.push_back(IRB.getInt(NumSkippedElements));
> > + return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset,
> TargetTy,
> > + Indices, Prefix);
> > +}
> >
> > - assert(!Loads.empty());
> > +/// \brief Compute an adjusted pointer from Ptr by Offset bytes where
> the
> > +/// resulting pointer has PointerTy.
> > +///
> > +/// This tries very hard to compute a "natural" GEP which arrives at
> the offset
> > +/// and produces the pointer type desired. Where it cannot, it will try
> to use
> > +/// the natural GEP to arrive at the offset and bitcast to the type.
> Where that
> > +/// fails, it will try to use an existing i8* and GEP to the byte
> offset and
> > +/// bitcast to the type.
> > +///
> > +/// The strategy for finding the more natural GEPs is to peel off
> layers of the
> > +/// pointer, walking back through bit casts and GEPs, searching for a
> base
> > +/// pointer from which we can compute a natural GEP with the desired
> > +/// properities. The algorithm tries to fold as many constant indices
> into
> > +/// a single GEP as possible, thus making each GEP more independent of
> the
> > +/// surrounding code.
> > +static Value *getAdjustedPtr(IRBuilder<> &IRB, const TargetData &TD,
> > + Value *Ptr, APInt Offset, Type *PointerTy,
> > + const Twine &Prefix) {
> > + // Even though we don't look through PHI nodes, we could be called on
> an
> > + // instruction in an unreachable block, which may be on a cycle.
> > + SmallPtrSet<Value *, 4> Visited;
> > + Visited.insert(Ptr);
> > + SmallVector<Value *, 4> Indices;
> >
> > - Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
> > - IRBuilder<> PHIBuilder(&PN);
> > - PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy,
> PN.getNumIncomingValues(),
> > - PN.getName() +
> ".sroa.speculated");
> > + // We may end up computing an offset pointer that has the wrong type.
> If we
> > + // never are able to compute one directly that has the correct type,
> we'll
> > + // fall back to it, so keep it around here.
> > + Value *OffsetPtr = 0;
> >
> > - // Get the TBAA tag and alignment to use from one of the loads. It
> doesn't
> > - // matter which one we get and if any differ, it doesn't matter.
> > - LoadInst *SomeLoad = cast<LoadInst>(Loads.back());
> > - MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
> > - unsigned Align = SomeLoad->getAlignment();
> > + // Remember any i8 pointer we come across to re-use if we need to do
> a raw
> > + // byte offset.
> > + Value *Int8Ptr = 0;
> > + APInt Int8PtrOffset(Offset.getBitWidth(), 0);
> >
> > - // Rewrite all loads of the PN to use the new PHI.
> > - do {
> > - LoadInst *LI = Loads.pop_back_val();
> > - LI->replaceAllUsesWith(NewPN);
> > - Pass.DeadInsts.push_back(LI);
> > - } while (!Loads.empty());
> > + Type *TargetTy = PointerTy->getPointerElementType();
> >
> > - // Inject loads into all of the pred blocks.
> > - for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
> ++Idx) {
> > - BasicBlock *Pred = PN.getIncomingBlock(Idx);
> > - TerminatorInst *TI = Pred->getTerminator();
> > - Use *InUse =
> &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
> > - Value *InVal = PN.getIncomingValue(Idx);
> > - IRBuilder<> PredBuilder(TI);
> > + do {
> > + // First fold any existing GEPs into the offset.
> > + while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
> > + APInt GEPOffset(Offset.getBitWidth(), 0);
> > + if (!accumulateGEPOffsets(TD, *GEP, GEPOffset))
> > + break;
> > + Offset += GEPOffset;
> > + Ptr = GEP->getPointerOperand();
> > + if (!Visited.insert(Ptr))
> > + break;
> > + }
> >
> > - LoadInst *Load
> > - = PredBuilder.CreateLoad(InVal, (PN.getName() +
> ".sroa.speculate.load." +
> > - Pred->getName()));
> > - ++NumLoadsSpeculated;
> > - Load->setAlignment(Align);
> > - if (TBAATag)
> > - Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
> > - NewPN->addIncoming(Load, Pred);
> > + // See if we can perform a natural GEP here.
> > + Indices.clear();
> > + if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset,
> TargetTy,
> > + Indices, Prefix)) {
> > + if (P->getType() == PointerTy) {
> > + // Zap any offset pointer that we ended up computing in
> previous rounds.
> > + if (OffsetPtr && OffsetPtr->use_empty())
> > + if (Instruction *I = dyn_cast<Instruction>(OffsetPtr))
> > + I->eraseFromParent();
> > + return P;
> > + }
> > + if (!OffsetPtr) {
> > + OffsetPtr = P;
> > + }
> > + }
> >
> > - Instruction *Ptr = dyn_cast<Instruction>(InVal);
> > - if (!Ptr)
> > - // No uses to rewrite.
> > - continue;
> > + // Stash this pointer if we've found an i8*.
> > + if (Ptr->getType()->isIntegerTy(8)) {
> > + Int8Ptr = Ptr;
> > + Int8PtrOffset = Offset;
> > + }
> >
> > - // Try to lookup and rewrite any partition uses corresponding to
> this phi
> > - // input.
> > - AllocaPartitioning::iterator PI
> > - = P.findPartitionForPHIOrSelectOperand(InUse);
> > - if (PI == P.end())
> > - continue;
> > + // Peel off a layer of the pointer and update the offset
> appropriately.
> > + if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
> > + Ptr = cast<Operator>(Ptr)->getOperand(0);
> > + } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
> > + if (GA->mayBeOverridden())
> > + break;
> > + Ptr = GA->getAliasee();
> > + } else {
> > + break;
> > + }
> > + assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
> > + } while (Visited.insert(Ptr));
> >
> > - // Replace the Use in the PartitionUse for this operand with the
> Use
> > - // inside the load.
> > - AllocaPartitioning::use_iterator UI
> > - = P.findPartitionUseForPHIOrSelectOperand(InUse);
> > - assert(isa<PHINode>(*UI->U->getUser()));
> > - UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
> > + if (!OffsetPtr) {
> > + if (!Int8Ptr) {
> > + Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
> > + Prefix + ".raw_cast");
> > + Int8PtrOffset = Offset;
> > }
> > - DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
> > +
> > + OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
> > + IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
> > + Prefix + ".raw_idx");
> > }
> > + Ptr = OffsetPtr;
> >
> > - /// Select instructions that use an alloca and are subsequently
> loaded can be
> > - /// rewritten to load both input pointers and then select between the
> result,
> > - /// allowing the load of the alloca to be promoted.
> > - /// From this:
> > - /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
> > - /// %V = load i32* %P2
> > - /// to:
> > - /// %V1 = load i32* %Alloca -> will be mem2reg'd
> > - /// %V2 = load i32* %Other
> > - /// %V = select i1 %cond, i32 %V1, i32 %V2
> > - ///
> > - /// We can do this to a select if its only uses are loads and if the
> operand
> > - /// to the select can be loaded unconditionally.
> > - bool isSafeSelectToSpeculate(SelectInst &SI,
> > - SmallVectorImpl<LoadInst *> &Loads) {
> > - Value *TValue = SI.getTrueValue();
> > - Value *FValue = SI.getFalseValue();
> > - bool TDerefable = TValue->isDereferenceablePointer();
> > - bool FDerefable = FValue->isDereferenceablePointer();
> > + // On the off chance we were targeting i8*, guard the bitcast here.
> > + if (Ptr->getType() != PointerTy)
> > + Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");
> >
> > - for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end();
> > - UI != UE; ++UI) {
> > - LoadInst *LI = dyn_cast<LoadInst>(*UI);
> > - if (LI == 0 || !LI->isSimple()) return false;
> > + return Ptr;
> > +}
> >
> > - // Both operands to the select need to be dereferencable, either
> > - // absolutely (e.g. allocas) or at this point because we can see
> other
> > - // accesses to it.
> > - if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI,
> > -
> LI->getAlignment(), &TD))
> > - return false;
> > - if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI,
> > -
> LI->getAlignment(), &TD))
> > - return false;
> > - Loads.push_back(LI);
> > - }
> > +/// \brief Test whether the given alloca partition can be promoted to a
> vector.
> > +///
> > +/// This is a quick test to check whether we can rewrite a particular
> alloca
> > +/// partition (and its newly formed alloca) into a vector alloca with
> only
> > +/// whole-vector loads and stores such that it could be promoted to a
> vector
> > +/// SSA value. We only can ensure this for a limited set of operations,
> and we
> > +/// don't want to do the rewrites unless we are confident that the
> result will
> > +/// be promotable, so we have an early test here.
> > +static bool isVectorPromotionViable(const TargetData &TD,
> > + Type *AllocaTy,
> > + AllocaPartitioning &P,
> > + uint64_t PartitionBeginOffset,
> > + uint64_t PartitionEndOffset,
> > +
> AllocaPartitioning::const_use_iterator I,
> > +
> AllocaPartitioning::const_use_iterator E) {
> > + VectorType *Ty = dyn_cast<VectorType>(AllocaTy);
> > + if (!Ty)
> > + return false;
> >
> > - return true;
> > - }
> > + uint64_t VecSize = TD.getTypeSizeInBits(Ty);
> > + uint64_t ElementSize = Ty->getScalarSizeInBits();
> >
> > - void visitSelectInst(SelectInst &SI) {
> > - DEBUG(dbgs() << " original: " << SI << "\n");
> > - IRBuilder<> IRB(&SI);
> > + // While the definition of LLVM vectors is bitpacked, we don't
> support sizes
> > + // that aren't byte sized.
> > + if (ElementSize % 8)
> > + return false;
> > + assert((VecSize % 8) == 0 && "vector size not a multiple of element
> size?");
> > + VecSize /= 8;
> > + ElementSize /= 8;
> >
> > - // If the select isn't safe to speculate, just use simple logic to
> emit it.
> > - SmallVector<LoadInst *, 4> Loads;
> > - if (!isSafeSelectToSpeculate(SI, Loads))
> > - return;
> > + for (; I != E; ++I) {
> > + if (!I->U)
> > + continue; // Skip dead use.
> >
> > - Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
> > - AllocaPartitioning::iterator PIs[2];
> > - AllocaPartitioning::PartitionUse PUs[2];
> > - for (unsigned i = 0, e = 2; i != e; ++i) {
> > - PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
> > - if (PIs[i] != P.end()) {
> > - // If the pointer is within the partitioning, remove the select
> from
> > - // its uses. We'll add in the new loads below.
> > - AllocaPartitioning::use_iterator UI
> > - = P.findPartitionUseForPHIOrSelectOperand(Ops[i]);
> > - PUs[i] = *UI;
> > - // Clear out the use here so that the offsets into the use list
> remain
> > - // stable but this use is ignored when rewriting.
> > - UI->U = 0;
> > + uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
> > + uint64_t BeginIndex = BeginOffset / ElementSize;
> > + if (BeginIndex * ElementSize != BeginOffset ||
> > + BeginIndex >= Ty->getNumElements())
> > + return false;
> > + uint64_t EndOffset = I->EndOffset - PartitionBeginOffset;
> > + uint64_t EndIndex = EndOffset / ElementSize;
> > + if (EndIndex * ElementSize != EndOffset ||
> > + EndIndex > Ty->getNumElements())
> > + return false;
> > +
> > + // FIXME: We should build shuffle vector instructions to handle
> > + // non-element-sized accesses.
> > + if ((EndOffset - BeginOffset) != ElementSize &&
> > + (EndOffset - BeginOffset) != VecSize)
> > + return false;
> > +
> > + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
> > + if (MI->isVolatile())
> > + return false;
> > + if (MemTransferInst *MTI =
> dyn_cast<MemTransferInst>(I->U->getUser())) {
> > + const AllocaPartitioning::MemTransferOffsets &MTO
> > + = P.getMemTransferOffsets(*MTI);
> > + if (!MTO.IsSplittable)
> > + return false;
> > }
> > + } else if
> (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
> > + // Disable vector promotion when there are loads or stores of an
> FCA.
> > + return false;
> > + } else if (!isa<LoadInst>(I->U->getUser()) &&
> > + !isa<StoreInst>(I->U->getUser())) {
> > + return false;
> > }
> > + }
> > + return true;
> > +}
> >
> > - Value *TV = SI.getTrueValue();
> > - Value *FV = SI.getFalseValue();
> > - // Replace the loads of the select with a select of two loads.
> > - while (!Loads.empty()) {
> > - LoadInst *LI = Loads.pop_back_val();
> > -
> > - IRB.SetInsertPoint(LI);
> > - LoadInst *TL =
> > - IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
> > - LoadInst *FL =
> > - IRB.CreateLoad(FV, LI->getName() +
> ".sroa.speculate.load.false");
> > - NumLoadsSpeculated += 2;
> > +/// \brief Test whether the given alloca partition can be promoted to
> an int.
> > +///
> > +/// This is a quick test to check whether we can rewrite a particular
> alloca
> > +/// partition (and its newly formed alloca) into an integer alloca
> suitable for
> > +/// promotion to an SSA value. We only can ensure this for a limited
> set of
> > +/// operations, and we don't want to do the rewrites unless we are
> confident
> > +/// that the result will be promotable, so we have an early test here.
> > +static bool isIntegerPromotionViable(const TargetData &TD,
> > + Type *AllocaTy,
> > + uint64_t AllocBeginOffset,
> > + AllocaPartitioning &P,
> > +
> AllocaPartitioning::const_use_iterator I,
> > +
> AllocaPartitioning::const_use_iterator E) {
> > + IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
> > + if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth())
> > + return false;
> >
> > - // Transfer alignment and TBAA info if present.
> > - TL->setAlignment(LI->getAlignment());
> > - FL->setAlignment(LI->getAlignment());
> > - if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
> > - TL->setMetadata(LLVMContext::MD_tbaa, Tag);
> > - FL->setMetadata(LLVMContext::MD_tbaa, Tag);
> > - }
> > + // Check the uses to ensure the uses are (likely) promoteable integer
> uses.
> > + // Also ensure that the alloca has a covering load or store. We don't
> want
> > + // promote because of some other unsplittable entry (which we may make
> > + // splittable later) and lose the ability to promote each element
> access.
> > + bool WholeAllocaOp = false;
> > + for (; I != E; ++I) {
> > + if (!I->U)
> > + continue; // Skip dead use.
> >
> > - Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
> > - LI->getName() + ".sroa.speculated");
> > + // We can't reasonably handle cases where the load or store extends
> past
> > + // the end of the aloca's type and into its padding.
> > + if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty))
> > + return false;
> >
> > - LoadInst *Loads[2] = { TL, FL };
> > - for (unsigned i = 0, e = 2; i != e; ++i) {
> > - if (PIs[i] != P.end()) {
> > - Use *LoadUse = &Loads[i]->getOperandUse(0);
> > - assert(PUs[i].U->get() == LoadUse->get());
> > - PUs[i].U = LoadUse;
> > - P.use_push_back(PIs[i], PUs[i]);
> > - }
> > + if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
> > + if (LI->isVolatile() || !LI->getType()->isIntegerTy())
> > + return false;
> > + if (LI->getType() == Ty)
> > + WholeAllocaOp = true;
> > + } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
> > + if (SI->isVolatile() ||
> !SI->getValueOperand()->getType()->isIntegerTy())
> > + return false;
> > + if (SI->getValueOperand()->getType() == Ty)
> > + WholeAllocaOp = true;
> > + } else if (MemIntrinsic *MI =
> dyn_cast<MemIntrinsic>(I->U->getUser())) {
> > + if (MI->isVolatile())
> > + return false;
> > + if (MemTransferInst *MTI =
> dyn_cast<MemTransferInst>(I->U->getUser())) {
> > + const AllocaPartitioning::MemTransferOffsets &MTO
> > + = P.getMemTransferOffsets(*MTI);
> > + if (!MTO.IsSplittable)
> > + return false;
> > }
> > -
> > - DEBUG(dbgs() << " speculated to: " << *V << "\n");
> > - LI->replaceAllUsesWith(V);
> > - Pass.DeadInsts.push_back(LI);
> > + } else {
> > + return false;
> > }
> > }
> > -};
> > + return WholeAllocaOp;
> > +}
> >
> > +namespace {
> > /// \brief Visitor to rewrite instructions using a partition of an
> alloca to
> > /// use a new alloca.
> > ///
> >
> >
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at cs.uiuc.edu
> > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>