[llvm-commits] [llvm] r165284 - /llvm/trunk/lib/Transforms/Scalar/SROA.cpp

Chandler Carruth chandlerc at gmail.com
Thu Oct 4 23:13:25 PDT 2012


On Thu, Oct 4, 2012 at 6:52 PM, Sean Silva <silvas at purdue.edu> wrote:

> protip: pass --patience or --histogram to git's diff-generating
> commands (git diff, git log -p, etc.) to select alternative diff
> algorithms.


I'm well aware of this, but that does nothing for the commit mailing list,
which is what my commit log was written for...
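
For local review it is handy, though. A minimal sketch of what Sean
describes, run against the file touched here (the config line assumes a
git new enough to have the diff.algorithm key; the per-invocation flags
are enough otherwise):

  git diff --patience HEAD~1 -- lib/Transforms/Scalar/SROA.cpp
  git log -p --histogram -1 -- lib/Transforms/Scalar/SROA.cpp
  git config --global diff.algorithm histogram   # assumed key; sets the default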


> I just tried it out locally for this patch and the diff is
> dramatically better (the diff is a single big block of + and a single
> big block of -). FWIW, I find that patience and histogram are usually
> basically the same (histogram is an extension of patience), but either
> of them is usually significantly better than the default.
>
> More info about patience diff, for the curious:
> http://bramcohen.livejournal.com/73318.html
> More info about histogram diff:
>
> http://download.eclipse.org/jgit/docs/jgit-2.0.0.201206130900-r/apidocs/org/eclipse/jgit/diff/HistogramDiff.html
>
> -- Sean Silva
>
> On Thu, Oct 4, 2012 at 9:29 PM, Chandler Carruth <chandlerc at gmail.com>
> wrote:
> > Author: chandlerc
> > Date: Thu Oct  4 20:29:06 2012
> > New Revision: 165284
> >
> > URL: http://llvm.org/viewvc/llvm-project?rev=165284&view=rev
> > Log:
> > Lift the speculation visitor above all the helpers that are targeted at
> > the rewrite visitor to make the fact that the speculation is completely
> > independent a bit more clear.
> >
> > I promise that this is just a cut/paste of the one visitor plus the
> > addition of the anonymous namespace wrappings. The diff may look
> > completely preposterous; it does in git for some reason.
> >
> > Modified:
> >     llvm/trunk/lib/Transforms/Scalar/SROA.cpp
> >
> > Modified: llvm/trunk/lib/Transforms/Scalar/SROA.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SROA.cpp?rev=165284&r1=165283&r2=165284&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/lib/Transforms/Scalar/SROA.cpp (original)
> > +++ llvm/trunk/lib/Transforms/Scalar/SROA.cpp Thu Oct  4 20:29:06 2012
> > @@ -1368,715 +1368,717 @@
> >  INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
> >                      false, false)
> >
> > -/// \brief Accumulate the constant offsets in a GEP into a single APInt
> offset.
> > -///
> > -/// If the provided GEP is all-constant, the total byte offset formed
> by the
> > -/// GEP is computed and Offset is set to it. If the GEP has any
> non-constant
> > -/// operands, the function returns false and the value of Offset is
> unmodified.
> > -static bool accumulateGEPOffsets(const TargetData &TD, GEPOperator &GEP,
> > -                                 APInt &Offset) {
> > -  APInt GEPOffset(Offset.getBitWidth(), 0);
> > -  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE =
> gep_type_end(GEP);
> > -       GTI != GTE; ++GTI) {
> > -    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
> > -    if (!OpC)
> > -      return false;
> > -    if (OpC->isZero()) continue;
> > +namespace {
> > +/// \brief Visitor to speculate PHIs and Selects where possible.
> > +class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator>
> {
> > +  // Befriend the base class so it can delegate to private visit
> methods.
> > +  friend class llvm::InstVisitor<PHIOrSelectSpeculator>;
> >
> > -    // Handle a struct index, which adds its field offset to the
> pointer.
> > -    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
> > -      unsigned ElementIdx = OpC->getZExtValue();
> > -      const StructLayout *SL = TD.getStructLayout(STy);
> > -      GEPOffset += APInt(Offset.getBitWidth(),
> > -                         SL->getElementOffset(ElementIdx));
> > -      continue;
> > -    }
> > +  const TargetData &TD;
> > +  AllocaPartitioning &P;
> > +  SROA &Pass;
> >
> > -    APInt TypeSize(Offset.getBitWidth(),
> > -                   TD.getTypeAllocSize(GTI.getIndexedType()));
> > -    if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) {
> > -      assert((VTy->getScalarSizeInBits() % 8) == 0 &&
> > -             "vector element size is not a multiple of 8, cannot GEP
> over it");
> > -      TypeSize = VTy->getScalarSizeInBits() / 8;
> > -    }
> > +public:
> > +  PHIOrSelectSpeculator(const TargetData &TD, AllocaPartitioning &P,
> SROA &Pass)
> > +    : TD(TD), P(P), Pass(Pass) {}
> >
> > -    GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) *
> TypeSize;
> > +  /// \brief Visit the users of an alloca partition and rewrite them.
> > +  void visitUsers(AllocaPartitioning::const_iterator PI) {
> > +    // Note that we need to use an index here as the underlying vector
> of uses
> > +    // may be grown during speculation. However, we never need to
> re-visit the
> > +    // new uses, and so we can use the initial size bound.
> > +    for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
> > +      const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
> > +      if (!PU.U)
> > +        continue; // Skip dead use.
> > +
> > +      visit(cast<Instruction>(PU.U->getUser()));
> > +    }
> >    }
> > -  Offset = GEPOffset;
> > -  return true;
> > -}
> >
> > -/// \brief Build a GEP out of a base pointer and indices.
> > -///
> > -/// This will return the BasePtr if that is valid, or build a new GEP
> > -/// instruction using the IRBuilder if GEP-ing is needed.
> > -static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
> > -                       SmallVectorImpl<Value *> &Indices,
> > -                       const Twine &Prefix) {
> > -  if (Indices.empty())
> > -    return BasePtr;
> > +private:
> > +  // By default, skip this instruction.
> > +  void visitInstruction(Instruction &I) {}
> >
> > -  // A single zero index is a no-op, so check for this and avoid
> building a GEP
> > -  // in that case.
> > -  if (Indices.size() == 1 &&
> cast<ConstantInt>(Indices.back())->isZero())
> > -    return BasePtr;
> > +  /// PHI instructions that use an alloca and are subsequently loaded
> can be
> > +  /// rewritten to load both input pointers in the pred blocks and then
> PHI the
> > +  /// results, allowing the load of the alloca to be promoted.
> > +  /// From this:
> > +  ///   %P2 = phi [i32* %Alloca, i32* %Other]
> > +  ///   %V = load i32* %P2
> > +  /// to:
> > +  ///   %V1 = load i32* %Alloca      -> will be mem2reg'd
> > +  ///   ...
> > +  ///   %V2 = load i32* %Other
> > +  ///   ...
> > +  ///   %V = phi [i32 %V1, i32 %V2]
> > +  ///
> > +  /// We can do this to a select if its only uses are loads and if the
> operands
> > +  /// to the select can be loaded unconditionally.
> > +  ///
> > +  /// FIXME: This should be hoisted into a generic utility, likely in
> > +  /// Transforms/Util/Local.h
> > +  bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *>
> &Loads) {
> > +    // For now, we can only do this promotion if the load is in the
> same block
> > +    // as the PHI, and if there are no stores between the phi and load.
> > +    // TODO: Allow recursive phi users.
> > +    // TODO: Allow stores.
> > +    BasicBlock *BB = PN.getParent();
> > +    unsigned MaxAlign = 0;
> > +    for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end();
> > +         UI != UE; ++UI) {
> > +      LoadInst *LI = dyn_cast<LoadInst>(*UI);
> > +      if (LI == 0 || !LI->isSimple()) return false;
> >
> > -  return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");
> > -}
> > +      // For now we only allow loads in the same block as the PHI.
>  This is
> > +      // a common case that happens when instcombine merges two loads
> through
> > +      // a PHI.
> > +      if (LI->getParent() != BB) return false;
> >
> > -/// \brief Get a natural GEP off of the BasePtr walking through Ty
> toward
> > -/// TargetTy without changing the offset of the pointer.
> > -///
> > -/// This routine assumes we've already established a properly offset
> GEP with
> > -/// Indices, and arrived at the Ty type. The goal is to continue to GEP
> with
> > -/// zero-indices down through type layers until we find one the same as
> > -/// TargetTy. If we can't find one with the same type, we at least try
> to use
> > -/// one with the same size. If none of that works, we just produce the
> GEP as
> > -/// indicated by Indices to have the correct offset.
> > -static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const TargetData
> &TD,
> > -                                    Value *BasePtr, Type *Ty, Type
> *TargetTy,
> > -                                    SmallVectorImpl<Value *> &Indices,
> > -                                    const Twine &Prefix) {
> > -  if (Ty == TargetTy)
> > -    return buildGEP(IRB, BasePtr, Indices, Prefix);
> > +      // Ensure that there are no instructions between the PHI and the
> load that
> > +      // could store.
> > +      for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
> > +        if (BBI->mayWriteToMemory())
> > +          return false;
> >
> > -  // See if we can descend into a struct and locate a field with the
> correct
> > -  // type.
> > -  unsigned NumLayers = 0;
> > -  Type *ElementTy = Ty;
> > -  do {
> > -    if (ElementTy->isPointerTy())
> > -      break;
> > -    if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {
> > -      ElementTy = SeqTy->getElementType();
> > -      Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(),
> 0)));
> > -    } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
> > -      ElementTy = *STy->element_begin();
> > -      Indices.push_back(IRB.getInt32(0));
> > -    } else {
> > -      break;
> > +      MaxAlign = std::max(MaxAlign, LI->getAlignment());
> > +      Loads.push_back(LI);
> >      }
> > -    ++NumLayers;
> > -  } while (ElementTy != TargetTy);
> > -  if (ElementTy != TargetTy)
> > -    Indices.erase(Indices.end() - NumLayers, Indices.end());
> >
> > -  return buildGEP(IRB, BasePtr, Indices, Prefix);
> > -}
> > +    // We can only transform this if it is safe to push the loads into
> the
> > +    // predecessor blocks. The only thing to watch out for is that we
> can't put
> > +    // a possibly trapping load in the predecessor if it is a critical
> edge.
> > +    for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
> > +         ++Idx) {
> > +      TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
> > +      Value *InVal = PN.getIncomingValue(Idx);
> >
> > -/// \brief Recursively compute indices for a natural GEP.
> > -///
> > -/// This is the recursive step for getNaturalGEPWithOffset that walks
> down the
> > -/// element types adding appropriate indices for the GEP.
> > -static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const
> TargetData &TD,
> > -                                       Value *Ptr, Type *Ty, APInt
> &Offset,
> > -                                       Type *TargetTy,
> > -                                       SmallVectorImpl<Value *>
> &Indices,
> > -                                       const Twine &Prefix) {
> > -  if (Offset == 0)
> > -    return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices,
> Prefix);
> > +      // If the value is produced by the terminator of the predecessor
> (an
> > +      // invoke) or it has side-effects, there is no valid place to put
> a load
> > +      // in the predecessor.
> > +      if (TI == InVal || TI->mayHaveSideEffects())
> > +        return false;
> >
> > -  // We can't recurse through pointer types.
> > -  if (Ty->isPointerTy())
> > -    return 0;
> > +      // If the predecessor has a single successor, then the edge isn't
> > +      // critical.
> > +      if (TI->getNumSuccessors() == 1)
> > +        continue;
> >
> > -  // We try to analyze GEPs over vectors here, but note that these GEPs
> are
> > -  // extremely poorly defined currently. The long-term goal is to
> remove GEPing
> > -  // over a vector from the IR completely.
> > -  if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
> > -    unsigned ElementSizeInBits = VecTy->getScalarSizeInBits();
> > -    if (ElementSizeInBits % 8)
> > -      return 0; // GEPs over non-multiple of 8 size vector elements are
> invalid.
> > -    APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
> > -    APInt NumSkippedElements = Offset.udiv(ElementSize);
> > -    if (NumSkippedElements.ugt(VecTy->getNumElements()))
> > -      return 0;
> > -    Offset -= NumSkippedElements * ElementSize;
> > -    Indices.push_back(IRB.getInt(NumSkippedElements));
> > -    return getNaturalGEPRecursively(IRB, TD, Ptr,
> VecTy->getElementType(),
> > -                                    Offset, TargetTy, Indices, Prefix);
> > -  }
> > +      // If this pointer is always safe to load, or if we can prove
> that there
> > +      // is already a load in the block, then we can move the load to
> the pred
> > +      // block.
> > +      if (InVal->isDereferenceablePointer() ||
> > +          isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD))
> > +        continue;
> >
> > -  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
> > -    Type *ElementTy = ArrTy->getElementType();
> > -    APInt ElementSize(Offset.getBitWidth(),
> TD.getTypeAllocSize(ElementTy));
> > -    APInt NumSkippedElements = Offset.udiv(ElementSize);
> > -    if (NumSkippedElements.ugt(ArrTy->getNumElements()))
> > -      return 0;
> > +      return false;
> > +    }
> >
> > -    Offset -= NumSkippedElements * ElementSize;
> > -    Indices.push_back(IRB.getInt(NumSkippedElements));
> > -    return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset,
> TargetTy,
> > -                                    Indices, Prefix);
> > +    return true;
> >    }
> >
> > -  StructType *STy = dyn_cast<StructType>(Ty);
> > -  if (!STy)
> > -    return 0;
> > +  void visitPHINode(PHINode &PN) {
> > +    DEBUG(dbgs() << "    original: " << PN << "\n");
> >
> > -  const StructLayout *SL = TD.getStructLayout(STy);
> > -  uint64_t StructOffset = Offset.getZExtValue();
> > -  if (StructOffset >= SL->getSizeInBytes())
> > -    return 0;
> > -  unsigned Index = SL->getElementContainingOffset(StructOffset);
> > -  Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
> > -  Type *ElementTy = STy->getElementType(Index);
> > -  if (Offset.uge(TD.getTypeAllocSize(ElementTy)))
> > -    return 0; // The offset points into alignment padding.
> > +    SmallVector<LoadInst *, 4> Loads;
> > +    if (!isSafePHIToSpeculate(PN, Loads))
> > +      return;
> >
> > -  Indices.push_back(IRB.getInt32(Index));
> > -  return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset,
> TargetTy,
> > -                                  Indices, Prefix);
> > -}
> > +    assert(!Loads.empty());
> >
> > -/// \brief Get a natural GEP from a base pointer to a particular offset
> and
> > -/// resulting in a particular type.
> > -///
> > -/// The goal is to produce a "natural" looking GEP that works with the
> existing
> > -/// composite types to arrive at the appropriate offset and element
> type for
> > -/// a pointer. TargetTy is the element type the returned GEP should
> point-to if
> > -/// possible. We recurse by decreasing Offset, adding the appropriate
> index to
> > -/// Indices, and setting Ty to the result subtype.
> > -///
> > -/// If no natural GEP can be constructed, this function returns null.
> > -static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const
> TargetData &TD,
> > -                                      Value *Ptr, APInt Offset, Type
> *TargetTy,
> > -                                      SmallVectorImpl<Value *> &Indices,
> > -                                      const Twine &Prefix) {
> > -  PointerType *Ty = cast<PointerType>(Ptr->getType());
> > +    Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
> > +    IRBuilder<> PHIBuilder(&PN);
> > +    PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy,
> PN.getNumIncomingValues(),
> > +                                          PN.getName() +
> ".sroa.speculated");
> >
> > -  // Don't consider any GEPs through an i8* as natural unless the
> TargetTy is
> > -  // an i8.
> > -  if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))
> > -    return 0;
> > -
> > -  Type *ElementTy = Ty->getElementType();
> > -  if (!ElementTy->isSized())
> > -    return 0; // We can't GEP through an unsized element.
> > -  APInt ElementSize(Offset.getBitWidth(),
> TD.getTypeAllocSize(ElementTy));
> > -  if (ElementSize == 0)
> > -    return 0; // Zero-length arrays can't help us build a natural GEP.
> > -  APInt NumSkippedElements = Offset.udiv(ElementSize);
> > -
> > -  Offset -= NumSkippedElements * ElementSize;
> > -  Indices.push_back(IRB.getInt(NumSkippedElements));
> > -  return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset,
> TargetTy,
> > -                                  Indices, Prefix);
> > -}
> > +    // Get the TBAA tag and alignment to use from one of the loads.  It
> doesn't
> > +    // matter which one we get and if any differ, it doesn't matter.
> > +    LoadInst *SomeLoad = cast<LoadInst>(Loads.back());
> > +    MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
> > +    unsigned Align = SomeLoad->getAlignment();
> >
> > -/// \brief Compute an adjusted pointer from Ptr by Offset bytes where
> the
> > -/// resulting pointer has PointerTy.
> > -///
> > -/// This tries very hard to compute a "natural" GEP which arrives at
> the offset
> > -/// and produces the pointer type desired. Where it cannot, it will try
> to use
> > -/// the natural GEP to arrive at the offset and bitcast to the type.
> Where that
> > -/// fails, it will try to use an existing i8* and GEP to the byte
> offset and
> > -/// bitcast to the type.
> > -///
> > -/// The strategy for finding the more natural GEPs is to peel off
> layers of the
> > -/// pointer, walking back through bit casts and GEPs, searching for a
> base
> > -/// pointer from which we can compute a natural GEP with the desired
> > -/// properities. The algorithm tries to fold as many constant indices
> into
> > -/// a single GEP as possible, thus making each GEP more independent of
> the
> > -/// surrounding code.
> > -static Value *getAdjustedPtr(IRBuilder<> &IRB, const TargetData &TD,
> > -                             Value *Ptr, APInt Offset, Type *PointerTy,
> > -                             const Twine &Prefix) {
> > -  // Even though we don't look through PHI nodes, we could be called on
> an
> > -  // instruction in an unreachable block, which may be on a cycle.
> > -  SmallPtrSet<Value *, 4> Visited;
> > -  Visited.insert(Ptr);
> > -  SmallVector<Value *, 4> Indices;
> > +    // Rewrite all loads of the PN to use the new PHI.
> > +    do {
> > +      LoadInst *LI = Loads.pop_back_val();
> > +      LI->replaceAllUsesWith(NewPN);
> > +      Pass.DeadInsts.push_back(LI);
> > +    } while (!Loads.empty());
> >
> > -  // We may end up computing an offset pointer that has the wrong type.
> If we
> > -  // never are able to compute one directly that has the correct type,
> we'll
> > -  // fall back to it, so keep it around here.
> > -  Value *OffsetPtr = 0;
> > +    // Inject loads into all of the pred blocks.
> > +    for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
> ++Idx) {
> > +      BasicBlock *Pred = PN.getIncomingBlock(Idx);
> > +      TerminatorInst *TI = Pred->getTerminator();
> > +      Use *InUse =
> &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
> > +      Value *InVal = PN.getIncomingValue(Idx);
> > +      IRBuilder<> PredBuilder(TI);
> >
> > -  // Remember any i8 pointer we come across to re-use if we need to do
> a raw
> > -  // byte offset.
> > -  Value *Int8Ptr = 0;
> > -  APInt Int8PtrOffset(Offset.getBitWidth(), 0);
> > +      LoadInst *Load
> > +        = PredBuilder.CreateLoad(InVal, (PN.getName() +
> ".sroa.speculate.load." +
> > +                                         Pred->getName()));
> > +      ++NumLoadsSpeculated;
> > +      Load->setAlignment(Align);
> > +      if (TBAATag)
> > +        Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
> > +      NewPN->addIncoming(Load, Pred);
> >
> > -  Type *TargetTy = PointerTy->getPointerElementType();
> > +      Instruction *Ptr = dyn_cast<Instruction>(InVal);
> > +      if (!Ptr)
> > +        // No uses to rewrite.
> > +        continue;
> >
> > -  do {
> > -    // First fold any existing GEPs into the offset.
> > -    while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
> > -      APInt GEPOffset(Offset.getBitWidth(), 0);
> > -      if (!accumulateGEPOffsets(TD, *GEP, GEPOffset))
> > -        break;
> > -      Offset += GEPOffset;
> > -      Ptr = GEP->getPointerOperand();
> > -      if (!Visited.insert(Ptr))
> > -        break;
> > -    }
> > +      // Try to lookup and rewrite any partition uses corresponding to
> this phi
> > +      // input.
> > +      AllocaPartitioning::iterator PI
> > +        = P.findPartitionForPHIOrSelectOperand(InUse);
> > +      if (PI == P.end())
> > +        continue;
> >
> > -    // See if we can perform a natural GEP here.
> > -    Indices.clear();
> > -    if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset,
> TargetTy,
> > -                                           Indices, Prefix)) {
> > -      if (P->getType() == PointerTy) {
> > -        // Zap any offset pointer that we ended up computing in
> previous rounds.
> > -        if (OffsetPtr && OffsetPtr->use_empty())
> > -          if (Instruction *I = dyn_cast<Instruction>(OffsetPtr))
> > -            I->eraseFromParent();
> > -        return P;
> > -      }
> > -      if (!OffsetPtr) {
> > -        OffsetPtr = P;
> > -      }
> > +      // Replace the Use in the PartitionUse for this operand with the
> Use
> > +      // inside the load.
> > +      AllocaPartitioning::use_iterator UI
> > +        = P.findPartitionUseForPHIOrSelectOperand(InUse);
> > +      assert(isa<PHINode>(*UI->U->getUser()));
> > +      UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
> >      }
> > +    DEBUG(dbgs() << "          speculated to: " << *NewPN << "\n");
> > +  }
> >
> > -    // Stash this pointer if we've found an i8*.
> > -    if (Ptr->getType()->isIntegerTy(8)) {
> > -      Int8Ptr = Ptr;
> > -      Int8PtrOffset = Offset;
> > -    }
> > +  /// Select instructions that use an alloca and are subsequently
> loaded can be
> > +  /// rewritten to load both input pointers and then select between the
> result,
> > +  /// allowing the load of the alloca to be promoted.
> > +  /// From this:
> > +  ///   %P2 = select i1 %cond, i32* %Alloca, i32* %Other
> > +  ///   %V = load i32* %P2
> > +  /// to:
> > +  ///   %V1 = load i32* %Alloca      -> will be mem2reg'd
> > +  ///   %V2 = load i32* %Other
> > +  ///   %V = select i1 %cond, i32 %V1, i32 %V2
> > +  ///
> > +  /// We can do this to a select if its only uses are loads and if the
> operand
> > +  /// to the select can be loaded unconditionally.
> > +  bool isSafeSelectToSpeculate(SelectInst &SI,
> > +                               SmallVectorImpl<LoadInst *> &Loads) {
> > +    Value *TValue = SI.getTrueValue();
> > +    Value *FValue = SI.getFalseValue();
> > +    bool TDerefable = TValue->isDereferenceablePointer();
> > +    bool FDerefable = FValue->isDereferenceablePointer();
> >
> > -    // Peel off a layer of the pointer and update the offset
> appropriately.
> > -    if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
> > -      Ptr = cast<Operator>(Ptr)->getOperand(0);
> > -    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
> > -      if (GA->mayBeOverridden())
> > -        break;
> > -      Ptr = GA->getAliasee();
> > -    } else {
> > -      break;
> > -    }
> > -    assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
> > -  } while (Visited.insert(Ptr));
> > +    for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end();
> > +         UI != UE; ++UI) {
> > +      LoadInst *LI = dyn_cast<LoadInst>(*UI);
> > +      if (LI == 0 || !LI->isSimple()) return false;
> >
> > -  if (!OffsetPtr) {
> > -    if (!Int8Ptr) {
> > -      Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
> > -                                  Prefix + ".raw_cast");
> > -      Int8PtrOffset = Offset;
> > +      // Both operands to the select need to be dereferencable, either
> > +      // absolutely (e.g. allocas) or at this point because we can see
> other
> > +      // accesses to it.
> > +      if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI,
> > +
>  LI->getAlignment(), &TD))
> > +        return false;
> > +      if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI,
> > +
>  LI->getAlignment(), &TD))
> > +        return false;
> > +      Loads.push_back(LI);
> >      }
> >
> > -    OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
> > -      IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
> > -                            Prefix + ".raw_idx");
> > +    return true;
> >    }
> > -  Ptr = OffsetPtr;
> >
> > -  // On the off chance we were targeting i8*, guard the bitcast here.
> > -  if (Ptr->getType() != PointerTy)
> > -    Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");
> > +  void visitSelectInst(SelectInst &SI) {
> > +    DEBUG(dbgs() << "    original: " << SI << "\n");
> > +    IRBuilder<> IRB(&SI);
> >
> > -  return Ptr;
> > -}
> > +    // If the select isn't safe to speculate, just use simple logic to
> emit it.
> > +    SmallVector<LoadInst *, 4> Loads;
> > +    if (!isSafeSelectToSpeculate(SI, Loads))
> > +      return;
> >
> > -/// \brief Test whether the given alloca partition can be promoted to a
> vector.
> > -///
> > -/// This is a quick test to check whether we can rewrite a particular
> alloca
> > -/// partition (and its newly formed alloca) into a vector alloca with
> only
> > -/// whole-vector loads and stores such that it could be promoted to a
> vector
> > -/// SSA value. We only can ensure this for a limited set of operations,
> and we
> > -/// don't want to do the rewrites unless we are confident that the
> result will
> > -/// be promotable, so we have an early test here.
> > -static bool isVectorPromotionViable(const TargetData &TD,
> > -                                    Type *AllocaTy,
> > -                                    AllocaPartitioning &P,
> > -                                    uint64_t PartitionBeginOffset,
> > -                                    uint64_t PartitionEndOffset,
> > -
>  AllocaPartitioning::const_use_iterator I,
> > -
>  AllocaPartitioning::const_use_iterator E) {
> > -  VectorType *Ty = dyn_cast<VectorType>(AllocaTy);
> > -  if (!Ty)
> > -    return false;
> > +    Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
> > +    AllocaPartitioning::iterator PIs[2];
> > +    AllocaPartitioning::PartitionUse PUs[2];
> > +    for (unsigned i = 0, e = 2; i != e; ++i) {
> > +      PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
> > +      if (PIs[i] != P.end()) {
> > +        // If the pointer is within the partitioning, remove the select
> from
> > +        // its uses. We'll add in the new loads below.
> > +        AllocaPartitioning::use_iterator UI
> > +          = P.findPartitionUseForPHIOrSelectOperand(Ops[i]);
> > +        PUs[i] = *UI;
> > +        // Clear out the use here so that the offsets into the use list
> remain
> > +        // stable but this use is ignored when rewriting.
> > +        UI->U = 0;
> > +      }
> > +    }
> >
> > -  uint64_t VecSize = TD.getTypeSizeInBits(Ty);
> > -  uint64_t ElementSize = Ty->getScalarSizeInBits();
> > +    Value *TV = SI.getTrueValue();
> > +    Value *FV = SI.getFalseValue();
> > +    // Replace the loads of the select with a select of two loads.
> > +    while (!Loads.empty()) {
> > +      LoadInst *LI = Loads.pop_back_val();
> >
> > -  // While the definition of LLVM vectors is bitpacked, we don't
> support sizes
> > -  // that aren't byte sized.
> > -  if (ElementSize % 8)
> > -    return false;
> > -  assert((VecSize % 8) == 0 && "vector size not a multiple of element
> size?");
> > -  VecSize /= 8;
> > -  ElementSize /= 8;
> > +      IRB.SetInsertPoint(LI);
> > +      LoadInst *TL =
> > +        IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
> > +      LoadInst *FL =
> > +        IRB.CreateLoad(FV, LI->getName() +
> ".sroa.speculate.load.false");
> > +      NumLoadsSpeculated += 2;
> >
> > -  for (; I != E; ++I) {
> > -    if (!I->U)
> > -      continue; // Skip dead use.
> > +      // Transfer alignment and TBAA info if present.
> > +      TL->setAlignment(LI->getAlignment());
> > +      FL->setAlignment(LI->getAlignment());
> > +      if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
> > +        TL->setMetadata(LLVMContext::MD_tbaa, Tag);
> > +        FL->setMetadata(LLVMContext::MD_tbaa, Tag);
> > +      }
> >
> > -    uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
> > -    uint64_t BeginIndex = BeginOffset / ElementSize;
> > -    if (BeginIndex * ElementSize != BeginOffset ||
> > -        BeginIndex >= Ty->getNumElements())
> > -      return false;
> > -    uint64_t EndOffset = I->EndOffset - PartitionBeginOffset;
> > -    uint64_t EndIndex = EndOffset / ElementSize;
> > -    if (EndIndex * ElementSize != EndOffset ||
> > -        EndIndex > Ty->getNumElements())
> > -      return false;
> > -
> > -    // FIXME: We should build shuffle vector instructions to handle
> > -    // non-element-sized accesses.
> > -    if ((EndOffset - BeginOffset) != ElementSize &&
> > -        (EndOffset - BeginOffset) != VecSize)
> > -      return false;
> > +      Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
> > +                                  LI->getName() + ".sroa.speculated");
> >
> > -    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
> > -      if (MI->isVolatile())
> > -        return false;
> > -      if (MemTransferInst *MTI =
> dyn_cast<MemTransferInst>(I->U->getUser())) {
> > -        const AllocaPartitioning::MemTransferOffsets &MTO
> > -          = P.getMemTransferOffsets(*MTI);
> > -        if (!MTO.IsSplittable)
> > -          return false;
> > +      LoadInst *Loads[2] = { TL, FL };
> > +      for (unsigned i = 0, e = 2; i != e; ++i) {
> > +        if (PIs[i] != P.end()) {
> > +          Use *LoadUse = &Loads[i]->getOperandUse(0);
> > +          assert(PUs[i].U->get() == LoadUse->get());
> > +          PUs[i].U = LoadUse;
> > +          P.use_push_back(PIs[i], PUs[i]);
> > +        }
> >        }
> > -    } else if
> (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
> > -      // Disable vector promotion when there are loads or stores of an
> FCA.
> > -      return false;
> > -    } else if (!isa<LoadInst>(I->U->getUser()) &&
> > -               !isa<StoreInst>(I->U->getUser())) {
> > -      return false;
> > +
> > +      DEBUG(dbgs() << "          speculated to: " << *V << "\n");
> > +      LI->replaceAllUsesWith(V);
> > +      Pass.DeadInsts.push_back(LI);
> >      }
> >    }
> > -  return true;
> > +};
> >  }
> >
> > -/// \brief Test whether the given alloca partition can be promoted to
> an int.
> > +/// \brief Accumulate the constant offsets in a GEP into a single APInt
> offset.
> >  ///
> > -/// This is a quick test to check whether we can rewrite a particular
> alloca
> > -/// partition (and its newly formed alloca) into an integer alloca
> suitable for
> > -/// promotion to an SSA value. We only can ensure this for a limited
> set of
> > -/// operations, and we don't want to do the rewrites unless we are
> confident
> > -/// that the result will be promotable, so we have an early test here.
> > -static bool isIntegerPromotionViable(const TargetData &TD,
> > -                                     Type *AllocaTy,
> > -                                     uint64_t AllocBeginOffset,
> > -                                     AllocaPartitioning &P,
> > -
> AllocaPartitioning::const_use_iterator I,
> > -
> AllocaPartitioning::const_use_iterator E) {
> > -  IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
> > -  if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth())
> > -    return false;
> > -
> > -  // Check the uses to ensure the uses are (likely) promoteable integer
> uses.
> > -  // Also ensure that the alloca has a covering load or store. We don't
> want
> > -  // promote because of some other unsplittable entry (which we may make
> > -  // splittable later) and lose the ability to promote each element
> access.
> > -  bool WholeAllocaOp = false;
> > -  for (; I != E; ++I) {
> > -    if (!I->U)
> > -      continue; // Skip dead use.
> > -
> > -    // We can't reasonably handle cases where the load or store extends
> past
> > -    // the end of the aloca's type and into its padding.
> > -    if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty))
> > +/// If the provided GEP is all-constant, the total byte offset formed
> by the
> > +/// GEP is computed and Offset is set to it. If the GEP has any
> non-constant
> > +/// operands, the function returns false and the value of Offset is
> unmodified.
> > +static bool accumulateGEPOffsets(const TargetData &TD, GEPOperator &GEP,
> > +                                 APInt &Offset) {
> > +  APInt GEPOffset(Offset.getBitWidth(), 0);
> > +  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE =
> gep_type_end(GEP);
> > +       GTI != GTE; ++GTI) {
> > +    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
> > +    if (!OpC)
> >        return false;
> > +    if (OpC->isZero()) continue;
> >
> > -    if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
> > -      if (LI->isVolatile() || !LI->getType()->isIntegerTy())
> > -        return false;
> > -      if (LI->getType() == Ty)
> > -        WholeAllocaOp = true;
> > -    } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
> > -      if (SI->isVolatile() ||
> !SI->getValueOperand()->getType()->isIntegerTy())
> > -        return false;
> > -      if (SI->getValueOperand()->getType() == Ty)
> > -        WholeAllocaOp = true;
> > -    } else if (MemIntrinsic *MI =
> dyn_cast<MemIntrinsic>(I->U->getUser())) {
> > -      if (MI->isVolatile())
> > -        return false;
> > -      if (MemTransferInst *MTI =
> dyn_cast<MemTransferInst>(I->U->getUser())) {
> > -        const AllocaPartitioning::MemTransferOffsets &MTO
> > -          = P.getMemTransferOffsets(*MTI);
> > -        if (!MTO.IsSplittable)
> > -          return false;
> > -      }
> > -    } else {
> > -      return false;
> > +    // Handle a struct index, which adds its field offset to the
> pointer.
> > +    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
> > +      unsigned ElementIdx = OpC->getZExtValue();
> > +      const StructLayout *SL = TD.getStructLayout(STy);
> > +      GEPOffset += APInt(Offset.getBitWidth(),
> > +                         SL->getElementOffset(ElementIdx));
> > +      continue;
> >      }
> > +
> > +    APInt TypeSize(Offset.getBitWidth(),
> > +                   TD.getTypeAllocSize(GTI.getIndexedType()));
> > +    if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) {
> > +      assert((VTy->getScalarSizeInBits() % 8) == 0 &&
> > +             "vector element size is not a multiple of 8, cannot GEP
> over it");
> > +      TypeSize = VTy->getScalarSizeInBits() / 8;
> > +    }
> > +
> > +    GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) *
> TypeSize;
> >    }
> > -  return WholeAllocaOp;
> > +  Offset = GEPOffset;
> > +  return true;
> >  }
> >
> > -namespace {
> > -/// \brief Visitor to speculate PHIs and Selects where possible.
> > -class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator>
> {
> > -  // Befriend the base class so it can delegate to private visit
> methods.
> > -  friend class llvm::InstVisitor<PHIOrSelectSpeculator>;
> > +/// \brief Build a GEP out of a base pointer and indices.
> > +///
> > +/// This will return the BasePtr if that is valid, or build a new GEP
> > +/// instruction using the IRBuilder if GEP-ing is needed.
> > +static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
> > +                       SmallVectorImpl<Value *> &Indices,
> > +                       const Twine &Prefix) {
> > +  if (Indices.empty())
> > +    return BasePtr;
> >
> > -  const TargetData &TD;
> > -  AllocaPartitioning &P;
> > -  SROA &Pass;
> > +  // A single zero index is a no-op, so check for this and avoid
> building a GEP
> > +  // in that case.
> > +  if (Indices.size() == 1 &&
> cast<ConstantInt>(Indices.back())->isZero())
> > +    return BasePtr;
> >
> > -public:
> > -  PHIOrSelectSpeculator(const TargetData &TD, AllocaPartitioning &P,
> SROA &Pass)
> > -    : TD(TD), P(P), Pass(Pass) {}
> > +  return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");
> > +}
> >
> > -  /// \brief Visit the users of an alloca partition and rewrite them.
> > -  void visitUsers(AllocaPartitioning::const_iterator PI) {
> > -    // Note that we need to use an index here as the underlying vector
> of uses
> > -    // may be grown during speculation. However, we never need to
> re-visit the
> > -    // new uses, and so we can use the initial size bound.
> > -    for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
> > -      const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
> > -      if (!PU.U)
> > -        continue; // Skip dead use.
> > +/// \brief Get a natural GEP off of the BasePtr walking through Ty
> toward
> > +/// TargetTy without changing the offset of the pointer.
> > +///
> > +/// This routine assumes we've already established a properly offset
> GEP with
> > +/// Indices, and arrived at the Ty type. The goal is to continue to GEP
> with
> > +/// zero-indices down through type layers until we find one the same as
> > +/// TargetTy. If we can't find one with the same type, we at least try
> to use
> > +/// one with the same size. If none of that works, we just produce the
> GEP as
> > +/// indicated by Indices to have the correct offset.
> > +static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const TargetData
> &TD,
> > +                                    Value *BasePtr, Type *Ty, Type
> *TargetTy,
> > +                                    SmallVectorImpl<Value *> &Indices,
> > +                                    const Twine &Prefix) {
> > +  if (Ty == TargetTy)
> > +    return buildGEP(IRB, BasePtr, Indices, Prefix);
> >
> > -      visit(cast<Instruction>(PU.U->getUser()));
> > +  // See if we can descend into a struct and locate a field with the
> correct
> > +  // type.
> > +  unsigned NumLayers = 0;
> > +  Type *ElementTy = Ty;
> > +  do {
> > +    if (ElementTy->isPointerTy())
> > +      break;
> > +    if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {
> > +      ElementTy = SeqTy->getElementType();
> > +      Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(),
> 0)));
> > +    } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
> > +      ElementTy = *STy->element_begin();
> > +      Indices.push_back(IRB.getInt32(0));
> > +    } else {
> > +      break;
> >      }
> > -  }
> > +    ++NumLayers;
> > +  } while (ElementTy != TargetTy);
> > +  if (ElementTy != TargetTy)
> > +    Indices.erase(Indices.end() - NumLayers, Indices.end());
> >
> > -private:
> > -  // By default, skip this instruction.
> > -  void visitInstruction(Instruction &I) {}
> > +  return buildGEP(IRB, BasePtr, Indices, Prefix);
> > +}
> >
> > -  /// PHI instructions that use an alloca and are subsequently loaded
> can be
> > -  /// rewritten to load both input pointers in the pred blocks and then
> PHI the
> > -  /// results, allowing the load of the alloca to be promoted.
> > -  /// From this:
> > -  ///   %P2 = phi [i32* %Alloca, i32* %Other]
> > -  ///   %V = load i32* %P2
> > -  /// to:
> > -  ///   %V1 = load i32* %Alloca      -> will be mem2reg'd
> > -  ///   ...
> > -  ///   %V2 = load i32* %Other
> > -  ///   ...
> > -  ///   %V = phi [i32 %V1, i32 %V2]
> > -  ///
> > -  /// We can do this to a select if its only uses are loads and if the
> operands
> > -  /// to the select can be loaded unconditionally.
> > -  ///
> > -  /// FIXME: This should be hoisted into a generic utility, likely in
> > -  /// Transforms/Util/Local.h
> > -  bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *>
> &Loads) {
> > -    // For now, we can only do this promotion if the load is in the
> same block
> > -    // as the PHI, and if there are no stores between the phi and load.
> > -    // TODO: Allow recursive phi users.
> > -    // TODO: Allow stores.
> > -    BasicBlock *BB = PN.getParent();
> > -    unsigned MaxAlign = 0;
> > -    for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end();
> > -         UI != UE; ++UI) {
> > -      LoadInst *LI = dyn_cast<LoadInst>(*UI);
> > -      if (LI == 0 || !LI->isSimple()) return false;
> > +/// \brief Recursively compute indices for a natural GEP.
> > +///
> > +/// This is the recursive step for getNaturalGEPWithOffset that walks
> down the
> > +/// element types adding appropriate indices for the GEP.
> > +static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const
> TargetData &TD,
> > +                                       Value *Ptr, Type *Ty, APInt
> &Offset,
> > +                                       Type *TargetTy,
> > +                                       SmallVectorImpl<Value *>
> &Indices,
> > +                                       const Twine &Prefix) {
> > +  if (Offset == 0)
> > +    return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices,
> Prefix);
> >
> > -      // For now we only allow loads in the same block as the PHI.
>  This is
> > -      // a common case that happens when instcombine merges two loads
> through
> > -      // a PHI.
> > -      if (LI->getParent() != BB) return false;
> > +  // We can't recurse through pointer types.
> > +  if (Ty->isPointerTy())
> > +    return 0;
> >
> > -      // Ensure that there are no instructions between the PHI and the
> load that
> > -      // could store.
> > -      for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
> > -        if (BBI->mayWriteToMemory())
> > -          return false;
> > +  // We try to analyze GEPs over vectors here, but note that these GEPs
> are
> > +  // extremely poorly defined currently. The long-term goal is to
> remove GEPing
> > +  // over a vector from the IR completely.
> > +  if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
> > +    unsigned ElementSizeInBits = VecTy->getScalarSizeInBits();
> > +    if (ElementSizeInBits % 8)
> > +      return 0; // GEPs over non-multiple of 8 size vector elements are
> invalid.
> > +    APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
> > +    APInt NumSkippedElements = Offset.udiv(ElementSize);
> > +    if (NumSkippedElements.ugt(VecTy->getNumElements()))
> > +      return 0;
> > +    Offset -= NumSkippedElements * ElementSize;
> > +    Indices.push_back(IRB.getInt(NumSkippedElements));
> > +    return getNaturalGEPRecursively(IRB, TD, Ptr,
> VecTy->getElementType(),
> > +                                    Offset, TargetTy, Indices, Prefix);
> > +  }
> >
> > -      MaxAlign = std::max(MaxAlign, LI->getAlignment());
> > -      Loads.push_back(LI);
> > -    }
> > +  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
> > +    Type *ElementTy = ArrTy->getElementType();
> > +    APInt ElementSize(Offset.getBitWidth(),
> TD.getTypeAllocSize(ElementTy));
> > +    APInt NumSkippedElements = Offset.udiv(ElementSize);
> > +    if (NumSkippedElements.ugt(ArrTy->getNumElements()))
> > +      return 0;
> >
> > -    // We can only transform this if it is safe to push the loads into
> the
> > -    // predecessor blocks. The only thing to watch out for is that we
> can't put
> > -    // a possibly trapping load in the predecessor if it is a critical
> edge.
> > -    for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
> > -         ++Idx) {
> > -      TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
> > -      Value *InVal = PN.getIncomingValue(Idx);
> > +    Offset -= NumSkippedElements * ElementSize;
> > +    Indices.push_back(IRB.getInt(NumSkippedElements));
> > +    return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset,
> TargetTy,
> > +                                    Indices, Prefix);
> > +  }
> >
> > -      // If the value is produced by the terminator of the predecessor
> (an
> > -      // invoke) or it has side-effects, there is no valid place to put
> a load
> > -      // in the predecessor.
> > -      if (TI == InVal || TI->mayHaveSideEffects())
> > -        return false;
> > +  StructType *STy = dyn_cast<StructType>(Ty);
> > +  if (!STy)
> > +    return 0;
> >
> > -      // If the predecessor has a single successor, then the edge isn't
> > -      // critical.
> > -      if (TI->getNumSuccessors() == 1)
> > -        continue;
> > +  const StructLayout *SL = TD.getStructLayout(STy);
> > +  uint64_t StructOffset = Offset.getZExtValue();
> > +  if (StructOffset >= SL->getSizeInBytes())
> > +    return 0;
> > +  unsigned Index = SL->getElementContainingOffset(StructOffset);
> > +  Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
> > +  Type *ElementTy = STy->getElementType(Index);
> > +  if (Offset.uge(TD.getTypeAllocSize(ElementTy)))
> > +    return 0; // The offset points into alignment padding.
> >
> > -      // If this pointer is always safe to load, or if we can prove
> that there
> > -      // is already a load in the block, then we can move the load to
> the pred
> > -      // block.
> > -      if (InVal->isDereferenceablePointer() ||
> > -          isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD))
> > -        continue;
> > +  Indices.push_back(IRB.getInt32(Index));
> > +  return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset,
> TargetTy,
> > +                                  Indices, Prefix);
> > +}
> >
> > -      return false;
> > -    }
> > +/// \brief Get a natural GEP from a base pointer to a particular offset
> and
> > +/// resulting in a particular type.
> > +///
> > +/// The goal is to produce a "natural" looking GEP that works with the
> existing
> > +/// composite types to arrive at the appropriate offset and element
> type for
> > +/// a pointer. TargetTy is the element type the returned GEP should
> point-to if
> > +/// possible. We recurse by decreasing Offset, adding the appropriate
> index to
> > +/// Indices, and setting Ty to the result subtype.
> > +///
> > +/// If no natural GEP can be constructed, this function returns null.
> > +static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const
> TargetData &TD,
> > +                                      Value *Ptr, APInt Offset, Type
> *TargetTy,
> > +                                      SmallVectorImpl<Value *> &Indices,
> > +                                      const Twine &Prefix) {
> > +  PointerType *Ty = cast<PointerType>(Ptr->getType());
> >
> > -    return true;
> > -  }
> > +  // Don't consider any GEPs through an i8* as natural unless the
> TargetTy is
> > +  // an i8.
> > +  if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))
> > +    return 0;
> >
> > -  void visitPHINode(PHINode &PN) {
> > -    DEBUG(dbgs() << "    original: " << PN << "\n");
> > +  Type *ElementTy = Ty->getElementType();
> > +  if (!ElementTy->isSized())
> > +    return 0; // We can't GEP through an unsized element.
> > +  APInt ElementSize(Offset.getBitWidth(),
> TD.getTypeAllocSize(ElementTy));
> > +  if (ElementSize == 0)
> > +    return 0; // Zero-length arrays can't help us build a natural GEP.
> > +  APInt NumSkippedElements = Offset.udiv(ElementSize);
> >
> > -    SmallVector<LoadInst *, 4> Loads;
> > -    if (!isSafePHIToSpeculate(PN, Loads))
> > -      return;
> > +  Offset -= NumSkippedElements * ElementSize;
> > +  Indices.push_back(IRB.getInt(NumSkippedElements));
> > +  return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset,
> TargetTy,
> > +                                  Indices, Prefix);
> > +}
> >
> > -    assert(!Loads.empty());
> > +/// \brief Compute an adjusted pointer from Ptr by Offset bytes where
> the
> > +/// resulting pointer has PointerTy.
> > +///
> > +/// This tries very hard to compute a "natural" GEP which arrives at
> the offset
> > +/// and produces the pointer type desired. Where it cannot, it will try
> to use
> > +/// the natural GEP to arrive at the offset and bitcast to the type.
> Where that
> > +/// fails, it will try to use an existing i8* and GEP to the byte
> offset and
> > +/// bitcast to the type.
> > +///
> > +/// The strategy for finding the more natural GEPs is to peel off
> layers of the
> > +/// pointer, walking back through bit casts and GEPs, searching for a
> base
> > +/// pointer from which we can compute a natural GEP with the desired
> > +/// properities. The algorithm tries to fold as many constant indices
> into
> > +/// a single GEP as possible, thus making each GEP more independent of
> the
> > +/// surrounding code.
> > +static Value *getAdjustedPtr(IRBuilder<> &IRB, const TargetData &TD,
> > +                             Value *Ptr, APInt Offset, Type *PointerTy,
> > +                             const Twine &Prefix) {
> > +  // Even though we don't look through PHI nodes, we could be called on
> an
> > +  // instruction in an unreachable block, which may be on a cycle.
> > +  SmallPtrSet<Value *, 4> Visited;
> > +  Visited.insert(Ptr);
> > +  SmallVector<Value *, 4> Indices;
> >
> > -    Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
> > -    IRBuilder<> PHIBuilder(&PN);
> > -    PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy,
> PN.getNumIncomingValues(),
> > -                                          PN.getName() +
> ".sroa.speculated");
> > +  // We may end up computing an offset pointer that has the wrong type.
> If we
> > +  // never are able to compute one directly that has the correct type,
> we'll
> > +  // fall back to it, so keep it around here.
> > +  Value *OffsetPtr = 0;
> >
> > -    // Get the TBAA tag and alignment to use from one of the loads.  It
> doesn't
> > -    // matter which one we get and if any differ, it doesn't matter.
> > -    LoadInst *SomeLoad = cast<LoadInst>(Loads.back());
> > -    MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
> > -    unsigned Align = SomeLoad->getAlignment();
> > +  // Remember any i8 pointer we come across to re-use if we need to do
> a raw
> > +  // byte offset.
> > +  Value *Int8Ptr = 0;
> > +  APInt Int8PtrOffset(Offset.getBitWidth(), 0);
> >
> > -    // Rewrite all loads of the PN to use the new PHI.
> > -    do {
> > -      LoadInst *LI = Loads.pop_back_val();
> > -      LI->replaceAllUsesWith(NewPN);
> > -      Pass.DeadInsts.push_back(LI);
> > -    } while (!Loads.empty());
> > +  Type *TargetTy = PointerTy->getPointerElementType();
> >
> > -    // Inject loads into all of the pred blocks.
> > -    for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
> ++Idx) {
> > -      BasicBlock *Pred = PN.getIncomingBlock(Idx);
> > -      TerminatorInst *TI = Pred->getTerminator();
> > -      Use *InUse =
> &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
> > -      Value *InVal = PN.getIncomingValue(Idx);
> > -      IRBuilder<> PredBuilder(TI);
> > +  do {
> > +    // First fold any existing GEPs into the offset.
> > +    while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
> > +      APInt GEPOffset(Offset.getBitWidth(), 0);
> > +      if (!accumulateGEPOffsets(TD, *GEP, GEPOffset))
> > +        break;
> > +      Offset += GEPOffset;
> > +      Ptr = GEP->getPointerOperand();
> > +      if (!Visited.insert(Ptr))
> > +        break;
> > +    }
> >
> > -      LoadInst *Load
> > -        = PredBuilder.CreateLoad(InVal, (PN.getName() +
> ".sroa.speculate.load." +
> > -                                         Pred->getName()));
> > -      ++NumLoadsSpeculated;
> > -      Load->setAlignment(Align);
> > -      if (TBAATag)
> > -        Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
> > -      NewPN->addIncoming(Load, Pred);
> > +    // See if we can perform a natural GEP here.
> > +    Indices.clear();
> > +    if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset,
> TargetTy,
> > +                                           Indices, Prefix)) {
> > +      if (P->getType() == PointerTy) {
> > +        // Zap any offset pointer that we ended up computing in
> previous rounds.
> > +        if (OffsetPtr && OffsetPtr->use_empty())
> > +          if (Instruction *I = dyn_cast<Instruction>(OffsetPtr))
> > +            I->eraseFromParent();
> > +        return P;
> > +      }
> > +      if (!OffsetPtr) {
> > +        OffsetPtr = P;
> > +      }
> > +    }
> >
> > -      Instruction *Ptr = dyn_cast<Instruction>(InVal);
> > -      if (!Ptr)
> > -        // No uses to rewrite.
> > -        continue;
> > +    // Stash this pointer if we've found an i8*.
> > +    if (Ptr->getType()->isIntegerTy(8)) {
> > +      Int8Ptr = Ptr;
> > +      Int8PtrOffset = Offset;
> > +    }
> >
> > -      // Try to lookup and rewrite any partition uses corresponding to
> this phi
> > -      // input.
> > -      AllocaPartitioning::iterator PI
> > -        = P.findPartitionForPHIOrSelectOperand(InUse);
> > -      if (PI == P.end())
> > -        continue;
> > +    // Peel off a layer of the pointer and update the offset
> appropriately.
> > +    if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
> > +      Ptr = cast<Operator>(Ptr)->getOperand(0);
> > +    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
> > +      if (GA->mayBeOverridden())
> > +        break;
> > +      Ptr = GA->getAliasee();
> > +    } else {
> > +      break;
> > +    }
> > +    assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
> > +  } while (Visited.insert(Ptr));
> >
> > -      // Replace the Use in the PartitionUse for this operand with the
> Use
> > -      // inside the load.
> > -      AllocaPartitioning::use_iterator UI
> > -        = P.findPartitionUseForPHIOrSelectOperand(InUse);
> > -      assert(isa<PHINode>(*UI->U->getUser()));
> > -      UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
> > +  if (!OffsetPtr) {
> > +    if (!Int8Ptr) {
> > +      Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
> > +                                  Prefix + ".raw_cast");
> > +      Int8PtrOffset = Offset;
> >      }
> > -    DEBUG(dbgs() << "          speculated to: " << *NewPN << "\n");
> > +
> > +    OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
> > +      IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
> > +                            Prefix + ".raw_idx");
> >    }
> > +  Ptr = OffsetPtr;
> >
> > -  /// Select instructions that use an alloca and are subsequently
> loaded can be
> > -  /// rewritten to load both input pointers and then select between the
> result,
> > -  /// allowing the load of the alloca to be promoted.
> > -  /// From this:
> > -  ///   %P2 = select i1 %cond, i32* %Alloca, i32* %Other
> > -  ///   %V = load i32* %P2
> > -  /// to:
> > -  ///   %V1 = load i32* %Alloca      -> will be mem2reg'd
> > -  ///   %V2 = load i32* %Other
> > -  ///   %V = select i1 %cond, i32 %V1, i32 %V2
> > -  ///
> > -  /// We can do this to a select if its only uses are loads and if the
> operand
> > -  /// to the select can be loaded unconditionally.
> > -  bool isSafeSelectToSpeculate(SelectInst &SI,
> > -                               SmallVectorImpl<LoadInst *> &Loads) {
> > -    Value *TValue = SI.getTrueValue();
> > -    Value *FValue = SI.getFalseValue();
> > -    bool TDerefable = TValue->isDereferenceablePointer();
> > -    bool FDerefable = FValue->isDereferenceablePointer();
> > +  // On the off chance we were targeting i8*, guard the bitcast here.
> > +  if (Ptr->getType() != PointerTy)
> > +    Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");
> >
> > -    for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end();
> > -         UI != UE; ++UI) {
> > -      LoadInst *LI = dyn_cast<LoadInst>(*UI);
> > -      if (LI == 0 || !LI->isSimple()) return false;
> > +  return Ptr;
> > +}
> >
> > -      // Both operands to the select need to be dereferencable, either
> > -      // absolutely (e.g. allocas) or at this point because we can see
> other
> > -      // accesses to it.
> > -      if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI,
> > -
>  LI->getAlignment(), &TD))
> > -        return false;
> > -      if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI,
> > -
>  LI->getAlignment(), &TD))
> > -        return false;
> > -      Loads.push_back(LI);
> > -    }
> > +/// \brief Test whether the given alloca partition can be promoted to a vector.
> > +///
> > +/// This is a quick test to check whether we can rewrite a particular alloca
> > +/// partition (and its newly formed alloca) into a vector alloca with only
> > +/// whole-vector loads and stores such that it could be promoted to a vector
> > +/// SSA value. We only can ensure this for a limited set of operations, and we
> > +/// don't want to do the rewrites unless we are confident that the result will
> > +/// be promotable, so we have an early test here.
> > +static bool isVectorPromotionViable(const TargetData &TD,
> > +                                    Type *AllocaTy,
> > +                                    AllocaPartitioning &P,
> > +                                    uint64_t PartitionBeginOffset,
> > +                                    uint64_t PartitionEndOffset,
> > +                                    AllocaPartitioning::const_use_iterator I,
> > +                                    AllocaPartitioning::const_use_iterator E) {
> > +  VectorType *Ty = dyn_cast<VectorType>(AllocaTy);
> > +  if (!Ty)
> > +    return false;
> >
> > -    return true;
> > -  }
> > +  uint64_t VecSize = TD.getTypeSizeInBits(Ty);
> > +  uint64_t ElementSize = Ty->getScalarSizeInBits();
> >
> > -  void visitSelectInst(SelectInst &SI) {
> > -    DEBUG(dbgs() << "    original: " << SI << "\n");
> > -    IRBuilder<> IRB(&SI);
> > +  // While the definition of LLVM vectors is bitpacked, we don't support sizes
> > +  // that aren't byte sized.
> > +  if (ElementSize % 8)
> > +    return false;
> > +  assert((VecSize % 8) == 0 && "vector size not a multiple of element size?");
> > +  VecSize /= 8;
> > +  ElementSize /= 8;
> >
> > -    // If the select isn't safe to speculate, just use simple logic to emit it.
> > -    SmallVector<LoadInst *, 4> Loads;
> > -    if (!isSafeSelectToSpeculate(SI, Loads))
> > -      return;
> > +  for (; I != E; ++I) {
> > +    if (!I->U)
> > +      continue; // Skip dead use.
> >
> > -    Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
> > -    AllocaPartitioning::iterator PIs[2];
> > -    AllocaPartitioning::PartitionUse PUs[2];
> > -    for (unsigned i = 0, e = 2; i != e; ++i) {
> > -      PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
> > -      if (PIs[i] != P.end()) {
> > -        // If the pointer is within the partitioning, remove the select from
> > -        // its uses. We'll add in the new loads below.
> > -        AllocaPartitioning::use_iterator UI
> > -          = P.findPartitionUseForPHIOrSelectOperand(Ops[i]);
> > -        PUs[i] = *UI;
> > -        // Clear out the use here so that the offsets into the use list remain
> > -        // stable but this use is ignored when rewriting.
> > -        UI->U = 0;
> > +    uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
> > +    uint64_t BeginIndex = BeginOffset / ElementSize;
> > +    if (BeginIndex * ElementSize != BeginOffset ||
> > +        BeginIndex >= Ty->getNumElements())
> > +      return false;
> > +    uint64_t EndOffset = I->EndOffset - PartitionBeginOffset;
> > +    uint64_t EndIndex = EndOffset / ElementSize;
> > +    if (EndIndex * ElementSize != EndOffset ||
> > +        EndIndex > Ty->getNumElements())
> > +      return false;
> > +
> > +    // FIXME: We should build shuffle vector instructions to handle
> > +    // non-element-sized accesses.
> > +    if ((EndOffset - BeginOffset) != ElementSize &&
> > +        (EndOffset - BeginOffset) != VecSize)
> > +      return false;
> > +
> > +    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
> > +      if (MI->isVolatile())
> > +        return false;
> > +      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
> > +        const AllocaPartitioning::MemTransferOffsets &MTO
> > +          = P.getMemTransferOffsets(*MTI);
> > +        if (!MTO.IsSplittable)
> > +          return false;
> >        }
> > +    } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
> > +      // Disable vector promotion when there are loads or stores of an FCA.
> > +      return false;
> > +    } else if (!isa<LoadInst>(I->U->getUser()) &&
> > +               !isa<StoreInst>(I->U->getUser())) {
> > +      return false;
> >      }
> > +  }
> > +  return true;
> > +}
> >
> > -    Value *TV = SI.getTrueValue();
> > -    Value *FV = SI.getFalseValue();
> > -    // Replace the loads of the select with a select of two loads.
> > -    while (!Loads.empty()) {
> > -      LoadInst *LI = Loads.pop_back_val();
> > -
> > -      IRB.SetInsertPoint(LI);
> > -      LoadInst *TL =
> > -        IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
> > -      LoadInst *FL =
> > -        IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");
> > -      NumLoadsSpeculated += 2;
> > +/// \brief Test whether the given alloca partition can be promoted to an int.
> > +///
> > +/// This is a quick test to check whether we can rewrite a particular alloca
> > +/// partition (and its newly formed alloca) into an integer alloca suitable for
> > +/// promotion to an SSA value. We only can ensure this for a limited set of
> > +/// operations, and we don't want to do the rewrites unless we are confident
> > +/// that the result will be promotable, so we have an early test here.
> > +static bool isIntegerPromotionViable(const TargetData &TD,
> > +                                     Type *AllocaTy,
> > +                                     uint64_t AllocBeginOffset,
> > +                                     AllocaPartitioning &P,
> > +                                     AllocaPartitioning::const_use_iterator I,
> > +                                     AllocaPartitioning::const_use_iterator E) {
> > +  IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
> > +  if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth())
> > +    return false;
> >
> > -      // Transfer alignment and TBAA info if present.
> > -      TL->setAlignment(LI->getAlignment());
> > -      FL->setAlignment(LI->getAlignment());
> > -      if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
> > -        TL->setMetadata(LLVMContext::MD_tbaa, Tag);
> > -        FL->setMetadata(LLVMContext::MD_tbaa, Tag);
> > -      }
> > +  // Check the uses to ensure the uses are (likely) promotable integer uses.
> > +  // Also ensure that the alloca has a covering load or store. We don't want
> > +  // to promote because of some other unsplittable entry (which we may make
> > +  // splittable later) and lose the ability to promote each element access.
> > +  bool WholeAllocaOp = false;
> > +  for (; I != E; ++I) {
> > +    if (!I->U)
> > +      continue; // Skip dead use.
> >
> > -      Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
> > -                                  LI->getName() + ".sroa.speculated");
> > +    // We can't reasonably handle cases where the load or store extends past
> > +    // the end of the alloca's type and into its padding.
> > +    if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty))
> > +      return false;
> >
> > -      LoadInst *Loads[2] = { TL, FL };
> > -      for (unsigned i = 0, e = 2; i != e; ++i) {
> > -        if (PIs[i] != P.end()) {
> > -          Use *LoadUse = &Loads[i]->getOperandUse(0);
> > -          assert(PUs[i].U->get() == LoadUse->get());
> > -          PUs[i].U = LoadUse;
> > -          P.use_push_back(PIs[i], PUs[i]);
> > -        }
> > +    if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
> > +      if (LI->isVolatile() || !LI->getType()->isIntegerTy())
> > +        return false;
> > +      if (LI->getType() == Ty)
> > +        WholeAllocaOp = true;
> > +    } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
> > +      if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())
> > +        return false;
> > +      if (SI->getValueOperand()->getType() == Ty)
> > +        WholeAllocaOp = true;
> > +    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
> > +      if (MI->isVolatile())
> > +        return false;
> > +      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
> > +        const AllocaPartitioning::MemTransferOffsets &MTO
> > +          = P.getMemTransferOffsets(*MTI);
> > +        if (!MTO.IsSplittable)
> > +          return false;
> >        }
> > -
> > -      DEBUG(dbgs() << "          speculated to: " << *V << "\n");
> > -      LI->replaceAllUsesWith(V);
> > -      Pass.DeadInsts.push_back(LI);
> > +    } else {
> > +      return false;
> >      }
> >    }
> > -};
> > +  return WholeAllocaOp;
> > +}
> >
> > +namespace {
> >  /// \brief Visitor to rewrite instructions using a partition of an alloca to
> >  /// use a new alloca.
> >  ///
> >
> >
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at cs.uiuc.edu
> > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
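A note for readers skimming the quoted diff: the heart of the new
isVectorPromotionViable() is its begin/end index arithmetic, which is easy to
lose among the wrapped lines above. Below is a minimal standalone sketch of
that arithmetic in plain C++; the helper name, the ByteRange struct, and the
example numbers are illustrative only and are not part of the patch.

#include <cstdint>

// Byte offsets of one use, relative to the start of the partition.
struct ByteRange { uint64_t Begin, End; };

// Mirrors the checks in isVectorPromotionViable(): the range must start and
// end exactly on element boundaries, stay inside the vector, and cover either
// a single element or the whole vector (shuffles are left as a FIXME there).
static bool coversElementOrWholeVector(ByteRange R, uint64_t ElementSize,
                                       uint64_t NumElements) {
  uint64_t BeginIndex = R.Begin / ElementSize;
  if (BeginIndex * ElementSize != R.Begin || BeginIndex >= NumElements)
    return false;
  uint64_t EndIndex = R.End / ElementSize;
  if (EndIndex * ElementSize != R.End || EndIndex > NumElements)
    return false;
  uint64_t Size = R.End - R.Begin;
  return Size == ElementSize || Size == ElementSize * NumElements;
}

// For a <4 x i32> partition (ElementSize = 4, NumElements = 4):
//   coversElementOrWholeVector({4, 8}, 4, 4)  -> true  (exactly element 1)
//   coversElementOrWholeVector({0, 16}, 4, 4) -> true  (whole vector)
//   coversElementOrWholeVector({2, 6}, 4, 4)  -> false (straddles elements)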
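Similarly, the first rejection in isIntegerPromotionViable(), the test
8 * TD.getTypeStoreSize(Ty) != Ty->getBitWidth(), is a padding-bits check. A
tiny sketch of the same comparison follows; the helper name is illustrative,
and the example store sizes assume the usual TargetData values.

#include <cstdint>

// True when an integer type's bit width exactly fills the bytes it is stored
// in, so whole-alloca loads and stores never touch padding bits.
static bool fillsItsStoreSize(uint64_t StoreSizeInBytes, uint64_t BitWidth) {
  return 8 * StoreSizeInBytes == BitWidth;
}

// fillsItsStoreSize(4, 32) -> true   (i32: eligible for integer promotion)
// fillsItsStoreSize(1, 1)  -> false  (i1: seven padding bits, rejected early)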