On Thu, Oct 4, 2012 at 6:52 PM, Sean Silva <span dir="ltr">&lt;<a href="mailto:silvas@purdue.edu" target="_blank" class="cremed">silvas@purdue.edu</a>&gt;</span> wrote:<br><div class="gmail_extra"><div class="gmail_quote">

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">protip: pass --patience or --histogram to git's diff-generating<br>

commands (git diff, git log -p, etc.) to select alternative diff<br>

algorithms.</blockquote><div><br></div><div>I'm well aware of this, but that does nothing for the commit mailing list which is what my commit log was written for...</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

 I just tried it out locally for this patch and the diff is<br>

dramatically better (the diff is a single big block of + and a single<br>

big block of -). FWIW, I find that patience and histogram are usually<br>

basically the same (histogram is an extension of patience), but either<br>

of them is usually significantly better than the default.<br>

<br>

More info about patience diff, for the curious:<br>

<a href="http://bramcohen.livejournal.com/73318.html" target="_blank" class="cremed">http://bramcohen.livejournal.com/73318.html</a><br>

More info about histogram diff:<br>

<a href="http://download.eclipse.org/jgit/docs/jgit-2.0.0.201206130900-r/apidocs/org/eclipse/jgit/diff/HistogramDiff.html" target="_blank" class="cremed">http://download.eclipse.org/jgit/docs/jgit-2.0.0.201206130900-r/apidocs/org/eclipse/jgit/diff/HistogramDiff.html</a><br>


<span class="HOEnZb"><font color="#888888"><br>

-- Sean Silva<br>

</font></span><div class="HOEnZb"><div class="h5"><br>

On Thu, Oct 4, 2012 at 9:29 PM, Chandler Carruth &lt;<a href="mailto:chandlerc@gmail.com" class="cremed">chandlerc@gmail.com</a>&gt; wrote:<br>

> Author: chandlerc<br>

> Date: Thu Oct  4 20:29:06 2012<br>

> New Revision: 165284<br>

><br>

> URL: <a href="http://llvm.org/viewvc/llvm-project?rev=165284&view=rev" target="_blank" class="cremed">http://llvm.org/viewvc/llvm-project?rev=165284&view=rev</a><br>

> Log:<br>

> Lift the speculation visitor above all the helpers that are targeted at<br>

> the rewrite visitor to make the fact that the speculation is completely<br>

> independent a bit more clear.<br>

><br>

> I promise that this is just a cut/paste of the one visitor and adding<br>

> the anonymous namespace wrappings. The diff may look completely<br>

> preposterous, it does in git for some reason.<br>

><br>

> Modified:<br>

>     llvm/trunk/lib/Transforms/Scalar/SROA.cpp<br>

><br>

> Modified: llvm/trunk/lib/Transforms/Scalar/SROA.cpp<br>

> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SROA.cpp?rev=165284&r1=165283&r2=165284&view=diff" target="_blank" class="cremed">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SROA.cpp?rev=165284&r1=165283&r2=165284&view=diff</a><br>


> ==============================================================================<br>

> --- llvm/trunk/lib/Transforms/Scalar/SROA.cpp (original)<br>

> +++ llvm/trunk/lib/Transforms/Scalar/SROA.cpp Thu Oct  4 20:29:06 2012<br>

> @@ -1368,715 +1368,717 @@<br>

>  INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",<br>

>                      false, false)<br>

><br>

> -/// \brief Accumulate the constant offsets in a GEP into a single APInt offset.<br>

> -///<br>

> -/// If the provided GEP is all-constant, the total byte offset formed by the<br>

> -/// GEP is computed and Offset is set to it. If the GEP has any non-constant<br>

> -/// operands, the function returns false and the value of Offset is unmodified.<br>

> -static bool accumulateGEPOffsets(const TargetData &TD, GEPOperator &GEP,<br>

> -                                 APInt &Offset) {<br>

> -  APInt GEPOffset(Offset.getBitWidth(), 0);<br>

> -  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);<br>

> -       GTI != GTE; ++GTI) {<br>

> -    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());<br>

> -    if (!OpC)<br>

> -      return false;<br>

> -    if (OpC->isZero()) continue;<br>

> +namespace {<br>

> +/// \brief Visitor to speculate PHIs and Selects where possible.<br>

> +class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {<br>

> +  // Befriend the base class so it can delegate to private visit methods.<br>

> +  friend class llvm::InstVisitor<PHIOrSelectSpeculator>;<br>

><br>

> -    // Handle a struct index, which adds its field offset to the pointer.<br>

> -    if (StructType *STy = dyn_cast<StructType>(*GTI)) {<br>

> -      unsigned ElementIdx = OpC->getZExtValue();<br>

> -      const StructLayout *SL = TD.getStructLayout(STy);<br>

> -      GEPOffset += APInt(Offset.getBitWidth(),<br>

> -                         SL->getElementOffset(ElementIdx));<br>

> -      continue;<br>

> -    }<br>

> +  const TargetData &TD;<br>

> +  AllocaPartitioning &P;<br>

> +  SROA &Pass;<br>

><br>

> -    APInt TypeSize(Offset.getBitWidth(),<br>

> -                   TD.getTypeAllocSize(GTI.getIndexedType()));<br>

> -    if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) {<br>

> -      assert((VTy->getScalarSizeInBits() % 8) == 0 &&<br>

> -             "vector element size is not a multiple of 8, cannot GEP over it");<br>

> -      TypeSize = VTy->getScalarSizeInBits() / 8;<br>

> -    }<br>

> +public:<br>

> +  PHIOrSelectSpeculator(const TargetData &TD, AllocaPartitioning &P, SROA &Pass)<br>

> +    : TD(TD), P(P), Pass(Pass) {}<br>

><br>

> -    GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) * TypeSize;<br>

> +  /// \brief Visit the users of an alloca partition and rewrite them.<br>

> +  void visitUsers(AllocaPartitioning::const_iterator PI) {<br>

> +    // Note that we need to use an index here as the underlying vector of uses<br>

> +    // may be grown during speculation. However, we never need to re-visit the<br>

> +    // new uses, and so we can use the initial size bound.<br>

> +    for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {<br>

> +      const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);<br>

> +      if (!PU.U)<br>

> +        continue; // Skip dead use.<br>

> +<br>

> +      visit(cast<Instruction>(PU.U->getUser()));<br>

> +    }<br>

>    }<br>

> -  Offset = GEPOffset;<br>

> -  return true;<br>

> -}<br>

><br>

> -/// \brief Build a GEP out of a base pointer and indices.<br>

> -///<br>

> -/// This will return the BasePtr if that is valid, or build a new GEP<br>

> -/// instruction using the IRBuilder if GEP-ing is needed.<br>

> -static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,<br>

> -                       SmallVectorImpl<Value *> &Indices,<br>

> -                       const Twine &Prefix) {<br>

> -  if (Indices.empty())<br>

> -    return BasePtr;<br>

> +private:<br>

> +  // By default, skip this instruction.<br>

> +  void visitInstruction(Instruction &I) {}<br>

><br>

> -  // A single zero index is a no-op, so check for this and avoid building a GEP<br>

> -  // in that case.<br>

> -  if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())<br>

> -    return BasePtr;<br>

> +  /// PHI instructions that use an alloca and are subsequently loaded can be<br>

> +  /// rewritten to load both input pointers in the pred blocks and then PHI the<br>

> +  /// results, allowing the load of the alloca to be promoted.<br>

> +  /// From this:<br>

> +  ///   %P2 = phi [i32* %Alloca, i32* %Other]<br>

> +  ///   %V = load i32* %P2<br>

> +  /// to:<br>

> +  ///   %V1 = load i32* %Alloca      -> will be mem2reg'd<br>

> +  ///   ...<br>

> +  ///   %V2 = load i32* %Other<br>

> +  ///   ...<br>

> +  ///   %V = phi [i32 %V1, i32 %V2]<br>

> +  ///<br>

> +  /// We can do this to a select if its only uses are loads and if the operands<br>

> +  /// to the select can be loaded unconditionally.<br>

> +  ///<br>

> +  /// FIXME: This should be hoisted into a generic utility, likely in<br>

> +  /// Transforms/Util/Local.h<br>

> +  bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *> &Loads) {<br>

> +    // For now, we can only do this promotion if the load is in the same block<br>

> +    // as the PHI, and if there are no stores between the phi and load.<br>

> +    // TODO: Allow recursive phi users.<br>

> +    // TODO: Allow stores.<br>

> +    BasicBlock *BB = PN.getParent();<br>

> +    unsigned MaxAlign = 0;<br>

> +    for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end();<br>

> +         UI != UE; ++UI) {<br>

> +      LoadInst *LI = dyn_cast<LoadInst>(*UI);<br>

> +      if (LI == 0 || !LI->isSimple()) return false;<br>

><br>

> -  return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");<br>

> -}<br>

> +      // For now we only allow loads in the same block as the PHI.  This is<br>

> +      // a common case that happens when instcombine merges two loads through<br>

> +      // a PHI.<br>

> +      if (LI->getParent() != BB) return false;<br>

><br>

> -/// \brief Get a natural GEP off of the BasePtr walking through Ty toward<br>

> -/// TargetTy without changing the offset of the pointer.<br>

> -///<br>

> -/// This routine assumes we've already established a properly offset GEP with<br>

> -/// Indices, and arrived at the Ty type. The goal is to continue to GEP with<br>

> -/// zero-indices down through type layers until we find one the same as<br>

> -/// TargetTy. If we can't find one with the same type, we at least try to use<br>

> -/// one with the same size. If none of that works, we just produce the GEP as<br>

> -/// indicated by Indices to have the correct offset.<br>

> -static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const TargetData &TD,<br>

> -                                    Value *BasePtr, Type *Ty, Type *TargetTy,<br>

> -                                    SmallVectorImpl<Value *> &Indices,<br>

> -                                    const Twine &Prefix) {<br>

> -  if (Ty == TargetTy)<br>

> -    return buildGEP(IRB, BasePtr, Indices, Prefix);<br>

> +      // Ensure that there are no instructions between the PHI and the load that<br>

> +      // could store.<br>

> +      for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)<br>

> +        if (BBI->mayWriteToMemory())<br>

> +          return false;<br>

><br>

> -  // See if we can descend into a struct and locate a field with the correct<br>

> -  // type.<br>

> -  unsigned NumLayers = 0;<br>

> -  Type *ElementTy = Ty;<br>

> -  do {<br>

> -    if (ElementTy->isPointerTy())<br>

> -      break;<br>

> -    if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {<br>

> -      ElementTy = SeqTy->getElementType();<br>

> -      Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(), 0)));<br>

> -    } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {<br>

> -      ElementTy = *STy->element_begin();<br>

> -      Indices.push_back(IRB.getInt32(0));<br>

> -    } else {<br>

> -      break;<br>

> +      MaxAlign = std::max(MaxAlign, LI->getAlignment());<br>

> +      Loads.push_back(LI);<br>

>      }<br>

> -    ++NumLayers;<br>

> -  } while (ElementTy != TargetTy);<br>

> -  if (ElementTy != TargetTy)<br>

> -    Indices.erase(Indices.end() - NumLayers, Indices.end());<br>

><br>

> -  return buildGEP(IRB, BasePtr, Indices, Prefix);<br>

> -}<br>

> +    // We can only transform this if it is safe to push the loads into the<br>

> +    // predecessor blocks. The only thing to watch out for is that we can't put<br>

> +    // a possibly trapping load in the predecessor if it is a critical edge.<br>

> +    for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;<br>

> +         ++Idx) {<br>

> +      TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();<br>

> +      Value *InVal = PN.getIncomingValue(Idx);<br>

><br>

> -/// \brief Recursively compute indices for a natural GEP.<br>

> -///<br>

> -/// This is the recursive step for getNaturalGEPWithOffset that walks down the<br>

> -/// element types adding appropriate indices for the GEP.<br>

> -static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const TargetData &TD,<br>

> -                                       Value *Ptr, Type *Ty, APInt &Offset,<br>

> -                                       Type *TargetTy,<br>

> -                                       SmallVectorImpl<Value *> &Indices,<br>

> -                                       const Twine &Prefix) {<br>

> -  if (Offset == 0)<br>

> -    return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix);<br>

> +      // If the value is produced by the terminator of the predecessor (an<br>

> +      // invoke) or it has side-effects, there is no valid place to put a load<br>

> +      // in the predecessor.<br>

> +      if (TI == InVal || TI->mayHaveSideEffects())<br>

> +        return false;<br>

><br>

> -  // We can't recurse through pointer types.<br>

> -  if (Ty->isPointerTy())<br>

> -    return 0;<br>

> +      // If the predecessor has a single successor, then the edge isn't<br>

> +      // critical.<br>

> +      if (TI->getNumSuccessors() == 1)<br>

> +        continue;<br>

><br>

> -  // We try to analyze GEPs over vectors here, but note that these GEPs are<br>

> -  // extremely poorly defined currently. The long-term goal is to remove GEPing<br>

> -  // over a vector from the IR completely.<br>

> -  if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {<br>

> -    unsigned ElementSizeInBits = VecTy->getScalarSizeInBits();<br>

> -    if (ElementSizeInBits % 8)<br>

> -      return 0; // GEPs over non-multiple of 8 size vector elements are invalid.<br>

> -    APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);<br>

> -    APInt NumSkippedElements = Offset.udiv(ElementSize);<br>

> -    if (NumSkippedElements.ugt(VecTy->getNumElements()))<br>

> -      return 0;<br>

> -    Offset -= NumSkippedElements * ElementSize;<br>

> -    Indices.push_back(IRB.getInt(NumSkippedElements));<br>

> -    return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),<br>

> -                                    Offset, TargetTy, Indices, Prefix);<br>

> -  }<br>

> +      // If this pointer is always safe to load, or if we can prove that there<br>

> +      // is already a load in the block, then we can move the load to the pred<br>

> +      // block.<br>

> +      if (InVal->isDereferenceablePointer() ||<br>

> +          isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD))<br>

> +        continue;<br>

><br>

> -  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {<br>

> -    Type *ElementTy = ArrTy->getElementType();<br>

> -    APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));<br>

> -    APInt NumSkippedElements = Offset.udiv(ElementSize);<br>

> -    if (NumSkippedElements.ugt(ArrTy->getNumElements()))<br>

> -      return 0;<br>

> +      return false;<br>

> +    }<br>

><br>

> -    Offset -= NumSkippedElements * ElementSize;<br>

> -    Indices.push_back(IRB.getInt(NumSkippedElements));<br>

> -    return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,<br>

> -                                    Indices, Prefix);<br>

> +    return true;<br>

>    }<br>

><br>

> -  StructType *STy = dyn_cast<StructType>(Ty);<br>

> -  if (!STy)<br>

> -    return 0;<br>

> +  void visitPHINode(PHINode &PN) {<br>

> +    DEBUG(dbgs() << "    original: " << PN << "\n");<br>

><br>

> -  const StructLayout *SL = TD.getStructLayout(STy);<br>

> -  uint64_t StructOffset = Offset.getZExtValue();<br>

> -  if (StructOffset >= SL->getSizeInBytes())<br>

> -    return 0;<br>

> -  unsigned Index = SL->getElementContainingOffset(StructOffset);<br>

> -  Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));<br>

> -  Type *ElementTy = STy->getElementType(Index);<br>

> -  if (Offset.uge(TD.getTypeAllocSize(ElementTy)))<br>

> -    return 0; // The offset points into alignment padding.<br>

> +    SmallVector<LoadInst *, 4> Loads;<br>

> +    if (!isSafePHIToSpeculate(PN, Loads))<br>

> +      return;<br>

><br>

> -  Indices.push_back(IRB.getInt32(Index));<br>

> -  return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,<br>

> -                                  Indices, Prefix);<br>

> -}<br>

> +    assert(!Loads.empty());<br>

><br>

> -/// \brief Get a natural GEP from a base pointer to a particular offset and<br>

> -/// resulting in a particular type.<br>

> -///<br>

> -/// The goal is to produce a "natural" looking GEP that works with the existing<br>

> -/// composite types to arrive at the appropriate offset and element type for<br>

> -/// a pointer. TargetTy is the element type the returned GEP should point-to if<br>

> -/// possible. We recurse by decreasing Offset, adding the appropriate index to<br>

> -/// Indices, and setting Ty to the result subtype.<br>

> -///<br>

> -/// If no natural GEP can be constructed, this function returns null.<br>

> -static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const TargetData &TD,<br>

> -                                      Value *Ptr, APInt Offset, Type *TargetTy,<br>

> -                                      SmallVectorImpl<Value *> &Indices,<br>

> -                                      const Twine &Prefix) {<br>

> -  PointerType *Ty = cast<PointerType>(Ptr->getType());<br>

> +    Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();<br>

> +    IRBuilder<> PHIBuilder(&PN);<br>

> +    PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),<br>

> +                                          PN.getName() + ".sroa.speculated");<br>

><br>

> -  // Don't consider any GEPs through an i8* as natural unless the TargetTy is<br>

> -  // an i8.<br>

> -  if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))<br>

> -    return 0;<br>

> -<br>

> -  Type *ElementTy = Ty->getElementType();<br>

> -  if (!ElementTy->isSized())<br>

> -    return 0; // We can't GEP through an unsized element.<br>

> -  APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));<br>

> -  if (ElementSize == 0)<br>

> -    return 0; // Zero-length arrays can't help us build a natural GEP.<br>

> -  APInt NumSkippedElements = Offset.udiv(ElementSize);<br>

> -<br>

> -  Offset -= NumSkippedElements * ElementSize;<br>

> -  Indices.push_back(IRB.getInt(NumSkippedElements));<br>

> -  return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,<br>

> -                                  Indices, Prefix);<br>

> -}<br>

> +    // Get the TBAA tag and alignment to use from one of the loads.  It doesn't<br>

> +    // matter which one we get and if any differ, it doesn't matter.<br>

> +    LoadInst *SomeLoad = cast<LoadInst>(Loads.back());<br>

> +    MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);<br>

> +    unsigned Align = SomeLoad->getAlignment();<br>

><br>

> -/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the<br>

> -/// resulting pointer has PointerTy.<br>

> -///<br>

> -/// This tries very hard to compute a "natural" GEP which arrives at the offset<br>

> -/// and produces the pointer type desired. Where it cannot, it will try to use<br>

> -/// the natural GEP to arrive at the offset and bitcast to the type. Where that<br>

> -/// fails, it will try to use an existing i8* and GEP to the byte offset and<br>

> -/// bitcast to the type.<br>

> -///<br>

> -/// The strategy for finding the more natural GEPs is to peel off layers of the<br>

> -/// pointer, walking back through bit casts and GEPs, searching for a base<br>

> -/// pointer from which we can compute a natural GEP with the desired<br>

> -/// properties. The algorithm tries to fold as many constant indices into<br>

> -/// a single GEP as possible, thus making each GEP more independent of the<br>

> -/// surrounding code.<br>

> -static Value *getAdjustedPtr(IRBuilder<> &IRB, const TargetData &TD,<br>

> -                             Value *Ptr, APInt Offset, Type *PointerTy,<br>

> -                             const Twine &Prefix) {<br>

> -  // Even though we don't look through PHI nodes, we could be called on an<br>

> -  // instruction in an unreachable block, which may be on a cycle.<br>

> -  SmallPtrSet<Value *, 4> Visited;<br>

> -  Visited.insert(Ptr);<br>

> -  SmallVector<Value *, 4> Indices;<br>

> +    // Rewrite all loads of the PN to use the new PHI.<br>

> +    do {<br>

> +      LoadInst *LI = Loads.pop_back_val();<br>

> +      LI->replaceAllUsesWith(NewPN);<br>

> +      Pass.DeadInsts.push_back(LI);<br>

> +    } while (!Loads.empty());<br>

><br>

> -  // We may end up computing an offset pointer that has the wrong type. If we<br>

> -  // never are able to compute one directly that has the correct type, we'll<br>

> -  // fall back to it, so keep it around here.<br>

> -  Value *OffsetPtr = 0;<br>

> +    // Inject loads into all of the pred blocks.<br>

> +    for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {<br>

> +      BasicBlock *Pred = PN.getIncomingBlock(Idx);<br>

> +      TerminatorInst *TI = Pred->getTerminator();<br>

> +      Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));<br>

> +      Value *InVal = PN.getIncomingValue(Idx);<br>

> +      IRBuilder<> PredBuilder(TI);<br>

><br>

> -  // Remember any i8 pointer we come across to re-use if we need to do a raw<br>

> -  // byte offset.<br>

> -  Value *Int8Ptr = 0;<br>

> -  APInt Int8PtrOffset(Offset.getBitWidth(), 0);<br>

> +      LoadInst *Load<br>

> +        = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +<br>

> +                                         Pred->getName()));<br>

> +      ++NumLoadsSpeculated;<br>

> +      Load->setAlignment(Align);<br>

> +      if (TBAATag)<br>

> +        Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);<br>

> +      NewPN->addIncoming(Load, Pred);<br>

><br>

> -  Type *TargetTy = PointerTy->getPointerElementType();<br>

> +      Instruction *Ptr = dyn_cast<Instruction>(InVal);<br>

> +      if (!Ptr)<br>

> +        // No uses to rewrite.<br>

> +        continue;<br>

><br>

> -  do {<br>

> -    // First fold any existing GEPs into the offset.<br>

> -    while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {<br>

> -      APInt GEPOffset(Offset.getBitWidth(), 0);<br>

> -      if (!accumulateGEPOffsets(TD, *GEP, GEPOffset))<br>

> -        break;<br>

> -      Offset += GEPOffset;<br>

> -      Ptr = GEP->getPointerOperand();<br>

> -      if (!Visited.insert(Ptr))<br>

> -        break;<br>

> -    }<br>

> +      // Try to lookup and rewrite any partition uses corresponding to this phi<br>

> +      // input.<br>

> +      AllocaPartitioning::iterator PI<br>

> +        = P.findPartitionForPHIOrSelectOperand(InUse);<br>

> +      if (PI == P.end())<br>

> +        continue;<br>

><br>

> -    // See if we can perform a natural GEP here.<br>

> -    Indices.clear();<br>

> -    if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,<br>

> -                                           Indices, Prefix)) {<br>

> -      if (P->getType() == PointerTy) {<br>

> -        // Zap any offset pointer that we ended up computing in previous rounds.<br>

> -        if (OffsetPtr && OffsetPtr->use_empty())<br>

> -          if (Instruction *I = dyn_cast<Instruction>(OffsetPtr))<br>

> -            I->eraseFromParent();<br>

> -        return P;<br>

> -      }<br>

> -      if (!OffsetPtr) {<br>

> -        OffsetPtr = P;<br>

> -      }<br>

> +      // Replace the Use in the PartitionUse for this operand with the Use<br>

> +      // inside the load.<br>

> +      AllocaPartitioning::use_iterator UI<br>

> +        = P.findPartitionUseForPHIOrSelectOperand(InUse);<br>

> +      assert(isa<PHINode>(*UI->U->getUser()));<br>

> +      UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());<br>

>      }<br>

> +    DEBUG(dbgs() << "          speculated to: " << *NewPN << "\n");<br>

> +  }<br>

><br>

> -    // Stash this pointer if we've found an i8*.<br>

> -    if (Ptr->getType()->isIntegerTy(8)) {<br>

> -      Int8Ptr = Ptr;<br>

> -      Int8PtrOffset = Offset;<br>

> -    }<br>

> +  /// Select instructions that use an alloca and are subsequently loaded can be<br>

> +  /// rewritten to load both input pointers and then select between the result,<br>

> +  /// allowing the load of the alloca to be promoted.<br>

> +  /// From this:<br>

> +  ///   %P2 = select i1 %cond, i32* %Alloca, i32* %Other<br>

> +  ///   %V = load i32* %P2<br>

> +  /// to:<br>

> +  ///   %V1 = load i32* %Alloca      -> will be mem2reg'd<br>

> +  ///   %V2 = load i32* %Other<br>

> +  ///   %V = select i1 %cond, i32 %V1, i32 %V2<br>

> +  ///<br>

> +  /// We can do this to a select if its only uses are loads and if the operand<br>

> +  /// to the select can be loaded unconditionally.<br>

> +  bool isSafeSelectToSpeculate(SelectInst &SI,<br>

> +                               SmallVectorImpl<LoadInst *> &Loads) {<br>

> +    Value *TValue = SI.getTrueValue();<br>

> +    Value *FValue = SI.getFalseValue();<br>

> +    bool TDerefable = TValue->isDereferenceablePointer();<br>

> +    bool FDerefable = FValue->isDereferenceablePointer();<br>

><br>

> -    // Peel off a layer of the pointer and update the offset appropriately.<br>

> -    if (Operator::getOpcode(Ptr) == Instruction::BitCast) {<br>

> -      Ptr = cast<Operator>(Ptr)->getOperand(0);<br>

> -    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {<br>

> -      if (GA->mayBeOverridden())<br>

> -        break;<br>

> -      Ptr = GA->getAliasee();<br>

> -    } else {<br>

> -      break;<br>

> -    }<br>

> -    assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");<br>

> -  } while (Visited.insert(Ptr));<br>

> +    for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end();<br>

> +         UI != UE; ++UI) {<br>

> +      LoadInst *LI = dyn_cast<LoadInst>(*UI);<br>

> +      if (LI == 0 || !LI->isSimple()) return false;<br>

><br>

> -  if (!OffsetPtr) {<br>

> -    if (!Int8Ptr) {<br>

> -      Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),<br>

> -                                  Prefix + ".raw_cast");<br>

> -      Int8PtrOffset = Offset;<br>

> +      // Both operands to the select need to be dereferenceable, either<br>

> +      // absolutely (e.g. allocas) or at this point because we can see other<br>

> +      // accesses to it.<br>

> +      if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI,<br>

> +                                                      LI->getAlignment(), &TD))<br>

> +        return false;<br>

> +      if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI,<br>

> +                                                      LI->getAlignment(), &TD))<br>

> +        return false;<br>

> +      Loads.push_back(LI);<br>

>      }<br>

><br>

> -    OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :<br>

> -      IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),<br>

> -                            Prefix + ".raw_idx");<br>

> +    return true;<br>

>    }<br>

> -  Ptr = OffsetPtr;<br>

><br>

> -  // On the off chance we were targeting i8*, guard the bitcast here.<br>

> -  if (Ptr->getType() != PointerTy)<br>

> -    Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");<br>

> +  void visitSelectInst(SelectInst &SI) {<br>

> +    DEBUG(dbgs() << "    original: " << SI << "\n");<br>

> +    IRBuilder<> IRB(&SI);<br>

><br>

> -  return Ptr;<br>

> -}<br>

> +    // If the select isn't safe to speculate, just use simple logic to emit it.<br>

> +    SmallVector<LoadInst *, 4> Loads;<br>

> +    if (!isSafeSelectToSpeculate(SI, Loads))<br>

> +      return;<br>

><br>

> -/// \brief Test whether the given alloca partition can be promoted to a vector.<br>

> -///<br>

> -/// This is a quick test to check whether we can rewrite a particular alloca<br>

> -/// partition (and its newly formed alloca) into a vector alloca with only<br>

> -/// whole-vector loads and stores such that it could be promoted to a vector<br>

> -/// SSA value. We only can ensure this for a limited set of operations, and we<br>

> -/// don't want to do the rewrites unless we are confident that the result will<br>

> -/// be promotable, so we have an early test here.<br>

> -static bool isVectorPromotionViable(const TargetData &TD,<br>

> -                                    Type *AllocaTy,<br>

> -                                    AllocaPartitioning &P,<br>

> -                                    uint64_t PartitionBeginOffset,<br>

> -                                    uint64_t PartitionEndOffset,<br>

> -                                    AllocaPartitioning::const_use_iterator I,<br>

> -                                    AllocaPartitioning::const_use_iterator E) {<br>

> -  VectorType *Ty = dyn_cast<VectorType>(AllocaTy);<br>

> -  if (!Ty)<br>

> -    return false;<br>

> +    Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };<br>

> +    AllocaPartitioning::iterator PIs[2];<br>

> +    AllocaPartitioning::PartitionUse PUs[2];<br>

> +    for (unsigned i = 0, e = 2; i != e; ++i) {<br>

> +      PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);<br>

> +      if (PIs[i] != P.end()) {<br>

> +        // If the pointer is within the partitioning, remove the select from<br>

> +        // its uses. We'll add in the new loads below.<br>

> +        AllocaPartitioning::use_iterator UI<br>

> +          = P.findPartitionUseForPHIOrSelectOperand(Ops[i]);<br>

> +        PUs[i] = *UI;<br>

> +        // Clear out the use here so that the offsets into the use list remain<br>

> +        // stable but this use is ignored when rewriting.<br>

> +        UI->U = 0;<br>

> +      }<br>

> +    }<br>

><br>

> -  uint64_t VecSize = TD.getTypeSizeInBits(Ty);<br>

> -  uint64_t ElementSize = Ty->getScalarSizeInBits();<br>

> +    Value *TV = SI.getTrueValue();<br>

> +    Value *FV = SI.getFalseValue();<br>

> +    // Replace the loads of the select with a select of two loads.<br>

> +    while (!Loads.empty()) {<br>

> +      LoadInst *LI = Loads.pop_back_val();<br>

><br>

> -  // While the definition of LLVM vectors is bitpacked, we don't support sizes<br>

> -  // that aren't byte sized.<br>

> -  if (ElementSize % 8)<br>

> -    return false;<br>

> -  assert((VecSize % 8) == 0 && "vector size not a multiple of element size?");<br>

> -  VecSize /= 8;<br>

> -  ElementSize /= 8;<br>

> +      IRB.SetInsertPoint(LI);<br>

> +      LoadInst *TL =<br>

> +        IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");<br>

> +      LoadInst *FL =<br>

> +        IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");<br>

> +      NumLoadsSpeculated += 2;<br>

><br>

> -  for (; I != E; ++I) {<br>

> -    if (!I->U)<br>

> -      continue; // Skip dead use.<br>

> +      // Transfer alignment and TBAA info if present.<br>

> +      TL->setAlignment(LI->getAlignment());<br>

> +      FL->setAlignment(LI->getAlignment());<br>

> +      if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {<br>

> +        TL->setMetadata(LLVMContext::MD_tbaa, Tag);<br>

> +        FL->setMetadata(LLVMContext::MD_tbaa, Tag);<br>

> +      }<br>

><br>

> -    uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;<br>

> -    uint64_t BeginIndex = BeginOffset / ElementSize;<br>

> -    if (BeginIndex * ElementSize != BeginOffset ||<br>

> -        BeginIndex >= Ty->getNumElements())<br>

> -      return false;<br>

> -    uint64_t EndOffset = I->EndOffset - PartitionBeginOffset;<br>

> -    uint64_t EndIndex = EndOffset / ElementSize;<br>

> -    if (EndIndex * ElementSize != EndOffset ||<br>

> -        EndIndex > Ty->getNumElements())<br>

> -      return false;<br>

> -<br>

> -    // FIXME: We should build shuffle vector instructions to handle<br>

> -    // non-element-sized accesses.<br>

> -    if ((EndOffset - BeginOffset) != ElementSize &&<br>

> -        (EndOffset - BeginOffset) != VecSize)<br>

> -      return false;<br>

> +      Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,<br>

> +                                  LI->getName() + ".sroa.speculated");<br>

><br>

> -    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {<br>

> -      if (MI->isVolatile())<br>

> -        return false;<br>

> -      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {<br>

> -        const AllocaPartitioning::MemTransferOffsets &MTO<br>

> -          = P.getMemTransferOffsets(*MTI);<br>

> -        if (!MTO.IsSplittable)<br>

> -          return false;<br>

> +      LoadInst *Loads[2] = { TL, FL };<br>

> +      for (unsigned i = 0, e = 2; i != e; ++i) {<br>

> +        if (PIs[i] != P.end()) {<br>

> +          Use *LoadUse = &Loads[i]->getOperandUse(0);<br>

> +          assert(PUs[i].U->get() == LoadUse->get());<br>

> +          PUs[i].U = LoadUse;<br>

> +          P.use_push_back(PIs[i], PUs[i]);<br>

> +        }<br>

>        }<br>

> -    } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {<br>

> -      // Disable vector promotion when there are loads or stores of an FCA.<br>

> -      return false;<br>

> -    } else if (!isa<LoadInst>(I->U->getUser()) &&<br>

> -               !isa<StoreInst>(I->U->getUser())) {<br>

> -      return false;<br>

> +<br>

> +      DEBUG(dbgs() << "          speculated to: " << *V << "\n");<br>

> +      LI->replaceAllUsesWith(V);<br>

> +      Pass.DeadInsts.push_back(LI);<br>

>      }<br>

>    }<br>

> -  return true;<br>

> +};<br>

>  }<br>

><br>

> -/// \brief Test whether the given alloca partition can be promoted to an int.<br>

> +/// \brief Accumulate the constant offsets in a GEP into a single APInt offset.<br>

>  ///<br>

> -/// This is a quick test to check whether we can rewrite a particular alloca<br>

> -/// partition (and its newly formed alloca) into an integer alloca suitable for<br>

> -/// promotion to an SSA value. We only can ensure this for a limited set of<br>

> -/// operations, and we don't want to do the rewrites unless we are confident<br>

> -/// that the result will be promotable, so we have an early test here.<br>

> -static bool isIntegerPromotionViable(const TargetData &TD,<br>

> -                                     Type *AllocaTy,<br>

> -                                     uint64_t AllocBeginOffset,<br>

> -                                     AllocaPartitioning &P,<br>

> -                                     AllocaPartitioning::const_use_iterator I,<br>

> -                                     AllocaPartitioning::const_use_iterator E) {<br>

> -  IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);<br>

> -  if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth())<br>

> -    return false;<br>

> -<br>

> -  // Check the uses to ensure the uses are (likely) promotable integer uses.<br>

> -  // Also ensure that the alloca has a covering load or store. We don't want<br>

> -  // to promote because of some other unsplittable entry (which we may make<br>

> -  // splittable later) and lose the ability to promote each element access.<br>

> -  bool WholeAllocaOp = false;<br>

> -  for (; I != E; ++I) {<br>

> -    if (!I->U)<br>

> -      continue; // Skip dead use.<br>

> -<br>

> -    // We can't reasonably handle cases where the load or store extends past<br>

> -    // the end of the alloca's type and into its padding.<br>

> -    if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty))<br>

> +/// If the provided GEP is all-constant, the total byte offset formed by the<br>

> +/// GEP is computed and Offset is set to it. If the GEP has any non-constant<br>

> +/// operands, the function returns false and the value of Offset is unmodified.<br>

> +static bool accumulateGEPOffsets(const TargetData &TD, GEPOperator &GEP,<br>

> +                                 APInt &Offset) {<br>

> +  APInt GEPOffset(Offset.getBitWidth(), 0);<br>

> +  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);<br>

> +       GTI != GTE; ++GTI) {<br>

> +    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());<br>

> +    if (!OpC)<br>

>        return false;<br>

> +    if (OpC->isZero()) continue;<br>

><br>

> -    if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {<br>

> -      if (LI->isVolatile() || !LI->getType()->isIntegerTy())<br>

> -        return false;<br>

> -      if (LI->getType() == Ty)<br>

> -        WholeAllocaOp = true;<br>

> -    } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {<br>

> -      if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())<br>

> -        return false;<br>

> -      if (SI->getValueOperand()->getType() == Ty)<br>

> -        WholeAllocaOp = true;<br>

> -    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {<br>

> -      if (MI->isVolatile())<br>

> -        return false;<br>

> -      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {<br>

> -        const AllocaPartitioning::MemTransferOffsets &MTO<br>

> -          = P.getMemTransferOffsets(*MTI);<br>

> -        if (!MTO.IsSplittable)<br>

> -          return false;<br>

> -      }<br>

> -    } else {<br>

> -      return false;<br>

> +    // Handle a struct index, which adds its field offset to the pointer.<br>

> +    if (StructType *STy = dyn_cast<StructType>(*GTI)) {<br>

> +      unsigned ElementIdx = OpC->getZExtValue();<br>

> +      const StructLayout *SL = TD.getStructLayout(STy);<br>

> +      GEPOffset += APInt(Offset.getBitWidth(),<br>

> +                         SL->getElementOffset(ElementIdx));<br>

> +      continue;<br>

>      }<br>

> +<br>

> +    APInt TypeSize(Offset.getBitWidth(),<br>

> +                   TD.getTypeAllocSize(GTI.getIndexedType()));<br>

> +    if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) {<br>

> +      assert((VTy->getScalarSizeInBits() % 8) == 0 &&<br>

> +             "vector element size is not a multiple of 8, cannot GEP over it");<br>

> +      TypeSize = VTy->getScalarSizeInBits() / 8;<br>

> +    }<br>

> +<br>

> +    GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) * TypeSize;<br>

>    }<br>

> -  return WholeAllocaOp;<br>

> +  Offset = GEPOffset;<br>

> +  return true;<br>

>  }<br>

><br>

> -namespace {<br>

> -/// \brief Visitor to speculate PHIs and Selects where possible.<br>

> -class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {<br>

> -  // Befriend the base class so it can delegate to private visit methods.<br>

> -  friend class llvm::InstVisitor<PHIOrSelectSpeculator>;<br>

> +/// \brief Build a GEP out of a base pointer and indices.<br>

> +///<br>

> +/// This will return the BasePtr if that is valid, or build a new GEP<br>

> +/// instruction using the IRBuilder if GEP-ing is needed.<br>

> +static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,<br>

> +                       SmallVectorImpl<Value *> &Indices,<br>

> +                       const Twine &Prefix) {<br>

> +  if (Indices.empty())<br>

> +    return BasePtr;<br>

><br>

> -  const TargetData &TD;<br>

> -  AllocaPartitioning &P;<br>

> -  SROA &Pass;<br>

> +  // A single zero index is a no-op, so check for this and avoid building a GEP<br>

> +  // in that case.<br>

> +  if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())<br>

> +    return BasePtr;<br>

><br>

> -public:<br>

> -  PHIOrSelectSpeculator(const TargetData &TD, AllocaPartitioning &P, SROA &Pass)<br>

> -    : TD(TD), P(P), Pass(Pass) {}<br>

> +  return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");<br>

> +}<br>

><br>

> -  /// \brief Visit the users of an alloca partition and rewrite them.<br>

> -  void visitUsers(AllocaPartitioning::const_iterator PI) {<br>

> -    // Note that we need to use an index here as the underlying vector of uses<br>

> -    // may be grown during speculation. However, we never need to re-visit the<br>

> -    // new uses, and so we can use the initial size bound.<br>

> -    for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {<br>

> -      const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);<br>

> -      if (!PU.U)<br>

> -        continue; // Skip dead use.<br>

> +/// \brief Get a natural GEP off of the BasePtr walking through Ty toward<br>

> +/// TargetTy without changing the offset of the pointer.<br>

> +///<br>

> +/// This routine assumes we've already established a properly offset GEP with<br>

> +/// Indices, and arrived at the Ty type. The goal is to continue to GEP with<br>

> +/// zero-indices down through type layers until we find one the same as<br>

> +/// TargetTy. If we can't find one with the same type, we at least try to use<br>

> +/// one with the same size. If none of that works, we just produce the GEP as<br>

> +/// indicated by Indices to have the correct offset.<br>

> +static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const TargetData &TD,<br>

> +                                    Value *BasePtr, Type *Ty, Type *TargetTy,<br>

> +                                    SmallVectorImpl<Value *> &Indices,<br>

> +                                    const Twine &Prefix) {<br>

> +  if (Ty == TargetTy)<br>

> +    return buildGEP(IRB, BasePtr, Indices, Prefix);<br>

><br>

> -      visit(cast<Instruction>(PU.U->getUser()));<br>

> +  // See if we can descend into a struct and locate a field with the correct<br>

> +  // type.<br>

> +  unsigned NumLayers = 0;<br>

> +  Type *ElementTy = Ty;<br>

> +  do {<br>

> +    if (ElementTy->isPointerTy())<br>

> +      break;<br>

> +    if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {<br>

> +      ElementTy = SeqTy->getElementType();<br>

> +      Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(), 0)));<br>

> +    } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {<br>

> +      ElementTy = *STy->element_begin();<br>

> +      Indices.push_back(IRB.getInt32(0));<br>

> +    } else {<br>

> +      break;<br>

>      }<br>

> -  }<br>

> +    ++NumLayers;<br>

> +  } while (ElementTy != TargetTy);<br>

> +  if (ElementTy != TargetTy)<br>

> +    Indices.erase(Indices.end() - NumLayers, Indices.end());<br>

><br>

> -private:<br>

> -  // By default, skip this instruction.<br>

> -  void visitInstruction(Instruction &I) {}<br>

> +  return buildGEP(IRB, BasePtr, Indices, Prefix);<br>

> +}<br>

><br>

> -  /// PHI instructions that use an alloca and are subsequently loaded can be<br>

> -  /// rewritten to load both input pointers in the pred blocks and then PHI the<br>

> -  /// results, allowing the load of the alloca to be promoted.<br>

> -  /// From this:<br>

> -  ///   %P2 = phi [i32* %Alloca, i32* %Other]<br>

> -  ///   %V = load i32* %P2<br>

> -  /// to:<br>

> -  ///   %V1 = load i32* %Alloca      -> will be mem2reg'd<br>

> -  ///   ...<br>

> -  ///   %V2 = load i32* %Other<br>

> -  ///   ...<br>

> -  ///   %V = phi [i32 %V1, i32 %V2]<br>

> -  ///<br>

> -  /// We can do this to a select if its only uses are loads and if the operands<br>

> -  /// to the select can be loaded unconditionally.<br>

> -  ///<br>

> -  /// FIXME: This should be hoisted into a generic utility, likely in<br>

> -  /// Transforms/Util/Local.h<br>

> -  bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *> &Loads) {<br>

> -    // For now, we can only do this promotion if the load is in the same block<br>

> -    // as the PHI, and if there are no stores between the phi and load.<br>

> -    // TODO: Allow recursive phi users.<br>

> -    // TODO: Allow stores.<br>

> -    BasicBlock *BB = PN.getParent();<br>

> -    unsigned MaxAlign = 0;<br>

> -    for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end();<br>

> -         UI != UE; ++UI) {<br>

> -      LoadInst *LI = dyn_cast<LoadInst>(*UI);<br>

> -      if (LI == 0 || !LI->isSimple()) return false;<br>

> +/// \brief Recursively compute indices for a natural GEP.<br>

> +///<br>

> +/// This is the recursive step for getNaturalGEPWithOffset that walks down the<br>

> +/// element types adding appropriate indices for the GEP.<br>

> +static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const TargetData &TD,<br>

> +                                       Value *Ptr, Type *Ty, APInt &Offset,<br>

> +                                       Type *TargetTy,<br>

> +                                       SmallVectorImpl<Value *> &Indices,<br>

> +                                       const Twine &Prefix) {<br>

> +  if (Offset == 0)<br>

> +    return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix);<br>

><br>

> -      // For now we only allow loads in the same block as the PHI.  This is<br>

> -      // a common case that happens when instcombine merges two loads through<br>

> -      // a PHI.<br>

> -      if (LI->getParent() != BB) return false;<br>

> +  // We can't recurse through pointer types.<br>

> +  if (Ty->isPointerTy())<br>

> +    return 0;<br>

><br>

> -      // Ensure that there are no instructions between the PHI and the load that<br>

> -      // could store.<br>

> -      for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)<br>

> -        if (BBI->mayWriteToMemory())<br>

> -          return false;<br>

> +  // We try to analyze GEPs over vectors here, but note that these GEPs are<br>

> +  // extremely poorly defined currently. The long-term goal is to remove GEPing<br>

> +  // over a vector from the IR completely.<br>

> +  if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {<br>

> +    unsigned ElementSizeInBits = VecTy->getScalarSizeInBits();<br>

> +    if (ElementSizeInBits % 8)<br>

> +      return 0; // GEPs over non-multiple of 8 size vector elements are invalid.<br>

> +    APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);<br>

> +    APInt NumSkippedElements = Offset.udiv(ElementSize);<br>

> +    if (NumSkippedElements.ugt(VecTy->getNumElements()))<br>

> +      return 0;<br>

> +    Offset -= NumSkippedElements * ElementSize;<br>

> +    Indices.push_back(IRB.getInt(NumSkippedElements));<br>

> +    return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),<br>

> +                                    Offset, TargetTy, Indices, Prefix);<br>

> +  }<br>

><br>

> -      MaxAlign = std::max(MaxAlign, LI->getAlignment());<br>

> -      Loads.push_back(LI);<br>

> -    }<br>

> +  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {<br>

> +    Type *ElementTy = ArrTy->getElementType();<br>

> +    APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));<br>

> +    APInt NumSkippedElements = Offset.udiv(ElementSize);<br>

> +    if (NumSkippedElements.ugt(ArrTy->getNumElements()))<br>

> +      return 0;<br>

><br>

> -    // We can only transform this if it is safe to push the loads into the<br>

> -    // predecessor blocks. The only thing to watch out for is that we can't put<br>

> -    // a possibly trapping load in the predecessor if it is a critical edge.<br>

> -    for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;<br>

> -         ++Idx) {<br>

> -      TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();<br>

> -      Value *InVal = PN.getIncomingValue(Idx);<br>

> +    Offset -= NumSkippedElements * ElementSize;<br>

> +    Indices.push_back(IRB.getInt(NumSkippedElements));<br>

> +    return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,<br>

> +                                    Indices, Prefix);<br>

> +  }<br>

><br>

> -      // If the value is produced by the terminator of the predecessor (an<br>

> -      // invoke) or it has side-effects, there is no valid place to put a load<br>

> -      // in the predecessor.<br>

> -      if (TI == InVal || TI->mayHaveSideEffects())<br>

> -        return false;<br>

> +  StructType *STy = dyn_cast<StructType>(Ty);<br>

> +  if (!STy)<br>

> +    return 0;<br>

><br>

> -      // If the predecessor has a single successor, then the edge isn't<br>

> -      // critical.<br>

> -      if (TI->getNumSuccessors() == 1)<br>

> -        continue;<br>

> +  const StructLayout *SL = TD.getStructLayout(STy);<br>

> +  uint64_t StructOffset = Offset.getZExtValue();<br>

> +  if (StructOffset >= SL->getSizeInBytes())<br>

> +    return 0;<br>

> +  unsigned Index = SL->getElementContainingOffset(StructOffset);<br>

> +  Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));<br>

> +  Type *ElementTy = STy->getElementType(Index);<br>

> +  if (Offset.uge(TD.getTypeAllocSize(ElementTy)))<br>

> +    return 0; // The offset points into alignment padding.<br>

><br>

> -      // If this pointer is always safe to load, or if we can prove that there<br>

> -      // is already a load in the block, then we can move the load to the pred<br>

> -      // block.<br>

> -      if (InVal->isDereferenceablePointer() ||<br>

> -          isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD))<br>

> -        continue;<br>

> +  Indices.push_back(IRB.getInt32(Index));<br>

> +  return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,<br>

> +                                  Indices, Prefix);<br>

> +}<br>

><br>

> -      return false;<br>

> -    }<br>

> +/// \brief Get a natural GEP from a base pointer to a particular offset and<br>

> +/// resulting in a particular type.<br>

> +///<br>

> +/// The goal is to produce a "natural" looking GEP that works with the existing<br>

> +/// composite types to arrive at the appropriate offset and element type for<br>

> +/// a pointer. TargetTy is the element type the returned GEP should point-to if<br>

> +/// possible. We recurse by decreasing Offset, adding the appropriate index to<br>

> +/// Indices, and setting Ty to the result subtype.<br>

> +///<br>

> +/// If no natural GEP can be constructed, this function returns null.<br>

> +static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const TargetData &TD,<br>

> +                                      Value *Ptr, APInt Offset, Type *TargetTy,<br>

> +                                      SmallVectorImpl<Value *> &Indices,<br>

> +                                      const Twine &Prefix) {<br>

> +  PointerType *Ty = cast<PointerType>(Ptr->getType());<br>

><br>

> -    return true;<br>

> -  }<br>

> +  // Don't consider any GEPs through an i8* as natural unless the TargetTy is<br>

> +  // an i8.<br>

> +  if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))<br>

> +    return 0;<br>

><br>

> -  void visitPHINode(PHINode &PN) {<br>

> -    DEBUG(dbgs() << "    original: " << PN << "\n");<br>

> +  Type *ElementTy = Ty->getElementType();<br>

> +  if (!ElementTy->isSized())<br>

> +    return 0; // We can't GEP through an unsized element.<br>

> +  APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));<br>

> +  if (ElementSize == 0)<br>

> +    return 0; // Zero-length arrays can't help us build a natural GEP.<br>

> +  APInt NumSkippedElements = Offset.udiv(ElementSize);<br>

><br>

> -    SmallVector<LoadInst *, 4> Loads;<br>

> -    if (!isSafePHIToSpeculate(PN, Loads))<br>

> -      return;<br>

> +  Offset -= NumSkippedElements * ElementSize;<br>

> +  Indices.push_back(IRB.getInt(NumSkippedElements));<br>

> +  return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,<br>

> +                                  Indices, Prefix);<br>

> +}<br>

><br>

> -    assert(!Loads.empty());<br>

> +/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the<br>

> +/// resulting pointer has PointerTy.<br>

> +///<br>

> +/// This tries very hard to compute a "natural" GEP which arrives at the offset<br>

> +/// and produces the pointer type desired. Where it cannot, it will try to use<br>

> +/// the natural GEP to arrive at the offset and bitcast to the type. Where that<br>

> +/// fails, it will try to use an existing i8* and GEP to the byte offset and<br>

> +/// bitcast to the type.<br>

> +///<br>

> +/// The strategy for finding the more natural GEPs is to peel off layers of the<br>

> +/// pointer, walking back through bit casts and GEPs, searching for a base<br>

> +/// pointer from which we can compute a natural GEP with the desired<br>

> +/// properties. The algorithm tries to fold as many constant indices into<br>

> +/// a single GEP as possible, thus making each GEP more independent of the<br>

> +/// surrounding code.<br>

> +static Value *getAdjustedPtr(IRBuilder<> &IRB, const TargetData &TD,<br>

> +                             Value *Ptr, APInt Offset, Type *PointerTy,<br>

> +                             const Twine &Prefix) {<br>

> +  // Even though we don't look through PHI nodes, we could be called on an<br>

> +  // instruction in an unreachable block, which may be on a cycle.<br>

> +  SmallPtrSet<Value *, 4> Visited;<br>

> +  Visited.insert(Ptr);<br>

> +  SmallVector<Value *, 4> Indices;<br>

><br>

> -    Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();<br>

> -    IRBuilder<> PHIBuilder(&PN);<br>

> -    PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),<br>

> -                                          PN.getName() + ".sroa.speculated");<br>

> +  // We may end up computing an offset pointer that has the wrong type. If we<br>

> +  // never are able to compute one directly that has the correct type, we'll<br>

> +  // fall back to it, so keep it around here.<br>

> +  Value *OffsetPtr = 0;<br>

><br>

> -    // Get the TBAA tag and alignment to use from one of the loads.  It doesn't<br>

> -    // matter which one we get and if any differ, it doesn't matter.<br>

> -    LoadInst *SomeLoad = cast<LoadInst>(Loads.back());<br>

> -    MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);<br>

> -    unsigned Align = SomeLoad->getAlignment();<br>

> +  // Remember any i8 pointer we come across to re-use if we need to do a raw<br>

> +  // byte offset.<br>

> +  Value *Int8Ptr = 0;<br>

> +  APInt Int8PtrOffset(Offset.getBitWidth(), 0);<br>

><br>

> -    // Rewrite all loads of the PN to use the new PHI.<br>

> -    do {<br>

> -      LoadInst *LI = Loads.pop_back_val();<br>

> -      LI->replaceAllUsesWith(NewPN);<br>

> -      Pass.DeadInsts.push_back(LI);<br>

> -    } while (!Loads.empty());<br>

> +  Type *TargetTy = PointerTy->getPointerElementType();<br>

><br>

> -    // Inject loads into all of the pred blocks.<br>

> -    for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {<br>

> -      BasicBlock *Pred = PN.getIncomingBlock(Idx);<br>

> -      TerminatorInst *TI = Pred->getTerminator();<br>

> -      Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));<br>

> -      Value *InVal = PN.getIncomingValue(Idx);<br>

> -      IRBuilder<> PredBuilder(TI);<br>

> +  do {<br>

> +    // First fold any existing GEPs into the offset.<br>

> +    while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {<br>

> +      APInt GEPOffset(Offset.getBitWidth(), 0);<br>

> +      if (!accumulateGEPOffsets(TD, *GEP, GEPOffset))<br>

> +        break;<br>

> +      Offset += GEPOffset;<br>

> +      Ptr = GEP->getPointerOperand();<br>

> +      if (!Visited.insert(Ptr))<br>

> +        break;<br>

> +    }<br>

><br>

> -      LoadInst *Load<br>

> -        = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +<br>

> -                                         Pred->getName()));<br>

> -      ++NumLoadsSpeculated;<br>

> -      Load->setAlignment(Align);<br>

> -      if (TBAATag)<br>

> -        Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);<br>

> -      NewPN->addIncoming(Load, Pred);<br>

> +    // See if we can perform a natural GEP here.<br>

> +    Indices.clear();<br>

> +    if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,<br>

> +                                           Indices, Prefix)) {<br>

> +      if (P->getType() == PointerTy) {<br>

> +        // Zap any offset pointer that we ended up computing in previous rounds.<br>

> +        if (OffsetPtr && OffsetPtr->use_empty())<br>

> +          if (Instruction *I = dyn_cast<Instruction>(OffsetPtr))<br>

> +            I->eraseFromParent();<br>

> +        return P;<br>

> +      }<br>

> +      if (!OffsetPtr) {<br>

> +        OffsetPtr = P;<br>

> +      }<br>

> +    }<br>

><br>

> -      Instruction *Ptr = dyn_cast<Instruction>(InVal);<br>

> -      if (!Ptr)<br>

> -        // No uses to rewrite.<br>

> -        continue;<br>

> +    // Stash this pointer if we've found an i8*.<br>

> +    if (Ptr->getType()->isIntegerTy(8)) {<br>

> +      Int8Ptr = Ptr;<br>

> +      Int8PtrOffset = Offset;<br>

> +    }<br>

><br>

> -      // Try to lookup and rewrite any partition uses corresponding to this phi<br>

> -      // input.<br>

> -      AllocaPartitioning::iterator PI<br>

> -        = P.findPartitionForPHIOrSelectOperand(InUse);<br>

> -      if (PI == P.end())<br>

> -        continue;<br>

> +    // Peel off a layer of the pointer and update the offset appropriately.<br>

> +    if (Operator::getOpcode(Ptr) == Instruction::BitCast) {<br>

> +      Ptr = cast<Operator>(Ptr)->getOperand(0);<br>

> +    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {<br>

> +      if (GA->mayBeOverridden())<br>

> +        break;<br>

> +      Ptr = GA->getAliasee();<br>

> +    } else {<br>

> +      break;<br>

> +    }<br>

> +    assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");<br>

> +  } while (Visited.insert(Ptr));<br>

><br>

> -      // Replace the Use in the PartitionUse for this operand with the Use<br>

> -      // inside the load.<br>

> -      AllocaPartitioning::use_iterator UI<br>

> -        = P.findPartitionUseForPHIOrSelectOperand(InUse);<br>

> -      assert(isa<PHINode>(*UI->U->getUser()));<br>

> -      UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());<br>

> +  if (!OffsetPtr) {<br>

> +    if (!Int8Ptr) {<br>

> +      Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),<br>

> +                                  Prefix + ".raw_cast");<br>

> +      Int8PtrOffset = Offset;<br>

>      }<br>

> -    DEBUG(dbgs() << "          speculated to: " << *NewPN << "\n");<br>

> +<br>

> +    OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :<br>

> +      IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),<br>

> +                            Prefix + ".raw_idx");<br>

>    }<br>

> +  Ptr = OffsetPtr;<br>

><br>

> -  /// Select instructions that use an alloca and are subsequently loaded can be<br>

> -  /// rewritten to load both input pointers and then select between the result,<br>

> -  /// allowing the load of the alloca to be promoted.<br>

> -  /// From this:<br>

> -  ///   %P2 = select i1 %cond, i32* %Alloca, i32* %Other<br>

> -  ///   %V = load i32* %P2<br>

> -  /// to:<br>

> -  ///   %V1 = load i32* %Alloca      -> will be mem2reg'd<br>

> -  ///   %V2 = load i32* %Other<br>

> -  ///   %V = select i1 %cond, i32 %V1, i32 %V2<br>

> -  ///<br>

> -  /// We can do this to a select if its only uses are loads and if the operand<br>

> -  /// to the select can be loaded unconditionally.<br>

> -  bool isSafeSelectToSpeculate(SelectInst &SI,<br>

> -                               SmallVectorImpl<LoadInst *> &Loads) {<br>

> -    Value *TValue = SI.getTrueValue();<br>

> -    Value *FValue = SI.getFalseValue();<br>

> -    bool TDerefable = TValue->isDereferenceablePointer();<br>

> -    bool FDerefable = FValue->isDereferenceablePointer();<br>

> +  // On the off chance we were targeting i8*, guard the bitcast here.<br>

> +  if (Ptr->getType() != PointerTy)<br>

> +    Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");<br>

><br>

> -    for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end();<br>

> -         UI != UE; ++UI) {<br>

> -      LoadInst *LI = dyn_cast<LoadInst>(*UI);<br>

> -      if (LI == 0 || !LI->isSimple()) return false;<br>

> +  return Ptr;<br>

> +}<br>

><br>

> -      // Both operands to the select need to be dereferenceable, either<br>

> -      // absolutely (e.g. allocas) or at this point because we can see other<br>

> -      // accesses to it.<br>

> -      if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI,<br>

> -                                                      LI->getAlignment(), &TD))<br>

> -        return false;<br>

> -      if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI,<br>

> -                                                      LI->getAlignment(), &TD))<br>

> -        return false;<br>

> -      Loads.push_back(LI);<br>

> -    }<br>

> +/// \brief Test whether the given alloca partition can be promoted to a vector.<br>

> +///<br>

> +/// This is a quick test to check whether we can rewrite a particular alloca<br>

> +/// partition (and its newly formed alloca) into a vector alloca with only<br>

> +/// whole-vector loads and stores such that it could be promoted to a vector<br>

> +/// SSA value. We only can ensure this for a limited set of operations, and we<br>

> +/// don't want to do the rewrites unless we are confident that the result will<br>

> +/// be promotable, so we have an early test here.<br>

> +static bool isVectorPromotionViable(const TargetData &TD,<br>

> +                                    Type *AllocaTy,<br>

> +                                    AllocaPartitioning &P,<br>

> +                                    uint64_t PartitionBeginOffset,<br>

> +                                    uint64_t PartitionEndOffset,<br>

> +                                    AllocaPartitioning::const_use_iterator I,<br>

> +                                    AllocaPartitioning::const_use_iterator E) {<br>

> +  VectorType *Ty = dyn_cast<VectorType>(AllocaTy);<br>

> +  if (!Ty)<br>

> +    return false;<br>

><br>

> -    return true;<br>

> -  }<br>

> +  uint64_t VecSize = TD.getTypeSizeInBits(Ty);<br>

> +  uint64_t ElementSize = Ty->getScalarSizeInBits();<br>

><br>

> -  void visitSelectInst(SelectInst &SI) {<br>

> -    DEBUG(dbgs() << "    original: " << SI << "\n");<br>

> -    IRBuilder<> IRB(&SI);<br>

> +  // While the definition of LLVM vectors is bitpacked, we don't support sizes<br>

> +  // that aren't byte sized.<br>

> +  if (ElementSize % 8)<br>

> +    return false;<br>

> +  assert((VecSize % 8) == 0 && "vector size not a multiple of element size?");<br>

> +  VecSize /= 8;<br>

> +  ElementSize /= 8;<br>

><br>

> -    // If the select isn't safe to speculate, just use simple logic to emit it.<br>

> -    SmallVector<LoadInst *, 4> Loads;<br>

> -    if (!isSafeSelectToSpeculate(SI, Loads))<br>

> -      return;<br>

> +  for (; I != E; ++I) {<br>

> +    if (!I->U)<br>

> +      continue; // Skip dead use.<br>

><br>

> -    Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };<br>

> -    AllocaPartitioning::iterator PIs[2];<br>

> -    AllocaPartitioning::PartitionUse PUs[2];<br>

> -    for (unsigned i = 0, e = 2; i != e; ++i) {<br>

> -      PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);<br>

> -      if (PIs[i] != P.end()) {<br>

> -        // If the pointer is within the partitioning, remove the select from<br>

> -        // its uses. We'll add in the new loads below.<br>

> -        AllocaPartitioning::use_iterator UI<br>

> -          = P.findPartitionUseForPHIOrSelectOperand(Ops[i]);<br>

> -        PUs[i] = *UI;<br>

> -        // Clear out the use here so that the offsets into the use list remain<br>

> -        // stable but this use is ignored when rewriting.<br>

> -        UI->U = 0;<br>

> +    uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;<br>

> +    uint64_t BeginIndex = BeginOffset / ElementSize;<br>

> +    if (BeginIndex * ElementSize != BeginOffset ||<br>

> +        BeginIndex >= Ty->getNumElements())<br>

> +      return false;<br>

> +    uint64_t EndOffset = I->EndOffset - PartitionBeginOffset;<br>

> +    uint64_t EndIndex = EndOffset / ElementSize;<br>

> +    if (EndIndex * ElementSize != EndOffset ||<br>

> +        EndIndex > Ty->getNumElements())<br>

> +      return false;<br>

> +<br>

> +    // FIXME: We should build shuffle vector instructions to handle<br>

> +    // non-element-sized accesses.<br>

> +    if ((EndOffset - BeginOffset) != ElementSize &&<br>

> +        (EndOffset - BeginOffset) != VecSize)<br>

> +      return false;<br>

> +<br>

> +    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {<br>

> +      if (MI->isVolatile())<br>

> +        return false;<br>

> +      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {<br>

> +        const AllocaPartitioning::MemTransferOffsets &MTO<br>

> +          = P.getMemTransferOffsets(*MTI);<br>

> +        if (!MTO.IsSplittable)<br>

> +          return false;<br>

>        }<br>

> +    } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {<br>

> +      // Disable vector promotion when there are loads or stores of an FCA.<br>

> +      return false;<br>

> +    } else if (!isa<LoadInst>(I->U->getUser()) &&<br>

> +               !isa<StoreInst>(I->U->getUser())) {<br>

> +      return false;<br>

>      }<br>

> +  }<br>

> +  return true;<br>

> +}<br>

><br>

> -    Value *TV = SI.getTrueValue();<br>

> -    Value *FV = SI.getFalseValue();<br>

> -    // Replace the loads of the select with a select of two loads.<br>

> -    while (!Loads.empty()) {<br>

> -      LoadInst *LI = Loads.pop_back_val();<br>

> -<br>

> -      IRB.SetInsertPoint(LI);<br>

> -      LoadInst *TL =<br>

> -        IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");<br>

> -      LoadInst *FL =<br>

> -        IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");<br>

> -      NumLoadsSpeculated += 2;<br>

> +/// \brief Test whether the given alloca partition can be promoted to an int.<br>

> +///<br>

> +/// This is a quick test to check whether we can rewrite a particular alloca<br>

> +/// partition (and its newly formed alloca) into an integer alloca suitable for<br>

> +/// promotion to an SSA value. We only can ensure this for a limited set of<br>

> +/// operations, and we don't want to do the rewrites unless we are confident<br>

> +/// that the result will be promotable, so we have an early test here.<br>

> +static bool isIntegerPromotionViable(const TargetData &TD,<br>

> +                                     Type *AllocaTy,<br>

> +                                     uint64_t AllocBeginOffset,<br>

> +                                     AllocaPartitioning &P,<br>

> +                                     AllocaPartitioning::const_use_iterator I,<br>

> +                                     AllocaPartitioning::const_use_iterator E) {<br>

> +  IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);<br>

> +  if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth())<br>

> +    return false;<br>

><br>

> -      // Transfer alignment and TBAA info if present.<br>

> -      TL->setAlignment(LI->getAlignment());<br>

> -      FL->setAlignment(LI->getAlignment());<br>

> -      if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {<br>

> -        TL->setMetadata(LLVMContext::MD_tbaa, Tag);<br>

> -        FL->setMetadata(LLVMContext::MD_tbaa, Tag);<br>

> -      }<br>

> +  // Check the uses to ensure the uses are (likely) promotable integer uses.<br>

> +  // Also ensure that the alloca has a covering load or store. We don't want<br>

> +  // to promote because of some other unsplittable entry (which we may make<br>

> +  // splittable later) and lose the ability to promote each element access.<br>

> +  bool WholeAllocaOp = false;<br>

> +  for (; I != E; ++I) {<br>

> +    if (!I->U)<br>

> +      continue; // Skip dead use.<br>

><br>

> -      Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,<br>

> -                                  LI->getName() + ".sroa.speculated");<br>

> +    // We can't reasonably handle cases where the load or store extends past<br>

> +    // the end of the alloca's type and into its padding.<br>

> +    if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty))<br>

> +      return false;<br>

><br>

> -      LoadInst *Loads[2] = { TL, FL };<br>

> -      for (unsigned i = 0, e = 2; i != e; ++i) {<br>

> -        if (PIs[i] != P.end()) {<br>

> -          Use *LoadUse = &Loads[i]->getOperandUse(0);<br>

> -          assert(PUs[i].U->get() == LoadUse->get());<br>

> -          PUs[i].U = LoadUse;<br>

> -          P.use_push_back(PIs[i], PUs[i]);<br>

> -        }<br>

> +    if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {<br>

> +      if (LI->isVolatile() || !LI->getType()->isIntegerTy())<br>

> +        return false;<br>

> +      if (LI->getType() == Ty)<br>

> +        WholeAllocaOp = true;<br>

> +    } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {<br>

> +      if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())<br>

> +        return false;<br>

> +      if (SI->getValueOperand()->getType() == Ty)<br>

> +        WholeAllocaOp = true;<br>

> +    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {<br>

> +      if (MI->isVolatile())<br>

> +        return false;<br>

> +      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {<br>

> +        const AllocaPartitioning::MemTransferOffsets &MTO<br>

> +          = P.getMemTransferOffsets(*MTI);<br>

> +        if (!MTO.IsSplittable)<br>

> +          return false;<br>

>        }<br>

> -<br>

> -      DEBUG(dbgs() << "          speculated to: " << *V << "\n");<br>

> -      LI->replaceAllUsesWith(V);<br>

> -      Pass.DeadInsts.push_back(LI);<br>

> +    } else {<br>

> +      return false;<br>

>      }<br>

>    }<br>

> -};<br>

> +  return WholeAllocaOp;<br>

> +}<br>

><br>

> +namespace {<br>

>  /// \brief Visitor to rewrite instructions using a partition of an alloca to<br>

>  /// use a new alloca.<br>

>  ///<br>

><br>

><br>

> _______________________________________________<br>

> llvm-commits mailing list<br>

> <a href="mailto:llvm-commits@cs.uiuc.edu" class="cremed">llvm-commits@cs.uiuc.edu</a><br>

> <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank" class="cremed">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>

_______________________________________________<br>

llvm-commits mailing list<br>

<a href="mailto:llvm-commits@cs.uiuc.edu" class="cremed">llvm-commits@cs.uiuc.edu</a><br>

<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank" class="cremed">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>

</div></div></blockquote></div><br></div>