[llvm-commits] [llvm] r91459 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll

Daniel Dunbar daniel at zuster.org
Wed Dec 16 12:03:52 PST 2009


On Wed, Dec 16, 2009 at 9:17 AM, Evan Cheng <evan.cheng at apple.com> wrote:
> I thought the problem started prior to 91459?

I presume the patch was just exposing the bug. At the time I reverted,
taking out this patch and Bill's restored the bootstrap, but more patches
subsequently went into TOT and it no longer helped.

I will reapply this and Bill's patch.

I want a close-the-tree button.

 - Daniel

> Evan
>
> On Dec 16, 2009, at 2:58 AM, Daniel Dunbar wrote:
>
>> Hi Bob,
>>
>> I reverted this, it was one of the patches responsible for the failing
>> x86_64-apple-darwin10 bootstrap in my testing.
>>
>> - Daniel
>>
>> On Tue, Dec 15, 2009 at 2:00 PM, Bob Wilson <bob.wilson at apple.com> wrote:
>>> Author: bwilson
>>> Date: Tue Dec 15 16:00:51 2009
>>> New Revision: 91459
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=91459&view=rev
>>> Log:
>>> Reapply 91184 with fixes and an addition to the testcase to cover the problem
>>> found last time.  Instead of trying to modify the IR while iterating over it,
>>> I've changed it to keep a list of WeakVH references to dead instructions, and
>>> then delete those instructions later.  I also added some special case code to
>>> detect and handle the situation when both operands of a memcpy intrinsic are
>>> referencing the same alloca.
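>>>
>>> For reference, the deferred-deletion idiom described above reduces to the
>>> sketch below.  It is written as a standalone helper purely for illustration;
>>> in the patch that follows, DeadInsts is a SROA member and this loop lives in
>>> SROA::DeleteDeadInstructions, and the header paths are assumed to match the
>>> LLVM tree of this era.
>>>
>>>   #include "llvm/Instructions.h"
>>>   #include "llvm/ADT/SmallVector.h"
>>>   #include "llvm/Support/ValueHandle.h"
>>>   #include "llvm/Transforms/Utils/Local.h"
>>>   using namespace llvm;
>>>
>>>   // Erase every instruction queued in DeadInsts, recursively queueing any
>>>   // operands that become trivially dead once their user is gone.  WeakVH
>>>   // entries null themselves out if the instruction has already been deleted
>>>   // by someone else, so stale entries are simply skipped.
>>>   static void DeleteDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
>>>     while (!DeadInsts.empty()) {
>>>       Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
>>>       if (I == 0)
>>>         continue;
>>>
>>>       for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
>>>         if (Instruction *U = dyn_cast<Instruction>(*OI)) {
>>>           // Zero out the operand and see if it becomes trivially dead.
>>>           *OI = 0;
>>>           if (isInstructionTriviallyDead(U))
>>>             DeadInsts.push_back(U);
>>>         }
>>>
>>>       I->eraseFromParent();
>>>     }
>>>   }
>>>
>>> Queuing through WeakVH rather than raw Instruction pointers is what makes it
>>> safe to mark an instruction dead in one place and only erase it after the
>>> use-list walk has finished.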
>>>
>>> Added:
>>>    llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
>>> Modified:
>>>    llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
>>>
>>> Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=91459&r1=91458&r2=91459&view=diff
>>>
>>> ==============================================================================
>>> --- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
>>> +++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Tue Dec 15 16:00:51 2009
>>> @@ -74,6 +74,10 @@
>>>   private:
>>>     TargetData *TD;
>>>
>>> +    /// DeadInsts - Keep track of instructions we have made dead, so that
>>> +    /// we can remove them after we are done working.
>>> +    SmallVector<WeakVH, 16> DeadInsts;
>>> +
>>>     /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
>>>     /// information about the uses.  All these fields are initialized to false
>>>     /// and set to true when something is learned.
>>> @@ -102,25 +106,30 @@
>>>
>>>     int isSafeAllocaToScalarRepl(AllocaInst *AI);
>>>
>>> -    void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
>>> -                               AllocaInfo &Info);
>>> -    void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
>>> -                          AllocaInfo &Info);
>>> -    void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
>>> -                                        unsigned OpNo, AllocaInfo &Info);
>>> -    void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI,
>>> -                                        AllocaInfo &Info);
>>> +    void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
>>> +                             uint64_t ArrayOffset, AllocaInfo &Info);
>>> +    void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset,
>>> +                   uint64_t &ArrayOffset, AllocaInfo &Info);
>>> +    void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t ArrayOffset,
>>> +                         uint64_t MemSize, const Type *MemOpType, bool isStore,
>>> +                         AllocaInfo &Info);
>>> +    bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
>>> +    unsigned FindElementAndOffset(const Type *&T, uint64_t &Offset);
>>>
>>>     void DoScalarReplacement(AllocaInst *AI,
>>>                              std::vector<AllocaInst*> &WorkList);
>>> +    void DeleteDeadInstructions();
>>>     void CleanupGEP(GetElementPtrInst *GEP);
>>> -    void CleanupAllocaUsers(AllocaInst *AI);
>>> +    void CleanupAllocaUsers(Value *V);
>>>     AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);
>>>
>>> -    void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI,
>>> -                                    SmallVector<AllocaInst*, 32> &NewElts);
>>> -
>>> -    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
>>> +    void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
>>> +                              SmallVector<AllocaInst*, 32> &NewElts);
>>> +    void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
>>> +                        SmallVector<AllocaInst*, 32> &NewElts);
>>> +    void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
>>> +                    SmallVector<AllocaInst*, 32> &NewElts);
>>> +    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
>>>                                       AllocaInst *AI,
>>>                                       SmallVector<AllocaInst*, 32> &NewElts);
>>>     void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
>>> @@ -360,176 +369,37 @@
>>>     }
>>>   }
>>>
>>> -  // Now that we have created the alloca instructions that we want to use,
>>> -  // expand the getelementptr instructions to use them.
>>> -  while (!AI->use_empty()) {
>>> -    Instruction *User = cast<Instruction>(AI->use_back());
>>> -    if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) {
>>> -      RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas);
>>> -      BCInst->eraseFromParent();
>>> -      continue;
>>> -    }
>>> -
>>> -    // Replace:
>>> -    //   %res = load { i32, i32 }* %alloc
>>> -    // with:
>>> -    //   %load.0 = load i32* %alloc.0
>>> -    //   %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
>>> -    //   %load.1 = load i32* %alloc.1
>>> -    //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
>>> -    // (Also works for arrays instead of structs)
>>> -    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
>>> -      Value *Insert = UndefValue::get(LI->getType());
>>> -      for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
>>> -        Value *Load = new LoadInst(ElementAllocas[i], "load", LI);
>>> -        Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
>>> -      }
>>> -      LI->replaceAllUsesWith(Insert);
>>> -      LI->eraseFromParent();
>>> -      continue;
>>> -    }
>>> +  // Now that we have created the new alloca instructions, rewrite all the
>>> +  // uses of the old alloca.
>>> +  DeadInsts.push_back(AI);
>>> +  RewriteForScalarRepl(AI, AI, 0, ElementAllocas);
>>>
>>> -    // Replace:
>>> -    //   store { i32, i32 } %val, { i32, i32 }* %alloc
>>> -    // with:
>>> -    //   %val.0 = extractvalue { i32, i32 } %val, 0
>>> -    //   store i32 %val.0, i32* %alloc.0
>>> -    //   %val.1 = extractvalue { i32, i32 } %val, 1
>>> -    //   store i32 %val.1, i32* %alloc.1
>>> -    // (Also works for arrays instead of structs)
>>> -    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
>>> -      Value *Val = SI->getOperand(0);
>>> -      for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
>>> -        Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
>>> -        new StoreInst(Extract, ElementAllocas[i], SI);
>>> -      }
>>> -      SI->eraseFromParent();
>>> -      continue;
>>> -    }
>>> -
>>> -    GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
>>> -    // We now know that the GEP is of the form: GEP <ptr>, 0, <cst>
>>> -    unsigned Idx =
>>> -       (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
>>> -
>>> -    assert(Idx < ElementAllocas.size() && "Index out of range?");
>>> -    AllocaInst *AllocaToUse = ElementAllocas[Idx];
>>> -
>>> -    Value *RepValue;
>>> -    if (GEPI->getNumOperands() == 3) {
>>> -      // Do not insert a new getelementptr instruction with zero indices, only
>>> -      // to have it optimized out later.
>>> -      RepValue = AllocaToUse;
>>> -    } else {
>>> -      // We are indexing deeply into the structure, so we still need a
>>> -      // getelement ptr instruction to finish the indexing.  This may be
>>> -      // expanded itself once the worklist is rerun.
>>> -      //
>>> -      SmallVector<Value*, 8> NewArgs;
>>> -      NewArgs.push_back(Constant::getNullValue(
>>> -                                           Type::getInt32Ty(AI->getContext())));
>>> -      NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());
>>> -      RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(),
>>> -                                           NewArgs.end(), "", GEPI);
>>> -      RepValue->takeName(GEPI);
>>> -    }
>>> -
>>> -    // If this GEP is to the start of the aggregate, check for memcpys.
>>> -    if (Idx == 0 && GEPI->hasAllZeroIndices())
>>> -      RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas);
>>> -
>>> -    // Move all of the users over to the new GEP.
>>> -    GEPI->replaceAllUsesWith(RepValue);
>>> -    // Delete the old GEP
>>> -    GEPI->eraseFromParent();
>>> -  }
>>> +  // Now erase any instructions that were made dead while rewriting the alloca.
>>> +  DeleteDeadInstructions();
>>>
>>> -  // Finally, delete the Alloca instruction
>>> -  AI->eraseFromParent();
>>>   NumReplaced++;
>>>  }
>>>
>>> -/// isSafeElementUse - Check to see if this use is an allowed use for a
>>> -/// getelementptr instruction of an array aggregate allocation.  isFirstElt
>>> -/// indicates whether Ptr is known to the start of the aggregate.
>>> -void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
>>> -                            AllocaInfo &Info) {
>>> -  for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
>>> -       I != E; ++I) {
>>> -    Instruction *User = cast<Instruction>(*I);
>>> -    switch (User->getOpcode()) {
>>> -    case Instruction::Load:  break;
>>> -    case Instruction::Store:
>>> -      // Store is ok if storing INTO the pointer, not storing the pointer
>>> -      if (User->getOperand(0) == Ptr) return MarkUnsafe(Info);
>>> -      break;
>>> -    case Instruction::GetElementPtr: {
>>> -      GetElementPtrInst *GEP = cast<GetElementPtrInst>(User);
>>> -      bool AreAllZeroIndices = isFirstElt;
>>> -      if (GEP->getNumOperands() > 1 &&
>>> -          (!isa<ConstantInt>(GEP->getOperand(1)) ||
>>> -           !cast<ConstantInt>(GEP->getOperand(1))->isZero()))
>>> -        // Using pointer arithmetic to navigate the array.
>>> -        return MarkUnsafe(Info);
>>> -
>>> -      // Verify that any array subscripts are in range.
>>> -      for (gep_type_iterator GEPIt = gep_type_begin(GEP),
>>> -           E = gep_type_end(GEP); GEPIt != E; ++GEPIt) {
>>> -        // Ignore struct elements, no extra checking needed for these.
>>> -        if (isa<StructType>(*GEPIt))
>>> -          continue;
>>> -
>>> -        // This GEP indexes an array.  Verify that this is an in-range
>>> -        // constant integer. Specifically, consider A[0][i]. We cannot know that
>>> -        // the user isn't doing invalid things like allowing i to index an
>>> -        // out-of-range subscript that accesses A[1].  Because of this, we have
>>> -        // to reject SROA of any accesses into structs where any of the
>>> -        // components are variables.
>>> -        ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
>>> -        if (!IdxVal) return MarkUnsafe(Info);
>>> -
>>> -        // Are all indices still zero?
>>> -        AreAllZeroIndices &= IdxVal->isZero();
>>> -
>>> -        if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPIt)) {
>>> -          if (IdxVal->getZExtValue() >= AT->getNumElements())
>>> -            return MarkUnsafe(Info);
>>> -        } else if (const VectorType *VT = dyn_cast<VectorType>(*GEPIt)) {
>>> -          if (IdxVal->getZExtValue() >= VT->getNumElements())
>>> -            return MarkUnsafe(Info);
>>> -        }
>>> +/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
>>> +/// recursively including all their operands that become trivially dead.
>>> +void SROA::DeleteDeadInstructions() {
>>> +  while (!DeadInsts.empty()) {
>>> +    Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
>>> +    if (I == 0)
>>> +      continue;
>>> +
>>> +    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
>>> +      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
>>> +        // Zero out the operand and see if it becomes trivially dead.
>>> +        *OI = 0;
>>> +        if (isInstructionTriviallyDead(U))
>>> +          DeadInsts.push_back(U);
>>>       }
>>> -
>>> -      isSafeElementUse(GEP, AreAllZeroIndices, AI, Info);
>>> -      if (Info.isUnsafe) return;
>>> -      break;
>>> -    }
>>> -    case Instruction::BitCast:
>>> -      if (isFirstElt) {
>>> -        isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI, Info);
>>> -        if (Info.isUnsafe) return;
>>> -        break;
>>> -      }
>>> -      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
>>> -      return MarkUnsafe(Info);
>>> -    case Instruction::Call:
>>> -      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
>>> -        if (isFirstElt) {
>>> -          isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info);
>>> -          if (Info.isUnsafe) return;
>>> -          break;
>>> -        }
>>> -      }
>>> -      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
>>> -      return MarkUnsafe(Info);
>>> -    default:
>>> -      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
>>> -      return MarkUnsafe(Info);
>>> -    }
>>> +
>>> +    I->eraseFromParent();
>>>   }
>>> -  return;  // All users look ok :)
>>>  }
>>> -
>>> +
>>>  /// AllUsersAreLoads - Return true if all users of this value are loads.
>>>  static bool AllUsersAreLoads(Value *Ptr) {
>>>   for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
>>> @@ -539,72 +409,116 @@
>>>   return true;
>>>  }
>>>
>>> -/// isSafeUseOfAllocation - Check if this user is an allowed use for an
>>> -/// aggregate allocation.
>>> -void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
>>> -                                 AllocaInfo &Info) {
>>> -  if (BitCastInst *C = dyn_cast<BitCastInst>(User))
>>> -    return isSafeUseOfBitCastedAllocation(C, AI, Info);
>>> -
>>> -  if (LoadInst *LI = dyn_cast<LoadInst>(User))
>>> -    if (!LI->isVolatile())
>>> -      return;// Loads (returning a first class aggregrate) are always rewritable
>>> -
>>> -  if (StoreInst *SI = dyn_cast<StoreInst>(User))
>>> -    if (!SI->isVolatile() && SI->getOperand(0) != AI)
>>> -      return;// Store is ok if storing INTO the pointer, not storing the pointer
>>> -
>>> -  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User);
>>> -  if (GEPI == 0)
>>> -    return MarkUnsafe(Info);
>>> -
>>> -  gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI);
>>> +/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
>>> +/// performing scalar replacement of alloca AI.  The results are flagged in
>>> +/// the Info parameter.  Offset and ArrayOffset indicate the position within
>>> +/// AI that is referenced by this instruction.
>>> +void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
>>> +                               uint64_t ArrayOffset, AllocaInfo &Info) {
>>> +  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
>>> +    Instruction *User = cast<Instruction>(*UI);
>>>
>>> -  // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>".
>>> -  if (I == E ||
>>> -      I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) {
>>> -    return MarkUnsafe(Info);
>>> +    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
>>> +      isSafeForScalarRepl(BC, AI, Offset, ArrayOffset, Info);
>>> +    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
>>> +      uint64_t GEPArrayOffset = ArrayOffset;
>>> +      uint64_t GEPOffset = Offset;
>>> +      isSafeGEP(GEPI, AI, GEPOffset, GEPArrayOffset, Info);
>>> +      if (!Info.isUnsafe)
>>> +        isSafeForScalarRepl(GEPI, AI, GEPOffset, GEPArrayOffset, Info);
>>> +    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
>>> +      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
>>> +      if (Length)
>>> +        isSafeMemAccess(AI, Offset, ArrayOffset, Length->getZExtValue(), 0,
>>> +                        UI.getOperandNo() == 1, Info);
>>> +      else
>>> +        MarkUnsafe(Info);
>>> +    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
>>> +      if (!LI->isVolatile()) {
>>> +        const Type *LIType = LI->getType();
>>> +        isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(LIType),
>>> +                        LIType, false, Info);
>>> +      } else
>>> +        MarkUnsafe(Info);
>>> +    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
>>> +      // Store is ok if storing INTO the pointer, not storing the pointer
>>> +      if (!SI->isVolatile() && SI->getOperand(0) != I) {
>>> +        const Type *SIType = SI->getOperand(0)->getType();
>>> +        isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(SIType),
>>> +                        SIType, true, Info);
>>> +      } else
>>> +        MarkUnsafe(Info);
>>> +    } else if (isa<DbgInfoIntrinsic>(UI)) {
>>> +      // If one user is DbgInfoIntrinsic then check if all users are
>>> +      // DbgInfoIntrinsics.
>>> +      if (OnlyUsedByDbgInfoIntrinsics(I)) {
>>> +        Info.needsCleanup = true;
>>> +        return;
>>> +      }
>>> +      MarkUnsafe(Info);
>>> +    } else {
>>> +      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
>>> +      MarkUnsafe(Info);
>>> +    }
>>> +    if (Info.isUnsafe) return;
>>>   }
>>> +}
>>>
>>> -  ++I;
>>> -  if (I == E) return MarkUnsafe(Info);  // ran out of GEP indices??
>>> +/// isSafeGEP - Check if a GEP instruction can be handled for scalar
>>> +/// replacement.  It is safe when all the indices are constant, in-bounds
>>> +/// references, and when the resulting offset corresponds to an element within
>>> +/// the alloca type.  The results are flagged in the Info parameter.  Upon
>>> +/// return, Offset is adjusted as specified by the GEP indices.  For the
>>> +/// special case of a variable index to a 2-element array, ArrayOffset is set
>>> +/// to the array element size.
>>> +void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
>>> +                     uint64_t &Offset, uint64_t &ArrayOffset,
>>> +                     AllocaInfo &Info) {
>>> +  gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
>>> +  if (GEPIt == E)
>>> +    return;
>>> +
>>> +  // The first GEP index must be zero.
>>> +  if (!isa<ConstantInt>(GEPIt.getOperand()) ||
>>> +      !cast<ConstantInt>(GEPIt.getOperand())->isZero())
>>> +    return MarkUnsafe(Info);
>>> +  if (++GEPIt == E)
>>> +    return;
>>>
>>> -  bool IsAllZeroIndices = true;
>>> -
>>>   // If the first index is a non-constant index into an array, see if we can
>>>   // handle it as a special case.
>>> -  if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
>>> -    if (!isa<ConstantInt>(I.getOperand())) {
>>> -      IsAllZeroIndices = 0;
>>> -      uint64_t NumElements = AT->getNumElements();
>>> -
>>> -      // If this is an array index and the index is not constant, we cannot
>>> -      // promote... that is unless the array has exactly one or two elements in
>>> -      // it, in which case we CAN promote it, but we have to canonicalize this
>>> -      // out if this is the only problem.
>>> -      if ((NumElements == 1 || NumElements == 2) &&
>>> -          AllUsersAreLoads(GEPI)) {
>>> +  const Type *ArrayEltTy = 0;
>>> +  if (ArrayOffset == 0 && Offset == 0) {
>>> +    if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPIt)) {
>>> +      if (!isa<ConstantInt>(GEPIt.getOperand())) {
>>> +        uint64_t NumElements = AT->getNumElements();
>>> +
>>> +        // If this is an array index and the index is not constant, we cannot
>>> +        // promote... that is unless the array has exactly one or two elements
>>> +        // in it, in which case we CAN promote it, but we have to canonicalize
>>> +        // this out if this is the only problem.
>>> +        if ((NumElements != 1 && NumElements != 2) || !AllUsersAreLoads(GEPI))
>>> +          return MarkUnsafe(Info);
>>>         Info.needsCleanup = true;
>>> -        return;  // Canonicalization required!
>>> +        ArrayOffset = TD->getTypeAllocSizeInBits(AT->getElementType());
>>> +        ArrayEltTy = AT->getElementType();
>>> +        ++GEPIt;
>>>       }
>>> -      return MarkUnsafe(Info);
>>>     }
>>>   }
>>> -
>>> +
>>>   // Walk through the GEP type indices, checking the types that this indexes
>>>   // into.
>>> -  for (; I != E; ++I) {
>>> +  for (; GEPIt != E; ++GEPIt) {
>>>     // Ignore struct elements, no extra checking needed for these.
>>> -    if (isa<StructType>(*I))
>>> +    if (isa<StructType>(*GEPIt))
>>>       continue;
>>> -
>>> -    ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand());
>>> -    if (!IdxVal) return MarkUnsafe(Info);
>>>
>>> -    // Are all indices still zero?
>>> -    IsAllZeroIndices &= IdxVal->isZero();
>>> -
>>> -    if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
>>> +    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
>>> +    if (!IdxVal)
>>> +      return MarkUnsafe(Info);
>>> +
>>> +    if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPIt)) {
>>>       // This GEP indexes an array.  Verify that this is an in-range constant
>>>       // integer. Specifically, consider A[0][i]. We cannot know that the user
>>>       // isn't doing invalid things like allowing i to index an out-of-range
>>> @@ -612,147 +526,255 @@
>>>       // of any accesses into structs where any of the components are variables.
>>>       if (IdxVal->getZExtValue() >= AT->getNumElements())
>>>         return MarkUnsafe(Info);
>>> -    } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) {
>>> +    } else {
>>> +      const VectorType *VT = dyn_cast<VectorType>(*GEPIt);
>>> +      assert(VT && "unexpected type in GEP type iterator");
>>>       if (IdxVal->getZExtValue() >= VT->getNumElements())
>>>         return MarkUnsafe(Info);
>>>     }
>>>   }
>>> -
>>> -  // If there are any non-simple uses of this getelementptr, make sure to reject
>>> -  // them.
>>> -  return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info);
>>> +
>>> +  // All the indices are safe.  Now compute the offset due to this GEP and
>>> +  // check if the alloca has a component element at that offset.
>>> +  if (ArrayOffset == 0) {
>>> +    SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
>>> +    Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
>>> +                                   &Indices[0], Indices.size());
>>> +  } else {
>>> +    // Both array elements have the same type, so it suffices to check one of
>>> +    // them.  Copy the GEP indices starting from the array index, but replace
>>> +    // that variable index with a constant zero.
>>> +    SmallVector<Value*, 8> Indices(GEPI->op_begin() + 2, GEPI->op_end());
>>> +    Indices[0] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()));
>>> +    const Type *ArrayEltPtr = PointerType::getUnqual(ArrayEltTy);
>>> +    Offset += TD->getIndexedOffset(ArrayEltPtr, &Indices[0], Indices.size());
>>> +  }
>>> +  if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0))
>>> +    MarkUnsafe(Info);
>>> +}
>>> +
>>> +/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
>>> +/// alloca or has an offset and size that corresponds to a component element
>>> +/// within it.  The offset checked here may have been formed from a GEP with a
>>> +/// pointer bitcasted to a different type.
>>> +void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset,
>>> +                           uint64_t ArrayOffset, uint64_t MemSize,
>>> +                           const Type *MemOpType, bool isStore,
>>> +                           AllocaInfo &Info) {
>>> +  // Check if this is a load/store of the entire alloca.
>>> +  if (Offset == 0 && ArrayOffset == 0 &&
>>> +      MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) {
>>> +    bool UsesAggregateType = (MemOpType == AI->getAllocatedType());
>>> +    // This is safe for MemIntrinsics (where MemOpType is 0), integer types
>>> +    // (which are essentially the same as the MemIntrinsics, especially with
>>> +    // regard to copying padding between elements), or references using the
>>> +    // aggregate type of the alloca.
>>> +    if (!MemOpType || isa<IntegerType>(MemOpType) || UsesAggregateType) {
>>> +      if (!UsesAggregateType) {
>>> +        if (isStore)
>>> +          Info.isMemCpyDst = true;
>>> +        else
>>> +          Info.isMemCpySrc = true;
>>> +      }
>>> +      return;
>>> +    }
>>> +  }
>>> +  // Check if the offset/size correspond to a component within the alloca type.
>>> +  const Type *T = AI->getAllocatedType();
>>> +  if (TypeHasComponent(T, Offset, MemSize) &&
>>> +      (ArrayOffset == 0 || TypeHasComponent(T, Offset + ArrayOffset, MemSize)))
>>> +    return;
>>> +
>>> +  return MarkUnsafe(Info);
>>>  }
>>>
>>> -/// isSafeMemIntrinsicOnAllocation - Check if the specified memory
>>> -/// intrinsic can be promoted by SROA.  At this point, we know that the operand
>>> -/// of the memintrinsic is a pointer to the beginning of the allocation.
>>> -void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
>>> -                                          unsigned OpNo, AllocaInfo &Info) {
>>> -  // If not constant length, give up.
>>> -  ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
>>> -  if (!Length) return MarkUnsafe(Info);
>>> -
>>> -  // If not the whole aggregate, give up.
>>> -  if (Length->getZExtValue() !=
>>> -      TD->getTypeAllocSize(AI->getType()->getElementType()))
>>> -    return MarkUnsafe(Info);
>>> -
>>> -  // We only know about memcpy/memset/memmove.
>>> -  if (!isa<MemIntrinsic>(MI))
>>> -    return MarkUnsafe(Info);
>>> -
>>> -  // Otherwise, we can transform it.  Determine whether this is a memcpy/set
>>> -  // into or out of the aggregate.
>>> -  if (OpNo == 1)
>>> -    Info.isMemCpyDst = true;
>>> -  else {
>>> -    assert(OpNo == 2);
>>> -    Info.isMemCpySrc = true;
>>> +/// TypeHasComponent - Return true if T has a component type with the
>>> +/// specified offset and size.  If Size is zero, do not check the size.
>>> +bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
>>> +  const Type *EltTy;
>>> +  uint64_t EltSize;
>>> +  if (const StructType *ST = dyn_cast<StructType>(T)) {
>>> +    const StructLayout *Layout = TD->getStructLayout(ST);
>>> +    unsigned EltIdx = Layout->getElementContainingOffset(Offset);
>>> +    EltTy = ST->getContainedType(EltIdx);
>>> +    EltSize = TD->getTypeAllocSize(EltTy);
>>> +    Offset -= Layout->getElementOffset(EltIdx);
>>> +  } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
>>> +    EltTy = AT->getElementType();
>>> +    EltSize = TD->getTypeAllocSize(EltTy);
>>> +    Offset %= EltSize;
>>> +  } else {
>>> +    return false;
>>>   }
>>> +  if (Offset == 0 && (Size == 0 || EltSize == Size))
>>> +    return true;
>>> +  // Check if the component spans multiple elements.
>>> +  if (Offset + Size > EltSize)
>>> +    return false;
>>> +  return TypeHasComponent(EltTy, Offset, Size);
>>>  }
>>>
>>> -/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast
>>> -/// from an alloca are safe for SROA of that alloca.
>>> -void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI,
>>> -                                          AllocaInfo &Info) {
>>> -  for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end();
>>> -       UI != E; ++UI) {
>>> -    if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) {
>>> -      isSafeUseOfBitCastedAllocation(BCU, AI, Info);
>>> -    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
>>> -      isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info);
>>> -    } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
>>> -      if (SI->isVolatile())
>>> -        return MarkUnsafe(Info);
>>> -
>>> -      // If storing the entire alloca in one chunk through a bitcasted pointer
>>> -      // to integer, we can transform it.  This happens (for example) when you
>>> -      // cast a {i32,i32}* to i64* and store through it.  This is similar to the
>>> -      // memcpy case and occurs in various "byval" cases and emulated memcpys.
>>> -      if (isa<IntegerType>(SI->getOperand(0)->getType()) &&
>>> -          TD->getTypeAllocSize(SI->getOperand(0)->getType()) ==
>>> -          TD->getTypeAllocSize(AI->getType()->getElementType())) {
>>> -        Info.isMemCpyDst = true;
>>> -        continue;
>>> -      }
>>> -      return MarkUnsafe(Info);
>>> -    } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
>>> -      if (LI->isVolatile())
>>> -        return MarkUnsafe(Info);
>>> +/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
>>> +/// the instruction I, which references it, to use the separate elements.
>>> +/// Offset indicates the position within AI that is referenced by this
>>> +/// instruction.
>>> +void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
>>> +                                SmallVector<AllocaInst*, 32> &NewElts) {
>>> +  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
>>> +    Instruction *User = cast<Instruction>(*UI);
>>>
>>> -      // If loading the entire alloca in one chunk through a bitcasted pointer
>>> -      // to integer, we can transform it.  This happens (for example) when you
>>> -      // cast a {i32,i32}* to i64* and load through it.  This is similar to the
>>> -      // memcpy case and occurs in various "byval" cases and emulated memcpys.
>>> -      if (isa<IntegerType>(LI->getType()) &&
>>> -          TD->getTypeAllocSize(LI->getType()) ==
>>> -          TD->getTypeAllocSize(AI->getType()->getElementType())) {
>>> -        Info.isMemCpySrc = true;
>>> -        continue;
>>> +    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
>>> +      RewriteBitCast(BC, AI, Offset, NewElts);
>>> +    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
>>> +      RewriteGEP(GEPI, AI, Offset, NewElts);
>>> +    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
>>> +      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
>>> +      uint64_t MemSize = Length->getZExtValue();
>>> +      if (Offset == 0 &&
>>> +          MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
>>> +        RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
>>> +    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
>>> +      const Type *LIType = LI->getType();
>>> +      if (LIType == AI->getAllocatedType()) {
>>> +        // Replace:
>>> +        //   %res = load { i32, i32 }* %alloc
>>> +        // with:
>>> +        //   %load.0 = load i32* %alloc.0
>>> +        //   %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
>>> +        //   %load.1 = load i32* %alloc.1
>>> +        //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
>>> +        // (Also works for arrays instead of structs)
>>> +        Value *Insert = UndefValue::get(LIType);
>>> +        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
>>> +          Value *Load = new LoadInst(NewElts[i], "load", LI);
>>> +          Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
>>> +        }
>>> +        LI->replaceAllUsesWith(Insert);
>>> +        DeadInsts.push_back(LI);
>>> +      } else if (isa<IntegerType>(LIType) &&
>>> +                 TD->getTypeAllocSize(LIType) ==
>>> +                 TD->getTypeAllocSize(AI->getAllocatedType())) {
>>> +        // If this is a load of the entire alloca to an integer, rewrite it.
>>> +        RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
>>>       }
>>> -      return MarkUnsafe(Info);
>>> -    } else if (isa<DbgInfoIntrinsic>(UI)) {
>>> -      // If one user is DbgInfoIntrinsic then check if all users are
>>> -      // DbgInfoIntrinsics.
>>> -      if (OnlyUsedByDbgInfoIntrinsics(BC)) {
>>> -        Info.needsCleanup = true;
>>> -        return;
>>> +    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
>>> +      Value *Val = SI->getOperand(0);
>>> +      const Type *SIType = Val->getType();
>>> +      if (SIType == AI->getAllocatedType()) {
>>> +        // Replace:
>>> +        //   store { i32, i32 } %val, { i32, i32 }* %alloc
>>> +        // with:
>>> +        //   %val.0 = extractvalue { i32, i32 } %val, 0
>>> +        //   store i32 %val.0, i32* %alloc.0
>>> +        //   %val.1 = extractvalue { i32, i32 } %val, 1
>>> +        //   store i32 %val.1, i32* %alloc.1
>>> +        // (Also works for arrays instead of structs)
>>> +        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
>>> +          Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
>>> +          new StoreInst(Extract, NewElts[i], SI);
>>> +        }
>>> +        DeadInsts.push_back(SI);
>>> +      } else if (isa<IntegerType>(SIType) &&
>>> +                 TD->getTypeAllocSize(SIType) ==
>>> +                 TD->getTypeAllocSize(AI->getAllocatedType())) {
>>> +        // If this is a store of the entire alloca from an integer, rewrite it.
>>> +        RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
>>>       }
>>> -      else
>>> -        MarkUnsafe(Info);
>>>     }
>>> -    else {
>>> -      return MarkUnsafe(Info);
>>> -    }
>>> -    if (Info.isUnsafe) return;
>>>   }
>>>  }
>>>
>>> -/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes
>>> -/// to its first element.  Transform users of the cast to use the new values
>>> -/// instead.
>>> -void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI,
>>> -                                      SmallVector<AllocaInst*, 32> &NewElts) {
>>> -  Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end();
>>> -  while (UI != UE) {
>>> -    Instruction *User = cast<Instruction>(*UI++);
>>> -    if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) {
>>> -      RewriteBitCastUserOfAlloca(BCU, AI, NewElts);
>>> -      if (BCU->use_empty()) BCU->eraseFromParent();
>>> -      continue;
>>> -    }
>>> +/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
>>> +/// and recursively continue updating all of its uses.
>>> +void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
>>> +                          SmallVector<AllocaInst*, 32> &NewElts) {
>>> +  RewriteForScalarRepl(BC, AI, Offset, NewElts);
>>> +  if (BC->getOperand(0) != AI)
>>> +    return;
>>>
>>> -    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
>>> -      // This must be memcpy/memmove/memset of the entire aggregate.
>>> -      // Split into one per element.
>>> -      RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts);
>>> -      continue;
>>> -    }
>>> -
>>> -    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
>>> -      // If this is a store of the entire alloca from an integer, rewrite it.
>>> -      RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
>>> -      continue;
>>> -    }
>>> +  // The bitcast references the original alloca.  Replace its uses with
>>> +  // references to the first new element alloca.
>>> +  Instruction *Val = NewElts[0];
>>> +  if (Val->getType() != BC->getDestTy()) {
>>> +    Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
>>> +    Val->takeName(BC);
>>> +  }
>>> +  BC->replaceAllUsesWith(Val);
>>> +  DeadInsts.push_back(BC);
>>> +}
>>> +
>>> +/// FindElementAndOffset - Return the index of the element containing Offset
>>> +/// within the specified type, which must be either a struct or an array.
>>> +/// Sets T to the type of the element and Offset to the offset within that
>>> +/// element.
>>> +unsigned SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset) {
>>> +  unsigned Idx = 0;
>>> +  if (const StructType *ST = dyn_cast<StructType>(T)) {
>>> +    const StructLayout *Layout = TD->getStructLayout(ST);
>>> +    Idx = Layout->getElementContainingOffset(Offset);
>>> +    T = ST->getContainedType(Idx);
>>> +    Offset -= Layout->getElementOffset(Idx);
>>> +  } else {
>>> +    const ArrayType *AT = dyn_cast<ArrayType>(T);
>>> +    assert(AT && "unexpected type for scalar replacement");
>>> +    T = AT->getElementType();
>>> +    uint64_t EltSize = TD->getTypeAllocSize(T);
>>> +    Idx = (unsigned)(Offset / EltSize);
>>> +    Offset -= Idx * EltSize;
>>> +  }
>>> +  return Idx;
>>> +}
>>> +
>>> +/// RewriteGEP - Check if this GEP instruction moves the pointer across
>>> +/// elements of the alloca that are being split apart, and if so, rewrite
>>> +/// the GEP to be relative to the new element.
>>> +void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
>>> +                      SmallVector<AllocaInst*, 32> &NewElts) {
>>> +  uint64_t OldOffset = Offset;
>>> +  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
>>> +  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
>>> +                                 &Indices[0], Indices.size());
>>> +
>>> +  RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
>>> +
>>> +  const Type *T = AI->getAllocatedType();
>>> +  unsigned OldIdx = FindElementAndOffset(T, OldOffset);
>>> +  if (GEPI->getOperand(0) == AI)
>>> +    OldIdx = ~0U; // Force the GEP to be rewritten.
>>> +
>>> +  T = AI->getAllocatedType();
>>> +  uint64_t EltOffset = Offset;
>>> +  unsigned Idx = FindElementAndOffset(T, EltOffset);
>>> +
>>> +  // If this GEP does not move the pointer across elements of the alloca
>>> +  // being split, then it does not needs to be rewritten.
>>> +  if (Idx == OldIdx)
>>> +    return;
>>>
>>> -    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
>>> -      // If this is a load of the entire alloca to an integer, rewrite it.
>>> -      RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
>>> -      continue;
>>> -    }
>>> -
>>> -    // Otherwise it must be some other user of a gep of the first pointer.  Just
>>> -    // leave these alone.
>>> -    continue;
>>> -  }
>>> +  const Type *i32Ty = Type::getInt32Ty(AI->getContext());
>>> +  SmallVector<Value*, 8> NewArgs;
>>> +  NewArgs.push_back(Constant::getNullValue(i32Ty));
>>> +  while (EltOffset != 0) {
>>> +    unsigned EltIdx = FindElementAndOffset(T, EltOffset);
>>> +    NewArgs.push_back(ConstantInt::get(i32Ty, EltIdx));
>>> +  }
>>> +  Instruction *Val = NewElts[Idx];
>>> +  if (NewArgs.size() > 1) {
>>> +    Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
>>> +                                            NewArgs.end(), "", GEPI);
>>> +    Val->takeName(GEPI);
>>> +  }
>>> +  if (Val->getType() != GEPI->getType())
>>> +    Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI);
>>> +  GEPI->replaceAllUsesWith(Val);
>>> +  DeadInsts.push_back(GEPI);
>>>  }
>>>
>>>  /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
>>>  /// Rewrite it to copy or set the elements of the scalarized memory.
>>> -void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
>>> +void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
>>>                                         AllocaInst *AI,
>>>                                         SmallVector<AllocaInst*, 32> &NewElts) {
>>> -
>>>   // If this is a memcpy/memmove, construct the other pointer as the
>>>   // appropriate type.  The "Other" pointer is the pointer that goes to memory
>>>   // that doesn't have anything to do with the alloca that we are promoting. For
>>> @@ -761,28 +783,41 @@
>>>   LLVMContext &Context = MI->getContext();
>>>   unsigned MemAlignment = MI->getAlignment();
>>>   if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
>>> -    if (BCInst == MTI->getRawDest())
>>> +    if (Inst == MTI->getRawDest())
>>>       OtherPtr = MTI->getRawSource();
>>>     else {
>>> -      assert(BCInst == MTI->getRawSource());
>>> +      assert(Inst == MTI->getRawSource());
>>>       OtherPtr = MTI->getRawDest();
>>>     }
>>>   }
>>>
>>> -  // Keep track of the other intrinsic argument, so it can be removed if it
>>> -  // is dead when the intrinsic is replaced.
>>> -  Value *PossiblyDead = OtherPtr;
>>> -
>>>   // If there is an other pointer, we want to convert it to the same pointer
>>>   // type as AI has, so we can GEP through it safely.
>>>   if (OtherPtr) {
>>> -    // It is likely that OtherPtr is a bitcast, if so, remove it.
>>> -    if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr))
>>> -      OtherPtr = BC->getOperand(0);
>>> -    // All zero GEPs are effectively bitcasts.
>>> -    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr))
>>> -      if (GEP->hasAllZeroIndices())
>>> -        OtherPtr = GEP->getOperand(0);
>>> +
>>> +    // Remove bitcasts and all-zero GEPs from OtherPtr.  This is an
>>> +    // optimization, but it's also required to detect the corner case where
>>> +    // both pointer operands are referencing the same memory, and where
>>> +    // OtherPtr may be a bitcast or GEP that currently being rewritten.  (This
>>> +    // function is only called for mem intrinsics that access the whole
>>> +    // aggregate, so non-zero GEPs are not an issue here.)
>>> +    while (1) {
>>> +      if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) {
>>> +        OtherPtr = BC->getOperand(0);
>>> +        continue;
>>> +      }
>>> +      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) {
>>> +        // All zero GEPs are effectively bitcasts.
>>> +        if (GEP->hasAllZeroIndices()) {
>>> +          OtherPtr = GEP->getOperand(0);
>>> +          continue;
>>> +        }
>>> +      }
>>> +      break;
>>> +    }
>>> +    // If OtherPtr has already been rewritten, this intrinsic will be dead.
>>> +    if (OtherPtr == NewElts[0])
>>> +      return;
>>>
>>>     if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
>>>       if (BCE->getOpcode() == Instruction::BitCast)
>>> @@ -798,7 +833,7 @@
>>>   // Process each element of the aggregate.
>>>   Value *TheFn = MI->getOperand(0);
>>>   const Type *BytePtrTy = MI->getRawDest()->getType();
>>> -  bool SROADest = MI->getRawDest() == BCInst;
>>> +  bool SROADest = MI->getRawDest() == Inst;
>>>
>>>   Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
>>>
>>> @@ -807,12 +842,15 @@
>>>     Value *OtherElt = 0;
>>>     unsigned OtherEltAlign = MemAlignment;
>>>
>>> -    if (OtherPtr) {
>>> +    if (OtherPtr == AI) {
>>> +      OtherElt = NewElts[i];
>>> +      OtherEltAlign = 0;
>>> +    } else if (OtherPtr) {
>>>       Value *Idx[2] = { Zero,
>>>                       ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
>>> -      OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
>>> +      OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
>>>                                            OtherPtr->getNameStr()+"."+Twine(i),
>>> -                                           MI);
>>> +                                                   MI);
>>>       uint64_t EltOffset;
>>>       const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
>>>       if (const StructType *ST =
>>> @@ -924,9 +962,7 @@
>>>       CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
>>>     }
>>>   }
>>> -  MI->eraseFromParent();
>>> -  if (PossiblyDead)
>>> -    RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead);
>>> +  DeadInsts.push_back(MI);
>>>  }
>>>
>>>  /// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
>>> @@ -937,15 +973,9 @@
>>>   // Extract each element out of the integer according to its structure offset
>>>   // and store the element value to the individual alloca.
>>>   Value *SrcVal = SI->getOperand(0);
>>> -  const Type *AllocaEltTy = AI->getType()->getElementType();
>>> +  const Type *AllocaEltTy = AI->getAllocatedType();
>>>   uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
>>>
>>> -  // If this isn't a store of an integer to the whole alloca, it may be a store
>>> -  // to the first element.  Just ignore the store in this case and normal SROA
>>> -  // will handle it.
>>> -  if (!isa<IntegerType>(SrcVal->getType()) ||
>>> -      TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits)
>>> -    return;
>>>   // Handle tail padding by extending the operand
>>>   if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
>>>     SrcVal = new ZExtInst(SrcVal,
>>> @@ -1050,7 +1080,7 @@
>>>     }
>>>   }
>>>
>>> -  SI->eraseFromParent();
>>> +  DeadInsts.push_back(SI);
>>>  }
>>>
>>>  /// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
>>> @@ -1059,16 +1089,9 @@
>>>                                         SmallVector<AllocaInst*, 32> &NewElts) {
>>>   // Extract each element out of the NewElts according to its structure offset
>>>   // and form the result value.
>>> -  const Type *AllocaEltTy = AI->getType()->getElementType();
>>> +  const Type *AllocaEltTy = AI->getAllocatedType();
>>>   uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
>>>
>>> -  // If this isn't a load of the whole alloca to an integer, it may be a load
>>> -  // of the first element.  Just ignore the load in this case and normal SROA
>>> -  // will handle it.
>>> -  if (!isa<IntegerType>(LI->getType()) ||
>>> -      TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits)
>>> -    return;
>>> -
>>>   DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
>>>                << '\n');
>>>
>>> @@ -1139,10 +1162,9 @@
>>>     ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
>>>
>>>   LI->replaceAllUsesWith(ResultVal);
>>> -  LI->eraseFromParent();
>>> +  DeadInsts.push_back(LI);
>>>  }
>>>
>>> -
>>>  /// HasPadding - Return true if the specified type has any structure or
>>>  /// alignment padding, false otherwise.
>>>  static bool HasPadding(const Type *Ty, const TargetData &TD) {
>>> @@ -1192,14 +1214,10 @@
>>>   // the users are safe to transform.
>>>   AllocaInfo Info;
>>>
>>> -  for (Value::use_iterator I = AI->use_begin(), E = AI->use_end();
>>> -       I != E; ++I) {
>>> -    isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info);
>>> -    if (Info.isUnsafe) {
>>> -      DEBUG(errs() << "Cannot transform: " << *AI << "\n  due to user: "
>>> -                   << **I << '\n');
>>> -      return 0;
>>> -    }
>>> +  isSafeForScalarRepl(AI, AI, 0, 0, Info);
>>> +  if (Info.isUnsafe) {
>>> +    DEBUG(errs() << "Cannot transform: " << *AI << '\n');
>>> +    return 0;
>>>   }
>>>
>>>   // Okay, we know all the users are promotable.  If the aggregate is a memcpy
>>> @@ -1208,7 +1226,7 @@
>>>   // types, but may actually be used.  In these cases, we refuse to promote the
>>>   // struct.
>>>   if (Info.isMemCpySrc && Info.isMemCpyDst &&
>>> -      HasPadding(AI->getType()->getElementType(), *TD))
>>> +      HasPadding(AI->getAllocatedType(), *TD))
>>>     return 0;
>>>
>>>   // If we require cleanup, return 1, otherwise return 3.
>>> @@ -1245,15 +1263,15 @@
>>>   // Insert the new GEP instructions, which are properly indexed.
>>>   SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end());
>>>   Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()));
>>> -  Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
>>> -                                             Indices.begin(),
>>> -                                             Indices.end(),
>>> -                                             GEPI->getName()+".0", GEPI);
>>> +  Value *ZeroIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0),
>>> +                                                     Indices.begin(),
>>> +                                                     Indices.end(),
>>> +                                                     GEPI->getName()+".0",GEPI);
>>>   Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1);
>>> -  Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
>>> -                                            Indices.begin(),
>>> -                                            Indices.end(),
>>> -                                            GEPI->getName()+".1", GEPI);
>>> +  Value *OneIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0),
>>> +                                                    Indices.begin(),
>>> +                                                    Indices.end(),
>>> +                                                    GEPI->getName()+".1", GEPI);
>>>   // Replace all loads of the variable index GEP with loads from both
>>>   // indexes and a select.
>>>   while (!GEPI->use_empty()) {
>>> @@ -1264,22 +1282,24 @@
>>>     LI->replaceAllUsesWith(R);
>>>     LI->eraseFromParent();
>>>   }
>>> -  GEPI->eraseFromParent();
>>>  }
>>>
>>> -
>>>  /// CleanupAllocaUsers - If SROA reported that it can promote the specified
>>>  /// allocation, but only if cleaned up, perform the cleanups required.
>>> -void SROA::CleanupAllocaUsers(AllocaInst *AI) {
>>> +void SROA::CleanupAllocaUsers(Value *V) {
>>>   // At this point, we know that the end result will be SROA'd and promoted, so
>>>   // we can insert ugly code if required so long as sroa+mem2reg will clean it
>>>   // up.
>>> -  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
>>> +  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
>>>        UI != E; ) {
>>>     User *U = *UI++;
>>> -    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U))
>>> +    if (isa<BitCastInst>(U)) {
>>> +      CleanupAllocaUsers(U);
>>> +    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
>>>       CleanupGEP(GEPI);
>>> -    else {
>>> +      CleanupAllocaUsers(GEPI);
>>> +      if (GEPI->use_empty()) GEPI->eraseFromParent();
>>> +    } else {
>>>       Instruction *I = cast<Instruction>(U);
>>>       SmallVector<DbgInfoIntrinsic *, 2> DbgInUses;
>>>       if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) {
>>> @@ -1395,7 +1415,7 @@
>>>
>>>       // Compute the offset that this GEP adds to the pointer.
>>>       SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
>>> -      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
>>> +      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
>>>                                                 &Indices[0], Indices.size());
>>>       // See if all uses can be converted.
>>>       if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset,
>>> @@ -1457,7 +1477,7 @@
>>>     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
>>>       // Compute the offset that this GEP adds to the pointer.
>>>       SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
>>> -      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
>>> +      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
>>>                                                 &Indices[0], Indices.size());
>>>       ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
>>>       GEP->eraseFromParent();
>>>
>>> Added: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll?rev=91459&view=auto
>>>
>>> ==============================================================================
>>> --- llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll (added)
>>> +++ llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Tue Dec 15 16:00:51 2009
>>> @@ -0,0 +1,89 @@
>>> +; RUN: opt < %s -scalarrepl -S | FileCheck %s
>>> +; Radar 7441282
>>> +
>>> +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
>>> +target triple = "thumbv7-apple-darwin10"
>>> +
>>> +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
>>> +%struct.int16x8_t = type { <8 x i16> }
>>> +%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] }
>>> +%union..0anon = type { %struct.int16x8x2_t }
>>> +
>>> +define arm_apcscc void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind {
>>> +; CHECK: @test
>>> +; CHECK-NOT: alloca
>>> +; CHECK: "alloca point"
>>> +entry:
>>> +  %tmp_addr = alloca %struct.int16x8_t            ; <%struct.int16x8_t*> [#uses=3]
>>> +  %dst_addr = alloca %struct.int16x8x2_t*         ; <%struct.int16x8x2_t**> [#uses=2]
>>> +  %__rv = alloca %union..0anon                    ; <%union..0anon*> [#uses=2]
>>> +  %__bx = alloca %struct.int16x8_t                ; <%struct.int16x8_t*> [#uses=2]
>>> +  %__ax = alloca %struct.int16x8_t                ; <%struct.int16x8_t*> [#uses=2]
>>> +  %tmp2 = alloca %struct.int16x8x2_t              ; <%struct.int16x8x2_t*> [#uses=2]
>>> +  %0 = alloca %struct.int16x8x2_t                 ; <%struct.int16x8x2_t*> [#uses=2]
>>> +  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
>>> +  %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
>>> +  store <8 x i16> %tmp.0, <8 x i16>* %1
>>> +  store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr
>>> +  %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
>>> +  %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
>>> +  %4 = load <8 x i16>* %3, align 16               ; <<8 x i16>> [#uses=1]
>>> +  store <8 x i16> %4, <8 x i16>* %2, align 16
>>> +  %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
>>> +  %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
>>> +  %7 = load <8 x i16>* %6, align 16               ; <<8 x i16>> [#uses=1]
>>> +  store <8 x i16> %7, <8 x i16>* %5, align 16
>>> +  %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
>>> +  %9 = load <8 x i16>* %8, align 16               ; <<8 x i16>> [#uses=2]
>>> +  %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
>>> +  %11 = load <8 x i16>* %10, align 16             ; <<8 x i16>> [#uses=2]
>>> +  %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1]
>>> +  %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* ; <%struct.__neon_int16x8x2_t*> [#uses=2]
>>> +  %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; <<8 x i16>> [#uses=1]
>>> +  %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
>>> +  store <8 x i16> %14, <8 x i16>* %15
>>> +  %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> ; <<8 x i16>> [#uses=1]
>>> +  %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 ; <<8 x i16>*> [#uses=1]
>>> +  store <8 x i16> %16, <8 x i16>* %17
>>> +  %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1]
>>> +  %19 = bitcast %struct.int16x8x2_t* %0 to i8*    ; <i8*> [#uses=1]
>>> +  %20 = bitcast %struct.int16x8x2_t* %18 to i8*   ; <i8*> [#uses=1]
>>> +  call void @llvm.memcpy.i32(i8* %19, i8* %20, i32 32, i32 16)
>>> +  %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; <i8*> [#uses=1]
>>> +  %21 = bitcast %struct.int16x8x2_t* %0 to i8*    ; <i8*> [#uses=1]
>>> +  call void @llvm.memcpy.i32(i8* %tmp21, i8* %21, i32 32, i32 16)
>>> +  %22 = load %struct.int16x8x2_t** %dst_addr, align 4 ; <%struct.int16x8x2_t*> [#uses=1]
>>> +  %23 = bitcast %struct.int16x8x2_t* %22 to i8*   ; <i8*> [#uses=1]
>>> +  %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; <i8*> [#uses=1]
>>> +  call void @llvm.memcpy.i32(i8* %23, i8* %tmp22, i32 32, i32 16)
>>> +  br label %return
>>> +
>>> +; CHECK: store <8 x i16>
>>> +; CHECK: store <8 x i16>
>>> +
>>> +return:                                           ; preds = %entry
>>> +  ret void
>>> +}
>>> +
>>> +; Radar 7466574
>>> +%struct._NSRange = type { i64 }
>>> +
>>> +define arm_apcscc void @test_memcpy_self() nounwind {
>>> +; CHECK: @test_memcpy_self
>>> +; CHECK-NOT: alloca
>>> +; CHECK: br i1
>>> +entry:
>>> +  %range = alloca %struct._NSRange                ; <%struct._NSRange*> [#uses=2]
>>> +  br i1 undef, label %cond.true, label %cond.false
>>> +
>>> +cond.true:                                        ; preds = %entry
>>> +  %tmp3 = bitcast %struct._NSRange* %range to i8* ; <i8*> [#uses=1]
>>> +  %tmp4 = bitcast %struct._NSRange* %range to i8* ; <i8*> [#uses=1]
>>> +  call void @llvm.memcpy.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8)
>>> +  ret void
>>> +
>>> +cond.false:                                       ; preds = %entry
>>> +  ret void
>>> +}
>>> +
>>> +declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
>



