[llvm-commits] [llvm] r164479 - in /llvm/trunk: lib/Transforms/Scalar/SROA.cpp test/Transforms/SROA/basictest.ll

Duncan Sands baldrick at free.fr
Mon Sep 24 02:17:30 PDT 2012


Hi Chandler,

> --- llvm/trunk/lib/Transforms/Scalar/SROA.cpp (original)
> +++ llvm/trunk/lib/Transforms/Scalar/SROA.cpp Sun Sep 23 19:34:20 2012
> @@ -1723,6 +1723,54 @@
>     return true;
>   }
>
> +/// \brief Test whether the given alloca partition can be promoted to an int.
> +///
> +/// This is a quick test to check whether we can rewrite a particular alloca
> +/// partition (and its newly formed alloca) into an integer alloca suitable for
> +/// promotion to an SSA value. We only can ensure this for a limited set of
> +/// operations, and we don't want to do the rewrites unless we are confident
> +/// that the result will be promotable, so we have an early test here.
> +static bool isIntegerPromotionViable(const TargetData &TD,
> +                                     Type *AllocaTy,
> +                                     AllocaPartitioning &P,
> +                                     AllocaPartitioning::const_use_iterator I,
> +                                     AllocaPartitioning::const_use_iterator E) {
> +  IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
> +  if (!Ty)
> +    return false;
> +
> +  // Check the uses to ensure the uses are (likely) promoteable integer uses.
> +  // Also ensure that the alloca has a covering load or store. We don't want
> +  // promote because of some other unsplittable entry (which we may make

We don't want promote -> We don't want to promote

> +  // splittable later) and lose the ability to promote each element access.
> +  bool WholeAllocaOp = false;
> +  for (; I != E; ++I) {

Passing in the iterators seems pretty icky to me.  Is this because you only want
to check a specific range of uses?

> +    if (LoadInst *LI = dyn_cast<LoadInst>(&*I->User)) {
> +      if (LI->isVolatile() || !LI->getType()->isIntegerTy())
> +        return false;
> +      if (LI->getType() == Ty)
> +        WholeAllocaOp = true;

What if the load doesn't start at the beginning of the alloca, and runs
off the end instead?  At this point has that already been taken care of?
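
A made-up example of what I mean (not from the testcase):

  %a = alloca i16
  %p = bitcast i16* %a to i8*
  %q = getelementptr i8* %p, i64 1
  %r = bitcast i8* %q to i16*
  %v = load i16* %r    ; starts at byte 1, ends one byte past the alloca

The load type is i16 == Ty, so WholeAllocaOp would be set even though the
access hangs off the end.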

> +    } else if (StoreInst *SI = dyn_cast<StoreInst>(&*I->User)) {
> +      if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())
> +        return false;
> +      if (SI->getValueOperand()->getType() == Ty)
> +        WholeAllocaOp = true;
> +    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&*I->User)) {
> +      if (MI->isVolatile())
> +        return false;
> +      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(&*I->User)) {
> +        const AllocaPartitioning::MemTransferOffsets &MTO
> +          = P.getMemTransferOffsets(*MTI);
> +        if (!MTO.IsSplittable)
> +          return false;
> +      }
> +    } else {
> +      return false;
> +    }
> +  }
> +  return WholeAllocaOp;
> +}
> +
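
For reference, the sort of IR I guess this is aiming at (my own sketch):

  %a = alloca i32
  store i32 %x, i32* %a          ; whole-alloca store, sets WholeAllocaOp
  %p = bitcast i32* %a to i8*
  %q = getelementptr i8* %p, i64 1
  %b = load i8* %q               ; narrow integer load, still allowed

Here the covering i32 store is what makes the partition viable, and the
i8 load is the kind of element access the rewrite wants to keep
promotable.
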
>   namespace {
>   /// \brief Visitor to rewrite instructions using a partition of an alloca to
>   /// use a new alloca.
> @@ -1754,6 +1802,12 @@
>     Type *ElementTy;
>     uint64_t ElementSize;
>
> +  // This is a convenience and flag variable that will be null unless the new
> +  // alloca has a promotion-targeted integer type due to passing

promotion-targeted is pretty jargonish...

> +  // isIntegerPromotionViable above. If it is non-null does, the desired
> +  // integer type will be stored here for easy access during rewriting.

If it is non-null does, the desired integer type will be stored here ->
If it is non-null then the desired integer type has been stored here

> +  IntegerType *IntPromotionTy;
> +
>     // The offset of the partition user currently being rewritten.
>     uint64_t BeginOffset, EndOffset;
>     Instruction *OldPtr;
> @@ -1770,7 +1824,7 @@
>         OldAI(OldAI), NewAI(NewAI),
>         NewAllocaBeginOffset(NewBeginOffset),
>         NewAllocaEndOffset(NewEndOffset),
> -      VecTy(), ElementTy(), ElementSize(),
> +      VecTy(), ElementTy(), ElementSize(), IntPromotionTy(),
>         BeginOffset(), EndOffset() {
>     }
>
> @@ -1786,6 +1840,9 @@
>         assert((VecTy->getScalarSizeInBits() % 8) == 0 &&
>                "Only multiple-of-8 sized vector elements are viable");
>         ElementSize = VecTy->getScalarSizeInBits() / 8;
> +    } else if (isIntegerPromotionViable(TD, NewAI.getAllocatedType(),
> +                                        P, I, E)) {
> +      IntPromotionTy = cast<IntegerType>(NewAI.getAllocatedType());
>       }
>       bool CanSROA = true;
>       for (; I != E; ++I) {
> @@ -1830,6 +1887,43 @@
>       return IRB.getInt32(Index);
>     }
>
> +  Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
> +                        uint64_t Offset) {
> +    assert(IntPromotionTy && "Alloca is not an integer we can extract from");
> +    Value *V = IRB.CreateLoad(&NewAI, getName(".load"));

What if the desired value is sitting in alignment padding off the end?
E.g. imagine that the alloca type is i24, which has an alloc size of 32
bits, i.e. there is one byte at the end that is alignment padding.  Now
imagine that you are trying to extract an i8 load from the alignment
padding byte.  The above load will only have loaded the first three
bytes; you will then shift right by those three bytes below, and the
value of the i8 load will be something undefined rather than the
contents of the padding byte.
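
Spelling the problem out with hand-written IR (assuming the rewrite
fires at all here):

  %a = alloca i24
  %p = bitcast i24* %a to i8*
  %q = getelementptr i8* %p, i64 3
  %v = load i8* %q               ; reads the padding byte

would be rewritten to roughly:

  %load  = load i24* %a
  %shift = lshr i24 %load, 24    ; shift amount == bit width -> undefined
  %v2    = trunc i24 %shift to i8

so the result is undefined rather than whatever was in the padding byte.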

> +    assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
> +    uint64_t RelOffset = Offset - NewAllocaBeginOffset;
> +    if (RelOffset)
> +      V = IRB.CreateLShr(V, RelOffset*8, getName(".shift"));

Aren't you assuming a little-endian machine here?
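
On a big-endian target the byte at offset 0 holds the high bits of the
value, so I'd expect the shift amount to be something like
(AllocaSize - RelOffset - AccessSize)*8 rather than RelOffset*8.  E.g.
after

  store i32 16909060, i32* %a    ; 0x01020304

an i8 load at offset 0 should see 0x04 on little-endian but 0x01 on
big-endian; shifting right by RelOffset*8 == 0 only gives the former.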

> +    if (TargetTy != IntPromotionTy) {
> +      assert(TargetTy->getBitWidth() < IntPromotionTy->getBitWidth() &&
> +             "Cannot extract to a larger integer!");
> +      V = IRB.CreateTrunc(V, TargetTy, getName(".trunc"));
> +    }
> +    return V;
> +  }
> +
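
Just to make sure I'm reading it right: for an i8 load at relative byte
offset 2 from an i32 alloca this emits (modulo names):

  %load  = load i32* %new
  %shift = lshr i32 %load, 16
  %trunc = trunc i32 %shift to i8

which is fine for little-endian, in-bounds accesses, per the comments
above.
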
> +  StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {

Analogous comments to the load case above.

> +    IntegerType *Ty = cast<IntegerType>(V->getType());
> +    if (Ty == IntPromotionTy)
> +      return IRB.CreateStore(V, &NewAI);
> +
> +    assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
> +           "Cannot insert a larger integer!");
> +    V = IRB.CreateZExt(V, IntPromotionTy, getName(".ext"));
> +    assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
> +    uint64_t RelOffset = Offset - NewAllocaBeginOffset;
> +    if (RelOffset)
> +      V = IRB.CreateShl(V, RelOffset*8, getName(".shift"));
> +
> +    APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth())
> +                               .shl(RelOffset*8);
> +    Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")),
> +                               Mask, getName(".mask"));
> +    return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")),
> +                           &NewAI);
> +  }
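
If I follow, inserting an i8 %v at relative byte offset 1 into an i32
alloca comes out as something like:

  %ext     = zext i8 %v to i32
  %shift   = shl i32 %ext, 8
  %oldload = load i32* %new
  %mask    = and i32 %oldload, -65281   ; ~(0xff << 8) == 0xffff00ff
  %insert  = or i32 %mask, %shift
  store i32 %insert, i32* %new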

Ciao, Duncan.


