[llvm-commits] [llvm] r164479 - in /llvm/trunk: lib/Transforms/Scalar/SROA.cpp test/Transforms/SROA/basictest.ll
Peter Cooper
peter_cooper at apple.com
Sun Sep 23 21:11:41 PDT 2012
Hi Chandler,
This new pass is excellent, but...
On Sep 23, 2012, at 5:34 PM, Chandler Carruth <chandlerc at gmail.com> wrote:
> Author: chandlerc
> Date: Sun Sep 23 19:34:20 2012
> New Revision: 164479
>
> URL: http://llvm.org/viewvc/llvm-project?rev=164479&view=rev
> Log:
> Address one of the original FIXMEs for the new SROA pass by implementing
> integer promotion analogous to vector promotion. When there is an
> integer alloca being accessed both as its integer type and as a narrower
> integer type, promote the narrower access to "insert" and "extract" the
> smaller integer from the larger one, and make the integer alloca
> a candidate for promotion.
>
> In the new formulation, we don't care about target-legal integer types or use
> thresholds to control things. Instead, we only perform this promotion to
> an integer type which the frontend has already emitted a load or store
> for. This bounds the scope and prevents optimization passes from
> coalescing larger and larger entities into a single integer.
>
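To make the shape of the rewrite concrete for anyone skimming: as far as I can tell, the IR this produces for a narrow access is plain shift-and-mask arithmetic on the wide integer. Here is a rough C++ model I put together to convince myself (the extractByte/insertByte helpers are mine, i32/i8 stand in for the alloca and access types, and endianness is ignored; this is not code from the patch):

#include <cassert>
#include <cstdint>
#include <cstdio>

// "load i8" at byte offset Off from the wide value: shift the byte down and
// truncate, mirroring the lshr + trunc that the patch's extractInteger emits.
static uint8_t extractByte(uint32_t Wide, unsigned Off) {
  assert(Off < sizeof(Wide) && "offset must lie inside the wide integer");
  return static_cast<uint8_t>(Wide >> (Off * 8));
}

// "store i8" at byte offset Off into the wide value: zero-extend, shift into
// place, clear the old byte with a mask, and OR the new byte in, mirroring
// the zext + shl + and + or that the patch's insertInteger emits.
static uint32_t insertByte(uint32_t Wide, uint8_t Narrow, unsigned Off) {
  assert(Off < sizeof(Wide) && "offset must lie inside the wide integer");
  uint32_t Mask = ~(uint32_t(0xFF) << (Off * 8));
  return (Wide & Mask) | (uint32_t(Narrow) << (Off * 8));
}

int main() {
  uint32_t Wide = 0;
  Wide = insertByte(Wide, 0xAA, 0);
  Wide = insertByte(Wide, 0xBB, 1);
  std::printf("%x %x\n", unsigned(extractByte(Wide, 0)),
              unsigned(extractByte(Wide, 1))); // prints "aa bb"
  return 0;
}

Since the promotion only targets integer types the frontend already loaded or stored, the wide value never grows beyond what the original code touched, which keeps this arithmetic cheap.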
> Modified:
> llvm/trunk/lib/Transforms/Scalar/SROA.cpp
> llvm/trunk/test/Transforms/SROA/basictest.ll
>
> Modified: llvm/trunk/lib/Transforms/Scalar/SROA.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SROA.cpp?rev=164479&r1=164478&r2=164479&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/SROA.cpp (original)
> +++ llvm/trunk/lib/Transforms/Scalar/SROA.cpp Sun Sep 23 19:34:20 2012
> @@ -1723,6 +1723,54 @@
> return true;
> }
>
> +/// \brief Test whether the given alloca partition can be promoted to an int.
> +///
> +/// This is a quick test to check whether we can rewrite a particular alloca
> +/// partition (and its newly formed alloca) into an integer alloca suitable for
> +/// promotion to an SSA value. We can only ensure this for a limited set of
> +/// operations, and we don't want to do the rewrites unless we are confident
> +/// that the result will be promotable, so we have an early test here.
> +static bool isIntegerPromotionViable(const TargetData &TD,
> + Type *AllocaTy,
> + AllocaPartitioning &P,
> + AllocaPartitioning::const_use_iterator I,
> + AllocaPartitioning::const_use_iterator E) {
> + IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
> + if (!Ty)
> + return false;
> +
> + // Check the uses to ensure they are (likely) promotable integer uses.
> + // Also ensure that the alloca has a covering load or store. We don't want
> + // to promote because of some other unsplittable entry (which we may make
> + // splittable later) and then lose the ability to promote each element access.
> + bool WholeAllocaOp = false;
> + for (; I != E; ++I) {
> + if (LoadInst *LI = dyn_cast<LoadInst>(&*I->User)) {
Is there any way to have something nicer than &*? It's pretty tough on the eyes.
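Even just naming the user instruction once would help, e.g. (only a sketch, based on how the iterator is used in this loop; I haven't checked the exact type of I->User beyond that &* yields an Instruction):

  // Hypothetical cleanup: dereference the use's user once and give it a name,
  // rather than repeating &*I->User in each dyn_cast.
  Instruction *UserI = &*I->User;
  if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
    // ... as before ...
  } else if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
    // ... as before ...
  } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UserI)) {
    // ... as before ...
  }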
Thanks,
Pete
> + if (LI->isVolatile() || !LI->getType()->isIntegerTy())
> + return false;
> + if (LI->getType() == Ty)
> + WholeAllocaOp = true;
> + } else if (StoreInst *SI = dyn_cast<StoreInst>(&*I->User)) {
> + if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())
> + return false;
> + if (SI->getValueOperand()->getType() == Ty)
> + WholeAllocaOp = true;
> + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&*I->User)) {
> + if (MI->isVolatile())
> + return false;
> + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(&*I->User)) {
> + const AllocaPartitioning::MemTransferOffsets &MTO
> + = P.getMemTransferOffsets(*MTI);
> + if (!MTO.IsSplittable)
> + return false;
> + }
> + } else {
> + return false;
> + }
> + }
> + return WholeAllocaOp;
> +}
> +
> namespace {
> /// \brief Visitor to rewrite instructions using a partition of an alloca to
> /// use a new alloca.
> @@ -1754,6 +1802,12 @@
> Type *ElementTy;
> uint64_t ElementSize;
>
> + // This is a convenience and flag variable that will be null unless the new
> + // alloca has a promotion-targeted integer type due to passing
> + // isIntegerPromotionViable above. If it is non-null, the desired
> + // integer type will be stored here for easy access during rewriting.
> + IntegerType *IntPromotionTy;
> +
> // The offset of the partition user currently being rewritten.
> uint64_t BeginOffset, EndOffset;
> Instruction *OldPtr;
> @@ -1770,7 +1824,7 @@
> OldAI(OldAI), NewAI(NewAI),
> NewAllocaBeginOffset(NewBeginOffset),
> NewAllocaEndOffset(NewEndOffset),
> - VecTy(), ElementTy(), ElementSize(),
> + VecTy(), ElementTy(), ElementSize(), IntPromotionTy(),
> BeginOffset(), EndOffset() {
> }
>
> @@ -1786,6 +1840,9 @@
> assert((VecTy->getScalarSizeInBits() % 8) == 0 &&
> "Only multiple-of-8 sized vector elements are viable");
> ElementSize = VecTy->getScalarSizeInBits() / 8;
> + } else if (isIntegerPromotionViable(TD, NewAI.getAllocatedType(),
> + P, I, E)) {
> + IntPromotionTy = cast<IntegerType>(NewAI.getAllocatedType());
> }
> bool CanSROA = true;
> for (; I != E; ++I) {
> @@ -1830,6 +1887,43 @@
> return IRB.getInt32(Index);
> }
>
> + Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
> + uint64_t Offset) {
> + assert(IntPromotionTy && "Alloca is not an integer we can extract from");
> + Value *V = IRB.CreateLoad(&NewAI, getName(".load"));
> + assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
> + uint64_t RelOffset = Offset - NewAllocaBeginOffset;
> + if (RelOffset)
> + V = IRB.CreateLShr(V, RelOffset*8, getName(".shift"));
> + if (TargetTy != IntPromotionTy) {
> + assert(TargetTy->getBitWidth() < IntPromotionTy->getBitWidth() &&
> + "Cannot extract to a larger integer!");
> + V = IRB.CreateTrunc(V, TargetTy, getName(".trunc"));
> + }
> + return V;
> + }
> +
> + StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
> + IntegerType *Ty = cast<IntegerType>(V->getType());
> + if (Ty == IntPromotionTy)
> + return IRB.CreateStore(V, &NewAI);
> +
> + assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
> + "Cannot insert a larger integer!");
> + V = IRB.CreateZExt(V, IntPromotionTy, getName(".ext"));
> + assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
> + uint64_t RelOffset = Offset - NewAllocaBeginOffset;
> + if (RelOffset)
> + V = IRB.CreateShl(V, RelOffset*8, getName(".shift"));
> +
> + APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth())
> + .shl(RelOffset*8);
> + Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")),
> + Mask, getName(".mask"));
> + return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")),
> + &NewAI);
> + }
> +
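The masking logic here checks out against the new test, by the way. A small standalone program I used to sanity-check the constant (using APInt directly; this is my own check, not code from the patch):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  // Mask built for an i8 store at byte offset 1 of an i24 alloca:
  // ~(zext(i8 all-ones to i24) << 8) = 0xFF00FF, i.e. -65281 as a signed i24.
  APInt NarrowOnes = APInt::getAllOnesValue(8);
  APInt Mask = ~NarrowOnes.zext(24).shl(8);
  outs() << Mask.getSExtValue() << "\n"; // prints -65281
  return 0;
}

That -65281 is exactly the "and i24 %[[mask0]], -65281" the updated test expects, and byte offsets 0 and 2 give -256 and 65535, the other two constants checked below.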
> void deleteIfTriviallyDead(Value *V) {
> Instruction *I = cast<Instruction>(V);
> if (isInstructionTriviallyDead(I))
> @@ -1865,6 +1959,16 @@
> return true;
> }
>
> + bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
> + assert(!LI.isVolatile());
> + Value *Result = extractInteger(IRB, cast<IntegerType>(LI.getType()),
> + BeginOffset);
> + LI.replaceAllUsesWith(Result);
> + Pass.DeadInsts.push_back(&LI);
> + DEBUG(dbgs() << " to: " << *Result << "\n");
> + return true;
> + }
> +
> bool visitLoadInst(LoadInst &LI) {
> DEBUG(dbgs() << " original: " << LI << "\n");
> Value *OldOp = LI.getOperand(0);
> @@ -1873,6 +1977,8 @@
>
> if (VecTy)
> return rewriteVectorizedLoadInst(IRB, LI, OldOp);
> + if (IntPromotionTy)
> + return rewriteIntegerLoad(IRB, LI);
>
> Value *NewPtr = getAdjustedAllocaPtr(IRB,
> LI.getPointerOperand()->getType());
> @@ -1904,6 +2010,15 @@
> return true;
> }
>
> + bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) {
> + assert(!SI.isVolatile());
> + StoreInst *Store = insertInteger(IRB, SI.getValueOperand(), BeginOffset);
> + Pass.DeadInsts.push_back(&SI);
> + (void)Store;
> + DEBUG(dbgs() << " to: " << *Store << "\n");
> + return true;
> + }
> +
> bool visitStoreInst(StoreInst &SI) {
> DEBUG(dbgs() << " original: " << SI << "\n");
> Value *OldOp = SI.getOperand(1);
> @@ -1912,6 +2027,8 @@
>
> if (VecTy)
> return rewriteVectorizedStoreInst(IRB, SI, OldOp);
> + if (IntPromotionTy)
> + return rewriteIntegerStore(IRB, SI);
>
> Value *NewPtr = getAdjustedAllocaPtr(IRB,
> SI.getPointerOperand()->getType());
>
> Modified: llvm/trunk/test/Transforms/SROA/basictest.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/basictest.ll?rev=164479&r1=164478&r2=164479&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/SROA/basictest.ll (original)
> +++ llvm/trunk/test/Transforms/SROA/basictest.ll Sun Sep 23 19:34:20 2012
> @@ -553,30 +553,53 @@
> ret i32 %Z2
> }
>
> -define i32 @test12() {
> -; CHECK: @test12
> -; CHECK: alloca i24
> -;
> -; FIXME: SROA should promote accesses to this into whole i24 operations instead
> -; of i8 operations.
> -; CHECK: store i8 0
> -; CHECK: store i8 0
> -; CHECK: store i8 0
> +define i8 @test12() {
> +; We fully promote these to the i24 load or store size, resulting in just masks
> +; and other operations that instcombine will fold, but no alloca.
> ;
> -; CHECK: load i24*
> +; CHECK: @test12
>
> entry:
> %a = alloca [3 x i8]
> - %b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
> - store i8 0, i8* %b0ptr
> - %b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
> - store i8 0, i8* %b1ptr
> - %b2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
> - store i8 0, i8* %b2ptr
> - %iptr = bitcast [3 x i8]* %a to i24*
> - %i = load i24* %iptr
> - %ret = zext i24 %i to i32
> - ret i32 %ret
> + %b = alloca [3 x i8]
> +; CHECK-NOT: alloca
> +
> + %a0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
> + store i8 0, i8* %a0ptr
> + %a1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
> + store i8 0, i8* %a1ptr
> + %a2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
> + store i8 0, i8* %a2ptr
> + %aiptr = bitcast [3 x i8]* %a to i24*
> + %ai = load i24* %aiptr
> +; CHECK-NOT: store
> +; CHECK-NOT: load
> +; CHECK: %[[mask0:.*]] = and i24 undef, -256
> +; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[mask0]], -65281
> +; CHECK-NEXT: %[[mask2:.*]] = and i24 %[[mask1]], 65535
> +
> + %biptr = bitcast [3 x i8]* %b to i24*
> + store i24 %ai, i24* %biptr
> + %b0ptr = getelementptr [3 x i8]* %b, i64 0, i32 0
> + %b0 = load i8* %b0ptr
> + %b1ptr = getelementptr [3 x i8]* %b, i64 0, i32 1
> + %b1 = load i8* %b1ptr
> + %b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
> + %b2 = load i8* %b2ptr
> +; CHECK-NOT: store
> +; CHECK-NOT: load
> +; CHECK: %[[trunc0:.*]] = trunc i24 %[[mask2]] to i8
> +; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[mask2]], 8
> +; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
> +; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[mask2]], 16
> +; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[shift2]] to i8
> +
> + %bsum0 = add i8 %b0, %b1
> + %bsum1 = add i8 %bsum0, %b2
> + ret i8 %bsum1
> +; CHECK: %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]]
> +; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]]
> +; CHECK-NEXT: ret i8 %[[sum1]]
> }
>
> define i32 @test13() {
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits