[llvm-commits] [llvm] r164479 - in /llvm/trunk: lib/Transforms/Scalar/SROA.cpp test/Transforms/SROA/basictest.ll
Peter Cooper
peter_cooper at apple.com
Sun Sep 23 21:11:41 PDT 2012
Hi Chandler,
This new pass is excellent, but...
On Sep 23, 2012, at 5:34 PM, Chandler Carruth <chandlerc at gmail.com> wrote:
> Author: chandlerc
> Date: Sun Sep 23 19:34:20 2012
> New Revision: 164479
>
> URL: http://llvm.org/viewvc/llvm-project?rev=164479&view=rev
> Log:
> Address one of the original FIXMEs for the new SROA pass by implementing
> integer promotion analogous to vector promotion. When there is an
> integer alloca being accessed both as its integer type and as a narrower
> integer type, promote the narrower access to "insert" and "extract" the
> smaller integer from the larger one, and make the integer alloca
> a candidate for promotion.
>
> In the new formulation, we don't care about target-legal integer types or use
> thresholds to control things. Instead, we only perform this promotion to
> an integer type which the frontend has already emitted a load or store
> for. This bounds the scope and prevents optimization passes from
> coalescing larger and larger entities into a single integer.
>
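To make the shape of the rewrite concrete for anyone skimming: as far as I can tell, the IR this produces for a narrow access is plain shift-and-mask arithmetic on the wide integer. Here is a rough C++ model I put together to convince myself (the extractByte/insertByte helpers are mine, i32/i8 stand in for the alloca and access types, and endianness is ignored; this is not code from the patch):

#include <cassert>
#include <cstdint>
#include <cstdio>

// "load i8" at byte offset Off from the wide value: shift the byte down and
// truncate, mirroring the lshr + trunc that the patch's extractInteger emits.
static uint8_t extractByte(uint32_t Wide, unsigned Off) {
  assert(Off < sizeof(Wide) && "offset must lie inside the wide integer");
  return static_cast<uint8_t>(Wide >> (Off * 8));
}

// "store i8" at byte offset Off into the wide value: zero-extend, shift into
// place, clear the old byte with a mask, and OR the new byte in, mirroring
// the zext + shl + and + or that the patch's insertInteger emits.
static uint32_t insertByte(uint32_t Wide, uint8_t Narrow, unsigned Off) {
  assert(Off < sizeof(Wide) && "offset must lie inside the wide integer");
  uint32_t Mask = ~(uint32_t(0xFF) << (Off * 8));
  return (Wide & Mask) | (uint32_t(Narrow) << (Off * 8));
}

int main() {
  uint32_t Wide = 0;
  Wide = insertByte(Wide, 0xAA, 0);
  Wide = insertByte(Wide, 0xBB, 1);
  std::printf("%x %x\n", unsigned(extractByte(Wide, 0)),
              unsigned(extractByte(Wide, 1))); // prints "aa bb"
  return 0;
}

Since the promotion only targets integer types the frontend already loaded or stored, the wide value never grows beyond what the original code touched, which keeps this arithmetic cheap.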
> Modified:
> llvm/trunk/lib/Transforms/Scalar/SROA.cpp
> llvm/trunk/test/Transforms/SROA/basictest.ll
>
> Modified: llvm/trunk/lib/Transforms/Scalar/SROA.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SROA.cpp?rev=164479&r1=164478&r2=164479&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/SROA.cpp (original)
> +++ llvm/trunk/lib/Transforms/Scalar/SROA.cpp Sun Sep 23 19:34:20 2012
> @@ -1723,6 +1723,54 @@
> return true;
> }
>
> +/// \brief Test whether the given alloca partition can be promoted to an int.
> +///
> +/// This is a quick test to check whether we can rewrite a particular alloca
> +/// partition (and its newly formed alloca) into an integer alloca suitable for
> +/// promotion to an SSA value. We can only ensure this for a limited set of
> +/// operations, and we don't want to do the rewrites unless we are confident
> +/// that the result will be promotable, so we have an early test here.
> +static bool isIntegerPromotionViable(const TargetData &TD,
> + Type *AllocaTy,
> + AllocaPartitioning &P,
> + AllocaPartitioning::const_use_iterator I,
> + AllocaPartitioning::const_use_iterator E) {
> + IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
> + if (!Ty)
> + return false;
> +
> + // Check the uses to ensure they are (likely) promotable integer uses.
> + // Also ensure that the alloca has a covering load or store. We don't want
> + // to promote because of some other unsplittable entry (which we may make
> + // splittable later) and then lose the ability to promote each element access.
> + bool WholeAllocaOp = false;
> + for (; I != E; ++I) {
> + if (LoadInst *LI = dyn_cast<LoadInst>(&*I->User)) {
Is there any way to have something nicer than &*? It's pretty tough on the eyes.
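Even just naming the user instruction once would help, e.g. (only a sketch, based on how the iterator is used in this loop; I haven't checked the exact type of I->User beyond that &* yields an Instruction):

  // Hypothetical cleanup: dereference the use's user once and give it a name,
  // rather than repeating &*I->User in each dyn_cast.
  Instruction *UserI = &*I->User;
  if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
    // ... as before ...
  } else if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
    // ... as before ...
  } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UserI)) {
    // ... as before ...
  }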
Thanks,
Pete
> + if (LI->isVolatile() || !LI->getType()->isIntegerTy())
> + return false;
> + if (LI->getType() == Ty)
> + WholeAllocaOp = true;
> + } else if (StoreInst *SI = dyn_cast<StoreInst>(&*I->User)) {
> + if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())
> + return false;
> + if (SI->getValueOperand()->getType() == Ty)
> + WholeAllocaOp = true;
> + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&*I->User)) {
> + if (MI->isVolatile())
> + return false;
> + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(&*I->User)) {
> + const AllocaPartitioning::MemTransferOffsets &MTO
> + = P.getMemTransferOffsets(*MTI);
> + if (!MTO.IsSplittable)
> + return false;
> + }
> + } else {
> + return false;
> + }
> + }
> + return WholeAllocaOp;
> +}
> +
> namespace {
> /// \brief Visitor to rewrite instructions using a partition of an alloca to
> /// use a new alloca.
> @@ -1754,6 +1802,12 @@
> Type *ElementTy;
> uint64_t ElementSize;
>
> + // This is a convenience and flag variable that will be null unless the new
> + // alloca has a promotion-targeted integer type due to passing
> + // isIntegerPromotionViable above. If it is non-null, the desired
> + // integer type will be stored here for easy access during rewriting.
> + IntegerType *IntPromotionTy;
> +
> // The offset of the partition user currently being rewritten.
> uint64_t BeginOffset, EndOffset;
> Instruction *OldPtr;
> @@ -1770,7 +1824,7 @@
> OldAI(OldAI), NewAI(NewAI),
> NewAllocaBeginOffset(NewBeginOffset),
> NewAllocaEndOffset(NewEndOffset),
> - VecTy(), ElementTy(), ElementSize(),
> + VecTy(), ElementTy(), ElementSize(), IntPromotionTy(),
> BeginOffset(), EndOffset() {
> }
>
> @@ -1786,6 +1840,9 @@
> assert((VecTy->getScalarSizeInBits() % 8) == 0 &&
> "Only multiple-of-8 sized vector elements are viable");
> ElementSize = VecTy->getScalarSizeInBits() / 8;
> + } else if (isIntegerPromotionViable(TD, NewAI.getAllocatedType(),
> + P, I, E)) {
> + IntPromotionTy = cast<IntegerType>(NewAI.getAllocatedType());
> }
> bool CanSROA = true;
> for (; I != E; ++I) {
> @@ -1830,6 +1887,43 @@
> return IRB.getInt32(Index);
> }
>
> + Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
> + uint64_t Offset) {
> + assert(IntPromotionTy && "Alloca is not an integer we can extract from");
> + Value *V = IRB.CreateLoad(&NewAI, getName(".load"));
> + assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
> + uint64_t RelOffset = Offset - NewAllocaBeginOffset;
> + if (RelOffset)
> + V = IRB.CreateLShr(V, RelOffset*8, getName(".shift"));
> + if (TargetTy != IntPromotionTy) {
> + assert(TargetTy->getBitWidth() < IntPromotionTy->getBitWidth() &&
> + "Cannot extract to a larger integer!");
> + V = IRB.CreateTrunc(V, TargetTy, getName(".trunc"));
> + }
> + return V;
> + }
> +
> + StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
> + IntegerType *Ty = cast<IntegerType>(V->getType());
> + if (Ty == IntPromotionTy)
> + return IRB.CreateStore(V, &NewAI);
> +
> + assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
> + "Cannot insert a larger integer!");
> + V = IRB.CreateZExt(V, IntPromotionTy, getName(".ext"));
> + assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
> + uint64_t RelOffset = Offset - NewAllocaBeginOffset;
> + if (RelOffset)
> + V = IRB.CreateShl(V, RelOffset*8, getName(".shift"));
> +
> + APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth())
> + .shl(RelOffset*8);
> + Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")),
> + Mask, getName(".mask"));
> + return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")),
> + &NewAI);
> + }
> +
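The masking logic here checks out against the new test, by the way. A small standalone program I used to sanity-check the constant (using APInt directly; this is my own check, not code from the patch):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  // Mask built for an i8 store at byte offset 1 of an i24 alloca:
  // ~(zext(i8 all-ones to i24) << 8) = 0xFF00FF, i.e. -65281 as a signed i24.
  APInt NarrowOnes = APInt::getAllOnesValue(8);
  APInt Mask = ~NarrowOnes.zext(24).shl(8);
  outs() << Mask.getSExtValue() << "\n"; // prints -65281
  return 0;
}

That -65281 is exactly the "and i24 %[[mask0]], -65281" the updated test expects, and byte offsets 0 and 2 give -256 and 65535, the other two constants checked below.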
> void deleteIfTriviallyDead(Value *V) {
> Instruction *I = cast<Instruction>(V);
> if (isInstructionTriviallyDead(I))
> @@ -1865,6 +1959,16 @@
> return true;
> }
>
> + bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
> + assert(!LI.isVolatile());
> + Value *Result = extractInteger(IRB, cast<IntegerType>(LI.getType()),
> + BeginOffset);
> + LI.replaceAllUsesWith(Result);
> + Pass.DeadInsts.push_back(&LI);
> + DEBUG(dbgs() << " to: " << *Result << "\n");
> + return true;
> + }
> +
> bool visitLoadInst(LoadInst &LI) {
> DEBUG(dbgs() << " original: " << LI << "\n");
> Value *OldOp = LI.getOperand(0);
> @@ -1873,6 +1977,8 @@
>
> if (VecTy)
> return rewriteVectorizedLoadInst(IRB, LI, OldOp);
> + if (IntPromotionTy)
> + return rewriteIntegerLoad(IRB, LI);
>
> Value *NewPtr = getAdjustedAllocaPtr(IRB,
> LI.getPointerOperand()->getType());
> @@ -1904,6 +2010,15 @@
> return true;
> }
>
> + bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) {
> + assert(!SI.isVolatile());
> + StoreInst *Store = insertInteger(IRB, SI.getValueOperand(), BeginOffset);
> + Pass.DeadInsts.push_back(&SI);
> + (void)Store;
> + DEBUG(dbgs() << " to: " << *Store << "\n");
> + return true;
> + }
> +
> bool visitStoreInst(StoreInst &SI) {
> DEBUG(dbgs() << " original: " << SI << "\n");
> Value *OldOp = SI.getOperand(1);
> @@ -1912,6 +2027,8 @@
>
> if (VecTy)
> return rewriteVectorizedStoreInst(IRB, SI, OldOp);
> + if (IntPromotionTy)
> + return rewriteIntegerStore(IRB, SI);
>
> Value *NewPtr = getAdjustedAllocaPtr(IRB,
> SI.getPointerOperand()->getType());
>
> Modified: llvm/trunk/test/Transforms/SROA/basictest.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/basictest.ll?rev=164479&r1=164478&r2=164479&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/SROA/basictest.ll (original)
> +++ llvm/trunk/test/Transforms/SROA/basictest.ll Sun Sep 23 19:34:20 2012
> @@ -553,30 +553,53 @@
> ret i32 %Z2
> }
>
> -define i32 @test12() {
> -; CHECK: @test12
> -; CHECK: alloca i24
> -;
> -; FIXME: SROA should promote accesses to this into whole i24 operations instead
> -; of i8 operations.
> -; CHECK: store i8 0
> -; CHECK: store i8 0
> -; CHECK: store i8 0
> +define i8 @test12() {
> +; We fully promote these to the i24 load or store size, resulting in just masks
> +; and other operations that instcombine will fold, but no alloca.
> ;
> -; CHECK: load i24*
> +; CHECK: @test12
>
> entry:
> %a = alloca [3 x i8]
> - %b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
> - store i8 0, i8* %b0ptr
> - %b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
> - store i8 0, i8* %b1ptr
> - %b2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
> - store i8 0, i8* %b2ptr
> - %iptr = bitcast [3 x i8]* %a to i24*
> - %i = load i24* %iptr
> - %ret = zext i24 %i to i32
> - ret i32 %ret
> + %b = alloca [3 x i8]
> +; CHECK-NOT: alloca
> +
> + %a0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
> + store i8 0, i8* %a0ptr
> + %a1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
> + store i8 0, i8* %a1ptr
> + %a2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
> + store i8 0, i8* %a2ptr
> + %aiptr = bitcast [3 x i8]* %a to i24*
> + %ai = load i24* %aiptr
> +; CHECK-NOT: store
> +; CHECK-NOT: load
> +; CHECK: %[[mask0:.*]] = and i24 undef, -256
> +; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[mask0]], -65281
> +; CHECK-NEXT: %[[mask2:.*]] = and i24 %[[mask1]], 65535
> +
> + %biptr = bitcast [3 x i8]* %b to i24*
> + store i24 %ai, i24* %biptr
> + %b0ptr = getelementptr [3 x i8]* %b, i64 0, i32 0
> + %b0 = load i8* %b0ptr
> + %b1ptr = getelementptr [3 x i8]* %b, i64 0, i32 1
> + %b1 = load i8* %b1ptr
> + %b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
> + %b2 = load i8* %b2ptr
> +; CHECK-NOT: store
> +; CHECK-NOT: load
> +; CHECK: %[[trunc0:.*]] = trunc i24 %[[mask2]] to i8
> +; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[mask2]], 8
> +; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
> +; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[mask2]], 16
> +; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[shift2]] to i8
> +
> + %bsum0 = add i8 %b0, %b1
> + %bsum1 = add i8 %bsum0, %b2
> + ret i8 %bsum1
> +; CHECK: %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]]
> +; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]]
> +; CHECK-NEXT: ret i8 %[[sum1]]
> }
>
> define i32 @test13() {
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits