[llvm] r256394 - [InstCombine] transform more extract/insert pairs into shuffles (PR2109)
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 4 12:25:44 PST 2016
Hi Sanjay,
This commit caused a regression: https://llvm.org/bugs/show_bug.cgi?id=26015
-Tom
On Thu, Dec 24, 2015 at 09:17:56PM -0000, Sanjay Patel via llvm-commits wrote:
> Author: spatel
> Date: Thu Dec 24 15:17:56 2015
> New Revision: 256394
>
> URL: http://llvm.org/viewvc/llvm-project?rev=256394&view=rev
> Log:
> [InstCombine] transform more extract/insert pairs into shuffles (PR2109)
>
> This is an extension of the shuffle combining from r203229:
> http://reviews.llvm.org/rL203229
>
> The idea is to widen a short input vector with undef elements so the
> existing shuffle transform for extract/insert can kick in.
>
> The motivation is to finally solve PR2109:
> https://llvm.org/bugs/show_bug.cgi?id=2109
>
> For that example, the IR becomes:
>
> %1 = bitcast <2 x i32>* %P to <2 x float>*
> %ld1 = load <2 x float>, <2 x float>* %1, align 8
> %2 = shufflevector <2 x float> %ld1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
> %i2 = shufflevector <4 x float> %A, <4 x float> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
> ret <4 x float> %i2
>
> And x86 SSE output improves from:
>
> movq (%rdi), %xmm1 ## xmm1 = mem[0],zero
> movdqa %xmm1, %xmm2
> shufps $229, %xmm2, %xmm2 ## xmm2 = xmm2[1,1,2,3]
> shufps $48, %xmm0, %xmm1 ## xmm1 = xmm1[0,0],xmm0[3,0]
> shufps $132, %xmm1, %xmm0 ## xmm0 = xmm0[0,1],xmm1[0,2]
> shufps $32, %xmm0, %xmm2 ## xmm2 = xmm2[0,0],xmm0[2,0]
> shufps $36, %xmm2, %xmm0 ## xmm0 = xmm0[0,1],xmm2[2,0]
> retq
>
> To the almost optimal:
>
> movhpd (%rdi), %xmm0
>
> Note: There's a tension in the existing transform related to generating
> arbitrary shufflevector masks. We avoid that in other places in InstCombine
> because we're scared that codegen can't handle strange masks, but it looks
> like we're ok with producing those here. I purposely chose weird insert/extract
> indexes for the regression tests to see the effect in these cases.
> For PowerPC+Altivec, AArch64, and X86+SSE/AVX, I think the codegen is equal or
> better for these examples.
>
> Differential Revision: http://reviews.llvm.org/D15096
>
>
> Modified:
> llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
> llvm/trunk/test/Transforms/InstCombine/insert-extract-shuffle.ll
>
> Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp?rev=256394&r1=256393&r2=256394&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (original)
> +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp Thu Dec 24 15:17:56 2015
> @@ -352,6 +352,48 @@ static bool collectSingleShuffleElements
> return false;
> }
>
> +/// If we have insertion into a vector that is wider than the vector that we
> +/// are extracting from, try to widen the source vector to allow a single
> +/// shufflevector to replace one or more insert/extract pairs.
> +static void replaceExtractElements(InsertElementInst *InsElt,
> + ExtractElementInst *ExtElt,
> + InstCombiner &IC) {
> + VectorType *InsVecType = InsElt->getType();
> + VectorType *ExtVecType = ExtElt->getVectorOperandType();
> + unsigned NumInsElts = InsVecType->getVectorNumElements();
> + unsigned NumExtElts = ExtVecType->getVectorNumElements();
> +
> + // The inserted-to vector must be wider than the extracted-from vector.
> + if (InsVecType->getElementType() != ExtVecType->getElementType() ||
> + NumExtElts >= NumInsElts)
> + return;
> +
> + // Create a shuffle mask to widen the extended-from vector using undefined
> + // values. The mask selects all of the values of the original vector followed
> + // by as many undefined values as needed to create a vector of the same length
> + // as the inserted-to vector.
> + SmallVector<Constant *, 16> ExtendMask;
> + IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
> + for (unsigned i = 0; i < NumExtElts; ++i)
> + ExtendMask.push_back(ConstantInt::get(IntType, i));
> + for (unsigned i = NumExtElts; i < NumInsElts; ++i)
> + ExtendMask.push_back(UndefValue::get(IntType));
> +
> + Value *ExtVecOp = ExtElt->getVectorOperand();
> + auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
> + ConstantVector::get(ExtendMask));
> +
> + // Replace all extracts from the original narrow vector with extracts from
> + // the new wide vector.
> + WideVec->insertBefore(ExtElt);
> + for (User *U : ExtVecOp->users()) {
> + if (ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U)) {
> + auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
> + NewExt->insertAfter(WideVec);
> + IC.ReplaceInstUsesWith(*OldExt, NewExt);
> + }
> + }
> +}
>
> /// We are building a shuffle to create V, which is a sequence of insertelement,
> /// extractelement pairs. If PermittedRHS is set, then we must either use it or
> @@ -365,7 +407,8 @@ typedef std::pair<Value *, Value *> Shuf
>
> static ShuffleOps collectShuffleElements(Value *V,
> SmallVectorImpl<Constant *> &Mask,
> - Value *PermittedRHS) {
> + Value *PermittedRHS,
> + InstCombiner &IC) {
> assert(V->getType()->isVectorTy() && "Invalid shuffle!");
> unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
>
> @@ -396,10 +439,14 @@ static ShuffleOps collectShuffleElements
> // otherwise we'd end up with a shuffle of three inputs.
> if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
> Value *RHS = EI->getOperand(0);
> - ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS);
> + ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
> assert(LR.second == nullptr || LR.second == RHS);
>
> if (LR.first->getType() != RHS->getType()) {
> + // Although we are giving up for now, see if we can create extracts
> + // that match the inserts for another round of combining.
> + replaceExtractElements(IEI, EI, IC);
> +
> // We tried our best, but we can't find anything compatible with RHS
> // further up the chain. Return a trivial shuffle.
> for (unsigned i = 0; i < NumElts; ++i)
> @@ -512,7 +559,7 @@ Instruction *InstCombiner::visitInsertEl
> // (and any insertelements it points to), into one big shuffle.
> if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
> SmallVector<Constant*, 16> Mask;
> - ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr);
> + ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
>
> // The proposed shuffle may be trivial, in which case we shouldn't
> // perform the combine.
>
> Modified: llvm/trunk/test/Transforms/InstCombine/insert-extract-shuffle.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/insert-extract-shuffle.ll?rev=256394&r1=256393&r2=256394&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/InstCombine/insert-extract-shuffle.ll (original)
> +++ llvm/trunk/test/Transforms/InstCombine/insert-extract-shuffle.ll Thu Dec 24 15:17:56 2015
> @@ -26,8 +26,8 @@ define <4 x i16> @test2(<8 x i16> %in, <
>
> define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) {
> ; CHECK-LABEL: @test_vcopyq_lane_p64
> -; CHECK-NEXT: extractelement
> -; CHECK-NEXT: insertelement
> +; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <1 x i64> %b, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
> +; CHECK-NEXT: shufflevector <2 x i64> %a, <2 x i64> %[[WIDEVEC]], <2 x i32> <i32 0, i32 2>
> ; CHECK-NEXT: ret <2 x i64> %res
> %elt = extractelement <1 x i64> %b, i32 0
> %res = insertelement <2 x i64> %a, i64 %elt, i32 1
> @@ -38,10 +38,8 @@ define <2 x i64> @test_vcopyq_lane_p64(<
>
> define <4 x float> @widen_extract2(<4 x float> %ins, <2 x float> %ext) {
> ; CHECK-LABEL: @widen_extract2(
> -; CHECK-NEXT: extractelement
> -; CHECK-NEXT: extractelement
> -; CHECK-NEXT: insertelement
> -; CHECK-NEXT: insertelement
> +; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <2 x float> %ext, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
> +; CHECK-NEXT: shufflevector <4 x float> %ins, <4 x float> %[[WIDEVEC]], <4 x i32> <i32 0, i32 4, i32 2, i32 5>
> ; CHECK-NEXT: ret <4 x float> %i2
> %e1 = extractelement <2 x float> %ext, i32 0
> %e2 = extractelement <2 x float> %ext, i32 1
> @@ -52,12 +50,8 @@ define <4 x float> @widen_extract2(<4 x
>
> define <4 x float> @widen_extract3(<4 x float> %ins, <3 x float> %ext) {
> ; CHECK-LABEL: @widen_extract3(
> -; CHECK-NEXT: extractelement
> -; CHECK-NEXT: extractelement
> -; CHECK-NEXT: extractelement
> -; CHECK-NEXT: insertelement
> -; CHECK-NEXT: insertelement
> -; CHECK-NEXT: insertelement
> +; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <3 x float> %ext, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
> +; CHECK-NEXT: shufflevector <4 x float> %ins, <4 x float> %[[WIDEVEC]], <4 x i32> <i32 6, i32 5, i32 4, i32 3>
> ; CHECK-NEXT: ret <4 x float> %i3
> %e1 = extractelement <3 x float> %ext, i32 0
> %e2 = extractelement <3 x float> %ext, i32 1
> @@ -68,10 +62,10 @@ define <4 x float> @widen_extract3(<4 x
> ret <4 x float> %i3
> }
>
> -define <8 x float> @too_wide(<8 x float> %ins, <2 x float> %ext) {
> -; CHECK-LABEL: @too_wide(
> -; CHECK-NEXT: extractelement
> -; CHECK-NEXT: insertelement
> +define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) {
> +; CHECK-LABEL: @widen_extract4(
> +; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <2 x float> %ext, <2 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> +; CHECK-NEXT: shufflevector <8 x float> %ins, <8 x float> %[[WIDEVEC]], <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 4, i32 5, i32 6, i32 7>
> ; CHECK-NEXT: ret <8 x float> %i1
> %e1 = extractelement <2 x float> %ext, i32 0
> %i1 = insertelement <8 x float> %ins, float %e1, i32 2
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list