[llvm] r309812 - [SLP] Fix for PR31880: shuffle and vectorize repeated scalar ops on extracted elements
Yung, Douglas via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 7 19:32:28 PDT 2017
Hi Alexey,
This change seems to have caused a regression in the compiler's ability to optimize a vector add/sub + shuffle to identity. I have filed PR34111 for this issue. Can you take a look?
Douglas Yung
> -----Original Message-----
> From: llvm-commits [mailto:llvm-commits-bounces at lists.llvm.org] On Behalf Of
> Alexey Bataev via llvm-commits
> Sent: Wednesday, August 02, 2017 6:25
> To: llvm-commits at lists.llvm.org
> Subject: [llvm] r309812 - [SLP] Fix for PR31880: shuffle and vectorize
> repeated scalar ops on extracted elements
>
> Author: abataev
> Date: Wed Aug 2 06:25:26 2017
> New Revision: 309812
>
> URL: http://llvm.org/viewvc/llvm-project?rev=309812&view=rev
> Log:
> [SLP] Fix for PR31880: shuffle and vectorize repeated scalar ops on extracted
> elements
>
> Summary:
> Currently, most of the time vectors of extractelement instructions are treated
> as scalars that must be gathered into vectors. But in some cases, like when we
> have extractelement instructions from a single vector with different constant
> indices or from 2 vectors of the same size, we can treat these operations as
> a shuffle of a single vector or a blend of 2 vectors.
> ```
> define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {
> %x0 = extractelement <2 x i8> %x, i32 0
> %y1 = extractelement <2 x i8> %y, i32 1
> %x0x0 = mul i8 %x0, %x0
> %y1y1 = mul i8 %y1, %y1
> %ins1 = insertelement <2 x i8> undef, i8 %x0x0, i32 0
> %ins2 = insertelement <2 x i8> %ins1, i8 %y1y1, i32 1
> ret <2 x i8> %ins2
> }
> ```
> can be converted to something like
> ```
> define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {
> %1 = shufflevector <2 x i8> %x, <2 x i8> %y, <2 x i32> <i32 0, i32 3>
> %2 = mul <2 x i8> %1, %1
> ret <2 x i8> %2
> }
> ```
> Currently this type of conversion is considered a high-cost transformation.
>
> Reviewers: mzolotukhin, delena, mkuper, hfinkel, RKSimon
>
> Subscribers: ashahid, RKSimon, spatel, llvm-commits
>
> Differential Revision: https://reviews.llvm.org/D30200
>
> Modified:
> llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
> llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=309812&r1=30
> 9811&r2=309812&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Wed Aug 2
> +++ 06:25:26 2017
> @@ -158,6 +158,119 @@ static bool isSplat(ArrayRef<Value *> VL
> return true;
> }
>
> +/// Checks if the vector of instructions can be represented as a shuffle, like:
> +/// %x0 = extractelement <4 x i8> %x, i32 0
> +/// %x3 = extractelement <4 x i8> %x, i32 3
> +/// %y1 = extractelement <4 x i8> %y, i32 1
> +/// %y2 = extractelement <4 x i8> %y, i32 2
> +/// %x0x0 = mul i8 %x0, %x0
> +/// %x3x3 = mul i8 %x3, %x3
> +/// %y1y1 = mul i8 %y1, %y1
> +/// %y2y2 = mul i8 %y2, %y2
> +/// %ins1 = insertelement <4 x i8> undef, i8 %x0x0, i32 0
> +/// %ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
> +/// %ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
> +/// %ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
> +/// ret <4 x i8> %ins4
> +/// can be transformed into:
> +/// %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> <i32 0, i32 3, i32 5,
> +/// i32 6>
> +/// %2 = mul <4 x i8> %1, %1
> +/// ret <4 x i8> %2
> +/// We convert this initially to something like:
> +/// %x0 = extractelement <4 x i8> %x, i32 0
> +/// %x3 = extractelement <4 x i8> %x, i32 3
> +/// %y1 = extractelement <4 x i8> %y, i32 1
> +/// %y2 = extractelement <4 x i8> %y, i32 2
> +/// %1 = insertelement <4 x i8> undef, i8 %x0, i32 0
> +/// %2 = insertelement <4 x i8> %1, i8 %x3, i32 1
> +/// %3 = insertelement <4 x i8> %2, i8 %y1, i32 2
> +/// %4 = insertelement <4 x i8> %3, i8 %y2, i32 3
> +/// %5 = mul <4 x i8> %4, %4
> +/// %6 = extractelement <4 x i8> %5, i32 0
> +/// %ins1 = insertelement <4 x i8> undef, i8 %6, i32 0
> +/// %7 = extractelement <4 x i8> %5, i32 1
> +/// %ins2 = insertelement <4 x i8> %ins1, i8 %7, i32 1
> +/// %8 = extractelement <4 x i8> %5, i32 2
> +/// %ins3 = insertelement <4 x i8> %ins2, i8 %8, i32 2
> +/// %9 = extractelement <4 x i8> %5, i32 3
> +/// %ins4 = insertelement <4 x i8> %ins3, i8 %9, i32 3
> +/// ret <4 x i8> %ins4
> +/// InstCombiner transforms this into a shuffle and vector mul
> +static Optional<TargetTransformInfo::ShuffleKind>
> +isShuffle(ArrayRef<Value *> VL) {
> + auto *EI0 = cast<ExtractElementInst>(VL[0]);
> + unsigned Size = EI0->getVectorOperandType()->getVectorNumElements();
> + Value *Vec1 = nullptr;
> + Value *Vec2 = nullptr;
> + enum ShuffleMode {Unknown, FirstAlternate, SecondAlternate, Permute};
> + ShuffleMode CommonShuffleMode = Unknown;
> + for (unsigned I = 0, E = VL.size(); I < E; ++I) {
> + auto *EI = cast<ExtractElementInst>(VL[I]);
> + auto *Vec = EI->getVectorOperand();
> + // All vector operands must have the same number of vector elements.
> + if (Vec->getType()->getVectorNumElements() != Size)
> + return None;
> + auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
> + if (!Idx)
> + return None;
> + // Undefined behavior if Idx is negative or >= Size.
> + if (Idx->getValue().uge(Size))
> + continue;
> + unsigned IntIdx = Idx->getValue().getZExtValue();
> + // We can extractelement from undef vector.
> + if (isa<UndefValue>(Vec))
> + continue;
> + // For correct shuffling we have to have at most 2 different vector
> + // operands in all extractelement instructions.
> + if (Vec1 && Vec2 && Vec != Vec1 && Vec != Vec2)
> + return None;
> + if (CommonShuffleMode == Permute)
> + continue;
> + // If the extract index is not the same as the operation number, it is a
> + // permutation.
> + if (IntIdx != I) {
> + CommonShuffleMode = Permute;
> + continue;
> + }
> + // Check the shuffle mode for the current operation.
> + if (!Vec1)
> + Vec1 = Vec;
> + else if (Vec != Vec1)
> + Vec2 = Vec;
> + // Example: shufflevector A, B, <0,5,2,7>
> + // I is odd and IntIdx for A == I - FirstAlternate shuffle.
> + // I is even and IntIdx for B == I - FirstAlternate shuffle.
> + // Example: shufflevector A, B, <4,1,6,3>
> + // I is even and IntIdx for A == I - SecondAlternate shuffle.
> + // I is odd and IntIdx for B == I - SecondAlternate shuffle.
> + const bool IIsEven = I & 1;
> + const bool CurrVecIsA = Vec == Vec1;
> + const bool IIsOdd = !IIsEven;
> + const bool CurrVecIsB = !CurrVecIsA;
> + ShuffleMode CurrentShuffleMode =
> + ((IIsOdd && CurrVecIsA) || (IIsEven && CurrVecIsB)) ? FirstAlternate
> + :
> SecondAlternate;
> + // Common mode is not set or the same as the shuffle mode of the current
> + // operation - alternate.
> + if (CommonShuffleMode == Unknown)
> + CommonShuffleMode = CurrentShuffleMode;
> + // Common shuffle mode is not the same as the shuffle mode of the current
> + // operation - permutation.
> + if (CommonShuffleMode != CurrentShuffleMode)
> + CommonShuffleMode = Permute;
> + }
> + // If we're not crossing lanes in different vectors, consider it as
> + // blending.
> + if ((CommonShuffleMode == FirstAlternate ||
> + CommonShuffleMode == SecondAlternate) &&
> + Vec2)
> + return TargetTransformInfo::SK_Alternate;
> + // If Vec2 was never used, we have a permutation of a single vector,
> + // otherwise we have permutation of 2 vectors.
> + return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc
> + : TargetTransformInfo::SK_PermuteSingleSrc;
> +}
> +
> ///\returns Opcode that can be clubbed with \p Op to create an alternate ///
> sequence which can later be merged as a ShuffleVector instruction.
> static unsigned getAltOpcode(unsigned Op) { @@ -1736,6 +1849,26 @@ int
> BoUpSLP::getEntryCost(TreeEntry *E)
> if (isSplat(VL)) {
> return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
> 0);
> }
> + if (getSameOpcode(VL) == Instruction::ExtractElement) {
> + Optional<TargetTransformInfo::ShuffleKind> ShuffleKind = isShuffle(VL);
> + if (ShuffleKind.hasValue()) {
> + int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
> + for (auto *V : VL) {
> + // If all users of instruction are going to be vectorized and this
> + // instruction itself is not going to be vectorized, consider this
> + // instruction as dead and remove its cost from the final cost of
> the
> + // vectorized tree.
> + if (areAllUsersVectorized(cast<Instruction>(V)) &&
> + !ScalarToTreeEntry.count(V)) {
> + auto *IO = cast<ConstantInt>(
> + cast<ExtractElementInst>(V)->getIndexOperand());
> + Cost -= TTI->getVectorInstrCost(Instruction::ExtractElement,
> VecTy,
> + IO->getZExtValue());
> + }
> + }
> + return Cost;
> + }
> + }
> return getGatherCost(E->Scalars);
> }
> unsigned Opcode = getSameOpcode(VL);
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-
> shuffle.ll?rev=309812&r1=309811&r2=309812&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
> (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll Wed
> +++ Aug 2 06:25:26 2017
> @@ -3,13 +3,9 @@
>
> define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @g(
> -; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0
> -; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x i8> [[Y:%.*]], i32 1
> -; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
> -; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
> -; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x i8> undef, i8 [[X0X0]],
> i32 0
> -; CHECK-NEXT: [[INS2:%.*]] = insertelement <2 x i8> [[INS1]], i8 [[Y1Y1]],
> i32 1
> -; CHECK-NEXT: ret <2 x i8> [[INS2]]
> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8>
> [[Y:%.*]], <2 x i32> <i32 0, i32 3>
> +; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]]
> +; CHECK-NEXT: ret <2 x i8> [[TMP2]]
> ;
> %x0 = extractelement <2 x i8> %x, i32 0
> %y1 = extractelement <2 x i8> %y, i32 1 @@ -22,19 +18,9 @@ define <2 x i8>
> @g(<2 x i8> %x, <2 x i8>
>
> define <4 x i8> @h(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @h(
> -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
> -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
> -; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
> -; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
> -; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
> -; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
> -; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
> -; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
> -; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x i8> undef, i8 [[X0X0]],
> i32 0
> -; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i8> [[INS1]], i8 [[X3X3]],
> i32 1
> -; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i8> [[INS2]], i8 [[Y1Y1]],
> i32 2
> -; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x i8> [[INS3]], i8 [[Y2Y2]],
> i32 3
> -; CHECK-NEXT: ret <4 x i8> [[INS4]]
> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8>
> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6>
> +; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
> +; CHECK-NEXT: ret <4 x i8> [[TMP2]]
> ;
> %x0 = extractelement <4 x i8> %x, i32 0
> %x3 = extractelement <4 x i8> %x, i32 3 @@ -53,16 +39,9 @@ define <4 x i8>
> @h(<4 x i8> %x, <4 x i8>
>
> define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL:
> @h_undef(
> -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 3
> -; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
> -; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
> -; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
> -; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
> -; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
> -; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i8> undef, i8 [[X3X3]],
> i32 1
> -; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i8> [[INS2]], i8 [[Y1Y1]],
> i32 2
> -; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x i8> [[INS3]], i8 [[Y2Y2]],
> i32 3
> -; CHECK-NEXT: ret <4 x i8> [[INS4]]
> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8>
> [[Y:%.*]], <4 x i32> <i32 undef, i32 3, i32 5, i32 6>
> +; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
> +; CHECK-NEXT: ret <4 x i8> [[TMP2]]
> ;
> %x0 = extractelement <4 x i8> undef, i32 0
> %x3 = extractelement <4 x i8> %x, i32 3 @@ -81,17 +60,13 @@ define <4 x i8>
> @h_undef(<4 x i8> %x, <4
>
> define i8 @i(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @i(
> -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
> -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
> -; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
> -; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
> -; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
> -; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
> -; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
> -; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
> -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
> -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[Y1Y1]], [[Y2Y2]]
> -; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP1]], [[TMP2]]
> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8>
> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6>
> +; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8>
> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
> +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i8> [[TMP2]], [[RDX_SHUF]]
> +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i8> [[BIN_RDX]], <4 x
> i8> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i8> [[BIN_RDX]], [[RDX_SHUF1]]
> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i8> [[BIN_RDX2]], i32 0
> ; CHECK-NEXT: ret i8 [[TMP3]]
> ;
> %x0 = extractelement <4 x i8> %x, i32 0 @@ -110,18 +85,15 @@ define i8
> @i(<4 x i8> %x, <4 x i8> %y) {
>
> define i8 @j(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @j(
> -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
> -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
> -; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
> -; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
> -; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
> -; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
> -; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
> -; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
> -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
> -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[Y1Y1]], [[Y2Y2]]
> -; CHECK-NEXT: [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
> -; CHECK-NEXT: ret i8 [[TMP3]]
> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8>
> [[Y:%.*]], <2 x i32> <i32 0, i32 5>
> +; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]]
> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]],
> <2 x i32> <i32 3, i32 6>
> +; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i8> [[TMP3]], [[TMP3]]
> +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
> +; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
> +; CHECK-NEXT: ret i8 [[TMP8]]
> ;
> %x0 = extractelement <4 x i8> %x, i32 0
> %x3 = extractelement <4 x i8> %x, i32 3 @@ -139,18 +111,15 @@ define i8
> @j(<4 x i8> %x, <4 x i8> %y) {
>
> define i8 @k(<4 x i8> %x) {
> ; CHECK-LABEL: @k(
> -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
> -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
> -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i8> [[X]], i32 1
> -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i8> [[X]], i32 2
> -; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
> -; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
> -; CHECK-NEXT: [[X1X1:%.*]] = mul i8 [[X1]], [[X1]]
> -; CHECK-NEXT: [[X2X2:%.*]] = mul i8 [[X2]], [[X2]]
> -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
> -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X1X1]], [[X2X2]]
> -; CHECK-NEXT: [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
> -; CHECK-NEXT: ret i8 [[TMP3]]
> +; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X:%.*]], [[X]]
> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8>
> undef, <2 x i32> <i32 0, i32 1>
> +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i8> [[X]], [[X]]
> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8>
> undef, <2 x i32> <i32 3, i32 2>
> +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
> +; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
> +; CHECK-NEXT: ret i8 [[TMP8]]
> ;
> %x0 = extractelement <4 x i8> %x, i32 0
> %x3 = extractelement <4 x i8> %x, i32 3
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list