[llvm] r309812 - [SLP] Fix for PR31880: shuffle and vectorize repeated scalar ops on extracted elements

Yung, Douglas via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 7 19:32:28 PDT 2017


Hi Alexey,

This change seems to have caused a regression in the compiler's ability to optimize a vector add/sub + shuffle to identity. I have filed PR34111 for this issue. Can you take a look?

Douglas Yung

> -----Original Message-----
> From: llvm-commits [mailto:llvm-commits-bounces at lists.llvm.org] On Behalf Of
> Alexey Bataev via llvm-commits
> Sent: Wednesday, August 02, 2017 6:25
> To: llvm-commits at lists.llvm.org
> Subject: [llvm] r309812 - [SLP] Fix for PR31880: shuffle and vectorize
> repeated scalar ops on extracted elements
> 
> Author: abataev
> Date: Wed Aug  2 06:25:26 2017
> New Revision: 309812
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=309812&view=rev
> Log:
> [SLP] Fix for PR31880: shuffle and vectorize repeated scalar ops on extracted
> elements
> 
> Summary:
> Currently most of the time vectors of extractelement instructions are treated
> as scalars that must be gathered into vectors. But in some cases, like when we
> have extractelement instructions from a single vector with different constant
> indices or from 2 vectors of the same size, we can treat these operations as
> a shuffle of a single vector or a blending of 2 vectors.
> ```
> define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {
>   %x0 = extractelement <2 x i8> %x, i32 0
>   %y1 = extractelement <2 x i8> %y, i32 1
>   %x0x0 = mul i8 %x0, %x0
>   %y1y1 = mul i8 %y1, %y1
>   %ins1 = insertelement <2 x i8> undef, i8 %x0x0, i32 0
>   %ins2 = insertelement <2 x i8> %ins1, i8 %y1y1, i32 1
>   ret <2 x i8> %ins2
> }
> ```
> can be converted to something like
> ```
> define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {
>   %1 = shufflevector <2 x i8> %x, <2 x i8> %y, <2 x i32> <i32 0, i32 3>
>   %2 = mul <2 x i8> %1, %1
>   ret <2 x i8> %2
> }
> ```
> Currently this type of conversion is considered a high-cost transformation.
> 
> Reviewers: mzolotukhin, delena, mkuper, hfinkel, RKSimon
> 
> Subscribers: ashahid, RKSimon, spatel, llvm-commits
> 
> Differential Revision: https://reviews.llvm.org/D30200
> 
> Modified:
>     llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
> 
> Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=309812&r1=30
> 9811&r2=309812&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Wed Aug  2
> +++ 06:25:26 2017
> @@ -158,6 +158,119 @@ static bool isSplat(ArrayRef<Value *> VL
>    return true;
>  }
> 
> +/// Checks if the vector of instructions can be represented as a shuffle, like:
> +/// %x0 = extractelement <4 x i8> %x, i32 0
> +/// %x3 = extractelement <4 x i8> %x, i32 3
> +/// %y1 = extractelement <4 x i8> %y, i32 1
> +/// %y2 = extractelement <4 x i8> %y, i32 2
> +/// %x0x0 = mul i8 %x0, %x0
> +/// %x3x3 = mul i8 %x3, %x3
> +/// %y1y1 = mul i8 %y1, %y1
> +/// %y2y2 = mul i8 %y2, %y2
> +/// %ins1 = insertelement <4 x i8> undef, i8 %x0x0, i32 0
> +/// %ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
> +/// %ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
> +/// %ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
> +/// ret <4 x i8> %ins4
> +/// can be transformed into:
> +/// %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> <i32 0, i32 3, i32 5,
> +///                                                         i32 6>
> +/// %2 = mul <4 x i8> %1, %1
> +/// ret <4 x i8> %2
> +/// We convert this initially to something like:
> +/// %x0 = extractelement <4 x i8> %x, i32 0
> +/// %x3 = extractelement <4 x i8> %x, i32 3
> +/// %y1 = extractelement <4 x i8> %y, i32 1
> +/// %y2 = extractelement <4 x i8> %y, i32 2
> +/// %1 = insertelement <4 x i8> undef, i8 %x0, i32 0
> +/// %2 = insertelement <4 x i8> %1, i8 %x3, i32 1
> +/// %3 = insertelement <4 x i8> %2, i8 %y1, i32 2
> +/// %4 = insertelement <4 x i8> %3, i8 %y2, i32 3
> +/// %5 = mul <4 x i8> %4, %4
> +/// %6 = extractelement <4 x i8> %5, i32 0
> +/// %ins1 = insertelement <4 x i8> undef, i8 %6, i32 0
> +/// %7 = extractelement <4 x i8> %5, i32 1
> +/// %ins2 = insertelement <4 x i8> %ins1, i8 %7, i32 1
> +/// %8 = extractelement <4 x i8> %5, i32 2
> +/// %ins3 = insertelement <4 x i8> %ins2, i8 %8, i32 2
> +/// %9 = extractelement <4 x i8> %5, i32 3
> +/// %ins4 = insertelement <4 x i8> %ins3, i8 %9, i32 3
> +/// ret <4 x i8> %ins4
> +/// InstCombiner transforms this into a shuffle and vector mul
> +static Optional<TargetTransformInfo::ShuffleKind>
> +isShuffle(ArrayRef<Value *> VL) {
> +  auto *EI0 = cast<ExtractElementInst>(VL[0]);
> +  unsigned Size = EI0->getVectorOperandType()->getVectorNumElements();
> +  Value *Vec1 = nullptr;
> +  Value *Vec2 = nullptr;
> +  enum ShuffleMode {Unknown, FirstAlternate, SecondAlternate, Permute};
> +  ShuffleMode CommonShuffleMode = Unknown;
> +  for (unsigned I = 0, E = VL.size(); I < E; ++I) {
> +    auto *EI = cast<ExtractElementInst>(VL[I]);
> +    auto *Vec = EI->getVectorOperand();
> +    // All vector operands must have the same number of vector elements.
> +    if (Vec->getType()->getVectorNumElements() != Size)
> +      return None;
> +    auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
> +    if (!Idx)
> +      return None;
> +    // Undefined behavior if Idx is negative or >= Size.
> +    if (Idx->getValue().uge(Size))
> +      continue;
> +    unsigned IntIdx = Idx->getValue().getZExtValue();
> +    // We can extractelement from undef vector.
> +    if (isa<UndefValue>(Vec))
> +      continue;
> +    // For correct shuffling we have to have at most 2 different vector
> operands
> +    // in all extractelement instructions.
> +    if (Vec1 && Vec2 && Vec != Vec1 && Vec != Vec2)
> +      return None;
> +    if (CommonShuffleMode == Permute)
> +      continue;
> +    // If the extract index is not the same as the operation number, it is a
> +    // permutation.
> +    if (IntIdx != I) {
> +      CommonShuffleMode = Permute;
> +      continue;
> +    }
> +    // Check the shuffle mode for the current operation.
> +    if (!Vec1)
> +      Vec1 = Vec;
> +    else if (Vec != Vec1)
> +      Vec2 = Vec;
> +    // Example: shufflevector A, B, <0,5,2,7>
> +    // I is odd and IntIdx for A == I - FirstAlternate shuffle.
> +    // I is even and IntIdx for B == I - FirstAlternate shuffle.
> +    // Example: shufflevector A, B, <4,1,6,3>
> +    // I is even and IntIdx for A == I - SecondAlternate shuffle.
> +    // I is odd and IntIdx for B == I - SecondAlternate shuffle.
> +    const bool IIsEven = I & 1;
> +    const bool CurrVecIsA = Vec == Vec1;
> +    const bool IIsOdd = !IIsEven;
> +    const bool CurrVecIsB = !CurrVecIsA;
> +    ShuffleMode CurrentShuffleMode =
> +        ((IIsOdd && CurrVecIsA) || (IIsEven && CurrVecIsB)) ? FirstAlternate
> +                                                            :
> SecondAlternate;
> +    // Common mode is not set or the same as the shuffle mode of the current
> +    // operation - alternate.
> +    if (CommonShuffleMode == Unknown)
> +      CommonShuffleMode = CurrentShuffleMode;
> +    // Common shuffle mode is not the same as the shuffle mode of the current
> +    // operation - permutation.
> +    if (CommonShuffleMode != CurrentShuffleMode)
> +      CommonShuffleMode = Permute;
> +  }
> +  // If we're not crossing lanes in different vectors, consider it as
> blending.
> +  if ((CommonShuffleMode == FirstAlternate ||
> +       CommonShuffleMode == SecondAlternate) &&
> +      Vec2)
> +    return TargetTransformInfo::SK_Alternate;
> +  // If Vec2 was never used, we have a permutation of a single vector, otherwise
> +  // we have permutation of 2 vectors.
> +  return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc
> +              : TargetTransformInfo::SK_PermuteSingleSrc;
> +}
> +
>  ///\returns Opcode that can be clubbed with \p Op to create an alternate
>  /// sequence which can later be merged as a ShuffleVector instruction.
>  static unsigned getAltOpcode(unsigned Op) {
> @@ -1736,6 +1849,26 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
>      if (isSplat(VL)) {
>        return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
> 0);
>      }
> +    if (getSameOpcode(VL) == Instruction::ExtractElement) {
> +      Optional<TargetTransformInfo::ShuffleKind> ShuffleKind = isShuffle(VL);
> +      if (ShuffleKind.hasValue()) {
> +        int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
> +        for (auto *V : VL) {
> +          // If all users of instruction are going to be vectorized and this
> +          // instruction itself is not going to be vectorized, consider this
> +          // instruction as dead and remove its cost from the final cost of
> the
> +          // vectorized tree.
> +          if (areAllUsersVectorized(cast<Instruction>(V)) &&
> +              !ScalarToTreeEntry.count(V)) {
> +            auto *IO = cast<ConstantInt>(
> +                cast<ExtractElementInst>(V)->getIndexOperand());
> +            Cost -= TTI->getVectorInstrCost(Instruction::ExtractElement,
> VecTy,
> +                                            IO->getZExtValue());
> +          }
> +        }
> +        return Cost;
> +      }
> +    }
>      return getGatherCost(E->Scalars);
>    }
>    unsigned Opcode = getSameOpcode(VL);
> 
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-
> shuffle.ll?rev=309812&r1=309811&r2=309812&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
> (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll Wed
> +++ Aug  2 06:25:26 2017
> @@ -3,13 +3,9 @@
> 
>  define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {  ; CHECK-LABEL: @g(
> -; CHECK-NEXT:    [[X0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0
> -; CHECK-NEXT:    [[Y1:%.*]] = extractelement <2 x i8> [[Y:%.*]], i32 1
> -; CHECK-NEXT:    [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
> -; CHECK-NEXT:    [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
> -; CHECK-NEXT:    [[INS1:%.*]] = insertelement <2 x i8> undef, i8 [[X0X0]],
> i32 0
> -; CHECK-NEXT:    [[INS2:%.*]] = insertelement <2 x i8> [[INS1]], i8 [[Y1Y1]],
> i32 1
> -; CHECK-NEXT:    ret <2 x i8> [[INS2]]
> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8>
> [[Y:%.*]], <2 x i32> <i32 0, i32 3>
> +; CHECK-NEXT:    [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]]
> +; CHECK-NEXT:    ret <2 x i8> [[TMP2]]
>  ;
>    %x0 = extractelement <2 x i8> %x, i32 0
>    %y1 = extractelement <2 x i8> %y, i32 1 @@ -22,19 +18,9 @@ define <2 x i8>
> @g(<2 x i8> %x, <2 x i8>
> 
>  define <4 x i8> @h(<4 x i8> %x, <4 x i8> %y) {  ; CHECK-LABEL: @h(
> -; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
> -; CHECK-NEXT:    [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
> -; CHECK-NEXT:    [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
> -; CHECK-NEXT:    [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
> -; CHECK-NEXT:    [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
> -; CHECK-NEXT:    [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
> -; CHECK-NEXT:    [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
> -; CHECK-NEXT:    [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
> -; CHECK-NEXT:    [[INS1:%.*]] = insertelement <4 x i8> undef, i8 [[X0X0]],
> i32 0
> -; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x i8> [[INS1]], i8 [[X3X3]],
> i32 1
> -; CHECK-NEXT:    [[INS3:%.*]] = insertelement <4 x i8> [[INS2]], i8 [[Y1Y1]],
> i32 2
> -; CHECK-NEXT:    [[INS4:%.*]] = insertelement <4 x i8> [[INS3]], i8 [[Y2Y2]],
> i32 3
> -; CHECK-NEXT:    ret <4 x i8> [[INS4]]
> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8>
> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6>
> +; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
> +; CHECK-NEXT:    ret <4 x i8> [[TMP2]]
>  ;
>    %x0 = extractelement <4 x i8> %x, i32 0
>    %x3 = extractelement <4 x i8> %x, i32 3 @@ -53,16 +39,9 @@ define <4 x i8>
> @h(<4 x i8> %x, <4 x i8>
> 
>  define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) {  ; CHECK-LABEL:
> @h_undef(
> -; CHECK-NEXT:    [[X3:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 3
> -; CHECK-NEXT:    [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
> -; CHECK-NEXT:    [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
> -; CHECK-NEXT:    [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
> -; CHECK-NEXT:    [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
> -; CHECK-NEXT:    [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
> -; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x i8> undef, i8 [[X3X3]],
> i32 1
> -; CHECK-NEXT:    [[INS3:%.*]] = insertelement <4 x i8> [[INS2]], i8 [[Y1Y1]],
> i32 2
> -; CHECK-NEXT:    [[INS4:%.*]] = insertelement <4 x i8> [[INS3]], i8 [[Y2Y2]],
> i32 3
> -; CHECK-NEXT:    ret <4 x i8> [[INS4]]
> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8>
> [[Y:%.*]], <4 x i32> <i32 undef, i32 3, i32 5, i32 6>
> +; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
> +; CHECK-NEXT:    ret <4 x i8> [[TMP2]]
>  ;
>    %x0 = extractelement <4 x i8> undef, i32 0
>    %x3 = extractelement <4 x i8> %x, i32 3 @@ -81,17 +60,13 @@ define <4 x i8>
> @h_undef(<4 x i8> %x, <4
> 
>  define i8 @i(<4 x i8> %x, <4 x i8> %y) {  ; CHECK-LABEL: @i(
> -; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
> -; CHECK-NEXT:    [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
> -; CHECK-NEXT:    [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
> -; CHECK-NEXT:    [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
> -; CHECK-NEXT:    [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
> -; CHECK-NEXT:    [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
> -; CHECK-NEXT:    [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
> -; CHECK-NEXT:    [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
> -; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
> -; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[Y1Y1]], [[Y2Y2]]
> -; CHECK-NEXT:    [[TMP3:%.*]] = add i8 [[TMP1]], [[TMP2]]
> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8>
> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6>
> +; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
> +; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8>
> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
> +; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i8> [[TMP2]], [[RDX_SHUF]]
> +; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i8> [[BIN_RDX]], <4 x
> i8> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
> +; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add <4 x i8> [[BIN_RDX]], [[RDX_SHUF1]]
> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i8> [[BIN_RDX2]], i32 0
>  ; CHECK-NEXT:    ret i8 [[TMP3]]
>  ;
>    %x0 = extractelement <4 x i8> %x, i32 0 @@ -110,18 +85,15 @@ define i8
> @i(<4 x i8> %x, <4 x i8> %y) {
> 
>  define i8 @j(<4 x i8> %x, <4 x i8> %y) {  ; CHECK-LABEL: @j(
> -; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
> -; CHECK-NEXT:    [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
> -; CHECK-NEXT:    [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
> -; CHECK-NEXT:    [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
> -; CHECK-NEXT:    [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
> -; CHECK-NEXT:    [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
> -; CHECK-NEXT:    [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
> -; CHECK-NEXT:    [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
> -; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
> -; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[Y1Y1]], [[Y2Y2]]
> -; CHECK-NEXT:    [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
> -; CHECK-NEXT:    ret i8 [[TMP3]]
> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8>
> [[Y:%.*]], <2 x i32> <i32 0, i32 5>
> +; CHECK-NEXT:    [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]]
> +; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]],
> <2 x i32> <i32 3, i32 6>
> +; CHECK-NEXT:    [[TMP4:%.*]] = mul <2 x i8> [[TMP3]], [[TMP3]]
> +; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
> +; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
> +; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
> +; CHECK-NEXT:    [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
> +; CHECK-NEXT:    ret i8 [[TMP8]]
>  ;
>    %x0 = extractelement <4 x i8> %x, i32 0
>    %x3 = extractelement <4 x i8> %x, i32 3 @@ -139,18 +111,15 @@ define i8
> @j(<4 x i8> %x, <4 x i8> %y) {
> 
>  define i8 @k(<4 x i8> %x) {
>  ; CHECK-LABEL: @k(
> -; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
> -; CHECK-NEXT:    [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
> -; CHECK-NEXT:    [[X1:%.*]] = extractelement <4 x i8> [[X]], i32 1
> -; CHECK-NEXT:    [[X2:%.*]] = extractelement <4 x i8> [[X]], i32 2
> -; CHECK-NEXT:    [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
> -; CHECK-NEXT:    [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
> -; CHECK-NEXT:    [[X1X1:%.*]] = mul i8 [[X1]], [[X1]]
> -; CHECK-NEXT:    [[X2X2:%.*]] = mul i8 [[X2]], [[X2]]
> -; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
> -; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[X1X1]], [[X2X2]]
> -; CHECK-NEXT:    [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
> -; CHECK-NEXT:    ret i8 [[TMP3]]
> +; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i8> [[X:%.*]], [[X]]
> +; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8>
> undef, <2 x i32> <i32 0, i32 1>
> +; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i8> [[X]], [[X]]
> +; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8>
> undef, <2 x i32> <i32 3, i32 2>
> +; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
> +; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
> +; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
> +; CHECK-NEXT:    [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
> +; CHECK-NEXT:    ret i8 [[TMP8]]
>  ;
>    %x0 = extractelement <4 x i8> %x, i32 0
>    %x3 = extractelement <4 x i8> %x, i32 3
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list