[llvm] r238918 - AVX-512: VSHUFPD instruction selection - code improvements

Wed Jun 3 18:59:05 PDT 2015

Elena, this isn't an AVX-512 specific change, and isn't just code
improvements.

This refactors logic to be common between AVX2 and AVX-512. While that may
be the right call, it isn't clear what the end state looks like here, and
the refactoring isn't an obvious benefit. The code for AVX2 was very
simple, and the new code involves loops and other complexity. Perhaps we
need it, but that isn't clear to me yet.

Notably, the most unclear thing to me about all of this is what the end
state of the AVX-512 code looks like. I would really prefer that you send a
precommit review of that for discussion, rather than refactoring AVX2 code
that we know to be working.

On Wed, Jun 3, 2015 at 4:30 AM Elena Demikhovsky <
elena.demikhovsky at intel.com> wrote:

> Author: delena
> Date: Wed Jun  3 06:21:01 2015
> New Revision: 238918
>
> URL: http://llvm.org/viewvc/llvm-project?rev=238918&view=rev
> Log:
> AVX-512: VSHUFPD instruction selection - code improvements
>
> Modified:
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=238918&r1=238917&r2=238918&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jun  3 06:21:01 2015
> @@ -9468,6 +9468,37 @@ static bool isShuffleMaskInputInPlace(in
>    return true;
>  }
>
> +static SDValue lowerVectorShuffleWithSHUFPD(SDLoc DL, MVT VT,
> +                                            ArrayRef<int> Mask, SDValue
> V1,
> +                                            SDValue V2, SelectionDAG
> &DAG) {
> +
> +  // Mask for V8F64: 0/1,  8/9,  2/3,  10/11, 4/5, ..
> +  // Mask for V4F64; 0/1,  4/5,  2/3,  6/7..
> +  assert(VT.getScalarSizeInBits() == 64 && "Unexpected data type for
> VSHUFPD");
> +  int NumElts = VT.getVectorNumElements();
> +  bool ShufpdMask = true;
> +  bool CommutableMask = true;
> +  unsigned Immediate = 0;
> +  for (int i = 0; i < NumElts; ++i) {
> +    if (Mask[i] < 0)
> +      continue;
> +    int Val = (i & 6) + NumElts * (i & 1);
> +    int CommutVal = (i & 0xe) + NumElts * ((i & 1)^1);
> +    if (Mask[i] < Val ||  Mask[i] > Val + 1)
> +      ShufpdMask = false;
> +    if (Mask[i] < CommutVal ||  Mask[i] > CommutVal + 1)
> +      CommutableMask = false;
> +    Immediate |= (Mask[i] % 2) << i;
> +  }
> +  if (ShufpdMask)
> +    return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
> +                       DAG.getConstant(Immediate, DL, MVT::i8));
> +  if (CommutableMask)
> +    return DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
> +                       DAG.getConstant(Immediate, DL, MVT::i8));
> +  return SDValue();
> +}
> +
>  /// \brief Handle lowering of 4-lane 64-bit floating point shuffles.
>  ///
>  /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when
> AVX2
> @@ -9532,24 +9563,9 @@ static SDValue lowerV4F64VectorShuffle(S
>      return Blend;
>
>    // Check if the blend happens to exactly fit that of SHUFPD.
> -  if ((Mask[0] == -1 || Mask[0] < 2) &&
> -      (Mask[1] == -1 || (Mask[1] >= 4 && Mask[1] < 6)) &&
> -      (Mask[2] == -1 || (Mask[2] >= 2 && Mask[2] < 4)) &&
> -      (Mask[3] == -1 || Mask[3] >= 6)) {
> -    unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 5) << 1) |
> -                          ((Mask[2] == 3) << 2) | ((Mask[3] == 7) << 3);
> -    return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V1, V2,
> -                       DAG.getConstant(SHUFPDMask, DL, MVT::i8));
> -  }
> -  if ((Mask[0] == -1 || (Mask[0] >= 4 && Mask[0] < 6)) &&
> -      (Mask[1] == -1 || Mask[1] < 2) &&
> -      (Mask[2] == -1 || Mask[2] >= 6) &&
> -      (Mask[3] == -1 || (Mask[3] >= 2 && Mask[3] < 4))) {
> -    unsigned SHUFPDMask = (Mask[0] == 5) | ((Mask[1] == 1) << 1) |
> -                          ((Mask[2] == 7) << 2) | ((Mask[3] == 3) << 3);
> -    return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V2, V1,
> -                       DAG.getConstant(SHUFPDMask, DL, MVT::i8));
> -  }
> +  if (SDValue Op =
> +      lowerVectorShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG))
> +    return Op;
>
>    // Try to simplify this by merging 128-bit lanes to enable a lane-based
>    // shuffle. However, if we have AVX2 and either inputs are already in
> place,
> @@ -10156,22 +10172,8 @@ static SDValue lowerV8X64VectorShuffle(S
>    if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2,
> DAG))
>      return Op;
>
> -  // VSHUFPD instruction - mask 0/1, 8/9, 2/3, 10/11, 4/5, 12/13, 6/7,
> 14/15
> -  bool ShufpdMask = true;
> -  unsigned Immediate = 0;
> -  for (int i = 0; i < 8; ++i) {
> -    if (Mask[i] < 0)
> -      continue;
> -    int Val = (i & 6) + 8 * (i & 1);
> -    if (Mask[i] < Val ||  Mask[i] > Val+1) {
> -      ShufpdMask = false;
> -      break;
> -    }
> -    Immediate |= (Mask[i]%2) << i;
> -  }
> -  if (ShufpdMask)
> -    return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
> -                       DAG.getConstant(Immediate, DL, MVT::i8));
> +  if (SDValue Op = lowerVectorShuffleWithSHUFPD(DL, VT, Mask, V1, V2,
> DAG))
> +    return Op;
>
>    // PERMILPD instruction - mask 0/1, 0/1, 2/3, 2/3, 4/5, 4/5, 6/7, 6/7
>    if (isSingleInputShuffleMask(Mask)) {
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll?rev=238918&r1=238917&r2=238918&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll Wed Jun  3 06:21:01 2015
> @@ -242,3 +242,11 @@ define <16 x i32> @test31(<16 x i32> %a,
>    %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32
> 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13,
> i32 14, i32 15, i32 16, i32 17, i32 18>
>    ret <16 x i32> %c
>  }
> +
> +; CHECK-LABEL: test32
> +; CHECK: vshufpd $99, %zmm0, %zmm1
> +; CHECK: ret
> +define <8 x double> @test32(<8 x double> %a, <8 x double> %b) nounwind {
> +  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9,
> i32 1, i32 10, i32 2, i32 undef, i32 5, i32 15, i32 undef>
> +  ret <8 x double> %c
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150604/5b0ad7ba/attachment.html>