[llvm] r215737 - [x86] Teach the new AVX v4f64 shuffle lowering to use UNPCK instructions

Fri Aug 15 11:15:43 PDT 2014

I fixed the MSVC 2013 build for this in r215744.  MSVC gives this cool
diagnostic:

..\lib\Target\X86\X86ISelLowering.cpp(7085) : error C2971:
'llvm::VariadicFunction1' : template parameter 'Func' :
'isShuffleEquivalentImpl' : a local variable cannot be used as a non-type
argument
        ..\include\llvm/ADT/VariadicFunction.h(153) : see declaration of
'llvm::VariadicFunction1'
        ..\lib\Target\X86\X86ISelLowering.cpp(7061) : see declaration of
'isShuffleEquivalentImpl'

Obviously, isShuffleEquivalentImpl is not a local variable. Switching from a
static function to one in an anonymous namespace makes the problem go away.
Maybe what MSVC is trying to say is that if you instantiate templates with
static things you'll get template mangling collisions, because it doesn't
internalize template instantiations that use internal decls.

On Fri, Aug 15, 2014 at 10:42 AM, Chandler Carruth <chandlerc at gmail.com>
wrote:

> Author: chandlerc
> Date: Fri Aug 15 12:42:00 2014
> New Revision: 215737
>
> URL: http://llvm.org/viewvc/llvm-project?rev=215737&view=rev
> Log:
> [x86] Teach the new AVX v4f64 shuffle lowering to use UNPCK instructions
> where applicable for blending.
>
> Modified:
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=215737&r1=215736&r2=215737&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Aug 15 12:42:00 2014
> @@ -7055,6 +7055,35 @@ static bool isSingleInputShuffleMask(Arr
>    return true;
>  }
>
> +/// \brief Implementation of the \c isShuffleEquivalent variadic functor.
> +///
> +/// See its documentation for details.
> +static bool isShuffleEquivalentImpl(ArrayRef<int> Mask,
> +                                    ArrayRef<const int *> Args) {
> +  if (Mask.size() != Args.size())
> +    return false;
> +  for (int i = 0, e = Mask.size(); i < e; ++i) {
> +    assert(*Args[i] >= 0 && "Arguments must be positive integers!");
> +    assert(*Args[i] < (int)Args.size() * 2 &&
> +           "Argument outside the range of possible shuffle inputs!");
> +    if (Mask[i] != -1 && Mask[i] != *Args[i])
> +      return false;
> +  }
> +  return true;
> +}
> +/// \brief Checks whether a shuffle mask is equivalent to an explicit
> list of
> +/// arguments.
> +///
> +/// This is a fast way to test a shuffle mask against a fixed pattern:
> +///
> +///   if (isShuffleEquivalent(Mask, 3, 2, 1, 0)) { ... }
> +///
> +/// It returns true if the mask is exactly as wide as the argument list,
> and
> +/// each element of the mask is either -1 (signifying undef) or the value
> given
> +/// in the argument.
> +static const VariadicFunction1<
> +    bool, ArrayRef<int>, int, isShuffleEquivalentImpl>
> isShuffleEquivalent = {};
> +
>  /// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
>  ///
>  /// This helper function produces an 8-bit shuffle immediate
> corresponding to
> @@ -8440,6 +8469,19 @@ static SDValue lowerV4F64VectorShuffle(S
>                         DAG.getConstant(VPERMILPMask, MVT::i8));
>    }
>
> +  // X86 has dedicated unpack instructions that can handle specific blend
> +  // operations: UNPCKH and UNPCKL.
> +  if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
> +    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
> +  if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
> +    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
> +  // FIXME: It would be nice to find a way to get canonicalization to
> commute
> +  // these patterns.
> +  if (isShuffleEquivalent(Mask, 4, 0, 6, 2))
> +    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V2, V1);
> +  if (isShuffleEquivalent(Mask, 5, 1, 7, 3))
> +    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1);
> +
>    // Check if the blend happens to exactly fit that of SHUFPD.
>    if (Mask[0] < 4 && (Mask[1] == -1 || Mask[1] >= 4) &&
>        Mask[2] < 4 && (Mask[3] == -1 || Mask[3] >= 4)) {
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=215737&r1=215736&r2=215737&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Fri Aug 15
> 12:42:00 2014
> @@ -239,11 +239,35 @@ define <4 x double> @shuffle_v4f64_0462(
>  define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b)
> {
>  ; AVX1-LABEL: @shuffle_v4f64_0426
>  ; AVX1:       # BB#0:
> -; AVX1-NEXT:    vshufpd {{.*}} # ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
> +; AVX1-NEXT:    vunpcklpd {{.*}} # ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
>  ; AVX1-NEXT:    retq
>    %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
> <i32 0, i32 4, i32 2, i32 6>
>    ret <4 x double> %shuffle
>  }
> +define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b)
> {
> +; AVX1-LABEL: @shuffle_v4f64_1537
> +; AVX1:       # BB#0:
> +; AVX1-NEXT:    vunpckhpd {{.*}} # ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
> +; AVX1-NEXT:    retq
> +  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
> <i32 1, i32 5, i32 3, i32 7>
> +  ret <4 x double> %shuffle
> +}
> +define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b)
> {
> +; AVX1-LABEL: @shuffle_v4f64_4062
> +; AVX1:       # BB#0:
> +; AVX1-NEXT:    vunpcklpd {{.*}} # ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
> +; AVX1-NEXT:    retq
> +  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
> <i32 4, i32 0, i32 6, i32 2>
> +  ret <4 x double> %shuffle
> +}
> +define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b)
> {
> +; AVX1-LABEL: @shuffle_v4f64_5173
> +; AVX1:       # BB#0:
> +; AVX1-NEXT:    vunpckhpd {{.*}} # ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
> +; AVX1-NEXT:    retq
> +  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
> <i32 5, i32 1, i32 7, i32 3>
> +  ret <4 x double> %shuffle
> +}
>  define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b)
> {
>  ; AVX1-LABEL: @shuffle_v4f64_5163
>  ; AVX1:       # BB#0:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140815/78d88116/attachment.html>