[llvm] r210361 - Fixed a bug in lowering shuffle_vectors to insertps

Serge Pavlov sepavloff at gmail.com
Sat Jun 21 08:28:23 PDT 2014


This fix caused a new bug: http://llvm.org/bugs/show_bug.cgi?id=20087

Thanks,
--Serge


2014-06-07 1:07 GMT+07:00 Filipe Cabecinhas <me at filcab.net>:

> Author: filcab
> Date: Fri Jun  6 13:07:06 2014
> New Revision: 210361
>
> URL: http://llvm.org/viewvc/llvm-project?rev=210361&view=rev
> Log:
> Fixed a bug in lowering shuffle_vectors to insertps
>
> Summary:
> We were being too strict and not accounting for undefs.
> Added a test case and fixed another one where we improved codegen.
>
> Reviewers: grosbach, nadav, delena
>
> Subscribers: llvm-commits
>
> Differential Revision: http://reviews.llvm.org/D4039
>
> Modified:
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/test/CodeGen/X86/avx-shuffle.ll
>     llvm/trunk/test/CodeGen/X86/sse41.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=210361&r1=210360&r2=210361&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jun  6 13:07:06 2014
> @@ -3964,14 +3964,22 @@ static bool isINSERTPSMask(ArrayRef<int>
>
>    unsigned CorrectPosV1 = 0;
>    unsigned CorrectPosV2 = 0;
> -  for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i)
> +  for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) {
> +    if (Mask[i] == -1) {
> +      ++CorrectPosV1;
> +      ++CorrectPosV2;
> +      continue;
> +    }
> +
>      if (Mask[i] == i)
>        ++CorrectPosV1;
>      else if (Mask[i] == i + 4)
>        ++CorrectPosV2;
> +  }
>
>    if (CorrectPosV1 == 3 || CorrectPosV2 == 3)
> -    // We have 3 elements from one vector, and one from another.
> +    // We have 3 elements (undefs count as elements from any vector) from one
> +    // vector, and one from another.
>      return true;
>
>    return false;
> @@ -7462,8 +7470,9 @@ static SDValue getINSERTPS(ShuffleVector
>    assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
>           "unsupported vector type for insertps/pinsrd");
>
> -  int FromV1 = std::count_if(Mask.begin(), Mask.end(),
> -                             [](const int &i) { return i < 4; });
> +  auto FromV1Predicate = [](const int &i) { return i < 4 && i > -1; };
> +  auto FromV2Predicate = [](const int &i) { return i >= 4; };
> +  int FromV1 = std::count_if(Mask.begin(), Mask.end(), FromV1Predicate);
>
>    SDValue From;
>    SDValue To;
> @@ -7471,15 +7480,17 @@ static SDValue getINSERTPS(ShuffleVector
>    if (FromV1 == 1) {
>      From = V1;
>      To = V2;
> -    DestIndex = std::find_if(Mask.begin(), Mask.end(),
> -                             [](const int &i) { return i < 4; }) -
> +    DestIndex = std::find_if(Mask.begin(), Mask.end(), FromV1Predicate) -
>                  Mask.begin();
>    } else {
> +    assert(std::count_if(Mask.begin(), Mask.end(), FromV2Predicate) == 1 &&
> +           "More than one element from V1 and from V2, or no elements from one "
> +           "of the vectors. This case should not have returned true from "
> +           "isINSERTPSMask");
>      From = V2;
>      To = V1;
> -    DestIndex = std::find_if(Mask.begin(), Mask.end(),
> -                             [](const int &i) { return i >= 4; }) -
> -                Mask.begin();
> +    DestIndex =
> +        std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin();
>    }
>
>    if (MayFoldLoad(From)) {
>
> Modified: llvm/trunk/test/CodeGen/X86/avx-shuffle.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle.ll?rev=210361&r1=210360&r2=210361&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx-shuffle.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx-shuffle.ll Fri Jun  6 13:07:06 2014
> @@ -5,8 +5,10 @@ define <4 x float> @test1(<4 x float> %a
>    %b = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 5, i32 undef, i32 undef>
>    ret <4 x float> %b
>  ; CHECK-LABEL: test1:
> -; CHECK: vshufps
> -; CHECK: vpshufd
> +;; TODO: This test could be improved by removing the xor instruction and
> +;; having vinsertps zero out the needed elements.
> +; CHECK: vxorps
> +; CHECK: vinsertps
>  }
>
>  ; rdar://10538417
>
> Modified: llvm/trunk/test/CodeGen/X86/sse41.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=210361&r1=210360&r2=210361&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse41.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse41.ll Fri Jun  6 13:07:06 2014
> @@ -692,3 +692,14 @@ define <4 x float> @insertps_from_broadc
>    %13 = fadd <4 x float> %11, %12
>    ret <4 x float> %13
>  }
> +
> +define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) {
> +; CHECK-LABEL: insertps_with_undefs:
> +; CHECK-NOT: shufps
> +; CHECK: insertps    $32, %xmm0
> +; CHECK: ret
> +  %1 = load float* %b, align 4
> +  %2 = insertelement <4 x float> undef, float %1, i32 0
> +  %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 4, i32 undef, i32 0, i32 7>
> +  ret <4 x float> %result
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140621/d1d79193/attachment.html>


More information about the llvm-commits mailing list