[llvm] r210361 - Fixed a bug in lowering shuffle_vectors to insertps
Serge Pavlov
sepavloff at gmail.com
Sat Jun 21 08:28:23 PDT 2014
This fix caused PR20087: http://llvm.org/bugs/show_bug.cgi?id=20087
Thanks,
--Serge
2014-06-07 1:07 GMT+07:00 Filipe Cabecinhas <me at filcab.net>:
> Author: filcab
> Date: Fri Jun 6 13:07:06 2014
> New Revision: 210361
>
> URL: http://llvm.org/viewvc/llvm-project?rev=210361&view=rev
> Log:
> Fixed a bug in lowering shuffle_vectors to insertps
>
> Summary:
> We were being too strict and not accounting for undefs.
> Added a test case and fixed another one where we improved codegen.
>
> Reviewers: grosbach, nadav, delena
>
> Subscribers: llvm-commits
>
> Differential Revision: http://reviews.llvm.org/D4039
>
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/test/CodeGen/X86/avx-shuffle.ll
> llvm/trunk/test/CodeGen/X86/sse41.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=210361&r1=210360&r2=210361&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jun 6 13:07:06 2014
> @@ -3964,14 +3964,22 @@ static bool isINSERTPSMask(ArrayRef<int>
>
> unsigned CorrectPosV1 = 0;
> unsigned CorrectPosV2 = 0;
> - for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i)
> + for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) {
> + if (Mask[i] == -1) {
> + ++CorrectPosV1;
> + ++CorrectPosV2;
> + continue;
> + }
> +
> if (Mask[i] == i)
> ++CorrectPosV1;
> else if (Mask[i] == i + 4)
> ++CorrectPosV2;
> + }
>
> if (CorrectPosV1 == 3 || CorrectPosV2 == 3)
> - // We have 3 elements from one vector, and one from another.
> + // We have 3 elements (undefs count as elements from any vector) from one
> + // vector, and one from another.
> return true;
>
> return false;
> @@ -7462,8 +7470,9 @@ static SDValue getINSERTPS(ShuffleVector
> assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
> "unsupported vector type for insertps/pinsrd");
>
> - int FromV1 = std::count_if(Mask.begin(), Mask.end(),
> - [](const int &i) { return i < 4; });
> + auto FromV1Predicate = [](const int &i) { return i < 4 && i > -1; };
> + auto FromV2Predicate = [](const int &i) { return i >= 4; };
> + int FromV1 = std::count_if(Mask.begin(), Mask.end(), FromV1Predicate);
>
> SDValue From;
> SDValue To;
> @@ -7471,15 +7480,17 @@ static SDValue getINSERTPS(ShuffleVector
> if (FromV1 == 1) {
> From = V1;
> To = V2;
> - DestIndex = std::find_if(Mask.begin(), Mask.end(),
> - [](const int &i) { return i < 4; }) -
> + DestIndex = std::find_if(Mask.begin(), Mask.end(), FromV1Predicate) -
> Mask.begin();
> } else {
> + assert(std::count_if(Mask.begin(), Mask.end(), FromV2Predicate) == 1
> &&
> + "More than one element from V1 and from V2, or no elements
> from one "
> + "of the vectors. This case should not have returned true from "
> + "isINSERTPSMask");
> From = V2;
> To = V1;
> - DestIndex = std::find_if(Mask.begin(), Mask.end(),
> - [](const int &i) { return i >= 4; }) -
> - Mask.begin();
> + DestIndex =
> + std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) -
> Mask.begin();
> }
>
> if (MayFoldLoad(From)) {
>
> Modified: llvm/trunk/test/CodeGen/X86/avx-shuffle.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle.ll?rev=210361&r1=210360&r2=210361&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx-shuffle.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx-shuffle.ll Fri Jun 6 13:07:06 2014
> @@ -5,8 +5,10 @@ define <4 x float> @test1(<4 x float> %a
> %b = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x
> i32> <i32 2, i32 5, i32 undef, i32 undef>
> ret <4 x float> %b
> ; CHECK-LABEL: test1:
> -; CHECK: vshufps
> -; CHECK: vpshufd
> +;; TODO: This test could be improved by removing the xor instruction and
> +;; having vinsertps zero out the needed elements.
> +; CHECK: vxorps
> +; CHECK: vinsertps
> }
>
> ; rdar://10538417
>
> Modified: llvm/trunk/test/CodeGen/X86/sse41.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=210361&r1=210360&r2=210361&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse41.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse41.ll Fri Jun 6 13:07:06 2014
> @@ -692,3 +692,14 @@ define <4 x float> @insertps_from_broadc
> %13 = fadd <4 x float> %11, %12
> ret <4 x float> %13
> }
> +
> +define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) {
> +; CHECK-LABEL: insertps_with_undefs:
> +; CHECK-NOT: shufps
> +; CHECK: insertps $32, %xmm0
> +; CHECK: ret
> + %1 = load float* %b, align 4
> + %2 = insertelement <4 x float> undef, float %1, i32 0
> + %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32
> 4, i32 undef, i32 0, i32 7>
> + ret <4 x float> %result
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140621/d1d79193/attachment.html>
More information about the llvm-commits mailing list