<div dir="ltr">The fix caused <a href="http://llvm.org/bugs/show_bug.cgi?id=20087">http://llvm.org/bugs/show_bug.cgi?id=20087</a><div class="gmail_extra"><br clear="all"><div>Thanks,<br>--Serge<br></div>
<br><br><div class="gmail_quote">2014-06-07 1:07 GMT+07:00 Filipe Cabecinhas <span dir="ltr"><<a href="mailto:me@filcab.net" target="_blank">me@filcab.net</a>></span>:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Author: filcab<br>
Date: Fri Jun 6 13:07:06 2014<br>
New Revision: 210361<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=210361&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=210361&view=rev</a><br>
Log:<br>
Fixed a bug in lowering shuffle_vectors to insertps<br>
<br>
Summary:<br>
We were being too strict and not accounting for undefs.<br>
Added a test case and fixed another one where we improved codegen.<br>
<br>
Reviewers: grosbach, nadav, delena<br>
<br>
Subscribers: llvm-commits<br>
<br>
Differential Revision: <a href="http://reviews.llvm.org/D4039" target="_blank">http://reviews.llvm.org/D4039</a><br>
<br>
Modified:<br>
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp<br>
llvm/trunk/test/CodeGen/X86/avx-shuffle.ll<br>
llvm/trunk/test/CodeGen/X86/sse41.ll<br>
<br>
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=210361&r1=210360&r2=210361&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=210361&r1=210360&r2=210361&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)<br>
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jun 6 13:07:06 2014<br>
@@ -3964,14 +3964,22 @@ static bool isINSERTPSMask(ArrayRef&lt;int&gt;<br>
<br>
unsigned CorrectPosV1 = 0;<br>
unsigned CorrectPosV2 = 0;<br>
- for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i)<br>
+ for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) {<br>
+ if (Mask[i] == -1) {<br>
+ ++CorrectPosV1;<br>
+ ++CorrectPosV2;<br>
+ continue;<br>
+ }<br>
+<br>
if (Mask[i] == i)<br>
++CorrectPosV1;<br>
else if (Mask[i] == i + 4)<br>
++CorrectPosV2;<br>
+ }<br>
<br>
if (CorrectPosV1 == 3 || CorrectPosV2 == 3)<br>
- // We have 3 elements from one vector, and one from another.<br>
+ // We have 3 elements (undefs count as elements from any vector) from one<br>
+ // vector, and one from another.<br>
return true;<br>
<br>
return false;<br>
@@ -7462,8 +7470,9 @@ static SDValue getINSERTPS(ShuffleVector<br>
assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&<br>
"unsupported vector type for insertps/pinsrd");<br>
<br>
- int FromV1 = std::count_if(Mask.begin(), Mask.end(),<br>
- [](const int &i) { return i < 4; });<br>
+ auto FromV1Predicate = [](const int &i) { return i < 4 && i > -1; };<br>
+ auto FromV2Predicate = [](const int &i) { return i >= 4; };<br>
+ int FromV1 = std::count_if(Mask.begin(), Mask.end(), FromV1Predicate);<br>
<br>
SDValue From;<br>
SDValue To;<br>
@@ -7471,15 +7480,17 @@ static SDValue getINSERTPS(ShuffleVector<br>
if (FromV1 == 1) {<br>
From = V1;<br>
To = V2;<br>
- DestIndex = std::find_if(Mask.begin(), Mask.end(),<br>
- [](const int &i) { return i < 4; }) -<br>
+ DestIndex = std::find_if(Mask.begin(), Mask.end(), FromV1Predicate) -<br>
Mask.begin();<br>
} else {<br>
+ assert(std::count_if(Mask.begin(), Mask.end(), FromV2Predicate) == 1 &&<br>
+ "More than one element from V1 and from V2, or no elements from one "<br>
+ "of the vectors. This case should not have returned true from "<br>
+ "isINSERTPSMask");<br>
From = V2;<br>
To = V1;<br>
- DestIndex = std::find_if(Mask.begin(), Mask.end(),<br>
- [](const int &i) { return i >= 4; }) -<br>
- Mask.begin();<br>
+ DestIndex =<br>
+ std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin();<br>
}<br>
<br>
if (MayFoldLoad(From)) {<br>
<br>
Modified: llvm/trunk/test/CodeGen/X86/avx-shuffle.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle.ll?rev=210361&r1=210360&r2=210361&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle.ll?rev=210361&r1=210360&r2=210361&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/test/CodeGen/X86/avx-shuffle.ll (original)<br>
+++ llvm/trunk/test/CodeGen/X86/avx-shuffle.ll Fri Jun 6 13:07:06 2014<br>
@@ -5,8 +5,10 @@ define <4 x float> @test1(<4 x float> %a<br>
%b = shufflevector &lt;4 x float&gt; zeroinitializer, &lt;4 x float&gt; %a, &lt;4 x i32&gt; &lt;i32 2, i32 5, i32 undef, i32 undef&gt;<br>
ret <4 x float> %b<br>
; CHECK-LABEL: test1:<br>
-; CHECK: vshufps<br>
-; CHECK: vpshufd<br>
+;; TODO: This test could be improved by removing the xor instruction and<br>
+;; having vinsertps zero out the needed elements.<br>
+; CHECK: vxorps<br>
+; CHECK: vinsertps<br>
}<br>
<br>
; rdar://10538417<br>
<br>
Modified: llvm/trunk/test/CodeGen/X86/sse41.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=210361&r1=210360&r2=210361&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=210361&r1=210360&r2=210361&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/test/CodeGen/X86/sse41.ll (original)<br>
+++ llvm/trunk/test/CodeGen/X86/sse41.ll Fri Jun 6 13:07:06 2014<br>
@@ -692,3 +692,14 @@ define <4 x float> @insertps_from_broadc<br>
%13 = fadd <4 x float> %11, %12<br>
ret <4 x float> %13<br>
}<br>
+<br>
+define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) {<br>
+; CHECK-LABEL: insertps_with_undefs:<br>
+; CHECK-NOT: shufps<br>
+; CHECK: insertps $32, %xmm0<br>
+; CHECK: ret<br>
+ %1 = load float* %b, align 4<br>
+ %2 = insertelement <4 x float> undef, float %1, i32 0<br>
+ %result = shufflevector &lt;4 x float&gt; %a, &lt;4 x float&gt; %2, &lt;4 x i32&gt; &lt;i32 4, i32 undef, i32 0, i32 7&gt;<br>
+ ret <4 x float> %result<br>
+}<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div></div>