<div dir="ltr">On Wed, Jul 23, 2014 at 1:41 PM, Jim Grosbach <span dir="ltr">&lt;<a href="mailto:grosbach@apple.com" target="_blank">grosbach@apple.com</a>&gt;</span> wrote:<br><div class="gmail_extra"><div class="gmail_quote">

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: grosbach<br>

Date: Wed Jul 23 15:41:38 2014<br>

New Revision: 213799<br>

<br>

URL: <a href="http://llvm.org/viewvc/llvm-project?rev=213799&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=213799&view=rev</a><br>

Log:<br>

X86: restrict combine to when type sizes are safe.<br>

<br>

The folding of unary operations through a vector compare and mask operation<br>

is only safe if the unary operation result is of the same size as its input.<br>

For example, it's not safe for [su]itofp from v4i32 to v4f64.<br>

<br>

Modified:<br>

    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp<br>

    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp<br>

    llvm/trunk/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll<br>

    llvm/trunk/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll<br></blockquote><div><br></div><div>Should this get pulled into the 3.5 branch?</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">


Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=213799&r1=213798&r2=213799&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=213799&r1=213798&r2=213799&view=diff</a><br>


==============================================================================<br>

--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)<br>

+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Wed Jul 23 15:41:38 2014<br>

@@ -6492,11 +6492,13 @@ static SDValue performVectorCompareAndMa<br>

   //       AND(VECTOR_CMP(x,y), constant2)<br>

   //    constant2 = UNARYOP(constant)<br>

<br>

-  // Early exit if this isn't a vector operation or if the operand of the<br>

-  // unary operation isn't a bitwise AND.<br>

+  // Early exit if this isn't a vector operation, the operand of the<br>

+  // unary operation isn't a bitwise AND, or if the sizes of the operations<br>

+  // aren't the same.<br>

   EVT VT = N->getValueType(0);<br>

   if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||<br>

-      N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC)<br>

+      N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||<br>

+      VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())<br>

     return SDValue();<br>

<br>

   // Now check that the other operand of the AND is a constant splat. We could<br>

<br>

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=213799&r1=213798&r2=213799&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=213799&r1=213798&r2=213799&view=diff</a><br>


==============================================================================<br>

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)<br>

+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jul 23 15:41:38 2014<br>

@@ -21806,11 +21806,13 @@ static SDValue performVectorCompareAndMa<br>

   //       AND(VECTOR_CMP(x,y), constant2)<br>

   //    constant2 = UNARYOP(constant)<br>

<br>

-  // Early exit if this isn't a vector operation or if the operand of the<br>

-  // unary operation isn't a bitwise AND.<br>

+  // Early exit if this isn't a vector operation, the operand of the<br>

+  // unary operation isn't a bitwise AND, or if the sizes of the operations<br>

+  // aren't the same.<br>

   EVT VT = N->getValueType(0);<br>

   if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||<br>

-      N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC)<br>

+      N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||<br>

+      VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())<br>

     return SDValue();<br>

<br>

   // Now check that the other operand of the AND is a constant splat. We could<br>

<br>

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll?rev=213799&r1=213798&r2=213799&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll?rev=213799&r1=213798&r2=213799&view=diff</a><br>


==============================================================================<br>

--- llvm/trunk/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll (original)<br>

+++ llvm/trunk/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll Wed Jul 23 15:41:38 2014<br>

@@ -11,3 +11,17 @@ define &lt;4 x float&gt; @foo(&lt;4 x float&gt; %val<br>

   %result = sitofp &lt;4 x i32&gt; %ext to &lt;4 x float&gt;<br>

   ret &lt;4 x float&gt; %result<br>

 }<br>

+; Make sure the operation doesn't try to get folded when the sizes don't match,<br>

+; as that ends up crashing later when trying to form a bitcast operation for<br>

+; the folded nodes.<br>

+define void @foo1(&lt;4 x float&gt; %val, &lt;4 x float&gt; %test, &lt;4 x double&gt;* %p) nounwind {<br>

+; CHECK-LABEL: foo1:<br>

+; CHECK: movi.4s<br>

+; CHECK: scvtf.2d<br>

+; CHECK: scvtf.2d<br>

+  %cmp = fcmp oeq &lt;4 x float&gt; %val, %test<br>

+  %ext = zext &lt;4 x i1&gt; %cmp to &lt;4 x i32&gt;<br>

+  %result = sitofp &lt;4 x i32&gt; %ext to &lt;4 x double&gt;<br>

+  store &lt;4 x double&gt; %result, &lt;4 x double&gt;* %p<br>

+  ret void<br>

+}<br>

<br>

Modified: llvm/trunk/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll?rev=213799&r1=213798&r2=213799&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll?rev=213799&r1=213798&r2=213799&view=diff</a><br>


==============================================================================<br>

--- llvm/trunk/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll (original)<br>

+++ llvm/trunk/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll Wed Jul 23 15:41:38 2014<br>

@@ -17,14 +17,38 @@ define &lt;4 x float&gt; @foo(&lt;4 x float&gt; %val<br>

   ret &lt;4 x float&gt; %result<br>

 }<br>

<br>

-define void @bar(&lt;4 x float&gt;* noalias %result) nounwind {<br>

+; Make sure the operation doesn't try to get folded when the sizes don't match,<br>

+; as that ends up crashing later when trying to form a bitcast operation for<br>

+; the folded nodes.<br>

+define void @foo1(&lt;4 x float&gt; %val, &lt;4 x float&gt; %test, &lt;4 x double&gt;* %p) nounwind {<br>

 ; CHECK-LABEL: LCPI1_0:<br>

+; CHECK-NEXT: .long 1                       ## 0x1<br>

+; CHECK-NEXT: .long 1                       ## 0x1<br>

+; CHECK-NEXT: .long 1                       ## 0x1<br>

+; CHECK-NEXT: .long 1                       ## 0x1<br>

+; CHECK-LABEL: foo1:<br>

+;   FIXME: The operation gets scalarized. If/when the compiler learns to better<br>

+;          use [V]CVTDQ2PD, this will need updated.<br>

+; CHECK: cvtsi2sdq<br>

+; CHECK: cvtsi2sdq<br>

+; CHECK: cvtsi2sdq<br>

+; CHECK: cvtsi2sdq<br>

+  %cmp = fcmp oeq &lt;4 x float&gt; %val, %test<br>

+  %ext = zext &lt;4 x i1&gt; %cmp to &lt;4 x i32&gt;<br>

+  %result = sitofp &lt;4 x i32&gt; %ext to &lt;4 x double&gt;<br>

+  store &lt;4 x double&gt; %result, &lt;4 x double&gt;* %p<br>

+  ret void<br>

+}<br>

+<br>

+; Also test the general purpose constant folding of int->fp.<br>

+define void @foo2(&lt;4 x float&gt;* noalias %result) nounwind {<br>

+; CHECK-LABEL: LCPI2_0:<br>

 ; CHECK-NEXT: .long 1082130432              ## float 4.000000e+00<br>

 ; CHECK-NEXT: .long 1084227584              ## float 5.000000e+00<br>

 ; CHECK-NEXT: .long 1086324736              ## float 6.000000e+00<br>

 ; CHECK-NEXT: .long 1088421888              ## float 7.000000e+00<br>

-; CHECK-LABEL: bar:<br>

-; CHECK:  movaps LCPI1_0(%rip), %xmm0<br>

+; CHECK-LABEL: foo2:<br>

+; CHECK:  movaps LCPI2_0(%rip), %xmm0<br>

<br>

   %val = uitofp &lt;4 x i32&gt; &lt;i32 4, i32 5, i32 6, i32 7&gt; to &lt;4 x float&gt;<br>

   store &lt;4 x float&gt; %val, &lt;4 x float&gt;* %result<br>

<br>

<br>

_______________________________________________<br>

llvm-commits mailing list<br>

<a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>

<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>

</blockquote></div><br><br clear="all"><div><br></div>-- <br>Saleem Abdulrasool<br>compnerd (at) compnerd (dot) org

</div></div>