[llvm-commits] [llvm] r115977 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/sse3.ll

Thu Oct 7 13:50:20 PDT 2010

Author: evancheng
Date: Thu Oct  7 15:50:20 2010
New Revision: 115977

URL: http://llvm.org/viewvc/llvm-project?rev=115977&view=rev
Log:
Canonicalize X86ISD::MOVDDUP nodes to v2f64 to make sure all cases match. Also eliminate unneeded isel patterns. rdar://8520311

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/sse3.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=115977&r1=115976&r2=115977&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Oct  7 15:50:20 2010
@@ -5083,6 +5083,7 @@
 // uses while it only has one, use this version, and let isel match
 // another instruction if the load really happens to have more than
 // one use. Remove this version after this bug get fixed.
+// rdar://8434668, PR8156
 static bool RelaxedMayFoldVectorLoad(SDValue V) {
   if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
     V = V.getOperand(0);
@@ -5170,6 +5171,17 @@
 }
 
 static
+SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+
+  // Canonizalize to v2f64.
+  V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, V1);
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                     getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
+                                          V1, DAG));
+}
+
+static
 SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
                         bool HasSSE2) {
   SDValue V1 = Op.getOperand(0);
@@ -5309,7 +5321,7 @@
     if (VT.getVectorNumElements() <= 4)
       return SDValue();
 
-    // Canonize all of the remaining to v4f32.
+    // Canonicalize all of the remaining to v4f32.
     return PromoteSplat(SVOp, DAG);
   }
 
@@ -5394,7 +5406,7 @@
 
   if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef &&
       RelaxedMayFoldVectorLoad(V1))
-    return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
+    return getMOVDDup(Op, dl, V1, DAG);
 
   if (X86::isMOVHLPS_v_undef_Mask(SVOp))
     return getMOVHighToLow(Op, dl, DAG);

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=115977&r1=115976&r2=115977&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Oct  7 15:50:20 2010
@@ -5537,19 +5537,14 @@
 def : Pat<(X86Movddup (memopv2f64 addr:$src)),
           (MOVDDUPrm addr:$src)>;
 
-def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
           (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
           (MOVDDUPrm addr:$src)>;
 
-def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
           (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (memopv2i64 addr:$src)),
-          (MOVDDUPrm addr:$src)>;
-
-def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
-          (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
           (MOVDDUPrm addr:$src)>;
 
 def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
@@ -5564,6 +5559,7 @@
                            (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
           (MOVDDUPrm addr:$src)>;
 
+
 // Shuffle with UNPCKLPS
 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
           (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
@@ -5675,14 +5671,11 @@
           (MOVLHPSrr VR128:$src1, VR128:$src2)>;
 def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
           (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
-// FIXME: Instead of X86Movddup, there should be a X86Movlhps here, the problem
+
+// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the problem
 // is during lowering, where it's not possible to recognize the load fold cause
 // it has two uses through a bitcast. One use disappears at isel time and the
 // fold opportunity reappears.
-def : Pat<(v2i64 (X86Movddup VR128:$src)),
-          (MOVLHPSrr VR128:$src, VR128:$src)>;
-def : Pat<(v4f32 (X86Movddup VR128:$src)),
-          (MOVLHPSrr VR128:$src, VR128:$src)>;
 def : Pat<(v2f64 (X86Movddup VR128:$src)),
           (UNPCKLPDrr VR128:$src, VR128:$src)>;
 
@@ -5690,6 +5683,7 @@
 def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
                     (scalar_to_vector (loadf64 addr:$src2)))),
           (MOVHPDrm VR128:$src1, addr:$src2)>;
+
 // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
 // is during lowering, where it's not possible to recognize the load fold cause
 // it has two uses through a bitcast. One use disappears at isel time and the

Modified: llvm/trunk/test/CodeGen/X86/sse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3.ll?rev=115977&r1=115976&r2=115977&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3.ll Thu Oct  7 15:50:20 2010
@@ -169,7 +169,7 @@
         ret void
 ; X64: 	t10:
 ; X64: 		pextrw	$4, %xmm0, %eax
-; X64: 		movlhps	%xmm1, %xmm1
+; X64: 		unpcklpd %xmm1, %xmm1
 ; X64: 		pshuflw	$8, %xmm1, %xmm1
 ; X64: 		pinsrw	$2, %eax, %xmm1
 ; X64: 		pextrw	$6, %xmm0, %eax
@@ -260,3 +260,18 @@
 ; X64: 		pinsrw	$1, %eax, %xmm0
 ; X64: 		ret
 }
+
+; rdar://8520311
+define <4 x i32> @t17() nounwind {
+entry:
+; X64: t17:
+; X64:          movddup (%rax), %xmm0
+  %tmp1 = load <4 x float>* undef, align 16
+  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  %tmp3 = load <4 x float>* undef, align 16
+  %tmp4 = shufflevector <4 x float> %tmp2, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+  %tmp5 = bitcast <4 x float> %tmp3 to <4 x i32>
+  %tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+  %tmp7 = and <4 x i32> %tmp6, <i32 undef, i32 undef, i32 -1, i32 0>
+  ret <4 x i32> %tmp7
+}