[llvm-commits] [llvm] r68374 - in /llvm/branches/Apple/Dib: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/vec_i64.ll

Fri Apr 3 00:43:50 PDT 2009

Author: void
Date: Fri Apr  3 02:43:50 2009
New Revision: 68374

URL: http://llvm.org/viewvc/llvm-project?rev=68374&view=rev
Log:

--- Merging (from foreign repository) r68368 into '.':
A    test/CodeGen/X86/vec_i64.ll
U    lib/Target/X86/X86ISelLowering.cpp

Added a x86 dag combine to increase the chances to use a
movq for v2i64 on x86-32.

Added:
    llvm/branches/Apple/Dib/test/CodeGen/X86/vec_i64.ll
Modified:
    llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp?rev=68374&r1=68373&r2=68374&view=diff

==============================================================================
--- llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp Fri Apr  3 02:43:50 2009
@@ -8056,15 +8056,43 @@
 /// PerformShuffleCombine - Combine a vector_shuffle that is equal to
 /// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
 /// if the load addresses are consecutive, non-overlapping, and in the right
-/// order.
+/// order.  In the case of v2i64, it will see if it can rewrite the
+/// shuffle to be an appropriate build vector so it can take advantage of
+// performBuildVectorCombine.
 static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                        const TargetLowering &TLI) {
-  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   DebugLoc dl = N->getDebugLoc();
   MVT VT = N->getValueType(0);
   MVT EVT = VT.getVectorElementType();
   SDValue PermMask = N->getOperand(2);
   unsigned NumElems = PermMask.getNumOperands();
+
+  // For x86-32 machines, if we see an insert and then a shuffle in a v2i64
+  // where the upper half is 0, it is advantageous to rewrite it as a build
+  // vector of (0, val) so it can use movq.
+  if (VT == MVT::v2i64) {
+    SDValue In[2];
+    In[0] = N->getOperand(0);
+    In[1] = N->getOperand(1);
+    unsigned Idx0 =cast<ConstantSDNode>(PermMask.getOperand(0))->getZExtValue();
+    unsigned Idx1 =cast<ConstantSDNode>(PermMask.getOperand(1))->getZExtValue();
+    if (In[0].getValueType().getVectorNumElements() == NumElems &&
+        In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT &&
+        In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) {
+      ConstantSDNode* InsertVecIdx =
+                             dyn_cast<ConstantSDNode>(In[Idx0/2].getOperand(2));
+      if (InsertVecIdx &&
+          InsertVecIdx->getZExtValue() == (Idx0 % 2) &&
+          isZeroNode(In[Idx1/2].getOperand(Idx1 % 2))) {
+        return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+                           In[Idx0/2].getOperand(1),
+                           In[Idx1/2].getOperand(Idx1 % 2));
+      }
+    }
+  }
+
+  // Try to combine a vector_shuffle into a 128-bit load.
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   SDNode *Base = NULL;
   if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, Base,
                                 DAG, MFI, TLI))

Added: llvm/branches/Apple/Dib/test/CodeGen/X86/vec_i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/vec_i64.ll?rev=68374&view=auto

==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/vec_i64.ll (added)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/vec_i64.ll Fri Apr  3 02:43:50 2009
@@ -0,0 +1,22 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: grep movq %t | count 2
+
+; Used movq to load i64 into a v2i64 when the top i64 is 0.
+
+define <2 x i64> @foo1(i64* %y) nounwind  {
+entry:
+	%tmp1 = load i64* %y, align 8		; <i64> [#uses=1]
+	%s2v = insertelement <2 x i64> undef, i64 %tmp1, i32 0
+	%loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1>
+	ret <2 x i64> %loadl
+}
+
+
+define <4 x float> @foo2(i64* %p) nounwind {
+entry:
+	%load = load i64* %p
+	%s2v = insertelement <2 x i64> undef, i64 %load, i32 0
+	%loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1>
+	%0 = bitcast <2 x i64> %loadl to <4 x float>
+	ret <4 x float> %0
+}