[llvm-commits] [llvm] r48063 - in /llvm/trunk: lib/Target/X86/README-SSE.txt lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/vec_set-A.ll

Sat Mar 8 17:05:04 PST 2008

Author: lattner
Date: Sat Mar  8 19:05:04 2008
New Revision: 48063

URL: http://llvm.org/viewvc/llvm-project?rev=48063&view=rev
Log:
Implement a readme entry, compiling
#include <xmmintrin.h>
__m128i doload64(short x) {return _mm_set_epi16(0,0,0,0,0,0,0,1);}

into:
	movl	$1, %eax
	movd	%eax, %xmm0
	ret

instead of a constant pool load.


Added:
    llvm/trunk/test/CodeGen/X86/vec_set-A.ll
Modified:
    llvm/trunk/lib/Target/X86/README-SSE.txt
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/README-SSE.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/README-SSE.txt?rev=48063&r1=48062&r2=48063&view=diff

==============================================================================

--- llvm/trunk/lib/Target/X86/README-SSE.txt (original)
+++ llvm/trunk/lib/Target/X86/README-SSE.txt Sat Mar  8 19:05:04 2008
@@ -782,23 +782,3 @@
 
 //===---------------------------------------------------------------------===//
 
-Take the following code:
-#include <xmmintrin.h>
-__m128i doload64(short x) {return _mm_set_epi16(0,0,0,0,0,0,0,1);}
-
-On x86, LLVM generates the following:
-doload64:
-        subl    $28, %esp
-        movl    $0, 4(%esp)
-        movl    $1, (%esp)
-        movq    (%esp), %xmm0
-        addl    $28, %esp
-        ret
-
-LLVM should instead generate something more like the following:
-doload64:
-        movl    $1, %eax
-        movd    %eax, %xmm0
-        ret
-
-//===---------------------------------------------------------------------===//

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=48063&r1=48062&r2=48063&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Mar  8 19:05:04 2008
@@ -2888,6 +2888,21 @@
   return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
 }
 
+/// getSwapEltZeroMask - Returns a vector_shuffle mask for a shuffle that swaps
+/// element #0 of a vector with the specified index, leaving the rest of the
+/// elements in place.
+static SDOperand getSwapEltZeroMask(unsigned NumElems, unsigned DestElt,
+                                   SelectionDAG &DAG) {
+  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+  MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
+  SmallVector<SDOperand, 8> MaskVec;
+  // Element #0 of the result gets the elt we are replacing.
+  MaskVec.push_back(DAG.getConstant(DestElt, BaseVT));
+  for (unsigned i = 1; i != NumElems; ++i)
+    MaskVec.push_back(DAG.getConstant(i == DestElt ? 0 : i, BaseVT));
+  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
+}
+
 /// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
 ///
 static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
@@ -2912,10 +2927,11 @@
 /// vector of zero or undef vector.  This produces a shuffle where the low
 /// element of V2 is swizzled into the zero/undef vector, landing at element
 /// Idx.  This produces a shuffle mask like 4,1,2,3 (idx=0) or  0,1,2,4 (idx=3).
-static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
-                                             unsigned NumElems, unsigned Idx,
+static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, unsigned Idx,
                                              bool isZero, SelectionDAG &DAG) {
+  MVT::ValueType VT = V2.getValueType();
   SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
+  unsigned NumElems = MVT::getVectorNumElements(V2.getValueType());
   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
   MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
   SmallVector<SDOperand, 16> MaskVec;
@@ -3056,6 +3072,37 @@
     unsigned Idx = CountTrailingZeros_32(NonZeros);
     SDOperand Item = Op.getOperand(Idx);
     
+    // If this is an insertion of an i64 value on x86-32, and if the top bits of
+    // the value are obviously zero, truncate the value to i32 and do the
+    // insertion that way.  Only do this if the value is non-constant or if the
+    // value is a constant being inserted into element 0.  It is cheaper to do
+    // a constant pool load than it is to do a movd + shuffle.
+    if (EVT == MVT::i64 && !Subtarget->is64Bit() &&
+        (!IsAllConstants || Idx == 0)) {
+      if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
+        // Handle MMX and SSE both.
+        MVT::ValueType VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
+        MVT::ValueType VecElts = VT == MVT::v2i64 ? 4 : 2;
+        
+        // Truncate the value (which may itself be a constant) to i32, and
+        // convert it to a vector with movd (S2V+shuffle to zero extend).
+        Item = DAG.getNode(ISD::TRUNCATE, MVT::i32, Item);
+        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VecVT, Item);
+        Item = getShuffleVectorZeroOrUndef(Item, 0, true, DAG);
+        
+        // Now we have our 32-bit value zero extended in the low element of
+        // a vector.  If Idx != 0, swizzle it into place.
+        if (Idx != 0) {
+          SDOperand Ops[] = { 
+            Item, DAG.getNode(ISD::UNDEF, Item.getValueType()),
+            getSwapEltZeroMask(VecElts, Idx, DAG)
+          };
+          Item = DAG.getNode(ISD::VECTOR_SHUFFLE, VecVT, Ops, 3);
+        }
+        return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Item);
+      }
+    }
+    
     // If we have a constant or non-constant insertion into the low element of
     // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
     // the rest of the elements.  This will be matched as movd/movq/movss/movsd
@@ -3066,8 +3113,7 @@
         (EVT != MVT::i64 || Subtarget->is64Bit())) {
       Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
       // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
-      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
-                                         NumZero > 0, DAG);
+      return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, DAG);
     }
     
     if (IsAllConstants) // Otherwise, it's better to do a constpool load.
@@ -3082,8 +3128,7 @@
       Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
       
       // Turn it into a shuffle of zero and zero-extended scalar to vector.
-      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
-                                         DAG);
+      Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, DAG);
       MVT::ValueType MaskVT  = MVT::getIntVectorWithNumElements(NumElems);
       MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
       SmallVector<SDOperand, 8> MaskVec;

Added: llvm/trunk/test/CodeGen/X86/vec_set-A.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_set-A.ll?rev=48063&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_set-A.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_set-A.ll Sat Mar  8 19:05:04 2008
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep {movl.*\$1, %}
+define <2 x i64> @test1() {
+entry:
+	ret <2 x i64> < i64 1, i64 0 >
+}
+