[llvm-commits] [llvm] r48063 - in /llvm/trunk: lib/Target/X86/README-SSE.txt lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/vec_set-A.ll
Chris Lattner
sabre at nondot.org
Sat Mar 8 17:05:04 PST 2008
Author: lattner
Date: Sat Mar 8 19:05:04 2008
New Revision: 48063
URL: http://llvm.org/viewvc/llvm-project?rev=48063&view=rev
Log:
Implement a readme entry, compiling
#include <xmmintrin.h>
__m128i doload64(short x) {return _mm_set_epi16(0,0,0,0,0,0,0,1);}
into:
movl $1, %eax
movd %eax, %xmm0
ret
instead of a constant pool load.
Added:
llvm/trunk/test/CodeGen/X86/vec_set-A.ll
Modified:
llvm/trunk/lib/Target/X86/README-SSE.txt
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/README-SSE.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/README-SSE.txt?rev=48063&r1=48062&r2=48063&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/README-SSE.txt (original)
+++ llvm/trunk/lib/Target/X86/README-SSE.txt Sat Mar 8 19:05:04 2008
@@ -782,23 +782,3 @@
//===---------------------------------------------------------------------===//
-Take the following code:
-#include <xmmintrin.h>
-__m128i doload64(short x) {return _mm_set_epi16(0,0,0,0,0,0,0,1);}
-
-On x86, LLVM generates the following:
-doload64:
- subl $28, %esp
- movl $0, 4(%esp)
- movl $1, (%esp)
- movq (%esp), %xmm0
- addl $28, %esp
- ret
-
-LLVM should instead generate something more like the following:
-doload64:
- movl $1, %eax
- movd %eax, %xmm0
- ret
-
-//===---------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=48063&r1=48062&r2=48063&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Mar 8 19:05:04 2008
@@ -2888,6 +2888,21 @@
return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}
+/// getSwapEltZeroMask - Returns a vector_shuffle mask for a shuffle that swaps
+/// element #0 of a vector with the specified index, leaving the rest of the
+/// elements in place.
+static SDOperand getSwapEltZeroMask(unsigned NumElems, unsigned DestElt,
+ SelectionDAG &DAG) {
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
+ SmallVector<SDOperand, 8> MaskVec;
+ // Element #0 of the result gets the elt we are replacing.
+ MaskVec.push_back(DAG.getConstant(DestElt, BaseVT));
+ for (unsigned i = 1; i != NumElems; ++i)
+ MaskVec.push_back(DAG.getConstant(i == DestElt ? 0 : i, BaseVT));
+ return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
+}
+
/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
///
static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
@@ -2912,10 +2927,11 @@
/// vector of zero or undef vector. This produces a shuffle where the low
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
-static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
- unsigned NumElems, unsigned Idx,
+static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, unsigned Idx,
bool isZero, SelectionDAG &DAG) {
+ MVT::ValueType VT = V2.getValueType();
SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
+ unsigned NumElems = MVT::getVectorNumElements(V2.getValueType());
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 16> MaskVec;
@@ -3056,6 +3072,37 @@
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand Item = Op.getOperand(Idx);
+ // If this is an insertion of an i64 value on x86-32, and if the top bits of
+ // the value are obviously zero, truncate the value to i32 and do the
+ // insertion that way. Only do this if the value is non-constant or if the
+ // value is a constant being inserted into element 0. It is cheaper to do
+ // a constant pool load than it is to do a movd + shuffle.
+ if (EVT == MVT::i64 && !Subtarget->is64Bit() &&
+ (!IsAllConstants || Idx == 0)) {
+ if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
+ // Handle MMX and SSE both.
+ MVT::ValueType VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
+ MVT::ValueType VecElts = VT == MVT::v2i64 ? 4 : 2;
+
+ // Truncate the value (which may itself be a constant) to i32, and
+ // convert it to a vector with movd (S2V+shuffle to zero extend).
+ Item = DAG.getNode(ISD::TRUNCATE, MVT::i32, Item);
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VecVT, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, DAG);
+
+ // Now we have our 32-bit value zero extended in the low element of
+ // a vector. If Idx != 0, swizzle it into place.
+ if (Idx != 0) {
+ SDOperand Ops[] = {
+ Item, DAG.getNode(ISD::UNDEF, Item.getValueType()),
+ getSwapEltZeroMask(VecElts, Idx, DAG)
+ };
+ Item = DAG.getNode(ISD::VECTOR_SHUFFLE, VecVT, Ops, 3);
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Item);
+ }
+ }
+
// If we have a constant or non-constant insertion into the low element of
// a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
// the rest of the elements. This will be matched as movd/movq/movss/movsd
@@ -3066,8 +3113,7 @@
(EVT != MVT::i64 || Subtarget->is64Bit())) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
- return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
- NumZero > 0, DAG);
+ return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, DAG);
}
if (IsAllConstants) // Otherwise, it's better to do a constpool load.
@@ -3082,8 +3128,7 @@
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
// Turn it into a shuffle of zero and zero-extended scalar to vector.
- Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
- DAG);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, DAG);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 8> MaskVec;
Added: llvm/trunk/test/CodeGen/X86/vec_set-A.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_set-A.ll?rev=48063&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_set-A.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_set-A.ll Sat Mar 8 19:05:04 2008
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep {movl.*\$1, %}
+define <2 x i64> @test1() {
+entry:
+ ret <2 x i64> < i64 1, i64 0 >
+}
+
More information about the llvm-commits
mailing list