[llvm-commits] [llvm] r44836 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/vec_shuffle-12.ll test/CodeGen/X86/vec_shuffle-13.ll
Evan Cheng
evan.cheng at apple.com
Mon Dec 10 17:46:18 PST 2007
Author: evancheng
Date: Mon Dec 10 19:46:18 2007
New Revision: 44836
URL: http://llvm.org/viewvc/llvm-project?rev=44836&view=rev
Log:
- Improved v8i16 shuffle lowering. It now uses pshuflw and pshufhw as much as
possible before resorting to pextrw and pinsrw.
- Better codegen for v4i32 shuffles masquerading as v8i16 or v16i8 shuffles.
- Improves (i16 extract_vector_element 0) codegen by recognizing that
(i32 extract_vector_element 0) does not require a pextrw.
Added:
llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=44836&r1=44835&r2=44836&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Dec 10 19:46:18 2007
@@ -23,6 +23,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -35,6 +36,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;
@@ -2714,7 +2716,7 @@
if (Arg.getOpcode() == ISD::UNDEF) continue;
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
- if (Val > 4)
+ if (Val >= 4)
return false;
}
@@ -3130,6 +3132,8 @@
return V;
}
+/// is4WideVector - Returns true if the specific v8i16 or v16i8 vector is
+/// actually just a 4 wide vector. e.g. <a, a, y, y, d, d, x, x>
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
// All zero's are handled with pxor, all one's are handled with pcmpeqd.
@@ -3154,7 +3158,7 @@
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
unsigned NumNonZeroImms = 0;
- std::set<SDOperand> Values;
+ SmallSet<SDOperand, 8> Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDOperand Elt = Op.getOperand(i);
if (Elt.getOpcode() != ISD::UNDEF) {
@@ -3314,59 +3318,179 @@
SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2,
SDOperand PermMask, SelectionDAG &DAG,
TargetLowering &TLI) {
+ SDOperand NewV;
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8);
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
- if (isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
- // Handle v8i16 shuffle high / low shuffle node pair.
+ MVT::ValueType PtrVT = TLI.getPointerTy();
+ SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(),
+ PermMask.Val->op_end());
+
+ // First record which half of which vector the low elements come from.
+ SmallVector<unsigned, 4> LowQuad(4);
+ for (unsigned i = 0; i < 4; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ int QuadIdx = EltIdx / 4;
+ ++LowQuad[QuadIdx];
+ }
+ int BestLowQuad = -1;
+ unsigned MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (LowQuad[i] > MaxQuad) {
+ BestLowQuad = i;
+ MaxQuad = LowQuad[i];
+ }
+ }
+
+ // Record which half of which vector the high elements come from.
+ SmallVector<unsigned, 4> HighQuad(4);
+ for (unsigned i = 4; i < 8; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ int QuadIdx = EltIdx / 4;
+ ++HighQuad[QuadIdx];
+ }
+ int BestHighQuad = -1;
+ MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (HighQuad[i] > MaxQuad) {
+ BestHighQuad = i;
+ MaxQuad = HighQuad[i];
+ }
+ }
+
+ // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it.
+ if (BestLowQuad != -1 || BestHighQuad != -1) {
+ // First sort the 4 chunks in order using shufpd.
SmallVector<SDOperand, 8> MaskVec;
- for (unsigned i = 0; i != 4; ++i)
- MaskVec.push_back(PermMask.getOperand(i));
- for (unsigned i = 4; i != 8; ++i)
- MaskVec.push_back(DAG.getConstant(i, MaskEVT));
- SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
- V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V2, Mask);
- MaskVec.clear();
- for (unsigned i = 0; i != 4; ++i)
- MaskVec.push_back(DAG.getConstant(i, MaskEVT));
- for (unsigned i = 4; i != 8; ++i)
- MaskVec.push_back(PermMask.getOperand(i));
- Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
- return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V2, Mask);
+ if (BestLowQuad != -1)
+ MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32));
+ else
+ MaskVec.push_back(DAG.getConstant(0, MVT::i32));
+ if (BestHighQuad != -1)
+ MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32));
+ else
+ MaskVec.push_back(DAG.getConstant(1, MVT::i32));
+ SDOperand Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2);
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1),
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask);
+ NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV);
+
+ // Now sort high and low parts separately.
+ BitVector InOrder(8);
+ if (BestLowQuad != -1) {
+ // Sort lower half in order using PSHUFLW.
+ MaskVec.clear();
+ bool AnyOutOrder = false;
+ for (unsigned i = 0; i != 4; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(Elt);
+ InOrder.set(i);
+ } else {
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx != i)
+ AnyOutOrder = true;
+ MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT));
+ // If this element is in the right place after this shuffle, then
+ // remember it.
+ if ((int)(EltIdx / 4) == BestLowQuad)
+ InOrder.set(i);
+ }
+ }
+ if (AnyOutOrder) {
+ for (unsigned i = 4; i != 8; ++i)
+ MaskVec.push_back(DAG.getConstant(i, MaskEVT));
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
+ }
+ }
+
+ if (BestHighQuad != -1) {
+ // Sort high half in order using PSHUFHW if possible.
+ MaskVec.clear();
+ for (unsigned i = 0; i != 4; ++i)
+ MaskVec.push_back(DAG.getConstant(i, MaskEVT));
+ bool AnyOutOrder = false;
+ for (unsigned i = 4; i != 8; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(Elt);
+ InOrder.set(i);
+ } else {
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx != i)
+ AnyOutOrder = true;
+ MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));
+ // If this element is in the right place after this shuffle, then
+ // remember it.
+ if ((int)(EltIdx / 4) == BestHighQuad)
+ InOrder.set(i);
+ }
+ }
+ if (AnyOutOrder) {
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
+ }
+ }
+
+ // The other elements are put in the right place using pextrw and pinsrw.
+ for (unsigned i = 0; i != 8; ++i) {
+ if (InOrder[i])
+ continue;
+ SDOperand Elt = MaskElts[i];
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx == i)
+ continue;
+ SDOperand ExtOp = (EltIdx < 8)
+ ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
+ DAG.getConstant(EltIdx, PtrVT))
+ : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
+ DAG.getConstant(EltIdx - 8, PtrVT));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+ DAG.getConstant(i, PtrVT));
+ }
+ return NewV;
}
- // Lower than into extracts and inserts but try to do as few as possible.
+ // PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use
+ ///as few as possible.
// First, let's find out how many elements are already in the right order.
unsigned V1InOrder = 0;
unsigned V1FromV1 = 0;
unsigned V2InOrder = 0;
unsigned V2FromV2 = 0;
- SmallVector<unsigned, 8> V1Elts;
- SmallVector<unsigned, 8> V2Elts;
+ SmallVector<SDOperand, 8> V1Elts;
+ SmallVector<SDOperand, 8> V2Elts;
for (unsigned i = 0; i < 8; ++i) {
- SDOperand Elt = PermMask.getOperand(i);
+ SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF) {
- V1Elts.push_back(i);
- V2Elts.push_back(i);
+ V1Elts.push_back(Elt);
+ V2Elts.push_back(Elt);
++V1InOrder;
++V2InOrder;
+ continue;
+ }
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx == i) {
+ V1Elts.push_back(Elt);
+ V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
+ ++V1InOrder;
+ } else if (EltIdx == i+8) {
+ V1Elts.push_back(Elt);
+ V2Elts.push_back(DAG.getConstant(i, MaskEVT));
+ ++V2InOrder;
+ } else if (EltIdx < 8) {
+ V1Elts.push_back(Elt);
+ ++V1FromV1;
} else {
- unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
- if (EltIdx == i) {
- V1Elts.push_back(i);
- V2Elts.push_back(i+8);
- ++V1InOrder;
- } else if (EltIdx == i+8) {
- V1Elts.push_back(i+8);
- V2Elts.push_back(i);
- ++V2InOrder;
- } else {
- V1Elts.push_back(EltIdx);
- V2Elts.push_back(EltIdx);
- if (EltIdx < 8)
- ++V1FromV1;
- else
- ++V2FromV2;
- }
+ V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
+ ++V2FromV2;
}
}
@@ -3377,33 +3501,92 @@
std::swap(V1FromV1, V2FromV2);
}
- MVT::ValueType PtrVT = TLI.getPointerTy();
- if (V1FromV1) {
- // If there are elements that are from V1 but out of place,
- // then first sort them in place
- SmallVector<SDOperand, 8> MaskVec;
+ if ((V1FromV1 + V1InOrder) != 8) {
+ // Some elements are from V2.
+ if (V1FromV1) {
+ // If there are elements that are from V1 but out of place,
+ // then first sort them in place
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i < 8; ++i) {
+ SDOperand Elt = V1Elts[i];
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
+ continue;
+ }
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx >= 8)
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
+ else
+ MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
+ }
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
+ }
+
+ NewV = V1;
for (unsigned i = 0; i < 8; ++i) {
- unsigned EltIdx = V1Elts[i];
- if (EltIdx >= 8)
- MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
- else
- MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
+ SDOperand Elt = V1Elts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx < 8)
+ continue;
+ SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
+ DAG.getConstant(EltIdx - 8, PtrVT));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+ DAG.getConstant(i, PtrVT));
}
- SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
- V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
+ return NewV;
+ } else {
+ // All elements are from V1.
+ NewV = V1;
+ for (unsigned i = 0; i < 8; ++i) {
+ SDOperand Elt = V1Elts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
+ DAG.getConstant(EltIdx, PtrVT));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+ DAG.getConstant(i, PtrVT));
+ }
+ return NewV;
}
+}
- // Now let's insert elements from the other vector.
- for (unsigned i = 0; i < 8; ++i) {
- unsigned EltIdx = V1Elts[i];
- if (EltIdx < 8)
- continue;
- SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
- DAG.getConstant(EltIdx - 8, PtrVT));
- V1 = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V1, ExtOp,
- DAG.getConstant(i, PtrVT));
+/// RewriteAs4WideShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones if possible. This can be done when every pair / quad of shuffle mask
+/// elements point to elements in the right sequence. e.g.
+/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
+static
+SDOperand RewriteAs4WideShuffle(SDOperand V1, SDOperand V2,
+ SDOperand PermMask, SelectionDAG &DAG,
+ TargetLowering &TLI) {
+ unsigned NumElems = PermMask.getNumOperands();
+ unsigned Scale = NumElems / 4;
+ SmallVector<SDOperand, 4> MaskVec;
+ for (unsigned i = 0; i < NumElems; i += Scale) {
+ unsigned StartIdx = ~0U;
+ for (unsigned j = 0; j < Scale; ++j) {
+ SDOperand Elt = PermMask.getOperand(i+j);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (StartIdx == ~0U)
+ StartIdx = EltIdx - (EltIdx % Scale);
+ if (EltIdx != StartIdx + j)
+ return SDOperand();
+ }
+ if (StartIdx == ~0U)
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
+ else
+ MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32));
}
- return V1;
+
+ V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
+ V2 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V2);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0],4));
}
SDOperand
@@ -3544,18 +3727,31 @@
}
}
+ // If the shuffle can be rewritten as a 4 wide shuffle, then do it!
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+ SDOperand NewOp = RewriteAs4WideShuffle(V1, V2, PermMask, DAG, *this);
+ if (NewOp.Val)
+ return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ }
+
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
- if (VT == MVT::v8i16)
- return LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
+ if (VT == MVT::v8i16) {
+ SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
+ if (NewOp.Val)
+ return NewOp;
+ }
- if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
+ // Handle all 4 wide cases with a number of shuffles.
+ if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
// Don't do this for MMX.
MVT::ValueType MaskVT = PermMask.getValueType();
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector<std::pair<int, int>, 8> Locs;
Locs.reserve(NumElems);
- SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
- SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
+ SmallVector<SDOperand, 8> Mask1(NumElems,
+ DAG.getNode(ISD::UNDEF, MaskEVT));
+ SmallVector<SDOperand, 8> Mask2(NumElems,
+ DAG.getNode(ISD::UNDEF, MaskEVT));
unsigned NumHi = 0;
unsigned NumLo = 0;
// If no more than two elements come from either vector. This can be
@@ -3661,6 +3857,13 @@
MVT::ValueType VT = Op.getValueType();
// TODO: handle v16i8.
if (MVT::getSizeInBits(VT) == 16) {
+ SDOperand Vec = Op.getOperand(0);
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
+ Op.getOperand(1)));
// Transform it so it match pextrw which produces a 32-bit result.
MVT::ValueType EVT = (MVT::ValueType)(VT+1);
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
@@ -3669,7 +3872,6 @@
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (MVT::getSizeInBits(VT) == 32) {
- SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
@@ -3686,12 +3888,12 @@
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
+ SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getConstant(0, getPointerTy()));
} else if (MVT::getSizeInBits(VT) == 64) {
- SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
@@ -3706,6 +3908,7 @@
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
+ SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll?rev=44836&r1=44835&r2=44836&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll Mon Dec 10 19:46:18 2007
@@ -1,37 +1,28 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpck
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pextrw | count 7
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 7
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuf | count 2
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pextrw | count 4
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 6
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuflw | count 3
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufhw | count 2
-define void @t1(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
+define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) {
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
- store <8 x i16> %tmp3, <8 x i16>* %res
- ret void
+ ret <8 x i16> %tmp3
}
-define void @t2(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 13, i32 4, i32 5, i32 6, i32 7 >
- store <8 x i16> %tmp3, <8 x i16>* %res
- ret void
+define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
+ ret <8 x i16> %tmp
}
-define void @t3(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
- store <8 x i16> %tmp3, <8 x i16>* %res
- ret void
+define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
+ ret <8 x i16> %tmp
}
-define void @t4(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
- store <8 x i16> %tmp3, <8 x i16>* %res
- ret void
+define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
+ ret <8 x i16> %tmp
}
Added: llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll?rev=44836&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll Mon Dec 10 19:46:18 2007
@@ -0,0 +1,21 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movlhps | count 1
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss | count 1
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd | count 1
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuflw | count 1
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufhw | count 1
+
+define <8 x i16> @t1(<8 x i16> %A, <8 x i16> %B) {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
+ ret <8 x i16> %tmp
+}
+
+define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
+ ret <8 x i16> %tmp
+}
+
+define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 >
+ ret <8 x i16> %tmp
+}
More information about the llvm-commits
mailing list