[llvm-commits] [llvm] r47290 - in /llvm/trunk: include/llvm/CodeGen/SelectionDAGNodes.h lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrMMX.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/vec_shift2.ll
Evan Cheng
evan.cheng at apple.com
Mon Feb 18 15:04:33 PST 2008
Author: evancheng
Date: Mon Feb 18 17:04:32 2008
New Revision: 47290
URL: http://llvm.org/viewvc/llvm-project?rev=47290&view=rev
Log:
- When the DAG combiner folds a bit convert into a BUILD_VECTOR, it should check whether the BUILD_VECTOR is essentially a SCALAR_TO_VECTOR. Avoid turning (v8i16) <10, u, u, u> into <10, 0, u, u, u, u, u, u>. Instead, simply convert it to a SCALAR_TO_VECTOR of the proper type.
- X86 now normalizes SCALAR_TO_VECTOR to (BIT_CONVERT (v4i32 SCALAR_TO_VECTOR)). Get rid of X86ISD::S2VEC.
Added:
llvm/trunk/test/CodeGen/X86/vec_shift2.ll
Modified:
llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrMMX.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h?rev=47290&r1=47289&r2=47290&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Mon Feb 18 17:04:32 2008
@@ -611,6 +611,11 @@
/// BUILD_VECTOR where all of the elements are 0 or undef.
bool isBuildVectorAllZeros(const SDNode *N);
+ /// isScalarToVector - Return true if the specified node is a
+ /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+ /// element is not an undef.
+ bool isScalarToVector(const SDNode *N);
+
/// isDebugLabel - Return true if the specified node represents a debug
/// label (i.e. ISD::LABEL or TargetInstrInfo::LABEL node and third operand
/// is 0).
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=47290&r1=47289&r2=47290&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Feb 18 17:04:32 2008
@@ -3450,14 +3450,16 @@
Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
}
- MVT::ValueType VT = MVT::getVectorType(DstEltVT,
- Ops.size());
+ MVT::ValueType VT = MVT::getVectorType(DstEltVT, Ops.size());
return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
}
// Finally, this must be the case where we are shrinking elements: each input
// turns into multiple outputs.
+ bool isS2V = ISD::isScalarToVector(BV);
unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+ MVT::ValueType VT = MVT::getVectorType(DstEltVT,
+ NumOutputsPerInput * BV->getNumOperands());
SmallVector<SDOperand, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
@@ -3466,18 +3468,19 @@
continue;
}
uint64_t OpVal = cast<ConstantSDNode>(BV->getOperand(i))->getValue();
-
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
unsigned ThisVal = OpVal & ((1ULL << DstBitSize)-1);
- OpVal >>= DstBitSize;
Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+ if (isS2V && i == 0 && j == 0 && ThisVal == OpVal)
+ // Simply turn this into a SCALAR_TO_VECTOR of the new type.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Ops[0]);
+ OpVal >>= DstBitSize;
}
// For big endian targets, swap the order of the pieces of each element.
if (TLI.isBigEndian())
std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
}
- MVT::ValueType VT = MVT::getVectorType(DstEltVT, Ops.size());
return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
}
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=47290&r1=47289&r2=47290&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Feb 18 17:04:32 2008
@@ -176,6 +176,27 @@
return true;
}
+/// isScalarToVector - Return true if the specified node is a
+/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+/// element is not an undef.
+bool ISD::isScalarToVector(const SDNode *N) {
+ if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return true;
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ if (N->getOperand(0).getOpcode() == ISD::UNDEF)
+ return false;
+ unsigned NumElems = N->getNumOperands();
+ for (unsigned i = 1; i < NumElems; ++i) {
+ SDOperand V = N->getOperand(i);
+ if (V.getOpcode() != ISD::UNDEF)
+ return false;
+ }
+ return true;
+}
+
+
/// isDebugLabel - Return true if the specified node represents a debug
/// label (i.e. ISD::LABEL or TargetInstrInfo::LABEL node and third operand
/// is 0).
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=47290&r1=47289&r2=47290&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Feb 18 17:04:32 2008
@@ -583,7 +583,6 @@
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
}
@@ -3834,7 +3833,16 @@
SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
- return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
+ MVT::ValueType VT = MVT::v2i32;
+ switch (Op.getValueType()) {
+ default: break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ VT = MVT::v4i32;
+ break;
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, AnyExt));
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
@@ -5357,7 +5365,6 @@
case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
case X86ISD::Wrapper: return "X86ISD::Wrapper";
- case X86ISD::S2VEC: return "X86ISD::S2VEC";
case X86ISD::PEXTRB: return "X86ISD::PEXTRB";
case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=47290&r1=47289&r2=47290&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Feb 18 17:04:32 2008
@@ -166,10 +166,6 @@
/// relative displacements.
WrapperRIP,
- /// S2VEC - X86 version of SCALAR_TO_VECTOR. The destination base does not
- /// have to match the operand type.
- S2VEC,
-
/// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRB.
PEXTRB,
Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=47290&r1=47289&r2=47290&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Mon Feb 18 17:04:32 2008
@@ -156,12 +156,13 @@
//===----------------------------------------------------------------------===//
// Data Transfer Instructions
-let neverHasSideEffects = 1 in
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}", []>;
-let isSimpleLoad = 1, mayLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (v2i32 (scalar_to_vector GR32:$src)))]>;
+let isSimpleLoad = 1, isReMaterializable = 1 in
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}", []>;
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>;
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
@@ -547,27 +548,25 @@
def : Pat<(v8i8 (bitconvert (i64 GR64:$src))),
(MMX_MOVD64to64rr GR64:$src)>;
-def MMX_X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
-
// Move scalar to XMM zero-extended
// movd to XMM register zero-extends
let AddedComplexity = 15 in {
def : Pat<(v8i8 (vector_shuffle immAllZerosV_bc,
- (v8i8 (MMX_X86s2vec GR32:$src)), MMX_MOVL_shuffle_mask)),
+ (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))),
+ MMX_MOVL_shuffle_mask)),
(MMX_MOVZDI2PDIrr GR32:$src)>;
def : Pat<(v4i16 (vector_shuffle immAllZerosV_bc,
- (v4i16 (MMX_X86s2vec GR32:$src)), MMX_MOVL_shuffle_mask)),
- (MMX_MOVZDI2PDIrr GR32:$src)>;
- def : Pat<(v2i32 (vector_shuffle immAllZerosV,
- (v2i32 (MMX_X86s2vec GR32:$src)), MMX_MOVL_shuffle_mask)),
+ (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
+ MMX_MOVL_shuffle_mask)),
(MMX_MOVZDI2PDIrr GR32:$src)>;
}
-// Scalar to v2i32 / v4i16 / v8i8. The source may be a GR32, but only the lower
+// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower
// 8 or 16-bits matter.
-def : Pat<(v8i8 (MMX_X86s2vec GR32:$src)), (MMX_MOVD64rr GR32:$src)>;
-def : Pat<(v4i16 (MMX_X86s2vec GR32:$src)), (MMX_MOVD64rr GR32:$src)>;
-def : Pat<(v2i32 (MMX_X86s2vec GR32:$src)), (MMX_MOVD64rr GR32:$src)>;
+def : Pat<(bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))),
+ (MMX_MOVD64rr GR32:$src)>;
+def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
+ (MMX_MOVD64rr GR32:$src)>;
// Patterns to perform canonical versions of vector shuffling.
let AddedComplexity = 10 in {
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=47290&r1=47289&r2=47290&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Feb 18 17:04:32 2008
@@ -34,7 +34,6 @@
def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
-def X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
def X86pextrw : SDNode<"X86ISD::PEXTRW",
@@ -1781,22 +1780,6 @@
(bitconvert (memopv2i64 addr:$src2))))]>;
}
-multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
- string OpcodeStr, Intrinsic IntId> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>;
- def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1,
- (scalar_to_vector (i32 imm:$src2))))]>;
-}
-
-
/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> {
@@ -1871,16 +1854,61 @@
defm PSADBW : PDI_binop_rm_int<0xE0, "psadbw", int_x86_sse2_psad_bw, 1>;
-defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", int_x86_sse2_psll_w>;
-defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", int_x86_sse2_psll_d>;
-defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", int_x86_sse2_psll_q>;
-
-defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", int_x86_sse2_psrl_w>;
-defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", int_x86_sse2_psrl_d>;
-defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", int_x86_sse2_psrl_q>;
+defm PSLLW : PDI_binop_rm_int<0xF1, "psllw", int_x86_sse2_psll_w>;
+defm PSLLD : PDI_binop_rm_int<0xF2, "pslld", int_x86_sse2_psll_d>;
+defm PSLLQ : PDI_binop_rm_int<0xF3, "psllq", int_x86_sse2_psll_q>;
+
+defm PSRLW : PDI_binop_rm_int<0xD1, "psrlw", int_x86_sse2_psrl_w>;
+defm PSRLD : PDI_binop_rm_int<0xD2, "psrld", int_x86_sse2_psrl_d>;
+defm PSRLQ : PDI_binop_rm_int<0xD3, "psrlq", int_x86_sse2_psrl_q>;
+
+defm PSRAW : PDI_binop_rm_int<0xE1, "psraw", int_x86_sse2_psra_w>;
+defm PSRAD : PDI_binop_rm_int<0xE2, "psrad", int_x86_sse2_psra_d>;
+
+// Some immediate variants need to match a bit_convert.
+def PSLLWri : PDIi8<0x71, MRM6r, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "psllw\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_psll_w VR128:$src1,
+ (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
+def PSLLDri : PDIi8<0x72, MRM6r, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "pslld\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_psll_d VR128:$src1,
+ (scalar_to_vector (i32 imm:$src2))))]>;
+def PSLLQri : PDIi8<0x73, MRM6r, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "psllq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_psll_q VR128:$src1,
+ (bc_v2i64 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
+
+def PSRLWri : PDIi8<0x71, MRM2r, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "psrlw\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_psrl_w VR128:$src1,
+ (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
+def PSRLDri : PDIi8<0x72, MRM2r, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "psrld\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_psrl_d VR128:$src1,
+ (scalar_to_vector (i32 imm:$src2))))]>;
+def PSRLQri : PDIi8<0x73, MRM2r, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "psrlq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_psrl_q VR128:$src1,
+ (bc_v2i64 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
+
+def PSRAWri : PDIi8<0x71, MRM4r, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "psraw\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_psra_w VR128:$src1,
+ (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
+def PSRADri : PDIi8<0x72, MRM4r, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "psrad\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_psra_d VR128:$src1,
+ (scalar_to_vector (i32 imm:$src2))))]>;
-defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", int_x86_sse2_psra_w>;
-defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_sse2_psra_d>;
// PSRAQ doesn't exist in SSE[1-3].
// 128-bit logical shifts.
@@ -2729,13 +2757,6 @@
def : Pat<(fextend (loadf32 addr:$src)),
(CVTSS2SDrm addr:$src)>;
-// Scalar to v8i16 / v16i8. The source may be a GR32, but only the lower 8 or
-// 16-bits matter.
-def : Pat<(v8i16 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>,
- Requires<[HasSSE2]>;
-def : Pat<(v16i8 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>,
- Requires<[HasSSE2]>;
-
// bit_convert
let Predicates = [HasSSE2] in {
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
Added: llvm/trunk/test/CodeGen/X86/vec_shift2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shift2.ll?rev=47290&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shift2.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_shift2.ll Mon Feb 18 17:04:32 2008
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep CPI
+
+define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind {
+ %tmp1 = bitcast <2 x i64> %b1 to <8 x i16>
+ %tmp2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp1, <8 x i16> bitcast (<4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > to <8 x i16>) ) nounwind readnone
+ %tmp3 = bitcast <8 x i16> %tmp2 to <2 x i64>
+ ret <2 x i64> %tmp3
+}
+
+define <4 x i32> @t2(<2 x i64> %b1, <2 x i64> %c) nounwind {
+ %tmp1 = bitcast <2 x i64> %b1 to <4 x i32>
+ %tmp2 = tail call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> %tmp1, <4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > ) nounwind readnone
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
More information about the llvm-commits mailing list