[llvm] r275215 - [Power9] Add codegen for VSX word insert/extract instructions
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 12 14:00:10 PDT 2016
Author: nemanjai
Date: Tue Jul 12 16:00:10 2016
New Revision: 275215
URL: http://llvm.org/viewvc/llvm-project?rev=275215&view=rev
Log:
[Power9] Add codegen for VSX word insert/extract instructions
This patch corresponds to review:
http://reviews.llvm.org/D20239
It adds exploitation of XXINSERTW and XXEXTRACTUW instructions that
are useful in some cases for inserting and extracting vector elements of
v4[if]32 vectors.
Added:
llvm/trunk/test/CodeGen/p9-xxinsertw-xxextractuw.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
llvm/trunk/lib/Target/PowerPC/README_P9.txt
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=275215&r1=275214&r2=275215&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Jul 12 16:00:10 2016
@@ -665,6 +665,10 @@ PPCTargetLowering::PPCTargetLowering(con
addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
}
+ if (Subtarget.hasP9Vector()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
+ }
}
if (Subtarget.hasQPX()) {
@@ -1018,6 +1022,8 @@ const char *PPCTargetLowering::getTarget
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
+ case PPCISD::XXINSERT: return "PPCISD::XXINSERT";
+ case PPCISD::VECSHL: return "PPCISD::VECSHL";
case PPCISD::CMPB: return "PPCISD::CMPB";
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
@@ -1495,6 +1501,91 @@ bool PPC::isSplatShuffleMask(ShuffleVect
return true;
}
+bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ unsigned &InsertAtByte, bool &Swap, bool IsLE) {
+ // On a match, ShiftElts, InsertAtByte and Swap are all set for the caller.
+ // Check that the mask shuffles whole words (aligned, consecutive byte runs).
+ for (unsigned i = 0; i < 4; ++i) {
+ unsigned B0 = N->getMaskElt(i*4);
+ unsigned B1 = N->getMaskElt(i*4+1);
+ unsigned B2 = N->getMaskElt(i*4+2);
+ unsigned B3 = N->getMaskElt(i*4+3);
+ if (B0 % 4) // first byte of the group must sit on a word boundary
+ return false;
+ if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1) // bytes must be consecutive
+ return false;
+ }
+
+ // Now we look at mask elements 0,4,8,12, converted to word indices.
+ unsigned M0 = N->getMaskElt(0) / 4;
+ unsigned M1 = N->getMaskElt(4) / 4;
+ unsigned M2 = N->getMaskElt(8) / 4;
+ unsigned M3 = N->getMaskElt(12) / 4;
+ unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; // indexed by (source word & 3)
+ unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; // indexed by (source word & 3)
+
+ // Below, let H and L be arbitrary elements of the shuffle mask
+ // where H is in the range [4,7] and L is in the range [0,3].
+ // H, 1, 2, 3 or L, 5, 6, 7
+ if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
+ (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
+ ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
+ InsertAtByte = IsLE ? 12 : 0;
+ Swap = M0 < 4; // inserted word is an L, i.e. the other three are in [4,7]
+ return true;
+ }
+ // 0, H, 2, 3 or 4, L, 6, 7
+ if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
+ (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
+ ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
+ InsertAtByte = IsLE ? 8 : 4;
+ Swap = M1 < 4; // same rule as above, applied to result word 1
+ return true;
+ }
+ // 0, 1, H, 3 or 4, 5, L, 7
+ if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
+ (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
+ ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
+ InsertAtByte = IsLE ? 4 : 8;
+ Swap = M2 < 4; // same rule as above, applied to result word 2
+ return true;
+ }
+ // 0, 1, 2, H or 4, 5, 6, L
+ if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
+ (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
+ ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
+ InsertAtByte = IsLE ? 0 : 12;
+ Swap = M3 < 4; // same rule as above, applied to result word 3
+ return true;
+ }
+
+ // If both vector operands for the shuffle are the same vector, the mask will
+ // contain only elements from the first one and the second one will be undef.
+ if (N->getOperand(1).isUndef()) {
+ ShiftElts = 0;
+ Swap = true; // NOTE: ShiftElts/Swap are written even if nothing below matches
+ unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; // the word whose table shift is 0
+ if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
+ InsertAtByte = IsLE ? 12 : 0;
+ return true;
+ }
+ if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
+ InsertAtByte = IsLE ? 8 : 4;
+ return true;
+ }
+ if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
+ InsertAtByte = IsLE ? 4 : 8;
+ return true;
+ }
+ if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
+ InsertAtByte = IsLE ? 0 : 12;
+ return true;
+ }
+ }
+
+ return false;
+}
+
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
@@ -7349,6 +7440,27 @@ SDValue PPCTargetLowering::LowerVECTOR_S
EVT VT = Op.getValueType();
bool isLittleEndian = Subtarget.isLittleEndian();
+ unsigned ShiftElts, InsertAtByte;
+ bool Swap;
+ if (Subtarget.hasP9Vector() &&
+ PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
+ isLittleEndian)) {
+ if (Swap)
+ std::swap(V1, V2);
+ SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+ SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
+ if (ShiftElts) {
+ SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
+ DAG.getConstant(ShiftElts, dl, MVT::i32));
+ SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl,
+ DAG.getConstant(InsertAtByte, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
+ }
+ SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2,
+ DAG.getConstant(InsertAtByte, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
+ }
+
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=275215&r1=275214&r2=275215&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Tue Jul 12 16:00:10 2016
@@ -65,6 +65,14 @@ namespace llvm {
///
XXSPLT,
+ /// XXINSERT - The PPC VSX insert instruction
+ ///
+ XXINSERT,
+
+ /// VECSHL - The PPC VSX shift left instruction
+ ///
+ VECSHL,
+
/// The CMPB instruction (takes two operands of i32 or i64).
CMPB,
@@ -420,6 +428,16 @@ namespace llvm {
/// VSPLTB/VSPLTH/VSPLTW.
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
+ /// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by
+ /// the XXINSERTW instruction introduced in ISA 3.0. This is essentially any
+ /// shuffle of v4f32/v4i32 vectors that just inserts one element from one
+ /// vector into the other. On success it also sets three output parameters:
+ /// ShiftElts (how far the source vector needs to be shifted), InsertAtByte
+ /// (the byte number to give the instruction so the element lands in the
+ /// desired location) and Swap (whether the two operands must be swapped).
+ bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ unsigned &InsertAtByte, bool &Swap, bool IsLE);
+
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=275215&r1=275214&r2=275215&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Tue Jul 12 16:00:10 2016
@@ -35,6 +35,14 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2
SDTCisVec<1>, SDTCisInt<2>
]>;
+def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>,
+ SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
+]>;
+
+def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>,
+ SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
+]>;
+
def SDT_PPCvcmp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
]>;
@@ -144,8 +152,10 @@ def PPCaddiTlsldLAddr : SDNode<"PPCISD::
def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>;
def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
-def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
+def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>;
+def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td?rev=275215&r1=275214&r2=275215&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td Tue Jul 12 16:00:10 2016
@@ -779,7 +779,9 @@ let Uses = [RM] in {
def XXSLDWI : XX3Form_2<60, 2,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW),
- "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>;
+ "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm,
+ [(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB,
+ imm32SExt16:$SHW))]>;
def XXSPLTW : XX2Form_2<60, 164,
(outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
"xxspltw $XT, $XB, $UIM", IIC_VecPerm,
@@ -1819,9 +1821,14 @@ def : Pat<(f64 (bitconvert i64:$S)),
(f64 (MTVSRD $S))>;
}
+def AlignValues {
+ dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
+ dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
+}
+
// The following VSX instructions were introduced in Power ISA 3.0
def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">;
-let Predicates = [HasP9Vector] in {
+let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// [PO VRT XO VRB XO /]
class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
@@ -2028,13 +2035,17 @@ let Predicates = [HasP9Vector] in {
// Vector Insert Word
// XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
- def XXINSERTW : XX2_RD6_UIM5_RS6<60, 181,
- (outs vsrc:$XT), (ins u4imm:$UIMM, vsrc:$XB),
- "xxinsertw $XT, $XB, $UIMM", IIC_VecFP, []>;
+ def XXINSERTW :
+ XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
+ (ins vsrc:$XTi, vsrc:$XB, u4imm:$UIM),
+ "xxinsertw $XT, $XB, $UIM", IIC_VecFP,
+ [(set v4i32:$XT, (PPCxxinsert v4i32:$XTi, v4i32:$XB,
+ imm32SExt16:$UIM))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
// Vector Extract Unsigned Word
def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
- (outs vsrc:$XT), (ins u4imm:$UIMM, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
"xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
// Vector Insert Exponent DP/SP
@@ -2173,4 +2184,59 @@ let Predicates = [HasP9Vector] in {
def STXVL : X_XS6_RA5_RB5<31, 397, "stxvl" , vsrc, []>;
def STXVLL : X_XS6_RA5_RB5<31, 429, "stxvll" , vsrc, []>;
} // end mayStore
-} // end HasP9Vector
+
+ // Patterns for which instructions from ISA 3.0 are a better match
+ let Predicates = [IsLittleEndian, HasP9Vector] in {
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
+ def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+ def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+ def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+ def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+ } // IsLittleEndian, HasP9Vector
+
+ let Predicates = [IsBigEndian, HasP9Vector] in {
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
+ def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+ def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+ def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+ def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+ } // IsBigEndian, HasP9Vector
+} // end HasP9Vector, AddedComplexity
Modified: llvm/trunk/lib/Target/PowerPC/README_P9.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/README_P9.txt?rev=275215&r1=275214&r2=275215&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/README_P9.txt (original)
+++ llvm/trunk/lib/Target/PowerPC/README_P9.txt Tue Jul 12 16:00:10 2016
@@ -397,6 +397,8 @@ Fixed Point Facility:
(set f128:$vT, (int_ppc_vsx_xsxsigqp f128$vB)) // xsxsigqp
- Vector Insert Word: xxinsertw
+ - Useful for inserting f32/i32 elements into vectors (the element to be
+ inserted needs to be prepared)
. Note: llvm has insertelem in "Vector Operations"
; yields <n x <ty>>
<result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx>
@@ -409,6 +411,10 @@ Fixed Point Facility:
(set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM))
- Vector Extract Unsigned Word: xxextractuw
+ - Not useful for extraction of f32 from v4f32 (the current pattern is better -
+ shift->convert)
+ - It is useful for (uint_to_fp (vector_extract v4i32, N))
+ - Unfortunately, it can't be used for (sint_to_fp (vector_extract v4i32, N))
. Note: llvm has extractelement in "Vector Operations"
; yields <ty>
<result> = extractelement <n x <ty>> <val>, <ty2> <idx>
Added: llvm/trunk/test/CodeGen/p9-xxinsertw-xxextractuw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/p9-xxinsertw-xxextractuw.ll?rev=275215&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/p9-xxinsertw-xxextractuw.ll (added)
+++ llvm/trunk/test/CodeGen/p9-xxinsertw-xxextractuw.ll Tue Jul 12 16:00:10 2016
@@ -0,0 +1,970 @@
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 0
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj0ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 0
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 5, i32 1, i32 2, i32 3>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj0ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 0
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 1, i32 2, i32 3>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj0ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 0
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 4
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 4
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 4
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 4
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 8
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj2ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 8
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj2ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 8
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj2ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 8
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 12
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj3ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 12
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj3ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 12
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj3ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 12
+ %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x float> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 0
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj0ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 0
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 1, i32 2, i32 3>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj0ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 0
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 1, i32 2, i32 3>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj0ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 0
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 4
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj1ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 4
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj1ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 4
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj1ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 4
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 8
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj2ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 8
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj2ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 8
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj2ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 8
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 12
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj3ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 12
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj3ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 12
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+ ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj3ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 12
+ %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x i32> %vecins
+}
+
+define float @_Z13testUiToFpExtILj0EEfDv4_j(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: _Z13testUiToFpExtILj0EEfDv4_j
+; CHECK: xxextractuw 0, 34, 12
+; CHECK: xscvuxdsp 1, 0
+; CHECK-BE-LABEL: _Z13testUiToFpExtILj0EEfDv4_j
+; CHECK-BE: xxextractuw 0, 34, 0
+; CHECK-BE: xscvuxdsp 1, 0
+ %vecext = extractelement <4 x i32> %a, i32 0
+ %conv = uitofp i32 %vecext to float
+ ret float %conv
+}
+
+define float @_Z13testUiToFpExtILj1EEfDv4_j(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: _Z13testUiToFpExtILj1EEfDv4_j
+; CHECK: xxextractuw 0, 34, 8
+; CHECK: xscvuxdsp 1, 0
+; CHECK-BE-LABEL: _Z13testUiToFpExtILj1EEfDv4_j
+; CHECK-BE: xxextractuw 0, 34, 4
+; CHECK-BE: xscvuxdsp 1, 0
+ %vecext = extractelement <4 x i32> %a, i32 1
+ %conv = uitofp i32 %vecext to float
+ ret float %conv
+}
+
+; Extracts element 2 of %a and converts it from unsigned i32 to float. Both runs
+; expect xxextractuw followed by xscvuxdsp; only the xxextractuw immediate differs.
+define float @_Z13testUiToFpExtILj2EEfDv4_j(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: _Z13testUiToFpExtILj2EEfDv4_j
+; CHECK: xxextractuw 0, 34, 4
+; CHECK: xscvuxdsp 1, 0
+; CHECK-BE-LABEL: _Z13testUiToFpExtILj2EEfDv4_j
+; CHECK-BE: xxextractuw 0, 34, 8
+; CHECK-BE: xscvuxdsp 1, 0
+ %vecext = extractelement <4 x i32> %a, i32 2
+ %conv = uitofp i32 %vecext to float
+ ret float %conv
+}
+
+; Extracts element 3 of %a and converts it from unsigned i32 to float. Both runs
+; expect xxextractuw followed by xscvuxdsp; only the xxextractuw immediate differs.
+define float @_Z13testUiToFpExtILj3EEfDv4_j(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: _Z13testUiToFpExtILj3EEfDv4_j
+; CHECK: xxextractuw 0, 34, 0
+; CHECK: xscvuxdsp 1, 0
+; CHECK-BE-LABEL: _Z13testUiToFpExtILj3EEfDv4_j
+; CHECK-BE: xxextractuw 0, 34, 12
+; CHECK-BE: xscvuxdsp 1, 0
+ %vecext = extractelement <4 x i32> %a, i32 3
+ %conv = uitofp i32 %vecext to float
+ ret float %conv
+}
+
+; Inserts the scalar float %b into lane 0 of %a. Both runs expect xscvdpspn,
+; xxsldwi, then xxinsertw; only the xxinsertw immediate differs between them.
+define <4 x float> @_Z10testInsEltILj0EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_
+; CHECK: xscvdpspn 0, 1
+; CHECK: xxsldwi 0, 0, 0, 3
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_
+; CHECK-BE: xscvdpspn 0, 1
+; CHECK-BE: xxsldwi 0, 0, 0, 3
+; CHECK-BE: xxinsertw 34, 0, 0
+ %vecins = insertelement <4 x float> %a, float %b, i32 0
+ ret <4 x float> %vecins
+}
+
+; Inserts the scalar float %b into lane 1 of %a. Both runs expect xscvdpspn,
+; xxsldwi, then xxinsertw; only the xxinsertw immediate differs between them.
+define <4 x float> @_Z10testInsEltILj1EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_
+; CHECK: xscvdpspn 0, 1
+; CHECK: xxsldwi 0, 0, 0, 3
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_
+; CHECK-BE: xscvdpspn 0, 1
+; CHECK-BE: xxsldwi 0, 0, 0, 3
+; CHECK-BE: xxinsertw 34, 0, 4
+ %vecins = insertelement <4 x float> %a, float %b, i32 1
+ ret <4 x float> %vecins
+}
+
+; Inserts the scalar float %b into lane 2 of %a. Both runs expect xscvdpspn,
+; xxsldwi, then xxinsertw; only the xxinsertw immediate differs between them.
+define <4 x float> @_Z10testInsEltILj2EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_
+; CHECK: xscvdpspn 0, 1
+; CHECK: xxsldwi 0, 0, 0, 3
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_
+; CHECK-BE: xscvdpspn 0, 1
+; CHECK-BE: xxsldwi 0, 0, 0, 3
+; CHECK-BE: xxinsertw 34, 0, 8
+ %vecins = insertelement <4 x float> %a, float %b, i32 2
+ ret <4 x float> %vecins
+}
+
+; Inserts the scalar float %b into lane 3 of %a. Both runs expect xscvdpspn,
+; xxsldwi, then xxinsertw; only the xxinsertw immediate differs between them.
+define <4 x float> @_Z10testInsEltILj3EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_
+; CHECK: xscvdpspn 0, 1
+; CHECK: xxsldwi 0, 0, 0, 3
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_
+; CHECK-BE: xscvdpspn 0, 1
+; CHECK-BE: xxsldwi 0, 0, 0, 3
+; CHECK-BE: xxinsertw 34, 0, 12
+ %vecins = insertelement <4 x float> %a, float %b, i32 3
+ ret <4 x float> %vecins
+}
+
+; Inserts the zeroext i32 %b into lane 0 of %a. Both runs expect mtvsrwz followed
+; by xxinsertw; only the xxinsertw immediate differs between them.
+define <4 x i32> @_Z10testInsEltILj0EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj0EDv4_jjET0_S1_T1_
+; CHECK: mtvsrwz 0, 5
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z10testInsEltILj0EDv4_jjET0_S1_T1_
+; CHECK-BE: mtvsrwz 0, 5
+; CHECK-BE: xxinsertw 34, 0, 0
+ %vecins = insertelement <4 x i32> %a, i32 %b, i32 0
+ ret <4 x i32> %vecins
+}
+
+; Inserts the zeroext i32 %b into lane 1 of %a. Both runs expect mtvsrwz followed
+; by xxinsertw; only the xxinsertw immediate differs between them.
+define <4 x i32> @_Z10testInsEltILj1EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj1EDv4_jjET0_S1_T1_
+; CHECK: mtvsrwz 0, 5
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z10testInsEltILj1EDv4_jjET0_S1_T1_
+; CHECK-BE: mtvsrwz 0, 5
+; CHECK-BE: xxinsertw 34, 0, 4
+ %vecins = insertelement <4 x i32> %a, i32 %b, i32 1
+ ret <4 x i32> %vecins
+}
+
+; Inserts the zeroext i32 %b into lane 2 of %a. Both runs expect mtvsrwz followed
+; by xxinsertw; only the xxinsertw immediate differs between them.
+define <4 x i32> @_Z10testInsEltILj2EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj2EDv4_jjET0_S1_T1_
+; CHECK: mtvsrwz 0, 5
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z10testInsEltILj2EDv4_jjET0_S1_T1_
+; CHECK-BE: mtvsrwz 0, 5
+; CHECK-BE: xxinsertw 34, 0, 8
+ %vecins = insertelement <4 x i32> %a, i32 %b, i32 2
+ ret <4 x i32> %vecins
+}
+
+; Inserts the zeroext i32 %b into lane 3 of %a. Both runs expect mtvsrwz followed
+; by xxinsertw; only the xxinsertw immediate differs between them.
+define <4 x i32> @_Z10testInsEltILj3EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj3EDv4_jjET0_S1_T1_
+; CHECK: mtvsrwz 0, 5
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z10testInsEltILj3EDv4_jjET0_S1_T1_
+; CHECK-BE: mtvsrwz 0, 5
+; CHECK-BE: xxinsertw 34, 0, 12
+ %vecins = insertelement <4 x i32> %a, i32 %b, i32 3
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 0 of %b goes to lane 0, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 0
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 1 of %b goes to lane 0, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj0ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 0
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 1, i32 5, i32 6, i32 7>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 2 of %b goes to lane 0, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj0ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 0
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 2, i32 5, i32 6, i32 7>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 3 of %b goes to lane 0, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj0ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 0
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 3, i32 5, i32 6, i32 7>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 0 of %b goes to lane 1, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 4
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 1 of %b goes to lane 1, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj1ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 4
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 2 of %b goes to lane 1, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj1ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 4
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 2, i32 6, i32 7>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 3 of %b goes to lane 1, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj1ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 4
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 3, i32 6, i32 7>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 0 of %b goes to lane 2, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 8
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 7>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 1 of %b goes to lane 2, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj2ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 8
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 1, i32 7>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 2 of %b goes to lane 2, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj2ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 8
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 3 of %b goes to lane 2, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj2ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 8
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 3, i32 7>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 0 of %b goes to lane 3, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 12
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 1 of %b goes to lane 3, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj3ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 12
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 1>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 2 of %b goes to lane 3, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj3ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 12
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 2>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 3 of %b goes to lane 3, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x float> @_Z7testInsILj3ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 12
+ %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
+ ret <4 x float> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 0 of %b goes to lane 0, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 0
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 1 of %b goes to lane 0, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj0ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 0
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 1, i32 5, i32 6, i32 7>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 2 of %b goes to lane 0, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj0ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 0
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 2, i32 5, i32 6, i32 7>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 3 of %b goes to lane 0, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj0ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 0
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 3, i32 5, i32 6, i32 7>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 0 of %b goes to lane 1, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 4
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 1 of %b goes to lane 1, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj1ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 4
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 2 of %b goes to lane 1, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj1ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 4
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 2, i32 6, i32 7>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 3 of %b goes to lane 1, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj1ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 4
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 3, i32 6, i32 7>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 0 of %b goes to lane 2, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 8
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 7>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 1 of %b goes to lane 2, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj2ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 8
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 1, i32 7>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 2 of %b goes to lane 2, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj2ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 8
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 3 of %b goes to lane 2, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj2ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 8
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 3, i32 7>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 0 of %b goes to lane 3, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 12
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 1 of %b goes to lane 3, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj3ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 12
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 1>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 2 of %b goes to lane 3, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj3ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 12
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 2>
+ ret <4 x i32> %vecins
+}
+
+; "_r" variant (%b is the first shuffle operand): element 3 of %b goes to lane 3, other lanes come from %a.
+; Labels include the "_r" suffix so they cannot match another symbol that shares this name as a prefix.
+define <4 x i32> @_Z7testInsILj3ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 12
+ %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
+ ret <4 x i32> %vecins
+}
+; Both shuffle operands are %a: element 1 (mask index 5) replaces lane 0. Only the
+; BE prefix is checked; it expects xxinsertw with register 34 as both operands.
+define <4 x float> @testSameVecEl0BE(<4 x float> %a) {
+entry:
+; CHECK-BE-LABEL: testSameVecEl0BE
+; CHECK-BE: xxinsertw 34, 34, 0
+ %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 5, i32 1, i32 2, i32 3>
+ ret <4 x float> %vecins
+}
+; Both shuffle operands are %a: element 1 (mask index 5) replaces lane 2. Only the
+; BE prefix is checked; it expects xxinsertw with register 34 as both operands.
+define <4 x float> @testSameVecEl2BE(<4 x float> %a) {
+entry:
+; CHECK-BE-LABEL: testSameVecEl2BE
+; CHECK-BE: xxinsertw 34, 34, 8
+ %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
+ ret <4 x float> %vecins
+}
+; Both shuffle operands are %a: element 1 (mask index 5) replaces lane 3. Only the
+; BE prefix is checked; it expects xxinsertw with register 34 as both operands.
+define <4 x float> @testSameVecEl3BE(<4 x float> %a) {
+entry:
+; CHECK-BE-LABEL: testSameVecEl3BE
+; CHECK-BE: xxinsertw 34, 34, 12
+ %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+ ret <4 x float> %vecins
+}
+; Both shuffle operands are %a: element 2 (mask index 6) replaces lane 0. Only the
+; default prefix is checked; it expects xxinsertw with register 34 as both operands.
+define <4 x float> @testSameVecEl0LE(<4 x float> %a) {
+entry:
+; CHECK-LABEL: testSameVecEl0LE
+; CHECK: xxinsertw 34, 34, 12
+ %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 6, i32 1, i32 2, i32 3>
+ ret <4 x float> %vecins
+}
+; Both shuffle operands are %a: element 2 (mask index 6) replaces lane 1. Only the
+; default prefix is checked; it expects xxinsertw with register 34 as both operands.
+define <4 x float> @testSameVecEl1LE(<4 x float> %a) {
+entry:
+; CHECK-LABEL: testSameVecEl1LE
+; CHECK: xxinsertw 34, 34, 8
+ %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+ ret <4 x float> %vecins
+}
+; Both shuffle operands are %a: element 2 (mask index 6) replaces lane 3. Only the
+; default prefix is checked; it expects xxinsertw with register 34 as both operands.
+define <4 x float> @testSameVecEl3LE(<4 x float> %a) {
+entry:
+; CHECK-LABEL: testSameVecEl3LE
+; CHECK: xxinsertw 34, 34, 0
+ %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+ ret <4 x float> %vecins
+}
More information about the llvm-commits
mailing list