[llvm] r275215 - [Power9] Add codegen for VSX word insert/extract instructions

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 12 14:00:10 PDT 2016


Author: nemanjai
Date: Tue Jul 12 16:00:10 2016
New Revision: 275215

URL: http://llvm.org/viewvc/llvm-project?rev=275215&view=rev
Log:
[Power9] Add codegen for VSX word insert/extract instructions

This patch corresponds to review:
http://reviews.llvm.org/D20239

It adds exploitation of XXINSERTW and XXEXTRACTUW instructions that
are useful in some cases for inserting and extracting vector elements of
v4[if]32 vectors.

Added:
    llvm/trunk/test/CodeGen/p9-xxinsertw-xxextractuw.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
    llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
    llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/trunk/lib/Target/PowerPC/README_P9.txt

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=275215&r1=275214&r2=275215&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Jul 12 16:00:10 2016
@@ -665,6 +665,10 @@ PPCTargetLowering::PPCTargetLowering(con
       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
     }
+    if (Subtarget.hasP9Vector()) {
+      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
+      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
+    }
   }
 
   if (Subtarget.hasQPX()) {
@@ -1018,6 +1022,8 @@ const char *PPCTargetLowering::getTarget
   case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
   case PPCISD::VPERM:           return "PPCISD::VPERM";
   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
+  case PPCISD::XXINSERT:        return "PPCISD::XXINSERT";
+  case PPCISD::VECSHL:          return "PPCISD::VECSHL";
   case PPCISD::CMPB:            return "PPCISD::CMPB";
   case PPCISD::Hi:              return "PPCISD::Hi";
   case PPCISD::Lo:              return "PPCISD::Lo";
@@ -1495,6 +1501,91 @@ bool PPC::isSplatShuffleMask(ShuffleVect
   return true;
 }
 
+bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
+
+  // Check that the mask is shuffling words
+  for (unsigned i = 0; i < 4; ++i) {
+    unsigned B0 = N->getMaskElt(i*4);
+    unsigned B1 = N->getMaskElt(i*4+1);
+    unsigned B2 = N->getMaskElt(i*4+2);
+    unsigned B3 = N->getMaskElt(i*4+3);
+    if (B0 % 4)
+      return false;
+    if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1)
+      return false;
+  }
+
+  // Now we look at mask elements 0,4,8,12
+  unsigned M0 = N->getMaskElt(0) / 4;
+  unsigned M1 = N->getMaskElt(4) / 4;
+  unsigned M2 = N->getMaskElt(8) / 4;
+  unsigned M3 = N->getMaskElt(12) / 4;
+  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
+  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
+
+  // Below, let H and L be arbitrary elements of the shuffle mask
+  // where H is in the range [4,7] and L is in the range [0,3].
+  // H, 1, 2, 3 or L, 5, 6, 7
+  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
+      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
+    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
+    InsertAtByte = IsLE ? 12 : 0;
+    Swap = M0 < 4;
+    return true;
+  }
+  // 0, H, 2, 3 or 4, L, 6, 7
+  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
+      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
+    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
+    InsertAtByte = IsLE ? 8 : 4;
+    Swap = M1 < 4;
+    return true;
+  }
+  // 0, 1, H, 3 or 4, 5, L, 7
+  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
+      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
+    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
+    InsertAtByte = IsLE ? 4 : 8;
+    Swap = M2 < 4;
+    return true;
+  }
+  // 0, 1, 2, H or 4, 5, 6, L
+  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
+      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
+    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
+    InsertAtByte = IsLE ? 0 : 12;
+    Swap = M3 < 4;
+    return true;
+  }
+
+  // If both vector operands for the shuffle are the same vector, the mask will
+  // contain only elements from the first one and the second one will be undef.
+  if (N->getOperand(1).isUndef()) {
+    ShiftElts = 0;
+    Swap = true;
+    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
+    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
+      InsertAtByte = IsLE ? 12 : 0;
+      return true;
+    }
+    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
+      InsertAtByte = IsLE ? 8 : 4;
+      return true;
+    }
+    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
+      InsertAtByte = IsLE ? 4 : 8;
+      return true;
+    }
+    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
+      InsertAtByte = IsLE ? 0 : 12;
+      return true;
+    }
+  }
+
+  return false;
+}
+
 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
@@ -7349,6 +7440,27 @@ SDValue PPCTargetLowering::LowerVECTOR_S
   EVT VT = Op.getValueType();
   bool isLittleEndian = Subtarget.isLittleEndian();
 
+  unsigned ShiftElts, InsertAtByte;
+  bool Swap;
+  if (Subtarget.hasP9Vector() &&
+      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
+                           isLittleEndian)) {
+    if (Swap)
+      std::swap(V1, V2);
+    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
+    if (ShiftElts) {
+      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
+                                DAG.getConstant(ShiftElts, dl, MVT::i32));
+      SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl,
+                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
+      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
+    }
+    SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2,
+                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
+    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
+  }
+
   if (Subtarget.hasVSX()) {
     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
       int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=275215&r1=275214&r2=275215&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Tue Jul 12 16:00:10 2016
@@ -65,6 +65,14 @@ namespace llvm {
       ///
       XXSPLT,
 
+      /// XXINSERT - The PPC VSX insert instruction
+      ///
+      XXINSERT,
+
+      /// VECSHL - The PPC VSX shift left instruction
+      ///
+      VECSHL,
+
       /// The CMPB instruction (takes two operands of i32 or i64).
       CMPB,
 
@@ -420,6 +428,16 @@ namespace llvm {
     /// VSPLTB/VSPLTH/VSPLTW.
     bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
 
+    /// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by
+    /// the XXINSERTW instruction introduced in ISA 3.0. This is essentially any
+    /// shuffle of v4f32/v4i32 vectors that just inserts one element from one
+    /// vector into the other. This function also sets the output parameters:
+    /// ShiftElts (how much the source vector needs to be shifted), InsertAtByte
+    /// (the byte number to specify so the instruction puts the element in the
+    /// desired location of the target vector) and Swap (swap the two operands).
+    bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+                         unsigned &InsertAtByte, bool &Swap, bool IsLE);
+
     /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
     /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
     unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=275215&r1=275214&r2=275215&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Tue Jul 12 16:00:10 2016
@@ -35,6 +35,14 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2
   SDTCisVec<1>, SDTCisInt<2>
 ]>;
 
+def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>,
+  SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
+]>;
+
+def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>,
+  SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
+]>;
+
 def SDT_PPCvcmp : SDTypeProfile<1, 3, [
   SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
 ]>;
@@ -144,8 +152,10 @@ def PPCaddiTlsldLAddr : SDNode<"PPCISD::
 def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>;
 def PPCaddiDtprelL   : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
 
-def PPCvperm    : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+def PPCvperm     : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
 def PPCxxsplt    : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
+def PPCxxinsert  : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>;
+def PPCvecshl    : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
 
 def PPCqvfperm   : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
 def PPCqvgpci    : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td?rev=275215&r1=275214&r2=275215&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td Tue Jul 12 16:00:10 2016
@@ -779,7 +779,9 @@ let Uses = [RM] in {
 
   def XXSLDWI : XX3Form_2<60, 2,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW),
-                       "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>;
+                       "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm,
+                       [(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB,
+                                                  imm32SExt16:$SHW))]>;
   def XXSPLTW : XX2Form_2<60, 164,
                        (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
                        "xxspltw $XT, $XB, $UIM", IIC_VecPerm,
@@ -1819,9 +1821,14 @@ def : Pat<(f64 (bitconvert i64:$S)),
           (f64 (MTVSRD $S))>;
 }
 
+def AlignValues {
+  dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
+  dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
+}
+
 // The following VSX instructions were introduced in Power ISA 3.0
 def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">;
-let Predicates = [HasP9Vector] in {
+let AddedComplexity = 400, Predicates = [HasP9Vector] in {
 
   // [PO VRT XO VRB XO /]
   class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
@@ -2028,13 +2035,17 @@ let Predicates = [HasP9Vector] in {
 
   // Vector Insert Word
   // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
-  def XXINSERTW   : XX2_RD6_UIM5_RS6<60, 181,
-                                  (outs vsrc:$XT), (ins u4imm:$UIMM, vsrc:$XB),
-                                  "xxinsertw $XT, $XB, $UIMM", IIC_VecFP, []>;
+  def XXINSERTW   :
+    XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
+                     (ins vsrc:$XTi, vsrc:$XB, u4imm:$UIM),
+                     "xxinsertw $XT, $XB, $UIM", IIC_VecFP,
+                     [(set v4i32:$XT, (PPCxxinsert v4i32:$XTi, v4i32:$XB,
+                                                   imm32SExt16:$UIM))]>,
+                     RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
 
   // Vector Extract Unsigned Word
   def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
-                                  (outs vsrc:$XT), (ins u4imm:$UIMM, vsrc:$XB),
+                                  (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
                                   "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
 
   // Vector Insert Exponent DP/SP
@@ -2173,4 +2184,59 @@ let Predicates = [HasP9Vector] in {
   def STXVL    : X_XS6_RA5_RB5<31,  397, "stxvl"   , vsrc, []>;
   def STXVLL   : X_XS6_RA5_RB5<31,  429, "stxvll"  , vsrc, []>;
   } // end mayStore
-} // end HasP9Vector
+
+  // Patterns for which instructions from ISA 3.0 are a better match
+  let Predicates = [IsLittleEndian, HasP9Vector] in {
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
+            (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
+            (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
+            (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
+            (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
+  def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
+            (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+  def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
+            (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+  def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
+            (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+  def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
+            (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
+            (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
+            (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
+            (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
+            (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+  } // IsLittleEndian, HasP9Vector
+
+  let Predicates = [IsBigEndian, HasP9Vector] in {
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
+            (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
+            (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
+            (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
+            (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
+  def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
+            (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+  def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
+            (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+  def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
+            (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+  def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
+            (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
+            (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
+            (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
+            (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
+            (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+  } // IsBigEndian, HasP9Vector
+} // end HasP9Vector, AddedComplexity

Modified: llvm/trunk/lib/Target/PowerPC/README_P9.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/README_P9.txt?rev=275215&r1=275214&r2=275215&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/README_P9.txt (original)
+++ llvm/trunk/lib/Target/PowerPC/README_P9.txt Tue Jul 12 16:00:10 2016
@@ -397,6 +397,8 @@ Fixed Point Facility:
     (set f128:$vT, (int_ppc_vsx_xsxsigqp f128$vB))  // xsxsigqp
 
 - Vector Insert Word: xxinsertw
+  - Useful for inserting f32/i32 elements into vectors (the element to be
+    inserted needs to be prepared)
   . Note: llvm has insertelem in "Vector Operations"
     ; yields <n x <ty>>
     <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx>
@@ -409,6 +411,10 @@ Fixed Point Facility:
     (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM))
 
 - Vector Extract Unsigned Word: xxextractuw
+  - Not useful for extraction of f32 from v4f32 (the current pattern is better -
+    shift->convert)
+  - It is useful for (uint_to_fp (vector_extract v4i32, N))
+  - Unfortunately, it can't be used for (sint_to_fp (vector_extract v4i32, N))
   . Note: llvm has extractelement in "Vector Operations"
     ; yields <ty>
     <result> = extractelement <n x <ty>> <val>, <ty2> <idx>

Added: llvm/trunk/test/CodeGen/p9-xxinsertw-xxextractuw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/p9-xxinsertw-xxextractuw.ll?rev=275215&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/p9-xxinsertw-xxextractuw.ll (added)
+++ llvm/trunk/test/CodeGen/p9-xxinsertw-xxextractuw.ll Tue Jul 12 16:00:10 2016
@@ -0,0 +1,970 @@
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj0ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 0
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 5, i32 1, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj0ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 1, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj0ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 4
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj2ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 8
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj2ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj2ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj3ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 12
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj3ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj3ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x float> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj0ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 0
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 1, i32 2, i32 3>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj0ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 1, i32 2, i32 3>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj0ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj1ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 4
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj1ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj1ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj2ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 8
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj2ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj2ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj3ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 12
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert: result is %a with element 3 taken from element 2 of %b.
+; Default prefix expects xxinsertw with no xxsldwi; BE prefix expects xxsldwi then xxinsertw.
+define <4 x i32> @_Z7testInsILj3ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert: result is %a with element 3 taken from element 3 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x i32> @_Z7testInsILj3ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i32> %vecins
+}
+
+; Extracts element 0 of %a and converts it to float with uitofp.
+; Both prefixes expect xxextractuw then xscvuxdsp; only the xxextractuw immediate differs.
+define float @_Z13testUiToFpExtILj0EEfDv4_j(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: _Z13testUiToFpExtILj0EEfDv4_j
+; CHECK: xxextractuw 0, 34, 12
+; CHECK: xscvuxdsp 1, 0
+; CHECK-BE-LABEL: _Z13testUiToFpExtILj0EEfDv4_j
+; CHECK-BE: xxextractuw 0, 34, 0
+; CHECK-BE: xscvuxdsp 1, 0
+  %vecext = extractelement <4 x i32> %a, i32 0
+  %conv = uitofp i32 %vecext to float
+  ret float %conv
+}
+
+; Extracts element 1 of %a and converts it to float with uitofp.
+; Both prefixes expect xxextractuw then xscvuxdsp; only the xxextractuw immediate differs.
+define float @_Z13testUiToFpExtILj1EEfDv4_j(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: _Z13testUiToFpExtILj1EEfDv4_j
+; CHECK: xxextractuw 0, 34, 8
+; CHECK: xscvuxdsp 1, 0
+; CHECK-BE-LABEL: _Z13testUiToFpExtILj1EEfDv4_j
+; CHECK-BE: xxextractuw 0, 34, 4
+; CHECK-BE: xscvuxdsp 1, 0
+  %vecext = extractelement <4 x i32> %a, i32 1
+  %conv = uitofp i32 %vecext to float
+  ret float %conv
+}
+
+; Extracts element 2 of %a and converts it to float with uitofp.
+; Both prefixes expect xxextractuw then xscvuxdsp; only the xxextractuw immediate differs.
+define float @_Z13testUiToFpExtILj2EEfDv4_j(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: _Z13testUiToFpExtILj2EEfDv4_j
+; CHECK: xxextractuw 0, 34, 4
+; CHECK: xscvuxdsp 1, 0
+; CHECK-BE-LABEL: _Z13testUiToFpExtILj2EEfDv4_j
+; CHECK-BE: xxextractuw 0, 34, 8
+; CHECK-BE: xscvuxdsp 1, 0
+  %vecext = extractelement <4 x i32> %a, i32 2
+  %conv = uitofp i32 %vecext to float
+  ret float %conv
+}
+
+; Extracts element 3 of %a and converts it to float with uitofp.
+; Both prefixes expect xxextractuw then xscvuxdsp; only the xxextractuw immediate differs.
+define float @_Z13testUiToFpExtILj3EEfDv4_j(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: _Z13testUiToFpExtILj3EEfDv4_j
+; CHECK: xxextractuw 0, 34, 0
+; CHECK: xscvuxdsp 1, 0
+; CHECK-BE-LABEL: _Z13testUiToFpExtILj3EEfDv4_j
+; CHECK-BE: xxextractuw 0, 34, 12
+; CHECK-BE: xscvuxdsp 1, 0
+  %vecext = extractelement <4 x i32> %a, i32 3
+  %conv = uitofp i32 %vecext to float
+  ret float %conv
+}
+
+; insertelement of scalar float %b into element 0 of %a.
+; Both prefixes expect xscvdpspn, xxsldwi, then xxinsertw; only the xxinsertw immediate differs.
+define <4 x float> @_Z10testInsEltILj0EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_
+; CHECK: xscvdpspn 0, 1
+; CHECK: xxsldwi 0, 0, 0, 3
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_
+; CHECK-BE: xscvdpspn 0, 1
+; CHECK-BE: xxsldwi 0, 0, 0, 3
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = insertelement <4 x float> %a, float %b, i32 0
+  ret <4 x float> %vecins
+}
+
+; insertelement of scalar float %b into element 1 of %a.
+; Both prefixes expect xscvdpspn, xxsldwi, then xxinsertw; only the xxinsertw immediate differs.
+define <4 x float> @_Z10testInsEltILj1EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_
+; CHECK: xscvdpspn 0, 1
+; CHECK: xxsldwi 0, 0, 0, 3
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_
+; CHECK-BE: xscvdpspn 0, 1
+; CHECK-BE: xxsldwi 0, 0, 0, 3
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = insertelement <4 x float> %a, float %b, i32 1
+  ret <4 x float> %vecins
+}
+
+; insertelement of scalar float %b into element 2 of %a.
+; Both prefixes expect xscvdpspn, xxsldwi, then xxinsertw; only the xxinsertw immediate differs.
+define <4 x float> @_Z10testInsEltILj2EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_
+; CHECK: xscvdpspn 0, 1
+; CHECK: xxsldwi 0, 0, 0, 3
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_
+; CHECK-BE: xscvdpspn 0, 1
+; CHECK-BE: xxsldwi 0, 0, 0, 3
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = insertelement <4 x float> %a, float %b, i32 2
+  ret <4 x float> %vecins
+}
+
+; insertelement of scalar float %b into element 3 of %a.
+; Both prefixes expect xscvdpspn, xxsldwi, then xxinsertw; only the xxinsertw immediate differs.
+define <4 x float> @_Z10testInsEltILj3EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_
+; CHECK: xscvdpspn 0, 1
+; CHECK: xxsldwi 0, 0, 0, 3
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_
+; CHECK-BE: xscvdpspn 0, 1
+; CHECK-BE: xxsldwi 0, 0, 0, 3
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = insertelement <4 x float> %a, float %b, i32 3
+  ret <4 x float> %vecins
+}
+
+; insertelement of zeroext i32 %b into element 0 of %a.
+; Both prefixes expect mtvsrwz then xxinsertw; only the xxinsertw immediate differs.
+define <4 x i32> @_Z10testInsEltILj0EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj0EDv4_jjET0_S1_T1_
+; CHECK: mtvsrwz 0, 5
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z10testInsEltILj0EDv4_jjET0_S1_T1_
+; CHECK-BE: mtvsrwz 0, 5
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = insertelement <4 x i32> %a, i32 %b, i32 0
+  ret <4 x i32> %vecins
+}
+
+; insertelement of zeroext i32 %b into element 1 of %a.
+; Both prefixes expect mtvsrwz then xxinsertw; only the xxinsertw immediate differs.
+define <4 x i32> @_Z10testInsEltILj1EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj1EDv4_jjET0_S1_T1_
+; CHECK: mtvsrwz 0, 5
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z10testInsEltILj1EDv4_jjET0_S1_T1_
+; CHECK-BE: mtvsrwz 0, 5
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = insertelement <4 x i32> %a, i32 %b, i32 1
+  ret <4 x i32> %vecins
+}
+
+; insertelement of zeroext i32 %b into element 2 of %a.
+; Both prefixes expect mtvsrwz then xxinsertw; only the xxinsertw immediate differs.
+define <4 x i32> @_Z10testInsEltILj2EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj2EDv4_jjET0_S1_T1_
+; CHECK: mtvsrwz 0, 5
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z10testInsEltILj2EDv4_jjET0_S1_T1_
+; CHECK-BE: mtvsrwz 0, 5
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = insertelement <4 x i32> %a, i32 %b, i32 2
+  ret <4 x i32> %vecins
+}
+
+; insertelement of zeroext i32 %b into element 3 of %a.
+; Both prefixes expect mtvsrwz then xxinsertw; only the xxinsertw immediate differs.
+define <4 x i32> @_Z10testInsEltILj3EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj3EDv4_jjET0_S1_T1_
+; CHECK: mtvsrwz 0, 5
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z10testInsEltILj3EDv4_jjET0_S1_T1_
+; CHECK-BE: mtvsrwz 0, 5
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = insertelement <4 x i32> %a, i32 %b, i32 3
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 0 taken from element 0 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 0 taken from element 1 of %b.
+; Default prefix expects xxsldwi then xxinsertw; BE prefix expects xxinsertw with no xxsldwi.
+define <4 x float> @_Z7testInsILj0ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 0
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 1, i32 5, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 0 taken from element 2 of %b.
+; Default prefix expects xxinsertw with no xxsldwi; BE prefix expects xxsldwi then xxinsertw.
+define <4 x float> @_Z7testInsILj0ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 2, i32 5, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 0 taken from element 3 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x float> @_Z7testInsILj0ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 3, i32 5, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 1 taken from element 0 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 1 taken from element 1 of %b.
+; Default prefix expects xxsldwi then xxinsertw; BE prefix expects xxinsertw with no xxsldwi.
+define <4 x float> @_Z7testInsILj1ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 4
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 1 taken from element 2 of %b.
+; Default prefix expects xxinsertw with no xxsldwi; BE prefix expects xxsldwi then xxinsertw.
+define <4 x float> @_Z7testInsILj1ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 2, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 1 taken from element 3 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x float> @_Z7testInsILj1ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 3, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 2 taken from element 0 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 7>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 2 taken from element 1 of %b.
+; Default prefix expects xxsldwi then xxinsertw; BE prefix expects xxinsertw with no xxsldwi.
+define <4 x float> @_Z7testInsILj2ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 8
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 1, i32 7>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 2 taken from element 2 of %b.
+; Default prefix expects xxinsertw with no xxsldwi; BE prefix expects xxsldwi then xxinsertw.
+define <4 x float> @_Z7testInsILj2ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 2 taken from element 3 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x float> @_Z7testInsILj2ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 3, i32 7>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 3 taken from element 0 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 3 taken from element 1 of %b.
+; Default prefix expects xxsldwi then xxinsertw; BE prefix expects xxinsertw with no xxsldwi.
+define <4 x float> @_Z7testInsILj3ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 12
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 1>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 3 taken from element 2 of %b.
+; Default prefix expects xxinsertw with no xxsldwi; BE prefix expects xxsldwi then xxinsertw.
+define <4 x float> @_Z7testInsILj3ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 2>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 3 taken from element 3 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x float> @_Z7testInsILj3ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
+  ret <4 x float> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 0 taken from element 0 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 0 taken from element 1 of %b.
+; Default prefix expects xxsldwi then xxinsertw; BE prefix expects xxinsertw with no xxsldwi.
+define <4 x i32> @_Z7testInsILj0ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 0
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 1, i32 5, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 0 taken from element 2 of %b.
+; Default prefix expects xxinsertw with no xxsldwi; BE prefix expects xxsldwi then xxinsertw.
+define <4 x i32> @_Z7testInsILj0ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 2, i32 5, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 0 taken from element 3 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x i32> @_Z7testInsILj0ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 3, i32 5, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 1 taken from element 0 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 1 taken from element 1 of %b.
+; Default prefix expects xxsldwi then xxinsertw; BE prefix expects xxinsertw with no xxsldwi.
+define <4 x i32> @_Z7testInsILj1ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 4
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 1 taken from element 2 of %b.
+; Default prefix expects xxinsertw with no xxsldwi; BE prefix expects xxsldwi then xxinsertw.
+define <4 x i32> @_Z7testInsILj1ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 2, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 1 taken from element 3 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x i32> @_Z7testInsILj1ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 3, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 2 taken from element 0 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 7>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 2 taken from element 1 of %b.
+; Default prefix expects xxsldwi then xxinsertw; BE prefix expects xxinsertw with no xxsldwi.
+define <4 x i32> @_Z7testInsILj2ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 8
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 1, i32 7>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 2 taken from element 2 of %b.
+; Default prefix expects xxinsertw with no xxsldwi; BE prefix expects xxsldwi then xxinsertw.
+define <4 x i32> @_Z7testInsILj2ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 2 taken from element 3 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x i32> @_Z7testInsILj2ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 3, i32 7>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 3 taken from element 0 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 3 taken from element 1 of %b.
+; Default prefix expects xxsldwi then xxinsertw; BE prefix expects xxinsertw with no xxsldwi.
+define <4 x i32> @_Z7testInsILj3ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_r
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 12
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 1>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 3 taken from element 2 of %b.
+; Default prefix expects xxinsertw with no xxsldwi; BE prefix expects xxsldwi then xxinsertw.
+define <4 x i32> @_Z7testInsILj3ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_r
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 2>
+  ret <4 x i32> %vecins
+}
+
+; Shuffle-based insert, %b listed first: result is %a with element 3 taken from element 3 of %b.
+; Both prefixes expect xxsldwi followed by xxinsertw.
+define <4 x i32> @_Z7testInsILj3ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_r
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_r
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
+  ret <4 x i32> %vecins
+}
+; Both shuffle operands are %a: element 0 of the result comes from element 1 of %a.
+; Only the BE prefix is checked here; it expects xxinsertw with register 34 as both operands.
+define <4 x float> @testSameVecEl0BE(<4 x float> %a) {
+entry:
+; CHECK-BE-LABEL: testSameVecEl0BE
+; CHECK-BE: xxinsertw 34, 34, 0
+  %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 5, i32 1, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+; Both shuffle operands are %a: element 2 of the result comes from element 1 of %a.
+; Only the BE prefix is checked here; it expects xxinsertw with register 34 as both operands.
+define <4 x float> @testSameVecEl2BE(<4 x float> %a) {
+entry:
+; CHECK-BE-LABEL: testSameVecEl2BE
+; CHECK-BE: xxinsertw 34, 34, 8
+  %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
+  ret <4 x float> %vecins
+}
+; Both shuffle operands are %a: element 3 of the result comes from element 1 of %a.
+; Only the BE prefix is checked here; it expects xxinsertw with register 34 as both operands.
+define <4 x float> @testSameVecEl3BE(<4 x float> %a) {
+entry:
+; CHECK-BE-LABEL: testSameVecEl3BE
+; CHECK-BE: xxinsertw 34, 34, 12
+  %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+  ret <4 x float> %vecins
+}
+; Both shuffle operands are %a: element 0 of the result comes from element 2 of %a.
+; Only the default prefix is checked here; it expects xxinsertw with register 34 as both operands.
+define <4 x float> @testSameVecEl0LE(<4 x float> %a) {
+entry:
+; CHECK-LABEL: testSameVecEl0LE
+; CHECK: xxinsertw 34, 34, 12
+  %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 6, i32 1, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+; Both shuffle operands are %a: element 1 of the result comes from element 2 of %a.
+; Only the default prefix is checked here; it expects xxinsertw with register 34 as both operands.
+define <4 x float> @testSameVecEl1LE(<4 x float> %a) {
+entry:
+; CHECK-LABEL: testSameVecEl1LE
+; CHECK: xxinsertw 34, 34, 8
+  %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+; Both shuffle operands are %a: element 3 of the result comes from element 2 of %a.
+; Only the default prefix is checked here; it expects xxinsertw with register 34 as both operands.
+define <4 x float> @testSameVecEl3LE(<4 x float> %a) {
+entry:
+; CHECK-LABEL: testSameVecEl3LE
+; CHECK: xxinsertw 34, 34, 0
+  %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+  ret <4 x float> %vecins
+}




More information about the llvm-commits mailing list