[llvm] r337258 - [DAGCombiner] Call SimplifyDemandedVectorElts from EXTRACT_VECTOR_ELT

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 17 02:45:36 PDT 2018


Author: rksimon
Date: Tue Jul 17 02:45:35 2018
New Revision: 337258

URL: http://llvm.org/viewvc/llvm-project?rev=337258&view=rev
Log:
[DAGCombiner] Call SimplifyDemandedVectorElts from EXTRACT_VECTOR_ELT

If the only users of a source vector are EXTRACT_VECTOR_ELT nodes with constant indices, we can call SimplifyDemandedVectorElts on that vector, demanding just the (in-range) extracted elements, to avoid unnecessary vector ops.

Differential Revision: https://reviews.llvm.org/D49262

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
    llvm/trunk/test/CodeGen/ARM/func-argpassing-endian.ll
    llvm/trunk/test/CodeGen/Mips/cconv/vector.ll
    llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll
    llvm/trunk/test/CodeGen/X86/extractelement-load.ll
    llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
    llvm/trunk/test/CodeGen/X86/oddshuffles.ll
    llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll
    llvm/trunk/test/CodeGen/X86/vec_shift7.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Jul 17 02:45:35 2018
@@ -242,7 +242,8 @@ namespace {
     }
 
     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
-    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded);
+    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+                                    bool AssumeSingleUse = false);
 
     bool CombineToPreIndexedLoadStore(SDNode *N);
     bool CombineToPostIndexedLoadStore(SDNode *N);
@@ -1064,11 +1065,12 @@ bool DAGCombiner::SimplifyDemandedBits(S
 /// Check the specified vector node value to see if it can be simplified or
 /// if things it uses can be simplified as it only uses some of the elements.
 /// If so, return true.
-bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
-                                             const APInt &Demanded) {
+bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+                                             bool AssumeSingleUse) {
   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
   APInt KnownUndef, KnownZero;
-  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO))
+  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
+                                      0, AssumeSingleUse))
     return false;
 
   // Revisit the node.
@@ -15014,6 +15016,23 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
     }
   }
 
+  // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
+  // simplify it based on the (valid) extraction indices.
+  if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
+        return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+               Use->getOperand(0) == InVec &&
+               isa<ConstantSDNode>(Use->getOperand(1));
+      })) {
+    APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
+    for (SDNode *Use : InVec->uses()) {
+      auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
+      if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
+        DemandedElts.setBit(CstElt->getZExtValue());
+    }
+    if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
+      return SDValue(N, 0);
+  }
+
   bool BCNumEltsChanged = false;
   EVT ExtVT = VT.getVectorElementType();
   EVT LVT = ExtVT;

Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp Tue Jul 17 02:45:35 2018
@@ -3893,20 +3893,34 @@ static const Permute *matchDoublePermute
   return nullptr;
 }
 
-// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
+// Convert the mask of the given shuffle op into a byte-level mask,
 // as if it had type vNi8.
-static void getVPermMask(ShuffleVectorSDNode *VSN,
+static bool getVPermMask(SDValue ShuffleOp,
                          SmallVectorImpl<int> &Bytes) {
-  EVT VT = VSN->getValueType(0);
+  EVT VT = ShuffleOp.getValueType();
   unsigned NumElements = VT.getVectorNumElements();
   unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
-  Bytes.resize(NumElements * BytesPerElement, -1);
-  for (unsigned I = 0; I < NumElements; ++I) {
-    int Index = VSN->getMaskElt(I);
-    if (Index >= 0)
+
+  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
+    Bytes.resize(NumElements * BytesPerElement, -1);
+    for (unsigned I = 0; I < NumElements; ++I) {
+      int Index = VSN->getMaskElt(I);
+      if (Index >= 0)
+        for (unsigned J = 0; J < BytesPerElement; ++J)
+          Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
+    }
+    return true;
+  }
+  if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
+      isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
+    unsigned Index = ShuffleOp.getConstantOperandVal(1);
+    Bytes.resize(NumElements * BytesPerElement, -1);
+    for (unsigned I = 0; I < NumElements; ++I)
       for (unsigned J = 0; J < BytesPerElement; ++J)
         Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
+    return true;
   }
+  return false;
 }
 
 // Bytes is a VPERM-like permute vector, except that -1 is used for
@@ -4075,7 +4089,8 @@ bool GeneralShuffle::add(SDValue Op, uns
       // See whether the bytes we need come from a contiguous part of one
       // operand.
       SmallVector<int, SystemZ::VectorBytes> OpBytes;
-      getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
+      if (!getVPermMask(Op, OpBytes))
+        break;
       int NewByte;
       if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
         break;
@@ -5109,13 +5124,14 @@ SDValue SystemZTargetLowering::combineEx
     if (Opcode == ISD::BITCAST)
       // Look through bitcasts.
       Op = Op.getOperand(0);
-    else if (Opcode == ISD::VECTOR_SHUFFLE &&
+    else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
              canTreatAsByteVector(Op.getValueType())) {
       // Get a VPERM-like permute mask and see whether the bytes covered
       // by the extracted element are a contiguous sequence from one
       // source operand.
       SmallVector<int, SystemZ::VectorBytes> Bytes;
-      getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
+      if (!getVPermMask(Op, Bytes))
+        break;
       int First;
       if (!getShuffleInput(Bytes, Index * BytesPerElement,
                            BytesPerElement, First))

Modified: llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll Tue Jul 17 02:45:35 2018
@@ -480,38 +480,28 @@ bb7:
 
 ; GCN-LABEL: {{^}}multi_same_block:
 
-; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT0:[0-9]+]], 0x41880000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
-; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT2:[0-9]+]], 0x41980000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000
-; GCN-DAG: s_load_dword [[ARG:s[0-9]+]]
-; IDXMODE-DAG: s_add_i32 [[ARG_ADD:s[0-9]+]], [[ARG]], -16
-
-; MOVREL-DAG: s_add_i32 m0, [[ARG]], -16
-; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT0]], 4.0
-; GCN-NOT: m0
+; GCN: s_load_dword [[ARG:s[0-9]+]]
 
-; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
-; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT0]], 4.0
+; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
+; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
+; MOVREL: s_waitcnt
+; MOVREL: s_add_i32 m0, [[ARG]], -16
+; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, 4.0
+; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, -4.0
+; MOVREL: s_mov_b32 m0, -1
+
+
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
+; IDXMODE: s_waitcnt
+; IDXMODE: s_add_i32 [[ARG]], [[ARG]], -16
+; IDXMODE: s_set_gpr_idx_on [[ARG]], dst
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 4.0
 ; IDXMODE: s_set_gpr_idx_off
-
-; GCN: v_mov_b32_e32 v[[VEC0_ELT2]], 0x4188cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4190cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4198cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a0cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a8cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
-
-; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT2]], -4.0
-
-; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
-; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT2]], -4.0
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
+; IDXMODE: s_set_gpr_idx_on [[ARG]], dst
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, -4.0
 ; IDXMODE: s_set_gpr_idx_off
 
-; PREGFX9: s_mov_b32 m0, -1
-; GFX9-NOT: s_mov_b32 m0
 ; GCN: ds_write_b32
 ; GCN: ds_write_b32
 ; GCN: s_endpgm

Modified: llvm/trunk/test/CodeGen/ARM/func-argpassing-endian.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/func-argpassing-endian.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/func-argpassing-endian.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/func-argpassing-endian.ll Tue Jul 17 02:45:35 2018
@@ -38,7 +38,6 @@ define void @arg_double( double %val ) {
 define void @arg_v4i32(<4 x i32> %vec ) {
 ; CHECK-LE-LABEL: arg_v4i32:
 ; CHECK-LE:       @ %bb.0:
-; CHECK-LE-NEXT:    vmov d17, r2, r3
 ; CHECK-LE-NEXT:    vmov d16, r0, r1
 ; CHECK-LE-NEXT:    movw r0, :lower16:var32
 ; CHECK-LE-NEXT:    movt r0, :upper16:var32
@@ -47,7 +46,6 @@ define void @arg_v4i32(<4 x i32> %vec )
 ;
 ; CHECK-BE-LABEL: arg_v4i32:
 ; CHECK-BE:       @ %bb.0:
-; CHECK-BE-NEXT:    vmov d17, r3, r2
 ; CHECK-BE-NEXT:    vmov d16, r1, r0
 ; CHECK-BE-NEXT:    movw r0, :lower16:var32
 ; CHECK-BE-NEXT:    movt r0, :upper16:var32

Modified: llvm/trunk/test/CodeGen/Mips/cconv/vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/cconv/vector.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/cconv/vector.ll (original)
+++ llvm/trunk/test/CodeGen/Mips/cconv/vector.ll Tue Jul 17 02:45:35 2018
@@ -89,61 +89,37 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x
 ;
 ; MIPS64R5EB-LABEL: i8_2:
 ; MIPS64R5EB:       # %bb.0:
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -64
-; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 64
-; MIPS64R5EB-NEXT:    sd $4, 56($sp)
-; MIPS64R5EB-NEXT:    ldi.b $w0, 0
-; MIPS64R5EB-NEXT:    lbu $1, 57($sp)
-; MIPS64R5EB-NEXT:    lbu $2, 56($sp)
-; MIPS64R5EB-NEXT:    move.v $w1, $w0
-; MIPS64R5EB-NEXT:    insert.h $w1[0], $2
-; MIPS64R5EB-NEXT:    insert.h $w1[1], $1
-; MIPS64R5EB-NEXT:    lbu $1, 58($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[2], $1
-; MIPS64R5EB-NEXT:    lbu $1, 59($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[3], $1
-; MIPS64R5EB-NEXT:    lbu $1, 60($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[4], $1
-; MIPS64R5EB-NEXT:    lbu $1, 61($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[5], $1
-; MIPS64R5EB-NEXT:    lbu $1, 63($sp)
-; MIPS64R5EB-NEXT:    lbu $2, 62($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[6], $2
-; MIPS64R5EB-NEXT:    insert.h $w1[7], $1
-; MIPS64R5EB-NEXT:    copy_s.h $1, $w1[0]
-; MIPS64R5EB-NEXT:    copy_s.h $2, $w1[1]
-; MIPS64R5EB-NEXT:    sd $5, 48($sp)
-; MIPS64R5EB-NEXT:    lbu $3, 48($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[0], $3
-; MIPS64R5EB-NEXT:    lbu $3, 49($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[1], $3
-; MIPS64R5EB-NEXT:    lbu $3, 50($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[2], $3
-; MIPS64R5EB-NEXT:    lbu $3, 51($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[3], $3
-; MIPS64R5EB-NEXT:    lbu $3, 52($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[4], $3
-; MIPS64R5EB-NEXT:    lbu $3, 53($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[5], $3
-; MIPS64R5EB-NEXT:    lbu $3, 55($sp)
-; MIPS64R5EB-NEXT:    lbu $4, 54($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[6], $4
-; MIPS64R5EB-NEXT:    insert.h $w0[7], $3
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -96
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 96
+; MIPS64R5EB-NEXT:    sd $4, 88($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 89($sp)
+; MIPS64R5EB-NEXT:    sh $1, 2($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 88($sp)
+; MIPS64R5EB-NEXT:    sh $1, 0($sp)
+; MIPS64R5EB-NEXT:    ld.h $w0, 0($sp)
+; MIPS64R5EB-NEXT:    copy_s.h $1, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.h $2, $w0[1]
+; MIPS64R5EB-NEXT:    sd $5, 80($sp)
+; MIPS64R5EB-NEXT:    lbu $3, 81($sp)
+; MIPS64R5EB-NEXT:    sh $3, 18($sp)
+; MIPS64R5EB-NEXT:    lbu $3, 80($sp)
+; MIPS64R5EB-NEXT:    sh $3, 16($sp)
+; MIPS64R5EB-NEXT:    ld.h $w0, 16($sp)
 ; MIPS64R5EB-NEXT:    copy_s.h $3, $w0[0]
 ; MIPS64R5EB-NEXT:    copy_s.h $4, $w0[1]
-; MIPS64R5EB-NEXT:    sw $4, 28($sp)
-; MIPS64R5EB-NEXT:    sw $3, 20($sp)
-; MIPS64R5EB-NEXT:    sw $2, 12($sp)
-; MIPS64R5EB-NEXT:    sw $1, 4($sp)
-; MIPS64R5EB-NEXT:    ld.d $w0, 16($sp)
-; MIPS64R5EB-NEXT:    ld.d $w1, 0($sp)
+; MIPS64R5EB-NEXT:    sw $4, 60($sp)
+; MIPS64R5EB-NEXT:    sw $3, 52($sp)
+; MIPS64R5EB-NEXT:    sw $2, 44($sp)
+; MIPS64R5EB-NEXT:    sw $1, 36($sp)
+; MIPS64R5EB-NEXT:    ld.d $w0, 48($sp)
+; MIPS64R5EB-NEXT:    ld.d $w1, 32($sp)
 ; MIPS64R5EB-NEXT:    addv.d $w0, $w1, $w0
 ; MIPS64R5EB-NEXT:    copy_s.d $1, $w0[0]
 ; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5EB-NEXT:    sb $2, 45($sp)
-; MIPS64R5EB-NEXT:    sb $1, 44($sp)
-; MIPS64R5EB-NEXT:    lh $2, 44($sp)
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 64
+; MIPS64R5EB-NEXT:    sb $2, 77($sp)
+; MIPS64R5EB-NEXT:    sb $1, 76($sp)
+; MIPS64R5EB-NEXT:    lh $2, 76($sp)
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 96
 ; MIPS64R5EB-NEXT:    jr $ra
 ; MIPS64R5EB-NEXT:    nop
 ;
@@ -215,61 +191,37 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x
 ;
 ; MIPS64R5EL-LABEL: i8_2:
 ; MIPS64R5EL:       # %bb.0:
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -64
-; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 64
-; MIPS64R5EL-NEXT:    sd $4, 56($sp)
-; MIPS64R5EL-NEXT:    ldi.b $w0, 0
-; MIPS64R5EL-NEXT:    lbu $1, 57($sp)
-; MIPS64R5EL-NEXT:    lbu $2, 56($sp)
-; MIPS64R5EL-NEXT:    move.v $w1, $w0
-; MIPS64R5EL-NEXT:    insert.h $w1[0], $2
-; MIPS64R5EL-NEXT:    insert.h $w1[1], $1
-; MIPS64R5EL-NEXT:    lbu $1, 58($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[2], $1
-; MIPS64R5EL-NEXT:    lbu $1, 59($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[3], $1
-; MIPS64R5EL-NEXT:    lbu $1, 60($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[4], $1
-; MIPS64R5EL-NEXT:    lbu $1, 61($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[5], $1
-; MIPS64R5EL-NEXT:    lbu $1, 63($sp)
-; MIPS64R5EL-NEXT:    lbu $2, 62($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[6], $2
-; MIPS64R5EL-NEXT:    insert.h $w1[7], $1
-; MIPS64R5EL-NEXT:    copy_s.h $1, $w1[0]
-; MIPS64R5EL-NEXT:    copy_s.h $2, $w1[1]
-; MIPS64R5EL-NEXT:    sd $5, 48($sp)
-; MIPS64R5EL-NEXT:    lbu $3, 48($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[0], $3
-; MIPS64R5EL-NEXT:    lbu $3, 49($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[1], $3
-; MIPS64R5EL-NEXT:    lbu $3, 50($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[2], $3
-; MIPS64R5EL-NEXT:    lbu $3, 51($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[3], $3
-; MIPS64R5EL-NEXT:    lbu $3, 52($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[4], $3
-; MIPS64R5EL-NEXT:    lbu $3, 53($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[5], $3
-; MIPS64R5EL-NEXT:    lbu $3, 55($sp)
-; MIPS64R5EL-NEXT:    lbu $4, 54($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[6], $4
-; MIPS64R5EL-NEXT:    insert.h $w0[7], $3
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -96
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 96
+; MIPS64R5EL-NEXT:    sd $4, 88($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 89($sp)
+; MIPS64R5EL-NEXT:    sh $1, 2($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 88($sp)
+; MIPS64R5EL-NEXT:    sh $1, 0($sp)
+; MIPS64R5EL-NEXT:    ld.h $w0, 0($sp)
+; MIPS64R5EL-NEXT:    copy_s.h $1, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.h $2, $w0[1]
+; MIPS64R5EL-NEXT:    sd $5, 80($sp)
+; MIPS64R5EL-NEXT:    lbu $3, 81($sp)
+; MIPS64R5EL-NEXT:    sh $3, 18($sp)
+; MIPS64R5EL-NEXT:    lbu $3, 80($sp)
+; MIPS64R5EL-NEXT:    sh $3, 16($sp)
+; MIPS64R5EL-NEXT:    ld.h $w0, 16($sp)
 ; MIPS64R5EL-NEXT:    copy_s.h $3, $w0[0]
 ; MIPS64R5EL-NEXT:    copy_s.h $4, $w0[1]
-; MIPS64R5EL-NEXT:    sw $4, 24($sp)
-; MIPS64R5EL-NEXT:    sw $3, 16($sp)
-; MIPS64R5EL-NEXT:    sw $2, 8($sp)
-; MIPS64R5EL-NEXT:    sw $1, 0($sp)
-; MIPS64R5EL-NEXT:    ld.d $w0, 16($sp)
-; MIPS64R5EL-NEXT:    ld.d $w1, 0($sp)
+; MIPS64R5EL-NEXT:    sw $4, 56($sp)
+; MIPS64R5EL-NEXT:    sw $3, 48($sp)
+; MIPS64R5EL-NEXT:    sw $2, 40($sp)
+; MIPS64R5EL-NEXT:    sw $1, 32($sp)
+; MIPS64R5EL-NEXT:    ld.d $w0, 48($sp)
+; MIPS64R5EL-NEXT:    ld.d $w1, 32($sp)
 ; MIPS64R5EL-NEXT:    addv.d $w0, $w1, $w0
 ; MIPS64R5EL-NEXT:    copy_s.d $1, $w0[0]
 ; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5EL-NEXT:    sb $2, 45($sp)
-; MIPS64R5EL-NEXT:    sb $1, 44($sp)
-; MIPS64R5EL-NEXT:    lh $2, 44($sp)
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 64
+; MIPS64R5EL-NEXT:    sb $2, 77($sp)
+; MIPS64R5EL-NEXT:    sb $1, 76($sp)
+; MIPS64R5EL-NEXT:    lh $2, 76($sp)
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 96
 ; MIPS64R5EL-NEXT:    jr $ra
 ; MIPS64R5EL-NEXT:    nop
   %1 = add <2 x i8> %a, %b
@@ -445,181 +397,97 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2
 ;
 ; MIPS64R5EB-LABEL: i8x2_7:
 ; MIPS64R5EB:       # %bb.0: # %entry
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -176
-; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 176
-; MIPS64R5EB-NEXT:    sd $4, 168($sp)
-; MIPS64R5EB-NEXT:    ldi.b $w0, 0
-; MIPS64R5EB-NEXT:    lbu $1, 169($sp)
-; MIPS64R5EB-NEXT:    lbu $2, 168($sp)
-; MIPS64R5EB-NEXT:    move.v $w1, $w0
-; MIPS64R5EB-NEXT:    insert.h $w1[0], $2
-; MIPS64R5EB-NEXT:    insert.h $w1[1], $1
-; MIPS64R5EB-NEXT:    lbu $1, 170($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[2], $1
-; MIPS64R5EB-NEXT:    lbu $1, 171($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[3], $1
-; MIPS64R5EB-NEXT:    lbu $1, 172($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[4], $1
-; MIPS64R5EB-NEXT:    lbu $1, 173($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[5], $1
-; MIPS64R5EB-NEXT:    lbu $1, 175($sp)
-; MIPS64R5EB-NEXT:    lbu $2, 174($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[6], $2
-; MIPS64R5EB-NEXT:    insert.h $w1[7], $1
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -288
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 288
+; MIPS64R5EB-NEXT:    sd $4, 280($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 281($sp)
+; MIPS64R5EB-NEXT:    sh $1, 2($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 280($sp)
+; MIPS64R5EB-NEXT:    sh $1, 0($sp)
+; MIPS64R5EB-NEXT:    ld.h $w0, 0($sp)
+; MIPS64R5EB-NEXT:    copy_s.h $1, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.h $2, $w0[1]
+; MIPS64R5EB-NEXT:    sd $5, 272($sp)
+; MIPS64R5EB-NEXT:    lbu $3, 273($sp)
+; MIPS64R5EB-NEXT:    sh $3, 18($sp)
+; MIPS64R5EB-NEXT:    lbu $3, 272($sp)
+; MIPS64R5EB-NEXT:    sh $3, 16($sp)
+; MIPS64R5EB-NEXT:    ld.h $w0, 16($sp)
+; MIPS64R5EB-NEXT:    copy_s.h $3, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.h $4, $w0[1]
+; MIPS64R5EB-NEXT:    sw $4, 140($sp)
+; MIPS64R5EB-NEXT:    sw $3, 132($sp)
+; MIPS64R5EB-NEXT:    sw $2, 124($sp)
+; MIPS64R5EB-NEXT:    sw $1, 116($sp)
+; MIPS64R5EB-NEXT:    ld.d $w0, 128($sp)
+; MIPS64R5EB-NEXT:    ld.d $w1, 112($sp)
+; MIPS64R5EB-NEXT:    addv.d $w0, $w1, $w0
+; MIPS64R5EB-NEXT:    sd $6, 264($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 265($sp)
+; MIPS64R5EB-NEXT:    sh $1, 34($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 264($sp)
+; MIPS64R5EB-NEXT:    sh $1, 32($sp)
+; MIPS64R5EB-NEXT:    ld.h $w1, 32($sp)
 ; MIPS64R5EB-NEXT:    copy_s.h $1, $w1[0]
 ; MIPS64R5EB-NEXT:    copy_s.h $2, $w1[1]
-; MIPS64R5EB-NEXT:    sd $5, 160($sp)
-; MIPS64R5EB-NEXT:    lbu $3, 161($sp)
-; MIPS64R5EB-NEXT:    lbu $4, 160($sp)
-; MIPS64R5EB-NEXT:    move.v $w1, $w0
-; MIPS64R5EB-NEXT:    insert.h $w1[0], $4
-; MIPS64R5EB-NEXT:    insert.h $w1[1], $3
-; MIPS64R5EB-NEXT:    lbu $3, 162($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[2], $3
-; MIPS64R5EB-NEXT:    lbu $3, 163($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[3], $3
-; MIPS64R5EB-NEXT:    lbu $3, 164($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[4], $3
-; MIPS64R5EB-NEXT:    lbu $3, 165($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[5], $3
-; MIPS64R5EB-NEXT:    lbu $3, 167($sp)
-; MIPS64R5EB-NEXT:    lbu $4, 166($sp)
-; MIPS64R5EB-NEXT:    insert.h $w1[6], $4
-; MIPS64R5EB-NEXT:    insert.h $w1[7], $3
+; MIPS64R5EB-NEXT:    sw $2, 156($sp)
+; MIPS64R5EB-NEXT:    sw $1, 148($sp)
+; MIPS64R5EB-NEXT:    ld.d $w1, 144($sp)
+; MIPS64R5EB-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5EB-NEXT:    sd $7, 256($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 257($sp)
+; MIPS64R5EB-NEXT:    sh $1, 50($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 256($sp)
+; MIPS64R5EB-NEXT:    sh $1, 48($sp)
+; MIPS64R5EB-NEXT:    ld.h $w1, 48($sp)
+; MIPS64R5EB-NEXT:    copy_s.h $1, $w1[0]
+; MIPS64R5EB-NEXT:    copy_s.h $2, $w1[1]
+; MIPS64R5EB-NEXT:    sw $2, 172($sp)
+; MIPS64R5EB-NEXT:    sw $1, 164($sp)
+; MIPS64R5EB-NEXT:    ld.d $w1, 160($sp)
+; MIPS64R5EB-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5EB-NEXT:    sd $8, 248($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 249($sp)
+; MIPS64R5EB-NEXT:    sh $1, 66($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 248($sp)
+; MIPS64R5EB-NEXT:    sh $1, 64($sp)
+; MIPS64R5EB-NEXT:    ld.h $w1, 64($sp)
+; MIPS64R5EB-NEXT:    copy_s.h $1, $w1[0]
+; MIPS64R5EB-NEXT:    copy_s.h $2, $w1[1]
+; MIPS64R5EB-NEXT:    sw $2, 188($sp)
+; MIPS64R5EB-NEXT:    sw $1, 180($sp)
+; MIPS64R5EB-NEXT:    ld.d $w1, 176($sp)
+; MIPS64R5EB-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5EB-NEXT:    sd $10, 232($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 233($sp)
+; MIPS64R5EB-NEXT:    sh $1, 98($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 232($sp)
+; MIPS64R5EB-NEXT:    sh $1, 96($sp)
+; MIPS64R5EB-NEXT:    ld.h $w1, 96($sp)
+; MIPS64R5EB-NEXT:    copy_s.h $1, $w1[0]
+; MIPS64R5EB-NEXT:    copy_s.h $2, $w1[1]
+; MIPS64R5EB-NEXT:    sd $9, 240($sp)
+; MIPS64R5EB-NEXT:    lbu $3, 241($sp)
+; MIPS64R5EB-NEXT:    sh $3, 82($sp)
+; MIPS64R5EB-NEXT:    lbu $3, 240($sp)
+; MIPS64R5EB-NEXT:    sh $3, 80($sp)
+; MIPS64R5EB-NEXT:    ld.h $w1, 80($sp)
 ; MIPS64R5EB-NEXT:    copy_s.h $3, $w1[0]
 ; MIPS64R5EB-NEXT:    copy_s.h $4, $w1[1]
-; MIPS64R5EB-NEXT:    sw $4, 28($sp)
-; MIPS64R5EB-NEXT:    sw $3, 20($sp)
-; MIPS64R5EB-NEXT:    sw $2, 12($sp)
-; MIPS64R5EB-NEXT:    sw $1, 4($sp)
-; MIPS64R5EB-NEXT:    ld.d $w1, 16($sp)
-; MIPS64R5EB-NEXT:    ld.d $w2, 0($sp)
-; MIPS64R5EB-NEXT:    addv.d $w1, $w2, $w1
-; MIPS64R5EB-NEXT:    sd $6, 152($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 153($sp)
-; MIPS64R5EB-NEXT:    lbu $2, 152($sp)
-; MIPS64R5EB-NEXT:    move.v $w2, $w0
-; MIPS64R5EB-NEXT:    insert.h $w2[0], $2
-; MIPS64R5EB-NEXT:    insert.h $w2[1], $1
-; MIPS64R5EB-NEXT:    lbu $1, 154($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[2], $1
-; MIPS64R5EB-NEXT:    lbu $1, 155($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[3], $1
-; MIPS64R5EB-NEXT:    lbu $1, 156($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[4], $1
-; MIPS64R5EB-NEXT:    lbu $1, 157($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[5], $1
-; MIPS64R5EB-NEXT:    lbu $1, 159($sp)
-; MIPS64R5EB-NEXT:    lbu $2, 158($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[6], $2
-; MIPS64R5EB-NEXT:    insert.h $w2[7], $1
-; MIPS64R5EB-NEXT:    copy_s.h $1, $w2[0]
-; MIPS64R5EB-NEXT:    copy_s.h $2, $w2[1]
-; MIPS64R5EB-NEXT:    sw $2, 44($sp)
-; MIPS64R5EB-NEXT:    sw $1, 36($sp)
-; MIPS64R5EB-NEXT:    ld.d $w2, 32($sp)
-; MIPS64R5EB-NEXT:    addv.d $w1, $w1, $w2
-; MIPS64R5EB-NEXT:    sd $7, 144($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 145($sp)
-; MIPS64R5EB-NEXT:    lbu $2, 144($sp)
-; MIPS64R5EB-NEXT:    move.v $w2, $w0
-; MIPS64R5EB-NEXT:    insert.h $w2[0], $2
-; MIPS64R5EB-NEXT:    insert.h $w2[1], $1
-; MIPS64R5EB-NEXT:    lbu $1, 146($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[2], $1
-; MIPS64R5EB-NEXT:    lbu $1, 147($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[3], $1
-; MIPS64R5EB-NEXT:    lbu $1, 148($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[4], $1
-; MIPS64R5EB-NEXT:    lbu $1, 149($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[5], $1
-; MIPS64R5EB-NEXT:    lbu $1, 151($sp)
-; MIPS64R5EB-NEXT:    lbu $2, 150($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[6], $2
-; MIPS64R5EB-NEXT:    insert.h $w2[7], $1
-; MIPS64R5EB-NEXT:    copy_s.h $1, $w2[0]
-; MIPS64R5EB-NEXT:    copy_s.h $2, $w2[1]
-; MIPS64R5EB-NEXT:    sw $2, 60($sp)
-; MIPS64R5EB-NEXT:    sw $1, 52($sp)
-; MIPS64R5EB-NEXT:    ld.d $w2, 48($sp)
-; MIPS64R5EB-NEXT:    addv.d $w1, $w1, $w2
-; MIPS64R5EB-NEXT:    sd $8, 136($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 137($sp)
-; MIPS64R5EB-NEXT:    lbu $2, 136($sp)
-; MIPS64R5EB-NEXT:    move.v $w2, $w0
-; MIPS64R5EB-NEXT:    insert.h $w2[0], $2
-; MIPS64R5EB-NEXT:    insert.h $w2[1], $1
-; MIPS64R5EB-NEXT:    lbu $1, 138($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[2], $1
-; MIPS64R5EB-NEXT:    lbu $1, 139($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[3], $1
-; MIPS64R5EB-NEXT:    lbu $1, 140($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[4], $1
-; MIPS64R5EB-NEXT:    lbu $1, 141($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[5], $1
-; MIPS64R5EB-NEXT:    lbu $1, 143($sp)
-; MIPS64R5EB-NEXT:    lbu $2, 142($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[6], $2
-; MIPS64R5EB-NEXT:    insert.h $w2[7], $1
-; MIPS64R5EB-NEXT:    copy_s.h $1, $w2[0]
-; MIPS64R5EB-NEXT:    copy_s.h $2, $w2[1]
-; MIPS64R5EB-NEXT:    sd $10, 120($sp)
-; MIPS64R5EB-NEXT:    lbu $3, 121($sp)
-; MIPS64R5EB-NEXT:    lbu $4, 120($sp)
-; MIPS64R5EB-NEXT:    move.v $w2, $w0
-; MIPS64R5EB-NEXT:    insert.h $w2[0], $4
-; MIPS64R5EB-NEXT:    insert.h $w2[1], $3
-; MIPS64R5EB-NEXT:    lbu $3, 122($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[2], $3
-; MIPS64R5EB-NEXT:    lbu $3, 123($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[3], $3
-; MIPS64R5EB-NEXT:    lbu $3, 124($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[4], $3
-; MIPS64R5EB-NEXT:    lbu $3, 125($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[5], $3
-; MIPS64R5EB-NEXT:    lbu $3, 127($sp)
-; MIPS64R5EB-NEXT:    lbu $4, 126($sp)
-; MIPS64R5EB-NEXT:    insert.h $w2[6], $4
-; MIPS64R5EB-NEXT:    insert.h $w2[7], $3
-; MIPS64R5EB-NEXT:    copy_s.h $3, $w2[0]
-; MIPS64R5EB-NEXT:    copy_s.h $4, $w2[1]
-; MIPS64R5EB-NEXT:    sw $2, 76($sp)
-; MIPS64R5EB-NEXT:    sw $1, 68($sp)
-; MIPS64R5EB-NEXT:    ld.d $w2, 64($sp)
-; MIPS64R5EB-NEXT:    addv.d $w1, $w1, $w2
-; MIPS64R5EB-NEXT:    sd $9, 128($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 128($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[0], $1
-; MIPS64R5EB-NEXT:    lbu $1, 129($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[1], $1
-; MIPS64R5EB-NEXT:    lbu $1, 130($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[2], $1
-; MIPS64R5EB-NEXT:    lbu $1, 131($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[3], $1
-; MIPS64R5EB-NEXT:    lbu $1, 132($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[4], $1
-; MIPS64R5EB-NEXT:    lbu $1, 133($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[5], $1
-; MIPS64R5EB-NEXT:    lbu $1, 135($sp)
-; MIPS64R5EB-NEXT:    lbu $2, 134($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[6], $2
-; MIPS64R5EB-NEXT:    insert.h $w0[7], $1
-; MIPS64R5EB-NEXT:    copy_s.h $1, $w0[0]
-; MIPS64R5EB-NEXT:    copy_s.h $2, $w0[1]
-; MIPS64R5EB-NEXT:    sw $2, 92($sp)
-; MIPS64R5EB-NEXT:    sw $1, 84($sp)
-; MIPS64R5EB-NEXT:    ld.d $w0, 80($sp)
-; MIPS64R5EB-NEXT:    addv.d $w0, $w1, $w0
-; MIPS64R5EB-NEXT:    sw $4, 108($sp)
-; MIPS64R5EB-NEXT:    sw $3, 100($sp)
-; MIPS64R5EB-NEXT:    ld.d $w1, 96($sp)
+; MIPS64R5EB-NEXT:    sw $4, 204($sp)
+; MIPS64R5EB-NEXT:    sw $3, 196($sp)
+; MIPS64R5EB-NEXT:    ld.d $w1, 192($sp)
+; MIPS64R5EB-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5EB-NEXT:    sw $2, 220($sp)
+; MIPS64R5EB-NEXT:    sw $1, 212($sp)
+; MIPS64R5EB-NEXT:    ld.d $w1, 208($sp)
 ; MIPS64R5EB-NEXT:    addv.d $w0, $w0, $w1
 ; MIPS64R5EB-NEXT:    copy_s.d $1, $w0[0]
 ; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5EB-NEXT:    sb $2, 117($sp)
-; MIPS64R5EB-NEXT:    sb $1, 116($sp)
-; MIPS64R5EB-NEXT:    lh $2, 116($sp)
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 176
+; MIPS64R5EB-NEXT:    sb $2, 229($sp)
+; MIPS64R5EB-NEXT:    sb $1, 228($sp)
+; MIPS64R5EB-NEXT:    lh $2, 228($sp)
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 288
 ; MIPS64R5EB-NEXT:    jr $ra
 ; MIPS64R5EB-NEXT:    nop
 ;
@@ -773,181 +641,97 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2
 ;
 ; MIPS64R5EL-LABEL: i8x2_7:
 ; MIPS64R5EL:       # %bb.0: # %entry
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -176
-; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 176
-; MIPS64R5EL-NEXT:    sd $4, 168($sp)
-; MIPS64R5EL-NEXT:    ldi.b $w0, 0
-; MIPS64R5EL-NEXT:    lbu $1, 169($sp)
-; MIPS64R5EL-NEXT:    lbu $2, 168($sp)
-; MIPS64R5EL-NEXT:    move.v $w1, $w0
-; MIPS64R5EL-NEXT:    insert.h $w1[0], $2
-; MIPS64R5EL-NEXT:    insert.h $w1[1], $1
-; MIPS64R5EL-NEXT:    lbu $1, 170($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[2], $1
-; MIPS64R5EL-NEXT:    lbu $1, 171($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[3], $1
-; MIPS64R5EL-NEXT:    lbu $1, 172($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[4], $1
-; MIPS64R5EL-NEXT:    lbu $1, 173($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[5], $1
-; MIPS64R5EL-NEXT:    lbu $1, 175($sp)
-; MIPS64R5EL-NEXT:    lbu $2, 174($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[6], $2
-; MIPS64R5EL-NEXT:    insert.h $w1[7], $1
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -288
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 288
+; MIPS64R5EL-NEXT:    sd $4, 280($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 281($sp)
+; MIPS64R5EL-NEXT:    sh $1, 2($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 280($sp)
+; MIPS64R5EL-NEXT:    sh $1, 0($sp)
+; MIPS64R5EL-NEXT:    ld.h $w0, 0($sp)
+; MIPS64R5EL-NEXT:    copy_s.h $1, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.h $2, $w0[1]
+; MIPS64R5EL-NEXT:    sd $5, 272($sp)
+; MIPS64R5EL-NEXT:    lbu $3, 273($sp)
+; MIPS64R5EL-NEXT:    sh $3, 18($sp)
+; MIPS64R5EL-NEXT:    lbu $3, 272($sp)
+; MIPS64R5EL-NEXT:    sh $3, 16($sp)
+; MIPS64R5EL-NEXT:    ld.h $w0, 16($sp)
+; MIPS64R5EL-NEXT:    copy_s.h $3, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.h $4, $w0[1]
+; MIPS64R5EL-NEXT:    sw $4, 136($sp)
+; MIPS64R5EL-NEXT:    sw $3, 128($sp)
+; MIPS64R5EL-NEXT:    sw $2, 120($sp)
+; MIPS64R5EL-NEXT:    sw $1, 112($sp)
+; MIPS64R5EL-NEXT:    ld.d $w0, 128($sp)
+; MIPS64R5EL-NEXT:    ld.d $w1, 112($sp)
+; MIPS64R5EL-NEXT:    addv.d $w0, $w1, $w0
+; MIPS64R5EL-NEXT:    sd $6, 264($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 265($sp)
+; MIPS64R5EL-NEXT:    sh $1, 34($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 264($sp)
+; MIPS64R5EL-NEXT:    sh $1, 32($sp)
+; MIPS64R5EL-NEXT:    ld.h $w1, 32($sp)
 ; MIPS64R5EL-NEXT:    copy_s.h $1, $w1[0]
 ; MIPS64R5EL-NEXT:    copy_s.h $2, $w1[1]
-; MIPS64R5EL-NEXT:    sd $5, 160($sp)
-; MIPS64R5EL-NEXT:    lbu $3, 161($sp)
-; MIPS64R5EL-NEXT:    lbu $4, 160($sp)
-; MIPS64R5EL-NEXT:    move.v $w1, $w0
-; MIPS64R5EL-NEXT:    insert.h $w1[0], $4
-; MIPS64R5EL-NEXT:    insert.h $w1[1], $3
-; MIPS64R5EL-NEXT:    lbu $3, 162($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[2], $3
-; MIPS64R5EL-NEXT:    lbu $3, 163($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[3], $3
-; MIPS64R5EL-NEXT:    lbu $3, 164($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[4], $3
-; MIPS64R5EL-NEXT:    lbu $3, 165($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[5], $3
-; MIPS64R5EL-NEXT:    lbu $3, 167($sp)
-; MIPS64R5EL-NEXT:    lbu $4, 166($sp)
-; MIPS64R5EL-NEXT:    insert.h $w1[6], $4
-; MIPS64R5EL-NEXT:    insert.h $w1[7], $3
+; MIPS64R5EL-NEXT:    sw $2, 152($sp)
+; MIPS64R5EL-NEXT:    sw $1, 144($sp)
+; MIPS64R5EL-NEXT:    ld.d $w1, 144($sp)
+; MIPS64R5EL-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5EL-NEXT:    sd $7, 256($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 257($sp)
+; MIPS64R5EL-NEXT:    sh $1, 50($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 256($sp)
+; MIPS64R5EL-NEXT:    sh $1, 48($sp)
+; MIPS64R5EL-NEXT:    ld.h $w1, 48($sp)
+; MIPS64R5EL-NEXT:    copy_s.h $1, $w1[0]
+; MIPS64R5EL-NEXT:    copy_s.h $2, $w1[1]
+; MIPS64R5EL-NEXT:    sw $2, 168($sp)
+; MIPS64R5EL-NEXT:    sw $1, 160($sp)
+; MIPS64R5EL-NEXT:    ld.d $w1, 160($sp)
+; MIPS64R5EL-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5EL-NEXT:    sd $8, 248($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 249($sp)
+; MIPS64R5EL-NEXT:    sh $1, 66($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 248($sp)
+; MIPS64R5EL-NEXT:    sh $1, 64($sp)
+; MIPS64R5EL-NEXT:    ld.h $w1, 64($sp)
+; MIPS64R5EL-NEXT:    copy_s.h $1, $w1[0]
+; MIPS64R5EL-NEXT:    copy_s.h $2, $w1[1]
+; MIPS64R5EL-NEXT:    sw $2, 184($sp)
+; MIPS64R5EL-NEXT:    sw $1, 176($sp)
+; MIPS64R5EL-NEXT:    ld.d $w1, 176($sp)
+; MIPS64R5EL-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5EL-NEXT:    sd $10, 232($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 233($sp)
+; MIPS64R5EL-NEXT:    sh $1, 98($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 232($sp)
+; MIPS64R5EL-NEXT:    sh $1, 96($sp)
+; MIPS64R5EL-NEXT:    ld.h $w1, 96($sp)
+; MIPS64R5EL-NEXT:    copy_s.h $1, $w1[0]
+; MIPS64R5EL-NEXT:    copy_s.h $2, $w1[1]
+; MIPS64R5EL-NEXT:    sd $9, 240($sp)
+; MIPS64R5EL-NEXT:    lbu $3, 241($sp)
+; MIPS64R5EL-NEXT:    sh $3, 82($sp)
+; MIPS64R5EL-NEXT:    lbu $3, 240($sp)
+; MIPS64R5EL-NEXT:    sh $3, 80($sp)
+; MIPS64R5EL-NEXT:    ld.h $w1, 80($sp)
 ; MIPS64R5EL-NEXT:    copy_s.h $3, $w1[0]
 ; MIPS64R5EL-NEXT:    copy_s.h $4, $w1[1]
-; MIPS64R5EL-NEXT:    sw $4, 24($sp)
-; MIPS64R5EL-NEXT:    sw $3, 16($sp)
-; MIPS64R5EL-NEXT:    sw $2, 8($sp)
-; MIPS64R5EL-NEXT:    sw $1, 0($sp)
-; MIPS64R5EL-NEXT:    ld.d $w1, 16($sp)
-; MIPS64R5EL-NEXT:    ld.d $w2, 0($sp)
-; MIPS64R5EL-NEXT:    addv.d $w1, $w2, $w1
-; MIPS64R5EL-NEXT:    sd $6, 152($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 153($sp)
-; MIPS64R5EL-NEXT:    lbu $2, 152($sp)
-; MIPS64R5EL-NEXT:    move.v $w2, $w0
-; MIPS64R5EL-NEXT:    insert.h $w2[0], $2
-; MIPS64R5EL-NEXT:    insert.h $w2[1], $1
-; MIPS64R5EL-NEXT:    lbu $1, 154($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[2], $1
-; MIPS64R5EL-NEXT:    lbu $1, 155($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[3], $1
-; MIPS64R5EL-NEXT:    lbu $1, 156($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[4], $1
-; MIPS64R5EL-NEXT:    lbu $1, 157($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[5], $1
-; MIPS64R5EL-NEXT:    lbu $1, 159($sp)
-; MIPS64R5EL-NEXT:    lbu $2, 158($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[6], $2
-; MIPS64R5EL-NEXT:    insert.h $w2[7], $1
-; MIPS64R5EL-NEXT:    copy_s.h $1, $w2[0]
-; MIPS64R5EL-NEXT:    copy_s.h $2, $w2[1]
-; MIPS64R5EL-NEXT:    sw $2, 40($sp)
-; MIPS64R5EL-NEXT:    sw $1, 32($sp)
-; MIPS64R5EL-NEXT:    ld.d $w2, 32($sp)
-; MIPS64R5EL-NEXT:    addv.d $w1, $w1, $w2
-; MIPS64R5EL-NEXT:    sd $7, 144($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 145($sp)
-; MIPS64R5EL-NEXT:    lbu $2, 144($sp)
-; MIPS64R5EL-NEXT:    move.v $w2, $w0
-; MIPS64R5EL-NEXT:    insert.h $w2[0], $2
-; MIPS64R5EL-NEXT:    insert.h $w2[1], $1
-; MIPS64R5EL-NEXT:    lbu $1, 146($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[2], $1
-; MIPS64R5EL-NEXT:    lbu $1, 147($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[3], $1
-; MIPS64R5EL-NEXT:    lbu $1, 148($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[4], $1
-; MIPS64R5EL-NEXT:    lbu $1, 149($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[5], $1
-; MIPS64R5EL-NEXT:    lbu $1, 151($sp)
-; MIPS64R5EL-NEXT:    lbu $2, 150($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[6], $2
-; MIPS64R5EL-NEXT:    insert.h $w2[7], $1
-; MIPS64R5EL-NEXT:    copy_s.h $1, $w2[0]
-; MIPS64R5EL-NEXT:    copy_s.h $2, $w2[1]
-; MIPS64R5EL-NEXT:    sw $2, 56($sp)
-; MIPS64R5EL-NEXT:    sw $1, 48($sp)
-; MIPS64R5EL-NEXT:    ld.d $w2, 48($sp)
-; MIPS64R5EL-NEXT:    addv.d $w1, $w1, $w2
-; MIPS64R5EL-NEXT:    sd $8, 136($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 137($sp)
-; MIPS64R5EL-NEXT:    lbu $2, 136($sp)
-; MIPS64R5EL-NEXT:    move.v $w2, $w0
-; MIPS64R5EL-NEXT:    insert.h $w2[0], $2
-; MIPS64R5EL-NEXT:    insert.h $w2[1], $1
-; MIPS64R5EL-NEXT:    lbu $1, 138($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[2], $1
-; MIPS64R5EL-NEXT:    lbu $1, 139($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[3], $1
-; MIPS64R5EL-NEXT:    lbu $1, 140($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[4], $1
-; MIPS64R5EL-NEXT:    lbu $1, 141($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[5], $1
-; MIPS64R5EL-NEXT:    lbu $1, 143($sp)
-; MIPS64R5EL-NEXT:    lbu $2, 142($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[6], $2
-; MIPS64R5EL-NEXT:    insert.h $w2[7], $1
-; MIPS64R5EL-NEXT:    copy_s.h $1, $w2[0]
-; MIPS64R5EL-NEXT:    copy_s.h $2, $w2[1]
-; MIPS64R5EL-NEXT:    sd $10, 120($sp)
-; MIPS64R5EL-NEXT:    lbu $3, 121($sp)
-; MIPS64R5EL-NEXT:    lbu $4, 120($sp)
-; MIPS64R5EL-NEXT:    move.v $w2, $w0
-; MIPS64R5EL-NEXT:    insert.h $w2[0], $4
-; MIPS64R5EL-NEXT:    insert.h $w2[1], $3
-; MIPS64R5EL-NEXT:    lbu $3, 122($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[2], $3
-; MIPS64R5EL-NEXT:    lbu $3, 123($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[3], $3
-; MIPS64R5EL-NEXT:    lbu $3, 124($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[4], $3
-; MIPS64R5EL-NEXT:    lbu $3, 125($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[5], $3
-; MIPS64R5EL-NEXT:    lbu $3, 127($sp)
-; MIPS64R5EL-NEXT:    lbu $4, 126($sp)
-; MIPS64R5EL-NEXT:    insert.h $w2[6], $4
-; MIPS64R5EL-NEXT:    insert.h $w2[7], $3
-; MIPS64R5EL-NEXT:    copy_s.h $3, $w2[0]
-; MIPS64R5EL-NEXT:    copy_s.h $4, $w2[1]
-; MIPS64R5EL-NEXT:    sw $2, 72($sp)
-; MIPS64R5EL-NEXT:    sw $1, 64($sp)
-; MIPS64R5EL-NEXT:    ld.d $w2, 64($sp)
-; MIPS64R5EL-NEXT:    addv.d $w1, $w1, $w2
-; MIPS64R5EL-NEXT:    sd $9, 128($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 128($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[0], $1
-; MIPS64R5EL-NEXT:    lbu $1, 129($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[1], $1
-; MIPS64R5EL-NEXT:    lbu $1, 130($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[2], $1
-; MIPS64R5EL-NEXT:    lbu $1, 131($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[3], $1
-; MIPS64R5EL-NEXT:    lbu $1, 132($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[4], $1
-; MIPS64R5EL-NEXT:    lbu $1, 133($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[5], $1
-; MIPS64R5EL-NEXT:    lbu $1, 135($sp)
-; MIPS64R5EL-NEXT:    lbu $2, 134($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[6], $2
-; MIPS64R5EL-NEXT:    insert.h $w0[7], $1
-; MIPS64R5EL-NEXT:    copy_s.h $1, $w0[0]
-; MIPS64R5EL-NEXT:    copy_s.h $2, $w0[1]
-; MIPS64R5EL-NEXT:    sw $2, 88($sp)
-; MIPS64R5EL-NEXT:    sw $1, 80($sp)
-; MIPS64R5EL-NEXT:    ld.d $w0, 80($sp)
-; MIPS64R5EL-NEXT:    addv.d $w0, $w1, $w0
-; MIPS64R5EL-NEXT:    sw $4, 104($sp)
-; MIPS64R5EL-NEXT:    sw $3, 96($sp)
-; MIPS64R5EL-NEXT:    ld.d $w1, 96($sp)
+; MIPS64R5EL-NEXT:    sw $4, 200($sp)
+; MIPS64R5EL-NEXT:    sw $3, 192($sp)
+; MIPS64R5EL-NEXT:    ld.d $w1, 192($sp)
+; MIPS64R5EL-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5EL-NEXT:    sw $2, 216($sp)
+; MIPS64R5EL-NEXT:    sw $1, 208($sp)
+; MIPS64R5EL-NEXT:    ld.d $w1, 208($sp)
 ; MIPS64R5EL-NEXT:    addv.d $w0, $w0, $w1
 ; MIPS64R5EL-NEXT:    copy_s.d $1, $w0[0]
 ; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5EL-NEXT:    sb $2, 117($sp)
-; MIPS64R5EL-NEXT:    sb $1, 116($sp)
-; MIPS64R5EL-NEXT:    lh $2, 116($sp)
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 176
+; MIPS64R5EL-NEXT:    sb $2, 229($sp)
+; MIPS64R5EL-NEXT:    sb $1, 228($sp)
+; MIPS64R5EL-NEXT:    lh $2, 228($sp)
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 288
 ; MIPS64R5EL-NEXT:    jr $ra
 ; MIPS64R5EL-NEXT:    nop
 entry:
@@ -3768,55 +3552,43 @@ define void @call_i8_2() {
 ;
 ; MIPS64R5EB-LABEL: call_i8_2:
 ; MIPS64R5EB:       # %bb.0: # %entry
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -48
-; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 48
-; MIPS64R5EB-NEXT:    sd $ra, 40($sp) # 8-byte Folded Spill
-; MIPS64R5EB-NEXT:    sd $gp, 32($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -64
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 64
+; MIPS64R5EB-NEXT:    sd $ra, 56($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    sd $gp, 48($sp) # 8-byte Folded Spill
 ; MIPS64R5EB-NEXT:    .cfi_offset 31, -8
 ; MIPS64R5EB-NEXT:    .cfi_offset 28, -16
 ; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_2)))
 ; MIPS64R5EB-NEXT:    daddu $1, $1, $25
 ; MIPS64R5EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_2)))
 ; MIPS64R5EB-NEXT:    addiu $1, $zero, 1543
-; MIPS64R5EB-NEXT:    sh $1, 24($sp)
+; MIPS64R5EB-NEXT:    sh $1, 40($sp)
 ; MIPS64R5EB-NEXT:    addiu $1, $zero, 3080
-; MIPS64R5EB-NEXT:    sh $1, 28($sp)
+; MIPS64R5EB-NEXT:    sh $1, 44($sp)
 ; MIPS64R5EB-NEXT:    ld $25, %call16(i8_2)($gp)
-; MIPS64R5EB-NEXT:    lh $4, 24($sp)
-; MIPS64R5EB-NEXT:    lh $5, 28($sp)
+; MIPS64R5EB-NEXT:    lh $4, 40($sp)
+; MIPS64R5EB-NEXT:    lh $5, 44($sp)
 ; MIPS64R5EB-NEXT:    jalr $25
 ; MIPS64R5EB-NEXT:    nop
-; MIPS64R5EB-NEXT:    sd $2, 16($sp)
-; MIPS64R5EB-NEXT:    ldi.b $w0, 0
-; MIPS64R5EB-NEXT:    lbu $1, 16($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[0], $1
-; MIPS64R5EB-NEXT:    lbu $1, 17($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[1], $1
-; MIPS64R5EB-NEXT:    lbu $1, 18($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[2], $1
-; MIPS64R5EB-NEXT:    lbu $1, 19($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[3], $1
-; MIPS64R5EB-NEXT:    lbu $1, 20($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[4], $1
-; MIPS64R5EB-NEXT:    lbu $1, 21($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[5], $1
-; MIPS64R5EB-NEXT:    lbu $1, 23($sp)
-; MIPS64R5EB-NEXT:    lbu $2, 22($sp)
-; MIPS64R5EB-NEXT:    insert.h $w0[6], $2
-; MIPS64R5EB-NEXT:    insert.h $w0[7], $1
+; MIPS64R5EB-NEXT:    sd $2, 32($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 33($sp)
+; MIPS64R5EB-NEXT:    sh $1, 2($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 32($sp)
+; MIPS64R5EB-NEXT:    sh $1, 0($sp)
+; MIPS64R5EB-NEXT:    ld.h $w0, 0($sp)
 ; MIPS64R5EB-NEXT:    copy_s.h $1, $w0[0]
 ; MIPS64R5EB-NEXT:    copy_s.h $2, $w0[1]
-; MIPS64R5EB-NEXT:    sw $2, 12($sp)
-; MIPS64R5EB-NEXT:    sw $1, 4($sp)
-; MIPS64R5EB-NEXT:    ld.d $w0, 0($sp)
+; MIPS64R5EB-NEXT:    sw $2, 28($sp)
+; MIPS64R5EB-NEXT:    sw $1, 20($sp)
+; MIPS64R5EB-NEXT:    ld.d $w0, 16($sp)
 ; MIPS64R5EB-NEXT:    copy_s.d $1, $w0[0]
 ; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[1]
 ; MIPS64R5EB-NEXT:    ld $3, %got_disp(gv2i8)($gp)
 ; MIPS64R5EB-NEXT:    sb $2, 1($3)
 ; MIPS64R5EB-NEXT:    sb $1, 0($3)
-; MIPS64R5EB-NEXT:    ld $gp, 32($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT:    ld $ra, 40($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 48
+; MIPS64R5EB-NEXT:    ld $gp, 48($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    ld $ra, 56($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 64
 ; MIPS64R5EB-NEXT:    jr $ra
 ; MIPS64R5EB-NEXT:    nop
 ;
@@ -3892,55 +3664,43 @@ define void @call_i8_2() {
 ;
 ; MIPS64R5EL-LABEL: call_i8_2:
 ; MIPS64R5EL:       # %bb.0: # %entry
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -48
-; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 48
-; MIPS64R5EL-NEXT:    sd $ra, 40($sp) # 8-byte Folded Spill
-; MIPS64R5EL-NEXT:    sd $gp, 32($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -64
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 64
+; MIPS64R5EL-NEXT:    sd $ra, 56($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    sd $gp, 48($sp) # 8-byte Folded Spill
 ; MIPS64R5EL-NEXT:    .cfi_offset 31, -8
 ; MIPS64R5EL-NEXT:    .cfi_offset 28, -16
 ; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_2)))
 ; MIPS64R5EL-NEXT:    daddu $1, $1, $25
 ; MIPS64R5EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_2)))
 ; MIPS64R5EL-NEXT:    addiu $1, $zero, 1798
-; MIPS64R5EL-NEXT:    sh $1, 24($sp)
+; MIPS64R5EL-NEXT:    sh $1, 40($sp)
 ; MIPS64R5EL-NEXT:    addiu $1, $zero, 2060
-; MIPS64R5EL-NEXT:    sh $1, 28($sp)
+; MIPS64R5EL-NEXT:    sh $1, 44($sp)
 ; MIPS64R5EL-NEXT:    ld $25, %call16(i8_2)($gp)
-; MIPS64R5EL-NEXT:    lh $4, 24($sp)
-; MIPS64R5EL-NEXT:    lh $5, 28($sp)
+; MIPS64R5EL-NEXT:    lh $4, 40($sp)
+; MIPS64R5EL-NEXT:    lh $5, 44($sp)
 ; MIPS64R5EL-NEXT:    jalr $25
 ; MIPS64R5EL-NEXT:    nop
-; MIPS64R5EL-NEXT:    sd $2, 16($sp)
-; MIPS64R5EL-NEXT:    ldi.b $w0, 0
-; MIPS64R5EL-NEXT:    lbu $1, 16($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[0], $1
-; MIPS64R5EL-NEXT:    lbu $1, 17($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[1], $1
-; MIPS64R5EL-NEXT:    lbu $1, 18($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[2], $1
-; MIPS64R5EL-NEXT:    lbu $1, 19($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[3], $1
-; MIPS64R5EL-NEXT:    lbu $1, 20($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[4], $1
-; MIPS64R5EL-NEXT:    lbu $1, 21($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[5], $1
-; MIPS64R5EL-NEXT:    lbu $1, 23($sp)
-; MIPS64R5EL-NEXT:    lbu $2, 22($sp)
-; MIPS64R5EL-NEXT:    insert.h $w0[6], $2
-; MIPS64R5EL-NEXT:    insert.h $w0[7], $1
+; MIPS64R5EL-NEXT:    sd $2, 32($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 33($sp)
+; MIPS64R5EL-NEXT:    sh $1, 2($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 32($sp)
+; MIPS64R5EL-NEXT:    sh $1, 0($sp)
+; MIPS64R5EL-NEXT:    ld.h $w0, 0($sp)
 ; MIPS64R5EL-NEXT:    copy_s.h $1, $w0[0]
 ; MIPS64R5EL-NEXT:    copy_s.h $2, $w0[1]
-; MIPS64R5EL-NEXT:    sw $2, 8($sp)
-; MIPS64R5EL-NEXT:    sw $1, 0($sp)
-; MIPS64R5EL-NEXT:    ld.d $w0, 0($sp)
+; MIPS64R5EL-NEXT:    sw $2, 24($sp)
+; MIPS64R5EL-NEXT:    sw $1, 16($sp)
+; MIPS64R5EL-NEXT:    ld.d $w0, 16($sp)
 ; MIPS64R5EL-NEXT:    copy_s.d $1, $w0[0]
 ; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[1]
 ; MIPS64R5EL-NEXT:    ld $3, %got_disp(gv2i8)($gp)
 ; MIPS64R5EL-NEXT:    sb $2, 1($3)
 ; MIPS64R5EL-NEXT:    sb $1, 0($3)
-; MIPS64R5EL-NEXT:    ld $gp, 32($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT:    ld $ra, 40($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 48
+; MIPS64R5EL-NEXT:    ld $gp, 48($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    ld $ra, 56($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 64
 ; MIPS64R5EL-NEXT:    jr $ra
 ; MIPS64R5EL-NEXT:    nop
 entry:

Modified: llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll Tue Jul 17 02:45:35 2018
@@ -31,7 +31,6 @@ define i32 @t(i8* %ref_frame_ptr, i32 %r
 ; X64-NEXT:    shlq $32, %rcx
 ; X64-NEXT:    orq %rax, %rcx
 ; X64-NEXT:    movq %rcx, %xmm0
-; X64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
 ; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
 ; X64-NEXT:    movd %xmm0, %eax
 ; X64-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/extractelement-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extractelement-load.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extractelement-load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extractelement-load.ll Tue Jul 17 02:45:35 2018
@@ -85,8 +85,7 @@ define i64 @t4(<2 x double>* %a) {
 ; X32-SSE2-LABEL: t4:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movapd (%eax), %xmm0
-; X32-SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X32-SSE2-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
 ; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X32-SSE2-NEXT:    movd %xmm1, %eax
 ; X32-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]

Modified: llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-bits-vector.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-bits-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-bits-vector.ll Tue Jul 17 02:45:35 2018
@@ -24,10 +24,9 @@ define float @knownbits_mask_extract_uit
 ; X32-LABEL: knownbits_mask_extract_uitofp:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushl %eax
-; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
+; X32-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; X32-NEXT:    vmovd %xmm0, %eax
-; X32-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
+; X32-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
 ; X32-NEXT:    vmovss %xmm0, (%esp)
 ; X32-NEXT:    flds (%esp)
 ; X32-NEXT:    popl %eax
@@ -35,10 +34,9 @@ define float @knownbits_mask_extract_uit
 ;
 ; X64-LABEL: knownbits_mask_extract_uitofp:
 ; X64:       # %bb.0:
-; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
+; X64-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; X64-NEXT:    vmovq %xmm0, %rax
-; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
+; X64-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
 ; X64-NEXT:    retq
   %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
   %2 = extractelement <2 x i64> %1, i32 0

Modified: llvm/trunk/test/CodeGen/X86/oddshuffles.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/oddshuffles.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/oddshuffles.ll (original)
+++ llvm/trunk/test/CodeGen/X86/oddshuffles.ll Tue Jul 17 02:45:35 2018
@@ -68,41 +68,29 @@ define void @v3f64(<2 x double> %a, <2 x
 define void @v3i32(<2 x i32> %a, <2 x i32> %b, <3 x i32>* %p) nounwind {
 ; SSE2-LABEL: v3i32:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT:    movd %xmm0, 8(%rdi)
-; SSE2-NEXT:    movq %xmm2, (%rdi)
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    movd %xmm2, 8(%rdi)
+; SSE2-NEXT:    movq %xmm0, (%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: v3i32:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; SSE42-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
-; SSE42-NEXT:    pextrd $2, %xmm0, 8(%rdi)
-; SSE42-NEXT:    movq %xmm1, (%rdi)
+; SSE42-NEXT:    extractps $2, %xmm0, 8(%rdi)
+; SSE42-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE42-NEXT:    movlps %xmm0, (%rdi)
 ; SSE42-NEXT:    retq
 ;
-; AVX1-LABEL: v3i32:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX1-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX1-NEXT:    vextractps $2, %xmm0, 8(%rdi)
-; AVX1-NEXT:    vmovlps %xmm1, (%rdi)
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: v3i32:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vbroadcastss %xmm1, %xmm1
-; AVX2-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX2-NEXT:    vextractps $2, %xmm0, 8(%rdi)
-; AVX2-NEXT:    vmovlps %xmm1, (%rdi)
-; AVX2-NEXT:    retq
+; AVX-LABEL: v3i32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT:    vextractps $2, %xmm0, 8(%rdi)
+; AVX-NEXT:    vmovlps %xmm1, (%rdi)
+; AVX-NEXT:    retq
 ;
 ; XOP-LABEL: v3i32:
 ; XOP:       # %bb.0:
-; XOP-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; XOP-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
+; XOP-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; XOP-NEXT:    vextractps $2, %xmm0, 8(%rdi)
 ; XOP-NEXT:    vmovlps %xmm1, (%rdi)
 ; XOP-NEXT:    retq
@@ -114,10 +102,9 @@ define void @v3i32(<2 x i32> %a, <2 x i3
 define void @v5i16(<4 x i16> %a, <4 x i16> %b, <5 x i16>* %p) nounwind {
 ; SSE2-LABEL: v5i16:
 ; SSE2:       # %bb.0:
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; SSE2-NEXT:    pextrw $6, %xmm0, %eax
 ; SSE2-NEXT:    movw %ax, 8(%rdi)
@@ -126,10 +113,9 @@ define void @v5i16(<4 x i16> %a, <4 x i1
 ;
 ; SSE42-LABEL: v5i16:
 ; SSE42:       # %bb.0:
+; SSE42-NEXT:    pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
 ; SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
 ; SSE42-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; SSE42-NEXT:    pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
 ; SSE42-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; SSE42-NEXT:    pextrw $6, %xmm0, 8(%rdi)
 ; SSE42-NEXT:    movq %xmm2, (%rdi)
@@ -137,10 +123,9 @@ define void @v5i16(<4 x i16> %a, <4 x i1
 ;
 ; AVX1-LABEL: v5i16:
 ; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; AVX1-NEXT:    vpextrw $6, %xmm0, 8(%rdi)
 ; AVX1-NEXT:    vmovq %xmm1, (%rdi)
@@ -148,10 +133,9 @@ define void @v5i16(<4 x i16> %a, <4 x i1
 ;
 ; AVX2-SLOW-LABEL: v5i16:
 ; AVX2-SLOW:       # %bb.0:
+; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
 ; AVX2-SLOW-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; AVX2-SLOW-NEXT:    vpextrw $6, %xmm0, 8(%rdi)
 ; AVX2-SLOW-NEXT:    vmovq %xmm1, (%rdi)
@@ -160,7 +144,7 @@ define void @v5i16(<4 x i16> %a, <4 x i1
 ; AVX2-FAST-LABEL: v5i16:
 ; AVX2-FAST:       # %bb.0:
 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[4,5,8,9,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm2 = xmm0[0,1,4,5,12,13,14,15,8,9,10,11,12,13,14,15]
+; AVX2-FAST-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX2-FAST-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; AVX2-FAST-NEXT:    vpextrw $6, %xmm0, 8(%rdi)
 ; AVX2-FAST-NEXT:    vmovq %xmm1, (%rdi)
@@ -168,7 +152,7 @@ define void @v5i16(<4 x i16> %a, <4 x i1
 ;
 ; XOP-LABEL: v5i16:
 ; XOP:       # %bb.0:
-; XOP-NEXT:    vpperm {{.*#+}} xmm1 = xmm0[0,1],xmm1[4,5],xmm0[4,5],xmm1[8,9],xmm0[12,13],xmm1[4,5],xmm0[14,15],xmm1[6,7]
+; XOP-NEXT:    vpperm {{.*#+}} xmm1 = xmm0[0,1],xmm1[4,5],xmm0[4,5],xmm1[8,9],xmm0[4,5],xmm1[4,5],xmm0[6,7],xmm1[6,7]
 ; XOP-NEXT:    vpextrw $6, %xmm0, 8(%rdi)
 ; XOP-NEXT:    vmovq %xmm1, (%rdi)
 ; XOP-NEXT:    retq
@@ -377,23 +361,24 @@ define void @v7i32(<4 x i32> %a, <4 x i3
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,1,2,2]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,1,0,3]
 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[3,0]
-; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; SSE2-NEXT:    movd %xmm1, 24(%rdi)
-; SSE2-NEXT:    movlps %xmm0, 16(%rdi)
+; SSE2-NEXT:    movq %xmm0, 16(%rdi)
 ; SSE2-NEXT:    movdqa %xmm3, (%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: v7i32:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    movdqa %xmm1, %xmm2
-; SSE42-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3],xmm2[4,5,6,7]
-; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
-; SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,3,2]
-; SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,0,3]
+; SSE42-NEXT:    movdqa %xmm0, %xmm2
+; SSE42-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
+; SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,3,2]
+; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
 ; SSE42-NEXT:    movd %xmm1, 24(%rdi)
-; SSE42-NEXT:    movq %xmm2, 16(%rdi)
-; SSE42-NEXT:    movdqa %xmm0, (%rdi)
+; SSE42-NEXT:    movq %xmm0, 16(%rdi)
+; SSE42-NEXT:    movdqa %xmm2, (%rdi)
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: v7i32:

Modified: llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll (original)
+++ llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll Tue Jul 17 02:45:35 2018
@@ -427,7 +427,6 @@ define void @test_int_div(<3 x i32>* %de
 ; CHECK-NEXT:    pextrd $2, %xmm1, %r8d
 ; CHECK-NEXT:    cltd
 ; CHECK-NEXT:    idivl %r8d
-; CHECK-NEXT:    pinsrd $2, %eax, %xmm2
 ; CHECK-NEXT:    movl %eax, 8(%rdi,%rcx)
 ; CHECK-NEXT:    movq %xmm2, (%rdi,%rcx)
 ; CHECK-NEXT:    addq $16, %rcx

Modified: llvm/trunk/test/CodeGen/X86/vec_shift7.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shift7.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shift7.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_shift7.ll Tue Jul 17 02:45:35 2018
@@ -7,12 +7,9 @@
 define i64 @test1(<2 x i64> %a) {
 ; X32-LABEL: test1:
 ; X32:       # %bb.0: # %entry
-; X32-NEXT:    movdqa %xmm0, %xmm1
-; X32-NEXT:    psllq $2, %xmm1
-; X32-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
-; X32-NEXT:    movd %xmm1, %edx
 ; X32-NEXT:    movd %xmm0, %eax
+; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-NEXT:    movd %xmm0, %edx
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test1:




More information about the llvm-commits mailing list