[llvm] 6735d52 - [MIPS] [MSA] Widen v2i8, v216 and v2i32 vectors (#123040)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 23 19:23:38 PST 2025


Author: Cinhi Young
Date: 2025-01-24T11:23:34+08:00
New Revision: 6735d527f9945fbf50c14a95cbdd66592472d622

URL: https://github.com/llvm/llvm-project/commit/6735d527f9945fbf50c14a95cbdd66592472d622
DIFF: https://github.com/llvm/llvm-project/commit/6735d527f9945fbf50c14a95cbdd66592472d622.diff

LOG: [MIPS] [MSA] Widen v2i8, v216 and v2i32 vectors (#123040)

- Widen v2i8, v2i16 and v2i32 vectors so they don't cast back and forth,
and make sure that instructions with correct data unit is being used.
- Handle undef indices for VSHF when lowering VECTOR_SHUFFLE (it crashes
if such index is present).

Added: 
    

Modified: 
    llvm/lib/Target/Mips/MipsSEISelLowering.cpp
    llvm/lib/Target/Mips/MipsSEISelLowering.h
    llvm/test/CodeGen/Mips/cconv/vector.ll
    llvm/test/CodeGen/Mips/msa/basic_operations.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 1d1b0f9c6ae2a9..71a70d9c2dd466 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -42,6 +42,7 @@
 #include "llvm/TargetParser/Triple.h"
 #include <algorithm>
 #include <cassert>
+#include <cstddef>
 #include <cstdint>
 #include <iterator>
 #include <utility>
@@ -59,6 +60,45 @@ static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                             "stores to their single precision "
                                             "counterparts"));
 
+// Widen the v2 vectors to the register width, i.e. v2i16 -> v8i16,
+// v2i32 -> v4i32, etc, to ensure the correct rail size is used, i.e.
+// INST.h for v16, INST.w for v32, INST.d for v64.
+TargetLoweringBase::LegalizeTypeAction
+MipsSETargetLowering::getPreferredVectorAction(MVT VT) const {
+  if (this->Subtarget.hasMSA()) {
+    switch (VT.SimpleTy) {
+    // Leave v2i1 vectors to be promoted to larger ones.
+    // Other i1 types will be promoted by default.
+    case MVT::v2i1:
+      return TypePromoteInteger;
+      break;
+    // 16-bit vector types (v2 and longer)
+    case MVT::v2i8:
+    // 32-bit vector types (v2 and longer)
+    case MVT::v2i16:
+    case MVT::v4i8:
+    // 64-bit vector types (v2 and longer)
+    case MVT::v2i32:
+    case MVT::v4i16:
+    case MVT::v8i8:
+      return TypeWidenVector;
+      break;
+    // Only word (.w) and doubleword (.d) are available for floating point
+    // vectors. That means floating point vectors should be either v2f64
+    // or v4f32.
+    // Here we only explicitly widen the f32 types - f16 will be promoted
+    // by default.
+    case MVT::v2f32:
+    case MVT::v3f32:
+      return TypeWidenVector;
+    // v2i64 is already 128-bit wide.
+    default:
+      break;
+    }
+  }
+  return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
 MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
                                            const MipsSubtarget &STI)
     : MipsTargetLowering(TM, STI) {
@@ -2929,8 +2969,14 @@ static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
 // if the type is v8i16 and all the indices are less than 8 then the second
 // operand is unused and can be replaced with anything. We choose to replace it
 // with the used operand since this reduces the number of instructions overall.
+//
+// NOTE: SPLATI shuffle masks may contain UNDEFs, since isSPLATI() treats
+//       UNDEFs as same as SPLATI index.
+//       For other instances we use the last valid index if UNDEF is
+//       encountered.
 static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
                                         const SmallVector<int, 16> &Indices,
+                                        const bool isSPLATI,
                                         SelectionDAG &DAG) {
   SmallVector<SDValue, 16> Ops;
   SDValue Op0;
@@ -2942,6 +2988,9 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
   SDLoc DL(Op);
   int ResTyNumElts = ResTy.getVectorNumElements();
 
+  assert(Indices[0] >= 0 &&
+         "shuffle mask starts with an UNDEF, which is not expected");
+
   for (int i = 0; i < ResTyNumElts; ++i) {
     // Idx == -1 means UNDEF
     int Idx = Indices[i];
@@ -2951,9 +3000,17 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
     if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
       Using2ndVec = true;
   }
-
-  for (int Idx : Indices)
+  int LastValidIndex = 0;
+  for (size_t i = 0; i < Indices.size(); i++) {
+    int Idx = Indices[i];
+    if (Idx < 0) {
+      // Continue using splati index or use the last valid index.
+      Idx = isSPLATI ? Indices[0] : LastValidIndex;
+    } else {
+      LastValidIndex = Idx;
+    }
     Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy));
+  }
 
   SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
 
@@ -2996,7 +3053,7 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
   // splati.[bhwd] is preferable to the others but is matched from
   // MipsISD::VSHF.
   if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
-    return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
+    return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, true, DAG);
   SDValue Result;
   if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
     return Result;
@@ -3012,7 +3069,7 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
     return Result;
   if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
     return Result;
-  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
+  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, false, DAG);
 }
 
 MachineBasicBlock *

diff  --git a/llvm/lib/Target/Mips/MipsSEISelLowering.h b/llvm/lib/Target/Mips/MipsSEISelLowering.h
index 43b88a9f095226..675131aefb6dd9 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.h
@@ -45,6 +45,9 @@ class TargetRegisterClass;
         MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
         unsigned *Fast = nullptr) const override;
 
+    TargetLoweringBase::LegalizeTypeAction
+    getPreferredVectorAction(MVT VT) const override;
+
     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

diff  --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll
index 28a7dc046139b2..383e5ef19cebf1 100644
--- a/llvm/test/CodeGen/Mips/cconv/vector.ll
+++ b/llvm/test/CodeGen/Mips/cconv/vector.ll
@@ -48,102 +48,86 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
 ;
 ; MIPS32R5EB-LABEL: i8_2:
 ; MIPS32R5EB:       # %bb.0:
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, -64
-; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 64
-; MIPS32R5EB-NEXT:    sw $ra, 60($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT:    sw $fp, 56($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -48
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32R5EB-NEXT:    sw $ra, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    sw $fp, 40($sp) # 4-byte Folded Spill
 ; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
 ; MIPS32R5EB-NEXT:    .cfi_offset 30, -8
 ; MIPS32R5EB-NEXT:    move $fp, $sp
 ; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EB-NEXT:    and $sp, $sp, $1
-; MIPS32R5EB-NEXT:    sh $5, 48($sp)
-; MIPS32R5EB-NEXT:    sh $4, 52($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 49($sp)
-; MIPS32R5EB-NEXT:    sw $1, 28($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 48($sp)
-; MIPS32R5EB-NEXT:    sw $1, 20($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 53($sp)
-; MIPS32R5EB-NEXT:    sw $1, 12($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 52($sp)
-; MIPS32R5EB-NEXT:    sw $1, 4($sp)
-; MIPS32R5EB-NEXT:    ld.d $w0, 16($sp)
-; MIPS32R5EB-NEXT:    ld.d $w1, 0($sp)
-; MIPS32R5EB-NEXT:    addv.d $w0, $w1, $w0
-; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
-; MIPS32R5EB-NEXT:    copy_s.w $1, $w0[1]
-; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[3]
-; MIPS32R5EB-NEXT:    sb $2, 45($sp)
-; MIPS32R5EB-NEXT:    sb $1, 44($sp)
-; MIPS32R5EB-NEXT:    lhu $2, 44($sp)
+; MIPS32R5EB-NEXT:    sh $5, 16($sp)
+; MIPS32R5EB-NEXT:    sh $4, 0($sp)
+; MIPS32R5EB-NEXT:    ld.b $w0, 16($sp)
+; MIPS32R5EB-NEXT:    ld.b $w1, 0($sp)
+; MIPS32R5EB-NEXT:    addv.b $w0, $w1, $w0
+; MIPS32R5EB-NEXT:    shf.b $w0, $w0, 177
+; MIPS32R5EB-NEXT:    copy_u.h $2, $w0[0]
 ; MIPS32R5EB-NEXT:    move $sp, $fp
-; MIPS32R5EB-NEXT:    lw $fp, 56($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT:    lw $ra, 60($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, 64
+; MIPS32R5EB-NEXT:    lw $fp, 40($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    lw $ra, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 48
 ; MIPS32R5EB-NEXT:    jr $ra
 ; MIPS32R5EB-NEXT:    nop
 ;
-; MIPS64R5-LABEL: i8_2:
-; MIPS64R5:       # %bb.0:
-; MIPS64R5-NEXT:    daddiu $sp, $sp, -16
-; MIPS64R5-NEXT:    .cfi_def_cfa_offset 16
-; MIPS64R5-NEXT:    sh $5, 8($sp)
-; MIPS64R5-NEXT:    sh $4, 12($sp)
-; MIPS64R5-NEXT:    lb $1, 9($sp)
-; MIPS64R5-NEXT:    lb $2, 8($sp)
-; MIPS64R5-NEXT:    insert.d $w0[0], $2
-; MIPS64R5-NEXT:    insert.d $w0[1], $1
-; MIPS64R5-NEXT:    lb $1, 13($sp)
-; MIPS64R5-NEXT:    lb $2, 12($sp)
-; MIPS64R5-NEXT:    insert.d $w1[0], $2
-; MIPS64R5-NEXT:    insert.d $w1[1], $1
-; MIPS64R5-NEXT:    addv.d $w0, $w1, $w0
-; MIPS64R5-NEXT:    copy_s.d $1, $w0[0]
-; MIPS64R5-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5-NEXT:    sb $2, 5($sp)
-; MIPS64R5-NEXT:    sb $1, 4($sp)
-; MIPS64R5-NEXT:    lh $2, 4($sp)
-; MIPS64R5-NEXT:    daddiu $sp, $sp, 16
-; MIPS64R5-NEXT:    jr $ra
-; MIPS64R5-NEXT:    nop
-;
 ; MIPS32R5EL-LABEL: i8_2:
 ; MIPS32R5EL:       # %bb.0:
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, -64
-; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 64
-; MIPS32R5EL-NEXT:    sw $ra, 60($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT:    sw $fp, 56($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -48
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32R5EL-NEXT:    sw $ra, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    sw $fp, 40($sp) # 4-byte Folded Spill
 ; MIPS32R5EL-NEXT:    .cfi_offset 31, -4
 ; MIPS32R5EL-NEXT:    .cfi_offset 30, -8
 ; MIPS32R5EL-NEXT:    move $fp, $sp
 ; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EL-NEXT:    and $sp, $sp, $1
-; MIPS32R5EL-NEXT:    sh $5, 48($sp)
-; MIPS32R5EL-NEXT:    sh $4, 52($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 49($sp)
-; MIPS32R5EL-NEXT:    sw $1, 24($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 48($sp)
-; MIPS32R5EL-NEXT:    sw $1, 16($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 53($sp)
-; MIPS32R5EL-NEXT:    sw $1, 8($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 52($sp)
-; MIPS32R5EL-NEXT:    sw $1, 0($sp)
-; MIPS32R5EL-NEXT:    ld.d $w0, 16($sp)
-; MIPS32R5EL-NEXT:    ld.d $w1, 0($sp)
-; MIPS32R5EL-NEXT:    addv.d $w0, $w1, $w0
-; MIPS32R5EL-NEXT:    copy_s.w $1, $w0[0]
-; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[2]
-; MIPS32R5EL-NEXT:    sb $2, 45($sp)
-; MIPS32R5EL-NEXT:    sb $1, 44($sp)
-; MIPS32R5EL-NEXT:    lhu $2, 44($sp)
+; MIPS32R5EL-NEXT:    sh $5, 16($sp)
+; MIPS32R5EL-NEXT:    sh $4, 0($sp)
+; MIPS32R5EL-NEXT:    ld.b $w0, 16($sp)
+; MIPS32R5EL-NEXT:    ld.b $w1, 0($sp)
+; MIPS32R5EL-NEXT:    addv.b $w0, $w1, $w0
+; MIPS32R5EL-NEXT:    copy_u.h $2, $w0[0]
 ; MIPS32R5EL-NEXT:    move $sp, $fp
-; MIPS32R5EL-NEXT:    lw $fp, 56($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT:    lw $ra, 60($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, 64
+; MIPS32R5EL-NEXT:    lw $fp, 40($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    lw $ra, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 48
 ; MIPS32R5EL-NEXT:    jr $ra
 ; MIPS32R5EL-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: i8_2:
+; MIPS64R5EB:       # %bb.0:
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -48
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 48
+; MIPS64R5EB-NEXT:    sh $5, 16($sp)
+; MIPS64R5EB-NEXT:    sh $4, 0($sp)
+; MIPS64R5EB-NEXT:    ld.b $w0, 16($sp)
+; MIPS64R5EB-NEXT:    ld.b $w1, 0($sp)
+; MIPS64R5EB-NEXT:    addv.b $w0, $w1, $w0
+; MIPS64R5EB-NEXT:    shf.b $w0, $w0, 177
+; MIPS64R5EB-NEXT:    copy_s.h $1, $w0[0]
+; MIPS64R5EB-NEXT:    sh $1, 44($sp)
+; MIPS64R5EB-NEXT:    lh $2, 44($sp)
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 48
+; MIPS64R5EB-NEXT:    jr $ra
+;
+; MIPS64R5EL-LABEL: i8_2:
+; MIPS64R5EL:       # %bb.0:
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -48
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 48
+; MIPS64R5EL-NEXT:    sh $5, 16($sp)
+; MIPS64R5EL-NEXT:    sh $4, 0($sp)
+; MIPS64R5EL-NEXT:    ld.b $w0, 16($sp)
+; MIPS64R5EL-NEXT:    ld.b $w1, 0($sp)
+; MIPS64R5EL-NEXT:    addv.b $w0, $w1, $w0
+; MIPS64R5EL-NEXT:    copy_s.h $1, $w0[0]
+; MIPS64R5EL-NEXT:    sh $1, 44($sp)
+; MIPS64R5EL-NEXT:    lh $2, 44($sp)
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 48
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
   %1 = add <2 x i8> %a, %b
   ret <2 x i8> %1
 }
@@ -229,127 +213,110 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x
 ;
 ; MIPS32R5EB-LABEL: i8x2_7:
 ; MIPS32R5EB:       # %bb.0: # %entry
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, -144
-; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 144
-; MIPS32R5EB-NEXT:    sw $ra, 140($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT:    sw $fp, 136($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -128
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 128
+; MIPS32R5EB-NEXT:    sw $ra, 124($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    sw $fp, 120($sp) # 4-byte Folded Spill
 ; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
 ; MIPS32R5EB-NEXT:    .cfi_offset 30, -8
 ; MIPS32R5EB-NEXT:    move $fp, $sp
 ; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EB-NEXT:    and $sp, $sp, $1
-; MIPS32R5EB-NEXT:    sh $5, 128($sp)
-; MIPS32R5EB-NEXT:    sh $4, 132($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 129($sp)
-; MIPS32R5EB-NEXT:    sw $1, 76($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 128($sp)
-; MIPS32R5EB-NEXT:    sw $1, 68($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 133($sp)
-; MIPS32R5EB-NEXT:    sw $1, 60($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 132($sp)
-; MIPS32R5EB-NEXT:    sw $1, 52($sp)
-; MIPS32R5EB-NEXT:    ld.d $w0, 64($sp)
-; MIPS32R5EB-NEXT:    ld.d $w1, 48($sp)
-; MIPS32R5EB-NEXT:    addv.d $w0, $w1, $w0
-; MIPS32R5EB-NEXT:    sh $6, 124($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 125($sp)
-; MIPS32R5EB-NEXT:    sw $1, 92($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 124($sp)
-; MIPS32R5EB-NEXT:    sw $1, 84($sp)
-; MIPS32R5EB-NEXT:    ld.d $w1, 80($sp)
-; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EB-NEXT:    sh $7, 120($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 121($sp)
-; MIPS32R5EB-NEXT:    sw $1, 108($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 120($sp)
-; MIPS32R5EB-NEXT:    sw $1, 100($sp)
-; MIPS32R5EB-NEXT:    ld.d $w1, 96($sp)
-; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EB-NEXT:    lbu $1, 163($fp)
-; MIPS32R5EB-NEXT:    sw $1, 12($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 162($fp)
-; MIPS32R5EB-NEXT:    sw $1, 4($sp)
-; MIPS32R5EB-NEXT:    ld.d $w1, 0($sp)
-; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EB-NEXT:    lbu $1, 167($fp)
-; MIPS32R5EB-NEXT:    sw $1, 28($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 166($fp)
-; MIPS32R5EB-NEXT:    sw $1, 20($sp)
-; MIPS32R5EB-NEXT:    ld.d $w1, 16($sp)
-; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EB-NEXT:    lbu $1, 171($fp)
-; MIPS32R5EB-NEXT:    sw $1, 44($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 170($fp)
-; MIPS32R5EB-NEXT:    sw $1, 36($sp)
-; MIPS32R5EB-NEXT:    ld.d $w1, 32($sp)
-; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
-; MIPS32R5EB-NEXT:    copy_s.w $1, $w0[1]
-; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[3]
-; MIPS32R5EB-NEXT:    sb $2, 117($sp)
-; MIPS32R5EB-NEXT:    sb $1, 116($sp)
-; MIPS32R5EB-NEXT:    lhu $2, 116($sp)
+; MIPS32R5EB-NEXT:    sh $5, 16($sp)
+; MIPS32R5EB-NEXT:    sh $4, 0($sp)
+; MIPS32R5EB-NEXT:    ld.b $w0, 16($sp)
+; MIPS32R5EB-NEXT:    ld.b $w1, 0($sp)
+; MIPS32R5EB-NEXT:    addv.b $w0, $w1, $w0
+; MIPS32R5EB-NEXT:    sh $6, 32($sp)
+; MIPS32R5EB-NEXT:    ld.b $w1, 32($sp)
+; MIPS32R5EB-NEXT:    addv.b $w0, $w0, $w1
+; MIPS32R5EB-NEXT:    sh $7, 48($sp)
+; MIPS32R5EB-NEXT:    ld.b $w1, 48($sp)
+; MIPS32R5EB-NEXT:    addv.b $w0, $w0, $w1
+; MIPS32R5EB-NEXT:    lhu $1, 146($fp)
+; MIPS32R5EB-NEXT:    sh $1, 64($sp)
+; MIPS32R5EB-NEXT:    ld.b $w1, 64($sp)
+; MIPS32R5EB-NEXT:    addv.b $w0, $w0, $w1
+; MIPS32R5EB-NEXT:    lhu $1, 150($fp)
+; MIPS32R5EB-NEXT:    sh $1, 80($sp)
+; MIPS32R5EB-NEXT:    ld.b $w1, 80($sp)
+; MIPS32R5EB-NEXT:    addv.b $w0, $w0, $w1
+; MIPS32R5EB-NEXT:    lhu $1, 154($fp)
+; MIPS32R5EB-NEXT:    sh $1, 96($sp)
+; MIPS32R5EB-NEXT:    ld.b $w1, 96($sp)
+; MIPS32R5EB-NEXT:    addv.b $w0, $w0, $w1
+; MIPS32R5EB-NEXT:    shf.b $w0, $w0, 177
+; MIPS32R5EB-NEXT:    copy_u.h $2, $w0[0]
 ; MIPS32R5EB-NEXT:    move $sp, $fp
-; MIPS32R5EB-NEXT:    lw $fp, 136($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT:    lw $ra, 140($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, 144
+; MIPS32R5EB-NEXT:    lw $fp, 120($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    lw $ra, 124($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 128
 ; MIPS32R5EB-NEXT:    jr $ra
 ; MIPS32R5EB-NEXT:    nop
 ;
-; MIPS64R5-LABEL: i8x2_7:
-; MIPS64R5:       # %bb.0: # %entry
-; MIPS64R5-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5-NEXT:    sh $5, 24($sp)
-; MIPS64R5-NEXT:    sh $4, 28($sp)
-; MIPS64R5-NEXT:    lb $1, 25($sp)
-; MIPS64R5-NEXT:    lb $2, 24($sp)
-; MIPS64R5-NEXT:    insert.d $w0[0], $2
-; MIPS64R5-NEXT:    insert.d $w0[1], $1
-; MIPS64R5-NEXT:    lb $1, 29($sp)
-; MIPS64R5-NEXT:    lb $2, 28($sp)
-; MIPS64R5-NEXT:    insert.d $w1[0], $2
-; MIPS64R5-NEXT:    insert.d $w1[1], $1
-; MIPS64R5-NEXT:    addv.d $w0, $w1, $w0
-; MIPS64R5-NEXT:    sh $6, 20($sp)
-; MIPS64R5-NEXT:    lb $1, 21($sp)
-; MIPS64R5-NEXT:    lb $2, 20($sp)
-; MIPS64R5-NEXT:    insert.d $w1[0], $2
-; MIPS64R5-NEXT:    insert.d $w1[1], $1
-; MIPS64R5-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5-NEXT:    sh $7, 16($sp)
-; MIPS64R5-NEXT:    lb $1, 17($sp)
-; MIPS64R5-NEXT:    lb $2, 16($sp)
-; MIPS64R5-NEXT:    insert.d $w1[0], $2
-; MIPS64R5-NEXT:    insert.d $w1[1], $1
-; MIPS64R5-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5-NEXT:    sh $8, 12($sp)
-; MIPS64R5-NEXT:    lb $1, 13($sp)
-; MIPS64R5-NEXT:    lb $2, 12($sp)
-; MIPS64R5-NEXT:    insert.d $w1[0], $2
-; MIPS64R5-NEXT:    insert.d $w1[1], $1
-; MIPS64R5-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5-NEXT:    sh $9, 8($sp)
-; MIPS64R5-NEXT:    lb $1, 9($sp)
-; MIPS64R5-NEXT:    lb $2, 8($sp)
-; MIPS64R5-NEXT:    insert.d $w1[0], $2
-; MIPS64R5-NEXT:    insert.d $w1[1], $1
-; MIPS64R5-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5-NEXT:    sh $10, 4($sp)
-; MIPS64R5-NEXT:    lb $1, 5($sp)
-; MIPS64R5-NEXT:    lb $2, 4($sp)
-; MIPS64R5-NEXT:    insert.d $w1[0], $2
-; MIPS64R5-NEXT:    insert.d $w1[1], $1
-; MIPS64R5-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5-NEXT:    copy_s.d $1, $w0[0]
-; MIPS64R5-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5-NEXT:    sb $2, 1($sp)
-; MIPS64R5-NEXT:    sb $1, 0($sp)
-; MIPS64R5-NEXT:    lh $2, 0($sp)
-; MIPS64R5-NEXT:    daddiu $sp, $sp, 32
-; MIPS64R5-NEXT:    jr $ra
-; MIPS64R5-NEXT:    nop
+; MIPS64R5EB-LABEL: i8x2_7:
+; MIPS64R5EB:       # %bb.0: # %entry
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -128
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 128
+; MIPS64R5EB-NEXT:    sh $5, 16($sp)
+; MIPS64R5EB-NEXT:    sh $4, 0($sp)
+; MIPS64R5EB-NEXT:    ld.b $w0, 16($sp)
+; MIPS64R5EB-NEXT:    ld.b $w1, 0($sp)
+; MIPS64R5EB-NEXT:    addv.b $w0, $w1, $w0
+; MIPS64R5EB-NEXT:    sh $6, 32($sp)
+; MIPS64R5EB-NEXT:    ld.b $w1, 32($sp)
+; MIPS64R5EB-NEXT:    addv.b $w0, $w0, $w1
+; MIPS64R5EB-NEXT:    sh $7, 48($sp)
+; MIPS64R5EB-NEXT:    ld.b $w1, 48($sp)
+; MIPS64R5EB-NEXT:    addv.b $w0, $w0, $w1
+; MIPS64R5EB-NEXT:    sh $8, 64($sp)
+; MIPS64R5EB-NEXT:    ld.b $w1, 64($sp)
+; MIPS64R5EB-NEXT:    addv.b $w0, $w0, $w1
+; MIPS64R5EB-NEXT:    sh $9, 80($sp)
+; MIPS64R5EB-NEXT:    ld.b $w1, 80($sp)
+; MIPS64R5EB-NEXT:    addv.b $w0, $w0, $w1
+; MIPS64R5EB-NEXT:    sh $10, 96($sp)
+; MIPS64R5EB-NEXT:    ld.b $w1, 96($sp)
+; MIPS64R5EB-NEXT:    addv.b $w0, $w0, $w1
+; MIPS64R5EB-NEXT:    shf.b $w0, $w0, 177
+; MIPS64R5EB-NEXT:    copy_s.h $1, $w0[0]
+; MIPS64R5EB-NEXT:    sh $1, 124($sp)
+; MIPS64R5EB-NEXT:    lh $2, 124($sp)
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 128
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: i8x2_7:
+; MIPS64R5EL:       # %bb.0: # %entry
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -128
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 128
+; MIPS64R5EL-NEXT:    sh $5, 16($sp)
+; MIPS64R5EL-NEXT:    sh $4, 0($sp)
+; MIPS64R5EL-NEXT:    ld.b $w0, 16($sp)
+; MIPS64R5EL-NEXT:    ld.b $w1, 0($sp)
+; MIPS64R5EL-NEXT:    addv.b $w0, $w1, $w0
+; MIPS64R5EL-NEXT:    sh $6, 32($sp)
+; MIPS64R5EL-NEXT:    ld.b $w1, 32($sp)
+; MIPS64R5EL-NEXT:    addv.b $w0, $w0, $w1
+; MIPS64R5EL-NEXT:    sh $7, 48($sp)
+; MIPS64R5EL-NEXT:    ld.b $w1, 48($sp)
+; MIPS64R5EL-NEXT:    addv.b $w0, $w0, $w1
+; MIPS64R5EL-NEXT:    sh $8, 64($sp)
+; MIPS64R5EL-NEXT:    ld.b $w1, 64($sp)
+; MIPS64R5EL-NEXT:    addv.b $w0, $w0, $w1
+; MIPS64R5EL-NEXT:    sh $9, 80($sp)
+; MIPS64R5EL-NEXT:    ld.b $w1, 80($sp)
+; MIPS64R5EL-NEXT:    addv.b $w0, $w0, $w1
+; MIPS64R5EL-NEXT:    sh $10, 96($sp)
+; MIPS64R5EL-NEXT:    ld.b $w1, 96($sp)
+; MIPS64R5EL-NEXT:    addv.b $w0, $w0, $w1
+; MIPS64R5EL-NEXT:    copy_s.h $1, $w0[0]
+; MIPS64R5EL-NEXT:    sh $1, 124($sp)
+; MIPS64R5EL-NEXT:    lh $2, 124($sp)
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 128
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
 ;
 ; MIPS32EL-LABEL: i8x2_7:
 ; MIPS32EL:       # %bb.0: # %entry
@@ -387,70 +354,44 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x
 ;
 ; MIPS32R5EL-LABEL: i8x2_7:
 ; MIPS32R5EL:       # %bb.0: # %entry
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, -144
-; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 144
-; MIPS32R5EL-NEXT:    sw $ra, 140($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT:    sw $fp, 136($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -128
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 128
+; MIPS32R5EL-NEXT:    sw $ra, 124($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    sw $fp, 120($sp) # 4-byte Folded Spill
 ; MIPS32R5EL-NEXT:    .cfi_offset 31, -4
 ; MIPS32R5EL-NEXT:    .cfi_offset 30, -8
 ; MIPS32R5EL-NEXT:    move $fp, $sp
 ; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EL-NEXT:    and $sp, $sp, $1
-; MIPS32R5EL-NEXT:    sh $5, 128($sp)
-; MIPS32R5EL-NEXT:    sh $4, 132($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 129($sp)
-; MIPS32R5EL-NEXT:    sw $1, 72($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 128($sp)
+; MIPS32R5EL-NEXT:    sh $5, 16($sp)
+; MIPS32R5EL-NEXT:    sh $4, 0($sp)
+; MIPS32R5EL-NEXT:    ld.b $w0, 16($sp)
+; MIPS32R5EL-NEXT:    ld.b $w1, 0($sp)
+; MIPS32R5EL-NEXT:    addv.b $w0, $w1, $w0
+; MIPS32R5EL-NEXT:    sh $6, 32($sp)
+; MIPS32R5EL-NEXT:    ld.b $w1, 32($sp)
+; MIPS32R5EL-NEXT:    addv.b $w0, $w0, $w1
+; MIPS32R5EL-NEXT:    sh $7, 48($sp)
+; MIPS32R5EL-NEXT:    ld.b $w1, 48($sp)
+; MIPS32R5EL-NEXT:    addv.b $w0, $w0, $w1
+; MIPS32R5EL-NEXT:    lw $1, 144($fp)
 ; MIPS32R5EL-NEXT:    sw $1, 64($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 133($sp)
-; MIPS32R5EL-NEXT:    sw $1, 56($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 132($sp)
-; MIPS32R5EL-NEXT:    sw $1, 48($sp)
-; MIPS32R5EL-NEXT:    ld.d $w0, 64($sp)
-; MIPS32R5EL-NEXT:    ld.d $w1, 48($sp)
-; MIPS32R5EL-NEXT:    addv.d $w0, $w1, $w0
-; MIPS32R5EL-NEXT:    sh $6, 124($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 125($sp)
-; MIPS32R5EL-NEXT:    sw $1, 88($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 124($sp)
+; MIPS32R5EL-NEXT:    ld.b $w1, 64($sp)
+; MIPS32R5EL-NEXT:    addv.b $w0, $w0, $w1
+; MIPS32R5EL-NEXT:    lw $1, 148($fp)
 ; MIPS32R5EL-NEXT:    sw $1, 80($sp)
-; MIPS32R5EL-NEXT:    ld.d $w1, 80($sp)
-; MIPS32R5EL-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EL-NEXT:    sh $7, 120($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 121($sp)
-; MIPS32R5EL-NEXT:    sw $1, 104($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 120($sp)
+; MIPS32R5EL-NEXT:    ld.b $w1, 80($sp)
+; MIPS32R5EL-NEXT:    addv.b $w0, $w0, $w1
+; MIPS32R5EL-NEXT:    lw $1, 152($fp)
 ; MIPS32R5EL-NEXT:    sw $1, 96($sp)
-; MIPS32R5EL-NEXT:    ld.d $w1, 96($sp)
-; MIPS32R5EL-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EL-NEXT:    lbu $1, 161($fp)
-; MIPS32R5EL-NEXT:    sw $1, 8($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 160($fp)
-; MIPS32R5EL-NEXT:    sw $1, 0($sp)
-; MIPS32R5EL-NEXT:    ld.d $w1, 0($sp)
-; MIPS32R5EL-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EL-NEXT:    lbu $1, 165($fp)
-; MIPS32R5EL-NEXT:    sw $1, 24($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 164($fp)
-; MIPS32R5EL-NEXT:    sw $1, 16($sp)
-; MIPS32R5EL-NEXT:    ld.d $w1, 16($sp)
-; MIPS32R5EL-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EL-NEXT:    lbu $1, 169($fp)
-; MIPS32R5EL-NEXT:    sw $1, 40($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 168($fp)
-; MIPS32R5EL-NEXT:    sw $1, 32($sp)
-; MIPS32R5EL-NEXT:    ld.d $w1, 32($sp)
-; MIPS32R5EL-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EL-NEXT:    copy_s.w $1, $w0[0]
-; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[2]
-; MIPS32R5EL-NEXT:    sb $2, 117($sp)
-; MIPS32R5EL-NEXT:    sb $1, 116($sp)
-; MIPS32R5EL-NEXT:    lhu $2, 116($sp)
+; MIPS32R5EL-NEXT:    ld.b $w1, 96($sp)
+; MIPS32R5EL-NEXT:    addv.b $w0, $w0, $w1
+; MIPS32R5EL-NEXT:    copy_u.h $2, $w0[0]
 ; MIPS32R5EL-NEXT:    move $sp, $fp
-; MIPS32R5EL-NEXT:    lw $fp, 136($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT:    lw $ra, 140($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, 144
+; MIPS32R5EL-NEXT:    lw $fp, 120($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    lw $ra, 124($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 128
 ; MIPS32R5EL-NEXT:    jr $ra
 ; MIPS32R5EL-NEXT:    nop
 entry:
@@ -514,77 +455,64 @@ define <4 x i8> @i8_4(<4 x i8> %a, <4 x i8> %b) {
 ; MIPS64-NEXT:    jr $ra
 ; MIPS64-NEXT:    nop
 ;
-; MIPS32R5-LABEL: i8_4:
-; MIPS32R5:       # %bb.0:
-; MIPS32R5-NEXT:    addiu $sp, $sp, -16
-; MIPS32R5-NEXT:    .cfi_def_cfa_offset 16
-; MIPS32R5-NEXT:    sw $5, 8($sp)
-; MIPS32R5-NEXT:    sw $4, 12($sp)
-; MIPS32R5-NEXT:    lbu $1, 9($sp)
-; MIPS32R5-NEXT:    lbu $2, 8($sp)
-; MIPS32R5-NEXT:    insert.w $w0[0], $2
-; MIPS32R5-NEXT:    insert.w $w0[1], $1
-; MIPS32R5-NEXT:    lbu $1, 10($sp)
-; MIPS32R5-NEXT:    insert.w $w0[2], $1
-; MIPS32R5-NEXT:    lbu $1, 11($sp)
-; MIPS32R5-NEXT:    insert.w $w0[3], $1
-; MIPS32R5-NEXT:    lbu $1, 13($sp)
-; MIPS32R5-NEXT:    lbu $2, 12($sp)
-; MIPS32R5-NEXT:    insert.w $w1[0], $2
-; MIPS32R5-NEXT:    insert.w $w1[1], $1
-; MIPS32R5-NEXT:    lbu $1, 14($sp)
-; MIPS32R5-NEXT:    insert.w $w1[2], $1
-; MIPS32R5-NEXT:    lbu $1, 15($sp)
-; MIPS32R5-NEXT:    insert.w $w1[3], $1
-; MIPS32R5-NEXT:    addv.w $w0, $w1, $w0
-; MIPS32R5-NEXT:    copy_s.w $1, $w0[0]
-; MIPS32R5-NEXT:    copy_s.w $2, $w0[1]
-; MIPS32R5-NEXT:    copy_s.w $3, $w0[2]
-; MIPS32R5-NEXT:    copy_s.w $4, $w0[3]
-; MIPS32R5-NEXT:    sb $4, 7($sp)
-; MIPS32R5-NEXT:    sb $3, 6($sp)
-; MIPS32R5-NEXT:    sb $2, 5($sp)
-; MIPS32R5-NEXT:    sb $1, 4($sp)
-; MIPS32R5-NEXT:    lw $2, 4($sp)
-; MIPS32R5-NEXT:    addiu $sp, $sp, 16
-; MIPS32R5-NEXT:    jr $ra
-; MIPS32R5-NEXT:    nop
+; MIPS32R5EB-LABEL: i8_4:
+; MIPS32R5EB:       # %bb.0:
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -48
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32R5EB-NEXT:    sw $ra, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    sw $fp, 40($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
+; MIPS32R5EB-NEXT:    .cfi_offset 30, -8
+; MIPS32R5EB-NEXT:    move $fp, $sp
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EB-NEXT:    and $sp, $sp, $1
+; MIPS32R5EB-NEXT:    sw $5, 16($sp)
+; MIPS32R5EB-NEXT:    sw $4, 0($sp)
+; MIPS32R5EB-NEXT:    ld.b $w0, 16($sp)
+; MIPS32R5EB-NEXT:    ld.b $w1, 0($sp)
+; MIPS32R5EB-NEXT:    addv.b $w0, $w1, $w0
+; MIPS32R5EB-NEXT:    shf.b $w0, $w0, 27
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EB-NEXT:    move $sp, $fp
+; MIPS32R5EB-NEXT:    lw $fp, 40($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    lw $ra, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 48
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
 ;
-; MIPS64R5-LABEL: i8_4:
-; MIPS64R5:       # %bb.0:
-; MIPS64R5-NEXT:    daddiu $sp, $sp, -16
-; MIPS64R5-NEXT:    .cfi_def_cfa_offset 16
-; MIPS64R5-NEXT:    sw $5, 8($sp)
-; MIPS64R5-NEXT:    sw $4, 12($sp)
-; MIPS64R5-NEXT:    lbu $1, 9($sp)
-; MIPS64R5-NEXT:    lbu $2, 8($sp)
-; MIPS64R5-NEXT:    insert.w $w0[0], $2
-; MIPS64R5-NEXT:    insert.w $w0[1], $1
-; MIPS64R5-NEXT:    lbu $1, 10($sp)
-; MIPS64R5-NEXT:    insert.w $w0[2], $1
-; MIPS64R5-NEXT:    lbu $1, 11($sp)
-; MIPS64R5-NEXT:    insert.w $w0[3], $1
-; MIPS64R5-NEXT:    lbu $1, 13($sp)
-; MIPS64R5-NEXT:    lbu $2, 12($sp)
-; MIPS64R5-NEXT:    insert.w $w1[0], $2
-; MIPS64R5-NEXT:    insert.w $w1[1], $1
-; MIPS64R5-NEXT:    lbu $1, 14($sp)
-; MIPS64R5-NEXT:    insert.w $w1[2], $1
-; MIPS64R5-NEXT:    lbu $1, 15($sp)
-; MIPS64R5-NEXT:    insert.w $w1[3], $1
-; MIPS64R5-NEXT:    addv.w $w0, $w1, $w0
-; MIPS64R5-NEXT:    copy_s.w $1, $w0[0]
-; MIPS64R5-NEXT:    copy_s.w $2, $w0[1]
-; MIPS64R5-NEXT:    copy_s.w $3, $w0[2]
-; MIPS64R5-NEXT:    copy_s.w $4, $w0[3]
-; MIPS64R5-NEXT:    sb $4, 7($sp)
-; MIPS64R5-NEXT:    sb $3, 6($sp)
-; MIPS64R5-NEXT:    sb $2, 5($sp)
-; MIPS64R5-NEXT:    sb $1, 4($sp)
-; MIPS64R5-NEXT:    lw $2, 4($sp)
-; MIPS64R5-NEXT:    daddiu $sp, $sp, 16
-; MIPS64R5-NEXT:    jr $ra
-; MIPS64R5-NEXT:    nop
+; MIPS64R5EB-LABEL: i8_4:
+; MIPS64R5EB:       # %bb.0:
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT:    sll $1, $5, 0
+; MIPS64R5EB-NEXT:    sw $1, 16($sp)
+; MIPS64R5EB-NEXT:    sll $1, $4, 0
+; MIPS64R5EB-NEXT:    sw $1, 0($sp)
+; MIPS64R5EB-NEXT:    ld.b $w0, 16($sp)
+; MIPS64R5EB-NEXT:    ld.b $w1, 0($sp)
+; MIPS64R5EB-NEXT:    addv.b $w0, $w1, $w0
+; MIPS64R5EB-NEXT:    shf.b $w0, $w0, 27
+; MIPS64R5EB-NEXT:    copy_s.w $2, $w0[0]
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: i8_4:
+; MIPS64R5EL:       # %bb.0:
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT:    sll $1, $5, 0
+; MIPS64R5EL-NEXT:    sw $1, 16($sp)
+; MIPS64R5EL-NEXT:    sll $1, $4, 0
+; MIPS64R5EL-NEXT:    sw $1, 0($sp)
+; MIPS64R5EL-NEXT:    ld.b $w0, 16($sp)
+; MIPS64R5EL-NEXT:    ld.b $w1, 0($sp)
+; MIPS64R5EL-NEXT:    addv.b $w0, $w1, $w0
+; MIPS64R5EL-NEXT:    copy_s.w $2, $w0[0]
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
   %1 = add <4 x i8> %a, %b
   ret <4 x i8> %1
 }
@@ -704,66 +632,16 @@ define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) {
 ; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EB-NEXT:    and $sp, $sp, $1
-; MIPS32R5EB-NEXT:    sw $6, 24($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 25($sp)
-; MIPS32R5EB-NEXT:    lbu $2, 24($sp)
-; MIPS32R5EB-NEXT:    sw $7, 28($sp)
-; MIPS32R5EB-NEXT:    insert.h $w0[0], $2
-; MIPS32R5EB-NEXT:    insert.h $w0[1], $1
-; MIPS32R5EB-NEXT:    lbu $1, 26($sp)
-; MIPS32R5EB-NEXT:    sw $4, 32($sp)
-; MIPS32R5EB-NEXT:    insert.h $w0[2], $1
-; MIPS32R5EB-NEXT:    lbu $1, 27($sp)
-; MIPS32R5EB-NEXT:    insert.h $w0[3], $1
-; MIPS32R5EB-NEXT:    lbu $1, 28($sp)
-; MIPS32R5EB-NEXT:    sw $5, 36($sp)
-; MIPS32R5EB-NEXT:    insert.h $w0[4], $1
-; MIPS32R5EB-NEXT:    lbu $1, 33($sp)
-; MIPS32R5EB-NEXT:    lbu $2, 32($sp)
-; MIPS32R5EB-NEXT:    insert.h $w1[0], $2
-; MIPS32R5EB-NEXT:    insert.h $w1[1], $1
-; MIPS32R5EB-NEXT:    lbu $1, 29($sp)
-; MIPS32R5EB-NEXT:    lbu $2, 34($sp)
-; MIPS32R5EB-NEXT:    insert.h $w1[2], $2
-; MIPS32R5EB-NEXT:    insert.h $w0[5], $1
-; MIPS32R5EB-NEXT:    lbu $1, 35($sp)
-; MIPS32R5EB-NEXT:    lbu $2, 31($sp)
-; MIPS32R5EB-NEXT:    lbu $3, 30($sp)
-; MIPS32R5EB-NEXT:    lbu $4, 39($sp)
-; MIPS32R5EB-NEXT:    insert.h $w0[6], $3
-; MIPS32R5EB-NEXT:    insert.h $w0[7], $2
-; MIPS32R5EB-NEXT:    insert.h $w1[3], $1
-; MIPS32R5EB-NEXT:    lbu $1, 36($sp)
-; MIPS32R5EB-NEXT:    insert.h $w1[4], $1
-; MIPS32R5EB-NEXT:    lbu $1, 37($sp)
-; MIPS32R5EB-NEXT:    insert.h $w1[5], $1
-; MIPS32R5EB-NEXT:    lbu $1, 38($sp)
-; MIPS32R5EB-NEXT:    insert.h $w1[6], $1
-; MIPS32R5EB-NEXT:    insert.h $w1[7], $4
-; MIPS32R5EB-NEXT:    addv.h $w0, $w1, $w0
-; MIPS32R5EB-NEXT:    copy_s.h $1, $w0[0]
-; MIPS32R5EB-NEXT:    copy_s.h $2, $w0[1]
-; MIPS32R5EB-NEXT:    copy_s.h $3, $w0[2]
-; MIPS32R5EB-NEXT:    copy_s.h $4, $w0[3]
-; MIPS32R5EB-NEXT:    copy_s.h $5, $w0[4]
-; MIPS32R5EB-NEXT:    copy_s.h $6, $w0[5]
-; MIPS32R5EB-NEXT:    copy_s.h $7, $w0[6]
-; MIPS32R5EB-NEXT:    copy_s.h $8, $w0[7]
-; MIPS32R5EB-NEXT:    sb $8, 23($sp)
-; MIPS32R5EB-NEXT:    sb $7, 22($sp)
-; MIPS32R5EB-NEXT:    sb $6, 21($sp)
-; MIPS32R5EB-NEXT:    sb $5, 20($sp)
-; MIPS32R5EB-NEXT:    sb $4, 19($sp)
-; MIPS32R5EB-NEXT:    sb $3, 18($sp)
-; MIPS32R5EB-NEXT:    sb $2, 17($sp)
-; MIPS32R5EB-NEXT:    sb $1, 16($sp)
-; MIPS32R5EB-NEXT:    lw $1, 20($sp)
-; MIPS32R5EB-NEXT:    sw $1, 12($sp)
-; MIPS32R5EB-NEXT:    lw $1, 16($sp)
-; MIPS32R5EB-NEXT:    sw $1, 4($sp)
-; MIPS32R5EB-NEXT:    ld.w $w0, 0($sp)
-; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
-; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[3]
+; MIPS32R5EB-NEXT:    sw $7, 20($sp)
+; MIPS32R5EB-NEXT:    sw $6, 16($sp)
+; MIPS32R5EB-NEXT:    sw $5, 4($sp)
+; MIPS32R5EB-NEXT:    sw $4, 0($sp)
+; MIPS32R5EB-NEXT:    ld.b $w0, 16($sp)
+; MIPS32R5EB-NEXT:    ld.b $w1, 0($sp)
+; MIPS32R5EB-NEXT:    addv.b $w0, $w1, $w0
+; MIPS32R5EB-NEXT:    shf.b $w0, $w0, 27
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32R5EB-NEXT:    move $sp, $fp
 ; MIPS32R5EB-NEXT:    lw $fp, 40($sp) # 4-byte Folded Reload
 ; MIPS32R5EB-NEXT:    lw $ra, 44($sp) # 4-byte Folded Reload
@@ -771,65 +649,35 @@ define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) {
 ; MIPS32R5EB-NEXT:    jr $ra
 ; MIPS32R5EB-NEXT:    nop
 ;
-; MIPS64R5-LABEL: i8_8:
-; MIPS64R5:       # %bb.0:
-; MIPS64R5-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5-NEXT:    sd $5, 16($sp)
-; MIPS64R5-NEXT:    lbu $1, 17($sp)
-; MIPS64R5-NEXT:    lbu $2, 16($sp)
-; MIPS64R5-NEXT:    sd $4, 24($sp)
-; MIPS64R5-NEXT:    insert.h $w0[0], $2
-; MIPS64R5-NEXT:    insert.h $w0[1], $1
-; MIPS64R5-NEXT:    lbu $1, 18($sp)
-; MIPS64R5-NEXT:    insert.h $w0[2], $1
-; MIPS64R5-NEXT:    lbu $1, 19($sp)
-; MIPS64R5-NEXT:    insert.h $w0[3], $1
-; MIPS64R5-NEXT:    lbu $1, 20($sp)
-; MIPS64R5-NEXT:    insert.h $w0[4], $1
-; MIPS64R5-NEXT:    lbu $1, 25($sp)
-; MIPS64R5-NEXT:    lbu $2, 24($sp)
-; MIPS64R5-NEXT:    insert.h $w1[0], $2
-; MIPS64R5-NEXT:    insert.h $w1[1], $1
-; MIPS64R5-NEXT:    lbu $1, 21($sp)
-; MIPS64R5-NEXT:    lbu $2, 26($sp)
-; MIPS64R5-NEXT:    insert.h $w1[2], $2
-; MIPS64R5-NEXT:    insert.h $w0[5], $1
-; MIPS64R5-NEXT:    lbu $1, 27($sp)
-; MIPS64R5-NEXT:    lbu $2, 23($sp)
-; MIPS64R5-NEXT:    lbu $3, 22($sp)
-; MIPS64R5-NEXT:    lbu $4, 31($sp)
-; MIPS64R5-NEXT:    insert.h $w0[6], $3
-; MIPS64R5-NEXT:    insert.h $w0[7], $2
-; MIPS64R5-NEXT:    insert.h $w1[3], $1
-; MIPS64R5-NEXT:    lbu $1, 28($sp)
-; MIPS64R5-NEXT:    insert.h $w1[4], $1
-; MIPS64R5-NEXT:    lbu $1, 29($sp)
-; MIPS64R5-NEXT:    insert.h $w1[5], $1
-; MIPS64R5-NEXT:    lbu $1, 30($sp)
-; MIPS64R5-NEXT:    insert.h $w1[6], $1
-; MIPS64R5-NEXT:    insert.h $w1[7], $4
-; MIPS64R5-NEXT:    addv.h $w0, $w1, $w0
-; MIPS64R5-NEXT:    copy_s.h $1, $w0[0]
-; MIPS64R5-NEXT:    copy_s.h $2, $w0[1]
-; MIPS64R5-NEXT:    copy_s.h $3, $w0[2]
-; MIPS64R5-NEXT:    copy_s.h $4, $w0[3]
-; MIPS64R5-NEXT:    copy_s.h $5, $w0[4]
-; MIPS64R5-NEXT:    copy_s.h $6, $w0[5]
-; MIPS64R5-NEXT:    copy_s.h $7, $w0[6]
-; MIPS64R5-NEXT:    copy_s.h $8, $w0[7]
-; MIPS64R5-NEXT:    sb $8, 15($sp)
-; MIPS64R5-NEXT:    sb $7, 14($sp)
-; MIPS64R5-NEXT:    sb $6, 13($sp)
-; MIPS64R5-NEXT:    sb $5, 12($sp)
-; MIPS64R5-NEXT:    sb $4, 11($sp)
-; MIPS64R5-NEXT:    sb $3, 10($sp)
-; MIPS64R5-NEXT:    sb $2, 9($sp)
-; MIPS64R5-NEXT:    sb $1, 8($sp)
-; MIPS64R5-NEXT:    ld $2, 8($sp)
-; MIPS64R5-NEXT:    daddiu $sp, $sp, 32
-; MIPS64R5-NEXT:    jr $ra
-; MIPS64R5-NEXT:    nop
+; MIPS64R5EB-LABEL: i8_8:
+; MIPS64R5EB:       # %bb.0:
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT:    sd $5, 16($sp)
+; MIPS64R5EB-NEXT:    sd $4, 0($sp)
+; MIPS64R5EB-NEXT:    ld.b $w0, 16($sp)
+; MIPS64R5EB-NEXT:    ld.b $w1, 0($sp)
+; MIPS64R5EB-NEXT:    addv.b $w0, $w1, $w0
+; MIPS64R5EB-NEXT:    shf.b $w0, $w0, 27
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: i8_8:
+; MIPS64R5EL:       # %bb.0:
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT:    sd $5, 16($sp)
+; MIPS64R5EL-NEXT:    sd $4, 0($sp)
+; MIPS64R5EL-NEXT:    ld.b $w0, 16($sp)
+; MIPS64R5EL-NEXT:    ld.b $w1, 0($sp)
+; MIPS64R5EL-NEXT:    addv.b $w0, $w1, $w0
+; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
 ;
 ; MIPS32R5EL-LABEL: i8_8:
 ; MIPS32R5EL:       # %bb.0:
@@ -843,66 +691,15 @@ define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) {
 ; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EL-NEXT:    and $sp, $sp, $1
-; MIPS32R5EL-NEXT:    sw $6, 24($sp)
-; MIPS32R5EL-NEXT:    lbu $1, 25($sp)
-; MIPS32R5EL-NEXT:    lbu $2, 24($sp)
-; MIPS32R5EL-NEXT:    sw $7, 28($sp)
-; MIPS32R5EL-NEXT:    insert.h $w0[0], $2
-; MIPS32R5EL-NEXT:    insert.h $w0[1], $1
-; MIPS32R5EL-NEXT:    lbu $1, 26($sp)
-; MIPS32R5EL-NEXT:    sw $4, 32($sp)
-; MIPS32R5EL-NEXT:    insert.h $w0[2], $1
-; MIPS32R5EL-NEXT:    lbu $1, 27($sp)
-; MIPS32R5EL-NEXT:    insert.h $w0[3], $1
-; MIPS32R5EL-NEXT:    lbu $1, 28($sp)
-; MIPS32R5EL-NEXT:    sw $5, 36($sp)
-; MIPS32R5EL-NEXT:    insert.h $w0[4], $1
-; MIPS32R5EL-NEXT:    lbu $1, 33($sp)
-; MIPS32R5EL-NEXT:    lbu $2, 32($sp)
-; MIPS32R5EL-NEXT:    insert.h $w1[0], $2
-; MIPS32R5EL-NEXT:    insert.h $w1[1], $1
-; MIPS32R5EL-NEXT:    lbu $1, 29($sp)
-; MIPS32R5EL-NEXT:    lbu $2, 34($sp)
-; MIPS32R5EL-NEXT:    insert.h $w1[2], $2
-; MIPS32R5EL-NEXT:    insert.h $w0[5], $1
-; MIPS32R5EL-NEXT:    lbu $1, 35($sp)
-; MIPS32R5EL-NEXT:    lbu $2, 31($sp)
-; MIPS32R5EL-NEXT:    lbu $3, 30($sp)
-; MIPS32R5EL-NEXT:    lbu $4, 39($sp)
-; MIPS32R5EL-NEXT:    insert.h $w0[6], $3
-; MIPS32R5EL-NEXT:    insert.h $w0[7], $2
-; MIPS32R5EL-NEXT:    insert.h $w1[3], $1
-; MIPS32R5EL-NEXT:    lbu $1, 36($sp)
-; MIPS32R5EL-NEXT:    insert.h $w1[4], $1
-; MIPS32R5EL-NEXT:    lbu $1, 37($sp)
-; MIPS32R5EL-NEXT:    insert.h $w1[5], $1
-; MIPS32R5EL-NEXT:    lbu $1, 38($sp)
-; MIPS32R5EL-NEXT:    insert.h $w1[6], $1
-; MIPS32R5EL-NEXT:    insert.h $w1[7], $4
-; MIPS32R5EL-NEXT:    addv.h $w0, $w1, $w0
-; MIPS32R5EL-NEXT:    copy_s.h $1, $w0[0]
-; MIPS32R5EL-NEXT:    copy_s.h $2, $w0[1]
-; MIPS32R5EL-NEXT:    copy_s.h $3, $w0[2]
-; MIPS32R5EL-NEXT:    copy_s.h $4, $w0[3]
-; MIPS32R5EL-NEXT:    copy_s.h $5, $w0[4]
-; MIPS32R5EL-NEXT:    copy_s.h $6, $w0[5]
-; MIPS32R5EL-NEXT:    copy_s.h $7, $w0[6]
-; MIPS32R5EL-NEXT:    copy_s.h $8, $w0[7]
-; MIPS32R5EL-NEXT:    sb $8, 23($sp)
-; MIPS32R5EL-NEXT:    sb $7, 22($sp)
-; MIPS32R5EL-NEXT:    sb $6, 21($sp)
-; MIPS32R5EL-NEXT:    sb $5, 20($sp)
-; MIPS32R5EL-NEXT:    sb $4, 19($sp)
-; MIPS32R5EL-NEXT:    sb $3, 18($sp)
-; MIPS32R5EL-NEXT:    sb $2, 17($sp)
-; MIPS32R5EL-NEXT:    sb $1, 16($sp)
-; MIPS32R5EL-NEXT:    lw $1, 20($sp)
-; MIPS32R5EL-NEXT:    sw $1, 8($sp)
-; MIPS32R5EL-NEXT:    lw $1, 16($sp)
-; MIPS32R5EL-NEXT:    sw $1, 0($sp)
-; MIPS32R5EL-NEXT:    ld.w $w0, 0($sp)
+; MIPS32R5EL-NEXT:    sw $7, 20($sp)
+; MIPS32R5EL-NEXT:    sw $6, 16($sp)
+; MIPS32R5EL-NEXT:    sw $5, 4($sp)
+; MIPS32R5EL-NEXT:    sw $4, 0($sp)
+; MIPS32R5EL-NEXT:    ld.b $w0, 16($sp)
+; MIPS32R5EL-NEXT:    ld.b $w1, 0($sp)
+; MIPS32R5EL-NEXT:    addv.b $w0, $w1, $w0
 ; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
-; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32R5EL-NEXT:    move $sp, $fp
 ; MIPS32R5EL-NEXT:    lw $fp, 40($sp) # 4-byte Folded Reload
 ; MIPS32R5EL-NEXT:    lw $ra, 44($sp) # 4-byte Folded Reload
@@ -1221,102 +1018,86 @@ define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) {
 ;
 ; MIPS32R5EB-LABEL: i16_2:
 ; MIPS32R5EB:       # %bb.0:
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, -64
-; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 64
-; MIPS32R5EB-NEXT:    sw $ra, 60($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT:    sw $fp, 56($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -48
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32R5EB-NEXT:    sw $ra, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    sw $fp, 40($sp) # 4-byte Folded Spill
 ; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
 ; MIPS32R5EB-NEXT:    .cfi_offset 30, -8
 ; MIPS32R5EB-NEXT:    move $fp, $sp
 ; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EB-NEXT:    and $sp, $sp, $1
-; MIPS32R5EB-NEXT:    sw $5, 48($sp)
-; MIPS32R5EB-NEXT:    sw $4, 52($sp)
-; MIPS32R5EB-NEXT:    lhu $1, 50($sp)
-; MIPS32R5EB-NEXT:    sw $1, 28($sp)
-; MIPS32R5EB-NEXT:    lhu $1, 48($sp)
-; MIPS32R5EB-NEXT:    sw $1, 20($sp)
-; MIPS32R5EB-NEXT:    lhu $1, 54($sp)
-; MIPS32R5EB-NEXT:    sw $1, 12($sp)
-; MIPS32R5EB-NEXT:    lhu $1, 52($sp)
-; MIPS32R5EB-NEXT:    sw $1, 4($sp)
-; MIPS32R5EB-NEXT:    ld.d $w0, 16($sp)
-; MIPS32R5EB-NEXT:    ld.d $w1, 0($sp)
-; MIPS32R5EB-NEXT:    addv.d $w0, $w1, $w0
-; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
-; MIPS32R5EB-NEXT:    copy_s.w $1, $w0[1]
-; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[3]
-; MIPS32R5EB-NEXT:    sh $2, 46($sp)
-; MIPS32R5EB-NEXT:    sh $1, 44($sp)
-; MIPS32R5EB-NEXT:    lw $2, 44($sp)
+; MIPS32R5EB-NEXT:    sw $5, 16($sp)
+; MIPS32R5EB-NEXT:    sw $4, 0($sp)
+; MIPS32R5EB-NEXT:    ld.h $w0, 16($sp)
+; MIPS32R5EB-NEXT:    ld.h $w1, 0($sp)
+; MIPS32R5EB-NEXT:    addv.h $w0, $w1, $w0
+; MIPS32R5EB-NEXT:    shf.h $w0, $w0, 177
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32R5EB-NEXT:    move $sp, $fp
-; MIPS32R5EB-NEXT:    lw $fp, 56($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT:    lw $ra, 60($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, 64
+; MIPS32R5EB-NEXT:    lw $fp, 40($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    lw $ra, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 48
 ; MIPS32R5EB-NEXT:    jr $ra
 ; MIPS32R5EB-NEXT:    nop
 ;
-; MIPS64R5-LABEL: i16_2:
-; MIPS64R5:       # %bb.0:
-; MIPS64R5-NEXT:    daddiu $sp, $sp, -16
-; MIPS64R5-NEXT:    .cfi_def_cfa_offset 16
-; MIPS64R5-NEXT:    sw $5, 8($sp)
-; MIPS64R5-NEXT:    sw $4, 12($sp)
-; MIPS64R5-NEXT:    lh $1, 10($sp)
-; MIPS64R5-NEXT:    lh $2, 8($sp)
-; MIPS64R5-NEXT:    insert.d $w0[0], $2
-; MIPS64R5-NEXT:    insert.d $w0[1], $1
-; MIPS64R5-NEXT:    lh $1, 14($sp)
-; MIPS64R5-NEXT:    lh $2, 12($sp)
-; MIPS64R5-NEXT:    insert.d $w1[0], $2
-; MIPS64R5-NEXT:    insert.d $w1[1], $1
-; MIPS64R5-NEXT:    addv.d $w0, $w1, $w0
-; MIPS64R5-NEXT:    copy_s.d $1, $w0[0]
-; MIPS64R5-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5-NEXT:    sh $2, 6($sp)
-; MIPS64R5-NEXT:    sh $1, 4($sp)
-; MIPS64R5-NEXT:    lw $2, 4($sp)
-; MIPS64R5-NEXT:    daddiu $sp, $sp, 16
-; MIPS64R5-NEXT:    jr $ra
-; MIPS64R5-NEXT:    nop
-;
 ; MIPS32R5EL-LABEL: i16_2:
 ; MIPS32R5EL:       # %bb.0:
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, -64
-; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 64
-; MIPS32R5EL-NEXT:    sw $ra, 60($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT:    sw $fp, 56($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -48
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32R5EL-NEXT:    sw $ra, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    sw $fp, 40($sp) # 4-byte Folded Spill
 ; MIPS32R5EL-NEXT:    .cfi_offset 31, -4
 ; MIPS32R5EL-NEXT:    .cfi_offset 30, -8
 ; MIPS32R5EL-NEXT:    move $fp, $sp
 ; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EL-NEXT:    and $sp, $sp, $1
-; MIPS32R5EL-NEXT:    sw $5, 48($sp)
-; MIPS32R5EL-NEXT:    sw $4, 52($sp)
-; MIPS32R5EL-NEXT:    lhu $1, 50($sp)
-; MIPS32R5EL-NEXT:    sw $1, 24($sp)
-; MIPS32R5EL-NEXT:    lhu $1, 48($sp)
-; MIPS32R5EL-NEXT:    sw $1, 16($sp)
-; MIPS32R5EL-NEXT:    lhu $1, 54($sp)
-; MIPS32R5EL-NEXT:    sw $1, 8($sp)
-; MIPS32R5EL-NEXT:    lhu $1, 52($sp)
-; MIPS32R5EL-NEXT:    sw $1, 0($sp)
-; MIPS32R5EL-NEXT:    ld.d $w0, 16($sp)
-; MIPS32R5EL-NEXT:    ld.d $w1, 0($sp)
-; MIPS32R5EL-NEXT:    addv.d $w0, $w1, $w0
-; MIPS32R5EL-NEXT:    copy_s.w $1, $w0[0]
-; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[2]
-; MIPS32R5EL-NEXT:    sh $2, 46($sp)
-; MIPS32R5EL-NEXT:    sh $1, 44($sp)
-; MIPS32R5EL-NEXT:    lw $2, 44($sp)
+; MIPS32R5EL-NEXT:    sw $5, 16($sp)
+; MIPS32R5EL-NEXT:    sw $4, 0($sp)
+; MIPS32R5EL-NEXT:    ld.h $w0, 16($sp)
+; MIPS32R5EL-NEXT:    ld.h $w1, 0($sp)
+; MIPS32R5EL-NEXT:    addv.h $w0, $w1, $w0
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32R5EL-NEXT:    move $sp, $fp
-; MIPS32R5EL-NEXT:    lw $fp, 56($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT:    lw $ra, 60($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, 64
+; MIPS32R5EL-NEXT:    lw $fp, 40($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    lw $ra, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 48
 ; MIPS32R5EL-NEXT:    jr $ra
 ; MIPS32R5EL-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: i16_2:
+; MIPS64R5EB:       # %bb.0:
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT:    sll $1, $5, 0
+; MIPS64R5EB-NEXT:    sw $1, 16($sp)
+; MIPS64R5EB-NEXT:    sll $1, $4, 0
+; MIPS64R5EB-NEXT:    sw $1, 0($sp)
+; MIPS64R5EB-NEXT:    ld.h $w0, 16($sp)
+; MIPS64R5EB-NEXT:    ld.h $w1, 0($sp)
+; MIPS64R5EB-NEXT:    addv.h $w0, $w1, $w0
+; MIPS64R5EB-NEXT:    shf.h $w0, $w0, 177
+; MIPS64R5EB-NEXT:    copy_s.w $2, $w0[0]
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: i16_2:
+; MIPS64R5EL:       # %bb.0:
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT:    sll $1, $5, 0
+; MIPS64R5EL-NEXT:    sw $1, 16($sp)
+; MIPS64R5EL-NEXT:    sll $1, $4, 0
+; MIPS64R5EL-NEXT:    sw $1, 0($sp)
+; MIPS64R5EL-NEXT:    ld.h $w0, 16($sp)
+; MIPS64R5EL-NEXT:    ld.h $w1, 0($sp)
+; MIPS64R5EL-NEXT:    addv.h $w0, $w1, $w0
+; MIPS64R5EL-NEXT:    copy_s.w $2, $w0[0]
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    jr $ra
   %1 = add <2 x i16> %a, %b
   ret <2 x i16> %1
 }
@@ -1384,84 +1165,50 @@ define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) {
 ; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EB-NEXT:    and $sp, $sp, $1
-; MIPS32R5EB-NEXT:    sw $6, 24($sp)
-; MIPS32R5EB-NEXT:    sw $7, 28($sp)
-; MIPS32R5EB-NEXT:    lhu $1, 26($sp)
-; MIPS32R5EB-NEXT:    lhu $2, 24($sp)
-; MIPS32R5EB-NEXT:    sw $4, 32($sp)
-; MIPS32R5EB-NEXT:    insert.w $w0[0], $2
-; MIPS32R5EB-NEXT:    insert.w $w0[1], $1
-; MIPS32R5EB-NEXT:    lhu $1, 28($sp)
-; MIPS32R5EB-NEXT:    sw $5, 36($sp)
-; MIPS32R5EB-NEXT:    insert.w $w0[2], $1
-; MIPS32R5EB-NEXT:    lhu $1, 30($sp)
-; MIPS32R5EB-NEXT:    insert.w $w0[3], $1
-; MIPS32R5EB-NEXT:    lhu $1, 34($sp)
-; MIPS32R5EB-NEXT:    lhu $2, 32($sp)
-; MIPS32R5EB-NEXT:    insert.w $w1[0], $2
-; MIPS32R5EB-NEXT:    insert.w $w1[1], $1
-; MIPS32R5EB-NEXT:    lhu $1, 36($sp)
-; MIPS32R5EB-NEXT:    insert.w $w1[2], $1
-; MIPS32R5EB-NEXT:    lhu $1, 38($sp)
-; MIPS32R5EB-NEXT:    insert.w $w1[3], $1
-; MIPS32R5EB-NEXT:    addv.w $w0, $w1, $w0
-; MIPS32R5EB-NEXT:    copy_s.w $1, $w0[0]
-; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
-; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[2]
-; MIPS32R5EB-NEXT:    copy_s.w $4, $w0[3]
-; MIPS32R5EB-NEXT:    sh $4, 22($sp)
-; MIPS32R5EB-NEXT:    sh $3, 20($sp)
-; MIPS32R5EB-NEXT:    sh $2, 18($sp)
-; MIPS32R5EB-NEXT:    sh $1, 16($sp)
-; MIPS32R5EB-NEXT:    lw $1, 20($sp)
-; MIPS32R5EB-NEXT:    sw $1, 12($sp)
-; MIPS32R5EB-NEXT:    lw $1, 16($sp)
-; MIPS32R5EB-NEXT:    sw $1, 4($sp)
-; MIPS32R5EB-NEXT:    ld.w $w0, 0($sp)
-; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
-; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[3]
+; MIPS32R5EB-NEXT:    sw $7, 20($sp)
+; MIPS32R5EB-NEXT:    sw $6, 16($sp)
+; MIPS32R5EB-NEXT:    sw $5, 4($sp)
+; MIPS32R5EB-NEXT:    sw $4, 0($sp)
+; MIPS32R5EB-NEXT:    ld.h $w0, 16($sp)
+; MIPS32R5EB-NEXT:    ld.h $w1, 0($sp)
+; MIPS32R5EB-NEXT:    addv.h $w0, $w1, $w0
+; MIPS32R5EB-NEXT:    shf.h $w0, $w0, 177
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32R5EB-NEXT:    move $sp, $fp
 ; MIPS32R5EB-NEXT:    lw $fp, 40($sp) # 4-byte Folded Reload
 ; MIPS32R5EB-NEXT:    lw $ra, 44($sp) # 4-byte Folded Reload
 ; MIPS32R5EB-NEXT:    addiu $sp, $sp, 48
 ; MIPS32R5EB-NEXT:    jr $ra
-; MIPS32R5EB-NEXT:    nop
 ;
-; MIPS64R5-LABEL: i16_4:
-; MIPS64R5:       # %bb.0:
-; MIPS64R5-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5-NEXT:    sd $5, 16($sp)
-; MIPS64R5-NEXT:    sd $4, 24($sp)
-; MIPS64R5-NEXT:    lhu $1, 18($sp)
-; MIPS64R5-NEXT:    lhu $2, 16($sp)
-; MIPS64R5-NEXT:    insert.w $w0[0], $2
-; MIPS64R5-NEXT:    insert.w $w0[1], $1
-; MIPS64R5-NEXT:    lhu $1, 20($sp)
-; MIPS64R5-NEXT:    insert.w $w0[2], $1
-; MIPS64R5-NEXT:    lhu $1, 22($sp)
-; MIPS64R5-NEXT:    insert.w $w0[3], $1
-; MIPS64R5-NEXT:    lhu $1, 26($sp)
-; MIPS64R5-NEXT:    lhu $2, 24($sp)
-; MIPS64R5-NEXT:    insert.w $w1[0], $2
-; MIPS64R5-NEXT:    insert.w $w1[1], $1
-; MIPS64R5-NEXT:    lhu $1, 28($sp)
-; MIPS64R5-NEXT:    insert.w $w1[2], $1
-; MIPS64R5-NEXT:    lhu $1, 30($sp)
-; MIPS64R5-NEXT:    insert.w $w1[3], $1
-; MIPS64R5-NEXT:    addv.w $w0, $w1, $w0
-; MIPS64R5-NEXT:    copy_s.w $1, $w0[0]
-; MIPS64R5-NEXT:    copy_s.w $2, $w0[1]
-; MIPS64R5-NEXT:    copy_s.w $3, $w0[2]
-; MIPS64R5-NEXT:    copy_s.w $4, $w0[3]
-; MIPS64R5-NEXT:    sh $4, 14($sp)
-; MIPS64R5-NEXT:    sh $3, 12($sp)
-; MIPS64R5-NEXT:    sh $2, 10($sp)
-; MIPS64R5-NEXT:    sh $1, 8($sp)
-; MIPS64R5-NEXT:    ld $2, 8($sp)
-; MIPS64R5-NEXT:    daddiu $sp, $sp, 32
-; MIPS64R5-NEXT:    jr $ra
-; MIPS64R5-NEXT:    nop
+; MIPS64R5EB-LABEL: i16_4:
+; MIPS64R5EB:       # %bb.0:
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT:    sd $5, 16($sp)
+; MIPS64R5EB-NEXT:    sd $4, 0($sp)
+; MIPS64R5EB-NEXT:    ld.h $w0, 16($sp)
+; MIPS64R5EB-NEXT:    ld.h $w1, 0($sp)
+; MIPS64R5EB-NEXT:    addv.h $w0, $w1, $w0
+; MIPS64R5EB-NEXT:    shf.h $w0, $w0, 27
+; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: i16_4:
+; MIPS64R5EL:       # %bb.0:
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT:    sd $5, 16($sp)
+; MIPS64R5EL-NEXT:    sd $4, 0($sp)
+; MIPS64R5EL-NEXT:    ld.h $w0, 16($sp)
+; MIPS64R5EL-NEXT:    ld.h $w1, 0($sp)
+; MIPS64R5EL-NEXT:    addv.h $w0, $w1, $w0
+; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
 ;
 ; MIPS32R5EL-LABEL: i16_4:
 ; MIPS32R5EL:       # %bb.0:
@@ -1475,42 +1222,15 @@ define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) {
 ; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EL-NEXT:    and $sp, $sp, $1
-; MIPS32R5EL-NEXT:    sw $6, 24($sp)
-; MIPS32R5EL-NEXT:    sw $7, 28($sp)
-; MIPS32R5EL-NEXT:    lhu $1, 26($sp)
-; MIPS32R5EL-NEXT:    lhu $2, 24($sp)
-; MIPS32R5EL-NEXT:    sw $4, 32($sp)
-; MIPS32R5EL-NEXT:    insert.w $w0[0], $2
-; MIPS32R5EL-NEXT:    insert.w $w0[1], $1
-; MIPS32R5EL-NEXT:    lhu $1, 28($sp)
-; MIPS32R5EL-NEXT:    sw $5, 36($sp)
-; MIPS32R5EL-NEXT:    insert.w $w0[2], $1
-; MIPS32R5EL-NEXT:    lhu $1, 30($sp)
-; MIPS32R5EL-NEXT:    insert.w $w0[3], $1
-; MIPS32R5EL-NEXT:    lhu $1, 34($sp)
-; MIPS32R5EL-NEXT:    lhu $2, 32($sp)
-; MIPS32R5EL-NEXT:    insert.w $w1[0], $2
-; MIPS32R5EL-NEXT:    insert.w $w1[1], $1
-; MIPS32R5EL-NEXT:    lhu $1, 36($sp)
-; MIPS32R5EL-NEXT:    insert.w $w1[2], $1
-; MIPS32R5EL-NEXT:    lhu $1, 38($sp)
-; MIPS32R5EL-NEXT:    insert.w $w1[3], $1
-; MIPS32R5EL-NEXT:    addv.w $w0, $w1, $w0
-; MIPS32R5EL-NEXT:    copy_s.w $1, $w0[0]
-; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[1]
-; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
-; MIPS32R5EL-NEXT:    copy_s.w $4, $w0[3]
-; MIPS32R5EL-NEXT:    sh $4, 22($sp)
-; MIPS32R5EL-NEXT:    sh $3, 20($sp)
-; MIPS32R5EL-NEXT:    sh $2, 18($sp)
-; MIPS32R5EL-NEXT:    sh $1, 16($sp)
-; MIPS32R5EL-NEXT:    lw $1, 20($sp)
-; MIPS32R5EL-NEXT:    sw $1, 8($sp)
-; MIPS32R5EL-NEXT:    lw $1, 16($sp)
-; MIPS32R5EL-NEXT:    sw $1, 0($sp)
-; MIPS32R5EL-NEXT:    ld.w $w0, 0($sp)
+; MIPS32R5EL-NEXT:    sw $7, 20($sp)
+; MIPS32R5EL-NEXT:    sw $6, 16($sp)
+; MIPS32R5EL-NEXT:    sw $5, 4($sp)
+; MIPS32R5EL-NEXT:    sw $4, 0($sp)
+; MIPS32R5EL-NEXT:    ld.h $w0, 16($sp)
+; MIPS32R5EL-NEXT:    ld.h $w1, 0($sp)
+; MIPS32R5EL-NEXT:    addv.h $w0, $w1, $w0
 ; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
-; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32R5EL-NEXT:    move $sp, $fp
 ; MIPS32R5EL-NEXT:    lw $fp, 40($sp) # 4-byte Folded Reload
 ; MIPS32R5EL-NEXT:    lw $ra, 44($sp) # 4-byte Folded Reload
@@ -1730,16 +1450,15 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
 ; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EB-NEXT:    and $sp, $sp, $1
-; MIPS32R5EB-NEXT:    sw $7, 28($sp)
-; MIPS32R5EB-NEXT:    sw $6, 20($sp)
-; MIPS32R5EB-NEXT:    sw $5, 12($sp)
-; MIPS32R5EB-NEXT:    sw $4, 4($sp)
-; MIPS32R5EB-NEXT:    ld.d $w0, 16($sp)
-; MIPS32R5EB-NEXT:    ld.d $w1, 0($sp)
-; MIPS32R5EB-NEXT:    addv.d $w0, $w1, $w0
-; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
-; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
-; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[3]
+; MIPS32R5EB-NEXT:    sw $7, 20($sp)
+; MIPS32R5EB-NEXT:    sw $6, 16($sp)
+; MIPS32R5EB-NEXT:    sw $5, 4($sp)
+; MIPS32R5EB-NEXT:    sw $4, 0($sp)
+; MIPS32R5EB-NEXT:    ld.w $w0, 16($sp)
+; MIPS32R5EB-NEXT:    ld.w $w1, 0($sp)
+; MIPS32R5EB-NEXT:    addv.w $w0, $w1, $w0
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32R5EB-NEXT:    move $sp, $fp
 ; MIPS32R5EB-NEXT:    lw $fp, 40($sp) # 4-byte Folded Reload
 ; MIPS32R5EB-NEXT:    lw $ra, 44($sp) # 4-byte Folded Reload
@@ -1751,18 +1470,13 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
 ; MIPS64R5EB:       # %bb.0:
 ; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
 ; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5EB-NEXT:    dsrl $1, $5, 32
-; MIPS64R5EB-NEXT:    insert.d $w0[0], $1
-; MIPS64R5EB-NEXT:    insert.d $w0[1], $5
-; MIPS64R5EB-NEXT:    dsrl $1, $4, 32
-; MIPS64R5EB-NEXT:    insert.d $w1[0], $1
-; MIPS64R5EB-NEXT:    insert.d $w1[1], $4
-; MIPS64R5EB-NEXT:    addv.d $w0, $w1, $w0
-; MIPS64R5EB-NEXT:    copy_s.d $1, $w0[0]
-; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5EB-NEXT:    sw $2, 12($sp)
-; MIPS64R5EB-NEXT:    sw $1, 8($sp)
-; MIPS64R5EB-NEXT:    ld $2, 8($sp)
+; MIPS64R5EB-NEXT:    sd $5, 16($sp)
+; MIPS64R5EB-NEXT:    sd $4, 0($sp)
+; MIPS64R5EB-NEXT:    ld.w $w0, 16($sp)
+; MIPS64R5EB-NEXT:    ld.w $w1, 0($sp)
+; MIPS64R5EB-NEXT:    addv.w $w0, $w1, $w0
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
 ; MIPS64R5EB-NEXT:    jr $ra
 ; MIPS64R5EB-NEXT:    nop
@@ -1779,15 +1493,15 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
 ; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EL-NEXT:    and $sp, $sp, $1
-; MIPS32R5EL-NEXT:    sw $7, 24($sp)
+; MIPS32R5EL-NEXT:    sw $7, 20($sp)
 ; MIPS32R5EL-NEXT:    sw $6, 16($sp)
-; MIPS32R5EL-NEXT:    sw $5, 8($sp)
+; MIPS32R5EL-NEXT:    sw $5, 4($sp)
 ; MIPS32R5EL-NEXT:    sw $4, 0($sp)
-; MIPS32R5EL-NEXT:    ld.d $w0, 16($sp)
-; MIPS32R5EL-NEXT:    ld.d $w1, 0($sp)
-; MIPS32R5EL-NEXT:    addv.d $w0, $w1, $w0
+; MIPS32R5EL-NEXT:    ld.w $w0, 16($sp)
+; MIPS32R5EL-NEXT:    ld.w $w1, 0($sp)
+; MIPS32R5EL-NEXT:    addv.w $w0, $w1, $w0
 ; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
-; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32R5EL-NEXT:    move $sp, $fp
 ; MIPS32R5EL-NEXT:    lw $fp, 40($sp) # 4-byte Folded Reload
 ; MIPS32R5EL-NEXT:    lw $ra, 44($sp) # 4-byte Folded Reload
@@ -1800,19 +1514,11 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
 ; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
 ; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
 ; MIPS64R5EL-NEXT:    sd $5, 16($sp)
-; MIPS64R5EL-NEXT:    sd $4, 24($sp)
-; MIPS64R5EL-NEXT:    lw $1, 20($sp)
-; MIPS64R5EL-NEXT:    insert.d $w0[0], $5
-; MIPS64R5EL-NEXT:    insert.d $w0[1], $1
-; MIPS64R5EL-NEXT:    lw $1, 28($sp)
-; MIPS64R5EL-NEXT:    insert.d $w1[0], $4
-; MIPS64R5EL-NEXT:    insert.d $w1[1], $1
-; MIPS64R5EL-NEXT:    addv.d $w0, $w1, $w0
-; MIPS64R5EL-NEXT:    copy_s.d $1, $w0[0]
-; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5EL-NEXT:    sw $2, 12($sp)
-; MIPS64R5EL-NEXT:    sw $1, 8($sp)
-; MIPS64R5EL-NEXT:    ld $2, 8($sp)
+; MIPS64R5EL-NEXT:    sd $4, 0($sp)
+; MIPS64R5EL-NEXT:    ld.w $w0, 16($sp)
+; MIPS64R5EL-NEXT:    ld.w $w1, 0($sp)
+; MIPS64R5EL-NEXT:    addv.w $w0, $w1, $w0
+; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
 ; MIPS64R5EL-NEXT:    jr $ra
 ; MIPS64R5EL-NEXT:    nop
@@ -2561,31 +2267,11 @@ define <8 x i8> @ret_8_i8() {
 ;
 ; MIPS32R5EB-LABEL: ret_8_i8:
 ; MIPS32R5EB:       # %bb.0:
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, -32
-; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 32
-; MIPS32R5EB-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT:    sw $fp, 24($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
-; MIPS32R5EB-NEXT:    .cfi_offset 30, -8
-; MIPS32R5EB-NEXT:    move $fp, $sp
-; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
-; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
-; MIPS32R5EB-NEXT:    and $sp, $sp, $1
 ; MIPS32R5EB-NEXT:    lui $1, %hi(gv8i8)
 ; MIPS32R5EB-NEXT:    lw $2, %lo(gv8i8)($1)
-; MIPS32R5EB-NEXT:    sw $2, 4($sp)
 ; MIPS32R5EB-NEXT:    addiu $1, $1, %lo(gv8i8)
-; MIPS32R5EB-NEXT:    lw $1, 4($1)
-; MIPS32R5EB-NEXT:    sw $1, 12($sp)
-; MIPS32R5EB-NEXT:    ld.w $w0, 0($sp)
-; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
-; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[3]
-; MIPS32R5EB-NEXT:    move $sp, $fp
-; MIPS32R5EB-NEXT:    lw $fp, 24($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT:    lw $3, 4($1)
 ; MIPS32R5EB-NEXT:    jr $ra
-; MIPS32R5EB-NEXT:    nop
 ;
 ; MIPS64R5-LABEL: ret_8_i8:
 ; MIPS64R5:       # %bb.0:
@@ -2599,29 +2285,10 @@ define <8 x i8> @ret_8_i8() {
 ;
 ; MIPS32R5EL-LABEL: ret_8_i8:
 ; MIPS32R5EL:       # %bb.0:
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, -32
-; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 32
-; MIPS32R5EL-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT:    sw $fp, 24($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT:    .cfi_offset 31, -4
-; MIPS32R5EL-NEXT:    .cfi_offset 30, -8
-; MIPS32R5EL-NEXT:    move $fp, $sp
-; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
-; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
-; MIPS32R5EL-NEXT:    and $sp, $sp, $1
 ; MIPS32R5EL-NEXT:    lui $1, %hi(gv8i8)
 ; MIPS32R5EL-NEXT:    lw $2, %lo(gv8i8)($1)
-; MIPS32R5EL-NEXT:    sw $2, 0($sp)
 ; MIPS32R5EL-NEXT:    addiu $1, $1, %lo(gv8i8)
-; MIPS32R5EL-NEXT:    lw $1, 4($1)
-; MIPS32R5EL-NEXT:    sw $1, 8($sp)
-; MIPS32R5EL-NEXT:    ld.w $w0, 0($sp)
-; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
-; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
-; MIPS32R5EL-NEXT:    move $sp, $fp
-; MIPS32R5EL-NEXT:    lw $fp, 24($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT:    lw $3, 4($1)
 ; MIPS32R5EL-NEXT:    jr $ra
 ; MIPS32R5EL-NEXT:    nop
   %1 = load <8 x i8>, ptr @gv8i8
@@ -2738,29 +2405,10 @@ define <4 x i16> @ret_4_i16() {
 ;
 ; MIPS32R5EB-LABEL: ret_4_i16:
 ; MIPS32R5EB:       # %bb.0:
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, -32
-; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 32
-; MIPS32R5EB-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT:    sw $fp, 24($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
-; MIPS32R5EB-NEXT:    .cfi_offset 30, -8
-; MIPS32R5EB-NEXT:    move $fp, $sp
-; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
-; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
-; MIPS32R5EB-NEXT:    and $sp, $sp, $1
 ; MIPS32R5EB-NEXT:    lui $1, %hi(gv4i16)
 ; MIPS32R5EB-NEXT:    lw $2, %lo(gv4i16)($1)
-; MIPS32R5EB-NEXT:    sw $2, 4($sp)
 ; MIPS32R5EB-NEXT:    addiu $1, $1, %lo(gv4i16)
-; MIPS32R5EB-NEXT:    lw $1, 4($1)
-; MIPS32R5EB-NEXT:    sw $1, 12($sp)
-; MIPS32R5EB-NEXT:    ld.w $w0, 0($sp)
-; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
-; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[3]
-; MIPS32R5EB-NEXT:    move $sp, $fp
-; MIPS32R5EB-NEXT:    lw $fp, 24($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT:    lw $3, 4($1)
 ; MIPS32R5EB-NEXT:    jr $ra
 ; MIPS32R5EB-NEXT:    nop
 ;
@@ -2776,29 +2424,10 @@ define <4 x i16> @ret_4_i16() {
 ;
 ; MIPS32R5EL-LABEL: ret_4_i16:
 ; MIPS32R5EL:       # %bb.0:
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, -32
-; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 32
-; MIPS32R5EL-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT:    sw $fp, 24($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT:    .cfi_offset 31, -4
-; MIPS32R5EL-NEXT:    .cfi_offset 30, -8
-; MIPS32R5EL-NEXT:    move $fp, $sp
-; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
-; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
-; MIPS32R5EL-NEXT:    and $sp, $sp, $1
 ; MIPS32R5EL-NEXT:    lui $1, %hi(gv4i16)
 ; MIPS32R5EL-NEXT:    lw $2, %lo(gv4i16)($1)
-; MIPS32R5EL-NEXT:    sw $2, 0($sp)
 ; MIPS32R5EL-NEXT:    addiu $1, $1, %lo(gv4i16)
-; MIPS32R5EL-NEXT:    lw $1, 4($1)
-; MIPS32R5EL-NEXT:    sw $1, 8($sp)
-; MIPS32R5EL-NEXT:    ld.w $w0, 0($sp)
-; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
-; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
-; MIPS32R5EL-NEXT:    move $sp, $fp
-; MIPS32R5EL-NEXT:    lw $fp, 24($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT:    lw $3, 4($1)
 ; MIPS32R5EL-NEXT:    jr $ra
 ; MIPS32R5EL-NEXT:    nop
   %1 = load <4 x i16>, ptr @gv4i16
@@ -2877,29 +2506,10 @@ define <2 x i32> @ret_2_i32() {
 ;
 ; MIPS32R5EB-LABEL: ret_2_i32:
 ; MIPS32R5EB:       # %bb.0:
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, -32
-; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 32
-; MIPS32R5EB-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT:    sw $fp, 24($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
-; MIPS32R5EB-NEXT:    .cfi_offset 30, -8
-; MIPS32R5EB-NEXT:    move $fp, $sp
-; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
-; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
-; MIPS32R5EB-NEXT:    and $sp, $sp, $1
 ; MIPS32R5EB-NEXT:    lui $1, %hi(gv2i32)
 ; MIPS32R5EB-NEXT:    lw $2, %lo(gv2i32)($1)
-; MIPS32R5EB-NEXT:    sw $2, 4($sp)
 ; MIPS32R5EB-NEXT:    addiu $1, $1, %lo(gv2i32)
-; MIPS32R5EB-NEXT:    lw $1, 4($1)
-; MIPS32R5EB-NEXT:    sw $1, 12($sp)
-; MIPS32R5EB-NEXT:    ld.w $w0, 0($sp)
-; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
-; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[3]
-; MIPS32R5EB-NEXT:    move $sp, $fp
-; MIPS32R5EB-NEXT:    lw $fp, 24($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT:    lw $3, 4($1)
 ; MIPS32R5EB-NEXT:    jr $ra
 ; MIPS32R5EB-NEXT:    nop
 ;
@@ -2915,29 +2525,10 @@ define <2 x i32> @ret_2_i32() {
 ;
 ; MIPS32R5EL-LABEL: ret_2_i32:
 ; MIPS32R5EL:       # %bb.0:
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, -32
-; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 32
-; MIPS32R5EL-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT:    sw $fp, 24($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT:    .cfi_offset 31, -4
-; MIPS32R5EL-NEXT:    .cfi_offset 30, -8
-; MIPS32R5EL-NEXT:    move $fp, $sp
-; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
-; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
-; MIPS32R5EL-NEXT:    and $sp, $sp, $1
 ; MIPS32R5EL-NEXT:    lui $1, %hi(gv2i32)
 ; MIPS32R5EL-NEXT:    lw $2, %lo(gv2i32)($1)
-; MIPS32R5EL-NEXT:    sw $2, 0($sp)
 ; MIPS32R5EL-NEXT:    addiu $1, $1, %lo(gv2i32)
-; MIPS32R5EL-NEXT:    lw $1, 4($1)
-; MIPS32R5EL-NEXT:    sw $1, 8($sp)
-; MIPS32R5EL-NEXT:    ld.w $w0, 0($sp)
-; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
-; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
-; MIPS32R5EL-NEXT:    move $sp, $fp
-; MIPS32R5EL-NEXT:    lw $fp, 24($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT:    lw $3, 4($1)
 ; MIPS32R5EL-NEXT:    jr $ra
 ; MIPS32R5EL-NEXT:    nop
   %1 = load <2 x i32>, ptr @gv2i32
@@ -3424,9 +3015,9 @@ define void @call_i8_4() {
 ;
 ; MIPS32R5EB-LABEL: call_i8_4:
 ; MIPS32R5EB:       # %bb.0: # %entry
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, -32
-; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 32
-; MIPS32R5EB-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -24
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32R5EB-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
 ; MIPS32R5EB-NEXT:    lui $1, 1543
 ; MIPS32R5EB-NEXT:    ori $4, $1, 2314
@@ -3436,17 +3027,17 @@ define void @call_i8_4() {
 ; MIPS32R5EB-NEXT:    nop
 ; MIPS32R5EB-NEXT:    lui $1, %hi(gv4i8)
 ; MIPS32R5EB-NEXT:    sw $2, %lo(gv4i8)($1)
-; MIPS32R5EB-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 24
 ; MIPS32R5EB-NEXT:    jr $ra
 ; MIPS32R5EB-NEXT:    nop
 ;
 ; MIPS64R5EB-LABEL: call_i8_4:
 ; MIPS64R5EB:       # %bb.0: # %entry
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5EB-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EB-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
 ; MIPS64R5EB-NEXT:    .cfi_offset 31, -8
 ; MIPS64R5EB-NEXT:    .cfi_offset 28, -16
 ; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_4)))
@@ -3461,9 +3052,9 @@ define void @call_i8_4() {
 ; MIPS64R5EB-NEXT:    nop
 ; MIPS64R5EB-NEXT:    ld $1, %got_disp(gv4i8)($gp)
 ; MIPS64R5EB-NEXT:    sw $2, 0($1)
-; MIPS64R5EB-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 16
 ; MIPS64R5EB-NEXT:    jr $ra
 ; MIPS64R5EB-NEXT:    nop
 ;
@@ -3512,9 +3103,9 @@ define void @call_i8_4() {
 ;
 ; MIPS32R5EL-LABEL: call_i8_4:
 ; MIPS32R5EL:       # %bb.0: # %entry
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, -32
-; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 32
-; MIPS32R5EL-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -24
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32R5EL-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; MIPS32R5EL-NEXT:    .cfi_offset 31, -4
 ; MIPS32R5EL-NEXT:    lui $1, 2569
 ; MIPS32R5EL-NEXT:    ori $4, $1, 1798
@@ -3523,17 +3114,17 @@ define void @call_i8_4() {
 ; MIPS32R5EL-NEXT:    nop
 ; MIPS32R5EL-NEXT:    lui $1, %hi(gv4i8)
 ; MIPS32R5EL-NEXT:    sw $2, %lo(gv4i8)($1)
-; MIPS32R5EL-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 24
 ; MIPS32R5EL-NEXT:    jr $ra
 ; MIPS32R5EL-NEXT:    nop
 ;
 ; MIPS64R5EL-LABEL: call_i8_4:
 ; MIPS64R5EL:       # %bb.0: # %entry
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5EL-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EL-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
 ; MIPS64R5EL-NEXT:    .cfi_offset 31, -8
 ; MIPS64R5EL-NEXT:    .cfi_offset 28, -16
 ; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_4)))
@@ -3547,9 +3138,9 @@ define void @call_i8_4() {
 ; MIPS64R5EL-NEXT:    nop
 ; MIPS64R5EL-NEXT:    ld $1, %got_disp(gv4i8)($gp)
 ; MIPS64R5EL-NEXT:    sw $2, 0($1)
-; MIPS64R5EL-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 16
 ; MIPS64R5EL-NEXT:    jr $ra
 ; MIPS64R5EL-NEXT:    nop
 entry:
@@ -3641,10 +3232,10 @@ define void @call_i8_8() {
 ;
 ; MIPS64R5EB-LABEL: call_i8_8:
 ; MIPS64R5EB:       # %bb.0: # %entry
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5EB-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EB-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
 ; MIPS64R5EB-NEXT:    .cfi_offset 31, -8
 ; MIPS64R5EB-NEXT:    .cfi_offset 28, -16
 ; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_8)))
@@ -3667,9 +3258,9 @@ define void @call_i8_8() {
 ; MIPS64R5EB-NEXT:    nop
 ; MIPS64R5EB-NEXT:    ld $1, %got_disp(gv8i8)($gp)
 ; MIPS64R5EB-NEXT:    sd $2, 0($1)
-; MIPS64R5EB-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 16
 ; MIPS64R5EB-NEXT:    jr $ra
 ; MIPS64R5EB-NEXT:    nop
 ;
@@ -3748,10 +3339,10 @@ define void @call_i8_8() {
 ;
 ; MIPS64R5EL-LABEL: call_i8_8:
 ; MIPS64R5EL:       # %bb.0: # %entry
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5EL-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EL-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
 ; MIPS64R5EL-NEXT:    .cfi_offset 31, -8
 ; MIPS64R5EL-NEXT:    .cfi_offset 28, -16
 ; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_8)))
@@ -3769,9 +3360,9 @@ define void @call_i8_8() {
 ; MIPS64R5EL-NEXT:    nop
 ; MIPS64R5EL-NEXT:    ld $1, %got_disp(gv8i8)($gp)
 ; MIPS64R5EL-NEXT:    sd $2, 0($1)
-; MIPS64R5EL-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 16
 ; MIPS64R5EL-NEXT:    jr $ra
 ; MIPS64R5EL-NEXT:    nop
 entry:
@@ -4059,9 +3650,9 @@ define void @calli16_2() {
 ;
 ; MIPS32R5EB-LABEL: calli16_2:
 ; MIPS32R5EB:       # %bb.0: # %entry
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, -32
-; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 32
-; MIPS32R5EB-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -24
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32R5EB-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
 ; MIPS32R5EB-NEXT:    lui $1, 6
 ; MIPS32R5EB-NEXT:    ori $4, $1, 7
@@ -4071,17 +3662,17 @@ define void @calli16_2() {
 ; MIPS32R5EB-NEXT:    nop
 ; MIPS32R5EB-NEXT:    lui $1, %hi(gv2i16)
 ; MIPS32R5EB-NEXT:    sw $2, %lo(gv2i16)($1)
-; MIPS32R5EB-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 24
 ; MIPS32R5EB-NEXT:    jr $ra
 ; MIPS32R5EB-NEXT:    nop
 ;
 ; MIPS64R5EB-LABEL: calli16_2:
 ; MIPS64R5EB:       # %bb.0: # %entry
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5EB-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EB-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
 ; MIPS64R5EB-NEXT:    .cfi_offset 31, -8
 ; MIPS64R5EB-NEXT:    .cfi_offset 28, -16
 ; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_2)))
@@ -4096,9 +3687,9 @@ define void @calli16_2() {
 ; MIPS64R5EB-NEXT:    nop
 ; MIPS64R5EB-NEXT:    ld $1, %got_disp(gv2i16)($gp)
 ; MIPS64R5EB-NEXT:    sw $2, 0($1)
-; MIPS64R5EB-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 16
 ; MIPS64R5EB-NEXT:    jr $ra
 ; MIPS64R5EB-NEXT:    nop
 ;
@@ -4149,9 +3740,9 @@ define void @calli16_2() {
 ;
 ; MIPS32R5EL-LABEL: calli16_2:
 ; MIPS32R5EL:       # %bb.0: # %entry
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, -32
-; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 32
-; MIPS32R5EL-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -24
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32R5EL-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; MIPS32R5EL-NEXT:    .cfi_offset 31, -4
 ; MIPS32R5EL-NEXT:    lui $1, 7
 ; MIPS32R5EL-NEXT:    ori $4, $1, 6
@@ -4161,17 +3752,17 @@ define void @calli16_2() {
 ; MIPS32R5EL-NEXT:    nop
 ; MIPS32R5EL-NEXT:    lui $1, %hi(gv2i16)
 ; MIPS32R5EL-NEXT:    sw $2, %lo(gv2i16)($1)
-; MIPS32R5EL-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 24
 ; MIPS32R5EL-NEXT:    jr $ra
 ; MIPS32R5EL-NEXT:    nop
 ;
 ; MIPS64R5EL-LABEL: calli16_2:
 ; MIPS64R5EL:       # %bb.0: # %entry
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5EL-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EL-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
 ; MIPS64R5EL-NEXT:    .cfi_offset 31, -8
 ; MIPS64R5EL-NEXT:    .cfi_offset 28, -16
 ; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_2)))
@@ -4186,9 +3777,9 @@ define void @calli16_2() {
 ; MIPS64R5EL-NEXT:    nop
 ; MIPS64R5EL-NEXT:    ld $1, %got_disp(gv2i16)($gp)
 ; MIPS64R5EL-NEXT:    sw $2, 0($1)
-; MIPS64R5EL-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 16
 ; MIPS64R5EL-NEXT:    jr $ra
 ; MIPS64R5EL-NEXT:    nop
 entry:
@@ -4282,10 +3873,10 @@ define void @calli16_4() {
 ;
 ; MIPS64R5EB-LABEL: calli16_4:
 ; MIPS64R5EB:       # %bb.0: # %entry
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5EB-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EB-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
 ; MIPS64R5EB-NEXT:    .cfi_offset 31, -8
 ; MIPS64R5EB-NEXT:    .cfi_offset 28, -16
 ; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_4)))
@@ -4308,9 +3899,9 @@ define void @calli16_4() {
 ; MIPS64R5EB-NEXT:    nop
 ; MIPS64R5EB-NEXT:    ld $1, %got_disp(gv4i16)($gp)
 ; MIPS64R5EB-NEXT:    sd $2, 0($1)
-; MIPS64R5EB-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 16
 ; MIPS64R5EB-NEXT:    jr $ra
 ; MIPS64R5EB-NEXT:    nop
 ;
@@ -4398,10 +3989,10 @@ define void @calli16_4() {
 ;
 ; MIPS64R5EL-LABEL: calli16_4:
 ; MIPS64R5EL:       # %bb.0: # %entry
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5EL-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EL-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
 ; MIPS64R5EL-NEXT:    .cfi_offset 31, -8
 ; MIPS64R5EL-NEXT:    .cfi_offset 28, -16
 ; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_4)))
@@ -4424,9 +4015,9 @@ define void @calli16_4() {
 ; MIPS64R5EL-NEXT:    nop
 ; MIPS64R5EL-NEXT:    ld $1, %got_disp(gv4i16)($gp)
 ; MIPS64R5EL-NEXT:    sd $2, 0($1)
-; MIPS64R5EL-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 16
 ; MIPS64R5EL-NEXT:    jr $ra
 ; MIPS64R5EL-NEXT:    nop
 entry:
@@ -4807,10 +4398,10 @@ define void @calli32_2() {
 ;
 ; MIPS64R5EB-LABEL: calli32_2:
 ; MIPS64R5EB:       # %bb.0: # %entry
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5EB-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EB-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
 ; MIPS64R5EB-NEXT:    .cfi_offset 31, -8
 ; MIPS64R5EB-NEXT:    .cfi_offset 28, -16
 ; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(calli32_2)))
@@ -4826,9 +4417,9 @@ define void @calli32_2() {
 ; MIPS64R5EB-NEXT:    nop
 ; MIPS64R5EB-NEXT:    ld $1, %got_disp(gv2i32)($gp)
 ; MIPS64R5EB-NEXT:    sd $2, 0($1)
-; MIPS64R5EB-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 16
 ; MIPS64R5EB-NEXT:    jr $ra
 ; MIPS64R5EB-NEXT:    nop
 ;
@@ -4862,10 +4453,10 @@ define void @calli32_2() {
 ;
 ; MIPS64R5EL-LABEL: calli32_2:
 ; MIPS64R5EL:       # %bb.0: # %entry
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5EL-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EL-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
 ; MIPS64R5EL-NEXT:    .cfi_offset 31, -8
 ; MIPS64R5EL-NEXT:    .cfi_offset 28, -16
 ; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(calli32_2)))
@@ -4882,9 +4473,9 @@ define void @calli32_2() {
 ; MIPS64R5EL-NEXT:    nop
 ; MIPS64R5EL-NEXT:    ld $1, %got_disp(gv2i32)($gp)
 ; MIPS64R5EL-NEXT:    sd $2, 0($1)
-; MIPS64R5EL-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 16
 ; MIPS64R5EL-NEXT:    jr $ra
 ; MIPS64R5EL-NEXT:    nop
 entry:

diff  --git a/llvm/test/CodeGen/Mips/msa/basic_operations.ll b/llvm/test/CodeGen/Mips/msa/basic_operations.ll
index 820259d7c7bc25..4fc3f57aa002df 100644
--- a/llvm/test/CodeGen/Mips/msa/basic_operations.ll
+++ b/llvm/test/CodeGen/Mips/msa/basic_operations.ll
@@ -2066,46 +2066,38 @@ define void @insert_v2i64_vidx(i64 signext %a) nounwind {
   ret void
 }
 
-; TODO: What code should be emitted?
-define void @truncstore() nounwind {
-; O32-LABEL: truncstore:
+; After legalizing shorter vectors with legal element sizes, this test is
+; no longer called truncstore.
+define void @store_i8_32bit() nounwind {
+; O32-LABEL: store_i8_32bit:
 ; O32:       # %bb.0:
 ; O32-NEXT:    lui $2, %hi(_gp_disp)
 ; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
 ; O32-NEXT:    addu $1, $2, $25
 ; O32-NEXT:    lw $1, %got(v4i8)($1)
-; O32-NEXT:    addiu $2, $zero, 255
-; O32-NEXT:    sb $2, 3($1)
-; O32-NEXT:    sb $2, 2($1)
-; O32-NEXT:    sb $2, 1($1)
+; O32-NEXT:    addiu $2, $zero, -1
 ; O32-NEXT:    jr $ra
-; O32-NEXT:    sb $2, 0($1)
+; O32-NEXT:    sw $2, 0($1)
 ;
-; N32-LABEL: truncstore:
+; N32-LABEL: store_i8_32bit:
 ; N32:       # %bb.0:
-; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(truncstore)))
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(store_i8_32bit)))
 ; N32-NEXT:    addu $1, $1, $25
-; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(truncstore)))
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(store_i8_32bit)))
 ; N32-NEXT:    lw $1, %got_disp(v4i8)($1)
-; N32-NEXT:    addiu $2, $zero, 255
-; N32-NEXT:    sb $2, 3($1)
-; N32-NEXT:    sb $2, 2($1)
-; N32-NEXT:    sb $2, 1($1)
+; N32-NEXT:    addiu $2, $zero, -1
 ; N32-NEXT:    jr $ra
-; N32-NEXT:    sb $2, 0($1)
+; N32-NEXT:    sw $2, 0($1)
 ;
-; N64-LABEL: truncstore:
+; N64-LABEL: store_i8_32bit:
 ; N64:       # %bb.0:
-; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(truncstore)))
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(store_i8_32bit)))
 ; N64-NEXT:    daddu $1, $1, $25
-; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(truncstore)))
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(store_i8_32bit)))
 ; N64-NEXT:    ld $1, %got_disp(v4i8)($1)
-; N64-NEXT:    addiu $2, $zero, 255
-; N64-NEXT:    sb $2, 3($1)
-; N64-NEXT:    sb $2, 2($1)
-; N64-NEXT:    sb $2, 1($1)
+; N64-NEXT:    addiu $2, $zero, -1
 ; N64-NEXT:    jr $ra
-; N64-NEXT:    sb $2, 0($1)
+; N64-NEXT:    sw $2, 0($1)
   store volatile <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, ptr @v4i8
   ret void
 }


        


More information about the llvm-commits mailing list