[llvm] 6735d52 - [MIPS] [MSA] Widen v2i8, v2i16 and v2i32 vectors (#123040)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 23 19:23:38 PST 2025
Author: Cinhi Young
Date: 2025-01-24T11:23:34+08:00
New Revision: 6735d527f9945fbf50c14a95cbdd66592472d622
URL: https://github.com/llvm/llvm-project/commit/6735d527f9945fbf50c14a95cbdd66592472d622
DIFF: https://github.com/llvm/llvm-project/commit/6735d527f9945fbf50c14a95cbdd66592472d622.diff
LOG: [MIPS] [MSA] Widen v2i8, v2i16 and v2i32 vectors (#123040)
- Widen v2i8, v2i16 and v2i32 vectors so they don't cast back and forth,
and make sure that instructions with the correct data unit are used.
- Handle undef indices for VSHF when lowering VECTOR_SHUFFLE (it previously
crashed if such an index was present).
Added:
Modified:
llvm/lib/Target/Mips/MipsSEISelLowering.cpp
llvm/lib/Target/Mips/MipsSEISelLowering.h
llvm/test/CodeGen/Mips/cconv/vector.ll
llvm/test/CodeGen/Mips/msa/basic_operations.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 1d1b0f9c6ae2a9..71a70d9c2dd466 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -42,6 +42,7 @@
#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
+#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>
@@ -59,6 +60,45 @@ static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
"stores to their single precision "
"counterparts"));
+// Widen the v2 vectors to the register width, i.e. v2i16 -> v8i16,
+// v2i32 -> v4i32, etc, to ensure the correct rail size is used, i.e.
+// INST.h for v16, INST.w for v32, INST.d for v64.
+TargetLoweringBase::LegalizeTypeAction
+MipsSETargetLowering::getPreferredVectorAction(MVT VT) const {
+ if (this->Subtarget.hasMSA()) {
+ switch (VT.SimpleTy) {
+ // Leave v2i1 vectors to be promoted to larger ones.
+ // Other i1 types will be promoted by default.
+ case MVT::v2i1:
+ return TypePromoteInteger;
+ break;
+ // 16-bit vector types (v2 and longer)
+ case MVT::v2i8:
+ // 32-bit vector types (v2 and longer)
+ case MVT::v2i16:
+ case MVT::v4i8:
+ // 64-bit vector types (v2 and longer)
+ case MVT::v2i32:
+ case MVT::v4i16:
+ case MVT::v8i8:
+ return TypeWidenVector;
+ break;
+ // Only word (.w) and doubleword (.d) are available for floating point
+ // vectors. That means floating point vectors should be either v2f64
+ // or v4f32.
+ // Here we only explicitly widen the f32 types - f16 will be promoted
+ // by default.
+ case MVT::v2f32:
+ case MVT::v3f32:
+ return TypeWidenVector;
+ // v2i64 is already 128-bit wide.
+ default:
+ break;
+ }
+ }
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
const MipsSubtarget &STI)
: MipsTargetLowering(TM, STI) {
@@ -2929,8 +2969,14 @@ static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
// if the type is v8i16 and all the indices are less than 8 then the second
// operand is unused and can be replaced with anything. We choose to replace it
// with the used operand since this reduces the number of instructions overall.
+//
+// NOTE: SPLATI shuffle masks may contain UNDEFs, since isSPLATI() treats
+// UNDEFs as same as SPLATI index.
+// For other instances we use the last valid index if UNDEF is
+// encountered.
static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
const SmallVector<int, 16> &Indices,
+ const bool isSPLATI,
SelectionDAG &DAG) {
SmallVector<SDValue, 16> Ops;
SDValue Op0;
@@ -2942,6 +2988,9 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
SDLoc DL(Op);
int ResTyNumElts = ResTy.getVectorNumElements();
+ assert(Indices[0] >= 0 &&
+ "shuffle mask starts with an UNDEF, which is not expected");
+
for (int i = 0; i < ResTyNumElts; ++i) {
// Idx == -1 means UNDEF
int Idx = Indices[i];
@@ -2951,9 +3000,17 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
Using2ndVec = true;
}
-
- for (int Idx : Indices)
+ int LastValidIndex = 0;
+ for (size_t i = 0; i < Indices.size(); i++) {
+ int Idx = Indices[i];
+ if (Idx < 0) {
+ // Continue using splati index or use the last valid index.
+ Idx = isSPLATI ? Indices[0] : LastValidIndex;
+ } else {
+ LastValidIndex = Idx;
+ }
Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy));
+ }
SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
@@ -2996,7 +3053,7 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
// splati.[bhwd] is preferable to the others but is matched from
// MipsISD::VSHF.
if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
- return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
+ return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, true, DAG);
SDValue Result;
if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
return Result;
@@ -3012,7 +3069,7 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
return Result;
if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
return Result;
- return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
+ return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, false, DAG);
}
MachineBasicBlock *
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.h b/llvm/lib/Target/Mips/MipsSEISelLowering.h
index 43b88a9f095226..675131aefb6dd9 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.h
@@ -45,6 +45,9 @@ class TargetRegisterClass;
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
unsigned *Fast = nullptr) const override;
+ TargetLoweringBase::LegalizeTypeAction
+ getPreferredVectorAction(MVT VT) const override;
+
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll
index 28a7dc046139b2..383e5ef19cebf1 100644
--- a/llvm/test/CodeGen/Mips/cconv/vector.ll
+++ b/llvm/test/CodeGen/Mips/cconv/vector.ll
@@ -48,102 +48,86 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
;
; MIPS32R5EB-LABEL: i8_2:
; MIPS32R5EB: # %bb.0:
-; MIPS32R5EB-NEXT: addiu $sp, $sp, -64
-; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 64
-; MIPS32R5EB-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT: addiu $sp, $sp, -48
+; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 48
+; MIPS32R5EB-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill
; MIPS32R5EB-NEXT: .cfi_offset 31, -4
; MIPS32R5EB-NEXT: .cfi_offset 30, -8
; MIPS32R5EB-NEXT: move $fp, $sp
; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30
; MIPS32R5EB-NEXT: addiu $1, $zero, -16
; MIPS32R5EB-NEXT: and $sp, $sp, $1
-; MIPS32R5EB-NEXT: sh $5, 48($sp)
-; MIPS32R5EB-NEXT: sh $4, 52($sp)
-; MIPS32R5EB-NEXT: lbu $1, 49($sp)
-; MIPS32R5EB-NEXT: sw $1, 28($sp)
-; MIPS32R5EB-NEXT: lbu $1, 48($sp)
-; MIPS32R5EB-NEXT: sw $1, 20($sp)
-; MIPS32R5EB-NEXT: lbu $1, 53($sp)
-; MIPS32R5EB-NEXT: sw $1, 12($sp)
-; MIPS32R5EB-NEXT: lbu $1, 52($sp)
-; MIPS32R5EB-NEXT: sw $1, 4($sp)
-; MIPS32R5EB-NEXT: ld.d $w0, 16($sp)
-; MIPS32R5EB-NEXT: ld.d $w1, 0($sp)
-; MIPS32R5EB-NEXT: addv.d $w0, $w1, $w0
-; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177
-; MIPS32R5EB-NEXT: copy_s.w $1, $w0[1]
-; MIPS32R5EB-NEXT: copy_s.w $2, $w0[3]
-; MIPS32R5EB-NEXT: sb $2, 45($sp)
-; MIPS32R5EB-NEXT: sb $1, 44($sp)
-; MIPS32R5EB-NEXT: lhu $2, 44($sp)
+; MIPS32R5EB-NEXT: sh $5, 16($sp)
+; MIPS32R5EB-NEXT: sh $4, 0($sp)
+; MIPS32R5EB-NEXT: ld.b $w0, 16($sp)
+; MIPS32R5EB-NEXT: ld.b $w1, 0($sp)
+; MIPS32R5EB-NEXT: addv.b $w0, $w1, $w0
+; MIPS32R5EB-NEXT: shf.b $w0, $w0, 177
+; MIPS32R5EB-NEXT: copy_u.h $2, $w0[0]
; MIPS32R5EB-NEXT: move $sp, $fp
-; MIPS32R5EB-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT: addiu $sp, $sp, 64
+; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT: addiu $sp, $sp, 48
; MIPS32R5EB-NEXT: jr $ra
; MIPS32R5EB-NEXT: nop
;
-; MIPS64R5-LABEL: i8_2:
-; MIPS64R5: # %bb.0:
-; MIPS64R5-NEXT: daddiu $sp, $sp, -16
-; MIPS64R5-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R5-NEXT: sh $5, 8($sp)
-; MIPS64R5-NEXT: sh $4, 12($sp)
-; MIPS64R5-NEXT: lb $1, 9($sp)
-; MIPS64R5-NEXT: lb $2, 8($sp)
-; MIPS64R5-NEXT: insert.d $w0[0], $2
-; MIPS64R5-NEXT: insert.d $w0[1], $1
-; MIPS64R5-NEXT: lb $1, 13($sp)
-; MIPS64R5-NEXT: lb $2, 12($sp)
-; MIPS64R5-NEXT: insert.d $w1[0], $2
-; MIPS64R5-NEXT: insert.d $w1[1], $1
-; MIPS64R5-NEXT: addv.d $w0, $w1, $w0
-; MIPS64R5-NEXT: copy_s.d $1, $w0[0]
-; MIPS64R5-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5-NEXT: sb $2, 5($sp)
-; MIPS64R5-NEXT: sb $1, 4($sp)
-; MIPS64R5-NEXT: lh $2, 4($sp)
-; MIPS64R5-NEXT: daddiu $sp, $sp, 16
-; MIPS64R5-NEXT: jr $ra
-; MIPS64R5-NEXT: nop
-;
; MIPS32R5EL-LABEL: i8_2:
; MIPS32R5EL: # %bb.0:
-; MIPS32R5EL-NEXT: addiu $sp, $sp, -64
-; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 64
-; MIPS32R5EL-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT: addiu $sp, $sp, -48
+; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 48
+; MIPS32R5EL-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill
; MIPS32R5EL-NEXT: .cfi_offset 31, -4
; MIPS32R5EL-NEXT: .cfi_offset 30, -8
; MIPS32R5EL-NEXT: move $fp, $sp
; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30
; MIPS32R5EL-NEXT: addiu $1, $zero, -16
; MIPS32R5EL-NEXT: and $sp, $sp, $1
-; MIPS32R5EL-NEXT: sh $5, 48($sp)
-; MIPS32R5EL-NEXT: sh $4, 52($sp)
-; MIPS32R5EL-NEXT: lbu $1, 49($sp)
-; MIPS32R5EL-NEXT: sw $1, 24($sp)
-; MIPS32R5EL-NEXT: lbu $1, 48($sp)
-; MIPS32R5EL-NEXT: sw $1, 16($sp)
-; MIPS32R5EL-NEXT: lbu $1, 53($sp)
-; MIPS32R5EL-NEXT: sw $1, 8($sp)
-; MIPS32R5EL-NEXT: lbu $1, 52($sp)
-; MIPS32R5EL-NEXT: sw $1, 0($sp)
-; MIPS32R5EL-NEXT: ld.d $w0, 16($sp)
-; MIPS32R5EL-NEXT: ld.d $w1, 0($sp)
-; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0
-; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0]
-; MIPS32R5EL-NEXT: copy_s.w $2, $w0[2]
-; MIPS32R5EL-NEXT: sb $2, 45($sp)
-; MIPS32R5EL-NEXT: sb $1, 44($sp)
-; MIPS32R5EL-NEXT: lhu $2, 44($sp)
+; MIPS32R5EL-NEXT: sh $5, 16($sp)
+; MIPS32R5EL-NEXT: sh $4, 0($sp)
+; MIPS32R5EL-NEXT: ld.b $w0, 16($sp)
+; MIPS32R5EL-NEXT: ld.b $w1, 0($sp)
+; MIPS32R5EL-NEXT: addv.b $w0, $w1, $w0
+; MIPS32R5EL-NEXT: copy_u.h $2, $w0[0]
; MIPS32R5EL-NEXT: move $sp, $fp
-; MIPS32R5EL-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT: addiu $sp, $sp, 64
+; MIPS32R5EL-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT: addiu $sp, $sp, 48
; MIPS32R5EL-NEXT: jr $ra
; MIPS32R5EL-NEXT: nop
+;
+; MIPS64R5EB-LABEL: i8_2:
+; MIPS64R5EB: # %bb.0:
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -48
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 48
+; MIPS64R5EB-NEXT: sh $5, 16($sp)
+; MIPS64R5EB-NEXT: sh $4, 0($sp)
+; MIPS64R5EB-NEXT: ld.b $w0, 16($sp)
+; MIPS64R5EB-NEXT: ld.b $w1, 0($sp)
+; MIPS64R5EB-NEXT: addv.b $w0, $w1, $w0
+; MIPS64R5EB-NEXT: shf.b $w0, $w0, 177
+; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0]
+; MIPS64R5EB-NEXT: sh $1, 44($sp)
+; MIPS64R5EB-NEXT: lh $2, 44($sp)
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 48
+; MIPS64R5EB-NEXT: jr $ra
+;
+; MIPS64R5EL-LABEL: i8_2:
+; MIPS64R5EL: # %bb.0:
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -48
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 48
+; MIPS64R5EL-NEXT: sh $5, 16($sp)
+; MIPS64R5EL-NEXT: sh $4, 0($sp)
+; MIPS64R5EL-NEXT: ld.b $w0, 16($sp)
+; MIPS64R5EL-NEXT: ld.b $w1, 0($sp)
+; MIPS64R5EL-NEXT: addv.b $w0, $w1, $w0
+; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0]
+; MIPS64R5EL-NEXT: sh $1, 44($sp)
+; MIPS64R5EL-NEXT: lh $2, 44($sp)
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 48
+; MIPS64R5EL-NEXT: jr $ra
+; MIPS64R5EL-NEXT: nop
%1 = add <2 x i8> %a, %b
ret <2 x i8> %1
}
@@ -229,127 +213,110 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x
;
; MIPS32R5EB-LABEL: i8x2_7:
; MIPS32R5EB: # %bb.0: # %entry
-; MIPS32R5EB-NEXT: addiu $sp, $sp, -144
-; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 144
-; MIPS32R5EB-NEXT: sw $ra, 140($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT: sw $fp, 136($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT: addiu $sp, $sp, -128
+; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 128
+; MIPS32R5EB-NEXT: sw $ra, 124($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT: sw $fp, 120($sp) # 4-byte Folded Spill
; MIPS32R5EB-NEXT: .cfi_offset 31, -4
; MIPS32R5EB-NEXT: .cfi_offset 30, -8
; MIPS32R5EB-NEXT: move $fp, $sp
; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30
; MIPS32R5EB-NEXT: addiu $1, $zero, -16
; MIPS32R5EB-NEXT: and $sp, $sp, $1
-; MIPS32R5EB-NEXT: sh $5, 128($sp)
-; MIPS32R5EB-NEXT: sh $4, 132($sp)
-; MIPS32R5EB-NEXT: lbu $1, 129($sp)
-; MIPS32R5EB-NEXT: sw $1, 76($sp)
-; MIPS32R5EB-NEXT: lbu $1, 128($sp)
-; MIPS32R5EB-NEXT: sw $1, 68($sp)
-; MIPS32R5EB-NEXT: lbu $1, 133($sp)
-; MIPS32R5EB-NEXT: sw $1, 60($sp)
-; MIPS32R5EB-NEXT: lbu $1, 132($sp)
-; MIPS32R5EB-NEXT: sw $1, 52($sp)
-; MIPS32R5EB-NEXT: ld.d $w0, 64($sp)
-; MIPS32R5EB-NEXT: ld.d $w1, 48($sp)
-; MIPS32R5EB-NEXT: addv.d $w0, $w1, $w0
-; MIPS32R5EB-NEXT: sh $6, 124($sp)
-; MIPS32R5EB-NEXT: lbu $1, 125($sp)
-; MIPS32R5EB-NEXT: sw $1, 92($sp)
-; MIPS32R5EB-NEXT: lbu $1, 124($sp)
-; MIPS32R5EB-NEXT: sw $1, 84($sp)
-; MIPS32R5EB-NEXT: ld.d $w1, 80($sp)
-; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1
-; MIPS32R5EB-NEXT: sh $7, 120($sp)
-; MIPS32R5EB-NEXT: lbu $1, 121($sp)
-; MIPS32R5EB-NEXT: sw $1, 108($sp)
-; MIPS32R5EB-NEXT: lbu $1, 120($sp)
-; MIPS32R5EB-NEXT: sw $1, 100($sp)
-; MIPS32R5EB-NEXT: ld.d $w1, 96($sp)
-; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1
-; MIPS32R5EB-NEXT: lbu $1, 163($fp)
-; MIPS32R5EB-NEXT: sw $1, 12($sp)
-; MIPS32R5EB-NEXT: lbu $1, 162($fp)
-; MIPS32R5EB-NEXT: sw $1, 4($sp)
-; MIPS32R5EB-NEXT: ld.d $w1, 0($sp)
-; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1
-; MIPS32R5EB-NEXT: lbu $1, 167($fp)
-; MIPS32R5EB-NEXT: sw $1, 28($sp)
-; MIPS32R5EB-NEXT: lbu $1, 166($fp)
-; MIPS32R5EB-NEXT: sw $1, 20($sp)
-; MIPS32R5EB-NEXT: ld.d $w1, 16($sp)
-; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1
-; MIPS32R5EB-NEXT: lbu $1, 171($fp)
-; MIPS32R5EB-NEXT: sw $1, 44($sp)
-; MIPS32R5EB-NEXT: lbu $1, 170($fp)
-; MIPS32R5EB-NEXT: sw $1, 36($sp)
-; MIPS32R5EB-NEXT: ld.d $w1, 32($sp)
-; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1
-; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177
-; MIPS32R5EB-NEXT: copy_s.w $1, $w0[1]
-; MIPS32R5EB-NEXT: copy_s.w $2, $w0[3]
-; MIPS32R5EB-NEXT: sb $2, 117($sp)
-; MIPS32R5EB-NEXT: sb $1, 116($sp)
-; MIPS32R5EB-NEXT: lhu $2, 116($sp)
+; MIPS32R5EB-NEXT: sh $5, 16($sp)
+; MIPS32R5EB-NEXT: sh $4, 0($sp)
+; MIPS32R5EB-NEXT: ld.b $w0, 16($sp)
+; MIPS32R5EB-NEXT: ld.b $w1, 0($sp)
+; MIPS32R5EB-NEXT: addv.b $w0, $w1, $w0
+; MIPS32R5EB-NEXT: sh $6, 32($sp)
+; MIPS32R5EB-NEXT: ld.b $w1, 32($sp)
+; MIPS32R5EB-NEXT: addv.b $w0, $w0, $w1
+; MIPS32R5EB-NEXT: sh $7, 48($sp)
+; MIPS32R5EB-NEXT: ld.b $w1, 48($sp)
+; MIPS32R5EB-NEXT: addv.b $w0, $w0, $w1
+; MIPS32R5EB-NEXT: lhu $1, 146($fp)
+; MIPS32R5EB-NEXT: sh $1, 64($sp)
+; MIPS32R5EB-NEXT: ld.b $w1, 64($sp)
+; MIPS32R5EB-NEXT: addv.b $w0, $w0, $w1
+; MIPS32R5EB-NEXT: lhu $1, 150($fp)
+; MIPS32R5EB-NEXT: sh $1, 80($sp)
+; MIPS32R5EB-NEXT: ld.b $w1, 80($sp)
+; MIPS32R5EB-NEXT: addv.b $w0, $w0, $w1
+; MIPS32R5EB-NEXT: lhu $1, 154($fp)
+; MIPS32R5EB-NEXT: sh $1, 96($sp)
+; MIPS32R5EB-NEXT: ld.b $w1, 96($sp)
+; MIPS32R5EB-NEXT: addv.b $w0, $w0, $w1
+; MIPS32R5EB-NEXT: shf.b $w0, $w0, 177
+; MIPS32R5EB-NEXT: copy_u.h $2, $w0[0]
; MIPS32R5EB-NEXT: move $sp, $fp
-; MIPS32R5EB-NEXT: lw $fp, 136($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT: lw $ra, 140($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT: addiu $sp, $sp, 144
+; MIPS32R5EB-NEXT: lw $fp, 120($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT: lw $ra, 124($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT: addiu $sp, $sp, 128
; MIPS32R5EB-NEXT: jr $ra
; MIPS32R5EB-NEXT: nop
;
-; MIPS64R5-LABEL: i8x2_7:
-; MIPS64R5: # %bb.0: # %entry
-; MIPS64R5-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5-NEXT: sh $5, 24($sp)
-; MIPS64R5-NEXT: sh $4, 28($sp)
-; MIPS64R5-NEXT: lb $1, 25($sp)
-; MIPS64R5-NEXT: lb $2, 24($sp)
-; MIPS64R5-NEXT: insert.d $w0[0], $2
-; MIPS64R5-NEXT: insert.d $w0[1], $1
-; MIPS64R5-NEXT: lb $1, 29($sp)
-; MIPS64R5-NEXT: lb $2, 28($sp)
-; MIPS64R5-NEXT: insert.d $w1[0], $2
-; MIPS64R5-NEXT: insert.d $w1[1], $1
-; MIPS64R5-NEXT: addv.d $w0, $w1, $w0
-; MIPS64R5-NEXT: sh $6, 20($sp)
-; MIPS64R5-NEXT: lb $1, 21($sp)
-; MIPS64R5-NEXT: lb $2, 20($sp)
-; MIPS64R5-NEXT: insert.d $w1[0], $2
-; MIPS64R5-NEXT: insert.d $w1[1], $1
-; MIPS64R5-NEXT: addv.d $w0, $w0, $w1
-; MIPS64R5-NEXT: sh $7, 16($sp)
-; MIPS64R5-NEXT: lb $1, 17($sp)
-; MIPS64R5-NEXT: lb $2, 16($sp)
-; MIPS64R5-NEXT: insert.d $w1[0], $2
-; MIPS64R5-NEXT: insert.d $w1[1], $1
-; MIPS64R5-NEXT: addv.d $w0, $w0, $w1
-; MIPS64R5-NEXT: sh $8, 12($sp)
-; MIPS64R5-NEXT: lb $1, 13($sp)
-; MIPS64R5-NEXT: lb $2, 12($sp)
-; MIPS64R5-NEXT: insert.d $w1[0], $2
-; MIPS64R5-NEXT: insert.d $w1[1], $1
-; MIPS64R5-NEXT: addv.d $w0, $w0, $w1
-; MIPS64R5-NEXT: sh $9, 8($sp)
-; MIPS64R5-NEXT: lb $1, 9($sp)
-; MIPS64R5-NEXT: lb $2, 8($sp)
-; MIPS64R5-NEXT: insert.d $w1[0], $2
-; MIPS64R5-NEXT: insert.d $w1[1], $1
-; MIPS64R5-NEXT: addv.d $w0, $w0, $w1
-; MIPS64R5-NEXT: sh $10, 4($sp)
-; MIPS64R5-NEXT: lb $1, 5($sp)
-; MIPS64R5-NEXT: lb $2, 4($sp)
-; MIPS64R5-NEXT: insert.d $w1[0], $2
-; MIPS64R5-NEXT: insert.d $w1[1], $1
-; MIPS64R5-NEXT: addv.d $w0, $w0, $w1
-; MIPS64R5-NEXT: copy_s.d $1, $w0[0]
-; MIPS64R5-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5-NEXT: sb $2, 1($sp)
-; MIPS64R5-NEXT: sb $1, 0($sp)
-; MIPS64R5-NEXT: lh $2, 0($sp)
-; MIPS64R5-NEXT: daddiu $sp, $sp, 32
-; MIPS64R5-NEXT: jr $ra
-; MIPS64R5-NEXT: nop
+; MIPS64R5EB-LABEL: i8x2_7:
+; MIPS64R5EB: # %bb.0: # %entry
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -128
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 128
+; MIPS64R5EB-NEXT: sh $5, 16($sp)
+; MIPS64R5EB-NEXT: sh $4, 0($sp)
+; MIPS64R5EB-NEXT: ld.b $w0, 16($sp)
+; MIPS64R5EB-NEXT: ld.b $w1, 0($sp)
+; MIPS64R5EB-NEXT: addv.b $w0, $w1, $w0
+; MIPS64R5EB-NEXT: sh $6, 32($sp)
+; MIPS64R5EB-NEXT: ld.b $w1, 32($sp)
+; MIPS64R5EB-NEXT: addv.b $w0, $w0, $w1
+; MIPS64R5EB-NEXT: sh $7, 48($sp)
+; MIPS64R5EB-NEXT: ld.b $w1, 48($sp)
+; MIPS64R5EB-NEXT: addv.b $w0, $w0, $w1
+; MIPS64R5EB-NEXT: sh $8, 64($sp)
+; MIPS64R5EB-NEXT: ld.b $w1, 64($sp)
+; MIPS64R5EB-NEXT: addv.b $w0, $w0, $w1
+; MIPS64R5EB-NEXT: sh $9, 80($sp)
+; MIPS64R5EB-NEXT: ld.b $w1, 80($sp)
+; MIPS64R5EB-NEXT: addv.b $w0, $w0, $w1
+; MIPS64R5EB-NEXT: sh $10, 96($sp)
+; MIPS64R5EB-NEXT: ld.b $w1, 96($sp)
+; MIPS64R5EB-NEXT: addv.b $w0, $w0, $w1
+; MIPS64R5EB-NEXT: shf.b $w0, $w0, 177
+; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0]
+; MIPS64R5EB-NEXT: sh $1, 124($sp)
+; MIPS64R5EB-NEXT: lh $2, 124($sp)
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 128
+; MIPS64R5EB-NEXT: jr $ra
+; MIPS64R5EB-NEXT: nop
+;
+; MIPS64R5EL-LABEL: i8x2_7:
+; MIPS64R5EL: # %bb.0: # %entry
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -128
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 128
+; MIPS64R5EL-NEXT: sh $5, 16($sp)
+; MIPS64R5EL-NEXT: sh $4, 0($sp)
+; MIPS64R5EL-NEXT: ld.b $w0, 16($sp)
+; MIPS64R5EL-NEXT: ld.b $w1, 0($sp)
+; MIPS64R5EL-NEXT: addv.b $w0, $w1, $w0
+; MIPS64R5EL-NEXT: sh $6, 32($sp)
+; MIPS64R5EL-NEXT: ld.b $w1, 32($sp)
+; MIPS64R5EL-NEXT: addv.b $w0, $w0, $w1
+; MIPS64R5EL-NEXT: sh $7, 48($sp)
+; MIPS64R5EL-NEXT: ld.b $w1, 48($sp)
+; MIPS64R5EL-NEXT: addv.b $w0, $w0, $w1
+; MIPS64R5EL-NEXT: sh $8, 64($sp)
+; MIPS64R5EL-NEXT: ld.b $w1, 64($sp)
+; MIPS64R5EL-NEXT: addv.b $w0, $w0, $w1
+; MIPS64R5EL-NEXT: sh $9, 80($sp)
+; MIPS64R5EL-NEXT: ld.b $w1, 80($sp)
+; MIPS64R5EL-NEXT: addv.b $w0, $w0, $w1
+; MIPS64R5EL-NEXT: sh $10, 96($sp)
+; MIPS64R5EL-NEXT: ld.b $w1, 96($sp)
+; MIPS64R5EL-NEXT: addv.b $w0, $w0, $w1
+; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0]
+; MIPS64R5EL-NEXT: sh $1, 124($sp)
+; MIPS64R5EL-NEXT: lh $2, 124($sp)
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 128
+; MIPS64R5EL-NEXT: jr $ra
+; MIPS64R5EL-NEXT: nop
;
; MIPS32EL-LABEL: i8x2_7:
; MIPS32EL: # %bb.0: # %entry
@@ -387,70 +354,44 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x
;
; MIPS32R5EL-LABEL: i8x2_7:
; MIPS32R5EL: # %bb.0: # %entry
-; MIPS32R5EL-NEXT: addiu $sp, $sp, -144
-; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 144
-; MIPS32R5EL-NEXT: sw $ra, 140($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT: sw $fp, 136($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT: addiu $sp, $sp, -128
+; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 128
+; MIPS32R5EL-NEXT: sw $ra, 124($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT: sw $fp, 120($sp) # 4-byte Folded Spill
; MIPS32R5EL-NEXT: .cfi_offset 31, -4
; MIPS32R5EL-NEXT: .cfi_offset 30, -8
; MIPS32R5EL-NEXT: move $fp, $sp
; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30
; MIPS32R5EL-NEXT: addiu $1, $zero, -16
; MIPS32R5EL-NEXT: and $sp, $sp, $1
-; MIPS32R5EL-NEXT: sh $5, 128($sp)
-; MIPS32R5EL-NEXT: sh $4, 132($sp)
-; MIPS32R5EL-NEXT: lbu $1, 129($sp)
-; MIPS32R5EL-NEXT: sw $1, 72($sp)
-; MIPS32R5EL-NEXT: lbu $1, 128($sp)
+; MIPS32R5EL-NEXT: sh $5, 16($sp)
+; MIPS32R5EL-NEXT: sh $4, 0($sp)
+; MIPS32R5EL-NEXT: ld.b $w0, 16($sp)
+; MIPS32R5EL-NEXT: ld.b $w1, 0($sp)
+; MIPS32R5EL-NEXT: addv.b $w0, $w1, $w0
+; MIPS32R5EL-NEXT: sh $6, 32($sp)
+; MIPS32R5EL-NEXT: ld.b $w1, 32($sp)
+; MIPS32R5EL-NEXT: addv.b $w0, $w0, $w1
+; MIPS32R5EL-NEXT: sh $7, 48($sp)
+; MIPS32R5EL-NEXT: ld.b $w1, 48($sp)
+; MIPS32R5EL-NEXT: addv.b $w0, $w0, $w1
+; MIPS32R5EL-NEXT: lw $1, 144($fp)
; MIPS32R5EL-NEXT: sw $1, 64($sp)
-; MIPS32R5EL-NEXT: lbu $1, 133($sp)
-; MIPS32R5EL-NEXT: sw $1, 56($sp)
-; MIPS32R5EL-NEXT: lbu $1, 132($sp)
-; MIPS32R5EL-NEXT: sw $1, 48($sp)
-; MIPS32R5EL-NEXT: ld.d $w0, 64($sp)
-; MIPS32R5EL-NEXT: ld.d $w1, 48($sp)
-; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0
-; MIPS32R5EL-NEXT: sh $6, 124($sp)
-; MIPS32R5EL-NEXT: lbu $1, 125($sp)
-; MIPS32R5EL-NEXT: sw $1, 88($sp)
-; MIPS32R5EL-NEXT: lbu $1, 124($sp)
+; MIPS32R5EL-NEXT: ld.b $w1, 64($sp)
+; MIPS32R5EL-NEXT: addv.b $w0, $w0, $w1
+; MIPS32R5EL-NEXT: lw $1, 148($fp)
; MIPS32R5EL-NEXT: sw $1, 80($sp)
-; MIPS32R5EL-NEXT: ld.d $w1, 80($sp)
-; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1
-; MIPS32R5EL-NEXT: sh $7, 120($sp)
-; MIPS32R5EL-NEXT: lbu $1, 121($sp)
-; MIPS32R5EL-NEXT: sw $1, 104($sp)
-; MIPS32R5EL-NEXT: lbu $1, 120($sp)
+; MIPS32R5EL-NEXT: ld.b $w1, 80($sp)
+; MIPS32R5EL-NEXT: addv.b $w0, $w0, $w1
+; MIPS32R5EL-NEXT: lw $1, 152($fp)
; MIPS32R5EL-NEXT: sw $1, 96($sp)
-; MIPS32R5EL-NEXT: ld.d $w1, 96($sp)
-; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1
-; MIPS32R5EL-NEXT: lbu $1, 161($fp)
-; MIPS32R5EL-NEXT: sw $1, 8($sp)
-; MIPS32R5EL-NEXT: lbu $1, 160($fp)
-; MIPS32R5EL-NEXT: sw $1, 0($sp)
-; MIPS32R5EL-NEXT: ld.d $w1, 0($sp)
-; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1
-; MIPS32R5EL-NEXT: lbu $1, 165($fp)
-; MIPS32R5EL-NEXT: sw $1, 24($sp)
-; MIPS32R5EL-NEXT: lbu $1, 164($fp)
-; MIPS32R5EL-NEXT: sw $1, 16($sp)
-; MIPS32R5EL-NEXT: ld.d $w1, 16($sp)
-; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1
-; MIPS32R5EL-NEXT: lbu $1, 169($fp)
-; MIPS32R5EL-NEXT: sw $1, 40($sp)
-; MIPS32R5EL-NEXT: lbu $1, 168($fp)
-; MIPS32R5EL-NEXT: sw $1, 32($sp)
-; MIPS32R5EL-NEXT: ld.d $w1, 32($sp)
-; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1
-; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0]
-; MIPS32R5EL-NEXT: copy_s.w $2, $w0[2]
-; MIPS32R5EL-NEXT: sb $2, 117($sp)
-; MIPS32R5EL-NEXT: sb $1, 116($sp)
-; MIPS32R5EL-NEXT: lhu $2, 116($sp)
+; MIPS32R5EL-NEXT: ld.b $w1, 96($sp)
+; MIPS32R5EL-NEXT: addv.b $w0, $w0, $w1
+; MIPS32R5EL-NEXT: copy_u.h $2, $w0[0]
; MIPS32R5EL-NEXT: move $sp, $fp
-; MIPS32R5EL-NEXT: lw $fp, 136($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT: lw $ra, 140($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT: addiu $sp, $sp, 144
+; MIPS32R5EL-NEXT: lw $fp, 120($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT: lw $ra, 124($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT: addiu $sp, $sp, 128
; MIPS32R5EL-NEXT: jr $ra
; MIPS32R5EL-NEXT: nop
entry:
@@ -514,77 +455,64 @@ define <4 x i8> @i8_4(<4 x i8> %a, <4 x i8> %b) {
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
;
-; MIPS32R5-LABEL: i8_4:
-; MIPS32R5: # %bb.0:
-; MIPS32R5-NEXT: addiu $sp, $sp, -16
-; MIPS32R5-NEXT: .cfi_def_cfa_offset 16
-; MIPS32R5-NEXT: sw $5, 8($sp)
-; MIPS32R5-NEXT: sw $4, 12($sp)
-; MIPS32R5-NEXT: lbu $1, 9($sp)
-; MIPS32R5-NEXT: lbu $2, 8($sp)
-; MIPS32R5-NEXT: insert.w $w0[0], $2
-; MIPS32R5-NEXT: insert.w $w0[1], $1
-; MIPS32R5-NEXT: lbu $1, 10($sp)
-; MIPS32R5-NEXT: insert.w $w0[2], $1
-; MIPS32R5-NEXT: lbu $1, 11($sp)
-; MIPS32R5-NEXT: insert.w $w0[3], $1
-; MIPS32R5-NEXT: lbu $1, 13($sp)
-; MIPS32R5-NEXT: lbu $2, 12($sp)
-; MIPS32R5-NEXT: insert.w $w1[0], $2
-; MIPS32R5-NEXT: insert.w $w1[1], $1
-; MIPS32R5-NEXT: lbu $1, 14($sp)
-; MIPS32R5-NEXT: insert.w $w1[2], $1
-; MIPS32R5-NEXT: lbu $1, 15($sp)
-; MIPS32R5-NEXT: insert.w $w1[3], $1
-; MIPS32R5-NEXT: addv.w $w0, $w1, $w0
-; MIPS32R5-NEXT: copy_s.w $1, $w0[0]
-; MIPS32R5-NEXT: copy_s.w $2, $w0[1]
-; MIPS32R5-NEXT: copy_s.w $3, $w0[2]
-; MIPS32R5-NEXT: copy_s.w $4, $w0[3]
-; MIPS32R5-NEXT: sb $4, 7($sp)
-; MIPS32R5-NEXT: sb $3, 6($sp)
-; MIPS32R5-NEXT: sb $2, 5($sp)
-; MIPS32R5-NEXT: sb $1, 4($sp)
-; MIPS32R5-NEXT: lw $2, 4($sp)
-; MIPS32R5-NEXT: addiu $sp, $sp, 16
-; MIPS32R5-NEXT: jr $ra
-; MIPS32R5-NEXT: nop
+; MIPS32R5EB-LABEL: i8_4:
+; MIPS32R5EB: # %bb.0:
+; MIPS32R5EB-NEXT: addiu $sp, $sp, -48
+; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 48
+; MIPS32R5EB-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT: .cfi_offset 31, -4
+; MIPS32R5EB-NEXT: .cfi_offset 30, -8
+; MIPS32R5EB-NEXT: move $fp, $sp
+; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30
+; MIPS32R5EB-NEXT: addiu $1, $zero, -16
+; MIPS32R5EB-NEXT: and $sp, $sp, $1
+; MIPS32R5EB-NEXT: sw $5, 16($sp)
+; MIPS32R5EB-NEXT: sw $4, 0($sp)
+; MIPS32R5EB-NEXT: ld.b $w0, 16($sp)
+; MIPS32R5EB-NEXT: ld.b $w1, 0($sp)
+; MIPS32R5EB-NEXT: addv.b $w0, $w1, $w0
+; MIPS32R5EB-NEXT: shf.b $w0, $w0, 27
+; MIPS32R5EB-NEXT: copy_s.w $2, $w0[0]
+; MIPS32R5EB-NEXT: move $sp, $fp
+; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT: addiu $sp, $sp, 48
+; MIPS32R5EB-NEXT: jr $ra
+; MIPS32R5EB-NEXT: nop
;
-; MIPS64R5-LABEL: i8_4:
-; MIPS64R5: # %bb.0:
-; MIPS64R5-NEXT: daddiu $sp, $sp, -16
-; MIPS64R5-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R5-NEXT: sw $5, 8($sp)
-; MIPS64R5-NEXT: sw $4, 12($sp)
-; MIPS64R5-NEXT: lbu $1, 9($sp)
-; MIPS64R5-NEXT: lbu $2, 8($sp)
-; MIPS64R5-NEXT: insert.w $w0[0], $2
-; MIPS64R5-NEXT: insert.w $w0[1], $1
-; MIPS64R5-NEXT: lbu $1, 10($sp)
-; MIPS64R5-NEXT: insert.w $w0[2], $1
-; MIPS64R5-NEXT: lbu $1, 11($sp)
-; MIPS64R5-NEXT: insert.w $w0[3], $1
-; MIPS64R5-NEXT: lbu $1, 13($sp)
-; MIPS64R5-NEXT: lbu $2, 12($sp)
-; MIPS64R5-NEXT: insert.w $w1[0], $2
-; MIPS64R5-NEXT: insert.w $w1[1], $1
-; MIPS64R5-NEXT: lbu $1, 14($sp)
-; MIPS64R5-NEXT: insert.w $w1[2], $1
-; MIPS64R5-NEXT: lbu $1, 15($sp)
-; MIPS64R5-NEXT: insert.w $w1[3], $1
-; MIPS64R5-NEXT: addv.w $w0, $w1, $w0
-; MIPS64R5-NEXT: copy_s.w $1, $w0[0]
-; MIPS64R5-NEXT: copy_s.w $2, $w0[1]
-; MIPS64R5-NEXT: copy_s.w $3, $w0[2]
-; MIPS64R5-NEXT: copy_s.w $4, $w0[3]
-; MIPS64R5-NEXT: sb $4, 7($sp)
-; MIPS64R5-NEXT: sb $3, 6($sp)
-; MIPS64R5-NEXT: sb $2, 5($sp)
-; MIPS64R5-NEXT: sb $1, 4($sp)
-; MIPS64R5-NEXT: lw $2, 4($sp)
-; MIPS64R5-NEXT: daddiu $sp, $sp, 16
-; MIPS64R5-NEXT: jr $ra
-; MIPS64R5-NEXT: nop
+; MIPS64R5EB-LABEL: i8_4:
+; MIPS64R5EB: # %bb.0:
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT: sll $1, $5, 0
+; MIPS64R5EB-NEXT: sw $1, 16($sp)
+; MIPS64R5EB-NEXT: sll $1, $4, 0
+; MIPS64R5EB-NEXT: sw $1, 0($sp)
+; MIPS64R5EB-NEXT: ld.b $w0, 16($sp)
+; MIPS64R5EB-NEXT: ld.b $w1, 0($sp)
+; MIPS64R5EB-NEXT: addv.b $w0, $w1, $w0
+; MIPS64R5EB-NEXT: shf.b $w0, $w0, 27
+; MIPS64R5EB-NEXT: copy_s.w $2, $w0[0]
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT: jr $ra
+; MIPS64R5EB-NEXT: nop
+;
+; MIPS64R5EL-LABEL: i8_4:
+; MIPS64R5EL: # %bb.0:
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT: sll $1, $5, 0
+; MIPS64R5EL-NEXT: sw $1, 16($sp)
+; MIPS64R5EL-NEXT: sll $1, $4, 0
+; MIPS64R5EL-NEXT: sw $1, 0($sp)
+; MIPS64R5EL-NEXT: ld.b $w0, 16($sp)
+; MIPS64R5EL-NEXT: ld.b $w1, 0($sp)
+; MIPS64R5EL-NEXT: addv.b $w0, $w1, $w0
+; MIPS64R5EL-NEXT: copy_s.w $2, $w0[0]
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT: jr $ra
+; MIPS64R5EL-NEXT: nop
%1 = add <4 x i8> %a, %b
ret <4 x i8> %1
}
@@ -704,66 +632,16 @@ define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) {
; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30
; MIPS32R5EB-NEXT: addiu $1, $zero, -16
; MIPS32R5EB-NEXT: and $sp, $sp, $1
-; MIPS32R5EB-NEXT: sw $6, 24($sp)
-; MIPS32R5EB-NEXT: lbu $1, 25($sp)
-; MIPS32R5EB-NEXT: lbu $2, 24($sp)
-; MIPS32R5EB-NEXT: sw $7, 28($sp)
-; MIPS32R5EB-NEXT: insert.h $w0[0], $2
-; MIPS32R5EB-NEXT: insert.h $w0[1], $1
-; MIPS32R5EB-NEXT: lbu $1, 26($sp)
-; MIPS32R5EB-NEXT: sw $4, 32($sp)
-; MIPS32R5EB-NEXT: insert.h $w0[2], $1
-; MIPS32R5EB-NEXT: lbu $1, 27($sp)
-; MIPS32R5EB-NEXT: insert.h $w0[3], $1
-; MIPS32R5EB-NEXT: lbu $1, 28($sp)
-; MIPS32R5EB-NEXT: sw $5, 36($sp)
-; MIPS32R5EB-NEXT: insert.h $w0[4], $1
-; MIPS32R5EB-NEXT: lbu $1, 33($sp)
-; MIPS32R5EB-NEXT: lbu $2, 32($sp)
-; MIPS32R5EB-NEXT: insert.h $w1[0], $2
-; MIPS32R5EB-NEXT: insert.h $w1[1], $1
-; MIPS32R5EB-NEXT: lbu $1, 29($sp)
-; MIPS32R5EB-NEXT: lbu $2, 34($sp)
-; MIPS32R5EB-NEXT: insert.h $w1[2], $2
-; MIPS32R5EB-NEXT: insert.h $w0[5], $1
-; MIPS32R5EB-NEXT: lbu $1, 35($sp)
-; MIPS32R5EB-NEXT: lbu $2, 31($sp)
-; MIPS32R5EB-NEXT: lbu $3, 30($sp)
-; MIPS32R5EB-NEXT: lbu $4, 39($sp)
-; MIPS32R5EB-NEXT: insert.h $w0[6], $3
-; MIPS32R5EB-NEXT: insert.h $w0[7], $2
-; MIPS32R5EB-NEXT: insert.h $w1[3], $1
-; MIPS32R5EB-NEXT: lbu $1, 36($sp)
-; MIPS32R5EB-NEXT: insert.h $w1[4], $1
-; MIPS32R5EB-NEXT: lbu $1, 37($sp)
-; MIPS32R5EB-NEXT: insert.h $w1[5], $1
-; MIPS32R5EB-NEXT: lbu $1, 38($sp)
-; MIPS32R5EB-NEXT: insert.h $w1[6], $1
-; MIPS32R5EB-NEXT: insert.h $w1[7], $4
-; MIPS32R5EB-NEXT: addv.h $w0, $w1, $w0
-; MIPS32R5EB-NEXT: copy_s.h $1, $w0[0]
-; MIPS32R5EB-NEXT: copy_s.h $2, $w0[1]
-; MIPS32R5EB-NEXT: copy_s.h $3, $w0[2]
-; MIPS32R5EB-NEXT: copy_s.h $4, $w0[3]
-; MIPS32R5EB-NEXT: copy_s.h $5, $w0[4]
-; MIPS32R5EB-NEXT: copy_s.h $6, $w0[5]
-; MIPS32R5EB-NEXT: copy_s.h $7, $w0[6]
-; MIPS32R5EB-NEXT: copy_s.h $8, $w0[7]
-; MIPS32R5EB-NEXT: sb $8, 23($sp)
-; MIPS32R5EB-NEXT: sb $7, 22($sp)
-; MIPS32R5EB-NEXT: sb $6, 21($sp)
-; MIPS32R5EB-NEXT: sb $5, 20($sp)
-; MIPS32R5EB-NEXT: sb $4, 19($sp)
-; MIPS32R5EB-NEXT: sb $3, 18($sp)
-; MIPS32R5EB-NEXT: sb $2, 17($sp)
-; MIPS32R5EB-NEXT: sb $1, 16($sp)
-; MIPS32R5EB-NEXT: lw $1, 20($sp)
-; MIPS32R5EB-NEXT: sw $1, 12($sp)
-; MIPS32R5EB-NEXT: lw $1, 16($sp)
-; MIPS32R5EB-NEXT: sw $1, 4($sp)
-; MIPS32R5EB-NEXT: ld.w $w0, 0($sp)
-; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1]
-; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3]
+; MIPS32R5EB-NEXT: sw $7, 20($sp)
+; MIPS32R5EB-NEXT: sw $6, 16($sp)
+; MIPS32R5EB-NEXT: sw $5, 4($sp)
+; MIPS32R5EB-NEXT: sw $4, 0($sp)
+; MIPS32R5EB-NEXT: ld.b $w0, 16($sp)
+; MIPS32R5EB-NEXT: ld.b $w1, 0($sp)
+; MIPS32R5EB-NEXT: addv.b $w0, $w1, $w0
+; MIPS32R5EB-NEXT: shf.b $w0, $w0, 27
+; MIPS32R5EB-NEXT: copy_s.w $2, $w0[0]
+; MIPS32R5EB-NEXT: copy_s.w $3, $w0[1]
; MIPS32R5EB-NEXT: move $sp, $fp
; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload
; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload
@@ -771,65 +649,35 @@ define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) {
; MIPS32R5EB-NEXT: jr $ra
; MIPS32R5EB-NEXT: nop
;
-; MIPS64R5-LABEL: i8_8:
-; MIPS64R5: # %bb.0:
-; MIPS64R5-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5-NEXT: sd $5, 16($sp)
-; MIPS64R5-NEXT: lbu $1, 17($sp)
-; MIPS64R5-NEXT: lbu $2, 16($sp)
-; MIPS64R5-NEXT: sd $4, 24($sp)
-; MIPS64R5-NEXT: insert.h $w0[0], $2
-; MIPS64R5-NEXT: insert.h $w0[1], $1
-; MIPS64R5-NEXT: lbu $1, 18($sp)
-; MIPS64R5-NEXT: insert.h $w0[2], $1
-; MIPS64R5-NEXT: lbu $1, 19($sp)
-; MIPS64R5-NEXT: insert.h $w0[3], $1
-; MIPS64R5-NEXT: lbu $1, 20($sp)
-; MIPS64R5-NEXT: insert.h $w0[4], $1
-; MIPS64R5-NEXT: lbu $1, 25($sp)
-; MIPS64R5-NEXT: lbu $2, 24($sp)
-; MIPS64R5-NEXT: insert.h $w1[0], $2
-; MIPS64R5-NEXT: insert.h $w1[1], $1
-; MIPS64R5-NEXT: lbu $1, 21($sp)
-; MIPS64R5-NEXT: lbu $2, 26($sp)
-; MIPS64R5-NEXT: insert.h $w1[2], $2
-; MIPS64R5-NEXT: insert.h $w0[5], $1
-; MIPS64R5-NEXT: lbu $1, 27($sp)
-; MIPS64R5-NEXT: lbu $2, 23($sp)
-; MIPS64R5-NEXT: lbu $3, 22($sp)
-; MIPS64R5-NEXT: lbu $4, 31($sp)
-; MIPS64R5-NEXT: insert.h $w0[6], $3
-; MIPS64R5-NEXT: insert.h $w0[7], $2
-; MIPS64R5-NEXT: insert.h $w1[3], $1
-; MIPS64R5-NEXT: lbu $1, 28($sp)
-; MIPS64R5-NEXT: insert.h $w1[4], $1
-; MIPS64R5-NEXT: lbu $1, 29($sp)
-; MIPS64R5-NEXT: insert.h $w1[5], $1
-; MIPS64R5-NEXT: lbu $1, 30($sp)
-; MIPS64R5-NEXT: insert.h $w1[6], $1
-; MIPS64R5-NEXT: insert.h $w1[7], $4
-; MIPS64R5-NEXT: addv.h $w0, $w1, $w0
-; MIPS64R5-NEXT: copy_s.h $1, $w0[0]
-; MIPS64R5-NEXT: copy_s.h $2, $w0[1]
-; MIPS64R5-NEXT: copy_s.h $3, $w0[2]
-; MIPS64R5-NEXT: copy_s.h $4, $w0[3]
-; MIPS64R5-NEXT: copy_s.h $5, $w0[4]
-; MIPS64R5-NEXT: copy_s.h $6, $w0[5]
-; MIPS64R5-NEXT: copy_s.h $7, $w0[6]
-; MIPS64R5-NEXT: copy_s.h $8, $w0[7]
-; MIPS64R5-NEXT: sb $8, 15($sp)
-; MIPS64R5-NEXT: sb $7, 14($sp)
-; MIPS64R5-NEXT: sb $6, 13($sp)
-; MIPS64R5-NEXT: sb $5, 12($sp)
-; MIPS64R5-NEXT: sb $4, 11($sp)
-; MIPS64R5-NEXT: sb $3, 10($sp)
-; MIPS64R5-NEXT: sb $2, 9($sp)
-; MIPS64R5-NEXT: sb $1, 8($sp)
-; MIPS64R5-NEXT: ld $2, 8($sp)
-; MIPS64R5-NEXT: daddiu $sp, $sp, 32
-; MIPS64R5-NEXT: jr $ra
-; MIPS64R5-NEXT: nop
+; MIPS64R5EB-LABEL: i8_8:
+; MIPS64R5EB: # %bb.0:
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT: sd $5, 16($sp)
+; MIPS64R5EB-NEXT: sd $4, 0($sp)
+; MIPS64R5EB-NEXT: ld.b $w0, 16($sp)
+; MIPS64R5EB-NEXT: ld.b $w1, 0($sp)
+; MIPS64R5EB-NEXT: addv.b $w0, $w1, $w0
+; MIPS64R5EB-NEXT: shf.b $w0, $w0, 27
+; MIPS64R5EB-NEXT: shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT: copy_s.d $2, $w0[0]
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT: jr $ra
+; MIPS64R5EB-NEXT: nop
+;
+; MIPS64R5EL-LABEL: i8_8:
+; MIPS64R5EL: # %bb.0:
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT: sd $5, 16($sp)
+; MIPS64R5EL-NEXT: sd $4, 0($sp)
+; MIPS64R5EL-NEXT: ld.b $w0, 16($sp)
+; MIPS64R5EL-NEXT: ld.b $w1, 0($sp)
+; MIPS64R5EL-NEXT: addv.b $w0, $w1, $w0
+; MIPS64R5EL-NEXT: copy_s.d $2, $w0[0]
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT: jr $ra
+; MIPS64R5EL-NEXT: nop
;
; MIPS32R5EL-LABEL: i8_8:
; MIPS32R5EL: # %bb.0:
@@ -843,66 +691,15 @@ define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) {
; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30
; MIPS32R5EL-NEXT: addiu $1, $zero, -16
; MIPS32R5EL-NEXT: and $sp, $sp, $1
-; MIPS32R5EL-NEXT: sw $6, 24($sp)
-; MIPS32R5EL-NEXT: lbu $1, 25($sp)
-; MIPS32R5EL-NEXT: lbu $2, 24($sp)
-; MIPS32R5EL-NEXT: sw $7, 28($sp)
-; MIPS32R5EL-NEXT: insert.h $w0[0], $2
-; MIPS32R5EL-NEXT: insert.h $w0[1], $1
-; MIPS32R5EL-NEXT: lbu $1, 26($sp)
-; MIPS32R5EL-NEXT: sw $4, 32($sp)
-; MIPS32R5EL-NEXT: insert.h $w0[2], $1
-; MIPS32R5EL-NEXT: lbu $1, 27($sp)
-; MIPS32R5EL-NEXT: insert.h $w0[3], $1
-; MIPS32R5EL-NEXT: lbu $1, 28($sp)
-; MIPS32R5EL-NEXT: sw $5, 36($sp)
-; MIPS32R5EL-NEXT: insert.h $w0[4], $1
-; MIPS32R5EL-NEXT: lbu $1, 33($sp)
-; MIPS32R5EL-NEXT: lbu $2, 32($sp)
-; MIPS32R5EL-NEXT: insert.h $w1[0], $2
-; MIPS32R5EL-NEXT: insert.h $w1[1], $1
-; MIPS32R5EL-NEXT: lbu $1, 29($sp)
-; MIPS32R5EL-NEXT: lbu $2, 34($sp)
-; MIPS32R5EL-NEXT: insert.h $w1[2], $2
-; MIPS32R5EL-NEXT: insert.h $w0[5], $1
-; MIPS32R5EL-NEXT: lbu $1, 35($sp)
-; MIPS32R5EL-NEXT: lbu $2, 31($sp)
-; MIPS32R5EL-NEXT: lbu $3, 30($sp)
-; MIPS32R5EL-NEXT: lbu $4, 39($sp)
-; MIPS32R5EL-NEXT: insert.h $w0[6], $3
-; MIPS32R5EL-NEXT: insert.h $w0[7], $2
-; MIPS32R5EL-NEXT: insert.h $w1[3], $1
-; MIPS32R5EL-NEXT: lbu $1, 36($sp)
-; MIPS32R5EL-NEXT: insert.h $w1[4], $1
-; MIPS32R5EL-NEXT: lbu $1, 37($sp)
-; MIPS32R5EL-NEXT: insert.h $w1[5], $1
-; MIPS32R5EL-NEXT: lbu $1, 38($sp)
-; MIPS32R5EL-NEXT: insert.h $w1[6], $1
-; MIPS32R5EL-NEXT: insert.h $w1[7], $4
-; MIPS32R5EL-NEXT: addv.h $w0, $w1, $w0
-; MIPS32R5EL-NEXT: copy_s.h $1, $w0[0]
-; MIPS32R5EL-NEXT: copy_s.h $2, $w0[1]
-; MIPS32R5EL-NEXT: copy_s.h $3, $w0[2]
-; MIPS32R5EL-NEXT: copy_s.h $4, $w0[3]
-; MIPS32R5EL-NEXT: copy_s.h $5, $w0[4]
-; MIPS32R5EL-NEXT: copy_s.h $6, $w0[5]
-; MIPS32R5EL-NEXT: copy_s.h $7, $w0[6]
-; MIPS32R5EL-NEXT: copy_s.h $8, $w0[7]
-; MIPS32R5EL-NEXT: sb $8, 23($sp)
-; MIPS32R5EL-NEXT: sb $7, 22($sp)
-; MIPS32R5EL-NEXT: sb $6, 21($sp)
-; MIPS32R5EL-NEXT: sb $5, 20($sp)
-; MIPS32R5EL-NEXT: sb $4, 19($sp)
-; MIPS32R5EL-NEXT: sb $3, 18($sp)
-; MIPS32R5EL-NEXT: sb $2, 17($sp)
-; MIPS32R5EL-NEXT: sb $1, 16($sp)
-; MIPS32R5EL-NEXT: lw $1, 20($sp)
-; MIPS32R5EL-NEXT: sw $1, 8($sp)
-; MIPS32R5EL-NEXT: lw $1, 16($sp)
-; MIPS32R5EL-NEXT: sw $1, 0($sp)
-; MIPS32R5EL-NEXT: ld.w $w0, 0($sp)
+; MIPS32R5EL-NEXT: sw $7, 20($sp)
+; MIPS32R5EL-NEXT: sw $6, 16($sp)
+; MIPS32R5EL-NEXT: sw $5, 4($sp)
+; MIPS32R5EL-NEXT: sw $4, 0($sp)
+; MIPS32R5EL-NEXT: ld.b $w0, 16($sp)
+; MIPS32R5EL-NEXT: ld.b $w1, 0($sp)
+; MIPS32R5EL-NEXT: addv.b $w0, $w1, $w0
; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0]
-; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2]
+; MIPS32R5EL-NEXT: copy_s.w $3, $w0[1]
; MIPS32R5EL-NEXT: move $sp, $fp
; MIPS32R5EL-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload
; MIPS32R5EL-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload
@@ -1221,102 +1018,86 @@ define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) {
;
; MIPS32R5EB-LABEL: i16_2:
; MIPS32R5EB: # %bb.0:
-; MIPS32R5EB-NEXT: addiu $sp, $sp, -64
-; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 64
-; MIPS32R5EB-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT: addiu $sp, $sp, -48
+; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 48
+; MIPS32R5EB-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill
; MIPS32R5EB-NEXT: .cfi_offset 31, -4
; MIPS32R5EB-NEXT: .cfi_offset 30, -8
; MIPS32R5EB-NEXT: move $fp, $sp
; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30
; MIPS32R5EB-NEXT: addiu $1, $zero, -16
; MIPS32R5EB-NEXT: and $sp, $sp, $1
-; MIPS32R5EB-NEXT: sw $5, 48($sp)
-; MIPS32R5EB-NEXT: sw $4, 52($sp)
-; MIPS32R5EB-NEXT: lhu $1, 50($sp)
-; MIPS32R5EB-NEXT: sw $1, 28($sp)
-; MIPS32R5EB-NEXT: lhu $1, 48($sp)
-; MIPS32R5EB-NEXT: sw $1, 20($sp)
-; MIPS32R5EB-NEXT: lhu $1, 54($sp)
-; MIPS32R5EB-NEXT: sw $1, 12($sp)
-; MIPS32R5EB-NEXT: lhu $1, 52($sp)
-; MIPS32R5EB-NEXT: sw $1, 4($sp)
-; MIPS32R5EB-NEXT: ld.d $w0, 16($sp)
-; MIPS32R5EB-NEXT: ld.d $w1, 0($sp)
-; MIPS32R5EB-NEXT: addv.d $w0, $w1, $w0
-; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177
-; MIPS32R5EB-NEXT: copy_s.w $1, $w0[1]
-; MIPS32R5EB-NEXT: copy_s.w $2, $w0[3]
-; MIPS32R5EB-NEXT: sh $2, 46($sp)
-; MIPS32R5EB-NEXT: sh $1, 44($sp)
-; MIPS32R5EB-NEXT: lw $2, 44($sp)
+; MIPS32R5EB-NEXT: sw $5, 16($sp)
+; MIPS32R5EB-NEXT: sw $4, 0($sp)
+; MIPS32R5EB-NEXT: ld.h $w0, 16($sp)
+; MIPS32R5EB-NEXT: ld.h $w1, 0($sp)
+; MIPS32R5EB-NEXT: addv.h $w0, $w1, $w0
+; MIPS32R5EB-NEXT: shf.h $w0, $w0, 177
+; MIPS32R5EB-NEXT: copy_s.w $2, $w0[0]
; MIPS32R5EB-NEXT: move $sp, $fp
-; MIPS32R5EB-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT: addiu $sp, $sp, 64
+; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT: addiu $sp, $sp, 48
; MIPS32R5EB-NEXT: jr $ra
; MIPS32R5EB-NEXT: nop
;
-; MIPS64R5-LABEL: i16_2:
-; MIPS64R5: # %bb.0:
-; MIPS64R5-NEXT: daddiu $sp, $sp, -16
-; MIPS64R5-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R5-NEXT: sw $5, 8($sp)
-; MIPS64R5-NEXT: sw $4, 12($sp)
-; MIPS64R5-NEXT: lh $1, 10($sp)
-; MIPS64R5-NEXT: lh $2, 8($sp)
-; MIPS64R5-NEXT: insert.d $w0[0], $2
-; MIPS64R5-NEXT: insert.d $w0[1], $1
-; MIPS64R5-NEXT: lh $1, 14($sp)
-; MIPS64R5-NEXT: lh $2, 12($sp)
-; MIPS64R5-NEXT: insert.d $w1[0], $2
-; MIPS64R5-NEXT: insert.d $w1[1], $1
-; MIPS64R5-NEXT: addv.d $w0, $w1, $w0
-; MIPS64R5-NEXT: copy_s.d $1, $w0[0]
-; MIPS64R5-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5-NEXT: sh $2, 6($sp)
-; MIPS64R5-NEXT: sh $1, 4($sp)
-; MIPS64R5-NEXT: lw $2, 4($sp)
-; MIPS64R5-NEXT: daddiu $sp, $sp, 16
-; MIPS64R5-NEXT: jr $ra
-; MIPS64R5-NEXT: nop
-;
; MIPS32R5EL-LABEL: i16_2:
; MIPS32R5EL: # %bb.0:
-; MIPS32R5EL-NEXT: addiu $sp, $sp, -64
-; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 64
-; MIPS32R5EL-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT: addiu $sp, $sp, -48
+; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 48
+; MIPS32R5EL-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill
; MIPS32R5EL-NEXT: .cfi_offset 31, -4
; MIPS32R5EL-NEXT: .cfi_offset 30, -8
; MIPS32R5EL-NEXT: move $fp, $sp
; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30
; MIPS32R5EL-NEXT: addiu $1, $zero, -16
; MIPS32R5EL-NEXT: and $sp, $sp, $1
-; MIPS32R5EL-NEXT: sw $5, 48($sp)
-; MIPS32R5EL-NEXT: sw $4, 52($sp)
-; MIPS32R5EL-NEXT: lhu $1, 50($sp)
-; MIPS32R5EL-NEXT: sw $1, 24($sp)
-; MIPS32R5EL-NEXT: lhu $1, 48($sp)
-; MIPS32R5EL-NEXT: sw $1, 16($sp)
-; MIPS32R5EL-NEXT: lhu $1, 54($sp)
-; MIPS32R5EL-NEXT: sw $1, 8($sp)
-; MIPS32R5EL-NEXT: lhu $1, 52($sp)
-; MIPS32R5EL-NEXT: sw $1, 0($sp)
-; MIPS32R5EL-NEXT: ld.d $w0, 16($sp)
-; MIPS32R5EL-NEXT: ld.d $w1, 0($sp)
-; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0
-; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0]
-; MIPS32R5EL-NEXT: copy_s.w $2, $w0[2]
-; MIPS32R5EL-NEXT: sh $2, 46($sp)
-; MIPS32R5EL-NEXT: sh $1, 44($sp)
-; MIPS32R5EL-NEXT: lw $2, 44($sp)
+; MIPS32R5EL-NEXT: sw $5, 16($sp)
+; MIPS32R5EL-NEXT: sw $4, 0($sp)
+; MIPS32R5EL-NEXT: ld.h $w0, 16($sp)
+; MIPS32R5EL-NEXT: ld.h $w1, 0($sp)
+; MIPS32R5EL-NEXT: addv.h $w0, $w1, $w0
+; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0]
; MIPS32R5EL-NEXT: move $sp, $fp
-; MIPS32R5EL-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT: addiu $sp, $sp, 64
+; MIPS32R5EL-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT: addiu $sp, $sp, 48
; MIPS32R5EL-NEXT: jr $ra
; MIPS32R5EL-NEXT: nop
+;
+; MIPS64R5EB-LABEL: i16_2:
+; MIPS64R5EB: # %bb.0:
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT: sll $1, $5, 0
+; MIPS64R5EB-NEXT: sw $1, 16($sp)
+; MIPS64R5EB-NEXT: sll $1, $4, 0
+; MIPS64R5EB-NEXT: sw $1, 0($sp)
+; MIPS64R5EB-NEXT: ld.h $w0, 16($sp)
+; MIPS64R5EB-NEXT: ld.h $w1, 0($sp)
+; MIPS64R5EB-NEXT: addv.h $w0, $w1, $w0
+; MIPS64R5EB-NEXT: shf.h $w0, $w0, 177
+; MIPS64R5EB-NEXT: copy_s.w $2, $w0[0]
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT: jr $ra
+; MIPS64R5EB-NEXT: nop
+;
+; MIPS64R5EL-LABEL: i16_2:
+; MIPS64R5EL: # %bb.0:
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT: sll $1, $5, 0
+; MIPS64R5EL-NEXT: sw $1, 16($sp)
+; MIPS64R5EL-NEXT: sll $1, $4, 0
+; MIPS64R5EL-NEXT: sw $1, 0($sp)
+; MIPS64R5EL-NEXT: ld.h $w0, 16($sp)
+; MIPS64R5EL-NEXT: ld.h $w1, 0($sp)
+; MIPS64R5EL-NEXT: addv.h $w0, $w1, $w0
+; MIPS64R5EL-NEXT: copy_s.w $2, $w0[0]
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT: jr $ra
%1 = add <2 x i16> %a, %b
ret <2 x i16> %1
}
@@ -1384,84 +1165,50 @@ define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) {
; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30
; MIPS32R5EB-NEXT: addiu $1, $zero, -16
; MIPS32R5EB-NEXT: and $sp, $sp, $1
-; MIPS32R5EB-NEXT: sw $6, 24($sp)
-; MIPS32R5EB-NEXT: sw $7, 28($sp)
-; MIPS32R5EB-NEXT: lhu $1, 26($sp)
-; MIPS32R5EB-NEXT: lhu $2, 24($sp)
-; MIPS32R5EB-NEXT: sw $4, 32($sp)
-; MIPS32R5EB-NEXT: insert.w $w0[0], $2
-; MIPS32R5EB-NEXT: insert.w $w0[1], $1
-; MIPS32R5EB-NEXT: lhu $1, 28($sp)
-; MIPS32R5EB-NEXT: sw $5, 36($sp)
-; MIPS32R5EB-NEXT: insert.w $w0[2], $1
-; MIPS32R5EB-NEXT: lhu $1, 30($sp)
-; MIPS32R5EB-NEXT: insert.w $w0[3], $1
-; MIPS32R5EB-NEXT: lhu $1, 34($sp)
-; MIPS32R5EB-NEXT: lhu $2, 32($sp)
-; MIPS32R5EB-NEXT: insert.w $w1[0], $2
-; MIPS32R5EB-NEXT: insert.w $w1[1], $1
-; MIPS32R5EB-NEXT: lhu $1, 36($sp)
-; MIPS32R5EB-NEXT: insert.w $w1[2], $1
-; MIPS32R5EB-NEXT: lhu $1, 38($sp)
-; MIPS32R5EB-NEXT: insert.w $w1[3], $1
-; MIPS32R5EB-NEXT: addv.w $w0, $w1, $w0
-; MIPS32R5EB-NEXT: copy_s.w $1, $w0[0]
-; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1]
-; MIPS32R5EB-NEXT: copy_s.w $3, $w0[2]
-; MIPS32R5EB-NEXT: copy_s.w $4, $w0[3]
-; MIPS32R5EB-NEXT: sh $4, 22($sp)
-; MIPS32R5EB-NEXT: sh $3, 20($sp)
-; MIPS32R5EB-NEXT: sh $2, 18($sp)
-; MIPS32R5EB-NEXT: sh $1, 16($sp)
-; MIPS32R5EB-NEXT: lw $1, 20($sp)
-; MIPS32R5EB-NEXT: sw $1, 12($sp)
-; MIPS32R5EB-NEXT: lw $1, 16($sp)
-; MIPS32R5EB-NEXT: sw $1, 4($sp)
-; MIPS32R5EB-NEXT: ld.w $w0, 0($sp)
-; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1]
-; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3]
+; MIPS32R5EB-NEXT: sw $7, 20($sp)
+; MIPS32R5EB-NEXT: sw $6, 16($sp)
+; MIPS32R5EB-NEXT: sw $5, 4($sp)
+; MIPS32R5EB-NEXT: sw $4, 0($sp)
+; MIPS32R5EB-NEXT: ld.h $w0, 16($sp)
+; MIPS32R5EB-NEXT: ld.h $w1, 0($sp)
+; MIPS32R5EB-NEXT: addv.h $w0, $w1, $w0
+; MIPS32R5EB-NEXT: shf.h $w0, $w0, 177
+; MIPS32R5EB-NEXT: copy_s.w $2, $w0[0]
+; MIPS32R5EB-NEXT: copy_s.w $3, $w0[1]
; MIPS32R5EB-NEXT: move $sp, $fp
; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload
; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload
; MIPS32R5EB-NEXT: addiu $sp, $sp, 48
; MIPS32R5EB-NEXT: jr $ra
-; MIPS32R5EB-NEXT: nop
;
-; MIPS64R5-LABEL: i16_4:
-; MIPS64R5: # %bb.0:
-; MIPS64R5-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5-NEXT: sd $5, 16($sp)
-; MIPS64R5-NEXT: sd $4, 24($sp)
-; MIPS64R5-NEXT: lhu $1, 18($sp)
-; MIPS64R5-NEXT: lhu $2, 16($sp)
-; MIPS64R5-NEXT: insert.w $w0[0], $2
-; MIPS64R5-NEXT: insert.w $w0[1], $1
-; MIPS64R5-NEXT: lhu $1, 20($sp)
-; MIPS64R5-NEXT: insert.w $w0[2], $1
-; MIPS64R5-NEXT: lhu $1, 22($sp)
-; MIPS64R5-NEXT: insert.w $w0[3], $1
-; MIPS64R5-NEXT: lhu $1, 26($sp)
-; MIPS64R5-NEXT: lhu $2, 24($sp)
-; MIPS64R5-NEXT: insert.w $w1[0], $2
-; MIPS64R5-NEXT: insert.w $w1[1], $1
-; MIPS64R5-NEXT: lhu $1, 28($sp)
-; MIPS64R5-NEXT: insert.w $w1[2], $1
-; MIPS64R5-NEXT: lhu $1, 30($sp)
-; MIPS64R5-NEXT: insert.w $w1[3], $1
-; MIPS64R5-NEXT: addv.w $w0, $w1, $w0
-; MIPS64R5-NEXT: copy_s.w $1, $w0[0]
-; MIPS64R5-NEXT: copy_s.w $2, $w0[1]
-; MIPS64R5-NEXT: copy_s.w $3, $w0[2]
-; MIPS64R5-NEXT: copy_s.w $4, $w0[3]
-; MIPS64R5-NEXT: sh $4, 14($sp)
-; MIPS64R5-NEXT: sh $3, 12($sp)
-; MIPS64R5-NEXT: sh $2, 10($sp)
-; MIPS64R5-NEXT: sh $1, 8($sp)
-; MIPS64R5-NEXT: ld $2, 8($sp)
-; MIPS64R5-NEXT: daddiu $sp, $sp, 32
-; MIPS64R5-NEXT: jr $ra
-; MIPS64R5-NEXT: nop
+; MIPS64R5EB-LABEL: i16_4:
+; MIPS64R5EB: # %bb.0:
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT: sd $5, 16($sp)
+; MIPS64R5EB-NEXT: sd $4, 0($sp)
+; MIPS64R5EB-NEXT: ld.h $w0, 16($sp)
+; MIPS64R5EB-NEXT: ld.h $w1, 0($sp)
+; MIPS64R5EB-NEXT: addv.h $w0, $w1, $w0
+; MIPS64R5EB-NEXT: shf.h $w0, $w0, 27
+; MIPS64R5EB-NEXT: copy_s.d $2, $w0[0]
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT: jr $ra
+; MIPS64R5EB-NEXT: nop
+;
+; MIPS64R5EL-LABEL: i16_4:
+; MIPS64R5EL: # %bb.0:
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT: sd $5, 16($sp)
+; MIPS64R5EL-NEXT: sd $4, 0($sp)
+; MIPS64R5EL-NEXT: ld.h $w0, 16($sp)
+; MIPS64R5EL-NEXT: ld.h $w1, 0($sp)
+; MIPS64R5EL-NEXT: addv.h $w0, $w1, $w0
+; MIPS64R5EL-NEXT: copy_s.d $2, $w0[0]
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT: jr $ra
+; MIPS64R5EL-NEXT: nop
;
; MIPS32R5EL-LABEL: i16_4:
; MIPS32R5EL: # %bb.0:
@@ -1475,42 +1222,15 @@ define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) {
; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30
; MIPS32R5EL-NEXT: addiu $1, $zero, -16
; MIPS32R5EL-NEXT: and $sp, $sp, $1
-; MIPS32R5EL-NEXT: sw $6, 24($sp)
-; MIPS32R5EL-NEXT: sw $7, 28($sp)
-; MIPS32R5EL-NEXT: lhu $1, 26($sp)
-; MIPS32R5EL-NEXT: lhu $2, 24($sp)
-; MIPS32R5EL-NEXT: sw $4, 32($sp)
-; MIPS32R5EL-NEXT: insert.w $w0[0], $2
-; MIPS32R5EL-NEXT: insert.w $w0[1], $1
-; MIPS32R5EL-NEXT: lhu $1, 28($sp)
-; MIPS32R5EL-NEXT: sw $5, 36($sp)
-; MIPS32R5EL-NEXT: insert.w $w0[2], $1
-; MIPS32R5EL-NEXT: lhu $1, 30($sp)
-; MIPS32R5EL-NEXT: insert.w $w0[3], $1
-; MIPS32R5EL-NEXT: lhu $1, 34($sp)
-; MIPS32R5EL-NEXT: lhu $2, 32($sp)
-; MIPS32R5EL-NEXT: insert.w $w1[0], $2
-; MIPS32R5EL-NEXT: insert.w $w1[1], $1
-; MIPS32R5EL-NEXT: lhu $1, 36($sp)
-; MIPS32R5EL-NEXT: insert.w $w1[2], $1
-; MIPS32R5EL-NEXT: lhu $1, 38($sp)
-; MIPS32R5EL-NEXT: insert.w $w1[3], $1
-; MIPS32R5EL-NEXT: addv.w $w0, $w1, $w0
-; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0]
-; MIPS32R5EL-NEXT: copy_s.w $2, $w0[1]
-; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2]
-; MIPS32R5EL-NEXT: copy_s.w $4, $w0[3]
-; MIPS32R5EL-NEXT: sh $4, 22($sp)
-; MIPS32R5EL-NEXT: sh $3, 20($sp)
-; MIPS32R5EL-NEXT: sh $2, 18($sp)
-; MIPS32R5EL-NEXT: sh $1, 16($sp)
-; MIPS32R5EL-NEXT: lw $1, 20($sp)
-; MIPS32R5EL-NEXT: sw $1, 8($sp)
-; MIPS32R5EL-NEXT: lw $1, 16($sp)
-; MIPS32R5EL-NEXT: sw $1, 0($sp)
-; MIPS32R5EL-NEXT: ld.w $w0, 0($sp)
+; MIPS32R5EL-NEXT: sw $7, 20($sp)
+; MIPS32R5EL-NEXT: sw $6, 16($sp)
+; MIPS32R5EL-NEXT: sw $5, 4($sp)
+; MIPS32R5EL-NEXT: sw $4, 0($sp)
+; MIPS32R5EL-NEXT: ld.h $w0, 16($sp)
+; MIPS32R5EL-NEXT: ld.h $w1, 0($sp)
+; MIPS32R5EL-NEXT: addv.h $w0, $w1, $w0
; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0]
-; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2]
+; MIPS32R5EL-NEXT: copy_s.w $3, $w0[1]
; MIPS32R5EL-NEXT: move $sp, $fp
; MIPS32R5EL-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload
; MIPS32R5EL-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload
@@ -1730,16 +1450,15 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30
; MIPS32R5EB-NEXT: addiu $1, $zero, -16
; MIPS32R5EB-NEXT: and $sp, $sp, $1
-; MIPS32R5EB-NEXT: sw $7, 28($sp)
-; MIPS32R5EB-NEXT: sw $6, 20($sp)
-; MIPS32R5EB-NEXT: sw $5, 12($sp)
-; MIPS32R5EB-NEXT: sw $4, 4($sp)
-; MIPS32R5EB-NEXT: ld.d $w0, 16($sp)
-; MIPS32R5EB-NEXT: ld.d $w1, 0($sp)
-; MIPS32R5EB-NEXT: addv.d $w0, $w1, $w0
-; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177
-; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1]
-; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3]
+; MIPS32R5EB-NEXT: sw $7, 20($sp)
+; MIPS32R5EB-NEXT: sw $6, 16($sp)
+; MIPS32R5EB-NEXT: sw $5, 4($sp)
+; MIPS32R5EB-NEXT: sw $4, 0($sp)
+; MIPS32R5EB-NEXT: ld.w $w0, 16($sp)
+; MIPS32R5EB-NEXT: ld.w $w1, 0($sp)
+; MIPS32R5EB-NEXT: addv.w $w0, $w1, $w0
+; MIPS32R5EB-NEXT: copy_s.w $2, $w0[0]
+; MIPS32R5EB-NEXT: copy_s.w $3, $w0[1]
; MIPS32R5EB-NEXT: move $sp, $fp
; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload
; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload
@@ -1751,18 +1470,13 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
; MIPS64R5EB: # %bb.0:
; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32
; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5EB-NEXT: dsrl $1, $5, 32
-; MIPS64R5EB-NEXT: insert.d $w0[0], $1
-; MIPS64R5EB-NEXT: insert.d $w0[1], $5
-; MIPS64R5EB-NEXT: dsrl $1, $4, 32
-; MIPS64R5EB-NEXT: insert.d $w1[0], $1
-; MIPS64R5EB-NEXT: insert.d $w1[1], $4
-; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0
-; MIPS64R5EB-NEXT: copy_s.d $1, $w0[0]
-; MIPS64R5EB-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5EB-NEXT: sw $2, 12($sp)
-; MIPS64R5EB-NEXT: sw $1, 8($sp)
-; MIPS64R5EB-NEXT: ld $2, 8($sp)
+; MIPS64R5EB-NEXT: sd $5, 16($sp)
+; MIPS64R5EB-NEXT: sd $4, 0($sp)
+; MIPS64R5EB-NEXT: ld.w $w0, 16($sp)
+; MIPS64R5EB-NEXT: ld.w $w1, 0($sp)
+; MIPS64R5EB-NEXT: addv.w $w0, $w1, $w0
+; MIPS64R5EB-NEXT: shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT: copy_s.d $2, $w0[0]
; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
@@ -1779,15 +1493,15 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30
; MIPS32R5EL-NEXT: addiu $1, $zero, -16
; MIPS32R5EL-NEXT: and $sp, $sp, $1
-; MIPS32R5EL-NEXT: sw $7, 24($sp)
+; MIPS32R5EL-NEXT: sw $7, 20($sp)
; MIPS32R5EL-NEXT: sw $6, 16($sp)
-; MIPS32R5EL-NEXT: sw $5, 8($sp)
+; MIPS32R5EL-NEXT: sw $5, 4($sp)
; MIPS32R5EL-NEXT: sw $4, 0($sp)
-; MIPS32R5EL-NEXT: ld.d $w0, 16($sp)
-; MIPS32R5EL-NEXT: ld.d $w1, 0($sp)
-; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0
+; MIPS32R5EL-NEXT: ld.w $w0, 16($sp)
+; MIPS32R5EL-NEXT: ld.w $w1, 0($sp)
+; MIPS32R5EL-NEXT: addv.w $w0, $w1, $w0
; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0]
-; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2]
+; MIPS32R5EL-NEXT: copy_s.w $3, $w0[1]
; MIPS32R5EL-NEXT: move $sp, $fp
; MIPS32R5EL-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload
; MIPS32R5EL-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload
@@ -1800,19 +1514,11 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32
; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32
; MIPS64R5EL-NEXT: sd $5, 16($sp)
-; MIPS64R5EL-NEXT: sd $4, 24($sp)
-; MIPS64R5EL-NEXT: lw $1, 20($sp)
-; MIPS64R5EL-NEXT: insert.d $w0[0], $5
-; MIPS64R5EL-NEXT: insert.d $w0[1], $1
-; MIPS64R5EL-NEXT: lw $1, 28($sp)
-; MIPS64R5EL-NEXT: insert.d $w1[0], $4
-; MIPS64R5EL-NEXT: insert.d $w1[1], $1
-; MIPS64R5EL-NEXT: addv.d $w0, $w1, $w0
-; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0]
-; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5EL-NEXT: sw $2, 12($sp)
-; MIPS64R5EL-NEXT: sw $1, 8($sp)
-; MIPS64R5EL-NEXT: ld $2, 8($sp)
+; MIPS64R5EL-NEXT: sd $4, 0($sp)
+; MIPS64R5EL-NEXT: ld.w $w0, 16($sp)
+; MIPS64R5EL-NEXT: ld.w $w1, 0($sp)
+; MIPS64R5EL-NEXT: addv.w $w0, $w1, $w0
+; MIPS64R5EL-NEXT: copy_s.d $2, $w0[0]
; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
@@ -2561,31 +2267,11 @@ define <8 x i8> @ret_8_i8() {
;
; MIPS32R5EB-LABEL: ret_8_i8:
; MIPS32R5EB: # %bb.0:
-; MIPS32R5EB-NEXT: addiu $sp, $sp, -32
-; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32
-; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT: .cfi_offset 31, -4
-; MIPS32R5EB-NEXT: .cfi_offset 30, -8
-; MIPS32R5EB-NEXT: move $fp, $sp
-; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30
-; MIPS32R5EB-NEXT: addiu $1, $zero, -16
-; MIPS32R5EB-NEXT: and $sp, $sp, $1
; MIPS32R5EB-NEXT: lui $1, %hi(gv8i8)
; MIPS32R5EB-NEXT: lw $2, %lo(gv8i8)($1)
-; MIPS32R5EB-NEXT: sw $2, 4($sp)
; MIPS32R5EB-NEXT: addiu $1, $1, %lo(gv8i8)
-; MIPS32R5EB-NEXT: lw $1, 4($1)
-; MIPS32R5EB-NEXT: sw $1, 12($sp)
-; MIPS32R5EB-NEXT: ld.w $w0, 0($sp)
-; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1]
-; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3]
-; MIPS32R5EB-NEXT: move $sp, $fp
-; MIPS32R5EB-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT: addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT: lw $3, 4($1)
; MIPS32R5EB-NEXT: jr $ra
-; MIPS32R5EB-NEXT: nop
;
; MIPS64R5-LABEL: ret_8_i8:
; MIPS64R5: # %bb.0:
@@ -2599,29 +2285,10 @@ define <8 x i8> @ret_8_i8() {
;
; MIPS32R5EL-LABEL: ret_8_i8:
; MIPS32R5EL: # %bb.0:
-; MIPS32R5EL-NEXT: addiu $sp, $sp, -32
-; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32
-; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT: .cfi_offset 31, -4
-; MIPS32R5EL-NEXT: .cfi_offset 30, -8
-; MIPS32R5EL-NEXT: move $fp, $sp
-; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30
-; MIPS32R5EL-NEXT: addiu $1, $zero, -16
-; MIPS32R5EL-NEXT: and $sp, $sp, $1
; MIPS32R5EL-NEXT: lui $1, %hi(gv8i8)
; MIPS32R5EL-NEXT: lw $2, %lo(gv8i8)($1)
-; MIPS32R5EL-NEXT: sw $2, 0($sp)
; MIPS32R5EL-NEXT: addiu $1, $1, %lo(gv8i8)
-; MIPS32R5EL-NEXT: lw $1, 4($1)
-; MIPS32R5EL-NEXT: sw $1, 8($sp)
-; MIPS32R5EL-NEXT: ld.w $w0, 0($sp)
-; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0]
-; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2]
-; MIPS32R5EL-NEXT: move $sp, $fp
-; MIPS32R5EL-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT: addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT: lw $3, 4($1)
; MIPS32R5EL-NEXT: jr $ra
; MIPS32R5EL-NEXT: nop
%1 = load <8 x i8>, ptr @gv8i8
@@ -2738,29 +2405,10 @@ define <4 x i16> @ret_4_i16() {
;
; MIPS32R5EB-LABEL: ret_4_i16:
; MIPS32R5EB: # %bb.0:
-; MIPS32R5EB-NEXT: addiu $sp, $sp, -32
-; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32
-; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT: .cfi_offset 31, -4
-; MIPS32R5EB-NEXT: .cfi_offset 30, -8
-; MIPS32R5EB-NEXT: move $fp, $sp
-; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30
-; MIPS32R5EB-NEXT: addiu $1, $zero, -16
-; MIPS32R5EB-NEXT: and $sp, $sp, $1
; MIPS32R5EB-NEXT: lui $1, %hi(gv4i16)
; MIPS32R5EB-NEXT: lw $2, %lo(gv4i16)($1)
-; MIPS32R5EB-NEXT: sw $2, 4($sp)
; MIPS32R5EB-NEXT: addiu $1, $1, %lo(gv4i16)
-; MIPS32R5EB-NEXT: lw $1, 4($1)
-; MIPS32R5EB-NEXT: sw $1, 12($sp)
-; MIPS32R5EB-NEXT: ld.w $w0, 0($sp)
-; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1]
-; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3]
-; MIPS32R5EB-NEXT: move $sp, $fp
-; MIPS32R5EB-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT: addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT: lw $3, 4($1)
; MIPS32R5EB-NEXT: jr $ra
; MIPS32R5EB-NEXT: nop
;
@@ -2776,29 +2424,10 @@ define <4 x i16> @ret_4_i16() {
;
; MIPS32R5EL-LABEL: ret_4_i16:
; MIPS32R5EL: # %bb.0:
-; MIPS32R5EL-NEXT: addiu $sp, $sp, -32
-; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32
-; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT: .cfi_offset 31, -4
-; MIPS32R5EL-NEXT: .cfi_offset 30, -8
-; MIPS32R5EL-NEXT: move $fp, $sp
-; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30
-; MIPS32R5EL-NEXT: addiu $1, $zero, -16
-; MIPS32R5EL-NEXT: and $sp, $sp, $1
; MIPS32R5EL-NEXT: lui $1, %hi(gv4i16)
; MIPS32R5EL-NEXT: lw $2, %lo(gv4i16)($1)
-; MIPS32R5EL-NEXT: sw $2, 0($sp)
; MIPS32R5EL-NEXT: addiu $1, $1, %lo(gv4i16)
-; MIPS32R5EL-NEXT: lw $1, 4($1)
-; MIPS32R5EL-NEXT: sw $1, 8($sp)
-; MIPS32R5EL-NEXT: ld.w $w0, 0($sp)
-; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0]
-; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2]
-; MIPS32R5EL-NEXT: move $sp, $fp
-; MIPS32R5EL-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT: addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT: lw $3, 4($1)
; MIPS32R5EL-NEXT: jr $ra
; MIPS32R5EL-NEXT: nop
%1 = load <4 x i16>, ptr @gv4i16
@@ -2877,29 +2506,10 @@ define <2 x i32> @ret_2_i32() {
;
; MIPS32R5EB-LABEL: ret_2_i32:
; MIPS32R5EB: # %bb.0:
-; MIPS32R5EB-NEXT: addiu $sp, $sp, -32
-; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32
-; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill
-; MIPS32R5EB-NEXT: .cfi_offset 31, -4
-; MIPS32R5EB-NEXT: .cfi_offset 30, -8
-; MIPS32R5EB-NEXT: move $fp, $sp
-; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30
-; MIPS32R5EB-NEXT: addiu $1, $zero, -16
-; MIPS32R5EB-NEXT: and $sp, $sp, $1
; MIPS32R5EB-NEXT: lui $1, %hi(gv2i32)
; MIPS32R5EB-NEXT: lw $2, %lo(gv2i32)($1)
-; MIPS32R5EB-NEXT: sw $2, 4($sp)
; MIPS32R5EB-NEXT: addiu $1, $1, %lo(gv2i32)
-; MIPS32R5EB-NEXT: lw $1, 4($1)
-; MIPS32R5EB-NEXT: sw $1, 12($sp)
-; MIPS32R5EB-NEXT: ld.w $w0, 0($sp)
-; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1]
-; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3]
-; MIPS32R5EB-NEXT: move $sp, $fp
-; MIPS32R5EB-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT: addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT: lw $3, 4($1)
; MIPS32R5EB-NEXT: jr $ra
; MIPS32R5EB-NEXT: nop
;
@@ -2915,29 +2525,10 @@ define <2 x i32> @ret_2_i32() {
;
; MIPS32R5EL-LABEL: ret_2_i32:
; MIPS32R5EL: # %bb.0:
-; MIPS32R5EL-NEXT: addiu $sp, $sp, -32
-; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32
-; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill
-; MIPS32R5EL-NEXT: .cfi_offset 31, -4
-; MIPS32R5EL-NEXT: .cfi_offset 30, -8
-; MIPS32R5EL-NEXT: move $fp, $sp
-; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30
-; MIPS32R5EL-NEXT: addiu $1, $zero, -16
-; MIPS32R5EL-NEXT: and $sp, $sp, $1
; MIPS32R5EL-NEXT: lui $1, %hi(gv2i32)
; MIPS32R5EL-NEXT: lw $2, %lo(gv2i32)($1)
-; MIPS32R5EL-NEXT: sw $2, 0($sp)
; MIPS32R5EL-NEXT: addiu $1, $1, %lo(gv2i32)
-; MIPS32R5EL-NEXT: lw $1, 4($1)
-; MIPS32R5EL-NEXT: sw $1, 8($sp)
-; MIPS32R5EL-NEXT: ld.w $w0, 0($sp)
-; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0]
-; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2]
-; MIPS32R5EL-NEXT: move $sp, $fp
-; MIPS32R5EL-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT: addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT: lw $3, 4($1)
; MIPS32R5EL-NEXT: jr $ra
; MIPS32R5EL-NEXT: nop
%1 = load <2 x i32>, ptr @gv2i32
@@ -3424,9 +3015,9 @@ define void @call_i8_4() {
;
; MIPS32R5EB-LABEL: call_i8_4:
; MIPS32R5EB: # %bb.0: # %entry
-; MIPS32R5EB-NEXT: addiu $sp, $sp, -32
-; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32
-; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT: addiu $sp, $sp, -24
+; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 24
+; MIPS32R5EB-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32R5EB-NEXT: .cfi_offset 31, -4
; MIPS32R5EB-NEXT: lui $1, 1543
; MIPS32R5EB-NEXT: ori $4, $1, 2314
@@ -3436,17 +3027,17 @@ define void @call_i8_4() {
; MIPS32R5EB-NEXT: nop
; MIPS32R5EB-NEXT: lui $1, %hi(gv4i8)
; MIPS32R5EB-NEXT: sw $2, %lo(gv4i8)($1)
-; MIPS32R5EB-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT: addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT: addiu $sp, $sp, 24
; MIPS32R5EB-NEXT: jr $ra
; MIPS32R5EB-NEXT: nop
;
; MIPS64R5EB-LABEL: call_i8_4:
; MIPS64R5EB: # %bb.0: # %entry
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5EB-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EB-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
; MIPS64R5EB-NEXT: .cfi_offset 31, -8
; MIPS64R5EB-NEXT: .cfi_offset 28, -16
; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(call_i8_4)))
@@ -3461,9 +3052,9 @@ define void @call_i8_4() {
; MIPS64R5EB-NEXT: nop
; MIPS64R5EB-NEXT: ld $1, %got_disp(gv4i8)($gp)
; MIPS64R5EB-NEXT: sw $2, 0($1)
-; MIPS64R5EB-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
;
@@ -3512,9 +3103,9 @@ define void @call_i8_4() {
;
; MIPS32R5EL-LABEL: call_i8_4:
; MIPS32R5EL: # %bb.0: # %entry
-; MIPS32R5EL-NEXT: addiu $sp, $sp, -32
-; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32
-; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT: addiu $sp, $sp, -24
+; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 24
+; MIPS32R5EL-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32R5EL-NEXT: .cfi_offset 31, -4
; MIPS32R5EL-NEXT: lui $1, 2569
; MIPS32R5EL-NEXT: ori $4, $1, 1798
@@ -3523,17 +3114,17 @@ define void @call_i8_4() {
; MIPS32R5EL-NEXT: nop
; MIPS32R5EL-NEXT: lui $1, %hi(gv4i8)
; MIPS32R5EL-NEXT: sw $2, %lo(gv4i8)($1)
-; MIPS32R5EL-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT: addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT: addiu $sp, $sp, 24
; MIPS32R5EL-NEXT: jr $ra
; MIPS32R5EL-NEXT: nop
;
; MIPS64R5EL-LABEL: call_i8_4:
; MIPS64R5EL: # %bb.0: # %entry
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5EL-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EL-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
; MIPS64R5EL-NEXT: .cfi_offset 31, -8
; MIPS64R5EL-NEXT: .cfi_offset 28, -16
; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(call_i8_4)))
@@ -3547,9 +3138,9 @@ define void @call_i8_4() {
; MIPS64R5EL-NEXT: nop
; MIPS64R5EL-NEXT: ld $1, %got_disp(gv4i8)($gp)
; MIPS64R5EL-NEXT: sw $2, 0($1)
-; MIPS64R5EL-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
entry:
@@ -3641,10 +3232,10 @@ define void @call_i8_8() {
;
; MIPS64R5EB-LABEL: call_i8_8:
; MIPS64R5EB: # %bb.0: # %entry
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5EB-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EB-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
; MIPS64R5EB-NEXT: .cfi_offset 31, -8
; MIPS64R5EB-NEXT: .cfi_offset 28, -16
; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(call_i8_8)))
@@ -3667,9 +3258,9 @@ define void @call_i8_8() {
; MIPS64R5EB-NEXT: nop
; MIPS64R5EB-NEXT: ld $1, %got_disp(gv8i8)($gp)
; MIPS64R5EB-NEXT: sd $2, 0($1)
-; MIPS64R5EB-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
;
@@ -3748,10 +3339,10 @@ define void @call_i8_8() {
;
; MIPS64R5EL-LABEL: call_i8_8:
; MIPS64R5EL: # %bb.0: # %entry
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5EL-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EL-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
; MIPS64R5EL-NEXT: .cfi_offset 31, -8
; MIPS64R5EL-NEXT: .cfi_offset 28, -16
; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(call_i8_8)))
@@ -3769,9 +3360,9 @@ define void @call_i8_8() {
; MIPS64R5EL-NEXT: nop
; MIPS64R5EL-NEXT: ld $1, %got_disp(gv8i8)($gp)
; MIPS64R5EL-NEXT: sd $2, 0($1)
-; MIPS64R5EL-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
entry:
@@ -4059,9 +3650,9 @@ define void @calli16_2() {
;
; MIPS32R5EB-LABEL: calli16_2:
; MIPS32R5EB: # %bb.0: # %entry
-; MIPS32R5EB-NEXT: addiu $sp, $sp, -32
-; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32
-; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT: addiu $sp, $sp, -24
+; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 24
+; MIPS32R5EB-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32R5EB-NEXT: .cfi_offset 31, -4
; MIPS32R5EB-NEXT: lui $1, 6
; MIPS32R5EB-NEXT: ori $4, $1, 7
@@ -4071,17 +3662,17 @@ define void @calli16_2() {
; MIPS32R5EB-NEXT: nop
; MIPS32R5EB-NEXT: lui $1, %hi(gv2i16)
; MIPS32R5EB-NEXT: sw $2, %lo(gv2i16)($1)
-; MIPS32R5EB-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EB-NEXT: addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT: addiu $sp, $sp, 24
; MIPS32R5EB-NEXT: jr $ra
; MIPS32R5EB-NEXT: nop
;
; MIPS64R5EB-LABEL: calli16_2:
; MIPS64R5EB: # %bb.0: # %entry
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5EB-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EB-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
; MIPS64R5EB-NEXT: .cfi_offset 31, -8
; MIPS64R5EB-NEXT: .cfi_offset 28, -16
; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(calli16_2)))
@@ -4096,9 +3687,9 @@ define void @calli16_2() {
; MIPS64R5EB-NEXT: nop
; MIPS64R5EB-NEXT: ld $1, %got_disp(gv2i16)($gp)
; MIPS64R5EB-NEXT: sw $2, 0($1)
-; MIPS64R5EB-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
;
@@ -4149,9 +3740,9 @@ define void @calli16_2() {
;
; MIPS32R5EL-LABEL: calli16_2:
; MIPS32R5EL: # %bb.0: # %entry
-; MIPS32R5EL-NEXT: addiu $sp, $sp, -32
-; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32
-; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT: addiu $sp, $sp, -24
+; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 24
+; MIPS32R5EL-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32R5EL-NEXT: .cfi_offset 31, -4
; MIPS32R5EL-NEXT: lui $1, 7
; MIPS32R5EL-NEXT: ori $4, $1, 6
@@ -4161,17 +3752,17 @@ define void @calli16_2() {
; MIPS32R5EL-NEXT: nop
; MIPS32R5EL-NEXT: lui $1, %hi(gv2i16)
; MIPS32R5EL-NEXT: sw $2, %lo(gv2i16)($1)
-; MIPS32R5EL-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32R5EL-NEXT: addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT: addiu $sp, $sp, 24
; MIPS32R5EL-NEXT: jr $ra
; MIPS32R5EL-NEXT: nop
;
; MIPS64R5EL-LABEL: calli16_2:
; MIPS64R5EL: # %bb.0: # %entry
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5EL-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EL-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
; MIPS64R5EL-NEXT: .cfi_offset 31, -8
; MIPS64R5EL-NEXT: .cfi_offset 28, -16
; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(calli16_2)))
@@ -4186,9 +3777,9 @@ define void @calli16_2() {
; MIPS64R5EL-NEXT: nop
; MIPS64R5EL-NEXT: ld $1, %got_disp(gv2i16)($gp)
; MIPS64R5EL-NEXT: sw $2, 0($1)
-; MIPS64R5EL-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
entry:
@@ -4282,10 +3873,10 @@ define void @calli16_4() {
;
; MIPS64R5EB-LABEL: calli16_4:
; MIPS64R5EB: # %bb.0: # %entry
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5EB-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EB-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
; MIPS64R5EB-NEXT: .cfi_offset 31, -8
; MIPS64R5EB-NEXT: .cfi_offset 28, -16
; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(calli16_4)))
@@ -4308,9 +3899,9 @@ define void @calli16_4() {
; MIPS64R5EB-NEXT: nop
; MIPS64R5EB-NEXT: ld $1, %got_disp(gv4i16)($gp)
; MIPS64R5EB-NEXT: sd $2, 0($1)
-; MIPS64R5EB-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
;
@@ -4398,10 +3989,10 @@ define void @calli16_4() {
;
; MIPS64R5EL-LABEL: calli16_4:
; MIPS64R5EL: # %bb.0: # %entry
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5EL-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EL-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
; MIPS64R5EL-NEXT: .cfi_offset 31, -8
; MIPS64R5EL-NEXT: .cfi_offset 28, -16
; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(calli16_4)))
@@ -4424,9 +4015,9 @@ define void @calli16_4() {
; MIPS64R5EL-NEXT: nop
; MIPS64R5EL-NEXT: ld $1, %got_disp(gv4i16)($gp)
; MIPS64R5EL-NEXT: sd $2, 0($1)
-; MIPS64R5EL-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
entry:
@@ -4807,10 +4398,10 @@ define void @calli32_2() {
;
; MIPS64R5EB-LABEL: calli32_2:
; MIPS64R5EB: # %bb.0: # %entry
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5EB-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EB-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
; MIPS64R5EB-NEXT: .cfi_offset 31, -8
; MIPS64R5EB-NEXT: .cfi_offset 28, -16
; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(calli32_2)))
@@ -4826,9 +4417,9 @@ define void @calli32_2() {
; MIPS64R5EB-NEXT: nop
; MIPS64R5EB-NEXT: ld $1, %got_disp(gv2i32)($gp)
; MIPS64R5EB-NEXT: sd $2, 0($1)
-; MIPS64R5EB-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
;
@@ -4862,10 +4453,10 @@ define void @calli32_2() {
;
; MIPS64R5EL-LABEL: calli32_2:
; MIPS64R5EL: # %bb.0: # %entry
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5EL-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill
-; MIPS64R5EL-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
; MIPS64R5EL-NEXT: .cfi_offset 31, -8
; MIPS64R5EL-NEXT: .cfi_offset 28, -16
; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(calli32_2)))
@@ -4882,9 +4473,9 @@ define void @calli32_2() {
; MIPS64R5EL-NEXT: nop
; MIPS64R5EL-NEXT: ld $1, %got_disp(gv2i32)($gp)
; MIPS64R5EL-NEXT: sd $2, 0($1)
-; MIPS64R5EL-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations.ll b/llvm/test/CodeGen/Mips/msa/basic_operations.ll
index 820259d7c7bc25..4fc3f57aa002df 100644
--- a/llvm/test/CodeGen/Mips/msa/basic_operations.ll
+++ b/llvm/test/CodeGen/Mips/msa/basic_operations.ll
@@ -2066,46 +2066,38 @@ define void @insert_v2i64_vidx(i64 signext %a) nounwind {
ret void
}
-; TODO: What code should be emitted?
-define void @truncstore() nounwind {
-; O32-LABEL: truncstore:
+; After legalizing shorter vectors with legal element sizes, this test is
+; no longer called truncstore.
+define void @store_i8_32bit() nounwind {
+; O32-LABEL: store_i8_32bit:
; O32: # %bb.0:
; O32-NEXT: lui $2, %hi(_gp_disp)
; O32-NEXT: addiu $2, $2, %lo(_gp_disp)
; O32-NEXT: addu $1, $2, $25
; O32-NEXT: lw $1, %got(v4i8)($1)
-; O32-NEXT: addiu $2, $zero, 255
-; O32-NEXT: sb $2, 3($1)
-; O32-NEXT: sb $2, 2($1)
-; O32-NEXT: sb $2, 1($1)
+; O32-NEXT: addiu $2, $zero, -1
; O32-NEXT: jr $ra
-; O32-NEXT: sb $2, 0($1)
+; O32-NEXT: sw $2, 0($1)
;
-; N32-LABEL: truncstore:
+; N32-LABEL: store_i8_32bit:
; N32: # %bb.0:
-; N32-NEXT: lui $1, %hi(%neg(%gp_rel(truncstore)))
+; N32-NEXT: lui $1, %hi(%neg(%gp_rel(store_i8_32bit)))
; N32-NEXT: addu $1, $1, $25
-; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(truncstore)))
+; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(store_i8_32bit)))
; N32-NEXT: lw $1, %got_disp(v4i8)($1)
-; N32-NEXT: addiu $2, $zero, 255
-; N32-NEXT: sb $2, 3($1)
-; N32-NEXT: sb $2, 2($1)
-; N32-NEXT: sb $2, 1($1)
+; N32-NEXT: addiu $2, $zero, -1
; N32-NEXT: jr $ra
-; N32-NEXT: sb $2, 0($1)
+; N32-NEXT: sw $2, 0($1)
;
-; N64-LABEL: truncstore:
+; N64-LABEL: store_i8_32bit:
; N64: # %bb.0:
-; N64-NEXT: lui $1, %hi(%neg(%gp_rel(truncstore)))
+; N64-NEXT: lui $1, %hi(%neg(%gp_rel(store_i8_32bit)))
; N64-NEXT: daddu $1, $1, $25
-; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(truncstore)))
+; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(store_i8_32bit)))
; N64-NEXT: ld $1, %got_disp(v4i8)($1)
-; N64-NEXT: addiu $2, $zero, 255
-; N64-NEXT: sb $2, 3($1)
-; N64-NEXT: sb $2, 2($1)
-; N64-NEXT: sb $2, 1($1)
+; N64-NEXT: addiu $2, $zero, -1
; N64-NEXT: jr $ra
-; N64-NEXT: sb $2, 0($1)
+; N64-NEXT: sw $2, 0($1)
store volatile <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, ptr @v4i8
ret void
}
More information about the llvm-commits mailing list