[llvm] r324180 - [MIPS] Regenerate vector tests with update script

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 3 14:11:22 PST 2018


Author: rksimon
Date: Sat Feb  3 14:11:22 2018
New Revision: 324180

URL: http://llvm.org/viewvc/llvm-project?rev=324180&view=rev
Log:
[MIPS] Regenerate vector tests with update script

Hopefully this helps make the tests a lot more maintainable.

Modified:
    llvm/trunk/test/CodeGen/Mips/cconv/vector.ll

Modified: llvm/trunk/test/CodeGen/Mips/cconv/vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/cconv/vector.ll?rev=324180&r1=324179&r2=324180&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/cconv/vector.ll (original)
+++ llvm/trunk/test/CodeGen/Mips/cconv/vector.ll Sat Feb  3 14:11:22 2018
@@ -1,13 +1,12 @@
-; RUN: llc < %s -march=mips -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EB
-; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB
-; RUN: llc < %s -march=mips -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EB
-; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5
-; RUN: llc < %s -march=mipsel -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EL
-; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL
-; RUN: llc < %s -march=mipsel -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EL
-; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5
-
-
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=mips-unknown-linux-gnu -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EB
+; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB
+; RUN: llc < %s -mtriple=mips-unknown-linux-gnu -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EB
+; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EB
+; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EL
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL
+; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EL
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EL
 
 ; Test that vector types are passed through the integer register set whether or
 ; not MSA is enabled. This is a ABI requirement for MIPS. For GCC compatibility
@@ -17,28 +16,262 @@
 ; First set of tests are for argument passing.
 
 define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
-; ALL-LABEL: i8_2:
-; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24
-; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24
-; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 16
-; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 16
-
-; MIPS32EL: addu $1, $4, $5
-
-; MIPS32R5-DAG: sw $4
-; MIPS32R5-DAG: sw $5
-
-; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 56
-; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 56
-; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48
-; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48
-
-; MIPS64EL-DAG: sll ${{[0-9]+}}, $4, 0
-; MIPS64EL-DAG: sll ${{[0-9]+}}, $5, 0
-
-; MIPS64R5-DAG: sd $4
-; MIPS64R5-DAG: sd $5
-
+; MIPS32EB-LABEL: i8_2:
+; MIPS32EB:       # %bb.0:
+; MIPS32EB-NEXT:    srl $1, $5, 24
+; MIPS32EB-NEXT:    srl $2, $4, 24
+; MIPS32EB-NEXT:    addu $1, $2, $1
+; MIPS32EB-NEXT:    sll $1, $1, 8
+; MIPS32EB-NEXT:    srl $2, $5, 16
+; MIPS32EB-NEXT:    srl $3, $4, 16
+; MIPS32EB-NEXT:    addu $2, $3, $2
+; MIPS32EB-NEXT:    andi $2, $2, 255
+; MIPS32EB-NEXT:    or $2, $2, $1
+; MIPS32EB-NEXT:    jr $ra
+; MIPS32EB-NEXT:    nop
+;
+; MIPS64EB-LABEL: i8_2:
+; MIPS64EB:       # %bb.0:
+; MIPS64EB-NEXT:    dsrl $1, $5, 56
+; MIPS64EB-NEXT:    sll $1, $1, 0
+; MIPS64EB-NEXT:    dsrl $2, $4, 56
+; MIPS64EB-NEXT:    sll $2, $2, 0
+; MIPS64EB-NEXT:    addu $1, $2, $1
+; MIPS64EB-NEXT:    dsrl $2, $5, 48
+; MIPS64EB-NEXT:    sll $1, $1, 8
+; MIPS64EB-NEXT:    sll $2, $2, 0
+; MIPS64EB-NEXT:    dsrl $3, $4, 48
+; MIPS64EB-NEXT:    sll $3, $3, 0
+; MIPS64EB-NEXT:    addu $2, $3, $2
+; MIPS64EB-NEXT:    andi $2, $2, 255
+; MIPS64EB-NEXT:    or $2, $2, $1
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: i8_2:
+; MIPS32R5EB:       # %bb.0:
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -16
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS32R5EB-NEXT:    sw $5, 8($sp)
+; MIPS32R5EB-NEXT:    sw $4, 12($sp)
+; MIPS32R5EB-NEXT:    ldi.b $w0, 0
+; MIPS32R5EB-NEXT:    lbu $1, 9($sp)
+; MIPS32R5EB-NEXT:    lbu $2, 8($sp)
+; MIPS32R5EB-NEXT:    move.v $w1, $w0
+; MIPS32R5EB-NEXT:    insert.w $w1[0], $2
+; MIPS32R5EB-NEXT:    insert.w $w1[1], $1
+; MIPS32R5EB-NEXT:    lbu $1, 12($sp)
+; MIPS32R5EB-NEXT:    insert.w $w0[0], $1
+; MIPS32R5EB-NEXT:    lbu $1, 10($sp)
+; MIPS32R5EB-NEXT:    lbu $2, 13($sp)
+; MIPS32R5EB-NEXT:    insert.w $w0[1], $2
+; MIPS32R5EB-NEXT:    insert.w $w1[2], $1
+; MIPS32R5EB-NEXT:    lbu $1, 11($sp)
+; MIPS32R5EB-NEXT:    insert.w $w1[3], $1
+; MIPS32R5EB-NEXT:    ilvr.w $w1, $w1, $w1
+; MIPS32R5EB-NEXT:    lbu $1, 14($sp)
+; MIPS32R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS32R5EB-NEXT:    insert.w $w0[2], $1
+; MIPS32R5EB-NEXT:    lbu $1, 15($sp)
+; MIPS32R5EB-NEXT:    insert.w $w0[3], $1
+; MIPS32R5EB-NEXT:    ilvr.w $w0, $w0, $w0
+; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
+; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS32R5EB-NEXT:    copy_s.w $1, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[3]
+; MIPS32R5EB-NEXT:    sb $2, 5($sp)
+; MIPS32R5EB-NEXT:    sb $1, 4($sp)
+; MIPS32R5EB-NEXT:    lhu $2, 4($sp)
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 16
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: i8_2:
+; MIPS64R5EB:       # %bb.0:
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -64
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 64
+; MIPS64R5EB-NEXT:    sd $4, 56($sp)
+; MIPS64R5EB-NEXT:    ldi.b $w0, 0
+; MIPS64R5EB-NEXT:    lbu $1, 57($sp)
+; MIPS64R5EB-NEXT:    lbu $2, 56($sp)
+; MIPS64R5EB-NEXT:    move.v $w1, $w0
+; MIPS64R5EB-NEXT:    insert.h $w1[0], $2
+; MIPS64R5EB-NEXT:    insert.h $w1[1], $1
+; MIPS64R5EB-NEXT:    lbu $1, 58($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[2], $1
+; MIPS64R5EB-NEXT:    lbu $1, 59($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[3], $1
+; MIPS64R5EB-NEXT:    lbu $1, 60($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[4], $1
+; MIPS64R5EB-NEXT:    lbu $1, 61($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[5], $1
+; MIPS64R5EB-NEXT:    lbu $1, 63($sp)
+; MIPS64R5EB-NEXT:    lbu $2, 62($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[6], $2
+; MIPS64R5EB-NEXT:    insert.h $w1[7], $1
+; MIPS64R5EB-NEXT:    copy_s.h $1, $w1[0]
+; MIPS64R5EB-NEXT:    copy_s.h $2, $w1[1]
+; MIPS64R5EB-NEXT:    sd $5, 48($sp)
+; MIPS64R5EB-NEXT:    lbu $3, 48($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[0], $3
+; MIPS64R5EB-NEXT:    lbu $3, 49($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[1], $3
+; MIPS64R5EB-NEXT:    lbu $3, 50($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[2], $3
+; MIPS64R5EB-NEXT:    lbu $3, 51($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[3], $3
+; MIPS64R5EB-NEXT:    lbu $3, 52($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[4], $3
+; MIPS64R5EB-NEXT:    lbu $3, 53($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[5], $3
+; MIPS64R5EB-NEXT:    lbu $3, 55($sp)
+; MIPS64R5EB-NEXT:    lbu $4, 54($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[6], $4
+; MIPS64R5EB-NEXT:    insert.h $w0[7], $3
+; MIPS64R5EB-NEXT:    copy_s.h $3, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.h $4, $w0[1]
+; MIPS64R5EB-NEXT:    sw $4, 28($sp)
+; MIPS64R5EB-NEXT:    sw $3, 20($sp)
+; MIPS64R5EB-NEXT:    sw $2, 12($sp)
+; MIPS64R5EB-NEXT:    sw $1, 4($sp)
+; MIPS64R5EB-NEXT:    ld.d $w0, 16($sp)
+; MIPS64R5EB-NEXT:    ld.d $w1, 0($sp)
+; MIPS64R5EB-NEXT:    addv.d $w0, $w1, $w0
+; MIPS64R5EB-NEXT:    copy_s.d $1, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[1]
+; MIPS64R5EB-NEXT:    sb $2, 45($sp)
+; MIPS64R5EB-NEXT:    sb $1, 44($sp)
+; MIPS64R5EB-NEXT:    lh $2, 44($sp)
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 64
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS32EL-LABEL: i8_2:
+; MIPS32EL:       # %bb.0:
+; MIPS32EL-NEXT:    addu $1, $4, $5
+; MIPS32EL-NEXT:    andi $1, $1, 255
+; MIPS32EL-NEXT:    andi $2, $5, 65280
+; MIPS32EL-NEXT:    srl $2, $2, 8
+; MIPS32EL-NEXT:    andi $3, $4, 65280
+; MIPS32EL-NEXT:    srl $3, $3, 8
+; MIPS32EL-NEXT:    addu $2, $3, $2
+; MIPS32EL-NEXT:    sll $2, $2, 8
+; MIPS32EL-NEXT:    or $2, $1, $2
+; MIPS32EL-NEXT:    jr $ra
+; MIPS32EL-NEXT:    nop
+;
+; MIPS64EL-LABEL: i8_2:
+; MIPS64EL:       # %bb.0:
+; MIPS64EL-NEXT:    sll $1, $5, 0
+; MIPS64EL-NEXT:    sll $2, $4, 0
+; MIPS64EL-NEXT:    addu $3, $2, $1
+; MIPS64EL-NEXT:    andi $3, $3, 255
+; MIPS64EL-NEXT:    andi $1, $1, 65280
+; MIPS64EL-NEXT:    srl $1, $1, 8
+; MIPS64EL-NEXT:    andi $2, $2, 65280
+; MIPS64EL-NEXT:    srl $2, $2, 8
+; MIPS64EL-NEXT:    addu $1, $2, $1
+; MIPS64EL-NEXT:    sll $1, $1, 8
+; MIPS64EL-NEXT:    or $2, $3, $1
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: i8_2:
+; MIPS32R5EL:       # %bb.0:
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -16
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS32R5EL-NEXT:    sw $5, 8($sp)
+; MIPS32R5EL-NEXT:    sw $4, 12($sp)
+; MIPS32R5EL-NEXT:    ldi.b $w0, 0
+; MIPS32R5EL-NEXT:    lbu $1, 9($sp)
+; MIPS32R5EL-NEXT:    lbu $2, 12($sp)
+; MIPS32R5EL-NEXT:    lbu $3, 8($sp)
+; MIPS32R5EL-NEXT:    move.v $w1, $w0
+; MIPS32R5EL-NEXT:    insert.w $w1[0], $3
+; MIPS32R5EL-NEXT:    insert.w $w0[0], $2
+; MIPS32R5EL-NEXT:    insert.w $w1[1], $1
+; MIPS32R5EL-NEXT:    lbu $1, 10($sp)
+; MIPS32R5EL-NEXT:    insert.w $w1[2], $1
+; MIPS32R5EL-NEXT:    lbu $1, 11($sp)
+; MIPS32R5EL-NEXT:    insert.w $w1[3], $1
+; MIPS32R5EL-NEXT:    ilvr.w $w1, $w1, $w1
+; MIPS32R5EL-NEXT:    lbu $1, 13($sp)
+; MIPS32R5EL-NEXT:    insert.w $w0[1], $1
+; MIPS32R5EL-NEXT:    lbu $1, 14($sp)
+; MIPS32R5EL-NEXT:    insert.w $w0[2], $1
+; MIPS32R5EL-NEXT:    lbu $1, 15($sp)
+; MIPS32R5EL-NEXT:    insert.w $w0[3], $1
+; MIPS32R5EL-NEXT:    ilvr.w $w0, $w0, $w0
+; MIPS32R5EL-NEXT:    addv.d $w0, $w0, $w1
+; MIPS32R5EL-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[2]
+; MIPS32R5EL-NEXT:    sb $2, 5($sp)
+; MIPS32R5EL-NEXT:    sb $1, 4($sp)
+; MIPS32R5EL-NEXT:    lhu $2, 4($sp)
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 16
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: i8_2:
+; MIPS64R5EL:       # %bb.0:
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -64
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 64
+; MIPS64R5EL-NEXT:    sd $4, 56($sp)
+; MIPS64R5EL-NEXT:    ldi.b $w0, 0
+; MIPS64R5EL-NEXT:    lbu $1, 57($sp)
+; MIPS64R5EL-NEXT:    lbu $2, 56($sp)
+; MIPS64R5EL-NEXT:    move.v $w1, $w0
+; MIPS64R5EL-NEXT:    insert.h $w1[0], $2
+; MIPS64R5EL-NEXT:    insert.h $w1[1], $1
+; MIPS64R5EL-NEXT:    lbu $1, 58($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[2], $1
+; MIPS64R5EL-NEXT:    lbu $1, 59($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[3], $1
+; MIPS64R5EL-NEXT:    lbu $1, 60($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[4], $1
+; MIPS64R5EL-NEXT:    lbu $1, 61($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[5], $1
+; MIPS64R5EL-NEXT:    lbu $1, 63($sp)
+; MIPS64R5EL-NEXT:    lbu $2, 62($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[6], $2
+; MIPS64R5EL-NEXT:    insert.h $w1[7], $1
+; MIPS64R5EL-NEXT:    copy_s.h $1, $w1[0]
+; MIPS64R5EL-NEXT:    copy_s.h $2, $w1[1]
+; MIPS64R5EL-NEXT:    sd $5, 48($sp)
+; MIPS64R5EL-NEXT:    lbu $3, 48($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[0], $3
+; MIPS64R5EL-NEXT:    lbu $3, 49($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[1], $3
+; MIPS64R5EL-NEXT:    lbu $3, 50($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[2], $3
+; MIPS64R5EL-NEXT:    lbu $3, 51($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[3], $3
+; MIPS64R5EL-NEXT:    lbu $3, 52($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[4], $3
+; MIPS64R5EL-NEXT:    lbu $3, 53($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[5], $3
+; MIPS64R5EL-NEXT:    lbu $3, 55($sp)
+; MIPS64R5EL-NEXT:    lbu $4, 54($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[6], $4
+; MIPS64R5EL-NEXT:    insert.h $w0[7], $3
+; MIPS64R5EL-NEXT:    copy_s.h $3, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.h $4, $w0[1]
+; MIPS64R5EL-NEXT:    sw $4, 24($sp)
+; MIPS64R5EL-NEXT:    sw $3, 16($sp)
+; MIPS64R5EL-NEXT:    sw $2, 8($sp)
+; MIPS64R5EL-NEXT:    sw $1, 0($sp)
+; MIPS64R5EL-NEXT:    ld.d $w0, 16($sp)
+; MIPS64R5EL-NEXT:    ld.d $w1, 0($sp)
+; MIPS64R5EL-NEXT:    addv.d $w0, $w1, $w0
+; MIPS64R5EL-NEXT:    copy_s.d $1, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[1]
+; MIPS64R5EL-NEXT:    sb $2, 45($sp)
+; MIPS64R5EL-NEXT:    sb $1, 44($sp)
+; MIPS64R5EL-NEXT:    lh $2, 44($sp)
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 64
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
   %1 = add <2 x i8> %a, %b
   ret <2 x i8> %1
 }
@@ -46,61 +279,718 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x
 ; Test that vector spilled to the outgoing argument area have the expected
 ; offset from $sp.
 
-define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d,
-                        <2 x i8> %e, <2 x i8> %f, <2 x i8> %g) {
+define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x i8> %e, <2 x i8> %f, <2 x i8> %g) {
+; MIPS32EB-LABEL: i8x2_7:
+; MIPS32EB:       # %bb.0: # %entry
+; MIPS32EB-NEXT:    srl $1, $5, 24
+; MIPS32EB-NEXT:    srl $2, $4, 24
+; MIPS32EB-NEXT:    addu $1, $2, $1
+; MIPS32EB-NEXT:    srl $2, $6, 24
+; MIPS32EB-NEXT:    addu $1, $1, $2
+; MIPS32EB-NEXT:    srl $2, $7, 24
+; MIPS32EB-NEXT:    addu $1, $1, $2
+; MIPS32EB-NEXT:    srl $2, $5, 16
+; MIPS32EB-NEXT:    srl $3, $4, 16
+; MIPS32EB-NEXT:    addu $2, $3, $2
+; MIPS32EB-NEXT:    srl $3, $6, 16
+; MIPS32EB-NEXT:    lbu $4, 16($sp)
+; MIPS32EB-NEXT:    addu $2, $2, $3
+; MIPS32EB-NEXT:    addu $1, $1, $4
+; MIPS32EB-NEXT:    lbu $3, 20($sp)
+; MIPS32EB-NEXT:    addu $1, $1, $3
+; MIPS32EB-NEXT:    lbu $3, 24($sp)
+; MIPS32EB-NEXT:    addu $1, $1, $3
+; MIPS32EB-NEXT:    srl $3, $7, 16
+; MIPS32EB-NEXT:    sll $1, $1, 8
+; MIPS32EB-NEXT:    addu $2, $2, $3
+; MIPS32EB-NEXT:    lbu $3, 17($sp)
+; MIPS32EB-NEXT:    addu $2, $2, $3
+; MIPS32EB-NEXT:    lbu $3, 21($sp)
+; MIPS32EB-NEXT:    addu $2, $2, $3
+; MIPS32EB-NEXT:    lbu $3, 25($sp)
+; MIPS32EB-NEXT:    addu $2, $2, $3
+; MIPS32EB-NEXT:    andi $2, $2, 255
+; MIPS32EB-NEXT:    or $2, $2, $1
+; MIPS32EB-NEXT:    jr $ra
+; MIPS32EB-NEXT:    nop
+;
+; MIPS64EB-LABEL: i8x2_7:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    dsrl $1, $5, 56
+; MIPS64EB-NEXT:    dsrl $2, $6, 56
+; MIPS64EB-NEXT:    sll $1, $1, 0
+; MIPS64EB-NEXT:    dsrl $3, $4, 56
+; MIPS64EB-NEXT:    sll $3, $3, 0
+; MIPS64EB-NEXT:    addu $1, $3, $1
+; MIPS64EB-NEXT:    dsrl $3, $6, 48
+; MIPS64EB-NEXT:    sll $2, $2, 0
+; MIPS64EB-NEXT:    dsrl $5, $5, 48
+; MIPS64EB-NEXT:    sll $5, $5, 0
+; MIPS64EB-NEXT:    dsrl $4, $4, 48
+; MIPS64EB-NEXT:    sll $4, $4, 0
+; MIPS64EB-NEXT:    addu $4, $4, $5
+; MIPS64EB-NEXT:    addu $1, $1, $2
+; MIPS64EB-NEXT:    dsrl $2, $8, 48
+; MIPS64EB-NEXT:    dsrl $5, $8, 56
+; MIPS64EB-NEXT:    sll $3, $3, 0
+; MIPS64EB-NEXT:    dsrl $6, $7, 56
+; MIPS64EB-NEXT:    sll $6, $6, 0
+; MIPS64EB-NEXT:    addu $1, $1, $6
+; MIPS64EB-NEXT:    addu $3, $4, $3
+; MIPS64EB-NEXT:    sll $4, $5, 0
+; MIPS64EB-NEXT:    dsrl $5, $7, 48
+; MIPS64EB-NEXT:    sll $5, $5, 0
+; MIPS64EB-NEXT:    addu $3, $3, $5
+; MIPS64EB-NEXT:    dsrl $5, $10, 48
+; MIPS64EB-NEXT:    sll $2, $2, 0
+; MIPS64EB-NEXT:    dsrl $6, $10, 56
+; MIPS64EB-NEXT:    addu $1, $1, $4
+; MIPS64EB-NEXT:    dsrl $4, $9, 56
+; MIPS64EB-NEXT:    sll $4, $4, 0
+; MIPS64EB-NEXT:    addu $1, $1, $4
+; MIPS64EB-NEXT:    sll $4, $6, 0
+; MIPS64EB-NEXT:    addu $1, $1, $4
+; MIPS64EB-NEXT:    sll $1, $1, 8
+; MIPS64EB-NEXT:    addu $2, $3, $2
+; MIPS64EB-NEXT:    dsrl $3, $9, 48
+; MIPS64EB-NEXT:    sll $3, $3, 0
+; MIPS64EB-NEXT:    addu $2, $2, $3
+; MIPS64EB-NEXT:    sll $3, $5, 0
+; MIPS64EB-NEXT:    addu $2, $2, $3
+; MIPS64EB-NEXT:    andi $2, $2, 255
+; MIPS64EB-NEXT:    or $2, $2, $1
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: i8x2_7:
+; MIPS32R5EB:       # %bb.0: # %entry
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -24
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32R5EB-NEXT:    sw $5, 16($sp)
+; MIPS32R5EB-NEXT:    sw $4, 20($sp)
+; MIPS32R5EB-NEXT:    ldi.b $w0, 0
+; MIPS32R5EB-NEXT:    lbu $1, 17($sp)
+; MIPS32R5EB-NEXT:    lbu $2, 16($sp)
+; MIPS32R5EB-NEXT:    move.v $w1, $w0
+; MIPS32R5EB-NEXT:    insert.w $w1[0], $2
+; MIPS32R5EB-NEXT:    insert.w $w1[1], $1
+; MIPS32R5EB-NEXT:    lbu $1, 18($sp)
+; MIPS32R5EB-NEXT:    lbu $2, 21($sp)
+; MIPS32R5EB-NEXT:    lbu $3, 20($sp)
+; MIPS32R5EB-NEXT:    move.v $w2, $w0
+; MIPS32R5EB-NEXT:    insert.w $w2[0], $3
+; MIPS32R5EB-NEXT:    insert.w $w2[1], $2
+; MIPS32R5EB-NEXT:    insert.w $w1[2], $1
+; MIPS32R5EB-NEXT:    lbu $1, 19($sp)
+; MIPS32R5EB-NEXT:    insert.w $w1[3], $1
+; MIPS32R5EB-NEXT:    ilvr.w $w1, $w1, $w1
+; MIPS32R5EB-NEXT:    lbu $1, 22($sp)
+; MIPS32R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS32R5EB-NEXT:    insert.w $w2[2], $1
+; MIPS32R5EB-NEXT:    lbu $1, 23($sp)
+; MIPS32R5EB-NEXT:    insert.w $w2[3], $1
+; MIPS32R5EB-NEXT:    ilvr.w $w2, $w2, $w2
+; MIPS32R5EB-NEXT:    shf.w $w2, $w2, 177
+; MIPS32R5EB-NEXT:    addv.d $w1, $w2, $w1
+; MIPS32R5EB-NEXT:    sw $6, 12($sp)
+; MIPS32R5EB-NEXT:    lbu $1, 13($sp)
+; MIPS32R5EB-NEXT:    lbu $2, 12($sp)
+; MIPS32R5EB-NEXT:    move.v $w2, $w0
+; MIPS32R5EB-NEXT:    insert.w $w2[0], $2
+; MIPS32R5EB-NEXT:    insert.w $w2[1], $1
+; MIPS32R5EB-NEXT:    lbu $1, 14($sp)
+; MIPS32R5EB-NEXT:    insert.w $w2[2], $1
+; MIPS32R5EB-NEXT:    lbu $1, 15($sp)
+; MIPS32R5EB-NEXT:    insert.w $w2[3], $1
+; MIPS32R5EB-NEXT:    ilvr.w $w2, $w2, $w2
+; MIPS32R5EB-NEXT:    shf.w $w2, $w2, 177
+; MIPS32R5EB-NEXT:    addv.d $w1, $w1, $w2
+; MIPS32R5EB-NEXT:    sw $7, 8($sp)
+; MIPS32R5EB-NEXT:    lbu $1, 9($sp)
+; MIPS32R5EB-NEXT:    lbu $2, 8($sp)
+; MIPS32R5EB-NEXT:    move.v $w2, $w0
+; MIPS32R5EB-NEXT:    insert.w $w2[0], $2
+; MIPS32R5EB-NEXT:    insert.w $w2[1], $1
+; MIPS32R5EB-NEXT:    lbu $1, 10($sp)
+; MIPS32R5EB-NEXT:    insert.w $w2[2], $1
+; MIPS32R5EB-NEXT:    lbu $1, 11($sp)
+; MIPS32R5EB-NEXT:    insert.w $w2[3], $1
+; MIPS32R5EB-NEXT:    ilvr.w $w2, $w2, $w2
+; MIPS32R5EB-NEXT:    shf.w $w2, $w2, 177
+; MIPS32R5EB-NEXT:    addv.d $w1, $w1, $w2
+; MIPS32R5EB-NEXT:    lbu $1, 41($sp)
+; MIPS32R5EB-NEXT:    lbu $2, 40($sp)
+; MIPS32R5EB-NEXT:    move.v $w2, $w0
+; MIPS32R5EB-NEXT:    insert.w $w2[0], $2
+; MIPS32R5EB-NEXT:    insert.w $w2[1], $1
+; MIPS32R5EB-NEXT:    lbu $1, 42($sp)
+; MIPS32R5EB-NEXT:    insert.w $w2[2], $1
+; MIPS32R5EB-NEXT:    lbu $1, 43($sp)
+; MIPS32R5EB-NEXT:    insert.w $w2[3], $1
+; MIPS32R5EB-NEXT:    ilvr.w $w2, $w2, $w2
+; MIPS32R5EB-NEXT:    shf.w $w2, $w2, 177
+; MIPS32R5EB-NEXT:    addv.d $w1, $w1, $w2
+; MIPS32R5EB-NEXT:    lbu $1, 45($sp)
+; MIPS32R5EB-NEXT:    lbu $2, 44($sp)
+; MIPS32R5EB-NEXT:    move.v $w2, $w0
+; MIPS32R5EB-NEXT:    insert.w $w2[0], $2
+; MIPS32R5EB-NEXT:    insert.w $w2[1], $1
+; MIPS32R5EB-NEXT:    lbu $1, 46($sp)
+; MIPS32R5EB-NEXT:    insert.w $w2[2], $1
+; MIPS32R5EB-NEXT:    lbu $1, 47($sp)
+; MIPS32R5EB-NEXT:    insert.w $w2[3], $1
+; MIPS32R5EB-NEXT:    ilvr.w $w2, $w2, $w2
+; MIPS32R5EB-NEXT:    shf.w $w2, $w2, 177
+; MIPS32R5EB-NEXT:    addv.d $w1, $w1, $w2
+; MIPS32R5EB-NEXT:    lbu $1, 48($sp)
+; MIPS32R5EB-NEXT:    insert.w $w0[0], $1
+; MIPS32R5EB-NEXT:    lbu $1, 49($sp)
+; MIPS32R5EB-NEXT:    insert.w $w0[1], $1
+; MIPS32R5EB-NEXT:    lbu $1, 50($sp)
+; MIPS32R5EB-NEXT:    insert.w $w0[2], $1
+; MIPS32R5EB-NEXT:    lbu $1, 51($sp)
+; MIPS32R5EB-NEXT:    insert.w $w0[3], $1
+; MIPS32R5EB-NEXT:    ilvr.w $w0, $w0, $w0
+; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS32R5EB-NEXT:    addv.d $w0, $w1, $w0
+; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS32R5EB-NEXT:    copy_s.w $1, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[3]
+; MIPS32R5EB-NEXT:    sb $2, 5($sp)
+; MIPS32R5EB-NEXT:    sb $1, 4($sp)
+; MIPS32R5EB-NEXT:    lhu $2, 4($sp)
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 24
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: i8x2_7:
+; MIPS64R5EB:       # %bb.0: # %entry
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -176
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 176
+; MIPS64R5EB-NEXT:    sd $4, 168($sp)
+; MIPS64R5EB-NEXT:    ldi.b $w0, 0
+; MIPS64R5EB-NEXT:    lbu $1, 169($sp)
+; MIPS64R5EB-NEXT:    lbu $2, 168($sp)
+; MIPS64R5EB-NEXT:    move.v $w1, $w0
+; MIPS64R5EB-NEXT:    insert.h $w1[0], $2
+; MIPS64R5EB-NEXT:    insert.h $w1[1], $1
+; MIPS64R5EB-NEXT:    lbu $1, 170($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[2], $1
+; MIPS64R5EB-NEXT:    lbu $1, 171($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[3], $1
+; MIPS64R5EB-NEXT:    lbu $1, 172($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[4], $1
+; MIPS64R5EB-NEXT:    lbu $1, 173($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[5], $1
+; MIPS64R5EB-NEXT:    lbu $1, 175($sp)
+; MIPS64R5EB-NEXT:    lbu $2, 174($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[6], $2
+; MIPS64R5EB-NEXT:    insert.h $w1[7], $1
+; MIPS64R5EB-NEXT:    copy_s.h $1, $w1[0]
+; MIPS64R5EB-NEXT:    copy_s.h $2, $w1[1]
+; MIPS64R5EB-NEXT:    sd $5, 160($sp)
+; MIPS64R5EB-NEXT:    lbu $3, 161($sp)
+; MIPS64R5EB-NEXT:    lbu $4, 160($sp)
+; MIPS64R5EB-NEXT:    move.v $w1, $w0
+; MIPS64R5EB-NEXT:    insert.h $w1[0], $4
+; MIPS64R5EB-NEXT:    insert.h $w1[1], $3
+; MIPS64R5EB-NEXT:    lbu $3, 162($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[2], $3
+; MIPS64R5EB-NEXT:    lbu $3, 163($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[3], $3
+; MIPS64R5EB-NEXT:    lbu $3, 164($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[4], $3
+; MIPS64R5EB-NEXT:    lbu $3, 165($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[5], $3
+; MIPS64R5EB-NEXT:    lbu $3, 167($sp)
+; MIPS64R5EB-NEXT:    lbu $4, 166($sp)
+; MIPS64R5EB-NEXT:    insert.h $w1[6], $4
+; MIPS64R5EB-NEXT:    insert.h $w1[7], $3
+; MIPS64R5EB-NEXT:    copy_s.h $3, $w1[0]
+; MIPS64R5EB-NEXT:    copy_s.h $4, $w1[1]
+; MIPS64R5EB-NEXT:    sw $4, 28($sp)
+; MIPS64R5EB-NEXT:    sw $3, 20($sp)
+; MIPS64R5EB-NEXT:    sw $2, 12($sp)
+; MIPS64R5EB-NEXT:    sw $1, 4($sp)
+; MIPS64R5EB-NEXT:    ld.d $w1, 16($sp)
+; MIPS64R5EB-NEXT:    ld.d $w2, 0($sp)
+; MIPS64R5EB-NEXT:    addv.d $w1, $w2, $w1
+; MIPS64R5EB-NEXT:    sd $6, 152($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 153($sp)
+; MIPS64R5EB-NEXT:    lbu $2, 152($sp)
+; MIPS64R5EB-NEXT:    move.v $w2, $w0
+; MIPS64R5EB-NEXT:    insert.h $w2[0], $2
+; MIPS64R5EB-NEXT:    insert.h $w2[1], $1
+; MIPS64R5EB-NEXT:    lbu $1, 154($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[2], $1
+; MIPS64R5EB-NEXT:    lbu $1, 155($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[3], $1
+; MIPS64R5EB-NEXT:    lbu $1, 156($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[4], $1
+; MIPS64R5EB-NEXT:    lbu $1, 157($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[5], $1
+; MIPS64R5EB-NEXT:    lbu $1, 159($sp)
+; MIPS64R5EB-NEXT:    lbu $2, 158($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[6], $2
+; MIPS64R5EB-NEXT:    insert.h $w2[7], $1
+; MIPS64R5EB-NEXT:    copy_s.h $1, $w2[0]
+; MIPS64R5EB-NEXT:    copy_s.h $2, $w2[1]
+; MIPS64R5EB-NEXT:    sw $2, 44($sp)
+; MIPS64R5EB-NEXT:    sw $1, 36($sp)
+; MIPS64R5EB-NEXT:    ld.d $w2, 32($sp)
+; MIPS64R5EB-NEXT:    addv.d $w1, $w1, $w2
+; MIPS64R5EB-NEXT:    sd $7, 144($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 145($sp)
+; MIPS64R5EB-NEXT:    lbu $2, 144($sp)
+; MIPS64R5EB-NEXT:    move.v $w2, $w0
+; MIPS64R5EB-NEXT:    insert.h $w2[0], $2
+; MIPS64R5EB-NEXT:    insert.h $w2[1], $1
+; MIPS64R5EB-NEXT:    lbu $1, 146($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[2], $1
+; MIPS64R5EB-NEXT:    lbu $1, 147($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[3], $1
+; MIPS64R5EB-NEXT:    lbu $1, 148($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[4], $1
+; MIPS64R5EB-NEXT:    lbu $1, 149($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[5], $1
+; MIPS64R5EB-NEXT:    lbu $1, 151($sp)
+; MIPS64R5EB-NEXT:    lbu $2, 150($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[6], $2
+; MIPS64R5EB-NEXT:    insert.h $w2[7], $1
+; MIPS64R5EB-NEXT:    copy_s.h $1, $w2[0]
+; MIPS64R5EB-NEXT:    copy_s.h $2, $w2[1]
+; MIPS64R5EB-NEXT:    sw $2, 60($sp)
+; MIPS64R5EB-NEXT:    sw $1, 52($sp)
+; MIPS64R5EB-NEXT:    ld.d $w2, 48($sp)
+; MIPS64R5EB-NEXT:    addv.d $w1, $w1, $w2
+; MIPS64R5EB-NEXT:    sd $8, 136($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 137($sp)
+; MIPS64R5EB-NEXT:    lbu $2, 136($sp)
+; MIPS64R5EB-NEXT:    move.v $w2, $w0
+; MIPS64R5EB-NEXT:    insert.h $w2[0], $2
+; MIPS64R5EB-NEXT:    insert.h $w2[1], $1
+; MIPS64R5EB-NEXT:    lbu $1, 138($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[2], $1
+; MIPS64R5EB-NEXT:    lbu $1, 139($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[3], $1
+; MIPS64R5EB-NEXT:    lbu $1, 140($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[4], $1
+; MIPS64R5EB-NEXT:    lbu $1, 141($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[5], $1
+; MIPS64R5EB-NEXT:    lbu $1, 143($sp)
+; MIPS64R5EB-NEXT:    lbu $2, 142($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[6], $2
+; MIPS64R5EB-NEXT:    insert.h $w2[7], $1
+; MIPS64R5EB-NEXT:    copy_s.h $1, $w2[0]
+; MIPS64R5EB-NEXT:    copy_s.h $2, $w2[1]
+; MIPS64R5EB-NEXT:    sd $10, 120($sp)
+; MIPS64R5EB-NEXT:    lbu $3, 121($sp)
+; MIPS64R5EB-NEXT:    lbu $4, 120($sp)
+; MIPS64R5EB-NEXT:    move.v $w2, $w0
+; MIPS64R5EB-NEXT:    insert.h $w2[0], $4
+; MIPS64R5EB-NEXT:    insert.h $w2[1], $3
+; MIPS64R5EB-NEXT:    lbu $3, 122($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[2], $3
+; MIPS64R5EB-NEXT:    lbu $3, 123($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[3], $3
+; MIPS64R5EB-NEXT:    lbu $3, 124($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[4], $3
+; MIPS64R5EB-NEXT:    lbu $3, 125($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[5], $3
+; MIPS64R5EB-NEXT:    lbu $3, 127($sp)
+; MIPS64R5EB-NEXT:    lbu $4, 126($sp)
+; MIPS64R5EB-NEXT:    insert.h $w2[6], $4
+; MIPS64R5EB-NEXT:    insert.h $w2[7], $3
+; MIPS64R5EB-NEXT:    copy_s.h $3, $w2[0]
+; MIPS64R5EB-NEXT:    copy_s.h $4, $w2[1]
+; MIPS64R5EB-NEXT:    sw $2, 76($sp)
+; MIPS64R5EB-NEXT:    sw $1, 68($sp)
+; MIPS64R5EB-NEXT:    ld.d $w2, 64($sp)
+; MIPS64R5EB-NEXT:    addv.d $w1, $w1, $w2
+; MIPS64R5EB-NEXT:    sd $9, 128($sp)
+; MIPS64R5EB-NEXT:    lbu $1, 128($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[0], $1
+; MIPS64R5EB-NEXT:    lbu $1, 129($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[1], $1
+; MIPS64R5EB-NEXT:    lbu $1, 130($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[2], $1
+; MIPS64R5EB-NEXT:    lbu $1, 131($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[3], $1
+; MIPS64R5EB-NEXT:    lbu $1, 132($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[4], $1
+; MIPS64R5EB-NEXT:    lbu $1, 133($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[5], $1
+; MIPS64R5EB-NEXT:    lbu $1, 135($sp)
+; MIPS64R5EB-NEXT:    lbu $2, 134($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[6], $2
+; MIPS64R5EB-NEXT:    insert.h $w0[7], $1
+; MIPS64R5EB-NEXT:    copy_s.h $1, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.h $2, $w0[1]
+; MIPS64R5EB-NEXT:    sw $2, 92($sp)
+; MIPS64R5EB-NEXT:    sw $1, 84($sp)
+; MIPS64R5EB-NEXT:    ld.d $w0, 80($sp)
+; MIPS64R5EB-NEXT:    addv.d $w0, $w1, $w0
+; MIPS64R5EB-NEXT:    sw $4, 108($sp)
+; MIPS64R5EB-NEXT:    sw $3, 100($sp)
+; MIPS64R5EB-NEXT:    ld.d $w1, 96($sp)
+; MIPS64R5EB-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5EB-NEXT:    copy_s.d $1, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[1]
+; MIPS64R5EB-NEXT:    sb $2, 117($sp)
+; MIPS64R5EB-NEXT:    sb $1, 116($sp)
+; MIPS64R5EB-NEXT:    lh $2, 116($sp)
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 176
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS32EL-LABEL: i8x2_7:
+; MIPS32EL:       # %bb.0: # %entry
+; MIPS32EL-NEXT:    addu $1, $4, $5
+; MIPS32EL-NEXT:    addu $1, $1, $6
+; MIPS32EL-NEXT:    addu $1, $1, $7
+; MIPS32EL-NEXT:    andi $2, $5, 65280
+; MIPS32EL-NEXT:    lbu $3, 16($sp)
+; MIPS32EL-NEXT:    addu $1, $1, $3
+; MIPS32EL-NEXT:    srl $2, $2, 8
+; MIPS32EL-NEXT:    andi $3, $4, 65280
+; MIPS32EL-NEXT:    srl $3, $3, 8
+; MIPS32EL-NEXT:    addu $2, $3, $2
+; MIPS32EL-NEXT:    andi $3, $6, 65280
+; MIPS32EL-NEXT:    srl $3, $3, 8
+; MIPS32EL-NEXT:    lbu $4, 20($sp)
+; MIPS32EL-NEXT:    addu $2, $2, $3
+; MIPS32EL-NEXT:    addu $1, $1, $4
+; MIPS32EL-NEXT:    lbu $3, 24($sp)
+; MIPS32EL-NEXT:    addu $1, $1, $3
+; MIPS32EL-NEXT:    andi $3, $7, 65280
+; MIPS32EL-NEXT:    srl $3, $3, 8
+; MIPS32EL-NEXT:    lbu $4, 25($sp)
+; MIPS32EL-NEXT:    andi $1, $1, 255
+; MIPS32EL-NEXT:    addu $2, $2, $3
+; MIPS32EL-NEXT:    lbu $3, 17($sp)
+; MIPS32EL-NEXT:    addu $2, $2, $3
+; MIPS32EL-NEXT:    lbu $3, 21($sp)
+; MIPS32EL-NEXT:    addu $2, $2, $3
+; MIPS32EL-NEXT:    addu $2, $2, $4
+; MIPS32EL-NEXT:    sll $2, $2, 8
+; MIPS32EL-NEXT:    or $2, $1, $2
+; MIPS32EL-NEXT:    jr $ra
+; MIPS32EL-NEXT:    nop
+;
+; MIPS64EL-LABEL: i8x2_7:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    sll $1, $5, 0
+; MIPS64EL-NEXT:    sll $2, $4, 0
+; MIPS64EL-NEXT:    addu $3, $2, $1
+; MIPS64EL-NEXT:    sll $4, $6, 0
+; MIPS64EL-NEXT:    andi $1, $1, 65280
+; MIPS64EL-NEXT:    srl $1, $1, 8
+; MIPS64EL-NEXT:    andi $2, $2, 65280
+; MIPS64EL-NEXT:    srl $2, $2, 8
+; MIPS64EL-NEXT:    addu $1, $2, $1
+; MIPS64EL-NEXT:    addu $2, $3, $4
+; MIPS64EL-NEXT:    sll $3, $7, 0
+; MIPS64EL-NEXT:    andi $5, $3, 65280
+; MIPS64EL-NEXT:    andi $4, $4, 65280
+; MIPS64EL-NEXT:    srl $4, $4, 8
+; MIPS64EL-NEXT:    addu $2, $2, $3
+; MIPS64EL-NEXT:    addu $1, $1, $4
+; MIPS64EL-NEXT:    srl $3, $5, 8
+; MIPS64EL-NEXT:    sll $4, $8, 0
+; MIPS64EL-NEXT:    andi $5, $4, 65280
+; MIPS64EL-NEXT:    srl $5, $5, 8
+; MIPS64EL-NEXT:    addu $1, $1, $3
+; MIPS64EL-NEXT:    addu $2, $2, $4
+; MIPS64EL-NEXT:    sll $3, $9, 0
+; MIPS64EL-NEXT:    addu $2, $2, $3
+; MIPS64EL-NEXT:    sll $4, $10, 0
+; MIPS64EL-NEXT:    addu $2, $2, $4
+; MIPS64EL-NEXT:    andi $2, $2, 255
+; MIPS64EL-NEXT:    addu $1, $1, $5
+; MIPS64EL-NEXT:    andi $3, $3, 65280
+; MIPS64EL-NEXT:    srl $3, $3, 8
+; MIPS64EL-NEXT:    addu $1, $1, $3
+; MIPS64EL-NEXT:    andi $3, $4, 65280
+; MIPS64EL-NEXT:    srl $3, $3, 8
+; MIPS64EL-NEXT:    addu $1, $1, $3
+; MIPS64EL-NEXT:    sll $1, $1, 8
+; MIPS64EL-NEXT:    or $2, $2, $1
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: i8x2_7:
+; MIPS32R5EL:       # %bb.0: # %entry
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -24
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32R5EL-NEXT:    sw $5, 16($sp)
+; MIPS32R5EL-NEXT:    ldi.b $w0, 0
+; MIPS32R5EL-NEXT:    sw $4, 20($sp)
+; MIPS32R5EL-NEXT:    lbu $1, 17($sp)
+; MIPS32R5EL-NEXT:    lbu $2, 16($sp)
+; MIPS32R5EL-NEXT:    move.v $w1, $w0
+; MIPS32R5EL-NEXT:    insert.w $w1[0], $2
+; MIPS32R5EL-NEXT:    insert.w $w1[1], $1
+; MIPS32R5EL-NEXT:    lbu $1, 18($sp)
+; MIPS32R5EL-NEXT:    insert.w $w1[2], $1
+; MIPS32R5EL-NEXT:    lbu $1, 19($sp)
+; MIPS32R5EL-NEXT:    insert.w $w1[3], $1
+; MIPS32R5EL-NEXT:    ilvr.w $w1, $w1, $w1
+; MIPS32R5EL-NEXT:    lbu $1, 21($sp)
+; MIPS32R5EL-NEXT:    lbu $2, 20($sp)
+; MIPS32R5EL-NEXT:    move.v $w2, $w0
+; MIPS32R5EL-NEXT:    insert.w $w2[0], $2
+; MIPS32R5EL-NEXT:    insert.w $w2[1], $1
+; MIPS32R5EL-NEXT:    lbu $1, 22($sp)
+; MIPS32R5EL-NEXT:    insert.w $w2[2], $1
+; MIPS32R5EL-NEXT:    lbu $1, 23($sp)
+; MIPS32R5EL-NEXT:    insert.w $w2[3], $1
+; MIPS32R5EL-NEXT:    ilvr.w $w2, $w2, $w2
+; MIPS32R5EL-NEXT:    addv.d $w1, $w2, $w1
+; MIPS32R5EL-NEXT:    sw $6, 12($sp)
+; MIPS32R5EL-NEXT:    lbu $1, 13($sp)
+; MIPS32R5EL-NEXT:    lbu $2, 12($sp)
+; MIPS32R5EL-NEXT:    move.v $w2, $w0
+; MIPS32R5EL-NEXT:    insert.w $w2[0], $2
+; MIPS32R5EL-NEXT:    insert.w $w2[1], $1
+; MIPS32R5EL-NEXT:    lbu $1, 14($sp)
+; MIPS32R5EL-NEXT:    insert.w $w2[2], $1
+; MIPS32R5EL-NEXT:    lbu $1, 15($sp)
+; MIPS32R5EL-NEXT:    insert.w $w2[3], $1
+; MIPS32R5EL-NEXT:    ilvr.w $w2, $w2, $w2
+; MIPS32R5EL-NEXT:    addv.d $w1, $w1, $w2
+; MIPS32R5EL-NEXT:    sw $7, 8($sp)
+; MIPS32R5EL-NEXT:    lbu $1, 9($sp)
+; MIPS32R5EL-NEXT:    lbu $2, 8($sp)
+; MIPS32R5EL-NEXT:    move.v $w2, $w0
+; MIPS32R5EL-NEXT:    insert.w $w2[0], $2
+; MIPS32R5EL-NEXT:    insert.w $w2[1], $1
+; MIPS32R5EL-NEXT:    lbu $1, 10($sp)
+; MIPS32R5EL-NEXT:    insert.w $w2[2], $1
+; MIPS32R5EL-NEXT:    lbu $1, 11($sp)
+; MIPS32R5EL-NEXT:    insert.w $w2[3], $1
+; MIPS32R5EL-NEXT:    ilvr.w $w2, $w2, $w2
+; MIPS32R5EL-NEXT:    addv.d $w1, $w1, $w2
+; MIPS32R5EL-NEXT:    lbu $1, 41($sp)
+; MIPS32R5EL-NEXT:    lbu $2, 40($sp)
+; MIPS32R5EL-NEXT:    move.v $w2, $w0
+; MIPS32R5EL-NEXT:    insert.w $w2[0], $2
+; MIPS32R5EL-NEXT:    insert.w $w2[1], $1
+; MIPS32R5EL-NEXT:    lbu $1, 42($sp)
+; MIPS32R5EL-NEXT:    insert.w $w2[2], $1
+; MIPS32R5EL-NEXT:    lbu $1, 43($sp)
+; MIPS32R5EL-NEXT:    insert.w $w2[3], $1
+; MIPS32R5EL-NEXT:    ilvr.w $w2, $w2, $w2
+; MIPS32R5EL-NEXT:    addv.d $w1, $w1, $w2
+; MIPS32R5EL-NEXT:    lbu $1, 45($sp)
+; MIPS32R5EL-NEXT:    lbu $2, 44($sp)
+; MIPS32R5EL-NEXT:    move.v $w2, $w0
+; MIPS32R5EL-NEXT:    insert.w $w2[0], $2
+; MIPS32R5EL-NEXT:    insert.w $w2[1], $1
+; MIPS32R5EL-NEXT:    lbu $1, 46($sp)
+; MIPS32R5EL-NEXT:    insert.w $w2[2], $1
+; MIPS32R5EL-NEXT:    lbu $1, 47($sp)
+; MIPS32R5EL-NEXT:    insert.w $w2[3], $1
+; MIPS32R5EL-NEXT:    ilvr.w $w2, $w2, $w2
+; MIPS32R5EL-NEXT:    addv.d $w1, $w1, $w2
+; MIPS32R5EL-NEXT:    lbu $1, 48($sp)
+; MIPS32R5EL-NEXT:    insert.w $w0[0], $1
+; MIPS32R5EL-NEXT:    lbu $1, 49($sp)
+; MIPS32R5EL-NEXT:    insert.w $w0[1], $1
+; MIPS32R5EL-NEXT:    lbu $1, 50($sp)
+; MIPS32R5EL-NEXT:    insert.w $w0[2], $1
+; MIPS32R5EL-NEXT:    lbu $1, 51($sp)
+; MIPS32R5EL-NEXT:    insert.w $w0[3], $1
+; MIPS32R5EL-NEXT:    ilvr.w $w0, $w0, $w0
+; MIPS32R5EL-NEXT:    addv.d $w0, $w1, $w0
+; MIPS32R5EL-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[2]
+; MIPS32R5EL-NEXT:    sb $2, 5($sp)
+; MIPS32R5EL-NEXT:    sb $1, 4($sp)
+; MIPS32R5EL-NEXT:    lhu $2, 4($sp)
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 24
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: i8x2_7:
+; MIPS64R5EL:       # %bb.0: # %entry
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -176
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 176
+; MIPS64R5EL-NEXT:    sd $4, 168($sp)
+; MIPS64R5EL-NEXT:    ldi.b $w0, 0
+; MIPS64R5EL-NEXT:    lbu $1, 169($sp)
+; MIPS64R5EL-NEXT:    lbu $2, 168($sp)
+; MIPS64R5EL-NEXT:    move.v $w1, $w0
+; MIPS64R5EL-NEXT:    insert.h $w1[0], $2
+; MIPS64R5EL-NEXT:    insert.h $w1[1], $1
+; MIPS64R5EL-NEXT:    lbu $1, 170($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[2], $1
+; MIPS64R5EL-NEXT:    lbu $1, 171($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[3], $1
+; MIPS64R5EL-NEXT:    lbu $1, 172($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[4], $1
+; MIPS64R5EL-NEXT:    lbu $1, 173($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[5], $1
+; MIPS64R5EL-NEXT:    lbu $1, 175($sp)
+; MIPS64R5EL-NEXT:    lbu $2, 174($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[6], $2
+; MIPS64R5EL-NEXT:    insert.h $w1[7], $1
+; MIPS64R5EL-NEXT:    copy_s.h $1, $w1[0]
+; MIPS64R5EL-NEXT:    copy_s.h $2, $w1[1]
+; MIPS64R5EL-NEXT:    sd $5, 160($sp)
+; MIPS64R5EL-NEXT:    lbu $3, 161($sp)
+; MIPS64R5EL-NEXT:    lbu $4, 160($sp)
+; MIPS64R5EL-NEXT:    move.v $w1, $w0
+; MIPS64R5EL-NEXT:    insert.h $w1[0], $4
+; MIPS64R5EL-NEXT:    insert.h $w1[1], $3
+; MIPS64R5EL-NEXT:    lbu $3, 162($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[2], $3
+; MIPS64R5EL-NEXT:    lbu $3, 163($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[3], $3
+; MIPS64R5EL-NEXT:    lbu $3, 164($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[4], $3
+; MIPS64R5EL-NEXT:    lbu $3, 165($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[5], $3
+; MIPS64R5EL-NEXT:    lbu $3, 167($sp)
+; MIPS64R5EL-NEXT:    lbu $4, 166($sp)
+; MIPS64R5EL-NEXT:    insert.h $w1[6], $4
+; MIPS64R5EL-NEXT:    insert.h $w1[7], $3
+; MIPS64R5EL-NEXT:    copy_s.h $3, $w1[0]
+; MIPS64R5EL-NEXT:    copy_s.h $4, $w1[1]
+; MIPS64R5EL-NEXT:    sw $4, 24($sp)
+; MIPS64R5EL-NEXT:    sw $3, 16($sp)
+; MIPS64R5EL-NEXT:    sw $2, 8($sp)
+; MIPS64R5EL-NEXT:    sw $1, 0($sp)
+; MIPS64R5EL-NEXT:    ld.d $w1, 16($sp)
+; MIPS64R5EL-NEXT:    ld.d $w2, 0($sp)
+; MIPS64R5EL-NEXT:    addv.d $w1, $w2, $w1
+; MIPS64R5EL-NEXT:    sd $6, 152($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 153($sp)
+; MIPS64R5EL-NEXT:    lbu $2, 152($sp)
+; MIPS64R5EL-NEXT:    move.v $w2, $w0
+; MIPS64R5EL-NEXT:    insert.h $w2[0], $2
+; MIPS64R5EL-NEXT:    insert.h $w2[1], $1
+; MIPS64R5EL-NEXT:    lbu $1, 154($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[2], $1
+; MIPS64R5EL-NEXT:    lbu $1, 155($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[3], $1
+; MIPS64R5EL-NEXT:    lbu $1, 156($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[4], $1
+; MIPS64R5EL-NEXT:    lbu $1, 157($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[5], $1
+; MIPS64R5EL-NEXT:    lbu $1, 159($sp)
+; MIPS64R5EL-NEXT:    lbu $2, 158($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[6], $2
+; MIPS64R5EL-NEXT:    insert.h $w2[7], $1
+; MIPS64R5EL-NEXT:    copy_s.h $1, $w2[0]
+; MIPS64R5EL-NEXT:    copy_s.h $2, $w2[1]
+; MIPS64R5EL-NEXT:    sw $2, 40($sp)
+; MIPS64R5EL-NEXT:    sw $1, 32($sp)
+; MIPS64R5EL-NEXT:    ld.d $w2, 32($sp)
+; MIPS64R5EL-NEXT:    addv.d $w1, $w1, $w2
+; MIPS64R5EL-NEXT:    sd $7, 144($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 145($sp)
+; MIPS64R5EL-NEXT:    lbu $2, 144($sp)
+; MIPS64R5EL-NEXT:    move.v $w2, $w0
+; MIPS64R5EL-NEXT:    insert.h $w2[0], $2
+; MIPS64R5EL-NEXT:    insert.h $w2[1], $1
+; MIPS64R5EL-NEXT:    lbu $1, 146($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[2], $1
+; MIPS64R5EL-NEXT:    lbu $1, 147($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[3], $1
+; MIPS64R5EL-NEXT:    lbu $1, 148($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[4], $1
+; MIPS64R5EL-NEXT:    lbu $1, 149($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[5], $1
+; MIPS64R5EL-NEXT:    lbu $1, 151($sp)
+; MIPS64R5EL-NEXT:    lbu $2, 150($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[6], $2
+; MIPS64R5EL-NEXT:    insert.h $w2[7], $1
+; MIPS64R5EL-NEXT:    copy_s.h $1, $w2[0]
+; MIPS64R5EL-NEXT:    copy_s.h $2, $w2[1]
+; MIPS64R5EL-NEXT:    sw $2, 56($sp)
+; MIPS64R5EL-NEXT:    sw $1, 48($sp)
+; MIPS64R5EL-NEXT:    ld.d $w2, 48($sp)
+; MIPS64R5EL-NEXT:    addv.d $w1, $w1, $w2
+; MIPS64R5EL-NEXT:    sd $8, 136($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 137($sp)
+; MIPS64R5EL-NEXT:    lbu $2, 136($sp)
+; MIPS64R5EL-NEXT:    move.v $w2, $w0
+; MIPS64R5EL-NEXT:    insert.h $w2[0], $2
+; MIPS64R5EL-NEXT:    insert.h $w2[1], $1
+; MIPS64R5EL-NEXT:    lbu $1, 138($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[2], $1
+; MIPS64R5EL-NEXT:    lbu $1, 139($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[3], $1
+; MIPS64R5EL-NEXT:    lbu $1, 140($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[4], $1
+; MIPS64R5EL-NEXT:    lbu $1, 141($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[5], $1
+; MIPS64R5EL-NEXT:    lbu $1, 143($sp)
+; MIPS64R5EL-NEXT:    lbu $2, 142($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[6], $2
+; MIPS64R5EL-NEXT:    insert.h $w2[7], $1
+; MIPS64R5EL-NEXT:    copy_s.h $1, $w2[0]
+; MIPS64R5EL-NEXT:    copy_s.h $2, $w2[1]
+; MIPS64R5EL-NEXT:    sd $10, 120($sp)
+; MIPS64R5EL-NEXT:    lbu $3, 121($sp)
+; MIPS64R5EL-NEXT:    lbu $4, 120($sp)
+; MIPS64R5EL-NEXT:    move.v $w2, $w0
+; MIPS64R5EL-NEXT:    insert.h $w2[0], $4
+; MIPS64R5EL-NEXT:    insert.h $w2[1], $3
+; MIPS64R5EL-NEXT:    lbu $3, 122($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[2], $3
+; MIPS64R5EL-NEXT:    lbu $3, 123($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[3], $3
+; MIPS64R5EL-NEXT:    lbu $3, 124($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[4], $3
+; MIPS64R5EL-NEXT:    lbu $3, 125($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[5], $3
+; MIPS64R5EL-NEXT:    lbu $3, 127($sp)
+; MIPS64R5EL-NEXT:    lbu $4, 126($sp)
+; MIPS64R5EL-NEXT:    insert.h $w2[6], $4
+; MIPS64R5EL-NEXT:    insert.h $w2[7], $3
+; MIPS64R5EL-NEXT:    copy_s.h $3, $w2[0]
+; MIPS64R5EL-NEXT:    copy_s.h $4, $w2[1]
+; MIPS64R5EL-NEXT:    sw $2, 72($sp)
+; MIPS64R5EL-NEXT:    sw $1, 64($sp)
+; MIPS64R5EL-NEXT:    ld.d $w2, 64($sp)
+; MIPS64R5EL-NEXT:    addv.d $w1, $w1, $w2
+; MIPS64R5EL-NEXT:    sd $9, 128($sp)
+; MIPS64R5EL-NEXT:    lbu $1, 128($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[0], $1
+; MIPS64R5EL-NEXT:    lbu $1, 129($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[1], $1
+; MIPS64R5EL-NEXT:    lbu $1, 130($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[2], $1
+; MIPS64R5EL-NEXT:    lbu $1, 131($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[3], $1
+; MIPS64R5EL-NEXT:    lbu $1, 132($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[4], $1
+; MIPS64R5EL-NEXT:    lbu $1, 133($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[5], $1
+; MIPS64R5EL-NEXT:    lbu $1, 135($sp)
+; MIPS64R5EL-NEXT:    lbu $2, 134($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[6], $2
+; MIPS64R5EL-NEXT:    insert.h $w0[7], $1
+; MIPS64R5EL-NEXT:    copy_s.h $1, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.h $2, $w0[1]
+; MIPS64R5EL-NEXT:    sw $2, 88($sp)
+; MIPS64R5EL-NEXT:    sw $1, 80($sp)
+; MIPS64R5EL-NEXT:    ld.d $w0, 80($sp)
+; MIPS64R5EL-NEXT:    addv.d $w0, $w1, $w0
+; MIPS64R5EL-NEXT:    sw $4, 104($sp)
+; MIPS64R5EL-NEXT:    sw $3, 96($sp)
+; MIPS64R5EL-NEXT:    ld.d $w1, 96($sp)
+; MIPS64R5EL-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5EL-NEXT:    copy_s.d $1, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[1]
+; MIPS64R5EL-NEXT:    sb $2, 117($sp)
+; MIPS64R5EL-NEXT:    sb $1, 116($sp)
+; MIPS64R5EL-NEXT:    lh $2, 116($sp)
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 176
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
 entry:
-
-; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24
-; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24
-; MIPS32EB-DAG: srl ${{[0-9]+}}, $6, 24
-; MIPS32EB-DAG: srl ${{[0-9]+}}, $7, 24
-
-; MIPS32EL-DAG: andi ${{[0-9]+}}, $4, 65280
-; MIPS32EL-DAG: andi ${{[0-9]+}}, $5, 65280
-; MIPS32EL-DAG: andi ${{[0-9]+}}, $6, 65280
-; MIPS32EL-DAG: andi ${{[0-9]+}}, $7, 65280
-
-; MIPS32-DAG: lbu ${{[0-9]+}}, 16($sp)
-; MIPS32-DAG; lbu ${{[0-9]+}}, 17($sp)
-; MIPS32-DAG: lbu ${{[0-9]+}}, 20($sp)
-; MIPS32-DAG: lbu ${{[0-9]+}}, 21($sp)
-; MIPS32-DAG: lbu ${{[0-9]+}}, 24($sp)
-; MIPS32-DAG: lbu ${{[0-9]+}}, 25($sp)
-
-; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp)
-; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp)
-; MIPS32R5-DAG: sw $6, {{[0-9]+}}($sp)
-; MIPS32R5-DAG: sw $7, {{[0-9]+}}($sp)
-
-; MIPS32R5-DAG: lbu ${{[0-9]+}}, 40($sp)
-; MIPS32R5-DAG: lbu ${{[0-9]+}}, 41($sp)
-; MIPS32R5-DAG: lbu ${{[0-9]+}}, 42($sp)
-; MIPS32R5-DAG: lbu ${{[0-9]+}}, 43($sp)
-; MIPS32R5-DAG: lbu ${{[0-9]+}}, 44($sp)
-; MIPS32R5-DAG: lbu ${{[0-9]+}}, 45($sp)
-; MIPS32R5-DAG: lbu ${{[0-9]+}}, 46($sp)
-; MIPS32R5-DAG: lbu ${{[0-9]+}}, 47($sp)
-; MIPS32R5-DAG: lbu ${{[0-9]+}}, 48($sp)
-; MIPS32R5-DAG: lbu ${{[0-9]+}}, 49($sp)
-; MIPS32R5-DAG: lbu ${{[0-9]+}}, 50($sp)
-; MIPS32R5-DAG: lbu ${{[0-9]+}}, 51($sp)
-
-; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48
-; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48
-; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $6, 48
-; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $7, 48
-; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $8, 48
-; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $9, 48
-; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $10, 48
-
-; MIPS64R5-DAG: sd $4, {{[0-9]+}}($sp)
-; MIPS64R5-DAG: sd $5, {{[0-9]+}}($sp)
-; MIPS64R5-DAG: sd $6, {{[0-9]+}}($sp)
-; MIPS64R5-DAG: sd $7, {{[0-9]+}}($sp)
-; MIPS64R5-DAG: sd $8, {{[0-9]+}}($sp)
-; MIPS64R5-DAG: sd $9, {{[0-9]+}}($sp)
-; MIPS64R5-DAG: sd $10, {{[0-9]+}}($sp)
-
   %0 = add <2 x i8> %a, %b
   %1 = add <2 x i8> %0, %c
   %2 = add <2 x i8> %1, %d
@@ -111,290 +1001,1567 @@ entry:
 }
 
 define <4 x i8> @i8_4(<4 x i8> %a, <4 x i8> %b) {
-; ALL-LABEL: i8_4:
-; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
-; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
-; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
-; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
-
-; MIPS32R5-DAG: sw $4
-; MIPS32R5-DAG: sw $5
-
-; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
-; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
-
-; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0
-; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0
-
+; MIPS32-LABEL: i8_4:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    srl $1, $5, 24
+; MIPS32-NEXT:    srl $2, $4, 24
+; MIPS32-NEXT:    addu $1, $2, $1
+; MIPS32-NEXT:    sll $1, $1, 8
+; MIPS32-NEXT:    srl $2, $5, 16
+; MIPS32-NEXT:    srl $3, $4, 16
+; MIPS32-NEXT:    addu $2, $3, $2
+; MIPS32-NEXT:    andi $2, $2, 255
+; MIPS32-NEXT:    or $1, $2, $1
+; MIPS32-NEXT:    addu $2, $4, $5
+; MIPS32-NEXT:    sll $1, $1, 16
+; MIPS32-NEXT:    andi $2, $2, 255
+; MIPS32-NEXT:    srl $3, $5, 8
+; MIPS32-NEXT:    srl $4, $4, 8
+; MIPS32-NEXT:    addu $3, $4, $3
+; MIPS32-NEXT:    sll $3, $3, 8
+; MIPS32-NEXT:    or $2, $2, $3
+; MIPS32-NEXT:    andi $2, $2, 65535
+; MIPS32-NEXT:    or $2, $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: i8_4:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    sll $1, $5, 0
+; MIPS64-NEXT:    srl $2, $1, 24
+; MIPS64-NEXT:    sll $3, $4, 0
+; MIPS64-NEXT:    srl $4, $3, 24
+; MIPS64-NEXT:    addu $2, $4, $2
+; MIPS64-NEXT:    sll $2, $2, 8
+; MIPS64-NEXT:    srl $4, $1, 16
+; MIPS64-NEXT:    srl $5, $3, 16
+; MIPS64-NEXT:    addu $4, $5, $4
+; MIPS64-NEXT:    andi $4, $4, 255
+; MIPS64-NEXT:    or $2, $4, $2
+; MIPS64-NEXT:    addu $4, $3, $1
+; MIPS64-NEXT:    sll $2, $2, 16
+; MIPS64-NEXT:    andi $4, $4, 255
+; MIPS64-NEXT:    srl $1, $1, 8
+; MIPS64-NEXT:    srl $3, $3, 8
+; MIPS64-NEXT:    addu $1, $3, $1
+; MIPS64-NEXT:    sll $1, $1, 8
+; MIPS64-NEXT:    or $1, $4, $1
+; MIPS64-NEXT:    andi $1, $1, 65535
+; MIPS64-NEXT:    or $2, $1, $2
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: i8_4:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    addiu $sp, $sp, -16
+; MIPS32R5-NEXT:    .cfi_def_cfa_offset 16
+; MIPS32R5-NEXT:    sw $5, 8($sp)
+; MIPS32R5-NEXT:    sw $4, 12($sp)
+; MIPS32R5-NEXT:    ldi.b $w0, 0
+; MIPS32R5-NEXT:    lbu $1, 9($sp)
+; MIPS32R5-NEXT:    lbu $2, 8($sp)
+; MIPS32R5-NEXT:    move.v $w1, $w0
+; MIPS32R5-NEXT:    insert.w $w1[0], $2
+; MIPS32R5-NEXT:    insert.w $w1[1], $1
+; MIPS32R5-NEXT:    lbu $1, 10($sp)
+; MIPS32R5-NEXT:    insert.w $w1[2], $1
+; MIPS32R5-NEXT:    lbu $1, 12($sp)
+; MIPS32R5-NEXT:    lbu $2, 11($sp)
+; MIPS32R5-NEXT:    insert.w $w1[3], $2
+; MIPS32R5-NEXT:    insert.w $w0[0], $1
+; MIPS32R5-NEXT:    lbu $1, 13($sp)
+; MIPS32R5-NEXT:    insert.w $w0[1], $1
+; MIPS32R5-NEXT:    lbu $1, 14($sp)
+; MIPS32R5-NEXT:    insert.w $w0[2], $1
+; MIPS32R5-NEXT:    lbu $1, 15($sp)
+; MIPS32R5-NEXT:    insert.w $w0[3], $1
+; MIPS32R5-NEXT:    addv.w $w0, $w0, $w1
+; MIPS32R5-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5-NEXT:    copy_s.w $4, $w0[3]
+; MIPS32R5-NEXT:    sb $4, 7($sp)
+; MIPS32R5-NEXT:    sb $3, 6($sp)
+; MIPS32R5-NEXT:    sb $2, 5($sp)
+; MIPS32R5-NEXT:    sb $1, 4($sp)
+; MIPS32R5-NEXT:    lw $2, 4($sp)
+; MIPS32R5-NEXT:    addiu $sp, $sp, 16
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: i8_4:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5-NEXT:    sll $1, $5, 0
+; MIPS64R5-NEXT:    sw $1, 8($sp)
+; MIPS64R5-NEXT:    sll $1, $4, 0
+; MIPS64R5-NEXT:    sw $1, 12($sp)
+; MIPS64R5-NEXT:    ldi.b $w0, 0
+; MIPS64R5-NEXT:    lbu $1, 9($sp)
+; MIPS64R5-NEXT:    lbu $2, 8($sp)
+; MIPS64R5-NEXT:    move.v $w1, $w0
+; MIPS64R5-NEXT:    insert.w $w1[0], $2
+; MIPS64R5-NEXT:    insert.w $w1[1], $1
+; MIPS64R5-NEXT:    lbu $1, 10($sp)
+; MIPS64R5-NEXT:    insert.w $w1[2], $1
+; MIPS64R5-NEXT:    lbu $1, 12($sp)
+; MIPS64R5-NEXT:    lbu $2, 11($sp)
+; MIPS64R5-NEXT:    insert.w $w1[3], $2
+; MIPS64R5-NEXT:    insert.w $w0[0], $1
+; MIPS64R5-NEXT:    lbu $1, 13($sp)
+; MIPS64R5-NEXT:    insert.w $w0[1], $1
+; MIPS64R5-NEXT:    lbu $1, 14($sp)
+; MIPS64R5-NEXT:    insert.w $w0[2], $1
+; MIPS64R5-NEXT:    lbu $1, 15($sp)
+; MIPS64R5-NEXT:    insert.w $w0[3], $1
+; MIPS64R5-NEXT:    addv.w $w0, $w0, $w1
+; MIPS64R5-NEXT:    copy_s.w $1, $w0[0]
+; MIPS64R5-NEXT:    copy_s.w $2, $w0[1]
+; MIPS64R5-NEXT:    copy_s.w $3, $w0[2]
+; MIPS64R5-NEXT:    copy_s.w $4, $w0[3]
+; MIPS64R5-NEXT:    sb $4, 7($sp)
+; MIPS64R5-NEXT:    sb $3, 6($sp)
+; MIPS64R5-NEXT:    sb $2, 5($sp)
+; MIPS64R5-NEXT:    sb $1, 4($sp)
+; MIPS64R5-NEXT:    lw $2, 4($sp)
+; MIPS64R5-NEXT:    daddiu $sp, $sp, 16
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
   %1 = add <4 x i8> %a, %b
   ret <4 x i8> %1
 }
 
 define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) {
-; ALL-LABEL: i8_8:
-; MIPS32-NOT: lw
-; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24
-; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24
-; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8
-; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8
-; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
-; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
-; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
-; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
-
-; MIPS32R5-DAG: sw $4
-; MIPS32R5-DAG: sw $5
-; MIPS32R5-DAG: sw $6
-; MIPS32R5-DAG: sw $7
-
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 40
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 40
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
-; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
-; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
-; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 24
-; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 24
-; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 16
-; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 16
-; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 8
-; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 8
-
-; MIPS64R5-DAG: sd $4
-; MIPS64R5-DAG: sd $5
-
+; MIPS32-LABEL: i8_8:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    srl $1, $6, 24
+; MIPS32-NEXT:    srl $2, $4, 24
+; MIPS32-NEXT:    addu $1, $2, $1
+; MIPS32-NEXT:    sll $1, $1, 8
+; MIPS32-NEXT:    srl $2, $6, 16
+; MIPS32-NEXT:    srl $3, $4, 16
+; MIPS32-NEXT:    addu $2, $3, $2
+; MIPS32-NEXT:    andi $2, $2, 255
+; MIPS32-NEXT:    srl $3, $7, 24
+; MIPS32-NEXT:    srl $8, $5, 24
+; MIPS32-NEXT:    or $1, $2, $1
+; MIPS32-NEXT:    addu $2, $8, $3
+; MIPS32-NEXT:    addu $3, $4, $6
+; MIPS32-NEXT:    sll $2, $2, 8
+; MIPS32-NEXT:    srl $8, $7, 16
+; MIPS32-NEXT:    srl $9, $5, 16
+; MIPS32-NEXT:    addu $8, $9, $8
+; MIPS32-NEXT:    andi $8, $8, 255
+; MIPS32-NEXT:    or $8, $8, $2
+; MIPS32-NEXT:    sll $1, $1, 16
+; MIPS32-NEXT:    andi $2, $3, 255
+; MIPS32-NEXT:    srl $3, $6, 8
+; MIPS32-NEXT:    srl $4, $4, 8
+; MIPS32-NEXT:    addu $3, $4, $3
+; MIPS32-NEXT:    sll $3, $3, 8
+; MIPS32-NEXT:    or $2, $2, $3
+; MIPS32-NEXT:    andi $2, $2, 65535
+; MIPS32-NEXT:    addu $3, $5, $7
+; MIPS32-NEXT:    or $2, $2, $1
+; MIPS32-NEXT:    sll $1, $8, 16
+; MIPS32-NEXT:    andi $3, $3, 255
+; MIPS32-NEXT:    srl $4, $7, 8
+; MIPS32-NEXT:    srl $5, $5, 8
+; MIPS32-NEXT:    addu $4, $5, $4
+; MIPS32-NEXT:    sll $4, $4, 8
+; MIPS32-NEXT:    or $3, $3, $4
+; MIPS32-NEXT:    andi $3, $3, 65535
+; MIPS32-NEXT:    or $3, $3, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: i8_8:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    dsrl $1, $5, 56
+; MIPS64-NEXT:    sll $1, $1, 0
+; MIPS64-NEXT:    dsrl $2, $4, 56
+; MIPS64-NEXT:    sll $2, $2, 0
+; MIPS64-NEXT:    addu $1, $2, $1
+; MIPS64-NEXT:    dsrl $2, $5, 48
+; MIPS64-NEXT:    sll $1, $1, 8
+; MIPS64-NEXT:    sll $2, $2, 0
+; MIPS64-NEXT:    dsrl $3, $4, 48
+; MIPS64-NEXT:    sll $3, $3, 0
+; MIPS64-NEXT:    addu $2, $3, $2
+; MIPS64-NEXT:    andi $2, $2, 255
+; MIPS64-NEXT:    dsrl $3, $5, 40
+; MIPS64-NEXT:    or $1, $2, $1
+; MIPS64-NEXT:    sll $2, $5, 0
+; MIPS64-NEXT:    sll $3, $3, 0
+; MIPS64-NEXT:    dsrl $6, $4, 40
+; MIPS64-NEXT:    sll $6, $6, 0
+; MIPS64-NEXT:    addu $3, $6, $3
+; MIPS64-NEXT:    dsrl $5, $5, 32
+; MIPS64-NEXT:    srl $6, $2, 24
+; MIPS64-NEXT:    sll $7, $4, 0
+; MIPS64-NEXT:    srl $8, $7, 24
+; MIPS64-NEXT:    addu $6, $8, $6
+; MIPS64-NEXT:    sll $1, $1, 16
+; MIPS64-NEXT:    sll $3, $3, 8
+; MIPS64-NEXT:    sll $5, $5, 0
+; MIPS64-NEXT:    dsrl $4, $4, 32
+; MIPS64-NEXT:    sll $4, $4, 0
+; MIPS64-NEXT:    addu $4, $4, $5
+; MIPS64-NEXT:    andi $4, $4, 255
+; MIPS64-NEXT:    or $3, $4, $3
+; MIPS64-NEXT:    andi $3, $3, 65535
+; MIPS64-NEXT:    or $1, $3, $1
+; MIPS64-NEXT:    sll $3, $6, 8
+; MIPS64-NEXT:    srl $4, $2, 16
+; MIPS64-NEXT:    srl $5, $7, 16
+; MIPS64-NEXT:    addu $4, $5, $4
+; MIPS64-NEXT:    andi $4, $4, 255
+; MIPS64-NEXT:    or $3, $4, $3
+; MIPS64-NEXT:    addu $4, $7, $2
+; MIPS64-NEXT:    dsll $1, $1, 32
+; MIPS64-NEXT:    sll $3, $3, 16
+; MIPS64-NEXT:    andi $4, $4, 255
+; MIPS64-NEXT:    srl $2, $2, 8
+; MIPS64-NEXT:    srl $5, $7, 8
+; MIPS64-NEXT:    addu $2, $5, $2
+; MIPS64-NEXT:    sll $2, $2, 8
+; MIPS64-NEXT:    or $2, $4, $2
+; MIPS64-NEXT:    andi $2, $2, 65535
+; MIPS64-NEXT:    or $2, $2, $3
+; MIPS64-NEXT:    dsll $2, $2, 32
+; MIPS64-NEXT:    dsrl $2, $2, 32
+; MIPS64-NEXT:    or $2, $2, $1
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: i8_8:
+; MIPS32R5EB:       # %bb.0:
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -48
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32R5EB-NEXT:    sw $fp, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    .cfi_offset 30, -4
+; MIPS32R5EB-NEXT:    move $fp, $sp
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EB-NEXT:    and $sp, $sp, $1
+; MIPS32R5EB-NEXT:    sw $6, 24($sp)
+; MIPS32R5EB-NEXT:    ldi.b $w0, 0
+; MIPS32R5EB-NEXT:    lbu $1, 25($sp)
+; MIPS32R5EB-NEXT:    lbu $2, 24($sp)
+; MIPS32R5EB-NEXT:    sw $7, 28($sp)
+; MIPS32R5EB-NEXT:    move.v $w1, $w0
+; MIPS32R5EB-NEXT:    insert.h $w1[0], $2
+; MIPS32R5EB-NEXT:    insert.h $w1[1], $1
+; MIPS32R5EB-NEXT:    lbu $1, 26($sp)
+; MIPS32R5EB-NEXT:    sw $4, 32($sp)
+; MIPS32R5EB-NEXT:    insert.h $w1[2], $1
+; MIPS32R5EB-NEXT:    lbu $1, 27($sp)
+; MIPS32R5EB-NEXT:    insert.h $w1[3], $1
+; MIPS32R5EB-NEXT:    lbu $1, 28($sp)
+; MIPS32R5EB-NEXT:    sw $5, 36($sp)
+; MIPS32R5EB-NEXT:    insert.h $w1[4], $1
+; MIPS32R5EB-NEXT:    lbu $1, 32($sp)
+; MIPS32R5EB-NEXT:    insert.h $w0[0], $1
+; MIPS32R5EB-NEXT:    lbu $1, 33($sp)
+; MIPS32R5EB-NEXT:    insert.h $w0[1], $1
+; MIPS32R5EB-NEXT:    lbu $1, 29($sp)
+; MIPS32R5EB-NEXT:    lbu $2, 34($sp)
+; MIPS32R5EB-NEXT:    insert.h $w0[2], $2
+; MIPS32R5EB-NEXT:    insert.h $w1[5], $1
+; MIPS32R5EB-NEXT:    lbu $1, 35($sp)
+; MIPS32R5EB-NEXT:    lbu $2, 31($sp)
+; MIPS32R5EB-NEXT:    lbu $3, 30($sp)
+; MIPS32R5EB-NEXT:    lbu $4, 39($sp)
+; MIPS32R5EB-NEXT:    insert.h $w1[6], $3
+; MIPS32R5EB-NEXT:    insert.h $w1[7], $2
+; MIPS32R5EB-NEXT:    insert.h $w0[3], $1
+; MIPS32R5EB-NEXT:    lbu $1, 36($sp)
+; MIPS32R5EB-NEXT:    insert.h $w0[4], $1
+; MIPS32R5EB-NEXT:    lbu $1, 37($sp)
+; MIPS32R5EB-NEXT:    insert.h $w0[5], $1
+; MIPS32R5EB-NEXT:    lbu $1, 38($sp)
+; MIPS32R5EB-NEXT:    insert.h $w0[6], $1
+; MIPS32R5EB-NEXT:    insert.h $w0[7], $4
+; MIPS32R5EB-NEXT:    addv.h $w0, $w0, $w1
+; MIPS32R5EB-NEXT:    copy_s.h $1, $w0[0]
+; MIPS32R5EB-NEXT:    copy_s.h $2, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.h $3, $w0[2]
+; MIPS32R5EB-NEXT:    copy_s.h $4, $w0[3]
+; MIPS32R5EB-NEXT:    copy_s.h $5, $w0[4]
+; MIPS32R5EB-NEXT:    copy_s.h $6, $w0[5]
+; MIPS32R5EB-NEXT:    copy_s.h $7, $w0[6]
+; MIPS32R5EB-NEXT:    copy_s.h $8, $w0[7]
+; MIPS32R5EB-NEXT:    sb $8, 23($sp)
+; MIPS32R5EB-NEXT:    sb $7, 22($sp)
+; MIPS32R5EB-NEXT:    sb $6, 21($sp)
+; MIPS32R5EB-NEXT:    sb $5, 20($sp)
+; MIPS32R5EB-NEXT:    sb $4, 19($sp)
+; MIPS32R5EB-NEXT:    sb $3, 18($sp)
+; MIPS32R5EB-NEXT:    sb $2, 17($sp)
+; MIPS32R5EB-NEXT:    sb $1, 16($sp)
+; MIPS32R5EB-NEXT:    lw $1, 20($sp)
+; MIPS32R5EB-NEXT:    sw $1, 12($sp)
+; MIPS32R5EB-NEXT:    lw $1, 16($sp)
+; MIPS32R5EB-NEXT:    sw $1, 4($sp)
+; MIPS32R5EB-NEXT:    ld.w $w0, 0($sp)
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[3]
+; MIPS32R5EB-NEXT:    move $sp, $fp
+; MIPS32R5EB-NEXT:    lw $fp, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 48
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5-LABEL: i8_8:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5-NEXT:    sd $5, 16($sp)
+; MIPS64R5-NEXT:    ldi.b $w0, 0
+; MIPS64R5-NEXT:    lbu $1, 17($sp)
+; MIPS64R5-NEXT:    lbu $2, 16($sp)
+; MIPS64R5-NEXT:    sd $4, 24($sp)
+; MIPS64R5-NEXT:    move.v $w1, $w0
+; MIPS64R5-NEXT:    insert.h $w1[0], $2
+; MIPS64R5-NEXT:    insert.h $w1[1], $1
+; MIPS64R5-NEXT:    lbu $1, 18($sp)
+; MIPS64R5-NEXT:    insert.h $w1[2], $1
+; MIPS64R5-NEXT:    lbu $1, 19($sp)
+; MIPS64R5-NEXT:    insert.h $w1[3], $1
+; MIPS64R5-NEXT:    lbu $1, 20($sp)
+; MIPS64R5-NEXT:    insert.h $w1[4], $1
+; MIPS64R5-NEXT:    lbu $1, 24($sp)
+; MIPS64R5-NEXT:    insert.h $w0[0], $1
+; MIPS64R5-NEXT:    lbu $1, 25($sp)
+; MIPS64R5-NEXT:    insert.h $w0[1], $1
+; MIPS64R5-NEXT:    lbu $1, 21($sp)
+; MIPS64R5-NEXT:    lbu $2, 26($sp)
+; MIPS64R5-NEXT:    insert.h $w0[2], $2
+; MIPS64R5-NEXT:    insert.h $w1[5], $1
+; MIPS64R5-NEXT:    lbu $1, 27($sp)
+; MIPS64R5-NEXT:    lbu $2, 23($sp)
+; MIPS64R5-NEXT:    lbu $3, 22($sp)
+; MIPS64R5-NEXT:    lbu $4, 31($sp)
+; MIPS64R5-NEXT:    insert.h $w1[6], $3
+; MIPS64R5-NEXT:    insert.h $w1[7], $2
+; MIPS64R5-NEXT:    insert.h $w0[3], $1
+; MIPS64R5-NEXT:    lbu $1, 28($sp)
+; MIPS64R5-NEXT:    insert.h $w0[4], $1
+; MIPS64R5-NEXT:    lbu $1, 29($sp)
+; MIPS64R5-NEXT:    insert.h $w0[5], $1
+; MIPS64R5-NEXT:    lbu $1, 30($sp)
+; MIPS64R5-NEXT:    insert.h $w0[6], $1
+; MIPS64R5-NEXT:    insert.h $w0[7], $4
+; MIPS64R5-NEXT:    addv.h $w0, $w0, $w1
+; MIPS64R5-NEXT:    copy_s.h $1, $w0[0]
+; MIPS64R5-NEXT:    copy_s.h $2, $w0[1]
+; MIPS64R5-NEXT:    copy_s.h $3, $w0[2]
+; MIPS64R5-NEXT:    copy_s.h $4, $w0[3]
+; MIPS64R5-NEXT:    copy_s.h $5, $w0[4]
+; MIPS64R5-NEXT:    copy_s.h $6, $w0[5]
+; MIPS64R5-NEXT:    copy_s.h $7, $w0[6]
+; MIPS64R5-NEXT:    copy_s.h $8, $w0[7]
+; MIPS64R5-NEXT:    sb $8, 15($sp)
+; MIPS64R5-NEXT:    sb $7, 14($sp)
+; MIPS64R5-NEXT:    sb $6, 13($sp)
+; MIPS64R5-NEXT:    sb $5, 12($sp)
+; MIPS64R5-NEXT:    sb $4, 11($sp)
+; MIPS64R5-NEXT:    sb $3, 10($sp)
+; MIPS64R5-NEXT:    sb $2, 9($sp)
+; MIPS64R5-NEXT:    sb $1, 8($sp)
+; MIPS64R5-NEXT:    ld $2, 8($sp)
+; MIPS64R5-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: i8_8:
+; MIPS32R5EL:       # %bb.0:
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -48
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32R5EL-NEXT:    sw $fp, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    .cfi_offset 30, -4
+; MIPS32R5EL-NEXT:    move $fp, $sp
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EL-NEXT:    and $sp, $sp, $1
+; MIPS32R5EL-NEXT:    sw $6, 24($sp)
+; MIPS32R5EL-NEXT:    ldi.b $w0, 0
+; MIPS32R5EL-NEXT:    lbu $1, 25($sp)
+; MIPS32R5EL-NEXT:    lbu $2, 24($sp)
+; MIPS32R5EL-NEXT:    sw $7, 28($sp)
+; MIPS32R5EL-NEXT:    move.v $w1, $w0
+; MIPS32R5EL-NEXT:    insert.h $w1[0], $2
+; MIPS32R5EL-NEXT:    insert.h $w1[1], $1
+; MIPS32R5EL-NEXT:    lbu $1, 26($sp)
+; MIPS32R5EL-NEXT:    sw $4, 32($sp)
+; MIPS32R5EL-NEXT:    insert.h $w1[2], $1
+; MIPS32R5EL-NEXT:    lbu $1, 27($sp)
+; MIPS32R5EL-NEXT:    insert.h $w1[3], $1
+; MIPS32R5EL-NEXT:    lbu $1, 28($sp)
+; MIPS32R5EL-NEXT:    sw $5, 36($sp)
+; MIPS32R5EL-NEXT:    insert.h $w1[4], $1
+; MIPS32R5EL-NEXT:    lbu $1, 32($sp)
+; MIPS32R5EL-NEXT:    insert.h $w0[0], $1
+; MIPS32R5EL-NEXT:    lbu $1, 33($sp)
+; MIPS32R5EL-NEXT:    insert.h $w0[1], $1
+; MIPS32R5EL-NEXT:    lbu $1, 29($sp)
+; MIPS32R5EL-NEXT:    lbu $2, 34($sp)
+; MIPS32R5EL-NEXT:    insert.h $w0[2], $2
+; MIPS32R5EL-NEXT:    insert.h $w1[5], $1
+; MIPS32R5EL-NEXT:    lbu $1, 35($sp)
+; MIPS32R5EL-NEXT:    lbu $2, 31($sp)
+; MIPS32R5EL-NEXT:    lbu $3, 30($sp)
+; MIPS32R5EL-NEXT:    lbu $4, 39($sp)
+; MIPS32R5EL-NEXT:    insert.h $w1[6], $3
+; MIPS32R5EL-NEXT:    insert.h $w1[7], $2
+; MIPS32R5EL-NEXT:    insert.h $w0[3], $1
+; MIPS32R5EL-NEXT:    lbu $1, 36($sp)
+; MIPS32R5EL-NEXT:    insert.h $w0[4], $1
+; MIPS32R5EL-NEXT:    lbu $1, 37($sp)
+; MIPS32R5EL-NEXT:    insert.h $w0[5], $1
+; MIPS32R5EL-NEXT:    lbu $1, 38($sp)
+; MIPS32R5EL-NEXT:    insert.h $w0[6], $1
+; MIPS32R5EL-NEXT:    insert.h $w0[7], $4
+; MIPS32R5EL-NEXT:    addv.h $w0, $w0, $w1
+; MIPS32R5EL-NEXT:    copy_s.h $1, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.h $2, $w0[1]
+; MIPS32R5EL-NEXT:    copy_s.h $3, $w0[2]
+; MIPS32R5EL-NEXT:    copy_s.h $4, $w0[3]
+; MIPS32R5EL-NEXT:    copy_s.h $5, $w0[4]
+; MIPS32R5EL-NEXT:    copy_s.h $6, $w0[5]
+; MIPS32R5EL-NEXT:    copy_s.h $7, $w0[6]
+; MIPS32R5EL-NEXT:    copy_s.h $8, $w0[7]
+; MIPS32R5EL-NEXT:    sb $8, 23($sp)
+; MIPS32R5EL-NEXT:    sb $7, 22($sp)
+; MIPS32R5EL-NEXT:    sb $6, 21($sp)
+; MIPS32R5EL-NEXT:    sb $5, 20($sp)
+; MIPS32R5EL-NEXT:    sb $4, 19($sp)
+; MIPS32R5EL-NEXT:    sb $3, 18($sp)
+; MIPS32R5EL-NEXT:    sb $2, 17($sp)
+; MIPS32R5EL-NEXT:    sb $1, 16($sp)
+; MIPS32R5EL-NEXT:    lw $1, 20($sp)
+; MIPS32R5EL-NEXT:    sw $1, 8($sp)
+; MIPS32R5EL-NEXT:    lw $1, 16($sp)
+; MIPS32R5EL-NEXT:    sw $1, 0($sp)
+; MIPS32R5EL-NEXT:    ld.w $w0, 0($sp)
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5EL-NEXT:    move $sp, $fp
+; MIPS32R5EL-NEXT:    lw $fp, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 48
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
   %1 = add <8 x i8> %a, %b
   ret <8 x i8> %1
 }
 
 define <16 x i8> @i8_16(<16 x i8> %a, <16 x i8> %b) {
-; ALL-LABEL: i8_16:
-; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
-; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
-; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
-; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
-; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24
-; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24
-; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8
-; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8
-; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
-; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
-; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
-; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
-
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
-; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
-; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
-; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
-; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
-
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 56
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 56
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 40
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 40
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
-
-; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
-; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
-; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
-; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
-
+; MIPS32-LABEL: i8_16:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $1, 24($sp)
+; MIPS32-NEXT:    srl $2, $1, 24
+; MIPS32-NEXT:    srl $3, $6, 24
+; MIPS32-NEXT:    srl $8, $1, 16
+; MIPS32-NEXT:    srl $9, $6, 16
+; MIPS32-NEXT:    srl $10, $1, 8
+; MIPS32-NEXT:    srl $11, $6, 8
+; MIPS32-NEXT:    lw $12, 20($sp)
+; MIPS32-NEXT:    srl $13, $12, 8
+; MIPS32-NEXT:    srl $14, $5, 8
+; MIPS32-NEXT:    addu $13, $14, $13
+; MIPS32-NEXT:    addu $14, $5, $12
+; MIPS32-NEXT:    addu $10, $11, $10
+; MIPS32-NEXT:    addu $1, $6, $1
+; MIPS32-NEXT:    addu $6, $9, $8
+; MIPS32-NEXT:    addu $2, $3, $2
+; MIPS32-NEXT:    srl $3, $12, 24
+; MIPS32-NEXT:    srl $8, $5, 24
+; MIPS32-NEXT:    srl $9, $12, 16
+; MIPS32-NEXT:    srl $5, $5, 16
+; MIPS32-NEXT:    addu $5, $5, $9
+; MIPS32-NEXT:    addu $3, $8, $3
+; MIPS32-NEXT:    sll $2, $2, 8
+; MIPS32-NEXT:    andi $6, $6, 255
+; MIPS32-NEXT:    andi $1, $1, 255
+; MIPS32-NEXT:    sll $8, $10, 8
+; MIPS32-NEXT:    andi $9, $14, 255
+; MIPS32-NEXT:    sll $10, $13, 8
+; MIPS32-NEXT:    lw $11, 28($sp)
+; MIPS32-NEXT:    lw $12, 16($sp)
+; MIPS32-NEXT:    srl $13, $12, 24
+; MIPS32-NEXT:    srl $14, $4, 24
+; MIPS32-NEXT:    srl $15, $11, 24
+; MIPS32-NEXT:    srl $24, $7, 24
+; MIPS32-NEXT:    or $9, $9, $10
+; MIPS32-NEXT:    or $1, $1, $8
+; MIPS32-NEXT:    or $2, $6, $2
+; MIPS32-NEXT:    addu $6, $24, $15
+; MIPS32-NEXT:    sll $3, $3, 8
+; MIPS32-NEXT:    andi $5, $5, 255
+; MIPS32-NEXT:    addu $8, $14, $13
+; MIPS32-NEXT:    sll $8, $8, 8
+; MIPS32-NEXT:    srl $10, $12, 16
+; MIPS32-NEXT:    srl $13, $4, 16
+; MIPS32-NEXT:    addu $10, $13, $10
+; MIPS32-NEXT:    andi $10, $10, 255
+; MIPS32-NEXT:    or $8, $10, $8
+; MIPS32-NEXT:    or $3, $5, $3
+; MIPS32-NEXT:    addu $5, $4, $12
+; MIPS32-NEXT:    sll $6, $6, 8
+; MIPS32-NEXT:    srl $10, $11, 16
+; MIPS32-NEXT:    srl $13, $7, 16
+; MIPS32-NEXT:    addu $10, $13, $10
+; MIPS32-NEXT:    andi $10, $10, 255
+; MIPS32-NEXT:    or $6, $10, $6
+; MIPS32-NEXT:    sll $10, $2, 16
+; MIPS32-NEXT:    andi $1, $1, 65535
+; MIPS32-NEXT:    sll $3, $3, 16
+; MIPS32-NEXT:    andi $9, $9, 65535
+; MIPS32-NEXT:    sll $2, $8, 16
+; MIPS32-NEXT:    andi $5, $5, 255
+; MIPS32-NEXT:    srl $8, $12, 8
+; MIPS32-NEXT:    srl $4, $4, 8
+; MIPS32-NEXT:    addu $4, $4, $8
+; MIPS32-NEXT:    sll $4, $4, 8
+; MIPS32-NEXT:    or $4, $5, $4
+; MIPS32-NEXT:    andi $4, $4, 65535
+; MIPS32-NEXT:    addu $5, $7, $11
+; MIPS32-NEXT:    or $2, $4, $2
+; MIPS32-NEXT:    or $3, $9, $3
+; MIPS32-NEXT:    or $4, $1, $10
+; MIPS32-NEXT:    sll $1, $6, 16
+; MIPS32-NEXT:    andi $5, $5, 255
+; MIPS32-NEXT:    srl $6, $11, 8
+; MIPS32-NEXT:    srl $7, $7, 8
+; MIPS32-NEXT:    addu $6, $7, $6
+; MIPS32-NEXT:    sll $6, $6, 8
+; MIPS32-NEXT:    or $5, $5, $6
+; MIPS32-NEXT:    andi $5, $5, 65535
+; MIPS32-NEXT:    or $5, $5, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: i8_16:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    dsrl $1, $7, 56
+; MIPS64-NEXT:    dsrl $2, $5, 56
+; MIPS64-NEXT:    dsrl $3, $7, 48
+; MIPS64-NEXT:    dsrl $8, $5, 48
+; MIPS64-NEXT:    dsrl $9, $6, 56
+; MIPS64-NEXT:    dsrl $10, $4, 56
+; MIPS64-NEXT:    dsrl $11, $7, 32
+; MIPS64-NEXT:    sll $1, $1, 0
+; MIPS64-NEXT:    sll $2, $2, 0
+; MIPS64-NEXT:    sll $3, $3, 0
+; MIPS64-NEXT:    sll $8, $8, 0
+; MIPS64-NEXT:    dsrl $12, $7, 40
+; MIPS64-NEXT:    sll $12, $12, 0
+; MIPS64-NEXT:    dsrl $13, $5, 40
+; MIPS64-NEXT:    sll $13, $13, 0
+; MIPS64-NEXT:    addu $12, $13, $12
+; MIPS64-NEXT:    addu $3, $8, $3
+; MIPS64-NEXT:    addu $1, $2, $1
+; MIPS64-NEXT:    sll $2, $9, 0
+; MIPS64-NEXT:    sll $8, $10, 0
+; MIPS64-NEXT:    dsrl $9, $6, 48
+; MIPS64-NEXT:    sll $9, $9, 0
+; MIPS64-NEXT:    dsrl $10, $4, 48
+; MIPS64-NEXT:    sll $10, $10, 0
+; MIPS64-NEXT:    addu $9, $10, $9
+; MIPS64-NEXT:    addu $2, $8, $2
+; MIPS64-NEXT:    sll $1, $1, 8
+; MIPS64-NEXT:    andi $3, $3, 255
+; MIPS64-NEXT:    sll $8, $12, 8
+; MIPS64-NEXT:    sll $10, $11, 0
+; MIPS64-NEXT:    dsrl $11, $5, 32
+; MIPS64-NEXT:    sll $11, $11, 0
+; MIPS64-NEXT:    addu $10, $11, $10
+; MIPS64-NEXT:    andi $10, $10, 255
+; MIPS64-NEXT:    or $8, $10, $8
+; MIPS64-NEXT:    sll $10, $6, 0
+; MIPS64-NEXT:    or $1, $3, $1
+; MIPS64-NEXT:    sll $2, $2, 8
+; MIPS64-NEXT:    andi $3, $9, 255
+; MIPS64-NEXT:    dsrl $9, $6, 40
+; MIPS64-NEXT:    srl $11, $10, 24
+; MIPS64-NEXT:    sll $12, $4, 0
+; MIPS64-NEXT:    srl $13, $12, 24
+; MIPS64-NEXT:    srl $14, $10, 16
+; MIPS64-NEXT:    srl $15, $12, 16
+; MIPS64-NEXT:    andi $8, $8, 65535
+; MIPS64-NEXT:    addu $14, $15, $14
+; MIPS64-NEXT:    addu $11, $13, $11
+; MIPS64-NEXT:    sll $7, $7, 0
+; MIPS64-NEXT:    or $2, $3, $2
+; MIPS64-NEXT:    sll $1, $1, 16
+; MIPS64-NEXT:    sll $3, $9, 0
+; MIPS64-NEXT:    dsrl $9, $4, 40
+; MIPS64-NEXT:    sll $9, $9, 0
+; MIPS64-NEXT:    addu $3, $9, $3
+; MIPS64-NEXT:    dsrl $6, $6, 32
+; MIPS64-NEXT:    srl $9, $7, 24
+; MIPS64-NEXT:    sll $5, $5, 0
+; MIPS64-NEXT:    srl $13, $5, 24
+; MIPS64-NEXT:    or $1, $8, $1
+; MIPS64-NEXT:    addu $8, $13, $9
+; MIPS64-NEXT:    sll $9, $11, 8
+; MIPS64-NEXT:    andi $11, $14, 255
+; MIPS64-NEXT:    sll $2, $2, 16
+; MIPS64-NEXT:    sll $3, $3, 8
+; MIPS64-NEXT:    sll $6, $6, 0
+; MIPS64-NEXT:    dsrl $4, $4, 32
+; MIPS64-NEXT:    sll $4, $4, 0
+; MIPS64-NEXT:    addu $4, $4, $6
+; MIPS64-NEXT:    andi $4, $4, 255
+; MIPS64-NEXT:    or $3, $4, $3
+; MIPS64-NEXT:    andi $3, $3, 65535
+; MIPS64-NEXT:    or $2, $3, $2
+; MIPS64-NEXT:    or $3, $11, $9
+; MIPS64-NEXT:    addu $4, $12, $10
+; MIPS64-NEXT:    sll $6, $8, 8
+; MIPS64-NEXT:    srl $8, $7, 16
+; MIPS64-NEXT:    srl $9, $5, 16
+; MIPS64-NEXT:    addu $8, $9, $8
+; MIPS64-NEXT:    andi $8, $8, 255
+; MIPS64-NEXT:    or $6, $8, $6
+; MIPS64-NEXT:    addu $8, $5, $7
+; MIPS64-NEXT:    dsll $2, $2, 32
+; MIPS64-NEXT:    sll $3, $3, 16
+; MIPS64-NEXT:    andi $4, $4, 255
+; MIPS64-NEXT:    srl $9, $10, 8
+; MIPS64-NEXT:    srl $10, $12, 8
+; MIPS64-NEXT:    addu $9, $10, $9
+; MIPS64-NEXT:    sll $9, $9, 8
+; MIPS64-NEXT:    or $4, $4, $9
+; MIPS64-NEXT:    andi $4, $4, 65535
+; MIPS64-NEXT:    or $3, $4, $3
+; MIPS64-NEXT:    dsll $3, $3, 32
+; MIPS64-NEXT:    dsrl $3, $3, 32
+; MIPS64-NEXT:    or $2, $3, $2
+; MIPS64-NEXT:    dsll $1, $1, 32
+; MIPS64-NEXT:    sll $3, $6, 16
+; MIPS64-NEXT:    andi $4, $8, 255
+; MIPS64-NEXT:    srl $6, $7, 8
+; MIPS64-NEXT:    srl $5, $5, 8
+; MIPS64-NEXT:    addu $5, $5, $6
+; MIPS64-NEXT:    sll $5, $5, 8
+; MIPS64-NEXT:    or $4, $4, $5
+; MIPS64-NEXT:    andi $4, $4, 65535
+; MIPS64-NEXT:    or $3, $4, $3
+; MIPS64-NEXT:    dsll $3, $3, 32
+; MIPS64-NEXT:    dsrl $3, $3, 32
+; MIPS64-NEXT:    or $3, $3, $1
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: i8_16:
+; MIPS32R5EB:       # %bb.0:
+; MIPS32R5EB-NEXT:    ldi.b $w0, 0
+; MIPS32R5EB-NEXT:    lw $1, 20($sp)
+; MIPS32R5EB-NEXT:    lw $2, 16($sp)
+; MIPS32R5EB-NEXT:    move.v $w1, $w0
+; MIPS32R5EB-NEXT:    insert.w $w1[0], $2
+; MIPS32R5EB-NEXT:    insert.w $w1[1], $1
+; MIPS32R5EB-NEXT:    lw $1, 24($sp)
+; MIPS32R5EB-NEXT:    insert.w $w0[0], $4
+; MIPS32R5EB-NEXT:    insert.w $w1[2], $1
+; MIPS32R5EB-NEXT:    lw $1, 28($sp)
+; MIPS32R5EB-NEXT:    insert.w $w1[3], $1
+; MIPS32R5EB-NEXT:    shf.b $w1, $w1, 27
+; MIPS32R5EB-NEXT:    insert.w $w0[1], $5
+; MIPS32R5EB-NEXT:    insert.w $w0[2], $6
+; MIPS32R5EB-NEXT:    insert.w $w0[3], $7
+; MIPS32R5EB-NEXT:    shf.b $w0, $w0, 27
+; MIPS32R5EB-NEXT:    addv.b $w0, $w0, $w1
+; MIPS32R5EB-NEXT:    shf.b $w0, $w0, 27
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32R5EB-NEXT:    copy_s.w $5, $w0[3]
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: i8_16:
+; MIPS64R5EB:       # %bb.0:
+; MIPS64R5EB-NEXT:    ldi.b $w0, 0
+; MIPS64R5EB-NEXT:    move.v $w1, $w0
+; MIPS64R5EB-NEXT:    insert.d $w1[0], $6
+; MIPS64R5EB-NEXT:    insert.d $w1[1], $7
+; MIPS64R5EB-NEXT:    shf.b $w1, $w1, 27
+; MIPS64R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS64R5EB-NEXT:    insert.d $w0[0], $4
+; MIPS64R5EB-NEXT:    insert.d $w0[1], $5
+; MIPS64R5EB-NEXT:    shf.b $w0, $w0, 27
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    addv.b $w0, $w0, $w1
+; MIPS64R5EB-NEXT:    shf.b $w0, $w0, 27
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: i8_16:
+; MIPS32R5EL:       # %bb.0:
+; MIPS32R5EL-NEXT:    ldi.b $w0, 0
+; MIPS32R5EL-NEXT:    lw $1, 20($sp)
+; MIPS32R5EL-NEXT:    lw $2, 16($sp)
+; MIPS32R5EL-NEXT:    move.v $w1, $w0
+; MIPS32R5EL-NEXT:    insert.w $w1[0], $2
+; MIPS32R5EL-NEXT:    insert.w $w1[1], $1
+; MIPS32R5EL-NEXT:    lw $1, 24($sp)
+; MIPS32R5EL-NEXT:    insert.w $w1[2], $1
+; MIPS32R5EL-NEXT:    lw $1, 28($sp)
+; MIPS32R5EL-NEXT:    insert.w $w1[3], $1
+; MIPS32R5EL-NEXT:    insert.w $w0[0], $4
+; MIPS32R5EL-NEXT:    insert.w $w0[1], $5
+; MIPS32R5EL-NEXT:    insert.w $w0[2], $6
+; MIPS32R5EL-NEXT:    insert.w $w0[3], $7
+; MIPS32R5EL-NEXT:    addv.b $w0, $w0, $w1
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32R5EL-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32R5EL-NEXT:    copy_s.w $5, $w0[3]
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: i8_16:
+; MIPS64R5EL:       # %bb.0:
+; MIPS64R5EL-NEXT:    ldi.b $w0, 0
+; MIPS64R5EL-NEXT:    move.v $w1, $w0
+; MIPS64R5EL-NEXT:    insert.d $w1[0], $6
+; MIPS64R5EL-NEXT:    insert.d $w1[1], $7
+; MIPS64R5EL-NEXT:    insert.d $w0[0], $4
+; MIPS64R5EL-NEXT:    insert.d $w0[1], $5
+; MIPS64R5EL-NEXT:    addv.b $w0, $w0, $w1
+; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
   %1 = add <16 x i8> %a, %b
-
   ret <16 x i8> %1
 }
 
 define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) {
-; ALL-LABEL: i16_2:
-; MIPS32: addu    $[[R0:[0-9]+]], $4, $5
-; MIPS32: andi    $[[R1:[0-9]+]], $[[R0]], 65535
-; MIPS32: srl     $[[R2:[0-9]+]], $5, 16
-; MIPS32: srl     $[[R3:[0-9]+]], $4, 16
-; MIPS32: addu    $[[R4:[0-9]+]], $[[R3]], $[[R2]]
-; MIPS32: sll     $2, $[[R4]], 16
-
-; MIPS32R5-DAG: sw $4
-; MIPS32R5-DAG: sw $5
-
-; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
-; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
-
-; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0
-; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0
-
+; MIPS32-LABEL: i16_2:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addu $1, $4, $5
+; MIPS32-NEXT:    andi $1, $1, 65535
+; MIPS32-NEXT:    srl $2, $5, 16
+; MIPS32-NEXT:    srl $3, $4, 16
+; MIPS32-NEXT:    addu $2, $3, $2
+; MIPS32-NEXT:    sll $2, $2, 16
+; MIPS32-NEXT:    or $2, $1, $2
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: i16_2:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    sll $1, $5, 0
+; MIPS64-NEXT:    sll $2, $4, 0
+; MIPS64-NEXT:    addu $3, $2, $1
+; MIPS64-NEXT:    andi $3, $3, 65535
+; MIPS64-NEXT:    srl $1, $1, 16
+; MIPS64-NEXT:    srl $2, $2, 16
+; MIPS64-NEXT:    addu $1, $2, $1
+; MIPS64-NEXT:    sll $1, $1, 16
+; MIPS64-NEXT:    or $2, $3, $1
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: i16_2:
+; MIPS32R5EB:       # %bb.0:
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -48
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32R5EB-NEXT:    sw $fp, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    .cfi_offset 30, -4
+; MIPS32R5EB-NEXT:    move $fp, $sp
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EB-NEXT:    and $sp, $sp, $1
+; MIPS32R5EB-NEXT:    sw $5, 36($sp)
+; MIPS32R5EB-NEXT:    sw $4, 40($sp)
+; MIPS32R5EB-NEXT:    lhu $1, 38($sp)
+; MIPS32R5EB-NEXT:    sw $1, 28($sp)
+; MIPS32R5EB-NEXT:    lhu $1, 36($sp)
+; MIPS32R5EB-NEXT:    sw $1, 20($sp)
+; MIPS32R5EB-NEXT:    lhu $1, 42($sp)
+; MIPS32R5EB-NEXT:    sw $1, 12($sp)
+; MIPS32R5EB-NEXT:    lhu $1, 40($sp)
+; MIPS32R5EB-NEXT:    sw $1, 4($sp)
+; MIPS32R5EB-NEXT:    ld.d $w0, 16($sp)
+; MIPS32R5EB-NEXT:    ld.d $w1, 0($sp)
+; MIPS32R5EB-NEXT:    addv.d $w0, $w1, $w0
+; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS32R5EB-NEXT:    copy_s.w $1, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[3]
+; MIPS32R5EB-NEXT:    sh $2, 34($sp)
+; MIPS32R5EB-NEXT:    sh $1, 32($sp)
+; MIPS32R5EB-NEXT:    lw $2, 32($sp)
+; MIPS32R5EB-NEXT:    move $sp, $fp
+; MIPS32R5EB-NEXT:    lw $fp, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 48
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5-LABEL: i16_2:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5-NEXT:    sll $1, $5, 0
+; MIPS64R5-NEXT:    sw $1, 8($sp)
+; MIPS64R5-NEXT:    sll $1, $4, 0
+; MIPS64R5-NEXT:    sw $1, 12($sp)
+; MIPS64R5-NEXT:    ldi.b $w0, 0
+; MIPS64R5-NEXT:    lh $1, 10($sp)
+; MIPS64R5-NEXT:    lh $2, 8($sp)
+; MIPS64R5-NEXT:    move.v $w1, $w0
+; MIPS64R5-NEXT:    insert.d $w1[0], $2
+; MIPS64R5-NEXT:    insert.d $w1[1], $1
+; MIPS64R5-NEXT:    lh $1, 12($sp)
+; MIPS64R5-NEXT:    insert.d $w0[0], $1
+; MIPS64R5-NEXT:    lh $1, 14($sp)
+; MIPS64R5-NEXT:    insert.d $w0[1], $1
+; MIPS64R5-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5-NEXT:    copy_s.d $1, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $2, $w0[1]
+; MIPS64R5-NEXT:    sh $2, 6($sp)
+; MIPS64R5-NEXT:    sh $1, 4($sp)
+; MIPS64R5-NEXT:    lw $2, 4($sp)
+; MIPS64R5-NEXT:    daddiu $sp, $sp, 16
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: i16_2:
+; MIPS32R5EL:       # %bb.0:
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -48
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32R5EL-NEXT:    sw $fp, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    .cfi_offset 30, -4
+; MIPS32R5EL-NEXT:    move $fp, $sp
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EL-NEXT:    and $sp, $sp, $1
+; MIPS32R5EL-NEXT:    sw $5, 36($sp)
+; MIPS32R5EL-NEXT:    sw $4, 40($sp)
+; MIPS32R5EL-NEXT:    lhu $1, 38($sp)
+; MIPS32R5EL-NEXT:    sw $1, 24($sp)
+; MIPS32R5EL-NEXT:    lhu $1, 36($sp)
+; MIPS32R5EL-NEXT:    sw $1, 16($sp)
+; MIPS32R5EL-NEXT:    lhu $1, 42($sp)
+; MIPS32R5EL-NEXT:    sw $1, 8($sp)
+; MIPS32R5EL-NEXT:    lhu $1, 40($sp)
+; MIPS32R5EL-NEXT:    sw $1, 0($sp)
+; MIPS32R5EL-NEXT:    ld.d $w0, 16($sp)
+; MIPS32R5EL-NEXT:    ld.d $w1, 0($sp)
+; MIPS32R5EL-NEXT:    addv.d $w0, $w1, $w0
+; MIPS32R5EL-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[2]
+; MIPS32R5EL-NEXT:    sh $2, 34($sp)
+; MIPS32R5EL-NEXT:    sh $1, 32($sp)
+; MIPS32R5EL-NEXT:    lw $2, 32($sp)
+; MIPS32R5EL-NEXT:    move $sp, $fp
+; MIPS32R5EL-NEXT:    lw $fp, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 48
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
   %1 = add <2 x i16> %a, %b
   ret <2 x i16> %1
 }
 
 define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) {
-; ALL-LABEL: i16_4:
-; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
-
-; MIPS32R5-DAG: sw $4
-; MIPS32R5-DAG: sw $5
-; MIPS32R5-DAG: sw $6
-; MIPS32R5-DAG: sw $7
-
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
-
-; MIPS64R5-DAG: sd $4
-; MIPS64R5-DAG: sd $5
-
+; MIPS32-LABEL: i16_4:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addu $1, $4, $6
+; MIPS32-NEXT:    andi $1, $1, 65535
+; MIPS32-NEXT:    srl $2, $6, 16
+; MIPS32-NEXT:    srl $3, $4, 16
+; MIPS32-NEXT:    addu $2, $3, $2
+; MIPS32-NEXT:    sll $2, $2, 16
+; MIPS32-NEXT:    or $2, $1, $2
+; MIPS32-NEXT:    addu $1, $5, $7
+; MIPS32-NEXT:    andi $1, $1, 65535
+; MIPS32-NEXT:    srl $3, $7, 16
+; MIPS32-NEXT:    srl $4, $5, 16
+; MIPS32-NEXT:    addu $3, $4, $3
+; MIPS32-NEXT:    sll $3, $3, 16
+; MIPS32-NEXT:    or $3, $1, $3
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: i16_4:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    dsrl $1, $5, 48
+; MIPS64-NEXT:    sll $1, $1, 0
+; MIPS64-NEXT:    dsrl $2, $4, 48
+; MIPS64-NEXT:    sll $2, $2, 0
+; MIPS64-NEXT:    addu $1, $2, $1
+; MIPS64-NEXT:    dsrl $2, $5, 32
+; MIPS64-NEXT:    sll $1, $1, 16
+; MIPS64-NEXT:    sll $2, $2, 0
+; MIPS64-NEXT:    dsrl $3, $4, 32
+; MIPS64-NEXT:    sll $3, $3, 0
+; MIPS64-NEXT:    addu $2, $3, $2
+; MIPS64-NEXT:    andi $2, $2, 65535
+; MIPS64-NEXT:    or $1, $2, $1
+; MIPS64-NEXT:    sll $2, $5, 0
+; MIPS64-NEXT:    sll $3, $4, 0
+; MIPS64-NEXT:    addu $4, $3, $2
+; MIPS64-NEXT:    dsll $1, $1, 32
+; MIPS64-NEXT:    andi $4, $4, 65535
+; MIPS64-NEXT:    srl $2, $2, 16
+; MIPS64-NEXT:    srl $3, $3, 16
+; MIPS64-NEXT:    addu $2, $3, $2
+; MIPS64-NEXT:    sll $2, $2, 16
+; MIPS64-NEXT:    or $2, $4, $2
+; MIPS64-NEXT:    dsll $2, $2, 32
+; MIPS64-NEXT:    dsrl $2, $2, 32
+; MIPS64-NEXT:    or $2, $2, $1
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: i16_4:
+; MIPS32R5EB:       # %bb.0:
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -48
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32R5EB-NEXT:    sw $fp, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    .cfi_offset 30, -4
+; MIPS32R5EB-NEXT:    move $fp, $sp
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EB-NEXT:    and $sp, $sp, $1
+; MIPS32R5EB-NEXT:    sw $6, 24($sp)
+; MIPS32R5EB-NEXT:    sw $7, 28($sp)
+; MIPS32R5EB-NEXT:    sw $4, 32($sp)
+; MIPS32R5EB-NEXT:    ldi.b $w0, 0
+; MIPS32R5EB-NEXT:    lhu $1, 26($sp)
+; MIPS32R5EB-NEXT:    lhu $2, 24($sp)
+; MIPS32R5EB-NEXT:    move.v $w1, $w0
+; MIPS32R5EB-NEXT:    insert.w $w1[0], $2
+; MIPS32R5EB-NEXT:    insert.w $w1[1], $1
+; MIPS32R5EB-NEXT:    lhu $1, 28($sp)
+; MIPS32R5EB-NEXT:    sw $5, 36($sp)
+; MIPS32R5EB-NEXT:    insert.w $w1[2], $1
+; MIPS32R5EB-NEXT:    lhu $1, 32($sp)
+; MIPS32R5EB-NEXT:    lhu $2, 30($sp)
+; MIPS32R5EB-NEXT:    insert.w $w1[3], $2
+; MIPS32R5EB-NEXT:    insert.w $w0[0], $1
+; MIPS32R5EB-NEXT:    lhu $1, 34($sp)
+; MIPS32R5EB-NEXT:    insert.w $w0[1], $1
+; MIPS32R5EB-NEXT:    lhu $1, 36($sp)
+; MIPS32R5EB-NEXT:    insert.w $w0[2], $1
+; MIPS32R5EB-NEXT:    lhu $1, 38($sp)
+; MIPS32R5EB-NEXT:    insert.w $w0[3], $1
+; MIPS32R5EB-NEXT:    addv.w $w0, $w0, $w1
+; MIPS32R5EB-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5EB-NEXT:    copy_s.w $4, $w0[3]
+; MIPS32R5EB-NEXT:    sh $4, 22($sp)
+; MIPS32R5EB-NEXT:    sh $3, 20($sp)
+; MIPS32R5EB-NEXT:    sh $2, 18($sp)
+; MIPS32R5EB-NEXT:    sh $1, 16($sp)
+; MIPS32R5EB-NEXT:    lw $1, 20($sp)
+; MIPS32R5EB-NEXT:    sw $1, 12($sp)
+; MIPS32R5EB-NEXT:    lw $1, 16($sp)
+; MIPS32R5EB-NEXT:    sw $1, 4($sp)
+; MIPS32R5EB-NEXT:    ld.w $w0, 0($sp)
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[3]
+; MIPS32R5EB-NEXT:    move $sp, $fp
+; MIPS32R5EB-NEXT:    lw $fp, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 48
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5-LABEL: i16_4:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5-NEXT:    sd $5, 16($sp)
+; MIPS64R5-NEXT:    sd $4, 24($sp)
+; MIPS64R5-NEXT:    ldi.b $w0, 0
+; MIPS64R5-NEXT:    lhu $1, 18($sp)
+; MIPS64R5-NEXT:    lhu $2, 16($sp)
+; MIPS64R5-NEXT:    move.v $w1, $w0
+; MIPS64R5-NEXT:    insert.w $w1[0], $2
+; MIPS64R5-NEXT:    insert.w $w1[1], $1
+; MIPS64R5-NEXT:    lhu $1, 20($sp)
+; MIPS64R5-NEXT:    insert.w $w1[2], $1
+; MIPS64R5-NEXT:    lhu $1, 24($sp)
+; MIPS64R5-NEXT:    lhu $2, 22($sp)
+; MIPS64R5-NEXT:    insert.w $w1[3], $2
+; MIPS64R5-NEXT:    insert.w $w0[0], $1
+; MIPS64R5-NEXT:    lhu $1, 26($sp)
+; MIPS64R5-NEXT:    insert.w $w0[1], $1
+; MIPS64R5-NEXT:    lhu $1, 28($sp)
+; MIPS64R5-NEXT:    insert.w $w0[2], $1
+; MIPS64R5-NEXT:    lhu $1, 30($sp)
+; MIPS64R5-NEXT:    insert.w $w0[3], $1
+; MIPS64R5-NEXT:    addv.w $w0, $w0, $w1
+; MIPS64R5-NEXT:    copy_s.w $1, $w0[0]
+; MIPS64R5-NEXT:    copy_s.w $2, $w0[1]
+; MIPS64R5-NEXT:    copy_s.w $3, $w0[2]
+; MIPS64R5-NEXT:    copy_s.w $4, $w0[3]
+; MIPS64R5-NEXT:    sh $4, 14($sp)
+; MIPS64R5-NEXT:    sh $3, 12($sp)
+; MIPS64R5-NEXT:    sh $2, 10($sp)
+; MIPS64R5-NEXT:    sh $1, 8($sp)
+; MIPS64R5-NEXT:    ld $2, 8($sp)
+; MIPS64R5-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: i16_4:
+; MIPS32R5EL:       # %bb.0:
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -48
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32R5EL-NEXT:    sw $fp, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    .cfi_offset 30, -4
+; MIPS32R5EL-NEXT:    move $fp, $sp
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EL-NEXT:    and $sp, $sp, $1
+; MIPS32R5EL-NEXT:    sw $6, 24($sp)
+; MIPS32R5EL-NEXT:    sw $7, 28($sp)
+; MIPS32R5EL-NEXT:    sw $4, 32($sp)
+; MIPS32R5EL-NEXT:    ldi.b $w0, 0
+; MIPS32R5EL-NEXT:    lhu $1, 26($sp)
+; MIPS32R5EL-NEXT:    lhu $2, 24($sp)
+; MIPS32R5EL-NEXT:    move.v $w1, $w0
+; MIPS32R5EL-NEXT:    insert.w $w1[0], $2
+; MIPS32R5EL-NEXT:    insert.w $w1[1], $1
+; MIPS32R5EL-NEXT:    lhu $1, 28($sp)
+; MIPS32R5EL-NEXT:    sw $5, 36($sp)
+; MIPS32R5EL-NEXT:    insert.w $w1[2], $1
+; MIPS32R5EL-NEXT:    lhu $1, 32($sp)
+; MIPS32R5EL-NEXT:    lhu $2, 30($sp)
+; MIPS32R5EL-NEXT:    insert.w $w1[3], $2
+; MIPS32R5EL-NEXT:    insert.w $w0[0], $1
+; MIPS32R5EL-NEXT:    lhu $1, 34($sp)
+; MIPS32R5EL-NEXT:    insert.w $w0[1], $1
+; MIPS32R5EL-NEXT:    lhu $1, 36($sp)
+; MIPS32R5EL-NEXT:    insert.w $w0[2], $1
+; MIPS32R5EL-NEXT:    lhu $1, 38($sp)
+; MIPS32R5EL-NEXT:    insert.w $w0[3], $1
+; MIPS32R5EL-NEXT:    addv.w $w0, $w0, $w1
+; MIPS32R5EL-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5EL-NEXT:    copy_s.w $4, $w0[3]
+; MIPS32R5EL-NEXT:    sh $4, 22($sp)
+; MIPS32R5EL-NEXT:    sh $3, 20($sp)
+; MIPS32R5EL-NEXT:    sh $2, 18($sp)
+; MIPS32R5EL-NEXT:    sh $1, 16($sp)
+; MIPS32R5EL-NEXT:    lw $1, 20($sp)
+; MIPS32R5EL-NEXT:    sw $1, 8($sp)
+; MIPS32R5EL-NEXT:    lw $1, 16($sp)
+; MIPS32R5EL-NEXT:    sw $1, 0($sp)
+; MIPS32R5EL-NEXT:    ld.w $w0, 0($sp)
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5EL-NEXT:    move $sp, $fp
+; MIPS32R5EL-NEXT:    lw $fp, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 48
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
   %1 = add <4 x i16> %a, %b
   ret <4 x i16> %1
 }
 
 define <8 x i16> @i16_8(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: i16_8:
-; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
-; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
-; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
-; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
-; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
-; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
-
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
-; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
-; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
-; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
-; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
-
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
-
-; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
-; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
-; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
-; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
-
+; MIPS32-LABEL: i16_8:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $1, 24($sp)
+; MIPS32-NEXT:    srl $2, $1, 16
+; MIPS32-NEXT:    srl $3, $6, 16
+; MIPS32-NEXT:    lw $8, 20($sp)
+; MIPS32-NEXT:    srl $9, $8, 16
+; MIPS32-NEXT:    srl $10, $5, 16
+; MIPS32-NEXT:    addu $9, $10, $9
+; MIPS32-NEXT:    addu $5, $5, $8
+; MIPS32-NEXT:    addu $2, $3, $2
+; MIPS32-NEXT:    addu $1, $6, $1
+; MIPS32-NEXT:    lw $3, 16($sp)
+; MIPS32-NEXT:    lw $6, 28($sp)
+; MIPS32-NEXT:    addu $8, $7, $6
+; MIPS32-NEXT:    andi $1, $1, 65535
+; MIPS32-NEXT:    sll $10, $2, 16
+; MIPS32-NEXT:    andi $5, $5, 65535
+; MIPS32-NEXT:    sll $9, $9, 16
+; MIPS32-NEXT:    addu $2, $4, $3
+; MIPS32-NEXT:    andi $2, $2, 65535
+; MIPS32-NEXT:    srl $3, $3, 16
+; MIPS32-NEXT:    srl $4, $4, 16
+; MIPS32-NEXT:    addu $3, $4, $3
+; MIPS32-NEXT:    sll $3, $3, 16
+; MIPS32-NEXT:    or $2, $2, $3
+; MIPS32-NEXT:    or $3, $5, $9
+; MIPS32-NEXT:    or $4, $1, $10
+; MIPS32-NEXT:    andi $1, $8, 65535
+; MIPS32-NEXT:    srl $5, $6, 16
+; MIPS32-NEXT:    srl $6, $7, 16
+; MIPS32-NEXT:    addu $5, $6, $5
+; MIPS32-NEXT:    sll $5, $5, 16
+; MIPS32-NEXT:    or $5, $1, $5
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: i16_8:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    dsrl $1, $6, 48
+; MIPS64-NEXT:    dsrl $2, $7, 48
+; MIPS64-NEXT:    sll $1, $1, 0
+; MIPS64-NEXT:    dsrl $3, $4, 48
+; MIPS64-NEXT:    sll $3, $3, 0
+; MIPS64-NEXT:    addu $1, $3, $1
+; MIPS64-NEXT:    dsrl $3, $6, 32
+; MIPS64-NEXT:    sll $2, $2, 0
+; MIPS64-NEXT:    dsrl $8, $5, 48
+; MIPS64-NEXT:    sll $8, $8, 0
+; MIPS64-NEXT:    addu $2, $8, $2
+; MIPS64-NEXT:    sll $1, $1, 16
+; MIPS64-NEXT:    sll $3, $3, 0
+; MIPS64-NEXT:    dsrl $8, $4, 32
+; MIPS64-NEXT:    sll $8, $8, 0
+; MIPS64-NEXT:    addu $3, $8, $3
+; MIPS64-NEXT:    andi $3, $3, 65535
+; MIPS64-NEXT:    dsrl $8, $7, 32
+; MIPS64-NEXT:    or $1, $3, $1
+; MIPS64-NEXT:    sll $2, $2, 16
+; MIPS64-NEXT:    sll $3, $8, 0
+; MIPS64-NEXT:    dsrl $8, $5, 32
+; MIPS64-NEXT:    sll $8, $8, 0
+; MIPS64-NEXT:    addu $3, $8, $3
+; MIPS64-NEXT:    andi $3, $3, 65535
+; MIPS64-NEXT:    or $3, $3, $2
+; MIPS64-NEXT:    sll $2, $6, 0
+; MIPS64-NEXT:    sll $4, $4, 0
+; MIPS64-NEXT:    addu $6, $4, $2
+; MIPS64-NEXT:    andi $6, $6, 65535
+; MIPS64-NEXT:    srl $2, $2, 16
+; MIPS64-NEXT:    srl $4, $4, 16
+; MIPS64-NEXT:    addu $2, $4, $2
+; MIPS64-NEXT:    sll $2, $2, 16
+; MIPS64-NEXT:    dsll $1, $1, 32
+; MIPS64-NEXT:    or $2, $6, $2
+; MIPS64-NEXT:    dsll $2, $2, 32
+; MIPS64-NEXT:    dsrl $2, $2, 32
+; MIPS64-NEXT:    sll $4, $7, 0
+; MIPS64-NEXT:    sll $5, $5, 0
+; MIPS64-NEXT:    addu $6, $5, $4
+; MIPS64-NEXT:    or $2, $2, $1
+; MIPS64-NEXT:    dsll $1, $3, 32
+; MIPS64-NEXT:    andi $3, $6, 65535
+; MIPS64-NEXT:    srl $4, $4, 16
+; MIPS64-NEXT:    srl $5, $5, 16
+; MIPS64-NEXT:    addu $4, $5, $4
+; MIPS64-NEXT:    sll $4, $4, 16
+; MIPS64-NEXT:    or $3, $3, $4
+; MIPS64-NEXT:    dsll $3, $3, 32
+; MIPS64-NEXT:    dsrl $3, $3, 32
+; MIPS64-NEXT:    or $3, $3, $1
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: i16_8:
+; MIPS32R5EB:       # %bb.0:
+; MIPS32R5EB-NEXT:    ldi.b $w0, 0
+; MIPS32R5EB-NEXT:    lw $1, 20($sp)
+; MIPS32R5EB-NEXT:    lw $2, 16($sp)
+; MIPS32R5EB-NEXT:    move.v $w1, $w0
+; MIPS32R5EB-NEXT:    insert.w $w1[0], $2
+; MIPS32R5EB-NEXT:    insert.w $w1[1], $1
+; MIPS32R5EB-NEXT:    lw $1, 24($sp)
+; MIPS32R5EB-NEXT:    insert.w $w0[0], $4
+; MIPS32R5EB-NEXT:    insert.w $w1[2], $1
+; MIPS32R5EB-NEXT:    lw $1, 28($sp)
+; MIPS32R5EB-NEXT:    insert.w $w1[3], $1
+; MIPS32R5EB-NEXT:    shf.h $w1, $w1, 177
+; MIPS32R5EB-NEXT:    insert.w $w0[1], $5
+; MIPS32R5EB-NEXT:    insert.w $w0[2], $6
+; MIPS32R5EB-NEXT:    insert.w $w0[3], $7
+; MIPS32R5EB-NEXT:    shf.h $w0, $w0, 177
+; MIPS32R5EB-NEXT:    addv.h $w0, $w0, $w1
+; MIPS32R5EB-NEXT:    shf.h $w0, $w0, 177
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32R5EB-NEXT:    copy_s.w $5, $w0[3]
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: i16_8:
+; MIPS64R5EB:       # %bb.0:
+; MIPS64R5EB-NEXT:    ldi.b $w0, 0
+; MIPS64R5EB-NEXT:    move.v $w1, $w0
+; MIPS64R5EB-NEXT:    insert.d $w1[0], $6
+; MIPS64R5EB-NEXT:    insert.d $w1[1], $7
+; MIPS64R5EB-NEXT:    shf.h $w1, $w1, 27
+; MIPS64R5EB-NEXT:    insert.d $w0[0], $4
+; MIPS64R5EB-NEXT:    insert.d $w0[1], $5
+; MIPS64R5EB-NEXT:    shf.h $w0, $w0, 27
+; MIPS64R5EB-NEXT:    addv.h $w0, $w0, $w1
+; MIPS64R5EB-NEXT:    shf.h $w0, $w0, 27
+; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: i16_8:
+; MIPS32R5EL:       # %bb.0:
+; MIPS32R5EL-NEXT:    ldi.b $w0, 0
+; MIPS32R5EL-NEXT:    lw $1, 20($sp)
+; MIPS32R5EL-NEXT:    lw $2, 16($sp)
+; MIPS32R5EL-NEXT:    move.v $w1, $w0
+; MIPS32R5EL-NEXT:    insert.w $w1[0], $2
+; MIPS32R5EL-NEXT:    insert.w $w1[1], $1
+; MIPS32R5EL-NEXT:    lw $1, 24($sp)
+; MIPS32R5EL-NEXT:    insert.w $w1[2], $1
+; MIPS32R5EL-NEXT:    lw $1, 28($sp)
+; MIPS32R5EL-NEXT:    insert.w $w1[3], $1
+; MIPS32R5EL-NEXT:    insert.w $w0[0], $4
+; MIPS32R5EL-NEXT:    insert.w $w0[1], $5
+; MIPS32R5EL-NEXT:    insert.w $w0[2], $6
+; MIPS32R5EL-NEXT:    insert.w $w0[3], $7
+; MIPS32R5EL-NEXT:    addv.h $w0, $w0, $w1
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32R5EL-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32R5EL-NEXT:    copy_s.w $5, $w0[3]
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: i16_8:
+; MIPS64R5EL:       # %bb.0:
+; MIPS64R5EL-NEXT:    ldi.b $w0, 0
+; MIPS64R5EL-NEXT:    move.v $w1, $w0
+; MIPS64R5EL-NEXT:    insert.d $w1[0], $6
+; MIPS64R5EL-NEXT:    insert.d $w1[1], $7
+; MIPS64R5EL-NEXT:    insert.d $w0[0], $4
+; MIPS64R5EL-NEXT:    insert.d $w0[1], $5
+; MIPS64R5EL-NEXT:    addv.h $w0, $w0, $w1
+; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
   %1 = add <8 x i16> %a, %b
   ret <8 x i16> %1
 }
 
 define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
-; ALL-LABEL: i32_2:
-; MIPS32-DAG: addu    $2, $4, $6
-; MIPS32-DAG: addu    $3, $5, $7
-
-; MIPS32R5-DAG: sw $4
-; MIPS32R5-DAG: sw $5
-; MIPS32R5-DAG: sw $6
-; MIPS32R5-DAG: sw $7
-
-; MIPS64-DAG: sll     ${{[0-9]+}}, $4, 0
-; MIPS64-DAG: sll     ${{[0-9]+}}, $5, 0
-
-; MIPS64R5-DAG: sd $4
-; MIPS64R5-DAG: sd $5
-
+; MIPS32-LABEL: i32_2:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addu $2, $4, $6
+; MIPS32-NEXT:    addu $3, $5, $7
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: i32_2:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    sll $1, $5, 0
+; MIPS64-NEXT:    sll $2, $4, 0
+; MIPS64-NEXT:    addu $1, $2, $1
+; MIPS64-NEXT:    dsll $1, $1, 32
+; MIPS64-NEXT:    dsrl $2, $5, 32
+; MIPS64-NEXT:    dsrl $1, $1, 32
+; MIPS64-NEXT:    sll $2, $2, 0
+; MIPS64-NEXT:    dsrl $3, $4, 32
+; MIPS64-NEXT:    sll $3, $3, 0
+; MIPS64-NEXT:    addu $2, $3, $2
+; MIPS64-NEXT:    dsll $2, $2, 32
+; MIPS64-NEXT:    or $2, $1, $2
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: i32_2:
+; MIPS32R5EB:       # %bb.0:
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -48
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32R5EB-NEXT:    sw $fp, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    .cfi_offset 30, -4
+; MIPS32R5EB-NEXT:    move $fp, $sp
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EB-NEXT:    and $sp, $sp, $1
+; MIPS32R5EB-NEXT:    sw $7, 28($sp)
+; MIPS32R5EB-NEXT:    sw $6, 20($sp)
+; MIPS32R5EB-NEXT:    sw $5, 12($sp)
+; MIPS32R5EB-NEXT:    sw $4, 4($sp)
+; MIPS32R5EB-NEXT:    ld.d $w0, 16($sp)
+; MIPS32R5EB-NEXT:    ld.d $w1, 0($sp)
+; MIPS32R5EB-NEXT:    addv.d $w0, $w1, $w0
+; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[3]
+; MIPS32R5EB-NEXT:    move $sp, $fp
+; MIPS32R5EB-NEXT:    lw $fp, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 48
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5-LABEL: i32_2:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5-NEXT:    sd $5, 16($sp)
+; MIPS64R5-NEXT:    sd $4, 24($sp)
+; MIPS64R5-NEXT:    ldi.b $w0, 0
+; MIPS64R5-NEXT:    lw $1, 20($sp)
+; MIPS64R5-NEXT:    lw $2, 16($sp)
+; MIPS64R5-NEXT:    move.v $w1, $w0
+; MIPS64R5-NEXT:    insert.d $w1[0], $2
+; MIPS64R5-NEXT:    insert.d $w1[1], $1
+; MIPS64R5-NEXT:    lw $1, 24($sp)
+; MIPS64R5-NEXT:    insert.d $w0[0], $1
+; MIPS64R5-NEXT:    lw $1, 28($sp)
+; MIPS64R5-NEXT:    insert.d $w0[1], $1
+; MIPS64R5-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5-NEXT:    copy_s.d $1, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $2, $w0[1]
+; MIPS64R5-NEXT:    sw $2, 12($sp)
+; MIPS64R5-NEXT:    sw $1, 8($sp)
+; MIPS64R5-NEXT:    ld $2, 8($sp)
+; MIPS64R5-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: i32_2:
+; MIPS32R5EL:       # %bb.0:
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -48
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32R5EL-NEXT:    sw $fp, 44($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    .cfi_offset 30, -4
+; MIPS32R5EL-NEXT:    move $fp, $sp
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EL-NEXT:    and $sp, $sp, $1
+; MIPS32R5EL-NEXT:    sw $7, 24($sp)
+; MIPS32R5EL-NEXT:    sw $6, 16($sp)
+; MIPS32R5EL-NEXT:    sw $5, 8($sp)
+; MIPS32R5EL-NEXT:    sw $4, 0($sp)
+; MIPS32R5EL-NEXT:    ld.d $w0, 16($sp)
+; MIPS32R5EL-NEXT:    ld.d $w1, 0($sp)
+; MIPS32R5EL-NEXT:    addv.d $w0, $w1, $w0
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5EL-NEXT:    move $sp, $fp
+; MIPS32R5EL-NEXT:    lw $fp, 44($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 48
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
   %1 = add <2 x i32> %a, %b
-
   ret <2 x i32> %1
 }
 
 define <4 x i32> @i32_4(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: i32_4:
-; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
-; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
-; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
-; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
-; MIPS32-DAG: addu $2
-; MIPS32-DAG: addu $3
-; MIPS32-DAG: addu $4
-; MIPS32-DAG: addu $5
-
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
-; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
-; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
-; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
-; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
-
-; MIPS64-DAG: sll     ${{[0-9]+}}, $4, 0
-; MIPS64-DAG: sll     ${{[0-9]+}}, $5, 0
-; MIPS64-DAG: sll     ${{[0-9]+}}, $6, 0
-; MIPS64-DAG: sll     ${{[0-9]+}}, $7, 0
-; MIPS64-DAG: dsrl    ${{[0-9]+}}, $4, 32
-; MIPS64-DAG: dsrl    ${{[0-9]+}}, $5, 32
-; MIPS64-DAG: dsrl    ${{[0-9]+}}, $6, 32
-; MIPS64-DAG: dsrl    ${{[0-9]+}}, $7, 32
+; MIPS32-LABEL: i32_4:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $1, 20($sp)
+; MIPS32-NEXT:    lw $2, 16($sp)
+; MIPS32-NEXT:    addu $2, $4, $2
+; MIPS32-NEXT:    addu $3, $5, $1
+; MIPS32-NEXT:    lw $1, 24($sp)
+; MIPS32-NEXT:    addu $4, $6, $1
+; MIPS32-NEXT:    lw $1, 28($sp)
+; MIPS32-NEXT:    addu $5, $7, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: i32_4:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    sll $1, $6, 0
+; MIPS64-NEXT:    sll $2, $4, 0
+; MIPS64-NEXT:    addu $1, $2, $1
+; MIPS64-NEXT:    dsll $1, $1, 32
+; MIPS64-NEXT:    sll $2, $7, 0
+; MIPS64-NEXT:    sll $3, $5, 0
+; MIPS64-NEXT:    addu $2, $3, $2
+; MIPS64-NEXT:    dsrl $3, $6, 32
+; MIPS64-NEXT:    dsll $6, $2, 32
+; MIPS64-NEXT:    dsrl $1, $1, 32
+; MIPS64-NEXT:    sll $2, $3, 0
+; MIPS64-NEXT:    dsrl $3, $4, 32
+; MIPS64-NEXT:    sll $3, $3, 0
+; MIPS64-NEXT:    addu $2, $3, $2
+; MIPS64-NEXT:    dsll $2, $2, 32
+; MIPS64-NEXT:    dsrl $3, $7, 32
+; MIPS64-NEXT:    or $2, $1, $2
+; MIPS64-NEXT:    dsrl $1, $6, 32
+; MIPS64-NEXT:    sll $3, $3, 0
+; MIPS64-NEXT:    dsrl $4, $5, 32
+; MIPS64-NEXT:    sll $4, $4, 0
+; MIPS64-NEXT:    addu $3, $4, $3
+; MIPS64-NEXT:    dsll $3, $3, 32
+; MIPS64-NEXT:    or $3, $1, $3
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: i32_4:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    ldi.b $w0, 0
+; MIPS32R5-NEXT:    lw $1, 20($sp)
+; MIPS32R5-NEXT:    lw $2, 16($sp)
+; MIPS32R5-NEXT:    move.v $w1, $w0
+; MIPS32R5-NEXT:    insert.w $w1[0], $2
+; MIPS32R5-NEXT:    insert.w $w1[1], $1
+; MIPS32R5-NEXT:    lw $1, 24($sp)
+; MIPS32R5-NEXT:    insert.w $w1[2], $1
+; MIPS32R5-NEXT:    lw $1, 28($sp)
+; MIPS32R5-NEXT:    insert.w $w1[3], $1
+; MIPS32R5-NEXT:    insert.w $w0[0], $4
+; MIPS32R5-NEXT:    insert.w $w0[1], $5
+; MIPS32R5-NEXT:    insert.w $w0[2], $6
+; MIPS32R5-NEXT:    insert.w $w0[3], $7
+; MIPS32R5-NEXT:    addv.w $w0, $w0, $w1
+; MIPS32R5-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32R5-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32R5-NEXT:    copy_s.w $5, $w0[3]
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: i32_4:
+; MIPS64R5EB:       # %bb.0:
+; MIPS64R5EB-NEXT:    ldi.b $w0, 0
+; MIPS64R5EB-NEXT:    move.v $w1, $w0
+; MIPS64R5EB-NEXT:    insert.d $w1[0], $6
+; MIPS64R5EB-NEXT:    insert.d $w1[1], $7
+; MIPS64R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS64R5EB-NEXT:    insert.d $w0[0], $4
+; MIPS64R5EB-NEXT:    insert.d $w0[1], $5
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    addv.w $w0, $w0, $w1
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: i32_4:
+; MIPS64R5EL:       # %bb.0:
+; MIPS64R5EL-NEXT:    ldi.b $w0, 0
+; MIPS64R5EL-NEXT:    move.v $w1, $w0
+; MIPS64R5EL-NEXT:    insert.d $w1[0], $6
+; MIPS64R5EL-NEXT:    insert.d $w1[1], $7
+; MIPS64R5EL-NEXT:    insert.d $w0[0], $4
+; MIPS64R5EL-NEXT:    insert.d $w0[1], $5
+; MIPS64R5EL-NEXT:    addv.w $w0, $w0, $w1
+; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
   %1 = add <4 x i32> %a, %b
   ret <4 x i32> %1
 }
 
 define <2 x i64> @i64_2(<2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: i64_2:
-; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
-; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
-; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
-; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
-; MIPS32-DAG: addu $2
-; MIPS32-DAG: addu $3
-; MIPS32-DAG: addu $4
-; MIPS32-DAG: addu $5
-
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
-; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
-; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
-; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
-; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
-; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
-
-; MIPS64-DAG: daddu $2, $4, $6
-; MIPS64-DAG: daddu $3, $5, $7
-
-; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
-; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
-; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
-; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
-
+; MIPS32EB-LABEL: i64_2:
+; MIPS32EB:       # %bb.0:
+; MIPS32EB-NEXT:    lw $1, 16($sp)
+; MIPS32EB-NEXT:    addu $1, $4, $1
+; MIPS32EB-NEXT:    lw $2, 20($sp)
+; MIPS32EB-NEXT:    addu $3, $5, $2
+; MIPS32EB-NEXT:    sltu $2, $3, $5
+; MIPS32EB-NEXT:    lw $4, 24($sp)
+; MIPS32EB-NEXT:    addu $2, $1, $2
+; MIPS32EB-NEXT:    addu $1, $6, $4
+; MIPS32EB-NEXT:    lw $4, 28($sp)
+; MIPS32EB-NEXT:    addu $5, $7, $4
+; MIPS32EB-NEXT:    sltu $4, $5, $7
+; MIPS32EB-NEXT:    addu $4, $1, $4
+; MIPS32EB-NEXT:    jr $ra
+; MIPS32EB-NEXT:    nop
+;
+; MIPS64-LABEL: i64_2:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddu $2, $4, $6
+; MIPS64-NEXT:    daddu $3, $5, $7
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: i64_2:
+; MIPS32R5EB:       # %bb.0:
+; MIPS32R5EB-NEXT:    ldi.b $w0, 0
+; MIPS32R5EB-NEXT:    lw $1, 20($sp)
+; MIPS32R5EB-NEXT:    lw $2, 16($sp)
+; MIPS32R5EB-NEXT:    move.v $w1, $w0
+; MIPS32R5EB-NEXT:    insert.w $w1[0], $2
+; MIPS32R5EB-NEXT:    insert.w $w1[1], $1
+; MIPS32R5EB-NEXT:    lw $1, 24($sp)
+; MIPS32R5EB-NEXT:    insert.w $w0[0], $4
+; MIPS32R5EB-NEXT:    insert.w $w1[2], $1
+; MIPS32R5EB-NEXT:    lw $1, 28($sp)
+; MIPS32R5EB-NEXT:    insert.w $w1[3], $1
+; MIPS32R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS32R5EB-NEXT:    insert.w $w0[1], $5
+; MIPS32R5EB-NEXT:    insert.w $w0[2], $6
+; MIPS32R5EB-NEXT:    insert.w $w0[3], $7
+; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
+; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32R5EB-NEXT:    copy_s.w $5, $w0[3]
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5-LABEL: i64_2:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    ldi.b $w0, 0
+; MIPS64R5-NEXT:    move.v $w1, $w0
+; MIPS64R5-NEXT:    insert.d $w1[0], $6
+; MIPS64R5-NEXT:    insert.d $w1[1], $7
+; MIPS64R5-NEXT:    insert.d $w0[0], $4
+; MIPS64R5-NEXT:    insert.d $w0[1], $5
+; MIPS64R5-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS32EL-LABEL: i64_2:
+; MIPS32EL:       # %bb.0:
+; MIPS32EL-NEXT:    lw $1, 20($sp)
+; MIPS32EL-NEXT:    addu $1, $5, $1
+; MIPS32EL-NEXT:    lw $2, 16($sp)
+; MIPS32EL-NEXT:    addu $2, $4, $2
+; MIPS32EL-NEXT:    sltu $3, $2, $4
+; MIPS32EL-NEXT:    lw $4, 28($sp)
+; MIPS32EL-NEXT:    addu $3, $1, $3
+; MIPS32EL-NEXT:    addu $1, $7, $4
+; MIPS32EL-NEXT:    lw $4, 24($sp)
+; MIPS32EL-NEXT:    addu $4, $6, $4
+; MIPS32EL-NEXT:    sltu $5, $4, $6
+; MIPS32EL-NEXT:    addu $5, $1, $5
+; MIPS32EL-NEXT:    jr $ra
+; MIPS32EL-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: i64_2:
+; MIPS32R5EL:       # %bb.0:
+; MIPS32R5EL-NEXT:    ldi.b $w0, 0
+; MIPS32R5EL-NEXT:    lw $1, 20($sp)
+; MIPS32R5EL-NEXT:    lw $2, 16($sp)
+; MIPS32R5EL-NEXT:    move.v $w1, $w0
+; MIPS32R5EL-NEXT:    insert.w $w1[0], $2
+; MIPS32R5EL-NEXT:    insert.w $w1[1], $1
+; MIPS32R5EL-NEXT:    lw $1, 24($sp)
+; MIPS32R5EL-NEXT:    insert.w $w1[2], $1
+; MIPS32R5EL-NEXT:    lw $1, 28($sp)
+; MIPS32R5EL-NEXT:    insert.w $w1[3], $1
+; MIPS32R5EL-NEXT:    insert.w $w0[0], $4
+; MIPS32R5EL-NEXT:    insert.w $w0[1], $5
+; MIPS32R5EL-NEXT:    insert.w $w0[2], $6
+; MIPS32R5EL-NEXT:    insert.w $w0[3], $7
+; MIPS32R5EL-NEXT:    addv.d $w0, $w0, $w1
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32R5EL-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32R5EL-NEXT:    copy_s.w $5, $w0[3]
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
   %1 = add <2 x i64> %a, %b
   ret <2 x i64> %1
 }
@@ -414,35 +2581,133 @@ define <2 x i64> @i64_2(<2 x i64> %a, <2
 @float_res_v2f32 = external global <2 x float>
 
 define void @float_2(<2 x float> %a, <2 x float> %b) {
-; ALL-LABEL: float_2:
-; MIPS32: mtc1 $7, $f[[F0:[0-9]+]]
-; MIPS32: mtc1 $5, $f[[F1:[0-9]+]]
-; MIPS32: add.s $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]]
-; MIPS32: swc1 $f[[F2]]
-; MIPS32: mtc1 $6, $f[[F3:[0-9]+]]
-; MIPS32: mtc1 $4, $f[[F4:[0-9]+]]
-; MIPS32: add.s $f[[F5:[0-9]+]], $f[[F4]], $f[[F3]]
-; MIPS32: swc1 $f[[F5]]
-
-; MIPS32R5-DAG: sw $4
-; MIPS32R5-DAG: sw $5
-; MIPS32R5-DAG: sw $6
-; MIPS32R5-DAG: sw $7
-
-; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
-; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
-; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
-; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
-; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32
-; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
-; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0
-; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0
-; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
-; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
-
-; MIPS64R5-DAG: sd $4
-; MIPS64R5-DAG: sd $5
-
+; MIPS32-LABEL: float_2:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    mtc1 $7, $f0
+; MIPS32-NEXT:    mtc1 $5, $f1
+; MIPS32-NEXT:    add.s $f0, $f1, $f0
+; MIPS32-NEXT:    lui $1, %hi(float_res_v2f32)
+; MIPS32-NEXT:    addiu $2, $1, %lo(float_res_v2f32)
+; MIPS32-NEXT:    swc1 $f0, 4($2)
+; MIPS32-NEXT:    mtc1 $6, $f0
+; MIPS32-NEXT:    mtc1 $4, $f1
+; MIPS32-NEXT:    add.s $f0, $f1, $f0
+; MIPS32-NEXT:    swc1 $f0, %lo(float_res_v2f32)($1)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64EB-LABEL: float_2:
+; MIPS64EB:       # %bb.0:
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(float_2)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(float_2)))
+; MIPS64EB-NEXT:    sll $2, $5, 0
+; MIPS64EB-NEXT:    mtc1 $2, $f0
+; MIPS64EB-NEXT:    sll $2, $4, 0
+; MIPS64EB-NEXT:    mtc1 $2, $f1
+; MIPS64EB-NEXT:    add.s $f0, $f1, $f0
+; MIPS64EB-NEXT:    dsrl $2, $5, 32
+; MIPS64EB-NEXT:    sll $2, $2, 0
+; MIPS64EB-NEXT:    ld $1, %got_disp(float_res_v2f32)($1)
+; MIPS64EB-NEXT:    swc1 $f0, 4($1)
+; MIPS64EB-NEXT:    mtc1 $2, $f0
+; MIPS64EB-NEXT:    dsrl $2, $4, 32
+; MIPS64EB-NEXT:    sll $2, $2, 0
+; MIPS64EB-NEXT:    mtc1 $2, $f1
+; MIPS64EB-NEXT:    add.s $f0, $f1, $f0
+; MIPS64EB-NEXT:    swc1 $f0, 0($1)
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5-LABEL: float_2:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    addiu $sp, $sp, -48
+; MIPS32R5-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32R5-NEXT:    sw $fp, 44($sp) # 4-byte Folded Spill
+; MIPS32R5-NEXT:    .cfi_offset 30, -4
+; MIPS32R5-NEXT:    move $fp, $sp
+; MIPS32R5-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5-NEXT:    addiu $1, $zero, -16
+; MIPS32R5-NEXT:    and $sp, $sp, $1
+; MIPS32R5-NEXT:    sw $7, 20($sp)
+; MIPS32R5-NEXT:    sw $6, 16($sp)
+; MIPS32R5-NEXT:    sw $5, 4($sp)
+; MIPS32R5-NEXT:    sw $4, 0($sp)
+; MIPS32R5-NEXT:    ld.w $w0, 16($sp)
+; MIPS32R5-NEXT:    ld.w $w1, 0($sp)
+; MIPS32R5-NEXT:    fadd.w $w0, $w1, $w0
+; MIPS32R5-NEXT:    lui $1, %hi(float_res_v2f32)
+; MIPS32R5-NEXT:    addiu $2, $1, %lo(float_res_v2f32)
+; MIPS32R5-NEXT:    splati.w $w1, $w0[1]
+; MIPS32R5-NEXT:    swc1 $f1, 4($2)
+; MIPS32R5-NEXT:    swc1 $f0, %lo(float_res_v2f32)($1)
+; MIPS32R5-NEXT:    move $sp, $fp
+; MIPS32R5-NEXT:    lw $fp, 44($sp) # 4-byte Folded Reload
+; MIPS32R5-NEXT:    addiu $sp, $sp, 48
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: float_2:
+; MIPS64R5EB:       # %bb.0:
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(float_2)))
+; MIPS64R5EB-NEXT:    daddu $1, $1, $25
+; MIPS64R5EB-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(float_2)))
+; MIPS64R5EB-NEXT:    sd $5, 0($sp)
+; MIPS64R5EB-NEXT:    sd $4, 16($sp)
+; MIPS64R5EB-NEXT:    ld.w $w0, 0($sp)
+; MIPS64R5EB-NEXT:    ld.w $w1, 16($sp)
+; MIPS64R5EB-NEXT:    fadd.w $w0, $w1, $w0
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EB-NEXT:    ld $1, %got_disp(float_res_v2f32)($1)
+; MIPS64R5EB-NEXT:    sd $2, 0($1)
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS64EL-LABEL: float_2:
+; MIPS64EL:       # %bb.0:
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(float_2)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(float_2)))
+; MIPS64EL-NEXT:    sll $2, $5, 0
+; MIPS64EL-NEXT:    mtc1 $2, $f0
+; MIPS64EL-NEXT:    sll $2, $4, 0
+; MIPS64EL-NEXT:    mtc1 $2, $f1
+; MIPS64EL-NEXT:    add.s $f0, $f1, $f0
+; MIPS64EL-NEXT:    dsrl $2, $5, 32
+; MIPS64EL-NEXT:    sll $2, $2, 0
+; MIPS64EL-NEXT:    ld $1, %got_disp(float_res_v2f32)($1)
+; MIPS64EL-NEXT:    swc1 $f0, 0($1)
+; MIPS64EL-NEXT:    mtc1 $2, $f0
+; MIPS64EL-NEXT:    dsrl $2, $4, 32
+; MIPS64EL-NEXT:    sll $2, $2, 0
+; MIPS64EL-NEXT:    mtc1 $2, $f1
+; MIPS64EL-NEXT:    add.s $f0, $f1, $f0
+; MIPS64EL-NEXT:    swc1 $f0, 4($1)
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: float_2:
+; MIPS64R5EL:       # %bb.0:
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(float_2)))
+; MIPS64R5EL-NEXT:    daddu $1, $1, $25
+; MIPS64R5EL-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(float_2)))
+; MIPS64R5EL-NEXT:    sd $5, 0($sp)
+; MIPS64R5EL-NEXT:    sd $4, 16($sp)
+; MIPS64R5EL-NEXT:    ld.w $w0, 0($sp)
+; MIPS64R5EL-NEXT:    ld.w $w1, 16($sp)
+; MIPS64R5EL-NEXT:    fadd.w $w0, $w1, $w0
+; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EL-NEXT:    ld $1, %got_disp(float_res_v2f32)($1)
+; MIPS64R5EL-NEXT:    sd $2, 0($1)
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
   %1 = fadd <2 x float> %a, %b
   store <2 x float> %1, <2 x float> * @float_res_v2f32
   ret void
@@ -454,56 +2719,161 @@ define void @float_2(<2 x float> %a, <2
 ; ld.w.
 
 define void @float_4(<4 x float> %a, <4 x float> %b) {
-; ALL-LABEL: float_4:
-; MIPS32-DAG: mtc1 $4
-; MIPS32-DAG: mtc1 $5
-; MIPS32-DAG: mtc1 $6
-; MIPS32-DAG: mtc1 $7
-; MIPS32-DAG: lwc1
-; MIPS32-DAG: lwc1
-; MIPS32-DAG: lwc1
-; MIPS32-DAG: lwc1
-
-; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp)
-; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]]
-; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp)
-; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]]
-; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp)
-; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]]
-; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp)
-; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]]
-
-; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4
-; MIPS32R5-DAG: insert.w $w[[W1]][1], $5
-; MIPS32R5-DAG: insert.w $w[[W1]][2], $6
-; MIPS32R5-DAG: insert.w $w[[W1]][3], $7
-
-; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
-; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
-; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
-; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
-; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32
-; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
-; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0
-; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0
-; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
-; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
-; MIPS64-DAG: sll $[[R6:[0-9]+]], $6, 0
-; MIPS64-DAG: sll $[[R7:[0-9]+]], $7, 0
-; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}}
-; MIPS64-DAG: mtc1 $[[R7]], $f{{[0-9]+}}
-; MIPS64-DAG: dsrl $[[R8:[0-9]+]], $6, 32
-; MIPS64-DAG: dsrl $[[R9:[0-9]+]], $7, 32
-; MIPS64-DAG: sll $[[R10:[0-9]+]], $[[R8]], 0
-; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R9]], 0
-; MIPS64-DAG: mtc1 $[[R10]], $f{{[0-9]+}}
-; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}}
-
-; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
-; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
-; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
-; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
-
+; MIPS32-LABEL: float_4:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    mtc1 $7, $f0
+; MIPS32-NEXT:    mtc1 $6, $f1
+; MIPS32-NEXT:    lwc1 $f2, 28($sp)
+; MIPS32-NEXT:    lwc1 $f3, 24($sp)
+; MIPS32-NEXT:    add.s $f1, $f1, $f3
+; MIPS32-NEXT:    add.s $f0, $f0, $f2
+; MIPS32-NEXT:    mtc1 $5, $f2
+; MIPS32-NEXT:    lui $1, %hi(float_res_v4f32)
+; MIPS32-NEXT:    addiu $2, $1, %lo(float_res_v4f32)
+; MIPS32-NEXT:    lwc1 $f3, 20($sp)
+; MIPS32-NEXT:    swc1 $f0, 12($2)
+; MIPS32-NEXT:    swc1 $f1, 8($2)
+; MIPS32-NEXT:    add.s $f0, $f2, $f3
+; MIPS32-NEXT:    swc1 $f0, 4($2)
+; MIPS32-NEXT:    mtc1 $4, $f0
+; MIPS32-NEXT:    lwc1 $f1, 16($sp)
+; MIPS32-NEXT:    add.s $f0, $f0, $f1
+; MIPS32-NEXT:    swc1 $f0, %lo(float_res_v4f32)($1)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64EB-LABEL: float_4:
+; MIPS64EB:       # %bb.0:
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(float_4)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(float_4)))
+; MIPS64EB-NEXT:    dsrl $2, $7, 32
+; MIPS64EB-NEXT:    sll $2, $2, 0
+; MIPS64EB-NEXT:    sll $3, $4, 0
+; MIPS64EB-NEXT:    sll $8, $6, 0
+; MIPS64EB-NEXT:    sll $7, $7, 0
+; MIPS64EB-NEXT:    mtc1 $8, $f0
+; MIPS64EB-NEXT:    mtc1 $3, $f1
+; MIPS64EB-NEXT:    mtc1 $2, $f2
+; MIPS64EB-NEXT:    dsrl $2, $5, 32
+; MIPS64EB-NEXT:    sll $2, $2, 0
+; MIPS64EB-NEXT:    mtc1 $2, $f3
+; MIPS64EB-NEXT:    add.s $f2, $f3, $f2
+; MIPS64EB-NEXT:    add.s $f0, $f1, $f0
+; MIPS64EB-NEXT:    mtc1 $7, $f1
+; MIPS64EB-NEXT:    sll $2, $5, 0
+; MIPS64EB-NEXT:    mtc1 $2, $f3
+; MIPS64EB-NEXT:    add.s $f1, $f3, $f1
+; MIPS64EB-NEXT:    dsrl $2, $6, 32
+; MIPS64EB-NEXT:    sll $2, $2, 0
+; MIPS64EB-NEXT:    ld $1, %got_disp(float_res_v4f32)($1)
+; MIPS64EB-NEXT:    swc1 $f1, 12($1)
+; MIPS64EB-NEXT:    swc1 $f0, 4($1)
+; MIPS64EB-NEXT:    swc1 $f2, 8($1)
+; MIPS64EB-NEXT:    mtc1 $2, $f0
+; MIPS64EB-NEXT:    dsrl $2, $4, 32
+; MIPS64EB-NEXT:    sll $2, $2, 0
+; MIPS64EB-NEXT:    mtc1 $2, $f1
+; MIPS64EB-NEXT:    add.s $f0, $f1, $f0
+; MIPS64EB-NEXT:    swc1 $f0, 0($1)
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5-LABEL: float_4:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    ldi.b $w0, 0
+; MIPS32R5-NEXT:    lw $1, 20($sp)
+; MIPS32R5-NEXT:    lw $2, 16($sp)
+; MIPS32R5-NEXT:    move.v $w1, $w0
+; MIPS32R5-NEXT:    insert.w $w1[0], $2
+; MIPS32R5-NEXT:    insert.w $w1[1], $1
+; MIPS32R5-NEXT:    lw $1, 24($sp)
+; MIPS32R5-NEXT:    insert.w $w1[2], $1
+; MIPS32R5-NEXT:    lw $1, 28($sp)
+; MIPS32R5-NEXT:    insert.w $w1[3], $1
+; MIPS32R5-NEXT:    insert.w $w0[0], $4
+; MIPS32R5-NEXT:    insert.w $w0[1], $5
+; MIPS32R5-NEXT:    insert.w $w0[2], $6
+; MIPS32R5-NEXT:    insert.w $w0[3], $7
+; MIPS32R5-NEXT:    fadd.w $w0, $w0, $w1
+; MIPS32R5-NEXT:    lui $1, %hi(float_res_v4f32)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo(float_res_v4f32)
+; MIPS32R5-NEXT:    st.w $w0, 0($1)
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: float_4:
+; MIPS64R5EB:       # %bb.0:
+; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(float_4)))
+; MIPS64R5EB-NEXT:    daddu $1, $1, $25
+; MIPS64R5EB-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(float_4)))
+; MIPS64R5EB-NEXT:    ldi.b $w0, 0
+; MIPS64R5EB-NEXT:    move.v $w1, $w0
+; MIPS64R5EB-NEXT:    insert.d $w1[0], $6
+; MIPS64R5EB-NEXT:    insert.d $w1[1], $7
+; MIPS64R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS64R5EB-NEXT:    insert.d $w0[0], $4
+; MIPS64R5EB-NEXT:    insert.d $w0[1], $5
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    fadd.w $w0, $w0, $w1
+; MIPS64R5EB-NEXT:    ld $1, %got_disp(float_res_v4f32)($1)
+; MIPS64R5EB-NEXT:    st.w $w0, 0($1)
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS64EL-LABEL: float_4:
+; MIPS64EL:       # %bb.0:
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(float_4)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(float_4)))
+; MIPS64EL-NEXT:    dsrl $2, $7, 32
+; MIPS64EL-NEXT:    sll $2, $2, 0
+; MIPS64EL-NEXT:    sll $3, $4, 0
+; MIPS64EL-NEXT:    sll $8, $6, 0
+; MIPS64EL-NEXT:    sll $7, $7, 0
+; MIPS64EL-NEXT:    mtc1 $8, $f0
+; MIPS64EL-NEXT:    mtc1 $3, $f1
+; MIPS64EL-NEXT:    mtc1 $2, $f2
+; MIPS64EL-NEXT:    dsrl $2, $5, 32
+; MIPS64EL-NEXT:    sll $2, $2, 0
+; MIPS64EL-NEXT:    mtc1 $2, $f3
+; MIPS64EL-NEXT:    add.s $f2, $f3, $f2
+; MIPS64EL-NEXT:    add.s $f0, $f1, $f0
+; MIPS64EL-NEXT:    mtc1 $7, $f1
+; MIPS64EL-NEXT:    sll $2, $5, 0
+; MIPS64EL-NEXT:    mtc1 $2, $f3
+; MIPS64EL-NEXT:    add.s $f1, $f3, $f1
+; MIPS64EL-NEXT:    dsrl $2, $6, 32
+; MIPS64EL-NEXT:    sll $2, $2, 0
+; MIPS64EL-NEXT:    ld $1, %got_disp(float_res_v4f32)($1)
+; MIPS64EL-NEXT:    swc1 $f1, 8($1)
+; MIPS64EL-NEXT:    swc1 $f0, 0($1)
+; MIPS64EL-NEXT:    swc1 $f2, 12($1)
+; MIPS64EL-NEXT:    mtc1 $2, $f0
+; MIPS64EL-NEXT:    dsrl $2, $4, 32
+; MIPS64EL-NEXT:    sll $2, $2, 0
+; MIPS64EL-NEXT:    mtc1 $2, $f1
+; MIPS64EL-NEXT:    add.s $f0, $f1, $f0
+; MIPS64EL-NEXT:    swc1 $f0, 4($1)
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: float_4:
+; MIPS64R5EL:       # %bb.0:
+; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(float_4)))
+; MIPS64R5EL-NEXT:    daddu $1, $1, $25
+; MIPS64R5EL-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(float_4)))
+; MIPS64R5EL-NEXT:    ldi.b $w0, 0
+; MIPS64R5EL-NEXT:    move.v $w1, $w0
+; MIPS64R5EL-NEXT:    insert.d $w1[0], $6
+; MIPS64R5EL-NEXT:    insert.d $w1[1], $7
+; MIPS64R5EL-NEXT:    insert.d $w0[0], $4
+; MIPS64R5EL-NEXT:    insert.d $w0[1], $5
+; MIPS64R5EL-NEXT:    fadd.w $w0, $w0, $w1
+; MIPS64R5EL-NEXT:    ld $1, %got_disp(float_res_v4f32)($1)
+; MIPS64R5EL-NEXT:    st.w $w0, 0($1)
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
   %1 = fadd <4 x float> %a, %b
   store <4 x float> %1, <4 x float> * @float_res_v4f32
   ret void
@@ -512,44 +2882,117 @@ define void @float_4(<4 x float> %a, <4
 @double_v2f64 = external global <2 x double>
 
 define void @double_2(<2 x double> %a, <2 x double> %b) {
-; ALL-LABEL: double_2:
-; MIPS32-DAG: sw $7
-; MIPS32-DAG: sw $6
-; MIPS32-DAG: ldc1
-; MIPS32-DAG: ldc1
-; MIPS32:     add.d
-; MIPS32-DAG: sw $5
-; MIPS32-DAG: sw $4
-; MIPS32-DAG: ldc1
-; MIPS32-DAG: ldc1
-; MIPS32:     add.d
-
-; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp)
-; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]]
-; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp)
-; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]]
-; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp)
-; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]]
-; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp)
-; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]]
-
-; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4
-; MIPS32R5-DAG: insert.w $w[[W1]][1], $5
-; MIPS32R5-DAG: insert.w $w[[W1]][2], $6
-; MIPS32R5-DAG: insert.w $w[[W1]][3], $7
-
-; MIPS64-DAG: dmtc1 $6, $f[[R0:[0-9]+]]
-; MIPS64-DAG: dmtc1 $4, $f[[R1:[0-9]+]]
-; MIPS64-DAG: add.d $f[[R2:[0-9]+]], $f[[R1]], $f[[R0]]
-; MIPS64-DAG: dmtc1 $7, $f[[R3:[0-9]+]]
-; MIPS64-DAG: dmtc1 $5, $f[[R4:[0-9]+]]
-; MIPS64-DAG: add.d $f[[R5:[0-9]+]], $f[[R4]], $f[[R3]]
-
-; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
-; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
-; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
-; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
-
+; MIPS32-LABEL: double_2:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addiu $sp, $sp, -32
+; MIPS32-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32-NEXT:    lw $1, 60($sp)
+; MIPS32-NEXT:    sw $1, 12($sp)
+; MIPS32-NEXT:    lw $1, 56($sp)
+; MIPS32-NEXT:    sw $1, 8($sp)
+; MIPS32-NEXT:    sw $7, 28($sp)
+; MIPS32-NEXT:    sw $6, 24($sp)
+; MIPS32-NEXT:    ldc1 $f0, 8($sp)
+; MIPS32-NEXT:    ldc1 $f2, 24($sp)
+; MIPS32-NEXT:    add.d $f0, $f2, $f0
+; MIPS32-NEXT:    lui $1, %hi(double_v2f64)
+; MIPS32-NEXT:    addiu $2, $1, %lo(double_v2f64)
+; MIPS32-NEXT:    lw $3, 52($sp)
+; MIPS32-NEXT:    sdc1 $f0, 8($2)
+; MIPS32-NEXT:    sw $3, 4($sp)
+; MIPS32-NEXT:    lw $2, 48($sp)
+; MIPS32-NEXT:    sw $2, 0($sp)
+; MIPS32-NEXT:    sw $5, 20($sp)
+; MIPS32-NEXT:    sw $4, 16($sp)
+; MIPS32-NEXT:    ldc1 $f0, 0($sp)
+; MIPS32-NEXT:    ldc1 $f2, 16($sp)
+; MIPS32-NEXT:    add.d $f0, $f2, $f0
+; MIPS32-NEXT:    sdc1 $f0, %lo(double_v2f64)($1)
+; MIPS32-NEXT:    addiu $sp, $sp, 32
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: double_2:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(double_2)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(double_2)))
+; MIPS64-NEXT:    dmtc1 $7, $f0
+; MIPS64-NEXT:    dmtc1 $5, $f1
+; MIPS64-NEXT:    add.d $f0, $f1, $f0
+; MIPS64-NEXT:    ld $1, %got_disp(double_v2f64)($1)
+; MIPS64-NEXT:    sdc1 $f0, 8($1)
+; MIPS64-NEXT:    dmtc1 $6, $f0
+; MIPS64-NEXT:    dmtc1 $4, $f1
+; MIPS64-NEXT:    add.d $f0, $f1, $f0
+; MIPS64-NEXT:    sdc1 $f0, 0($1)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: double_2:
+; MIPS32R5EB:       # %bb.0:
+; MIPS32R5EB-NEXT:    ldi.b $w0, 0
+; MIPS32R5EB-NEXT:    lw $1, 20($sp)
+; MIPS32R5EB-NEXT:    lw $2, 16($sp)
+; MIPS32R5EB-NEXT:    move.v $w1, $w0
+; MIPS32R5EB-NEXT:    insert.w $w1[0], $2
+; MIPS32R5EB-NEXT:    insert.w $w1[1], $1
+; MIPS32R5EB-NEXT:    lw $1, 24($sp)
+; MIPS32R5EB-NEXT:    insert.w $w0[0], $4
+; MIPS32R5EB-NEXT:    insert.w $w1[2], $1
+; MIPS32R5EB-NEXT:    lw $1, 28($sp)
+; MIPS32R5EB-NEXT:    insert.w $w1[3], $1
+; MIPS32R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS32R5EB-NEXT:    insert.w $w0[1], $5
+; MIPS32R5EB-NEXT:    insert.w $w0[2], $6
+; MIPS32R5EB-NEXT:    insert.w $w0[3], $7
+; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS32R5EB-NEXT:    fadd.d $w0, $w0, $w1
+; MIPS32R5EB-NEXT:    lui $1, %hi(double_v2f64)
+; MIPS32R5EB-NEXT:    addiu $1, $1, %lo(double_v2f64)
+; MIPS32R5EB-NEXT:    st.d $w0, 0($1)
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5-LABEL: double_2:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(double_2)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(double_2)))
+; MIPS64R5-NEXT:    ldi.b $w0, 0
+; MIPS64R5-NEXT:    move.v $w1, $w0
+; MIPS64R5-NEXT:    insert.d $w1[0], $6
+; MIPS64R5-NEXT:    insert.d $w1[1], $7
+; MIPS64R5-NEXT:    insert.d $w0[0], $4
+; MIPS64R5-NEXT:    insert.d $w0[1], $5
+; MIPS64R5-NEXT:    fadd.d $w0, $w0, $w1
+; MIPS64R5-NEXT:    ld $1, %got_disp(double_v2f64)($1)
+; MIPS64R5-NEXT:    st.d $w0, 0($1)
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: double_2:
+; MIPS32R5EL:       # %bb.0:
+; MIPS32R5EL-NEXT:    ldi.b $w0, 0
+; MIPS32R5EL-NEXT:    lw $1, 20($sp)
+; MIPS32R5EL-NEXT:    lw $2, 16($sp)
+; MIPS32R5EL-NEXT:    move.v $w1, $w0
+; MIPS32R5EL-NEXT:    insert.w $w1[0], $2
+; MIPS32R5EL-NEXT:    insert.w $w1[1], $1
+; MIPS32R5EL-NEXT:    lw $1, 24($sp)
+; MIPS32R5EL-NEXT:    insert.w $w1[2], $1
+; MIPS32R5EL-NEXT:    lw $1, 28($sp)
+; MIPS32R5EL-NEXT:    insert.w $w1[3], $1
+; MIPS32R5EL-NEXT:    insert.w $w0[0], $4
+; MIPS32R5EL-NEXT:    insert.w $w0[1], $5
+; MIPS32R5EL-NEXT:    insert.w $w0[2], $6
+; MIPS32R5EL-NEXT:    insert.w $w0[3], $7
+; MIPS32R5EL-NEXT:    fadd.d $w0, $w0, $w1
+; MIPS32R5EL-NEXT:    lui $1, %hi(double_v2f64)
+; MIPS32R5EL-NEXT:    addiu $1, $1, %lo(double_v2f64)
+; MIPS32R5EL-NEXT:    st.d $w0, 0($1)
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
   %1 = fadd <2 x double> %a, %b
   store <2 x double> %1, <2 x double> * @double_v2f64
   ret void
@@ -573,171 +3016,566 @@ define void @double_2(<2 x double> %a, <
 
 @gv2i64 = global <2 x i64> <i64 0, i64 1>
 
-define <2 x i8> @ret_2_i8() {
-; ALL-LABEL: ret_2_i8:
-; MIPS32-DAG:   lhu $2
-; MIPS32R5-DAG: lhu $2
-
 ; FIXME: why is this lh instead of lhu on mips64?
-
-; MIPS64-DAG:  lh $2
-; MIPS64-DAG:  lh $2
+define <2 x i8> @ret_2_i8() {
+; MIPS32-LABEL: ret_2_i8:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lui $1, %hi(gv2i8)
+; MIPS32-NEXT:    lhu $2, %lo(gv2i8)($1)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: ret_2_i8:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_2_i8)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_2_i8)))
+; MIPS64-NEXT:    ld $1, %got_disp(gv2i8)($1)
+; MIPS64-NEXT:    lh $2, 0($1)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: ret_2_i8:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    lui $1, %hi(gv2i8)
+; MIPS32R5-NEXT:    lhu $2, %lo(gv2i8)($1)
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: ret_2_i8:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_2_i8)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_2_i8)))
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv2i8)($1)
+; MIPS64R5-NEXT:    lh $2, 0($1)
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
   %1 = load <2 x i8>, <2 x i8> * @gv2i8
   ret <2 x i8> %1
 }
 
 define <4 x i8> @ret_4_i8() {
-; ALL-LABEL: ret_4_i8:
-; MIPS32-DAG:   lw $2
-; MIPS32R5-DAG: lw $2
-
-; MIPS64-DAG:   lw $2
-; MIPS64R5-DAG: lw $2
-
+; MIPS32-LABEL: ret_4_i8:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lui $1, %hi(gv4i8)
+; MIPS32-NEXT:    lw $2, %lo(gv4i8)($1)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: ret_4_i8:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_4_i8)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_4_i8)))
+; MIPS64-NEXT:    ld $1, %got_disp(gv4i8)($1)
+; MIPS64-NEXT:    lw $2, 0($1)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: ret_4_i8:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    lui $1, %hi(gv4i8)
+; MIPS32R5-NEXT:    lw $2, %lo(gv4i8)($1)
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: ret_4_i8:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_4_i8)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_4_i8)))
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv4i8)($1)
+; MIPS64R5-NEXT:    lw $2, 0($1)
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
   %1 = load <4 x i8>, <4 x i8> * @gv4i8
   ret <4 x i8> %1
 }
 
 define <8 x i8> @ret_8_i8() {
-; ALL-LABEL: ret_8_i8:
-; MIPS32-DAG:   lw $2
-; MIPS32-DAG:   lw $3
-
-; MIPS32R5: copy_s.w $2, $w[[W0:[0-9]+]]
-; MIPS32R5: copy_s.w $3, $w[[W0]]
-
-; MIPS64-DAG:   ld $2
-; MIPS64R5-DAG: ld $2
+; MIPS32-LABEL: ret_8_i8:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lui $1, %hi(gv8i8)
+; MIPS32-NEXT:    lw $2, %lo(gv8i8)($1)
+; MIPS32-NEXT:    addiu $1, $1, %lo(gv8i8)
+; MIPS32-NEXT:    lw $3, 4($1)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: ret_8_i8:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_8_i8)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_8_i8)))
+; MIPS64-NEXT:    ld $1, %got_disp(gv8i8)($1)
+; MIPS64-NEXT:    ld $2, 0($1)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: ret_8_i8:
+; MIPS32R5EB:       # %bb.0:
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -32
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32R5EB-NEXT:    sw $fp, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    .cfi_offset 30, -4
+; MIPS32R5EB-NEXT:    move $fp, $sp
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EB-NEXT:    and $sp, $sp, $1
+; MIPS32R5EB-NEXT:    lui $1, %hi(gv8i8)
+; MIPS32R5EB-NEXT:    lw $2, %lo(gv8i8)($1)
+; MIPS32R5EB-NEXT:    sw $2, 4($sp)
+; MIPS32R5EB-NEXT:    addiu $1, $1, %lo(gv8i8)
+; MIPS32R5EB-NEXT:    lw $1, 4($1)
+; MIPS32R5EB-NEXT:    sw $1, 12($sp)
+; MIPS32R5EB-NEXT:    ld.w $w0, 0($sp)
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[3]
+; MIPS32R5EB-NEXT:    move $sp, $fp
+; MIPS32R5EB-NEXT:    lw $fp, 28($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5-LABEL: ret_8_i8:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_8_i8)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_8_i8)))
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv8i8)($1)
+; MIPS64R5-NEXT:    ld $2, 0($1)
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: ret_8_i8:
+; MIPS32R5EL:       # %bb.0:
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -32
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32R5EL-NEXT:    sw $fp, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    .cfi_offset 30, -4
+; MIPS32R5EL-NEXT:    move $fp, $sp
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EL-NEXT:    and $sp, $sp, $1
+; MIPS32R5EL-NEXT:    lui $1, %hi(gv8i8)
+; MIPS32R5EL-NEXT:    lw $2, %lo(gv8i8)($1)
+; MIPS32R5EL-NEXT:    sw $2, 0($sp)
+; MIPS32R5EL-NEXT:    addiu $1, $1, %lo(gv8i8)
+; MIPS32R5EL-NEXT:    lw $1, 4($1)
+; MIPS32R5EL-NEXT:    sw $1, 8($sp)
+; MIPS32R5EL-NEXT:    ld.w $w0, 0($sp)
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5EL-NEXT:    move $sp, $fp
+; MIPS32R5EL-NEXT:    lw $fp, 28($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
   %1 = load <8 x i8>, <8 x i8> * @gv8i8
   ret <8 x i8> %1
 }
 
 define <16 x i8> @ret_16_i8() {
-; ALL-LABEL: ret_16_i8:
-; MIPS32-DAG: lw $2
-; MIPS32-DAG: lw $3
-; MIPS32-DAG: lw $4
-; MIPS32-DAG: lw $5
-
-; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
-; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
-; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
-; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
-
-; MIPS64-DAG: ld $2
-; MIPS64-DAG: ld $3
-
-; MIPS64R5-DAG: copy_s.d $2
-; MIPS64R5-DAG: copy_s.d $3
-
+; MIPS32-LABEL: ret_16_i8:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lui $1, %hi(gv16i8)
+; MIPS32-NEXT:    lw $2, %lo(gv16i8)($1)
+; MIPS32-NEXT:    addiu $1, $1, %lo(gv16i8)
+; MIPS32-NEXT:    lw $3, 4($1)
+; MIPS32-NEXT:    lw $4, 8($1)
+; MIPS32-NEXT:    lw $5, 12($1)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: ret_16_i8:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_16_i8)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_16_i8)))
+; MIPS64-NEXT:    ld $1, %got_disp(gv16i8)($1)
+; MIPS64-NEXT:    ld $2, 0($1)
+; MIPS64-NEXT:    ld $3, 8($1)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: ret_16_i8:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    lui $1, %hi(gv16i8)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo(gv16i8)
+; MIPS32R5-NEXT:    ld.w $w0, 0($1)
+; MIPS32R5-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32R5-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32R5-NEXT:    copy_s.w $5, $w0[3]
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: ret_16_i8:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_16_i8)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_16_i8)))
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv16i8)($1)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
   %1 = load <16 x i8>, <16 x i8> * @gv16i8
   ret <16 x i8> %1
 }
 
 define <2 x i16> @ret_2_i16() {
-; ALL-LABEL: ret_2_i16:
-; MIPS32-DAG:   lw $2
-
-; MIPS32R5-DAG: lw $2
-
-; MIPS64-DAG:   lw $2
-
-; MIPS64R5-DAG: lw $2
+; MIPS32-LABEL: ret_2_i16:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lui $1, %hi(gv2i16)
+; MIPS32-NEXT:    lw $2, %lo(gv2i16)($1)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: ret_2_i16:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_2_i16)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_2_i16)))
+; MIPS64-NEXT:    ld $1, %got_disp(gv2i16)($1)
+; MIPS64-NEXT:    lw $2, 0($1)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: ret_2_i16:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    lui $1, %hi(gv2i16)
+; MIPS32R5-NEXT:    lw $2, %lo(gv2i16)($1)
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: ret_2_i16:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_2_i16)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_2_i16)))
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv2i16)($1)
+; MIPS64R5-NEXT:    lw $2, 0($1)
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
   %1 = load <2 x i16>, <2 x i16> * @gv2i16
   ret <2 x i16> %1
 }
 
 define <4 x i16> @ret_4_i16() {
-; ALL-LABEL: ret_4_i16:
-; MIPS32-DAG: lw $2
-; MIPS32-DAG: lw $3
-
-; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]]
-; MIPS32R5-DAG: copy_s.w $3, $w[[W0]]
-
-; MIPS64-DAG:   ld $2
-; MIPS64R5-DAG: ld $2
+; MIPS32-LABEL: ret_4_i16:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lui $1, %hi(gv4i16)
+; MIPS32-NEXT:    lw $2, %lo(gv4i16)($1)
+; MIPS32-NEXT:    addiu $1, $1, %lo(gv4i16)
+; MIPS32-NEXT:    lw $3, 4($1)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: ret_4_i16:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_4_i16)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_4_i16)))
+; MIPS64-NEXT:    ld $1, %got_disp(gv4i16)($1)
+; MIPS64-NEXT:    ld $2, 0($1)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: ret_4_i16:
+; MIPS32R5EB:       # %bb.0:
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -32
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32R5EB-NEXT:    sw $fp, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    .cfi_offset 30, -4
+; MIPS32R5EB-NEXT:    move $fp, $sp
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EB-NEXT:    and $sp, $sp, $1
+; MIPS32R5EB-NEXT:    lui $1, %hi(gv4i16)
+; MIPS32R5EB-NEXT:    lw $2, %lo(gv4i16)($1)
+; MIPS32R5EB-NEXT:    sw $2, 4($sp)
+; MIPS32R5EB-NEXT:    addiu $1, $1, %lo(gv4i16)
+; MIPS32R5EB-NEXT:    lw $1, 4($1)
+; MIPS32R5EB-NEXT:    sw $1, 12($sp)
+; MIPS32R5EB-NEXT:    ld.w $w0, 0($sp)
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[3]
+; MIPS32R5EB-NEXT:    move $sp, $fp
+; MIPS32R5EB-NEXT:    lw $fp, 28($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5-LABEL: ret_4_i16:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_4_i16)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_4_i16)))
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv4i16)($1)
+; MIPS64R5-NEXT:    ld $2, 0($1)
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: ret_4_i16:
+; MIPS32R5EL:       # %bb.0:
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -32
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32R5EL-NEXT:    sw $fp, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    .cfi_offset 30, -4
+; MIPS32R5EL-NEXT:    move $fp, $sp
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EL-NEXT:    and $sp, $sp, $1
+; MIPS32R5EL-NEXT:    lui $1, %hi(gv4i16)
+; MIPS32R5EL-NEXT:    lw $2, %lo(gv4i16)($1)
+; MIPS32R5EL-NEXT:    sw $2, 0($sp)
+; MIPS32R5EL-NEXT:    addiu $1, $1, %lo(gv4i16)
+; MIPS32R5EL-NEXT:    lw $1, 4($1)
+; MIPS32R5EL-NEXT:    sw $1, 8($sp)
+; MIPS32R5EL-NEXT:    ld.w $w0, 0($sp)
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5EL-NEXT:    move $sp, $fp
+; MIPS32R5EL-NEXT:    lw $fp, 28($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
   %1 = load <4 x i16>, <4 x i16> * @gv4i16
   ret <4 x i16> %1
 }
 
 define <8 x i16> @ret_8_i16() {
-; ALL-LABEL: ret_8_i16:
-; MIPS32-DAG: lw $2
-; MIPS32-DAG: lw $3
-; MIPS32-DAG: lw $4
-; MIPS32-DAG: lw $5
-
-; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
-; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
-; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
-; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
-
-; MIPS64-DAG: ld $2
-; MIPS64-DAG: ld $3
-
-; MIPS64R5-DAG: copy_s.d $2
-; MIPS64R5-DAG: copy_s.d $3
-
+; MIPS32-LABEL: ret_8_i16:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lui $1, %hi(gv8i16)
+; MIPS32-NEXT:    lw $2, %lo(gv8i16)($1)
+; MIPS32-NEXT:    addiu $1, $1, %lo(gv8i16)
+; MIPS32-NEXT:    lw $3, 4($1)
+; MIPS32-NEXT:    lw $4, 8($1)
+; MIPS32-NEXT:    lw $5, 12($1)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: ret_8_i16:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_8_i16)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_8_i16)))
+; MIPS64-NEXT:    ld $1, %got_disp(gv8i16)($1)
+; MIPS64-NEXT:    ld $2, 0($1)
+; MIPS64-NEXT:    ld $3, 8($1)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: ret_8_i16:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    lui $1, %hi(gv8i16)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo(gv8i16)
+; MIPS32R5-NEXT:    ld.w $w0, 0($1)
+; MIPS32R5-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32R5-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32R5-NEXT:    copy_s.w $5, $w0[3]
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: ret_8_i16:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_8_i16)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_8_i16)))
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv8i16)($1)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
   %1 = load <8 x i16>, <8 x i16> * @gv8i16
   ret <8 x i16> %1
 }
 
 define <2 x i32> @ret_2_i32() {
-; ALL-LABEL: ret_2_i32:
-; MIPS32-DAG: lw $2
-; MIPS32-DAG: lw $3
-
-; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]]
-; MIPS32R5-DAG: copy_s.w $3, $w[[W0]]
-
-; MIPS64-DAG:   ld $2
-; MIPS64R5-DAG: ld $2
-
+; MIPS32-LABEL: ret_2_i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lui $1, %hi(gv2i32)
+; MIPS32-NEXT:    lw $2, %lo(gv2i32)($1)
+; MIPS32-NEXT:    addiu $1, $1, %lo(gv2i32)
+; MIPS32-NEXT:    lw $3, 4($1)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: ret_2_i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_2_i32)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_2_i32)))
+; MIPS64-NEXT:    ld $1, %got_disp(gv2i32)($1)
+; MIPS64-NEXT:    ld $2, 0($1)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: ret_2_i32:
+; MIPS32R5EB:       # %bb.0:
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -32
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32R5EB-NEXT:    sw $fp, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    .cfi_offset 30, -4
+; MIPS32R5EB-NEXT:    move $fp, $sp
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EB-NEXT:    and $sp, $sp, $1
+; MIPS32R5EB-NEXT:    lui $1, %hi(gv2i32)
+; MIPS32R5EB-NEXT:    lw $2, %lo(gv2i32)($1)
+; MIPS32R5EB-NEXT:    sw $2, 4($sp)
+; MIPS32R5EB-NEXT:    addiu $1, $1, %lo(gv2i32)
+; MIPS32R5EB-NEXT:    lw $1, 4($1)
+; MIPS32R5EB-NEXT:    sw $1, 12($sp)
+; MIPS32R5EB-NEXT:    ld.w $w0, 0($sp)
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[3]
+; MIPS32R5EB-NEXT:    move $sp, $fp
+; MIPS32R5EB-NEXT:    lw $fp, 28($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5-LABEL: ret_2_i32:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_2_i32)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_2_i32)))
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv2i32)($1)
+; MIPS64R5-NEXT:    ld $2, 0($1)
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: ret_2_i32:
+; MIPS32R5EL:       # %bb.0:
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -32
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32R5EL-NEXT:    sw $fp, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    .cfi_offset 30, -4
+; MIPS32R5EL-NEXT:    move $fp, $sp
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
+; MIPS32R5EL-NEXT:    and $sp, $sp, $1
+; MIPS32R5EL-NEXT:    lui $1, %hi(gv2i32)
+; MIPS32R5EL-NEXT:    lw $2, %lo(gv2i32)($1)
+; MIPS32R5EL-NEXT:    sw $2, 0($sp)
+; MIPS32R5EL-NEXT:    addiu $1, $1, %lo(gv2i32)
+; MIPS32R5EL-NEXT:    lw $1, 4($1)
+; MIPS32R5EL-NEXT:    sw $1, 8($sp)
+; MIPS32R5EL-NEXT:    ld.w $w0, 0($sp)
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5EL-NEXT:    move $sp, $fp
+; MIPS32R5EL-NEXT:    lw $fp, 28($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
   %1 = load <2 x i32>, <2 x i32> * @gv2i32
   ret <2 x i32> %1
 }
 
 define <4 x i32> @ret_4_i32() {
-; ALL-LABEL: ret_4_i32:
-; MIPS32-DAG: lw $2
-; MIPS32-DAG: lw $3
-; MIPS32-DAG: lw $4
-; MIPS32-DAG: lw $5
-
-; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
-; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
-; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
-; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
-
-; MIPS64-DAG: ld $2
-; MIPS64-DAG: ld $3
-
-; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]]
-; MIPS64R5-DAG: copy_s.d $3, $w[[W0]]
-
+; MIPS32-LABEL: ret_4_i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lui $1, %hi(gv4i32)
+; MIPS32-NEXT:    lw $2, %lo(gv4i32)($1)
+; MIPS32-NEXT:    addiu $1, $1, %lo(gv4i32)
+; MIPS32-NEXT:    lw $3, 4($1)
+; MIPS32-NEXT:    lw $4, 8($1)
+; MIPS32-NEXT:    lw $5, 12($1)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: ret_4_i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_4_i32)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_4_i32)))
+; MIPS64-NEXT:    ld $1, %got_disp(gv4i32)($1)
+; MIPS64-NEXT:    ld $2, 0($1)
+; MIPS64-NEXT:    ld $3, 8($1)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: ret_4_i32:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    lui $1, %hi(gv4i32)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo(gv4i32)
+; MIPS32R5-NEXT:    ld.w $w0, 0($1)
+; MIPS32R5-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32R5-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32R5-NEXT:    copy_s.w $5, $w0[3]
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: ret_4_i32:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_4_i32)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_4_i32)))
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv4i32)($1)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
   %1 = load <4 x i32>, <4 x i32> * @gv4i32
   ret <4 x i32> %1
 }
 
 define <2 x i64> @ret_2_i64() {
-; ALL-LABEL: ret_2_i64:
-; MIPS32-DAG: lw $2
-; MIPS32-DAG: lw $3
-; MIPS32-DAG: lw $4
-; MIPS32-DAG: lw $5
-
-; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
-; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
-; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
-; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
-
-; MIPS64-DAG: ld $2
-; MIPS64-DAG: ld $3
-
-; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]]
-; MIPS64R5-DAG: copy_s.d $3, $w[[W0]]
-
+; MIPS32-LABEL: ret_2_i64:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lui $1, %hi(gv2i64)
+; MIPS32-NEXT:    lw $2, %lo(gv2i64)($1)
+; MIPS32-NEXT:    addiu $1, $1, %lo(gv2i64)
+; MIPS32-NEXT:    lw $3, 4($1)
+; MIPS32-NEXT:    lw $4, 8($1)
+; MIPS32-NEXT:    lw $5, 12($1)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: ret_2_i64:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_2_i64)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_2_i64)))
+; MIPS64-NEXT:    ld $1, %got_disp(gv2i64)($1)
+; MIPS64-NEXT:    ld $2, 0($1)
+; MIPS64-NEXT:    ld $3, 8($1)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: ret_2_i64:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    lui $1, %hi(gv2i64)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo(gv2i64)
+; MIPS32R5-NEXT:    ld.w $w0, 0($1)
+; MIPS32R5-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32R5-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32R5-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32R5-NEXT:    copy_s.w $5, $w0[3]
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: ret_2_i64:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_2_i64)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_2_i64)))
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv2i64)($1)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
   %1 = load <2 x i64>, <2 x i64> * @gv2i64
   ret <2 x i64> %1
 }
@@ -746,40 +3584,100 @@ define <2 x i64> @ret_2_i64() {
 @gv4f32 = global <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
 
 define <2 x float> @ret_float_2() {
+; MIPS32-LABEL: ret_float_2:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    lui $1, %hi(gv2f32)
+; MIPS32-NEXT:    addiu $2, $1, %lo(gv2f32)
+; MIPS32-NEXT:    lwc1 $f0, 4($2)
+; MIPS32-NEXT:    swc1 $f0, 4($4)
+; MIPS32-NEXT:    lwc1 $f0, %lo(gv2f32)($1)
+; MIPS32-NEXT:    swc1 $f0, 0($4)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: ret_float_2:
+; MIPS64:       # %bb.0: # %entry
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_float_2)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_float_2)))
+; MIPS64-NEXT:    ld $1, %got_disp(gv2f32)($1)
+; MIPS64-NEXT:    ld $2, 0($1)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: ret_float_2:
+; MIPS32R5:       # %bb.0: # %entry
+; MIPS32R5-NEXT:    lui $1, %hi(gv2f32)
+; MIPS32R5-NEXT:    addiu $2, $1, %lo(gv2f32)
+; MIPS32R5-NEXT:    lwc1 $f0, 4($2)
+; MIPS32R5-NEXT:    swc1 $f0, 4($4)
+; MIPS32R5-NEXT:    lwc1 $f0, %lo(gv2f32)($1)
+; MIPS32R5-NEXT:    swc1 $f0, 0($4)
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: ret_float_2:
+; MIPS64R5:       # %bb.0: # %entry
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_float_2)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_float_2)))
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv2f32)($1)
+; MIPS64R5-NEXT:    ld $2, 0($1)
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
 entry:
-; ALL-LABEL: ret_float_2:
-
-; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
-; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
-
-; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 0($4)
-; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 4($4)
-
-; MIPS64: ld $2
-
-; MIPS64R5: ld $2
-
   %0 = load <2 x float>, <2 x float> * @gv2f32
   ret <2 x float> %0
 }
 
 define <4 x float> @ret_float_4() {
+; MIPS32-LABEL: ret_float_4:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    lui $1, %hi(gv4f32)
+; MIPS32-NEXT:    addiu $2, $1, %lo(gv4f32)
+; MIPS32-NEXT:    lwc1 $f0, 12($2)
+; MIPS32-NEXT:    swc1 $f0, 12($4)
+; MIPS32-NEXT:    lwc1 $f0, 8($2)
+; MIPS32-NEXT:    swc1 $f0, 8($4)
+; MIPS32-NEXT:    lwc1 $f0, 4($2)
+; MIPS32-NEXT:    swc1 $f0, 4($4)
+; MIPS32-NEXT:    lwc1 $f0, %lo(gv4f32)($1)
+; MIPS32-NEXT:    swc1 $f0, 0($4)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: ret_float_4:
+; MIPS64:       # %bb.0: # %entry
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_float_4)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_float_4)))
+; MIPS64-NEXT:    ld $1, %got_disp(gv4f32)($1)
+; MIPS64-NEXT:    ld $2, 0($1)
+; MIPS64-NEXT:    ld $3, 8($1)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: ret_float_4:
+; MIPS32R5:       # %bb.0: # %entry
+; MIPS32R5-NEXT:    lui $1, %hi(gv4f32)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo(gv4f32)
+; MIPS32R5-NEXT:    ld.w $w0, 0($1)
+; MIPS32R5-NEXT:    st.w $w0, 0($4)
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: ret_float_4:
+; MIPS64R5:       # %bb.0: # %entry
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_float_4)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_float_4)))
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv4f32)($1)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
 entry:
-; ALL-LABEL: ret_float_4:
-
-; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
-; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
-; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4)
-; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4)
-
-; MIPS32R5: st.w $w{{[0-9]+}}, 0($4)
-
-; MIPS64-DAG: ld $2
-; MIPS64-DAG: ld $3
-
-; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0]
-; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1]
-
   %0 = load <4 x float>, <4 x float> * @gv4f32
   ret <4 x float> %0
 }
@@ -787,20 +3685,49 @@ entry:
 @gv2f64 = global <2 x double> <double 0.0, double 0.0>
 
 define <2 x double> @ret_double_2() {
+; MIPS32-LABEL: ret_double_2:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    lui $1, %hi(gv2f64)
+; MIPS32-NEXT:    addiu $2, $1, %lo(gv2f64)
+; MIPS32-NEXT:    ldc1 $f0, 8($2)
+; MIPS32-NEXT:    sdc1 $f0, 8($4)
+; MIPS32-NEXT:    ldc1 $f0, %lo(gv2f64)($1)
+; MIPS32-NEXT:    sdc1 $f0, 0($4)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: ret_double_2:
+; MIPS64:       # %bb.0: # %entry
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_double_2)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_double_2)))
+; MIPS64-NEXT:    ld $1, %got_disp(gv2f64)($1)
+; MIPS64-NEXT:    ld $2, 0($1)
+; MIPS64-NEXT:    ld $3, 8($1)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: ret_double_2:
+; MIPS32R5:       # %bb.0: # %entry
+; MIPS32R5-NEXT:    lui $1, %hi(gv2f64)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo(gv2f64)
+; MIPS32R5-NEXT:    ld.d $w0, 0($1)
+; MIPS32R5-NEXT:    st.d $w0, 0($4)
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: ret_double_2:
+; MIPS64R5:       # %bb.0: # %entry
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(ret_double_2)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(ret_double_2)))
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv2f64)($1)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
 entry:
-; ALL-LABEL: ret_double_2:
-
-; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 8($4)
-; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 0($4)
-
-; MIPS32R5: st.d $w{{[0-9]+}}, 0($4)
-
-; MIPS64-DAG: ld $2
-; MIPS64-DAG: ld $2
-
-; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0]
-; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1]
-
   %0 = load <2 x double>, <2 x double> * @gv2f64
   ret <2 x double> %0
 }
@@ -808,467 +3735,2107 @@ entry:
 ; Test argument lowering and call result lowering.
 
 define void @call_i8_2() {
+; MIPS32EB-LABEL: call_i8_2:
+; MIPS32EB:       # %bb.0: # %entry
+; MIPS32EB-NEXT:    addiu $sp, $sp, -24
+; MIPS32EB-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32EB-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32EB-NEXT:    .cfi_offset 31, -4
+; MIPS32EB-NEXT:    addiu $4, $zero, 1543
+; MIPS32EB-NEXT:    addiu $5, $zero, 3080
+; MIPS32EB-NEXT:    jal i8_2
+; MIPS32EB-NEXT:    nop
+; MIPS32EB-NEXT:    srl $1, $2, 16
+; MIPS32EB-NEXT:    lui $3, %hi(gv2i8)
+; MIPS32EB-NEXT:    addiu $4, $3, %lo(gv2i8)
+; MIPS32EB-NEXT:    sb $1, 1($4)
+; MIPS32EB-NEXT:    srl $1, $2, 24
+; MIPS32EB-NEXT:    sb $1, %lo(gv2i8)($3)
+; MIPS32EB-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32EB-NEXT:    addiu $sp, $sp, 24
+; MIPS32EB-NEXT:    jr $ra
+; MIPS32EB-NEXT:    nop
+;
+; MIPS64EB-LABEL: call_i8_2:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    .cfi_offset 31, -8
+; MIPS64EB-NEXT:    .cfi_offset 28, -16
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_2)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_2)))
+; MIPS64EB-NEXT:    ld $25, %call16(i8_2)($gp)
+; MIPS64EB-NEXT:    daddiu $4, $zero, 1543
+; MIPS64EB-NEXT:    daddiu $5, $zero, 3080
+; MIPS64EB-NEXT:    jalr $25
+; MIPS64EB-NEXT:    nop
+; MIPS64EB-NEXT:    dsrl $1, $2, 48
+; MIPS64EB-NEXT:    ld $3, %got_disp(gv2i8)($gp)
+; MIPS64EB-NEXT:    sb $1, 1($3)
+; MIPS64EB-NEXT:    dsrl $1, $2, 56
+; MIPS64EB-NEXT:    sb $1, 0($3)
+; MIPS64EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: call_i8_2:
+; MIPS32R5EB:       # %bb.0: # %entry
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -32
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32R5EB-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
+; MIPS32R5EB-NEXT:    addiu $1, $zero, 1543
+; MIPS32R5EB-NEXT:    sh $1, 20($sp)
+; MIPS32R5EB-NEXT:    addiu $1, $zero, 3080
+; MIPS32R5EB-NEXT:    sh $1, 24($sp)
+; MIPS32R5EB-NEXT:    lhu $4, 20($sp)
+; MIPS32R5EB-NEXT:    lhu $5, 24($sp)
+; MIPS32R5EB-NEXT:    jal i8_2
+; MIPS32R5EB-NEXT:    nop
+; MIPS32R5EB-NEXT:    sw $2, 16($sp)
+; MIPS32R5EB-NEXT:    lui $1, %hi(gv2i8)
+; MIPS32R5EB-NEXT:    lhu $2, 16($sp)
+; MIPS32R5EB-NEXT:    sh $2, %lo(gv2i8)($1)
+; MIPS32R5EB-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: call_i8_2:
+; MIPS64R5EB:       # %bb.0: # %entry
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -48
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 48
+; MIPS64R5EB-NEXT:    sd $ra, 40($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    sd $gp, 32($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    .cfi_offset 31, -8
+; MIPS64R5EB-NEXT:    .cfi_offset 28, -16
+; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_2)))
+; MIPS64R5EB-NEXT:    daddu $1, $1, $25
+; MIPS64R5EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_2)))
+; MIPS64R5EB-NEXT:    addiu $1, $zero, 1543
+; MIPS64R5EB-NEXT:    sh $1, 24($sp)
+; MIPS64R5EB-NEXT:    addiu $1, $zero, 3080
+; MIPS64R5EB-NEXT:    sh $1, 28($sp)
+; MIPS64R5EB-NEXT:    ld $25, %call16(i8_2)($gp)
+; MIPS64R5EB-NEXT:    lh $4, 24($sp)
+; MIPS64R5EB-NEXT:    lh $5, 28($sp)
+; MIPS64R5EB-NEXT:    jalr $25
+; MIPS64R5EB-NEXT:    nop
+; MIPS64R5EB-NEXT:    sd $2, 16($sp)
+; MIPS64R5EB-NEXT:    ldi.b $w0, 0
+; MIPS64R5EB-NEXT:    lbu $1, 16($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[0], $1
+; MIPS64R5EB-NEXT:    lbu $1, 17($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[1], $1
+; MIPS64R5EB-NEXT:    lbu $1, 18($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[2], $1
+; MIPS64R5EB-NEXT:    lbu $1, 19($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[3], $1
+; MIPS64R5EB-NEXT:    lbu $1, 20($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[4], $1
+; MIPS64R5EB-NEXT:    lbu $1, 21($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[5], $1
+; MIPS64R5EB-NEXT:    lbu $1, 23($sp)
+; MIPS64R5EB-NEXT:    lbu $2, 22($sp)
+; MIPS64R5EB-NEXT:    insert.h $w0[6], $2
+; MIPS64R5EB-NEXT:    insert.h $w0[7], $1
+; MIPS64R5EB-NEXT:    copy_s.h $1, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.h $2, $w0[1]
+; MIPS64R5EB-NEXT:    sw $2, 12($sp)
+; MIPS64R5EB-NEXT:    sw $1, 4($sp)
+; MIPS64R5EB-NEXT:    ld.d $w0, 0($sp)
+; MIPS64R5EB-NEXT:    copy_s.d $1, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[1]
+; MIPS64R5EB-NEXT:    ld $3, %got_disp(gv2i8)($gp)
+; MIPS64R5EB-NEXT:    sb $2, 1($3)
+; MIPS64R5EB-NEXT:    sb $1, 0($3)
+; MIPS64R5EB-NEXT:    ld $gp, 32($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    ld $ra, 40($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 48
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS32EL-LABEL: call_i8_2:
+; MIPS32EL:       # %bb.0: # %entry
+; MIPS32EL-NEXT:    addiu $sp, $sp, -24
+; MIPS32EL-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32EL-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32EL-NEXT:    .cfi_offset 31, -4
+; MIPS32EL-NEXT:    addiu $4, $zero, 1798
+; MIPS32EL-NEXT:    addiu $5, $zero, 2060
+; MIPS32EL-NEXT:    jal i8_2
+; MIPS32EL-NEXT:    nop
+; MIPS32EL-NEXT:    lui $1, %hi(gv2i8)
+; MIPS32EL-NEXT:    sb $2, %lo(gv2i8)($1)
+; MIPS32EL-NEXT:    srl $2, $2, 8
+; MIPS32EL-NEXT:    addiu $1, $1, %lo(gv2i8)
+; MIPS32EL-NEXT:    sb $2, 1($1)
+; MIPS32EL-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32EL-NEXT:    addiu $sp, $sp, 24
+; MIPS32EL-NEXT:    jr $ra
+; MIPS32EL-NEXT:    nop
+;
+; MIPS64EL-LABEL: call_i8_2:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    .cfi_offset 31, -8
+; MIPS64EL-NEXT:    .cfi_offset 28, -16
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_2)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_2)))
+; MIPS64EL-NEXT:    ld $25, %call16(i8_2)($gp)
+; MIPS64EL-NEXT:    daddiu $4, $zero, 1798
+; MIPS64EL-NEXT:    daddiu $5, $zero, 2060
+; MIPS64EL-NEXT:    jalr $25
+; MIPS64EL-NEXT:    nop
+; MIPS64EL-NEXT:    sll $1, $2, 0
+; MIPS64EL-NEXT:    ld $2, %got_disp(gv2i8)($gp)
+; MIPS64EL-NEXT:    sb $1, 0($2)
+; MIPS64EL-NEXT:    srl $1, $1, 8
+; MIPS64EL-NEXT:    sb $1, 1($2)
+; MIPS64EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: call_i8_2:
+; MIPS32R5EL:       # %bb.0: # %entry
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -32
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32R5EL-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    .cfi_offset 31, -4
+; MIPS32R5EL-NEXT:    addiu $1, $zero, 1798
+; MIPS32R5EL-NEXT:    sh $1, 20($sp)
+; MIPS32R5EL-NEXT:    addiu $1, $zero, 2060
+; MIPS32R5EL-NEXT:    sh $1, 24($sp)
+; MIPS32R5EL-NEXT:    lhu $4, 20($sp)
+; MIPS32R5EL-NEXT:    lhu $5, 24($sp)
+; MIPS32R5EL-NEXT:    jal i8_2
+; MIPS32R5EL-NEXT:    nop
+; MIPS32R5EL-NEXT:    sw $2, 16($sp)
+; MIPS32R5EL-NEXT:    lui $1, %hi(gv2i8)
+; MIPS32R5EL-NEXT:    lhu $2, 16($sp)
+; MIPS32R5EL-NEXT:    sh $2, %lo(gv2i8)($1)
+; MIPS32R5EL-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: call_i8_2:
+; MIPS64R5EL:       # %bb.0: # %entry
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -48
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 48
+; MIPS64R5EL-NEXT:    sd $ra, 40($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    sd $gp, 32($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    .cfi_offset 31, -8
+; MIPS64R5EL-NEXT:    .cfi_offset 28, -16
+; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_2)))
+; MIPS64R5EL-NEXT:    daddu $1, $1, $25
+; MIPS64R5EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_2)))
+; MIPS64R5EL-NEXT:    addiu $1, $zero, 1798
+; MIPS64R5EL-NEXT:    sh $1, 24($sp)
+; MIPS64R5EL-NEXT:    addiu $1, $zero, 2060
+; MIPS64R5EL-NEXT:    sh $1, 28($sp)
+; MIPS64R5EL-NEXT:    ld $25, %call16(i8_2)($gp)
+; MIPS64R5EL-NEXT:    lh $4, 24($sp)
+; MIPS64R5EL-NEXT:    lh $5, 28($sp)
+; MIPS64R5EL-NEXT:    jalr $25
+; MIPS64R5EL-NEXT:    nop
+; MIPS64R5EL-NEXT:    sd $2, 16($sp)
+; MIPS64R5EL-NEXT:    ldi.b $w0, 0
+; MIPS64R5EL-NEXT:    lbu $1, 16($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[0], $1
+; MIPS64R5EL-NEXT:    lbu $1, 17($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[1], $1
+; MIPS64R5EL-NEXT:    lbu $1, 18($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[2], $1
+; MIPS64R5EL-NEXT:    lbu $1, 19($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[3], $1
+; MIPS64R5EL-NEXT:    lbu $1, 20($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[4], $1
+; MIPS64R5EL-NEXT:    lbu $1, 21($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[5], $1
+; MIPS64R5EL-NEXT:    lbu $1, 23($sp)
+; MIPS64R5EL-NEXT:    lbu $2, 22($sp)
+; MIPS64R5EL-NEXT:    insert.h $w0[6], $2
+; MIPS64R5EL-NEXT:    insert.h $w0[7], $1
+; MIPS64R5EL-NEXT:    copy_s.h $1, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.h $2, $w0[1]
+; MIPS64R5EL-NEXT:    sw $2, 8($sp)
+; MIPS64R5EL-NEXT:    sw $1, 0($sp)
+; MIPS64R5EL-NEXT:    ld.d $w0, 0($sp)
+; MIPS64R5EL-NEXT:    copy_s.d $1, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[1]
+; MIPS64R5EL-NEXT:    ld $3, %got_disp(gv2i8)($gp)
+; MIPS64R5EL-NEXT:    sb $2, 1($3)
+; MIPS64R5EL-NEXT:    sb $1, 0($3)
+; MIPS64R5EL-NEXT:    ld $gp, 32($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    ld $ra, 40($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 48
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
 entry:
-; ALL-LABEL: call_i8_2:
-; MIPS32EB-DAG: addiu $4
-; MIPS32EB-DAG: addiu $5
-; MIPS32-NOT: addiu $6
-; MIPS32-NOT: addiu $7
-
-; MIPS32R5-DAG: lhu $4, {{[0-9]+}}($sp)
-; MIPS32R5-DAG: lhu $5, {{[0-9]+}}($sp)
-
-; MIPS32R5: jal
-; MIPS32R5: sw $2, {{[0-9]+}}($sp)
-
-; MIPS32R5-DAG; sh ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}})
-
-; MIPS32R5-NOT: sb ${{[0-9]+}}, 1(${{[0-9]+}})
-; MIPS32R5-NOT; sb ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}})
-
-; MIPS64EB: daddiu $4, $zero, 1543
-; MIPS64EB: daddiu $5, $zero, 3080
-
-; MIPS64EL: daddiu $4, $zero, 1798
-; MIPS64EL; daddiu $5, $zero, 2060
-
-; MIPS64R5-DAG: lh $4
-; MIPS64R5-DAG: lh $5
-
-; MIPS32: jal i8_2
-; MIPS64: jalr $25
-
-; MIPS32EB-DAG: srl $[[R0:[0-9]+]], $2, 16
-; MIPS32EB-DAG: sb $[[R0]]
-; MIPS32EB-DAG: srl $[[R1:[0-9]+]], $2, 24
-; MIPS32EB-DAG: sb $[[R1]]
-
-; MIPS32EL: sb $2
-; MIPS32EL: srl $[[R0:[0-9]+]], $2, 8
-; MIPS32EL: sb $[[R0]]
-
-; MIPS64EB: dsrl $[[R4:[0-9]+]], $2, 48
-; MIPS64EB: sb $[[R4]]
-; MIPS64EB: dsrl $[[R5:[0-9]+]], $2, 56
-; MIPS64EB: sb $[[R5]]
-
-; MIPS64EL: sll $[[R6:[0-9]+]], $2, 0
-; MIPS64EL: sb $[[R6]]
-; MIPS64EL: srl $[[R7:[0-9]+]], $[[R6]], 8
-; MIPS64EL: sb $[[R7]]
-
-; MIPS64R5: sd $2
-
   %0 = call <2 x i8> @i8_2(<2 x i8> <i8 6, i8 7>, <2 x i8> <i8 12, i8 8>)
   store <2 x i8> %0, <2 x i8> * @gv2i8
   ret void
 }
 
 define void @call_i8_4() {
+; MIPS32EB-LABEL: call_i8_4:
+; MIPS32EB:       # %bb.0: # %entry
+; MIPS32EB-NEXT:    addiu $sp, $sp, -24
+; MIPS32EB-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32EB-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32EB-NEXT:    .cfi_offset 31, -4
+; MIPS32EB-NEXT:    lui $1, 1543
+; MIPS32EB-NEXT:    ori $4, $1, 2314
+; MIPS32EB-NEXT:    lui $1, 3080
+; MIPS32EB-NEXT:    ori $5, $1, 2314
+; MIPS32EB-NEXT:    jal i8_4
+; MIPS32EB-NEXT:    nop
+; MIPS32EB-NEXT:    lui $1, %hi(gv4i8)
+; MIPS32EB-NEXT:    sw $2, %lo(gv4i8)($1)
+; MIPS32EB-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32EB-NEXT:    addiu $sp, $sp, 24
+; MIPS32EB-NEXT:    jr $ra
+; MIPS32EB-NEXT:    nop
+;
+; MIPS64EB-LABEL: call_i8_4:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    .cfi_offset 31, -8
+; MIPS64EB-NEXT:    .cfi_offset 28, -16
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_4)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_4)))
+; MIPS64EB-NEXT:    lui $1, 1543
+; MIPS64EB-NEXT:    ori $4, $1, 2314
+; MIPS64EB-NEXT:    lui $1, 3080
+; MIPS64EB-NEXT:    ori $5, $1, 2314
+; MIPS64EB-NEXT:    ld $25, %call16(i8_4)($gp)
+; MIPS64EB-NEXT:    jalr $25
+; MIPS64EB-NEXT:    nop
+; MIPS64EB-NEXT:    ld $1, %got_disp(gv4i8)($gp)
+; MIPS64EB-NEXT:    sw $2, 0($1)
+; MIPS64EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: call_i8_4:
+; MIPS32R5EB:       # %bb.0: # %entry
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -32
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32R5EB-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
+; MIPS32R5EB-NEXT:    lui $1, 1543
+; MIPS32R5EB-NEXT:    ori $4, $1, 2314
+; MIPS32R5EB-NEXT:    lui $1, 3080
+; MIPS32R5EB-NEXT:    ori $5, $1, 2314
+; MIPS32R5EB-NEXT:    jal i8_4
+; MIPS32R5EB-NEXT:    nop
+; MIPS32R5EB-NEXT:    lui $1, %hi(gv4i8)
+; MIPS32R5EB-NEXT:    sw $2, %lo(gv4i8)($1)
+; MIPS32R5EB-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: call_i8_4:
+; MIPS64R5EB:       # %bb.0: # %entry
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    .cfi_offset 31, -8
+; MIPS64R5EB-NEXT:    .cfi_offset 28, -16
+; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_4)))
+; MIPS64R5EB-NEXT:    daddu $1, $1, $25
+; MIPS64R5EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_4)))
+; MIPS64R5EB-NEXT:    lui $1, 1543
+; MIPS64R5EB-NEXT:    ori $4, $1, 2314
+; MIPS64R5EB-NEXT:    lui $1, 3080
+; MIPS64R5EB-NEXT:    ori $5, $1, 2314
+; MIPS64R5EB-NEXT:    ld $25, %call16(i8_4)($gp)
+; MIPS64R5EB-NEXT:    jalr $25
+; MIPS64R5EB-NEXT:    nop
+; MIPS64R5EB-NEXT:    ld $1, %got_disp(gv4i8)($gp)
+; MIPS64R5EB-NEXT:    sw $2, 0($1)
+; MIPS64R5EB-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS32EL-LABEL: call_i8_4:
+; MIPS32EL:       # %bb.0: # %entry
+; MIPS32EL-NEXT:    addiu $sp, $sp, -24
+; MIPS32EL-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32EL-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32EL-NEXT:    .cfi_offset 31, -4
+; MIPS32EL-NEXT:    lui $1, 2569
+; MIPS32EL-NEXT:    ori $4, $1, 1798
+; MIPS32EL-NEXT:    ori $5, $1, 2060
+; MIPS32EL-NEXT:    jal i8_4
+; MIPS32EL-NEXT:    nop
+; MIPS32EL-NEXT:    lui $1, %hi(gv4i8)
+; MIPS32EL-NEXT:    sw $2, %lo(gv4i8)($1)
+; MIPS32EL-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32EL-NEXT:    addiu $sp, $sp, 24
+; MIPS32EL-NEXT:    jr $ra
+; MIPS32EL-NEXT:    nop
+;
+; MIPS64EL-LABEL: call_i8_4:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    .cfi_offset 31, -8
+; MIPS64EL-NEXT:    .cfi_offset 28, -16
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_4)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_4)))
+; MIPS64EL-NEXT:    lui $1, 2569
+; MIPS64EL-NEXT:    ori $4, $1, 1798
+; MIPS64EL-NEXT:    ori $5, $1, 2060
+; MIPS64EL-NEXT:    ld $25, %call16(i8_4)($gp)
+; MIPS64EL-NEXT:    jalr $25
+; MIPS64EL-NEXT:    nop
+; MIPS64EL-NEXT:    ld $1, %got_disp(gv4i8)($gp)
+; MIPS64EL-NEXT:    sw $2, 0($1)
+; MIPS64EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: call_i8_4:
+; MIPS32R5EL:       # %bb.0: # %entry
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -32
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32R5EL-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    .cfi_offset 31, -4
+; MIPS32R5EL-NEXT:    lui $1, 2569
+; MIPS32R5EL-NEXT:    ori $4, $1, 1798
+; MIPS32R5EL-NEXT:    ori $5, $1, 2060
+; MIPS32R5EL-NEXT:    jal i8_4
+; MIPS32R5EL-NEXT:    nop
+; MIPS32R5EL-NEXT:    lui $1, %hi(gv4i8)
+; MIPS32R5EL-NEXT:    sw $2, %lo(gv4i8)($1)
+; MIPS32R5EL-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: call_i8_4:
+; MIPS64R5EL:       # %bb.0: # %entry
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    .cfi_offset 31, -8
+; MIPS64R5EL-NEXT:    .cfi_offset 28, -16
+; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_4)))
+; MIPS64R5EL-NEXT:    daddu $1, $1, $25
+; MIPS64R5EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_4)))
+; MIPS64R5EL-NEXT:    lui $1, 2569
+; MIPS64R5EL-NEXT:    ori $4, $1, 1798
+; MIPS64R5EL-NEXT:    ori $5, $1, 2060
+; MIPS64R5EL-NEXT:    ld $25, %call16(i8_4)($gp)
+; MIPS64R5EL-NEXT:    jalr $25
+; MIPS64R5EL-NEXT:    nop
+; MIPS64R5EL-NEXT:    ld $1, %got_disp(gv4i8)($gp)
+; MIPS64R5EL-NEXT:    sw $2, 0($1)
+; MIPS64R5EL-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
 entry:
-; ALL-LABEL: call_i8_4:
-; MIPS32: ori $4
-; MIPS32: ori $5
-; MIPS32-NOT: ori $6
-; MIPS32-NOT: ori $7
-
-; MIPS32R5-NOT: lw $4, {{[0-9]+}}($sp)
-; MIPS32R5-NOT: lw $5, {{[0-9]+}}($sp)
-
-; MIPS64: ori $4
-; MIPS64: ori $5
-
-; MIPS64R5-NOT: lw $4
-; MIPS64R5-NOT: lw $5
-
-; MIPS32: jal i8_4
-; MIPS64: jalr $25
-
-; MIPS32: sw $2
-
-; MIPS32R5-DAG: sw $2
-
-; MIPS64: sw $2
-; MIPS64R5: sw $2
-
   %0 = call <4 x i8> @i8_4(<4 x i8> <i8 6, i8 7, i8 9, i8 10>, <4 x i8> <i8 12, i8 8, i8 9, i8 10>)
   store <4 x i8> %0, <4 x i8> * @gv4i8
   ret void
 }
 
 define void @call_i8_8() {
+; MIPS32EB-LABEL: call_i8_8:
+; MIPS32EB:       # %bb.0: # %entry
+; MIPS32EB-NEXT:    addiu $sp, $sp, -24
+; MIPS32EB-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32EB-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32EB-NEXT:    .cfi_offset 31, -4
+; MIPS32EB-NEXT:    lui $1, 3080
+; MIPS32EB-NEXT:    ori $6, $1, 2314
+; MIPS32EB-NEXT:    lui $1, 1543
+; MIPS32EB-NEXT:    ori $4, $1, 2314
+; MIPS32EB-NEXT:    move $5, $4
+; MIPS32EB-NEXT:    move $7, $4
+; MIPS32EB-NEXT:    jal i8_8
+; MIPS32EB-NEXT:    nop
+; MIPS32EB-NEXT:    lui $1, %hi(gv8i8)
+; MIPS32EB-NEXT:    addiu $4, $1, %lo(gv8i8)
+; MIPS32EB-NEXT:    sw $3, 4($4)
+; MIPS32EB-NEXT:    sw $2, %lo(gv8i8)($1)
+; MIPS32EB-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32EB-NEXT:    addiu $sp, $sp, 24
+; MIPS32EB-NEXT:    jr $ra
+; MIPS32EB-NEXT:    nop
+;
+; MIPS64EB-LABEL: call_i8_8:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    .cfi_offset 31, -8
+; MIPS64EB-NEXT:    .cfi_offset 28, -16
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_8)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_8)))
+; MIPS64EB-NEXT:    lui $1, 772
+; MIPS64EB-NEXT:    daddiu $1, $1, -31611
+; MIPS64EB-NEXT:    dsll $1, $1, 17
+; MIPS64EB-NEXT:    daddiu $1, $1, 1543
+; MIPS64EB-NEXT:    dsll $1, $1, 16
+; MIPS64EB-NEXT:    daddiu $4, $1, 2314
+; MIPS64EB-NEXT:    lui $1, 1540
+; MIPS64EB-NEXT:    daddiu $1, $1, 1157
+; MIPS64EB-NEXT:    dsll $1, $1, 17
+; MIPS64EB-NEXT:    daddiu $1, $1, 1543
+; MIPS64EB-NEXT:    dsll $1, $1, 16
+; MIPS64EB-NEXT:    daddiu $5, $1, 2314
+; MIPS64EB-NEXT:    ld $25, %call16(i8_8)($gp)
+; MIPS64EB-NEXT:    jalr $25
+; MIPS64EB-NEXT:    nop
+; MIPS64EB-NEXT:    ld $1, %got_disp(gv8i8)($gp)
+; MIPS64EB-NEXT:    sd $2, 0($1)
+; MIPS64EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: call_i8_8:
+; MIPS32R5EB:       # %bb.0: # %entry
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -24
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32R5EB-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
+; MIPS32R5EB-NEXT:    lui $1, 3080
+; MIPS32R5EB-NEXT:    ori $6, $1, 2314
+; MIPS32R5EB-NEXT:    lui $1, 1543
+; MIPS32R5EB-NEXT:    ori $4, $1, 2314
+; MIPS32R5EB-NEXT:    move $5, $4
+; MIPS32R5EB-NEXT:    move $7, $4
+; MIPS32R5EB-NEXT:    jal i8_8
+; MIPS32R5EB-NEXT:    nop
+; MIPS32R5EB-NEXT:    lui $1, %hi(gv8i8)
+; MIPS32R5EB-NEXT:    addiu $4, $1, %lo(gv8i8)
+; MIPS32R5EB-NEXT:    sw $3, 4($4)
+; MIPS32R5EB-NEXT:    sw $2, %lo(gv8i8)($1)
+; MIPS32R5EB-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 24
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: call_i8_8:
+; MIPS64R5EB:       # %bb.0: # %entry
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    .cfi_offset 31, -8
+; MIPS64R5EB-NEXT:    .cfi_offset 28, -16
+; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_8)))
+; MIPS64R5EB-NEXT:    daddu $1, $1, $25
+; MIPS64R5EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_8)))
+; MIPS64R5EB-NEXT:    lui $1, 772
+; MIPS64R5EB-NEXT:    daddiu $1, $1, -31611
+; MIPS64R5EB-NEXT:    dsll $1, $1, 17
+; MIPS64R5EB-NEXT:    daddiu $1, $1, 1543
+; MIPS64R5EB-NEXT:    dsll $1, $1, 16
+; MIPS64R5EB-NEXT:    daddiu $4, $1, 2314
+; MIPS64R5EB-NEXT:    lui $1, 1540
+; MIPS64R5EB-NEXT:    daddiu $1, $1, 1157
+; MIPS64R5EB-NEXT:    dsll $1, $1, 17
+; MIPS64R5EB-NEXT:    daddiu $1, $1, 1543
+; MIPS64R5EB-NEXT:    dsll $1, $1, 16
+; MIPS64R5EB-NEXT:    daddiu $5, $1, 2314
+; MIPS64R5EB-NEXT:    ld $25, %call16(i8_8)($gp)
+; MIPS64R5EB-NEXT:    jalr $25
+; MIPS64R5EB-NEXT:    nop
+; MIPS64R5EB-NEXT:    ld $1, %got_disp(gv8i8)($gp)
+; MIPS64R5EB-NEXT:    sd $2, 0($1)
+; MIPS64R5EB-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS32EL-LABEL: call_i8_8:
+; MIPS32EL:       # %bb.0: # %entry
+; MIPS32EL-NEXT:    addiu $sp, $sp, -24
+; MIPS32EL-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32EL-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32EL-NEXT:    .cfi_offset 31, -4
+; MIPS32EL-NEXT:    lui $1, 2569
+; MIPS32EL-NEXT:    ori $6, $1, 2060
+; MIPS32EL-NEXT:    ori $4, $1, 1798
+; MIPS32EL-NEXT:    move $5, $4
+; MIPS32EL-NEXT:    move $7, $4
+; MIPS32EL-NEXT:    jal i8_8
+; MIPS32EL-NEXT:    nop
+; MIPS32EL-NEXT:    lui $1, %hi(gv8i8)
+; MIPS32EL-NEXT:    addiu $4, $1, %lo(gv8i8)
+; MIPS32EL-NEXT:    sw $3, 4($4)
+; MIPS32EL-NEXT:    sw $2, %lo(gv8i8)($1)
+; MIPS32EL-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32EL-NEXT:    addiu $sp, $sp, 24
+; MIPS32EL-NEXT:    jr $ra
+; MIPS32EL-NEXT:    nop
+;
+; MIPS64EL-LABEL: call_i8_8:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    .cfi_offset 31, -8
+; MIPS64EL-NEXT:    .cfi_offset 28, -16
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_8)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_8)))
+; MIPS64EL-NEXT:    lui $1, 1285
+; MIPS64EL-NEXT:    daddiu $1, $1, -31869
+; MIPS64EL-NEXT:    dsll $1, $1, 17
+; MIPS64EL-NEXT:    daddiu $1, $1, 2569
+; MIPS64EL-NEXT:    dsll $1, $1, 16
+; MIPS64EL-NEXT:    daddiu $4, $1, 1798
+; MIPS64EL-NEXT:    daddiu $5, $1, 2060
+; MIPS64EL-NEXT:    ld $25, %call16(i8_8)($gp)
+; MIPS64EL-NEXT:    jalr $25
+; MIPS64EL-NEXT:    nop
+; MIPS64EL-NEXT:    ld $1, %got_disp(gv8i8)($gp)
+; MIPS64EL-NEXT:    sd $2, 0($1)
+; MIPS64EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: call_i8_8:
+; MIPS32R5EL:       # %bb.0: # %entry
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -24
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32R5EL-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    .cfi_offset 31, -4
+; MIPS32R5EL-NEXT:    lui $1, 2569
+; MIPS32R5EL-NEXT:    ori $6, $1, 2060
+; MIPS32R5EL-NEXT:    ori $4, $1, 1798
+; MIPS32R5EL-NEXT:    move $5, $4
+; MIPS32R5EL-NEXT:    move $7, $4
+; MIPS32R5EL-NEXT:    jal i8_8
+; MIPS32R5EL-NEXT:    nop
+; MIPS32R5EL-NEXT:    lui $1, %hi(gv8i8)
+; MIPS32R5EL-NEXT:    addiu $4, $1, %lo(gv8i8)
+; MIPS32R5EL-NEXT:    sw $3, 4($4)
+; MIPS32R5EL-NEXT:    sw $2, %lo(gv8i8)($1)
+; MIPS32R5EL-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 24
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: call_i8_8:
+; MIPS64R5EL:       # %bb.0: # %entry
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    .cfi_offset 31, -8
+; MIPS64R5EL-NEXT:    .cfi_offset 28, -16
+; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_8)))
+; MIPS64R5EL-NEXT:    daddu $1, $1, $25
+; MIPS64R5EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_8)))
+; MIPS64R5EL-NEXT:    lui $1, 1285
+; MIPS64R5EL-NEXT:    daddiu $1, $1, -31869
+; MIPS64R5EL-NEXT:    dsll $1, $1, 17
+; MIPS64R5EL-NEXT:    daddiu $1, $1, 2569
+; MIPS64R5EL-NEXT:    dsll $1, $1, 16
+; MIPS64R5EL-NEXT:    daddiu $4, $1, 1798
+; MIPS64R5EL-NEXT:    daddiu $5, $1, 2060
+; MIPS64R5EL-NEXT:    ld $25, %call16(i8_8)($gp)
+; MIPS64R5EL-NEXT:    jalr $25
+; MIPS64R5EL-NEXT:    nop
+; MIPS64R5EL-NEXT:    ld $1, %got_disp(gv8i8)($gp)
+; MIPS64R5EL-NEXT:    sd $2, 0($1)
+; MIPS64R5EL-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
 entry:
-; ALL-LABEL: call_i8_8:
-
-; MIPS32: ori $6
-; MIPS32: ori $4
-; MIPS32: move  $5
-; MIPS32: move  $7
-
-; MIPS32R5-DAG: ori $6
-; MIPS32R5-DAG: ori $4
-; MIPS32R5-DAG: move  $5
-; MIPS32R5-DAG: move  $7
-
-; MIPS64EB: daddiu $4, ${{[0-9]+}}, 2314
-; MIPS64EB: daddiu $5, ${{[0-9]+}}, 2314
-
-; MIPS64EL: daddiu $4, ${{[0-9]+}}, 1798
-; MIPS64EL: daddiu $5, ${{[0-9]+}}, 2060
-
-; MIPS32: jal i8_8
-; MIPS64: jalr $25
-
-; MIPS32-DAG: sw $2
-; MIPS32-DAG: sw $3
-
-; MIPS32R5-DAG: sw $2
-; MIPS32R5-DAG: sw $3
-
-; MIPS64: sd $2
-; MIPS64R5: sd $2
-
   %0 = call <8 x i8> @i8_8(<8 x i8> <i8 6, i8 7, i8 9, i8 10, i8 6, i8 7, i8 9, i8 10>, <8 x i8> <i8 12, i8 8, i8 9, i8 10, i8 6, i8 7, i8 9, i8 10>)
   store <8 x i8> %0, <8 x i8> * @gv8i8
   ret void
 }
 
 define void @calli8_16() {
+; MIPS32EB-LABEL: calli8_16:
+; MIPS32EB:       # %bb.0: # %entry
+; MIPS32EB-NEXT:    addiu $sp, $sp, -40
+; MIPS32EB-NEXT:    .cfi_def_cfa_offset 40
+; MIPS32EB-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32EB-NEXT:    .cfi_offset 31, -4
+; MIPS32EB-NEXT:    lui $1, 3080
+; MIPS32EB-NEXT:    ori $1, $1, 2314
+; MIPS32EB-NEXT:    lui $2, 1801
+; MIPS32EB-NEXT:    sw $1, 28($sp)
+; MIPS32EB-NEXT:    ori $1, $2, 1801
+; MIPS32EB-NEXT:    sw $1, 24($sp)
+; MIPS32EB-NEXT:    sw $1, 20($sp)
+; MIPS32EB-NEXT:    sw $1, 16($sp)
+; MIPS32EB-NEXT:    lui $1, 1543
+; MIPS32EB-NEXT:    ori $4, $1, 1543
+; MIPS32EB-NEXT:    ori $7, $1, 2314
+; MIPS32EB-NEXT:    move $5, $4
+; MIPS32EB-NEXT:    move $6, $4
+; MIPS32EB-NEXT:    jal i8_16
+; MIPS32EB-NEXT:    nop
+; MIPS32EB-NEXT:    lui $1, %hi(gv16i8)
+; MIPS32EB-NEXT:    addiu $6, $1, %lo(gv16i8)
+; MIPS32EB-NEXT:    sw $5, 12($6)
+; MIPS32EB-NEXT:    sw $4, 8($6)
+; MIPS32EB-NEXT:    sw $3, 4($6)
+; MIPS32EB-NEXT:    sw $2, %lo(gv16i8)($1)
+; MIPS32EB-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32EB-NEXT:    addiu $sp, $sp, 40
+; MIPS32EB-NEXT:    jr $ra
+; MIPS32EB-NEXT:    nop
+;
+; MIPS64EB-LABEL: calli8_16:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    .cfi_offset 31, -8
+; MIPS64EB-NEXT:    .cfi_offset 28, -16
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(calli8_16)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli8_16)))
+; MIPS64EB-NEXT:    lui $1, 1801
+; MIPS64EB-NEXT:    daddiu $1, $1, 1801
+; MIPS64EB-NEXT:    dsll $1, $1, 16
+; MIPS64EB-NEXT:    daddiu $1, $1, 1801
+; MIPS64EB-NEXT:    lui $2, 1543
+; MIPS64EB-NEXT:    dsll $1, $1, 16
+; MIPS64EB-NEXT:    daddiu $2, $2, 1543
+; MIPS64EB-NEXT:    dsll $2, $2, 16
+; MIPS64EB-NEXT:    daddiu $2, $2, 1543
+; MIPS64EB-NEXT:    dsll $2, $2, 16
+; MIPS64EB-NEXT:    daddiu $4, $2, 1543
+; MIPS64EB-NEXT:    daddiu $5, $2, 2314
+; MIPS64EB-NEXT:    daddiu $6, $1, 1801
+; MIPS64EB-NEXT:    lui $1, 225
+; MIPS64EB-NEXT:    daddiu $1, $1, 8417
+; MIPS64EB-NEXT:    dsll $1, $1, 16
+; MIPS64EB-NEXT:    daddiu $1, $1, 8577
+; MIPS64EB-NEXT:    dsll $1, $1, 19
+; MIPS64EB-NEXT:    daddiu $7, $1, 2314
+; MIPS64EB-NEXT:    ld $25, %call16(i8_16)($gp)
+; MIPS64EB-NEXT:    jalr $25
+; MIPS64EB-NEXT:    nop
+; MIPS64EB-NEXT:    ld $1, %got_disp(gv16i8)($gp)
+; MIPS64EB-NEXT:    sd $3, 8($1)
+; MIPS64EB-NEXT:    sd $2, 0($1)
+; MIPS64EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5-LABEL: calli8_16:
+; MIPS32R5:       # %bb.0: # %entry
+; MIPS32R5-NEXT:    addiu $sp, $sp, -40
+; MIPS32R5-NEXT:    .cfi_def_cfa_offset 40
+; MIPS32R5-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32R5-NEXT:    .cfi_offset 31, -4
+; MIPS32R5-NEXT:    lui $1, %hi($CPI30_0)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo($CPI30_0)
+; MIPS32R5-NEXT:    ld.w $w0, 0($1)
+; MIPS32R5-NEXT:    copy_s.w $4, $w0[0]
+; MIPS32R5-NEXT:    copy_s.w $5, $w0[1]
+; MIPS32R5-NEXT:    copy_s.w $6, $w0[2]
+; MIPS32R5-NEXT:    copy_s.w $7, $w0[3]
+; MIPS32R5-NEXT:    lui $1, %hi($CPI30_1)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo($CPI30_1)
+; MIPS32R5-NEXT:    ld.w $w0, 0($1)
+; MIPS32R5-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5-NEXT:    copy_s.w $8, $w0[3]
+; MIPS32R5-NEXT:    sw $8, 28($sp)
+; MIPS32R5-NEXT:    sw $3, 24($sp)
+; MIPS32R5-NEXT:    sw $2, 20($sp)
+; MIPS32R5-NEXT:    sw $1, 16($sp)
+; MIPS32R5-NEXT:    jal i8_16
+; MIPS32R5-NEXT:    nop
+; MIPS32R5-NEXT:    ldi.b $w0, 0
+; MIPS32R5-NEXT:    insert.w $w0[0], $2
+; MIPS32R5-NEXT:    lui $1, %hi(gv16i8)
+; MIPS32R5-NEXT:    insert.w $w0[1], $3
+; MIPS32R5-NEXT:    addiu $1, $1, %lo(gv16i8)
+; MIPS32R5-NEXT:    insert.w $w0[2], $4
+; MIPS32R5-NEXT:    insert.w $w0[3], $5
+; MIPS32R5-NEXT:    st.w $w0, 0($1)
+; MIPS32R5-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32R5-NEXT:    addiu $sp, $sp, 40
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: calli8_16:
+; MIPS64R5:       # %bb.0: # %entry
+; MIPS64R5-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64R5-NEXT:    .cfi_offset 31, -8
+; MIPS64R5-NEXT:    .cfi_offset 28, -16
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(calli8_16)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli8_16)))
+; MIPS64R5-NEXT:    ld $1, %got_page(.LCPI30_0)($gp)
+; MIPS64R5-NEXT:    daddiu $1, $1, %got_ofst(.LCPI30_0)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $4, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $5, $w0[1]
+; MIPS64R5-NEXT:    ld $1, %got_page(.LCPI30_1)($gp)
+; MIPS64R5-NEXT:    daddiu $1, $1, %got_ofst(.LCPI30_1)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $6, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $7, $w0[1]
+; MIPS64R5-NEXT:    ld $25, %call16(i8_16)($gp)
+; MIPS64R5-NEXT:    jalr $25
+; MIPS64R5-NEXT:    nop
+; MIPS64R5-NEXT:    ldi.b $w0, 0
+; MIPS64R5-NEXT:    insert.d $w0[0], $2
+; MIPS64R5-NEXT:    insert.d $w0[1], $3
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv16i8)($gp)
+; MIPS64R5-NEXT:    st.d $w0, 0($1)
+; MIPS64R5-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5-NEXT:    daddiu $sp, $sp, 16
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS32EL-LABEL: calli8_16:
+; MIPS32EL:       # %bb.0: # %entry
+; MIPS32EL-NEXT:    addiu $sp, $sp, -40
+; MIPS32EL-NEXT:    .cfi_def_cfa_offset 40
+; MIPS32EL-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32EL-NEXT:    .cfi_offset 31, -4
+; MIPS32EL-NEXT:    lui $1, 2569
+; MIPS32EL-NEXT:    ori $2, $1, 2060
+; MIPS32EL-NEXT:    lui $3, 2311
+; MIPS32EL-NEXT:    sw $2, 28($sp)
+; MIPS32EL-NEXT:    ori $2, $3, 2311
+; MIPS32EL-NEXT:    sw $2, 24($sp)
+; MIPS32EL-NEXT:    sw $2, 20($sp)
+; MIPS32EL-NEXT:    sw $2, 16($sp)
+; MIPS32EL-NEXT:    lui $2, 1798
+; MIPS32EL-NEXT:    ori $4, $2, 1798
+; MIPS32EL-NEXT:    ori $7, $1, 1798
+; MIPS32EL-NEXT:    move $5, $4
+; MIPS32EL-NEXT:    move $6, $4
+; MIPS32EL-NEXT:    jal i8_16
+; MIPS32EL-NEXT:    nop
+; MIPS32EL-NEXT:    lui $1, %hi(gv16i8)
+; MIPS32EL-NEXT:    addiu $6, $1, %lo(gv16i8)
+; MIPS32EL-NEXT:    sw $5, 12($6)
+; MIPS32EL-NEXT:    sw $4, 8($6)
+; MIPS32EL-NEXT:    sw $3, 4($6)
+; MIPS32EL-NEXT:    sw $2, %lo(gv16i8)($1)
+; MIPS32EL-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32EL-NEXT:    addiu $sp, $sp, 40
+; MIPS32EL-NEXT:    jr $ra
+; MIPS32EL-NEXT:    nop
+;
+; MIPS64EL-LABEL: calli8_16:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    .cfi_offset 31, -8
+; MIPS64EL-NEXT:    .cfi_offset 28, -16
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(calli8_16)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli8_16)))
+; MIPS64EL-NEXT:    lui $1, 1285
+; MIPS64EL-NEXT:    daddiu $1, $1, -31869
+; MIPS64EL-NEXT:    dsll $1, $1, 16
+; MIPS64EL-NEXT:    daddiu $1, $1, 899
+; MIPS64EL-NEXT:    lui $2, 2311
+; MIPS64EL-NEXT:    daddiu $2, $2, 2311
+; MIPS64EL-NEXT:    dsll $2, $2, 16
+; MIPS64EL-NEXT:    daddiu $2, $2, 2311
+; MIPS64EL-NEXT:    dsll $2, $2, 16
+; MIPS64EL-NEXT:    dsll $1, $1, 17
+; MIPS64EL-NEXT:    lui $3, 899
+; MIPS64EL-NEXT:    daddiu $3, $3, 899
+; MIPS64EL-NEXT:    dsll $3, $3, 16
+; MIPS64EL-NEXT:    daddiu $3, $3, 899
+; MIPS64EL-NEXT:    dsll $3, $3, 17
+; MIPS64EL-NEXT:    daddiu $4, $3, 1798
+; MIPS64EL-NEXT:    daddiu $5, $1, 1798
+; MIPS64EL-NEXT:    daddiu $6, $2, 2311
+; MIPS64EL-NEXT:    lui $1, 642
+; MIPS64EL-NEXT:    daddiu $1, $1, 16899
+; MIPS64EL-NEXT:    dsll $1, $1, 18
+; MIPS64EL-NEXT:    daddiu $1, $1, 2311
+; MIPS64EL-NEXT:    dsll $1, $1, 16
+; MIPS64EL-NEXT:    daddiu $7, $1, 2311
+; MIPS64EL-NEXT:    ld $25, %call16(i8_16)($gp)
+; MIPS64EL-NEXT:    jalr $25
+; MIPS64EL-NEXT:    nop
+; MIPS64EL-NEXT:    ld $1, %got_disp(gv16i8)($gp)
+; MIPS64EL-NEXT:    sd $3, 8($1)
+; MIPS64EL-NEXT:    sd $2, 0($1)
+; MIPS64EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
 entry:
-; ALL-LABEL: calli8_16:
-; MIPS32-DAG: sw  ${{[0-9]+}}, 28($sp)
-; MIPS32-DAG: sw  ${{[0-9]+}}, 24($sp)
-; MIPS32-DAG: sw  ${{[0-9]+}}, 20($sp)
-; MIPS32-DAG: sw  ${{[0-9]+}}, 16($sp)
-
-; MIPS32: ori $4, ${{[0-9]+}}, {{[0-9]+}}
-; MIPS32: ori $7, ${{[0-9]+}}, {{[0-9]+}}
-; MIPS32: move  $5, ${{[0-9]+}}
-; MIPS32: move  $6, ${{[0-9]+}}
-
-; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
-; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
-; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
-; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
-
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
-
-; MIPS64-DAG: daddiu $4
-; MIPS64-DAG: daddiu $5
-; MIPS64-DAG: daddiu $6
-; MIPS64-DAG: daddiu $7
-
-; MIPS64R5-DAG: copy_s.d $4
-; MIPS64R5-DAG: copy_s.d $5
-; MIPS64R5-DAG: copy_s.d $6
-; MIPS64R5-DAG: copy_s.d $7
-
-; MIPS32: jal i8_16
-; MIPS64: jalr $25
-
-; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
-; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
-; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
-; MIPS32-DAG: sw $2, %lo(gv16i8)(${{[0-9]+}})
-
-; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
-; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
-; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
-; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
-; MIPS32R5-DAG: st.w $w[[W0]]
-
-; MIPS64-DAG: sd $3
-; MIPS64-DAG: sd $2
-
-; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
-; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
-
   %0 = call <16 x i8> @i8_16(<16 x i8> <i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7, i8 6, i8 7, i8 9, i8 10>, <16 x i8> <i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 12, i8 8, i8 9, i8 10>)
   store <16 x i8> %0, <16 x i8> * @gv16i8
   ret void
 }
 
 define void @calli16_2() {
+; MIPS32EB-LABEL: calli16_2:
+; MIPS32EB:       # %bb.0: # %entry
+; MIPS32EB-NEXT:    addiu $sp, $sp, -24
+; MIPS32EB-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32EB-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32EB-NEXT:    .cfi_offset 31, -4
+; MIPS32EB-NEXT:    lui $1, 6
+; MIPS32EB-NEXT:    ori $4, $1, 7
+; MIPS32EB-NEXT:    lui $1, 12
+; MIPS32EB-NEXT:    ori $5, $1, 8
+; MIPS32EB-NEXT:    jal i16_2
+; MIPS32EB-NEXT:    nop
+; MIPS32EB-NEXT:    lui $1, %hi(gv2i16)
+; MIPS32EB-NEXT:    sw $2, %lo(gv2i16)($1)
+; MIPS32EB-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32EB-NEXT:    addiu $sp, $sp, 24
+; MIPS32EB-NEXT:    jr $ra
+; MIPS32EB-NEXT:    nop
+;
+; MIPS64EB-LABEL: calli16_2:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    .cfi_offset 31, -8
+; MIPS64EB-NEXT:    .cfi_offset 28, -16
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_2)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli16_2)))
+; MIPS64EB-NEXT:    lui $1, 6
+; MIPS64EB-NEXT:    ori $4, $1, 7
+; MIPS64EB-NEXT:    lui $1, 12
+; MIPS64EB-NEXT:    ori $5, $1, 8
+; MIPS64EB-NEXT:    ld $25, %call16(i16_2)($gp)
+; MIPS64EB-NEXT:    jalr $25
+; MIPS64EB-NEXT:    nop
+; MIPS64EB-NEXT:    ld $1, %got_disp(gv2i16)($gp)
+; MIPS64EB-NEXT:    sw $2, 0($1)
+; MIPS64EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: calli16_2:
+; MIPS32R5EB:       # %bb.0: # %entry
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -32
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32R5EB-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
+; MIPS32R5EB-NEXT:    lui $1, 6
+; MIPS32R5EB-NEXT:    ori $4, $1, 7
+; MIPS32R5EB-NEXT:    lui $1, 12
+; MIPS32R5EB-NEXT:    ori $5, $1, 8
+; MIPS32R5EB-NEXT:    jal i16_2
+; MIPS32R5EB-NEXT:    nop
+; MIPS32R5EB-NEXT:    lui $1, %hi(gv2i16)
+; MIPS32R5EB-NEXT:    sw $2, %lo(gv2i16)($1)
+; MIPS32R5EB-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: calli16_2:
+; MIPS64R5EB:       # %bb.0: # %entry
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    .cfi_offset 31, -8
+; MIPS64R5EB-NEXT:    .cfi_offset 28, -16
+; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_2)))
+; MIPS64R5EB-NEXT:    daddu $1, $1, $25
+; MIPS64R5EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli16_2)))
+; MIPS64R5EB-NEXT:    lui $1, 6
+; MIPS64R5EB-NEXT:    ori $4, $1, 7
+; MIPS64R5EB-NEXT:    lui $1, 12
+; MIPS64R5EB-NEXT:    ori $5, $1, 8
+; MIPS64R5EB-NEXT:    ld $25, %call16(i16_2)($gp)
+; MIPS64R5EB-NEXT:    jalr $25
+; MIPS64R5EB-NEXT:    nop
+; MIPS64R5EB-NEXT:    ld $1, %got_disp(gv2i16)($gp)
+; MIPS64R5EB-NEXT:    sw $2, 0($1)
+; MIPS64R5EB-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS32EL-LABEL: calli16_2:
+; MIPS32EL:       # %bb.0: # %entry
+; MIPS32EL-NEXT:    addiu $sp, $sp, -24
+; MIPS32EL-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32EL-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32EL-NEXT:    .cfi_offset 31, -4
+; MIPS32EL-NEXT:    lui $1, 7
+; MIPS32EL-NEXT:    ori $4, $1, 6
+; MIPS32EL-NEXT:    lui $1, 8
+; MIPS32EL-NEXT:    ori $5, $1, 12
+; MIPS32EL-NEXT:    jal i16_2
+; MIPS32EL-NEXT:    nop
+; MIPS32EL-NEXT:    lui $1, %hi(gv2i16)
+; MIPS32EL-NEXT:    sw $2, %lo(gv2i16)($1)
+; MIPS32EL-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32EL-NEXT:    addiu $sp, $sp, 24
+; MIPS32EL-NEXT:    jr $ra
+; MIPS32EL-NEXT:    nop
+;
+; MIPS64EL-LABEL: calli16_2:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    .cfi_offset 31, -8
+; MIPS64EL-NEXT:    .cfi_offset 28, -16
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_2)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli16_2)))
+; MIPS64EL-NEXT:    lui $1, 7
+; MIPS64EL-NEXT:    ori $4, $1, 6
+; MIPS64EL-NEXT:    lui $1, 8
+; MIPS64EL-NEXT:    ori $5, $1, 12
+; MIPS64EL-NEXT:    ld $25, %call16(i16_2)($gp)
+; MIPS64EL-NEXT:    jalr $25
+; MIPS64EL-NEXT:    nop
+; MIPS64EL-NEXT:    ld $1, %got_disp(gv2i16)($gp)
+; MIPS64EL-NEXT:    sw $2, 0($1)
+; MIPS64EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: calli16_2:
+; MIPS32R5EL:       # %bb.0: # %entry
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -32
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32R5EL-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    .cfi_offset 31, -4
+; MIPS32R5EL-NEXT:    lui $1, 7
+; MIPS32R5EL-NEXT:    ori $4, $1, 6
+; MIPS32R5EL-NEXT:    lui $1, 8
+; MIPS32R5EL-NEXT:    ori $5, $1, 12
+; MIPS32R5EL-NEXT:    jal i16_2
+; MIPS32R5EL-NEXT:    nop
+; MIPS32R5EL-NEXT:    lui $1, %hi(gv2i16)
+; MIPS32R5EL-NEXT:    sw $2, %lo(gv2i16)($1)
+; MIPS32R5EL-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 32
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: calli16_2:
+; MIPS64R5EL:       # %bb.0: # %entry
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    .cfi_offset 31, -8
+; MIPS64R5EL-NEXT:    .cfi_offset 28, -16
+; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_2)))
+; MIPS64R5EL-NEXT:    daddu $1, $1, $25
+; MIPS64R5EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli16_2)))
+; MIPS64R5EL-NEXT:    lui $1, 7
+; MIPS64R5EL-NEXT:    ori $4, $1, 6
+; MIPS64R5EL-NEXT:    lui $1, 8
+; MIPS64R5EL-NEXT:    ori $5, $1, 12
+; MIPS64R5EL-NEXT:    ld $25, %call16(i16_2)($gp)
+; MIPS64R5EL-NEXT:    jalr $25
+; MIPS64R5EL-NEXT:    nop
+; MIPS64R5EL-NEXT:    ld $1, %got_disp(gv2i16)($gp)
+; MIPS64R5EL-NEXT:    sw $2, 0($1)
+; MIPS64R5EL-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
 entry:
-; ALL-LABEL: calli16_2:
-
-; MIPS32-DAG: ori $4
-; MIPS32-DAG: ori $5
-
-; MIPS32R5-NOT: lw $4
-; MIPS32R5-NOT: lw $5
-
-; MIPS64: ori $4
-; MIPS64: ori $5
-
-; MIPS64R5-NOT: lw $4
-; MIPS64R5-NOT: lw $5
-
-; MIPS32: jal i16_2
-; MIPS64: jalr $25
-
-; MIPS32: sw $2, %lo(gv2i16)
-
-; MIPS32R5: sw $2, %lo(gv2i16)
-
-; MIPS64: sw $2
-
-; MIPS64R6: sw $2
-
   %0 = call <2 x i16> @i16_2(<2 x i16> <i16 6, i16 7>, <2 x i16> <i16 12, i16 8>)
   store <2 x i16> %0, <2 x i16> * @gv2i16
   ret void
 }
 
 define void @calli16_4() {
+; MIPS32EB-LABEL: calli16_4:
+; MIPS32EB:       # %bb.0: # %entry
+; MIPS32EB-NEXT:    addiu $sp, $sp, -24
+; MIPS32EB-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32EB-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32EB-NEXT:    .cfi_offset 31, -4
+; MIPS32EB-NEXT:    lui $1, 6
+; MIPS32EB-NEXT:    ori $4, $1, 7
+; MIPS32EB-NEXT:    lui $1, 12
+; MIPS32EB-NEXT:    ori $6, $1, 8
+; MIPS32EB-NEXT:    lui $1, 9
+; MIPS32EB-NEXT:    ori $5, $1, 10
+; MIPS32EB-NEXT:    move $7, $5
+; MIPS32EB-NEXT:    jal i16_4
+; MIPS32EB-NEXT:    nop
+; MIPS32EB-NEXT:    lui $1, %hi(gv4i16)
+; MIPS32EB-NEXT:    addiu $4, $1, %lo(gv4i16)
+; MIPS32EB-NEXT:    sw $3, 4($4)
+; MIPS32EB-NEXT:    sw $2, %lo(gv4i16)($1)
+; MIPS32EB-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32EB-NEXT:    addiu $sp, $sp, 24
+; MIPS32EB-NEXT:    jr $ra
+; MIPS32EB-NEXT:    nop
+;
+; MIPS64EB-LABEL: calli16_4:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    .cfi_offset 31, -8
+; MIPS64EB-NEXT:    .cfi_offset 28, -16
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_4)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli16_4)))
+; MIPS64EB-NEXT:    lui $1, 6
+; MIPS64EB-NEXT:    daddiu $1, $1, 7
+; MIPS64EB-NEXT:    dsll $1, $1, 16
+; MIPS64EB-NEXT:    daddiu $1, $1, 9
+; MIPS64EB-NEXT:    dsll $1, $1, 16
+; MIPS64EB-NEXT:    daddiu $4, $1, 10
+; MIPS64EB-NEXT:    lui $1, 2
+; MIPS64EB-NEXT:    daddiu $1, $1, -32767
+; MIPS64EB-NEXT:    dsll $1, $1, 19
+; MIPS64EB-NEXT:    daddiu $1, $1, 9
+; MIPS64EB-NEXT:    dsll $1, $1, 16
+; MIPS64EB-NEXT:    daddiu $5, $1, 10
+; MIPS64EB-NEXT:    ld $25, %call16(i16_4)($gp)
+; MIPS64EB-NEXT:    jalr $25
+; MIPS64EB-NEXT:    nop
+; MIPS64EB-NEXT:    ld $1, %got_disp(gv4i16)($gp)
+; MIPS64EB-NEXT:    sd $2, 0($1)
+; MIPS64EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: calli16_4:
+; MIPS32R5EB:       # %bb.0: # %entry
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -24
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32R5EB-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
+; MIPS32R5EB-NEXT:    lui $1, 6
+; MIPS32R5EB-NEXT:    ori $4, $1, 7
+; MIPS32R5EB-NEXT:    lui $1, 12
+; MIPS32R5EB-NEXT:    ori $6, $1, 8
+; MIPS32R5EB-NEXT:    lui $1, 9
+; MIPS32R5EB-NEXT:    ori $5, $1, 10
+; MIPS32R5EB-NEXT:    move $7, $5
+; MIPS32R5EB-NEXT:    jal i16_4
+; MIPS32R5EB-NEXT:    nop
+; MIPS32R5EB-NEXT:    lui $1, %hi(gv4i16)
+; MIPS32R5EB-NEXT:    addiu $4, $1, %lo(gv4i16)
+; MIPS32R5EB-NEXT:    sw $3, 4($4)
+; MIPS32R5EB-NEXT:    sw $2, %lo(gv4i16)($1)
+; MIPS32R5EB-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 24
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: calli16_4:
+; MIPS64R5EB:       # %bb.0: # %entry
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    .cfi_offset 31, -8
+; MIPS64R5EB-NEXT:    .cfi_offset 28, -16
+; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_4)))
+; MIPS64R5EB-NEXT:    daddu $1, $1, $25
+; MIPS64R5EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli16_4)))
+; MIPS64R5EB-NEXT:    lui $1, 6
+; MIPS64R5EB-NEXT:    daddiu $1, $1, 7
+; MIPS64R5EB-NEXT:    dsll $1, $1, 16
+; MIPS64R5EB-NEXT:    daddiu $1, $1, 9
+; MIPS64R5EB-NEXT:    dsll $1, $1, 16
+; MIPS64R5EB-NEXT:    daddiu $4, $1, 10
+; MIPS64R5EB-NEXT:    lui $1, 2
+; MIPS64R5EB-NEXT:    daddiu $1, $1, -32767
+; MIPS64R5EB-NEXT:    dsll $1, $1, 19
+; MIPS64R5EB-NEXT:    daddiu $1, $1, 9
+; MIPS64R5EB-NEXT:    dsll $1, $1, 16
+; MIPS64R5EB-NEXT:    daddiu $5, $1, 10
+; MIPS64R5EB-NEXT:    ld $25, %call16(i16_4)($gp)
+; MIPS64R5EB-NEXT:    jalr $25
+; MIPS64R5EB-NEXT:    nop
+; MIPS64R5EB-NEXT:    ld $1, %got_disp(gv4i16)($gp)
+; MIPS64R5EB-NEXT:    sd $2, 0($1)
+; MIPS64R5EB-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS32EL-LABEL: calli16_4:
+; MIPS32EL:       # %bb.0: # %entry
+; MIPS32EL-NEXT:    addiu $sp, $sp, -24
+; MIPS32EL-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32EL-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32EL-NEXT:    .cfi_offset 31, -4
+; MIPS32EL-NEXT:    lui $1, 7
+; MIPS32EL-NEXT:    ori $4, $1, 6
+; MIPS32EL-NEXT:    lui $1, 8
+; MIPS32EL-NEXT:    ori $6, $1, 12
+; MIPS32EL-NEXT:    lui $1, 10
+; MIPS32EL-NEXT:    ori $5, $1, 9
+; MIPS32EL-NEXT:    move $7, $5
+; MIPS32EL-NEXT:    jal i16_4
+; MIPS32EL-NEXT:    nop
+; MIPS32EL-NEXT:    lui $1, %hi(gv4i16)
+; MIPS32EL-NEXT:    addiu $4, $1, %lo(gv4i16)
+; MIPS32EL-NEXT:    sw $3, 4($4)
+; MIPS32EL-NEXT:    sw $2, %lo(gv4i16)($1)
+; MIPS32EL-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32EL-NEXT:    addiu $sp, $sp, 24
+; MIPS32EL-NEXT:    jr $ra
+; MIPS32EL-NEXT:    nop
+;
+; MIPS64EL-LABEL: calli16_4:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    .cfi_offset 31, -8
+; MIPS64EL-NEXT:    .cfi_offset 28, -16
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_4)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli16_4)))
+; MIPS64EL-NEXT:    lui $1, 10
+; MIPS64EL-NEXT:    daddiu $1, $1, 9
+; MIPS64EL-NEXT:    dsll $1, $1, 16
+; MIPS64EL-NEXT:    daddiu $1, $1, 7
+; MIPS64EL-NEXT:    dsll $1, $1, 16
+; MIPS64EL-NEXT:    daddiu $4, $1, 6
+; MIPS64EL-NEXT:    lui $1, 1
+; MIPS64EL-NEXT:    daddiu $1, $1, 16385
+; MIPS64EL-NEXT:    dsll $1, $1, 16
+; MIPS64EL-NEXT:    daddiu $1, $1, 8193
+; MIPS64EL-NEXT:    dsll $1, $1, 19
+; MIPS64EL-NEXT:    daddiu $5, $1, 12
+; MIPS64EL-NEXT:    ld $25, %call16(i16_4)($gp)
+; MIPS64EL-NEXT:    jalr $25
+; MIPS64EL-NEXT:    nop
+; MIPS64EL-NEXT:    ld $1, %got_disp(gv4i16)($gp)
+; MIPS64EL-NEXT:    sd $2, 0($1)
+; MIPS64EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: calli16_4:
+; MIPS32R5EL:       # %bb.0: # %entry
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -24
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32R5EL-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    .cfi_offset 31, -4
+; MIPS32R5EL-NEXT:    lui $1, 7
+; MIPS32R5EL-NEXT:    ori $4, $1, 6
+; MIPS32R5EL-NEXT:    lui $1, 8
+; MIPS32R5EL-NEXT:    ori $6, $1, 12
+; MIPS32R5EL-NEXT:    lui $1, 10
+; MIPS32R5EL-NEXT:    ori $5, $1, 9
+; MIPS32R5EL-NEXT:    move $7, $5
+; MIPS32R5EL-NEXT:    jal i16_4
+; MIPS32R5EL-NEXT:    nop
+; MIPS32R5EL-NEXT:    lui $1, %hi(gv4i16)
+; MIPS32R5EL-NEXT:    addiu $4, $1, %lo(gv4i16)
+; MIPS32R5EL-NEXT:    sw $3, 4($4)
+; MIPS32R5EL-NEXT:    sw $2, %lo(gv4i16)($1)
+; MIPS32R5EL-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 24
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: calli16_4:
+; MIPS64R5EL:       # %bb.0: # %entry
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    .cfi_offset 31, -8
+; MIPS64R5EL-NEXT:    .cfi_offset 28, -16
+; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_4)))
+; MIPS64R5EL-NEXT:    daddu $1, $1, $25
+; MIPS64R5EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli16_4)))
+; MIPS64R5EL-NEXT:    lui $1, 10
+; MIPS64R5EL-NEXT:    daddiu $1, $1, 9
+; MIPS64R5EL-NEXT:    dsll $1, $1, 16
+; MIPS64R5EL-NEXT:    daddiu $1, $1, 7
+; MIPS64R5EL-NEXT:    dsll $1, $1, 16
+; MIPS64R5EL-NEXT:    daddiu $4, $1, 6
+; MIPS64R5EL-NEXT:    lui $1, 1
+; MIPS64R5EL-NEXT:    daddiu $1, $1, 16385
+; MIPS64R5EL-NEXT:    dsll $1, $1, 16
+; MIPS64R5EL-NEXT:    daddiu $1, $1, 8193
+; MIPS64R5EL-NEXT:    dsll $1, $1, 19
+; MIPS64R5EL-NEXT:    daddiu $5, $1, 12
+; MIPS64R5EL-NEXT:    ld $25, %call16(i16_4)($gp)
+; MIPS64R5EL-NEXT:    jalr $25
+; MIPS64R5EL-NEXT:    nop
+; MIPS64R5EL-NEXT:    ld $1, %got_disp(gv4i16)($gp)
+; MIPS64R5EL-NEXT:    sd $2, 0($1)
+; MIPS64R5EL-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
 entry:
-; ALL-LABEL: calli16_4:
-; MIPS32-DAG: ori $4
-; MIPS32-DAG: ori $5
-; MIPS32-DAG: ori $6
-; MIPS32-DAG: move $7
-
-; MIPS32R5-DAG: ori $4
-; MIPS32R5-DAG: ori $5
-; MIPS32R5-DAG: ori $6
-; MIPS32R5-DAG: move $7
-
-; MIPS64-DAG: daddiu $4
-; MIPS64-DAG: daddiu $5
-
-; MIPS64R5-NOT: ld $4
-; MIPS64R5-NOT: ld $5
-
-; MIPS32: jal i16_4
-; MIPS64: jalr $25
-
-; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
-; MIPS32-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}})
-
-; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}})
-; MIPS32R5-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}})
-
-; MIPS64: sd $2
-; MIPS64R5: sd $2
-
   %0 = call <4 x i16> @i16_4(<4 x i16> <i16 6, i16 7, i16 9, i16 10>, <4 x i16> <i16 12, i16 8, i16 9, i16 10>)
   store <4 x i16> %0, <4 x i16> * @gv4i16
   ret void
 }
 
 define void @calli16_8() {
+; MIPS32EB-LABEL: calli16_8:
+; MIPS32EB:       # %bb.0: # %entry
+; MIPS32EB-NEXT:    addiu $sp, $sp, -40
+; MIPS32EB-NEXT:    .cfi_def_cfa_offset 40
+; MIPS32EB-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32EB-NEXT:    .cfi_offset 31, -4
+; MIPS32EB-NEXT:    lui $1, 9
+; MIPS32EB-NEXT:    ori $5, $1, 10
+; MIPS32EB-NEXT:    sw $5, 28($sp)
+; MIPS32EB-NEXT:    lui $1, 12
+; MIPS32EB-NEXT:    ori $1, $1, 8
+; MIPS32EB-NEXT:    sw $1, 24($sp)
+; MIPS32EB-NEXT:    sw $5, 20($sp)
+; MIPS32EB-NEXT:    lui $1, 6
+; MIPS32EB-NEXT:    ori $4, $1, 7
+; MIPS32EB-NEXT:    sw $4, 16($sp)
+; MIPS32EB-NEXT:    move $6, $4
+; MIPS32EB-NEXT:    move $7, $5
+; MIPS32EB-NEXT:    jal i16_8
+; MIPS32EB-NEXT:    nop
+; MIPS32EB-NEXT:    lui $1, %hi(gv8i16)
+; MIPS32EB-NEXT:    addiu $6, $1, %lo(gv8i16)
+; MIPS32EB-NEXT:    sw $5, 12($6)
+; MIPS32EB-NEXT:    sw $4, 8($6)
+; MIPS32EB-NEXT:    sw $3, 4($6)
+; MIPS32EB-NEXT:    sw $2, %lo(gv8i16)($1)
+; MIPS32EB-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32EB-NEXT:    addiu $sp, $sp, 40
+; MIPS32EB-NEXT:    jr $ra
+; MIPS32EB-NEXT:    nop
+;
+; MIPS64EB-LABEL: calli16_8:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    .cfi_offset 31, -8
+; MIPS64EB-NEXT:    .cfi_offset 28, -16
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_8)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli16_8)))
+; MIPS64EB-NEXT:    lui $1, 6
+; MIPS64EB-NEXT:    daddiu $1, $1, 7
+; MIPS64EB-NEXT:    dsll $1, $1, 16
+; MIPS64EB-NEXT:    daddiu $1, $1, 9
+; MIPS64EB-NEXT:    dsll $1, $1, 16
+; MIPS64EB-NEXT:    daddiu $4, $1, 10
+; MIPS64EB-NEXT:    lui $1, 2
+; MIPS64EB-NEXT:    daddiu $1, $1, -32767
+; MIPS64EB-NEXT:    dsll $1, $1, 19
+; MIPS64EB-NEXT:    daddiu $1, $1, 9
+; MIPS64EB-NEXT:    dsll $1, $1, 16
+; MIPS64EB-NEXT:    daddiu $7, $1, 10
+; MIPS64EB-NEXT:    ld $25, %call16(i16_8)($gp)
+; MIPS64EB-NEXT:    move $5, $4
+; MIPS64EB-NEXT:    move $6, $4
+; MIPS64EB-NEXT:    jalr $25
+; MIPS64EB-NEXT:    nop
+; MIPS64EB-NEXT:    ld $1, %got_disp(gv8i16)($gp)
+; MIPS64EB-NEXT:    sd $3, 8($1)
+; MIPS64EB-NEXT:    sd $2, 0($1)
+; MIPS64EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5EB-LABEL: calli16_8:
+; MIPS32R5EB:       # %bb.0: # %entry
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, -40
+; MIPS32R5EB-NEXT:    .cfi_def_cfa_offset 40
+; MIPS32R5EB-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32R5EB-NEXT:    .cfi_offset 31, -4
+; MIPS32R5EB-NEXT:    lui $1, 6
+; MIPS32R5EB-NEXT:    ori $1, $1, 7
+; MIPS32R5EB-NEXT:    lui $2, 9
+; MIPS32R5EB-NEXT:    ori $2, $2, 10
+; MIPS32R5EB-NEXT:    fill.w $w0, $2
+; MIPS32R5EB-NEXT:    insert.w $w0[1], $1
+; MIPS32R5EB-NEXT:    splati.d $w0, $w0[0]
+; MIPS32R5EB-NEXT:    copy_s.w $4, $w0[0]
+; MIPS32R5EB-NEXT:    copy_s.w $5, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $6, $w0[2]
+; MIPS32R5EB-NEXT:    copy_s.w $7, $w0[3]
+; MIPS32R5EB-NEXT:    lui $1, %hi($CPI33_0)
+; MIPS32R5EB-NEXT:    addiu $1, $1, %lo($CPI33_0)
+; MIPS32R5EB-NEXT:    ld.w $w0, 0($1)
+; MIPS32R5EB-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5EB-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5EB-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5EB-NEXT:    copy_s.w $8, $w0[3]
+; MIPS32R5EB-NEXT:    sw $8, 28($sp)
+; MIPS32R5EB-NEXT:    sw $3, 24($sp)
+; MIPS32R5EB-NEXT:    sw $2, 20($sp)
+; MIPS32R5EB-NEXT:    sw $1, 16($sp)
+; MIPS32R5EB-NEXT:    jal i16_8
+; MIPS32R5EB-NEXT:    nop
+; MIPS32R5EB-NEXT:    lui $1, %hi(gv8i16)
+; MIPS32R5EB-NEXT:    addiu $1, $1, %lo(gv8i16)
+; MIPS32R5EB-NEXT:    ldi.b $w0, 0
+; MIPS32R5EB-NEXT:    insert.w $w0[0], $2
+; MIPS32R5EB-NEXT:    insert.w $w0[1], $3
+; MIPS32R5EB-NEXT:    insert.w $w0[2], $4
+; MIPS32R5EB-NEXT:    insert.w $w0[3], $5
+; MIPS32R5EB-NEXT:    st.w $w0, 0($1)
+; MIPS32R5EB-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32R5EB-NEXT:    addiu $sp, $sp, 40
+; MIPS32R5EB-NEXT:    jr $ra
+; MIPS32R5EB-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: calli16_8:
+; MIPS64R5EB:       # %bb.0: # %entry
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    .cfi_offset 31, -8
+; MIPS64R5EB-NEXT:    .cfi_offset 28, -16
+; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_8)))
+; MIPS64R5EB-NEXT:    daddu $1, $1, $25
+; MIPS64R5EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli16_8)))
+; MIPS64R5EB-NEXT:    lui $1, 9
+; MIPS64R5EB-NEXT:    ori $1, $1, 10
+; MIPS64R5EB-NEXT:    lui $2, 6
+; MIPS64R5EB-NEXT:    ori $2, $2, 7
+; MIPS64R5EB-NEXT:    dinsu $1, $2, 32, 32
+; MIPS64R5EB-NEXT:    fill.d $w0, $1
+; MIPS64R5EB-NEXT:    copy_s.d $4, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.d $5, $w0[1]
+; MIPS64R5EB-NEXT:    ld $1, %got_page(.LCPI33_0)($gp)
+; MIPS64R5EB-NEXT:    daddiu $1, $1, %got_ofst(.LCPI33_0)
+; MIPS64R5EB-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5EB-NEXT:    copy_s.d $6, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.d $7, $w0[1]
+; MIPS64R5EB-NEXT:    ld $25, %call16(i16_8)($gp)
+; MIPS64R5EB-NEXT:    jalr $25
+; MIPS64R5EB-NEXT:    nop
+; MIPS64R5EB-NEXT:    ldi.b $w0, 0
+; MIPS64R5EB-NEXT:    ld $1, %got_disp(gv8i16)($gp)
+; MIPS64R5EB-NEXT:    insert.d $w0[0], $2
+; MIPS64R5EB-NEXT:    insert.d $w0[1], $3
+; MIPS64R5EB-NEXT:    st.d $w0, 0($1)
+; MIPS64R5EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 16
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS32EL-LABEL: calli16_8:
+; MIPS32EL:       # %bb.0: # %entry
+; MIPS32EL-NEXT:    addiu $sp, $sp, -40
+; MIPS32EL-NEXT:    .cfi_def_cfa_offset 40
+; MIPS32EL-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32EL-NEXT:    .cfi_offset 31, -4
+; MIPS32EL-NEXT:    lui $1, 10
+; MIPS32EL-NEXT:    ori $5, $1, 9
+; MIPS32EL-NEXT:    sw $5, 28($sp)
+; MIPS32EL-NEXT:    lui $1, 8
+; MIPS32EL-NEXT:    ori $1, $1, 12
+; MIPS32EL-NEXT:    sw $1, 24($sp)
+; MIPS32EL-NEXT:    sw $5, 20($sp)
+; MIPS32EL-NEXT:    lui $1, 7
+; MIPS32EL-NEXT:    ori $4, $1, 6
+; MIPS32EL-NEXT:    sw $4, 16($sp)
+; MIPS32EL-NEXT:    move $6, $4
+; MIPS32EL-NEXT:    move $7, $5
+; MIPS32EL-NEXT:    jal i16_8
+; MIPS32EL-NEXT:    nop
+; MIPS32EL-NEXT:    lui $1, %hi(gv8i16)
+; MIPS32EL-NEXT:    addiu $6, $1, %lo(gv8i16)
+; MIPS32EL-NEXT:    sw $5, 12($6)
+; MIPS32EL-NEXT:    sw $4, 8($6)
+; MIPS32EL-NEXT:    sw $3, 4($6)
+; MIPS32EL-NEXT:    sw $2, %lo(gv8i16)($1)
+; MIPS32EL-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32EL-NEXT:    addiu $sp, $sp, 40
+; MIPS32EL-NEXT:    jr $ra
+; MIPS32EL-NEXT:    nop
+;
+; MIPS64EL-LABEL: calli16_8:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    .cfi_offset 31, -8
+; MIPS64EL-NEXT:    .cfi_offset 28, -16
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_8)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli16_8)))
+; MIPS64EL-NEXT:    lui $1, 10
+; MIPS64EL-NEXT:    daddiu $1, $1, 9
+; MIPS64EL-NEXT:    dsll $1, $1, 16
+; MIPS64EL-NEXT:    daddiu $1, $1, 7
+; MIPS64EL-NEXT:    dsll $1, $1, 16
+; MIPS64EL-NEXT:    daddiu $4, $1, 6
+; MIPS64EL-NEXT:    lui $1, 1
+; MIPS64EL-NEXT:    daddiu $1, $1, 16385
+; MIPS64EL-NEXT:    dsll $1, $1, 16
+; MIPS64EL-NEXT:    daddiu $1, $1, 8193
+; MIPS64EL-NEXT:    dsll $1, $1, 19
+; MIPS64EL-NEXT:    daddiu $7, $1, 12
+; MIPS64EL-NEXT:    ld $25, %call16(i16_8)($gp)
+; MIPS64EL-NEXT:    move $5, $4
+; MIPS64EL-NEXT:    move $6, $4
+; MIPS64EL-NEXT:    jalr $25
+; MIPS64EL-NEXT:    nop
+; MIPS64EL-NEXT:    ld $1, %got_disp(gv8i16)($gp)
+; MIPS64EL-NEXT:    sd $3, 8($1)
+; MIPS64EL-NEXT:    sd $2, 0($1)
+; MIPS64EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
+;
+; MIPS32R5EL-LABEL: calli16_8:
+; MIPS32R5EL:       # %bb.0: # %entry
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, -40
+; MIPS32R5EL-NEXT:    .cfi_def_cfa_offset 40
+; MIPS32R5EL-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32R5EL-NEXT:    .cfi_offset 31, -4
+; MIPS32R5EL-NEXT:    lui $1, 10
+; MIPS32R5EL-NEXT:    ori $1, $1, 9
+; MIPS32R5EL-NEXT:    lui $2, 7
+; MIPS32R5EL-NEXT:    ori $2, $2, 6
+; MIPS32R5EL-NEXT:    fill.w $w0, $2
+; MIPS32R5EL-NEXT:    insert.w $w0[1], $1
+; MIPS32R5EL-NEXT:    splati.d $w0, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $4, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $5, $w0[1]
+; MIPS32R5EL-NEXT:    copy_s.w $6, $w0[2]
+; MIPS32R5EL-NEXT:    copy_s.w $7, $w0[3]
+; MIPS32R5EL-NEXT:    lui $1, %hi($CPI33_0)
+; MIPS32R5EL-NEXT:    addiu $1, $1, %lo($CPI33_0)
+; MIPS32R5EL-NEXT:    ld.w $w0, 0($1)
+; MIPS32R5EL-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5EL-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5EL-NEXT:    copy_s.w $8, $w0[3]
+; MIPS32R5EL-NEXT:    sw $8, 28($sp)
+; MIPS32R5EL-NEXT:    sw $3, 24($sp)
+; MIPS32R5EL-NEXT:    sw $2, 20($sp)
+; MIPS32R5EL-NEXT:    sw $1, 16($sp)
+; MIPS32R5EL-NEXT:    jal i16_8
+; MIPS32R5EL-NEXT:    nop
+; MIPS32R5EL-NEXT:    lui $1, %hi(gv8i16)
+; MIPS32R5EL-NEXT:    addiu $1, $1, %lo(gv8i16)
+; MIPS32R5EL-NEXT:    ldi.b $w0, 0
+; MIPS32R5EL-NEXT:    insert.w $w0[0], $2
+; MIPS32R5EL-NEXT:    insert.w $w0[1], $3
+; MIPS32R5EL-NEXT:    insert.w $w0[2], $4
+; MIPS32R5EL-NEXT:    insert.w $w0[3], $5
+; MIPS32R5EL-NEXT:    st.w $w0, 0($1)
+; MIPS32R5EL-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32R5EL-NEXT:    addiu $sp, $sp, 40
+; MIPS32R5EL-NEXT:    jr $ra
+; MIPS32R5EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: calli16_8:
+; MIPS64R5EL:       # %bb.0: # %entry
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    .cfi_offset 31, -8
+; MIPS64R5EL-NEXT:    .cfi_offset 28, -16
+; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(calli16_8)))
+; MIPS64R5EL-NEXT:    daddu $1, $1, $25
+; MIPS64R5EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli16_8)))
+; MIPS64R5EL-NEXT:    lui $1, 7
+; MIPS64R5EL-NEXT:    ori $1, $1, 6
+; MIPS64R5EL-NEXT:    lui $2, 10
+; MIPS64R5EL-NEXT:    ori $2, $2, 9
+; MIPS64R5EL-NEXT:    dinsu $1, $2, 32, 32
+; MIPS64R5EL-NEXT:    fill.d $w0, $1
+; MIPS64R5EL-NEXT:    copy_s.d $4, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.d $5, $w0[1]
+; MIPS64R5EL-NEXT:    ld $1, %got_page(.LCPI33_0)($gp)
+; MIPS64R5EL-NEXT:    daddiu $1, $1, %got_ofst(.LCPI33_0)
+; MIPS64R5EL-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5EL-NEXT:    copy_s.d $6, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.d $7, $w0[1]
+; MIPS64R5EL-NEXT:    ld $25, %call16(i16_8)($gp)
+; MIPS64R5EL-NEXT:    jalr $25
+; MIPS64R5EL-NEXT:    nop
+; MIPS64R5EL-NEXT:    ldi.b $w0, 0
+; MIPS64R5EL-NEXT:    ld $1, %got_disp(gv8i16)($gp)
+; MIPS64R5EL-NEXT:    insert.d $w0[0], $2
+; MIPS64R5EL-NEXT:    insert.d $w0[1], $3
+; MIPS64R5EL-NEXT:    st.d $w0, 0($1)
+; MIPS64R5EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 16
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
 entry:
-; ALL-LABEL: calli16_8:
-
-; MIPS32-DAG: sw  ${{[0-9]+}}, 28($sp)
-; MIPS32-DAG: sw  ${{[0-9]+}}, 24($sp)
-; MIPS32-DAG: sw  ${{[0-9]+}}, 20($sp)
-; MIPS32-DAG: sw  ${{[0-9]+}}, 16($sp)
-
-; MIPS32-DAG: ori $4, ${{[0-9]+}}, {{[0-9]+}}
-; MIPS32-DAG: ori $5, ${{[0-9]+}}, {{[0-9]+}}
-; MIPS32-DAG: move  $6, ${{[0-9]+}}
-; MIPS32-DAG: move  $7, ${{[0-9]+}}
-
-; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
-; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
-; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
-; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
-
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
-
-; MIPS64-DAG: daddiu $4
-; MIPS64-DAG: daddiu $7
-; MIPS64-DAG: move $5
-; MIPS64-DAG: move $6
-
-; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0]
-; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1]
-; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0]
-; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1]
-
-; MIPS32: jal i16_8
-; MIPS64: jalr $25
-
-; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
-; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
-; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
-; MIPS32-DAG: sw $2, %lo(gv8i16)(${{[0-9]+}})
-
-; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
-; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
-; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
-; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
-; MIPS32R5-DAG: st.w $w[[W0]]
-
-; MIPS64: sd $3
-; MIPS64: sd $2
-
-; MIPS64R5-DAG: insert.d $w[[W2:[0-9]+]][0], $2
-; MIPS64R5-DAG: insert.d $w[[W2]][1], $3
-
   %0 = call <8 x i16> @i16_8(<8 x i16> <i16 6, i16 7, i16 9, i16 10, i16 6, i16 7, i16 9, i16 10>, <8 x i16> <i16 6, i16 7, i16 9, i16 10, i16 12, i16 8, i16 9, i16 10>)
   store <8 x i16> %0, <8 x i16> * @gv8i16
   ret void
 }
 
 define void @calli32_2() {
+; MIPS32-LABEL: calli32_2:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $sp, $sp, -24
+; MIPS32-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    .cfi_offset 31, -4
+; MIPS32-NEXT:    addiu $4, $zero, 6
+; MIPS32-NEXT:    addiu $5, $zero, 7
+; MIPS32-NEXT:    addiu $6, $zero, 12
+; MIPS32-NEXT:    addiu $7, $zero, 8
+; MIPS32-NEXT:    jal i32_2
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    lui $1, %hi(gv2i32)
+; MIPS32-NEXT:    addiu $4, $1, %lo(gv2i32)
+; MIPS32-NEXT:    sw $3, 4($4)
+; MIPS32-NEXT:    sw $2, %lo(gv2i32)($1)
+; MIPS32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    addiu $sp, $sp, 24
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64EB-LABEL: calli32_2:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    .cfi_offset 31, -8
+; MIPS64EB-NEXT:    .cfi_offset 28, -16
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(calli32_2)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli32_2)))
+; MIPS64EB-NEXT:    daddiu $1, $zero, 3
+; MIPS64EB-NEXT:    dsll $2, $1, 33
+; MIPS64EB-NEXT:    daddiu $4, $2, 7
+; MIPS64EB-NEXT:    dsll $1, $1, 34
+; MIPS64EB-NEXT:    daddiu $5, $1, 8
+; MIPS64EB-NEXT:    ld $25, %call16(i32_2)($gp)
+; MIPS64EB-NEXT:    jalr $25
+; MIPS64EB-NEXT:    nop
+; MIPS64EB-NEXT:    ld $1, %got_disp(gv2i32)($gp)
+; MIPS64EB-NEXT:    sd $2, 0($1)
+; MIPS64EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5-LABEL: calli32_2:
+; MIPS32R5:       # %bb.0: # %entry
+; MIPS32R5-NEXT:    addiu $sp, $sp, -24
+; MIPS32R5-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32R5-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32R5-NEXT:    .cfi_offset 31, -4
+; MIPS32R5-NEXT:    addiu $4, $zero, 6
+; MIPS32R5-NEXT:    addiu $5, $zero, 7
+; MIPS32R5-NEXT:    addiu $6, $zero, 12
+; MIPS32R5-NEXT:    addiu $7, $zero, 8
+; MIPS32R5-NEXT:    jal i32_2
+; MIPS32R5-NEXT:    nop
+; MIPS32R5-NEXT:    lui $1, %hi(gv2i32)
+; MIPS32R5-NEXT:    addiu $4, $1, %lo(gv2i32)
+; MIPS32R5-NEXT:    sw $3, 4($4)
+; MIPS32R5-NEXT:    sw $2, %lo(gv2i32)($1)
+; MIPS32R5-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R5-NEXT:    addiu $sp, $sp, 24
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: calli32_2:
+; MIPS64R5EB:       # %bb.0: # %entry
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    .cfi_offset 31, -8
+; MIPS64R5EB-NEXT:    .cfi_offset 28, -16
+; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(calli32_2)))
+; MIPS64R5EB-NEXT:    daddu $1, $1, $25
+; MIPS64R5EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli32_2)))
+; MIPS64R5EB-NEXT:    daddiu $1, $zero, 3
+; MIPS64R5EB-NEXT:    dsll $2, $1, 33
+; MIPS64R5EB-NEXT:    daddiu $4, $2, 7
+; MIPS64R5EB-NEXT:    dsll $1, $1, 34
+; MIPS64R5EB-NEXT:    daddiu $5, $1, 8
+; MIPS64R5EB-NEXT:    ld $25, %call16(i32_2)($gp)
+; MIPS64R5EB-NEXT:    jalr $25
+; MIPS64R5EB-NEXT:    nop
+; MIPS64R5EB-NEXT:    ld $1, %got_disp(gv2i32)($gp)
+; MIPS64R5EB-NEXT:    sd $2, 0($1)
+; MIPS64R5EB-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS64EL-LABEL: calli32_2:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    .cfi_offset 31, -8
+; MIPS64EL-NEXT:    .cfi_offset 28, -16
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(calli32_2)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli32_2)))
+; MIPS64EL-NEXT:    daddiu $1, $zero, 7
+; MIPS64EL-NEXT:    dsll $1, $1, 32
+; MIPS64EL-NEXT:    daddiu $4, $1, 6
+; MIPS64EL-NEXT:    daddiu $1, $zero, 1
+; MIPS64EL-NEXT:    dsll $1, $1, 35
+; MIPS64EL-NEXT:    daddiu $5, $1, 12
+; MIPS64EL-NEXT:    ld $25, %call16(i32_2)($gp)
+; MIPS64EL-NEXT:    jalr $25
+; MIPS64EL-NEXT:    nop
+; MIPS64EL-NEXT:    ld $1, %got_disp(gv2i32)($gp)
+; MIPS64EL-NEXT:    sd $2, 0($1)
+; MIPS64EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: calli32_2:
+; MIPS64R5EL:       # %bb.0: # %entry
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    .cfi_offset 31, -8
+; MIPS64R5EL-NEXT:    .cfi_offset 28, -16
+; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(calli32_2)))
+; MIPS64R5EL-NEXT:    daddu $1, $1, $25
+; MIPS64R5EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli32_2)))
+; MIPS64R5EL-NEXT:    daddiu $1, $zero, 7
+; MIPS64R5EL-NEXT:    dsll $1, $1, 32
+; MIPS64R5EL-NEXT:    daddiu $4, $1, 6
+; MIPS64R5EL-NEXT:    daddiu $1, $zero, 1
+; MIPS64R5EL-NEXT:    dsll $1, $1, 35
+; MIPS64R5EL-NEXT:    daddiu $5, $1, 12
+; MIPS64R5EL-NEXT:    ld $25, %call16(i32_2)($gp)
+; MIPS64R5EL-NEXT:    jalr $25
+; MIPS64R5EL-NEXT:    nop
+; MIPS64R5EL-NEXT:    ld $1, %got_disp(gv2i32)($gp)
+; MIPS64R5EL-NEXT:    sd $2, 0($1)
+; MIPS64R5EL-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
 entry:
-; ALL-LABEL: calli32_2:
-
-; MIPS32-DAG: addiu $4
-; MIPS32-DAG: addiu $5
-; MIPS32-DAG: addiu $6
-; MIPS32-DAG: addiu $7
-
-; MIPS32R5-DAG: addiu $4
-; MIPS32R5-DAG: addiu $5
-; MIPS32R5-DAG: addiu $6
-; MIPS32R5-DAG: addiu $7
-
-; MIPS64: daddiu $4
-; MIPS64: daddiu $5
-
-; MIPS64R5-NOT ld $4
-; MIPS64R5-NOT: ld $5
-
-; MIPS32: jal i32_2
-; MIPS64: jalr $25
-
-; MIPS32-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}})
-; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
-
-; MIPS32R5-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}})
-; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}})
-
-; MIPS64: sd $2
-
-; MIPS64R5: sd $2
-
   %0 = call <2 x i32> @i32_2(<2 x i32> <i32 6, i32 7>, <2 x i32> <i32 12, i32 8>)
   store <2 x i32> %0, <2 x i32> * @gv2i32
   ret void
 }
 
 define void @calli32_4() {
+; MIPS32-LABEL: calli32_4:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $sp, $sp, -40
+; MIPS32-NEXT:    .cfi_def_cfa_offset 40
+; MIPS32-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    .cfi_offset 31, -4
+; MIPS32-NEXT:    addiu $1, $zero, 9
+; MIPS32-NEXT:    addiu $2, $zero, 10
+; MIPS32-NEXT:    sw $2, 28($sp)
+; MIPS32-NEXT:    sw $1, 24($sp)
+; MIPS32-NEXT:    addiu $1, $zero, 8
+; MIPS32-NEXT:    sw $1, 20($sp)
+; MIPS32-NEXT:    addiu $1, $zero, 12
+; MIPS32-NEXT:    sw $1, 16($sp)
+; MIPS32-NEXT:    addiu $4, $zero, 6
+; MIPS32-NEXT:    addiu $5, $zero, 7
+; MIPS32-NEXT:    addiu $6, $zero, 9
+; MIPS32-NEXT:    addiu $7, $zero, 10
+; MIPS32-NEXT:    jal i32_4
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    lui $1, %hi(gv4i32)
+; MIPS32-NEXT:    addiu $6, $1, %lo(gv4i32)
+; MIPS32-NEXT:    sw $5, 12($6)
+; MIPS32-NEXT:    sw $4, 8($6)
+; MIPS32-NEXT:    sw $3, 4($6)
+; MIPS32-NEXT:    sw $2, %lo(gv4i32)($1)
+; MIPS32-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    addiu $sp, $sp, 40
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64EB-LABEL: calli32_4:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    .cfi_offset 31, -8
+; MIPS64EB-NEXT:    .cfi_offset 28, -16
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(calli32_4)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli32_4)))
+; MIPS64EB-NEXT:    daddiu $1, $zero, 3
+; MIPS64EB-NEXT:    dsll $2, $1, 33
+; MIPS64EB-NEXT:    daddiu $4, $2, 7
+; MIPS64EB-NEXT:    dsll $1, $1, 34
+; MIPS64EB-NEXT:    daddiu $6, $1, 8
+; MIPS64EB-NEXT:    daddiu $1, $zero, 9
+; MIPS64EB-NEXT:    dsll $1, $1, 32
+; MIPS64EB-NEXT:    daddiu $5, $1, 10
+; MIPS64EB-NEXT:    ld $25, %call16(i32_4)($gp)
+; MIPS64EB-NEXT:    move $7, $5
+; MIPS64EB-NEXT:    jalr $25
+; MIPS64EB-NEXT:    nop
+; MIPS64EB-NEXT:    ld $1, %got_disp(gv4i32)($gp)
+; MIPS64EB-NEXT:    sd $3, 8($1)
+; MIPS64EB-NEXT:    sd $2, 0($1)
+; MIPS64EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5-LABEL: calli32_4:
+; MIPS32R5:       # %bb.0: # %entry
+; MIPS32R5-NEXT:    addiu $sp, $sp, -40
+; MIPS32R5-NEXT:    .cfi_def_cfa_offset 40
+; MIPS32R5-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32R5-NEXT:    .cfi_offset 31, -4
+; MIPS32R5-NEXT:    addiu $1, $zero, 9
+; MIPS32R5-NEXT:    addiu $2, $zero, 10
+; MIPS32R5-NEXT:    sw $2, 28($sp)
+; MIPS32R5-NEXT:    sw $1, 24($sp)
+; MIPS32R5-NEXT:    addiu $1, $zero, 8
+; MIPS32R5-NEXT:    sw $1, 20($sp)
+; MIPS32R5-NEXT:    addiu $1, $zero, 12
+; MIPS32R5-NEXT:    sw $1, 16($sp)
+; MIPS32R5-NEXT:    addiu $4, $zero, 6
+; MIPS32R5-NEXT:    addiu $5, $zero, 7
+; MIPS32R5-NEXT:    addiu $6, $zero, 9
+; MIPS32R5-NEXT:    addiu $7, $zero, 10
+; MIPS32R5-NEXT:    jal i32_4
+; MIPS32R5-NEXT:    nop
+; MIPS32R5-NEXT:    ldi.b $w0, 0
+; MIPS32R5-NEXT:    insert.w $w0[0], $2
+; MIPS32R5-NEXT:    insert.w $w0[1], $3
+; MIPS32R5-NEXT:    insert.w $w0[2], $4
+; MIPS32R5-NEXT:    lui $1, %hi(gv4i32)
+; MIPS32R5-NEXT:    insert.w $w0[3], $5
+; MIPS32R5-NEXT:    addiu $1, $1, %lo(gv4i32)
+; MIPS32R5-NEXT:    st.w $w0, 0($1)
+; MIPS32R5-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32R5-NEXT:    addiu $sp, $sp, 40
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: calli32_4:
+; MIPS64R5:       # %bb.0: # %entry
+; MIPS64R5-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64R5-NEXT:    .cfi_offset 31, -8
+; MIPS64R5-NEXT:    .cfi_offset 28, -16
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(calli32_4)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli32_4)))
+; MIPS64R5-NEXT:    ld $1, %got_page(.LCPI35_0)($gp)
+; MIPS64R5-NEXT:    daddiu $1, $1, %got_ofst(.LCPI35_0)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $4, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $5, $w0[1]
+; MIPS64R5-NEXT:    ld $1, %got_page(.LCPI35_1)($gp)
+; MIPS64R5-NEXT:    daddiu $1, $1, %got_ofst(.LCPI35_1)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $6, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $7, $w0[1]
+; MIPS64R5-NEXT:    ld $25, %call16(i32_4)($gp)
+; MIPS64R5-NEXT:    jalr $25
+; MIPS64R5-NEXT:    nop
+; MIPS64R5-NEXT:    ldi.b $w0, 0
+; MIPS64R5-NEXT:    insert.d $w0[0], $2
+; MIPS64R5-NEXT:    insert.d $w0[1], $3
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv4i32)($gp)
+; MIPS64R5-NEXT:    st.d $w0, 0($1)
+; MIPS64R5-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5-NEXT:    daddiu $sp, $sp, 16
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS64EL-LABEL: calli32_4:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    .cfi_offset 31, -8
+; MIPS64EL-NEXT:    .cfi_offset 28, -16
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(calli32_4)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli32_4)))
+; MIPS64EL-NEXT:    daddiu $1, $zero, 7
+; MIPS64EL-NEXT:    dsll $1, $1, 32
+; MIPS64EL-NEXT:    daddiu $4, $1, 6
+; MIPS64EL-NEXT:    daddiu $1, $zero, 1
+; MIPS64EL-NEXT:    dsll $1, $1, 35
+; MIPS64EL-NEXT:    daddiu $6, $1, 12
+; MIPS64EL-NEXT:    daddiu $1, $zero, 5
+; MIPS64EL-NEXT:    dsll $1, $1, 33
+; MIPS64EL-NEXT:    daddiu $5, $1, 9
+; MIPS64EL-NEXT:    ld $25, %call16(i32_4)($gp)
+; MIPS64EL-NEXT:    move $7, $5
+; MIPS64EL-NEXT:    jalr $25
+; MIPS64EL-NEXT:    nop
+; MIPS64EL-NEXT:    ld $1, %got_disp(gv4i32)($gp)
+; MIPS64EL-NEXT:    sd $3, 8($1)
+; MIPS64EL-NEXT:    sd $2, 0($1)
+; MIPS64EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
 entry:
-; ALL-LABEL: calli32_4:
-
-; MIPS32-DAG: sw  ${{[0-9]+}}, 28($sp)
-; MIPS32-DAG: sw  ${{[0-9]+}}, 24($sp)
-; MIPS32-DAG: sw  ${{[0-9]+}}, 20($sp)
-; MIPS32-DAG: sw  ${{[0-9]+}}, 16($sp)
-
-; MIPS32-DAG: addiu $4
-; MIPS32-DAG: addiu $5
-; MIPS32-DAG: addiu $6
-; MIPS32-DAG: addiu $7
-
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
-
-; MIPS32R5-DAG: addiu $4
-; MIPS32R5-DAG: addiu $5
-; MIPS32R5-DAG: addiu $6
-; MIPS32R5-DAG: addiu $7
-
-; MIPS64-DAG: daddiu $4
-; MIPS64-DAG: daddiu $6
-; MIPS64-DAG: daddiu $5
-; MIPS64-DAG: move $7
-
-; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0]
-; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1]
-; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0]
-; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1]
-
-; MIPS32: jal i32_4
-; MIPS64: jalr $25
-
-; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
-; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
-; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
-; MIPS32-DAG: sw $2, %lo(gv4i32)(${{[0-9]+}})
-
-; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
-; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
-; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
-; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
-; MIPS32R5-DAG: st.w $w[[W0]]
-
-; MIPS64-DAG: sd $2
-; MIPS64-DAG: sd $3
-
-; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
-; MIPS64R6-DAG: insert.d $w[[W0:[0-9]+]][1], $3
-
   %0 = call <4 x i32> @i32_4(<4 x i32> <i32 6, i32 7, i32 9, i32 10>, <4 x i32> <i32 12, i32 8, i32 9, i32 10>)
   store <4 x i32> %0, <4 x i32> * @gv4i32
   ret void
 }
 
 define void @calli64_2() {
+; MIPS32EB-LABEL: calli64_2:
+; MIPS32EB:       # %bb.0: # %entry
+; MIPS32EB-NEXT:    addiu $sp, $sp, -40
+; MIPS32EB-NEXT:    .cfi_def_cfa_offset 40
+; MIPS32EB-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32EB-NEXT:    .cfi_offset 31, -4
+; MIPS32EB-NEXT:    addiu $1, $zero, 8
+; MIPS32EB-NEXT:    sw $1, 28($sp)
+; MIPS32EB-NEXT:    addiu $1, $zero, 12
+; MIPS32EB-NEXT:    sw $1, 20($sp)
+; MIPS32EB-NEXT:    sw $zero, 24($sp)
+; MIPS32EB-NEXT:    sw $zero, 16($sp)
+; MIPS32EB-NEXT:    addiu $4, $zero, 0
+; MIPS32EB-NEXT:    addiu $5, $zero, 6
+; MIPS32EB-NEXT:    addiu $6, $zero, 0
+; MIPS32EB-NEXT:    addiu $7, $zero, 7
+; MIPS32EB-NEXT:    jal i64_2
+; MIPS32EB-NEXT:    nop
+; MIPS32EB-NEXT:    lui $1, %hi(gv2i64)
+; MIPS32EB-NEXT:    addiu $6, $1, %lo(gv2i64)
+; MIPS32EB-NEXT:    sw $5, 12($6)
+; MIPS32EB-NEXT:    sw $4, 8($6)
+; MIPS32EB-NEXT:    sw $3, 4($6)
+; MIPS32EB-NEXT:    sw $2, %lo(gv2i64)($1)
+; MIPS32EB-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32EB-NEXT:    addiu $sp, $sp, 40
+; MIPS32EB-NEXT:    jr $ra
+; MIPS32EB-NEXT:    nop
+;
+; MIPS64-LABEL: calli64_2:
+; MIPS64:       # %bb.0: # %entry
+; MIPS64-NEXT:    daddiu $sp, $sp, -16
+; MIPS64-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    .cfi_offset 31, -8
+; MIPS64-NEXT:    .cfi_offset 28, -16
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(calli64_2)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli64_2)))
+; MIPS64-NEXT:    ld $25, %call16(i64_2)($gp)
+; MIPS64-NEXT:    daddiu $4, $zero, 6
+; MIPS64-NEXT:    daddiu $5, $zero, 7
+; MIPS64-NEXT:    daddiu $6, $zero, 12
+; MIPS64-NEXT:    daddiu $7, $zero, 8
+; MIPS64-NEXT:    jalr $25
+; MIPS64-NEXT:    nop
+; MIPS64-NEXT:    ld $1, %got_disp(gv2i64)($gp)
+; MIPS64-NEXT:    sd $3, 8($1)
+; MIPS64-NEXT:    sd $2, 0($1)
+; MIPS64-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    daddiu $sp, $sp, 16
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: calli64_2:
+; MIPS32R5:       # %bb.0: # %entry
+; MIPS32R5-NEXT:    addiu $sp, $sp, -40
+; MIPS32R5-NEXT:    .cfi_def_cfa_offset 40
+; MIPS32R5-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32R5-NEXT:    .cfi_offset 31, -4
+; MIPS32R5-NEXT:    lui $1, %hi($CPI36_0)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo($CPI36_0)
+; MIPS32R5-NEXT:    ld.w $w0, 0($1)
+; MIPS32R5-NEXT:    copy_s.w $4, $w0[0]
+; MIPS32R5-NEXT:    copy_s.w $5, $w0[1]
+; MIPS32R5-NEXT:    copy_s.w $6, $w0[2]
+; MIPS32R5-NEXT:    copy_s.w $7, $w0[3]
+; MIPS32R5-NEXT:    lui $1, %hi($CPI36_1)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo($CPI36_1)
+; MIPS32R5-NEXT:    ld.w $w0, 0($1)
+; MIPS32R5-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5-NEXT:    copy_s.w $3, $w0[2]
+; MIPS32R5-NEXT:    copy_s.w $8, $w0[3]
+; MIPS32R5-NEXT:    sw $8, 28($sp)
+; MIPS32R5-NEXT:    sw $3, 24($sp)
+; MIPS32R5-NEXT:    sw $2, 20($sp)
+; MIPS32R5-NEXT:    sw $1, 16($sp)
+; MIPS32R5-NEXT:    jal i64_2
+; MIPS32R5-NEXT:    nop
+; MIPS32R5-NEXT:    ldi.b $w0, 0
+; MIPS32R5-NEXT:    insert.w $w0[0], $2
+; MIPS32R5-NEXT:    lui $1, %hi(gv2i64)
+; MIPS32R5-NEXT:    insert.w $w0[1], $3
+; MIPS32R5-NEXT:    addiu $1, $1, %lo(gv2i64)
+; MIPS32R5-NEXT:    insert.w $w0[2], $4
+; MIPS32R5-NEXT:    insert.w $w0[3], $5
+; MIPS32R5-NEXT:    st.w $w0, 0($1)
+; MIPS32R5-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32R5-NEXT:    addiu $sp, $sp, 40
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: calli64_2:
+; MIPS64R5:       # %bb.0: # %entry
+; MIPS64R5-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64R5-NEXT:    .cfi_offset 31, -8
+; MIPS64R5-NEXT:    .cfi_offset 28, -16
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(calli64_2)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calli64_2)))
+; MIPS64R5-NEXT:    ld $25, %call16(i64_2)($gp)
+; MIPS64R5-NEXT:    daddiu $4, $zero, 6
+; MIPS64R5-NEXT:    daddiu $5, $zero, 7
+; MIPS64R5-NEXT:    daddiu $6, $zero, 12
+; MIPS64R5-NEXT:    daddiu $7, $zero, 8
+; MIPS64R5-NEXT:    jalr $25
+; MIPS64R5-NEXT:    nop
+; MIPS64R5-NEXT:    ldi.b $w0, 0
+; MIPS64R5-NEXT:    insert.d $w0[0], $2
+; MIPS64R5-NEXT:    insert.d $w0[1], $3
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv2i64)($gp)
+; MIPS64R5-NEXT:    st.d $w0, 0($1)
+; MIPS64R5-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5-NEXT:    daddiu $sp, $sp, 16
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS32EL-LABEL: calli64_2:
+; MIPS32EL:       # %bb.0: # %entry
+; MIPS32EL-NEXT:    addiu $sp, $sp, -40
+; MIPS32EL-NEXT:    .cfi_def_cfa_offset 40
+; MIPS32EL-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32EL-NEXT:    .cfi_offset 31, -4
+; MIPS32EL-NEXT:    addiu $1, $zero, 8
+; MIPS32EL-NEXT:    sw $1, 24($sp)
+; MIPS32EL-NEXT:    addiu $1, $zero, 12
+; MIPS32EL-NEXT:    sw $1, 16($sp)
+; MIPS32EL-NEXT:    sw $zero, 28($sp)
+; MIPS32EL-NEXT:    sw $zero, 20($sp)
+; MIPS32EL-NEXT:    addiu $4, $zero, 6
+; MIPS32EL-NEXT:    addiu $5, $zero, 0
+; MIPS32EL-NEXT:    addiu $6, $zero, 7
+; MIPS32EL-NEXT:    addiu $7, $zero, 0
+; MIPS32EL-NEXT:    jal i64_2
+; MIPS32EL-NEXT:    nop
+; MIPS32EL-NEXT:    lui $1, %hi(gv2i64)
+; MIPS32EL-NEXT:    addiu $6, $1, %lo(gv2i64)
+; MIPS32EL-NEXT:    sw $5, 12($6)
+; MIPS32EL-NEXT:    sw $4, 8($6)
+; MIPS32EL-NEXT:    sw $3, 4($6)
+; MIPS32EL-NEXT:    sw $2, %lo(gv2i64)($1)
+; MIPS32EL-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32EL-NEXT:    addiu $sp, $sp, 40
+; MIPS32EL-NEXT:    jr $ra
+; MIPS32EL-NEXT:    nop
 entry:
-; ALL-LABEL: calli64_2:
-
-; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 28($sp)
-; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 24($sp)
-; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 20($sp)
-; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 16($sp)
-
-; MIPS32-DAG: addiu $4
-; MIPS32-DAG: addiu $5
-; MIPS32-DAG: addiu $6
-; MIPS32-DAG: addiu $7
-
-; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
-; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
-; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
-; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
-
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
-
-; MIPS64: daddiu $4
-; MIPS64: daddiu $5
-; MIPS64: daddiu $6
-; MIPS64: daddiu $7
-
-; MIPS64R5: daddiu $4
-; MIPS64R5: daddiu $5
-; MIPS64R5: daddiu $6
-; MIPS64R5: daddiu $7
-
-; MIPS32: jal i64_2
-; MIPS64: jalr $25
-
-; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
-; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
-; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
-; MIPS32-DAG: sw $2, %lo(gv2i64)(${{[0-9]+}})
-
-; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
-; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
-; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
-; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
-; MIPS32R5-DAG: st.w $w[[W0]]
-
-; MIPS64-DAG: sd $3
-; MIPS64-DAG: sd $2
-
-; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
-; MIPS64R6-DAG: insert.d $w[[W0:[0-9]+]][1], $3
-
   %0 = call <2 x i64> @i64_2(<2 x i64> <i64 6, i64 7>, <2 x i64> <i64 12, i64 8>)
   store <2 x i64> %0, <2 x i64> * @gv2i64
   ret void
@@ -1279,157 +5846,546 @@ declare <4 x float> @float4_extern(<4 x
 declare <2 x double> @double2_extern(<2 x double>, <2 x double>)
 
 define void @callfloat_2() {
+; MIPS32-LABEL: callfloat_2:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $sp, $sp, -40
+; MIPS32-NEXT:    .cfi_def_cfa_offset 40
+; MIPS32-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    .cfi_offset 31, -4
+; MIPS32-NEXT:    lui $1, 16736
+; MIPS32-NEXT:    sw $1, 20($sp)
+; MIPS32-NEXT:    lui $1, 16704
+; MIPS32-NEXT:    sw $1, 16($sp)
+; MIPS32-NEXT:    addiu $4, $sp, 24
+; MIPS32-NEXT:    addiu $6, $zero, 0
+; MIPS32-NEXT:    lui $7, 49024
+; MIPS32-NEXT:    jal float2_extern
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    lui $1, %hi(gv2f32)
+; MIPS32-NEXT:    addiu $2, $1, %lo(gv2f32)
+; MIPS32-NEXT:    lwc1 $f0, 28($sp)
+; MIPS32-NEXT:    swc1 $f0, 4($2)
+; MIPS32-NEXT:    lwc1 $f0, 24($sp)
+; MIPS32-NEXT:    swc1 $f0, %lo(gv2f32)($1)
+; MIPS32-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    addiu $sp, $sp, 40
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64EB-LABEL: callfloat_2:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    .cfi_offset 31, -8
+; MIPS64EB-NEXT:    .cfi_offset 28, -16
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(callfloat_2)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_2)))
+; MIPS64EB-NEXT:    daddiu $1, $zero, 383
+; MIPS64EB-NEXT:    dsll $4, $1, 23
+; MIPS64EB-NEXT:    daddiu $1, $zero, 261
+; MIPS64EB-NEXT:    dsll $1, $1, 33
+; MIPS64EB-NEXT:    daddiu $1, $1, 523
+; MIPS64EB-NEXT:    dsll $5, $1, 21
+; MIPS64EB-NEXT:    ld $25, %call16(float2_extern)($gp)
+; MIPS64EB-NEXT:    jalr $25
+; MIPS64EB-NEXT:    nop
+; MIPS64EB-NEXT:    ld $1, %got_disp(gv2f32)($gp)
+; MIPS64EB-NEXT:    sd $2, 0($1)
+; MIPS64EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5-LABEL: callfloat_2:
+; MIPS32R5:       # %bb.0: # %entry
+; MIPS32R5-NEXT:    addiu $sp, $sp, -40
+; MIPS32R5-NEXT:    .cfi_def_cfa_offset 40
+; MIPS32R5-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32R5-NEXT:    .cfi_offset 31, -4
+; MIPS32R5-NEXT:    lui $1, 16736
+; MIPS32R5-NEXT:    sw $1, 20($sp)
+; MIPS32R5-NEXT:    lui $1, 16704
+; MIPS32R5-NEXT:    sw $1, 16($sp)
+; MIPS32R5-NEXT:    addiu $4, $sp, 24
+; MIPS32R5-NEXT:    addiu $6, $zero, 0
+; MIPS32R5-NEXT:    lui $7, 49024
+; MIPS32R5-NEXT:    jal float2_extern
+; MIPS32R5-NEXT:    nop
+; MIPS32R5-NEXT:    lui $1, %hi(gv2f32)
+; MIPS32R5-NEXT:    addiu $2, $1, %lo(gv2f32)
+; MIPS32R5-NEXT:    lwc1 $f0, 28($sp)
+; MIPS32R5-NEXT:    swc1 $f0, 4($2)
+; MIPS32R5-NEXT:    lwc1 $f0, 24($sp)
+; MIPS32R5-NEXT:    swc1 $f0, %lo(gv2f32)($1)
+; MIPS32R5-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32R5-NEXT:    addiu $sp, $sp, 40
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: callfloat_2:
+; MIPS64R5:       # %bb.0: # %entry
+; MIPS64R5-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64R5-NEXT:    .cfi_offset 31, -8
+; MIPS64R5-NEXT:    .cfi_offset 28, -16
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(callfloat_2)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_2)))
+; MIPS64R5-NEXT:    ld $1, %got_page(.LCPI37_0)($gp)
+; MIPS64R5-NEXT:    daddiu $1, $1, %got_ofst(.LCPI37_0)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $4, $w0[0]
+; MIPS64R5-NEXT:    ld $1, %got_page(.LCPI37_1)($gp)
+; MIPS64R5-NEXT:    daddiu $1, $1, %got_ofst(.LCPI37_1)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $5, $w0[0]
+; MIPS64R5-NEXT:    ld $25, %call16(float2_extern)($gp)
+; MIPS64R5-NEXT:    jalr $25
+; MIPS64R5-NEXT:    nop
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv2f32)($gp)
+; MIPS64R5-NEXT:    sd $2, 0($1)
+; MIPS64R5-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5-NEXT:    daddiu $sp, $sp, 16
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS64EL-LABEL: callfloat_2:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    .cfi_offset 31, -8
+; MIPS64EL-NEXT:    .cfi_offset 28, -16
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(callfloat_2)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_2)))
+; MIPS64EL-NEXT:    daddiu $1, $zero, 383
+; MIPS64EL-NEXT:    dsll $4, $1, 55
+; MIPS64EL-NEXT:    daddiu $1, $zero, 523
+; MIPS64EL-NEXT:    dsll $1, $1, 31
+; MIPS64EL-NEXT:    daddiu $1, $1, 261
+; MIPS64EL-NEXT:    dsll $5, $1, 22
+; MIPS64EL-NEXT:    ld $25, %call16(float2_extern)($gp)
+; MIPS64EL-NEXT:    jalr $25
+; MIPS64EL-NEXT:    nop
+; MIPS64EL-NEXT:    ld $1, %got_disp(gv2f32)($gp)
+; MIPS64EL-NEXT:    sd $2, 0($1)
+; MIPS64EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
 entry:
-; ALL-LABEL: callfloat_2:
-
-; MIPS32-DAG: addiu $4, $sp, 24
-; MIPS32-DAG: addiu $6, $zero, 0
-; MIPS32-DAG: lui $7
-
-; MIPS32R5-DAG: addiu $4, $sp, 24
-; MIPS32R5-DAG: addiu $6, $zero, 0
-; MIPS32R5-DAG: lui $7
-
-; MIPS64: dsll $4
-; MIPS64: dsll $5
-
-; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
-; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
-
-; MIPS32: jal float2_extern
-; MIPS64: jalr $25
-
-; MIPS32-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp)
-; MIPS32-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp)
-
-; MIPS32-DAG: swc1 $f[[F1]], 4(${{[0-9]+}})
-; MIPS32-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}})
-
-; MIPS32R5-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp)
-; MIPS32R5-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp)
-
-; MIPS32R5-DAG: swc1 $f[[F1]], 4(${{[0-9]+}})
-; MIPS32R5-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}})
-
-; MIPS64: sd $2
-
-; MIPS64R5: sd $2
-
   %0 = call <2 x float> @float2_extern(<2 x float> <float 0.0, float -1.0>, <2 x float> <float 12.0, float 14.0>)
   store <2 x float> %0, <2 x float> * @gv2f32
   ret void
 }
 
 define void @callfloat_4() {
+; MIPS32-LABEL: callfloat_4:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $sp, $sp, -80
+; MIPS32-NEXT:    .cfi_def_cfa_offset 80
+; MIPS32-NEXT:    sw $ra, 76($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $fp, 72($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    .cfi_offset 31, -4
+; MIPS32-NEXT:    .cfi_offset 30, -8
+; MIPS32-NEXT:    move $fp, $sp
+; MIPS32-NEXT:    .cfi_def_cfa_register 30
+; MIPS32-NEXT:    addiu $1, $zero, -16
+; MIPS32-NEXT:    and $sp, $sp, $1
+; MIPS32-NEXT:    lui $1, 16704
+; MIPS32-NEXT:    lui $2, 16736
+; MIPS32-NEXT:    lui $3, 16752
+; MIPS32-NEXT:    lui $4, 16768
+; MIPS32-NEXT:    sw $4, 36($sp)
+; MIPS32-NEXT:    sw $3, 32($sp)
+; MIPS32-NEXT:    sw $2, 28($sp)
+; MIPS32-NEXT:    sw $1, 24($sp)
+; MIPS32-NEXT:    lui $1, 16512
+; MIPS32-NEXT:    sw $1, 20($sp)
+; MIPS32-NEXT:    lui $1, 16384
+; MIPS32-NEXT:    sw $1, 16($sp)
+; MIPS32-NEXT:    addiu $4, $sp, 48
+; MIPS32-NEXT:    addiu $6, $zero, 0
+; MIPS32-NEXT:    lui $7, 49024
+; MIPS32-NEXT:    jal float4_extern
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    lui $1, %hi(gv4f32)
+; MIPS32-NEXT:    addiu $2, $1, %lo(gv4f32)
+; MIPS32-NEXT:    lwc1 $f0, 60($sp)
+; MIPS32-NEXT:    swc1 $f0, 12($2)
+; MIPS32-NEXT:    lwc1 $f0, 56($sp)
+; MIPS32-NEXT:    swc1 $f0, 8($2)
+; MIPS32-NEXT:    lwc1 $f0, 52($sp)
+; MIPS32-NEXT:    swc1 $f0, 4($2)
+; MIPS32-NEXT:    lwc1 $f0, 48($sp)
+; MIPS32-NEXT:    swc1 $f0, %lo(gv4f32)($1)
+; MIPS32-NEXT:    move $sp, $fp
+; MIPS32-NEXT:    lw $fp, 72($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $ra, 76($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    addiu $sp, $sp, 80
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64EB-LABEL: callfloat_4:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EB-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EB-NEXT:    .cfi_offset 31, -8
+; MIPS64EB-NEXT:    .cfi_offset 28, -16
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(callfloat_4)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_4)))
+; MIPS64EB-NEXT:    daddiu $1, $zero, 1
+; MIPS64EB-NEXT:    dsll $1, $1, 39
+; MIPS64EB-NEXT:    daddiu $1, $1, 129
+; MIPS64EB-NEXT:    daddiu $2, $zero, 261
+; MIPS64EB-NEXT:    dsll $2, $2, 33
+; MIPS64EB-NEXT:    daddiu $3, $zero, 383
+; MIPS64EB-NEXT:    dsll $4, $3, 23
+; MIPS64EB-NEXT:    dsll $5, $1, 23
+; MIPS64EB-NEXT:    daddiu $1, $2, 523
+; MIPS64EB-NEXT:    dsll $6, $1, 21
+; MIPS64EB-NEXT:    daddiu $1, $zero, 1047
+; MIPS64EB-NEXT:    dsll $1, $1, 29
+; MIPS64EB-NEXT:    daddiu $1, $1, 131
+; MIPS64EB-NEXT:    dsll $7, $1, 23
+; MIPS64EB-NEXT:    ld $25, %call16(float4_extern)($gp)
+; MIPS64EB-NEXT:    jalr $25
+; MIPS64EB-NEXT:    nop
+; MIPS64EB-NEXT:    ld $1, %got_disp(gv4f32)($gp)
+; MIPS64EB-NEXT:    sd $3, 8($1)
+; MIPS64EB-NEXT:    sd $2, 0($1)
+; MIPS64EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EB-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5-LABEL: callfloat_4:
+; MIPS32R5:       # %bb.0: # %entry
+; MIPS32R5-NEXT:    addiu $sp, $sp, -80
+; MIPS32R5-NEXT:    .cfi_def_cfa_offset 80
+; MIPS32R5-NEXT:    sw $ra, 76($sp) # 4-byte Folded Spill
+; MIPS32R5-NEXT:    sw $fp, 72($sp) # 4-byte Folded Spill
+; MIPS32R5-NEXT:    .cfi_offset 31, -4
+; MIPS32R5-NEXT:    .cfi_offset 30, -8
+; MIPS32R5-NEXT:    move $fp, $sp
+; MIPS32R5-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5-NEXT:    addiu $1, $zero, -16
+; MIPS32R5-NEXT:    and $sp, $sp, $1
+; MIPS32R5-NEXT:    lui $1, %hi($CPI38_0)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo($CPI38_0)
+; MIPS32R5-NEXT:    ld.w $w0, 0($1)
+; MIPS32R5-NEXT:    copy_s.w $6, $w0[0]
+; MIPS32R5-NEXT:    copy_s.w $7, $w0[1]
+; MIPS32R5-NEXT:    copy_s.w $1, $w0[2]
+; MIPS32R5-NEXT:    copy_s.w $2, $w0[3]
+; MIPS32R5-NEXT:    lui $3, %hi($CPI38_1)
+; MIPS32R5-NEXT:    addiu $3, $3, %lo($CPI38_1)
+; MIPS32R5-NEXT:    ld.w $w0, 0($3)
+; MIPS32R5-NEXT:    copy_s.w $3, $w0[0]
+; MIPS32R5-NEXT:    copy_s.w $4, $w0[1]
+; MIPS32R5-NEXT:    copy_s.w $5, $w0[2]
+; MIPS32R5-NEXT:    copy_s.w $8, $w0[3]
+; MIPS32R5-NEXT:    sw $8, 36($sp)
+; MIPS32R5-NEXT:    sw $5, 32($sp)
+; MIPS32R5-NEXT:    sw $4, 28($sp)
+; MIPS32R5-NEXT:    sw $3, 24($sp)
+; MIPS32R5-NEXT:    sw $2, 20($sp)
+; MIPS32R5-NEXT:    sw $1, 16($sp)
+; MIPS32R5-NEXT:    addiu $4, $sp, 48
+; MIPS32R5-NEXT:    jal float4_extern
+; MIPS32R5-NEXT:    nop
+; MIPS32R5-NEXT:    lui $1, %hi(gv4f32)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo(gv4f32)
+; MIPS32R5-NEXT:    ld.w $w0, 48($sp)
+; MIPS32R5-NEXT:    st.w $w0, 0($1)
+; MIPS32R5-NEXT:    move $sp, $fp
+; MIPS32R5-NEXT:    lw $fp, 72($sp) # 4-byte Folded Reload
+; MIPS32R5-NEXT:    lw $ra, 76($sp) # 4-byte Folded Reload
+; MIPS32R5-NEXT:    addiu $sp, $sp, 80
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: callfloat_4:
+; MIPS64R5:       # %bb.0: # %entry
+; MIPS64R5-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64R5-NEXT:    .cfi_offset 31, -8
+; MIPS64R5-NEXT:    .cfi_offset 28, -16
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(callfloat_4)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_4)))
+; MIPS64R5-NEXT:    ld $1, %got_page(.LCPI38_0)($gp)
+; MIPS64R5-NEXT:    daddiu $1, $1, %got_ofst(.LCPI38_0)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $4, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $5, $w0[1]
+; MIPS64R5-NEXT:    ld $1, %got_page(.LCPI38_1)($gp)
+; MIPS64R5-NEXT:    daddiu $1, $1, %got_ofst(.LCPI38_1)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $6, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $7, $w0[1]
+; MIPS64R5-NEXT:    ld $25, %call16(float4_extern)($gp)
+; MIPS64R5-NEXT:    jalr $25
+; MIPS64R5-NEXT:    nop
+; MIPS64R5-NEXT:    ldi.b $w0, 0
+; MIPS64R5-NEXT:    insert.d $w0[0], $2
+; MIPS64R5-NEXT:    insert.d $w0[1], $3
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv4f32)($gp)
+; MIPS64R5-NEXT:    st.d $w0, 0($1)
+; MIPS64R5-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5-NEXT:    daddiu $sp, $sp, 16
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS64EL-LABEL: callfloat_4:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EL-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64EL-NEXT:    .cfi_offset 31, -8
+; MIPS64EL-NEXT:    .cfi_offset 28, -16
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(callfloat_4)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_4)))
+; MIPS64EL-NEXT:    daddiu $1, $zero, 129
+; MIPS64EL-NEXT:    dsll $1, $1, 25
+; MIPS64EL-NEXT:    daddiu $1, $1, 1
+; MIPS64EL-NEXT:    daddiu $2, $zero, 523
+; MIPS64EL-NEXT:    dsll $2, $2, 31
+; MIPS64EL-NEXT:    daddiu $3, $zero, 383
+; MIPS64EL-NEXT:    dsll $4, $3, 55
+; MIPS64EL-NEXT:    dsll $5, $1, 30
+; MIPS64EL-NEXT:    daddiu $1, $2, 261
+; MIPS64EL-NEXT:    dsll $6, $1, 22
+; MIPS64EL-NEXT:    daddiu $1, $zero, 131
+; MIPS64EL-NEXT:    dsll $1, $1, 35
+; MIPS64EL-NEXT:    daddiu $1, $1, 1047
+; MIPS64EL-NEXT:    dsll $7, $1, 20
+; MIPS64EL-NEXT:    ld $25, %call16(float4_extern)($gp)
+; MIPS64EL-NEXT:    jalr $25
+; MIPS64EL-NEXT:    nop
+; MIPS64EL-NEXT:    ld $1, %got_disp(gv4f32)($gp)
+; MIPS64EL-NEXT:    sd $3, 8($1)
+; MIPS64EL-NEXT:    sd $2, 0($1)
+; MIPS64EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64EL-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
 entry:
-; ALL-LABEL: callfloat_4:
-
-; MIPS32: sw ${{[0-9]+}}, 36($sp)
-; MIPS32: sw ${{[0-9]+}}, 32($sp)
-; MIPS32: sw ${{[0-9]+}}, 28($sp)
-; MIPS32: sw ${{[0-9]+}}, 24($sp)
-; MIPS32: sw ${{[0-9]+}}, 20($sp)
-; MIPS32: sw ${{[0-9]+}}, 16($sp)
-; MIPS32: addiu $4, $sp, 48
-; MIPS32: addiu $6, $zero, 0
-; MIPS32: lui $7
-
-; MIPS32R5: copy_s.w $6, $w{{[0-9]+}}
-; MIPS32R5: copy_s.w $7, $w{{[0-9]+}}
-; MIPS32R5: sw ${{[0-9]+}}, 36($sp)
-; MIPS32R5: sw ${{[0-9]+}}, 32($sp)
-; MIPS32R5: sw ${{[0-9]+}}, 28($sp)
-; MIPS32R5: sw ${{[0-9]+}}, 24($sp)
-; MIPS32R5: sw ${{[0-9]+}}, 20($sp)
-; MIPS32R5: sw ${{[0-9]+}}, 16($sp)
-; MIPS32R5: addiu $4, $sp, 48
-
-; MIPS64-DAG: dsll $4
-; MIPS64-DAG: dsll $5
-; MIPS64-DAG: dsll $6
-; MIPS64-DAG: dsll $7
-
-; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
-; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
-; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}}
-; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}}
-
-; MIPS64: jalr $25
-; MIPS32: jal
-
-; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 48($sp)
-; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 52($sp)
-; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 56($sp)
-; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 60($sp)
-
-; MIPS32R5: ld.w $w{{[0-9]+}}, 48($sp)
-
-; MIPS64-DAG: $2
-; MIPS64-DAG: $3
-
-; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
-; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
-
   %0 = call <4 x float> @float4_extern(<4 x float> <float 0.0, float -1.0, float 2.0, float 4.0>, <4 x float> <float 12.0, float 14.0, float 15.0, float 16.0>)
   store <4 x float> %0, <4 x float> * @gv4f32
   ret void
 }
 
 define void @calldouble_2() {
+; MIPS32EB-LABEL: calldouble_2:
+; MIPS32EB:       # %bb.0: # %entry
+; MIPS32EB-NEXT:    addiu $sp, $sp, -80
+; MIPS32EB-NEXT:    .cfi_def_cfa_offset 80
+; MIPS32EB-NEXT:    sw $ra, 76($sp) # 4-byte Folded Spill
+; MIPS32EB-NEXT:    sw $fp, 72($sp) # 4-byte Folded Spill
+; MIPS32EB-NEXT:    .cfi_offset 31, -4
+; MIPS32EB-NEXT:    .cfi_offset 30, -8
+; MIPS32EB-NEXT:    move $fp, $sp
+; MIPS32EB-NEXT:    .cfi_def_cfa_register 30
+; MIPS32EB-NEXT:    addiu $1, $zero, -16
+; MIPS32EB-NEXT:    and $sp, $sp, $1
+; MIPS32EB-NEXT:    lui $1, 16424
+; MIPS32EB-NEXT:    lui $2, 16428
+; MIPS32EB-NEXT:    sw $2, 32($sp)
+; MIPS32EB-NEXT:    sw $1, 24($sp)
+; MIPS32EB-NEXT:    lui $1, 49136
+; MIPS32EB-NEXT:    sw $1, 16($sp)
+; MIPS32EB-NEXT:    sw $zero, 36($sp)
+; MIPS32EB-NEXT:    sw $zero, 28($sp)
+; MIPS32EB-NEXT:    sw $zero, 20($sp)
+; MIPS32EB-NEXT:    addiu $4, $sp, 48
+; MIPS32EB-NEXT:    addiu $6, $zero, 0
+; MIPS32EB-NEXT:    addiu $7, $zero, 0
+; MIPS32EB-NEXT:    jal double2_extern
+; MIPS32EB-NEXT:    nop
+; MIPS32EB-NEXT:    lui $1, %hi(gv2f64)
+; MIPS32EB-NEXT:    addiu $2, $1, %lo(gv2f64)
+; MIPS32EB-NEXT:    ldc1 $f0, 56($sp)
+; MIPS32EB-NEXT:    sdc1 $f0, 8($2)
+; MIPS32EB-NEXT:    ldc1 $f0, 48($sp)
+; MIPS32EB-NEXT:    sdc1 $f0, %lo(gv2f64)($1)
+; MIPS32EB-NEXT:    move $sp, $fp
+; MIPS32EB-NEXT:    lw $fp, 72($sp) # 4-byte Folded Reload
+; MIPS32EB-NEXT:    lw $ra, 76($sp) # 4-byte Folded Reload
+; MIPS32EB-NEXT:    addiu $sp, $sp, 80
+; MIPS32EB-NEXT:    jr $ra
+; MIPS32EB-NEXT:    nop
+;
+; MIPS64-LABEL: calldouble_2:
+; MIPS64:       # %bb.0: # %entry
+; MIPS64-NEXT:    daddiu $sp, $sp, -16
+; MIPS64-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    .cfi_offset 31, -8
+; MIPS64-NEXT:    .cfi_offset 28, -16
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(calldouble_2)))
+; MIPS64-NEXT:    daddu $1, $1, $25
+; MIPS64-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calldouble_2)))
+; MIPS64-NEXT:    daddiu $1, $zero, 3071
+; MIPS64-NEXT:    dsll $5, $1, 52
+; MIPS64-NEXT:    daddiu $1, $zero, 2053
+; MIPS64-NEXT:    dsll $6, $1, 51
+; MIPS64-NEXT:    daddiu $1, $zero, 4107
+; MIPS64-NEXT:    dsll $7, $1, 50
+; MIPS64-NEXT:    ld $25, %call16(double2_extern)($gp)
+; MIPS64-NEXT:    daddiu $4, $zero, 0
+; MIPS64-NEXT:    jalr $25
+; MIPS64-NEXT:    nop
+; MIPS64-NEXT:    ld $1, %got_disp(gv2f64)($gp)
+; MIPS64-NEXT:    sd $3, 8($1)
+; MIPS64-NEXT:    sd $2, 0($1)
+; MIPS64-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    daddiu $sp, $sp, 16
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: calldouble_2:
+; MIPS32R5:       # %bb.0: # %entry
+; MIPS32R5-NEXT:    addiu $sp, $sp, -80
+; MIPS32R5-NEXT:    .cfi_def_cfa_offset 80
+; MIPS32R5-NEXT:    sw $ra, 76($sp) # 4-byte Folded Spill
+; MIPS32R5-NEXT:    sw $fp, 72($sp) # 4-byte Folded Spill
+; MIPS32R5-NEXT:    .cfi_offset 31, -4
+; MIPS32R5-NEXT:    .cfi_offset 30, -8
+; MIPS32R5-NEXT:    move $fp, $sp
+; MIPS32R5-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5-NEXT:    addiu $1, $zero, -16
+; MIPS32R5-NEXT:    and $sp, $sp, $1
+; MIPS32R5-NEXT:    lui $1, %hi($CPI39_0)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo($CPI39_0)
+; MIPS32R5-NEXT:    ld.w $w0, 0($1)
+; MIPS32R5-NEXT:    copy_s.w $6, $w0[0]
+; MIPS32R5-NEXT:    copy_s.w $7, $w0[1]
+; MIPS32R5-NEXT:    copy_s.w $1, $w0[2]
+; MIPS32R5-NEXT:    copy_s.w $2, $w0[3]
+; MIPS32R5-NEXT:    lui $3, %hi($CPI39_1)
+; MIPS32R5-NEXT:    addiu $3, $3, %lo($CPI39_1)
+; MIPS32R5-NEXT:    ld.w $w0, 0($3)
+; MIPS32R5-NEXT:    copy_s.w $3, $w0[0]
+; MIPS32R5-NEXT:    copy_s.w $4, $w0[1]
+; MIPS32R5-NEXT:    copy_s.w $5, $w0[2]
+; MIPS32R5-NEXT:    copy_s.w $8, $w0[3]
+; MIPS32R5-NEXT:    sw $8, 36($sp)
+; MIPS32R5-NEXT:    sw $5, 32($sp)
+; MIPS32R5-NEXT:    sw $4, 28($sp)
+; MIPS32R5-NEXT:    sw $3, 24($sp)
+; MIPS32R5-NEXT:    sw $2, 20($sp)
+; MIPS32R5-NEXT:    sw $1, 16($sp)
+; MIPS32R5-NEXT:    addiu $4, $sp, 48
+; MIPS32R5-NEXT:    jal double2_extern
+; MIPS32R5-NEXT:    nop
+; MIPS32R5-NEXT:    lui $1, %hi(gv2f64)
+; MIPS32R5-NEXT:    addiu $1, $1, %lo(gv2f64)
+; MIPS32R5-NEXT:    ld.d $w0, 48($sp)
+; MIPS32R5-NEXT:    st.d $w0, 0($1)
+; MIPS32R5-NEXT:    move $sp, $fp
+; MIPS32R5-NEXT:    lw $fp, 72($sp) # 4-byte Folded Reload
+; MIPS32R5-NEXT:    lw $ra, 76($sp) # 4-byte Folded Reload
+; MIPS32R5-NEXT:    addiu $sp, $sp, 80
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: calldouble_2:
+; MIPS64R5:       # %bb.0: # %entry
+; MIPS64R5-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5-NEXT:    sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64R5-NEXT:    .cfi_offset 31, -8
+; MIPS64R5-NEXT:    .cfi_offset 28, -16
+; MIPS64R5-NEXT:    lui $1, %hi(%neg(%gp_rel(calldouble_2)))
+; MIPS64R5-NEXT:    daddu $1, $1, $25
+; MIPS64R5-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(calldouble_2)))
+; MIPS64R5-NEXT:    ld $1, %got_page(.LCPI39_0)($gp)
+; MIPS64R5-NEXT:    daddiu $1, $1, %got_ofst(.LCPI39_0)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $4, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $5, $w0[1]
+; MIPS64R5-NEXT:    ld $1, %got_page(.LCPI39_1)($gp)
+; MIPS64R5-NEXT:    daddiu $1, $1, %got_ofst(.LCPI39_1)
+; MIPS64R5-NEXT:    ld.d $w0, 0($1)
+; MIPS64R5-NEXT:    copy_s.d $6, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $7, $w0[1]
+; MIPS64R5-NEXT:    ld $25, %call16(double2_extern)($gp)
+; MIPS64R5-NEXT:    jalr $25
+; MIPS64R5-NEXT:    nop
+; MIPS64R5-NEXT:    ldi.b $w0, 0
+; MIPS64R5-NEXT:    insert.d $w0[0], $2
+; MIPS64R5-NEXT:    insert.d $w0[1], $3
+; MIPS64R5-NEXT:    ld $1, %got_disp(gv2f64)($gp)
+; MIPS64R5-NEXT:    st.d $w0, 0($1)
+; MIPS64R5-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5-NEXT:    daddiu $sp, $sp, 16
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS32EL-LABEL: calldouble_2:
+; MIPS32EL:       # %bb.0: # %entry
+; MIPS32EL-NEXT:    addiu $sp, $sp, -80
+; MIPS32EL-NEXT:    .cfi_def_cfa_offset 80
+; MIPS32EL-NEXT:    sw $ra, 76($sp) # 4-byte Folded Spill
+; MIPS32EL-NEXT:    sw $fp, 72($sp) # 4-byte Folded Spill
+; MIPS32EL-NEXT:    .cfi_offset 31, -4
+; MIPS32EL-NEXT:    .cfi_offset 30, -8
+; MIPS32EL-NEXT:    move $fp, $sp
+; MIPS32EL-NEXT:    .cfi_def_cfa_register 30
+; MIPS32EL-NEXT:    addiu $1, $zero, -16
+; MIPS32EL-NEXT:    and $sp, $sp, $1
+; MIPS32EL-NEXT:    lui $1, 16424
+; MIPS32EL-NEXT:    lui $2, 16428
+; MIPS32EL-NEXT:    sw $2, 36($sp)
+; MIPS32EL-NEXT:    sw $1, 28($sp)
+; MIPS32EL-NEXT:    lui $1, 49136
+; MIPS32EL-NEXT:    sw $1, 20($sp)
+; MIPS32EL-NEXT:    sw $zero, 32($sp)
+; MIPS32EL-NEXT:    sw $zero, 24($sp)
+; MIPS32EL-NEXT:    sw $zero, 16($sp)
+; MIPS32EL-NEXT:    addiu $4, $sp, 48
+; MIPS32EL-NEXT:    addiu $6, $zero, 0
+; MIPS32EL-NEXT:    addiu $7, $zero, 0
+; MIPS32EL-NEXT:    jal double2_extern
+; MIPS32EL-NEXT:    nop
+; MIPS32EL-NEXT:    lui $1, %hi(gv2f64)
+; MIPS32EL-NEXT:    addiu $2, $1, %lo(gv2f64)
+; MIPS32EL-NEXT:    ldc1 $f0, 56($sp)
+; MIPS32EL-NEXT:    sdc1 $f0, 8($2)
+; MIPS32EL-NEXT:    ldc1 $f0, 48($sp)
+; MIPS32EL-NEXT:    sdc1 $f0, %lo(gv2f64)($1)
+; MIPS32EL-NEXT:    move $sp, $fp
+; MIPS32EL-NEXT:    lw $fp, 72($sp) # 4-byte Folded Reload
+; MIPS32EL-NEXT:    lw $ra, 76($sp) # 4-byte Folded Reload
+; MIPS32EL-NEXT:    addiu $sp, $sp, 80
+; MIPS32EL-NEXT:    jr $ra
+; MIPS32EL-NEXT:    nop
 entry:
-; ALL-LABEL: calldouble_2:
-
-; MIPS32-DAG: sw ${{[0-9a-z]+}}, 36($sp)
-; MIPS32-DAG: sw ${{[0-9a-z]+}}, 32($sp)
-; MIPS32-DAG: sw ${{[0-9a-z]+}}, 28($sp)
-; MIPS32-DAG: sw ${{[0-9a-z]+}}, 24($sp)
-; MIPS32-DAG: sw ${{[0-9a-z]+}}, 20($sp)
-; MIPS32-DAG: sw ${{[0-9a-z]+}}, 16($sp)
-
-; MIPS32-DAG: addiu $4, $sp, [[R0:[0-9]+]]
-; MIPS32-DAG: addiu $6, $zero, 0
-; MIPS32-DAG: addiu $7, $zero, 0
-
-; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
-; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
-; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
-; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
-
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 36($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 32($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
-; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
-
-; MIPS64-DAG: dsll $5
-; MIPS64-DAG: dsll $6
-; MIPS64-DAG: dsll $7
-; MIPS64-DAG: daddiu $4
-
-; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
-; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
-; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}}
-; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}}
-
-; MIPS32: jal double2_extern
-; MIPS64: jalr $25
-
-; MIPS32-DAG: ldc1 $f[[F0:[0-9]+]], 48($sp)
-; MIPS32-DAG: ldc1 $f[[F1:[0-9]+]], 56($sp)
-
-; MIPS32-DAG: sdc1 $f[[F1]], 8(${{[0-9]+}})
-; MIPS32-DAG: sdc1 $f[[F0]], %lo(gv2f64)(${{[0-9]+}})
-
-; MIPS32R5: ld.d $w[[W0:[0-9]+]], 48($sp)
-; MIPS32R5: st.d $w[[W0]], 0(${{[0-9]+}})
-
-; MIPS64-DAG: sd $2
-; MIPS64-DAG: sd $3
-
-; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
-; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
-
   %0 = call <2 x double> @double2_extern(<2 x double> <double 0.0, double -1.0>, <2 x double> <double 12.0, double 14.0>)
   store <2 x double> %0, <2 x double> * @gv2f64
   ret void
@@ -1439,65 +6395,126 @@ entry:
 ; in argument passing.
 
 define float @mixed_i8(<2 x float> %a, i8 %b, <2 x float> %c) {
+; MIPS32-LABEL: mixed_i8:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    mtc1 $5, $f0
+; MIPS32-NEXT:    andi $1, $6, 255
+; MIPS32-NEXT:    mtc1 $1, $f1
+; MIPS32-NEXT:    cvt.s.w $f1, $f1
+; MIPS32-NEXT:    add.s $f0, $f1, $f0
+; MIPS32-NEXT:    lwc1 $f2, 20($sp)
+; MIPS32-NEXT:    add.s $f0, $f0, $f2
+; MIPS32-NEXT:    mtc1 $4, $f2
+; MIPS32-NEXT:    add.s $f1, $f1, $f2
+; MIPS32-NEXT:    lwc1 $f2, 16($sp)
+; MIPS32-NEXT:    add.s $f1, $f1, $f2
+; MIPS32-NEXT:    add.s $f0, $f1, $f0
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64EB-LABEL: mixed_i8:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    sll $1, $4, 0
+; MIPS64EB-NEXT:    mtc1 $1, $f0
+; MIPS64EB-NEXT:    sll $1, $5, 0
+; MIPS64EB-NEXT:    andi $1, $1, 255
+; MIPS64EB-NEXT:    mtc1 $1, $f1
+; MIPS64EB-NEXT:    cvt.s.w $f1, $f1
+; MIPS64EB-NEXT:    add.s $f0, $f1, $f0
+; MIPS64EB-NEXT:    dsrl $1, $4, 32
+; MIPS64EB-NEXT:    sll $1, $1, 0
+; MIPS64EB-NEXT:    sll $2, $6, 0
+; MIPS64EB-NEXT:    mtc1 $2, $f2
+; MIPS64EB-NEXT:    add.s $f0, $f0, $f2
+; MIPS64EB-NEXT:    mtc1 $1, $f2
+; MIPS64EB-NEXT:    add.s $f1, $f1, $f2
+; MIPS64EB-NEXT:    dsrl $1, $6, 32
+; MIPS64EB-NEXT:    sll $1, $1, 0
+; MIPS64EB-NEXT:    mtc1 $1, $f2
+; MIPS64EB-NEXT:    add.s $f1, $f1, $f2
+; MIPS64EB-NEXT:    add.s $f0, $f1, $f0
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5-LABEL: mixed_i8:
+; MIPS32R5:       # %bb.0: # %entry
+; MIPS32R5-NEXT:    addiu $sp, $sp, -64
+; MIPS32R5-NEXT:    .cfi_def_cfa_offset 64
+; MIPS32R5-NEXT:    sw $fp, 60($sp) # 4-byte Folded Spill
+; MIPS32R5-NEXT:    .cfi_offset 30, -4
+; MIPS32R5-NEXT:    move $fp, $sp
+; MIPS32R5-NEXT:    .cfi_def_cfa_register 30
+; MIPS32R5-NEXT:    addiu $1, $zero, -16
+; MIPS32R5-NEXT:    and $sp, $sp, $1
+; MIPS32R5-NEXT:    andi $1, $6, 255
+; MIPS32R5-NEXT:    sw $1, 36($sp)
+; MIPS32R5-NEXT:    sw $1, 32($sp)
+; MIPS32R5-NEXT:    sw $5, 4($sp)
+; MIPS32R5-NEXT:    sw $4, 0($sp)
+; MIPS32R5-NEXT:    ld.w $w0, 32($sp)
+; MIPS32R5-NEXT:    ffint_s.w $w0, $w0
+; MIPS32R5-NEXT:    ld.w $w1, 0($sp)
+; MIPS32R5-NEXT:    fadd.w $w0, $w0, $w1
+; MIPS32R5-NEXT:    lw $1, 84($fp)
+; MIPS32R5-NEXT:    sw $1, 20($sp)
+; MIPS32R5-NEXT:    lw $1, 80($fp)
+; MIPS32R5-NEXT:    sw $1, 16($sp)
+; MIPS32R5-NEXT:    ld.w $w1, 16($sp)
+; MIPS32R5-NEXT:    fadd.w $w0, $w0, $w1
+; MIPS32R5-NEXT:    splati.w $w1, $w0[1]
+; MIPS32R5-NEXT:    add.s $f0, $f0, $f1
+; MIPS32R5-NEXT:    move $sp, $fp
+; MIPS32R5-NEXT:    lw $fp, 60($sp) # 4-byte Folded Reload
+; MIPS32R5-NEXT:    addiu $sp, $sp, 64
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5-LABEL: mixed_i8:
+; MIPS64R5:       # %bb.0: # %entry
+; MIPS64R5-NEXT:    daddiu $sp, $sp, -48
+; MIPS64R5-NEXT:    .cfi_def_cfa_offset 48
+; MIPS64R5-NEXT:    sll $1, $5, 0
+; MIPS64R5-NEXT:    andi $1, $1, 255
+; MIPS64R5-NEXT:    sw $1, 36($sp)
+; MIPS64R5-NEXT:    sw $1, 32($sp)
+; MIPS64R5-NEXT:    sd $4, 16($sp)
+; MIPS64R5-NEXT:    ld.w $w0, 32($sp)
+; MIPS64R5-NEXT:    ffint_s.w $w0, $w0
+; MIPS64R5-NEXT:    ld.w $w1, 16($sp)
+; MIPS64R5-NEXT:    fadd.w $w0, $w0, $w1
+; MIPS64R5-NEXT:    sd $6, 0($sp)
+; MIPS64R5-NEXT:    ld.w $w1, 0($sp)
+; MIPS64R5-NEXT:    fadd.w $w0, $w0, $w1
+; MIPS64R5-NEXT:    splati.w $w1, $w0[1]
+; MIPS64R5-NEXT:    add.s $f0, $f0, $f1
+; MIPS64R5-NEXT:    daddiu $sp, $sp, 48
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
+;
+; MIPS64EL-LABEL: mixed_i8:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    dsrl $1, $4, 32
+; MIPS64EL-NEXT:    sll $1, $1, 0
+; MIPS64EL-NEXT:    mtc1 $1, $f0
+; MIPS64EL-NEXT:    sll $1, $5, 0
+; MIPS64EL-NEXT:    andi $1, $1, 255
+; MIPS64EL-NEXT:    mtc1 $1, $f1
+; MIPS64EL-NEXT:    cvt.s.w $f1, $f1
+; MIPS64EL-NEXT:    add.s $f0, $f1, $f0
+; MIPS64EL-NEXT:    dsrl $1, $6, 32
+; MIPS64EL-NEXT:    sll $1, $1, 0
+; MIPS64EL-NEXT:    mtc1 $1, $f2
+; MIPS64EL-NEXT:    add.s $f0, $f0, $f2
+; MIPS64EL-NEXT:    sll $1, $4, 0
+; MIPS64EL-NEXT:    mtc1 $1, $f2
+; MIPS64EL-NEXT:    add.s $f1, $f1, $f2
+; MIPS64EL-NEXT:    sll $1, $6, 0
+; MIPS64EL-NEXT:    mtc1 $1, $f2
+; MIPS64EL-NEXT:    add.s $f1, $f1, $f2
+; MIPS64EL-NEXT:    add.s $f0, $f1, $f0
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
 entry:
-; ALL-LABEL: mixed_i8:
-
-; MIPS32-DAG: mtc1 $5, $f{{[0-9]+}}
-; MIPS32: andi $[[R7:[0-9]+]], $6, 255
-; MIPS32: mtc1 $[[R7]], $f[[F0:[0-9]+]]
-; MIPS32: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
-
-; MIPS32-DAG: mtc1 $4, $f{{[0-9]+}}
-; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 16($sp)
-; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 20($sp)
-; MIPS32-DAG: add.s $f0, $f{{[0-9]+}}, $f{{[0-9]+}}
-
-; MIPS32R5: andi $[[R0:[0-9]+]], $6, 255
-; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp)
-; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp)
-; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp)
-; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp)
-
-; MIPS64EB-DAG: sll $[[R0:[0-9]+]], $4, 0
-; MIPS64EB-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
-; MIPS64EB: sll $[[R6:[0-9]+]], $5, 0
-; MIPS64EB: andi $[[R7:[0-9]+]], $[[R6]], 255
-; MIPS64EB: mtc1 $[[R7]], $f[[F0:[0-9]+]]
-; MIPS64EB: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
-
-; MIPS64EB-DAG: dsrl $[[R1:[0-9]+]], $4, 32
-; MIPS64EB-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
-; MIPS64EB-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}}
-
-; MIPS64EB-DAG: sll $[[R3:[0-9]+]], $6, 0
-; MIPS64EB-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
-; MIPS64EB-DAG: dsrl $[[R4:[0-9]+]], $6, 32
-; MIPS64EB-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
-; MIPS64EB-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}}
-
-; MIPS64EL-DAG: dsrl $[[R1:[0-9]+]], $4, 32
-; MIPS64EL-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
-; MIPS64EL-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}}
-
-; MIPS64EL: sll $[[R6:[0-9]+]], $5, 0
-; MIPS64EL: andi $[[R7:[0-9]+]], $[[R6]], 255
-; MIPS64EL: mtc1 $[[R7]], $f[[F0:[0-9]+]]
-; MIPS64EL: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
-
-; MIPS64EL-DAG: dsrl $[[R4:[0-9]+]], $6, 32
-; MIPS64EL-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
-; MIPS64EL-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}}
-
-; MIPS64EL-DAG: sll $[[R0:[0-9]+]], $4, 0
-; MIPS64EL-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
-; MIPS64EL-DAG: sll $[[R3:[0-9]+]], $6, 0
-; MIPS64EL-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
-
-; MIPS64R5: sll $[[R0:[0-9]+]], $5, 0
-; MIPS64R5: andi $[[R1:[0-9]+]], $[[R0]], 255
-; MIPS64R5: sd $4, {{[0-9]+}}($sp)
-; MIPS64R5: sd $6, {{[0-9]+}}($sp)
-
   %0 = zext i8 %b to i32
   %1 = uitofp i32 %0 to float
   %2 = insertelement <2 x float> undef, float %1, i32 0
@@ -1511,43 +6528,203 @@ entry:
 }
 
 define <4 x float> @mixed_32(<4 x float> %a, i32 %b) {
+; MIPS32EB-LABEL: mixed_32:
+; MIPS32EB:       # %bb.0: # %entry
+; MIPS32EB-NEXT:    addiu $sp, $sp, -8
+; MIPS32EB-NEXT:    .cfi_def_cfa_offset 8
+; MIPS32EB-NEXT:    lui $1, 17200
+; MIPS32EB-NEXT:    sw $1, 0($sp)
+; MIPS32EB-NEXT:    lw $1, 32($sp)
+; MIPS32EB-NEXT:    sw $1, 4($sp)
+; MIPS32EB-NEXT:    lui $1, %hi($CPI41_0)
+; MIPS32EB-NEXT:    ldc1 $f0, %lo($CPI41_0)($1)
+; MIPS32EB-NEXT:    ldc1 $f2, 0($sp)
+; MIPS32EB-NEXT:    sub.d $f0, $f2, $f0
+; MIPS32EB-NEXT:    cvt.s.d $f0, $f0
+; MIPS32EB-NEXT:    lwc1 $f1, 28($sp)
+; MIPS32EB-NEXT:    lwc1 $f2, 24($sp)
+; MIPS32EB-NEXT:    add.s $f2, $f0, $f2
+; MIPS32EB-NEXT:    add.s $f1, $f0, $f1
+; MIPS32EB-NEXT:    swc1 $f1, 12($4)
+; MIPS32EB-NEXT:    swc1 $f2, 8($4)
+; MIPS32EB-NEXT:    mtc1 $7, $f1
+; MIPS32EB-NEXT:    add.s $f1, $f0, $f1
+; MIPS32EB-NEXT:    swc1 $f1, 4($4)
+; MIPS32EB-NEXT:    mtc1 $6, $f1
+; MIPS32EB-NEXT:    add.s $f0, $f0, $f1
+; MIPS32EB-NEXT:    swc1 $f0, 0($4)
+; MIPS32EB-NEXT:    addiu $sp, $sp, 8
+; MIPS32EB-NEXT:    jr $ra
+; MIPS32EB-NEXT:    nop
+;
+; MIPS64EB-LABEL: mixed_32:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EB-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(mixed_32)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(mixed_32)))
+; MIPS64EB-NEXT:    lui $2, 17200
+; MIPS64EB-NEXT:    sw $2, 8($sp)
+; MIPS64EB-NEXT:    sll $2, $6, 0
+; MIPS64EB-NEXT:    sw $2, 12($sp)
+; MIPS64EB-NEXT:    ld $1, %got_page(.LCPI41_0)($1)
+; MIPS64EB-NEXT:    ldc1 $f0, %got_ofst(.LCPI41_0)($1)
+; MIPS64EB-NEXT:    ldc1 $f1, 8($sp)
+; MIPS64EB-NEXT:    sub.d $f0, $f1, $f0
+; MIPS64EB-NEXT:    cvt.s.d $f0, $f0
+; MIPS64EB-NEXT:    dsrl $1, $4, 32
+; MIPS64EB-NEXT:    sll $1, $1, 0
+; MIPS64EB-NEXT:    mtc1 $1, $f1
+; MIPS64EB-NEXT:    add.s $f1, $f0, $f1
+; MIPS64EB-NEXT:    dsrl $1, $5, 32
+; MIPS64EB-NEXT:    mfc1 $2, $f1
+; MIPS64EB-NEXT:    sll $3, $4, 0
+; MIPS64EB-NEXT:    sll $1, $1, 0
+; MIPS64EB-NEXT:    mtc1 $1, $f1
+; MIPS64EB-NEXT:    add.s $f1, $f0, $f1
+; MIPS64EB-NEXT:    mfc1 $1, $f1
+; MIPS64EB-NEXT:    mtc1 $3, $f1
+; MIPS64EB-NEXT:    sll $3, $5, 0
+; MIPS64EB-NEXT:    mtc1 $3, $f2
+; MIPS64EB-NEXT:    dsll $2, $2, 32
+; MIPS64EB-NEXT:    add.s $f1, $f0, $f1
+; MIPS64EB-NEXT:    mfc1 $3, $f1
+; MIPS64EB-NEXT:    dsll $3, $3, 32
+; MIPS64EB-NEXT:    dsrl $3, $3, 32
+; MIPS64EB-NEXT:    or $2, $3, $2
+; MIPS64EB-NEXT:    dsll $1, $1, 32
+; MIPS64EB-NEXT:    add.s $f0, $f0, $f2
+; MIPS64EB-NEXT:    mfc1 $3, $f0
+; MIPS64EB-NEXT:    dsll $3, $3, 32
+; MIPS64EB-NEXT:    dsrl $3, $3, 32
+; MIPS64EB-NEXT:    or $3, $3, $1
+; MIPS64EB-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5-LABEL: mixed_32:
+; MIPS32R5:       # %bb.0: # %entry
+; MIPS32R5-NEXT:    ldi.b $w0, 0
+; MIPS32R5-NEXT:    insert.w $w0[0], $6
+; MIPS32R5-NEXT:    insert.w $w0[1], $7
+; MIPS32R5-NEXT:    lw $1, 16($sp)
+; MIPS32R5-NEXT:    insert.w $w0[2], $1
+; MIPS32R5-NEXT:    lw $1, 20($sp)
+; MIPS32R5-NEXT:    insert.w $w0[3], $1
+; MIPS32R5-NEXT:    lw $1, 24($sp)
+; MIPS32R5-NEXT:    fill.w $w1, $1
+; MIPS32R5-NEXT:    ffint_u.w $w1, $w1
+; MIPS32R5-NEXT:    fadd.w $w0, $w1, $w0
+; MIPS32R5-NEXT:    st.w $w0, 0($4)
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: mixed_32:
+; MIPS64R5EB:       # %bb.0: # %entry
+; MIPS64R5EB-NEXT:    ldi.b $w0, 0
+; MIPS64R5EB-NEXT:    insert.d $w0[0], $4
+; MIPS64R5EB-NEXT:    insert.d $w0[1], $5
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    sll $1, $6, 0
+; MIPS64R5EB-NEXT:    fill.w $w1, $1
+; MIPS64R5EB-NEXT:    ffint_u.w $w1, $w1
+; MIPS64R5EB-NEXT:    fadd.w $w0, $w1, $w0
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS32EL-LABEL: mixed_32:
+; MIPS32EL:       # %bb.0: # %entry
+; MIPS32EL-NEXT:    addiu $sp, $sp, -8
+; MIPS32EL-NEXT:    .cfi_def_cfa_offset 8
+; MIPS32EL-NEXT:    lui $1, 17200
+; MIPS32EL-NEXT:    sw $1, 4($sp)
+; MIPS32EL-NEXT:    lw $1, 32($sp)
+; MIPS32EL-NEXT:    sw $1, 0($sp)
+; MIPS32EL-NEXT:    lui $1, %hi($CPI41_0)
+; MIPS32EL-NEXT:    ldc1 $f0, %lo($CPI41_0)($1)
+; MIPS32EL-NEXT:    ldc1 $f2, 0($sp)
+; MIPS32EL-NEXT:    sub.d $f0, $f2, $f0
+; MIPS32EL-NEXT:    cvt.s.d $f0, $f0
+; MIPS32EL-NEXT:    lwc1 $f1, 28($sp)
+; MIPS32EL-NEXT:    lwc1 $f2, 24($sp)
+; MIPS32EL-NEXT:    add.s $f2, $f0, $f2
+; MIPS32EL-NEXT:    add.s $f1, $f0, $f1
+; MIPS32EL-NEXT:    swc1 $f1, 12($4)
+; MIPS32EL-NEXT:    swc1 $f2, 8($4)
+; MIPS32EL-NEXT:    mtc1 $7, $f1
+; MIPS32EL-NEXT:    add.s $f1, $f0, $f1
+; MIPS32EL-NEXT:    swc1 $f1, 4($4)
+; MIPS32EL-NEXT:    mtc1 $6, $f1
+; MIPS32EL-NEXT:    add.s $f0, $f0, $f1
+; MIPS32EL-NEXT:    swc1 $f0, 0($4)
+; MIPS32EL-NEXT:    addiu $sp, $sp, 8
+; MIPS32EL-NEXT:    jr $ra
+; MIPS32EL-NEXT:    nop
+;
+; MIPS64EL-LABEL: mixed_32:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    daddiu $sp, $sp, -16
+; MIPS64EL-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(mixed_32)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(mixed_32)))
+; MIPS64EL-NEXT:    lui $2, 17200
+; MIPS64EL-NEXT:    sw $2, 12($sp)
+; MIPS64EL-NEXT:    sll $2, $6, 0
+; MIPS64EL-NEXT:    sw $2, 8($sp)
+; MIPS64EL-NEXT:    ld $1, %got_page(.LCPI41_0)($1)
+; MIPS64EL-NEXT:    ldc1 $f0, %got_ofst(.LCPI41_0)($1)
+; MIPS64EL-NEXT:    ldc1 $f1, 8($sp)
+; MIPS64EL-NEXT:    sub.d $f0, $f1, $f0
+; MIPS64EL-NEXT:    cvt.s.d $f0, $f0
+; MIPS64EL-NEXT:    dsrl $1, $4, 32
+; MIPS64EL-NEXT:    sll $1, $1, 0
+; MIPS64EL-NEXT:    mtc1 $1, $f1
+; MIPS64EL-NEXT:    add.s $f1, $f0, $f1
+; MIPS64EL-NEXT:    dsrl $1, $5, 32
+; MIPS64EL-NEXT:    mfc1 $2, $f1
+; MIPS64EL-NEXT:    sll $3, $4, 0
+; MIPS64EL-NEXT:    sll $1, $1, 0
+; MIPS64EL-NEXT:    mtc1 $1, $f1
+; MIPS64EL-NEXT:    add.s $f1, $f0, $f1
+; MIPS64EL-NEXT:    mfc1 $1, $f1
+; MIPS64EL-NEXT:    mtc1 $3, $f1
+; MIPS64EL-NEXT:    sll $3, $5, 0
+; MIPS64EL-NEXT:    mtc1 $3, $f2
+; MIPS64EL-NEXT:    dsll $2, $2, 32
+; MIPS64EL-NEXT:    add.s $f1, $f0, $f1
+; MIPS64EL-NEXT:    mfc1 $3, $f1
+; MIPS64EL-NEXT:    dsll $3, $3, 32
+; MIPS64EL-NEXT:    dsrl $3, $3, 32
+; MIPS64EL-NEXT:    or $2, $3, $2
+; MIPS64EL-NEXT:    dsll $1, $1, 32
+; MIPS64EL-NEXT:    add.s $f0, $f0, $f2
+; MIPS64EL-NEXT:    mfc1 $3, $f0
+; MIPS64EL-NEXT:    dsll $3, $3, 32
+; MIPS64EL-NEXT:    dsrl $3, $3, 32
+; MIPS64EL-NEXT:    or $3, $3, $1
+; MIPS64EL-NEXT:    daddiu $sp, $sp, 16
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: mixed_32:
+; MIPS64R5EL:       # %bb.0: # %entry
+; MIPS64R5EL-NEXT:    ldi.b $w0, 0
+; MIPS64R5EL-NEXT:    insert.d $w0[0], $4
+; MIPS64R5EL-NEXT:    insert.d $w0[1], $5
+; MIPS64R5EL-NEXT:    sll $1, $6, 0
+; MIPS64R5EL-NEXT:    fill.w $w1, $1
+; MIPS64R5EL-NEXT:    ffint_u.w $w1, $w1
+; MIPS64R5EL-NEXT:    fadd.w $w0, $w1, $w0
+; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
 entry:
-; ALL-LABEL: mixed_32:
-
-; MIPS32-DAG: mtc1 $6, $f{{[0-9]+}}
-; MIPS32-DAG: mtc1 $7, $f{{[0-9]+}}
-; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 28($sp)
-; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 24($sp)
-; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
-; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
-; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4)
-; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4)
-
-; MIPS32R5: insert.w $w[[W0:[0-9]+]][0], $6
-; MIPS32R5: insert.w $w[[W0:[0-9]+]][1], $7
-; MIPS32R5: lw $[[R0:[0-9]+]], 16($sp)
-; MIPS32R5: insert.w $w[[W0:[0-9]+]][2], $[[R0]]
-; MIPS32R5: lw $[[R1:[0-9]+]], 20($sp)
-; MIPS32R5: insert.w $w[[W0:[0-9]+]][3], $[[R1]]
-; MIPS32R5: lw $[[R0:[0-9]+]], 24($sp)
-
-; MIPS64-DAG: sll ${{[0-9]+}}, $6, 0
-; MIPS64-DAG: dsrl $[[R0:[0-9]+]], $4, 32
-; MIPS64-DAG: sll $[[R1:[0-9]+]], $[[R0]], 0
-; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
-; MIPS64-DAG: sll $[[R2:[0-9]+]], $4, 0
-; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
-; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R3]], 0
-; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
-; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}}
-; MIPS64-DAG: sll	$[[R6:[0-9]+]], $5, 0
-; MIPS64-DAG: mtc1 $[[R6:[0-9]+]], $f{{[0-9]+}}
-
-; MIPS64R5: insert.d $w[[W0:[0-9]+]][0], $4
-; MIPS64R5: insert.d $w[[W0]][1], $5
-; MIPS64R5: sll $[[R0:[0-9]+]], $6, 0
-; MIPS64R5: fill.w $w{{[0-9]+}}, $[[R0]]
-
   %0 = uitofp i32 %b to float
   %1 = insertelement <4 x float> undef, float %0, i32 0
   %2 = insertelement <4 x float> %1, float %0, i32 1
@@ -1563,96 +6740,385 @@ entry:
 ; this function.
 
 define <4 x float> @cast(<4 x i32> %a) {
+; MIPS32EB-LABEL: cast:
+; MIPS32EB:       # %bb.0: # %entry
+; MIPS32EB-NEXT:    addiu $sp, $sp, -32
+; MIPS32EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32EB-NEXT:    lw $1, 52($sp)
+; MIPS32EB-NEXT:    lui $2, 17200
+; MIPS32EB-NEXT:    sw $2, 24($sp)
+; MIPS32EB-NEXT:    sw $1, 28($sp)
+; MIPS32EB-NEXT:    lw $1, 48($sp)
+; MIPS32EB-NEXT:    sw $2, 16($sp)
+; MIPS32EB-NEXT:    sw $1, 20($sp)
+; MIPS32EB-NEXT:    lui $1, %hi($CPI42_0)
+; MIPS32EB-NEXT:    sw $2, 8($sp)
+; MIPS32EB-NEXT:    sw $7, 12($sp)
+; MIPS32EB-NEXT:    ldc1 $f0, %lo($CPI42_0)($1)
+; MIPS32EB-NEXT:    ldc1 $f2, 24($sp)
+; MIPS32EB-NEXT:    sub.d $f2, $f2, $f0
+; MIPS32EB-NEXT:    ldc1 $f4, 16($sp)
+; MIPS32EB-NEXT:    sub.d $f4, $f4, $f0
+; MIPS32EB-NEXT:    ldc1 $f6, 8($sp)
+; MIPS32EB-NEXT:    sub.d $f6, $f6, $f0
+; MIPS32EB-NEXT:    cvt.s.d $f6, $f6
+; MIPS32EB-NEXT:    cvt.s.d $f4, $f4
+; MIPS32EB-NEXT:    cvt.s.d $f2, $f2
+; MIPS32EB-NEXT:    swc1 $f2, 12($4)
+; MIPS32EB-NEXT:    swc1 $f4, 8($4)
+; MIPS32EB-NEXT:    swc1 $f6, 4($4)
+; MIPS32EB-NEXT:    sw $2, 0($sp)
+; MIPS32EB-NEXT:    sw $6, 4($sp)
+; MIPS32EB-NEXT:    ldc1 $f2, 0($sp)
+; MIPS32EB-NEXT:    sub.d $f0, $f2, $f0
+; MIPS32EB-NEXT:    cvt.s.d $f0, $f0
+; MIPS32EB-NEXT:    swc1 $f0, 0($4)
+; MIPS32EB-NEXT:    addiu $sp, $sp, 32
+; MIPS32EB-NEXT:    jr $ra
+; MIPS32EB-NEXT:    nop
+;
+; MIPS64EB-LABEL: cast:
+; MIPS64EB:       # %bb.0: # %entry
+; MIPS64EB-NEXT:    daddiu $sp, $sp, -32
+; MIPS64EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(cast)))
+; MIPS64EB-NEXT:    daddu $1, $1, $25
+; MIPS64EB-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(cast)))
+; MIPS64EB-NEXT:    sll $2, $4, 0
+; MIPS64EB-NEXT:    lui $3, 17200
+; MIPS64EB-NEXT:    sw $3, 0($sp)
+; MIPS64EB-NEXT:    sw $2, 4($sp)
+; MIPS64EB-NEXT:    sll $2, $5, 0
+; MIPS64EB-NEXT:    sw $3, 8($sp)
+; MIPS64EB-NEXT:    sw $2, 12($sp)
+; MIPS64EB-NEXT:    ld $1, %got_page(.LCPI42_0)($1)
+; MIPS64EB-NEXT:    ldc1 $f0, %got_ofst(.LCPI42_0)($1)
+; MIPS64EB-NEXT:    ldc1 $f1, 0($sp)
+; MIPS64EB-NEXT:    sub.d $f1, $f1, $f0
+; MIPS64EB-NEXT:    cvt.s.d $f1, $f1
+; MIPS64EB-NEXT:    ldc1 $f2, 8($sp)
+; MIPS64EB-NEXT:    sub.d $f2, $f2, $f0
+; MIPS64EB-NEXT:    mfc1 $1, $f1
+; MIPS64EB-NEXT:    dsrl $2, $4, 32
+; MIPS64EB-NEXT:    sll $2, $2, 0
+; MIPS64EB-NEXT:    sw $3, 16($sp)
+; MIPS64EB-NEXT:    sw $2, 20($sp)
+; MIPS64EB-NEXT:    sw $3, 24($sp)
+; MIPS64EB-NEXT:    dsll $1, $1, 32
+; MIPS64EB-NEXT:    cvt.s.d $f1, $f2
+; MIPS64EB-NEXT:    dsrl $2, $5, 32
+; MIPS64EB-NEXT:    sll $2, $2, 0
+; MIPS64EB-NEXT:    sw $2, 28($sp)
+; MIPS64EB-NEXT:    mfc1 $2, $f1
+; MIPS64EB-NEXT:    dsll $3, $2, 32
+; MIPS64EB-NEXT:    dsrl $1, $1, 32
+; MIPS64EB-NEXT:    ldc1 $f1, 16($sp)
+; MIPS64EB-NEXT:    sub.d $f1, $f1, $f0
+; MIPS64EB-NEXT:    cvt.s.d $f1, $f1
+; MIPS64EB-NEXT:    mfc1 $2, $f1
+; MIPS64EB-NEXT:    dsll $2, $2, 32
+; MIPS64EB-NEXT:    or $2, $1, $2
+; MIPS64EB-NEXT:    dsrl $1, $3, 32
+; MIPS64EB-NEXT:    ldc1 $f1, 24($sp)
+; MIPS64EB-NEXT:    sub.d $f0, $f1, $f0
+; MIPS64EB-NEXT:    cvt.s.d $f0, $f0
+; MIPS64EB-NEXT:    mfc1 $3, $f0
+; MIPS64EB-NEXT:    dsll $3, $3, 32
+; MIPS64EB-NEXT:    or $3, $1, $3
+; MIPS64EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64EB-NEXT:    jr $ra
+; MIPS64EB-NEXT:    nop
+;
+; MIPS32R5-LABEL: cast:
+; MIPS32R5:       # %bb.0: # %entry
+; MIPS32R5-NEXT:    ldi.b $w0, 0
+; MIPS32R5-NEXT:    insert.w $w0[0], $6
+; MIPS32R5-NEXT:    insert.w $w0[1], $7
+; MIPS32R5-NEXT:    lw $1, 16($sp)
+; MIPS32R5-NEXT:    insert.w $w0[2], $1
+; MIPS32R5-NEXT:    lw $1, 20($sp)
+; MIPS32R5-NEXT:    insert.w $w0[3], $1
+; MIPS32R5-NEXT:    ffint_u.w $w0, $w0
+; MIPS32R5-NEXT:    st.w $w0, 0($4)
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: cast:
+; MIPS64R5EB:       # %bb.0: # %entry
+; MIPS64R5EB-NEXT:    ldi.b $w0, 0
+; MIPS64R5EB-NEXT:    insert.d $w0[0], $4
+; MIPS64R5EB-NEXT:    insert.d $w0[1], $5
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    ffint_u.w $w0, $w0
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS32EL-LABEL: cast:
+; MIPS32EL:       # %bb.0: # %entry
+; MIPS32EL-NEXT:    addiu $sp, $sp, -32
+; MIPS32EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32EL-NEXT:    lw $1, 52($sp)
+; MIPS32EL-NEXT:    lui $2, 17200
+; MIPS32EL-NEXT:    sw $2, 28($sp)
+; MIPS32EL-NEXT:    sw $1, 24($sp)
+; MIPS32EL-NEXT:    lw $1, 48($sp)
+; MIPS32EL-NEXT:    sw $2, 20($sp)
+; MIPS32EL-NEXT:    sw $1, 16($sp)
+; MIPS32EL-NEXT:    lui $1, %hi($CPI42_0)
+; MIPS32EL-NEXT:    sw $2, 12($sp)
+; MIPS32EL-NEXT:    sw $7, 8($sp)
+; MIPS32EL-NEXT:    ldc1 $f0, %lo($CPI42_0)($1)
+; MIPS32EL-NEXT:    ldc1 $f2, 24($sp)
+; MIPS32EL-NEXT:    sub.d $f2, $f2, $f0
+; MIPS32EL-NEXT:    ldc1 $f4, 16($sp)
+; MIPS32EL-NEXT:    sub.d $f4, $f4, $f0
+; MIPS32EL-NEXT:    ldc1 $f6, 8($sp)
+; MIPS32EL-NEXT:    sub.d $f6, $f6, $f0
+; MIPS32EL-NEXT:    cvt.s.d $f6, $f6
+; MIPS32EL-NEXT:    cvt.s.d $f4, $f4
+; MIPS32EL-NEXT:    cvt.s.d $f2, $f2
+; MIPS32EL-NEXT:    swc1 $f2, 12($4)
+; MIPS32EL-NEXT:    swc1 $f4, 8($4)
+; MIPS32EL-NEXT:    swc1 $f6, 4($4)
+; MIPS32EL-NEXT:    sw $2, 4($sp)
+; MIPS32EL-NEXT:    sw $6, 0($sp)
+; MIPS32EL-NEXT:    ldc1 $f2, 0($sp)
+; MIPS32EL-NEXT:    sub.d $f0, $f2, $f0
+; MIPS32EL-NEXT:    cvt.s.d $f0, $f0
+; MIPS32EL-NEXT:    swc1 $f0, 0($4)
+; MIPS32EL-NEXT:    addiu $sp, $sp, 32
+; MIPS32EL-NEXT:    jr $ra
+; MIPS32EL-NEXT:    nop
+;
+; MIPS64EL-LABEL: cast:
+; MIPS64EL:       # %bb.0: # %entry
+; MIPS64EL-NEXT:    daddiu $sp, $sp, -32
+; MIPS64EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(cast)))
+; MIPS64EL-NEXT:    daddu $1, $1, $25
+; MIPS64EL-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(cast)))
+; MIPS64EL-NEXT:    sll $2, $4, 0
+; MIPS64EL-NEXT:    lui $3, 17200
+; MIPS64EL-NEXT:    sw $3, 4($sp)
+; MIPS64EL-NEXT:    sw $2, 0($sp)
+; MIPS64EL-NEXT:    sll $2, $5, 0
+; MIPS64EL-NEXT:    sw $3, 12($sp)
+; MIPS64EL-NEXT:    sw $2, 8($sp)
+; MIPS64EL-NEXT:    ld $1, %got_page(.LCPI42_0)($1)
+; MIPS64EL-NEXT:    ldc1 $f0, %got_ofst(.LCPI42_0)($1)
+; MIPS64EL-NEXT:    ldc1 $f1, 0($sp)
+; MIPS64EL-NEXT:    sub.d $f1, $f1, $f0
+; MIPS64EL-NEXT:    cvt.s.d $f1, $f1
+; MIPS64EL-NEXT:    ldc1 $f2, 8($sp)
+; MIPS64EL-NEXT:    sub.d $f2, $f2, $f0
+; MIPS64EL-NEXT:    mfc1 $1, $f1
+; MIPS64EL-NEXT:    dsrl $2, $4, 32
+; MIPS64EL-NEXT:    sll $2, $2, 0
+; MIPS64EL-NEXT:    sw $3, 20($sp)
+; MIPS64EL-NEXT:    sw $2, 16($sp)
+; MIPS64EL-NEXT:    sw $3, 28($sp)
+; MIPS64EL-NEXT:    dsll $1, $1, 32
+; MIPS64EL-NEXT:    cvt.s.d $f1, $f2
+; MIPS64EL-NEXT:    dsrl $2, $5, 32
+; MIPS64EL-NEXT:    sll $2, $2, 0
+; MIPS64EL-NEXT:    sw $2, 24($sp)
+; MIPS64EL-NEXT:    mfc1 $2, $f1
+; MIPS64EL-NEXT:    dsll $3, $2, 32
+; MIPS64EL-NEXT:    dsrl $1, $1, 32
+; MIPS64EL-NEXT:    ldc1 $f1, 16($sp)
+; MIPS64EL-NEXT:    sub.d $f1, $f1, $f0
+; MIPS64EL-NEXT:    cvt.s.d $f1, $f1
+; MIPS64EL-NEXT:    mfc1 $2, $f1
+; MIPS64EL-NEXT:    dsll $2, $2, 32
+; MIPS64EL-NEXT:    or $2, $1, $2
+; MIPS64EL-NEXT:    dsrl $1, $3, 32
+; MIPS64EL-NEXT:    ldc1 $f1, 24($sp)
+; MIPS64EL-NEXT:    sub.d $f0, $f1, $f0
+; MIPS64EL-NEXT:    cvt.s.d $f0, $f0
+; MIPS64EL-NEXT:    mfc1 $3, $f0
+; MIPS64EL-NEXT:    dsll $3, $3, 32
+; MIPS64EL-NEXT:    or $3, $1, $3
+; MIPS64EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64EL-NEXT:    jr $ra
+; MIPS64EL-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: cast:
+; MIPS64R5EL:       # %bb.0: # %entry
+; MIPS64R5EL-NEXT:    ldi.b $w0, 0
+; MIPS64R5EL-NEXT:    insert.d $w0[0], $4
+; MIPS64R5EL-NEXT:    insert.d $w0[1], $5
+; MIPS64R5EL-NEXT:    ffint_u.w $w0, $w0
+; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
 entry:
-; ALL-LABEL: cast:
-
-; MIPS32: addiu $sp, $sp, -32
-; MIPS32-DAG: sw $6, {{[0-9]+}}($sp)
-; MIPS32-DAG: sw $7, {{[0-9]+}}($sp)
-; MIPS32-DAG: lw ${{[0-9]+}}, 48($sp)
-; MIPS32-DAG: lw ${{[0-9]+}}, 52($sp)
-
-; MIPS32R5-DAG: insert.w  $w0[0], $6
-; MIPS32R5-DAG: insert.w  $w0[1], $7
-; MIPS32R5-DAG: lw  $[[R0:[0-9]+]], 16($sp)
-; MIPS32R5-DAG: insert.w  $w0[2], $[[R0]]
-; MIPS32R5-DAG: lw  $[[R1:[0-9]+]], 20($sp)
-; MIPS32R5-DAG: insert.w  $w0[3], $[[R1]]
-
-; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
-; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
-; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
-
-; MIPS64R5-DAG: insert.d  $w0[0], $4
-; MIPS64R5-DAG: insert.d  $w0[1], $5
-
   %0 = uitofp <4 x i32> %a to <4 x float>
   ret <4 x float> %0
 }
 
 define <4 x float> @select(<4 x i32> %cond, <4 x float> %arg1, <4 x float> %arg2) {
+; MIPS32-LABEL: select:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    andi $1, $7, 1
+; MIPS32-NEXT:    lw $2, 16($sp)
+; MIPS32-NEXT:    andi $2, $2, 1
+; MIPS32-NEXT:    addiu $3, $sp, 44
+; MIPS32-NEXT:    addiu $5, $sp, 28
+; MIPS32-NEXT:    addiu $7, $sp, 48
+; MIPS32-NEXT:    addiu $8, $sp, 32
+; MIPS32-NEXT:    movn $7, $8, $2
+; MIPS32-NEXT:    movn $3, $5, $1
+; MIPS32-NEXT:    andi $1, $6, 1
+; MIPS32-NEXT:    addiu $2, $sp, 40
+; MIPS32-NEXT:    addiu $5, $sp, 24
+; MIPS32-NEXT:    movn $2, $5, $1
+; MIPS32-NEXT:    lw $1, 20($sp)
+; MIPS32-NEXT:    lwc1 $f0, 0($2)
+; MIPS32-NEXT:    lwc1 $f1, 0($3)
+; MIPS32-NEXT:    lwc1 $f2, 0($7)
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    addiu $2, $sp, 52
+; MIPS32-NEXT:    addiu $3, $sp, 36
+; MIPS32-NEXT:    movn $2, $3, $1
+; MIPS32-NEXT:    lwc1 $f3, 0($2)
+; MIPS32-NEXT:    swc1 $f3, 12($4)
+; MIPS32-NEXT:    swc1 $f2, 8($4)
+; MIPS32-NEXT:    swc1 $f1, 4($4)
+; MIPS32-NEXT:    swc1 $f0, 0($4)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: select:
+; MIPS64:       # %bb.0: # %entry
+; MIPS64-NEXT:    sll $1, $8, 0
+; MIPS64-NEXT:    mtc1 $1, $f0
+; MIPS64-NEXT:    sll $1, $4, 0
+; MIPS64-NEXT:    andi $1, $1, 1
+; MIPS64-NEXT:    sll $2, $6, 0
+; MIPS64-NEXT:    mtc1 $2, $f1
+; MIPS64-NEXT:    movn.s $f0, $f1, $1
+; MIPS64-NEXT:    dsrl $1, $8, 32
+; MIPS64-NEXT:    dsrl $2, $4, 32
+; MIPS64-NEXT:    sll $1, $1, 0
+; MIPS64-NEXT:    mfc1 $3, $f0
+; MIPS64-NEXT:    sll $4, $9, 0
+; MIPS64-NEXT:    mtc1 $1, $f0
+; MIPS64-NEXT:    sll $1, $2, 0
+; MIPS64-NEXT:    andi $1, $1, 1
+; MIPS64-NEXT:    dsrl $2, $6, 32
+; MIPS64-NEXT:    sll $2, $2, 0
+; MIPS64-NEXT:    mtc1 $2, $f1
+; MIPS64-NEXT:    movn.s $f0, $f1, $1
+; MIPS64-NEXT:    dsll $1, $3, 32
+; MIPS64-NEXT:    mtc1 $4, $f1
+; MIPS64-NEXT:    sll $2, $5, 0
+; MIPS64-NEXT:    andi $2, $2, 1
+; MIPS64-NEXT:    sll $3, $7, 0
+; MIPS64-NEXT:    mtc1 $3, $f2
+; MIPS64-NEXT:    movn.s $f1, $f2, $2
+; MIPS64-NEXT:    mfc1 $2, $f1
+; MIPS64-NEXT:    dsll $3, $2, 32
+; MIPS64-NEXT:    dsrl $1, $1, 32
+; MIPS64-NEXT:    mfc1 $2, $f0
+; MIPS64-NEXT:    dsrl $4, $5, 32
+; MIPS64-NEXT:    dsrl $5, $9, 32
+; MIPS64-NEXT:    dsll $2, $2, 32
+; MIPS64-NEXT:    sll $5, $5, 0
+; MIPS64-NEXT:    or $2, $1, $2
+; MIPS64-NEXT:    dsrl $1, $3, 32
+; MIPS64-NEXT:    mtc1 $5, $f0
+; MIPS64-NEXT:    sll $3, $4, 0
+; MIPS64-NEXT:    andi $3, $3, 1
+; MIPS64-NEXT:    dsrl $4, $7, 32
+; MIPS64-NEXT:    sll $4, $4, 0
+; MIPS64-NEXT:    mtc1 $4, $f1
+; MIPS64-NEXT:    movn.s $f0, $f1, $3
+; MIPS64-NEXT:    mfc1 $3, $f0
+; MIPS64-NEXT:    dsll $3, $3, 32
+; MIPS64-NEXT:    or $3, $1, $3
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32R5-LABEL: select:
+; MIPS32R5:       # %bb.0: # %entry
+; MIPS32R5-NEXT:    ldi.b $w0, 0
+; MIPS32R5-NEXT:    lw $1, 44($sp)
+; MIPS32R5-NEXT:    lw $2, 40($sp)
+; MIPS32R5-NEXT:    move.v $w1, $w0
+; MIPS32R5-NEXT:    insert.w $w1[0], $2
+; MIPS32R5-NEXT:    insert.w $w1[1], $1
+; MIPS32R5-NEXT:    lw $1, 48($sp)
+; MIPS32R5-NEXT:    insert.w $w1[2], $1
+; MIPS32R5-NEXT:    lw $1, 28($sp)
+; MIPS32R5-NEXT:    lw $2, 52($sp)
+; MIPS32R5-NEXT:    lw $3, 24($sp)
+; MIPS32R5-NEXT:    move.v $w2, $w0
+; MIPS32R5-NEXT:    insert.w $w2[0], $3
+; MIPS32R5-NEXT:    insert.w $w0[0], $6
+; MIPS32R5-NEXT:    insert.w $w1[3], $2
+; MIPS32R5-NEXT:    insert.w $w2[1], $1
+; MIPS32R5-NEXT:    lw $1, 32($sp)
+; MIPS32R5-NEXT:    insert.w $w2[2], $1
+; MIPS32R5-NEXT:    lw $1, 36($sp)
+; MIPS32R5-NEXT:    insert.w $w2[3], $1
+; MIPS32R5-NEXT:    insert.w $w0[1], $7
+; MIPS32R5-NEXT:    lw $1, 16($sp)
+; MIPS32R5-NEXT:    insert.w $w0[2], $1
+; MIPS32R5-NEXT:    lw $1, 20($sp)
+; MIPS32R5-NEXT:    insert.w $w0[3], $1
+; MIPS32R5-NEXT:    slli.w $w0, $w0, 31
+; MIPS32R5-NEXT:    srai.w $w0, $w0, 31
+; MIPS32R5-NEXT:    bsel.v $w0, $w1, $w2
+; MIPS32R5-NEXT:    st.w $w0, 0($4)
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    nop
+;
+; MIPS64R5EB-LABEL: select:
+; MIPS64R5EB:       # %bb.0: # %entry
+; MIPS64R5EB-NEXT:    ldi.b $w0, 0
+; MIPS64R5EB-NEXT:    move.v $w1, $w0
+; MIPS64R5EB-NEXT:    insert.d $w1[0], $8
+; MIPS64R5EB-NEXT:    insert.d $w1[1], $9
+; MIPS64R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS64R5EB-NEXT:    move.v $w2, $w0
+; MIPS64R5EB-NEXT:    insert.d $w2[0], $6
+; MIPS64R5EB-NEXT:    insert.d $w2[1], $7
+; MIPS64R5EB-NEXT:    shf.w $w2, $w2, 177
+; MIPS64R5EB-NEXT:    insert.d $w0[0], $4
+; MIPS64R5EB-NEXT:    insert.d $w0[1], $5
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    slli.w $w0, $w0, 31
+; MIPS64R5EB-NEXT:    srai.w $w0, $w0, 31
+; MIPS64R5EB-NEXT:    bsel.v $w0, $w1, $w2
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5EB-NEXT:    jr $ra
+; MIPS64R5EB-NEXT:    nop
+;
+; MIPS64R5EL-LABEL: select:
+; MIPS64R5EL:       # %bb.0: # %entry
+; MIPS64R5EL-NEXT:    ldi.b $w0, 0
+; MIPS64R5EL-NEXT:    move.v $w1, $w0
+; MIPS64R5EL-NEXT:    insert.d $w1[0], $8
+; MIPS64R5EL-NEXT:    insert.d $w1[1], $9
+; MIPS64R5EL-NEXT:    move.v $w2, $w0
+; MIPS64R5EL-NEXT:    insert.d $w2[0], $6
+; MIPS64R5EL-NEXT:    insert.d $w2[1], $7
+; MIPS64R5EL-NEXT:    insert.d $w0[0], $4
+; MIPS64R5EL-NEXT:    insert.d $w0[1], $5
+; MIPS64R5EL-NEXT:    slli.w $w0, $w0, 31
+; MIPS64R5EL-NEXT:    srai.w $w0, $w0, 31
+; MIPS64R5EL-NEXT:    bsel.v $w0, $w1, $w2
+; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.d $3, $w0[1]
+; MIPS64R5EL-NEXT:    jr $ra
+; MIPS64R5EL-NEXT:    nop
 entry:
-; ALL-LABEL: select:
-
-; MIPS32-DAG: andi ${{[0-9]+}}, $7, 1
-; MIPS32-DAG: andi ${{[0-9]+}}, $6, 1
-; MIPS32-DAG: lw $[[R0:[0-9]+]], 16($sp)
-; MIPS32-DAG: andi ${{[0-9]+}}, $[[R0]], 1
-; MIPS32-DAG: lw $[[R1:[0-9]+]], 20($sp)
-; MIPS32-DAG: andi ${{[0-9]+}}, $[[R0]], 1
-
-; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $6
-; MIPS32R5-DAG: insert.w $w[[W0]][1], $7
-; MIPS32R5-DAG: lw $[[R0:[0-9]+]], 16($sp)
-; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 20($sp)
-; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R0]]
-; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R1]]
-; MIPS32R5-DAG: slli.w $w{{[0-9]}}, $w[[W0]]
-
-; MIPS64-DAG: sll $[[R0:[0-9]+]], $6, 0
-; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
-; MIPS64-DAG: dsrl $[[R1:[0-9]+]], $6, 32
-; MIPS64-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
-; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}}
-
-; MIPS64-DAG: sll $[[R3:[0-9]+]], $7, 0
-; MIPS64-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
-; MIPS64-DAG: dsrl $[[R4:[0-9]+]], $7, 32
-; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
-; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
-
-; MIPS64-DAG: sll $[[R6:[0-9]+]], $8, 0
-; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}}
-; MIPS64-DAG: dsrl $[[R7:[0-9]+]], $8, 32
-; MIPS64-DAG: sll $[[R8:[0-9]+]], $[[R7]], 0
-; MIPS64-DAG: mtc1 $[[R8]], $f{{[0-9]+}}
-
-; MIPS64-DAG: sll $[[R9:[0-9]+]], $9, 0
-; MIPS64-DAG: mtc1 $[[R9]], $f{{[0-9]+}}
-; MIPS64-DAG: dsrl $[[R10:[0-9]+]], $9, 32
-; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R10]], 0
-; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}}
-
-; MIPS64-DAG: sll $[[R12:[0-9]+]], $4, 0
-; MIPS64-DAG: andi ${{[0-9]+}}, $[[R12]], 1
-; MIPS64-DAG: dsrl $[[R13:[0-9]+]], $4, 32
-; MIPS64-DAG: sll $[[R14:[0-9]+]], $[[R13]], 0
-; MIPS64-DAG: andi ${{[0-9]+}}, $[[R14]], 1
-
-; MIPS64-DAG: sll $[[R15:[0-9]+]], $5, 0
-; MIPS64-DAG: andi ${{[0-9]+}}, $[[R15]], 1
-; MIPS64-DAG: dsrl $[[R16:[0-9]+]], $5, 32
-; MIPS64-DAG: sll $[[R17:[0-9]+]], $[[R16]], 0
-; MIPS64-DAG: andi ${{[0-9]+}}, $[[R17]], 1
-
-; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $8
-; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $9
-; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $6
-; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $7
-; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $4
-; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $5
-
   %cond.t = trunc <4 x i32> %cond to <4 x i1>
   %res = select <4 x i1> %cond.t, <4 x float> %arg1, <4 x float> %arg2
   ret <4 x float> %res




More information about the llvm-commits mailing list