[llvm] e7accb7 - [MIPS][MSA] Regenerate basic operations test checks

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 20 05:39:36 PDT 2021


Author: Simon Pilgrim
Date: 2021-07-20T13:37:44+01:00
New Revision: e7accb75bee33d1d1e216e91580a3ca318019bed

URL: https://github.com/llvm/llvm-project/commit/e7accb75bee33d1d1e216e91580a3ca318019bed
DIFF: https://github.com/llvm/llvm-project/commit/e7accb75bee33d1d1e216e91580a3ca318019bed.diff

LOG: [MIPS][MSA] Regenerate basic operations test checks

Clean up the check prefixes to make refreshing the checks a lot easier

Added: 
    

Modified: 
    llvm/test/CodeGen/Mips/msa/basic_operations.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/Mips/msa/basic_operations.ll b/llvm/test/CodeGen/Mips/msa/basic_operations.ll
index c1bbb98a491ad..9ddb91f0770d9 100644
--- a/llvm/test/CodeGen/Mips/msa/basic_operations.ll
+++ b/llvm/test/CodeGen/Mips/msa/basic_operations.ll
@@ -1,21 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=mips -mcpu=mips32r5 -mattr=+msa,+fp64 -relocation-model=pic \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefixes=ALL,O32,MIPS32,ALL-BE,O32-BE %s
+; RUN:   | FileCheck -check-prefixes=O32,O32-BE %s
 ; RUN: llc -march=mipsel -mcpu=mips32r5 -mattr=+msa,+fp64 -relocation-model=pic \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefixes=ALL,O32,MIPS32,ALL-LE,O32-LE %s
+; RUN:   | FileCheck -check-prefixes=O32,O32-LE %s
 ; RUN: llc -march=mips64 -mcpu=mips64r5 -target-abi n32 -mattr=+msa,+fp64 \
 ; RUN:   -relocation-model=pic -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefixes=ALL,N32,MIPS64,ALL-BE %s
+; RUN:   | FileCheck -check-prefixes=N32,N32-BE %s
 ; RUN: llc -march=mips64el -mcpu=mips64r5 -target-abi n32 -mattr=+msa,+fp64 \
 ; RUN:   -relocation-model=pic -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefixes=ALL,N32,MIPS64,ALL-LE %s
+; RUN:   | FileCheck -check-prefixes=N32,N32-LE %s
 ; RUN: llc -march=mips64 -mcpu=mips64r5 -mattr=+msa,+fp64 -relocation-model=pic \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefixes=ALL,N64,MIPS64,ALL-BE %s
+; RUN:   | FileCheck -check-prefixes=N64,N64-BE %s
 ; RUN: llc -march=mips64el -mcpu=mips64r5 -mattr=+msa,+fp64 -relocation-model=pic \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefixes=ALL,N64,MIPS64,ALL-LE %s
+; RUN:   | FileCheck -check-prefixes=N64,N64-LE %s
 
 @v4i8 = global <4 x i8> <i8 0, i8 0, i8 0, i8 0>
 @v16i8 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
@@ -26,185 +27,727 @@
 @i64 = global i64 0
 
 define void @const_v16i8() nounwind {
-  ; ALL-LABEL: const_v16i8:
-
+; O32-BE-LABEL: const_v16i8:
+; O32-BE:       # %bb.0:
+; O32-BE-NEXT:    lui $2, %hi(_gp_disp)
+; O32-BE-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-BE-NEXT:    addu $1, $2, $25
+; O32-BE-NEXT:    lw $2, %got(v16i8)($1)
+; O32-BE-NEXT:    ldi.b $w0, 0
+; O32-BE-NEXT:    st.b $w0, 0($2)
+; O32-BE-NEXT:    ldi.b $w0, 1
+; O32-BE-NEXT:    st.b $w0, 0($2)
+; O32-BE-NEXT:    lw $3, %got($CPI0_0)($1)
+; O32-BE-NEXT:    addiu $3, $3, %lo($CPI0_0)
+; O32-BE-NEXT:    ld.b $w0, 0($3)
+; O32-BE-NEXT:    st.b $w0, 0($2)
+; O32-BE-NEXT:    lw $1, %got($CPI0_1)($1)
+; O32-BE-NEXT:    addiu $1, $1, %lo($CPI0_1)
+; O32-BE-NEXT:    ld.b $w0, 0($1)
+; O32-BE-NEXT:    st.b $w0, 0($2)
+; O32-BE-NEXT:    ldi.h $w0, 256
+; O32-BE-NEXT:    st.b $w0, 0($2)
+; O32-BE-NEXT:    lui $1, 258
+; O32-BE-NEXT:    ori $1, $1, 772
+; O32-BE-NEXT:    fill.w $w0, $1
+; O32-BE-NEXT:    st.b $w0, 0($2)
+; O32-BE-NEXT:    lui $3, 1286
+; O32-BE-NEXT:    ori $3, $3, 1800
+; O32-BE-NEXT:    fill.w $w0, $3
+; O32-BE-NEXT:    insert.w $w0[1], $1
+; O32-BE-NEXT:    splati.d $w0, $w0[0]
+; O32-BE-NEXT:    jr $ra
+; O32-BE-NEXT:    st.b $w0, 0($2)
+;
+; O32-LE-LABEL: const_v16i8:
+; O32-LE:       # %bb.0:
+; O32-LE-NEXT:    lui $2, %hi(_gp_disp)
+; O32-LE-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-LE-NEXT:    addu $1, $2, $25
+; O32-LE-NEXT:    lw $2, %got(v16i8)($1)
+; O32-LE-NEXT:    ldi.b $w0, 0
+; O32-LE-NEXT:    st.b $w0, 0($2)
+; O32-LE-NEXT:    ldi.b $w0, 1
+; O32-LE-NEXT:    st.b $w0, 0($2)
+; O32-LE-NEXT:    lw $3, %got($CPI0_0)($1)
+; O32-LE-NEXT:    addiu $3, $3, %lo($CPI0_0)
+; O32-LE-NEXT:    ld.b $w0, 0($3)
+; O32-LE-NEXT:    st.b $w0, 0($2)
+; O32-LE-NEXT:    lw $1, %got($CPI0_1)($1)
+; O32-LE-NEXT:    addiu $1, $1, %lo($CPI0_1)
+; O32-LE-NEXT:    ld.b $w0, 0($1)
+; O32-LE-NEXT:    st.b $w0, 0($2)
+; O32-LE-NEXT:    ldi.h $w0, 1
+; O32-LE-NEXT:    st.b $w0, 0($2)
+; O32-LE-NEXT:    lui $1, 1027
+; O32-LE-NEXT:    ori $1, $1, 513
+; O32-LE-NEXT:    fill.w $w0, $1
+; O32-LE-NEXT:    st.b $w0, 0($2)
+; O32-LE-NEXT:    lui $1, 2055
+; O32-LE-NEXT:    ori $1, $1, 1541
+; O32-LE-NEXT:    insert.w $w0[1], $1
+; O32-LE-NEXT:    splati.d $w0, $w0[0]
+; O32-LE-NEXT:    jr $ra
+; O32-LE-NEXT:    st.b $w0, 0($2)
+;
+; N32-BE-LABEL: const_v16i8:
+; N32-BE:       # %bb.0:
+; N32-BE-NEXT:    lui $1, %hi(%neg(%gp_rel(const_v16i8)))
+; N32-BE-NEXT:    addu $1, $1, $25
+; N32-BE-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(const_v16i8)))
+; N32-BE-NEXT:    lw $2, %got_disp(v16i8)($1)
+; N32-BE-NEXT:    ldi.b $w0, 0
+; N32-BE-NEXT:    st.b $w0, 0($2)
+; N32-BE-NEXT:    ldi.b $w0, 1
+; N32-BE-NEXT:    st.b $w0, 0($2)
+; N32-BE-NEXT:    lw $3, %got_page(.LCPI0_0)($1)
+; N32-BE-NEXT:    addiu $3, $3, %got_ofst(.LCPI0_0)
+; N32-BE-NEXT:    ld.b $w0, 0($3)
+; N32-BE-NEXT:    st.b $w0, 0($2)
+; N32-BE-NEXT:    lw $1, %got_page(.LCPI0_1)($1)
+; N32-BE-NEXT:    addiu $1, $1, %got_ofst(.LCPI0_1)
+; N32-BE-NEXT:    ld.b $w0, 0($1)
+; N32-BE-NEXT:    st.b $w0, 0($2)
+; N32-BE-NEXT:    ldi.h $w0, 256
+; N32-BE-NEXT:    st.b $w0, 0($2)
+; N32-BE-NEXT:    lui $1, 258
+; N32-BE-NEXT:    ori $1, $1, 772
+; N32-BE-NEXT:    fill.w $w0, $1
+; N32-BE-NEXT:    st.b $w0, 0($2)
+; N32-BE-NEXT:    lui $3, 1286
+; N32-BE-NEXT:    ori $3, $3, 1800
+; N32-BE-NEXT:    dinsu $3, $1, 32, 32
+; N32-BE-NEXT:    fill.d $w0, $3
+; N32-BE-NEXT:    jr $ra
+; N32-BE-NEXT:    st.b $w0, 0($2)
+;
+; N32-LE-LABEL: const_v16i8:
+; N32-LE:       # %bb.0:
+; N32-LE-NEXT:    lui $1, %hi(%neg(%gp_rel(const_v16i8)))
+; N32-LE-NEXT:    addu $1, $1, $25
+; N32-LE-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(const_v16i8)))
+; N32-LE-NEXT:    lw $2, %got_disp(v16i8)($1)
+; N32-LE-NEXT:    ldi.b $w0, 0
+; N32-LE-NEXT:    st.b $w0, 0($2)
+; N32-LE-NEXT:    ldi.b $w0, 1
+; N32-LE-NEXT:    st.b $w0, 0($2)
+; N32-LE-NEXT:    lw $3, %got_page(.LCPI0_0)($1)
+; N32-LE-NEXT:    addiu $3, $3, %got_ofst(.LCPI0_0)
+; N32-LE-NEXT:    ld.b $w0, 0($3)
+; N32-LE-NEXT:    st.b $w0, 0($2)
+; N32-LE-NEXT:    lw $1, %got_page(.LCPI0_1)($1)
+; N32-LE-NEXT:    addiu $1, $1, %got_ofst(.LCPI0_1)
+; N32-LE-NEXT:    ld.b $w0, 0($1)
+; N32-LE-NEXT:    st.b $w0, 0($2)
+; N32-LE-NEXT:    ldi.h $w0, 1
+; N32-LE-NEXT:    st.b $w0, 0($2)
+; N32-LE-NEXT:    lui $1, 1027
+; N32-LE-NEXT:    ori $1, $1, 513
+; N32-LE-NEXT:    fill.w $w0, $1
+; N32-LE-NEXT:    st.b $w0, 0($2)
+; N32-LE-NEXT:    lui $3, 2055
+; N32-LE-NEXT:    ori $3, $3, 1541
+; N32-LE-NEXT:    dinsu $1, $3, 32, 32
+; N32-LE-NEXT:    fill.d $w0, $1
+; N32-LE-NEXT:    jr $ra
+; N32-LE-NEXT:    st.b $w0, 0($2)
+;
+; N64-BE-LABEL: const_v16i8:
+; N64-BE:       # %bb.0:
+; N64-BE-NEXT:    lui $1, %hi(%neg(%gp_rel(const_v16i8)))
+; N64-BE-NEXT:    daddu $1, $1, $25
+; N64-BE-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(const_v16i8)))
+; N64-BE-NEXT:    ld $2, %got_disp(v16i8)($1)
+; N64-BE-NEXT:    ldi.b $w0, 0
+; N64-BE-NEXT:    st.b $w0, 0($2)
+; N64-BE-NEXT:    ldi.b $w0, 1
+; N64-BE-NEXT:    st.b $w0, 0($2)
+; N64-BE-NEXT:    ld $3, %got_page(.LCPI0_0)($1)
+; N64-BE-NEXT:    daddiu $3, $3, %got_ofst(.LCPI0_0)
+; N64-BE-NEXT:    ld.b $w0, 0($3)
+; N64-BE-NEXT:    st.b $w0, 0($2)
+; N64-BE-NEXT:    ld $1, %got_page(.LCPI0_1)($1)
+; N64-BE-NEXT:    daddiu $1, $1, %got_ofst(.LCPI0_1)
+; N64-BE-NEXT:    ld.b $w0, 0($1)
+; N64-BE-NEXT:    st.b $w0, 0($2)
+; N64-BE-NEXT:    ldi.h $w0, 256
+; N64-BE-NEXT:    st.b $w0, 0($2)
+; N64-BE-NEXT:    lui $1, 258
+; N64-BE-NEXT:    ori $1, $1, 772
+; N64-BE-NEXT:    fill.w $w0, $1
+; N64-BE-NEXT:    st.b $w0, 0($2)
+; N64-BE-NEXT:    lui $3, 1286
+; N64-BE-NEXT:    ori $3, $3, 1800
+; N64-BE-NEXT:    dinsu $3, $1, 32, 32
+; N64-BE-NEXT:    fill.d $w0, $3
+; N64-BE-NEXT:    jr $ra
+; N64-BE-NEXT:    st.b $w0, 0($2)
+;
+; N64-LE-LABEL: const_v16i8:
+; N64-LE:       # %bb.0:
+; N64-LE-NEXT:    lui $1, %hi(%neg(%gp_rel(const_v16i8)))
+; N64-LE-NEXT:    daddu $1, $1, $25
+; N64-LE-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(const_v16i8)))
+; N64-LE-NEXT:    ld $2, %got_disp(v16i8)($1)
+; N64-LE-NEXT:    ldi.b $w0, 0
+; N64-LE-NEXT:    st.b $w0, 0($2)
+; N64-LE-NEXT:    ldi.b $w0, 1
+; N64-LE-NEXT:    st.b $w0, 0($2)
+; N64-LE-NEXT:    ld $3, %got_page(.LCPI0_0)($1)
+; N64-LE-NEXT:    daddiu $3, $3, %got_ofst(.LCPI0_0)
+; N64-LE-NEXT:    ld.b $w0, 0($3)
+; N64-LE-NEXT:    st.b $w0, 0($2)
+; N64-LE-NEXT:    ld $1, %got_page(.LCPI0_1)($1)
+; N64-LE-NEXT:    daddiu $1, $1, %got_ofst(.LCPI0_1)
+; N64-LE-NEXT:    ld.b $w0, 0($1)
+; N64-LE-NEXT:    st.b $w0, 0($2)
+; N64-LE-NEXT:    ldi.h $w0, 1
+; N64-LE-NEXT:    st.b $w0, 0($2)
+; N64-LE-NEXT:    lui $1, 1027
+; N64-LE-NEXT:    ori $1, $1, 513
+; N64-LE-NEXT:    fill.w $w0, $1
+; N64-LE-NEXT:    st.b $w0, 0($2)
+; N64-LE-NEXT:    lui $3, 2055
+; N64-LE-NEXT:    ori $3, $3, 1541
+; N64-LE-NEXT:    dinsu $1, $3, 32, 32
+; N64-LE-NEXT:    fill.d $w0, $1
+; N64-LE-NEXT:    jr $ra
+; N64-LE-NEXT:    st.b $w0, 0($2)
   store volatile <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8>*@v16i8
-  ; ALL: ldi.b [[R1:\$w[0-9]+]], 0
-
   store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8>*@v16i8
-  ; ALL: ldi.b [[R1:\$w[0-9]+]], 1
-
   store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 31>, <16 x i8>*@v16i8
-  ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
-  ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
-  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
-  ; ALL: ld.b  [[R1:\$w[0-9]+]], 0([[G_PTR]])
-
   store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6>, <16 x i8>*@v16i8
-  ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
-  ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
-  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
-  ; ALL: ld.b  [[R1:\$w[0-9]+]], 0([[G_PTR]])
-
   store volatile <16 x i8> <i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0>, <16 x i8>*@v16i8
-  ; ALL-BE: ldi.h [[R1:\$w[0-9]+]], 256
-  ; ALL-LE: ldi.h [[R1:\$w[0-9]+]], 1
-
   store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4>, <16 x i8>*@v16i8
-  ; ALL-BE-DAG: lui [[R2:\$[0-9]+]], 258
-  ; ALL-LE-DAG: lui [[R2:\$[0-9]+]], 1027
-  ; ALL-BE-DAG: ori [[R2]], [[R2]], 772
-  ; ALL-LE-DAG: ori [[R2]], [[R2]], 513
-  ; ALL-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]]
-
   store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <16 x i8>*@v16i8
-  ; ALL-BE-DAG: lui [[R3:\$[0-9]+]], 1286
-  ; ALL-LE-DAG: lui [[R3:\$[0-9]+]], 2055
-  ; ALL-BE-DAG: ori [[R4:\$[0-9]+]], [[R3]], 1800
-  ; ALL-LE-DAG: ori [[R4:\$[0-9]+]], [[R3]], 1541
-  ; O32-BE: fill.w  [[R1:\$w[0-9]+]], [[R4]]
-
-  ; O32: insert.w [[R1]][1], [[R2]]
-  ; O32: splati.d $w{{.*}}, [[R1]][0]
-
-  ; MIPS64-BE: dinsu [[R4]], [[R2]], 32, 32
-  ; MIPS64-LE: dinsu [[R2]], [[R4]], 32, 32
-  ; MIPS64-BE: fill.d $w{{.*}}, [[R4]]
-  ; MIPS64-LE: fill.d $w{{.*}}, [[R2]]
-
   ret void
 }
 
 define void @const_v8i16() nounwind {
-  ; ALL-LABEL: const_v8i16:
-
+; O32-BE-LABEL: const_v8i16:
+; O32-BE:       # %bb.0:
+; O32-BE-NEXT:    lui $2, %hi(_gp_disp)
+; O32-BE-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-BE-NEXT:    addu $1, $2, $25
+; O32-BE-NEXT:    ldi.b $w0, 0
+; O32-BE-NEXT:    lw $2, %got(v8i16)($1)
+; O32-BE-NEXT:    st.h $w0, 0($2)
+; O32-BE-NEXT:    ldi.h $w0, 1
+; O32-BE-NEXT:    st.h $w0, 0($2)
+; O32-BE-NEXT:    lw $1, %got($CPI1_0)($1)
+; O32-BE-NEXT:    addiu $1, $1, %lo($CPI1_0)
+; O32-BE-NEXT:    ld.h $w0, 0($1)
+; O32-BE-NEXT:    st.h $w0, 0($2)
+; O32-BE-NEXT:    ldi.b $w0, 4
+; O32-BE-NEXT:    st.h $w0, 0($2)
+; O32-BE-NEXT:    lui $1, 1
+; O32-BE-NEXT:    ori $1, $1, 2
+; O32-BE-NEXT:    fill.w $w0, $1
+; O32-BE-NEXT:    st.h $w0, 0($2)
+; O32-BE-NEXT:    lui $3, 3
+; O32-BE-NEXT:    ori $3, $3, 4
+; O32-BE-NEXT:    fill.w $w0, $3
+; O32-BE-NEXT:    insert.w $w0[1], $1
+; O32-BE-NEXT:    splati.d $w0, $w0[0]
+; O32-BE-NEXT:    jr $ra
+; O32-BE-NEXT:    st.h $w0, 0($2)
+;
+; O32-LE-LABEL: const_v8i16:
+; O32-LE:       # %bb.0:
+; O32-LE-NEXT:    lui $2, %hi(_gp_disp)
+; O32-LE-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-LE-NEXT:    addu $1, $2, $25
+; O32-LE-NEXT:    ldi.b $w0, 0
+; O32-LE-NEXT:    lw $2, %got(v8i16)($1)
+; O32-LE-NEXT:    st.h $w0, 0($2)
+; O32-LE-NEXT:    ldi.h $w0, 1
+; O32-LE-NEXT:    st.h $w0, 0($2)
+; O32-LE-NEXT:    lw $1, %got($CPI1_0)($1)
+; O32-LE-NEXT:    addiu $1, $1, %lo($CPI1_0)
+; O32-LE-NEXT:    ld.h $w0, 0($1)
+; O32-LE-NEXT:    st.h $w0, 0($2)
+; O32-LE-NEXT:    ldi.b $w0, 4
+; O32-LE-NEXT:    st.h $w0, 0($2)
+; O32-LE-NEXT:    lui $1, 2
+; O32-LE-NEXT:    ori $1, $1, 1
+; O32-LE-NEXT:    fill.w $w0, $1
+; O32-LE-NEXT:    st.h $w0, 0($2)
+; O32-LE-NEXT:    lui $1, 4
+; O32-LE-NEXT:    ori $1, $1, 3
+; O32-LE-NEXT:    insert.w $w0[1], $1
+; O32-LE-NEXT:    splati.d $w0, $w0[0]
+; O32-LE-NEXT:    jr $ra
+; O32-LE-NEXT:    st.h $w0, 0($2)
+;
+; N32-BE-LABEL: const_v8i16:
+; N32-BE:       # %bb.0:
+; N32-BE-NEXT:    lui $1, %hi(%neg(%gp_rel(const_v8i16)))
+; N32-BE-NEXT:    addu $1, $1, $25
+; N32-BE-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(const_v8i16)))
+; N32-BE-NEXT:    ldi.b $w0, 0
+; N32-BE-NEXT:    lw $2, %got_disp(v8i16)($1)
+; N32-BE-NEXT:    st.h $w0, 0($2)
+; N32-BE-NEXT:    ldi.h $w0, 1
+; N32-BE-NEXT:    st.h $w0, 0($2)
+; N32-BE-NEXT:    lw $1, %got_page(.LCPI1_0)($1)
+; N32-BE-NEXT:    addiu $1, $1, %got_ofst(.LCPI1_0)
+; N32-BE-NEXT:    ld.h $w0, 0($1)
+; N32-BE-NEXT:    st.h $w0, 0($2)
+; N32-BE-NEXT:    ldi.b $w0, 4
+; N32-BE-NEXT:    st.h $w0, 0($2)
+; N32-BE-NEXT:    lui $1, 1
+; N32-BE-NEXT:    ori $1, $1, 2
+; N32-BE-NEXT:    fill.w $w0, $1
+; N32-BE-NEXT:    st.h $w0, 0($2)
+; N32-BE-NEXT:    lui $3, 3
+; N32-BE-NEXT:    ori $3, $3, 4
+; N32-BE-NEXT:    dinsu $3, $1, 32, 32
+; N32-BE-NEXT:    fill.d $w0, $3
+; N32-BE-NEXT:    jr $ra
+; N32-BE-NEXT:    st.h $w0, 0($2)
+;
+; N32-LE-LABEL: const_v8i16:
+; N32-LE:       # %bb.0:
+; N32-LE-NEXT:    lui $1, %hi(%neg(%gp_rel(const_v8i16)))
+; N32-LE-NEXT:    addu $1, $1, $25
+; N32-LE-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(const_v8i16)))
+; N32-LE-NEXT:    ldi.b $w0, 0
+; N32-LE-NEXT:    lw $2, %got_disp(v8i16)($1)
+; N32-LE-NEXT:    st.h $w0, 0($2)
+; N32-LE-NEXT:    ldi.h $w0, 1
+; N32-LE-NEXT:    st.h $w0, 0($2)
+; N32-LE-NEXT:    lw $1, %got_page(.LCPI1_0)($1)
+; N32-LE-NEXT:    addiu $1, $1, %got_ofst(.LCPI1_0)
+; N32-LE-NEXT:    ld.h $w0, 0($1)
+; N32-LE-NEXT:    st.h $w0, 0($2)
+; N32-LE-NEXT:    ldi.b $w0, 4
+; N32-LE-NEXT:    st.h $w0, 0($2)
+; N32-LE-NEXT:    lui $1, 2
+; N32-LE-NEXT:    ori $1, $1, 1
+; N32-LE-NEXT:    fill.w $w0, $1
+; N32-LE-NEXT:    st.h $w0, 0($2)
+; N32-LE-NEXT:    lui $3, 4
+; N32-LE-NEXT:    ori $3, $3, 3
+; N32-LE-NEXT:    dinsu $1, $3, 32, 32
+; N32-LE-NEXT:    fill.d $w0, $1
+; N32-LE-NEXT:    jr $ra
+; N32-LE-NEXT:    st.h $w0, 0($2)
+;
+; N64-BE-LABEL: const_v8i16:
+; N64-BE:       # %bb.0:
+; N64-BE-NEXT:    lui $1, %hi(%neg(%gp_rel(const_v8i16)))
+; N64-BE-NEXT:    daddu $1, $1, $25
+; N64-BE-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(const_v8i16)))
+; N64-BE-NEXT:    ldi.b $w0, 0
+; N64-BE-NEXT:    ld $2, %got_disp(v8i16)($1)
+; N64-BE-NEXT:    st.h $w0, 0($2)
+; N64-BE-NEXT:    ldi.h $w0, 1
+; N64-BE-NEXT:    st.h $w0, 0($2)
+; N64-BE-NEXT:    ld $1, %got_page(.LCPI1_0)($1)
+; N64-BE-NEXT:    daddiu $1, $1, %got_ofst(.LCPI1_0)
+; N64-BE-NEXT:    ld.h $w0, 0($1)
+; N64-BE-NEXT:    st.h $w0, 0($2)
+; N64-BE-NEXT:    ldi.b $w0, 4
+; N64-BE-NEXT:    st.h $w0, 0($2)
+; N64-BE-NEXT:    lui $1, 1
+; N64-BE-NEXT:    ori $1, $1, 2
+; N64-BE-NEXT:    fill.w $w0, $1
+; N64-BE-NEXT:    st.h $w0, 0($2)
+; N64-BE-NEXT:    lui $3, 3
+; N64-BE-NEXT:    ori $3, $3, 4
+; N64-BE-NEXT:    dinsu $3, $1, 32, 32
+; N64-BE-NEXT:    fill.d $w0, $3
+; N64-BE-NEXT:    jr $ra
+; N64-BE-NEXT:    st.h $w0, 0($2)
+;
+; N64-LE-LABEL: const_v8i16:
+; N64-LE:       # %bb.0:
+; N64-LE-NEXT:    lui $1, %hi(%neg(%gp_rel(const_v8i16)))
+; N64-LE-NEXT:    daddu $1, $1, $25
+; N64-LE-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(const_v8i16)))
+; N64-LE-NEXT:    ldi.b $w0, 0
+; N64-LE-NEXT:    ld $2, %got_disp(v8i16)($1)
+; N64-LE-NEXT:    st.h $w0, 0($2)
+; N64-LE-NEXT:    ldi.h $w0, 1
+; N64-LE-NEXT:    st.h $w0, 0($2)
+; N64-LE-NEXT:    ld $1, %got_page(.LCPI1_0)($1)
+; N64-LE-NEXT:    daddiu $1, $1, %got_ofst(.LCPI1_0)
+; N64-LE-NEXT:    ld.h $w0, 0($1)
+; N64-LE-NEXT:    st.h $w0, 0($2)
+; N64-LE-NEXT:    ldi.b $w0, 4
+; N64-LE-NEXT:    st.h $w0, 0($2)
+; N64-LE-NEXT:    lui $1, 2
+; N64-LE-NEXT:    ori $1, $1, 1
+; N64-LE-NEXT:    fill.w $w0, $1
+; N64-LE-NEXT:    st.h $w0, 0($2)
+; N64-LE-NEXT:    lui $3, 4
+; N64-LE-NEXT:    ori $3, $3, 3
+; N64-LE-NEXT:    dinsu $1, $3, 32, 32
+; N64-LE-NEXT:    fill.d $w0, $1
+; N64-LE-NEXT:    jr $ra
+; N64-LE-NEXT:    st.h $w0, 0($2)
   store volatile <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16>*@v8i16
-  ; ALL: ldi.b [[R1:\$w[0-9]+]], 0
-
   store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16>*@v8i16
-  ; ALL: ldi.h [[R1:\$w[0-9]+]], 1
-
   store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 2, i16 1, i16 1, i16 1, i16 31>, <8 x i16>*@v8i16
-  ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
-  ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
-  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
-  ; ALL: ld.h  [[R1:\$w[0-9]+]], 0([[G_PTR]])
-
   store volatile <8 x i16> <i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028>, <8 x i16>*@v8i16
-  ; ALL: ldi.b [[R1:\$w[0-9]+]], 4
-
   store volatile <8 x i16> <i16 1, i16 2, i16 1, i16 2, i16 1, i16 2, i16 1, i16 2>, <8 x i16>*@v8i16
-  ; ALL-BE-DAG: lui [[R2:\$[0-9]+]], 1
-  ; ALL-LE-DAG: lui [[R2:\$[0-9]+]], 2
-  ; ALL-BE-DAG: ori [[R2]], [[R2]], 2
-  ; ALL-LE-DAG: ori [[R2]], [[R2]], 1
-  ; ALL-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]]
-
   store volatile <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>, <8 x i16>*@v8i16
-  ; ALL-BE-DAG: lui [[R3:\$[0-9]+]], 3
-  ; ALL-LE-DAG: lui [[R3:\$[0-9]+]], 4
-  ; ALL-BE-DAG: ori [[R4:\$[0-9]+]], [[R3]], 4
-  ; ALL-LE-DAG: ori [[R4:\$[0-9]+]], [[R3]], 3
-
-  ; O32-BE: fill.w [[R1:\$w[0-9]+]], [[R4]]
-  ; O32: insert.w [[R1]][1], [[R2]]
-  ; O32: splati.d $w{{.*}}, [[R1]][0]
-
-  ; MIPS64-BE: dinsu [[R4]], [[R2]], 32, 32
-  ; MIPS64-LE: dinsu [[R2]], [[R4]], 32, 32
-  ; MIPS64-BE: fill.d $w{{.*}}, [[R4]]
-  ; MIPS64-LE: fill.d $w{{.*}}, [[R2]]
-
   ret void
 }
 
 define void @const_v4i32() nounwind {
-  ; ALL-LABEL: const_v4i32:
-
+; O32-BE-LABEL: const_v4i32:
+; O32-BE:       # %bb.0:
+; O32-BE-NEXT:    lui $2, %hi(_gp_disp)
+; O32-BE-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-BE-NEXT:    addu $1, $2, $25
+; O32-BE-NEXT:    ldi.b $w0, 0
+; O32-BE-NEXT:    lw $2, %got(v4i32)($1)
+; O32-BE-NEXT:    st.w $w0, 0($2)
+; O32-BE-NEXT:    ldi.w $w0, 1
+; O32-BE-NEXT:    st.w $w0, 0($2)
+; O32-BE-NEXT:    lw $3, %got($CPI2_0)($1)
+; O32-BE-NEXT:    addiu $3, $3, %lo($CPI2_0)
+; O32-BE-NEXT:    ld.w $w0, 0($3)
+; O32-BE-NEXT:    st.w $w0, 0($2)
+; O32-BE-NEXT:    ldi.b $w0, 1
+; O32-BE-NEXT:    st.w $w0, 0($2)
+; O32-BE-NEXT:    ldi.h $w0, 1
+; O32-BE-NEXT:    st.w $w0, 0($2)
+; O32-BE-NEXT:    ori $3, $zero, 1
+; O32-BE-NEXT:    ori $4, $zero, 2
+; O32-BE-NEXT:    fill.w $w0, $4
+; O32-BE-NEXT:    insert.w $w0[1], $3
+; O32-BE-NEXT:    splati.d $w0, $w0[0]
+; O32-BE-NEXT:    st.w $w0, 0($2)
+; O32-BE-NEXT:    lw $1, %got($CPI2_1)($1)
+; O32-BE-NEXT:    addiu $1, $1, %lo($CPI2_1)
+; O32-BE-NEXT:    ld.w $w0, 0($1)
+; O32-BE-NEXT:    jr $ra
+; O32-BE-NEXT:    st.w $w0, 0($2)
+;
+; O32-LE-LABEL: const_v4i32:
+; O32-LE:       # %bb.0:
+; O32-LE-NEXT:    lui $2, %hi(_gp_disp)
+; O32-LE-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-LE-NEXT:    addu $1, $2, $25
+; O32-LE-NEXT:    ldi.b $w0, 0
+; O32-LE-NEXT:    lw $2, %got(v4i32)($1)
+; O32-LE-NEXT:    st.w $w0, 0($2)
+; O32-LE-NEXT:    ldi.w $w0, 1
+; O32-LE-NEXT:    st.w $w0, 0($2)
+; O32-LE-NEXT:    lw $3, %got($CPI2_0)($1)
+; O32-LE-NEXT:    addiu $3, $3, %lo($CPI2_0)
+; O32-LE-NEXT:    ld.w $w0, 0($3)
+; O32-LE-NEXT:    st.w $w0, 0($2)
+; O32-LE-NEXT:    ldi.b $w0, 1
+; O32-LE-NEXT:    st.w $w0, 0($2)
+; O32-LE-NEXT:    ldi.h $w0, 1
+; O32-LE-NEXT:    st.w $w0, 0($2)
+; O32-LE-NEXT:    ori $3, $zero, 2
+; O32-LE-NEXT:    ori $4, $zero, 1
+; O32-LE-NEXT:    fill.w $w0, $4
+; O32-LE-NEXT:    insert.w $w0[1], $3
+; O32-LE-NEXT:    splati.d $w0, $w0[0]
+; O32-LE-NEXT:    st.w $w0, 0($2)
+; O32-LE-NEXT:    lw $1, %got($CPI2_1)($1)
+; O32-LE-NEXT:    addiu $1, $1, %lo($CPI2_1)
+; O32-LE-NEXT:    ld.w $w0, 0($1)
+; O32-LE-NEXT:    jr $ra
+; O32-LE-NEXT:    st.w $w0, 0($2)
+;
+; N32-BE-LABEL: const_v4i32:
+; N32-BE:       # %bb.0:
+; N32-BE-NEXT:    lui $1, %hi(%neg(%gp_rel(const_v4i32)))
+; N32-BE-NEXT:    addu $1, $1, $25
+; N32-BE-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(const_v4i32)))
+; N32-BE-NEXT:    ldi.b $w0, 0
+; N32-BE-NEXT:    lw $2, %got_disp(v4i32)($1)
+; N32-BE-NEXT:    st.w $w0, 0($2)
+; N32-BE-NEXT:    ldi.w $w0, 1
+; N32-BE-NEXT:    st.w $w0, 0($2)
+; N32-BE-NEXT:    lw $3, %got_page(.LCPI2_0)($1)
+; N32-BE-NEXT:    addiu $3, $3, %got_ofst(.LCPI2_0)
+; N32-BE-NEXT:    ld.w $w0, 0($3)
+; N32-BE-NEXT:    st.w $w0, 0($2)
+; N32-BE-NEXT:    ldi.b $w0, 1
+; N32-BE-NEXT:    st.w $w0, 0($2)
+; N32-BE-NEXT:    ldi.h $w0, 1
+; N32-BE-NEXT:    st.w $w0, 0($2)
+; N32-BE-NEXT:    ori $3, $zero, 2
+; N32-BE-NEXT:    ori $4, $zero, 1
+; N32-BE-NEXT:    dinsu $3, $4, 32, 32
+; N32-BE-NEXT:    fill.d $w0, $3
+; N32-BE-NEXT:    st.w $w0, 0($2)
+; N32-BE-NEXT:    lw $1, %got_page(.LCPI2_1)($1)
+; N32-BE-NEXT:    addiu $1, $1, %got_ofst(.LCPI2_1)
+; N32-BE-NEXT:    ld.w $w0, 0($1)
+; N32-BE-NEXT:    jr $ra
+; N32-BE-NEXT:    st.w $w0, 0($2)
+;
+; N32-LE-LABEL: const_v4i32:
+; N32-LE:       # %bb.0:
+; N32-LE-NEXT:    lui $1, %hi(%neg(%gp_rel(const_v4i32)))
+; N32-LE-NEXT:    addu $1, $1, $25
+; N32-LE-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(const_v4i32)))
+; N32-LE-NEXT:    ldi.b $w0, 0
+; N32-LE-NEXT:    lw $2, %got_disp(v4i32)($1)
+; N32-LE-NEXT:    st.w $w0, 0($2)
+; N32-LE-NEXT:    ldi.w $w0, 1
+; N32-LE-NEXT:    st.w $w0, 0($2)
+; N32-LE-NEXT:    lw $3, %got_page(.LCPI2_0)($1)
+; N32-LE-NEXT:    addiu $3, $3, %got_ofst(.LCPI2_0)
+; N32-LE-NEXT:    ld.w $w0, 0($3)
+; N32-LE-NEXT:    st.w $w0, 0($2)
+; N32-LE-NEXT:    ldi.b $w0, 1
+; N32-LE-NEXT:    st.w $w0, 0($2)
+; N32-LE-NEXT:    ldi.h $w0, 1
+; N32-LE-NEXT:    st.w $w0, 0($2)
+; N32-LE-NEXT:    ori $3, $zero, 1
+; N32-LE-NEXT:    ori $4, $zero, 2
+; N32-LE-NEXT:    dinsu $3, $4, 32, 32
+; N32-LE-NEXT:    fill.d $w0, $3
+; N32-LE-NEXT:    st.w $w0, 0($2)
+; N32-LE-NEXT:    lw $1, %got_page(.LCPI2_1)($1)
+; N32-LE-NEXT:    addiu $1, $1, %got_ofst(.LCPI2_1)
+; N32-LE-NEXT:    ld.w $w0, 0($1)
+; N32-LE-NEXT:    jr $ra
+; N32-LE-NEXT:    st.w $w0, 0($2)
+;
+; N64-BE-LABEL: const_v4i32:
+; N64-BE:       # %bb.0:
+; N64-BE-NEXT:    lui $1, %hi(%neg(%gp_rel(const_v4i32)))
+; N64-BE-NEXT:    daddu $1, $1, $25
+; N64-BE-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(const_v4i32)))
+; N64-BE-NEXT:    ldi.b $w0, 0
+; N64-BE-NEXT:    ld $2, %got_disp(v4i32)($1)
+; N64-BE-NEXT:    st.w $w0, 0($2)
+; N64-BE-NEXT:    ldi.w $w0, 1
+; N64-BE-NEXT:    st.w $w0, 0($2)
+; N64-BE-NEXT:    ld $3, %got_page(.LCPI2_0)($1)
+; N64-BE-NEXT:    daddiu $3, $3, %got_ofst(.LCPI2_0)
+; N64-BE-NEXT:    ld.w $w0, 0($3)
+; N64-BE-NEXT:    st.w $w0, 0($2)
+; N64-BE-NEXT:    ldi.b $w0, 1
+; N64-BE-NEXT:    st.w $w0, 0($2)
+; N64-BE-NEXT:    ldi.h $w0, 1
+; N64-BE-NEXT:    st.w $w0, 0($2)
+; N64-BE-NEXT:    ori $3, $zero, 2
+; N64-BE-NEXT:    ori $4, $zero, 1
+; N64-BE-NEXT:    dinsu $3, $4, 32, 32
+; N64-BE-NEXT:    fill.d $w0, $3
+; N64-BE-NEXT:    st.w $w0, 0($2)
+; N64-BE-NEXT:    ld $1, %got_page(.LCPI2_1)($1)
+; N64-BE-NEXT:    daddiu $1, $1, %got_ofst(.LCPI2_1)
+; N64-BE-NEXT:    ld.w $w0, 0($1)
+; N64-BE-NEXT:    jr $ra
+; N64-BE-NEXT:    st.w $w0, 0($2)
+;
+; N64-LE-LABEL: const_v4i32:
+; N64-LE:       # %bb.0:
+; N64-LE-NEXT:    lui $1, %hi(%neg(%gp_rel(const_v4i32)))
+; N64-LE-NEXT:    daddu $1, $1, $25
+; N64-LE-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(const_v4i32)))
+; N64-LE-NEXT:    ldi.b $w0, 0
+; N64-LE-NEXT:    ld $2, %got_disp(v4i32)($1)
+; N64-LE-NEXT:    st.w $w0, 0($2)
+; N64-LE-NEXT:    ldi.w $w0, 1
+; N64-LE-NEXT:    st.w $w0, 0($2)
+; N64-LE-NEXT:    ld $3, %got_page(.LCPI2_0)($1)
+; N64-LE-NEXT:    daddiu $3, $3, %got_ofst(.LCPI2_0)
+; N64-LE-NEXT:    ld.w $w0, 0($3)
+; N64-LE-NEXT:    st.w $w0, 0($2)
+; N64-LE-NEXT:    ldi.b $w0, 1
+; N64-LE-NEXT:    st.w $w0, 0($2)
+; N64-LE-NEXT:    ldi.h $w0, 1
+; N64-LE-NEXT:    st.w $w0, 0($2)
+; N64-LE-NEXT:    ori $3, $zero, 1
+; N64-LE-NEXT:    ori $4, $zero, 2
+; N64-LE-NEXT:    dinsu $3, $4, 32, 32
+; N64-LE-NEXT:    fill.d $w0, $3
+; N64-LE-NEXT:    st.w $w0, 0($2)
+; N64-LE-NEXT:    ld $1, %got_page(.LCPI2_1)($1)
+; N64-LE-NEXT:    daddiu $1, $1, %got_ofst(.LCPI2_1)
+; N64-LE-NEXT:    ld.w $w0, 0($1)
+; N64-LE-NEXT:    jr $ra
+; N64-LE-NEXT:    st.w $w0, 0($2)
   store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>*@v4i32
-  ; ALL: ldi.b [[R1:\$w[0-9]+]], 0
-
   store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>*@v4i32
-  ; ALL: ldi.w [[R1:\$w[0-9]+]], 1
-
   store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 31>, <4 x i32>*@v4i32
-  ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
-  ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
-  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
-  ; ALL: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])
-
   store volatile <4 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009>, <4 x i32>*@v4i32
-  ; ALL: ldi.b [[R1:\$w[0-9]+]], 1
-
   store volatile <4 x i32> <i32 65537, i32 65537, i32 65537, i32 65537>, <4 x i32>*@v4i32
-  ; ALL: ldi.h [[R1:\$w[0-9]+]], 1
-
   store volatile <4 x i32> <i32 1, i32 2, i32 1, i32 2>, <4 x i32>*@v4i32
-  ; -BE-DAG: ori [[R2:\$[0-9]+]], $zero, 1
-  ; O32-BE-DAG: ori [[R3:\$[0-9]+]], $zero, 1
-  ; O32-BE-DAG: ori [[R4:\$[0-9]+]], $zero, 2
-  ; O32-LE-DAG: ori [[R3:\$[0-9]+]], $zero, 2
-  ; O32-LE-DAG: ori [[R4:\$[0-9]+]], $zero, 1
-  ; O32: fill.w [[W0:\$w[0-9]+]], [[R4]]
-  ; O32: insert.w [[W0]][1], [[R3]]
-  ; O32: splati.d [[W1:\$w[0-9]+]], [[W0]]
-
-  ; MIPS64-DAG: ori [[R5:\$[0-9]+]], $zero, 2
-  ; MIPS64-DAG: ori [[R6:\$[0-9]+]], $zero, 1
-
-  ; MIPS64-BE: dinsu [[R5]], [[R6]], 32, 32
-  ; MIPS64-LE: dinsu [[R6]], [[R5]], 32, 32
-  ; MIPS64-BE: fill.d $w{{.*}}, [[R4]]
-  ; MIPS64-LE: fill.d $w{{.*}}, [[R2]]
-
-
   store volatile <4 x i32> <i32 3, i32 4, i32 5, i32 6>, <4 x i32>*@v4i32
-  ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
-  ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
-  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
-  ; ALL: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])
-
   ret void
 }
 
 define void @const_v2i64() nounwind {
-  ; ALL-LABEL: const_v2i64:
-
+; O32-LABEL: const_v2i64:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    ldi.b $w0, 0
+; O32-NEXT:    lw $2, %got(v2i64)($1)
+; O32-NEXT:    st.w $w0, 0($2)
+; O32-NEXT:    ldi.b $w0, 1
+; O32-NEXT:    st.w $w0, 0($2)
+; O32-NEXT:    ldi.h $w0, 1
+; O32-NEXT:    st.w $w0, 0($2)
+; O32-NEXT:    ldi.w $w0, 1
+; O32-NEXT:    st.w $w0, 0($2)
+; O32-NEXT:    ldi.d $w0, 1
+; O32-NEXT:    st.w $w0, 0($2)
+; O32-NEXT:    lw $3, %got($CPI3_0)($1)
+; O32-NEXT:    addiu $3, $3, %lo($CPI3_0)
+; O32-NEXT:    ld.w $w0, 0($3)
+; O32-NEXT:    st.w $w0, 0($2)
+; O32-NEXT:    lw $1, %got($CPI3_1)($1)
+; O32-NEXT:    addiu $1, $1, %lo($CPI3_1)
+; O32-NEXT:    ld.w $w0, 0($1)
+; O32-NEXT:    jr $ra
+; O32-NEXT:    st.w $w0, 0($2)
+;
+; N32-LABEL: const_v2i64:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(const_v2i64)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(const_v2i64)))
+; N32-NEXT:    ldi.b $w0, 0
+; N32-NEXT:    lw $2, %got_disp(v2i64)($1)
+; N32-NEXT:    st.d $w0, 0($2)
+; N32-NEXT:    ldi.b $w0, 1
+; N32-NEXT:    st.d $w0, 0($2)
+; N32-NEXT:    ldi.h $w0, 1
+; N32-NEXT:    st.d $w0, 0($2)
+; N32-NEXT:    ldi.w $w0, 1
+; N32-NEXT:    st.d $w0, 0($2)
+; N32-NEXT:    ldi.d $w0, 1
+; N32-NEXT:    st.d $w0, 0($2)
+; N32-NEXT:    lw $3, %got_page(.LCPI3_0)($1)
+; N32-NEXT:    addiu $3, $3, %got_ofst(.LCPI3_0)
+; N32-NEXT:    ld.d $w0, 0($3)
+; N32-NEXT:    st.d $w0, 0($2)
+; N32-NEXT:    lw $1, %got_page(.LCPI3_1)($1)
+; N32-NEXT:    addiu $1, $1, %got_ofst(.LCPI3_1)
+; N32-NEXT:    ld.d $w0, 0($1)
+; N32-NEXT:    jr $ra
+; N32-NEXT:    st.d $w0, 0($2)
+;
+; N64-LABEL: const_v2i64:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(const_v2i64)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(const_v2i64)))
+; N64-NEXT:    ldi.b $w0, 0
+; N64-NEXT:    ld $2, %got_disp(v2i64)($1)
+; N64-NEXT:    st.d $w0, 0($2)
+; N64-NEXT:    ldi.b $w0, 1
+; N64-NEXT:    st.d $w0, 0($2)
+; N64-NEXT:    ldi.h $w0, 1
+; N64-NEXT:    st.d $w0, 0($2)
+; N64-NEXT:    ldi.w $w0, 1
+; N64-NEXT:    st.d $w0, 0($2)
+; N64-NEXT:    ldi.d $w0, 1
+; N64-NEXT:    st.d $w0, 0($2)
+; N64-NEXT:    ld $3, %got_page(.LCPI3_0)($1)
+; N64-NEXT:    daddiu $3, $3, %got_ofst(.LCPI3_0)
+; N64-NEXT:    ld.d $w0, 0($3)
+; N64-NEXT:    st.d $w0, 0($2)
+; N64-NEXT:    ld $1, %got_page(.LCPI3_1)($1)
+; N64-NEXT:    daddiu $1, $1, %got_ofst(.LCPI3_1)
+; N64-NEXT:    ld.d $w0, 0($1)
+; N64-NEXT:    jr $ra
+; N64-NEXT:    st.d $w0, 0($2)
   store volatile <2 x i64> <i64 0, i64 0>, <2 x i64>*@v2i64
-  ; ALL: ldi.b [[R1:\$w[0-9]+]], 0
-
   store volatile <2 x i64> <i64 72340172838076673, i64 72340172838076673>, <2 x i64>*@v2i64
-  ; ALL: ldi.b [[R1:\$w[0-9]+]], 1
-
   store volatile <2 x i64> <i64 281479271743489, i64 281479271743489>, <2 x i64>*@v2i64
-  ; ALL: ldi.h [[R1:\$w[0-9]+]], 1
-
   store volatile <2 x i64> <i64 4294967297, i64 4294967297>, <2 x i64>*@v2i64
-  ; ALL: ldi.w [[R1:\$w[0-9]+]], 1
-
   store volatile <2 x i64> <i64 1, i64 1>, <2 x i64>*@v2i64
-  ; ALL: ldi.d [[R1:\$w[0-9]+]], 1
-
   store volatile <2 x i64> <i64 1, i64 31>, <2 x i64>*@v2i64
-  ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
-  ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
-  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
-  ; MIPS32: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
-  ; MIPS64: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
-
   store volatile <2 x i64> <i64 3, i64 4>, <2 x i64>*@v2i64
-  ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
-  ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
-  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
-  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])
-  ; MIPS64: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])
-
   ret void
 }
 
 define void @nonconst_v16i8(i8 signext %a, i8 signext %b, i8 signext %c, i8 signext %d, i8 signext %e, i8 signext %f, i8 signext %g, i8 signext %h) nounwind {
-  ; ALL-LABEL: nonconst_v16i8:
-
+; O32-LABEL: nonconst_v16i8:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    insert.b $w0[0], $4
+; O32-NEXT:    insert.b $w0[1], $5
+; O32-NEXT:    insert.b $w0[2], $6
+; O32-NEXT:    insert.b $w0[3], $7
+; O32-NEXT:    lw $2, 16($sp)
+; O32-NEXT:    insert.b $w0[4], $2
+; O32-NEXT:    lw $2, 20($sp)
+; O32-NEXT:    insert.b $w0[5], $2
+; O32-NEXT:    lw $2, 28($sp)
+; O32-NEXT:    lw $3, 24($sp)
+; O32-NEXT:    lw $1, %got(v16i8)($1)
+; O32-NEXT:    insert.b $w0[6], $3
+; O32-NEXT:    insert.b $w0[7], $2
+; O32-NEXT:    insert.b $w0[8], $2
+; O32-NEXT:    insert.b $w0[9], $2
+; O32-NEXT:    insert.b $w0[10], $2
+; O32-NEXT:    insert.b $w0[11], $2
+; O32-NEXT:    insert.b $w0[12], $2
+; O32-NEXT:    insert.b $w0[13], $2
+; O32-NEXT:    insert.b $w0[14], $2
+; O32-NEXT:    insert.b $w0[15], $2
+; O32-NEXT:    jr $ra
+; O32-NEXT:    st.b $w0, 0($1)
+;
+; N32-LABEL: nonconst_v16i8:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(nonconst_v16i8)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(nonconst_v16i8)))
+; N32-NEXT:    insert.b $w0[0], $4
+; N32-NEXT:    insert.b $w0[1], $5
+; N32-NEXT:    insert.b $w0[2], $6
+; N32-NEXT:    insert.b $w0[3], $7
+; N32-NEXT:    insert.b $w0[4], $8
+; N32-NEXT:    lw $1, %got_disp(v16i8)($1)
+; N32-NEXT:    insert.b $w0[5], $9
+; N32-NEXT:    insert.b $w0[6], $10
+; N32-NEXT:    insert.b $w0[7], $11
+; N32-NEXT:    insert.b $w0[8], $11
+; N32-NEXT:    insert.b $w0[9], $11
+; N32-NEXT:    insert.b $w0[10], $11
+; N32-NEXT:    insert.b $w0[11], $11
+; N32-NEXT:    insert.b $w0[12], $11
+; N32-NEXT:    insert.b $w0[13], $11
+; N32-NEXT:    insert.b $w0[14], $11
+; N32-NEXT:    insert.b $w0[15], $11
+; N32-NEXT:    jr $ra
+; N32-NEXT:    st.b $w0, 0($1)
+;
+; N64-LABEL: nonconst_v16i8:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(nonconst_v16i8)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(nonconst_v16i8)))
+; N64-NEXT:    insert.b $w0[0], $4
+; N64-NEXT:    insert.b $w0[1], $5
+; N64-NEXT:    insert.b $w0[2], $6
+; N64-NEXT:    insert.b $w0[3], $7
+; N64-NEXT:    insert.b $w0[4], $8
+; N64-NEXT:    ld $1, %got_disp(v16i8)($1)
+; N64-NEXT:    insert.b $w0[5], $9
+; N64-NEXT:    insert.b $w0[6], $10
+; N64-NEXT:    insert.b $w0[7], $11
+; N64-NEXT:    insert.b $w0[8], $11
+; N64-NEXT:    insert.b $w0[9], $11
+; N64-NEXT:    insert.b $w0[10], $11
+; N64-NEXT:    insert.b $w0[11], $11
+; N64-NEXT:    insert.b $w0[12], $11
+; N64-NEXT:    insert.b $w0[13], $11
+; N64-NEXT:    insert.b $w0[14], $11
+; N64-NEXT:    insert.b $w0[15], $11
+; N64-NEXT:    jr $ra
+; N64-NEXT:    st.b $w0, 0($1)
   %1 = insertelement <16 x i8> undef, i8 %a, i32 0
   %2 = insertelement <16 x i8> %1, i8 %b, i32 1
   %3 = insertelement <16 x i8> %2, i8 %c, i32 2
@@ -221,39 +764,65 @@ define void @nonconst_v16i8(i8 signext %a, i8 signext %b, i8 signext %c, i8 sign
   %14 = insertelement <16 x i8> %13, i8 %h, i32 13
   %15 = insertelement <16 x i8> %14, i8 %h, i32 14
   %16 = insertelement <16 x i8> %15, i8 %h, i32 15
-  ; ALL-DAG: insert.b [[R1:\$w[0-9]+]][0], $4
-  ; ALL-DAG: insert.b [[R1]][1], $5
-  ; ALL-DAG: insert.b [[R1]][2], $6
-  ; ALL-DAG: insert.b [[R1]][3], $7
-  ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 16($sp)
-  ; MIPS32-DAG: insert.b [[R1]][4], [[R2]]
-  ; MIPS64-DAG: insert.b [[R1]][4], $8
-  ; MIPS32-DAG: lw [[R3:\$[0-9]+]], 20($sp)
-  ; MIPS32-DAG: insert.b [[R1]][5], [[R3]]
-  ; MIPS64-DAG: insert.b [[R1]][5], $9
-  ; MIPS32-DAG: lw [[R4:\$[0-9]+]], 24($sp)
-  ; MIPS32-DAG: insert.b [[R1]][6], [[R4]]
-  ; MIPS64-DAG: insert.b [[R1]][6], $10
-  ; MIPS32-DAG: lw [[R5:\$[0-9]+]], 28($sp)
-  ; MIPS32-DAG: insert.b [[R1]][7], [[R5]]
-  ; MIPS64-DAG: insert.b [[R1]][7], [[R5:\$11]]
-  ; ALL-DAG: insert.b [[R1]][8], [[R5]]
-  ; ALL-DAG: insert.b [[R1]][9], [[R5]]
-  ; ALL-DAG: insert.b [[R1]][10], [[R5]]
-  ; ALL-DAG: insert.b [[R1]][11], [[R5]]
-  ; ALL-DAG: insert.b [[R1]][12], [[R5]]
-  ; ALL-DAG: insert.b [[R1]][13], [[R5]]
-  ; ALL-DAG: insert.b [[R1]][14], [[R5]]
-  ; ALL-DAG: insert.b [[R1]][15], [[R5]]
-
   store volatile <16 x i8> %16, <16 x i8>*@v16i8
-
   ret void
 }
 
 define void @nonconst_v8i16(i16 signext %a, i16 signext %b, i16 signext %c, i16 signext %d, i16 signext %e, i16 signext %f, i16 signext %g, i16 signext %h) nounwind {
-  ; ALL-LABEL: nonconst_v8i16:
-
+; O32-LABEL: nonconst_v8i16:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    insert.h $w0[0], $4
+; O32-NEXT:    insert.h $w0[1], $5
+; O32-NEXT:    insert.h $w0[2], $6
+; O32-NEXT:    insert.h $w0[3], $7
+; O32-NEXT:    lw $2, 16($sp)
+; O32-NEXT:    insert.h $w0[4], $2
+; O32-NEXT:    lw $2, 20($sp)
+; O32-NEXT:    insert.h $w0[5], $2
+; O32-NEXT:    lw $1, %got(v8i16)($1)
+; O32-NEXT:    lw $2, 28($sp)
+; O32-NEXT:    lw $3, 24($sp)
+; O32-NEXT:    insert.h $w0[6], $3
+; O32-NEXT:    insert.h $w0[7], $2
+; O32-NEXT:    jr $ra
+; O32-NEXT:    st.h $w0, 0($1)
+;
+; N32-LABEL: nonconst_v8i16:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(nonconst_v8i16)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(nonconst_v8i16)))
+; N32-NEXT:    insert.h $w0[0], $4
+; N32-NEXT:    insert.h $w0[1], $5
+; N32-NEXT:    insert.h $w0[2], $6
+; N32-NEXT:    insert.h $w0[3], $7
+; N32-NEXT:    insert.h $w0[4], $8
+; N32-NEXT:    lw $1, %got_disp(v8i16)($1)
+; N32-NEXT:    insert.h $w0[5], $9
+; N32-NEXT:    insert.h $w0[6], $10
+; N32-NEXT:    insert.h $w0[7], $11
+; N32-NEXT:    jr $ra
+; N32-NEXT:    st.h $w0, 0($1)
+;
+; N64-LABEL: nonconst_v8i16:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(nonconst_v8i16)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(nonconst_v8i16)))
+; N64-NEXT:    insert.h $w0[0], $4
+; N64-NEXT:    insert.h $w0[1], $5
+; N64-NEXT:    insert.h $w0[2], $6
+; N64-NEXT:    insert.h $w0[3], $7
+; N64-NEXT:    insert.h $w0[4], $8
+; N64-NEXT:    ld $1, %got_disp(v8i16)($1)
+; N64-NEXT:    insert.h $w0[5], $9
+; N64-NEXT:    insert.h $w0[6], $10
+; N64-NEXT:    insert.h $w0[7], $11
+; N64-NEXT:    jr $ra
+; N64-NEXT:    st.h $w0, 0($1)
   %1 = insertelement <8 x i16> undef, i16 %a, i32 0
   %2 = insertelement <8 x i16> %1, i16 %b, i32 1
   %3 = insertelement <8 x i16> %2, i16 %c, i32 2
@@ -262,650 +831,1356 @@ define void @nonconst_v8i16(i16 signext %a, i16 signext %b, i16 signext %c, i16
   %6 = insertelement <8 x i16> %5, i16 %f, i32 5
   %7 = insertelement <8 x i16> %6, i16 %g, i32 6
   %8 = insertelement <8 x i16> %7, i16 %h, i32 7
-  ; ALL-DAG: insert.h [[R1:\$w[0-9]+]][0], $4
-  ; ALL-DAG: insert.h [[R1]][1], $5
-  ; ALL-DAG: insert.h [[R1]][2], $6
-  ; ALL-DAG: insert.h [[R1]][3], $7
-  ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 16($sp)
-  ; MIPS32-DAG: insert.h [[R1]][4], [[R2]]
-  ; MIPS64-DAG: insert.h [[R1]][4], $8
-  ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 20($sp)
-  ; MIPS32-DAG: insert.h [[R1]][5], [[R2]]
-  ; MIPS64-DAG: insert.h [[R1]][5], $9
-  ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 24($sp)
-  ; MIPS32-DAG: insert.h [[R1]][6], [[R2]]
-  ; MIPS64-DAG: insert.h [[R1]][6], $10
-  ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 28($sp)
-  ; MIPS32-DAG: insert.h [[R1]][7], [[R2]]
-  ; MIPS64-DAG: insert.h [[R1]][7], $11
-
   store volatile <8 x i16> %8, <8 x i16>*@v8i16
-
   ret void
 }
 
 define void @nonconst_v4i32(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) nounwind {
-  ; ALL-LABEL: nonconst_v4i32:
-
+; O32-LABEL: nonconst_v4i32:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    insert.w $w0[0], $4
+; O32-NEXT:    insert.w $w0[1], $5
+; O32-NEXT:    insert.w $w0[2], $6
+; O32-NEXT:    insert.w $w0[3], $7
+; O32-NEXT:    lw $1, %got(v4i32)($1)
+; O32-NEXT:    jr $ra
+; O32-NEXT:    st.w $w0, 0($1)
+;
+; N32-LABEL: nonconst_v4i32:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(nonconst_v4i32)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(nonconst_v4i32)))
+; N32-NEXT:    insert.w $w0[0], $4
+; N32-NEXT:    insert.w $w0[1], $5
+; N32-NEXT:    insert.w $w0[2], $6
+; N32-NEXT:    insert.w $w0[3], $7
+; N32-NEXT:    lw $1, %got_disp(v4i32)($1)
+; N32-NEXT:    jr $ra
+; N32-NEXT:    st.w $w0, 0($1)
+;
+; N64-LABEL: nonconst_v4i32:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(nonconst_v4i32)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(nonconst_v4i32)))
+; N64-NEXT:    insert.w $w0[0], $4
+; N64-NEXT:    insert.w $w0[1], $5
+; N64-NEXT:    insert.w $w0[2], $6
+; N64-NEXT:    insert.w $w0[3], $7
+; N64-NEXT:    ld $1, %got_disp(v4i32)($1)
+; N64-NEXT:    jr $ra
+; N64-NEXT:    st.w $w0, 0($1)
   %1 = insertelement <4 x i32> undef, i32 %a, i32 0
   %2 = insertelement <4 x i32> %1, i32 %b, i32 1
   %3 = insertelement <4 x i32> %2, i32 %c, i32 2
   %4 = insertelement <4 x i32> %3, i32 %d, i32 3
-  ; ALL: insert.w [[R1:\$w[0-9]+]][0], $4
-  ; ALL: insert.w [[R1]][1], $5
-  ; ALL: insert.w [[R1]][2], $6
-  ; ALL: insert.w [[R1]][3], $7
-
   store volatile <4 x i32> %4, <4 x i32>*@v4i32
-
   ret void
 }
 
 define void @nonconst_v2i64(i64 signext %a, i64 signext %b) nounwind {
-  ; ALL-LABEL: nonconst_v2i64:
-
+; O32-LABEL: nonconst_v2i64:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    insert.w $w0[0], $4
+; O32-NEXT:    insert.w $w0[1], $5
+; O32-NEXT:    insert.w $w0[2], $6
+; O32-NEXT:    insert.w $w0[3], $7
+; O32-NEXT:    lw $1, %got(v2i64)($1)
+; O32-NEXT:    jr $ra
+; O32-NEXT:    st.w $w0, 0($1)
+;
+; N32-LABEL: nonconst_v2i64:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(nonconst_v2i64)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(nonconst_v2i64)))
+; N32-NEXT:    insert.d $w0[0], $4
+; N32-NEXT:    insert.d $w0[1], $5
+; N32-NEXT:    lw $1, %got_disp(v2i64)($1)
+; N32-NEXT:    jr $ra
+; N32-NEXT:    st.d $w0, 0($1)
+;
+; N64-LABEL: nonconst_v2i64:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(nonconst_v2i64)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(nonconst_v2i64)))
+; N64-NEXT:    insert.d $w0[0], $4
+; N64-NEXT:    insert.d $w0[1], $5
+; N64-NEXT:    ld $1, %got_disp(v2i64)($1)
+; N64-NEXT:    jr $ra
+; N64-NEXT:    st.d $w0, 0($1)
   %1 = insertelement <2 x i64> undef, i64 %a, i32 0
   %2 = insertelement <2 x i64> %1, i64 %b, i32 1
-  ; MIPS32: insert.w [[R1:\$w[0-9]+]][0], $4
-  ; MIPS32: insert.w [[R1]][1], $5
-  ; MIPS32: insert.w [[R1]][2], $6
-  ; MIPS32: insert.w [[R1]][3], $7
-  ; MIPS64: insert.d [[R1:\$w[0-9]+]][0], $4
-  ; MIPS64: insert.d [[R1]][1], $5
-
   store volatile <2 x i64> %2, <2 x i64>*@v2i64
-
   ret void
 }
 
 define i32 @extract_sext_v16i8() nounwind {
-  ; ALL-LABEL: extract_sext_v16i8:
-
+; O32-LABEL: extract_sext_v16i8:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $1, %got(v16i8)($1)
+; O32-NEXT:    ld.b $w0, 0($1)
+; O32-NEXT:    addv.b $w0, $w0, $w0
+; O32-NEXT:    copy_s.b $1, $w0[1]
+; O32-NEXT:    jr $ra
+; O32-NEXT:    seb $2, $1
+;
+; N32-LABEL: extract_sext_v16i8:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v16i8)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v16i8)))
+; N32-NEXT:    lw $1, %got_disp(v16i8)($1)
+; N32-NEXT:    ld.b $w0, 0($1)
+; N32-NEXT:    addv.b $w0, $w0, $w0
+; N32-NEXT:    copy_s.b $1, $w0[1]
+; N32-NEXT:    jr $ra
+; N32-NEXT:    seb $2, $1
+;
+; N64-LABEL: extract_sext_v16i8:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v16i8)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v16i8)))
+; N64-NEXT:    ld $1, %got_disp(v16i8)($1)
+; N64-NEXT:    ld.b $w0, 0($1)
+; N64-NEXT:    addv.b $w0, $w0, $w0
+; N64-NEXT:    copy_s.b $1, $w0[1]
+; N64-NEXT:    jr $ra
+; N64-NEXT:    seb $2, $1
   %1 = load <16 x i8>, <16 x i8>* @v16i8
-  ; ALL-DAG: ld.b [[R1:\$w[0-9]+]],
-
   %2 = add <16 x i8> %1, %1
-  ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = extractelement <16 x i8> %2, i32 1
   %4 = sext i8 %3 to i32
-  ; ALL-DAG: copy_s.b [[R3:\$[0-9]+]], [[R1]][1]
-  ; ALL-NOT: sll
-  ; ALL-NOT: sra
-
   ret i32 %4
 }
 
 define i32 @extract_sext_v8i16() nounwind {
-  ; ALL-LABEL: extract_sext_v8i16:
-
+; O32-LABEL: extract_sext_v8i16:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $1, %got(v8i16)($1)
+; O32-NEXT:    ld.h $w0, 0($1)
+; O32-NEXT:    addv.h $w0, $w0, $w0
+; O32-NEXT:    copy_s.h $1, $w0[1]
+; O32-NEXT:    jr $ra
+; O32-NEXT:    seh $2, $1
+;
+; N32-LABEL: extract_sext_v8i16:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v8i16)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v8i16)))
+; N32-NEXT:    lw $1, %got_disp(v8i16)($1)
+; N32-NEXT:    ld.h $w0, 0($1)
+; N32-NEXT:    addv.h $w0, $w0, $w0
+; N32-NEXT:    copy_s.h $1, $w0[1]
+; N32-NEXT:    jr $ra
+; N32-NEXT:    seh $2, $1
+;
+; N64-LABEL: extract_sext_v8i16:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v8i16)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v8i16)))
+; N64-NEXT:    ld $1, %got_disp(v8i16)($1)
+; N64-NEXT:    ld.h $w0, 0($1)
+; N64-NEXT:    addv.h $w0, $w0, $w0
+; N64-NEXT:    copy_s.h $1, $w0[1]
+; N64-NEXT:    jr $ra
+; N64-NEXT:    seh $2, $1
   %1 = load <8 x i16>, <8 x i16>* @v8i16
-  ; ALL-DAG: ld.h [[R1:\$w[0-9]+]],
-
   %2 = add <8 x i16> %1, %1
-  ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = extractelement <8 x i16> %2, i32 1
   %4 = sext i16 %3 to i32
-  ; ALL-DAG: copy_s.h [[R3:\$[0-9]+]], [[R1]][1]
-  ; ALL-NOT: sll
-  ; ALL-NOT: sra
-
   ret i32 %4
 }
 
 define i32 @extract_sext_v4i32() nounwind {
-  ; ALL-LABEL: extract_sext_v4i32:
-
+; O32-LABEL: extract_sext_v4i32:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $1, %got(v4i32)($1)
+; O32-NEXT:    ld.w $w0, 0($1)
+; O32-NEXT:    addv.w $w0, $w0, $w0
+; O32-NEXT:    jr $ra
+; O32-NEXT:    copy_s.w $2, $w0[1]
+;
+; N32-LABEL: extract_sext_v4i32:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v4i32)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v4i32)))
+; N32-NEXT:    lw $1, %got_disp(v4i32)($1)
+; N32-NEXT:    ld.w $w0, 0($1)
+; N32-NEXT:    addv.w $w0, $w0, $w0
+; N32-NEXT:    jr $ra
+; N32-NEXT:    copy_s.w $2, $w0[1]
+;
+; N64-LABEL: extract_sext_v4i32:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v4i32)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v4i32)))
+; N64-NEXT:    ld $1, %got_disp(v4i32)($1)
+; N64-NEXT:    ld.w $w0, 0($1)
+; N64-NEXT:    addv.w $w0, $w0, $w0
+; N64-NEXT:    jr $ra
+; N64-NEXT:    copy_s.w $2, $w0[1]
   %1 = load <4 x i32>, <4 x i32>* @v4i32
-  ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
-
   %2 = add <4 x i32> %1, %1
-  ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = extractelement <4 x i32> %2, i32 1
-  ; ALL-DAG: copy_s.w [[R3:\$[0-9]+]], [[R1]][1]
-
   ret i32 %3
 }
 
 define i64 @extract_sext_v2i64() nounwind {
-  ; ALL-LABEL: extract_sext_v2i64:
-
+; O32-BE-LABEL: extract_sext_v2i64:
+; O32-BE:       # %bb.0:
+; O32-BE-NEXT:    lui $2, %hi(_gp_disp)
+; O32-BE-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-BE-NEXT:    addu $1, $2, $25
+; O32-BE-NEXT:    lw $1, %got(v2i64)($1)
+; O32-BE-NEXT:    ld.d $w0, 0($1)
+; O32-BE-NEXT:    addv.d $w0, $w0, $w0
+; O32-BE-NEXT:    shf.w $w0, $w0, 177
+; O32-BE-NEXT:    copy_s.w $2, $w0[2]
+; O32-BE-NEXT:    jr $ra
+; O32-BE-NEXT:    copy_s.w $3, $w0[3]
+;
+; O32-LE-LABEL: extract_sext_v2i64:
+; O32-LE:       # %bb.0:
+; O32-LE-NEXT:    lui $2, %hi(_gp_disp)
+; O32-LE-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-LE-NEXT:    addu $1, $2, $25
+; O32-LE-NEXT:    lw $1, %got(v2i64)($1)
+; O32-LE-NEXT:    ld.d $w0, 0($1)
+; O32-LE-NEXT:    addv.d $w0, $w0, $w0
+; O32-LE-NEXT:    copy_s.w $2, $w0[2]
+; O32-LE-NEXT:    jr $ra
+; O32-LE-NEXT:    copy_s.w $3, $w0[3]
+;
+; N32-LABEL: extract_sext_v2i64:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v2i64)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v2i64)))
+; N32-NEXT:    lw $1, %got_disp(v2i64)($1)
+; N32-NEXT:    ld.d $w0, 0($1)
+; N32-NEXT:    addv.d $w0, $w0, $w0
+; N32-NEXT:    jr $ra
+; N32-NEXT:    copy_s.d $2, $w0[1]
+;
+; N64-LABEL: extract_sext_v2i64:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v2i64)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v2i64)))
+; N64-NEXT:    ld $1, %got_disp(v2i64)($1)
+; N64-NEXT:    ld.d $w0, 0($1)
+; N64-NEXT:    addv.d $w0, $w0, $w0
+; N64-NEXT:    jr $ra
+; N64-NEXT:    copy_s.d $2, $w0[1]
   %1 = load <2 x i64>, <2 x i64>* @v2i64
-  ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],
-
   %2 = add <2 x i64> %1, %1
-  ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = extractelement <2 x i64> %2, i32 1
-  ; MIPS32-DAG: copy_s.w [[R3:\$[0-9]+]], [[R1]][2]
-  ; MIPS32-DAG: copy_s.w [[R4:\$[0-9]+]], [[R1]][3]
-  ; MIPS64-DAG: copy_s.d [[R3:\$[0-9]+]], [[R1]][1]
-  ; ALL-NOT: sll
-  ; ALL-NOT: sra
-
   ret i64 %3
 }
 
 define i32 @extract_zext_v16i8() nounwind {
-  ; ALL-LABEL: extract_zext_v16i8:
-
+; O32-LABEL: extract_zext_v16i8:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $1, %got(v16i8)($1)
+; O32-NEXT:    ld.b $w0, 0($1)
+; O32-NEXT:    addv.b $w0, $w0, $w0
+; O32-NEXT:    jr $ra
+; O32-NEXT:    copy_u.b $2, $w0[1]
+;
+; N32-LABEL: extract_zext_v16i8:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v16i8)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v16i8)))
+; N32-NEXT:    lw $1, %got_disp(v16i8)($1)
+; N32-NEXT:    ld.b $w0, 0($1)
+; N32-NEXT:    addv.b $w0, $w0, $w0
+; N32-NEXT:    jr $ra
+; N32-NEXT:    copy_u.b $2, $w0[1]
+;
+; N64-LABEL: extract_zext_v16i8:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v16i8)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v16i8)))
+; N64-NEXT:    ld $1, %got_disp(v16i8)($1)
+; N64-NEXT:    ld.b $w0, 0($1)
+; N64-NEXT:    addv.b $w0, $w0, $w0
+; N64-NEXT:    jr $ra
+; N64-NEXT:    copy_u.b $2, $w0[1]
   %1 = load <16 x i8>, <16 x i8>* @v16i8
-  ; ALL-DAG: ld.b [[R1:\$w[0-9]+]],
-
   %2 = add <16 x i8> %1, %1
-  ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = extractelement <16 x i8> %2, i32 1
   %4 = zext i8 %3 to i32
-  ; ALL-DAG: copy_u.b [[R3:\$[0-9]+]], [[R1]][1]
-  ; ALL-NOT: andi
-
   ret i32 %4
 }
 
 define i32 @extract_zext_v8i16() nounwind {
-  ; ALL-LABEL: extract_zext_v8i16:
-
+; O32-LABEL: extract_zext_v8i16:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $1, %got(v8i16)($1)
+; O32-NEXT:    ld.h $w0, 0($1)
+; O32-NEXT:    addv.h $w0, $w0, $w0
+; O32-NEXT:    jr $ra
+; O32-NEXT:    copy_u.h $2, $w0[1]
+;
+; N32-LABEL: extract_zext_v8i16:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v8i16)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v8i16)))
+; N32-NEXT:    lw $1, %got_disp(v8i16)($1)
+; N32-NEXT:    ld.h $w0, 0($1)
+; N32-NEXT:    addv.h $w0, $w0, $w0
+; N32-NEXT:    jr $ra
+; N32-NEXT:    copy_u.h $2, $w0[1]
+;
+; N64-LABEL: extract_zext_v8i16:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v8i16)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v8i16)))
+; N64-NEXT:    ld $1, %got_disp(v8i16)($1)
+; N64-NEXT:    ld.h $w0, 0($1)
+; N64-NEXT:    addv.h $w0, $w0, $w0
+; N64-NEXT:    jr $ra
+; N64-NEXT:    copy_u.h $2, $w0[1]
   %1 = load <8 x i16>, <8 x i16>* @v8i16
-  ; ALL-DAG: ld.h [[R1:\$w[0-9]+]],
-
   %2 = add <8 x i16> %1, %1
-  ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = extractelement <8 x i16> %2, i32 1
   %4 = zext i16 %3 to i32
-  ; ALL-DAG: copy_u.h [[R3:\$[0-9]+]], [[R1]][1]
-  ; ALL-NOT: andi
-
   ret i32 %4
 }
 
 define i32 @extract_zext_v4i32() nounwind {
-  ; ALL-LABEL: extract_zext_v4i32:
-
+; O32-LABEL: extract_zext_v4i32:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $1, %got(v4i32)($1)
+; O32-NEXT:    ld.w $w0, 0($1)
+; O32-NEXT:    addv.w $w0, $w0, $w0
+; O32-NEXT:    jr $ra
+; O32-NEXT:    copy_s.w $2, $w0[1]
+;
+; N32-LABEL: extract_zext_v4i32:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v4i32)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v4i32)))
+; N32-NEXT:    lw $1, %got_disp(v4i32)($1)
+; N32-NEXT:    ld.w $w0, 0($1)
+; N32-NEXT:    addv.w $w0, $w0, $w0
+; N32-NEXT:    jr $ra
+; N32-NEXT:    copy_s.w $2, $w0[1]
+;
+; N64-LABEL: extract_zext_v4i32:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v4i32)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v4i32)))
+; N64-NEXT:    ld $1, %got_disp(v4i32)($1)
+; N64-NEXT:    ld.w $w0, 0($1)
+; N64-NEXT:    addv.w $w0, $w0, $w0
+; N64-NEXT:    jr $ra
+; N64-NEXT:    copy_s.w $2, $w0[1]
   %1 = load <4 x i32>, <4 x i32>* @v4i32
-  ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
-
   %2 = add <4 x i32> %1, %1
-  ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = extractelement <4 x i32> %2, i32 1
-  ; ALL-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R1]][1]
-
   ret i32 %3
 }
 
 define i64 @extract_zext_v2i64() nounwind {
-  ; ALL-LABEL: extract_zext_v2i64:
-
+; O32-BE-LABEL: extract_zext_v2i64:
+; O32-BE:       # %bb.0:
+; O32-BE-NEXT:    lui $2, %hi(_gp_disp)
+; O32-BE-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-BE-NEXT:    addu $1, $2, $25
+; O32-BE-NEXT:    lw $1, %got(v2i64)($1)
+; O32-BE-NEXT:    ld.d $w0, 0($1)
+; O32-BE-NEXT:    addv.d $w0, $w0, $w0
+; O32-BE-NEXT:    shf.w $w0, $w0, 177
+; O32-BE-NEXT:    copy_s.w $2, $w0[2]
+; O32-BE-NEXT:    jr $ra
+; O32-BE-NEXT:    copy_s.w $3, $w0[3]
+;
+; O32-LE-LABEL: extract_zext_v2i64:
+; O32-LE:       # %bb.0:
+; O32-LE-NEXT:    lui $2, %hi(_gp_disp)
+; O32-LE-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-LE-NEXT:    addu $1, $2, $25
+; O32-LE-NEXT:    lw $1, %got(v2i64)($1)
+; O32-LE-NEXT:    ld.d $w0, 0($1)
+; O32-LE-NEXT:    addv.d $w0, $w0, $w0
+; O32-LE-NEXT:    copy_s.w $2, $w0[2]
+; O32-LE-NEXT:    jr $ra
+; O32-LE-NEXT:    copy_s.w $3, $w0[3]
+;
+; N32-LABEL: extract_zext_v2i64:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v2i64)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v2i64)))
+; N32-NEXT:    lw $1, %got_disp(v2i64)($1)
+; N32-NEXT:    ld.d $w0, 0($1)
+; N32-NEXT:    addv.d $w0, $w0, $w0
+; N32-NEXT:    jr $ra
+; N32-NEXT:    copy_s.d $2, $w0[1]
+;
+; N64-LABEL: extract_zext_v2i64:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v2i64)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v2i64)))
+; N64-NEXT:    ld $1, %got_disp(v2i64)($1)
+; N64-NEXT:    ld.d $w0, 0($1)
+; N64-NEXT:    addv.d $w0, $w0, $w0
+; N64-NEXT:    jr $ra
+; N64-NEXT:    copy_s.d $2, $w0[1]
   %1 = load <2 x i64>, <2 x i64>* @v2i64
-  ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],
-
   %2 = add <2 x i64> %1, %1
-  ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = extractelement <2 x i64> %2, i32 1
-  ; MIPS32-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R1]][2]
-  ; MIPS32-DAG: copy_{{[su]}}.w [[R4:\$[0-9]+]], [[R1]][3]
-  ; MIPS64-DAG: copy_{{[su]}}.d [[R3:\$[0-9]+]], [[R1]][1]
-  ; ALL-NOT: andi
-
   ret i64 %3
 }
 
 define i32 @extract_sext_v16i8_vidx() nounwind {
-  ; ALL-LABEL: extract_sext_v16i8_vidx:
-
+; O32-LABEL: extract_sext_v16i8_vidx:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $2, %got(i32)($1)
+; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    lw $1, %got(v16i8)($1)
+; O32-NEXT:    ld.b $w0, 0($1)
+; O32-NEXT:    addv.b $w0, $w0, $w0
+; O32-NEXT:    splat.b $w0, $w0[$2]
+; O32-NEXT:    mfc1 $1, $f0
+; O32-NEXT:    sra $1, $1, 24
+; O32-NEXT:    jr $ra
+; O32-NEXT:    seb $2, $1
+;
+; N32-LABEL: extract_sext_v16i8_vidx:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v16i8_vidx)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v16i8_vidx)))
+; N32-NEXT:    lw $2, %got_disp(i32)($1)
+; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    lw $1, %got_disp(v16i8)($1)
+; N32-NEXT:    ld.b $w0, 0($1)
+; N32-NEXT:    addv.b $w0, $w0, $w0
+; N32-NEXT:    splat.b $w0, $w0[$2]
+; N32-NEXT:    mfc1 $1, $f0
+; N32-NEXT:    sra $1, $1, 24
+; N32-NEXT:    jr $ra
+; N32-NEXT:    seb $2, $1
+;
+; N64-LABEL: extract_sext_v16i8_vidx:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v16i8_vidx)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v16i8_vidx)))
+; N64-NEXT:    ld $2, %got_disp(v16i8)($1)
+; N64-NEXT:    ld.b $w0, 0($2)
+; N64-NEXT:    addv.b $w0, $w0, $w0
+; N64-NEXT:    ld $1, %got_disp(i32)($1)
+; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    splat.b $w0, $w0[$1]
+; N64-NEXT:    mfc1 $1, $f0
+; N64-NEXT:    sra $1, $1, 24
+; N64-NEXT:    jr $ra
+; N64-NEXT:    seb $2, $1
   %1 = load <16 x i8>, <16 x i8>* @v16i8
-  ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)(
-  ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v16i8)(
-  ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v16i8)(
-  ; ALL-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]])
-
   %2 = add <16 x i8> %1, %1
-  ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = load i32, i32* @i32
-  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
-  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
-
   %4 = extractelement <16 x i8> %2, i32 %3
   %5 = sext i8 %4 to i32
-  ; ALL-DAG: splat.b $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
-  ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
-  ; ALL-DAG: sra [[R6:\$[0-9]+]], [[R5]], 24
-
   ret i32 %5
 }
 
 define i32 @extract_sext_v8i16_vidx() nounwind {
-  ; ALL-LABEL: extract_sext_v8i16_vidx:
-
+; O32-LABEL: extract_sext_v8i16_vidx:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $2, %got(i32)($1)
+; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    lw $1, %got(v8i16)($1)
+; O32-NEXT:    ld.h $w0, 0($1)
+; O32-NEXT:    addv.h $w0, $w0, $w0
+; O32-NEXT:    splat.h $w0, $w0[$2]
+; O32-NEXT:    mfc1 $1, $f0
+; O32-NEXT:    sra $1, $1, 16
+; O32-NEXT:    jr $ra
+; O32-NEXT:    seh $2, $1
+;
+; N32-LABEL: extract_sext_v8i16_vidx:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v8i16_vidx)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v8i16_vidx)))
+; N32-NEXT:    lw $2, %got_disp(i32)($1)
+; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    lw $1, %got_disp(v8i16)($1)
+; N32-NEXT:    ld.h $w0, 0($1)
+; N32-NEXT:    addv.h $w0, $w0, $w0
+; N32-NEXT:    splat.h $w0, $w0[$2]
+; N32-NEXT:    mfc1 $1, $f0
+; N32-NEXT:    sra $1, $1, 16
+; N32-NEXT:    jr $ra
+; N32-NEXT:    seh $2, $1
+;
+; N64-LABEL: extract_sext_v8i16_vidx:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v8i16_vidx)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v8i16_vidx)))
+; N64-NEXT:    ld $2, %got_disp(v8i16)($1)
+; N64-NEXT:    ld.h $w0, 0($2)
+; N64-NEXT:    addv.h $w0, $w0, $w0
+; N64-NEXT:    ld $1, %got_disp(i32)($1)
+; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    splat.h $w0, $w0[$1]
+; N64-NEXT:    mfc1 $1, $f0
+; N64-NEXT:    sra $1, $1, 16
+; N64-NEXT:    jr $ra
+; N64-NEXT:    seh $2, $1
   %1 = load <8 x i16>, <8 x i16>* @v8i16
-  ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)(
-  ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v8i16)(
-  ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v8i16)(
-  ; ALL-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]])
-
   %2 = add <8 x i16> %1, %1
-  ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = load i32, i32* @i32
-  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
-  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
-
   %4 = extractelement <8 x i16> %2, i32 %3
   %5 = sext i16 %4 to i32
-  ; ALL-DAG: splat.h $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
-  ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
-  ; ALL-DAG: sra [[R6:\$[0-9]+]], [[R5]], 16
-
   ret i32 %5
 }
 
 define i32 @extract_sext_v4i32_vidx() nounwind {
-  ; ALL-LABEL: extract_sext_v4i32_vidx:
-
+; O32-LABEL: extract_sext_v4i32_vidx:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $2, %got(i32)($1)
+; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    lw $1, %got(v4i32)($1)
+; O32-NEXT:    ld.w $w0, 0($1)
+; O32-NEXT:    addv.w $w0, $w0, $w0
+; O32-NEXT:    splat.w $w0, $w0[$2]
+; O32-NEXT:    jr $ra
+; O32-NEXT:    mfc1 $2, $f0
+;
+; N32-LABEL: extract_sext_v4i32_vidx:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v4i32_vidx)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v4i32_vidx)))
+; N32-NEXT:    lw $2, %got_disp(i32)($1)
+; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    lw $1, %got_disp(v4i32)($1)
+; N32-NEXT:    ld.w $w0, 0($1)
+; N32-NEXT:    addv.w $w0, $w0, $w0
+; N32-NEXT:    splat.w $w0, $w0[$2]
+; N32-NEXT:    jr $ra
+; N32-NEXT:    mfc1 $2, $f0
+;
+; N64-LABEL: extract_sext_v4i32_vidx:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v4i32_vidx)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v4i32_vidx)))
+; N64-NEXT:    ld $2, %got_disp(v4i32)($1)
+; N64-NEXT:    ld.w $w0, 0($2)
+; N64-NEXT:    addv.w $w0, $w0, $w0
+; N64-NEXT:    ld $1, %got_disp(i32)($1)
+; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    splat.w $w0, $w0[$1]
+; N64-NEXT:    jr $ra
+; N64-NEXT:    mfc1 $2, $f0
   %1 = load <4 x i32>, <4 x i32>* @v4i32
-  ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)(
-  ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v4i32)(
-  ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v4i32)(
-  ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
-
   %2 = add <4 x i32> %1, %1
-  ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = load i32, i32* @i32
-  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
-  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
-
   %4 = extractelement <4 x i32> %2, i32 %3
-  ; ALL-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
-  ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
-  ; ALL-NOT: sra
-
   ret i32 %4
 }
 
 define i64 @extract_sext_v2i64_vidx() nounwind {
-  ; ALL-LABEL: extract_sext_v2i64_vidx:
-
+; O32-BE-LABEL: extract_sext_v2i64_vidx:
+; O32-BE:       # %bb.0:
+; O32-BE-NEXT:    lui $2, %hi(_gp_disp)
+; O32-BE-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-BE-NEXT:    addu $1, $2, $25
+; O32-BE-NEXT:    lw $2, %got(i32)($1)
+; O32-BE-NEXT:    lw $2, 0($2)
+; O32-BE-NEXT:    addu $2, $2, $2
+; O32-BE-NEXT:    addiu $3, $2, 1
+; O32-BE-NEXT:    lw $1, %got(v2i64)($1)
+; O32-BE-NEXT:    ld.d $w0, 0($1)
+; O32-BE-NEXT:    addv.d $w0, $w0, $w0
+; O32-BE-NEXT:    shf.w $w0, $w0, 177
+; O32-BE-NEXT:    splat.w $w1, $w0[$3]
+; O32-BE-NEXT:    mfc1 $3, $f1
+; O32-BE-NEXT:    splat.w $w0, $w0[$2]
+; O32-BE-NEXT:    jr $ra
+; O32-BE-NEXT:    mfc1 $2, $f0
+;
+; O32-LE-LABEL: extract_sext_v2i64_vidx:
+; O32-LE:       # %bb.0:
+; O32-LE-NEXT:    lui $2, %hi(_gp_disp)
+; O32-LE-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-LE-NEXT:    addu $1, $2, $25
+; O32-LE-NEXT:    lw $2, %got(i32)($1)
+; O32-LE-NEXT:    lw $2, 0($2)
+; O32-LE-NEXT:    addu $2, $2, $2
+; O32-LE-NEXT:    addiu $3, $2, 1
+; O32-LE-NEXT:    lw $1, %got(v2i64)($1)
+; O32-LE-NEXT:    ld.d $w0, 0($1)
+; O32-LE-NEXT:    addv.d $w0, $w0, $w0
+; O32-LE-NEXT:    splat.w $w1, $w0[$3]
+; O32-LE-NEXT:    mfc1 $3, $f1
+; O32-LE-NEXT:    splat.w $w0, $w0[$2]
+; O32-LE-NEXT:    jr $ra
+; O32-LE-NEXT:    mfc1 $2, $f0
+;
+; N32-LABEL: extract_sext_v2i64_vidx:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v2i64_vidx)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v2i64_vidx)))
+; N32-NEXT:    lw $2, %got_disp(i32)($1)
+; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    lw $1, %got_disp(v2i64)($1)
+; N32-NEXT:    ld.d $w0, 0($1)
+; N32-NEXT:    addv.d $w0, $w0, $w0
+; N32-NEXT:    splat.d $w0, $w0[$2]
+; N32-NEXT:    jr $ra
+; N32-NEXT:    dmfc1 $2, $f0
+;
+; N64-LABEL: extract_sext_v2i64_vidx:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_sext_v2i64_vidx)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v2i64_vidx)))
+; N64-NEXT:    ld $2, %got_disp(v2i64)($1)
+; N64-NEXT:    ld.d $w0, 0($2)
+; N64-NEXT:    addv.d $w0, $w0, $w0
+; N64-NEXT:    ld $1, %got_disp(i32)($1)
+; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    splat.d $w0, $w0[$1]
+; N64-NEXT:    jr $ra
+; N64-NEXT:    dmfc1 $2, $f0
   %1 = load <2 x i64>, <2 x i64>* @v2i64
-  ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)(
-  ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v2i64)(
-  ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v2i64)(
-  ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
-
   %2 = add <2 x i64> %1, %1
-  ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = load i32, i32* @i32
-  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
-  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
-  ; O32-DAG: addiu [[IDY:\$[0-9]+]], [[IDX]], 1
-
   %4 = extractelement <2 x i64> %2, i32 %3
-  ; MIPS32-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDY]]]
-  ; MIPS32-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
-  ; MIPS32-DAG: splat.w $w[[R4:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
-  ; MIPS32-DAG: mfc1 [[R6:\$[0-9]+]], $f[[R4]]
-  ; MIPS64-DAG: splat.d $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
-  ; MIPS64-DAG: dmfc1 [[R5:\$[0-9]+]], $f[[R3]]
-  ; ALL-NOT: sra
-
   ret i64 %4
 }
 
 define i32 @extract_zext_v16i8_vidx() nounwind {
-  ; ALL-LABEL: extract_zext_v16i8_vidx:
-
+; O32-LABEL: extract_zext_v16i8_vidx:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $2, %got(i32)($1)
+; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    lw $1, %got(v16i8)($1)
+; O32-NEXT:    ld.b $w0, 0($1)
+; O32-NEXT:    addv.b $w0, $w0, $w0
+; O32-NEXT:    splat.b $w0, $w0[$2]
+; O32-NEXT:    mfc1 $1, $f0
+; O32-NEXT:    jr $ra
+; O32-NEXT:    srl $2, $1, 24
+;
+; N32-LABEL: extract_zext_v16i8_vidx:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v16i8_vidx)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v16i8_vidx)))
+; N32-NEXT:    lw $2, %got_disp(i32)($1)
+; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    lw $1, %got_disp(v16i8)($1)
+; N32-NEXT:    ld.b $w0, 0($1)
+; N32-NEXT:    addv.b $w0, $w0, $w0
+; N32-NEXT:    splat.b $w0, $w0[$2]
+; N32-NEXT:    mfc1 $1, $f0
+; N32-NEXT:    jr $ra
+; N32-NEXT:    srl $2, $1, 24
+;
+; N64-LABEL: extract_zext_v16i8_vidx:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v16i8_vidx)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v16i8_vidx)))
+; N64-NEXT:    ld $2, %got_disp(v16i8)($1)
+; N64-NEXT:    ld.b $w0, 0($2)
+; N64-NEXT:    addv.b $w0, $w0, $w0
+; N64-NEXT:    ld $1, %got_disp(i32)($1)
+; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    splat.b $w0, $w0[$1]
+; N64-NEXT:    mfc1 $1, $f0
+; N64-NEXT:    jr $ra
+; N64-NEXT:    srl $2, $1, 24
   %1 = load <16 x i8>, <16 x i8>* @v16i8
-  ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)(
-  ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v16i8)(
-  ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v16i8)(
-  ; ALL-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]])
-
   %2 = add <16 x i8> %1, %1
-  ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = load i32, i32* @i32
-  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
-  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
-
   %4 = extractelement <16 x i8> %2, i32 %3
   %5 = zext i8 %4 to i32
-  ; ALL-DAG: splat.b $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
-  ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
-  ; ALL-DAG: srl [[R6:\$[0-9]+]], [[R5]], 24
-
   ret i32 %5
 }
 
 define i32 @extract_zext_v8i16_vidx() nounwind {
-  ; ALL-LABEL: extract_zext_v8i16_vidx:
-
+; O32-LABEL: extract_zext_v8i16_vidx:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $2, %got(i32)($1)
+; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    lw $1, %got(v8i16)($1)
+; O32-NEXT:    ld.h $w0, 0($1)
+; O32-NEXT:    addv.h $w0, $w0, $w0
+; O32-NEXT:    splat.h $w0, $w0[$2]
+; O32-NEXT:    mfc1 $1, $f0
+; O32-NEXT:    jr $ra
+; O32-NEXT:    srl $2, $1, 16
+;
+; N32-LABEL: extract_zext_v8i16_vidx:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v8i16_vidx)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v8i16_vidx)))
+; N32-NEXT:    lw $2, %got_disp(i32)($1)
+; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    lw $1, %got_disp(v8i16)($1)
+; N32-NEXT:    ld.h $w0, 0($1)
+; N32-NEXT:    addv.h $w0, $w0, $w0
+; N32-NEXT:    splat.h $w0, $w0[$2]
+; N32-NEXT:    mfc1 $1, $f0
+; N32-NEXT:    jr $ra
+; N32-NEXT:    srl $2, $1, 16
+;
+; N64-LABEL: extract_zext_v8i16_vidx:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v8i16_vidx)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v8i16_vidx)))
+; N64-NEXT:    ld $2, %got_disp(v8i16)($1)
+; N64-NEXT:    ld.h $w0, 0($2)
+; N64-NEXT:    addv.h $w0, $w0, $w0
+; N64-NEXT:    ld $1, %got_disp(i32)($1)
+; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    splat.h $w0, $w0[$1]
+; N64-NEXT:    mfc1 $1, $f0
+; N64-NEXT:    jr $ra
+; N64-NEXT:    srl $2, $1, 16
   %1 = load <8 x i16>, <8 x i16>* @v8i16
-  ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)(
-  ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v8i16)(
-  ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v8i16)(
-  ; ALL-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]])
-
   %2 = add <8 x i16> %1, %1
-  ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = load i32, i32* @i32
-  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
-  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
-
   %4 = extractelement <8 x i16> %2, i32 %3
   %5 = zext i16 %4 to i32
-  ; ALL-DAG: splat.h $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
-  ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
-  ; ALL-DAG: srl [[R6:\$[0-9]+]], [[R5]], 16
-
   ret i32 %5
 }
 
 define i32 @extract_zext_v4i32_vidx() nounwind {
-  ; ALL-LABEL: extract_zext_v4i32_vidx:
-
+; O32-LABEL: extract_zext_v4i32_vidx:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $2, %got(i32)($1)
+; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    lw $1, %got(v4i32)($1)
+; O32-NEXT:    ld.w $w0, 0($1)
+; O32-NEXT:    addv.w $w0, $w0, $w0
+; O32-NEXT:    splat.w $w0, $w0[$2]
+; O32-NEXT:    jr $ra
+; O32-NEXT:    mfc1 $2, $f0
+;
+; N32-LABEL: extract_zext_v4i32_vidx:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v4i32_vidx)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v4i32_vidx)))
+; N32-NEXT:    lw $2, %got_disp(i32)($1)
+; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    lw $1, %got_disp(v4i32)($1)
+; N32-NEXT:    ld.w $w0, 0($1)
+; N32-NEXT:    addv.w $w0, $w0, $w0
+; N32-NEXT:    splat.w $w0, $w0[$2]
+; N32-NEXT:    jr $ra
+; N32-NEXT:    mfc1 $2, $f0
+;
+; N64-LABEL: extract_zext_v4i32_vidx:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v4i32_vidx)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v4i32_vidx)))
+; N64-NEXT:    ld $2, %got_disp(v4i32)($1)
+; N64-NEXT:    ld.w $w0, 0($2)
+; N64-NEXT:    addv.w $w0, $w0, $w0
+; N64-NEXT:    ld $1, %got_disp(i32)($1)
+; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    splat.w $w0, $w0[$1]
+; N64-NEXT:    jr $ra
+; N64-NEXT:    mfc1 $2, $f0
   %1 = load <4 x i32>, <4 x i32>* @v4i32
-  ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)(
-  ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v4i32)(
-  ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v4i32)(
-  ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
-
   %2 = add <4 x i32> %1, %1
-  ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = load i32, i32* @i32
-  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
-  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
-
   %4 = extractelement <4 x i32> %2, i32 %3
-  ; ALL-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
-  ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
-  ; ALL-NOT: srl
-
   ret i32 %4
 }
 
 define i64 @extract_zext_v2i64_vidx() nounwind {
-  ; ALL-LABEL: extract_zext_v2i64_vidx:
-
+; O32-BE-LABEL: extract_zext_v2i64_vidx:
+; O32-BE:       # %bb.0:
+; O32-BE-NEXT:    lui $2, %hi(_gp_disp)
+; O32-BE-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-BE-NEXT:    addu $1, $2, $25
+; O32-BE-NEXT:    lw $2, %got(i32)($1)
+; O32-BE-NEXT:    lw $2, 0($2)
+; O32-BE-NEXT:    addu $2, $2, $2
+; O32-BE-NEXT:    addiu $3, $2, 1
+; O32-BE-NEXT:    lw $1, %got(v2i64)($1)
+; O32-BE-NEXT:    ld.d $w0, 0($1)
+; O32-BE-NEXT:    addv.d $w0, $w0, $w0
+; O32-BE-NEXT:    shf.w $w0, $w0, 177
+; O32-BE-NEXT:    splat.w $w1, $w0[$3]
+; O32-BE-NEXT:    mfc1 $3, $f1
+; O32-BE-NEXT:    splat.w $w0, $w0[$2]
+; O32-BE-NEXT:    jr $ra
+; O32-BE-NEXT:    mfc1 $2, $f0
+;
+; O32-LE-LABEL: extract_zext_v2i64_vidx:
+; O32-LE:       # %bb.0:
+; O32-LE-NEXT:    lui $2, %hi(_gp_disp)
+; O32-LE-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-LE-NEXT:    addu $1, $2, $25
+; O32-LE-NEXT:    lw $2, %got(i32)($1)
+; O32-LE-NEXT:    lw $2, 0($2)
+; O32-LE-NEXT:    addu $2, $2, $2
+; O32-LE-NEXT:    addiu $3, $2, 1
+; O32-LE-NEXT:    lw $1, %got(v2i64)($1)
+; O32-LE-NEXT:    ld.d $w0, 0($1)
+; O32-LE-NEXT:    addv.d $w0, $w0, $w0
+; O32-LE-NEXT:    splat.w $w1, $w0[$3]
+; O32-LE-NEXT:    mfc1 $3, $f1
+; O32-LE-NEXT:    splat.w $w0, $w0[$2]
+; O32-LE-NEXT:    jr $ra
+; O32-LE-NEXT:    mfc1 $2, $f0
+;
+; N32-LABEL: extract_zext_v2i64_vidx:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v2i64_vidx)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v2i64_vidx)))
+; N32-NEXT:    lw $2, %got_disp(i32)($1)
+; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    lw $1, %got_disp(v2i64)($1)
+; N32-NEXT:    ld.d $w0, 0($1)
+; N32-NEXT:    addv.d $w0, $w0, $w0
+; N32-NEXT:    splat.d $w0, $w0[$2]
+; N32-NEXT:    jr $ra
+; N32-NEXT:    dmfc1 $2, $f0
+;
+; N64-LABEL: extract_zext_v2i64_vidx:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(extract_zext_v2i64_vidx)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v2i64_vidx)))
+; N64-NEXT:    ld $2, %got_disp(v2i64)($1)
+; N64-NEXT:    ld.d $w0, 0($2)
+; N64-NEXT:    addv.d $w0, $w0, $w0
+; N64-NEXT:    ld $1, %got_disp(i32)($1)
+; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    splat.d $w0, $w0[$1]
+; N64-NEXT:    jr $ra
+; N64-NEXT:    dmfc1 $2, $f0
   %1 = load <2 x i64>, <2 x i64>* @v2i64
-  ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)(
-  ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v2i64)(
-  ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v2i64)(
-  ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
-
   %2 = add <2 x i64> %1, %1
-  ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
-
   %3 = load i32, i32* @i32
-  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
-  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
-  ; O32-DAG: addiu [[IDY:\$[0-9]+]], [[IDX]], 1
-
   %4 = extractelement <2 x i64> %2, i32 %3
-  ; MIPS32-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDY]]]
-  ; MIPS32-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
-  ; MIPS32-DAG: splat.w $w[[R4:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
-  ; MIPS32-DAG: mfc1 [[R6:\$[0-9]+]], $f[[R4]]
-  ; MIPS64-DAG: splat.d $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
-  ; MIPS64-DAG: dmfc1 [[R5:\$[0-9]+]], $f[[R3]]
-  ; ALL-NOT: srl
-
   ret i64 %4
 }
 
 define void @insert_v16i8(i32 signext %a) nounwind {
-  ; ALL-LABEL: insert_v16i8:
-
+; O32-LABEL: insert_v16i8:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $1, %got(v16i8)($1)
+; O32-NEXT:    ld.b $w0, 0($1)
+; O32-NEXT:    insert.b $w0[1], $4
+; O32-NEXT:    jr $ra
+; O32-NEXT:    st.b $w0, 0($1)
+;
+; N32-LABEL: insert_v16i8:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v16i8)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8)))
+; N32-NEXT:    lw $1, %got_disp(v16i8)($1)
+; N32-NEXT:    ld.b $w0, 0($1)
+; N32-NEXT:    insert.b $w0[1], $4
+; N32-NEXT:    jr $ra
+; N32-NEXT:    st.b $w0, 0($1)
+;
+; N64-LABEL: insert_v16i8:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v16i8)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8)))
+; N64-NEXT:    ld $1, %got_disp(v16i8)($1)
+; N64-NEXT:    ld.b $w0, 0($1)
+; N64-NEXT:    insert.b $w0[1], $4
+; N64-NEXT:    jr $ra
+; N64-NEXT:    st.b $w0, 0($1)
   %1 = load <16 x i8>, <16 x i8>* @v16i8
-  ; ALL-DAG: ld.b [[R1:\$w[0-9]+]],
-
   %a2 = trunc i32 %a to i8
   %a3 = sext i8 %a2 to i32
   %a4 = trunc i32 %a3 to i8
-  ; ALL-NOT: andi
-  ; ALL-NOT: sra
-
   %2 = insertelement <16 x i8> %1, i8 %a4, i32 1
-  ; ALL-DAG: insert.b [[R1]][1], $4
-
   store <16 x i8> %2, <16 x i8>* @v16i8
-  ; ALL-DAG: st.b [[R1]]
-
   ret void
 }
 
 define void @insert_v8i16(i32 signext %a) nounwind {
-  ; ALL-LABEL: insert_v8i16:
-
+; O32-LABEL: insert_v8i16:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $1, %got(v8i16)($1)
+; O32-NEXT:    ld.h $w0, 0($1)
+; O32-NEXT:    insert.h $w0[1], $4
+; O32-NEXT:    jr $ra
+; O32-NEXT:    st.h $w0, 0($1)
+;
+; N32-LABEL: insert_v8i16:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v8i16)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16)))
+; N32-NEXT:    lw $1, %got_disp(v8i16)($1)
+; N32-NEXT:    ld.h $w0, 0($1)
+; N32-NEXT:    insert.h $w0[1], $4
+; N32-NEXT:    jr $ra
+; N32-NEXT:    st.h $w0, 0($1)
+;
+; N64-LABEL: insert_v8i16:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v8i16)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16)))
+; N64-NEXT:    ld $1, %got_disp(v8i16)($1)
+; N64-NEXT:    ld.h $w0, 0($1)
+; N64-NEXT:    insert.h $w0[1], $4
+; N64-NEXT:    jr $ra
+; N64-NEXT:    st.h $w0, 0($1)
   %1 = load <8 x i16>, <8 x i16>* @v8i16
-  ; ALL-DAG: ld.h [[R1:\$w[0-9]+]],
-
   %a2 = trunc i32 %a to i16
   %a3 = sext i16 %a2 to i32
   %a4 = trunc i32 %a3 to i16
-  ; ALL-NOT: andi
-  ; ALL-NOT: sra
-
   %2 = insertelement <8 x i16> %1, i16 %a4, i32 1
-  ; ALL-DAG: insert.h [[R1]][1], $4
-
   store <8 x i16> %2, <8 x i16>* @v8i16
-  ; ALL-DAG: st.h [[R1]]
-
   ret void
 }
 
 define void @insert_v4i32(i32 signext %a) nounwind {
-  ; ALL-LABEL: insert_v4i32:
-
+; O32-LABEL: insert_v4i32:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $1, %got(v4i32)($1)
+; O32-NEXT:    ld.w $w0, 0($1)
+; O32-NEXT:    insert.w $w0[1], $4
+; O32-NEXT:    jr $ra
+; O32-NEXT:    st.w $w0, 0($1)
+;
+; N32-LABEL: insert_v4i32:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v4i32)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32)))
+; N32-NEXT:    lw $1, %got_disp(v4i32)($1)
+; N32-NEXT:    ld.w $w0, 0($1)
+; N32-NEXT:    insert.w $w0[1], $4
+; N32-NEXT:    jr $ra
+; N32-NEXT:    st.w $w0, 0($1)
+;
+; N64-LABEL: insert_v4i32:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v4i32)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32)))
+; N64-NEXT:    ld $1, %got_disp(v4i32)($1)
+; N64-NEXT:    ld.w $w0, 0($1)
+; N64-NEXT:    insert.w $w0[1], $4
+; N64-NEXT:    jr $ra
+; N64-NEXT:    st.w $w0, 0($1)
   %1 = load <4 x i32>, <4 x i32>* @v4i32
-  ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
-
-  ; ALL-NOT: andi
-  ; ALL-NOT: sra
-
   %2 = insertelement <4 x i32> %1, i32 %a, i32 1
-  ; ALL-DAG: insert.w [[R1]][1], $4
-
   store <4 x i32> %2, <4 x i32>* @v4i32
-  ; ALL-DAG: st.w [[R1]]
-
   ret void
 }
-
 define void @insert_v2i64(i64 signext %a) nounwind {
-  ; ALL-LABEL: insert_v2i64:
-
+; O32-LABEL: insert_v2i64:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $1, %got(v2i64)($1)
+; O32-NEXT:    ld.w $w0, 0($1)
+; O32-NEXT:    insert.w $w0[2], $4
+; O32-NEXT:    insert.w $w0[3], $5
+; O32-NEXT:    jr $ra
+; O32-NEXT:    st.w $w0, 0($1)
+;
+; N32-LABEL: insert_v2i64:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v2i64)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64)))
+; N32-NEXT:    lw $1, %got_disp(v2i64)($1)
+; N32-NEXT:    ld.d $w0, 0($1)
+; N32-NEXT:    insert.d $w0[1], $4
+; N32-NEXT:    jr $ra
+; N32-NEXT:    st.d $w0, 0($1)
+;
+; N64-LABEL: insert_v2i64:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v2i64)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64)))
+; N64-NEXT:    ld $1, %got_disp(v2i64)($1)
+; N64-NEXT:    ld.d $w0, 0($1)
+; N64-NEXT:    insert.d $w0[1], $4
+; N64-NEXT:    jr $ra
+; N64-NEXT:    st.d $w0, 0($1)
   %1 = load <2 x i64>, <2 x i64>* @v2i64
-  ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
-  ; MIPS64-DAG: ld.d [[R1:\$w[0-9]+]],
-
-  ; ALL-NOT: andi
-  ; ALL-NOT: sra
-
   %2 = insertelement <2 x i64> %1, i64 %a, i32 1
-  ; MIPS32-DAG: insert.w [[R1]][2], $4
-  ; MIPS32-DAG: insert.w [[R1]][3], $5
-  ; MIPS64-DAG: insert.d [[R1]][1], $4
-
   store <2 x i64> %2, <2 x i64>* @v2i64
-  ; MIPS32-DAG: st.w [[R1]]
-  ; MIPS64-DAG: st.d [[R1]]
-
   ret void
 }
 
 define void @insert_v16i8_vidx(i32 signext %a) nounwind {
-  ; ALL-LABEL: insert_v16i8_vidx:
-
+; O32-LABEL: insert_v16i8_vidx:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $2, %got(i32)($1)
+; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    lw $1, %got(v16i8)($1)
+; O32-NEXT:    ld.b $w0, 0($1)
+; O32-NEXT:    sld.b $w0, $w0[$2]
+; O32-NEXT:    insert.b $w0[0], $4
+; O32-NEXT:    neg $2, $2
+; O32-NEXT:    sld.b $w0, $w0[$2]
+; O32-NEXT:    jr $ra
+; O32-NEXT:    st.b $w0, 0($1)
+;
+; N32-LABEL: insert_v16i8_vidx:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v16i8_vidx)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx)))
+; N32-NEXT:    lw $2, %got_disp(i32)($1)
+; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    lw $1, %got_disp(v16i8)($1)
+; N32-NEXT:    ld.b $w0, 0($1)
+; N32-NEXT:    sld.b $w0, $w0[$2]
+; N32-NEXT:    insert.b $w0[0], $4
+; N32-NEXT:    neg $2, $2
+; N32-NEXT:    sld.b $w0, $w0[$2]
+; N32-NEXT:    jr $ra
+; N32-NEXT:    st.b $w0, 0($1)
+;
+; N64-LABEL: insert_v16i8_vidx:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v16i8_vidx)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx)))
+; N64-NEXT:    ld $2, %got_disp(i32)($1)
+; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    ld $1, %got_disp(v16i8)($1)
+; N64-NEXT:    ld.b $w0, 0($1)
+; N64-NEXT:    sld.b $w0, $w0[$2]
+; N64-NEXT:    insert.b $w0[0], $4
+; N64-NEXT:    dneg $2, $2
+; N64-NEXT:    sld.b $w0, $w0[$2]
+; N64-NEXT:    jr $ra
+; N64-NEXT:    st.b $w0, 0($1)
   %1 = load <16 x i8>, <16 x i8>* @v16i8
-  ; ALL-DAG: ld.b [[R1:\$w[0-9]+]],
-
   %2 = load i32, i32* @i32
-  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
-  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
-
   %a2 = trunc i32 %a to i8
   %a3 = sext i8 %a2 to i32
   %a4 = trunc i32 %a3 to i8
-  ; ALL-NOT: andi
-  ; ALL-NOT: sra
-
   %3 = insertelement <16 x i8> %1, i8 %a4, i32 %2
-  ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[IDX]]]
-  ; ALL-DAG: insert.b [[R1]][0], $4
-  ; O32-DAG: neg [[NIDX:\$[0-9]+]], [[IDX]]
-  ; N32-DAG: neg [[NIDX:\$[0-9]+]], [[IDX]]
-  ; N64-DAG: dneg [[NIDX:\$[0-9]+]], [[IDX]]
-  ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
-
   store <16 x i8> %3, <16 x i8>* @v16i8
-  ; ALL-DAG: st.b [[R1]]
-
   ret void
 }
 
 define void @insert_v8i16_vidx(i32 signext %a) nounwind {
-  ; ALL-LABEL: insert_v8i16_vidx:
-
+; O32-LABEL: insert_v8i16_vidx:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $2, %got(i32)($1)
+; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    lw $1, %got(v8i16)($1)
+; O32-NEXT:    ld.h $w0, 0($1)
+; O32-NEXT:    sll $2, $2, 1
+; O32-NEXT:    sld.b $w0, $w0[$2]
+; O32-NEXT:    insert.h $w0[0], $4
+; O32-NEXT:    neg $2, $2
+; O32-NEXT:    sld.b $w0, $w0[$2]
+; O32-NEXT:    jr $ra
+; O32-NEXT:    st.h $w0, 0($1)
+;
+; N32-LABEL: insert_v8i16_vidx:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v8i16_vidx)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx)))
+; N32-NEXT:    lw $2, %got_disp(i32)($1)
+; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    lw $1, %got_disp(v8i16)($1)
+; N32-NEXT:    ld.h $w0, 0($1)
+; N32-NEXT:    sll $2, $2, 1
+; N32-NEXT:    sld.b $w0, $w0[$2]
+; N32-NEXT:    insert.h $w0[0], $4
+; N32-NEXT:    neg $2, $2
+; N32-NEXT:    sld.b $w0, $w0[$2]
+; N32-NEXT:    jr $ra
+; N32-NEXT:    st.h $w0, 0($1)
+;
+; N64-LABEL: insert_v8i16_vidx:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v8i16_vidx)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx)))
+; N64-NEXT:    ld $2, %got_disp(i32)($1)
+; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    ld $1, %got_disp(v8i16)($1)
+; N64-NEXT:    ld.h $w0, 0($1)
+; N64-NEXT:    dsll $2, $2, 1
+; N64-NEXT:    sld.b $w0, $w0[$2]
+; N64-NEXT:    insert.h $w0[0], $4
+; N64-NEXT:    dneg $2, $2
+; N64-NEXT:    sld.b $w0, $w0[$2]
+; N64-NEXT:    jr $ra
+; N64-NEXT:    st.h $w0, 0($1)
   %1 = load <8 x i16>, <8 x i16>* @v8i16
-  ; ALL-DAG: ld.h [[R1:\$w[0-9]+]],
-
   %2 = load i32, i32* @i32
-  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
-  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
-
   %a2 = trunc i32 %a to i16
   %a3 = sext i16 %a2 to i32
   %a4 = trunc i32 %a3 to i16
-  ; ALL-NOT: andi
-  ; ALL-NOT: sra
-
   %3 = insertelement <8 x i16> %1, i16 %a4, i32 %2
-  ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 1
-  ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
-  ; ALL-DAG: insert.h [[R1]][0], $4
-  ; O32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
-  ; N32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
-  ; N64-DAG: dneg [[NIDX:\$[0-9]+]], [[BIDX]]
-  ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
-
   store <8 x i16> %3, <8 x i16>* @v8i16
-  ; ALL-DAG: st.h [[R1]]
-
   ret void
 }
 
 define void @insert_v4i32_vidx(i32 signext %a) nounwind {
-  ; ALL-LABEL: insert_v4i32_vidx:
-
+; O32-LABEL: insert_v4i32_vidx:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $2, %got(i32)($1)
+; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    lw $1, %got(v4i32)($1)
+; O32-NEXT:    ld.w $w0, 0($1)
+; O32-NEXT:    sll $2, $2, 2
+; O32-NEXT:    sld.b $w0, $w0[$2]
+; O32-NEXT:    insert.w $w0[0], $4
+; O32-NEXT:    neg $2, $2
+; O32-NEXT:    sld.b $w0, $w0[$2]
+; O32-NEXT:    jr $ra
+; O32-NEXT:    st.w $w0, 0($1)
+;
+; N32-LABEL: insert_v4i32_vidx:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v4i32_vidx)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx)))
+; N32-NEXT:    lw $2, %got_disp(i32)($1)
+; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    lw $1, %got_disp(v4i32)($1)
+; N32-NEXT:    ld.w $w0, 0($1)
+; N32-NEXT:    sll $2, $2, 2
+; N32-NEXT:    sld.b $w0, $w0[$2]
+; N32-NEXT:    insert.w $w0[0], $4
+; N32-NEXT:    neg $2, $2
+; N32-NEXT:    sld.b $w0, $w0[$2]
+; N32-NEXT:    jr $ra
+; N32-NEXT:    st.w $w0, 0($1)
+;
+; N64-LABEL: insert_v4i32_vidx:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v4i32_vidx)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx)))
+; N64-NEXT:    ld $2, %got_disp(i32)($1)
+; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    ld $1, %got_disp(v4i32)($1)
+; N64-NEXT:    ld.w $w0, 0($1)
+; N64-NEXT:    dsll $2, $2, 2
+; N64-NEXT:    sld.b $w0, $w0[$2]
+; N64-NEXT:    insert.w $w0[0], $4
+; N64-NEXT:    dneg $2, $2
+; N64-NEXT:    sld.b $w0, $w0[$2]
+; N64-NEXT:    jr $ra
+; N64-NEXT:    st.w $w0, 0($1)
   %1 = load <4 x i32>, <4 x i32>* @v4i32
-  ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
-
   %2 = load i32, i32* @i32
-  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
-  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
-
-  ; ALL-NOT: andi
-  ; ALL-NOT: sra
-
   %3 = insertelement <4 x i32> %1, i32 %a, i32 %2
-  ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
-  ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
-  ; ALL-DAG: insert.w [[R1]][0], $4
-  ; O32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
-  ; N32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
-  ; N64-DAG: dneg [[NIDX:\$[0-9]+]], [[BIDX]]
-  ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
-
   store <4 x i32> %3, <4 x i32>* @v4i32
-  ; ALL-DAG: st.w [[R1]]
-
   ret void
 }
 
+; TODO: This code could be a lot better but it works. The legalizer splits
+; 64-bit inserts into two 32-bit inserts because there is no i64 type on
+; MIPS32. The obvious optimisation is to perform both insert.w's at once while
+; the vector is rotated.
 define void @insert_v2i64_vidx(i64 signext %a) nounwind {
-  ; ALL-LABEL: insert_v2i64_vidx:
-
+; O32-LABEL: insert_v2i64_vidx:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $2, %got(i32)($1)
+; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    addu $2, $2, $2
+; O32-NEXT:    lw $1, %got(v2i64)($1)
+; O32-NEXT:    ld.w $w0, 0($1)
+; O32-NEXT:    sll $3, $2, 2
+; O32-NEXT:    sld.b $w0, $w0[$3]
+; O32-NEXT:    insert.w $w0[0], $4
+; O32-NEXT:    neg $3, $3
+; O32-NEXT:    sld.b $w0, $w0[$3]
+; O32-NEXT:    addiu $2, $2, 1
+; O32-NEXT:    sll $2, $2, 2
+; O32-NEXT:    sld.b $w0, $w0[$2]
+; O32-NEXT:    insert.w $w0[0], $5
+; O32-NEXT:    neg $2, $2
+; O32-NEXT:    sld.b $w0, $w0[$2]
+; O32-NEXT:    jr $ra
+; O32-NEXT:    st.w $w0, 0($1)
+;
+; N32-LABEL: insert_v2i64_vidx:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v2i64_vidx)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx)))
+; N32-NEXT:    lw $2, %got_disp(i32)($1)
+; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    lw $1, %got_disp(v2i64)($1)
+; N32-NEXT:    ld.d $w0, 0($1)
+; N32-NEXT:    sll $2, $2, 3
+; N32-NEXT:    sld.b $w0, $w0[$2]
+; N32-NEXT:    insert.d $w0[0], $4
+; N32-NEXT:    neg $2, $2
+; N32-NEXT:    sld.b $w0, $w0[$2]
+; N32-NEXT:    jr $ra
+; N32-NEXT:    st.d $w0, 0($1)
+;
+; N64-LABEL: insert_v2i64_vidx:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(insert_v2i64_vidx)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx)))
+; N64-NEXT:    ld $2, %got_disp(i32)($1)
+; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    ld $1, %got_disp(v2i64)($1)
+; N64-NEXT:    ld.d $w0, 0($1)
+; N64-NEXT:    dsll $2, $2, 3
+; N64-NEXT:    sld.b $w0, $w0[$2]
+; N64-NEXT:    insert.d $w0[0], $4
+; N64-NEXT:    dneg $2, $2
+; N64-NEXT:    sld.b $w0, $w0[$2]
+; N64-NEXT:    jr $ra
+; N64-NEXT:    st.d $w0, 0($1)
   %1 = load <2 x i64>, <2 x i64>* @v2i64
-  ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
-  ; MIPS64-DAG: ld.d [[R1:\$w[0-9]+]],
-
   %2 = load i32, i32* @i32
-  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
-  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
-
-  ; ALL-NOT: andi
-  ; ALL-NOT: sra
-
   %3 = insertelement <2 x i64> %1, i64 %a, i32 %2
-  ; TODO: This code could be a lot better but it works. The legalizer splits
-  ; 64-bit inserts into two 32-bit inserts because there is no i64 type on
-  ; MIPS32. The obvious optimisation is to perform both insert.w's at once while
-  ; the vector is rotated.
-  ; MIPS32-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
-  ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
-  ; MIPS32-DAG: insert.w [[R1]][0], $4
-  ; MIPS32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
-  ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
-  ; MIPS32-DAG: addiu [[IDX2:\$[0-9]+]], [[IDX]], 1
-  ; MIPS32-DAG: sll [[BIDX:\$[0-9]+]], [[IDX2]], 2
-  ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
-  ; MIPS32-DAG: insert.w [[R1]][0], $5
-  ; MIPS32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
-  ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
-
-  ; MIPS64-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3
-  ; MIPS64-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
-  ; MIPS64-DAG: insert.d [[R1]][0], $4
-  ; N32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
-  ; N64-DAG: dneg [[NIDX:\$[0-9]+]], [[BIDX]]
-  ; MIPS64-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
-
   store <2 x i64> %3, <2 x i64>* @v2i64
-  ; MIPS32-DAG: st.w [[R1]]
-  ; MIPS64-DAG: st.d [[R1]]
-
   ret void
 }
 
+; TODO: What code should be emitted?
 define void @truncstore() nounwind {
-  ; ALL-LABEL: truncstore:
-
+; O32-LABEL: truncstore:
+; O32:       # %bb.0:
+; O32-NEXT:    lui $2, %hi(_gp_disp)
+; O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-NEXT:    addu $1, $2, $25
+; O32-NEXT:    lw $1, %got(v4i8)($1)
+; O32-NEXT:    addiu $2, $zero, 255
+; O32-NEXT:    sb $2, 3($1)
+; O32-NEXT:    sb $2, 2($1)
+; O32-NEXT:    sb $2, 1($1)
+; O32-NEXT:    jr $ra
+; O32-NEXT:    sb $2, 0($1)
+;
+; N32-LABEL: truncstore:
+; N32:       # %bb.0:
+; N32-NEXT:    lui $1, %hi(%neg(%gp_rel(truncstore)))
+; N32-NEXT:    addu $1, $1, $25
+; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(truncstore)))
+; N32-NEXT:    lw $1, %got_disp(v4i8)($1)
+; N32-NEXT:    addiu $2, $zero, 255
+; N32-NEXT:    sb $2, 3($1)
+; N32-NEXT:    sb $2, 2($1)
+; N32-NEXT:    sb $2, 1($1)
+; N32-NEXT:    jr $ra
+; N32-NEXT:    sb $2, 0($1)
+;
+; N64-LABEL: truncstore:
+; N64:       # %bb.0:
+; N64-NEXT:    lui $1, %hi(%neg(%gp_rel(truncstore)))
+; N64-NEXT:    daddu $1, $1, $25
+; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(truncstore)))
+; N64-NEXT:    ld $1, %got_disp(v4i8)($1)
+; N64-NEXT:    addiu $2, $zero, 255
+; N64-NEXT:    sb $2, 3($1)
+; N64-NEXT:    sb $2, 2($1)
+; N64-NEXT:    sb $2, 1($1)
+; N64-NEXT:    jr $ra
+; N64-NEXT:    sb $2, 0($1)
   store volatile <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, <4 x i8>*@v4i8
-  ; TODO: What code should be emitted?
-
   ret void
 }


        


More information about the llvm-commits mailing list