[llvm] cfe69c8 - [SelectionDAG] Improve scalarization of irregular vector types

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 11 10:57:21 PST 2021


Author: LemonBoy
Date: 2021-03-11T19:57:13+01:00
New Revision: cfe69c8efd1c7f6a584e07230a7fc49c93e34cbe

URL: https://github.com/llvm/llvm-project/commit/cfe69c8efd1c7f6a584e07230a7fc49c93e34cbe
DIFF: https://github.com/llvm/llvm-project/commit/cfe69c8efd1c7f6a584e07230a7fc49c93e34cbe.diff

LOG: [SelectionDAG] Improve scalarization of irregular vector types

Use a more general strategy when splitting a vector into scalar parts (and vice-versa) to correctly handle vector types whose element size is a multiple of 8 bits but not a power of 2 (e.g. <2 x i24>).

Reviewed By: atanasyan

Differential Revision: https://reviews.llvm.org/D98273

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/test/CodeGen/Mips/cconv/vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index eb5a9676ecc3..ffd4778a4a42 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -436,14 +436,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
      if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
        return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      } else if (ValueVT.bitsLT(PartEVT)) {
-       // Bitcast Val back the original type and extract the corresponding
-       // vector we want.
-       unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
-       EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
-                                           ValueVT.getVectorElementType(), Elts);
-       Val = DAG.getBitcast(WiderVecType, Val);
-       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
-                          DAG.getVectorIdxConstant(0, DL));
+       const uint64_t ValueSize = ValueVT.getFixedSizeInBits();
+       EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
+       // Drop the extra bits.
+       Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val);
+       return DAG.getBitcast(ValueVT, Val);
      }
 
      diagnosePossiblyInvalidConstraint(
@@ -714,13 +711,25 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
 
   EVT BuiltVectorTy = EVT::getVectorVT(
       *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue());
-  if (ValueVT != BuiltVectorTy) {
-    if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
-      Val = Widened;
 
+  if (ValueVT == BuiltVectorTy) {
+    // Nothing to do.
+  } else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
+    // Bitconvert vector->vector case.
     Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
+  } else if (SDValue Widened =
+                 widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
+    Val = Widened;
+  } else if (BuiltVectorTy.getVectorElementType().bitsGE(
+                 ValueVT.getVectorElementType()) &&
+             BuiltVectorTy.getVectorElementCount() ==
+                 ValueVT.getVectorElementCount()) {
+    // Promoted vector extract
+    Val = DAG.getAnyExtOrTrunc(Val, DL, BuiltVectorTy);
   }
 
+  assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");
+
   // Split the vector into intermediate operands.
   SmallVector<SDValue, 8> Ops(NumIntermediates);
   for (unsigned i = 0; i != NumIntermediates; ++i) {

diff  --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll
index ce86ba611822..723a71b52fed 100644
--- a/llvm/test/CodeGen/Mips/cconv/vector.ll
+++ b/llvm/test/CodeGen/Mips/cconv/vector.ll
@@ -16,37 +16,35 @@
 ; First set of tests are for argument passing.
 
 define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
-; MIPS32EB-LABEL: i8_2:
-; MIPS32EB:       # %bb.0:
-; MIPS32EB-NEXT:    srl $1, $5, 24
-; MIPS32EB-NEXT:    srl $2, $4, 24
-; MIPS32EB-NEXT:    addu $1, $2, $1
-; MIPS32EB-NEXT:    sll $1, $1, 8
-; MIPS32EB-NEXT:    srl $2, $5, 16
-; MIPS32EB-NEXT:    srl $3, $4, 16
-; MIPS32EB-NEXT:    addu $2, $3, $2
-; MIPS32EB-NEXT:    andi $2, $2, 255
-; MIPS32EB-NEXT:    or $2, $2, $1
-; MIPS32EB-NEXT:    jr $ra
-; MIPS32EB-NEXT:    nop
+; MIPS32-LABEL: i8_2:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addu $1, $4, $5
+; MIPS32-NEXT:    andi $1, $1, 255
+; MIPS32-NEXT:    andi $2, $5, 65280
+; MIPS32-NEXT:    srl $2, $2, 8
+; MIPS32-NEXT:    andi $3, $4, 65280
+; MIPS32-NEXT:    srl $3, $3, 8
+; MIPS32-NEXT:    addu $2, $3, $2
+; MIPS32-NEXT:    sll $2, $2, 8
+; MIPS32-NEXT:    or $2, $1, $2
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
 ;
-; MIPS64EB-LABEL: i8_2:
-; MIPS64EB:       # %bb.0:
-; MIPS64EB-NEXT:    dsrl $1, $5, 56
-; MIPS64EB-NEXT:    sll $1, $1, 0
-; MIPS64EB-NEXT:    dsrl $2, $4, 56
-; MIPS64EB-NEXT:    sll $2, $2, 0
-; MIPS64EB-NEXT:    addu $1, $2, $1
-; MIPS64EB-NEXT:    dsrl $2, $5, 48
-; MIPS64EB-NEXT:    sll $1, $1, 8
-; MIPS64EB-NEXT:    sll $2, $2, 0
-; MIPS64EB-NEXT:    dsrl $3, $4, 48
-; MIPS64EB-NEXT:    sll $3, $3, 0
-; MIPS64EB-NEXT:    addu $2, $3, $2
-; MIPS64EB-NEXT:    andi $2, $2, 255
-; MIPS64EB-NEXT:    or $2, $2, $1
-; MIPS64EB-NEXT:    jr $ra
-; MIPS64EB-NEXT:    nop
+; MIPS64-LABEL: i8_2:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    sll $1, $5, 0
+; MIPS64-NEXT:    sll $2, $4, 0
+; MIPS64-NEXT:    addu $3, $2, $1
+; MIPS64-NEXT:    andi $3, $3, 255
+; MIPS64-NEXT:    andi $1, $1, 65280
+; MIPS64-NEXT:    srl $1, $1, 8
+; MIPS64-NEXT:    andi $2, $2, 65280
+; MIPS64-NEXT:    srl $2, $2, 8
+; MIPS64-NEXT:    addu $1, $2, $1
+; MIPS64-NEXT:    sll $1, $1, 8
+; MIPS64-NEXT:    or $2, $3, $1
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
 ;
 ; MIPS32R5EB-LABEL: i8_2:
 ; MIPS32R5EB:       # %bb.0:
@@ -60,8 +58,8 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
 ; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EB-NEXT:    and $sp, $sp, $1
-; MIPS32R5EB-NEXT:    sw $5, 48($sp)
-; MIPS32R5EB-NEXT:    sw $4, 52($sp)
+; MIPS32R5EB-NEXT:    sh $5, 48($sp)
+; MIPS32R5EB-NEXT:    sh $4, 52($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 49($sp)
 ; MIPS32R5EB-NEXT:    sw $1, 28($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 48($sp)
@@ -86,71 +84,29 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
 ; MIPS32R5EB-NEXT:    jr $ra
 ; MIPS32R5EB-NEXT:    nop
 ;
-; MIPS64R5EB-LABEL: i8_2:
-; MIPS64R5EB:       # %bb.0:
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -96
-; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 96
-; MIPS64R5EB-NEXT:    sd $4, 88($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 89($sp)
-; MIPS64R5EB-NEXT:    sh $1, 2($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 88($sp)
-; MIPS64R5EB-NEXT:    sh $1, 0($sp)
-; MIPS64R5EB-NEXT:    ld.h $w0, 0($sp)
-; MIPS64R5EB-NEXT:    copy_s.h $1, $w0[0]
-; MIPS64R5EB-NEXT:    copy_s.h $2, $w0[1]
-; MIPS64R5EB-NEXT:    sd $5, 80($sp)
-; MIPS64R5EB-NEXT:    lbu $3, 81($sp)
-; MIPS64R5EB-NEXT:    sh $3, 18($sp)
-; MIPS64R5EB-NEXT:    lbu $3, 80($sp)
-; MIPS64R5EB-NEXT:    sh $3, 16($sp)
-; MIPS64R5EB-NEXT:    ld.h $w0, 16($sp)
-; MIPS64R5EB-NEXT:    copy_s.h $3, $w0[0]
-; MIPS64R5EB-NEXT:    copy_s.h $4, $w0[1]
-; MIPS64R5EB-NEXT:    sw $4, 60($sp)
-; MIPS64R5EB-NEXT:    sw $3, 52($sp)
-; MIPS64R5EB-NEXT:    sw $2, 44($sp)
-; MIPS64R5EB-NEXT:    sw $1, 36($sp)
-; MIPS64R5EB-NEXT:    ld.d $w0, 48($sp)
-; MIPS64R5EB-NEXT:    ld.d $w1, 32($sp)
-; MIPS64R5EB-NEXT:    addv.d $w0, $w1, $w0
-; MIPS64R5EB-NEXT:    copy_s.d $1, $w0[0]
-; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5EB-NEXT:    sb $2, 77($sp)
-; MIPS64R5EB-NEXT:    sb $1, 76($sp)
-; MIPS64R5EB-NEXT:    lh $2, 76($sp)
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 96
-; MIPS64R5EB-NEXT:    jr $ra
-; MIPS64R5EB-NEXT:    nop
-;
-; MIPS32EL-LABEL: i8_2:
-; MIPS32EL:       # %bb.0:
-; MIPS32EL-NEXT:    addu $1, $4, $5
-; MIPS32EL-NEXT:    andi $1, $1, 255
-; MIPS32EL-NEXT:    andi $2, $5, 65280
-; MIPS32EL-NEXT:    srl $2, $2, 8
-; MIPS32EL-NEXT:    andi $3, $4, 65280
-; MIPS32EL-NEXT:    srl $3, $3, 8
-; MIPS32EL-NEXT:    addu $2, $3, $2
-; MIPS32EL-NEXT:    sll $2, $2, 8
-; MIPS32EL-NEXT:    or $2, $1, $2
-; MIPS32EL-NEXT:    jr $ra
-; MIPS32EL-NEXT:    nop
-;
-; MIPS64EL-LABEL: i8_2:
-; MIPS64EL:       # %bb.0:
-; MIPS64EL-NEXT:    sll $1, $5, 0
-; MIPS64EL-NEXT:    sll $2, $4, 0
-; MIPS64EL-NEXT:    addu $3, $2, $1
-; MIPS64EL-NEXT:    andi $3, $3, 255
-; MIPS64EL-NEXT:    andi $1, $1, 65280
-; MIPS64EL-NEXT:    srl $1, $1, 8
-; MIPS64EL-NEXT:    andi $2, $2, 65280
-; MIPS64EL-NEXT:    srl $2, $2, 8
-; MIPS64EL-NEXT:    addu $1, $2, $1
-; MIPS64EL-NEXT:    sll $1, $1, 8
-; MIPS64EL-NEXT:    or $2, $3, $1
-; MIPS64EL-NEXT:    jr $ra
-; MIPS64EL-NEXT:    nop
+; MIPS64R5-LABEL: i8_2:
+; MIPS64R5:       # %bb.0:
+; MIPS64R5-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5-NEXT:    sh $5, 8($sp)
+; MIPS64R5-NEXT:    sh $4, 12($sp)
+; MIPS64R5-NEXT:    lb $1, 9($sp)
+; MIPS64R5-NEXT:    lb $2, 8($sp)
+; MIPS64R5-NEXT:    insert.d $w0[0], $2
+; MIPS64R5-NEXT:    insert.d $w0[1], $1
+; MIPS64R5-NEXT:    lb $1, 13($sp)
+; MIPS64R5-NEXT:    lb $2, 12($sp)
+; MIPS64R5-NEXT:    insert.d $w1[0], $2
+; MIPS64R5-NEXT:    insert.d $w1[1], $1
+; MIPS64R5-NEXT:    addv.d $w0, $w1, $w0
+; MIPS64R5-NEXT:    copy_s.d $1, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $2, $w0[1]
+; MIPS64R5-NEXT:    sb $2, 5($sp)
+; MIPS64R5-NEXT:    sb $1, 4($sp)
+; MIPS64R5-NEXT:    lh $2, 4($sp)
+; MIPS64R5-NEXT:    daddiu $sp, $sp, 16
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
 ;
 ; MIPS32R5EL-LABEL: i8_2:
 ; MIPS32R5EL:       # %bb.0:
@@ -164,8 +120,8 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
 ; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EL-NEXT:    and $sp, $sp, $1
-; MIPS32R5EL-NEXT:    sw $5, 48($sp)
-; MIPS32R5EL-NEXT:    sw $4, 52($sp)
+; MIPS32R5EL-NEXT:    sh $5, 48($sp)
+; MIPS32R5EL-NEXT:    sh $4, 52($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 49($sp)
 ; MIPS32R5EL-NEXT:    sw $1, 24($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 48($sp)
@@ -188,42 +144,6 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
 ; MIPS32R5EL-NEXT:    addiu $sp, $sp, 64
 ; MIPS32R5EL-NEXT:    jr $ra
 ; MIPS32R5EL-NEXT:    nop
-;
-; MIPS64R5EL-LABEL: i8_2:
-; MIPS64R5EL:       # %bb.0:
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -96
-; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 96
-; MIPS64R5EL-NEXT:    sd $4, 88($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 89($sp)
-; MIPS64R5EL-NEXT:    sh $1, 2($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 88($sp)
-; MIPS64R5EL-NEXT:    sh $1, 0($sp)
-; MIPS64R5EL-NEXT:    ld.h $w0, 0($sp)
-; MIPS64R5EL-NEXT:    copy_s.h $1, $w0[0]
-; MIPS64R5EL-NEXT:    copy_s.h $2, $w0[1]
-; MIPS64R5EL-NEXT:    sd $5, 80($sp)
-; MIPS64R5EL-NEXT:    lbu $3, 81($sp)
-; MIPS64R5EL-NEXT:    sh $3, 18($sp)
-; MIPS64R5EL-NEXT:    lbu $3, 80($sp)
-; MIPS64R5EL-NEXT:    sh $3, 16($sp)
-; MIPS64R5EL-NEXT:    ld.h $w0, 16($sp)
-; MIPS64R5EL-NEXT:    copy_s.h $3, $w0[0]
-; MIPS64R5EL-NEXT:    copy_s.h $4, $w0[1]
-; MIPS64R5EL-NEXT:    sw $4, 56($sp)
-; MIPS64R5EL-NEXT:    sw $3, 48($sp)
-; MIPS64R5EL-NEXT:    sw $2, 40($sp)
-; MIPS64R5EL-NEXT:    sw $1, 32($sp)
-; MIPS64R5EL-NEXT:    ld.d $w0, 48($sp)
-; MIPS64R5EL-NEXT:    ld.d $w1, 32($sp)
-; MIPS64R5EL-NEXT:    addv.d $w0, $w1, $w0
-; MIPS64R5EL-NEXT:    copy_s.d $1, $w0[0]
-; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5EL-NEXT:    sb $2, 77($sp)
-; MIPS64R5EL-NEXT:    sb $1, 76($sp)
-; MIPS64R5EL-NEXT:    lh $2, 76($sp)
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 96
-; MIPS64R5EL-NEXT:    jr $ra
-; MIPS64R5EL-NEXT:    nop
   %1 = add <2 x i8> %a, %b
   ret <2 x i8> %1
 }
@@ -234,85 +154,78 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
 define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x i8> %e, <2 x i8> %f, <2 x i8> %g) {
 ; MIPS32EB-LABEL: i8x2_7:
 ; MIPS32EB:       # %bb.0: # %entry
-; MIPS32EB-NEXT:    srl $1, $5, 24
-; MIPS32EB-NEXT:    srl $2, $4, 24
-; MIPS32EB-NEXT:    addu $1, $2, $1
-; MIPS32EB-NEXT:    srl $2, $6, 24
-; MIPS32EB-NEXT:    addu $1, $1, $2
-; MIPS32EB-NEXT:    srl $2, $7, 24
-; MIPS32EB-NEXT:    addu $1, $1, $2
-; MIPS32EB-NEXT:    srl $2, $5, 16
-; MIPS32EB-NEXT:    srl $3, $4, 16
+; MIPS32EB-NEXT:    addu $1, $4, $5
+; MIPS32EB-NEXT:    addu $1, $1, $6
+; MIPS32EB-NEXT:    addu $1, $1, $7
+; MIPS32EB-NEXT:    andi $2, $5, 65280
+; MIPS32EB-NEXT:    lbu $3, 19($sp)
+; MIPS32EB-NEXT:    addu $1, $1, $3
+; MIPS32EB-NEXT:    srl $2, $2, 8
+; MIPS32EB-NEXT:    andi $3, $4, 65280
+; MIPS32EB-NEXT:    srl $3, $3, 8
 ; MIPS32EB-NEXT:    addu $2, $3, $2
-; MIPS32EB-NEXT:    srl $3, $6, 16
-; MIPS32EB-NEXT:    lbu $4, 16($sp)
+; MIPS32EB-NEXT:    andi $3, $6, 65280
+; MIPS32EB-NEXT:    srl $3, $3, 8
+; MIPS32EB-NEXT:    lbu $4, 23($sp)
 ; MIPS32EB-NEXT:    addu $2, $2, $3
 ; MIPS32EB-NEXT:    addu $1, $1, $4
-; MIPS32EB-NEXT:    lbu $3, 20($sp)
-; MIPS32EB-NEXT:    addu $1, $1, $3
-; MIPS32EB-NEXT:    lbu $3, 24($sp)
+; MIPS32EB-NEXT:    lbu $3, 27($sp)
 ; MIPS32EB-NEXT:    addu $1, $1, $3
-; MIPS32EB-NEXT:    srl $3, $7, 16
-; MIPS32EB-NEXT:    sll $1, $1, 8
-; MIPS32EB-NEXT:    addu $2, $2, $3
-; MIPS32EB-NEXT:    lbu $3, 17($sp)
+; MIPS32EB-NEXT:    andi $3, $7, 65280
+; MIPS32EB-NEXT:    srl $3, $3, 8
+; MIPS32EB-NEXT:    lbu $4, 26($sp)
+; MIPS32EB-NEXT:    andi $1, $1, 255
 ; MIPS32EB-NEXT:    addu $2, $2, $3
-; MIPS32EB-NEXT:    lbu $3, 21($sp)
+; MIPS32EB-NEXT:    lbu $3, 18($sp)
 ; MIPS32EB-NEXT:    addu $2, $2, $3
-; MIPS32EB-NEXT:    lbu $3, 25($sp)
+; MIPS32EB-NEXT:    lbu $3, 22($sp)
 ; MIPS32EB-NEXT:    addu $2, $2, $3
-; MIPS32EB-NEXT:    andi $2, $2, 255
-; MIPS32EB-NEXT:    or $2, $2, $1
+; MIPS32EB-NEXT:    addu $2, $2, $4
+; MIPS32EB-NEXT:    sll $2, $2, 8
+; MIPS32EB-NEXT:    or $2, $1, $2
 ; MIPS32EB-NEXT:    jr $ra
 ; MIPS32EB-NEXT:    nop
 ;
-; MIPS64EB-LABEL: i8x2_7:
-; MIPS64EB:       # %bb.0: # %entry
-; MIPS64EB-NEXT:    dsrl $1, $5, 56
-; MIPS64EB-NEXT:    dsrl $2, $6, 56
-; MIPS64EB-NEXT:    sll $1, $1, 0
-; MIPS64EB-NEXT:    dsrl $3, $4, 56
-; MIPS64EB-NEXT:    sll $3, $3, 0
-; MIPS64EB-NEXT:    addu $1, $3, $1
-; MIPS64EB-NEXT:    dsrl $3, $6, 48
-; MIPS64EB-NEXT:    sll $2, $2, 0
-; MIPS64EB-NEXT:    dsrl $5, $5, 48
-; MIPS64EB-NEXT:    sll $5, $5, 0
-; MIPS64EB-NEXT:    dsrl $4, $4, 48
-; MIPS64EB-NEXT:    sll $4, $4, 0
-; MIPS64EB-NEXT:    addu $4, $4, $5
-; MIPS64EB-NEXT:    addu $1, $1, $2
-; MIPS64EB-NEXT:    dsrl $2, $8, 48
-; MIPS64EB-NEXT:    dsrl $5, $8, 56
-; MIPS64EB-NEXT:    sll $3, $3, 0
-; MIPS64EB-NEXT:    dsrl $6, $7, 56
-; MIPS64EB-NEXT:    sll $6, $6, 0
-; MIPS64EB-NEXT:    addu $1, $1, $6
-; MIPS64EB-NEXT:    addu $3, $4, $3
-; MIPS64EB-NEXT:    sll $4, $5, 0
-; MIPS64EB-NEXT:    dsrl $5, $7, 48
-; MIPS64EB-NEXT:    sll $5, $5, 0
-; MIPS64EB-NEXT:    addu $3, $3, $5
-; MIPS64EB-NEXT:    dsrl $5, $10, 48
-; MIPS64EB-NEXT:    sll $2, $2, 0
-; MIPS64EB-NEXT:    dsrl $6, $10, 56
-; MIPS64EB-NEXT:    addu $1, $1, $4
-; MIPS64EB-NEXT:    dsrl $4, $9, 56
-; MIPS64EB-NEXT:    sll $4, $4, 0
-; MIPS64EB-NEXT:    addu $1, $1, $4
-; MIPS64EB-NEXT:    sll $4, $6, 0
-; MIPS64EB-NEXT:    addu $1, $1, $4
-; MIPS64EB-NEXT:    sll $1, $1, 8
-; MIPS64EB-NEXT:    addu $2, $3, $2
-; MIPS64EB-NEXT:    dsrl $3, $9, 48
-; MIPS64EB-NEXT:    sll $3, $3, 0
-; MIPS64EB-NEXT:    addu $2, $2, $3
-; MIPS64EB-NEXT:    sll $3, $5, 0
-; MIPS64EB-NEXT:    addu $2, $2, $3
-; MIPS64EB-NEXT:    andi $2, $2, 255
-; MIPS64EB-NEXT:    or $2, $2, $1
-; MIPS64EB-NEXT:    jr $ra
-; MIPS64EB-NEXT:    nop
+; MIPS64-LABEL: i8x2_7:
+; MIPS64:       # %bb.0: # %entry
+; MIPS64-NEXT:    sll $1, $5, 0
+; MIPS64-NEXT:    sll $2, $4, 0
+; MIPS64-NEXT:    addu $3, $2, $1
+; MIPS64-NEXT:    sll $4, $6, 0
+; MIPS64-NEXT:    andi $1, $1, 65280
+; MIPS64-NEXT:    srl $1, $1, 8
+; MIPS64-NEXT:    andi $2, $2, 65280
+; MIPS64-NEXT:    srl $2, $2, 8
+; MIPS64-NEXT:    addu $1, $2, $1
+; MIPS64-NEXT:    addu $2, $3, $4
+; MIPS64-NEXT:    sll $3, $7, 0
+; MIPS64-NEXT:    andi $5, $3, 65280
+; MIPS64-NEXT:    andi $4, $4, 65280
+; MIPS64-NEXT:    srl $4, $4, 8
+; MIPS64-NEXT:    addu $2, $2, $3
+; MIPS64-NEXT:    addu $1, $1, $4
+; MIPS64-NEXT:    srl $3, $5, 8
+; MIPS64-NEXT:    sll $4, $8, 0
+; MIPS64-NEXT:    andi $5, $4, 65280
+; MIPS64-NEXT:    srl $5, $5, 8
+; MIPS64-NEXT:    addu $1, $1, $3
+; MIPS64-NEXT:    addu $2, $2, $4
+; MIPS64-NEXT:    sll $3, $9, 0
+; MIPS64-NEXT:    addu $2, $2, $3
+; MIPS64-NEXT:    sll $4, $10, 0
+; MIPS64-NEXT:    addu $2, $2, $4
+; MIPS64-NEXT:    andi $2, $2, 255
+; MIPS64-NEXT:    addu $1, $1, $5
+; MIPS64-NEXT:    andi $3, $3, 65280
+; MIPS64-NEXT:    srl $3, $3, 8
+; MIPS64-NEXT:    addu $1, $1, $3
+; MIPS64-NEXT:    andi $3, $4, 65280
+; MIPS64-NEXT:    srl $3, $3, 8
+; MIPS64-NEXT:    addu $1, $1, $3
+; MIPS64-NEXT:    sll $1, $1, 8
+; MIPS64-NEXT:    or $2, $2, $1
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
 ;
 ; MIPS32R5EB-LABEL: i8x2_7:
 ; MIPS32R5EB:       # %bb.0: # %entry
@@ -326,8 +239,8 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x
 ; MIPS32R5EB-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EB-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EB-NEXT:    and $sp, $sp, $1
-; MIPS32R5EB-NEXT:    sw $5, 128($sp)
-; MIPS32R5EB-NEXT:    sw $4, 132($sp)
+; MIPS32R5EB-NEXT:    sh $5, 128($sp)
+; MIPS32R5EB-NEXT:    sh $4, 132($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 129($sp)
 ; MIPS32R5EB-NEXT:    sw $1, 76($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 128($sp)
@@ -339,35 +252,35 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x
 ; MIPS32R5EB-NEXT:    ld.d $w0, 64($sp)
 ; MIPS32R5EB-NEXT:    ld.d $w1, 48($sp)
 ; MIPS32R5EB-NEXT:    addv.d $w0, $w1, $w0
-; MIPS32R5EB-NEXT:    sw $6, 124($sp)
+; MIPS32R5EB-NEXT:    sh $6, 124($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 125($sp)
 ; MIPS32R5EB-NEXT:    sw $1, 92($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 124($sp)
 ; MIPS32R5EB-NEXT:    sw $1, 84($sp)
 ; MIPS32R5EB-NEXT:    ld.d $w1, 80($sp)
 ; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EB-NEXT:    sw $7, 120($sp)
+; MIPS32R5EB-NEXT:    sh $7, 120($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 121($sp)
 ; MIPS32R5EB-NEXT:    sw $1, 108($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 120($sp)
 ; MIPS32R5EB-NEXT:    sw $1, 100($sp)
 ; MIPS32R5EB-NEXT:    ld.d $w1, 96($sp)
 ; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EB-NEXT:    lbu $1, 161($fp)
+; MIPS32R5EB-NEXT:    lbu $1, 163($fp)
 ; MIPS32R5EB-NEXT:    sw $1, 12($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 160($fp)
+; MIPS32R5EB-NEXT:    lbu $1, 162($fp)
 ; MIPS32R5EB-NEXT:    sw $1, 4($sp)
 ; MIPS32R5EB-NEXT:    ld.d $w1, 0($sp)
 ; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EB-NEXT:    lbu $1, 165($fp)
+; MIPS32R5EB-NEXT:    lbu $1, 167($fp)
 ; MIPS32R5EB-NEXT:    sw $1, 28($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 164($fp)
+; MIPS32R5EB-NEXT:    lbu $1, 166($fp)
 ; MIPS32R5EB-NEXT:    sw $1, 20($sp)
 ; MIPS32R5EB-NEXT:    ld.d $w1, 16($sp)
 ; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EB-NEXT:    lbu $1, 169($fp)
+; MIPS32R5EB-NEXT:    lbu $1, 171($fp)
 ; MIPS32R5EB-NEXT:    sw $1, 44($sp)
-; MIPS32R5EB-NEXT:    lbu $1, 168($fp)
+; MIPS32R5EB-NEXT:    lbu $1, 170($fp)
 ; MIPS32R5EB-NEXT:    sw $1, 36($sp)
 ; MIPS32R5EB-NEXT:    ld.d $w1, 32($sp)
 ; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
@@ -384,101 +297,59 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x
 ; MIPS32R5EB-NEXT:    jr $ra
 ; MIPS32R5EB-NEXT:    nop
 ;
-; MIPS64R5EB-LABEL: i8x2_7:
-; MIPS64R5EB:       # %bb.0: # %entry
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -288
-; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 288
-; MIPS64R5EB-NEXT:    sd $4, 280($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 281($sp)
-; MIPS64R5EB-NEXT:    sh $1, 2($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 280($sp)
-; MIPS64R5EB-NEXT:    sh $1, 0($sp)
-; MIPS64R5EB-NEXT:    ld.h $w0, 0($sp)
-; MIPS64R5EB-NEXT:    copy_s.h $1, $w0[0]
-; MIPS64R5EB-NEXT:    copy_s.h $2, $w0[1]
-; MIPS64R5EB-NEXT:    sd $5, 272($sp)
-; MIPS64R5EB-NEXT:    lbu $3, 273($sp)
-; MIPS64R5EB-NEXT:    sh $3, 18($sp)
-; MIPS64R5EB-NEXT:    lbu $3, 272($sp)
-; MIPS64R5EB-NEXT:    sh $3, 16($sp)
-; MIPS64R5EB-NEXT:    ld.h $w0, 16($sp)
-; MIPS64R5EB-NEXT:    copy_s.h $3, $w0[0]
-; MIPS64R5EB-NEXT:    copy_s.h $4, $w0[1]
-; MIPS64R5EB-NEXT:    sw $4, 140($sp)
-; MIPS64R5EB-NEXT:    sw $3, 132($sp)
-; MIPS64R5EB-NEXT:    sw $2, 124($sp)
-; MIPS64R5EB-NEXT:    sw $1, 116($sp)
-; MIPS64R5EB-NEXT:    ld.d $w0, 128($sp)
-; MIPS64R5EB-NEXT:    ld.d $w1, 112($sp)
-; MIPS64R5EB-NEXT:    addv.d $w0, $w1, $w0
-; MIPS64R5EB-NEXT:    sd $6, 264($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 265($sp)
-; MIPS64R5EB-NEXT:    sh $1, 34($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 264($sp)
-; MIPS64R5EB-NEXT:    sh $1, 32($sp)
-; MIPS64R5EB-NEXT:    ld.h $w1, 32($sp)
-; MIPS64R5EB-NEXT:    copy_s.h $1, $w1[0]
-; MIPS64R5EB-NEXT:    copy_s.h $2, $w1[1]
-; MIPS64R5EB-NEXT:    sw $2, 156($sp)
-; MIPS64R5EB-NEXT:    sw $1, 148($sp)
-; MIPS64R5EB-NEXT:    ld.d $w1, 144($sp)
-; MIPS64R5EB-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5EB-NEXT:    sd $7, 256($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 257($sp)
-; MIPS64R5EB-NEXT:    sh $1, 50($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 256($sp)
-; MIPS64R5EB-NEXT:    sh $1, 48($sp)
-; MIPS64R5EB-NEXT:    ld.h $w1, 48($sp)
-; MIPS64R5EB-NEXT:    copy_s.h $1, $w1[0]
-; MIPS64R5EB-NEXT:    copy_s.h $2, $w1[1]
-; MIPS64R5EB-NEXT:    sw $2, 172($sp)
-; MIPS64R5EB-NEXT:    sw $1, 164($sp)
-; MIPS64R5EB-NEXT:    ld.d $w1, 160($sp)
-; MIPS64R5EB-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5EB-NEXT:    sd $8, 248($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 249($sp)
-; MIPS64R5EB-NEXT:    sh $1, 66($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 248($sp)
-; MIPS64R5EB-NEXT:    sh $1, 64($sp)
-; MIPS64R5EB-NEXT:    ld.h $w1, 64($sp)
-; MIPS64R5EB-NEXT:    copy_s.h $1, $w1[0]
-; MIPS64R5EB-NEXT:    copy_s.h $2, $w1[1]
-; MIPS64R5EB-NEXT:    sw $2, 188($sp)
-; MIPS64R5EB-NEXT:    sw $1, 180($sp)
-; MIPS64R5EB-NEXT:    ld.d $w1, 176($sp)
-; MIPS64R5EB-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5EB-NEXT:    sd $10, 232($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 233($sp)
-; MIPS64R5EB-NEXT:    sh $1, 98($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 232($sp)
-; MIPS64R5EB-NEXT:    sh $1, 96($sp)
-; MIPS64R5EB-NEXT:    ld.h $w1, 96($sp)
-; MIPS64R5EB-NEXT:    copy_s.h $1, $w1[0]
-; MIPS64R5EB-NEXT:    copy_s.h $2, $w1[1]
-; MIPS64R5EB-NEXT:    sd $9, 240($sp)
-; MIPS64R5EB-NEXT:    lbu $3, 241($sp)
-; MIPS64R5EB-NEXT:    sh $3, 82($sp)
-; MIPS64R5EB-NEXT:    lbu $3, 240($sp)
-; MIPS64R5EB-NEXT:    sh $3, 80($sp)
-; MIPS64R5EB-NEXT:    ld.h $w1, 80($sp)
-; MIPS64R5EB-NEXT:    copy_s.h $3, $w1[0]
-; MIPS64R5EB-NEXT:    copy_s.h $4, $w1[1]
-; MIPS64R5EB-NEXT:    sw $4, 204($sp)
-; MIPS64R5EB-NEXT:    sw $3, 196($sp)
-; MIPS64R5EB-NEXT:    ld.d $w1, 192($sp)
-; MIPS64R5EB-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5EB-NEXT:    sw $2, 220($sp)
-; MIPS64R5EB-NEXT:    sw $1, 212($sp)
-; MIPS64R5EB-NEXT:    ld.d $w1, 208($sp)
-; MIPS64R5EB-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5EB-NEXT:    copy_s.d $1, $w0[0]
-; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5EB-NEXT:    sb $2, 229($sp)
-; MIPS64R5EB-NEXT:    sb $1, 228($sp)
-; MIPS64R5EB-NEXT:    lh $2, 228($sp)
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 288
-; MIPS64R5EB-NEXT:    jr $ra
-; MIPS64R5EB-NEXT:    nop
+; MIPS64R5-LABEL: i8x2_7:
+; MIPS64R5:       # %bb.0: # %entry
+; MIPS64R5-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5-NEXT:    sh $5, 24($sp)
+; MIPS64R5-NEXT:    sh $4, 28($sp)
+; MIPS64R5-NEXT:    lb $1, 25($sp)
+; MIPS64R5-NEXT:    lb $2, 24($sp)
+; MIPS64R5-NEXT:    insert.d $w0[0], $2
+; MIPS64R5-NEXT:    insert.d $w0[1], $1
+; MIPS64R5-NEXT:    lb $1, 29($sp)
+; MIPS64R5-NEXT:    lb $2, 28($sp)
+; MIPS64R5-NEXT:    insert.d $w1[0], $2
+; MIPS64R5-NEXT:    insert.d $w1[1], $1
+; MIPS64R5-NEXT:    addv.d $w0, $w1, $w0
+; MIPS64R5-NEXT:    sh $6, 20($sp)
+; MIPS64R5-NEXT:    lb $1, 21($sp)
+; MIPS64R5-NEXT:    lb $2, 20($sp)
+; MIPS64R5-NEXT:    insert.d $w1[0], $2
+; MIPS64R5-NEXT:    insert.d $w1[1], $1
+; MIPS64R5-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5-NEXT:    sh $7, 16($sp)
+; MIPS64R5-NEXT:    lb $1, 17($sp)
+; MIPS64R5-NEXT:    lb $2, 16($sp)
+; MIPS64R5-NEXT:    insert.d $w1[0], $2
+; MIPS64R5-NEXT:    insert.d $w1[1], $1
+; MIPS64R5-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5-NEXT:    sh $8, 12($sp)
+; MIPS64R5-NEXT:    lb $1, 13($sp)
+; MIPS64R5-NEXT:    lb $2, 12($sp)
+; MIPS64R5-NEXT:    insert.d $w1[0], $2
+; MIPS64R5-NEXT:    insert.d $w1[1], $1
+; MIPS64R5-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5-NEXT:    sh $9, 8($sp)
+; MIPS64R5-NEXT:    lb $1, 9($sp)
+; MIPS64R5-NEXT:    lb $2, 8($sp)
+; MIPS64R5-NEXT:    insert.d $w1[0], $2
+; MIPS64R5-NEXT:    insert.d $w1[1], $1
+; MIPS64R5-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5-NEXT:    sh $10, 4($sp)
+; MIPS64R5-NEXT:    lb $1, 5($sp)
+; MIPS64R5-NEXT:    lb $2, 4($sp)
+; MIPS64R5-NEXT:    insert.d $w1[0], $2
+; MIPS64R5-NEXT:    insert.d $w1[1], $1
+; MIPS64R5-NEXT:    addv.d $w0, $w0, $w1
+; MIPS64R5-NEXT:    copy_s.d $1, $w0[0]
+; MIPS64R5-NEXT:    copy_s.d $2, $w0[1]
+; MIPS64R5-NEXT:    sb $2, 1($sp)
+; MIPS64R5-NEXT:    sb $1, 0($sp)
+; MIPS64R5-NEXT:    lh $2, 0($sp)
+; MIPS64R5-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5-NEXT:    jr $ra
+; MIPS64R5-NEXT:    nop
 ;
 ; MIPS32EL-LABEL: i8x2_7:
 ; MIPS32EL:       # %bb.0: # %entry
@@ -514,47 +385,6 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x
 ; MIPS32EL-NEXT:    jr $ra
 ; MIPS32EL-NEXT:    nop
 ;
-; MIPS64EL-LABEL: i8x2_7:
-; MIPS64EL:       # %bb.0: # %entry
-; MIPS64EL-NEXT:    sll $1, $5, 0
-; MIPS64EL-NEXT:    sll $2, $4, 0
-; MIPS64EL-NEXT:    addu $3, $2, $1
-; MIPS64EL-NEXT:    sll $4, $6, 0
-; MIPS64EL-NEXT:    andi $1, $1, 65280
-; MIPS64EL-NEXT:    srl $1, $1, 8
-; MIPS64EL-NEXT:    andi $2, $2, 65280
-; MIPS64EL-NEXT:    srl $2, $2, 8
-; MIPS64EL-NEXT:    addu $1, $2, $1
-; MIPS64EL-NEXT:    addu $2, $3, $4
-; MIPS64EL-NEXT:    sll $3, $7, 0
-; MIPS64EL-NEXT:    andi $5, $3, 65280
-; MIPS64EL-NEXT:    andi $4, $4, 65280
-; MIPS64EL-NEXT:    srl $4, $4, 8
-; MIPS64EL-NEXT:    addu $2, $2, $3
-; MIPS64EL-NEXT:    addu $1, $1, $4
-; MIPS64EL-NEXT:    srl $3, $5, 8
-; MIPS64EL-NEXT:    sll $4, $8, 0
-; MIPS64EL-NEXT:    andi $5, $4, 65280
-; MIPS64EL-NEXT:    srl $5, $5, 8
-; MIPS64EL-NEXT:    addu $1, $1, $3
-; MIPS64EL-NEXT:    addu $2, $2, $4
-; MIPS64EL-NEXT:    sll $3, $9, 0
-; MIPS64EL-NEXT:    addu $2, $2, $3
-; MIPS64EL-NEXT:    sll $4, $10, 0
-; MIPS64EL-NEXT:    addu $2, $2, $4
-; MIPS64EL-NEXT:    andi $2, $2, 255
-; MIPS64EL-NEXT:    addu $1, $1, $5
-; MIPS64EL-NEXT:    andi $3, $3, 65280
-; MIPS64EL-NEXT:    srl $3, $3, 8
-; MIPS64EL-NEXT:    addu $1, $1, $3
-; MIPS64EL-NEXT:    andi $3, $4, 65280
-; MIPS64EL-NEXT:    srl $3, $3, 8
-; MIPS64EL-NEXT:    addu $1, $1, $3
-; MIPS64EL-NEXT:    sll $1, $1, 8
-; MIPS64EL-NEXT:    or $2, $2, $1
-; MIPS64EL-NEXT:    jr $ra
-; MIPS64EL-NEXT:    nop
-;
 ; MIPS32R5EL-LABEL: i8x2_7:
 ; MIPS32R5EL:       # %bb.0: # %entry
 ; MIPS32R5EL-NEXT:    addiu $sp, $sp, -144
@@ -567,8 +397,8 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x
 ; MIPS32R5EL-NEXT:    .cfi_def_cfa_register 30
 ; MIPS32R5EL-NEXT:    addiu $1, $zero, -16
 ; MIPS32R5EL-NEXT:    and $sp, $sp, $1
-; MIPS32R5EL-NEXT:    sw $5, 128($sp)
-; MIPS32R5EL-NEXT:    sw $4, 132($sp)
+; MIPS32R5EL-NEXT:    sh $5, 128($sp)
+; MIPS32R5EL-NEXT:    sh $4, 132($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 129($sp)
 ; MIPS32R5EL-NEXT:    sw $1, 72($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 128($sp)
@@ -580,14 +410,14 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x
 ; MIPS32R5EL-NEXT:    ld.d $w0, 64($sp)
 ; MIPS32R5EL-NEXT:    ld.d $w1, 48($sp)
 ; MIPS32R5EL-NEXT:    addv.d $w0, $w1, $w0
-; MIPS32R5EL-NEXT:    sw $6, 124($sp)
+; MIPS32R5EL-NEXT:    sh $6, 124($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 125($sp)
 ; MIPS32R5EL-NEXT:    sw $1, 88($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 124($sp)
 ; MIPS32R5EL-NEXT:    sw $1, 80($sp)
 ; MIPS32R5EL-NEXT:    ld.d $w1, 80($sp)
 ; MIPS32R5EL-NEXT:    addv.d $w0, $w0, $w1
-; MIPS32R5EL-NEXT:    sw $7, 120($sp)
+; MIPS32R5EL-NEXT:    sh $7, 120($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 121($sp)
 ; MIPS32R5EL-NEXT:    sw $1, 104($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 120($sp)
@@ -623,102 +453,6 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x
 ; MIPS32R5EL-NEXT:    addiu $sp, $sp, 144
 ; MIPS32R5EL-NEXT:    jr $ra
 ; MIPS32R5EL-NEXT:    nop
-;
-; MIPS64R5EL-LABEL: i8x2_7:
-; MIPS64R5EL:       # %bb.0: # %entry
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -288
-; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 288
-; MIPS64R5EL-NEXT:    sd $4, 280($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 281($sp)
-; MIPS64R5EL-NEXT:    sh $1, 2($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 280($sp)
-; MIPS64R5EL-NEXT:    sh $1, 0($sp)
-; MIPS64R5EL-NEXT:    ld.h $w0, 0($sp)
-; MIPS64R5EL-NEXT:    copy_s.h $1, $w0[0]
-; MIPS64R5EL-NEXT:    copy_s.h $2, $w0[1]
-; MIPS64R5EL-NEXT:    sd $5, 272($sp)
-; MIPS64R5EL-NEXT:    lbu $3, 273($sp)
-; MIPS64R5EL-NEXT:    sh $3, 18($sp)
-; MIPS64R5EL-NEXT:    lbu $3, 272($sp)
-; MIPS64R5EL-NEXT:    sh $3, 16($sp)
-; MIPS64R5EL-NEXT:    ld.h $w0, 16($sp)
-; MIPS64R5EL-NEXT:    copy_s.h $3, $w0[0]
-; MIPS64R5EL-NEXT:    copy_s.h $4, $w0[1]
-; MIPS64R5EL-NEXT:    sw $4, 136($sp)
-; MIPS64R5EL-NEXT:    sw $3, 128($sp)
-; MIPS64R5EL-NEXT:    sw $2, 120($sp)
-; MIPS64R5EL-NEXT:    sw $1, 112($sp)
-; MIPS64R5EL-NEXT:    ld.d $w0, 128($sp)
-; MIPS64R5EL-NEXT:    ld.d $w1, 112($sp)
-; MIPS64R5EL-NEXT:    addv.d $w0, $w1, $w0
-; MIPS64R5EL-NEXT:    sd $6, 264($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 265($sp)
-; MIPS64R5EL-NEXT:    sh $1, 34($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 264($sp)
-; MIPS64R5EL-NEXT:    sh $1, 32($sp)
-; MIPS64R5EL-NEXT:    ld.h $w1, 32($sp)
-; MIPS64R5EL-NEXT:    copy_s.h $1, $w1[0]
-; MIPS64R5EL-NEXT:    copy_s.h $2, $w1[1]
-; MIPS64R5EL-NEXT:    sw $2, 152($sp)
-; MIPS64R5EL-NEXT:    sw $1, 144($sp)
-; MIPS64R5EL-NEXT:    ld.d $w1, 144($sp)
-; MIPS64R5EL-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5EL-NEXT:    sd $7, 256($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 257($sp)
-; MIPS64R5EL-NEXT:    sh $1, 50($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 256($sp)
-; MIPS64R5EL-NEXT:    sh $1, 48($sp)
-; MIPS64R5EL-NEXT:    ld.h $w1, 48($sp)
-; MIPS64R5EL-NEXT:    copy_s.h $1, $w1[0]
-; MIPS64R5EL-NEXT:    copy_s.h $2, $w1[1]
-; MIPS64R5EL-NEXT:    sw $2, 168($sp)
-; MIPS64R5EL-NEXT:    sw $1, 160($sp)
-; MIPS64R5EL-NEXT:    ld.d $w1, 160($sp)
-; MIPS64R5EL-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5EL-NEXT:    sd $8, 248($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 249($sp)
-; MIPS64R5EL-NEXT:    sh $1, 66($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 248($sp)
-; MIPS64R5EL-NEXT:    sh $1, 64($sp)
-; MIPS64R5EL-NEXT:    ld.h $w1, 64($sp)
-; MIPS64R5EL-NEXT:    copy_s.h $1, $w1[0]
-; MIPS64R5EL-NEXT:    copy_s.h $2, $w1[1]
-; MIPS64R5EL-NEXT:    sw $2, 184($sp)
-; MIPS64R5EL-NEXT:    sw $1, 176($sp)
-; MIPS64R5EL-NEXT:    ld.d $w1, 176($sp)
-; MIPS64R5EL-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5EL-NEXT:    sd $10, 232($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 233($sp)
-; MIPS64R5EL-NEXT:    sh $1, 98($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 232($sp)
-; MIPS64R5EL-NEXT:    sh $1, 96($sp)
-; MIPS64R5EL-NEXT:    ld.h $w1, 96($sp)
-; MIPS64R5EL-NEXT:    copy_s.h $1, $w1[0]
-; MIPS64R5EL-NEXT:    copy_s.h $2, $w1[1]
-; MIPS64R5EL-NEXT:    sd $9, 240($sp)
-; MIPS64R5EL-NEXT:    lbu $3, 241($sp)
-; MIPS64R5EL-NEXT:    sh $3, 82($sp)
-; MIPS64R5EL-NEXT:    lbu $3, 240($sp)
-; MIPS64R5EL-NEXT:    sh $3, 80($sp)
-; MIPS64R5EL-NEXT:    ld.h $w1, 80($sp)
-; MIPS64R5EL-NEXT:    copy_s.h $3, $w1[0]
-; MIPS64R5EL-NEXT:    copy_s.h $4, $w1[1]
-; MIPS64R5EL-NEXT:    sw $4, 200($sp)
-; MIPS64R5EL-NEXT:    sw $3, 192($sp)
-; MIPS64R5EL-NEXT:    ld.d $w1, 192($sp)
-; MIPS64R5EL-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5EL-NEXT:    sw $2, 216($sp)
-; MIPS64R5EL-NEXT:    sw $1, 208($sp)
-; MIPS64R5EL-NEXT:    ld.d $w1, 208($sp)
-; MIPS64R5EL-NEXT:    addv.d $w0, $w0, $w1
-; MIPS64R5EL-NEXT:    copy_s.d $1, $w0[0]
-; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5EL-NEXT:    sb $2, 229($sp)
-; MIPS64R5EL-NEXT:    sb $1, 228($sp)
-; MIPS64R5EL-NEXT:    lh $2, 228($sp)
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 288
-; MIPS64R5EL-NEXT:    jr $ra
-; MIPS64R5EL-NEXT:    nop
 entry:
   %0 = add <2 x i8> %a, %b
   %1 = add <2 x i8> %0, %c
@@ -2727,6 +2461,8 @@ define void @double_2(<2 x double> %a, <2 x double> %b) {
 @gv4i16 = global <4 x i16> <i16 0, i16 1, i16 2, i16 3>
 @gv8i16 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 
+@gv2i24 = global <2 x i24> <i24 1, i24 2>
+
 @gv2i32 = global <2 x i32> <i32 0, i32 1>
 @gv4i32 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 
@@ -3479,12 +3215,8 @@ define void @call_i8_2() {
 ; MIPS32EB-NEXT:    addiu $5, $zero, 3080
 ; MIPS32EB-NEXT:    jal i8_2
 ; MIPS32EB-NEXT:    nop
-; MIPS32EB-NEXT:    srl $1, $2, 16
-; MIPS32EB-NEXT:    lui $3, %hi(gv2i8)
-; MIPS32EB-NEXT:    addiu $4, $3, %lo(gv2i8)
-; MIPS32EB-NEXT:    sb $1, 1($4)
-; MIPS32EB-NEXT:    srl $1, $2, 24
-; MIPS32EB-NEXT:    sb $1, %lo(gv2i8)($3)
+; MIPS32EB-NEXT:    lui $1, %hi(gv2i8)
+; MIPS32EB-NEXT:    sh $2, %lo(gv2i8)($1)
 ; MIPS32EB-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
 ; MIPS32EB-NEXT:    addiu $sp, $sp, 24
 ; MIPS32EB-NEXT:    jr $ra
@@ -3506,11 +3238,8 @@ define void @call_i8_2() {
 ; MIPS64EB-NEXT:    daddiu $5, $zero, 3080
 ; MIPS64EB-NEXT:    jalr $25
 ; MIPS64EB-NEXT:    nop
-; MIPS64EB-NEXT:    dsrl $1, $2, 48
-; MIPS64EB-NEXT:    ld $3, %got_disp(gv2i8)($gp)
-; MIPS64EB-NEXT:    sb $1, 1($3)
-; MIPS64EB-NEXT:    dsrl $1, $2, 56
-; MIPS64EB-NEXT:    sb $1, 0($3)
+; MIPS64EB-NEXT:    ld $1, %got_disp(gv2i8)($gp)
+; MIPS64EB-NEXT:    sh $2, 0($1)
 ; MIPS64EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
 ; MIPS64EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
 ; MIPS64EB-NEXT:    daddiu $sp, $sp, 16
@@ -3527,9 +3256,7 @@ define void @call_i8_2() {
 ; MIPS32R5EB-NEXT:    addiu $5, $zero, 3080
 ; MIPS32R5EB-NEXT:    jal i8_2
 ; MIPS32R5EB-NEXT:    nop
-; MIPS32R5EB-NEXT:    sw $2, 16($sp)
 ; MIPS32R5EB-NEXT:    lui $1, %hi(gv2i8)
-; MIPS32R5EB-NEXT:    lhu $2, 16($sp)
 ; MIPS32R5EB-NEXT:    sh $2, %lo(gv2i8)($1)
 ; MIPS32R5EB-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
 ; MIPS32R5EB-NEXT:    addiu $sp, $sp, 32
@@ -3538,43 +3265,29 @@ define void @call_i8_2() {
 ;
 ; MIPS64R5EB-LABEL: call_i8_2:
 ; MIPS64R5EB:       # %bb.0: # %entry
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -64
-; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 64
-; MIPS64R5EB-NEXT:    sd $ra, 56($sp) # 8-byte Folded Spill
-; MIPS64R5EB-NEXT:    sd $gp, 48($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EB-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
 ; MIPS64R5EB-NEXT:    .cfi_offset 31, -8
 ; MIPS64R5EB-NEXT:    .cfi_offset 28, -16
 ; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_2)))
 ; MIPS64R5EB-NEXT:    daddu $1, $1, $25
 ; MIPS64R5EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_2)))
 ; MIPS64R5EB-NEXT:    addiu $1, $zero, 1543
-; MIPS64R5EB-NEXT:    sh $1, 40($sp)
+; MIPS64R5EB-NEXT:    sh $1, 8($sp)
 ; MIPS64R5EB-NEXT:    addiu $1, $zero, 3080
-; MIPS64R5EB-NEXT:    sh $1, 44($sp)
+; MIPS64R5EB-NEXT:    sh $1, 12($sp)
 ; MIPS64R5EB-NEXT:    ld $25, %call16(i8_2)($gp)
-; MIPS64R5EB-NEXT:    lh $4, 40($sp)
-; MIPS64R5EB-NEXT:    lh $5, 44($sp)
+; MIPS64R5EB-NEXT:    lh $4, 8($sp)
+; MIPS64R5EB-NEXT:    lh $5, 12($sp)
 ; MIPS64R5EB-NEXT:    jalr $25
 ; MIPS64R5EB-NEXT:    nop
-; MIPS64R5EB-NEXT:    sd $2, 32($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 33($sp)
-; MIPS64R5EB-NEXT:    sh $1, 2($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 32($sp)
-; MIPS64R5EB-NEXT:    sh $1, 0($sp)
-; MIPS64R5EB-NEXT:    ld.h $w0, 0($sp)
-; MIPS64R5EB-NEXT:    copy_s.h $1, $w0[0]
-; MIPS64R5EB-NEXT:    copy_s.h $2, $w0[1]
-; MIPS64R5EB-NEXT:    sw $2, 28($sp)
-; MIPS64R5EB-NEXT:    sw $1, 20($sp)
-; MIPS64R5EB-NEXT:    ld.d $w0, 16($sp)
-; MIPS64R5EB-NEXT:    copy_s.d $1, $w0[0]
-; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5EB-NEXT:    ld $3, %got_disp(gv2i8)($gp)
-; MIPS64R5EB-NEXT:    sb $2, 1($3)
-; MIPS64R5EB-NEXT:    sb $1, 0($3)
-; MIPS64R5EB-NEXT:    ld $gp, 48($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT:    ld $ra, 56($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 64
+; MIPS64R5EB-NEXT:    ld $1, %got_disp(gv2i8)($gp)
+; MIPS64R5EB-NEXT:    sh $2, 0($1)
+; MIPS64R5EB-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
 ; MIPS64R5EB-NEXT:    jr $ra
 ; MIPS64R5EB-NEXT:    nop
 ;
@@ -3589,10 +3302,7 @@ define void @call_i8_2() {
 ; MIPS32EL-NEXT:    jal i8_2
 ; MIPS32EL-NEXT:    nop
 ; MIPS32EL-NEXT:    lui $1, %hi(gv2i8)
-; MIPS32EL-NEXT:    sb $2, %lo(gv2i8)($1)
-; MIPS32EL-NEXT:    srl $2, $2, 8
-; MIPS32EL-NEXT:    addiu $1, $1, %lo(gv2i8)
-; MIPS32EL-NEXT:    sb $2, 1($1)
+; MIPS32EL-NEXT:    sh $2, %lo(gv2i8)($1)
 ; MIPS32EL-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
 ; MIPS32EL-NEXT:    addiu $sp, $sp, 24
 ; MIPS32EL-NEXT:    jr $ra
@@ -3614,11 +3324,8 @@ define void @call_i8_2() {
 ; MIPS64EL-NEXT:    daddiu $5, $zero, 2060
 ; MIPS64EL-NEXT:    jalr $25
 ; MIPS64EL-NEXT:    nop
-; MIPS64EL-NEXT:    sll $1, $2, 0
-; MIPS64EL-NEXT:    ld $2, %got_disp(gv2i8)($gp)
-; MIPS64EL-NEXT:    sb $1, 0($2)
-; MIPS64EL-NEXT:    srl $1, $1, 8
-; MIPS64EL-NEXT:    sb $1, 1($2)
+; MIPS64EL-NEXT:    ld $1, %got_disp(gv2i8)($gp)
+; MIPS64EL-NEXT:    sh $2, 0($1)
 ; MIPS64EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
 ; MIPS64EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
 ; MIPS64EL-NEXT:    daddiu $sp, $sp, 16
@@ -3635,9 +3342,7 @@ define void @call_i8_2() {
 ; MIPS32R5EL-NEXT:    addiu $5, $zero, 2060
 ; MIPS32R5EL-NEXT:    jal i8_2
 ; MIPS32R5EL-NEXT:    nop
-; MIPS32R5EL-NEXT:    sw $2, 16($sp)
 ; MIPS32R5EL-NEXT:    lui $1, %hi(gv2i8)
-; MIPS32R5EL-NEXT:    lhu $2, 16($sp)
 ; MIPS32R5EL-NEXT:    sh $2, %lo(gv2i8)($1)
 ; MIPS32R5EL-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
 ; MIPS32R5EL-NEXT:    addiu $sp, $sp, 32
@@ -3646,43 +3351,29 @@ define void @call_i8_2() {
 ;
 ; MIPS64R5EL-LABEL: call_i8_2:
 ; MIPS64R5EL:       # %bb.0: # %entry
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -64
-; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 64
-; MIPS64R5EL-NEXT:    sd $ra, 56($sp) # 8-byte Folded Spill
-; MIPS64R5EL-NEXT:    sd $gp, 48($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
+; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
+; MIPS64R5EL-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT:    sd $gp, 16($sp) # 8-byte Folded Spill
 ; MIPS64R5EL-NEXT:    .cfi_offset 31, -8
 ; MIPS64R5EL-NEXT:    .cfi_offset 28, -16
 ; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i8_2)))
 ; MIPS64R5EL-NEXT:    daddu $1, $1, $25
 ; MIPS64R5EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_2)))
 ; MIPS64R5EL-NEXT:    addiu $1, $zero, 1798
-; MIPS64R5EL-NEXT:    sh $1, 40($sp)
+; MIPS64R5EL-NEXT:    sh $1, 8($sp)
 ; MIPS64R5EL-NEXT:    addiu $1, $zero, 2060
-; MIPS64R5EL-NEXT:    sh $1, 44($sp)
+; MIPS64R5EL-NEXT:    sh $1, 12($sp)
 ; MIPS64R5EL-NEXT:    ld $25, %call16(i8_2)($gp)
-; MIPS64R5EL-NEXT:    lh $4, 40($sp)
-; MIPS64R5EL-NEXT:    lh $5, 44($sp)
+; MIPS64R5EL-NEXT:    lh $4, 8($sp)
+; MIPS64R5EL-NEXT:    lh $5, 12($sp)
 ; MIPS64R5EL-NEXT:    jalr $25
 ; MIPS64R5EL-NEXT:    nop
-; MIPS64R5EL-NEXT:    sd $2, 32($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 33($sp)
-; MIPS64R5EL-NEXT:    sh $1, 2($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 32($sp)
-; MIPS64R5EL-NEXT:    sh $1, 0($sp)
-; MIPS64R5EL-NEXT:    ld.h $w0, 0($sp)
-; MIPS64R5EL-NEXT:    copy_s.h $1, $w0[0]
-; MIPS64R5EL-NEXT:    copy_s.h $2, $w0[1]
-; MIPS64R5EL-NEXT:    sw $2, 24($sp)
-; MIPS64R5EL-NEXT:    sw $1, 16($sp)
-; MIPS64R5EL-NEXT:    ld.d $w0, 16($sp)
-; MIPS64R5EL-NEXT:    copy_s.d $1, $w0[0]
-; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5EL-NEXT:    ld $3, %got_disp(gv2i8)($gp)
-; MIPS64R5EL-NEXT:    sb $2, 1($3)
-; MIPS64R5EL-NEXT:    sb $1, 0($3)
-; MIPS64R5EL-NEXT:    ld $gp, 48($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT:    ld $ra, 56($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 64
+; MIPS64R5EL-NEXT:    ld $1, %got_disp(gv2i8)($gp)
+; MIPS64R5EL-NEXT:    sh $2, 0($1)
+; MIPS64R5EL-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
 ; MIPS64R5EL-NEXT:    jr $ra
 ; MIPS64R5EL-NEXT:    nop
 entry:


        


More information about the llvm-commits mailing list