[llvm] e49103b - [Mips] Fix argument lowering for illegal vector types (PR63608)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 24 03:07:18 PDT 2023


Author: Nikita Popov
Date: 2023-07-24T12:07:09+02:00
New Revision: e49103b2790f346b436b280a2da7a1c6a9f2ab3d

URL: https://github.com/llvm/llvm-project/commit/e49103b2790f346b436b280a2da7a1c6a9f2ab3d
DIFF: https://github.com/llvm/llvm-project/commit/e49103b2790f346b436b280a2da7a1c6a9f2ab3d.diff

LOG: [Mips] Fix argument lowering for illegal vector types (PR63608)

The Mips MSA ABI requires that legal vector types are passed in
scalar registers in packed representation. E.g. a type like v16i8
would be passed as two i64 registers.

The implementation attempts to do the same for illegal vectors with
non-power-of-two element counts or non-power-of-two element types.
However, the SDAG argument lowering code doesn't support this, and
it is not easy to extend it to support this (we would have to deal
with situations like passing v7i18 as two i64 values).

This patch instead opts to restrict the special argument lowering
to only vectors with a power-of-two number of elements and round
element types (i.e. whose size is a power-of-two number of bytes).
Everything else is lowered naively, that is by passing each element
in promoted registers.

Fixes https://github.com/llvm/llvm-project/issues/63608.

Differential Revision: https://reviews.llvm.org/D154445

Added: 
    llvm/test/CodeGen/Mips/cconv/illegal-vectors.ll

Modified: 
    llvm/lib/Target/Mips/MipsISelLowering.cpp
    llvm/test/CodeGen/Mips/cconv/vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 2851dac53bdf42..18d7773067f137 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -102,29 +102,37 @@ MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
   if (!VT.isVector())
     return getRegisterType(Context, VT);
 
-  return Subtarget.isABI_O32() || VT.getSizeInBits() == 32 ? MVT::i32
-                                                           : MVT::i64;
+  if (VT.isPow2VectorType() && VT.getVectorElementType().isRound())
+    return Subtarget.isABI_O32() || VT.getSizeInBits() == 32 ? MVT::i32
+                                                             : MVT::i64;
+  return getRegisterType(Context, VT.getVectorElementType());
 }
 
 unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                            CallingConv::ID CC,
                                                            EVT VT) const {
-  if (VT.isVector())
-    return divideCeil(VT.getSizeInBits(), Subtarget.isABI_O32() ? 32 : 64);
+  if (VT.isVector()) { // Packed only for pow2 count and round elements.
+    if (VT.isPow2VectorType() && VT.getVectorElementType().isRound())
+      return divideCeil(VT.getSizeInBits(), Subtarget.isABI_O32() ? 32 : 64);
+    return VT.getVectorNumElements() *
+           getNumRegisters(Context, VT.getVectorElementType());
+  }
   return MipsTargetLowering::getNumRegisters(Context, VT);
 }
 
 unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
     LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
     unsigned &NumIntermediates, MVT &RegisterVT) const {
-  // Break down vector types to either 2 i64s or 4 i32s.
-  RegisterVT = getRegisterTypeForCallingConv(Context, CC, VT);
-  IntermediateVT = RegisterVT;
-  NumIntermediates =
-      VT.getFixedSizeInBits() < RegisterVT.getFixedSizeInBits()
-          ? VT.getVectorNumElements()
-          : divideCeil(VT.getSizeInBits(), RegisterVT.getSizeInBits());
-  return NumIntermediates;
+  if (VT.isPow2VectorType() && VT.getVectorElementType().isRound()) {
+    IntermediateVT = getRegisterTypeForCallingConv(Context, CC, VT);
+    RegisterVT = IntermediateVT.getSimpleVT();
+    NumIntermediates = getNumRegistersForCallingConv(Context, CC, VT);
+    return NumIntermediates; // Packed: one register per intermediate.
+  }
+  IntermediateVT = VT.getVectorElementType(); // Naive: split per element.
+  NumIntermediates = VT.getVectorNumElements();
+  RegisterVT = getRegisterType(Context, IntermediateVT);
+  return NumIntermediates * getNumRegisters(Context, IntermediateVT);
 }
 
 SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {

diff  --git a/llvm/test/CodeGen/Mips/cconv/illegal-vectors.ll b/llvm/test/CodeGen/Mips/cconv/illegal-vectors.ll
new file mode 100644
index 00000000000000..5cb5972f677536
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/cconv/illegal-vectors.ll
@@ -0,0 +1,1809 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=mips64 < %s | FileCheck %s --check-prefix=MIPS64
+; RUN: llc -mtriple=mips < %s | FileCheck %s --check-prefix=MIPS32
+
+define void @arg_v1i32(<1 x i32> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v1i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sw $4, 0($5)
+;
+; MIPS32-LABEL: arg_v1i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $4, 0($5)
+  store <1 x i32> %vec, ptr %p
+  ret void
+}
+
+define <1 x i32> @ret_v1i32(ptr %p) {
+; MIPS64-LABEL: ret_v1i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lw $2, 0($4)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32-LABEL: ret_v1i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $2, 0($4)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+  %v = load <1 x i32>, ptr %p
+  ret <1 x i32> %v
+}
+
+define void @call_v1i32(ptr %p) nounwind {
+; MIPS64-LABEL: call_v1i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $sp, $sp, -16
+; MIPS64-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $16, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    move $16, $4
+; MIPS64-NEXT:    lw $4, 0($4)
+; MIPS64-NEXT:    jal arg_v1i32
+; MIPS64-NEXT:    nop
+; MIPS64-NEXT:    jal ret_v1i32
+; MIPS64-NEXT:    nop
+; MIPS64-NEXT:    sw $2, 0($16)
+; MIPS64-NEXT:    ld $16, 0($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    daddiu $sp, $sp, 16
+;
+; MIPS32-LABEL: call_v1i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addiu $sp, $sp, -24
+; MIPS32-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $16, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $16, $4
+; MIPS32-NEXT:    lw $4, 0($4)
+; MIPS32-NEXT:    jal arg_v1i32
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    jal ret_v1i32
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    sw $2, 0($16)
+; MIPS32-NEXT:    lw $16, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    addiu $sp, $sp, 24
+  %v1 = load <1 x i32>, ptr %p
+  call void @arg_v1i32(<1 x i32> %v1)
+  %v2 = call <1 x i32> @ret_v1i32()
+  store <1 x i32> %v2, ptr %p
+  ret void
+}
+
+define void @arg_v2i32(<2 x i32> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v2i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sd $4, 0($5)
+;
+; MIPS32-LABEL: arg_v2i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    sw $5, 4($6)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $4, 0($6)
+  store <2 x i32> %vec, ptr %p
+  ret void
+}
+
+define <2 x i32> @ret_v2i32(ptr %p) {
+; MIPS64-LABEL: ret_v2i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    ld $2, 0($4)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32-LABEL: ret_v2i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $2, 0($4)
+; MIPS32-NEXT:    lw $3, 4($4)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+  %v = load <2 x i32>, ptr %p
+  ret <2 x i32> %v
+}
+
+define void @call_v2i32(ptr %p) nounwind {
+; MIPS64-LABEL: call_v2i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $sp, $sp, -16
+; MIPS64-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $16, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    move $16, $4
+; MIPS64-NEXT:    ld $4, 0($4)
+; MIPS64-NEXT:    jal arg_v2i32
+; MIPS64-NEXT:    nop
+; MIPS64-NEXT:    jal ret_v2i32
+; MIPS64-NEXT:    nop
+; MIPS64-NEXT:    sd $2, 0($16)
+; MIPS64-NEXT:    ld $16, 0($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    daddiu $sp, $sp, 16
+;
+; MIPS32-LABEL: call_v2i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addiu $sp, $sp, -24
+; MIPS32-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $16, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $16, $4
+; MIPS32-NEXT:    lw $5, 4($4)
+; MIPS32-NEXT:    lw $4, 0($4)
+; MIPS32-NEXT:    jal arg_v2i32
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    jal ret_v2i32
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    sw $3, 4($16)
+; MIPS32-NEXT:    sw $2, 0($16)
+; MIPS32-NEXT:    lw $16, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    addiu $sp, $sp, 24
+  %v1 = load <2 x i32>, ptr %p
+  call void @arg_v2i32(<2 x i32> %v1)
+  %v2 = call <2 x i32> @ret_v2i32()
+  store <2 x i32> %v2, ptr %p
+  ret void
+}
+
+define <3 x i32> @arg_v3i32(<3 x i32> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v3i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $1, $zero, 1
+; MIPS64-NEXT:    dsll $1, $1, 32
+; MIPS64-NEXT:    daddiu $2, $1, -1
+; MIPS64-NEXT:    sll $1, $6, 0
+; MIPS64-NEXT:    sw $1, 8($7)
+; MIPS64-NEXT:    and $2, $5, $2
+; MIPS64-NEXT:    dsll $3, $4, 32
+; MIPS64-NEXT:    or $2, $2, $3
+; MIPS64-NEXT:    sd $2, 0($7)
+; MIPS64-NEXT:    sll $2, $4, 0
+; MIPS64-NEXT:    sll $3, $5, 0
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    move $4, $1
+;
+; MIPS32-LABEL: arg_v3i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    sw $6, 8($7)
+; MIPS32-NEXT:    sw $5, 4($7)
+; MIPS32-NEXT:    sw $4, 0($7)
+; MIPS32-NEXT:    move $2, $4
+; MIPS32-NEXT:    move $3, $5
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    move $4, $6
+  store <3 x i32> %vec, ptr %p
+  ret <3 x i32> %vec
+}
+
+define <3 x i32> @ret_v3i32(ptr %p) {
+; MIPS64-LABEL: ret_v3i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lw $1, 8($4)
+; MIPS64-NEXT:    ld $2, 0($4)
+; MIPS64-NEXT:    sll $3, $2, 0
+; MIPS64-NEXT:    dsrl $2, $2, 32
+; MIPS64-NEXT:    sll $2, $2, 0
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    move $4, $1
+;
+; MIPS32-LABEL: ret_v3i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $2, 0($4)
+; MIPS32-NEXT:    lw $3, 4($4)
+; MIPS32-NEXT:    lw $4, 8($4)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+  %v = load <3 x i32>, ptr %p
+  ret <3 x i32> %v
+}
+
+define void @call_v3i32(ptr %p) nounwind {
+; MIPS64-LABEL: call_v3i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $sp, $sp, -16
+; MIPS64-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $16, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    move $16, $4
+; MIPS64-NEXT:    lw $6, 8($4)
+; MIPS64-NEXT:    ld $5, 0($4)
+; MIPS64-NEXT:    jal arg_v3i32
+; MIPS64-NEXT:    dsrl $4, $5, 32
+; MIPS64-NEXT:    jal ret_v3i32
+; MIPS64-NEXT:    nop
+; MIPS64-NEXT:    # kill: def $v0 killed $v0 def $v0_64
+; MIPS64-NEXT:    sw $4, 8($16)
+; MIPS64-NEXT:    dsll $1, $2, 32
+; MIPS64-NEXT:    dsll $2, $3, 32
+; MIPS64-NEXT:    dsrl $2, $2, 32
+; MIPS64-NEXT:    or $1, $2, $1
+; MIPS64-NEXT:    sd $1, 0($16)
+; MIPS64-NEXT:    ld $16, 0($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    daddiu $sp, $sp, 16
+;
+; MIPS32-LABEL: call_v3i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addiu $sp, $sp, -24
+; MIPS32-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $16, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $16, $4
+; MIPS32-NEXT:    lw $6, 8($4)
+; MIPS32-NEXT:    lw $5, 4($4)
+; MIPS32-NEXT:    lw $4, 0($4)
+; MIPS32-NEXT:    jal arg_v3i32
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    jal ret_v3i32
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    sw $4, 8($16)
+; MIPS32-NEXT:    sw $3, 4($16)
+; MIPS32-NEXT:    sw $2, 0($16)
+; MIPS32-NEXT:    lw $16, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    addiu $sp, $sp, 24
+  %v1 = load <3 x i32>, ptr %p
+  call void @arg_v3i32(<3 x i32> %v1)
+  %v2 = call <3 x i32> @ret_v3i32()
+  store <3 x i32> %v2, ptr %p
+  ret void
+}
+
+define void @arg_v4i32(<4 x i32> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v4i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    sd $5, 8($6)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sd $4, 0($6)
+;
+; MIPS32-LABEL: arg_v4i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $1, 16($sp)
+; MIPS32-NEXT:    sw $7, 12($1)
+; MIPS32-NEXT:    sw $6, 8($1)
+; MIPS32-NEXT:    sw $5, 4($1)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $4, 0($1)
+  store <4 x i32> %vec, ptr %p
+  ret void
+}
+
+define <4 x i32> @ret_v4i32(ptr %p) {
+; MIPS64-LABEL: ret_v4i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    ld $2, 0($4)
+; MIPS64-NEXT:    ld $3, 8($4)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32-LABEL: ret_v4i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $2, 0($4)
+; MIPS32-NEXT:    lw $3, 4($4)
+; MIPS32-NEXT:    lw $1, 8($4)
+; MIPS32-NEXT:    lw $5, 12($4)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    move $4, $1
+  %v = load <4 x i32>, ptr %p
+  ret <4 x i32> %v
+}
+
+define void @call_v4i32(ptr %p) nounwind {
+; MIPS64-LABEL: call_v4i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $sp, $sp, -16
+; MIPS64-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $16, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    move $16, $4
+; MIPS64-NEXT:    ld $5, 8($4)
+; MIPS64-NEXT:    ld $4, 0($4)
+; MIPS64-NEXT:    jal arg_v4i32
+; MIPS64-NEXT:    nop
+; MIPS64-NEXT:    jal ret_v4i32
+; MIPS64-NEXT:    nop
+; MIPS64-NEXT:    sd $3, 8($16)
+; MIPS64-NEXT:    sd $2, 0($16)
+; MIPS64-NEXT:    ld $16, 0($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    daddiu $sp, $sp, 16
+;
+; MIPS32-LABEL: call_v4i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addiu $sp, $sp, -24
+; MIPS32-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $16, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $16, $4
+; MIPS32-NEXT:    lw $7, 12($4)
+; MIPS32-NEXT:    lw $6, 8($4)
+; MIPS32-NEXT:    lw $5, 4($4)
+; MIPS32-NEXT:    lw $4, 0($4)
+; MIPS32-NEXT:    jal arg_v4i32
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    jal ret_v4i32
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    sw $5, 12($16)
+; MIPS32-NEXT:    sw $4, 8($16)
+; MIPS32-NEXT:    sw $3, 4($16)
+; MIPS32-NEXT:    sw $2, 0($16)
+; MIPS32-NEXT:    lw $16, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    addiu $sp, $sp, 24
+  %v1 = load <4 x i32>, ptr %p
+  call void @arg_v4i32(<4 x i32> %v1)
+  %v2 = call <4 x i32> @ret_v4i32()
+  store <4 x i32> %v2, ptr %p
+  ret void
+}
+
+define void @arg_v5i32(<5 x i32> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v5i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $1, $zero, 1
+; MIPS64-NEXT:    dsll $1, $1, 32
+; MIPS64-NEXT:    daddiu $1, $1, -1
+; MIPS64-NEXT:    and $2, $7, $1
+; MIPS64-NEXT:    dsll $3, $6, 32
+; MIPS64-NEXT:    or $2, $2, $3
+; MIPS64-NEXT:    sw $8, 16($9)
+; MIPS64-NEXT:    sd $2, 8($9)
+; MIPS64-NEXT:    and $1, $5, $1
+; MIPS64-NEXT:    dsll $2, $4, 32
+; MIPS64-NEXT:    or $1, $1, $2
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sd $1, 0($9)
+;
+; MIPS32-LABEL: arg_v5i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $1, 20($sp)
+; MIPS32-NEXT:    lw $2, 16($sp)
+; MIPS32-NEXT:    sw $2, 16($1)
+; MIPS32-NEXT:    sw $7, 12($1)
+; MIPS32-NEXT:    sw $6, 8($1)
+; MIPS32-NEXT:    sw $5, 4($1)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $4, 0($1)
+  store <5 x i32> %vec, ptr %p
+  ret void
+}
+
+define <5 x i32> @ret_v5i32(ptr %p) {
+; MIPS64-LABEL: ret_v5i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lw $1, 16($5)
+; MIPS64-NEXT:    sw $1, 16($4)
+; MIPS64-NEXT:    ld $1, 8($5)
+; MIPS64-NEXT:    sd $1, 8($4)
+; MIPS64-NEXT:    ld $1, 0($5)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sd $1, 0($4)
+;
+; MIPS32-LABEL: ret_v5i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $1, 12($5)
+; MIPS32-NEXT:    lw $2, 16($5)
+; MIPS32-NEXT:    sw $2, 16($4)
+; MIPS32-NEXT:    sw $1, 12($4)
+; MIPS32-NEXT:    lw $1, 8($5)
+; MIPS32-NEXT:    sw $1, 8($4)
+; MIPS32-NEXT:    lw $1, 4($5)
+; MIPS32-NEXT:    sw $1, 4($4)
+; MIPS32-NEXT:    lw $1, 0($5)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $1, 0($4)
+  %v = load <5 x i32>, ptr %p
+  ret <5 x i32> %v
+}
+
+define void @call_v5i32(ptr %p) nounwind {
+; MIPS64-LABEL: call_v5i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $sp, $sp, -64
+; MIPS64-NEXT:    sd $ra, 56($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $fp, 48($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $16, 40($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    move $fp, $sp
+; MIPS64-NEXT:    daddiu $1, $zero, -32
+; MIPS64-NEXT:    and $sp, $sp, $1
+; MIPS64-NEXT:    move $16, $4
+; MIPS64-NEXT:    lw $8, 16($4)
+; MIPS64-NEXT:    ld $7, 8($4)
+; MIPS64-NEXT:    ld $5, 0($4)
+; MIPS64-NEXT:    dsrl $4, $5, 32
+; MIPS64-NEXT:    jal arg_v5i32
+; MIPS64-NEXT:    dsrl $6, $7, 32
+; MIPS64-NEXT:    jal ret_v5i32
+; MIPS64-NEXT:    daddiu $4, $sp, 0
+; MIPS64-NEXT:    lw $1, 16($sp)
+; MIPS64-NEXT:    ld $2, 0($sp)
+; MIPS64-NEXT:    sd $2, 0($16)
+; MIPS64-NEXT:    ld $2, 8($sp)
+; MIPS64-NEXT:    sd $2, 8($16)
+; MIPS64-NEXT:    sw $1, 16($16)
+; MIPS64-NEXT:    move $sp, $fp
+; MIPS64-NEXT:    ld $16, 40($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $fp, 48($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $ra, 56($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    daddiu $sp, $sp, 64
+;
+; MIPS32-LABEL: call_v5i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addiu $sp, $sp, -96
+; MIPS32-NEXT:    sw $ra, 92($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $fp, 88($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $16, 84($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $fp, $sp
+; MIPS32-NEXT:    addiu $1, $zero, -32
+; MIPS32-NEXT:    and $sp, $sp, $1
+; MIPS32-NEXT:    move $16, $4
+; MIPS32-NEXT:    lw $7, 12($4)
+; MIPS32-NEXT:    lw $6, 8($4)
+; MIPS32-NEXT:    lw $5, 4($4)
+; MIPS32-NEXT:    lw $4, 0($4)
+; MIPS32-NEXT:    lw $1, 16($16)
+; MIPS32-NEXT:    jal arg_v5i32
+; MIPS32-NEXT:    sw $1, 16($sp)
+; MIPS32-NEXT:    jal ret_v5i32
+; MIPS32-NEXT:    addiu $4, $sp, 32
+; MIPS32-NEXT:    lw $1, 36($sp)
+; MIPS32-NEXT:    lw $2, 32($sp)
+; MIPS32-NEXT:    sw $2, 0($16)
+; MIPS32-NEXT:    sw $1, 4($16)
+; MIPS32-NEXT:    lw $1, 40($sp)
+; MIPS32-NEXT:    sw $1, 8($16)
+; MIPS32-NEXT:    lw $1, 44($sp)
+; MIPS32-NEXT:    sw $1, 12($16)
+; MIPS32-NEXT:    lw $1, 48($sp)
+; MIPS32-NEXT:    sw $1, 16($16)
+; MIPS32-NEXT:    move $sp, $fp
+; MIPS32-NEXT:    lw $16, 84($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $fp, 88($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $ra, 92($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    addiu $sp, $sp, 96
+  %v1 = load <5 x i32>, ptr %p
+  call void @arg_v5i32(<5 x i32> %v1)
+  %v2 = call <5 x i32> @ret_v5i32()
+  store <5 x i32> %v2, ptr %p
+  ret void
+}
+
+define void @arg_v8i32(<8 x i32> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v8i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    sd $7, 24($8)
+; MIPS64-NEXT:    sd $6, 16($8)
+; MIPS64-NEXT:    sd $5, 8($8)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sd $4, 0($8)
+;
+; MIPS32-LABEL: arg_v8i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $1, 16($sp)
+; MIPS32-NEXT:    lw $2, 20($sp)
+; MIPS32-NEXT:    lw $3, 24($sp)
+; MIPS32-NEXT:    lw $8, 32($sp)
+; MIPS32-NEXT:    lw $9, 28($sp)
+; MIPS32-NEXT:    sw $9, 28($8)
+; MIPS32-NEXT:    sw $3, 24($8)
+; MIPS32-NEXT:    sw $2, 20($8)
+; MIPS32-NEXT:    sw $1, 16($8)
+; MIPS32-NEXT:    sw $7, 12($8)
+; MIPS32-NEXT:    sw $6, 8($8)
+; MIPS32-NEXT:    sw $5, 4($8)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $4, 0($8)
+  store <8 x i32> %vec, ptr %p
+  ret void
+}
+
+define <8 x i32> @ret_v8i32(ptr %p) {
+; MIPS64-LABEL: ret_v8i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    ld $1, 24($5)
+; MIPS64-NEXT:    sd $1, 24($4)
+; MIPS64-NEXT:    ld $1, 16($5)
+; MIPS64-NEXT:    sd $1, 16($4)
+; MIPS64-NEXT:    ld $1, 8($5)
+; MIPS64-NEXT:    sd $1, 8($4)
+; MIPS64-NEXT:    ld $1, 0($5)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sd $1, 0($4)
+;
+; MIPS32-LABEL: ret_v8i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $1, 12($5)
+; MIPS32-NEXT:    lw $2, 16($5)
+; MIPS32-NEXT:    lw $3, 20($5)
+; MIPS32-NEXT:    lw $6, 24($5)
+; MIPS32-NEXT:    lw $7, 28($5)
+; MIPS32-NEXT:    sw $7, 28($4)
+; MIPS32-NEXT:    sw $6, 24($4)
+; MIPS32-NEXT:    sw $3, 20($4)
+; MIPS32-NEXT:    sw $2, 16($4)
+; MIPS32-NEXT:    sw $1, 12($4)
+; MIPS32-NEXT:    lw $1, 8($5)
+; MIPS32-NEXT:    sw $1, 8($4)
+; MIPS32-NEXT:    lw $1, 4($5)
+; MIPS32-NEXT:    sw $1, 4($4)
+; MIPS32-NEXT:    lw $1, 0($5)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $1, 0($4)
+  %v = load <8 x i32>, ptr %p
+  ret <8 x i32> %v
+}
+
+define void @call_v8i32(ptr %p) nounwind {
+; MIPS64-LABEL: call_v8i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $sp, $sp, -64
+; MIPS64-NEXT:    sd $ra, 56($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $fp, 48($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $16, 40($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    move $fp, $sp
+; MIPS64-NEXT:    daddiu $1, $zero, -32
+; MIPS64-NEXT:    move $16, $4
+; MIPS64-NEXT:    ld $7, 24($4)
+; MIPS64-NEXT:    ld $6, 16($4)
+; MIPS64-NEXT:    ld $5, 8($4)
+; MIPS64-NEXT:    ld $4, 0($4)
+; MIPS64-NEXT:    jal arg_v8i32
+; MIPS64-NEXT:    and $sp, $sp, $1
+; MIPS64-NEXT:    jal ret_v8i32
+; MIPS64-NEXT:    daddiu $4, $sp, 0
+; MIPS64-NEXT:    ld $1, 0($sp)
+; MIPS64-NEXT:    lw $2, 16($sp)
+; MIPS64-NEXT:    lw $3, 20($sp)
+; MIPS64-NEXT:    lw $4, 24($sp)
+; MIPS64-NEXT:    lw $5, 28($sp)
+; MIPS64-NEXT:    sw $5, 28($16)
+; MIPS64-NEXT:    sw $4, 24($16)
+; MIPS64-NEXT:    sw $3, 20($16)
+; MIPS64-NEXT:    sw $2, 16($16)
+; MIPS64-NEXT:    lw $2, 12($sp)
+; MIPS64-NEXT:    sw $2, 12($16)
+; MIPS64-NEXT:    lw $2, 8($sp)
+; MIPS64-NEXT:    sw $2, 8($16)
+; MIPS64-NEXT:    sd $1, 0($16)
+; MIPS64-NEXT:    move $sp, $fp
+; MIPS64-NEXT:    ld $16, 40($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $fp, 48($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $ra, 56($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    daddiu $sp, $sp, 64
+;
+; MIPS32-LABEL: call_v8i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addiu $sp, $sp, -96
+; MIPS32-NEXT:    sw $ra, 92($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $fp, 88($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $16, 84($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $fp, $sp
+; MIPS32-NEXT:    addiu $1, $zero, -32
+; MIPS32-NEXT:    and $sp, $sp, $1
+; MIPS32-NEXT:    move $16, $4
+; MIPS32-NEXT:    lw $7, 12($4)
+; MIPS32-NEXT:    lw $6, 8($4)
+; MIPS32-NEXT:    lw $5, 4($4)
+; MIPS32-NEXT:    lw $4, 0($4)
+; MIPS32-NEXT:    lw $1, 16($16)
+; MIPS32-NEXT:    lw $2, 20($16)
+; MIPS32-NEXT:    lw $3, 24($16)
+; MIPS32-NEXT:    lw $8, 28($16)
+; MIPS32-NEXT:    sw $8, 28($sp)
+; MIPS32-NEXT:    sw $3, 24($sp)
+; MIPS32-NEXT:    sw $2, 20($sp)
+; MIPS32-NEXT:    jal arg_v8i32
+; MIPS32-NEXT:    sw $1, 16($sp)
+; MIPS32-NEXT:    jal ret_v8i32
+; MIPS32-NEXT:    addiu $4, $sp, 32
+; MIPS32-NEXT:    lw $1, 44($sp)
+; MIPS32-NEXT:    lw $2, 48($sp)
+; MIPS32-NEXT:    lw $3, 52($sp)
+; MIPS32-NEXT:    lw $4, 56($sp)
+; MIPS32-NEXT:    lw $5, 60($sp)
+; MIPS32-NEXT:    sw $5, 28($16)
+; MIPS32-NEXT:    sw $4, 24($16)
+; MIPS32-NEXT:    sw $3, 20($16)
+; MIPS32-NEXT:    sw $2, 16($16)
+; MIPS32-NEXT:    sw $1, 12($16)
+; MIPS32-NEXT:    lw $1, 40($sp)
+; MIPS32-NEXT:    sw $1, 8($16)
+; MIPS32-NEXT:    lw $1, 36($sp)
+; MIPS32-NEXT:    sw $1, 4($16)
+; MIPS32-NEXT:    lw $1, 32($sp)
+; MIPS32-NEXT:    sw $1, 0($16)
+; MIPS32-NEXT:    move $sp, $fp
+; MIPS32-NEXT:    lw $16, 84($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $fp, 88($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $ra, 92($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    addiu $sp, $sp, 96
+  %v1 = load <8 x i32>, ptr %p
+  call void @arg_v8i32(<8 x i32> %v1)
+  %v2 = call <8 x i32> @ret_v8i32()
+  store <8 x i32> %v2, ptr %p
+  ret void
+}
+
+define void @arg_v3i24(<3 x i24> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v3i24:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    sll $1, $6, 0
+; MIPS64-NEXT:    srl $2, $1, 8
+; MIPS64-NEXT:    sll $3, $4, 0
+; MIPS64-NEXT:    sll $4, $5, 0
+; MIPS64-NEXT:    sb $1, 8($7)
+; MIPS64-NEXT:    sb $4, 5($7)
+; MIPS64-NEXT:    sb $3, 2($7)
+; MIPS64-NEXT:    sh $2, 6($7)
+; MIPS64-NEXT:    srl $1, $4, 8
+; MIPS64-NEXT:    sb $1, 4($7)
+; MIPS64-NEXT:    srl $1, $4, 16
+; MIPS64-NEXT:    sb $1, 3($7)
+; MIPS64-NEXT:    srl $1, $3, 8
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sh $1, 0($7)
+;
+; MIPS32-LABEL: arg_v3i24:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    srl $1, $6, 8
+; MIPS32-NEXT:    sb $6, 8($7)
+; MIPS32-NEXT:    sb $5, 5($7)
+; MIPS32-NEXT:    sb $4, 2($7)
+; MIPS32-NEXT:    sh $1, 6($7)
+; MIPS32-NEXT:    srl $1, $5, 8
+; MIPS32-NEXT:    sb $1, 4($7)
+; MIPS32-NEXT:    srl $1, $5, 16
+; MIPS32-NEXT:    sb $1, 3($7)
+; MIPS32-NEXT:    srl $1, $4, 8
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sh $1, 0($7)
+  store <3 x i24> %vec, ptr %p
+  ret void
+}
+
+define <3 x i24> @ret_v3i24(ptr %p) {
+; MIPS64-LABEL: ret_v3i24:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lbu $1, 8($4)
+; MIPS64-NEXT:    lh $2, 6($4)
+; MIPS64-NEXT:    sll $3, $2, 8
+; MIPS64-NEXT:    lbu $2, 2($4)
+; MIPS64-NEXT:    lhu $5, 0($4)
+; MIPS64-NEXT:    sll $5, $5, 8
+; MIPS64-NEXT:    or $2, $2, $5
+; MIPS64-NEXT:    or $1, $1, $3
+; MIPS64-NEXT:    lbu $3, 4($4)
+; MIPS64-NEXT:    sll $3, $3, 8
+; MIPS64-NEXT:    lb $5, 3($4)
+; MIPS64-NEXT:    sll $5, $5, 16
+; MIPS64-NEXT:    or $3, $5, $3
+; MIPS64-NEXT:    lbu $4, 5($4)
+; MIPS64-NEXT:    or $3, $4, $3
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    move $4, $1
+;
+; MIPS32-LABEL: ret_v3i24:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lbu $1, 8($4)
+; MIPS32-NEXT:    lh $2, 6($4)
+; MIPS32-NEXT:    sll $3, $2, 8
+; MIPS32-NEXT:    lbu $2, 2($4)
+; MIPS32-NEXT:    lhu $5, 0($4)
+; MIPS32-NEXT:    sll $5, $5, 8
+; MIPS32-NEXT:    or $2, $2, $5
+; MIPS32-NEXT:    or $1, $1, $3
+; MIPS32-NEXT:    lbu $3, 4($4)
+; MIPS32-NEXT:    sll $3, $3, 8
+; MIPS32-NEXT:    lb $5, 3($4)
+; MIPS32-NEXT:    sll $5, $5, 16
+; MIPS32-NEXT:    or $3, $5, $3
+; MIPS32-NEXT:    lbu $4, 5($4)
+; MIPS32-NEXT:    or $3, $4, $3
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    move $4, $1
+  %v = load <3 x i24>, ptr %p
+  ret <3 x i24> %v
+}
+
+define void @call_v3i24(ptr %p) nounwind {
+; MIPS64-LABEL: call_v3i24:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $sp, $sp, -16
+; MIPS64-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $16, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    move $16, $4
+; MIPS64-NEXT:    lbu $1, 4($4)
+; MIPS64-NEXT:    lbu $2, 8($4)
+; MIPS64-NEXT:    lh $3, 6($4)
+; MIPS64-NEXT:    dsll $3, $3, 8
+; MIPS64-NEXT:    lbu $4, 2($4)
+; MIPS64-NEXT:    lh $5, 0($16)
+; MIPS64-NEXT:    dsll $5, $5, 8
+; MIPS64-NEXT:    or $4, $4, $5
+; MIPS64-NEXT:    or $6, $2, $3
+; MIPS64-NEXT:    dsll $1, $1, 8
+; MIPS64-NEXT:    lb $2, 3($16)
+; MIPS64-NEXT:    dsll $2, $2, 16
+; MIPS64-NEXT:    or $1, $2, $1
+; MIPS64-NEXT:    lbu $2, 5($16)
+; MIPS64-NEXT:    jal arg_v3i24
+; MIPS64-NEXT:    or $5, $2, $1
+; MIPS64-NEXT:    jal ret_v3i24
+; MIPS64-NEXT:    nop
+; MIPS64-NEXT:    srl $1, $4, 8
+; MIPS64-NEXT:    sb $4, 8($16)
+; MIPS64-NEXT:    sb $3, 5($16)
+; MIPS64-NEXT:    sb $2, 2($16)
+; MIPS64-NEXT:    sh $1, 6($16)
+; MIPS64-NEXT:    srl $1, $3, 8
+; MIPS64-NEXT:    sb $1, 4($16)
+; MIPS64-NEXT:    srl $1, $3, 16
+; MIPS64-NEXT:    sb $1, 3($16)
+; MIPS64-NEXT:    srl $1, $2, 8
+; MIPS64-NEXT:    sh $1, 0($16)
+; MIPS64-NEXT:    ld $16, 0($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    daddiu $sp, $sp, 16
+;
+; MIPS32-LABEL: call_v3i24:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addiu $sp, $sp, -24
+; MIPS32-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $16, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $16, $4
+; MIPS32-NEXT:    lbu $1, 4($4)
+; MIPS32-NEXT:    lbu $2, 8($4)
+; MIPS32-NEXT:    lh $3, 6($4)
+; MIPS32-NEXT:    sll $3, $3, 8
+; MIPS32-NEXT:    lbu $4, 2($4)
+; MIPS32-NEXT:    lhu $5, 0($16)
+; MIPS32-NEXT:    sll $5, $5, 8
+; MIPS32-NEXT:    or $4, $4, $5
+; MIPS32-NEXT:    or $6, $2, $3
+; MIPS32-NEXT:    sll $1, $1, 8
+; MIPS32-NEXT:    lb $2, 3($16)
+; MIPS32-NEXT:    sll $2, $2, 16
+; MIPS32-NEXT:    or $1, $2, $1
+; MIPS32-NEXT:    lbu $2, 5($16)
+; MIPS32-NEXT:    jal arg_v3i24
+; MIPS32-NEXT:    or $5, $2, $1
+; MIPS32-NEXT:    jal ret_v3i24
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    srl $1, $4, 8
+; MIPS32-NEXT:    sb $4, 8($16)
+; MIPS32-NEXT:    sb $3, 5($16)
+; MIPS32-NEXT:    sb $2, 2($16)
+; MIPS32-NEXT:    sh $1, 6($16)
+; MIPS32-NEXT:    srl $1, $3, 8
+; MIPS32-NEXT:    sb $1, 4($16)
+; MIPS32-NEXT:    srl $1, $3, 16
+; MIPS32-NEXT:    sb $1, 3($16)
+; MIPS32-NEXT:    srl $1, $2, 8
+; MIPS32-NEXT:    sh $1, 0($16)
+; MIPS32-NEXT:    lw $16, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    addiu $sp, $sp, 24
+  %v1 = load <3 x i24>, ptr %p
+  call void @arg_v3i24(<3 x i24> %v1)
+  %v2 = call <3 x i24> @ret_v3i24()
+  store <3 x i24> %v2, ptr %p
+  ret void
+}
+
+define void @arg_v4i24(<4 x i24> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v4i24:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    sll $1, $7, 0
+; MIPS64-NEXT:    sll $2, $6, 0
+; MIPS64-NEXT:    srl $3, $2, 8
+; MIPS64-NEXT:    srl $6, $1, 16
+; MIPS64-NEXT:    srl $7, $1, 8
+; MIPS64-NEXT:    sll $4, $4, 0
+; MIPS64-NEXT:    sll $5, $5, 0
+; MIPS64-NEXT:    sb $1, 11($8)
+; MIPS64-NEXT:    sb $2, 8($8)
+; MIPS64-NEXT:    sb $5, 5($8)
+; MIPS64-NEXT:    sb $4, 2($8)
+; MIPS64-NEXT:    sb $7, 10($8)
+; MIPS64-NEXT:    sb $6, 9($8)
+; MIPS64-NEXT:    sh $3, 6($8)
+; MIPS64-NEXT:    srl $1, $5, 8
+; MIPS64-NEXT:    sb $1, 4($8)
+; MIPS64-NEXT:    srl $1, $5, 16
+; MIPS64-NEXT:    sb $1, 3($8)
+; MIPS64-NEXT:    srl $1, $4, 8
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sh $1, 0($8)
+;
+; MIPS32-LABEL: arg_v4i24:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    srl $1, $6, 8
+; MIPS32-NEXT:    srl $2, $7, 16
+; MIPS32-NEXT:    srl $3, $7, 8
+; MIPS32-NEXT:    lw $8, 16($sp)
+; MIPS32-NEXT:    sb $7, 11($8)
+; MIPS32-NEXT:    sb $6, 8($8)
+; MIPS32-NEXT:    sb $5, 5($8)
+; MIPS32-NEXT:    sb $4, 2($8)
+; MIPS32-NEXT:    sb $3, 10($8)
+; MIPS32-NEXT:    sb $2, 9($8)
+; MIPS32-NEXT:    sh $1, 6($8)
+; MIPS32-NEXT:    srl $1, $5, 8
+; MIPS32-NEXT:    sb $1, 4($8)
+; MIPS32-NEXT:    srl $1, $5, 16
+; MIPS32-NEXT:    sb $1, 3($8)
+; MIPS32-NEXT:    srl $1, $4, 8
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sh $1, 0($8)
+  store <4 x i24> %vec, ptr %p
+  ret void
+}
+
+define <4 x i24> @ret_v4i24(ptr %p) {
+; MIPS64-LABEL: ret_v4i24:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lbu $1, 4($4)
+; MIPS64-NEXT:    sll $1, $1, 8
+; MIPS64-NEXT:    lbu $2, 3($4)
+; MIPS64-NEXT:    sll $2, $2, 16
+; MIPS64-NEXT:    or $3, $2, $1
+; MIPS64-NEXT:    lbu $5, 5($4)
+; MIPS64-NEXT:    lbu $1, 8($4)
+; MIPS64-NEXT:    lhu $2, 6($4)
+; MIPS64-NEXT:    sll $6, $2, 8
+; MIPS64-NEXT:    lbu $2, 2($4)
+; MIPS64-NEXT:    lhu $7, 0($4)
+; MIPS64-NEXT:    sll $7, $7, 8
+; MIPS64-NEXT:    or $2, $2, $7
+; MIPS64-NEXT:    or $1, $1, $6
+; MIPS64-NEXT:    or $3, $5, $3
+; MIPS64-NEXT:    lbu $5, 10($4)
+; MIPS64-NEXT:    sll $5, $5, 8
+; MIPS64-NEXT:    lbu $6, 9($4)
+; MIPS64-NEXT:    sll $6, $6, 16
+; MIPS64-NEXT:    or $5, $6, $5
+; MIPS64-NEXT:    lbu $4, 11($4)
+; MIPS64-NEXT:    or $5, $4, $5
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    move $4, $1
+;
+; MIPS32-LABEL: ret_v4i24:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lbu $1, 4($4)
+; MIPS32-NEXT:    sll $1, $1, 8
+; MIPS32-NEXT:    lbu $2, 3($4)
+; MIPS32-NEXT:    sll $2, $2, 16
+; MIPS32-NEXT:    or $3, $2, $1
+; MIPS32-NEXT:    lbu $5, 5($4)
+; MIPS32-NEXT:    lbu $1, 8($4)
+; MIPS32-NEXT:    lhu $2, 6($4)
+; MIPS32-NEXT:    sll $6, $2, 8
+; MIPS32-NEXT:    lbu $2, 2($4)
+; MIPS32-NEXT:    lhu $7, 0($4)
+; MIPS32-NEXT:    sll $7, $7, 8
+; MIPS32-NEXT:    or $2, $2, $7
+; MIPS32-NEXT:    or $1, $1, $6
+; MIPS32-NEXT:    or $3, $5, $3
+; MIPS32-NEXT:    lbu $5, 10($4)
+; MIPS32-NEXT:    sll $5, $5, 8
+; MIPS32-NEXT:    lbu $6, 9($4)
+; MIPS32-NEXT:    sll $6, $6, 16
+; MIPS32-NEXT:    or $5, $6, $5
+; MIPS32-NEXT:    lbu $4, 11($4)
+; MIPS32-NEXT:    or $5, $4, $5
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    move $4, $1
+  %v = load <4 x i24>, ptr %p
+  ret <4 x i24> %v
+}
+
+define void @call_v4i24(ptr %p) nounwind {
+; MIPS64-LABEL: call_v4i24:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $sp, $sp, -16
+; MIPS64-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $16, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    move $16, $4
+; MIPS64-NEXT:    lbu $1, 4($4)
+; MIPS64-NEXT:    dsll $1, $1, 8
+; MIPS64-NEXT:    lb $2, 3($4)
+; MIPS64-NEXT:    dsll $2, $2, 16
+; MIPS64-NEXT:    or $1, $2, $1
+; MIPS64-NEXT:    lbu $2, 10($4)
+; MIPS64-NEXT:    lbu $3, 5($4)
+; MIPS64-NEXT:    lbu $5, 8($4)
+; MIPS64-NEXT:    lh $4, 6($4)
+; MIPS64-NEXT:    dsll $6, $4, 8
+; MIPS64-NEXT:    lbu $4, 2($16)
+; MIPS64-NEXT:    lh $7, 0($16)
+; MIPS64-NEXT:    dsll $7, $7, 8
+; MIPS64-NEXT:    or $4, $4, $7
+; MIPS64-NEXT:    or $6, $5, $6
+; MIPS64-NEXT:    or $5, $3, $1
+; MIPS64-NEXT:    dsll $1, $2, 8
+; MIPS64-NEXT:    lb $2, 9($16)
+; MIPS64-NEXT:    dsll $2, $2, 16
+; MIPS64-NEXT:    or $1, $2, $1
+; MIPS64-NEXT:    lbu $2, 11($16)
+; MIPS64-NEXT:    jal arg_v4i24
+; MIPS64-NEXT:    or $7, $2, $1
+; MIPS64-NEXT:    jal ret_v4i24
+; MIPS64-NEXT:    nop
+; MIPS64-NEXT:    srl $1, $4, 8
+; MIPS64-NEXT:    srl $6, $5, 16
+; MIPS64-NEXT:    srl $7, $5, 8
+; MIPS64-NEXT:    sb $5, 11($16)
+; MIPS64-NEXT:    sb $4, 8($16)
+; MIPS64-NEXT:    sb $3, 5($16)
+; MIPS64-NEXT:    sb $2, 2($16)
+; MIPS64-NEXT:    sb $7, 10($16)
+; MIPS64-NEXT:    sb $6, 9($16)
+; MIPS64-NEXT:    sh $1, 6($16)
+; MIPS64-NEXT:    srl $1, $3, 8
+; MIPS64-NEXT:    sb $1, 4($16)
+; MIPS64-NEXT:    srl $1, $3, 16
+; MIPS64-NEXT:    sb $1, 3($16)
+; MIPS64-NEXT:    srl $1, $2, 8
+; MIPS64-NEXT:    sh $1, 0($16)
+; MIPS64-NEXT:    ld $16, 0($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    daddiu $sp, $sp, 16
+;
+; MIPS32-LABEL: call_v4i24:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addiu $sp, $sp, -24
+; MIPS32-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $16, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $16, $4
+; MIPS32-NEXT:    lbu $1, 4($4)
+; MIPS32-NEXT:    sll $1, $1, 8
+; MIPS32-NEXT:    lbu $2, 3($4)
+; MIPS32-NEXT:    sll $2, $2, 16
+; MIPS32-NEXT:    or $1, $2, $1
+; MIPS32-NEXT:    lbu $2, 10($4)
+; MIPS32-NEXT:    lbu $3, 5($4)
+; MIPS32-NEXT:    lbu $5, 8($4)
+; MIPS32-NEXT:    lhu $4, 6($4)
+; MIPS32-NEXT:    sll $6, $4, 8
+; MIPS32-NEXT:    lbu $4, 2($16)
+; MIPS32-NEXT:    lhu $7, 0($16)
+; MIPS32-NEXT:    sll $7, $7, 8
+; MIPS32-NEXT:    or $4, $4, $7
+; MIPS32-NEXT:    or $6, $5, $6
+; MIPS32-NEXT:    or $5, $3, $1
+; MIPS32-NEXT:    sll $1, $2, 8
+; MIPS32-NEXT:    lbu $2, 9($16)
+; MIPS32-NEXT:    sll $2, $2, 16
+; MIPS32-NEXT:    or $1, $2, $1
+; MIPS32-NEXT:    lbu $2, 11($16)
+; MIPS32-NEXT:    jal arg_v4i24
+; MIPS32-NEXT:    or $7, $2, $1
+; MIPS32-NEXT:    jal ret_v4i24
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    srl $1, $4, 8
+; MIPS32-NEXT:    srl $6, $5, 16
+; MIPS32-NEXT:    srl $7, $5, 8
+; MIPS32-NEXT:    sb $5, 11($16)
+; MIPS32-NEXT:    sb $4, 8($16)
+; MIPS32-NEXT:    sb $3, 5($16)
+; MIPS32-NEXT:    sb $2, 2($16)
+; MIPS32-NEXT:    sb $7, 10($16)
+; MIPS32-NEXT:    sb $6, 9($16)
+; MIPS32-NEXT:    sh $1, 6($16)
+; MIPS32-NEXT:    srl $1, $3, 8
+; MIPS32-NEXT:    sb $1, 4($16)
+; MIPS32-NEXT:    srl $1, $3, 16
+; MIPS32-NEXT:    sb $1, 3($16)
+; MIPS32-NEXT:    srl $1, $2, 8
+; MIPS32-NEXT:    sh $1, 0($16)
+; MIPS32-NEXT:    lw $16, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    addiu $sp, $sp, 24
+  %v1 = load <4 x i24>, ptr %p
+  call void @arg_v4i24(<4 x i24> %v1)
+  %v2 = call <4 x i24> @ret_v4i24()
+  store <4 x i24> %v2, ptr %p
+  ret void
+}
+
+define void @arg_v4i18(<4 x i18> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v4i18:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lui $1, 3
+; MIPS64-NEXT:    ori $2, $1, 65535
+; MIPS64-NEXT:    and $3, $5, $2
+; MIPS64-NEXT:    dsll $3, $3, 36
+; MIPS64-NEXT:    dsll $5, $4, 54
+; MIPS64-NEXT:    or $3, $5, $3
+; MIPS64-NEXT:    and $2, $6, $2
+; MIPS64-NEXT:    dsll $2, $2, 18
+; MIPS64-NEXT:    or $2, $3, $2
+; MIPS64-NEXT:    ori $1, $1, 65280
+; MIPS64-NEXT:    and $1, $7, $1
+; MIPS64-NEXT:    sb $7, 8($8)
+; MIPS64-NEXT:    or $1, $2, $1
+; MIPS64-NEXT:    daddiu $2, $zero, 255
+; MIPS64-NEXT:    dsrl $1, $1, 8
+; MIPS64-NEXT:    dsll $2, $2, 56
+; MIPS64-NEXT:    dsll $3, $4, 46
+; MIPS64-NEXT:    and $2, $3, $2
+; MIPS64-NEXT:    or $1, $2, $1
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sd $1, 0($8)
+;
+; MIPS32-LABEL: arg_v4i18:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    sll $1, $4, 14
+; MIPS32-NEXT:    lui $2, 63
+; MIPS32-NEXT:    lui $3, 65280
+; MIPS32-NEXT:    and $1, $1, $3
+; MIPS32-NEXT:    ori $2, $2, 65280
+; MIPS32-NEXT:    sll $3, $5, 4
+; MIPS32-NEXT:    and $2, $3, $2
+; MIPS32-NEXT:    sll $4, $4, 22
+; MIPS32-NEXT:    or $2, $4, $2
+; MIPS32-NEXT:    srl $2, $2, 8
+; MIPS32-NEXT:    lui $4, 3
+; MIPS32-NEXT:    or $1, $1, $2
+; MIPS32-NEXT:    ori $2, $4, 65280
+; MIPS32-NEXT:    and $2, $7, $2
+; MIPS32-NEXT:    sll $5, $6, 18
+; MIPS32-NEXT:    or $2, $5, $2
+; MIPS32-NEXT:    lw $5, 16($sp)
+; MIPS32-NEXT:    sb $7, 8($5)
+; MIPS32-NEXT:    sw $1, 0($5)
+; MIPS32-NEXT:    srl $1, $2, 8
+; MIPS32-NEXT:    ori $2, $4, 49152
+; MIPS32-NEXT:    and $2, $6, $2
+; MIPS32-NEXT:    srl $2, $2, 14
+; MIPS32-NEXT:    or $2, $3, $2
+; MIPS32-NEXT:    sll $2, $2, 24
+; MIPS32-NEXT:    or $1, $1, $2
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $1, 4($5)
+  store <4 x i18> %vec, ptr %p
+  ret void
+}
+
+define <4 x i18> @ret_v4i18(ptr %p) {
+; MIPS64-LABEL: ret_v4i18:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lbu $1, 8($4)
+; MIPS64-NEXT:    ld $2, 0($4)
+; MIPS64-NEXT:    dsll $6, $2, 8
+; MIPS64-NEXT:    or $1, $1, $6
+; MIPS64-NEXT:    sll $3, $2, 0
+; MIPS64-NEXT:    sll $1, $1, 0
+; MIPS64-NEXT:    srl $4, $3, 10
+; MIPS64-NEXT:    lui $3, 3
+; MIPS64-NEXT:    ori $5, $3, 65535
+; MIPS64-NEXT:    dsrl $3, $2, 28
+; MIPS64-NEXT:    sll $3, $3, 0
+; MIPS64-NEXT:    lui $7, 3
+; MIPS64-NEXT:    and $3, $3, $5
+; MIPS64-NEXT:    and $4, $4, $5
+; MIPS64-NEXT:    and $5, $1, $5
+; MIPS64-NEXT:    ori $1, $7, 64512
+; MIPS64-NEXT:    dsrl $2, $2, 46
+; MIPS64-NEXT:    and $1, $2, $1
+; MIPS64-NEXT:    dsrl $2, $6, 54
+; MIPS64-NEXT:    or $1, $2, $1
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sll $2, $1, 0
+;
+; MIPS32-LABEL: ret_v4i18:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lbu $1, 8($4)
+; MIPS32-NEXT:    lw $2, 4($4)
+; MIPS32-NEXT:    sll $6, $2, 8
+; MIPS32-NEXT:    lui $3, 3
+; MIPS32-NEXT:    or $1, $1, $6
+; MIPS32-NEXT:    ori $5, $3, 64512
+; MIPS32-NEXT:    lw $4, 0($4)
+; MIPS32-NEXT:    srl $7, $4, 14
+; MIPS32-NEXT:    and $5, $7, $5
+; MIPS32-NEXT:    srl $7, $2, 24
+; MIPS32-NEXT:    ori $8, $3, 65535
+; MIPS32-NEXT:    sll $3, $4, 8
+; MIPS32-NEXT:    srl $2, $3, 22
+; MIPS32-NEXT:    or $2, $2, $5
+; MIPS32-NEXT:    and $5, $1, $8
+; MIPS32-NEXT:    or $1, $3, $7
+; MIPS32-NEXT:    srl $1, $1, 4
+; MIPS32-NEXT:    and $3, $1, $8
+; MIPS32-NEXT:    sll $1, $7, 14
+; MIPS32-NEXT:    srl $4, $6, 18
+; MIPS32-NEXT:    or $1, $4, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    and $4, $1, $8
+  %v = load <4 x i18>, ptr %p
+  ret <4 x i18> %v
+}
+
+define void @call_v4i18(ptr %p) nounwind {
+; MIPS64-LABEL: call_v4i18:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $sp, $sp, -48
+; MIPS64-NEXT:    sd $ra, 40($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $19, 32($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $18, 24($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $17, 16($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $16, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    move $16, $4
+; MIPS64-NEXT:    lui $17, 3
+; MIPS64-NEXT:    ori $1, $17, 64512
+; MIPS64-NEXT:    ld $2, 0($4)
+; MIPS64-NEXT:    dsrl $3, $2, 46
+; MIPS64-NEXT:    dsrl $4, $2, 10
+; MIPS64-NEXT:    ori $18, $17, 65535
+; MIPS64-NEXT:    dsrl $5, $2, 28
+; MIPS64-NEXT:    and $5, $5, $18
+; MIPS64-NEXT:    and $6, $4, $18
+; MIPS64-NEXT:    and $1, $3, $1
+; MIPS64-NEXT:    dsll $2, $2, 8
+; MIPS64-NEXT:    dsrl $3, $2, 54
+; MIPS64-NEXT:    or $4, $3, $1
+; MIPS64-NEXT:    lbu $1, 8($16)
+; MIPS64-NEXT:    or $1, $1, $2
+; MIPS64-NEXT:    jal arg_v4i18
+; MIPS64-NEXT:    and $7, $1, $18
+; MIPS64-NEXT:    daddiu $1, $zero, 255
+; MIPS64-NEXT:    dsll $19, $1, 56
+; MIPS64-NEXT:    jal ret_v4i18
+; MIPS64-NEXT:    ori $17, $17, 65280
+; MIPS64-NEXT:    # kill: def $v0 killed $v0 def $v0_64
+; MIPS64-NEXT:    # kill: def $v1 killed $v1 def $v1_64
+; MIPS64-NEXT:    # kill: def $a0 killed $a0 def $a0_64
+; MIPS64-NEXT:    # kill: def $a1 killed $a1 def $a1_64
+; MIPS64-NEXT:    dsll $1, $2, 54
+; MIPS64-NEXT:    and $3, $3, $18
+; MIPS64-NEXT:    dsll $3, $3, 36
+; MIPS64-NEXT:    or $1, $1, $3
+; MIPS64-NEXT:    and $3, $4, $18
+; MIPS64-NEXT:    dsll $3, $3, 18
+; MIPS64-NEXT:    sb $5, 8($16)
+; MIPS64-NEXT:    or $1, $1, $3
+; MIPS64-NEXT:    and $3, $5, $17
+; MIPS64-NEXT:    or $1, $1, $3
+; MIPS64-NEXT:    dsrl $1, $1, 8
+; MIPS64-NEXT:    dsll $2, $2, 46
+; MIPS64-NEXT:    and $2, $2, $19
+; MIPS64-NEXT:    or $1, $2, $1
+; MIPS64-NEXT:    sd $1, 0($16)
+; MIPS64-NEXT:    ld $16, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $17, 16($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $18, 24($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $19, 32($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $ra, 40($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    daddiu $sp, $sp, 48
+;
+; MIPS32-LABEL: call_v4i18:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addiu $sp, $sp, -40
+; MIPS32-NEXT:    sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $19, 32($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $18, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $17, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $16, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $16, $4
+; MIPS32-NEXT:    lw $1, 4($4)
+; MIPS32-NEXT:    srl $2, $1, 24
+; MIPS32-NEXT:    lw $3, 0($4)
+; MIPS32-NEXT:    sll $4, $3, 8
+; MIPS32-NEXT:    or $5, $4, $2
+; MIPS32-NEXT:    lbu $6, 8($16)
+; MIPS32-NEXT:    sll $1, $1, 8
+; MIPS32-NEXT:    srl $5, $5, 4
+; MIPS32-NEXT:    or $6, $6, $1
+; MIPS32-NEXT:    lui $17, 3
+; MIPS32-NEXT:    ori $7, $17, 64512
+; MIPS32-NEXT:    srl $3, $3, 14
+; MIPS32-NEXT:    and $3, $3, $7
+; MIPS32-NEXT:    ori $8, $17, 65535
+; MIPS32-NEXT:    srl $4, $4, 22
+; MIPS32-NEXT:    or $4, $4, $3
+; MIPS32-NEXT:    and $7, $6, $8
+; MIPS32-NEXT:    and $5, $5, $8
+; MIPS32-NEXT:    sll $2, $2, 14
+; MIPS32-NEXT:    srl $1, $1, 18
+; MIPS32-NEXT:    or $1, $1, $2
+; MIPS32-NEXT:    jal arg_v4i18
+; MIPS32-NEXT:    and $6, $1, $8
+; MIPS32-NEXT:    ori $18, $17, 49152
+; MIPS32-NEXT:    ori $17, $17, 65280
+; MIPS32-NEXT:    lui $1, 63
+; MIPS32-NEXT:    jal ret_v4i18
+; MIPS32-NEXT:    ori $19, $1, 65280
+; MIPS32-NEXT:    lui $1, 65280
+; MIPS32-NEXT:    and $6, $5, $17
+; MIPS32-NEXT:    sll $7, $4, 18
+; MIPS32-NEXT:    or $6, $7, $6
+; MIPS32-NEXT:    srl $6, $6, 8
+; MIPS32-NEXT:    and $4, $4, $18
+; MIPS32-NEXT:    srl $4, $4, 14
+; MIPS32-NEXT:    sll $3, $3, 4
+; MIPS32-NEXT:    or $4, $3, $4
+; MIPS32-NEXT:    sll $4, $4, 24
+; MIPS32-NEXT:    or $4, $6, $4
+; MIPS32-NEXT:    sll $6, $2, 14
+; MIPS32-NEXT:    sb $5, 8($16)
+; MIPS32-NEXT:    sw $4, 4($16)
+; MIPS32-NEXT:    and $1, $6, $1
+; MIPS32-NEXT:    and $3, $3, $19
+; MIPS32-NEXT:    sll $2, $2, 22
+; MIPS32-NEXT:    or $2, $2, $3
+; MIPS32-NEXT:    srl $2, $2, 8
+; MIPS32-NEXT:    or $1, $1, $2
+; MIPS32-NEXT:    sw $1, 0($16)
+; MIPS32-NEXT:    lw $16, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $17, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $18, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $19, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    addiu $sp, $sp, 40
+  %v1 = load <4 x i18>, ptr %p
+  call void @arg_v4i18(<4 x i18> %v1)
+  %v2 = call <4 x i18> @ret_v4i18()
+  store <4 x i18> %v2, ptr %p
+  ret void
+}
+
+define void @arg_v7i18(<7 x i18> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v7i18:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lui $1, 3
+; MIPS64-NEXT:    ori $2, $1, 65535
+; MIPS64-NEXT:    and $3, $8, $2
+; MIPS64-NEXT:    dsll $3, $3, 36
+; MIPS64-NEXT:    dsll $8, $7, 54
+; MIPS64-NEXT:    or $3, $8, $3
+; MIPS64-NEXT:    and $8, $9, $2
+; MIPS64-NEXT:    dsll $8, $8, 18
+; MIPS64-NEXT:    or $3, $3, $8
+; MIPS64-NEXT:    and $5, $5, $2
+; MIPS64-NEXT:    and $8, $10, $2
+; MIPS64-NEXT:    or $3, $3, $8
+; MIPS64-NEXT:    dsll $5, $5, 26
+; MIPS64-NEXT:    dsll $4, $4, 44
+; MIPS64-NEXT:    or $4, $4, $5
+; MIPS64-NEXT:    and $2, $6, $2
+; MIPS64-NEXT:    dsll $2, $2, 8
+; MIPS64-NEXT:    sd $3, 8($11)
+; MIPS64-NEXT:    or $2, $4, $2
+; MIPS64-NEXT:    ori $1, $1, 64512
+; MIPS64-NEXT:    and $1, $7, $1
+; MIPS64-NEXT:    dsrl $1, $1, 10
+; MIPS64-NEXT:    or $1, $2, $1
+; MIPS64-NEXT:    daddiu $2, $zero, 1
+; MIPS64-NEXT:    dsll $2, $2, 62
+; MIPS64-NEXT:    daddiu $2, $2, -1
+; MIPS64-NEXT:    and $1, $1, $2
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sd $1, 0($11)
+;
+; MIPS32-LABEL: arg_v7i18:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lui $1, 3
+; MIPS32-NEXT:    ori $2, $1, 65535
+; MIPS32-NEXT:    and $3, $6, $2
+; MIPS32-NEXT:    sll $3, $3, 8
+; MIPS32-NEXT:    ori $6, $1, 65472
+; MIPS32-NEXT:    and $6, $5, $6
+; MIPS32-NEXT:    srl $6, $6, 6
+; MIPS32-NEXT:    sll $5, $5, 26
+; MIPS32-NEXT:    sll $4, $4, 12
+; MIPS32-NEXT:    or $4, $4, $6
+; MIPS32-NEXT:    or $3, $5, $3
+; MIPS32-NEXT:    ori $5, $1, 64512
+; MIPS32-NEXT:    and $5, $7, $5
+; MIPS32-NEXT:    srl $5, $5, 10
+; MIPS32-NEXT:    lui $6, 16383
+; MIPS32-NEXT:    ori $6, $6, 65535
+; MIPS32-NEXT:    lw $8, 24($sp)
+; MIPS32-NEXT:    lw $9, 16($sp)
+; MIPS32-NEXT:    or $3, $3, $5
+; MIPS32-NEXT:    and $5, $9, $2
+; MIPS32-NEXT:    and $4, $4, $6
+; MIPS32-NEXT:    and $2, $8, $2
+; MIPS32-NEXT:    lw $6, 20($sp)
+; MIPS32-NEXT:    sll $8, $6, 18
+; MIPS32-NEXT:    or $2, $8, $2
+; MIPS32-NEXT:    lw $8, 28($sp)
+; MIPS32-NEXT:    sw $2, 12($8)
+; MIPS32-NEXT:    sw $4, 0($8)
+; MIPS32-NEXT:    sw $3, 4($8)
+; MIPS32-NEXT:    sll $2, $5, 4
+; MIPS32-NEXT:    sll $3, $7, 22
+; MIPS32-NEXT:    or $2, $3, $2
+; MIPS32-NEXT:    ori $1, $1, 49152
+; MIPS32-NEXT:    and $1, $6, $1
+; MIPS32-NEXT:    srl $1, $1, 14
+; MIPS32-NEXT:    or $1, $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $1, 8($8)
+  store <7 x i18> %vec, ptr %p
+  ret void
+}
+
+define <7 x i18> @ret_v7i18(ptr %p) {
+; MIPS64-LABEL: ret_v7i18:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    ld $1, 0($5)
+; MIPS64-NEXT:    sd $1, 0($4)
+; MIPS64-NEXT:    ld $1, 8($5)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sd $1, 8($4)
+;
+; MIPS32-LABEL: ret_v7i18:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $1, 0($5)
+; MIPS32-NEXT:    sw $1, 0($4)
+; MIPS32-NEXT:    lw $1, 4($5)
+; MIPS32-NEXT:    sw $1, 4($4)
+; MIPS32-NEXT:    lw $1, 12($5)
+; MIPS32-NEXT:    sw $1, 12($4)
+; MIPS32-NEXT:    lw $1, 8($5)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $1, 8($4)
+  %v = load <7 x i18>, ptr %p
+  ret <7 x i18> %v
+}
+
+define void @call_v7i18(ptr %p) nounwind {
+; MIPS64-LABEL: call_v7i18:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $sp, $sp, -32
+; MIPS64-NEXT:    sd $ra, 24($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $16, 16($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    move $16, $4
+; MIPS64-NEXT:    ld $1, 0($4)
+; MIPS64-NEXT:    ld $2, 8($4)
+; MIPS64-NEXT:    dsrl $3, $2, 18
+; MIPS64-NEXT:    dsrl $4, $2, 36
+; MIPS64-NEXT:    dsrl $6, $1, 8
+; MIPS64-NEXT:    dsrl $5, $1, 26
+; MIPS64-NEXT:    lui $7, 3
+; MIPS64-NEXT:    ori $7, $7, 65535
+; MIPS64-NEXT:    and $10, $2, $7
+; MIPS64-NEXT:    and $5, $5, $7
+; MIPS64-NEXT:    and $6, $6, $7
+; MIPS64-NEXT:    and $8, $4, $7
+; MIPS64-NEXT:    and $9, $3, $7
+; MIPS64-NEXT:    dsll $3, $1, 10
+; MIPS64-NEXT:    dsrl $2, $2, 54
+; MIPS64-NEXT:    or $2, $2, $3
+; MIPS64-NEXT:    and $7, $2, $7
+; MIPS64-NEXT:    jal arg_v7i18
+; MIPS64-NEXT:    dsrl $4, $1, 44
+; MIPS64-NEXT:    jal ret_v7i18
+; MIPS64-NEXT:    daddiu $4, $sp, 0
+; MIPS64-NEXT:    ld $1, 0($sp)
+; MIPS64-NEXT:    sd $1, 0($16)
+; MIPS64-NEXT:    ld $1, 8($sp)
+; MIPS64-NEXT:    sd $1, 8($16)
+; MIPS64-NEXT:    ld $16, 16($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    daddiu $sp, $sp, 32
+;
+; MIPS32-LABEL: call_v7i18:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addiu $sp, $sp, -64
+; MIPS32-NEXT:    sw $ra, 60($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $fp, 56($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $16, 52($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $fp, $sp
+; MIPS32-NEXT:    addiu $1, $zero, -16
+; MIPS32-NEXT:    and $sp, $sp, $1
+; MIPS32-NEXT:    move $16, $4
+; MIPS32-NEXT:    lw $1, 8($4)
+; MIPS32-NEXT:    sll $2, $1, 14
+; MIPS32-NEXT:    lw $3, 12($4)
+; MIPS32-NEXT:    srl $4, $3, 18
+; MIPS32-NEXT:    or $2, $4, $2
+; MIPS32-NEXT:    srl $4, $1, 4
+; MIPS32-NEXT:    lui $5, 3
+; MIPS32-NEXT:    ori $7, $5, 65535
+; MIPS32-NEXT:    and $2, $2, $7
+; MIPS32-NEXT:    and $4, $4, $7
+; MIPS32-NEXT:    and $3, $3, $7
+; MIPS32-NEXT:    lw $8, 4($16)
+; MIPS32-NEXT:    lw $9, 0($16)
+; MIPS32-NEXT:    sll $5, $9, 6
+; MIPS32-NEXT:    srl $6, $8, 26
+; MIPS32-NEXT:    sw $3, 24($sp)
+; MIPS32-NEXT:    sw $4, 16($sp)
+; MIPS32-NEXT:    sw $2, 20($sp)
+; MIPS32-NEXT:    or $2, $6, $5
+; MIPS32-NEXT:    srl $3, $8, 8
+; MIPS32-NEXT:    and $6, $3, $7
+; MIPS32-NEXT:    and $5, $2, $7
+; MIPS32-NEXT:    sll $2, $8, 10
+; MIPS32-NEXT:    srl $1, $1, 22
+; MIPS32-NEXT:    or $1, $1, $2
+; MIPS32-NEXT:    and $7, $1, $7
+; MIPS32-NEXT:    jal arg_v7i18
+; MIPS32-NEXT:    srl $4, $9, 12
+; MIPS32-NEXT:    jal ret_v7i18
+; MIPS32-NEXT:    addiu $4, $sp, 32
+; MIPS32-NEXT:    lw $1, 32($sp)
+; MIPS32-NEXT:    sw $1, 0($16)
+; MIPS32-NEXT:    lw $1, 36($sp)
+; MIPS32-NEXT:    sw $1, 4($16)
+; MIPS32-NEXT:    lw $1, 40($sp)
+; MIPS32-NEXT:    sw $1, 8($16)
+; MIPS32-NEXT:    lw $1, 44($sp)
+; MIPS32-NEXT:    sw $1, 12($16)
+; MIPS32-NEXT:    move $sp, $fp
+; MIPS32-NEXT:    lw $16, 52($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $fp, 56($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $ra, 60($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    addiu $sp, $sp, 64
+  %v1 = load <7 x i18>, ptr %p
+  call void @arg_v7i18(<7 x i18> %v1)
+  %v2 = call <7 x i18> @ret_v7i18()
+  store <7 x i18> %v2, ptr %p
+  ret void
+}
+
+define void @arg_v2i128(<2 x i128> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v2i128:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    sd $7, 24($8)
+; MIPS64-NEXT:    sd $6, 16($8)
+; MIPS64-NEXT:    sd $5, 8($8)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sd $4, 0($8)
+;
+; MIPS32-LABEL: arg_v2i128:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $1, 16($sp)
+; MIPS32-NEXT:    lw $2, 20($sp)
+; MIPS32-NEXT:    lw $3, 24($sp)
+; MIPS32-NEXT:    lw $8, 32($sp)
+; MIPS32-NEXT:    lw $9, 28($sp)
+; MIPS32-NEXT:    sw $9, 28($8)
+; MIPS32-NEXT:    sw $3, 24($8)
+; MIPS32-NEXT:    sw $2, 20($8)
+; MIPS32-NEXT:    sw $1, 16($8)
+; MIPS32-NEXT:    sw $7, 12($8)
+; MIPS32-NEXT:    sw $6, 8($8)
+; MIPS32-NEXT:    sw $5, 4($8)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $4, 0($8)
+  store <2 x i128> %vec, ptr %p
+  ret void
+}
+
+define <2 x i128> @ret_v2i128(ptr %p) {
+; MIPS64-LABEL: ret_v2i128:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    ld $1, 24($5)
+; MIPS64-NEXT:    sd $1, 24($4)
+; MIPS64-NEXT:    ld $1, 16($5)
+; MIPS64-NEXT:    sd $1, 16($4)
+; MIPS64-NEXT:    ld $1, 8($5)
+; MIPS64-NEXT:    sd $1, 8($4)
+; MIPS64-NEXT:    ld $1, 0($5)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sd $1, 0($4)
+;
+; MIPS32-LABEL: ret_v2i128:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $1, 12($5)
+; MIPS32-NEXT:    lw $2, 16($5)
+; MIPS32-NEXT:    lw $3, 20($5)
+; MIPS32-NEXT:    lw $6, 24($5)
+; MIPS32-NEXT:    lw $7, 28($5)
+; MIPS32-NEXT:    sw $7, 28($4)
+; MIPS32-NEXT:    sw $6, 24($4)
+; MIPS32-NEXT:    sw $3, 20($4)
+; MIPS32-NEXT:    sw $2, 16($4)
+; MIPS32-NEXT:    sw $1, 12($4)
+; MIPS32-NEXT:    lw $1, 8($5)
+; MIPS32-NEXT:    sw $1, 8($4)
+; MIPS32-NEXT:    lw $1, 4($5)
+; MIPS32-NEXT:    sw $1, 4($4)
+; MIPS32-NEXT:    lw $1, 0($5)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $1, 0($4)
+  %v = load <2 x i128>, ptr %p
+  ret <2 x i128> %v
+}
+
+define void @call_v2i128(ptr %p) nounwind {
+; MIPS64-LABEL: call_v2i128:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $sp, $sp, -64
+; MIPS64-NEXT:    sd $ra, 56($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $fp, 48($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $16, 40($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    move $fp, $sp
+; MIPS64-NEXT:    daddiu $1, $zero, -32
+; MIPS64-NEXT:    move $16, $4
+; MIPS64-NEXT:    ld $7, 24($4)
+; MIPS64-NEXT:    ld $6, 16($4)
+; MIPS64-NEXT:    ld $5, 8($4)
+; MIPS64-NEXT:    ld $4, 0($4)
+; MIPS64-NEXT:    jal arg_v2i128
+; MIPS64-NEXT:    and $sp, $sp, $1
+; MIPS64-NEXT:    jal ret_v2i128
+; MIPS64-NEXT:    daddiu $4, $sp, 0
+; MIPS64-NEXT:    ld $1, 16($sp)
+; MIPS64-NEXT:    sd $1, 16($16)
+; MIPS64-NEXT:    ld $1, 24($sp)
+; MIPS64-NEXT:    sd $1, 24($16)
+; MIPS64-NEXT:    ld $1, 0($sp)
+; MIPS64-NEXT:    sd $1, 0($16)
+; MIPS64-NEXT:    ld $1, 8($sp)
+; MIPS64-NEXT:    sd $1, 8($16)
+; MIPS64-NEXT:    move $sp, $fp
+; MIPS64-NEXT:    ld $16, 40($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $fp, 48($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $ra, 56($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    daddiu $sp, $sp, 64
+;
+; MIPS32-LABEL: call_v2i128:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addiu $sp, $sp, -96
+; MIPS32-NEXT:    sw $ra, 92($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $fp, 88($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $16, 84($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $fp, $sp
+; MIPS32-NEXT:    addiu $1, $zero, -32
+; MIPS32-NEXT:    and $sp, $sp, $1
+; MIPS32-NEXT:    move $16, $4
+; MIPS32-NEXT:    lw $7, 12($4)
+; MIPS32-NEXT:    lw $6, 8($4)
+; MIPS32-NEXT:    lw $5, 4($4)
+; MIPS32-NEXT:    lw $4, 0($4)
+; MIPS32-NEXT:    lw $1, 16($16)
+; MIPS32-NEXT:    lw $2, 20($16)
+; MIPS32-NEXT:    lw $3, 24($16)
+; MIPS32-NEXT:    lw $8, 28($16)
+; MIPS32-NEXT:    sw $8, 28($sp)
+; MIPS32-NEXT:    sw $3, 24($sp)
+; MIPS32-NEXT:    sw $2, 20($sp)
+; MIPS32-NEXT:    jal arg_v2i128
+; MIPS32-NEXT:    sw $1, 16($sp)
+; MIPS32-NEXT:    jal ret_v2i128
+; MIPS32-NEXT:    addiu $4, $sp, 32
+; MIPS32-NEXT:    lw $1, 40($sp)
+; MIPS32-NEXT:    lw $2, 52($sp)
+; MIPS32-NEXT:    lw $3, 48($sp)
+; MIPS32-NEXT:    lw $4, 60($sp)
+; MIPS32-NEXT:    lw $5, 56($sp)
+; MIPS32-NEXT:    sw $5, 24($16)
+; MIPS32-NEXT:    sw $4, 28($16)
+; MIPS32-NEXT:    sw $3, 16($16)
+; MIPS32-NEXT:    sw $2, 20($16)
+; MIPS32-NEXT:    sw $1, 8($16)
+; MIPS32-NEXT:    lw $1, 44($sp)
+; MIPS32-NEXT:    sw $1, 12($16)
+; MIPS32-NEXT:    lw $1, 32($sp)
+; MIPS32-NEXT:    sw $1, 0($16)
+; MIPS32-NEXT:    lw $1, 36($sp)
+; MIPS32-NEXT:    sw $1, 4($16)
+; MIPS32-NEXT:    move $sp, $fp
+; MIPS32-NEXT:    lw $16, 84($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $fp, 88($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $ra, 92($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    addiu $sp, $sp, 96
+  %v1 = load <2 x i128>, ptr %p
+  call void @arg_v2i128(<2 x i128> %v1)
+  %v2 = call <2 x i128> @ret_v2i128()
+  store <2 x i128> %v2, ptr %p
+  ret void
+}
+
+define void @arg_v3i128(<3 x i128> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v3i128:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    sd $9, 40($10)
+; MIPS64-NEXT:    sd $8, 32($10)
+; MIPS64-NEXT:    sd $7, 24($10)
+; MIPS64-NEXT:    sd $6, 16($10)
+; MIPS64-NEXT:    sd $5, 8($10)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sd $4, 0($10)
+;
+; MIPS32-LABEL: arg_v3i128:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $1, 16($sp)
+; MIPS32-NEXT:    lw $2, 20($sp)
+; MIPS32-NEXT:    lw $3, 24($sp)
+; MIPS32-NEXT:    lw $8, 28($sp)
+; MIPS32-NEXT:    lw $9, 32($sp)
+; MIPS32-NEXT:    lw $10, 36($sp)
+; MIPS32-NEXT:    lw $11, 40($sp)
+; MIPS32-NEXT:    lw $12, 48($sp)
+; MIPS32-NEXT:    lw $13, 44($sp)
+; MIPS32-NEXT:    sw $13, 44($12)
+; MIPS32-NEXT:    sw $11, 40($12)
+; MIPS32-NEXT:    sw $10, 36($12)
+; MIPS32-NEXT:    sw $9, 32($12)
+; MIPS32-NEXT:    sw $8, 28($12)
+; MIPS32-NEXT:    sw $3, 24($12)
+; MIPS32-NEXT:    sw $2, 20($12)
+; MIPS32-NEXT:    sw $1, 16($12)
+; MIPS32-NEXT:    sw $7, 12($12)
+; MIPS32-NEXT:    sw $6, 8($12)
+; MIPS32-NEXT:    sw $5, 4($12)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $4, 0($12)
+  store <3 x i128> %vec, ptr %p
+  ret void
+}
+
+define <3 x i128> @ret_v3i128(ptr %p) {
+; MIPS64-LABEL: ret_v3i128:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    ld $1, 24($5)
+; MIPS64-NEXT:    ld $2, 32($5)
+; MIPS64-NEXT:    ld $3, 40($5)
+; MIPS64-NEXT:    sd $3, 40($4)
+; MIPS64-NEXT:    sd $2, 32($4)
+; MIPS64-NEXT:    sd $1, 24($4)
+; MIPS64-NEXT:    ld $1, 16($5)
+; MIPS64-NEXT:    sd $1, 16($4)
+; MIPS64-NEXT:    ld $1, 8($5)
+; MIPS64-NEXT:    sd $1, 8($4)
+; MIPS64-NEXT:    ld $1, 0($5)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sd $1, 0($4)
+;
+; MIPS32-LABEL: ret_v3i128:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $1, 28($5)
+; MIPS32-NEXT:    lw $2, 32($5)
+; MIPS32-NEXT:    lw $3, 36($5)
+; MIPS32-NEXT:    lw $6, 40($5)
+; MIPS32-NEXT:    lw $7, 12($5)
+; MIPS32-NEXT:    lw $8, 16($5)
+; MIPS32-NEXT:    lw $9, 20($5)
+; MIPS32-NEXT:    lw $10, 24($5)
+; MIPS32-NEXT:    lw $11, 44($5)
+; MIPS32-NEXT:    sw $11, 44($4)
+; MIPS32-NEXT:    sw $6, 40($4)
+; MIPS32-NEXT:    sw $3, 36($4)
+; MIPS32-NEXT:    sw $2, 32($4)
+; MIPS32-NEXT:    sw $1, 28($4)
+; MIPS32-NEXT:    sw $10, 24($4)
+; MIPS32-NEXT:    sw $9, 20($4)
+; MIPS32-NEXT:    sw $8, 16($4)
+; MIPS32-NEXT:    sw $7, 12($4)
+; MIPS32-NEXT:    lw $1, 8($5)
+; MIPS32-NEXT:    sw $1, 8($4)
+; MIPS32-NEXT:    lw $1, 4($5)
+; MIPS32-NEXT:    sw $1, 4($4)
+; MIPS32-NEXT:    lw $1, 0($5)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $1, 0($4)
+  %v = load <3 x i128>, ptr %p
+  ret <3 x i128> %v
+}
+
+define void @call_v3i128(ptr %p) nounwind {
+; MIPS64-LABEL: call_v3i128:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $sp, $sp, -128
+; MIPS64-NEXT:    sd $ra, 120($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $fp, 112($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $16, 104($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    move $fp, $sp
+; MIPS64-NEXT:    daddiu $1, $zero, -64
+; MIPS64-NEXT:    move $16, $4
+; MIPS64-NEXT:    ld $9, 40($4)
+; MIPS64-NEXT:    ld $8, 32($4)
+; MIPS64-NEXT:    ld $7, 24($4)
+; MIPS64-NEXT:    ld $6, 16($4)
+; MIPS64-NEXT:    ld $5, 8($4)
+; MIPS64-NEXT:    ld $4, 0($4)
+; MIPS64-NEXT:    jal arg_v3i128
+; MIPS64-NEXT:    and $sp, $sp, $1
+; MIPS64-NEXT:    jal ret_v3i128
+; MIPS64-NEXT:    daddiu $4, $sp, 0
+; MIPS64-NEXT:    ld $1, 16($sp)
+; MIPS64-NEXT:    ld $2, 40($sp)
+; MIPS64-NEXT:    ld $3, 32($sp)
+; MIPS64-NEXT:    sd $3, 32($16)
+; MIPS64-NEXT:    sd $2, 40($16)
+; MIPS64-NEXT:    sd $1, 16($16)
+; MIPS64-NEXT:    ld $1, 24($sp)
+; MIPS64-NEXT:    sd $1, 24($16)
+; MIPS64-NEXT:    ld $1, 0($sp)
+; MIPS64-NEXT:    sd $1, 0($16)
+; MIPS64-NEXT:    ld $1, 8($sp)
+; MIPS64-NEXT:    sd $1, 8($16)
+; MIPS64-NEXT:    move $sp, $fp
+; MIPS64-NEXT:    ld $16, 104($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $fp, 112($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    ld $ra, 120($sp) # 8-byte Folded Reload
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    daddiu $sp, $sp, 128
+;
+; MIPS32-LABEL: call_v3i128:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    addiu $sp, $sp, -192
+; MIPS32-NEXT:    sw $ra, 188($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $fp, 184($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $16, 180($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $fp, $sp
+; MIPS32-NEXT:    addiu $1, $zero, -64
+; MIPS32-NEXT:    and $sp, $sp, $1
+; MIPS32-NEXT:    move $16, $4
+; MIPS32-NEXT:    lw $7, 12($4)
+; MIPS32-NEXT:    lw $6, 8($4)
+; MIPS32-NEXT:    lw $5, 4($4)
+; MIPS32-NEXT:    lw $4, 0($4)
+; MIPS32-NEXT:    lw $1, 16($16)
+; MIPS32-NEXT:    lw $2, 20($16)
+; MIPS32-NEXT:    lw $3, 24($16)
+; MIPS32-NEXT:    lw $8, 28($16)
+; MIPS32-NEXT:    lw $9, 32($16)
+; MIPS32-NEXT:    lw $10, 36($16)
+; MIPS32-NEXT:    lw $11, 40($16)
+; MIPS32-NEXT:    lw $12, 44($16)
+; MIPS32-NEXT:    sw $12, 44($sp)
+; MIPS32-NEXT:    sw $11, 40($sp)
+; MIPS32-NEXT:    sw $10, 36($sp)
+; MIPS32-NEXT:    sw $9, 32($sp)
+; MIPS32-NEXT:    sw $8, 28($sp)
+; MIPS32-NEXT:    sw $3, 24($sp)
+; MIPS32-NEXT:    sw $2, 20($sp)
+; MIPS32-NEXT:    jal arg_v3i128
+; MIPS32-NEXT:    sw $1, 16($sp)
+; MIPS32-NEXT:    jal ret_v3i128
+; MIPS32-NEXT:    addiu $4, $sp, 64
+; MIPS32-NEXT:    lw $1, 88($sp)
+; MIPS32-NEXT:    lw $2, 100($sp)
+; MIPS32-NEXT:    lw $3, 96($sp)
+; MIPS32-NEXT:    lw $4, 108($sp)
+; MIPS32-NEXT:    lw $5, 64($sp)
+; MIPS32-NEXT:    lw $6, 84($sp)
+; MIPS32-NEXT:    lw $7, 80($sp)
+; MIPS32-NEXT:    lw $8, 92($sp)
+; MIPS32-NEXT:    lw $9, 104($sp)
+; MIPS32-NEXT:    sw $9, 40($16)
+; MIPS32-NEXT:    sw $4, 44($16)
+; MIPS32-NEXT:    sw $3, 32($16)
+; MIPS32-NEXT:    sw $2, 36($16)
+; MIPS32-NEXT:    sw $1, 24($16)
+; MIPS32-NEXT:    sw $8, 28($16)
+; MIPS32-NEXT:    sw $7, 16($16)
+; MIPS32-NEXT:    sw $6, 20($16)
+; MIPS32-NEXT:    sw $5, 0($16)
+; MIPS32-NEXT:    lw $1, 68($sp)
+; MIPS32-NEXT:    sw $1, 4($16)
+; MIPS32-NEXT:    lw $1, 72($sp)
+; MIPS32-NEXT:    sw $1, 8($16)
+; MIPS32-NEXT:    lw $1, 76($sp)
+; MIPS32-NEXT:    sw $1, 12($16)
+; MIPS32-NEXT:    move $sp, $fp
+; MIPS32-NEXT:    lw $16, 180($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $fp, 184($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $ra, 188($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    addiu $sp, $sp, 192
+  %v1 = load <3 x i128>, ptr %p
+  call void @arg_v3i128(<3 x i128> %v1)
+  %v2 = call <3 x i128> @ret_v3i128()
+  store <3 x i128> %v2, ptr %p
+  ret void
+}

diff  --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll
index f027c540fe304b..28a7dc046139b2 100644
--- a/llvm/test/CodeGen/Mips/cconv/vector.ll
+++ b/llvm/test/CodeGen/Mips/cconv/vector.ll
@@ -6637,24 +6637,12 @@ define <2 x i24> @i24x2(<2 x i24> %a, <2 x i24> %b) {
 ;
 ; MIPS64-LABEL: i24x2:
 ; MIPS64:       # %bb.0: # %Entry
-; MIPS64-NEXT:    lui $1, 256
-; MIPS64-NEXT:    daddiu $1, $1, -1
-; MIPS64-NEXT:    dsll $1, $1, 24
-; MIPS64-NEXT:    and $2, $5, $1
-; MIPS64-NEXT:    dsrl $2, $2, 24
-; MIPS64-NEXT:    sll $2, $2, 0
-; MIPS64-NEXT:    and $1, $4, $1
-; MIPS64-NEXT:    dsrl $1, $1, 24
-; MIPS64-NEXT:    sll $1, $1, 0
-; MIPS64-NEXT:    addu $1, $1, $2
-; MIPS64-NEXT:    sll $2, $5, 0
-; MIPS64-NEXT:    sll $3, $4, 0
-; MIPS64-NEXT:    dsll $1, $1, 24
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    lui $3, 255
-; MIPS64-NEXT:    ori $3, $3, 65535
-; MIPS64-NEXT:    and $2, $2, $3
-; MIPS64-NEXT:    or $2, $2, $1
+; MIPS64-NEXT:    sll $1, $6, 0
+; MIPS64-NEXT:    sll $2, $4, 0
+; MIPS64-NEXT:    addu $2, $2, $1
+; MIPS64-NEXT:    sll $1, $7, 0
+; MIPS64-NEXT:    sll $3, $5, 0
+; MIPS64-NEXT:    addu $3, $3, $1
 ; MIPS64-NEXT:    jr $ra
 ; MIPS64-NEXT:    nop
 ;
@@ -6689,56 +6677,14 @@ define <2 x i24> @i24x2(<2 x i24> %a, <2 x i24> %b) {
 ;
 ; MIPS64R5EB-LABEL: i24x2:
 ; MIPS64R5EB:       # %bb.0: # %Entry
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5EB-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5EB-NEXT:    sh $5, 20($sp)
-; MIPS64R5EB-NEXT:    dsrl $1, $5, 16
-; MIPS64R5EB-NEXT:    sw $1, 16($sp)
-; MIPS64R5EB-NEXT:    sh $4, 28($sp)
-; MIPS64R5EB-NEXT:    dsrl $1, $4, 16
-; MIPS64R5EB-NEXT:    sw $1, 24($sp)
-; MIPS64R5EB-NEXT:    lbu $1, 20($sp)
-; MIPS64R5EB-NEXT:    dsll $1, $1, 8
-; MIPS64R5EB-NEXT:    lb $2, 19($sp)
-; MIPS64R5EB-NEXT:    dsll $2, $2, 16
-; MIPS64R5EB-NEXT:    or $1, $2, $1
-; MIPS64R5EB-NEXT:    lbu $2, 28($sp)
-; MIPS64R5EB-NEXT:    dsll $2, $2, 8
-; MIPS64R5EB-NEXT:    lb $3, 27($sp)
-; MIPS64R5EB-NEXT:    dsll $3, $3, 16
-; MIPS64R5EB-NEXT:    lbu $4, 21($sp)
-; MIPS64R5EB-NEXT:    or $2, $3, $2
-; MIPS64R5EB-NEXT:    or $1, $4, $1
-; MIPS64R5EB-NEXT:    lh $3, 16($sp)
-; MIPS64R5EB-NEXT:    dsll $3, $3, 8
-; MIPS64R5EB-NEXT:    lbu $4, 18($sp)
-; MIPS64R5EB-NEXT:    or $3, $4, $3
-; MIPS64R5EB-NEXT:    lbu $4, 29($sp)
-; MIPS64R5EB-NEXT:    insert.d $w0[0], $3
-; MIPS64R5EB-NEXT:    insert.d $w0[1], $1
-; MIPS64R5EB-NEXT:    or $1, $4, $2
-; MIPS64R5EB-NEXT:    lh $2, 24($sp)
-; MIPS64R5EB-NEXT:    dsll $2, $2, 8
-; MIPS64R5EB-NEXT:    lbu $3, 26($sp)
-; MIPS64R5EB-NEXT:    or $2, $3, $2
-; MIPS64R5EB-NEXT:    insert.d $w1[0], $2
-; MIPS64R5EB-NEXT:    insert.d $w1[1], $1
+; MIPS64R5EB-NEXT:    insert.d $w0[0], $6
+; MIPS64R5EB-NEXT:    insert.d $w0[1], $7
+; MIPS64R5EB-NEXT:    insert.d $w1[0], $4
+; MIPS64R5EB-NEXT:    insert.d $w1[1], $5
 ; MIPS64R5EB-NEXT:    addv.d $w0, $w1, $w0
-; MIPS64R5EB-NEXT:    copy_s.d $1, $w0[1]
-; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[0]
-; MIPS64R5EB-NEXT:    sb $2, 10($sp)
-; MIPS64R5EB-NEXT:    dsrl $3, $1, 16
-; MIPS64R5EB-NEXT:    sb $3, 11($sp)
-; MIPS64R5EB-NEXT:    dsrl $2, $2, 8
-; MIPS64R5EB-NEXT:    sh $2, 8($sp)
-; MIPS64R5EB-NEXT:    sb $1, 13($sp)
-; MIPS64R5EB-NEXT:    dsrl $1, $1, 8
-; MIPS64R5EB-NEXT:    sb $1, 12($sp)
-; MIPS64R5EB-NEXT:    lw $1, 8($sp)
-; MIPS64R5EB-NEXT:    dsll $1, $1, 16
-; MIPS64R5EB-NEXT:    lhu $2, 12($sp)
-; MIPS64R5EB-NEXT:    or $2, $2, $1
-; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT:    shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT:    copy_s.w $2, $w0[1]
+; MIPS64R5EB-NEXT:    copy_s.w $3, $w0[3]
 ; MIPS64R5EB-NEXT:    jr $ra
 ; MIPS64R5EB-NEXT:    nop
 ;
@@ -6772,56 +6718,13 @@ define <2 x i24> @i24x2(<2 x i24> %a, <2 x i24> %b) {
 ;
 ; MIPS64R5EL-LABEL: i24x2:
 ; MIPS64R5EL:       # %bb.0: # %Entry
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, -32
-; MIPS64R5EL-NEXT:    .cfi_def_cfa_offset 32
-; MIPS64R5EL-NEXT:    dsrl $1, $5, 32
-; MIPS64R5EL-NEXT:    sh $1, 20($sp)
-; MIPS64R5EL-NEXT:    sw $5, 16($sp)
-; MIPS64R5EL-NEXT:    dsrl $1, $4, 32
-; MIPS64R5EL-NEXT:    sh $1, 28($sp)
-; MIPS64R5EL-NEXT:    lbu $1, 20($sp)
-; MIPS64R5EL-NEXT:    sw $4, 24($sp)
-; MIPS64R5EL-NEXT:    dsll $1, $1, 8
-; MIPS64R5EL-NEXT:    lbu $2, 19($sp)
-; MIPS64R5EL-NEXT:    or $1, $1, $2
-; MIPS64R5EL-NEXT:    lb $2, 21($sp)
-; MIPS64R5EL-NEXT:    dsll $2, $2, 16
-; MIPS64R5EL-NEXT:    lbu $3, 28($sp)
-; MIPS64R5EL-NEXT:    dsll $3, $3, 8
-; MIPS64R5EL-NEXT:    lb $4, 18($sp)
-; MIPS64R5EL-NEXT:    lbu $5, 27($sp)
-; MIPS64R5EL-NEXT:    or $3, $3, $5
-; MIPS64R5EL-NEXT:    or $1, $1, $2
-; MIPS64R5EL-NEXT:    dsll $2, $4, 16
-; MIPS64R5EL-NEXT:    lhu $4, 16($sp)
-; MIPS64R5EL-NEXT:    or $2, $4, $2
-; MIPS64R5EL-NEXT:    lb $4, 29($sp)
-; MIPS64R5EL-NEXT:    dsll $4, $4, 16
-; MIPS64R5EL-NEXT:    insert.d $w0[0], $2
-; MIPS64R5EL-NEXT:    insert.d $w0[1], $1
-; MIPS64R5EL-NEXT:    or $1, $3, $4
-; MIPS64R5EL-NEXT:    lb $2, 26($sp)
-; MIPS64R5EL-NEXT:    dsll $2, $2, 16
-; MIPS64R5EL-NEXT:    lhu $3, 24($sp)
-; MIPS64R5EL-NEXT:    or $2, $3, $2
-; MIPS64R5EL-NEXT:    insert.d $w1[0], $2
-; MIPS64R5EL-NEXT:    insert.d $w1[1], $1
+; MIPS64R5EL-NEXT:    insert.d $w0[0], $6
+; MIPS64R5EL-NEXT:    insert.d $w0[1], $7
+; MIPS64R5EL-NEXT:    insert.d $w1[0], $4
+; MIPS64R5EL-NEXT:    insert.d $w1[1], $5
 ; MIPS64R5EL-NEXT:    addv.d $w0, $w1, $w0
-; MIPS64R5EL-NEXT:    copy_s.d $1, $w0[0]
-; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[1]
-; MIPS64R5EL-NEXT:    dsrl $3, $2, 8
-; MIPS64R5EL-NEXT:    sb $3, 12($sp)
-; MIPS64R5EL-NEXT:    dsrl $3, $2, 16
-; MIPS64R5EL-NEXT:    sb $3, 13($sp)
-; MIPS64R5EL-NEXT:    sb $2, 11($sp)
-; MIPS64R5EL-NEXT:    sh $1, 8($sp)
-; MIPS64R5EL-NEXT:    dsrl $1, $1, 16
-; MIPS64R5EL-NEXT:    sb $1, 10($sp)
-; MIPS64R5EL-NEXT:    lh $1, 12($sp)
-; MIPS64R5EL-NEXT:    dsll $1, $1, 32
-; MIPS64R5EL-NEXT:    lwu $2, 8($sp)
-; MIPS64R5EL-NEXT:    or $2, $2, $1
-; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT:    copy_s.w $2, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.w $3, $w0[2]
 ; MIPS64R5EL-NEXT:    jr $ra
 ; MIPS64R5EL-NEXT:    nop
 Entry:
@@ -6868,17 +6771,22 @@ define void @call_i24x2() {
 ; MIPS64EB-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i24x2)))
 ; MIPS64EB-NEXT:    daddu $1, $1, $25
 ; MIPS64EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i24x2)))
-; MIPS64EB-NEXT:    lui $1, 1536
-; MIPS64EB-NEXT:    ori $4, $1, 7
-; MIPS64EB-NEXT:    lui $1, 3072
-; MIPS64EB-NEXT:    ori $5, $1, 8
 ; MIPS64EB-NEXT:    ld $25, %call16(i24x2)($gp)
+; MIPS64EB-NEXT:    daddiu $4, $zero, 6
+; MIPS64EB-NEXT:    daddiu $5, $zero, 7
+; MIPS64EB-NEXT:    daddiu $6, $zero, 12
+; MIPS64EB-NEXT:    daddiu $7, $zero, 8
 ; MIPS64EB-NEXT:    jalr $25
 ; MIPS64EB-NEXT:    nop
 ; MIPS64EB-NEXT:    ld $1, %got_disp(gv2i24)($gp)
-; MIPS64EB-NEXT:    sh $2, 4($1)
-; MIPS64EB-NEXT:    dsrl $2, $2, 16
-; MIPS64EB-NEXT:    sw $2, 0($1)
+; MIPS64EB-NEXT:    sb $3, 5($1)
+; MIPS64EB-NEXT:    sb $2, 2($1)
+; MIPS64EB-NEXT:    srl $4, $3, 8
+; MIPS64EB-NEXT:    sb $4, 4($1)
+; MIPS64EB-NEXT:    srl $3, $3, 16
+; MIPS64EB-NEXT:    sb $3, 3($1)
+; MIPS64EB-NEXT:    srl $2, $2, 8
+; MIPS64EB-NEXT:    sh $2, 0($1)
 ; MIPS64EB-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
 ; MIPS64EB-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
 ; MIPS64EB-NEXT:    daddiu $sp, $sp, 16
@@ -6923,31 +6831,27 @@ define void @call_i24x2() {
 ; MIPS64R5EB-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i24x2)))
 ; MIPS64R5EB-NEXT:    daddu $1, $1, $25
 ; MIPS64R5EB-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i24x2)))
-; MIPS64R5EB-NEXT:    lui $1, 1536
-; MIPS64R5EB-NEXT:    ori $1, $1, 7
-; MIPS64R5EB-NEXT:    swl $1, 2($sp)
-; MIPS64R5EB-NEXT:    lui $2, 3072
-; MIPS64R5EB-NEXT:    ori $2, $2, 8
-; MIPS64R5EB-NEXT:    swl $2, 10($sp)
-; MIPS64R5EB-NEXT:    sh $zero, 0($sp)
-; MIPS64R5EB-NEXT:    swr $1, 5($sp)
-; MIPS64R5EB-NEXT:    sh $zero, 8($sp)
-; MIPS64R5EB-NEXT:    swr $2, 13($sp)
-; MIPS64R5EB-NEXT:    lw $1, 0($sp)
-; MIPS64R5EB-NEXT:    dsll $1, $1, 16
-; MIPS64R5EB-NEXT:    lhu $2, 4($sp)
-; MIPS64R5EB-NEXT:    or $4, $2, $1
-; MIPS64R5EB-NEXT:    lw $1, 8($sp)
-; MIPS64R5EB-NEXT:    dsll $1, $1, 16
-; MIPS64R5EB-NEXT:    lhu $2, 12($sp)
-; MIPS64R5EB-NEXT:    or $5, $2, $1
 ; MIPS64R5EB-NEXT:    ld $25, %call16(i24x2)($gp)
+; MIPS64R5EB-NEXT:    daddiu $4, $zero, 6
+; MIPS64R5EB-NEXT:    daddiu $5, $zero, 7
+; MIPS64R5EB-NEXT:    daddiu $6, $zero, 12
+; MIPS64R5EB-NEXT:    daddiu $7, $zero, 8
 ; MIPS64R5EB-NEXT:    jalr $25
 ; MIPS64R5EB-NEXT:    nop
-; MIPS64R5EB-NEXT:    ld $1, %got_disp(gv2i24)($gp)
-; MIPS64R5EB-NEXT:    sh $2, 4($1)
+; MIPS64R5EB-NEXT:    sw $3, 12($sp)
+; MIPS64R5EB-NEXT:    sw $2, 4($sp)
+; MIPS64R5EB-NEXT:    ld.d $w0, 0($sp)
+; MIPS64R5EB-NEXT:    copy_s.d $1, $w0[0]
+; MIPS64R5EB-NEXT:    copy_s.d $2, $w0[1]
+; MIPS64R5EB-NEXT:    ld $3, %got_disp(gv2i24)($gp)
+; MIPS64R5EB-NEXT:    sb $2, 5($3)
+; MIPS64R5EB-NEXT:    sb $1, 2($3)
+; MIPS64R5EB-NEXT:    dsrl $4, $2, 8
+; MIPS64R5EB-NEXT:    sb $4, 4($3)
 ; MIPS64R5EB-NEXT:    dsrl $2, $2, 16
-; MIPS64R5EB-NEXT:    sw $2, 0($1)
+; MIPS64R5EB-NEXT:    sb $2, 3($3)
+; MIPS64R5EB-NEXT:    dsrl $1, $1, 8
+; MIPS64R5EB-NEXT:    sh $1, 0($3)
 ; MIPS64R5EB-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
 ; MIPS64R5EB-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
 ; MIPS64R5EB-NEXT:    daddiu $sp, $sp, 32
@@ -6992,17 +6896,22 @@ define void @call_i24x2() {
 ; MIPS64EL-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i24x2)))
 ; MIPS64EL-NEXT:    daddu $1, $1, $25
 ; MIPS64EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i24x2)))
-; MIPS64EL-NEXT:    lui $1, 1792
-; MIPS64EL-NEXT:    ori $4, $1, 6
-; MIPS64EL-NEXT:    lui $1, 2048
-; MIPS64EL-NEXT:    ori $5, $1, 12
 ; MIPS64EL-NEXT:    ld $25, %call16(i24x2)($gp)
+; MIPS64EL-NEXT:    daddiu $4, $zero, 6
+; MIPS64EL-NEXT:    daddiu $5, $zero, 7
+; MIPS64EL-NEXT:    daddiu $6, $zero, 12
+; MIPS64EL-NEXT:    daddiu $7, $zero, 8
 ; MIPS64EL-NEXT:    jalr $25
 ; MIPS64EL-NEXT:    nop
 ; MIPS64EL-NEXT:    ld $1, %got_disp(gv2i24)($gp)
-; MIPS64EL-NEXT:    sw $2, 0($1)
-; MIPS64EL-NEXT:    dsrl $2, $2, 32
-; MIPS64EL-NEXT:    sh $2, 4($1)
+; MIPS64EL-NEXT:    sb $3, 3($1)
+; MIPS64EL-NEXT:    sh $2, 0($1)
+; MIPS64EL-NEXT:    srl $4, $3, 8
+; MIPS64EL-NEXT:    sb $4, 4($1)
+; MIPS64EL-NEXT:    srl $3, $3, 16
+; MIPS64EL-NEXT:    sb $3, 5($1)
+; MIPS64EL-NEXT:    srl $2, $2, 16
+; MIPS64EL-NEXT:    sb $2, 2($1)
 ; MIPS64EL-NEXT:    ld $gp, 0($sp) # 8-byte Folded Reload
 ; MIPS64EL-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
 ; MIPS64EL-NEXT:    daddiu $sp, $sp, 16
@@ -7047,31 +6956,27 @@ define void @call_i24x2() {
 ; MIPS64R5EL-NEXT:    lui $1, %hi(%neg(%gp_rel(call_i24x2)))
 ; MIPS64R5EL-NEXT:    daddu $1, $1, $25
 ; MIPS64R5EL-NEXT:    daddiu $gp, $1, %lo(%neg(%gp_rel(call_i24x2)))
-; MIPS64R5EL-NEXT:    addiu $1, $zero, 1792
-; MIPS64R5EL-NEXT:    swl $1, 5($sp)
-; MIPS64R5EL-NEXT:    addiu $2, $zero, 2048
-; MIPS64R5EL-NEXT:    swl $2, 13($sp)
-; MIPS64R5EL-NEXT:    swr $1, 2($sp)
-; MIPS64R5EL-NEXT:    daddiu $1, $zero, 6
-; MIPS64R5EL-NEXT:    sh $1, 0($sp)
-; MIPS64R5EL-NEXT:    swr $2, 10($sp)
-; MIPS64R5EL-NEXT:    daddiu $1, $zero, 12
-; MIPS64R5EL-NEXT:    sh $1, 8($sp)
-; MIPS64R5EL-NEXT:    lh $1, 4($sp)
-; MIPS64R5EL-NEXT:    dsll $1, $1, 32
-; MIPS64R5EL-NEXT:    lwu $2, 0($sp)
-; MIPS64R5EL-NEXT:    or $4, $2, $1
-; MIPS64R5EL-NEXT:    lh $1, 12($sp)
-; MIPS64R5EL-NEXT:    dsll $1, $1, 32
-; MIPS64R5EL-NEXT:    lwu $2, 8($sp)
-; MIPS64R5EL-NEXT:    or $5, $2, $1
 ; MIPS64R5EL-NEXT:    ld $25, %call16(i24x2)($gp)
+; MIPS64R5EL-NEXT:    daddiu $4, $zero, 6
+; MIPS64R5EL-NEXT:    daddiu $5, $zero, 7
+; MIPS64R5EL-NEXT:    daddiu $6, $zero, 12
+; MIPS64R5EL-NEXT:    daddiu $7, $zero, 8
 ; MIPS64R5EL-NEXT:    jalr $25
 ; MIPS64R5EL-NEXT:    nop
-; MIPS64R5EL-NEXT:    ld $1, %got_disp(gv2i24)($gp)
-; MIPS64R5EL-NEXT:    sw $2, 0($1)
-; MIPS64R5EL-NEXT:    dsrl $2, $2, 32
-; MIPS64R5EL-NEXT:    sh $2, 4($1)
+; MIPS64R5EL-NEXT:    sw $3, 8($sp)
+; MIPS64R5EL-NEXT:    sw $2, 0($sp)
+; MIPS64R5EL-NEXT:    ld.d $w0, 0($sp)
+; MIPS64R5EL-NEXT:    copy_s.d $1, $w0[0]
+; MIPS64R5EL-NEXT:    copy_s.d $2, $w0[1]
+; MIPS64R5EL-NEXT:    ld $3, %got_disp(gv2i24)($gp)
+; MIPS64R5EL-NEXT:    sb $2, 3($3)
+; MIPS64R5EL-NEXT:    sh $1, 0($3)
+; MIPS64R5EL-NEXT:    dsrl $4, $2, 8
+; MIPS64R5EL-NEXT:    sb $4, 4($3)
+; MIPS64R5EL-NEXT:    dsrl $2, $2, 16
+; MIPS64R5EL-NEXT:    sb $2, 5($3)
+; MIPS64R5EL-NEXT:    dsrl $1, $1, 16
+; MIPS64R5EL-NEXT:    sb $1, 2($3)
 ; MIPS64R5EL-NEXT:    ld $gp, 16($sp) # 8-byte Folded Reload
 ; MIPS64R5EL-NEXT:    ld $ra, 24($sp) # 8-byte Folded Reload
 ; MIPS64R5EL-NEXT:    daddiu $sp, $sp, 32


        


More information about the llvm-commits mailing list