[llvm] e49103b - [Mips] Fix argument lowering for illegal vector types (PR63608)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 24 03:07:18 PDT 2023
Author: Nikita Popov
Date: 2023-07-24T12:07:09+02:00
New Revision: e49103b2790f346b436b280a2da7a1c6a9f2ab3d
URL: https://github.com/llvm/llvm-project/commit/e49103b2790f346b436b280a2da7a1c6a9f2ab3d
DIFF: https://github.com/llvm/llvm-project/commit/e49103b2790f346b436b280a2da7a1c6a9f2ab3d.diff
LOG: [Mips] Fix argument lowering for illegal vector types (PR63608)
The Mips MSA ABI requires that legal vector types are passed in
scalar registers using a packed representation, e.g. a type like
v16i8 is passed as two i64 registers.
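For illustration, the packed convention is visible in the
arg_v2i32 test added below: on mips64 the whole vector argument
arrives packed in the single 64-bit register $4 and is spilled
with one doubleword store.

define void @arg_v2i32(<2 x i32> %vec, ptr %p) {
  store <2 x i32> %vec, ptr %p ; mips64 emits a single: sd $4, 0($5)
  ret void
}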
The implementation attempts to do the same for illegal vectors
with non-power-of-two element counts or non-power-of-two element
types. However, the SDAG argument lowering code doesn't support
this, and extending it would be non-trivial (we would have to
deal with situations like passing v7i18 as two i64 values, where
individual elements straddle the register boundary).
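To make the difficulty concrete: 7 x 18 = 126 bits, which would
have to be packed into two i64 registers (128 bits). Since 64 is
not a multiple of 18, one element (bits 54-71 in a packed layout)
would straddle the boundary between the two registers, which the
generic argument lowering has no way to express.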
This patch instead opts to restrict the special argument lowering
to vectors with a power-of-two number of elements and round
(power-of-two-sized) element types. Everything else is lowered
naively, that is, by passing each element in its own promoted
register.
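For example, in the arg_v3i32 test added below, each element of
the <3 x i32> argument now arrives in its own register ($4, $5
and $6 on mips64, each i32 promoted to a 64-bit register) instead
of being packed:

define <3 x i32> @arg_v3i32(<3 x i32> %vec, ptr %p) {
  store <3 x i32> %vec, ptr %p ; elements arrive in $4, $5, $6
  ret <3 x i32> %vec
}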
Fixes https://github.com/llvm/llvm-project/issues/63608.
Differential Revision: https://reviews.llvm.org/D154445
Added:
llvm/test/CodeGen/Mips/cconv/illegal-vectors.ll
Modified:
llvm/lib/Target/Mips/MipsISelLowering.cpp
llvm/test/CodeGen/Mips/cconv/vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 2851dac53bdf42..18d7773067f137 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -102,29 +102,37 @@ MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
if (!VT.isVector())
return getRegisterType(Context, VT);
- return Subtarget.isABI_O32() || VT.getSizeInBits() == 32 ? MVT::i32
- : MVT::i64;
+ if (VT.isPow2VectorType() && VT.getVectorElementType().isRound())
+ return Subtarget.isABI_O32() || VT.getSizeInBits() == 32 ? MVT::i32
+ : MVT::i64;
+ return getRegisterType(Context, VT.getVectorElementType());
}
unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- if (VT.isVector())
- return divideCeil(VT.getSizeInBits(), Subtarget.isABI_O32() ? 32 : 64);
+ if (VT.isVector()) {
+ if (VT.isPow2VectorType() && VT.getVectorElementType().isRound())
+ return divideCeil(VT.getSizeInBits(), Subtarget.isABI_O32() ? 32 : 64);
+ return VT.getVectorNumElements() *
+ getNumRegisters(Context, VT.getVectorElementType());
+ }
return MipsTargetLowering::getNumRegisters(Context, VT);
}
unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const {
- // Break down vector types to either 2 i64s or 4 i32s.
- RegisterVT = getRegisterTypeForCallingConv(Context, CC, VT);
- IntermediateVT = RegisterVT;
- NumIntermediates =
- VT.getFixedSizeInBits() < RegisterVT.getFixedSizeInBits()
- ? VT.getVectorNumElements()
- : divideCeil(VT.getSizeInBits(), RegisterVT.getSizeInBits());
- return NumIntermediates;
+ if (VT.isPow2VectorType()) {
+ IntermediateVT = getRegisterTypeForCallingConv(Context, CC, VT);
+ RegisterVT = IntermediateVT.getSimpleVT();
+ NumIntermediates = getNumRegistersForCallingConv(Context, CC, VT);
+ return NumIntermediates;
+ }
+ IntermediateVT = VT.getVectorElementType();
+ NumIntermediates = VT.getVectorNumElements();
+ RegisterVT = getRegisterType(Context, IntermediateVT);
+ return NumIntermediates * getNumRegisters(Context, IntermediateVT);
}
SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
diff --git a/llvm/test/CodeGen/Mips/cconv/illegal-vectors.ll b/llvm/test/CodeGen/Mips/cconv/illegal-vectors.ll
new file mode 100644
index 00000000000000..5cb5972f677536
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/cconv/illegal-vectors.ll
@@ -0,0 +1,1809 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=mips64 < %s | FileCheck %s --check-prefix=MIPS64
+; RUN: llc -mtriple=mips < %s | FileCheck %s --check-prefix=MIPS32
+
+define void @arg_v1i32(<1 x i32> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v1i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sw $4, 0($5)
+;
+; MIPS32-LABEL: arg_v1i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sw $4, 0($5)
+ store <1 x i32> %vec, ptr %p
+ ret void
+}
+
+define <1 x i32> @ret_v1i32(ptr %p) {
+; MIPS64-LABEL: ret_v1i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: lw $2, 0($4)
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: nop
+;
+; MIPS32-LABEL: ret_v1i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lw $2, 0($4)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: nop
+ %v = load <1 x i32>, ptr %p
+ ret <1 x i32> %v
+}
+
+define void @call_v1i32(ptr %p) nounwind {
+; MIPS64-LABEL: call_v1i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: daddiu $sp, $sp, -16
+; MIPS64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: move $16, $4
+; MIPS64-NEXT: lw $4, 0($4)
+; MIPS64-NEXT: jal arg_v1i32
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: jal ret_v1i32
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: sw $2, 0($16)
+; MIPS64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddiu $sp, $sp, 16
+;
+; MIPS32-LABEL: call_v1i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: addiu $sp, $sp, -24
+; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $16, $4
+; MIPS32-NEXT: lw $4, 0($4)
+; MIPS32-NEXT: jal arg_v1i32
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: jal ret_v1i32
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: sw $2, 0($16)
+; MIPS32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addiu $sp, $sp, 24
+ %v1 = load <1 x i32>, ptr %p
+ call void @arg_v1i32(<1 x i32> %v1)
+ %v2 = call <1 x i32> @ret_v1i32()
+ store <1 x i32> %v2, ptr %p
+ ret void
+}
+
+define void @arg_v2i32(<2 x i32> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v2i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sd $4, 0($5)
+;
+; MIPS32-LABEL: arg_v2i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: sw $5, 4($6)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sw $4, 0($6)
+ store <2 x i32> %vec, ptr %p
+ ret void
+}
+
+define <2 x i32> @ret_v2i32(ptr %p) {
+; MIPS64-LABEL: ret_v2i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: ld $2, 0($4)
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: nop
+;
+; MIPS32-LABEL: ret_v2i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lw $2, 0($4)
+; MIPS32-NEXT: lw $3, 4($4)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: nop
+ %v = load <2 x i32>, ptr %p
+ ret <2 x i32> %v
+}
+
+define void @call_v2i32(ptr %p) nounwind {
+; MIPS64-LABEL: call_v2i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: daddiu $sp, $sp, -16
+; MIPS64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: move $16, $4
+; MIPS64-NEXT: ld $4, 0($4)
+; MIPS64-NEXT: jal arg_v2i32
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: jal ret_v2i32
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: sd $2, 0($16)
+; MIPS64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddiu $sp, $sp, 16
+;
+; MIPS32-LABEL: call_v2i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: addiu $sp, $sp, -24
+; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $16, $4
+; MIPS32-NEXT: lw $5, 4($4)
+; MIPS32-NEXT: lw $4, 0($4)
+; MIPS32-NEXT: jal arg_v2i32
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: jal ret_v2i32
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: sw $3, 4($16)
+; MIPS32-NEXT: sw $2, 0($16)
+; MIPS32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addiu $sp, $sp, 24
+ %v1 = load <2 x i32>, ptr %p
+ call void @arg_v2i32(<2 x i32> %v1)
+ %v2 = call <2 x i32> @ret_v2i32()
+ store <2 x i32> %v2, ptr %p
+ ret void
+}
+
+define <3 x i32> @arg_v3i32(<3 x i32> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v3i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: daddiu $1, $zero, 1
+; MIPS64-NEXT: dsll $1, $1, 32
+; MIPS64-NEXT: daddiu $2, $1, -1
+; MIPS64-NEXT: sll $1, $6, 0
+; MIPS64-NEXT: sw $1, 8($7)
+; MIPS64-NEXT: and $2, $5, $2
+; MIPS64-NEXT: dsll $3, $4, 32
+; MIPS64-NEXT: or $2, $2, $3
+; MIPS64-NEXT: sd $2, 0($7)
+; MIPS64-NEXT: sll $2, $4, 0
+; MIPS64-NEXT: sll $3, $5, 0
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: move $4, $1
+;
+; MIPS32-LABEL: arg_v3i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: sw $6, 8($7)
+; MIPS32-NEXT: sw $5, 4($7)
+; MIPS32-NEXT: sw $4, 0($7)
+; MIPS32-NEXT: move $2, $4
+; MIPS32-NEXT: move $3, $5
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: move $4, $6
+ store <3 x i32> %vec, ptr %p
+ ret <3 x i32> %vec
+}
+
+define <3 x i32> @ret_v3i32(ptr %p) {
+; MIPS64-LABEL: ret_v3i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: lw $1, 8($4)
+; MIPS64-NEXT: ld $2, 0($4)
+; MIPS64-NEXT: sll $3, $2, 0
+; MIPS64-NEXT: dsrl $2, $2, 32
+; MIPS64-NEXT: sll $2, $2, 0
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: move $4, $1
+;
+; MIPS32-LABEL: ret_v3i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lw $2, 0($4)
+; MIPS32-NEXT: lw $3, 4($4)
+; MIPS32-NEXT: lw $4, 8($4)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: nop
+ %v = load <3 x i32>, ptr %p
+ ret <3 x i32> %v
+}
+
+define void @call_v3i32(ptr %p) nounwind {
+; MIPS64-LABEL: call_v3i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: daddiu $sp, $sp, -16
+; MIPS64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: move $16, $4
+; MIPS64-NEXT: lw $6, 8($4)
+; MIPS64-NEXT: ld $5, 0($4)
+; MIPS64-NEXT: jal arg_v3i32
+; MIPS64-NEXT: dsrl $4, $5, 32
+; MIPS64-NEXT: jal ret_v3i32
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: # kill: def $v0 killed $v0 def $v0_64
+; MIPS64-NEXT: sw $4, 8($16)
+; MIPS64-NEXT: dsll $1, $2, 32
+; MIPS64-NEXT: dsll $2, $3, 32
+; MIPS64-NEXT: dsrl $2, $2, 32
+; MIPS64-NEXT: or $1, $2, $1
+; MIPS64-NEXT: sd $1, 0($16)
+; MIPS64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddiu $sp, $sp, 16
+;
+; MIPS32-LABEL: call_v3i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: addiu $sp, $sp, -24
+; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $16, $4
+; MIPS32-NEXT: lw $6, 8($4)
+; MIPS32-NEXT: lw $5, 4($4)
+; MIPS32-NEXT: lw $4, 0($4)
+; MIPS32-NEXT: jal arg_v3i32
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: jal ret_v3i32
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: sw $4, 8($16)
+; MIPS32-NEXT: sw $3, 4($16)
+; MIPS32-NEXT: sw $2, 0($16)
+; MIPS32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addiu $sp, $sp, 24
+ %v1 = load <3 x i32>, ptr %p
+ call void @arg_v3i32(<3 x i32> %v1)
+ %v2 = call <3 x i32> @ret_v3i32()
+ store <3 x i32> %v2, ptr %p
+ ret void
+}
+
+define void @arg_v4i32(<4 x i32> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v4i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: sd $5, 8($6)
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sd $4, 0($6)
+;
+; MIPS32-LABEL: arg_v4i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lw $1, 16($sp)
+; MIPS32-NEXT: sw $7, 12($1)
+; MIPS32-NEXT: sw $6, 8($1)
+; MIPS32-NEXT: sw $5, 4($1)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sw $4, 0($1)
+ store <4 x i32> %vec, ptr %p
+ ret void
+}
+
+define <4 x i32> @ret_v4i32(ptr %p) {
+; MIPS64-LABEL: ret_v4i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: ld $2, 0($4)
+; MIPS64-NEXT: ld $3, 8($4)
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: nop
+;
+; MIPS32-LABEL: ret_v4i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lw $2, 0($4)
+; MIPS32-NEXT: lw $3, 4($4)
+; MIPS32-NEXT: lw $1, 8($4)
+; MIPS32-NEXT: lw $5, 12($4)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: move $4, $1
+ %v = load <4 x i32>, ptr %p
+ ret <4 x i32> %v
+}
+
+define void @call_v4i32(ptr %p) nounwind {
+; MIPS64-LABEL: call_v4i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: daddiu $sp, $sp, -16
+; MIPS64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: move $16, $4
+; MIPS64-NEXT: ld $5, 8($4)
+; MIPS64-NEXT: ld $4, 0($4)
+; MIPS64-NEXT: jal arg_v4i32
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: jal ret_v4i32
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: sd $3, 8($16)
+; MIPS64-NEXT: sd $2, 0($16)
+; MIPS64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddiu $sp, $sp, 16
+;
+; MIPS32-LABEL: call_v4i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: addiu $sp, $sp, -24
+; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $16, $4
+; MIPS32-NEXT: lw $7, 12($4)
+; MIPS32-NEXT: lw $6, 8($4)
+; MIPS32-NEXT: lw $5, 4($4)
+; MIPS32-NEXT: lw $4, 0($4)
+; MIPS32-NEXT: jal arg_v4i32
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: jal ret_v4i32
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: sw $5, 12($16)
+; MIPS32-NEXT: sw $4, 8($16)
+; MIPS32-NEXT: sw $3, 4($16)
+; MIPS32-NEXT: sw $2, 0($16)
+; MIPS32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addiu $sp, $sp, 24
+ %v1 = load <4 x i32>, ptr %p
+ call void @arg_v4i32(<4 x i32> %v1)
+ %v2 = call <4 x i32> @ret_v4i32()
+ store <4 x i32> %v2, ptr %p
+ ret void
+}
+
+define void @arg_v5i32(<5 x i32> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v5i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: daddiu $1, $zero, 1
+; MIPS64-NEXT: dsll $1, $1, 32
+; MIPS64-NEXT: daddiu $1, $1, -1
+; MIPS64-NEXT: and $2, $7, $1
+; MIPS64-NEXT: dsll $3, $6, 32
+; MIPS64-NEXT: or $2, $2, $3
+; MIPS64-NEXT: sw $8, 16($9)
+; MIPS64-NEXT: sd $2, 8($9)
+; MIPS64-NEXT: and $1, $5, $1
+; MIPS64-NEXT: dsll $2, $4, 32
+; MIPS64-NEXT: or $1, $1, $2
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sd $1, 0($9)
+;
+; MIPS32-LABEL: arg_v5i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lw $1, 20($sp)
+; MIPS32-NEXT: lw $2, 16($sp)
+; MIPS32-NEXT: sw $2, 16($1)
+; MIPS32-NEXT: sw $7, 12($1)
+; MIPS32-NEXT: sw $6, 8($1)
+; MIPS32-NEXT: sw $5, 4($1)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sw $4, 0($1)
+ store <5 x i32> %vec, ptr %p
+ ret void
+}
+
+define <5 x i32> @ret_v5i32(ptr %p) {
+; MIPS64-LABEL: ret_v5i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: lw $1, 16($5)
+; MIPS64-NEXT: sw $1, 16($4)
+; MIPS64-NEXT: ld $1, 8($5)
+; MIPS64-NEXT: sd $1, 8($4)
+; MIPS64-NEXT: ld $1, 0($5)
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sd $1, 0($4)
+;
+; MIPS32-LABEL: ret_v5i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lw $1, 12($5)
+; MIPS32-NEXT: lw $2, 16($5)
+; MIPS32-NEXT: sw $2, 16($4)
+; MIPS32-NEXT: sw $1, 12($4)
+; MIPS32-NEXT: lw $1, 8($5)
+; MIPS32-NEXT: sw $1, 8($4)
+; MIPS32-NEXT: lw $1, 4($5)
+; MIPS32-NEXT: sw $1, 4($4)
+; MIPS32-NEXT: lw $1, 0($5)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sw $1, 0($4)
+ %v = load <5 x i32>, ptr %p
+ ret <5 x i32> %v
+}
+
+define void @call_v5i32(ptr %p) nounwind {
+; MIPS64-LABEL: call_v5i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: daddiu $sp, $sp, -64
+; MIPS64-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $fp, 48($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $16, 40($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: move $fp, $sp
+; MIPS64-NEXT: daddiu $1, $zero, -32
+; MIPS64-NEXT: and $sp, $sp, $1
+; MIPS64-NEXT: move $16, $4
+; MIPS64-NEXT: lw $8, 16($4)
+; MIPS64-NEXT: ld $7, 8($4)
+; MIPS64-NEXT: ld $5, 0($4)
+; MIPS64-NEXT: dsrl $4, $5, 32
+; MIPS64-NEXT: jal arg_v5i32
+; MIPS64-NEXT: dsrl $6, $7, 32
+; MIPS64-NEXT: jal ret_v5i32
+; MIPS64-NEXT: daddiu $4, $sp, 0
+; MIPS64-NEXT: lw $1, 16($sp)
+; MIPS64-NEXT: ld $2, 0($sp)
+; MIPS64-NEXT: sd $2, 0($16)
+; MIPS64-NEXT: ld $2, 8($sp)
+; MIPS64-NEXT: sd $2, 8($16)
+; MIPS64-NEXT: sw $1, 16($16)
+; MIPS64-NEXT: move $sp, $fp
+; MIPS64-NEXT: ld $16, 40($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $fp, 48($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddiu $sp, $sp, 64
+;
+; MIPS32-LABEL: call_v5i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: addiu $sp, $sp, -96
+; MIPS32-NEXT: sw $ra, 92($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $fp, 88($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $16, 84($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $fp, $sp
+; MIPS32-NEXT: addiu $1, $zero, -32
+; MIPS32-NEXT: and $sp, $sp, $1
+; MIPS32-NEXT: move $16, $4
+; MIPS32-NEXT: lw $7, 12($4)
+; MIPS32-NEXT: lw $6, 8($4)
+; MIPS32-NEXT: lw $5, 4($4)
+; MIPS32-NEXT: lw $4, 0($4)
+; MIPS32-NEXT: lw $1, 16($16)
+; MIPS32-NEXT: jal arg_v5i32
+; MIPS32-NEXT: sw $1, 16($sp)
+; MIPS32-NEXT: jal ret_v5i32
+; MIPS32-NEXT: addiu $4, $sp, 32
+; MIPS32-NEXT: lw $1, 36($sp)
+; MIPS32-NEXT: lw $2, 32($sp)
+; MIPS32-NEXT: sw $2, 0($16)
+; MIPS32-NEXT: sw $1, 4($16)
+; MIPS32-NEXT: lw $1, 40($sp)
+; MIPS32-NEXT: sw $1, 8($16)
+; MIPS32-NEXT: lw $1, 44($sp)
+; MIPS32-NEXT: sw $1, 12($16)
+; MIPS32-NEXT: lw $1, 48($sp)
+; MIPS32-NEXT: sw $1, 16($16)
+; MIPS32-NEXT: move $sp, $fp
+; MIPS32-NEXT: lw $16, 84($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $fp, 88($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $ra, 92($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addiu $sp, $sp, 96
+ %v1 = load <5 x i32>, ptr %p
+ call void @arg_v5i32(<5 x i32> %v1)
+ %v2 = call <5 x i32> @ret_v5i32()
+ store <5 x i32> %v2, ptr %p
+ ret void
+}
+
+define void @arg_v8i32(<8 x i32> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v8i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: sd $7, 24($8)
+; MIPS64-NEXT: sd $6, 16($8)
+; MIPS64-NEXT: sd $5, 8($8)
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sd $4, 0($8)
+;
+; MIPS32-LABEL: arg_v8i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lw $1, 16($sp)
+; MIPS32-NEXT: lw $2, 20($sp)
+; MIPS32-NEXT: lw $3, 24($sp)
+; MIPS32-NEXT: lw $8, 32($sp)
+; MIPS32-NEXT: lw $9, 28($sp)
+; MIPS32-NEXT: sw $9, 28($8)
+; MIPS32-NEXT: sw $3, 24($8)
+; MIPS32-NEXT: sw $2, 20($8)
+; MIPS32-NEXT: sw $1, 16($8)
+; MIPS32-NEXT: sw $7, 12($8)
+; MIPS32-NEXT: sw $6, 8($8)
+; MIPS32-NEXT: sw $5, 4($8)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sw $4, 0($8)
+ store <8 x i32> %vec, ptr %p
+ ret void
+}
+
+define <8 x i32> @ret_v8i32(ptr %p) {
+; MIPS64-LABEL: ret_v8i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: ld $1, 24($5)
+; MIPS64-NEXT: sd $1, 24($4)
+; MIPS64-NEXT: ld $1, 16($5)
+; MIPS64-NEXT: sd $1, 16($4)
+; MIPS64-NEXT: ld $1, 8($5)
+; MIPS64-NEXT: sd $1, 8($4)
+; MIPS64-NEXT: ld $1, 0($5)
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sd $1, 0($4)
+;
+; MIPS32-LABEL: ret_v8i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lw $1, 12($5)
+; MIPS32-NEXT: lw $2, 16($5)
+; MIPS32-NEXT: lw $3, 20($5)
+; MIPS32-NEXT: lw $6, 24($5)
+; MIPS32-NEXT: lw $7, 28($5)
+; MIPS32-NEXT: sw $7, 28($4)
+; MIPS32-NEXT: sw $6, 24($4)
+; MIPS32-NEXT: sw $3, 20($4)
+; MIPS32-NEXT: sw $2, 16($4)
+; MIPS32-NEXT: sw $1, 12($4)
+; MIPS32-NEXT: lw $1, 8($5)
+; MIPS32-NEXT: sw $1, 8($4)
+; MIPS32-NEXT: lw $1, 4($5)
+; MIPS32-NEXT: sw $1, 4($4)
+; MIPS32-NEXT: lw $1, 0($5)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sw $1, 0($4)
+ %v = load <8 x i32>, ptr %p
+ ret <8 x i32> %v
+}
+
+define void @call_v8i32(ptr %p) nounwind {
+; MIPS64-LABEL: call_v8i32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: daddiu $sp, $sp, -64
+; MIPS64-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $fp, 48($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $16, 40($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: move $fp, $sp
+; MIPS64-NEXT: daddiu $1, $zero, -32
+; MIPS64-NEXT: move $16, $4
+; MIPS64-NEXT: ld $7, 24($4)
+; MIPS64-NEXT: ld $6, 16($4)
+; MIPS64-NEXT: ld $5, 8($4)
+; MIPS64-NEXT: ld $4, 0($4)
+; MIPS64-NEXT: jal arg_v8i32
+; MIPS64-NEXT: and $sp, $sp, $1
+; MIPS64-NEXT: jal ret_v8i32
+; MIPS64-NEXT: daddiu $4, $sp, 0
+; MIPS64-NEXT: ld $1, 0($sp)
+; MIPS64-NEXT: lw $2, 16($sp)
+; MIPS64-NEXT: lw $3, 20($sp)
+; MIPS64-NEXT: lw $4, 24($sp)
+; MIPS64-NEXT: lw $5, 28($sp)
+; MIPS64-NEXT: sw $5, 28($16)
+; MIPS64-NEXT: sw $4, 24($16)
+; MIPS64-NEXT: sw $3, 20($16)
+; MIPS64-NEXT: sw $2, 16($16)
+; MIPS64-NEXT: lw $2, 12($sp)
+; MIPS64-NEXT: sw $2, 12($16)
+; MIPS64-NEXT: lw $2, 8($sp)
+; MIPS64-NEXT: sw $2, 8($16)
+; MIPS64-NEXT: sd $1, 0($16)
+; MIPS64-NEXT: move $sp, $fp
+; MIPS64-NEXT: ld $16, 40($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $fp, 48($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddiu $sp, $sp, 64
+;
+; MIPS32-LABEL: call_v8i32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: addiu $sp, $sp, -96
+; MIPS32-NEXT: sw $ra, 92($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $fp, 88($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $16, 84($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $fp, $sp
+; MIPS32-NEXT: addiu $1, $zero, -32
+; MIPS32-NEXT: and $sp, $sp, $1
+; MIPS32-NEXT: move $16, $4
+; MIPS32-NEXT: lw $7, 12($4)
+; MIPS32-NEXT: lw $6, 8($4)
+; MIPS32-NEXT: lw $5, 4($4)
+; MIPS32-NEXT: lw $4, 0($4)
+; MIPS32-NEXT: lw $1, 16($16)
+; MIPS32-NEXT: lw $2, 20($16)
+; MIPS32-NEXT: lw $3, 24($16)
+; MIPS32-NEXT: lw $8, 28($16)
+; MIPS32-NEXT: sw $8, 28($sp)
+; MIPS32-NEXT: sw $3, 24($sp)
+; MIPS32-NEXT: sw $2, 20($sp)
+; MIPS32-NEXT: jal arg_v8i32
+; MIPS32-NEXT: sw $1, 16($sp)
+; MIPS32-NEXT: jal ret_v8i32
+; MIPS32-NEXT: addiu $4, $sp, 32
+; MIPS32-NEXT: lw $1, 44($sp)
+; MIPS32-NEXT: lw $2, 48($sp)
+; MIPS32-NEXT: lw $3, 52($sp)
+; MIPS32-NEXT: lw $4, 56($sp)
+; MIPS32-NEXT: lw $5, 60($sp)
+; MIPS32-NEXT: sw $5, 28($16)
+; MIPS32-NEXT: sw $4, 24($16)
+; MIPS32-NEXT: sw $3, 20($16)
+; MIPS32-NEXT: sw $2, 16($16)
+; MIPS32-NEXT: sw $1, 12($16)
+; MIPS32-NEXT: lw $1, 40($sp)
+; MIPS32-NEXT: sw $1, 8($16)
+; MIPS32-NEXT: lw $1, 36($sp)
+; MIPS32-NEXT: sw $1, 4($16)
+; MIPS32-NEXT: lw $1, 32($sp)
+; MIPS32-NEXT: sw $1, 0($16)
+; MIPS32-NEXT: move $sp, $fp
+; MIPS32-NEXT: lw $16, 84($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $fp, 88($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $ra, 92($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addiu $sp, $sp, 96
+ %v1 = load <8 x i32>, ptr %p
+ call void @arg_v8i32(<8 x i32> %v1)
+ %v2 = call <8 x i32> @ret_v8i32()
+ store <8 x i32> %v2, ptr %p
+ ret void
+}
+
+define void @arg_v3i24(<3 x i24> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v3i24:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: sll $1, $6, 0
+; MIPS64-NEXT: srl $2, $1, 8
+; MIPS64-NEXT: sll $3, $4, 0
+; MIPS64-NEXT: sll $4, $5, 0
+; MIPS64-NEXT: sb $1, 8($7)
+; MIPS64-NEXT: sb $4, 5($7)
+; MIPS64-NEXT: sb $3, 2($7)
+; MIPS64-NEXT: sh $2, 6($7)
+; MIPS64-NEXT: srl $1, $4, 8
+; MIPS64-NEXT: sb $1, 4($7)
+; MIPS64-NEXT: srl $1, $4, 16
+; MIPS64-NEXT: sb $1, 3($7)
+; MIPS64-NEXT: srl $1, $3, 8
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sh $1, 0($7)
+;
+; MIPS32-LABEL: arg_v3i24:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: srl $1, $6, 8
+; MIPS32-NEXT: sb $6, 8($7)
+; MIPS32-NEXT: sb $5, 5($7)
+; MIPS32-NEXT: sb $4, 2($7)
+; MIPS32-NEXT: sh $1, 6($7)
+; MIPS32-NEXT: srl $1, $5, 8
+; MIPS32-NEXT: sb $1, 4($7)
+; MIPS32-NEXT: srl $1, $5, 16
+; MIPS32-NEXT: sb $1, 3($7)
+; MIPS32-NEXT: srl $1, $4, 8
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sh $1, 0($7)
+ store <3 x i24> %vec, ptr %p
+ ret void
+}
+
+define <3 x i24> @ret_v3i24(ptr %p) {
+; MIPS64-LABEL: ret_v3i24:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: lbu $1, 8($4)
+; MIPS64-NEXT: lh $2, 6($4)
+; MIPS64-NEXT: sll $3, $2, 8
+; MIPS64-NEXT: lbu $2, 2($4)
+; MIPS64-NEXT: lhu $5, 0($4)
+; MIPS64-NEXT: sll $5, $5, 8
+; MIPS64-NEXT: or $2, $2, $5
+; MIPS64-NEXT: or $1, $1, $3
+; MIPS64-NEXT: lbu $3, 4($4)
+; MIPS64-NEXT: sll $3, $3, 8
+; MIPS64-NEXT: lb $5, 3($4)
+; MIPS64-NEXT: sll $5, $5, 16
+; MIPS64-NEXT: or $3, $5, $3
+; MIPS64-NEXT: lbu $4, 5($4)
+; MIPS64-NEXT: or $3, $4, $3
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: move $4, $1
+;
+; MIPS32-LABEL: ret_v3i24:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lbu $1, 8($4)
+; MIPS32-NEXT: lh $2, 6($4)
+; MIPS32-NEXT: sll $3, $2, 8
+; MIPS32-NEXT: lbu $2, 2($4)
+; MIPS32-NEXT: lhu $5, 0($4)
+; MIPS32-NEXT: sll $5, $5, 8
+; MIPS32-NEXT: or $2, $2, $5
+; MIPS32-NEXT: or $1, $1, $3
+; MIPS32-NEXT: lbu $3, 4($4)
+; MIPS32-NEXT: sll $3, $3, 8
+; MIPS32-NEXT: lb $5, 3($4)
+; MIPS32-NEXT: sll $5, $5, 16
+; MIPS32-NEXT: or $3, $5, $3
+; MIPS32-NEXT: lbu $4, 5($4)
+; MIPS32-NEXT: or $3, $4, $3
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: move $4, $1
+ %v = load <3 x i24>, ptr %p
+ ret <3 x i24> %v
+}
+
+define void @call_v3i24(ptr %p) nounwind {
+; MIPS64-LABEL: call_v3i24:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: daddiu $sp, $sp, -16
+; MIPS64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: move $16, $4
+; MIPS64-NEXT: lbu $1, 4($4)
+; MIPS64-NEXT: lbu $2, 8($4)
+; MIPS64-NEXT: lh $3, 6($4)
+; MIPS64-NEXT: dsll $3, $3, 8
+; MIPS64-NEXT: lbu $4, 2($4)
+; MIPS64-NEXT: lh $5, 0($16)
+; MIPS64-NEXT: dsll $5, $5, 8
+; MIPS64-NEXT: or $4, $4, $5
+; MIPS64-NEXT: or $6, $2, $3
+; MIPS64-NEXT: dsll $1, $1, 8
+; MIPS64-NEXT: lb $2, 3($16)
+; MIPS64-NEXT: dsll $2, $2, 16
+; MIPS64-NEXT: or $1, $2, $1
+; MIPS64-NEXT: lbu $2, 5($16)
+; MIPS64-NEXT: jal arg_v3i24
+; MIPS64-NEXT: or $5, $2, $1
+; MIPS64-NEXT: jal ret_v3i24
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: srl $1, $4, 8
+; MIPS64-NEXT: sb $4, 8($16)
+; MIPS64-NEXT: sb $3, 5($16)
+; MIPS64-NEXT: sb $2, 2($16)
+; MIPS64-NEXT: sh $1, 6($16)
+; MIPS64-NEXT: srl $1, $3, 8
+; MIPS64-NEXT: sb $1, 4($16)
+; MIPS64-NEXT: srl $1, $3, 16
+; MIPS64-NEXT: sb $1, 3($16)
+; MIPS64-NEXT: srl $1, $2, 8
+; MIPS64-NEXT: sh $1, 0($16)
+; MIPS64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddiu $sp, $sp, 16
+;
+; MIPS32-LABEL: call_v3i24:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: addiu $sp, $sp, -24
+; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $16, $4
+; MIPS32-NEXT: lbu $1, 4($4)
+; MIPS32-NEXT: lbu $2, 8($4)
+; MIPS32-NEXT: lh $3, 6($4)
+; MIPS32-NEXT: sll $3, $3, 8
+; MIPS32-NEXT: lbu $4, 2($4)
+; MIPS32-NEXT: lhu $5, 0($16)
+; MIPS32-NEXT: sll $5, $5, 8
+; MIPS32-NEXT: or $4, $4, $5
+; MIPS32-NEXT: or $6, $2, $3
+; MIPS32-NEXT: sll $1, $1, 8
+; MIPS32-NEXT: lb $2, 3($16)
+; MIPS32-NEXT: sll $2, $2, 16
+; MIPS32-NEXT: or $1, $2, $1
+; MIPS32-NEXT: lbu $2, 5($16)
+; MIPS32-NEXT: jal arg_v3i24
+; MIPS32-NEXT: or $5, $2, $1
+; MIPS32-NEXT: jal ret_v3i24
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: srl $1, $4, 8
+; MIPS32-NEXT: sb $4, 8($16)
+; MIPS32-NEXT: sb $3, 5($16)
+; MIPS32-NEXT: sb $2, 2($16)
+; MIPS32-NEXT: sh $1, 6($16)
+; MIPS32-NEXT: srl $1, $3, 8
+; MIPS32-NEXT: sb $1, 4($16)
+; MIPS32-NEXT: srl $1, $3, 16
+; MIPS32-NEXT: sb $1, 3($16)
+; MIPS32-NEXT: srl $1, $2, 8
+; MIPS32-NEXT: sh $1, 0($16)
+; MIPS32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addiu $sp, $sp, 24
+ %v1 = load <3 x i24>, ptr %p
+ call void @arg_v3i24(<3 x i24> %v1)
+ %v2 = call <3 x i24> @ret_v3i24()
+ store <3 x i24> %v2, ptr %p
+ ret void
+}
+
+define void @arg_v4i24(<4 x i24> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v4i24:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: sll $1, $7, 0
+; MIPS64-NEXT: sll $2, $6, 0
+; MIPS64-NEXT: srl $3, $2, 8
+; MIPS64-NEXT: srl $6, $1, 16
+; MIPS64-NEXT: srl $7, $1, 8
+; MIPS64-NEXT: sll $4, $4, 0
+; MIPS64-NEXT: sll $5, $5, 0
+; MIPS64-NEXT: sb $1, 11($8)
+; MIPS64-NEXT: sb $2, 8($8)
+; MIPS64-NEXT: sb $5, 5($8)
+; MIPS64-NEXT: sb $4, 2($8)
+; MIPS64-NEXT: sb $7, 10($8)
+; MIPS64-NEXT: sb $6, 9($8)
+; MIPS64-NEXT: sh $3, 6($8)
+; MIPS64-NEXT: srl $1, $5, 8
+; MIPS64-NEXT: sb $1, 4($8)
+; MIPS64-NEXT: srl $1, $5, 16
+; MIPS64-NEXT: sb $1, 3($8)
+; MIPS64-NEXT: srl $1, $4, 8
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sh $1, 0($8)
+;
+; MIPS32-LABEL: arg_v4i24:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: srl $1, $6, 8
+; MIPS32-NEXT: srl $2, $7, 16
+; MIPS32-NEXT: srl $3, $7, 8
+; MIPS32-NEXT: lw $8, 16($sp)
+; MIPS32-NEXT: sb $7, 11($8)
+; MIPS32-NEXT: sb $6, 8($8)
+; MIPS32-NEXT: sb $5, 5($8)
+; MIPS32-NEXT: sb $4, 2($8)
+; MIPS32-NEXT: sb $3, 10($8)
+; MIPS32-NEXT: sb $2, 9($8)
+; MIPS32-NEXT: sh $1, 6($8)
+; MIPS32-NEXT: srl $1, $5, 8
+; MIPS32-NEXT: sb $1, 4($8)
+; MIPS32-NEXT: srl $1, $5, 16
+; MIPS32-NEXT: sb $1, 3($8)
+; MIPS32-NEXT: srl $1, $4, 8
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sh $1, 0($8)
+ store <4 x i24> %vec, ptr %p
+ ret void
+}
+
+define <4 x i24> @ret_v4i24(ptr %p) {
+; MIPS64-LABEL: ret_v4i24:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: lbu $1, 4($4)
+; MIPS64-NEXT: sll $1, $1, 8
+; MIPS64-NEXT: lbu $2, 3($4)
+; MIPS64-NEXT: sll $2, $2, 16
+; MIPS64-NEXT: or $3, $2, $1
+; MIPS64-NEXT: lbu $5, 5($4)
+; MIPS64-NEXT: lbu $1, 8($4)
+; MIPS64-NEXT: lhu $2, 6($4)
+; MIPS64-NEXT: sll $6, $2, 8
+; MIPS64-NEXT: lbu $2, 2($4)
+; MIPS64-NEXT: lhu $7, 0($4)
+; MIPS64-NEXT: sll $7, $7, 8
+; MIPS64-NEXT: or $2, $2, $7
+; MIPS64-NEXT: or $1, $1, $6
+; MIPS64-NEXT: or $3, $5, $3
+; MIPS64-NEXT: lbu $5, 10($4)
+; MIPS64-NEXT: sll $5, $5, 8
+; MIPS64-NEXT: lbu $6, 9($4)
+; MIPS64-NEXT: sll $6, $6, 16
+; MIPS64-NEXT: or $5, $6, $5
+; MIPS64-NEXT: lbu $4, 11($4)
+; MIPS64-NEXT: or $5, $4, $5
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: move $4, $1
+;
+; MIPS32-LABEL: ret_v4i24:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lbu $1, 4($4)
+; MIPS32-NEXT: sll $1, $1, 8
+; MIPS32-NEXT: lbu $2, 3($4)
+; MIPS32-NEXT: sll $2, $2, 16
+; MIPS32-NEXT: or $3, $2, $1
+; MIPS32-NEXT: lbu $5, 5($4)
+; MIPS32-NEXT: lbu $1, 8($4)
+; MIPS32-NEXT: lhu $2, 6($4)
+; MIPS32-NEXT: sll $6, $2, 8
+; MIPS32-NEXT: lbu $2, 2($4)
+; MIPS32-NEXT: lhu $7, 0($4)
+; MIPS32-NEXT: sll $7, $7, 8
+; MIPS32-NEXT: or $2, $2, $7
+; MIPS32-NEXT: or $1, $1, $6
+; MIPS32-NEXT: or $3, $5, $3
+; MIPS32-NEXT: lbu $5, 10($4)
+; MIPS32-NEXT: sll $5, $5, 8
+; MIPS32-NEXT: lbu $6, 9($4)
+; MIPS32-NEXT: sll $6, $6, 16
+; MIPS32-NEXT: or $5, $6, $5
+; MIPS32-NEXT: lbu $4, 11($4)
+; MIPS32-NEXT: or $5, $4, $5
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: move $4, $1
+ %v = load <4 x i24>, ptr %p
+ ret <4 x i24> %v
+}
+
+define void @call_v4i24(ptr %p) nounwind {
+; MIPS64-LABEL: call_v4i24:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: daddiu $sp, $sp, -16
+; MIPS64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: move $16, $4
+; MIPS64-NEXT: lbu $1, 4($4)
+; MIPS64-NEXT: dsll $1, $1, 8
+; MIPS64-NEXT: lb $2, 3($4)
+; MIPS64-NEXT: dsll $2, $2, 16
+; MIPS64-NEXT: or $1, $2, $1
+; MIPS64-NEXT: lbu $2, 10($4)
+; MIPS64-NEXT: lbu $3, 5($4)
+; MIPS64-NEXT: lbu $5, 8($4)
+; MIPS64-NEXT: lh $4, 6($4)
+; MIPS64-NEXT: dsll $6, $4, 8
+; MIPS64-NEXT: lbu $4, 2($16)
+; MIPS64-NEXT: lh $7, 0($16)
+; MIPS64-NEXT: dsll $7, $7, 8
+; MIPS64-NEXT: or $4, $4, $7
+; MIPS64-NEXT: or $6, $5, $6
+; MIPS64-NEXT: or $5, $3, $1
+; MIPS64-NEXT: dsll $1, $2, 8
+; MIPS64-NEXT: lb $2, 9($16)
+; MIPS64-NEXT: dsll $2, $2, 16
+; MIPS64-NEXT: or $1, $2, $1
+; MIPS64-NEXT: lbu $2, 11($16)
+; MIPS64-NEXT: jal arg_v4i24
+; MIPS64-NEXT: or $7, $2, $1
+; MIPS64-NEXT: jal ret_v4i24
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: srl $1, $4, 8
+; MIPS64-NEXT: srl $6, $5, 16
+; MIPS64-NEXT: srl $7, $5, 8
+; MIPS64-NEXT: sb $5, 11($16)
+; MIPS64-NEXT: sb $4, 8($16)
+; MIPS64-NEXT: sb $3, 5($16)
+; MIPS64-NEXT: sb $2, 2($16)
+; MIPS64-NEXT: sb $7, 10($16)
+; MIPS64-NEXT: sb $6, 9($16)
+; MIPS64-NEXT: sh $1, 6($16)
+; MIPS64-NEXT: srl $1, $3, 8
+; MIPS64-NEXT: sb $1, 4($16)
+; MIPS64-NEXT: srl $1, $3, 16
+; MIPS64-NEXT: sb $1, 3($16)
+; MIPS64-NEXT: srl $1, $2, 8
+; MIPS64-NEXT: sh $1, 0($16)
+; MIPS64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddiu $sp, $sp, 16
+;
+; MIPS32-LABEL: call_v4i24:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: addiu $sp, $sp, -24
+; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $16, $4
+; MIPS32-NEXT: lbu $1, 4($4)
+; MIPS32-NEXT: sll $1, $1, 8
+; MIPS32-NEXT: lbu $2, 3($4)
+; MIPS32-NEXT: sll $2, $2, 16
+; MIPS32-NEXT: or $1, $2, $1
+; MIPS32-NEXT: lbu $2, 10($4)
+; MIPS32-NEXT: lbu $3, 5($4)
+; MIPS32-NEXT: lbu $5, 8($4)
+; MIPS32-NEXT: lhu $4, 6($4)
+; MIPS32-NEXT: sll $6, $4, 8
+; MIPS32-NEXT: lbu $4, 2($16)
+; MIPS32-NEXT: lhu $7, 0($16)
+; MIPS32-NEXT: sll $7, $7, 8
+; MIPS32-NEXT: or $4, $4, $7
+; MIPS32-NEXT: or $6, $5, $6
+; MIPS32-NEXT: or $5, $3, $1
+; MIPS32-NEXT: sll $1, $2, 8
+; MIPS32-NEXT: lbu $2, 9($16)
+; MIPS32-NEXT: sll $2, $2, 16
+; MIPS32-NEXT: or $1, $2, $1
+; MIPS32-NEXT: lbu $2, 11($16)
+; MIPS32-NEXT: jal arg_v4i24
+; MIPS32-NEXT: or $7, $2, $1
+; MIPS32-NEXT: jal ret_v4i24
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: srl $1, $4, 8
+; MIPS32-NEXT: srl $6, $5, 16
+; MIPS32-NEXT: srl $7, $5, 8
+; MIPS32-NEXT: sb $5, 11($16)
+; MIPS32-NEXT: sb $4, 8($16)
+; MIPS32-NEXT: sb $3, 5($16)
+; MIPS32-NEXT: sb $2, 2($16)
+; MIPS32-NEXT: sb $7, 10($16)
+; MIPS32-NEXT: sb $6, 9($16)
+; MIPS32-NEXT: sh $1, 6($16)
+; MIPS32-NEXT: srl $1, $3, 8
+; MIPS32-NEXT: sb $1, 4($16)
+; MIPS32-NEXT: srl $1, $3, 16
+; MIPS32-NEXT: sb $1, 3($16)
+; MIPS32-NEXT: srl $1, $2, 8
+; MIPS32-NEXT: sh $1, 0($16)
+; MIPS32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addiu $sp, $sp, 24
+ %v1 = load <4 x i24>, ptr %p
+ call void @arg_v4i24(<4 x i24> %v1)
+ %v2 = call <4 x i24> @ret_v4i24()
+ store <4 x i24> %v2, ptr %p
+ ret void
+}
+
+define void @arg_v4i18(<4 x i18> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v4i18:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: lui $1, 3
+; MIPS64-NEXT: ori $2, $1, 65535
+; MIPS64-NEXT: and $3, $5, $2
+; MIPS64-NEXT: dsll $3, $3, 36
+; MIPS64-NEXT: dsll $5, $4, 54
+; MIPS64-NEXT: or $3, $5, $3
+; MIPS64-NEXT: and $2, $6, $2
+; MIPS64-NEXT: dsll $2, $2, 18
+; MIPS64-NEXT: or $2, $3, $2
+; MIPS64-NEXT: ori $1, $1, 65280
+; MIPS64-NEXT: and $1, $7, $1
+; MIPS64-NEXT: sb $7, 8($8)
+; MIPS64-NEXT: or $1, $2, $1
+; MIPS64-NEXT: daddiu $2, $zero, 255
+; MIPS64-NEXT: dsrl $1, $1, 8
+; MIPS64-NEXT: dsll $2, $2, 56
+; MIPS64-NEXT: dsll $3, $4, 46
+; MIPS64-NEXT: and $2, $3, $2
+; MIPS64-NEXT: or $1, $2, $1
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sd $1, 0($8)
+;
+; MIPS32-LABEL: arg_v4i18:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: sll $1, $4, 14
+; MIPS32-NEXT: lui $2, 63
+; MIPS32-NEXT: lui $3, 65280
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: ori $2, $2, 65280
+; MIPS32-NEXT: sll $3, $5, 4
+; MIPS32-NEXT: and $2, $3, $2
+; MIPS32-NEXT: sll $4, $4, 22
+; MIPS32-NEXT: or $2, $4, $2
+; MIPS32-NEXT: srl $2, $2, 8
+; MIPS32-NEXT: lui $4, 3
+; MIPS32-NEXT: or $1, $1, $2
+; MIPS32-NEXT: ori $2, $4, 65280
+; MIPS32-NEXT: and $2, $7, $2
+; MIPS32-NEXT: sll $5, $6, 18
+; MIPS32-NEXT: or $2, $5, $2
+; MIPS32-NEXT: lw $5, 16($sp)
+; MIPS32-NEXT: sb $7, 8($5)
+; MIPS32-NEXT: sw $1, 0($5)
+; MIPS32-NEXT: srl $1, $2, 8
+; MIPS32-NEXT: ori $2, $4, 49152
+; MIPS32-NEXT: and $2, $6, $2
+; MIPS32-NEXT: srl $2, $2, 14
+; MIPS32-NEXT: or $2, $3, $2
+; MIPS32-NEXT: sll $2, $2, 24
+; MIPS32-NEXT: or $1, $1, $2
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sw $1, 4($5)
+ store <4 x i18> %vec, ptr %p
+ ret void
+}
+
+define <4 x i18> @ret_v4i18(ptr %p) {
+; MIPS64-LABEL: ret_v4i18:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: lbu $1, 8($4)
+; MIPS64-NEXT: ld $2, 0($4)
+; MIPS64-NEXT: dsll $6, $2, 8
+; MIPS64-NEXT: or $1, $1, $6
+; MIPS64-NEXT: sll $3, $2, 0
+; MIPS64-NEXT: sll $1, $1, 0
+; MIPS64-NEXT: srl $4, $3, 10
+; MIPS64-NEXT: lui $3, 3
+; MIPS64-NEXT: ori $5, $3, 65535
+; MIPS64-NEXT: dsrl $3, $2, 28
+; MIPS64-NEXT: sll $3, $3, 0
+; MIPS64-NEXT: lui $7, 3
+; MIPS64-NEXT: and $3, $3, $5
+; MIPS64-NEXT: and $4, $4, $5
+; MIPS64-NEXT: and $5, $1, $5
+; MIPS64-NEXT: ori $1, $7, 64512
+; MIPS64-NEXT: dsrl $2, $2, 46
+; MIPS64-NEXT: and $1, $2, $1
+; MIPS64-NEXT: dsrl $2, $6, 54
+; MIPS64-NEXT: or $1, $2, $1
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sll $2, $1, 0
+;
+; MIPS32-LABEL: ret_v4i18:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lbu $1, 8($4)
+; MIPS32-NEXT: lw $2, 4($4)
+; MIPS32-NEXT: sll $6, $2, 8
+; MIPS32-NEXT: lui $3, 3
+; MIPS32-NEXT: or $1, $1, $6
+; MIPS32-NEXT: ori $5, $3, 64512
+; MIPS32-NEXT: lw $4, 0($4)
+; MIPS32-NEXT: srl $7, $4, 14
+; MIPS32-NEXT: and $5, $7, $5
+; MIPS32-NEXT: srl $7, $2, 24
+; MIPS32-NEXT: ori $8, $3, 65535
+; MIPS32-NEXT: sll $3, $4, 8
+; MIPS32-NEXT: srl $2, $3, 22
+; MIPS32-NEXT: or $2, $2, $5
+; MIPS32-NEXT: and $5, $1, $8
+; MIPS32-NEXT: or $1, $3, $7
+; MIPS32-NEXT: srl $1, $1, 4
+; MIPS32-NEXT: and $3, $1, $8
+; MIPS32-NEXT: sll $1, $7, 14
+; MIPS32-NEXT: srl $4, $6, 18
+; MIPS32-NEXT: or $1, $4, $1
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: and $4, $1, $8
+ %v = load <4 x i18>, ptr %p
+ ret <4 x i18> %v
+}
+
+define void @call_v4i18(ptr %p) nounwind {
+; MIPS64-LABEL: call_v4i18:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: daddiu $sp, $sp, -48
+; MIPS64-NEXT: sd $ra, 40($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $19, 32($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $18, 24($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $17, 16($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $16, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: move $16, $4
+; MIPS64-NEXT: lui $17, 3
+; MIPS64-NEXT: ori $1, $17, 64512
+; MIPS64-NEXT: ld $2, 0($4)
+; MIPS64-NEXT: dsrl $3, $2, 46
+; MIPS64-NEXT: dsrl $4, $2, 10
+; MIPS64-NEXT: ori $18, $17, 65535
+; MIPS64-NEXT: dsrl $5, $2, 28
+; MIPS64-NEXT: and $5, $5, $18
+; MIPS64-NEXT: and $6, $4, $18
+; MIPS64-NEXT: and $1, $3, $1
+; MIPS64-NEXT: dsll $2, $2, 8
+; MIPS64-NEXT: dsrl $3, $2, 54
+; MIPS64-NEXT: or $4, $3, $1
+; MIPS64-NEXT: lbu $1, 8($16)
+; MIPS64-NEXT: or $1, $1, $2
+; MIPS64-NEXT: jal arg_v4i18
+; MIPS64-NEXT: and $7, $1, $18
+; MIPS64-NEXT: daddiu $1, $zero, 255
+; MIPS64-NEXT: dsll $19, $1, 56
+; MIPS64-NEXT: jal ret_v4i18
+; MIPS64-NEXT: ori $17, $17, 65280
+; MIPS64-NEXT: # kill: def $v0 killed $v0 def $v0_64
+; MIPS64-NEXT: # kill: def $v1 killed $v1 def $v1_64
+; MIPS64-NEXT: # kill: def $a0 killed $a0 def $a0_64
+; MIPS64-NEXT: # kill: def $a1 killed $a1 def $a1_64
+; MIPS64-NEXT: dsll $1, $2, 54
+; MIPS64-NEXT: and $3, $3, $18
+; MIPS64-NEXT: dsll $3, $3, 36
+; MIPS64-NEXT: or $1, $1, $3
+; MIPS64-NEXT: and $3, $4, $18
+; MIPS64-NEXT: dsll $3, $3, 18
+; MIPS64-NEXT: sb $5, 8($16)
+; MIPS64-NEXT: or $1, $1, $3
+; MIPS64-NEXT: and $3, $5, $17
+; MIPS64-NEXT: or $1, $1, $3
+; MIPS64-NEXT: dsrl $1, $1, 8
+; MIPS64-NEXT: dsll $2, $2, 46
+; MIPS64-NEXT: and $2, $2, $19
+; MIPS64-NEXT: or $1, $2, $1
+; MIPS64-NEXT: sd $1, 0($16)
+; MIPS64-NEXT: ld $16, 8($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $17, 16($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $18, 24($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $19, 32($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddiu $sp, $sp, 48
+;
+; MIPS32-LABEL: call_v4i18:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: addiu $sp, $sp, -40
+; MIPS32-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $19, 32($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $18, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $17, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $16, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $16, $4
+; MIPS32-NEXT: lw $1, 4($4)
+; MIPS32-NEXT: srl $2, $1, 24
+; MIPS32-NEXT: lw $3, 0($4)
+; MIPS32-NEXT: sll $4, $3, 8
+; MIPS32-NEXT: or $5, $4, $2
+; MIPS32-NEXT: lbu $6, 8($16)
+; MIPS32-NEXT: sll $1, $1, 8
+; MIPS32-NEXT: srl $5, $5, 4
+; MIPS32-NEXT: or $6, $6, $1
+; MIPS32-NEXT: lui $17, 3
+; MIPS32-NEXT: ori $7, $17, 64512
+; MIPS32-NEXT: srl $3, $3, 14
+; MIPS32-NEXT: and $3, $3, $7
+; MIPS32-NEXT: ori $8, $17, 65535
+; MIPS32-NEXT: srl $4, $4, 22
+; MIPS32-NEXT: or $4, $4, $3
+; MIPS32-NEXT: and $7, $6, $8
+; MIPS32-NEXT: and $5, $5, $8
+; MIPS32-NEXT: sll $2, $2, 14
+; MIPS32-NEXT: srl $1, $1, 18
+; MIPS32-NEXT: or $1, $1, $2
+; MIPS32-NEXT: jal arg_v4i18
+; MIPS32-NEXT: and $6, $1, $8
+; MIPS32-NEXT: ori $18, $17, 49152
+; MIPS32-NEXT: ori $17, $17, 65280
+; MIPS32-NEXT: lui $1, 63
+; MIPS32-NEXT: jal ret_v4i18
+; MIPS32-NEXT: ori $19, $1, 65280
+; MIPS32-NEXT: lui $1, 65280
+; MIPS32-NEXT: and $6, $5, $17
+; MIPS32-NEXT: sll $7, $4, 18
+; MIPS32-NEXT: or $6, $7, $6
+; MIPS32-NEXT: srl $6, $6, 8
+; MIPS32-NEXT: and $4, $4, $18
+; MIPS32-NEXT: srl $4, $4, 14
+; MIPS32-NEXT: sll $3, $3, 4
+; MIPS32-NEXT: or $4, $3, $4
+; MIPS32-NEXT: sll $4, $4, 24
+; MIPS32-NEXT: or $4, $6, $4
+; MIPS32-NEXT: sll $6, $2, 14
+; MIPS32-NEXT: sb $5, 8($16)
+; MIPS32-NEXT: sw $4, 4($16)
+; MIPS32-NEXT: and $1, $6, $1
+; MIPS32-NEXT: and $3, $3, $19
+; MIPS32-NEXT: sll $2, $2, 22
+; MIPS32-NEXT: or $2, $2, $3
+; MIPS32-NEXT: srl $2, $2, 8
+; MIPS32-NEXT: or $1, $1, $2
+; MIPS32-NEXT: sw $1, 0($16)
+; MIPS32-NEXT: lw $16, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $17, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $18, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $19, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addiu $sp, $sp, 40
+ %v1 = load <4 x i18>, ptr %p
+ call void @arg_v4i18(<4 x i18> %v1)
+ %v2 = call <4 x i18> @ret_v4i18()
+ store <4 x i18> %v2, ptr %p
+ ret void
+}
+
+define void @arg_v7i18(<7 x i18> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v7i18:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: lui $1, 3
+; MIPS64-NEXT: ori $2, $1, 65535
+; MIPS64-NEXT: and $3, $8, $2
+; MIPS64-NEXT: dsll $3, $3, 36
+; MIPS64-NEXT: dsll $8, $7, 54
+; MIPS64-NEXT: or $3, $8, $3
+; MIPS64-NEXT: and $8, $9, $2
+; MIPS64-NEXT: dsll $8, $8, 18
+; MIPS64-NEXT: or $3, $3, $8
+; MIPS64-NEXT: and $5, $5, $2
+; MIPS64-NEXT: and $8, $10, $2
+; MIPS64-NEXT: or $3, $3, $8
+; MIPS64-NEXT: dsll $5, $5, 26
+; MIPS64-NEXT: dsll $4, $4, 44
+; MIPS64-NEXT: or $4, $4, $5
+; MIPS64-NEXT: and $2, $6, $2
+; MIPS64-NEXT: dsll $2, $2, 8
+; MIPS64-NEXT: sd $3, 8($11)
+; MIPS64-NEXT: or $2, $4, $2
+; MIPS64-NEXT: ori $1, $1, 64512
+; MIPS64-NEXT: and $1, $7, $1
+; MIPS64-NEXT: dsrl $1, $1, 10
+; MIPS64-NEXT: or $1, $2, $1
+; MIPS64-NEXT: daddiu $2, $zero, 1
+; MIPS64-NEXT: dsll $2, $2, 62
+; MIPS64-NEXT: daddiu $2, $2, -1
+; MIPS64-NEXT: and $1, $1, $2
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sd $1, 0($11)
+;
+; MIPS32-LABEL: arg_v7i18:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lui $1, 3
+; MIPS32-NEXT: ori $2, $1, 65535
+; MIPS32-NEXT: and $3, $6, $2
+; MIPS32-NEXT: sll $3, $3, 8
+; MIPS32-NEXT: ori $6, $1, 65472
+; MIPS32-NEXT: and $6, $5, $6
+; MIPS32-NEXT: srl $6, $6, 6
+; MIPS32-NEXT: sll $5, $5, 26
+; MIPS32-NEXT: sll $4, $4, 12
+; MIPS32-NEXT: or $4, $4, $6
+; MIPS32-NEXT: or $3, $5, $3
+; MIPS32-NEXT: ori $5, $1, 64512
+; MIPS32-NEXT: and $5, $7, $5
+; MIPS32-NEXT: srl $5, $5, 10
+; MIPS32-NEXT: lui $6, 16383
+; MIPS32-NEXT: ori $6, $6, 65535
+; MIPS32-NEXT: lw $8, 24($sp)
+; MIPS32-NEXT: lw $9, 16($sp)
+; MIPS32-NEXT: or $3, $3, $5
+; MIPS32-NEXT: and $5, $9, $2
+; MIPS32-NEXT: and $4, $4, $6
+; MIPS32-NEXT: and $2, $8, $2
+; MIPS32-NEXT: lw $6, 20($sp)
+; MIPS32-NEXT: sll $8, $6, 18
+; MIPS32-NEXT: or $2, $8, $2
+; MIPS32-NEXT: lw $8, 28($sp)
+; MIPS32-NEXT: sw $2, 12($8)
+; MIPS32-NEXT: sw $4, 0($8)
+; MIPS32-NEXT: sw $3, 4($8)
+; MIPS32-NEXT: sll $2, $5, 4
+; MIPS32-NEXT: sll $3, $7, 22
+; MIPS32-NEXT: or $2, $3, $2
+; MIPS32-NEXT: ori $1, $1, 49152
+; MIPS32-NEXT: and $1, $6, $1
+; MIPS32-NEXT: srl $1, $1, 14
+; MIPS32-NEXT: or $1, $2, $1
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sw $1, 8($8)
+ store <7 x i18> %vec, ptr %p
+ ret void
+}
+
+define <7 x i18> @ret_v7i18(ptr %p) {
+; MIPS64-LABEL: ret_v7i18:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: ld $1, 0($5)
+; MIPS64-NEXT: sd $1, 0($4)
+; MIPS64-NEXT: ld $1, 8($5)
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sd $1, 8($4)
+;
+; MIPS32-LABEL: ret_v7i18:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lw $1, 0($5)
+; MIPS32-NEXT: sw $1, 0($4)
+; MIPS32-NEXT: lw $1, 4($5)
+; MIPS32-NEXT: sw $1, 4($4)
+; MIPS32-NEXT: lw $1, 12($5)
+; MIPS32-NEXT: sw $1, 12($4)
+; MIPS32-NEXT: lw $1, 8($5)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sw $1, 8($4)
+ %v = load <7 x i18>, ptr %p
+ ret <7 x i18> %v
+}
+
+define void @call_v7i18(ptr %p) nounwind {
+; MIPS64-LABEL: call_v7i18:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: daddiu $sp, $sp, -32
+; MIPS64-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $16, 16($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: move $16, $4
+; MIPS64-NEXT: ld $1, 0($4)
+; MIPS64-NEXT: ld $2, 8($4)
+; MIPS64-NEXT: dsrl $3, $2, 18
+; MIPS64-NEXT: dsrl $4, $2, 36
+; MIPS64-NEXT: dsrl $6, $1, 8
+; MIPS64-NEXT: dsrl $5, $1, 26
+; MIPS64-NEXT: lui $7, 3
+; MIPS64-NEXT: ori $7, $7, 65535
+; MIPS64-NEXT: and $10, $2, $7
+; MIPS64-NEXT: and $5, $5, $7
+; MIPS64-NEXT: and $6, $6, $7
+; MIPS64-NEXT: and $8, $4, $7
+; MIPS64-NEXT: and $9, $3, $7
+; MIPS64-NEXT: dsll $3, $1, 10
+; MIPS64-NEXT: dsrl $2, $2, 54
+; MIPS64-NEXT: or $2, $2, $3
+; MIPS64-NEXT: and $7, $2, $7
+; MIPS64-NEXT: jal arg_v7i18
+; MIPS64-NEXT: dsrl $4, $1, 44
+; MIPS64-NEXT: jal ret_v7i18
+; MIPS64-NEXT: daddiu $4, $sp, 0
+; MIPS64-NEXT: ld $1, 0($sp)
+; MIPS64-NEXT: sd $1, 0($16)
+; MIPS64-NEXT: ld $1, 8($sp)
+; MIPS64-NEXT: sd $1, 8($16)
+; MIPS64-NEXT: ld $16, 16($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddiu $sp, $sp, 32
+;
+; MIPS32-LABEL: call_v7i18:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: addiu $sp, $sp, -64
+; MIPS32-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $16, 52($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $fp, $sp
+; MIPS32-NEXT: addiu $1, $zero, -16
+; MIPS32-NEXT: and $sp, $sp, $1
+; MIPS32-NEXT: move $16, $4
+; MIPS32-NEXT: lw $1, 8($4)
+; MIPS32-NEXT: sll $2, $1, 14
+; MIPS32-NEXT: lw $3, 12($4)
+; MIPS32-NEXT: srl $4, $3, 18
+; MIPS32-NEXT: or $2, $4, $2
+; MIPS32-NEXT: srl $4, $1, 4
+; MIPS32-NEXT: lui $5, 3
+; MIPS32-NEXT: ori $7, $5, 65535
+; MIPS32-NEXT: and $2, $2, $7
+; MIPS32-NEXT: and $4, $4, $7
+; MIPS32-NEXT: and $3, $3, $7
+; MIPS32-NEXT: lw $8, 4($16)
+; MIPS32-NEXT: lw $9, 0($16)
+; MIPS32-NEXT: sll $5, $9, 6
+; MIPS32-NEXT: srl $6, $8, 26
+; MIPS32-NEXT: sw $3, 24($sp)
+; MIPS32-NEXT: sw $4, 16($sp)
+; MIPS32-NEXT: sw $2, 20($sp)
+; MIPS32-NEXT: or $2, $6, $5
+; MIPS32-NEXT: srl $3, $8, 8
+; MIPS32-NEXT: and $6, $3, $7
+; MIPS32-NEXT: and $5, $2, $7
+; MIPS32-NEXT: sll $2, $8, 10
+; MIPS32-NEXT: srl $1, $1, 22
+; MIPS32-NEXT: or $1, $1, $2
+; MIPS32-NEXT: and $7, $1, $7
+; MIPS32-NEXT: jal arg_v7i18
+; MIPS32-NEXT: srl $4, $9, 12
+; MIPS32-NEXT: jal ret_v7i18
+; MIPS32-NEXT: addiu $4, $sp, 32
+; MIPS32-NEXT: lw $1, 32($sp)
+; MIPS32-NEXT: sw $1, 0($16)
+; MIPS32-NEXT: lw $1, 36($sp)
+; MIPS32-NEXT: sw $1, 4($16)
+; MIPS32-NEXT: lw $1, 40($sp)
+; MIPS32-NEXT: sw $1, 8($16)
+; MIPS32-NEXT: lw $1, 44($sp)
+; MIPS32-NEXT: sw $1, 12($16)
+; MIPS32-NEXT: move $sp, $fp
+; MIPS32-NEXT: lw $16, 52($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addiu $sp, $sp, 64
+ %v1 = load <7 x i18>, ptr %p
+ call void @arg_v7i18(<7 x i18> %v1)
+ %v2 = call <7 x i18> @ret_v7i18()
+ store <7 x i18> %v2, ptr %p
+ ret void
+}
+
+define void @arg_v2i128(<2 x i128> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v2i128:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: sd $7, 24($8)
+; MIPS64-NEXT: sd $6, 16($8)
+; MIPS64-NEXT: sd $5, 8($8)
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sd $4, 0($8)
+;
+; MIPS32-LABEL: arg_v2i128:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lw $1, 16($sp)
+; MIPS32-NEXT: lw $2, 20($sp)
+; MIPS32-NEXT: lw $3, 24($sp)
+; MIPS32-NEXT: lw $8, 32($sp)
+; MIPS32-NEXT: lw $9, 28($sp)
+; MIPS32-NEXT: sw $9, 28($8)
+; MIPS32-NEXT: sw $3, 24($8)
+; MIPS32-NEXT: sw $2, 20($8)
+; MIPS32-NEXT: sw $1, 16($8)
+; MIPS32-NEXT: sw $7, 12($8)
+; MIPS32-NEXT: sw $6, 8($8)
+; MIPS32-NEXT: sw $5, 4($8)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sw $4, 0($8)
+ store <2 x i128> %vec, ptr %p
+ ret void
+}
+
+define <2 x i128> @ret_v2i128(ptr %p) {
+; MIPS64-LABEL: ret_v2i128:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: ld $1, 24($5)
+; MIPS64-NEXT: sd $1, 24($4)
+; MIPS64-NEXT: ld $1, 16($5)
+; MIPS64-NEXT: sd $1, 16($4)
+; MIPS64-NEXT: ld $1, 8($5)
+; MIPS64-NEXT: sd $1, 8($4)
+; MIPS64-NEXT: ld $1, 0($5)
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sd $1, 0($4)
+;
+; MIPS32-LABEL: ret_v2i128:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lw $1, 12($5)
+; MIPS32-NEXT: lw $2, 16($5)
+; MIPS32-NEXT: lw $3, 20($5)
+; MIPS32-NEXT: lw $6, 24($5)
+; MIPS32-NEXT: lw $7, 28($5)
+; MIPS32-NEXT: sw $7, 28($4)
+; MIPS32-NEXT: sw $6, 24($4)
+; MIPS32-NEXT: sw $3, 20($4)
+; MIPS32-NEXT: sw $2, 16($4)
+; MIPS32-NEXT: sw $1, 12($4)
+; MIPS32-NEXT: lw $1, 8($5)
+; MIPS32-NEXT: sw $1, 8($4)
+; MIPS32-NEXT: lw $1, 4($5)
+; MIPS32-NEXT: sw $1, 4($4)
+; MIPS32-NEXT: lw $1, 0($5)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sw $1, 0($4)
+ %v = load <2 x i128>, ptr %p
+ ret <2 x i128> %v
+}
+
+define void @call_v2i128(ptr %p) nounwind {
+; MIPS64-LABEL: call_v2i128:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: daddiu $sp, $sp, -64
+; MIPS64-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $fp, 48($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $16, 40($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: move $fp, $sp
+; MIPS64-NEXT: daddiu $1, $zero, -32
+; MIPS64-NEXT: move $16, $4
+; MIPS64-NEXT: ld $7, 24($4)
+; MIPS64-NEXT: ld $6, 16($4)
+; MIPS64-NEXT: ld $5, 8($4)
+; MIPS64-NEXT: ld $4, 0($4)
+; MIPS64-NEXT: jal arg_v2i128
+; MIPS64-NEXT: and $sp, $sp, $1
+; MIPS64-NEXT: jal ret_v2i128
+; MIPS64-NEXT: daddiu $4, $sp, 0
+; MIPS64-NEXT: ld $1, 16($sp)
+; MIPS64-NEXT: sd $1, 16($16)
+; MIPS64-NEXT: ld $1, 24($sp)
+; MIPS64-NEXT: sd $1, 24($16)
+; MIPS64-NEXT: ld $1, 0($sp)
+; MIPS64-NEXT: sd $1, 0($16)
+; MIPS64-NEXT: ld $1, 8($sp)
+; MIPS64-NEXT: sd $1, 8($16)
+; MIPS64-NEXT: move $sp, $fp
+; MIPS64-NEXT: ld $16, 40($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $fp, 48($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddiu $sp, $sp, 64
+;
+; MIPS32-LABEL: call_v2i128:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: addiu $sp, $sp, -96
+; MIPS32-NEXT: sw $ra, 92($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $fp, 88($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $16, 84($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $fp, $sp
+; MIPS32-NEXT: addiu $1, $zero, -32
+; MIPS32-NEXT: and $sp, $sp, $1
+; MIPS32-NEXT: move $16, $4
+; MIPS32-NEXT: lw $7, 12($4)
+; MIPS32-NEXT: lw $6, 8($4)
+; MIPS32-NEXT: lw $5, 4($4)
+; MIPS32-NEXT: lw $4, 0($4)
+; MIPS32-NEXT: lw $1, 16($16)
+; MIPS32-NEXT: lw $2, 20($16)
+; MIPS32-NEXT: lw $3, 24($16)
+; MIPS32-NEXT: lw $8, 28($16)
+; MIPS32-NEXT: sw $8, 28($sp)
+; MIPS32-NEXT: sw $3, 24($sp)
+; MIPS32-NEXT: sw $2, 20($sp)
+; MIPS32-NEXT: jal arg_v2i128
+; MIPS32-NEXT: sw $1, 16($sp)
+; MIPS32-NEXT: jal ret_v2i128
+; MIPS32-NEXT: addiu $4, $sp, 32
+; MIPS32-NEXT: lw $1, 40($sp)
+; MIPS32-NEXT: lw $2, 52($sp)
+; MIPS32-NEXT: lw $3, 48($sp)
+; MIPS32-NEXT: lw $4, 60($sp)
+; MIPS32-NEXT: lw $5, 56($sp)
+; MIPS32-NEXT: sw $5, 24($16)
+; MIPS32-NEXT: sw $4, 28($16)
+; MIPS32-NEXT: sw $3, 16($16)
+; MIPS32-NEXT: sw $2, 20($16)
+; MIPS32-NEXT: sw $1, 8($16)
+; MIPS32-NEXT: lw $1, 44($sp)
+; MIPS32-NEXT: sw $1, 12($16)
+; MIPS32-NEXT: lw $1, 32($sp)
+; MIPS32-NEXT: sw $1, 0($16)
+; MIPS32-NEXT: lw $1, 36($sp)
+; MIPS32-NEXT: sw $1, 4($16)
+; MIPS32-NEXT: move $sp, $fp
+; MIPS32-NEXT: lw $16, 84($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $fp, 88($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $ra, 92($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addiu $sp, $sp, 96
+ %v1 = load <2 x i128>, ptr %p
+ call void @arg_v2i128(<2 x i128> %v1)
+ %v2 = call <2 x i128> @ret_v2i128()
+ store <2 x i128> %v2, ptr %p
+ ret void
+}
+
+define void @arg_v3i128(<3 x i128> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v3i128:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: sd $9, 40($10)
+; MIPS64-NEXT: sd $8, 32($10)
+; MIPS64-NEXT: sd $7, 24($10)
+; MIPS64-NEXT: sd $6, 16($10)
+; MIPS64-NEXT: sd $5, 8($10)
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sd $4, 0($10)
+;
+; MIPS32-LABEL: arg_v3i128:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lw $1, 16($sp)
+; MIPS32-NEXT: lw $2, 20($sp)
+; MIPS32-NEXT: lw $3, 24($sp)
+; MIPS32-NEXT: lw $8, 28($sp)
+; MIPS32-NEXT: lw $9, 32($sp)
+; MIPS32-NEXT: lw $10, 36($sp)
+; MIPS32-NEXT: lw $11, 40($sp)
+; MIPS32-NEXT: lw $12, 48($sp)
+; MIPS32-NEXT: lw $13, 44($sp)
+; MIPS32-NEXT: sw $13, 44($12)
+; MIPS32-NEXT: sw $11, 40($12)
+; MIPS32-NEXT: sw $10, 36($12)
+; MIPS32-NEXT: sw $9, 32($12)
+; MIPS32-NEXT: sw $8, 28($12)
+; MIPS32-NEXT: sw $3, 24($12)
+; MIPS32-NEXT: sw $2, 20($12)
+; MIPS32-NEXT: sw $1, 16($12)
+; MIPS32-NEXT: sw $7, 12($12)
+; MIPS32-NEXT: sw $6, 8($12)
+; MIPS32-NEXT: sw $5, 4($12)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sw $4, 0($12)
+ store <3 x i128> %vec, ptr %p
+ ret void
+}
+
+define <3 x i128> @ret_v3i128(ptr %p) {
+; MIPS64-LABEL: ret_v3i128:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: ld $1, 24($5)
+; MIPS64-NEXT: ld $2, 32($5)
+; MIPS64-NEXT: ld $3, 40($5)
+; MIPS64-NEXT: sd $3, 40($4)
+; MIPS64-NEXT: sd $2, 32($4)
+; MIPS64-NEXT: sd $1, 24($4)
+; MIPS64-NEXT: ld $1, 16($5)
+; MIPS64-NEXT: sd $1, 16($4)
+; MIPS64-NEXT: ld $1, 8($5)
+; MIPS64-NEXT: sd $1, 8($4)
+; MIPS64-NEXT: ld $1, 0($5)
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sd $1, 0($4)
+;
+; MIPS32-LABEL: ret_v3i128:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: lw $1, 28($5)
+; MIPS32-NEXT: lw $2, 32($5)
+; MIPS32-NEXT: lw $3, 36($5)
+; MIPS32-NEXT: lw $6, 40($5)
+; MIPS32-NEXT: lw $7, 12($5)
+; MIPS32-NEXT: lw $8, 16($5)
+; MIPS32-NEXT: lw $9, 20($5)
+; MIPS32-NEXT: lw $10, 24($5)
+; MIPS32-NEXT: lw $11, 44($5)
+; MIPS32-NEXT: sw $11, 44($4)
+; MIPS32-NEXT: sw $6, 40($4)
+; MIPS32-NEXT: sw $3, 36($4)
+; MIPS32-NEXT: sw $2, 32($4)
+; MIPS32-NEXT: sw $1, 28($4)
+; MIPS32-NEXT: sw $10, 24($4)
+; MIPS32-NEXT: sw $9, 20($4)
+; MIPS32-NEXT: sw $8, 16($4)
+; MIPS32-NEXT: sw $7, 12($4)
+; MIPS32-NEXT: lw $1, 8($5)
+; MIPS32-NEXT: sw $1, 8($4)
+; MIPS32-NEXT: lw $1, 4($5)
+; MIPS32-NEXT: sw $1, 4($4)
+; MIPS32-NEXT: lw $1, 0($5)
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sw $1, 0($4)
+ %v = load <3 x i128>, ptr %p
+ ret <3 x i128> %v
+}
+
+define void @call_v3i128(ptr %p) nounwind {
+; MIPS64-LABEL: call_v3i128:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: daddiu $sp, $sp, -128
+; MIPS64-NEXT: sd $ra, 120($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $fp, 112($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: sd $16, 104($sp) # 8-byte Folded Spill
+; MIPS64-NEXT: move $fp, $sp
+; MIPS64-NEXT: daddiu $1, $zero, -64
+; MIPS64-NEXT: move $16, $4
+; MIPS64-NEXT: ld $9, 40($4)
+; MIPS64-NEXT: ld $8, 32($4)
+; MIPS64-NEXT: ld $7, 24($4)
+; MIPS64-NEXT: ld $6, 16($4)
+; MIPS64-NEXT: ld $5, 8($4)
+; MIPS64-NEXT: ld $4, 0($4)
+; MIPS64-NEXT: jal arg_v3i128
+; MIPS64-NEXT: and $sp, $sp, $1
+; MIPS64-NEXT: jal ret_v3i128
+; MIPS64-NEXT: daddiu $4, $sp, 0
+; MIPS64-NEXT: ld $1, 16($sp)
+; MIPS64-NEXT: ld $2, 40($sp)
+; MIPS64-NEXT: ld $3, 32($sp)
+; MIPS64-NEXT: sd $3, 32($16)
+; MIPS64-NEXT: sd $2, 40($16)
+; MIPS64-NEXT: sd $1, 16($16)
+; MIPS64-NEXT: ld $1, 24($sp)
+; MIPS64-NEXT: sd $1, 24($16)
+; MIPS64-NEXT: ld $1, 0($sp)
+; MIPS64-NEXT: sd $1, 0($16)
+; MIPS64-NEXT: ld $1, 8($sp)
+; MIPS64-NEXT: sd $1, 8($16)
+; MIPS64-NEXT: move $sp, $fp
+; MIPS64-NEXT: ld $16, 104($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $fp, 112($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: ld $ra, 120($sp) # 8-byte Folded Reload
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddiu $sp, $sp, 128
+;
+; MIPS32-LABEL: call_v3i128:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: addiu $sp, $sp, -192
+; MIPS32-NEXT: sw $ra, 188($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $fp, 184($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $16, 180($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $fp, $sp
+; MIPS32-NEXT: addiu $1, $zero, -64
+; MIPS32-NEXT: and $sp, $sp, $1
+; MIPS32-NEXT: move $16, $4
+; MIPS32-NEXT: lw $7, 12($4)
+; MIPS32-NEXT: lw $6, 8($4)
+; MIPS32-NEXT: lw $5, 4($4)
+; MIPS32-NEXT: lw $4, 0($4)
+; MIPS32-NEXT: lw $1, 16($16)
+; MIPS32-NEXT: lw $2, 20($16)
+; MIPS32-NEXT: lw $3, 24($16)
+; MIPS32-NEXT: lw $8, 28($16)
+; MIPS32-NEXT: lw $9, 32($16)
+; MIPS32-NEXT: lw $10, 36($16)
+; MIPS32-NEXT: lw $11, 40($16)
+; MIPS32-NEXT: lw $12, 44($16)
+; MIPS32-NEXT: sw $12, 44($sp)
+; MIPS32-NEXT: sw $11, 40($sp)
+; MIPS32-NEXT: sw $10, 36($sp)
+; MIPS32-NEXT: sw $9, 32($sp)
+; MIPS32-NEXT: sw $8, 28($sp)
+; MIPS32-NEXT: sw $3, 24($sp)
+; MIPS32-NEXT: sw $2, 20($sp)
+; MIPS32-NEXT: jal arg_v3i128
+; MIPS32-NEXT: sw $1, 16($sp)
+; MIPS32-NEXT: jal ret_v3i128
+; MIPS32-NEXT: addiu $4, $sp, 64
+; MIPS32-NEXT: lw $1, 88($sp)
+; MIPS32-NEXT: lw $2, 100($sp)
+; MIPS32-NEXT: lw $3, 96($sp)
+; MIPS32-NEXT: lw $4, 108($sp)
+; MIPS32-NEXT: lw $5, 64($sp)
+; MIPS32-NEXT: lw $6, 84($sp)
+; MIPS32-NEXT: lw $7, 80($sp)
+; MIPS32-NEXT: lw $8, 92($sp)
+; MIPS32-NEXT: lw $9, 104($sp)
+; MIPS32-NEXT: sw $9, 40($16)
+; MIPS32-NEXT: sw $4, 44($16)
+; MIPS32-NEXT: sw $3, 32($16)
+; MIPS32-NEXT: sw $2, 36($16)
+; MIPS32-NEXT: sw $1, 24($16)
+; MIPS32-NEXT: sw $8, 28($16)
+; MIPS32-NEXT: sw $7, 16($16)
+; MIPS32-NEXT: sw $6, 20($16)
+; MIPS32-NEXT: sw $5, 0($16)
+; MIPS32-NEXT: lw $1, 68($sp)
+; MIPS32-NEXT: sw $1, 4($16)
+; MIPS32-NEXT: lw $1, 72($sp)
+; MIPS32-NEXT: sw $1, 8($16)
+; MIPS32-NEXT: lw $1, 76($sp)
+; MIPS32-NEXT: sw $1, 12($16)
+; MIPS32-NEXT: move $sp, $fp
+; MIPS32-NEXT: lw $16, 180($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $fp, 184($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $ra, 188($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addiu $sp, $sp, 192
+ %v1 = load <3 x i128>, ptr %p
+ call void @arg_v3i128(<3 x i128> %v1)
+ %v2 = call <3 x i128> @ret_v3i128()
+ store <3 x i128> %v2, ptr %p
+ ret void
+}
diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll
index f027c540fe304b..28a7dc046139b2 100644
--- a/llvm/test/CodeGen/Mips/cconv/vector.ll
+++ b/llvm/test/CodeGen/Mips/cconv/vector.ll
@@ -6637,24 +6637,12 @@ define <2 x i24> @i24x2(<2 x i24> %a, <2 x i24> %b) {
;
; MIPS64-LABEL: i24x2:
; MIPS64: # %bb.0: # %Entry
-; MIPS64-NEXT: lui $1, 256
-; MIPS64-NEXT: daddiu $1, $1, -1
-; MIPS64-NEXT: dsll $1, $1, 24
-; MIPS64-NEXT: and $2, $5, $1
-; MIPS64-NEXT: dsrl $2, $2, 24
-; MIPS64-NEXT: sll $2, $2, 0
-; MIPS64-NEXT: and $1, $4, $1
-; MIPS64-NEXT: dsrl $1, $1, 24
-; MIPS64-NEXT: sll $1, $1, 0
-; MIPS64-NEXT: addu $1, $1, $2
-; MIPS64-NEXT: sll $2, $5, 0
-; MIPS64-NEXT: sll $3, $4, 0
-; MIPS64-NEXT: dsll $1, $1, 24
-; MIPS64-NEXT: addu $2, $3, $2
-; MIPS64-NEXT: lui $3, 255
-; MIPS64-NEXT: ori $3, $3, 65535
-; MIPS64-NEXT: and $2, $2, $3
-; MIPS64-NEXT: or $2, $2, $1
+; MIPS64-NEXT: sll $1, $6, 0
+; MIPS64-NEXT: sll $2, $4, 0
+; MIPS64-NEXT: addu $2, $2, $1
+; MIPS64-NEXT: sll $1, $7, 0
+; MIPS64-NEXT: sll $3, $5, 0
+; MIPS64-NEXT: addu $3, $3, $1
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
;
@@ -6689,56 +6677,14 @@ define <2 x i24> @i24x2(<2 x i24> %a, <2 x i24> %b) {
;
; MIPS64R5EB-LABEL: i24x2:
; MIPS64R5EB: # %bb.0: # %Entry
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5EB-NEXT: sh $5, 20($sp)
-; MIPS64R5EB-NEXT: dsrl $1, $5, 16
-; MIPS64R5EB-NEXT: sw $1, 16($sp)
-; MIPS64R5EB-NEXT: sh $4, 28($sp)
-; MIPS64R5EB-NEXT: dsrl $1, $4, 16
-; MIPS64R5EB-NEXT: sw $1, 24($sp)
-; MIPS64R5EB-NEXT: lbu $1, 20($sp)
-; MIPS64R5EB-NEXT: dsll $1, $1, 8
-; MIPS64R5EB-NEXT: lb $2, 19($sp)
-; MIPS64R5EB-NEXT: dsll $2, $2, 16
-; MIPS64R5EB-NEXT: or $1, $2, $1
-; MIPS64R5EB-NEXT: lbu $2, 28($sp)
-; MIPS64R5EB-NEXT: dsll $2, $2, 8
-; MIPS64R5EB-NEXT: lb $3, 27($sp)
-; MIPS64R5EB-NEXT: dsll $3, $3, 16
-; MIPS64R5EB-NEXT: lbu $4, 21($sp)
-; MIPS64R5EB-NEXT: or $2, $3, $2
-; MIPS64R5EB-NEXT: or $1, $4, $1
-; MIPS64R5EB-NEXT: lh $3, 16($sp)
-; MIPS64R5EB-NEXT: dsll $3, $3, 8
-; MIPS64R5EB-NEXT: lbu $4, 18($sp)
-; MIPS64R5EB-NEXT: or $3, $4, $3
-; MIPS64R5EB-NEXT: lbu $4, 29($sp)
-; MIPS64R5EB-NEXT: insert.d $w0[0], $3
-; MIPS64R5EB-NEXT: insert.d $w0[1], $1
-; MIPS64R5EB-NEXT: or $1, $4, $2
-; MIPS64R5EB-NEXT: lh $2, 24($sp)
-; MIPS64R5EB-NEXT: dsll $2, $2, 8
-; MIPS64R5EB-NEXT: lbu $3, 26($sp)
-; MIPS64R5EB-NEXT: or $2, $3, $2
-; MIPS64R5EB-NEXT: insert.d $w1[0], $2
-; MIPS64R5EB-NEXT: insert.d $w1[1], $1
+; MIPS64R5EB-NEXT: insert.d $w0[0], $6
+; MIPS64R5EB-NEXT: insert.d $w0[1], $7
+; MIPS64R5EB-NEXT: insert.d $w1[0], $4
+; MIPS64R5EB-NEXT: insert.d $w1[1], $5
; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0
-; MIPS64R5EB-NEXT: copy_s.d $1, $w0[1]
-; MIPS64R5EB-NEXT: copy_s.d $2, $w0[0]
-; MIPS64R5EB-NEXT: sb $2, 10($sp)
-; MIPS64R5EB-NEXT: dsrl $3, $1, 16
-; MIPS64R5EB-NEXT: sb $3, 11($sp)
-; MIPS64R5EB-NEXT: dsrl $2, $2, 8
-; MIPS64R5EB-NEXT: sh $2, 8($sp)
-; MIPS64R5EB-NEXT: sb $1, 13($sp)
-; MIPS64R5EB-NEXT: dsrl $1, $1, 8
-; MIPS64R5EB-NEXT: sb $1, 12($sp)
-; MIPS64R5EB-NEXT: lw $1, 8($sp)
-; MIPS64R5EB-NEXT: dsll $1, $1, 16
-; MIPS64R5EB-NEXT: lhu $2, 12($sp)
-; MIPS64R5EB-NEXT: or $2, $2, $1
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EB-NEXT: shf.w $w0, $w0, 177
+; MIPS64R5EB-NEXT: copy_s.w $2, $w0[1]
+; MIPS64R5EB-NEXT: copy_s.w $3, $w0[3]
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
;
@@ -6772,56 +6718,13 @@ define <2 x i24> @i24x2(<2 x i24> %a, <2 x i24> %b) {
;
; MIPS64R5EL-LABEL: i24x2:
; MIPS64R5EL: # %bb.0: # %Entry
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32
-; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32
-; MIPS64R5EL-NEXT: dsrl $1, $5, 32
-; MIPS64R5EL-NEXT: sh $1, 20($sp)
-; MIPS64R5EL-NEXT: sw $5, 16($sp)
-; MIPS64R5EL-NEXT: dsrl $1, $4, 32
-; MIPS64R5EL-NEXT: sh $1, 28($sp)
-; MIPS64R5EL-NEXT: lbu $1, 20($sp)
-; MIPS64R5EL-NEXT: sw $4, 24($sp)
-; MIPS64R5EL-NEXT: dsll $1, $1, 8
-; MIPS64R5EL-NEXT: lbu $2, 19($sp)
-; MIPS64R5EL-NEXT: or $1, $1, $2
-; MIPS64R5EL-NEXT: lb $2, 21($sp)
-; MIPS64R5EL-NEXT: dsll $2, $2, 16
-; MIPS64R5EL-NEXT: lbu $3, 28($sp)
-; MIPS64R5EL-NEXT: dsll $3, $3, 8
-; MIPS64R5EL-NEXT: lb $4, 18($sp)
-; MIPS64R5EL-NEXT: lbu $5, 27($sp)
-; MIPS64R5EL-NEXT: or $3, $3, $5
-; MIPS64R5EL-NEXT: or $1, $1, $2
-; MIPS64R5EL-NEXT: dsll $2, $4, 16
-; MIPS64R5EL-NEXT: lhu $4, 16($sp)
-; MIPS64R5EL-NEXT: or $2, $4, $2
-; MIPS64R5EL-NEXT: lb $4, 29($sp)
-; MIPS64R5EL-NEXT: dsll $4, $4, 16
-; MIPS64R5EL-NEXT: insert.d $w0[0], $2
-; MIPS64R5EL-NEXT: insert.d $w0[1], $1
-; MIPS64R5EL-NEXT: or $1, $3, $4
-; MIPS64R5EL-NEXT: lb $2, 26($sp)
-; MIPS64R5EL-NEXT: dsll $2, $2, 16
-; MIPS64R5EL-NEXT: lhu $3, 24($sp)
-; MIPS64R5EL-NEXT: or $2, $3, $2
-; MIPS64R5EL-NEXT: insert.d $w1[0], $2
-; MIPS64R5EL-NEXT: insert.d $w1[1], $1
+; MIPS64R5EL-NEXT: insert.d $w0[0], $6
+; MIPS64R5EL-NEXT: insert.d $w0[1], $7
+; MIPS64R5EL-NEXT: insert.d $w1[0], $4
+; MIPS64R5EL-NEXT: insert.d $w1[1], $5
; MIPS64R5EL-NEXT: addv.d $w0, $w1, $w0
-; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0]
-; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5EL-NEXT: dsrl $3, $2, 8
-; MIPS64R5EL-NEXT: sb $3, 12($sp)
-; MIPS64R5EL-NEXT: dsrl $3, $2, 16
-; MIPS64R5EL-NEXT: sb $3, 13($sp)
-; MIPS64R5EL-NEXT: sb $2, 11($sp)
-; MIPS64R5EL-NEXT: sh $1, 8($sp)
-; MIPS64R5EL-NEXT: dsrl $1, $1, 16
-; MIPS64R5EL-NEXT: sb $1, 10($sp)
-; MIPS64R5EL-NEXT: lh $1, 12($sp)
-; MIPS64R5EL-NEXT: dsll $1, $1, 32
-; MIPS64R5EL-NEXT: lwu $2, 8($sp)
-; MIPS64R5EL-NEXT: or $2, $2, $1
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT: copy_s.w $2, $w0[0]
+; MIPS64R5EL-NEXT: copy_s.w $3, $w0[2]
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
Entry:
@@ -6868,17 +6771,22 @@ define void @call_i24x2() {
; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(call_i24x2)))
; MIPS64EB-NEXT: daddu $1, $1, $25
; MIPS64EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(call_i24x2)))
-; MIPS64EB-NEXT: lui $1, 1536
-; MIPS64EB-NEXT: ori $4, $1, 7
-; MIPS64EB-NEXT: lui $1, 3072
-; MIPS64EB-NEXT: ori $5, $1, 8
; MIPS64EB-NEXT: ld $25, %call16(i24x2)($gp)
+; MIPS64EB-NEXT: daddiu $4, $zero, 6
+; MIPS64EB-NEXT: daddiu $5, $zero, 7
+; MIPS64EB-NEXT: daddiu $6, $zero, 12
+; MIPS64EB-NEXT: daddiu $7, $zero, 8
; MIPS64EB-NEXT: jalr $25
; MIPS64EB-NEXT: nop
; MIPS64EB-NEXT: ld $1, %got_disp(gv2i24)($gp)
-; MIPS64EB-NEXT: sh $2, 4($1)
-; MIPS64EB-NEXT: dsrl $2, $2, 16
-; MIPS64EB-NEXT: sw $2, 0($1)
+; MIPS64EB-NEXT: sb $3, 5($1)
+; MIPS64EB-NEXT: sb $2, 2($1)
+; MIPS64EB-NEXT: srl $4, $3, 8
+; MIPS64EB-NEXT: sb $4, 4($1)
+; MIPS64EB-NEXT: srl $3, $3, 16
+; MIPS64EB-NEXT: sb $3, 3($1)
+; MIPS64EB-NEXT: srl $2, $2, 8
+; MIPS64EB-NEXT: sh $2, 0($1)
; MIPS64EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
; MIPS64EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
; MIPS64EB-NEXT: daddiu $sp, $sp, 16
@@ -6923,31 +6831,27 @@ define void @call_i24x2() {
; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(call_i24x2)))
; MIPS64R5EB-NEXT: daddu $1, $1, $25
; MIPS64R5EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(call_i24x2)))
-; MIPS64R5EB-NEXT: lui $1, 1536
-; MIPS64R5EB-NEXT: ori $1, $1, 7
-; MIPS64R5EB-NEXT: swl $1, 2($sp)
-; MIPS64R5EB-NEXT: lui $2, 3072
-; MIPS64R5EB-NEXT: ori $2, $2, 8
-; MIPS64R5EB-NEXT: swl $2, 10($sp)
-; MIPS64R5EB-NEXT: sh $zero, 0($sp)
-; MIPS64R5EB-NEXT: swr $1, 5($sp)
-; MIPS64R5EB-NEXT: sh $zero, 8($sp)
-; MIPS64R5EB-NEXT: swr $2, 13($sp)
-; MIPS64R5EB-NEXT: lw $1, 0($sp)
-; MIPS64R5EB-NEXT: dsll $1, $1, 16
-; MIPS64R5EB-NEXT: lhu $2, 4($sp)
-; MIPS64R5EB-NEXT: or $4, $2, $1
-; MIPS64R5EB-NEXT: lw $1, 8($sp)
-; MIPS64R5EB-NEXT: dsll $1, $1, 16
-; MIPS64R5EB-NEXT: lhu $2, 12($sp)
-; MIPS64R5EB-NEXT: or $5, $2, $1
; MIPS64R5EB-NEXT: ld $25, %call16(i24x2)($gp)
+; MIPS64R5EB-NEXT: daddiu $4, $zero, 6
+; MIPS64R5EB-NEXT: daddiu $5, $zero, 7
+; MIPS64R5EB-NEXT: daddiu $6, $zero, 12
+; MIPS64R5EB-NEXT: daddiu $7, $zero, 8
; MIPS64R5EB-NEXT: jalr $25
; MIPS64R5EB-NEXT: nop
-; MIPS64R5EB-NEXT: ld $1, %got_disp(gv2i24)($gp)
-; MIPS64R5EB-NEXT: sh $2, 4($1)
+; MIPS64R5EB-NEXT: sw $3, 12($sp)
+; MIPS64R5EB-NEXT: sw $2, 4($sp)
+; MIPS64R5EB-NEXT: ld.d $w0, 0($sp)
+; MIPS64R5EB-NEXT: copy_s.d $1, $w0[0]
+; MIPS64R5EB-NEXT: copy_s.d $2, $w0[1]
+; MIPS64R5EB-NEXT: ld $3, %got_disp(gv2i24)($gp)
+; MIPS64R5EB-NEXT: sb $2, 5($3)
+; MIPS64R5EB-NEXT: sb $1, 2($3)
+; MIPS64R5EB-NEXT: dsrl $4, $2, 8
+; MIPS64R5EB-NEXT: sb $4, 4($3)
; MIPS64R5EB-NEXT: dsrl $2, $2, 16
-; MIPS64R5EB-NEXT: sw $2, 0($1)
+; MIPS64R5EB-NEXT: sb $2, 3($3)
+; MIPS64R5EB-NEXT: dsrl $1, $1, 8
+; MIPS64R5EB-NEXT: sh $1, 0($3)
; MIPS64R5EB-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload
; MIPS64R5EB-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32
@@ -6992,17 +6896,22 @@ define void @call_i24x2() {
; MIPS64EL-NEXT: lui $1, %hi(%neg(%gp_rel(call_i24x2)))
; MIPS64EL-NEXT: daddu $1, $1, $25
; MIPS64EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(call_i24x2)))
-; MIPS64EL-NEXT: lui $1, 1792
-; MIPS64EL-NEXT: ori $4, $1, 6
-; MIPS64EL-NEXT: lui $1, 2048
-; MIPS64EL-NEXT: ori $5, $1, 12
; MIPS64EL-NEXT: ld $25, %call16(i24x2)($gp)
+; MIPS64EL-NEXT: daddiu $4, $zero, 6
+; MIPS64EL-NEXT: daddiu $5, $zero, 7
+; MIPS64EL-NEXT: daddiu $6, $zero, 12
+; MIPS64EL-NEXT: daddiu $7, $zero, 8
; MIPS64EL-NEXT: jalr $25
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: ld $1, %got_disp(gv2i24)($gp)
-; MIPS64EL-NEXT: sw $2, 0($1)
-; MIPS64EL-NEXT: dsrl $2, $2, 32
-; MIPS64EL-NEXT: sh $2, 4($1)
+; MIPS64EL-NEXT: sb $3, 3($1)
+; MIPS64EL-NEXT: sh $2, 0($1)
+; MIPS64EL-NEXT: srl $4, $3, 8
+; MIPS64EL-NEXT: sb $4, 4($1)
+; MIPS64EL-NEXT: srl $3, $3, 16
+; MIPS64EL-NEXT: sb $3, 5($1)
+; MIPS64EL-NEXT: srl $2, $2, 16
+; MIPS64EL-NEXT: sb $2, 2($1)
; MIPS64EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
; MIPS64EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
; MIPS64EL-NEXT: daddiu $sp, $sp, 16
@@ -7047,31 +6956,27 @@ define void @call_i24x2() {
; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(call_i24x2)))
; MIPS64R5EL-NEXT: daddu $1, $1, $25
; MIPS64R5EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(call_i24x2)))
-; MIPS64R5EL-NEXT: addiu $1, $zero, 1792
-; MIPS64R5EL-NEXT: swl $1, 5($sp)
-; MIPS64R5EL-NEXT: addiu $2, $zero, 2048
-; MIPS64R5EL-NEXT: swl $2, 13($sp)
-; MIPS64R5EL-NEXT: swr $1, 2($sp)
-; MIPS64R5EL-NEXT: daddiu $1, $zero, 6
-; MIPS64R5EL-NEXT: sh $1, 0($sp)
-; MIPS64R5EL-NEXT: swr $2, 10($sp)
-; MIPS64R5EL-NEXT: daddiu $1, $zero, 12
-; MIPS64R5EL-NEXT: sh $1, 8($sp)
-; MIPS64R5EL-NEXT: lh $1, 4($sp)
-; MIPS64R5EL-NEXT: dsll $1, $1, 32
-; MIPS64R5EL-NEXT: lwu $2, 0($sp)
-; MIPS64R5EL-NEXT: or $4, $2, $1
-; MIPS64R5EL-NEXT: lh $1, 12($sp)
-; MIPS64R5EL-NEXT: dsll $1, $1, 32
-; MIPS64R5EL-NEXT: lwu $2, 8($sp)
-; MIPS64R5EL-NEXT: or $5, $2, $1
; MIPS64R5EL-NEXT: ld $25, %call16(i24x2)($gp)
+; MIPS64R5EL-NEXT: daddiu $4, $zero, 6
+; MIPS64R5EL-NEXT: daddiu $5, $zero, 7
+; MIPS64R5EL-NEXT: daddiu $6, $zero, 12
+; MIPS64R5EL-NEXT: daddiu $7, $zero, 8
; MIPS64R5EL-NEXT: jalr $25
; MIPS64R5EL-NEXT: nop
-; MIPS64R5EL-NEXT: ld $1, %got_disp(gv2i24)($gp)
-; MIPS64R5EL-NEXT: sw $2, 0($1)
-; MIPS64R5EL-NEXT: dsrl $2, $2, 32
-; MIPS64R5EL-NEXT: sh $2, 4($1)
+; MIPS64R5EL-NEXT: sw $3, 8($sp)
+; MIPS64R5EL-NEXT: sw $2, 0($sp)
+; MIPS64R5EL-NEXT: ld.d $w0, 0($sp)
+; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0]
+; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1]
+; MIPS64R5EL-NEXT: ld $3, %got_disp(gv2i24)($gp)
+; MIPS64R5EL-NEXT: sb $2, 3($3)
+; MIPS64R5EL-NEXT: sh $1, 0($3)
+; MIPS64R5EL-NEXT: dsrl $4, $2, 8
+; MIPS64R5EL-NEXT: sb $4, 4($3)
+; MIPS64R5EL-NEXT: dsrl $2, $2, 16
+; MIPS64R5EL-NEXT: sb $2, 5($3)
+; MIPS64R5EL-NEXT: dsrl $1, $1, 16
+; MIPS64R5EL-NEXT: sb $1, 2($3)
; MIPS64R5EL-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload
; MIPS64R5EL-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32