[llvm] r337258 - [DAGCombiner] Call SimplifyDemandedVectorElts from EXTRACT_VECTOR_ELT
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 17 02:45:36 PDT 2018
Author: rksimon
Date: Tue Jul 17 02:45:35 2018
New Revision: 337258
URL: http://llvm.org/viewvc/llvm-project?rev=337258&view=rev
Log:
[DAGCombiner] Call SimplifyDemandedVectorElts from EXTRACT_VECTOR_ELT
If we are only extracting vector elements via EXTRACT_VECTOR_ELT(s), we may be able to use SimplifyDemandedVectorElts to avoid unnecessary vector ops.
Differential Revision: https://reviews.llvm.org/D49262
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
llvm/trunk/test/CodeGen/ARM/func-argpassing-endian.ll
llvm/trunk/test/CodeGen/Mips/cconv/vector.ll
llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll
llvm/trunk/test/CodeGen/X86/extractelement-load.ll
llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
llvm/trunk/test/CodeGen/X86/oddshuffles.ll
llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll
llvm/trunk/test/CodeGen/X86/vec_shift7.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Jul 17 02:45:35 2018
@@ -242,7 +242,8 @@ namespace {
}
bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
- bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded);
+ bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+ bool AssumeSingleUse = false);
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
@@ -1064,11 +1065,12 @@ bool DAGCombiner::SimplifyDemandedBits(S
/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
-bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
- const APInt &Demanded) {
+bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+ bool AssumeSingleUse) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownUndef, KnownZero;
- if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO))
+ if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
+ 0, AssumeSingleUse))
return false;
// Revisit the node.
@@ -15014,6 +15016,23 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
}
}
+ // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
+ // simplify it based on the (valid) extraction indices.
+ if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
+ return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Use->getOperand(0) == InVec &&
+ isa<ConstantSDNode>(Use->getOperand(1));
+ })) {
+ APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
+ for (SDNode *Use : InVec->uses()) {
+ auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
+ if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
+ DemandedElts.setBit(CstElt->getZExtValue());
+ }
+ if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
+ return SDValue(N, 0);
+ }
+
bool BCNumEltsChanged = false;
EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp Tue Jul 17 02:45:35 2018
@@ -3893,20 +3893,34 @@ static const Permute *matchDoublePermute
return nullptr;
}
-// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
+// Convert the mask of the given shuffle op into a byte-level mask,
// as if it had type vNi8.
-static void getVPermMask(ShuffleVectorSDNode *VSN,
+static bool getVPermMask(SDValue ShuffleOp,
SmallVectorImpl<int> &Bytes) {
- EVT VT = VSN->getValueType(0);
+ EVT VT = ShuffleOp.getValueType();
unsigned NumElements = VT.getVectorNumElements();
unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
- Bytes.resize(NumElements * BytesPerElement, -1);
- for (unsigned I = 0; I < NumElements; ++I) {
- int Index = VSN->getMaskElt(I);
- if (Index >= 0)
+
+ if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
+ Bytes.resize(NumElements * BytesPerElement, -1);
+ for (unsigned I = 0; I < NumElements; ++I) {
+ int Index = VSN->getMaskElt(I);
+ if (Index >= 0)
+ for (unsigned J = 0; J < BytesPerElement; ++J)
+ Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
+ }
+ return true;
+ }
+ if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
+ isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
+ unsigned Index = ShuffleOp.getConstantOperandVal(1);
+ Bytes.resize(NumElements * BytesPerElement, -1);
+ for (unsigned I = 0; I < NumElements; ++I)
for (unsigned J = 0; J < BytesPerElement; ++J)
Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
+ return true;
}
+ return false;
}
// Bytes is a VPERM-like permute vector, except that -1 is used for
@@ -4075,7 +4089,8 @@ bool GeneralShuffle::add(SDValue Op, uns
// See whether the bytes we need come from a contiguous part of one
// operand.
SmallVector<int, SystemZ::VectorBytes> OpBytes;
- getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
+ if (!getVPermMask(Op, OpBytes))
+ break;
int NewByte;
if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
break;
@@ -5109,13 +5124,14 @@ SDValue SystemZTargetLowering::combineEx
if (Opcode == ISD::BITCAST)
// Look through bitcasts.
Op = Op.getOperand(0);
- else if (Opcode == ISD::VECTOR_SHUFFLE &&
+ else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
canTreatAsByteVector(Op.getValueType())) {
// Get a VPERM-like permute mask and see whether the bytes covered
// by the extracted element are a contiguous sequence from one
// source operand.
SmallVector<int, SystemZ::VectorBytes> Bytes;
- getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
+ if (!getVPermMask(Op, Bytes))
+ break;
int First;
if (!getShuffleInput(Bytes, Index * BytesPerElement,
BytesPerElement, First))
Modified: llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll Tue Jul 17 02:45:35 2018
@@ -480,38 +480,28 @@ bb7:
; GCN-LABEL: {{^}}multi_same_block:
-; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT0:[0-9]+]], 0x41880000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
-; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT2:[0-9]+]], 0x41980000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000
-; GCN-DAG: s_load_dword [[ARG:s[0-9]+]]
-; IDXMODE-DAG: s_add_i32 [[ARG_ADD:s[0-9]+]], [[ARG]], -16
-
-; MOVREL-DAG: s_add_i32 m0, [[ARG]], -16
-; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT0]], 4.0
-; GCN-NOT: m0
+; GCN: s_load_dword [[ARG:s[0-9]+]]
-; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
-; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT0]], 4.0
+; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
+; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
+; MOVREL: s_waitcnt
+; MOVREL: s_add_i32 m0, [[ARG]], -16
+; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, 4.0
+; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, -4.0
+; MOVREL: s_mov_b32 m0, -1
+
+
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
+; IDXMODE: s_waitcnt
+; IDXMODE: s_add_i32 [[ARG]], [[ARG]], -16
+; IDXMODE: s_set_gpr_idx_on [[ARG]], dst
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; IDXMODE: s_set_gpr_idx_off
-
-; GCN: v_mov_b32_e32 v[[VEC0_ELT2]], 0x4188cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4190cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4198cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a0cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a8cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
-
-; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT2]], -4.0
-
-; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
-; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT2]], -4.0
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
+; IDXMODE: s_set_gpr_idx_on [[ARG]], dst
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, -4.0
; IDXMODE: s_set_gpr_idx_off
-; PREGFX9: s_mov_b32 m0, -1
-; GFX9-NOT: s_mov_b32 m0
; GCN: ds_write_b32
; GCN: ds_write_b32
; GCN: s_endpgm
Modified: llvm/trunk/test/CodeGen/ARM/func-argpassing-endian.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/func-argpassing-endian.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/func-argpassing-endian.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/func-argpassing-endian.ll Tue Jul 17 02:45:35 2018
@@ -38,7 +38,6 @@ define void @arg_double( double %val ) {
define void @arg_v4i32(<4 x i32> %vec ) {
; CHECK-LE-LABEL: arg_v4i32:
; CHECK-LE: @ %bb.0:
-; CHECK-LE-NEXT: vmov d17, r2, r3
; CHECK-LE-NEXT: vmov d16, r0, r1
; CHECK-LE-NEXT: movw r0, :lower16:var32
; CHECK-LE-NEXT: movt r0, :upper16:var32
@@ -47,7 +46,6 @@ define void @arg_v4i32(<4 x i32> %vec )
;
; CHECK-BE-LABEL: arg_v4i32:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vmov d17, r3, r2
; CHECK-BE-NEXT: vmov d16, r1, r0
; CHECK-BE-NEXT: movw r0, :lower16:var32
; CHECK-BE-NEXT: movt r0, :upper16:var32
Modified: llvm/trunk/test/CodeGen/Mips/cconv/vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/cconv/vector.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/cconv/vector.ll (original)
+++ llvm/trunk/test/CodeGen/Mips/cconv/vector.ll Tue Jul 17 02:45:35 2018
@@ -89,61 +89,37 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x
;
; MIPS64R5EB-LABEL: i8_2:
; MIPS64R5EB: # %bb.0:
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, -64
-; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 64
-; MIPS64R5EB-NEXT: sd $4, 56($sp)
-; MIPS64R5EB-NEXT: ldi.b $w0, 0
-; MIPS64R5EB-NEXT: lbu $1, 57($sp)
-; MIPS64R5EB-NEXT: lbu $2, 56($sp)
-; MIPS64R5EB-NEXT: move.v $w1, $w0
-; MIPS64R5EB-NEXT: insert.h $w1[0], $2
-; MIPS64R5EB-NEXT: insert.h $w1[1], $1
-; MIPS64R5EB-NEXT: lbu $1, 58($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[2], $1
-; MIPS64R5EB-NEXT: lbu $1, 59($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[3], $1
-; MIPS64R5EB-NEXT: lbu $1, 60($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[4], $1
-; MIPS64R5EB-NEXT: lbu $1, 61($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[5], $1
-; MIPS64R5EB-NEXT: lbu $1, 63($sp)
-; MIPS64R5EB-NEXT: lbu $2, 62($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[6], $2
-; MIPS64R5EB-NEXT: insert.h $w1[7], $1
-; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0]
-; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1]
-; MIPS64R5EB-NEXT: sd $5, 48($sp)
-; MIPS64R5EB-NEXT: lbu $3, 48($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[0], $3
-; MIPS64R5EB-NEXT: lbu $3, 49($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[1], $3
-; MIPS64R5EB-NEXT: lbu $3, 50($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[2], $3
-; MIPS64R5EB-NEXT: lbu $3, 51($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[3], $3
-; MIPS64R5EB-NEXT: lbu $3, 52($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[4], $3
-; MIPS64R5EB-NEXT: lbu $3, 53($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[5], $3
-; MIPS64R5EB-NEXT: lbu $3, 55($sp)
-; MIPS64R5EB-NEXT: lbu $4, 54($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[6], $4
-; MIPS64R5EB-NEXT: insert.h $w0[7], $3
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -96
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 96
+; MIPS64R5EB-NEXT: sd $4, 88($sp)
+; MIPS64R5EB-NEXT: lbu $1, 89($sp)
+; MIPS64R5EB-NEXT: sh $1, 2($sp)
+; MIPS64R5EB-NEXT: lbu $1, 88($sp)
+; MIPS64R5EB-NEXT: sh $1, 0($sp)
+; MIPS64R5EB-NEXT: ld.h $w0, 0($sp)
+; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0]
+; MIPS64R5EB-NEXT: copy_s.h $2, $w0[1]
+; MIPS64R5EB-NEXT: sd $5, 80($sp)
+; MIPS64R5EB-NEXT: lbu $3, 81($sp)
+; MIPS64R5EB-NEXT: sh $3, 18($sp)
+; MIPS64R5EB-NEXT: lbu $3, 80($sp)
+; MIPS64R5EB-NEXT: sh $3, 16($sp)
+; MIPS64R5EB-NEXT: ld.h $w0, 16($sp)
; MIPS64R5EB-NEXT: copy_s.h $3, $w0[0]
; MIPS64R5EB-NEXT: copy_s.h $4, $w0[1]
-; MIPS64R5EB-NEXT: sw $4, 28($sp)
-; MIPS64R5EB-NEXT: sw $3, 20($sp)
-; MIPS64R5EB-NEXT: sw $2, 12($sp)
-; MIPS64R5EB-NEXT: sw $1, 4($sp)
-; MIPS64R5EB-NEXT: ld.d $w0, 16($sp)
-; MIPS64R5EB-NEXT: ld.d $w1, 0($sp)
+; MIPS64R5EB-NEXT: sw $4, 60($sp)
+; MIPS64R5EB-NEXT: sw $3, 52($sp)
+; MIPS64R5EB-NEXT: sw $2, 44($sp)
+; MIPS64R5EB-NEXT: sw $1, 36($sp)
+; MIPS64R5EB-NEXT: ld.d $w0, 48($sp)
+; MIPS64R5EB-NEXT: ld.d $w1, 32($sp)
; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0
; MIPS64R5EB-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5EB-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5EB-NEXT: sb $2, 45($sp)
-; MIPS64R5EB-NEXT: sb $1, 44($sp)
-; MIPS64R5EB-NEXT: lh $2, 44($sp)
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, 64
+; MIPS64R5EB-NEXT: sb $2, 77($sp)
+; MIPS64R5EB-NEXT: sb $1, 76($sp)
+; MIPS64R5EB-NEXT: lh $2, 76($sp)
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 96
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
;
@@ -215,61 +191,37 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x
;
; MIPS64R5EL-LABEL: i8_2:
; MIPS64R5EL: # %bb.0:
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, -64
-; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 64
-; MIPS64R5EL-NEXT: sd $4, 56($sp)
-; MIPS64R5EL-NEXT: ldi.b $w0, 0
-; MIPS64R5EL-NEXT: lbu $1, 57($sp)
-; MIPS64R5EL-NEXT: lbu $2, 56($sp)
-; MIPS64R5EL-NEXT: move.v $w1, $w0
-; MIPS64R5EL-NEXT: insert.h $w1[0], $2
-; MIPS64R5EL-NEXT: insert.h $w1[1], $1
-; MIPS64R5EL-NEXT: lbu $1, 58($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[2], $1
-; MIPS64R5EL-NEXT: lbu $1, 59($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[3], $1
-; MIPS64R5EL-NEXT: lbu $1, 60($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[4], $1
-; MIPS64R5EL-NEXT: lbu $1, 61($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[5], $1
-; MIPS64R5EL-NEXT: lbu $1, 63($sp)
-; MIPS64R5EL-NEXT: lbu $2, 62($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[6], $2
-; MIPS64R5EL-NEXT: insert.h $w1[7], $1
-; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0]
-; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1]
-; MIPS64R5EL-NEXT: sd $5, 48($sp)
-; MIPS64R5EL-NEXT: lbu $3, 48($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[0], $3
-; MIPS64R5EL-NEXT: lbu $3, 49($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[1], $3
-; MIPS64R5EL-NEXT: lbu $3, 50($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[2], $3
-; MIPS64R5EL-NEXT: lbu $3, 51($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[3], $3
-; MIPS64R5EL-NEXT: lbu $3, 52($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[4], $3
-; MIPS64R5EL-NEXT: lbu $3, 53($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[5], $3
-; MIPS64R5EL-NEXT: lbu $3, 55($sp)
-; MIPS64R5EL-NEXT: lbu $4, 54($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[6], $4
-; MIPS64R5EL-NEXT: insert.h $w0[7], $3
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -96
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 96
+; MIPS64R5EL-NEXT: sd $4, 88($sp)
+; MIPS64R5EL-NEXT: lbu $1, 89($sp)
+; MIPS64R5EL-NEXT: sh $1, 2($sp)
+; MIPS64R5EL-NEXT: lbu $1, 88($sp)
+; MIPS64R5EL-NEXT: sh $1, 0($sp)
+; MIPS64R5EL-NEXT: ld.h $w0, 0($sp)
+; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0]
+; MIPS64R5EL-NEXT: copy_s.h $2, $w0[1]
+; MIPS64R5EL-NEXT: sd $5, 80($sp)
+; MIPS64R5EL-NEXT: lbu $3, 81($sp)
+; MIPS64R5EL-NEXT: sh $3, 18($sp)
+; MIPS64R5EL-NEXT: lbu $3, 80($sp)
+; MIPS64R5EL-NEXT: sh $3, 16($sp)
+; MIPS64R5EL-NEXT: ld.h $w0, 16($sp)
; MIPS64R5EL-NEXT: copy_s.h $3, $w0[0]
; MIPS64R5EL-NEXT: copy_s.h $4, $w0[1]
-; MIPS64R5EL-NEXT: sw $4, 24($sp)
-; MIPS64R5EL-NEXT: sw $3, 16($sp)
-; MIPS64R5EL-NEXT: sw $2, 8($sp)
-; MIPS64R5EL-NEXT: sw $1, 0($sp)
-; MIPS64R5EL-NEXT: ld.d $w0, 16($sp)
-; MIPS64R5EL-NEXT: ld.d $w1, 0($sp)
+; MIPS64R5EL-NEXT: sw $4, 56($sp)
+; MIPS64R5EL-NEXT: sw $3, 48($sp)
+; MIPS64R5EL-NEXT: sw $2, 40($sp)
+; MIPS64R5EL-NEXT: sw $1, 32($sp)
+; MIPS64R5EL-NEXT: ld.d $w0, 48($sp)
+; MIPS64R5EL-NEXT: ld.d $w1, 32($sp)
; MIPS64R5EL-NEXT: addv.d $w0, $w1, $w0
; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5EL-NEXT: sb $2, 45($sp)
-; MIPS64R5EL-NEXT: sb $1, 44($sp)
-; MIPS64R5EL-NEXT: lh $2, 44($sp)
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, 64
+; MIPS64R5EL-NEXT: sb $2, 77($sp)
+; MIPS64R5EL-NEXT: sb $1, 76($sp)
+; MIPS64R5EL-NEXT: lh $2, 76($sp)
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 96
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
%1 = add <2 x i8> %a, %b
@@ -445,181 +397,97 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2
;
; MIPS64R5EB-LABEL: i8x2_7:
; MIPS64R5EB: # %bb.0: # %entry
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, -176
-; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 176
-; MIPS64R5EB-NEXT: sd $4, 168($sp)
-; MIPS64R5EB-NEXT: ldi.b $w0, 0
-; MIPS64R5EB-NEXT: lbu $1, 169($sp)
-; MIPS64R5EB-NEXT: lbu $2, 168($sp)
-; MIPS64R5EB-NEXT: move.v $w1, $w0
-; MIPS64R5EB-NEXT: insert.h $w1[0], $2
-; MIPS64R5EB-NEXT: insert.h $w1[1], $1
-; MIPS64R5EB-NEXT: lbu $1, 170($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[2], $1
-; MIPS64R5EB-NEXT: lbu $1, 171($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[3], $1
-; MIPS64R5EB-NEXT: lbu $1, 172($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[4], $1
-; MIPS64R5EB-NEXT: lbu $1, 173($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[5], $1
-; MIPS64R5EB-NEXT: lbu $1, 175($sp)
-; MIPS64R5EB-NEXT: lbu $2, 174($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[6], $2
-; MIPS64R5EB-NEXT: insert.h $w1[7], $1
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -288
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 288
+; MIPS64R5EB-NEXT: sd $4, 280($sp)
+; MIPS64R5EB-NEXT: lbu $1, 281($sp)
+; MIPS64R5EB-NEXT: sh $1, 2($sp)
+; MIPS64R5EB-NEXT: lbu $1, 280($sp)
+; MIPS64R5EB-NEXT: sh $1, 0($sp)
+; MIPS64R5EB-NEXT: ld.h $w0, 0($sp)
+; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0]
+; MIPS64R5EB-NEXT: copy_s.h $2, $w0[1]
+; MIPS64R5EB-NEXT: sd $5, 272($sp)
+; MIPS64R5EB-NEXT: lbu $3, 273($sp)
+; MIPS64R5EB-NEXT: sh $3, 18($sp)
+; MIPS64R5EB-NEXT: lbu $3, 272($sp)
+; MIPS64R5EB-NEXT: sh $3, 16($sp)
+; MIPS64R5EB-NEXT: ld.h $w0, 16($sp)
+; MIPS64R5EB-NEXT: copy_s.h $3, $w0[0]
+; MIPS64R5EB-NEXT: copy_s.h $4, $w0[1]
+; MIPS64R5EB-NEXT: sw $4, 140($sp)
+; MIPS64R5EB-NEXT: sw $3, 132($sp)
+; MIPS64R5EB-NEXT: sw $2, 124($sp)
+; MIPS64R5EB-NEXT: sw $1, 116($sp)
+; MIPS64R5EB-NEXT: ld.d $w0, 128($sp)
+; MIPS64R5EB-NEXT: ld.d $w1, 112($sp)
+; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0
+; MIPS64R5EB-NEXT: sd $6, 264($sp)
+; MIPS64R5EB-NEXT: lbu $1, 265($sp)
+; MIPS64R5EB-NEXT: sh $1, 34($sp)
+; MIPS64R5EB-NEXT: lbu $1, 264($sp)
+; MIPS64R5EB-NEXT: sh $1, 32($sp)
+; MIPS64R5EB-NEXT: ld.h $w1, 32($sp)
; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0]
; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1]
-; MIPS64R5EB-NEXT: sd $5, 160($sp)
-; MIPS64R5EB-NEXT: lbu $3, 161($sp)
-; MIPS64R5EB-NEXT: lbu $4, 160($sp)
-; MIPS64R5EB-NEXT: move.v $w1, $w0
-; MIPS64R5EB-NEXT: insert.h $w1[0], $4
-; MIPS64R5EB-NEXT: insert.h $w1[1], $3
-; MIPS64R5EB-NEXT: lbu $3, 162($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[2], $3
-; MIPS64R5EB-NEXT: lbu $3, 163($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[3], $3
-; MIPS64R5EB-NEXT: lbu $3, 164($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[4], $3
-; MIPS64R5EB-NEXT: lbu $3, 165($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[5], $3
-; MIPS64R5EB-NEXT: lbu $3, 167($sp)
-; MIPS64R5EB-NEXT: lbu $4, 166($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[6], $4
-; MIPS64R5EB-NEXT: insert.h $w1[7], $3
+; MIPS64R5EB-NEXT: sw $2, 156($sp)
+; MIPS64R5EB-NEXT: sw $1, 148($sp)
+; MIPS64R5EB-NEXT: ld.d $w1, 144($sp)
+; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EB-NEXT: sd $7, 256($sp)
+; MIPS64R5EB-NEXT: lbu $1, 257($sp)
+; MIPS64R5EB-NEXT: sh $1, 50($sp)
+; MIPS64R5EB-NEXT: lbu $1, 256($sp)
+; MIPS64R5EB-NEXT: sh $1, 48($sp)
+; MIPS64R5EB-NEXT: ld.h $w1, 48($sp)
+; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0]
+; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1]
+; MIPS64R5EB-NEXT: sw $2, 172($sp)
+; MIPS64R5EB-NEXT: sw $1, 164($sp)
+; MIPS64R5EB-NEXT: ld.d $w1, 160($sp)
+; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EB-NEXT: sd $8, 248($sp)
+; MIPS64R5EB-NEXT: lbu $1, 249($sp)
+; MIPS64R5EB-NEXT: sh $1, 66($sp)
+; MIPS64R5EB-NEXT: lbu $1, 248($sp)
+; MIPS64R5EB-NEXT: sh $1, 64($sp)
+; MIPS64R5EB-NEXT: ld.h $w1, 64($sp)
+; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0]
+; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1]
+; MIPS64R5EB-NEXT: sw $2, 188($sp)
+; MIPS64R5EB-NEXT: sw $1, 180($sp)
+; MIPS64R5EB-NEXT: ld.d $w1, 176($sp)
+; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EB-NEXT: sd $10, 232($sp)
+; MIPS64R5EB-NEXT: lbu $1, 233($sp)
+; MIPS64R5EB-NEXT: sh $1, 98($sp)
+; MIPS64R5EB-NEXT: lbu $1, 232($sp)
+; MIPS64R5EB-NEXT: sh $1, 96($sp)
+; MIPS64R5EB-NEXT: ld.h $w1, 96($sp)
+; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0]
+; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1]
+; MIPS64R5EB-NEXT: sd $9, 240($sp)
+; MIPS64R5EB-NEXT: lbu $3, 241($sp)
+; MIPS64R5EB-NEXT: sh $3, 82($sp)
+; MIPS64R5EB-NEXT: lbu $3, 240($sp)
+; MIPS64R5EB-NEXT: sh $3, 80($sp)
+; MIPS64R5EB-NEXT: ld.h $w1, 80($sp)
; MIPS64R5EB-NEXT: copy_s.h $3, $w1[0]
; MIPS64R5EB-NEXT: copy_s.h $4, $w1[1]
-; MIPS64R5EB-NEXT: sw $4, 28($sp)
-; MIPS64R5EB-NEXT: sw $3, 20($sp)
-; MIPS64R5EB-NEXT: sw $2, 12($sp)
-; MIPS64R5EB-NEXT: sw $1, 4($sp)
-; MIPS64R5EB-NEXT: ld.d $w1, 16($sp)
-; MIPS64R5EB-NEXT: ld.d $w2, 0($sp)
-; MIPS64R5EB-NEXT: addv.d $w1, $w2, $w1
-; MIPS64R5EB-NEXT: sd $6, 152($sp)
-; MIPS64R5EB-NEXT: lbu $1, 153($sp)
-; MIPS64R5EB-NEXT: lbu $2, 152($sp)
-; MIPS64R5EB-NEXT: move.v $w2, $w0
-; MIPS64R5EB-NEXT: insert.h $w2[0], $2
-; MIPS64R5EB-NEXT: insert.h $w2[1], $1
-; MIPS64R5EB-NEXT: lbu $1, 154($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[2], $1
-; MIPS64R5EB-NEXT: lbu $1, 155($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[3], $1
-; MIPS64R5EB-NEXT: lbu $1, 156($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[4], $1
-; MIPS64R5EB-NEXT: lbu $1, 157($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[5], $1
-; MIPS64R5EB-NEXT: lbu $1, 159($sp)
-; MIPS64R5EB-NEXT: lbu $2, 158($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[6], $2
-; MIPS64R5EB-NEXT: insert.h $w2[7], $1
-; MIPS64R5EB-NEXT: copy_s.h $1, $w2[0]
-; MIPS64R5EB-NEXT: copy_s.h $2, $w2[1]
-; MIPS64R5EB-NEXT: sw $2, 44($sp)
-; MIPS64R5EB-NEXT: sw $1, 36($sp)
-; MIPS64R5EB-NEXT: ld.d $w2, 32($sp)
-; MIPS64R5EB-NEXT: addv.d $w1, $w1, $w2
-; MIPS64R5EB-NEXT: sd $7, 144($sp)
-; MIPS64R5EB-NEXT: lbu $1, 145($sp)
-; MIPS64R5EB-NEXT: lbu $2, 144($sp)
-; MIPS64R5EB-NEXT: move.v $w2, $w0
-; MIPS64R5EB-NEXT: insert.h $w2[0], $2
-; MIPS64R5EB-NEXT: insert.h $w2[1], $1
-; MIPS64R5EB-NEXT: lbu $1, 146($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[2], $1
-; MIPS64R5EB-NEXT: lbu $1, 147($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[3], $1
-; MIPS64R5EB-NEXT: lbu $1, 148($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[4], $1
-; MIPS64R5EB-NEXT: lbu $1, 149($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[5], $1
-; MIPS64R5EB-NEXT: lbu $1, 151($sp)
-; MIPS64R5EB-NEXT: lbu $2, 150($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[6], $2
-; MIPS64R5EB-NEXT: insert.h $w2[7], $1
-; MIPS64R5EB-NEXT: copy_s.h $1, $w2[0]
-; MIPS64R5EB-NEXT: copy_s.h $2, $w2[1]
-; MIPS64R5EB-NEXT: sw $2, 60($sp)
-; MIPS64R5EB-NEXT: sw $1, 52($sp)
-; MIPS64R5EB-NEXT: ld.d $w2, 48($sp)
-; MIPS64R5EB-NEXT: addv.d $w1, $w1, $w2
-; MIPS64R5EB-NEXT: sd $8, 136($sp)
-; MIPS64R5EB-NEXT: lbu $1, 137($sp)
-; MIPS64R5EB-NEXT: lbu $2, 136($sp)
-; MIPS64R5EB-NEXT: move.v $w2, $w0
-; MIPS64R5EB-NEXT: insert.h $w2[0], $2
-; MIPS64R5EB-NEXT: insert.h $w2[1], $1
-; MIPS64R5EB-NEXT: lbu $1, 138($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[2], $1
-; MIPS64R5EB-NEXT: lbu $1, 139($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[3], $1
-; MIPS64R5EB-NEXT: lbu $1, 140($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[4], $1
-; MIPS64R5EB-NEXT: lbu $1, 141($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[5], $1
-; MIPS64R5EB-NEXT: lbu $1, 143($sp)
-; MIPS64R5EB-NEXT: lbu $2, 142($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[6], $2
-; MIPS64R5EB-NEXT: insert.h $w2[7], $1
-; MIPS64R5EB-NEXT: copy_s.h $1, $w2[0]
-; MIPS64R5EB-NEXT: copy_s.h $2, $w2[1]
-; MIPS64R5EB-NEXT: sd $10, 120($sp)
-; MIPS64R5EB-NEXT: lbu $3, 121($sp)
-; MIPS64R5EB-NEXT: lbu $4, 120($sp)
-; MIPS64R5EB-NEXT: move.v $w2, $w0
-; MIPS64R5EB-NEXT: insert.h $w2[0], $4
-; MIPS64R5EB-NEXT: insert.h $w2[1], $3
-; MIPS64R5EB-NEXT: lbu $3, 122($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[2], $3
-; MIPS64R5EB-NEXT: lbu $3, 123($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[3], $3
-; MIPS64R5EB-NEXT: lbu $3, 124($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[4], $3
-; MIPS64R5EB-NEXT: lbu $3, 125($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[5], $3
-; MIPS64R5EB-NEXT: lbu $3, 127($sp)
-; MIPS64R5EB-NEXT: lbu $4, 126($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[6], $4
-; MIPS64R5EB-NEXT: insert.h $w2[7], $3
-; MIPS64R5EB-NEXT: copy_s.h $3, $w2[0]
-; MIPS64R5EB-NEXT: copy_s.h $4, $w2[1]
-; MIPS64R5EB-NEXT: sw $2, 76($sp)
-; MIPS64R5EB-NEXT: sw $1, 68($sp)
-; MIPS64R5EB-NEXT: ld.d $w2, 64($sp)
-; MIPS64R5EB-NEXT: addv.d $w1, $w1, $w2
-; MIPS64R5EB-NEXT: sd $9, 128($sp)
-; MIPS64R5EB-NEXT: lbu $1, 128($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[0], $1
-; MIPS64R5EB-NEXT: lbu $1, 129($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[1], $1
-; MIPS64R5EB-NEXT: lbu $1, 130($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[2], $1
-; MIPS64R5EB-NEXT: lbu $1, 131($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[3], $1
-; MIPS64R5EB-NEXT: lbu $1, 132($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[4], $1
-; MIPS64R5EB-NEXT: lbu $1, 133($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[5], $1
-; MIPS64R5EB-NEXT: lbu $1, 135($sp)
-; MIPS64R5EB-NEXT: lbu $2, 134($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[6], $2
-; MIPS64R5EB-NEXT: insert.h $w0[7], $1
-; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0]
-; MIPS64R5EB-NEXT: copy_s.h $2, $w0[1]
-; MIPS64R5EB-NEXT: sw $2, 92($sp)
-; MIPS64R5EB-NEXT: sw $1, 84($sp)
-; MIPS64R5EB-NEXT: ld.d $w0, 80($sp)
-; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0
-; MIPS64R5EB-NEXT: sw $4, 108($sp)
-; MIPS64R5EB-NEXT: sw $3, 100($sp)
-; MIPS64R5EB-NEXT: ld.d $w1, 96($sp)
+; MIPS64R5EB-NEXT: sw $4, 204($sp)
+; MIPS64R5EB-NEXT: sw $3, 196($sp)
+; MIPS64R5EB-NEXT: ld.d $w1, 192($sp)
+; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EB-NEXT: sw $2, 220($sp)
+; MIPS64R5EB-NEXT: sw $1, 212($sp)
+; MIPS64R5EB-NEXT: ld.d $w1, 208($sp)
; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1
; MIPS64R5EB-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5EB-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5EB-NEXT: sb $2, 117($sp)
-; MIPS64R5EB-NEXT: sb $1, 116($sp)
-; MIPS64R5EB-NEXT: lh $2, 116($sp)
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, 176
+; MIPS64R5EB-NEXT: sb $2, 229($sp)
+; MIPS64R5EB-NEXT: sb $1, 228($sp)
+; MIPS64R5EB-NEXT: lh $2, 228($sp)
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 288
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
;
@@ -773,181 +641,97 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2
;
; MIPS64R5EL-LABEL: i8x2_7:
; MIPS64R5EL: # %bb.0: # %entry
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, -176
-; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 176
-; MIPS64R5EL-NEXT: sd $4, 168($sp)
-; MIPS64R5EL-NEXT: ldi.b $w0, 0
-; MIPS64R5EL-NEXT: lbu $1, 169($sp)
-; MIPS64R5EL-NEXT: lbu $2, 168($sp)
-; MIPS64R5EL-NEXT: move.v $w1, $w0
-; MIPS64R5EL-NEXT: insert.h $w1[0], $2
-; MIPS64R5EL-NEXT: insert.h $w1[1], $1
-; MIPS64R5EL-NEXT: lbu $1, 170($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[2], $1
-; MIPS64R5EL-NEXT: lbu $1, 171($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[3], $1
-; MIPS64R5EL-NEXT: lbu $1, 172($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[4], $1
-; MIPS64R5EL-NEXT: lbu $1, 173($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[5], $1
-; MIPS64R5EL-NEXT: lbu $1, 175($sp)
-; MIPS64R5EL-NEXT: lbu $2, 174($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[6], $2
-; MIPS64R5EL-NEXT: insert.h $w1[7], $1
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -288
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 288
+; MIPS64R5EL-NEXT: sd $4, 280($sp)
+; MIPS64R5EL-NEXT: lbu $1, 281($sp)
+; MIPS64R5EL-NEXT: sh $1, 2($sp)
+; MIPS64R5EL-NEXT: lbu $1, 280($sp)
+; MIPS64R5EL-NEXT: sh $1, 0($sp)
+; MIPS64R5EL-NEXT: ld.h $w0, 0($sp)
+; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0]
+; MIPS64R5EL-NEXT: copy_s.h $2, $w0[1]
+; MIPS64R5EL-NEXT: sd $5, 272($sp)
+; MIPS64R5EL-NEXT: lbu $3, 273($sp)
+; MIPS64R5EL-NEXT: sh $3, 18($sp)
+; MIPS64R5EL-NEXT: lbu $3, 272($sp)
+; MIPS64R5EL-NEXT: sh $3, 16($sp)
+; MIPS64R5EL-NEXT: ld.h $w0, 16($sp)
+; MIPS64R5EL-NEXT: copy_s.h $3, $w0[0]
+; MIPS64R5EL-NEXT: copy_s.h $4, $w0[1]
+; MIPS64R5EL-NEXT: sw $4, 136($sp)
+; MIPS64R5EL-NEXT: sw $3, 128($sp)
+; MIPS64R5EL-NEXT: sw $2, 120($sp)
+; MIPS64R5EL-NEXT: sw $1, 112($sp)
+; MIPS64R5EL-NEXT: ld.d $w0, 128($sp)
+; MIPS64R5EL-NEXT: ld.d $w1, 112($sp)
+; MIPS64R5EL-NEXT: addv.d $w0, $w1, $w0
+; MIPS64R5EL-NEXT: sd $6, 264($sp)
+; MIPS64R5EL-NEXT: lbu $1, 265($sp)
+; MIPS64R5EL-NEXT: sh $1, 34($sp)
+; MIPS64R5EL-NEXT: lbu $1, 264($sp)
+; MIPS64R5EL-NEXT: sh $1, 32($sp)
+; MIPS64R5EL-NEXT: ld.h $w1, 32($sp)
; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0]
; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1]
-; MIPS64R5EL-NEXT: sd $5, 160($sp)
-; MIPS64R5EL-NEXT: lbu $3, 161($sp)
-; MIPS64R5EL-NEXT: lbu $4, 160($sp)
-; MIPS64R5EL-NEXT: move.v $w1, $w0
-; MIPS64R5EL-NEXT: insert.h $w1[0], $4
-; MIPS64R5EL-NEXT: insert.h $w1[1], $3
-; MIPS64R5EL-NEXT: lbu $3, 162($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[2], $3
-; MIPS64R5EL-NEXT: lbu $3, 163($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[3], $3
-; MIPS64R5EL-NEXT: lbu $3, 164($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[4], $3
-; MIPS64R5EL-NEXT: lbu $3, 165($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[5], $3
-; MIPS64R5EL-NEXT: lbu $3, 167($sp)
-; MIPS64R5EL-NEXT: lbu $4, 166($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[6], $4
-; MIPS64R5EL-NEXT: insert.h $w1[7], $3
+; MIPS64R5EL-NEXT: sw $2, 152($sp)
+; MIPS64R5EL-NEXT: sw $1, 144($sp)
+; MIPS64R5EL-NEXT: ld.d $w1, 144($sp)
+; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EL-NEXT: sd $7, 256($sp)
+; MIPS64R5EL-NEXT: lbu $1, 257($sp)
+; MIPS64R5EL-NEXT: sh $1, 50($sp)
+; MIPS64R5EL-NEXT: lbu $1, 256($sp)
+; MIPS64R5EL-NEXT: sh $1, 48($sp)
+; MIPS64R5EL-NEXT: ld.h $w1, 48($sp)
+; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0]
+; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1]
+; MIPS64R5EL-NEXT: sw $2, 168($sp)
+; MIPS64R5EL-NEXT: sw $1, 160($sp)
+; MIPS64R5EL-NEXT: ld.d $w1, 160($sp)
+; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EL-NEXT: sd $8, 248($sp)
+; MIPS64R5EL-NEXT: lbu $1, 249($sp)
+; MIPS64R5EL-NEXT: sh $1, 66($sp)
+; MIPS64R5EL-NEXT: lbu $1, 248($sp)
+; MIPS64R5EL-NEXT: sh $1, 64($sp)
+; MIPS64R5EL-NEXT: ld.h $w1, 64($sp)
+; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0]
+; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1]
+; MIPS64R5EL-NEXT: sw $2, 184($sp)
+; MIPS64R5EL-NEXT: sw $1, 176($sp)
+; MIPS64R5EL-NEXT: ld.d $w1, 176($sp)
+; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EL-NEXT: sd $10, 232($sp)
+; MIPS64R5EL-NEXT: lbu $1, 233($sp)
+; MIPS64R5EL-NEXT: sh $1, 98($sp)
+; MIPS64R5EL-NEXT: lbu $1, 232($sp)
+; MIPS64R5EL-NEXT: sh $1, 96($sp)
+; MIPS64R5EL-NEXT: ld.h $w1, 96($sp)
+; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0]
+; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1]
+; MIPS64R5EL-NEXT: sd $9, 240($sp)
+; MIPS64R5EL-NEXT: lbu $3, 241($sp)
+; MIPS64R5EL-NEXT: sh $3, 82($sp)
+; MIPS64R5EL-NEXT: lbu $3, 240($sp)
+; MIPS64R5EL-NEXT: sh $3, 80($sp)
+; MIPS64R5EL-NEXT: ld.h $w1, 80($sp)
; MIPS64R5EL-NEXT: copy_s.h $3, $w1[0]
; MIPS64R5EL-NEXT: copy_s.h $4, $w1[1]
-; MIPS64R5EL-NEXT: sw $4, 24($sp)
-; MIPS64R5EL-NEXT: sw $3, 16($sp)
-; MIPS64R5EL-NEXT: sw $2, 8($sp)
-; MIPS64R5EL-NEXT: sw $1, 0($sp)
-; MIPS64R5EL-NEXT: ld.d $w1, 16($sp)
-; MIPS64R5EL-NEXT: ld.d $w2, 0($sp)
-; MIPS64R5EL-NEXT: addv.d $w1, $w2, $w1
-; MIPS64R5EL-NEXT: sd $6, 152($sp)
-; MIPS64R5EL-NEXT: lbu $1, 153($sp)
-; MIPS64R5EL-NEXT: lbu $2, 152($sp)
-; MIPS64R5EL-NEXT: move.v $w2, $w0
-; MIPS64R5EL-NEXT: insert.h $w2[0], $2
-; MIPS64R5EL-NEXT: insert.h $w2[1], $1
-; MIPS64R5EL-NEXT: lbu $1, 154($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[2], $1
-; MIPS64R5EL-NEXT: lbu $1, 155($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[3], $1
-; MIPS64R5EL-NEXT: lbu $1, 156($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[4], $1
-; MIPS64R5EL-NEXT: lbu $1, 157($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[5], $1
-; MIPS64R5EL-NEXT: lbu $1, 159($sp)
-; MIPS64R5EL-NEXT: lbu $2, 158($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[6], $2
-; MIPS64R5EL-NEXT: insert.h $w2[7], $1
-; MIPS64R5EL-NEXT: copy_s.h $1, $w2[0]
-; MIPS64R5EL-NEXT: copy_s.h $2, $w2[1]
-; MIPS64R5EL-NEXT: sw $2, 40($sp)
-; MIPS64R5EL-NEXT: sw $1, 32($sp)
-; MIPS64R5EL-NEXT: ld.d $w2, 32($sp)
-; MIPS64R5EL-NEXT: addv.d $w1, $w1, $w2
-; MIPS64R5EL-NEXT: sd $7, 144($sp)
-; MIPS64R5EL-NEXT: lbu $1, 145($sp)
-; MIPS64R5EL-NEXT: lbu $2, 144($sp)
-; MIPS64R5EL-NEXT: move.v $w2, $w0
-; MIPS64R5EL-NEXT: insert.h $w2[0], $2
-; MIPS64R5EL-NEXT: insert.h $w2[1], $1
-; MIPS64R5EL-NEXT: lbu $1, 146($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[2], $1
-; MIPS64R5EL-NEXT: lbu $1, 147($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[3], $1
-; MIPS64R5EL-NEXT: lbu $1, 148($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[4], $1
-; MIPS64R5EL-NEXT: lbu $1, 149($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[5], $1
-; MIPS64R5EL-NEXT: lbu $1, 151($sp)
-; MIPS64R5EL-NEXT: lbu $2, 150($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[6], $2
-; MIPS64R5EL-NEXT: insert.h $w2[7], $1
-; MIPS64R5EL-NEXT: copy_s.h $1, $w2[0]
-; MIPS64R5EL-NEXT: copy_s.h $2, $w2[1]
-; MIPS64R5EL-NEXT: sw $2, 56($sp)
-; MIPS64R5EL-NEXT: sw $1, 48($sp)
-; MIPS64R5EL-NEXT: ld.d $w2, 48($sp)
-; MIPS64R5EL-NEXT: addv.d $w1, $w1, $w2
-; MIPS64R5EL-NEXT: sd $8, 136($sp)
-; MIPS64R5EL-NEXT: lbu $1, 137($sp)
-; MIPS64R5EL-NEXT: lbu $2, 136($sp)
-; MIPS64R5EL-NEXT: move.v $w2, $w0
-; MIPS64R5EL-NEXT: insert.h $w2[0], $2
-; MIPS64R5EL-NEXT: insert.h $w2[1], $1
-; MIPS64R5EL-NEXT: lbu $1, 138($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[2], $1
-; MIPS64R5EL-NEXT: lbu $1, 139($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[3], $1
-; MIPS64R5EL-NEXT: lbu $1, 140($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[4], $1
-; MIPS64R5EL-NEXT: lbu $1, 141($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[5], $1
-; MIPS64R5EL-NEXT: lbu $1, 143($sp)
-; MIPS64R5EL-NEXT: lbu $2, 142($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[6], $2
-; MIPS64R5EL-NEXT: insert.h $w2[7], $1
-; MIPS64R5EL-NEXT: copy_s.h $1, $w2[0]
-; MIPS64R5EL-NEXT: copy_s.h $2, $w2[1]
-; MIPS64R5EL-NEXT: sd $10, 120($sp)
-; MIPS64R5EL-NEXT: lbu $3, 121($sp)
-; MIPS64R5EL-NEXT: lbu $4, 120($sp)
-; MIPS64R5EL-NEXT: move.v $w2, $w0
-; MIPS64R5EL-NEXT: insert.h $w2[0], $4
-; MIPS64R5EL-NEXT: insert.h $w2[1], $3
-; MIPS64R5EL-NEXT: lbu $3, 122($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[2], $3
-; MIPS64R5EL-NEXT: lbu $3, 123($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[3], $3
-; MIPS64R5EL-NEXT: lbu $3, 124($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[4], $3
-; MIPS64R5EL-NEXT: lbu $3, 125($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[5], $3
-; MIPS64R5EL-NEXT: lbu $3, 127($sp)
-; MIPS64R5EL-NEXT: lbu $4, 126($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[6], $4
-; MIPS64R5EL-NEXT: insert.h $w2[7], $3
-; MIPS64R5EL-NEXT: copy_s.h $3, $w2[0]
-; MIPS64R5EL-NEXT: copy_s.h $4, $w2[1]
-; MIPS64R5EL-NEXT: sw $2, 72($sp)
-; MIPS64R5EL-NEXT: sw $1, 64($sp)
-; MIPS64R5EL-NEXT: ld.d $w2, 64($sp)
-; MIPS64R5EL-NEXT: addv.d $w1, $w1, $w2
-; MIPS64R5EL-NEXT: sd $9, 128($sp)
-; MIPS64R5EL-NEXT: lbu $1, 128($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[0], $1
-; MIPS64R5EL-NEXT: lbu $1, 129($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[1], $1
-; MIPS64R5EL-NEXT: lbu $1, 130($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[2], $1
-; MIPS64R5EL-NEXT: lbu $1, 131($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[3], $1
-; MIPS64R5EL-NEXT: lbu $1, 132($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[4], $1
-; MIPS64R5EL-NEXT: lbu $1, 133($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[5], $1
-; MIPS64R5EL-NEXT: lbu $1, 135($sp)
-; MIPS64R5EL-NEXT: lbu $2, 134($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[6], $2
-; MIPS64R5EL-NEXT: insert.h $w0[7], $1
-; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0]
-; MIPS64R5EL-NEXT: copy_s.h $2, $w0[1]
-; MIPS64R5EL-NEXT: sw $2, 88($sp)
-; MIPS64R5EL-NEXT: sw $1, 80($sp)
-; MIPS64R5EL-NEXT: ld.d $w0, 80($sp)
-; MIPS64R5EL-NEXT: addv.d $w0, $w1, $w0
-; MIPS64R5EL-NEXT: sw $4, 104($sp)
-; MIPS64R5EL-NEXT: sw $3, 96($sp)
-; MIPS64R5EL-NEXT: ld.d $w1, 96($sp)
+; MIPS64R5EL-NEXT: sw $4, 200($sp)
+; MIPS64R5EL-NEXT: sw $3, 192($sp)
+; MIPS64R5EL-NEXT: ld.d $w1, 192($sp)
+; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EL-NEXT: sw $2, 216($sp)
+; MIPS64R5EL-NEXT: sw $1, 208($sp)
+; MIPS64R5EL-NEXT: ld.d $w1, 208($sp)
; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1
; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5EL-NEXT: sb $2, 117($sp)
-; MIPS64R5EL-NEXT: sb $1, 116($sp)
-; MIPS64R5EL-NEXT: lh $2, 116($sp)
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, 176
+; MIPS64R5EL-NEXT: sb $2, 229($sp)
+; MIPS64R5EL-NEXT: sb $1, 228($sp)
+; MIPS64R5EL-NEXT: lh $2, 228($sp)
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 288
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
entry:
@@ -3768,55 +3552,43 @@ define void @call_i8_2() {
;
; MIPS64R5EB-LABEL: call_i8_2:
; MIPS64R5EB: # %bb.0: # %entry
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, -48
-; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 48
-; MIPS64R5EB-NEXT: sd $ra, 40($sp) # 8-byte Folded Spill
-; MIPS64R5EB-NEXT: sd $gp, 32($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -64
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 64
+; MIPS64R5EB-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: sd $gp, 48($sp) # 8-byte Folded Spill
; MIPS64R5EB-NEXT: .cfi_offset 31, -8
; MIPS64R5EB-NEXT: .cfi_offset 28, -16
; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(call_i8_2)))
; MIPS64R5EB-NEXT: daddu $1, $1, $25
; MIPS64R5EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_2)))
; MIPS64R5EB-NEXT: addiu $1, $zero, 1543
-; MIPS64R5EB-NEXT: sh $1, 24($sp)
+; MIPS64R5EB-NEXT: sh $1, 40($sp)
; MIPS64R5EB-NEXT: addiu $1, $zero, 3080
-; MIPS64R5EB-NEXT: sh $1, 28($sp)
+; MIPS64R5EB-NEXT: sh $1, 44($sp)
; MIPS64R5EB-NEXT: ld $25, %call16(i8_2)($gp)
-; MIPS64R5EB-NEXT: lh $4, 24($sp)
-; MIPS64R5EB-NEXT: lh $5, 28($sp)
+; MIPS64R5EB-NEXT: lh $4, 40($sp)
+; MIPS64R5EB-NEXT: lh $5, 44($sp)
; MIPS64R5EB-NEXT: jalr $25
; MIPS64R5EB-NEXT: nop
-; MIPS64R5EB-NEXT: sd $2, 16($sp)
-; MIPS64R5EB-NEXT: ldi.b $w0, 0
-; MIPS64R5EB-NEXT: lbu $1, 16($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[0], $1
-; MIPS64R5EB-NEXT: lbu $1, 17($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[1], $1
-; MIPS64R5EB-NEXT: lbu $1, 18($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[2], $1
-; MIPS64R5EB-NEXT: lbu $1, 19($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[3], $1
-; MIPS64R5EB-NEXT: lbu $1, 20($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[4], $1
-; MIPS64R5EB-NEXT: lbu $1, 21($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[5], $1
-; MIPS64R5EB-NEXT: lbu $1, 23($sp)
-; MIPS64R5EB-NEXT: lbu $2, 22($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[6], $2
-; MIPS64R5EB-NEXT: insert.h $w0[7], $1
+; MIPS64R5EB-NEXT: sd $2, 32($sp)
+; MIPS64R5EB-NEXT: lbu $1, 33($sp)
+; MIPS64R5EB-NEXT: sh $1, 2($sp)
+; MIPS64R5EB-NEXT: lbu $1, 32($sp)
+; MIPS64R5EB-NEXT: sh $1, 0($sp)
+; MIPS64R5EB-NEXT: ld.h $w0, 0($sp)
; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0]
; MIPS64R5EB-NEXT: copy_s.h $2, $w0[1]
-; MIPS64R5EB-NEXT: sw $2, 12($sp)
-; MIPS64R5EB-NEXT: sw $1, 4($sp)
-; MIPS64R5EB-NEXT: ld.d $w0, 0($sp)
+; MIPS64R5EB-NEXT: sw $2, 28($sp)
+; MIPS64R5EB-NEXT: sw $1, 20($sp)
+; MIPS64R5EB-NEXT: ld.d $w0, 16($sp)
; MIPS64R5EB-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5EB-NEXT: copy_s.d $2, $w0[1]
; MIPS64R5EB-NEXT: ld $3, %got_disp(gv2i8)($gp)
; MIPS64R5EB-NEXT: sb $2, 1($3)
; MIPS64R5EB-NEXT: sb $1, 0($3)
-; MIPS64R5EB-NEXT: ld $gp, 32($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, 48
+; MIPS64R5EB-NEXT: ld $gp, 48($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 64
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
;
@@ -3892,55 +3664,43 @@ define void @call_i8_2() {
;
; MIPS64R5EL-LABEL: call_i8_2:
; MIPS64R5EL: # %bb.0: # %entry
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, -48
-; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 48
-; MIPS64R5EL-NEXT: sd $ra, 40($sp) # 8-byte Folded Spill
-; MIPS64R5EL-NEXT: sd $gp, 32($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -64
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 64
+; MIPS64R5EL-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: sd $gp, 48($sp) # 8-byte Folded Spill
; MIPS64R5EL-NEXT: .cfi_offset 31, -8
; MIPS64R5EL-NEXT: .cfi_offset 28, -16
; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(call_i8_2)))
; MIPS64R5EL-NEXT: daddu $1, $1, $25
; MIPS64R5EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_2)))
; MIPS64R5EL-NEXT: addiu $1, $zero, 1798
-; MIPS64R5EL-NEXT: sh $1, 24($sp)
+; MIPS64R5EL-NEXT: sh $1, 40($sp)
; MIPS64R5EL-NEXT: addiu $1, $zero, 2060
-; MIPS64R5EL-NEXT: sh $1, 28($sp)
+; MIPS64R5EL-NEXT: sh $1, 44($sp)
; MIPS64R5EL-NEXT: ld $25, %call16(i8_2)($gp)
-; MIPS64R5EL-NEXT: lh $4, 24($sp)
-; MIPS64R5EL-NEXT: lh $5, 28($sp)
+; MIPS64R5EL-NEXT: lh $4, 40($sp)
+; MIPS64R5EL-NEXT: lh $5, 44($sp)
; MIPS64R5EL-NEXT: jalr $25
; MIPS64R5EL-NEXT: nop
-; MIPS64R5EL-NEXT: sd $2, 16($sp)
-; MIPS64R5EL-NEXT: ldi.b $w0, 0
-; MIPS64R5EL-NEXT: lbu $1, 16($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[0], $1
-; MIPS64R5EL-NEXT: lbu $1, 17($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[1], $1
-; MIPS64R5EL-NEXT: lbu $1, 18($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[2], $1
-; MIPS64R5EL-NEXT: lbu $1, 19($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[3], $1
-; MIPS64R5EL-NEXT: lbu $1, 20($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[4], $1
-; MIPS64R5EL-NEXT: lbu $1, 21($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[5], $1
-; MIPS64R5EL-NEXT: lbu $1, 23($sp)
-; MIPS64R5EL-NEXT: lbu $2, 22($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[6], $2
-; MIPS64R5EL-NEXT: insert.h $w0[7], $1
+; MIPS64R5EL-NEXT: sd $2, 32($sp)
+; MIPS64R5EL-NEXT: lbu $1, 33($sp)
+; MIPS64R5EL-NEXT: sh $1, 2($sp)
+; MIPS64R5EL-NEXT: lbu $1, 32($sp)
+; MIPS64R5EL-NEXT: sh $1, 0($sp)
+; MIPS64R5EL-NEXT: ld.h $w0, 0($sp)
; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0]
; MIPS64R5EL-NEXT: copy_s.h $2, $w0[1]
-; MIPS64R5EL-NEXT: sw $2, 8($sp)
-; MIPS64R5EL-NEXT: sw $1, 0($sp)
-; MIPS64R5EL-NEXT: ld.d $w0, 0($sp)
+; MIPS64R5EL-NEXT: sw $2, 24($sp)
+; MIPS64R5EL-NEXT: sw $1, 16($sp)
+; MIPS64R5EL-NEXT: ld.d $w0, 16($sp)
; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1]
; MIPS64R5EL-NEXT: ld $3, %got_disp(gv2i8)($gp)
; MIPS64R5EL-NEXT: sb $2, 1($3)
; MIPS64R5EL-NEXT: sb $1, 0($3)
-; MIPS64R5EL-NEXT: ld $gp, 32($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, 48
+; MIPS64R5EL-NEXT: ld $gp, 48($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 64
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
entry:
Modified: llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll Tue Jul 17 02:45:35 2018
@@ -31,7 +31,6 @@ define i32 @t(i8* %ref_frame_ptr, i32 %r
; X64-NEXT: shlq $32, %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: movq %rcx, %xmm0
-; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/extractelement-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extractelement-load.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extractelement-load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extractelement-load.ll Tue Jul 17 02:45:35 2018
@@ -85,8 +85,7 @@ define i64 @t4(<2 x double>* %a) {
; X32-SSE2-LABEL: t4:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: movapd (%eax), %xmm0
-; X32-SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X32-SSE2-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X32-SSE2-NEXT: movd %xmm1, %eax
; X32-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
Modified: llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-bits-vector.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-bits-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-bits-vector.ll Tue Jul 17 02:45:35 2018
@@ -24,10 +24,9 @@ define float @knownbits_mask_extract_uit
; X32-LABEL: knownbits_mask_extract_uitofp:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
-; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
+; X32-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X32-NEXT: vmovd %xmm0, %eax
-; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
+; X32-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X32-NEXT: vmovss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
@@ -35,10 +34,9 @@ define float @knownbits_mask_extract_uit
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64: # %bb.0:
-; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
+; X64-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT: vmovq %xmm0, %rax
-; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
+; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: retq
%1 = and <2 x i64> %a0, <i64 65535, i64 -1>
%2 = extractelement <2 x i64> %1, i32 0
Modified: llvm/trunk/test/CodeGen/X86/oddshuffles.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/oddshuffles.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/oddshuffles.ll (original)
+++ llvm/trunk/test/CodeGen/X86/oddshuffles.ll Tue Jul 17 02:45:35 2018
@@ -68,41 +68,29 @@ define void @v3f64(<2 x double> %a, <2 x
define void @v3i32(<2 x i32> %a, <2 x i32> %b, <3 x i32>* %p) nounwind {
; SSE2-LABEL: v3i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, 8(%rdi)
-; SSE2-NEXT: movq %xmm2, (%rdi)
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movd %xmm2, 8(%rdi)
+; SSE2-NEXT: movq %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSE42-LABEL: v3i32:
; SSE42: # %bb.0:
-; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
-; SSE42-NEXT: pextrd $2, %xmm0, 8(%rdi)
-; SSE42-NEXT: movq %xmm1, (%rdi)
+; SSE42-NEXT: extractps $2, %xmm0, 8(%rdi)
+; SSE42-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE42-NEXT: movlps %xmm0, (%rdi)
; SSE42-NEXT: retq
;
-; AVX1-LABEL: v3i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX1-NEXT: vextractps $2, %xmm0, 8(%rdi)
-; AVX1-NEXT: vmovlps %xmm1, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: v3i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vbroadcastss %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX2-NEXT: vextractps $2, %xmm0, 8(%rdi)
-; AVX2-NEXT: vmovlps %xmm1, (%rdi)
-; AVX2-NEXT: retq
+; AVX-LABEL: v3i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT: vextractps $2, %xmm0, 8(%rdi)
+; AVX-NEXT: vmovlps %xmm1, (%rdi)
+; AVX-NEXT: retq
;
; XOP-LABEL: v3i32:
; XOP: # %bb.0:
-; XOP-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; XOP-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
+; XOP-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; XOP-NEXT: vextractps $2, %xmm0, 8(%rdi)
; XOP-NEXT: vmovlps %xmm1, (%rdi)
; XOP-NEXT: retq
@@ -114,10 +102,9 @@ define void @v3i32(<2 x i32> %a, <2 x i3
define void @v5i16(<4 x i16> %a, <4 x i16> %b, <5 x i16>* %p) nounwind {
; SSE2-LABEL: v5i16:
; SSE2: # %bb.0:
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: pextrw $6, %xmm0, %eax
; SSE2-NEXT: movw %ax, 8(%rdi)
@@ -126,10 +113,9 @@ define void @v5i16(<4 x i16> %a, <4 x i1
;
; SSE42-LABEL: v5i16:
; SSE42: # %bb.0:
+; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
; SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
; SSE42-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE42-NEXT: pextrw $6, %xmm0, 8(%rdi)
; SSE42-NEXT: movq %xmm2, (%rdi)
@@ -137,10 +123,9 @@ define void @v5i16(<4 x i16> %a, <4 x i1
;
; AVX1-LABEL: v5i16:
; AVX1: # %bb.0:
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX1-NEXT: vpextrw $6, %xmm0, 8(%rdi)
; AVX1-NEXT: vmovq %xmm1, (%rdi)
@@ -148,10 +133,9 @@ define void @v5i16(<4 x i16> %a, <4 x i1
;
; AVX2-SLOW-LABEL: v5i16:
; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX2-SLOW-NEXT: vpextrw $6, %xmm0, 8(%rdi)
; AVX2-SLOW-NEXT: vmovq %xmm1, (%rdi)
@@ -160,7 +144,7 @@ define void @v5i16(<4 x i16> %a, <4 x i1
; AVX2-FAST-LABEL: v5i16:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,8,9,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,4,5,12,13,14,15,8,9,10,11,12,13,14,15]
+; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
; AVX2-FAST-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX2-FAST-NEXT: vpextrw $6, %xmm0, 8(%rdi)
; AVX2-FAST-NEXT: vmovq %xmm1, (%rdi)
@@ -168,7 +152,7 @@ define void @v5i16(<4 x i16> %a, <4 x i1
;
; XOP-LABEL: v5i16:
; XOP: # %bb.0:
-; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1],xmm1[4,5],xmm0[4,5],xmm1[8,9],xmm0[12,13],xmm1[4,5],xmm0[14,15],xmm1[6,7]
+; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1],xmm1[4,5],xmm0[4,5],xmm1[8,9],xmm0[4,5],xmm1[4,5],xmm0[6,7],xmm1[6,7]
; XOP-NEXT: vpextrw $6, %xmm0, 8(%rdi)
; XOP-NEXT: vmovq %xmm1, (%rdi)
; XOP-NEXT: retq
@@ -377,23 +361,24 @@ define void @v7i32(<4 x i32> %a, <4 x i3
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,2,2]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,1,0,3]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[3,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: movd %xmm1, 24(%rdi)
-; SSE2-NEXT: movlps %xmm0, 16(%rdi)
+; SSE2-NEXT: movq %xmm0, 16(%rdi)
; SSE2-NEXT: movdqa %xmm3, (%rdi)
; SSE2-NEXT: retq
;
; SSE42-LABEL: v7i32:
; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa %xmm1, %xmm2
-; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3],xmm2[4,5,6,7]
-; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,3,2]
-; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,0,3]
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
+; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,3,2]
+; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE42-NEXT: movd %xmm1, 24(%rdi)
-; SSE42-NEXT: movq %xmm2, 16(%rdi)
-; SSE42-NEXT: movdqa %xmm0, (%rdi)
+; SSE42-NEXT: movq %xmm0, 16(%rdi)
+; SSE42-NEXT: movdqa %xmm2, (%rdi)
; SSE42-NEXT: retq
;
; AVX1-LABEL: v7i32:
Modified: llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll (original)
+++ llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll Tue Jul 17 02:45:35 2018
@@ -427,7 +427,6 @@ define void @test_int_div(<3 x i32>* %de
; CHECK-NEXT: pextrd $2, %xmm1, %r8d
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %r8d
-; CHECK-NEXT: pinsrd $2, %eax, %xmm2
; CHECK-NEXT: movl %eax, 8(%rdi,%rcx)
; CHECK-NEXT: movq %xmm2, (%rdi,%rcx)
; CHECK-NEXT: addq $16, %rcx
Modified: llvm/trunk/test/CodeGen/X86/vec_shift7.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shift7.ll?rev=337258&r1=337257&r2=337258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shift7.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_shift7.ll Tue Jul 17 02:45:35 2018
@@ -7,12 +7,9 @@
define i64 @test1(<2 x i64> %a) {
; X32-LABEL: test1:
; X32: # %bb.0: # %entry
-; X32-NEXT: movdqa %xmm0, %xmm1
-; X32-NEXT: psllq $2, %xmm1
-; X32-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
-; X32-NEXT: movd %xmm1, %edx
; X32-NEXT: movd %xmm0, %eax
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-NEXT: movd %xmm0, %edx
; X32-NEXT: retl
;
; X64-LABEL: test1:
More information about the llvm-commits
mailing list