[llvm] ce397e5 - [RISCV] Lower scalar_to_vector
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 6 09:40:27 PDT 2023
Author: Luke Lau
Date: 2023-04-06T17:39:19+01:00
New Revision: ce397e500d3572a0c3743630bf0a895c24baebad
URL: https://github.com/llvm/llvm-project/commit/ce397e500d3572a0c3743630bf0a895c24baebad
DIFF: https://github.com/llvm/llvm-project/commit/ce397e500d3572a0c3743630bf0a895c24baebad.diff
LOG: [RISCV] Lower scalar_to_vector
Loads of fixed-length vectors with irregular element counts are
sometimes emitted as a scalar load plus a scalar_to_vector node.
Previously scalar_to_vector wasn't legal, so it was scalarized
further. This patch handles it by lowering it to a vmv.s.x.
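For illustration, a minimal IR sketch (not taken from this patch's tests, and
assuming the usual DAG combine that turns an insert into lane 0 of an
otherwise-poison vector into an ISD::SCALAR_TO_VECTOR node):

  define <4 x i32> @scalar_to_vec(i32 %x) {
    ; Inserting into element 0 of a poison vector is one common way a
    ; SCALAR_TO_VECTOR node is formed during SelectionDAG construction.
    %v = insertelement <4 x i32> poison, i32 %x, i64 0
    ret <4 x i32> %v
  }

With this change such a node is custom-lowered to a single vmv.s.x into the
container vector type (under a suitable vsetvli), instead of being scalarized
through a stack slot and reloaded with a vector load.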
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D147608
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index de1fd446158a..524d1e578aac 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -577,7 +577,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Mask VTs are custom-expanded into a series of standard nodes
setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
- ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
+ ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
+ ISD::SCALAR_TO_VECTOR},
VT, Custom);
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
@@ -703,9 +704,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
VT, Custom);
- setOperationAction(
- {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
- VT, Custom);
+ setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+ ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
+ VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -798,9 +799,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(
- {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
- VT, Custom);
+ setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+ ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
+ VT, Custom);
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
@@ -877,6 +878,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -4138,6 +4141,25 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: {
+ MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);
+ SDValue Scalar = Op.getOperand(0);
+ if (VT.getVectorElementType() == MVT::i1) {
+ MVT WideVT = VT.changeVectorElementType(MVT::i8);
+ SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
+ }
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector())
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+ SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), Scalar, VL);
+ if (VT.isFixedLengthVector())
+ V = convertFromScalableVector(VT, V, DAG, Subtarget);
+ return V;
+ }
case ISD::VSCALE: {
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll
index c94cc43d45bb..f382394ea022 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll
@@ -7,8 +7,6 @@
define <4 x i1> @load_large_vector(ptr %p) {
; ZVE32X-LABEL: load_large_vector:
; ZVE32X: # %bb.0:
-; ZVE32X-NEXT: addi sp, sp, -16
-; ZVE32X-NEXT: .cfi_def_cfa_offset 16
; ZVE32X-NEXT: ld a1, 80(a0)
; ZVE32X-NEXT: ld a2, 72(a0)
; ZVE32X-NEXT: ld a3, 56(a0)
@@ -19,49 +17,47 @@ define <4 x i1> @load_large_vector(ptr %p) {
; ZVE32X-NEXT: ld a0, 0(a0)
; ZVE32X-NEXT: xor a4, a5, a4
; ZVE32X-NEXT: snez a4, a4
-; ZVE32X-NEXT: sb a4, 12(sp)
+; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; ZVE32X-NEXT: vmv.s.x v8, a4
+; ZVE32X-NEXT: vand.vi v8, v8, 1
+; ZVE32X-NEXT: vmsne.vi v0, v8, 0
+; ZVE32X-NEXT: vmv.v.i v8, 0
+; ZVE32X-NEXT: vmerge.vim v9, v8, 1, v0
; ZVE32X-NEXT: xor a0, a0, a7
; ZVE32X-NEXT: snez a0, a0
-; ZVE32X-NEXT: sb a0, 15(sp)
-; ZVE32X-NEXT: xor a0, a6, a3
-; ZVE32X-NEXT: snez a0, a0
-; ZVE32X-NEXT: sb a0, 13(sp)
-; ZVE32X-NEXT: xor a1, a2, a1
-; ZVE32X-NEXT: snez a0, a1
-; ZVE32X-NEXT: sb a0, 14(sp)
-; ZVE32X-NEXT: addi a0, sp, 12
-; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; ZVE32X-NEXT: vlm.v v0, (a0)
-; ZVE32X-NEXT: addi a0, sp, 15
-; ZVE32X-NEXT: vlm.v v8, (a0)
-; ZVE32X-NEXT: vmv.v.i v9, 0
-; ZVE32X-NEXT: vmerge.vim v10, v9, 1, v0
+; ZVE32X-NEXT: vmv.s.x v10, a0
+; ZVE32X-NEXT: vand.vi v10, v10, 1
+; ZVE32X-NEXT: vmsne.vi v0, v10, 0
; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; ZVE32X-NEXT: vmv.v.i v11, 0
-; ZVE32X-NEXT: vmv1r.v v0, v8
-; ZVE32X-NEXT: vmerge.vim v8, v11, 1, v0
+; ZVE32X-NEXT: vmv.v.i v10, 0
+; ZVE32X-NEXT: vmerge.vim v11, v10, 1, v0
; ZVE32X-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
-; ZVE32X-NEXT: vslideup.vi v8, v10, 1
+; ZVE32X-NEXT: vslideup.vi v11, v9, 1
; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; ZVE32X-NEXT: vmsne.vi v0, v8, 0
-; ZVE32X-NEXT: vmerge.vim v8, v11, 1, v0
-; ZVE32X-NEXT: addi a0, sp, 13
+; ZVE32X-NEXT: vmsne.vi v0, v11, 0
+; ZVE32X-NEXT: vmerge.vim v9, v10, 1, v0
+; ZVE32X-NEXT: xor a0, a6, a3
+; ZVE32X-NEXT: snez a0, a0
+; ZVE32X-NEXT: vmv.s.x v11, a0
; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; ZVE32X-NEXT: vlm.v v0, (a0)
-; ZVE32X-NEXT: vmerge.vim v10, v9, 1, v0
+; ZVE32X-NEXT: vand.vi v11, v11, 1
+; ZVE32X-NEXT: vmsne.vi v0, v11, 0
+; ZVE32X-NEXT: vmerge.vim v11, v8, 1, v0
; ZVE32X-NEXT: vsetivli zero, 3, e8, mf4, tu, ma
-; ZVE32X-NEXT: vslideup.vi v8, v10, 2
+; ZVE32X-NEXT: vslideup.vi v9, v11, 2
; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; ZVE32X-NEXT: vmsne.vi v0, v8, 0
-; ZVE32X-NEXT: vmerge.vim v8, v11, 1, v0
-; ZVE32X-NEXT: addi a0, sp, 14
+; ZVE32X-NEXT: vmsne.vi v0, v9, 0
+; ZVE32X-NEXT: vmerge.vim v9, v10, 1, v0
+; ZVE32X-NEXT: xor a1, a2, a1
+; ZVE32X-NEXT: snez a0, a1
+; ZVE32X-NEXT: vmv.s.x v10, a0
; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; ZVE32X-NEXT: vlm.v v0, (a0)
-; ZVE32X-NEXT: vmerge.vim v9, v9, 1, v0
+; ZVE32X-NEXT: vand.vi v10, v10, 1
+; ZVE32X-NEXT: vmsne.vi v0, v10, 0
+; ZVE32X-NEXT: vmerge.vim v8, v8, 1, v0
; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; ZVE32X-NEXT: vslideup.vi v8, v9, 3
-; ZVE32X-NEXT: vmsne.vi v0, v8, 0
-; ZVE32X-NEXT: addi sp, sp, 16
+; ZVE32X-NEXT: vslideup.vi v9, v8, 3
+; ZVE32X-NEXT: vmsne.vi v0, v9, 0
; ZVE32X-NEXT: ret
;
; ZVE64X-LABEL: load_large_vector:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
index 97c4a9540420..747a6a3534fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
@@ -11,14 +11,8 @@ define <5 x i8> @load_v5i8(ptr %p) {
;
; RV64-LABEL: load_v5i8:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: ld a0, 0(a0)
-; RV64-NEXT: sd a0, 8(sp)
-; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%x = load <5 x i8>, ptr %p
ret <5 x i8> %x
@@ -39,10 +33,8 @@ define <5 x i8> @load_v5i8_align1(ptr %p) {
; RV32-NEXT: slli a4, a4, 24
; RV32-NEXT: or a3, a4, a3
; RV32-NEXT: or a1, a3, a1
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: mv a1, sp
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vle32.v v8, (a1)
+; RV32-NEXT: vmv.s.x v8, a1
; RV32-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV32-NEXT: vslidedown.vi v9, v8, 1
; RV32-NEXT: vslidedown.vi v10, v8, 2
@@ -76,10 +68,8 @@ define <5 x i8> @load_v5i8_align1(ptr %p) {
; RV64-NEXT: slli a4, a4, 24
; RV64-NEXT: or a3, a4, a3
; RV64-NEXT: or a1, a3, a1
-; RV64-NEXT: sw a1, 0(sp)
-; RV64-NEXT: mv a1, sp
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vle32.v v8, (a1)
+; RV64-NEXT: vmv.s.x v8, a1
; RV64-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64-NEXT: vslidedown.vi v9, v8, 1
; RV64-NEXT: vslidedown.vi v10, v8, 2
@@ -111,14 +101,8 @@ define <6 x i8> @load_v6i8(ptr %p) {
;
; RV64-LABEL: load_v6i8:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: ld a0, 0(a0)
-; RV64-NEXT: sd a0, 8(sp)
-; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%x = load <6 x i8>, ptr %p
ret <6 x i8> %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 65fe75047526..38103378a008 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -220,35 +220,20 @@ define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2
;
; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: addi sp, sp, -16
-; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: lw a1, 8(a0)
; RV32ZVE32F-NEXT: lw a0, 0(a0)
-; RV32ZVE32F-NEXT: sb a1, 15(sp)
-; RV32ZVE32F-NEXT: sb a0, 14(sp)
-; RV32ZVE32F-NEXT: addi a0, sp, 15
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT: vle8.v v9, (a0)
-; RV32ZVE32F-NEXT: addi a0, sp, 14
-; RV32ZVE32F-NEXT: vle8.v v10, (a0)
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32ZVE32F-NEXT: vmv.s.x v9, a1
+; RV32ZVE32F-NEXT: vmv.s.x v10, a0
; RV32ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t
-; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: addi sp, sp, -16
-; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 16
-; RV64ZVE32F-NEXT: sb a1, 15(sp)
-; RV64ZVE32F-NEXT: sb a0, 14(sp)
-; RV64ZVE32F-NEXT: addi a0, sp, 15
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vle8.v v9, (a0)
-; RV64ZVE32F-NEXT: addi a0, sp, 14
-; RV64ZVE32F-NEXT: vle8.v v8, (a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a1
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vmv.x.s a0, v0
; RV64ZVE32F-NEXT: andi a1, a0, 1
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -257,7 +242,6 @@ define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: andi a0, a0, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB4_4
; RV64ZVE32F-NEXT: .LBB4_2: # %else2
-; RV64ZVE32F-NEXT: addi sp, sp, 16
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB4_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
@@ -268,7 +252,6 @@ define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vse8.v v8, (a3)
-; RV64ZVE32F-NEXT: addi sp, sp, 16
; RV64ZVE32F-NEXT: ret
%tval = trunc <2 x i64> %val to <2 x i8>
call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
@@ -782,35 +765,20 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2
;
; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: addi sp, sp, -16
-; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: lw a1, 8(a0)
; RV32ZVE32F-NEXT: lw a0, 0(a0)
-; RV32ZVE32F-NEXT: sh a1, 14(sp)
-; RV32ZVE32F-NEXT: sh a0, 12(sp)
-; RV32ZVE32F-NEXT: addi a0, sp, 14
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32ZVE32F-NEXT: vle16.v v9, (a0)
-; RV32ZVE32F-NEXT: addi a0, sp, 12
-; RV32ZVE32F-NEXT: vle16.v v10, (a0)
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV32ZVE32F-NEXT: vmv.s.x v9, a1
+; RV32ZVE32F-NEXT: vmv.s.x v10, a0
; RV32ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t
-; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: addi sp, sp, -16
-; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 16
-; RV64ZVE32F-NEXT: sh a1, 14(sp)
-; RV64ZVE32F-NEXT: sh a0, 12(sp)
-; RV64ZVE32F-NEXT: addi a0, sp, 14
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vle16.v v9, (a0)
-; RV64ZVE32F-NEXT: addi a0, sp, 12
-; RV64ZVE32F-NEXT: vle16.v v8, (a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a1
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v0
@@ -820,7 +788,6 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: andi a0, a0, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB13_4
; RV64ZVE32F-NEXT: .LBB13_2: # %else2
-; RV64ZVE32F-NEXT: addi sp, sp, 16
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB13_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
@@ -831,7 +798,6 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vse16.v v8, (a3)
-; RV64ZVE32F-NEXT: addi sp, sp, 16
; RV64ZVE32F-NEXT: ret
%tval = trunc <2 x i64> %val to <2 x i16>
call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index de3bbf360a36..969e9c8c5fd7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -445,63 +445,72 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
;
; CHECK-V-LABEL: stest_f16i32:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -64
-; CHECK-V-NEXT: .cfi_def_cfa_offset 64
-; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -48
+; CHECK-V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: sub sp, sp, a1
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
; CHECK-V-NEXT: lhu s0, 24(a0)
; CHECK-V-NEXT: lhu s1, 16(a0)
; CHECK-V-NEXT: lhu s2, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 24(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 0(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v10, 1
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 16(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 8(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 1
-; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 2
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 3
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: lui a0, 524288
; CHECK-V-NEXT: addiw a1, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v8, a1
+; CHECK-V-NEXT: vmin.vx v8, v10, a1
; CHECK-V-NEXT: vmax.vx v10, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
-; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 64
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 48
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
@@ -595,62 +604,71 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
;
; CHECK-V-LABEL: utesth_f16i32:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -64
-; CHECK-V-NEXT: .cfi_def_cfa_offset 64
-; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -48
+; CHECK-V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: sub sp, sp, a1
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
; CHECK-V-NEXT: lhu s0, 24(a0)
; CHECK-V-NEXT: lhu s1, 16(a0)
; CHECK-V-NEXT: lhu s2, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 24(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 0(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v10, 1
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 16(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 8(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 1
-; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 2
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 3
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: li a0, -1
; CHECK-V-NEXT: srli a0, a0, 32
-; CHECK-V-NEXT: vminu.vx v10, v8, a0
+; CHECK-V-NEXT: vminu.vx v10, v10, a0
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
-; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 64
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 48
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <4 x half> %x to <4 x i64>
@@ -754,63 +772,72 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
;
; CHECK-V-LABEL: ustest_f16i32:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -64
-; CHECK-V-NEXT: .cfi_def_cfa_offset 64
-; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -48
+; CHECK-V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: sub sp, sp, a1
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
; CHECK-V-NEXT: lhu s0, 24(a0)
; CHECK-V-NEXT: lhu s1, 16(a0)
; CHECK-V-NEXT: lhu s2, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 24(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 0(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v10, 1
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 16(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 8(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 1
-; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 2
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 3
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: li a0, -1
; CHECK-V-NEXT: srli a0, a0, 32
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
+; CHECK-V-NEXT: vmin.vx v8, v10, a0
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
-; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 64
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 48
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
@@ -1360,16 +1387,16 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
;
; CHECK-V-LABEL: stest_f16i16:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -96
-; CHECK-V-NEXT: .cfi_def_cfa_offset 96
-; CHECK-V-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -80
+; CHECK-V-NEXT: .cfi_def_cfa_offset 80
+; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -1378,6 +1405,10 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: .cfi_offset s4, -48
; CHECK-V-NEXT: .cfi_offset s5, -56
; CHECK-V-NEXT: .cfi_offset s6, -64
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: sub sp, sp, a1
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
; CHECK-V-NEXT: lhu s0, 56(a0)
; CHECK-V-NEXT: lhu s1, 48(a0)
; CHECK-V-NEXT: lhu s2, 40(a0)
@@ -1388,88 +1419,97 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: lhu a0, 8(a0)
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 28(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s6
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 0(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v10, 1
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s5
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 24(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s4
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 20(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 3
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s3
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 16(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 4
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 12(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 5
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 8(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 6
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 4(sp)
-; CHECK-V-NEXT: addi a0, sp, 28
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle32.v v8, (a0)
-; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 1
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 2
-; CHECK-V-NEXT: addi a0, sp, 20
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 3
-; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 4
-; CHECK-V-NEXT: addi a0, sp, 12
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 5
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 6
-; CHECK-V-NEXT: addi a0, sp, 4
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 7
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: lui a0, 8
; CHECK-V-NEXT: addiw a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
+; CHECK-V-NEXT: vmin.vx v8, v10, a0
; CHECK-V-NEXT: lui a0, 1048568
; CHECK-V-NEXT: vmax.vx v10, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
-; CHECK-V-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 96
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 80
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
@@ -1635,16 +1675,16 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
;
; CHECK-V-LABEL: utesth_f16i16:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -96
-; CHECK-V-NEXT: .cfi_def_cfa_offset 96
-; CHECK-V-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -80
+; CHECK-V-NEXT: .cfi_def_cfa_offset 80
+; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -1653,6 +1693,10 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: .cfi_offset s4, -48
; CHECK-V-NEXT: .cfi_offset s5, -56
; CHECK-V-NEXT: .cfi_offset s6, -64
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: sub sp, sp, a1
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
; CHECK-V-NEXT: lhu s0, 56(a0)
; CHECK-V-NEXT: lhu s1, 48(a0)
; CHECK-V-NEXT: lhu s2, 40(a0)
@@ -1663,86 +1707,95 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: lhu a0, 8(a0)
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 28(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s6
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 0(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v10, 1
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s5
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 24(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s4
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 20(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 3
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s3
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 16(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 4
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 12(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 5
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 8(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 6
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 4(sp)
-; CHECK-V-NEXT: addi a0, sp, 28
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle32.v v8, (a0)
-; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 1
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 2
-; CHECK-V-NEXT: addi a0, sp, 20
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 3
-; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 4
-; CHECK-V-NEXT: addi a0, sp, 12
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 5
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 6
-; CHECK-V-NEXT: addi a0, sp, 4
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 7
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: lui a0, 16
; CHECK-V-NEXT: addiw a0, a0, -1
-; CHECK-V-NEXT: vminu.vx v10, v8, a0
+; CHECK-V-NEXT: vminu.vx v10, v10, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
-; CHECK-V-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 96
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 80
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <8 x half> %x to <8 x i32>
@@ -1930,16 +1983,16 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
;
; CHECK-V-LABEL: ustest_f16i16:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -96
-; CHECK-V-NEXT: .cfi_def_cfa_offset 96
-; CHECK-V-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -80
+; CHECK-V-NEXT: .cfi_def_cfa_offset 80
+; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -1948,6 +2001,10 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: .cfi_offset s4, -48
; CHECK-V-NEXT: .cfi_offset s5, -56
; CHECK-V-NEXT: .cfi_offset s6, -64
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: sub sp, sp, a1
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
; CHECK-V-NEXT: lhu s0, 56(a0)
; CHECK-V-NEXT: lhu s1, 48(a0)
; CHECK-V-NEXT: lhu s2, 40(a0)
@@ -1958,87 +2015,96 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: lhu a0, 8(a0)
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 28(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s6
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 0(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v10, 1
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s5
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 24(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s4
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 20(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 3
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s3
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 16(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 4
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 12(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 5
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 8(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 6
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 4(sp)
-; CHECK-V-NEXT: addi a0, sp, 28
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle32.v v8, (a0)
-; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 1
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 2
-; CHECK-V-NEXT: addi a0, sp, 20
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 3
-; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 4
-; CHECK-V-NEXT: addi a0, sp, 12
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 5
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 6
-; CHECK-V-NEXT: addi a0, sp, 4
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 7
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: lui a0, 16
; CHECK-V-NEXT: addiw a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
+; CHECK-V-NEXT: vmin.vx v8, v10, a0
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
-; CHECK-V-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 96
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 80
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
@@ -2134,19 +2200,19 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
;
; CHECK-V-LABEL: stest_f64i64:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -2155,7 +2221,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixdfti@plt
@@ -2211,22 +2277,17 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: # %bb.15: # %entry
; CHECK-V-NEXT: mv a0, a1
; CHECK-V-NEXT: .LBB18_16: # %entry
-; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd s0, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, s0
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
@@ -2273,19 +2334,19 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
;
; CHECK-V-LABEL: utest_f64i64:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -2294,7 +2355,7 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixunsdfti@plt
@@ -2304,22 +2365,17 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: and a2, a2, s0
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a0, a1, a0
-; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd a2, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, a2
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <2 x double> %x to <2 x i128>
@@ -2390,19 +2446,19 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
;
; CHECK-V-LABEL: ustest_f64i64:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -2411,7 +2467,7 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: mv s1, a0
; CHECK-V-NEXT: mv s0, a1
; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixdfti@plt
@@ -2448,22 +2504,17 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: and a2, a2, a3
; CHECK-V-NEXT: neg a1, a1
; CHECK-V-NEXT: and a0, a1, a0
-; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd a2, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vmv.s.x v9, a2
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
@@ -2557,19 +2608,19 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
;
; CHECK-V-LABEL: stest_f32i64:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -2578,7 +2629,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixsfti@plt
@@ -2634,22 +2685,17 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: # %bb.15: # %entry
; CHECK-V-NEXT: mv a0, a1
; CHECK-V-NEXT: .LBB21_16: # %entry
-; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd s0, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, s0
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
@@ -2696,19 +2742,19 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
;
; CHECK-V-LABEL: utest_f32i64:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -2717,7 +2763,7 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixunssfti@plt
@@ -2727,22 +2773,17 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: and a2, a2, s0
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a0, a1, a0
-; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd a2, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, a2
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <2 x float> %x to <2 x i128>
@@ -2813,19 +2854,19 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
;
; CHECK-V-LABEL: ustest_f32i64:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -2834,7 +2875,7 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: mv s1, a0
; CHECK-V-NEXT: mv s0, a1
; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixsfti@plt
@@ -2871,22 +2912,17 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: and a2, a2, a3
; CHECK-V-NEXT: neg a1, a1
; CHECK-V-NEXT: and a0, a1, a0
-; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd a2, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vmv.s.x v9, a2
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
@@ -2982,12 +3018,12 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
;
; CHECK-V-LABEL: stest_f16i64:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -48
-; CHECK-V-NEXT: .cfi_def_cfa_offset 48
-; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -32
+; CHECK-V-NEXT: .cfi_def_cfa_offset 32
+; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -3051,20 +3087,15 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-V-NEXT: # %bb.15: # %entry
; CHECK-V-NEXT: mv a0, a1
; CHECK-V-NEXT: .LBB24_16: # %entry
-; CHECK-V-NEXT: sd a0, 8(sp)
-; CHECK-V-NEXT: sd s0, 0(sp)
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v9, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, a0
+; CHECK-V-NEXT: vmv.s.x v8, s0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
-; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 48
+; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 32
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
@@ -3113,12 +3144,12 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
;
; CHECK-V-LABEL: utesth_f16i64:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -48
-; CHECK-V-NEXT: .cfi_def_cfa_offset 48
-; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -32
+; CHECK-V-NEXT: .cfi_def_cfa_offset 32
+; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -3137,20 +3168,15 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-V-NEXT: and a2, a2, s1
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a0, a1, a0
-; CHECK-V-NEXT: sd a0, 8(sp)
-; CHECK-V-NEXT: sd a2, 0(sp)
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v9, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, a0
+; CHECK-V-NEXT: vmv.s.x v8, a2
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
-; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 48
+; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 32
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <2 x half> %x to <2 x i128>
@@ -3223,12 +3249,12 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
;
; CHECK-V-LABEL: ustest_f16i64:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -48
-; CHECK-V-NEXT: .cfi_def_cfa_offset 48
-; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -32
+; CHECK-V-NEXT: .cfi_def_cfa_offset 32
+; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -3274,20 +3300,15 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-V-NEXT: and a2, a2, a3
; CHECK-V-NEXT: neg a1, a1
; CHECK-V-NEXT: and a0, a1, a0
-; CHECK-V-NEXT: sd a0, 8(sp)
-; CHECK-V-NEXT: sd a2, 0(sp)
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v9, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, a0
+; CHECK-V-NEXT: vmv.s.x v8, a2
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
-; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 48
+; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 32
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
@@ -3732,63 +3753,72 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
;
; CHECK-V-LABEL: stest_f16i32_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -64
-; CHECK-V-NEXT: .cfi_def_cfa_offset 64
-; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -48
+; CHECK-V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: sub sp, sp, a1
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
; CHECK-V-NEXT: lhu s0, 24(a0)
; CHECK-V-NEXT: lhu s1, 16(a0)
; CHECK-V-NEXT: lhu s2, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 24(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 0(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v10, 1
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 16(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 8(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 1
-; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 2
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 3
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: lui a0, 524288
; CHECK-V-NEXT: addiw a1, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v8, a1
+; CHECK-V-NEXT: vmin.vx v8, v10, a1
; CHECK-V-NEXT: vmax.vx v10, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
-; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 64
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 48
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
@@ -3880,62 +3910,71 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
;
; CHECK-V-LABEL: utesth_f16i32_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -64
-; CHECK-V-NEXT: .cfi_def_cfa_offset 64
-; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -48
+; CHECK-V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: sub sp, sp, a1
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
; CHECK-V-NEXT: lhu s0, 24(a0)
; CHECK-V-NEXT: lhu s1, 16(a0)
; CHECK-V-NEXT: lhu s2, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 24(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 0(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v10, 1
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 16(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 8(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 1
-; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 2
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 3
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: li a0, -1
; CHECK-V-NEXT: srli a0, a0, 32
-; CHECK-V-NEXT: vminu.vx v10, v8, a0
+; CHECK-V-NEXT: vminu.vx v10, v10, a0
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
-; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 64
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 48
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <4 x half> %x to <4 x i64>
@@ -4038,63 +4077,72 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
;
; CHECK-V-LABEL: ustest_f16i32_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -64
-; CHECK-V-NEXT: .cfi_def_cfa_offset 64
-; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -48
+; CHECK-V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: sub sp, sp, a1
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
; CHECK-V-NEXT: lhu s0, 24(a0)
; CHECK-V-NEXT: lhu s1, 16(a0)
; CHECK-V-NEXT: lhu s2, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 24(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 0(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v10, 1
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 16(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sd a0, 8(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 1
-; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 2
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v10, (a0)
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 3
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: li a0, -1
; CHECK-V-NEXT: srli a0, a0, 32
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
+; CHECK-V-NEXT: vmin.vx v8, v10, a0
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
-; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 64
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 48
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
@@ -4632,16 +4680,16 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
;
; CHECK-V-LABEL: stest_f16i16_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -96
-; CHECK-V-NEXT: .cfi_def_cfa_offset 96
-; CHECK-V-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -80
+; CHECK-V-NEXT: .cfi_def_cfa_offset 80
+; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -4650,6 +4698,10 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: .cfi_offset s4, -48
; CHECK-V-NEXT: .cfi_offset s5, -56
; CHECK-V-NEXT: .cfi_offset s6, -64
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: sub sp, sp, a1
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
; CHECK-V-NEXT: lhu s0, 56(a0)
; CHECK-V-NEXT: lhu s1, 48(a0)
; CHECK-V-NEXT: lhu s2, 40(a0)
@@ -4660,88 +4712,97 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: lhu a0, 8(a0)
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 28(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s6
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 0(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v10, 1
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s5
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 24(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s4
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 20(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 3
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s3
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 16(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 4
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 12(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 5
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 8(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 6
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 4(sp)
-; CHECK-V-NEXT: addi a0, sp, 28
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle32.v v8, (a0)
-; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 1
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 2
-; CHECK-V-NEXT: addi a0, sp, 20
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 3
-; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 4
-; CHECK-V-NEXT: addi a0, sp, 12
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 5
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 6
-; CHECK-V-NEXT: addi a0, sp, 4
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 7
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: lui a0, 8
; CHECK-V-NEXT: addiw a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
+; CHECK-V-NEXT: vmin.vx v8, v10, a0
; CHECK-V-NEXT: lui a0, 1048568
; CHECK-V-NEXT: vmax.vx v10, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
-; CHECK-V-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 96
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 80
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
@@ -4903,16 +4964,16 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
;
; CHECK-V-LABEL: utesth_f16i16_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -96
-; CHECK-V-NEXT: .cfi_def_cfa_offset 96
-; CHECK-V-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -80
+; CHECK-V-NEXT: .cfi_def_cfa_offset 80
+; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -4921,6 +4982,10 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: .cfi_offset s4, -48
; CHECK-V-NEXT: .cfi_offset s5, -56
; CHECK-V-NEXT: .cfi_offset s6, -64
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: sub sp, sp, a1
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
; CHECK-V-NEXT: lhu s0, 56(a0)
; CHECK-V-NEXT: lhu s1, 48(a0)
; CHECK-V-NEXT: lhu s2, 40(a0)
@@ -4931,86 +4996,95 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: lhu a0, 8(a0)
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 28(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s6
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 0(sp)
-; CHECK-V-NEXT: mv a0, s5
-; CHECK-V-NEXT: call __extendhfsf2@plt
-; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 24(sp)
-; CHECK-V-NEXT: mv a0, s4
-; CHECK-V-NEXT: call __extendhfsf2@plt
-; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 20(sp)
-; CHECK-V-NEXT: mv a0, s3
-; CHECK-V-NEXT: call __extendhfsf2@plt
-; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 16(sp)
-; CHECK-V-NEXT: mv a0, s2
-; CHECK-V-NEXT: call __extendhfsf2@plt
-; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 12(sp)
-; CHECK-V-NEXT: mv a0, s1
-; CHECK-V-NEXT: call __extendhfsf2@plt
-; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 8(sp)
-; CHECK-V-NEXT: mv a0, s0
-; CHECK-V-NEXT: call __extendhfsf2@plt
-; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 4(sp)
-; CHECK-V-NEXT: addi a0, sp, 28
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle32.v v8, (a0)
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v10, 1
-; CHECK-V-NEXT: addi a0, sp, 24
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: mv a0, s5
+; CHECK-V-NEXT: call __extendhfsf2@plt
+; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 2
-; CHECK-V-NEXT: addi a0, sp, 20
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: mv a0, s4
+; CHECK-V-NEXT: call __extendhfsf2@plt
+; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 3
; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 3
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: mv a0, s3
+; CHECK-V-NEXT: call __extendhfsf2@plt
+; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 4
-; CHECK-V-NEXT: addi a0, sp, 12
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 4
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: call __extendhfsf2@plt
+; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 5
-; CHECK-V-NEXT: addi a0, sp, 8
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 5
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: mv a0, s1
+; CHECK-V-NEXT: call __extendhfsf2@plt
+; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 6
-; CHECK-V-NEXT: addi a0, sp, 4
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 6
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: call __extendhfsf2@plt
+; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 7
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: lui a0, 16
; CHECK-V-NEXT: addiw a0, a0, -1
-; CHECK-V-NEXT: vminu.vx v10, v8, a0
+; CHECK-V-NEXT: vminu.vx v10, v10, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
-; CHECK-V-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 96
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 80
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <8 x half> %x to <8 x i32>
@@ -5197,16 +5271,16 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
;
; CHECK-V-LABEL: ustest_f16i16_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -96
-; CHECK-V-NEXT: .cfi_def_cfa_offset 96
-; CHECK-V-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -80
+; CHECK-V-NEXT: .cfi_def_cfa_offset 80
+; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -5215,6 +5289,10 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: .cfi_offset s4, -48
; CHECK-V-NEXT: .cfi_offset s5, -56
; CHECK-V-NEXT: .cfi_offset s6, -64
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: sub sp, sp, a1
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
; CHECK-V-NEXT: lhu s0, 56(a0)
; CHECK-V-NEXT: lhu s1, 48(a0)
; CHECK-V-NEXT: lhu s2, 40(a0)
@@ -5225,87 +5303,96 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: lhu a0, 8(a0)
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 28(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s6
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 0(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v10, 1
+; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s5
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 24(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s4
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 20(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 3
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s3
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 16(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 4
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 12(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 5
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 8(sp)
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 6
+; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: mv a0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT: sw a0, 4(sp)
-; CHECK-V-NEXT: addi a0, sp, 28
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle32.v v8, (a0)
-; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 1
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 2
-; CHECK-V-NEXT: addi a0, sp, 20
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 3
-; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 4
-; CHECK-V-NEXT: addi a0, sp, 12
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 5
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
-; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 6
-; CHECK-V-NEXT: addi a0, sp, 4
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vle32.v v10, (a0)
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v8, v10, 7
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: lui a0, 16
; CHECK-V-NEXT: addiw a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
+; CHECK-V-NEXT: vmin.vx v8, v10, a0
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
-; CHECK-V-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 96
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 80
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
@@ -5425,19 +5512,19 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
;
; CHECK-V-LABEL: stest_f64i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -5446,7 +5533,7 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixdfti@plt
@@ -5491,22 +5578,17 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-V-NEXT: .LBB45_13: # %entry
; CHECK-V-NEXT: mv a0, a4
; CHECK-V-NEXT: .LBB45_14: # %entry
-; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd s0, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, s0
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
; CHECK-V-NEXT: .LBB45_15: # %entry
; CHECK-V-NEXT: mv a4, a3
@@ -5595,19 +5677,19 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
;
; CHECK-V-LABEL: utest_f64i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -5616,7 +5698,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixunsdfti@plt
@@ -5634,22 +5716,17 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
; CHECK-V-NEXT: seqz a1, a1
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a0, a1, a0
-; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd a2, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, a2
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <2 x double> %x to <2 x i128>
@@ -5722,19 +5799,19 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
;
; CHECK-V-LABEL: ustest_f64i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -5743,7 +5820,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-V-NEXT: mv s1, a0
; CHECK-V-NEXT: mv s0, a1
; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixdfti@plt
@@ -5782,22 +5859,17 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-V-NEXT: neg a2, a2
; CHECK-V-NEXT: and a1, a2, a1
; CHECK-V-NEXT: .LBB47_8: # %entry
-; CHECK-V-NEXT: sd a1, 24(sp)
-; CHECK-V-NEXT: sd a0, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a1
+; CHECK-V-NEXT: vmv.s.x v9, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
@@ -5915,19 +5987,19 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
;
; CHECK-V-LABEL: stest_f32i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -5936,7 +6008,7 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixsfti@plt
@@ -5981,22 +6053,17 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-V-NEXT: .LBB48_13: # %entry
; CHECK-V-NEXT: mv a0, a4
; CHECK-V-NEXT: .LBB48_14: # %entry
-; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd s0, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, s0
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
; CHECK-V-NEXT: .LBB48_15: # %entry
; CHECK-V-NEXT: mv a4, a3
@@ -6085,19 +6152,19 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
;
; CHECK-V-LABEL: utest_f32i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -6106,7 +6173,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixunssfti@plt
@@ -6124,22 +6191,17 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
; CHECK-V-NEXT: seqz a1, a1
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a0, a1, a0
-; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd a2, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, a2
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <2 x float> %x to <2 x i128>
@@ -6212,19 +6274,19 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
;
; CHECK-V-LABEL: ustest_f32i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -6233,7 +6295,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-V-NEXT: mv s1, a0
; CHECK-V-NEXT: mv s0, a1
; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixsfti@plt
@@ -6272,22 +6334,17 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-V-NEXT: neg a2, a2
; CHECK-V-NEXT: and a1, a2, a1
; CHECK-V-NEXT: .LBB50_8: # %entry
-; CHECK-V-NEXT: sd a1, 24(sp)
-; CHECK-V-NEXT: sd a0, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a1
+; CHECK-V-NEXT: vmv.s.x v9, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
@@ -6407,12 +6464,12 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
;
; CHECK-V-LABEL: stest_f16i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -48
-; CHECK-V-NEXT: .cfi_def_cfa_offset 48
-; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -32
+; CHECK-V-NEXT: .cfi_def_cfa_offset 32
+; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -6466,20 +6523,15 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-V-NEXT: .LBB51_13: # %entry
; CHECK-V-NEXT: mv a0, a4
; CHECK-V-NEXT: .LBB51_14: # %entry
-; CHECK-V-NEXT: sd a0, 8(sp)
-; CHECK-V-NEXT: sd s0, 0(sp)
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v9, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, a0
+; CHECK-V-NEXT: vmv.s.x v8, s0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
-; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 48
+; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 32
; CHECK-V-NEXT: ret
; CHECK-V-NEXT: .LBB51_15: # %entry
; CHECK-V-NEXT: mv a4, a3
@@ -6570,12 +6622,12 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
;
; CHECK-V-LABEL: utesth_f16i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -48
-; CHECK-V-NEXT: .cfi_def_cfa_offset 48
-; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -32
+; CHECK-V-NEXT: .cfi_def_cfa_offset 32
+; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -6603,20 +6655,15 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-V-NEXT: seqz a2, s2
; CHECK-V-NEXT: addi a2, a2, -1
; CHECK-V-NEXT: and a1, a2, a1
-; CHECK-V-NEXT: sd a1, 8(sp)
-; CHECK-V-NEXT: sd a0, 0(sp)
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v9, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, a1
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
-; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 48
+; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 32
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <2 x half> %x to <2 x i128>
@@ -6691,12 +6738,12 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
;
; CHECK-V-LABEL: ustest_f16i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -48
-; CHECK-V-NEXT: .cfi_def_cfa_offset 48
-; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -32
+; CHECK-V-NEXT: .cfi_def_cfa_offset 32
+; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -6745,20 +6792,15 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-V-NEXT: neg a2, a2
; CHECK-V-NEXT: and a1, a2, a1
; CHECK-V-NEXT: .LBB53_8: # %entry
-; CHECK-V-NEXT: sd a1, 8(sp)
-; CHECK-V-NEXT: sd a0, 0(sp)
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v9, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, a1
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
-; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 48
+; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 32
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x half> %x to <2 x i128>