[llvm] [RISCV] Bitcast fixed length bf16/f16 build_vector to i16 with Zvfbfmin/Zvfhmin+Zfbfmin/Zfhmin. (PR #106637)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 29 18:52:54 PDT 2024
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/106637
>From f1c7b90954389174a078944cf87315b3ae9492cb Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 29 Aug 2024 14:47:19 -0700
Subject: [PATCH 1/2] [RISCV] Bitcast fixed length bf16/f16 build_vector to i16
with Zvfbfmin/Zvfhmin+Zfbfmin/Zfhmin.
Previously we only handled build_vectors that could be turned into
splat_vectors, and we promoted them to f32 splats by extending in the
scalar domain and narrowing in the vector domain.
This fixes a crash where we failed to account for whether the promoted
f32 vector type fit in LMUL<=8.
Because this occurs after type legalization, we have to be careful
to use XLenVT for the scalar integer type and use custom cast nodes.
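For reference, with +zvfbfmin and +zfbfmin a non-splat bf16 build_vector
like the buildvec_v2bf16 test added below

  define <2 x bfloat> @buildvec_v2bf16(bfloat %a, bfloat %b) {
    %v1 = insertelement <2 x bfloat> poison, bfloat %a, i64 0
    %v2 = insertelement <2 x bfloat> %v1, bfloat %b, i64 1
    ret <2 x bfloat> %v2
  }

now lowers by moving each scalar to a GPR with fmv.x.h, building an i16
vector with vmv.v.x/vslide1down.vx, and bitcasting the result back to
bf16, as shown in the RV32-ZFBFMIN/RV64-ZFBFMIN check lines.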
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 61 +-
.../rvv/fixed-vectors-fp-buildvec-bf16.ll | 175 ++
.../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 151 +-
.../RISCV/rvv/fixed-vectors-fp-setcc.ll | 160 +-
.../RISCV/rvv/fixed-vectors-fp-splat-bf16.ll | 45 +-
.../RISCV/rvv/fixed-vectors-fp-splat.ll | 89 +-
.../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 1905 +++++++++--------
.../RISCV/rvv/fixed-vectors-setcc-fp-vp.ll | 196 +-
.../RISCV/rvv/fixed-vectors-vfadd-vp.ll | 80 +-
.../RISCV/rvv/fixed-vectors-vfdiv-vp.ll | 80 +-
.../RISCV/rvv/fixed-vectors-vfma-vp.ll | 104 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll | 80 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll | 80 +-
.../RISCV/rvv/fixed-vectors-vfmul-vp.ll | 80 +-
.../RISCV/rvv/fixed-vectors-vfsub-vp.ll | 80 +-
.../RISCV/rvv/fixed-vectors-vpmerge-bf16.ll | 40 +-
.../RISCV/rvv/fixed-vectors-vpmerge.ll | 40 +-
17 files changed, 1768 insertions(+), 1678 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 09928dcc1f489a..f11edc34dfbf5a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1255,6 +1255,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (VT.getVectorElementType() == MVT::f16 &&
!Subtarget.hasVInstructionsF16()) {
+ setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
setOperationAction(
{ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
@@ -1264,8 +1265,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
if (Subtarget.hasStdExtZfhmin()) {
- // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
} else {
// We need to custom legalize f16 build vectors if Zfhmin isn't
// available.
@@ -1283,10 +1283,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
if (VT.getVectorElementType() == MVT::bf16) {
+ setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
if (Subtarget.hasStdExtZfbfmin()) {
- // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
} else {
// We need to custom legalize bf16 build vectors if Zfbfmin isn't
// available.
@@ -3924,26 +3924,46 @@ static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
DAG.getBuildVector(WideVecVT, DL, NewOperands));
}
-// Convert to an vXf16 build_vector to vXi16 with bitcasts.
-static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
- MVT IVT = VT.changeVectorElementType(MVT::i16);
- SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
- for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I)
- NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
- SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), IVT, NewOps);
- return DAG.getBitcast(VT, Res);
-}
-
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
assert(VT.isFixedLengthVector() && "Unexpected vector!");
- // If we don't have scalar f16/bf16, we need to bitcast to an i16 vector.
- if ((VT.getVectorElementType() == MVT::f16 && !Subtarget.hasStdExtZfhmin()) ||
- (VT.getVectorElementType() == MVT::bf16 && !Subtarget.hasStdExtZfbfmin()))
- return lowerBUILD_VECTORvXf16(Op, DAG);
+ MVT EltVT = VT.getVectorElementType();
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ SDLoc DL(Op);
+
+ // Proper support for f16 requires Zvfh. bf16 always requires special
+ // handling. We need to cast the scalar to integer and create an integer
+ // build_vector.
+ if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
+ MVT IVT = VT.changeVectorElementType(MVT::i16);
+ SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
+ for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
+ SDValue Elem = Op.getOperand(I);
+ if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
+ (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
+ // Called by LegalizeDAG, we need to use XLenVT operations since we
+ // can't create illegal types.
+ if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
+ // Manually constant fold.
+ // FIXME: Add a constant fold combine for FMV_X_ANYEXTH.
+ // FIXME: We need a load+FMV_X_ANYEXTH combine too.
+ APInt V =
+ C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
+ NewOps[I] = DAG.getConstant(V, DL, XLenVT);
+ } else {
+ NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
+ }
+ } else {
+ // Called by scalar type legalizer, we can use i16.
+ NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
+ }
+ }
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
+ return DAG.getBitcast(VT, Res);
+ }
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
@@ -3951,11 +3971,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
- SDLoc DL(Op);
auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
- MVT XLenVT = Subtarget.getXLenVT();
-
if (VT.getVectorElementType() == MVT::i1) {
// A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
// vector type, we have a legal equivalently-sized i8 type, so we can use
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll
new file mode 100644
index 00000000000000..170e71af09b49d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll
@@ -0,0 +1,175 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFBFMIN,RV32-NO-ZFBFMIN
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFBFMIN,RV64-NO-ZFBFMIN
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFBFMIN,RV32-ZFBFMIN
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFBFMIN,RV64-ZFBFMIN
+
+define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) {
+; RV32-NO-ZFBFMIN-LABEL: splat_idx_v4bf16:
+; RV32-NO-ZFBFMIN: # %bb.0:
+; RV32-NO-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NO-ZFBFMIN-NEXT: vrgather.vx v9, v8, a0
+; RV32-NO-ZFBFMIN-NEXT: vmv1r.v v8, v9
+; RV32-NO-ZFBFMIN-NEXT: ret
+;
+; RV64-NO-ZFBFMIN-LABEL: splat_idx_v4bf16:
+; RV64-NO-ZFBFMIN: # %bb.0:
+; RV64-NO-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NO-ZFBFMIN-NEXT: vrgather.vx v9, v8, a0
+; RV64-NO-ZFBFMIN-NEXT: vmv1r.v v8, v9
+; RV64-NO-ZFBFMIN-NEXT: ret
+;
+; RV32-ZFBFMIN-LABEL: splat_idx_v4bf16:
+; RV32-ZFBFMIN: # %bb.0:
+; RV32-ZFBFMIN-NEXT: addi sp, sp, -48
+; RV32-ZFBFMIN-NEXT: .cfi_def_cfa_offset 48
+; RV32-ZFBFMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-ZFBFMIN-NEXT: .cfi_offset ra, -4
+; RV32-ZFBFMIN-NEXT: csrr a1, vlenb
+; RV32-ZFBFMIN-NEXT: slli a1, a1, 1
+; RV32-ZFBFMIN-NEXT: sub sp, sp, a1
+; RV32-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
+; RV32-ZFBFMIN-NEXT: addi a1, sp, 32
+; RV32-ZFBFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-ZFBFMIN-NEXT: andi a0, a0, 3
+; RV32-ZFBFMIN-NEXT: li a1, 2
+; RV32-ZFBFMIN-NEXT: call __mulsi3
+; RV32-ZFBFMIN-NEXT: addi a1, sp, 16
+; RV32-ZFBFMIN-NEXT: add a0, a1, a0
+; RV32-ZFBFMIN-NEXT: addi a2, sp, 32
+; RV32-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-ZFBFMIN-NEXT: vse16.v v8, (a1)
+; RV32-ZFBFMIN-NEXT: flh fa5, 0(a0)
+; RV32-ZFBFMIN-NEXT: fmv.x.h a0, fa5
+; RV32-ZFBFMIN-NEXT: vmv.v.x v8, a0
+; RV32-ZFBFMIN-NEXT: csrr a0, vlenb
+; RV32-ZFBFMIN-NEXT: slli a0, a0, 1
+; RV32-ZFBFMIN-NEXT: add sp, sp, a0
+; RV32-ZFBFMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-ZFBFMIN-NEXT: addi sp, sp, 48
+; RV32-ZFBFMIN-NEXT: ret
+;
+; RV64-ZFBFMIN-LABEL: splat_idx_v4bf16:
+; RV64-ZFBFMIN: # %bb.0:
+; RV64-ZFBFMIN-NEXT: addi sp, sp, -48
+; RV64-ZFBFMIN-NEXT: .cfi_def_cfa_offset 48
+; RV64-ZFBFMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-ZFBFMIN-NEXT: .cfi_offset ra, -8
+; RV64-ZFBFMIN-NEXT: csrr a1, vlenb
+; RV64-ZFBFMIN-NEXT: slli a1, a1, 1
+; RV64-ZFBFMIN-NEXT: sub sp, sp, a1
+; RV64-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
+; RV64-ZFBFMIN-NEXT: addi a1, sp, 32
+; RV64-ZFBFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-ZFBFMIN-NEXT: andi a0, a0, 3
+; RV64-ZFBFMIN-NEXT: li a1, 2
+; RV64-ZFBFMIN-NEXT: call __muldi3
+; RV64-ZFBFMIN-NEXT: addi a1, sp, 16
+; RV64-ZFBFMIN-NEXT: add a0, a1, a0
+; RV64-ZFBFMIN-NEXT: addi a2, sp, 32
+; RV64-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-ZFBFMIN-NEXT: vse16.v v8, (a1)
+; RV64-ZFBFMIN-NEXT: flh fa5, 0(a0)
+; RV64-ZFBFMIN-NEXT: fmv.x.h a0, fa5
+; RV64-ZFBFMIN-NEXT: vmv.v.x v8, a0
+; RV64-ZFBFMIN-NEXT: csrr a0, vlenb
+; RV64-ZFBFMIN-NEXT: slli a0, a0, 1
+; RV64-ZFBFMIN-NEXT: add sp, sp, a0
+; RV64-ZFBFMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-ZFBFMIN-NEXT: addi sp, sp, 48
+; RV64-ZFBFMIN-NEXT: ret
+ %x = extractelement <4 x bfloat> %v, i64 %idx
+ %ins = insertelement <4 x bfloat> poison, bfloat %x, i32 0
+ %splat = shufflevector <4 x bfloat> %ins, <4 x bfloat> poison, <4 x i32> zeroinitializer
+ ret <4 x bfloat> %splat
+}
+
+define <2 x bfloat> @buildvec_v2bf16(bfloat %a, bfloat %b) {
+; RV32-NO-ZFBFMIN-LABEL: buildvec_v2bf16:
+; RV32-NO-ZFBFMIN: # %bb.0:
+; RV32-NO-ZFBFMIN-NEXT: fmv.x.w a0, fa1
+; RV32-NO-ZFBFMIN-NEXT: fmv.x.w a1, fa0
+; RV32-NO-ZFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NO-ZFBFMIN-NEXT: vmv.v.x v8, a1
+; RV32-NO-ZFBFMIN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NO-ZFBFMIN-NEXT: ret
+;
+; RV64-NO-ZFBFMIN-LABEL: buildvec_v2bf16:
+; RV64-NO-ZFBFMIN: # %bb.0:
+; RV64-NO-ZFBFMIN-NEXT: fmv.x.w a0, fa1
+; RV64-NO-ZFBFMIN-NEXT: fmv.x.w a1, fa0
+; RV64-NO-ZFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NO-ZFBFMIN-NEXT: vmv.v.x v8, a1
+; RV64-NO-ZFBFMIN-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NO-ZFBFMIN-NEXT: ret
+;
+; RV32-ZFBFMIN-LABEL: buildvec_v2bf16:
+; RV32-ZFBFMIN: # %bb.0:
+; RV32-ZFBFMIN-NEXT: fmv.x.h a0, fa1
+; RV32-ZFBFMIN-NEXT: fmv.x.h a1, fa0
+; RV32-ZFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-ZFBFMIN-NEXT: vmv.v.x v8, a1
+; RV32-ZFBFMIN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-ZFBFMIN-NEXT: ret
+;
+; RV64-ZFBFMIN-LABEL: buildvec_v2bf16:
+; RV64-ZFBFMIN: # %bb.0:
+; RV64-ZFBFMIN-NEXT: fmv.x.h a0, fa1
+; RV64-ZFBFMIN-NEXT: fmv.x.h a1, fa0
+; RV64-ZFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-ZFBFMIN-NEXT: vmv.v.x v8, a1
+; RV64-ZFBFMIN-NEXT: vslide1down.vx v8, v8, a0
+; RV64-ZFBFMIN-NEXT: ret
+ %v1 = insertelement <2 x bfloat> poison, bfloat %a, i64 0
+ %v2 = insertelement <2 x bfloat> %v1, bfloat %b, i64 1
+ ret <2 x bfloat> %v2
+}
+
+define <2 x bfloat> @vid_v2bf16() {
+; CHECK-LABEL: vid_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 260096
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
+ ret <2 x bfloat> <bfloat 0.0, bfloat 1.0>
+}
+
+define <2 x bfloat> @vid_addend1_v2bf16() {
+; CHECK-LABEL: vid_addend1_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 262148
+; CHECK-NEXT: addi a0, a0, -128
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
+ ret <2 x bfloat> <bfloat 1.0, bfloat 2.0>
+}
+
+define <2 x bfloat> @vid_denominator2_v2bf16() {
+; CHECK-LABEL: vid_denominator2_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 260100
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
+ ret <2 x bfloat> <bfloat 0.5, bfloat 1.0>
+}
+
+define <2 x bfloat> @vid_step2_v2bf16() {
+; CHECK-LABEL: vid_step2_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vsll.vi v8, v8, 14
+; CHECK-NEXT: ret
+ ret <2 x bfloat> <bfloat 0.0, bfloat 2.0>
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV32ZVFBFMIN: {{.*}}
+; RV64: {{.*}}
+; RV64ZVFBFMIN: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 26ed4595ca7583..e3aabb5de29c28 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -4,8 +4,10 @@
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+zba,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFH
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFH,RV64V
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+rva22u64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFH,RVA22U64
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFHMIN
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFHMIN
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFHMIN,RV32-NO-ZFHMIN
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFHMIN,RV64-NO-ZFHMIN
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFHMIN,RV32-ZFHMIN
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFHMIN,RV64-ZFHMIN
; Tests that a floating-point build_vector doesn't try and generate a VID
; instruction
@@ -169,12 +171,95 @@ define <4 x half> @splat_c3_v4f16(<4 x half> %v) {
}
define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) {
-; CHECK-LABEL: splat_idx_v4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vrgather.vx v9, v8, a0
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; RV32ZVFH-LABEL: splat_idx_v4f16:
+; RV32ZVFH: # %bb.0:
+; RV32ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32ZVFH-NEXT: vrgather.vx v9, v8, a0
+; RV32ZVFH-NEXT: vmv1r.v v8, v9
+; RV32ZVFH-NEXT: ret
+;
+; RV64ZVFH-LABEL: splat_idx_v4f16:
+; RV64ZVFH: # %bb.0:
+; RV64ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64ZVFH-NEXT: vrgather.vx v9, v8, a0
+; RV64ZVFH-NEXT: vmv1r.v v8, v9
+; RV64ZVFH-NEXT: ret
+;
+; RV32-NO-ZFHMIN-LABEL: splat_idx_v4f16:
+; RV32-NO-ZFHMIN: # %bb.0:
+; RV32-NO-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NO-ZFHMIN-NEXT: vrgather.vx v9, v8, a0
+; RV32-NO-ZFHMIN-NEXT: vmv1r.v v8, v9
+; RV32-NO-ZFHMIN-NEXT: ret
+;
+; RV64-NO-ZFHMIN-LABEL: splat_idx_v4f16:
+; RV64-NO-ZFHMIN: # %bb.0:
+; RV64-NO-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NO-ZFHMIN-NEXT: vrgather.vx v9, v8, a0
+; RV64-NO-ZFHMIN-NEXT: vmv1r.v v8, v9
+; RV64-NO-ZFHMIN-NEXT: ret
+;
+; RV32-ZFHMIN-LABEL: splat_idx_v4f16:
+; RV32-ZFHMIN: # %bb.0:
+; RV32-ZFHMIN-NEXT: addi sp, sp, -48
+; RV32-ZFHMIN-NEXT: .cfi_def_cfa_offset 48
+; RV32-ZFHMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-ZFHMIN-NEXT: .cfi_offset ra, -4
+; RV32-ZFHMIN-NEXT: csrr a1, vlenb
+; RV32-ZFHMIN-NEXT: slli a1, a1, 1
+; RV32-ZFHMIN-NEXT: sub sp, sp, a1
+; RV32-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
+; RV32-ZFHMIN-NEXT: addi a1, sp, 32
+; RV32-ZFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-ZFHMIN-NEXT: andi a0, a0, 3
+; RV32-ZFHMIN-NEXT: li a1, 2
+; RV32-ZFHMIN-NEXT: call __mulsi3
+; RV32-ZFHMIN-NEXT: addi a1, sp, 16
+; RV32-ZFHMIN-NEXT: add a0, a1, a0
+; RV32-ZFHMIN-NEXT: addi a2, sp, 32
+; RV32-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-ZFHMIN-NEXT: vse16.v v8, (a1)
+; RV32-ZFHMIN-NEXT: flh fa5, 0(a0)
+; RV32-ZFHMIN-NEXT: fmv.x.h a0, fa5
+; RV32-ZFHMIN-NEXT: vmv.v.x v8, a0
+; RV32-ZFHMIN-NEXT: csrr a0, vlenb
+; RV32-ZFHMIN-NEXT: slli a0, a0, 1
+; RV32-ZFHMIN-NEXT: add sp, sp, a0
+; RV32-ZFHMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-ZFHMIN-NEXT: addi sp, sp, 48
+; RV32-ZFHMIN-NEXT: ret
+;
+; RV64-ZFHMIN-LABEL: splat_idx_v4f16:
+; RV64-ZFHMIN: # %bb.0:
+; RV64-ZFHMIN-NEXT: addi sp, sp, -48
+; RV64-ZFHMIN-NEXT: .cfi_def_cfa_offset 48
+; RV64-ZFHMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-ZFHMIN-NEXT: .cfi_offset ra, -8
+; RV64-ZFHMIN-NEXT: csrr a1, vlenb
+; RV64-ZFHMIN-NEXT: slli a1, a1, 1
+; RV64-ZFHMIN-NEXT: sub sp, sp, a1
+; RV64-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
+; RV64-ZFHMIN-NEXT: addi a1, sp, 32
+; RV64-ZFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-ZFHMIN-NEXT: andi a0, a0, 3
+; RV64-ZFHMIN-NEXT: li a1, 2
+; RV64-ZFHMIN-NEXT: call __muldi3
+; RV64-ZFHMIN-NEXT: addi a1, sp, 16
+; RV64-ZFHMIN-NEXT: add a0, a1, a0
+; RV64-ZFHMIN-NEXT: addi a2, sp, 32
+; RV64-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-ZFHMIN-NEXT: vse16.v v8, (a1)
+; RV64-ZFHMIN-NEXT: flh fa5, 0(a0)
+; RV64-ZFHMIN-NEXT: fmv.x.h a0, fa5
+; RV64-ZFHMIN-NEXT: vmv.v.x v8, a0
+; RV64-ZFHMIN-NEXT: csrr a0, vlenb
+; RV64-ZFHMIN-NEXT: slli a0, a0, 1
+; RV64-ZFHMIN-NEXT: add sp, sp, a0
+; RV64-ZFHMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-ZFHMIN-NEXT: addi sp, sp, 48
+; RV64-ZFHMIN-NEXT: ret
%x = extractelement <4 x half> %v, i64 %idx
%ins = insertelement <4 x half> poison, half %x, i32 0
%splat = shufflevector <4 x half> %ins, <4 x half> poison, <4 x i32> zeroinitializer
@@ -295,23 +380,41 @@ define <2 x half> @buildvec_v2f16(half %a, half %b) {
; RV64ZVFH-NEXT: vfslide1down.vf v8, v8, fa1
; RV64ZVFH-NEXT: ret
;
-; RV32ZVFHMIN-LABEL: buildvec_v2f16:
-; RV32ZVFHMIN: # %bb.0:
-; RV32ZVFHMIN-NEXT: fmv.x.w a0, fa1
-; RV32ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32ZVFHMIN-NEXT: vmv.v.x v8, a1
-; RV32ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV32ZVFHMIN-NEXT: ret
+; RV32-NO-ZFHMIN-LABEL: buildvec_v2f16:
+; RV32-NO-ZFHMIN: # %bb.0:
+; RV32-NO-ZFHMIN-NEXT: fmv.x.w a0, fa1
+; RV32-NO-ZFHMIN-NEXT: fmv.x.w a1, fa0
+; RV32-NO-ZFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NO-ZFHMIN-NEXT: vmv.v.x v8, a1
+; RV32-NO-ZFHMIN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NO-ZFHMIN-NEXT: ret
;
-; RV64ZVFHMIN-LABEL: buildvec_v2f16:
-; RV64ZVFHMIN: # %bb.0:
-; RV64ZVFHMIN-NEXT: fmv.x.w a0, fa1
-; RV64ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64ZVFHMIN-NEXT: vmv.v.x v8, a1
-; RV64ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV64ZVFHMIN-NEXT: ret
+; RV64-NO-ZFHMIN-LABEL: buildvec_v2f16:
+; RV64-NO-ZFHMIN: # %bb.0:
+; RV64-NO-ZFHMIN-NEXT: fmv.x.w a0, fa1
+; RV64-NO-ZFHMIN-NEXT: fmv.x.w a1, fa0
+; RV64-NO-ZFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NO-ZFHMIN-NEXT: vmv.v.x v8, a1
+; RV64-NO-ZFHMIN-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NO-ZFHMIN-NEXT: ret
+;
+; RV32-ZFHMIN-LABEL: buildvec_v2f16:
+; RV32-ZFHMIN: # %bb.0:
+; RV32-ZFHMIN-NEXT: fmv.x.h a0, fa1
+; RV32-ZFHMIN-NEXT: fmv.x.h a1, fa0
+; RV32-ZFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-ZFHMIN-NEXT: vmv.v.x v8, a1
+; RV32-ZFHMIN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-ZFHMIN-NEXT: ret
+;
+; RV64-ZFHMIN-LABEL: buildvec_v2f16:
+; RV64-ZFHMIN: # %bb.0:
+; RV64-ZFHMIN-NEXT: fmv.x.h a0, fa1
+; RV64-ZFHMIN-NEXT: fmv.x.h a1, fa0
+; RV64-ZFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-ZFHMIN-NEXT: vmv.v.x v8, a1
+; RV64-ZFHMIN-NEXT: vslide1down.vx v8, v8, a0
+; RV64-ZFHMIN-NEXT: ret
%v1 = insertelement <2 x half> poison, half %a, i64 0
%v2 = insertelement <2 x half> %v1, half %b, i64 1
ret <2 x half> %v2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
index a566fab1596f60..31e2d75e514b41 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
@@ -648,12 +648,8 @@ define void @fcmp_oeq_vf_v8f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -681,12 +677,8 @@ define void @fcmp_oeq_vf_v8f16_nonans(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -814,12 +806,8 @@ define void @fcmp_olt_vf_v16f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -847,12 +835,8 @@ define void @fcmp_olt_vf_v16f16_nonans(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -965,12 +949,8 @@ define void @fcmp_ule_vf_v32f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN-NEXT: li a2, 32
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
@@ -1001,12 +981,8 @@ define void @fcmp_ule_vf_v32f16_nonans(ptr %x, half %y, ptr %z) {
; ZVFHMIN-NEXT: li a2, 32
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
@@ -1217,19 +1193,15 @@ define void @fcmp_ord_vf_v4f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v9, v9
+; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v9, v9, v9
+; ZVFHMIN-NEXT: vmfeq.vv v9, v10, v10
; ZVFHMIN-NEXT: vmand.mm v0, v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; ZVFHMIN-NEXT: vmv.v.i v8, 0
@@ -1275,19 +1247,15 @@ define void @fcmp_uno_vf_v4f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vmfne.vv v8, v9, v9
+; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vmfne.vv v9, v9, v9
+; ZVFHMIN-NEXT: vmfne.vv v9, v10, v10
; ZVFHMIN-NEXT: vmor.mm v0, v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; ZVFHMIN-NEXT: vmv.v.i v8, 0
@@ -1321,12 +1289,8 @@ define void @fcmp_oeq_fv_v8f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -1354,12 +1318,8 @@ define void @fcmp_oeq_fv_v8f16_nonans(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -1487,12 +1447,8 @@ define void @fcmp_olt_fv_v16f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -1520,12 +1476,8 @@ define void @fcmp_olt_fv_v16f16_nonans(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -1638,12 +1590,8 @@ define void @fcmp_ule_fv_v32f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN-NEXT: li a2, 32
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
@@ -1674,12 +1622,8 @@ define void @fcmp_ule_fv_v32f16_nonans(ptr %x, half %y, ptr %z) {
; ZVFHMIN-NEXT: li a2, 32
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
@@ -1890,19 +1834,15 @@ define void @fcmp_ord_fv_v4f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v9, v9
+; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v9, v9, v9
+; ZVFHMIN-NEXT: vmfeq.vv v9, v10, v10
; ZVFHMIN-NEXT: vmand.mm v0, v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; ZVFHMIN-NEXT: vmv.v.i v8, 0
@@ -1948,19 +1888,15 @@ define void @fcmp_uno_fv_v4f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vmfne.vv v8, v9, v9
+; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vmfne.vv v9, v9, v9
+; ZVFHMIN-NEXT: vmfne.vv v9, v10, v10
; ZVFHMIN-NEXT: vmor.mm v0, v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; ZVFHMIN-NEXT: vmv.v.i v8, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat-bf16.ll
index b1250f4804549a..c94cdadc8ca597 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat-bf16.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat-bf16.ll
@@ -7,11 +7,9 @@
define <8 x bfloat> @splat_v8bf16(ptr %x, bfloat %y) {
; ZFBFMIN-ZVFBFMIN-LABEL: splat_v8bf16:
; ZFBFMIN-ZVFBFMIN: # %bb.0:
-; ZFBFMIN-ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; ZFBFMIN-ZVFBFMIN-NEXT: vfmv.v.f v10, fa5
-; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZFBFMIN-ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZFBFMIN-ZVFBFMIN-NEXT: fmv.x.h a0, fa0
+; ZFBFMIN-ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZFBFMIN-ZVFBFMIN-NEXT: vmv.v.x v8, a0
; ZFBFMIN-ZVFBFMIN-NEXT: ret
;
; ZVFBFMIN-LABEL: splat_v8bf16:
@@ -28,11 +26,9 @@ define <8 x bfloat> @splat_v8bf16(ptr %x, bfloat %y) {
define <16 x bfloat> @splat_16bf16(ptr %x, bfloat %y) {
; ZFBFMIN-ZVFBFMIN-LABEL: splat_16bf16:
; ZFBFMIN-ZVFBFMIN: # %bb.0:
-; ZFBFMIN-ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; ZFBFMIN-ZVFBFMIN-NEXT: vfmv.v.f v12, fa5
-; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZFBFMIN-ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZFBFMIN-ZVFBFMIN-NEXT: fmv.x.h a0, fa0
+; ZFBFMIN-ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZFBFMIN-ZVFBFMIN-NEXT: vmv.v.x v8, a0
; ZFBFMIN-ZVFBFMIN-NEXT: ret
;
; ZVFBFMIN-LABEL: splat_16bf16:
@@ -46,10 +42,31 @@ define <16 x bfloat> @splat_16bf16(ptr %x, bfloat %y) {
ret <16 x bfloat> %b
}
+define <64 x bfloat> @splat_64bf16(ptr %x, bfloat %y) {
+; ZFBFMIN-ZVFBFMIN-LABEL: splat_64bf16:
+; ZFBFMIN-ZVFBFMIN: # %bb.0:
+; ZFBFMIN-ZVFBFMIN-NEXT: fmv.x.h a0, fa0
+; ZFBFMIN-ZVFBFMIN-NEXT: li a1, 64
+; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; ZFBFMIN-ZVFBFMIN-NEXT: vmv.v.x v8, a0
+; ZFBFMIN-ZVFBFMIN-NEXT: ret
+;
+; ZVFBFMIN-LABEL: splat_64bf16:
+; ZVFBFMIN: # %bb.0:
+; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
+; ZVFBFMIN-NEXT: li a1, 64
+; ZVFBFMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; ZVFBFMIN-NEXT: vmv.v.x v8, a0
+; ZVFBFMIN-NEXT: ret
+ %a = insertelement <64 x bfloat> poison, bfloat %y, i32 0
+ %b = shufflevector <64 x bfloat> %a, <64 x bfloat> poison, <64 x i32> zeroinitializer
+ ret <64 x bfloat> %b
+}
+
define <8 x bfloat> @splat_zero_v8bf16(ptr %x) {
; ZFBFMIN-ZVFBFMIN-LABEL: splat_zero_v8bf16:
; ZFBFMIN-ZVFBFMIN: # %bb.0:
-; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZFBFMIN-ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZFBFMIN-ZVFBFMIN-NEXT: vmv.v.i v8, 0
; ZFBFMIN-ZVFBFMIN-NEXT: ret
;
@@ -64,7 +81,7 @@ define <8 x bfloat> @splat_zero_v8bf16(ptr %x) {
define <16 x bfloat> @splat_zero_16bf16(ptr %x) {
; ZFBFMIN-ZVFBFMIN-LABEL: splat_zero_16bf16:
; ZFBFMIN-ZVFBFMIN: # %bb.0:
-; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZFBFMIN-ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZFBFMIN-ZVFBFMIN-NEXT: vmv.v.i v8, 0
; ZFBFMIN-ZVFBFMIN-NEXT: ret
;
@@ -80,7 +97,7 @@ define <8 x bfloat> @splat_negzero_v8bf16(ptr %x) {
; ZFBFMIN-ZVFBFMIN-LABEL: splat_negzero_v8bf16:
; ZFBFMIN-ZVFBFMIN: # %bb.0:
; ZFBFMIN-ZVFBFMIN-NEXT: lui a0, 1048568
-; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZFBFMIN-ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZFBFMIN-ZVFBFMIN-NEXT: vmv.v.x v8, a0
; ZFBFMIN-ZVFBFMIN-NEXT: ret
;
@@ -97,7 +114,7 @@ define <16 x bfloat> @splat_negzero_16bf16(ptr %x) {
; ZFBFMIN-ZVFBFMIN-LABEL: splat_negzero_16bf16:
; ZFBFMIN-ZVFBFMIN: # %bb.0:
; ZFBFMIN-ZVFBFMIN-NEXT: lui a0, 1048568
-; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZFBFMIN-ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZFBFMIN-ZVFBFMIN-NEXT: vmv.v.x v8, a0
; ZFBFMIN-ZVFBFMIN-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll
index 10103813d526c5..250b3e90cbbb65 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-ZVFH
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-ZVFHMIN
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-ZVFHMIN,RV64-ZVFHMIN-NOZFHMIN
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-ZVFHMIN,RV64_ZVFHMIN-ZFHMIN
define void @splat_v8f16(ptr %x, half %y) {
; CHECK-RV32-LABEL: splat_v8f16:
@@ -18,13 +19,21 @@ define void @splat_v8f16(ptr %x, half %y) {
; RV64-ZVFH-NEXT: vse16.v v8, (a0)
; RV64-ZVFH-NEXT: ret
;
-; RV64-ZVFHMIN-LABEL: splat_v8f16:
-; RV64-ZVFHMIN: # %bb.0:
-; RV64-ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vmv.v.x v8, a1
-; RV64-ZVFHMIN-NEXT: vse16.v v8, (a0)
-; RV64-ZVFHMIN-NEXT: ret
+; RV64-ZVFHMIN-NOZFHMIN-LABEL: splat_v8f16:
+; RV64-ZVFHMIN-NOZFHMIN: # %bb.0:
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: fmv.x.w a1, fa0
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: vmv.v.x v8, a1
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: vse16.v v8, (a0)
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: ret
+;
+; RV64_ZVFHMIN-ZFHMIN-LABEL: splat_v8f16:
+; RV64_ZVFHMIN-ZFHMIN: # %bb.0:
+; RV64_ZVFHMIN-ZFHMIN-NEXT: fmv.x.h a1, fa0
+; RV64_ZVFHMIN-ZFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64_ZVFHMIN-ZFHMIN-NEXT: vmv.v.x v8, a1
+; RV64_ZVFHMIN-ZFHMIN-NEXT: vse16.v v8, (a0)
+; RV64_ZVFHMIN-ZFHMIN-NEXT: ret
%a = insertelement <8 x half> poison, half %y, i32 0
%b = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> zeroinitializer
store <8 x half> %b, ptr %x
@@ -72,13 +81,21 @@ define void @splat_16f16(ptr %x, half %y) {
; RV64-ZVFH-NEXT: vse16.v v8, (a0)
; RV64-ZVFH-NEXT: ret
;
-; RV64-ZVFHMIN-LABEL: splat_16f16:
-; RV64-ZVFHMIN: # %bb.0:
-; RV64-ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-ZVFHMIN-NEXT: vmv.v.x v8, a1
-; RV64-ZVFHMIN-NEXT: vse16.v v8, (a0)
-; RV64-ZVFHMIN-NEXT: ret
+; RV64-ZVFHMIN-NOZFHMIN-LABEL: splat_16f16:
+; RV64-ZVFHMIN-NOZFHMIN: # %bb.0:
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: fmv.x.w a1, fa0
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: vmv.v.x v8, a1
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: vse16.v v8, (a0)
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: ret
+;
+; RV64_ZVFHMIN-ZFHMIN-LABEL: splat_16f16:
+; RV64_ZVFHMIN-ZFHMIN: # %bb.0:
+; RV64_ZVFHMIN-ZFHMIN-NEXT: fmv.x.h a1, fa0
+; RV64_ZVFHMIN-ZFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64_ZVFHMIN-ZFHMIN-NEXT: vmv.v.x v8, a1
+; RV64_ZVFHMIN-ZFHMIN-NEXT: vse16.v v8, (a0)
+; RV64_ZVFHMIN-ZFHMIN-NEXT: ret
%a = insertelement <16 x half> poison, half %y, i32 0
%b = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> zeroinitializer
store <16 x half> %b, ptr %x
@@ -111,6 +128,46 @@ define void @splat_v4f64(ptr %x, double %y) {
ret void
}
+define void @splat_64f16(ptr %x, half %y) {
+; CHECK-RV32-LABEL: splat_64f16:
+; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: li a1, 64
+; CHECK-RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32-NEXT: vfmv.v.f v8, fa0
+; CHECK-RV32-NEXT: vse16.v v8, (a0)
+; CHECK-RV32-NEXT: ret
+;
+; RV64-ZVFH-LABEL: splat_64f16:
+; RV64-ZVFH: # %bb.0:
+; RV64-ZVFH-NEXT: li a1, 64
+; RV64-ZVFH-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFH-NEXT: vfmv.v.f v8, fa0
+; RV64-ZVFH-NEXT: vse16.v v8, (a0)
+; RV64-ZVFH-NEXT: ret
+;
+; RV64-ZVFHMIN-NOZFHMIN-LABEL: splat_64f16:
+; RV64-ZVFHMIN-NOZFHMIN: # %bb.0:
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: fmv.x.w a1, fa0
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: li a2, 64
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: vmv.v.x v8, a1
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: vse16.v v8, (a0)
+; RV64-ZVFHMIN-NOZFHMIN-NEXT: ret
+;
+; RV64_ZVFHMIN-ZFHMIN-LABEL: splat_64f16:
+; RV64_ZVFHMIN-ZFHMIN: # %bb.0:
+; RV64_ZVFHMIN-ZFHMIN-NEXT: fmv.x.h a1, fa0
+; RV64_ZVFHMIN-ZFHMIN-NEXT: li a2, 64
+; RV64_ZVFHMIN-ZFHMIN-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; RV64_ZVFHMIN-ZFHMIN-NEXT: vmv.v.x v8, a1
+; RV64_ZVFHMIN-ZFHMIN-NEXT: vse16.v v8, (a0)
+; RV64_ZVFHMIN-ZFHMIN-NEXT: ret
+ %a = insertelement <64 x half> poison, half %y, i32 0
+ %b = shufflevector <64 x half> %a, <64 x half> poison, <64 x i32> zeroinitializer
+ store <64 x half> %b, ptr %x
+ ret void
+}
+
define void @splat_zero_v8f16(ptr %x) {
; CHECK-LABEL: splat_zero_v8f16:
; CHECK: # %bb.0:
@@ -268,3 +325,5 @@ define void @splat_negzero_v4f64(ptr %x) {
store <4 x double> splat (double -0.0), ptr %x
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV64-ZVFHMIN: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index fb9c0a57fd1bee..0874e23d9a5463 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -685,129 +685,146 @@ define void @fabs_v6f16(ptr %x) {
;
; ZVFHMIN-ZFHIN-RV32-LABEL: fabs_v6f16:
; ZVFHMIN-ZFHIN-RV32: # %bb.0:
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -64
-; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 64
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -48
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 28(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 21(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 41(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 17(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 58(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 54(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 52(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 50(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa2, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 48(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 48
-; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 46(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 44(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 42(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 40(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 40
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 48
; ZVFHMIN-ZFHIN-RV32-NEXT: ret
;
; ZVFHMIN-ZFHIN-RV64-LABEL: fabs_v6f16:
; ZVFHMIN-ZFHIN-RV64: # %bb.0:
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -80
-; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 80
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -64
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 24(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 25(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 17(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 17(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 25(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 17(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 56(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 74(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 40(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 70(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 66(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 64
-; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 64
; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
@@ -995,16 +1012,12 @@ define void @copysign_vf_v8f16(ptr %x, half %y) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vse16.v v9, (a0)
@@ -1030,16 +1043,16 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV32: # %bb.0:
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: li a2, 192
+; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v9, v8
+; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v9, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -1054,16 +1067,16 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV64: # %bb.0:
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: li a2, 192
+; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v9, v8
+; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v9, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -1352,10 +1365,6 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; ZVFHMIN-RV64-NEXT: vle64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: mv a2, sp
-; ZVFHMIN-RV64-NEXT: vse64.v v8, (a2)
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a2)
; ZVFHMIN-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle32.v v9, (a1)
; ZVFHMIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
@@ -2320,16 +2329,12 @@ define void @fadd_vf_v8f16(ptr %x, half %y) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v8, v9, v8
+; ZVFHMIN-NEXT: vfadd.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vse16.v v9, (a0)
@@ -2355,16 +2360,16 @@ define void @fadd_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV32: # %bb.0:
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: li a2, 192
+; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v9, v8
+; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v9, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -2379,16 +2384,16 @@ define void @fadd_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV64: # %bb.0:
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: li a2, 192
+; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v9, v8
+; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v9, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -2458,16 +2463,12 @@ define void @fadd_fv_v8f16(ptr %x, half %y) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9
+; ZVFHMIN-NEXT: vfadd.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vse16.v v9, (a0)
@@ -2493,16 +2494,16 @@ define void @fadd_fv_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV32: # %bb.0:
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: li a2, 192
+; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v8, v9
+; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v10, v9
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -2517,16 +2518,16 @@ define void @fadd_fv_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV64: # %bb.0:
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: li a2, 192
+; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v8, v9
+; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v10, v9
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -2596,16 +2597,12 @@ define void @fsub_vf_v8f16(ptr %x, half %y) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v8, v9, v8
+; ZVFHMIN-NEXT: vfsub.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vse16.v v9, (a0)
@@ -2631,16 +2628,16 @@ define void @fsub_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV32: # %bb.0:
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: li a2, 192
+; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v9, v8
+; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v9, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -2655,16 +2652,16 @@ define void @fsub_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV64: # %bb.0:
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: li a2, 192
+; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v9, v8
+; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v9, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -2734,16 +2731,12 @@ define void @fsub_fv_v8f16(ptr %x, half %y) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9
+; ZVFHMIN-NEXT: vfsub.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vse16.v v9, (a0)
@@ -2769,16 +2762,16 @@ define void @fsub_fv_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV32: # %bb.0:
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: li a2, 192
+; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v8, v9
+; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v10, v9
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -2793,16 +2786,16 @@ define void @fsub_fv_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV64: # %bb.0:
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: li a2, 192
+; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v8, v9
+; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v10, v9
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -2872,16 +2865,12 @@ define void @fmul_vf_v8f16(ptr %x, half %y) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v8, v9, v8
+; ZVFHMIN-NEXT: vfmul.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vse16.v v9, (a0)
@@ -2907,16 +2896,16 @@ define void @fmul_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV32: # %bb.0:
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: li a2, 192
+; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v9, v8
+; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v9, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -2931,16 +2920,16 @@ define void @fmul_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV64: # %bb.0:
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: li a2, 192
+; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v9, v8
+; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v9, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -3010,16 +2999,12 @@ define void @fmul_fv_v8f16(ptr %x, half %y) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9
+; ZVFHMIN-NEXT: vfmul.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vse16.v v9, (a0)
@@ -3045,16 +3030,16 @@ define void @fmul_fv_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV32: # %bb.0:
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: li a2, 192
+; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v8, v9
+; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v10, v9
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -3069,16 +3054,16 @@ define void @fmul_fv_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV64: # %bb.0:
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: li a2, 192
+; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v8, v9
+; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v10, v9
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -3148,16 +3133,12 @@ define void @fdiv_vf_v8f16(ptr %x, half %y) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v8, v9, v8
+; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vse16.v v9, (a0)
@@ -3183,16 +3164,16 @@ define void @fdiv_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV32: # %bb.0:
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: li a2, 192
+; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfdiv.vv v8, v9, v8
+; ZVFHMIN-RV32-NEXT: vfdiv.vv v8, v9, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -3207,16 +3188,16 @@ define void @fdiv_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV64: # %bb.0:
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: li a2, 192
+; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfdiv.vv v8, v9, v8
+; ZVFHMIN-RV64-NEXT: vfdiv.vv v8, v9, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -3286,16 +3267,12 @@ define void @fdiv_fv_v8f16(ptr %x, half %y) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9
+; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vse16.v v9, (a0)
@@ -3321,16 +3298,16 @@ define void @fdiv_fv_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV32: # %bb.0:
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: li a2, 192
+; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfdiv.vv v8, v8, v9
+; ZVFHMIN-RV32-NEXT: vfdiv.vv v8, v10, v9
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -3345,16 +3322,16 @@ define void @fdiv_fv_v6f16(ptr %x, half %y) {
; ZVFHMIN-RV64: # %bb.0:
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: li a2, 192
+; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfdiv.vv v8, v8, v9
+; ZVFHMIN-RV64-NEXT: vfdiv.vv v8, v10, v9
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -3424,22 +3401,18 @@ define void @fma_vf_v8f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-LABEL: fma_vf_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
@@ -3465,25 +3438,25 @@ define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: li a2, 192
+; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v10, a2
+; ZVFHMIN-RV32-NEXT: vmerge.vxm v10, v10, a1, v0
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v10
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v11
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMIN-RV32-NEXT: vfmadd.vv v9, v11, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
; ZVFHMIN-RV32-NEXT: ret
;
; ZVFHMIN-RV64-LABEL: fma_vf_v6f16:
@@ -3491,22 +3464,22 @@ define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: li a2, 192
+; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV64-NEXT: vmv.v.x v10, a2
+; ZVFHMIN-RV64-NEXT: vmerge.vxm v10, v10, a1, v0
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v10
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v11
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMIN-RV64-NEXT: vfmadd.vv v9, v11, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT: ret
@@ -3577,22 +3550,18 @@ define void @fma_fv_v8f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-LABEL: fma_fv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
@@ -3618,25 +3587,25 @@ define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: li a2, 192
+; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v10, a2
+; ZVFHMIN-RV32-NEXT: vmerge.vxm v10, v10, a1, v0
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v10
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v11
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMIN-RV32-NEXT: vfmadd.vv v9, v11, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
; ZVFHMIN-RV32-NEXT: ret
;
; ZVFHMIN-RV64-LABEL: fma_fv_v6f16:
@@ -3644,22 +3613,22 @@ define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: li a2, 192
+; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV64-NEXT: vmv.v.x v10, a2
+; ZVFHMIN-RV64-NEXT: vmerge.vxm v10, v10, a1, v0
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v10
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v11
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMIN-RV64-NEXT: vfmadd.vv v9, v11, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT: ret
@@ -3730,26 +3699,22 @@ define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-LABEL: fmsub_vf_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v10
+; ZVFHMIN-NEXT: vfneg.v v8, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v11, v9, v8
+; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
@@ -3777,30 +3742,30 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: li a2, 192
+; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v10, a2
+; ZVFHMIN-RV32-NEXT: vmerge.vxm v10, v10, a1, v0
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfneg.v v9, v10
+; ZVFHMIN-RV32-NEXT: vfneg.v v9, v11
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v11, v9
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v11
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmacc.vv v11, v9, v8
+; ZVFHMIN-RV32-NEXT: vfmacc.vv v8, v9, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
+; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
+; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT: ret
;
; ZVFHMIN-RV64-LABEL: fmsub_vf_v6f16:
@@ -3808,27 +3773,27 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: li a2, 192
+; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-RV64-NEXT: vmv.v.x v10, a2
+; ZVFHMIN-RV64-NEXT: vmerge.vxm v10, v10, a1, v0
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfneg.v v9, v10
+; ZVFHMIN-RV64-NEXT: vfneg.v v9, v11
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v11, v9
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v11
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmacc.vv v11, v9, v8
+; ZVFHMIN-RV64-NEXT: vfmacc.vv v8, v9, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT: ret
@@ -3956,13 +3921,13 @@ define void @trunc_v8f16(ptr %x) {
;
; ZVFHMIN-ZFH-RV32-LABEL: trunc_v8f16:
; ZVFHMIN-ZFH-RV32: # %bb.0:
-; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
-; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 2(sp)
; ZVFHMIN-ZFH-RV32-NEXT: lui a1, %hi(.LCPI115_0)
; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, %lo(.LCPI115_0)(a1)
; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
@@ -3973,92 +3938,101 @@ define void @trunc_v8f16(ptr %x) {
; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_2:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 30(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 12(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa1
; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_4
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.3:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa1, rtz
; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa1, fa3, fa1
; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_4:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 28(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa2
; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_6
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.5:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa2, rtz
; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa3, fa2
; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_6:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 26(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa0, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa0, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa1
; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_8
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.7:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa3, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa1, a1, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa3, fa1, fa3
; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_8:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 24(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 6(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_10
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB115_10
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.9:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a2, fa1, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a2, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa1, fa4, fa1
; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_10:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 22(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 4(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_12
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa2, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB115_12
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.11:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a3, fa4, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa2, a3, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa2, fa4
; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_12:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 20(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 2(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_14
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a3
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB115_14
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.13:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa2, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a1, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa4, fa2
; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_14:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 18(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa2
; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_16
+; ZVFHMIN-ZFH-RV32-NEXT: flt.h a2, fa3, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: beqz a2, .LBB115_16
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.15:
; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa5, a1, rtz
; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa5, fa4
; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_16:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 16(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
-; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFH-RV32-NEXT: ret
;
; ZVFHMIN-ZFH-RV64-LABEL: trunc_v8f16:
; ZVFHMIN-ZFH-RV64: # %bb.0:
-; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
-; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp)
; ZVFHMIN-ZFH-RV64-NEXT: lui a1, %hi(.LCPI115_0)
; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, %lo(.LCPI115_0)(a1)
; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
@@ -4069,92 +4043,101 @@ define void @trunc_v8f16(ptr %x) {
; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_2:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 30(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 12(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa1
; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_4
; ZVFHMIN-ZFH-RV64-NEXT: # %bb.3:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa1, rtz
; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa1, fa3, fa1
; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_4:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 28(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa2
; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_6
; ZVFHMIN-ZFH-RV64-NEXT: # %bb.5:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa2, rtz
; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa2, fa3, fa2
; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_6:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 26(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 8(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa0, fa3
+; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa0, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa1
; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_8
; ZVFHMIN-ZFH-RV64-NEXT: # %bb.7:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa3, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa1, a1, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa3, fa1, fa3
; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_8:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 24(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 6(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_10
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa4, fa1
+; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB115_10
; ZVFHMIN-ZFH-RV64-NEXT: # %bb.9:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a2, fa1, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa4, a2, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa1, fa4, fa1
; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_10:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 22(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 4(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_12
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa2
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa2, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa1
+; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB115_12
; ZVFHMIN-ZFH-RV64-NEXT: # %bb.11:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a3, fa4, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa2, a3, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa2, fa4
; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_12:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 20(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_14
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa3
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a3
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa4, fa2
+; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB115_14
; ZVFHMIN-ZFH-RV64-NEXT: # %bb.13:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa2, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa4, a1, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa2, fa4, fa2
; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_14:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 18(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa2
; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_16
+; ZVFHMIN-ZFH-RV64-NEXT: flt.h a2, fa3, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: beqz a2, .LBB115_16
; ZVFHMIN-ZFH-RV64-NEXT: # %bb.15:
; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa5, a1, rtz
; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa5, fa4
; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_16:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 16(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
-; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFH-RV64-NEXT: ret
;
; ZVFHMIN-ZFHIN-RV32-LABEL: trunc_v8f16:
; ZVFHMIN-ZFHIN-RV32: # %bb.0:
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -32
-; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 307200
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.w.x fa5, a1
@@ -4166,107 +4149,116 @@ define void @trunc_v8f16(ptr %x) {
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_2:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 30(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa2, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa2
; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_4
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.3:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa2, rtz
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa2, fa3, fa2
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_4:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa3, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa1, fa2
; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_6
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.5:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa3, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a1, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa3, fa2, fa3
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_6:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 26(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa0, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa0, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa1
; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_8
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.7:
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a1, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa1, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_8:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_10
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa2, fa1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa2
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a2, fa1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a2, .LBB115_10
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.9:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a2, fa2, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a2, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa2, fa1, fa2
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_10:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 22(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_12
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a3, fa1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa2
+; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a3, .LBB115_12
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.11:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a3, fa4, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a3, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa2, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_12:
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 12(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa3
; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_14
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a3, fa3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a3, .LBB115_14
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.13:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a2, fa4, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a2, rtz
; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_14:
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 18(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa3
; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_16
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a2, fa3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a2, .LBB115_16
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.15:
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa5, a1, rtz
; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa5, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_16:
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa5, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16
-; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFHIN-RV32-NEXT: ret
;
; ZVFHMIN-ZFHIN-RV64-LABEL: trunc_v8f16:
; ZVFHMIN-ZFHIN-RV64: # %bb.0:
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -32
-; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa5
; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 307200
; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.w.x fa5, a1
@@ -4278,96 +4270,105 @@ define void @trunc_v8f16(ptr %x) {
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_2:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 30(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa2, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa2
; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_4
; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.3:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa2, rtz
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa2, fa3, fa2
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_4:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 28(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa3, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa1, fa2
; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_6
; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.5:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa3, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa2, a1, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa3, fa2, fa3
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_6:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 26(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa0, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa2, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa0, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa0, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa1
; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_8
; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.7:
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa1, a1, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa1, fa4
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_8:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_10
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa2
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa2, fa1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa2
+; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a2, fa1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa3, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a2, .LBB115_10
; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.9:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a2, fa2, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa1, a2, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa2, fa1, fa2
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_10:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 22(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_12
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa2, fa2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a3, fa1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa2
+; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a3, .LBB115_12
; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.11:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a3, fa4, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa2, a3, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa2, fa4
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_12:
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 12(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 20(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa3
; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_14
+; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a3, fa3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a3, .LBB115_14
; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.13:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a2, fa4, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a2, rtz
; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_14:
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 18(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa3
; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_16
+; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a2, fa3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a2, .LBB115_16
; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.15:
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa5, a1, rtz
; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa5, fa4
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_16:
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa5, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
-; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <8 x half>, ptr %x
%b = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
@@ -4395,13 +4396,13 @@ define void @trunc_v6f16(ptr %x) {
;
; ZVFHMIN-ZFH-RV32-LABEL: trunc_v6f16:
; ZVFHMIN-ZFH-RV32: # %bb.0:
-; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -48
-; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 2(sp)
; ZVFHMIN-ZFH-RV32-NEXT: lui a1, %hi(.LCPI116_0)
; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, %lo(.LCPI116_0)(a1)
; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
@@ -4412,103 +4413,111 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_2:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 46(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 12(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa1
; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_4
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.3:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa1, rtz
; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa1, fa3, fa1
; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_4:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 44(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa2
; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_6
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.5:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa2, rtz
; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa3, fa2
; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_6:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 42(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_8
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa0, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: flt.h a2, fa0, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: beqz a2, .LBB116_8
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.7:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a2, fa3, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa1, a2, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa3, fa1, fa3
; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_8:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 40(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 6(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_10
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB116_10
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.9:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a3, fa1, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a3, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa1, fa4, fa1
; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_10:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 38(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 4(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa2, fa3
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_12
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa2, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: flt.h a4, fa2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: beqz a4, .LBB116_12
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.11:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa3, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa2, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa3, fa2, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a4, fa4, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa2, a4, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa2, fa4
; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_12:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa3, 36(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 2(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa1, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa1, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_14
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a6, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a6
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: flt.h a6, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: beqz a6, .LBB116_14
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.13:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa2, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa1, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa1, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a5, fa2, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a5, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa4, fa2
; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_14:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa2, 34(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 0(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa0, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa0, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_16
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: flt.h a6, fa3, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: beqz a6, .LBB116_16
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.15:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa5, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa1, fa5, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a5, fa4, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa5, a5, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa5, fa4
; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_16:
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa1, 32(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 32
-; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 30(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa3, 28(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa2, 26(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa1, 24(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 24
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-ZFH-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFH-RV32-NEXT: ret
;
; ZVFHMIN-ZFH-RV64-LABEL: trunc_v6f16:
; ZVFHMIN-ZFH-RV64: # %bb.0:
-; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
-; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp)
; ZVFHMIN-ZFH-RV64-NEXT: lui a1, %hi(.LCPI116_0)
; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, %lo(.LCPI116_0)(a1)
; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
@@ -4519,96 +4528,105 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_2:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 30(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 12(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa1
; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_4
; ZVFHMIN-ZFH-RV64-NEXT: # %bb.3:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa1, rtz
; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa1, fa3, fa1
; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_4:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 28(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa2
; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_6
; ZVFHMIN-ZFH-RV64-NEXT: # %bb.5:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa2, rtz
; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa2, fa3, fa2
; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_6:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 26(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 8(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa0, fa3
+; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa0, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa1
; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_8
; ZVFHMIN-ZFH-RV64-NEXT: # %bb.7:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa3, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa1, a1, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa3, fa1, fa3
; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_8:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 24(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 6(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_10
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa4, fa1
+; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB116_10
; ZVFHMIN-ZFH-RV64-NEXT: # %bb.9:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a2, fa1, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa4, a2, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa1, fa4, fa1
; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_10:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 22(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 4(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_12
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa2
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa2, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa1
+; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB116_12
; ZVFHMIN-ZFH-RV64-NEXT: # %bb.11:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a3, fa4, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa2, a3, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa2, fa4
; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_12:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 20(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_14
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa3
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a3
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa4, fa2
+; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB116_14
; ZVFHMIN-ZFH-RV64-NEXT: # %bb.13:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa2, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa4, a1, rtz
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa2, fa4, fa2
; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_14:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 18(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa2
; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_16
+; ZVFHMIN-ZFH-RV64-NEXT: flt.h a2, fa3, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: beqz a2, .LBB116_16
; ZVFHMIN-ZFH-RV64-NEXT: # %bb.15:
; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa5, a1, rtz
; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa5, fa4
; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_16:
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 16(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
-; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFH-RV64-NEXT: ret
;
; ZVFHMIN-ZFHIN-RV32-LABEL: trunc_v6f16:
; ZVFHMIN-ZFHIN-RV32: # %bb.0:
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -48
-; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 307200
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.w.x fa5, a1
@@ -4620,118 +4638,126 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_2:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 46(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa2, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa2
; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_4
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.3:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa2, rtz
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa2, fa3, fa2
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_4:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 44(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa3, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa1, fa2
; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_6
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.5:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa3, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a1, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa3, fa2, fa3
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_6:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 42(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_8
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa0, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa0, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a2, fa0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa1
+; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a2, .LBB116_8
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.7:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a2, fa4, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a2, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa1, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_8:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 40(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_10
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa2, fa1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa2
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a3, fa1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a3, .LBB116_10
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.9:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a3, fa2, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a3, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa2, fa1, fa2
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_10:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 38(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa3, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa2, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_12
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a4, fa1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa2
+; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a4, .LBB116_12
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.11:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa3, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa3, fa2, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a4, fa4, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a4, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa2, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_12:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 36(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa2, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa2, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_14
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a6, fa3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a6, .LBB116_14
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.13:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa2, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa2, fa1, fa2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a5, fa4, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a5, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_14:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 34(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa1, fa1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa0, fa1
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_16
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a6, fa3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a6, .LBB116_16
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.15:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa5, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa1, fa5, fa1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a5, fa4, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa5, a5, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa5, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_16:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa5, fa1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 32
-; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 30(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 26(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 24
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa5, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 48
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFHIN-RV32-NEXT: ret
;
; ZVFHMIN-ZFHIN-RV64-LABEL: trunc_v6f16:
; ZVFHMIN-ZFHIN-RV64: # %bb.0:
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -32
-; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa5
; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 307200
; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.w.x fa5, a1
@@ -4743,100 +4769,109 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_2:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 30(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa2, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa2
; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_4
; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.3:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa2, rtz
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa2, fa3, fa2
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_4:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 28(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa3, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa1, fa2
; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_6
; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.5:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa3, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa2, a1, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa3, fa2, fa3
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_6:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 26(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa0, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa2, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa0, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa0, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa1
; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_8
; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.7:
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa1, a1, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa1, fa4
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_8:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_10
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa2
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa2, fa1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa2
+; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a2, fa1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa3, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a2, .LBB116_10
; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.9:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a2, fa2, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa1, a2, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa2, fa1, fa2
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_10:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 22(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_12
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa2, fa2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a3, fa1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa2
+; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a3, .LBB116_12
; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.11:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a3, fa4, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa2, a3, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa2, fa4
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_12:
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 12(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 20(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa3
; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_14
+; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a3, fa3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a3, .LBB116_14
; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.13:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a2, fa4, rtz
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a2, rtz
; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_14:
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 18(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa3
; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_16
+; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a2, fa3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a2, .LBB116_16
; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.15:
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa5, a1, rtz
; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa5, fa4
; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_16:
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa5, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
-; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
index 33e9cde4c30abb..8e2a225622eec2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
@@ -64,12 +64,9 @@ define <8 x i1> @fcmp_oeq_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
;
; ZVFHMIN-LABEL: fcmp_oeq_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -91,12 +88,9 @@ define <8 x i1> @fcmp_oeq_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
;
; ZVFHMIN-LABEL: fcmp_oeq_vf_swap_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -138,12 +132,9 @@ define <8 x i1> @fcmp_ogt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
;
; ZVFHMIN-LABEL: fcmp_ogt_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -165,12 +156,9 @@ define <8 x i1> @fcmp_ogt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
;
; ZVFHMIN-LABEL: fcmp_ogt_vf_swap_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -212,12 +200,9 @@ define <8 x i1> @fcmp_oge_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
;
; ZVFHMIN-LABEL: fcmp_oge_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -239,12 +224,9 @@ define <8 x i1> @fcmp_oge_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
;
; ZVFHMIN-LABEL: fcmp_oge_vf_swap_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -286,12 +268,9 @@ define <8 x i1> @fcmp_olt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
;
; ZVFHMIN-LABEL: fcmp_olt_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -313,12 +292,9 @@ define <8 x i1> @fcmp_olt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
;
; ZVFHMIN-LABEL: fcmp_olt_vf_swap_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -360,12 +336,9 @@ define <8 x i1> @fcmp_ole_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
;
; ZVFHMIN-LABEL: fcmp_ole_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -387,12 +360,9 @@ define <8 x i1> @fcmp_ole_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
;
; ZVFHMIN-LABEL: fcmp_ole_vf_swap_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -439,12 +409,9 @@ define <8 x i1> @fcmp_one_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
;
; ZVFHMIN-LABEL: fcmp_one_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -469,12 +436,9 @@ define <8 x i1> @fcmp_one_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
;
; ZVFHMIN-LABEL: fcmp_one_vf_swap_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -526,12 +490,9 @@ define <8 x i1> @fcmp_ord_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
;
; ZVFHMIN-LABEL: fcmp_ord_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t
@@ -560,12 +521,9 @@ define <8 x i1> @fcmp_ord_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
;
; ZVFHMIN-LABEL: fcmp_ord_vf_swap_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t
@@ -615,12 +573,9 @@ define <8 x i1> @fcmp_ueq_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
;
; ZVFHMIN-LABEL: fcmp_ueq_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -645,12 +600,9 @@ define <8 x i1> @fcmp_ueq_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
;
; ZVFHMIN-LABEL: fcmp_ueq_vf_swap_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -695,12 +647,9 @@ define <8 x i1> @fcmp_ugt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
;
; ZVFHMIN-LABEL: fcmp_ugt_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -723,12 +672,9 @@ define <8 x i1> @fcmp_ugt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
;
; ZVFHMIN-LABEL: fcmp_ugt_vf_swap_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -772,12 +718,9 @@ define <8 x i1> @fcmp_uge_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
;
; ZVFHMIN-LABEL: fcmp_uge_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -800,12 +743,9 @@ define <8 x i1> @fcmp_uge_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
;
; ZVFHMIN-LABEL: fcmp_uge_vf_swap_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -849,12 +789,9 @@ define <8 x i1> @fcmp_ult_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
;
; ZVFHMIN-LABEL: fcmp_ult_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -877,12 +814,9 @@ define <8 x i1> @fcmp_ult_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
;
; ZVFHMIN-LABEL: fcmp_ult_vf_swap_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -926,12 +860,9 @@ define <8 x i1> @fcmp_ule_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
;
; ZVFHMIN-LABEL: fcmp_ule_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -954,12 +885,9 @@ define <8 x i1> @fcmp_ule_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
;
; ZVFHMIN-LABEL: fcmp_ule_vf_swap_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -1001,12 +929,9 @@ define <8 x i1> @fcmp_une_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
;
; ZVFHMIN-LABEL: fcmp_une_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -1028,12 +953,9 @@ define <8 x i1> @fcmp_une_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
;
; ZVFHMIN-LABEL: fcmp_une_vf_swap_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -1084,12 +1006,9 @@ define <8 x i1> @fcmp_uno_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
;
; ZVFHMIN-LABEL: fcmp_uno_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t
@@ -1118,12 +1037,9 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
;
; ZVFHMIN-LABEL: fcmp_uno_vf_swap_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
index f023c760f14a74..7a7236235d1203 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
@@ -61,16 +61,13 @@ define <2 x half> @vfadd_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zero
;
; ZVFHMIN-LABEL: vfadd_vf_v2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -89,16 +86,13 @@ define <2 x half> @vfadd_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext
;
; ZVFHMIN-LABEL: vfadd_vf_v2f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8
+; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -184,16 +178,13 @@ define <4 x half> @vfadd_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zero
;
; ZVFHMIN-LABEL: vfadd_vf_v4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -212,16 +203,13 @@ define <4 x half> @vfadd_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext
;
; ZVFHMIN-LABEL: vfadd_vf_v4f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8
+; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -284,12 +272,9 @@ define <8 x half> @vfadd_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero
;
; ZVFHMIN-LABEL: vfadd_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -312,12 +297,9 @@ define <8 x half> @vfadd_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext
;
; ZVFHMIN-LABEL: vfadd_vf_v8f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -384,12 +366,9 @@ define <16 x half> @vfadd_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32
;
; ZVFHMIN-LABEL: vfadd_vf_v16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
@@ -412,12 +391,9 @@ define <16 x half> @vfadd_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe
;
; ZVFHMIN-LABEL: vfadd_vf_v16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
index 9fb8377d5a5ef9..cb83e5ff4f2b32 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
@@ -61,16 +61,13 @@ define <2 x half> @vfdiv_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zero
;
; ZVFHMIN-LABEL: vfdiv_vf_v2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT: vfdiv.vv v9, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -89,16 +86,13 @@ define <2 x half> @vfdiv_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext
;
; ZVFHMIN-LABEL: vfdiv_vf_v2f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v8
+; ZVFHMIN-NEXT: vfdiv.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -184,16 +178,13 @@ define <4 x half> @vfdiv_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zero
;
; ZVFHMIN-LABEL: vfdiv_vf_v4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT: vfdiv.vv v9, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -212,16 +203,13 @@ define <4 x half> @vfdiv_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext
;
; ZVFHMIN-LABEL: vfdiv_vf_v4f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v8
+; ZVFHMIN-NEXT: vfdiv.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -284,12 +272,9 @@ define <8 x half> @vfdiv_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero
;
; ZVFHMIN-LABEL: vfdiv_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -312,12 +297,9 @@ define <8 x half> @vfdiv_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext
;
; ZVFHMIN-LABEL: vfdiv_vf_v8f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -384,12 +366,9 @@ define <16 x half> @vfdiv_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32
;
; ZVFHMIN-LABEL: vfdiv_vf_v16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
@@ -412,12 +391,9 @@ define <16 x half> @vfdiv_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe
;
; ZVFHMIN-LABEL: vfdiv_vf_v16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll
index e2e48cee3eacc2..6dcebc9763d82b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll
@@ -64,17 +64,14 @@ define <2 x half> @vfma_vf_v2f16(<2 x half> %va, half %b, <2 x half> %vc, <2 x i
;
; ZVFHMIN-LABEL: vfma_vf_v2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -93,17 +90,14 @@ define <2 x half> @vfma_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> %v
;
; ZVFHMIN-LABEL: vfma_vf_v2f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -169,17 +163,14 @@ define <4 x half> @vfma_vf_v4f16(<4 x half> %va, half %b, <4 x half> %vc, <4 x i
;
; ZVFHMIN-LABEL: vfma_vf_v4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -198,17 +189,14 @@ define <4 x half> @vfma_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> %v
;
; ZVFHMIN-LABEL: vfma_vf_v4f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -274,17 +262,14 @@ define <8 x half> @vfma_vf_v8f16(<8 x half> %va, half %b, <8 x half> %vc, <8 x i
;
; ZVFHMIN-LABEL: vfma_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v10, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v12, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
@@ -303,17 +288,14 @@ define <8 x half> @vfma_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> %v
;
; ZVFHMIN-LABEL: vfma_vf_v8f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v10
+; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v12
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
@@ -379,17 +361,14 @@ define <16 x half> @vfma_vf_v16f16(<16 x half> %va, half %b, <16 x half> %vc, <1
;
; ZVFHMIN-LABEL: vfma_vf_v16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
@@ -408,17 +387,14 @@ define <16 x half> @vfma_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x half
;
; ZVFHMIN-LABEL: vfma_vf_v16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v12
+; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
index a9d6b5f047ebb1..11420a23285d07 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
@@ -40,16 +40,13 @@ define <2 x half> @vfmax_v2f16_vf(<2 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmax_v2f16_vf:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8
+; ZVFHMIN-NEXT: vfmax.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -68,16 +65,13 @@ define <2 x half> @vfmax_v2f16_fv(<2 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmax_v2f16_fv:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -119,16 +113,13 @@ define <4 x half> @vfmax_v4f16_vf(<4 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmax_v4f16_vf:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8
+; ZVFHMIN-NEXT: vfmax.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -147,16 +138,13 @@ define <4 x half> @vfmax_v4f16_fv(<4 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmax_v4f16_fv:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -198,12 +186,9 @@ define <8 x half> @vfmax_v8f16_vf(<8 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmax_v8f16_vf:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -226,12 +211,9 @@ define <8 x half> @vfmax_v8f16_fv(<8 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmax_v8f16_fv:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -277,12 +259,9 @@ define <16 x half> @vfmax_v16f16_vf(<16 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmax_v16f16_vf:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -305,12 +284,9 @@ define <16 x half> @vfmax_v16f16_fv(<16 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmax_v16f16_fv:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
index d7c6fb3568f66d..e8ae32a45f7cd4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
@@ -40,16 +40,13 @@ define <2 x half> @vfmin_v2f16_vf(<2 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmin_v2f16_vf:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8
+; ZVFHMIN-NEXT: vfmin.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -68,16 +65,13 @@ define <2 x half> @vfmin_v2f16_fv(<2 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmin_v2f16_fv:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT: vfmin.vv v9, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -119,16 +113,13 @@ define <4 x half> @vfmin_v4f16_vf(<4 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmin_v4f16_vf:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8
+; ZVFHMIN-NEXT: vfmin.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -147,16 +138,13 @@ define <4 x half> @vfmin_v4f16_fv(<4 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmin_v4f16_fv:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT: vfmin.vv v9, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -198,12 +186,9 @@ define <8 x half> @vfmin_v8f16_vf(<8 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmin_v8f16_vf:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -226,12 +211,9 @@ define <8 x half> @vfmin_v8f16_fv(<8 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmin_v8f16_fv:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -277,12 +259,9 @@ define <16 x half> @vfmin_v16f16_vf(<16 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmin_v16f16_vf:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -305,12 +284,9 @@ define <16 x half> @vfmin_v16f16_fv(<16 x half> %a, half %b) {
;
; ZVFHMIN-LABEL: vfmin_v16f16_fv:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
index 64ce0a12de8cfb..86f140723d7f86 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
@@ -61,16 +61,13 @@ define <2 x half> @vfmul_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zero
;
; ZVFHMIN-LABEL: vfmul_vf_v2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT: vfmul.vv v9, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -89,16 +86,13 @@ define <2 x half> @vfmul_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext
;
; ZVFHMIN-LABEL: vfmul_vf_v2f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v9, v9, v8
+; ZVFHMIN-NEXT: vfmul.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -184,16 +178,13 @@ define <4 x half> @vfmul_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zero
;
; ZVFHMIN-LABEL: vfmul_vf_v4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT: vfmul.vv v9, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -212,16 +203,13 @@ define <4 x half> @vfmul_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext
;
; ZVFHMIN-LABEL: vfmul_vf_v4f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v9, v9, v8
+; ZVFHMIN-NEXT: vfmul.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -284,12 +272,9 @@ define <8 x half> @vfmul_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero
;
; ZVFHMIN-LABEL: vfmul_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -312,12 +297,9 @@ define <8 x half> @vfmul_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext
;
; ZVFHMIN-LABEL: vfmul_vf_v8f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -384,12 +366,9 @@ define <16 x half> @vfmul_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32
;
; ZVFHMIN-LABEL: vfmul_vf_v16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
@@ -412,12 +391,9 @@ define <16 x half> @vfmul_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe
;
; ZVFHMIN-LABEL: vfmul_vf_v16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll
index eb717a851ed467..d0a0bf516d3558 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll
@@ -61,16 +61,13 @@ define <2 x half> @vfsub_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zero
;
; ZVFHMIN-LABEL: vfsub_vf_v2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -89,16 +86,13 @@ define <2 x half> @vfsub_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext
;
; ZVFHMIN-LABEL: vfsub_vf_v2f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v9, v9, v8
+; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -184,16 +178,13 @@ define <4 x half> @vfsub_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zero
;
; ZVFHMIN-LABEL: vfsub_vf_v4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -212,16 +203,13 @@ define <4 x half> @vfsub_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext
;
; ZVFHMIN-LABEL: vfsub_vf_v4f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v9, v9, v8
+; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -284,12 +272,9 @@ define <8 x half> @vfsub_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero
;
; ZVFHMIN-LABEL: vfsub_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -312,12 +297,9 @@ define <8 x half> @vfsub_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext
;
; ZVFHMIN-LABEL: vfsub_vf_v8f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -384,12 +366,9 @@ define <16 x half> @vfsub_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32
;
; ZVFHMIN-LABEL: vfsub_vf_v16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
@@ -412,12 +391,9 @@ define <16 x half> @vfsub_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe
;
; ZVFHMIN-LABEL: vfsub_vf_v16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge-bf16.ll
index da7f9f56fcf169..4186a6b304a225 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge-bf16.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge-bf16.ll
@@ -24,11 +24,11 @@ define <2 x bfloat> @vpmerge_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x
define <2 x bfloat> @vpmerge_vf_v2bf16(bfloat %a, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v2bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vfmv.v.f v9, fa5
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: ret
%elt.head = insertelement <2 x bfloat> poison, bfloat %a, i32 0
%va = shufflevector <2 x bfloat> %elt.head, <2 x bfloat> poison, <2 x i32> zeroinitializer
@@ -52,11 +52,11 @@ define <4 x bfloat> @vpmerge_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x
define <4 x bfloat> @vpmerge_vf_v4bf16(bfloat %a, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v4bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.v.f v9, fa5
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: ret
%elt.head = insertelement <4 x bfloat> poison, bfloat %a, i32 0
%va = shufflevector <4 x bfloat> %elt.head, <4 x bfloat> poison, <4 x i32> zeroinitializer
@@ -80,11 +80,11 @@ define <8 x bfloat> @vpmerge_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x
define <8 x bfloat> @vpmerge_vf_v8bf16(bfloat %a, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v8bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmv.v.f v10, fa5
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: ret
%elt.head = insertelement <8 x bfloat> poison, bfloat %a, i32 0
%va = shufflevector <8 x bfloat> %elt.head, <8 x bfloat> poison, <8 x i32> zeroinitializer
@@ -108,11 +108,11 @@ define <16 x bfloat> @vpmerge_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <
define <16 x bfloat> @vpmerge_vf_v16bf16(bfloat %a, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v16bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmv.v.f v12, fa5
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%elt.head = insertelement <16 x bfloat> poison, bfloat %a, i32 0
%va = shufflevector <16 x bfloat> %elt.head, <16 x bfloat> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index 9f0561b394b819..bdf76dc63ddd85 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -846,11 +846,11 @@ define <2 x half> @vpmerge_vf_v2f16(half %a, <2 x half> %vb, <2 x i1> %m, i32 ze
;
; ZVFHMIN-LABEL: vpmerge_vf_v2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v9, v0
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <2 x half> poison, half %a, i32 0
%va = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -880,11 +880,11 @@ define <4 x half> @vpmerge_vf_v4f16(half %a, <4 x half> %vb, <4 x i1> %m, i32 ze
;
; ZVFHMIN-LABEL: vpmerge_vf_v4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v9, v0
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <4 x half> poison, half %a, i32 0
%va = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -914,11 +914,11 @@ define <8 x half> @vpmerge_vf_v8f16(half %a, <8 x half> %vb, <8 x i1> %m, i32 ze
;
; ZVFHMIN-LABEL: vpmerge_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, tu, mu
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v9, v0
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %a, i32 0
%va = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -948,11 +948,11 @@ define <16 x half> @vpmerge_vf_v16f16(half %a, <16 x half> %vb, <16 x i1> %m, i3
;
; ZVFHMIN-LABEL: vpmerge_vf_v16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, tu, mu
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, tu, ma
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <16 x half> poison, half %a, i32 0
%va = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
>From 7c40fadbe49e7af8904c92226dabc0b6a05463a1 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 29 Aug 2024 18:52:38 -0700
Subject: [PATCH 2/2] fixup! Remove FIXME about constant folding.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f11edc34dfbf5a..53a8777b1044a0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3947,9 +3947,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// Called by LegalizeDAG, we need to use XLenVT operations since we
// can't create illegal types.
if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
- // Manually constant fold.
- // FIXME: Add a constant fold combine for FMV_X_ANYEXTH.
- // FIXME: We need a load+FMV_X_ANYEXTH combine too.
+ // Manually constant fold so the integer build_vector can be lowered
+ // better. Waiting for DAGCombine will be too late.
APInt V =
C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
NewOps[I] = DAG.getConstant(V, DL, XLenVT);
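
For illustration only, here is a minimal standalone C++ sketch of the manual constant fold the updated comment describes: the raw bf16 bit pattern of an FP constant is treated as an integer and sign-extended to XLEN, so the resulting i16 build_vector can be lowered directly instead of waiting for DAGCombine. The sample value 2.5, XLEN=64, and the helper name are my assumptions; the patch itself does the same computation with APFloat::bitcastToAPInt() followed by APInt::sext(), not with the plain C++ below.

  // Sketch (assumptions: constant 2.5, XLEN = 64). Not the LLVM API.
  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  // Take the high 16 bits of the f32 encoding; for 2.5f this is exact.
  static uint16_t toBF16Bits(float F) {
    uint32_t Bits;
    std::memcpy(&Bits, &F, sizeof(Bits));
    return static_cast<uint16_t>(Bits >> 16);
  }

  int main() {
    uint16_t BF16 = toBF16Bits(2.5f);             // 2.5f = 0x40200000 -> 0x4020
    int64_t XLenVal = static_cast<int16_t>(BF16); // sign-extend to XLEN=64
    std::printf("bf16 bits = 0x%04x, XLen constant = 0x%016llx\n",
                (unsigned)BF16, (unsigned long long)XLenVal);
    return 0;
  }

Run as written, this prints "bf16 bits = 0x4020, XLen constant = 0x0000000000004020", mirroring the XLenVT constant that feeds NewOps[I] above.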