[llvm] [LegalizeVectorTypes] When widening don't check for libcalls if promoted (PR #111297)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 6 09:22:00 PDT 2024


https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/111297

When widening some FP ops, LegalizeVectorTypes checks whether the widened op may be scalarized and turned into a series of libcalls, and if so unrolls early to avoid unnecessary libcalls on the padded undef elements.

It currently checks whether the widened op is legal or custom to decide if it will be scalarized, but promoted ops also avoid scalarization.

This relaxes the check to also accept promoted ops, which prevents some illegal vector types on RISC-V from being scalarized when they could instead be widened.

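To illustrate the decision the one-line change adjusts, here is a minimal standalone sketch (not LLVM's actual API; `LegalizeAction`, `widenedOpHandledAsVector` and `shouldUnrollBeforeWidening` are simplified stand-ins for the TLI queries named in the patch): the early unroll should only kick in when the widened vector op would genuinely be scalarized, i.e. when it is neither legal, custom, nor promoted, and the scalar form would expand to a libcall.

```cpp
#include <cstdio>

// Simplified model of TargetLowering's legalize actions.
enum class LegalizeAction { Legal, Custom, Promote, Expand };

// Stand-in for TLI.isOperationLegalOrCustomOrPromote on the widened vector
// type: after this patch, Promote also counts as "stays a vector op".
bool widenedOpHandledAsVector(LegalizeAction WideVecAction) {
  return WideVecAction == LegalizeAction::Legal ||
         WideVecAction == LegalizeAction::Custom ||
         WideVecAction == LegalizeAction::Promote;
}

// Unroll at the original element count only when widening would end up
// scalarized into per-element libcalls (scalar op expands).
bool shouldUnrollBeforeWidening(LegalizeAction WideVecAction,
                                LegalizeAction ScalarAction) {
  return !widenedOpHandledAsVector(WideVecAction) &&
         ScalarAction == LegalizeAction::Expand;
}

int main() {
  std::printf("promote + scalar-expand -> unroll early? %d\n",
              shouldUnrollBeforeWidening(LegalizeAction::Promote,
                                         LegalizeAction::Expand));
  std::printf("expand  + scalar-expand -> unroll early? %d\n",
              shouldUnrollBeforeWidening(LegalizeAction::Expand,
                                         LegalizeAction::Expand));
  return 0;
}
```

With Zvfhmin, for example, the widened <8 x half> ops in the tests below are promoted to <8 x float> vector ops, so widening now wins over per-element `__extendhfsf2`/`__truncsfhf2` libcalls.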

From 243792c5aa69d7ac1180e9a030877d842e5bb4b4 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 7 Oct 2024 00:14:34 +0800
Subject: [PATCH] [LegalizeVectorTypes] When widening don't check for libcalls
 if promoted

When widening some FP ops, LegalizeVectorTypes checks whether the widened op may be scalarized and turned into a series of libcalls, and if so unrolls early to avoid unnecessary libcalls on the padded undef elements.

It currently checks whether the widened op is legal or custom to decide if it will be scalarized, but promoted ops also avoid scalarization.

This relaxes the check to also accept promoted ops, which prevents some illegal vector types on RISC-V from being scalarized when they could instead be widened.
---
 .../SelectionDAG/LegalizeVectorTypes.cpp      |    2 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll     | 1671 +----------------
 2 files changed, 95 insertions(+), 1578 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 0a22f06271984e..e7ae989fcc3494 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4441,7 +4441,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     // libcalls on the undef elements.
     EVT VT = N->getValueType(0);
     EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
-    if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
+    if (!TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), WideVecVT) &&
         TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
       Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
       return true;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index ea7829f2d6c658..297afd9fc96f9d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -1192,259 +1192,18 @@ define void @sqrt_v6f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; RV32-ZVFHMIN-LABEL: sqrt_v6f16:
-; RV32-ZVFHMIN:       # %bb.0:
-; RV32-ZVFHMIN-NEXT:    addi sp, sp, -48
-; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 48
-; RV32-ZVFHMIN-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    fsd fs0, 24(sp) # 8-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    .cfi_offset ra, -4
-; RV32-ZVFHMIN-NEXT:    .cfi_offset s0, -8
-; RV32-ZVFHMIN-NEXT:    .cfi_offset s1, -12
-; RV32-ZVFHMIN-NEXT:    .cfi_offset fs0, -24
-; RV32-ZVFHMIN-NEXT:    csrr a1, vlenb
-; RV32-ZVFHMIN-NEXT:    slli a1, a1, 1
-; RV32-ZVFHMIN-NEXT:    sub sp, sp, a1
-; RV32-ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
-; RV32-ZVFHMIN-NEXT:    mv s0, a0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fmv.s fs0, fa0
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fsqrt.s fa0, fa0
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fsqrt.s fa5, fs0
-; RV32-ZVFHMIN-NEXT:    fmv.x.w s1, fa0
-; RV32-ZVFHMIN-NEXT:    fmv.s fa0, fa5
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, s1
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fsqrt.s fa0, fa0
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 3
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fsqrt.s fa0, fa0
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 4
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fsqrt.s fa0, fa0
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 5
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fsqrt.s fa0, fa0
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV32-ZVFHMIN-NEXT:    vse16.v v8, (s0)
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    slli a0, a0, 1
-; RV32-ZVFHMIN-NEXT:    add sp, sp, a0
-; RV32-ZVFHMIN-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    fld fs0, 24(sp) # 8-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    addi sp, sp, 48
-; RV32-ZVFHMIN-NEXT:    ret
-;
-; RV64-ZVFHMIN-LABEL: sqrt_v6f16:
-; RV64-ZVFHMIN:       # %bb.0:
-; RV64-ZVFHMIN-NEXT:    addi sp, sp, -48
-; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 48
-; RV64-ZVFHMIN-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    .cfi_offset ra, -8
-; RV64-ZVFHMIN-NEXT:    .cfi_offset s0, -16
-; RV64-ZVFHMIN-NEXT:    .cfi_offset s1, -24
-; RV64-ZVFHMIN-NEXT:    .cfi_offset fs0, -32
-; RV64-ZVFHMIN-NEXT:    csrr a1, vlenb
-; RV64-ZVFHMIN-NEXT:    slli a1, a1, 1
-; RV64-ZVFHMIN-NEXT:    sub sp, sp, a1
-; RV64-ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
-; RV64-ZVFHMIN-NEXT:    mv s0, a0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fmv.s fs0, fa0
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fsqrt.s fa0, fa0
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fsqrt.s fa5, fs0
-; RV64-ZVFHMIN-NEXT:    fmv.x.w s1, fa0
-; RV64-ZVFHMIN-NEXT:    fmv.s fa0, fa5
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, s1
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fsqrt.s fa0, fa0
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 3
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fsqrt.s fa0, fa0
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 4
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fsqrt.s fa0, fa0
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 5
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fsqrt.s fa0, fa0
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT:    vse16.v v8, (s0)
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    slli a0, a0, 1
-; RV64-ZVFHMIN-NEXT:    add sp, sp, a0
-; RV64-ZVFHMIN-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    fld fs0, 16(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    addi sp, sp, 48
-; RV64-ZVFHMIN-NEXT:    ret
+; ZVFHMIN-LABEL: sqrt_v6f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v10
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -3264,337 +3023,25 @@ define void @trunc_v6f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; RV32-ZVFHMIN-LABEL: trunc_v6f16:
-; RV32-ZVFHMIN:       # %bb.0:
-; RV32-ZVFHMIN-NEXT:    addi sp, sp, -48
-; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 48
-; RV32-ZVFHMIN-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    fsd fs0, 24(sp) # 8-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    .cfi_offset ra, -4
-; RV32-ZVFHMIN-NEXT:    .cfi_offset s0, -8
-; RV32-ZVFHMIN-NEXT:    .cfi_offset s1, -12
-; RV32-ZVFHMIN-NEXT:    .cfi_offset fs0, -24
-; RV32-ZVFHMIN-NEXT:    csrr a1, vlenb
-; RV32-ZVFHMIN-NEXT:    slli a1, a1, 1
-; RV32-ZVFHMIN-NEXT:    sub sp, sp, a1
-; RV32-ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
-; RV32-ZVFHMIN-NEXT:    mv s0, a0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    lui a0, 307200
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fs0, a0
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB116_2
-; RV32-ZVFHMIN-NEXT:  # %bb.1:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rtz
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rtz
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB116_2:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w s1, fa0
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    lh a0, 16(a0) # 8-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB116_4
-; RV32-ZVFHMIN-NEXT:  # %bb.3:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rtz
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rtz
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB116_4:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, s1
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB116_6
-; RV32-ZVFHMIN-NEXT:  # %bb.5:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rtz
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rtz
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB116_6:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 3
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB116_8
-; RV32-ZVFHMIN-NEXT:  # %bb.7:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rtz
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rtz
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB116_8:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 4
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB116_10
-; RV32-ZVFHMIN-NEXT:  # %bb.9:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rtz
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rtz
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB116_10:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 5
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB116_12
-; RV32-ZVFHMIN-NEXT:  # %bb.11:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rtz
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rtz
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB116_12:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV32-ZVFHMIN-NEXT:    vse16.v v8, (s0)
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    slli a0, a0, 1
-; RV32-ZVFHMIN-NEXT:    add sp, sp, a0
-; RV32-ZVFHMIN-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    fld fs0, 24(sp) # 8-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    addi sp, sp, 48
-; RV32-ZVFHMIN-NEXT:    ret
-;
-; RV64-ZVFHMIN-LABEL: trunc_v6f16:
-; RV64-ZVFHMIN:       # %bb.0:
-; RV64-ZVFHMIN-NEXT:    addi sp, sp, -48
-; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 48
-; RV64-ZVFHMIN-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    .cfi_offset ra, -8
-; RV64-ZVFHMIN-NEXT:    .cfi_offset s0, -16
-; RV64-ZVFHMIN-NEXT:    .cfi_offset s1, -24
-; RV64-ZVFHMIN-NEXT:    .cfi_offset fs0, -32
-; RV64-ZVFHMIN-NEXT:    csrr a1, vlenb
-; RV64-ZVFHMIN-NEXT:    slli a1, a1, 1
-; RV64-ZVFHMIN-NEXT:    sub sp, sp, a1
-; RV64-ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
-; RV64-ZVFHMIN-NEXT:    mv s0, a0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    lui a0, 307200
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fs0, a0
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB116_2
-; RV64-ZVFHMIN-NEXT:  # %bb.1:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rtz
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rtz
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB116_2:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w s1, fa0
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    lh a0, 16(a0) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB116_4
-; RV64-ZVFHMIN-NEXT:  # %bb.3:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rtz
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rtz
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB116_4:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, s1
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB116_6
-; RV64-ZVFHMIN-NEXT:  # %bb.5:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rtz
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rtz
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB116_6:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 3
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB116_8
-; RV64-ZVFHMIN-NEXT:  # %bb.7:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rtz
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rtz
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB116_8:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 4
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB116_10
-; RV64-ZVFHMIN-NEXT:  # %bb.9:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rtz
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rtz
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB116_10:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 5
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB116_12
-; RV64-ZVFHMIN-NEXT:  # %bb.11:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rtz
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rtz
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB116_12:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT:    vse16.v v8, (s0)
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    slli a0, a0, 1
-; RV64-ZVFHMIN-NEXT:    add sp, sp, a0
-; RV64-ZVFHMIN-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    fld fs0, 16(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    addi sp, sp, 48
-; RV64-ZVFHMIN-NEXT:    ret
+; ZVFHMIN-LABEL: trunc_v6f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfabs.v v8, v10
+; ZVFHMIN-NEXT:    lui a1, 307200
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT:    vfcvt.rtz.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -3710,337 +3157,27 @@ define void @ceil_v6f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; RV32-ZVFHMIN-LABEL: ceil_v6f16:
-; RV32-ZVFHMIN:       # %bb.0:
-; RV32-ZVFHMIN-NEXT:    addi sp, sp, -48
-; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 48
-; RV32-ZVFHMIN-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    fsd fs0, 24(sp) # 8-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    .cfi_offset ra, -4
-; RV32-ZVFHMIN-NEXT:    .cfi_offset s0, -8
-; RV32-ZVFHMIN-NEXT:    .cfi_offset s1, -12
-; RV32-ZVFHMIN-NEXT:    .cfi_offset fs0, -24
-; RV32-ZVFHMIN-NEXT:    csrr a1, vlenb
-; RV32-ZVFHMIN-NEXT:    slli a1, a1, 1
-; RV32-ZVFHMIN-NEXT:    sub sp, sp, a1
-; RV32-ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
-; RV32-ZVFHMIN-NEXT:    mv s0, a0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    lui a0, 307200
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fs0, a0
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB120_2
-; RV32-ZVFHMIN-NEXT:  # %bb.1:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rup
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rup
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB120_2:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w s1, fa0
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    lh a0, 16(a0) # 8-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB120_4
-; RV32-ZVFHMIN-NEXT:  # %bb.3:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rup
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rup
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB120_4:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, s1
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB120_6
-; RV32-ZVFHMIN-NEXT:  # %bb.5:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rup
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rup
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB120_6:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 3
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB120_8
-; RV32-ZVFHMIN-NEXT:  # %bb.7:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rup
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rup
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB120_8:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 4
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB120_10
-; RV32-ZVFHMIN-NEXT:  # %bb.9:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rup
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rup
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB120_10:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 5
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB120_12
-; RV32-ZVFHMIN-NEXT:  # %bb.11:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rup
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rup
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB120_12:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV32-ZVFHMIN-NEXT:    vse16.v v8, (s0)
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    slli a0, a0, 1
-; RV32-ZVFHMIN-NEXT:    add sp, sp, a0
-; RV32-ZVFHMIN-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    fld fs0, 24(sp) # 8-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    addi sp, sp, 48
-; RV32-ZVFHMIN-NEXT:    ret
-;
-; RV64-ZVFHMIN-LABEL: ceil_v6f16:
-; RV64-ZVFHMIN:       # %bb.0:
-; RV64-ZVFHMIN-NEXT:    addi sp, sp, -48
-; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 48
-; RV64-ZVFHMIN-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    .cfi_offset ra, -8
-; RV64-ZVFHMIN-NEXT:    .cfi_offset s0, -16
-; RV64-ZVFHMIN-NEXT:    .cfi_offset s1, -24
-; RV64-ZVFHMIN-NEXT:    .cfi_offset fs0, -32
-; RV64-ZVFHMIN-NEXT:    csrr a1, vlenb
-; RV64-ZVFHMIN-NEXT:    slli a1, a1, 1
-; RV64-ZVFHMIN-NEXT:    sub sp, sp, a1
-; RV64-ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
-; RV64-ZVFHMIN-NEXT:    mv s0, a0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    lui a0, 307200
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fs0, a0
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB120_2
-; RV64-ZVFHMIN-NEXT:  # %bb.1:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rup
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rup
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB120_2:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w s1, fa0
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    lh a0, 16(a0) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB120_4
-; RV64-ZVFHMIN-NEXT:  # %bb.3:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rup
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rup
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB120_4:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, s1
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB120_6
-; RV64-ZVFHMIN-NEXT:  # %bb.5:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rup
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rup
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB120_6:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 3
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB120_8
-; RV64-ZVFHMIN-NEXT:  # %bb.7:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rup
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rup
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB120_8:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 4
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB120_10
-; RV64-ZVFHMIN-NEXT:  # %bb.9:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rup
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rup
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB120_10:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 5
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB120_12
-; RV64-ZVFHMIN-NEXT:  # %bb.11:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rup
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rup
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB120_12:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT:    vse16.v v8, (s0)
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    slli a0, a0, 1
-; RV64-ZVFHMIN-NEXT:    add sp, sp, a0
-; RV64-ZVFHMIN-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    fld fs0, 16(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    addi sp, sp, 48
-; RV64-ZVFHMIN-NEXT:    ret
+; ZVFHMIN-LABEL: ceil_v6f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfabs.v v8, v10
+; ZVFHMIN-NEXT:    lui a1, 307200
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT:    fsrmi a1, 3
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    fsrm a1
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -4160,337 +3297,27 @@ define void @floor_v6f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; RV32-ZVFHMIN-LABEL: floor_v6f16:
-; RV32-ZVFHMIN:       # %bb.0:
-; RV32-ZVFHMIN-NEXT:    addi sp, sp, -48
-; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 48
-; RV32-ZVFHMIN-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    fsd fs0, 24(sp) # 8-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    .cfi_offset ra, -4
-; RV32-ZVFHMIN-NEXT:    .cfi_offset s0, -8
-; RV32-ZVFHMIN-NEXT:    .cfi_offset s1, -12
-; RV32-ZVFHMIN-NEXT:    .cfi_offset fs0, -24
-; RV32-ZVFHMIN-NEXT:    csrr a1, vlenb
-; RV32-ZVFHMIN-NEXT:    slli a1, a1, 1
-; RV32-ZVFHMIN-NEXT:    sub sp, sp, a1
-; RV32-ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
-; RV32-ZVFHMIN-NEXT:    mv s0, a0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    lui a0, 307200
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fs0, a0
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB124_2
-; RV32-ZVFHMIN-NEXT:  # %bb.1:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rdn
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rdn
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB124_2:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w s1, fa0
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    lh a0, 16(a0) # 8-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB124_4
-; RV32-ZVFHMIN-NEXT:  # %bb.3:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rdn
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rdn
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB124_4:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, s1
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB124_6
-; RV32-ZVFHMIN-NEXT:  # %bb.5:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rdn
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rdn
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB124_6:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 3
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB124_8
-; RV32-ZVFHMIN-NEXT:  # %bb.7:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rdn
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rdn
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB124_8:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 4
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB124_10
-; RV32-ZVFHMIN-NEXT:  # %bb.9:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rdn
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rdn
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB124_10:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 5
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB124_12
-; RV32-ZVFHMIN-NEXT:  # %bb.11:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rdn
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rdn
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB124_12:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV32-ZVFHMIN-NEXT:    vse16.v v8, (s0)
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    slli a0, a0, 1
-; RV32-ZVFHMIN-NEXT:    add sp, sp, a0
-; RV32-ZVFHMIN-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    fld fs0, 24(sp) # 8-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    addi sp, sp, 48
-; RV32-ZVFHMIN-NEXT:    ret
-;
-; RV64-ZVFHMIN-LABEL: floor_v6f16:
-; RV64-ZVFHMIN:       # %bb.0:
-; RV64-ZVFHMIN-NEXT:    addi sp, sp, -48
-; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 48
-; RV64-ZVFHMIN-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    .cfi_offset ra, -8
-; RV64-ZVFHMIN-NEXT:    .cfi_offset s0, -16
-; RV64-ZVFHMIN-NEXT:    .cfi_offset s1, -24
-; RV64-ZVFHMIN-NEXT:    .cfi_offset fs0, -32
-; RV64-ZVFHMIN-NEXT:    csrr a1, vlenb
-; RV64-ZVFHMIN-NEXT:    slli a1, a1, 1
-; RV64-ZVFHMIN-NEXT:    sub sp, sp, a1
-; RV64-ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
-; RV64-ZVFHMIN-NEXT:    mv s0, a0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    lui a0, 307200
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fs0, a0
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB124_2
-; RV64-ZVFHMIN-NEXT:  # %bb.1:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rdn
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rdn
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB124_2:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w s1, fa0
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    lh a0, 16(a0) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB124_4
-; RV64-ZVFHMIN-NEXT:  # %bb.3:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rdn
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rdn
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB124_4:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, s1
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB124_6
-; RV64-ZVFHMIN-NEXT:  # %bb.5:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rdn
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rdn
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB124_6:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 3
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB124_8
-; RV64-ZVFHMIN-NEXT:  # %bb.7:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rdn
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rdn
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB124_8:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 4
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB124_10
-; RV64-ZVFHMIN-NEXT:  # %bb.9:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rdn
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rdn
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB124_10:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 5
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB124_12
-; RV64-ZVFHMIN-NEXT:  # %bb.11:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rdn
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rdn
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB124_12:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT:    vse16.v v8, (s0)
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    slli a0, a0, 1
-; RV64-ZVFHMIN-NEXT:    add sp, sp, a0
-; RV64-ZVFHMIN-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    fld fs0, 16(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    addi sp, sp, 48
-; RV64-ZVFHMIN-NEXT:    ret
+; ZVFHMIN-LABEL: floor_v6f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfabs.v v8, v10
+; ZVFHMIN-NEXT:    lui a1, 307200
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT:    fsrmi a1, 2
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    fsrm a1
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -4610,337 +3437,27 @@ define void @round_v6f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; RV32-ZVFHMIN-LABEL: round_v6f16:
-; RV32-ZVFHMIN:       # %bb.0:
-; RV32-ZVFHMIN-NEXT:    addi sp, sp, -48
-; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 48
-; RV32-ZVFHMIN-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    fsd fs0, 24(sp) # 8-byte Folded Spill
-; RV32-ZVFHMIN-NEXT:    .cfi_offset ra, -4
-; RV32-ZVFHMIN-NEXT:    .cfi_offset s0, -8
-; RV32-ZVFHMIN-NEXT:    .cfi_offset s1, -12
-; RV32-ZVFHMIN-NEXT:    .cfi_offset fs0, -24
-; RV32-ZVFHMIN-NEXT:    csrr a1, vlenb
-; RV32-ZVFHMIN-NEXT:    slli a1, a1, 1
-; RV32-ZVFHMIN-NEXT:    sub sp, sp, a1
-; RV32-ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
-; RV32-ZVFHMIN-NEXT:    mv s0, a0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    lui a0, 307200
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fs0, a0
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB128_2
-; RV32-ZVFHMIN-NEXT:  # %bb.1:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rmm
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rmm
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB128_2:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w s1, fa0
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    lh a0, 16(a0) # 8-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB128_4
-; RV32-ZVFHMIN-NEXT:  # %bb.3:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rmm
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rmm
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB128_4:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, s1
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB128_6
-; RV32-ZVFHMIN-NEXT:  # %bb.5:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rmm
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rmm
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB128_6:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 3
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB128_8
-; RV32-ZVFHMIN-NEXT:  # %bb.7:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rmm
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rmm
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB128_8:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 4
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB128_10
-; RV32-ZVFHMIN-NEXT:  # %bb.9:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rmm
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rmm
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB128_10:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV32-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 5
-; RV32-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV32-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT:    beqz a0, .LBB128_12
-; RV32-ZVFHMIN-NEXT:  # %bb.11:
-; RV32-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rmm
-; RV32-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rmm
-; RV32-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT:  .LBB128_12:
-; RV32-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV32-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV32-ZVFHMIN-NEXT:    vse16.v v8, (s0)
-; RV32-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT:    slli a0, a0, 1
-; RV32-ZVFHMIN-NEXT:    add sp, sp, a0
-; RV32-ZVFHMIN-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    fld fs0, 24(sp) # 8-byte Folded Reload
-; RV32-ZVFHMIN-NEXT:    addi sp, sp, 48
-; RV32-ZVFHMIN-NEXT:    ret
-;
-; RV64-ZVFHMIN-LABEL: round_v6f16:
-; RV64-ZVFHMIN:       # %bb.0:
-; RV64-ZVFHMIN-NEXT:    addi sp, sp, -48
-; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 48
-; RV64-ZVFHMIN-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT:    .cfi_offset ra, -8
-; RV64-ZVFHMIN-NEXT:    .cfi_offset s0, -16
-; RV64-ZVFHMIN-NEXT:    .cfi_offset s1, -24
-; RV64-ZVFHMIN-NEXT:    .cfi_offset fs0, -32
-; RV64-ZVFHMIN-NEXT:    csrr a1, vlenb
-; RV64-ZVFHMIN-NEXT:    slli a1, a1, 1
-; RV64-ZVFHMIN-NEXT:    sub sp, sp, a1
-; RV64-ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
-; RV64-ZVFHMIN-NEXT:    mv s0, a0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    lui a0, 307200
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fs0, a0
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB128_2
-; RV64-ZVFHMIN-NEXT:  # %bb.1:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rmm
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rmm
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB128_2:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w s1, fa0
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    lh a0, 16(a0) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB128_4
-; RV64-ZVFHMIN-NEXT:  # %bb.3:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rmm
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rmm
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB128_4:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, s1
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB128_6
-; RV64-ZVFHMIN-NEXT:  # %bb.5:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rmm
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rmm
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB128_6:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 3
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB128_8
-; RV64-ZVFHMIN-NEXT:  # %bb.7:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rmm
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rmm
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB128_8:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 4
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB128_10
-; RV64-ZVFHMIN-NEXT:  # %bb.9:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rmm
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rmm
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB128_10:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    add a0, sp, a0
-; RV64-ZVFHMIN-NEXT:    addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 5
-; RV64-ZVFHMIN-NEXT:    vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT:    fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT:    call __extendhfsf2
-; RV64-ZVFHMIN-NEXT:    fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT:    flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT:    beqz a0, .LBB128_12
-; RV64-ZVFHMIN-NEXT:  # %bb.11:
-; RV64-ZVFHMIN-NEXT:    fcvt.w.s a0, fa0, rmm
-; RV64-ZVFHMIN-NEXT:    fcvt.s.w fa5, a0, rmm
-; RV64-ZVFHMIN-NEXT:    fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT:  .LBB128_12:
-; RV64-ZVFHMIN-NEXT:    call __truncsfhf2
-; RV64-ZVFHMIN-NEXT:    fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT:    vse16.v v8, (s0)
-; RV64-ZVFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT:    slli a0, a0, 1
-; RV64-ZVFHMIN-NEXT:    add sp, sp, a0
-; RV64-ZVFHMIN-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    fld fs0, 16(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT:    addi sp, sp, 48
-; RV64-ZVFHMIN-NEXT:    ret
+; ZVFHMIN-LABEL: round_v6f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfabs.v v8, v10
+; ZVFHMIN-NEXT:    lui a1, 307200
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT:    fsrmi a1, 4
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    fsrm a1
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x


