[llvm] [RISCV] Don't promote f16 FNEG/FABS with Zfhmin/Zhinxmin. (PR #106474)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 29 08:16:42 PDT 2024


https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/106474

>From d479d403a807ff83a027745bb09e855d078cee9e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 28 Aug 2024 16:54:35 -0700
Subject: [PATCH 1/2] [RISCV] Don't promote f16 FNEG/FABS with Zfhmin/Zhinxmin.

fneg/fabs are not supposed to canonicalize NaNs. Promoting to f32 goes
through an fp_extend, which will canonicalize them.

We need to use integer bit manipulation to flip (fneg) or clear (fabs)
the sign bit instead.

Unfortunately, this is going through the stack due to i16 not being
a legal type. Fixing that will require custom legalization or some
other generic SelectionDAG change.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |   5 +-
 llvm/test/CodeGen/RISCV/bfloat-arith.ll       | 603 +++++++++++----
 llvm/test/CodeGen/RISCV/half-arith-strict.ll  | 631 ++++++++++-----
 llvm/test/CodeGen/RISCV/half-arith.ll         | 724 ++++++++++--------
 .../RISCV/half-bitmanip-dagcombines.ll        |  70 +-
 llvm/test/CodeGen/RISCV/half-intrinsics.ll    |  27 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll     | 189 ++++-
 7 files changed, 1533 insertions(+), 716 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 790107b772fcb3..15ce730e23dd3b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -395,7 +395,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       ISD::FADD,          ISD::FSUB,
       ISD::FMUL,          ISD::FMA,
       ISD::FDIV,          ISD::FSQRT,
-      ISD::FABS,          ISD::FNEG,
       ISD::STRICT_FMA,    ISD::STRICT_FADD,
       ISD::STRICT_FSUB,   ISD::STRICT_FMUL,
       ISD::STRICT_FDIV,   ISD::STRICT_FSQRT,
@@ -416,6 +415,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
     setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
     setOperationAction(ISD::FREM, MVT::bf16, Promote);
+    setOperationAction(ISD::FABS, MVT::bf16, Expand);
+    setOperationAction(ISD::FNEG, MVT::bf16, Expand);
     // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
     // DAGCombiner::visitFP_ROUND probably needs improvements first.
     setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
@@ -433,6 +434,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
                           ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
                          MVT::f16, Legal);
+      setOperationAction(ISD::FABS, MVT::f16, Expand);
+      setOperationAction(ISD::FNEG, MVT::f16, Expand);
       // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
       // DAGCombiner::visitFP_ROUND probably needs improvements first.
       setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
diff --git a/llvm/test/CodeGen/RISCV/bfloat-arith.ll b/llvm/test/CodeGen/RISCV/bfloat-arith.ll
index 632e933c595671..56a30dd0f6ffee 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-arith.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-arith.ll
@@ -105,17 +105,39 @@ define bfloat @fsgnj_s(bfloat %a, bfloat %b) nounwind {
 }
 
 define i32 @fneg_s(bfloat %a, bfloat %b) nounwind {
-; CHECK-LABEL: fneg_s:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    fadd.s fa5, fa5, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fneg.s fa4, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa4, fa4
-; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
-; CHECK-NEXT:    feq.s a0, fa5, fa4
-; CHECK-NEXT:    ret
+; RV32IZFBFMIN-LABEL: fneg_s:
+; RV32IZFBFMIN:       # %bb.0:
+; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    flh fa4, 12(sp)
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32IZFBFMIN-NEXT:    feq.s a0, fa5, fa4
+; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    ret
+;
+; RV64IZFBFMIN-LABEL: fneg_s:
+; RV64IZFBFMIN:       # %bb.0:
+; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    flh fa4, 8(sp)
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64IZFBFMIN-NEXT:    feq.s a0, fa5, fa4
+; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV64IZFBFMIN-NEXT:    ret
   %1 = fadd bfloat %a, %a
   %2 = fneg bfloat %1
   %3 = fcmp oeq bfloat %1, %2
@@ -131,9 +153,11 @@ define bfloat @fsgnjn_s(bfloat %a, bfloat %b) nounwind {
 ; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
 ; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
 ; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
-; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
-; RV32IZFBFMIN-NEXT:    fneg.s fa5, fa5
-; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    fsh fa5, 4(sp)
+; RV32IZFBFMIN-NEXT:    lbu a0, 5(sp)
+; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV32IZFBFMIN-NEXT:    sb a0, 5(sp)
+; RV32IZFBFMIN-NEXT:    flh fa5, 4(sp)
 ; RV32IZFBFMIN-NEXT:    fsh fa0, 8(sp)
 ; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
 ; RV32IZFBFMIN-NEXT:    lbu a0, 9(sp)
@@ -148,24 +172,26 @@ define bfloat @fsgnjn_s(bfloat %a, bfloat %b) nounwind {
 ;
 ; RV64IZFBFMIN-LABEL: fsgnjn_s:
 ; RV64IZFBFMIN:       # %bb.0:
-; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV64IZFBFMIN-NEXT:    addi sp, sp, -32
 ; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
 ; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
 ; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
 ; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
-; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
-; RV64IZFBFMIN-NEXT:    fneg.s fa5, fa5
-; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
-; RV64IZFBFMIN-NEXT:    fsh fa0, 0(sp)
 ; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
-; RV64IZFBFMIN-NEXT:    lbu a0, 1(sp)
-; RV64IZFBFMIN-NEXT:    lbu a1, 9(sp)
+; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    flh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    fsh fa0, 16(sp)
+; RV64IZFBFMIN-NEXT:    fsh fa5, 24(sp)
+; RV64IZFBFMIN-NEXT:    lbu a0, 17(sp)
+; RV64IZFBFMIN-NEXT:    lbu a1, 25(sp)
 ; RV64IZFBFMIN-NEXT:    andi a0, a0, 127
 ; RV64IZFBFMIN-NEXT:    andi a1, a1, 128
 ; RV64IZFBFMIN-NEXT:    or a0, a0, a1
-; RV64IZFBFMIN-NEXT:    sb a0, 1(sp)
-; RV64IZFBFMIN-NEXT:    flh fa0, 0(sp)
-; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV64IZFBFMIN-NEXT:    sb a0, 17(sp)
+; RV64IZFBFMIN-NEXT:    flh fa0, 16(sp)
+; RV64IZFBFMIN-NEXT:    addi sp, sp, 32
 ; RV64IZFBFMIN-NEXT:    ret
   %1 = fadd bfloat %a, %b
   %2 = fneg bfloat %1
@@ -176,19 +202,43 @@ define bfloat @fsgnjn_s(bfloat %a, bfloat %b) nounwind {
 declare bfloat @llvm.fabs.bf16(bfloat)
 
 define bfloat @fabs_s(bfloat %a, bfloat %b) nounwind {
-; CHECK-LABEL: fabs_s:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
-; CHECK-NEXT:    fadd.s fa5, fa4, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fabs.s fa4, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa4, fa4
-; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
-; CHECK-NEXT:    fadd.s fa5, fa4, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK-NEXT:    ret
+; RV32IZFBFMIN-LABEL: fabs_s:
+; RV32IZFBFMIN:       # %bb.0:
+; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
+; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    andi a0, a0, 127
+; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    flh fa4, 12(sp)
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    ret
+;
+; RV64IZFBFMIN-LABEL: fabs_s:
+; RV64IZFBFMIN:       # %bb.0:
+; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
+; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    andi a0, a0, 127
+; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    flh fa4, 8(sp)
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV64IZFBFMIN-NEXT:    ret
   %1 = fadd bfloat %a, %b
   %2 = call bfloat @llvm.fabs.bf16(bfloat %1)
   %3 = fadd bfloat %2, %1
@@ -239,21 +289,45 @@ define bfloat @fmadd_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
 }
 
 define bfloat @fmsub_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
-; CHECK-LABEL: fmsub_s:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa2
-; CHECK-NEXT:    fmv.w.x fa4, zero
-; CHECK-NEXT:    fadd.s fa5, fa5, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fneg.s fa5, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa4, fa1
-; CHECK-NEXT:    fcvt.s.bf16 fa3, fa0
-; CHECK-NEXT:    fmadd.s fa5, fa3, fa4, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK-NEXT:    ret
+; RV32IZFBFMIN-LABEL: fmsub_s:
+; RV32IZFBFMIN:       # %bb.0:
+; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
+; RV32IZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    flh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa1
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV32IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    ret
+;
+; RV64IZFBFMIN-LABEL: fmsub_s:
+; RV64IZFBFMIN:       # %bb.0:
+; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
+; RV64IZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    flh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa1
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV64IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV64IZFBFMIN-NEXT:    ret
   %c_ = fadd bfloat 0.0, %c ; avoid negation using xor
   %negc = fsub bfloat -0.0, %c_
   %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %negc)
@@ -261,27 +335,61 @@ define bfloat @fmsub_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
 }
 
 define bfloat @fnmadd_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
-; CHECK-LABEL: fnmadd_s:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    fmv.w.x fa4, zero
-; CHECK-NEXT:    fadd.s fa5, fa5, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa3, fa2
-; CHECK-NEXT:    fadd.s fa4, fa3, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa4, fa4
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fneg.s fa5, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
-; CHECK-NEXT:    fneg.s fa4, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa4, fa4
-; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa3, fa1
-; CHECK-NEXT:    fmadd.s fa5, fa5, fa3, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK-NEXT:    ret
+; RV32IZFBFMIN-LABEL: fnmadd_s:
+; RV32IZFBFMIN:       # %bb.0:
+; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV32IZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    fsh fa5, 8(sp)
+; RV32IZFBFMIN-NEXT:    lbu a0, 9(sp)
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
+; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV32IZFBFMIN-NEXT:    sb a0, 9(sp)
+; RV32IZFBFMIN-NEXT:    flh fa4, 8(sp)
+; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    flh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa1
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32IZFBFMIN-NEXT:    fmadd.s fa5, fa4, fa3, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    ret
+;
+; RV64IZFBFMIN-LABEL: fnmadd_s:
+; RV64IZFBFMIN:       # %bb.0:
+; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV64IZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT:    fsh fa5, 0(sp)
+; RV64IZFBFMIN-NEXT:    lbu a0, 1(sp)
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
+; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV64IZFBFMIN-NEXT:    sb a0, 1(sp)
+; RV64IZFBFMIN-NEXT:    flh fa4, 0(sp)
+; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    flh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa1
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64IZFBFMIN-NEXT:    fmadd.s fa5, fa4, fa3, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV64IZFBFMIN-NEXT:    ret
   %a_ = fadd bfloat 0.0, %a
   %c_ = fadd bfloat 0.0, %c
   %nega = fsub bfloat -0.0, %a_
@@ -291,27 +399,61 @@ define bfloat @fnmadd_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
 }
 
 define bfloat @fnmadd_s_2(bfloat %a, bfloat %b, bfloat %c) nounwind {
-; CHECK-LABEL: fnmadd_s_2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT:    fmv.w.x fa4, zero
-; CHECK-NEXT:    fadd.s fa5, fa5, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa3, fa2
-; CHECK-NEXT:    fadd.s fa4, fa3, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa4, fa4
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fneg.s fa5, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
-; CHECK-NEXT:    fneg.s fa4, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa4, fa4
-; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa3, fa0
-; CHECK-NEXT:    fmadd.s fa5, fa3, fa5, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK-NEXT:    ret
+; RV32IZFBFMIN-LABEL: fnmadd_s_2:
+; RV32IZFBFMIN:       # %bb.0:
+; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
+; RV32IZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    fsh fa5, 8(sp)
+; RV32IZFBFMIN-NEXT:    lbu a0, 9(sp)
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
+; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV32IZFBFMIN-NEXT:    sb a0, 9(sp)
+; RV32IZFBFMIN-NEXT:    flh fa4, 8(sp)
+; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    flh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    ret
+;
+; RV64IZFBFMIN-LABEL: fnmadd_s_2:
+; RV64IZFBFMIN:       # %bb.0:
+; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
+; RV64IZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT:    fsh fa5, 0(sp)
+; RV64IZFBFMIN-NEXT:    lbu a0, 1(sp)
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
+; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV64IZFBFMIN-NEXT:    sb a0, 1(sp)
+; RV64IZFBFMIN-NEXT:    flh fa4, 0(sp)
+; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    flh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV64IZFBFMIN-NEXT:    ret
   %b_ = fadd bfloat 0.0, %b
   %c_ = fadd bfloat 0.0, %c
   %negb = fsub bfloat -0.0, %b_
@@ -321,17 +463,37 @@ define bfloat @fnmadd_s_2(bfloat %a, bfloat %b, bfloat %c) nounwind {
 }
 
 define bfloat @fnmadd_s_3(bfloat %a, bfloat %b, bfloat %c) nounwind {
-; CHECK-LABEL: fnmadd_s_3:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa2
-; CHECK-NEXT:    fcvt.s.bf16 fa4, fa1
-; CHECK-NEXT:    fcvt.s.bf16 fa3, fa0
-; CHECK-NEXT:    fmadd.s fa5, fa3, fa4, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fneg.s fa5, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK-NEXT:    ret
+; RV32IZFBFMIN-LABEL: fnmadd_s_3:
+; RV32IZFBFMIN:       # %bb.0:
+; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa1
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
+; RV32IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    flh fa0, 12(sp)
+; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    ret
+;
+; RV64IZFBFMIN-LABEL: fnmadd_s_3:
+; RV64IZFBFMIN:       # %bb.0:
+; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa1
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
+; RV64IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    flh fa0, 8(sp)
+; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV64IZFBFMIN-NEXT:    ret
   %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
   %neg = fneg bfloat %1
   ret bfloat %neg
@@ -339,38 +501,82 @@ define bfloat @fnmadd_s_3(bfloat %a, bfloat %b, bfloat %c) nounwind {
 
 
 define bfloat @fnmadd_nsz(bfloat %a, bfloat %b, bfloat %c) nounwind {
-; CHECK-LABEL: fnmadd_nsz:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa2
-; CHECK-NEXT:    fcvt.s.bf16 fa4, fa1
-; CHECK-NEXT:    fcvt.s.bf16 fa3, fa0
-; CHECK-NEXT:    fmadd.s fa5, fa3, fa4, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fneg.s fa5, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK-NEXT:    ret
+; RV32IZFBFMIN-LABEL: fnmadd_nsz:
+; RV32IZFBFMIN:       # %bb.0:
+; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa1
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
+; RV32IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    flh fa0, 12(sp)
+; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    ret
+;
+; RV64IZFBFMIN-LABEL: fnmadd_nsz:
+; RV64IZFBFMIN:       # %bb.0:
+; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa1
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
+; RV64IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    flh fa0, 8(sp)
+; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV64IZFBFMIN-NEXT:    ret
   %1 = call nsz bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
   %neg = fneg nsz bfloat %1
   ret bfloat %neg
 }
 
 define bfloat @fnmsub_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
-; CHECK-LABEL: fnmsub_s:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    fmv.w.x fa4, zero
-; CHECK-NEXT:    fadd.s fa5, fa5, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fneg.s fa5, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa4, fa2
-; CHECK-NEXT:    fcvt.s.bf16 fa3, fa1
-; CHECK-NEXT:    fmadd.s fa5, fa5, fa3, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK-NEXT:    ret
+; RV32IZFBFMIN-LABEL: fnmsub_s:
+; RV32IZFBFMIN:       # %bb.0:
+; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV32IZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    flh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa2
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa1
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV32IZFBFMIN-NEXT:    fmadd.s fa5, fa5, fa3, fa4
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    ret
+;
+; RV64IZFBFMIN-LABEL: fnmsub_s:
+; RV64IZFBFMIN:       # %bb.0:
+; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV64IZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    flh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa2
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa1
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV64IZFBFMIN-NEXT:    fmadd.s fa5, fa5, fa3, fa4
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV64IZFBFMIN-NEXT:    ret
   %a_ = fadd bfloat 0.0, %a
   %nega = fsub bfloat -0.0, %a_
   %1 = call bfloat @llvm.fma.bf16(bfloat %nega, bfloat %b, bfloat %c)
@@ -378,21 +584,45 @@ define bfloat @fnmsub_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
 }
 
 define bfloat @fnmsub_s_2(bfloat %a, bfloat %b, bfloat %c) nounwind {
-; CHECK-LABEL: fnmsub_s_2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT:    fmv.w.x fa4, zero
-; CHECK-NEXT:    fadd.s fa5, fa5, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fneg.s fa5, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa4, fa2
-; CHECK-NEXT:    fcvt.s.bf16 fa3, fa0
-; CHECK-NEXT:    fmadd.s fa5, fa3, fa5, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK-NEXT:    ret
+; RV32IZFBFMIN-LABEL: fnmsub_s_2:
+; RV32IZFBFMIN:       # %bb.0:
+; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
+; RV32IZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    flh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa2
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV32IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa5, fa4
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    ret
+;
+; RV64IZFBFMIN-LABEL: fnmsub_s_2:
+; RV64IZFBFMIN:       # %bb.0:
+; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
+; RV64IZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    flh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa2
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV64IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa5, fa4
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV64IZFBFMIN-NEXT:    ret
   %b_ = fadd bfloat 0.0, %b
   %negb = fsub bfloat -0.0, %b_
   %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %negb, bfloat %c)
@@ -439,30 +669,63 @@ define bfloat @fmsub_s_contract(bfloat %a, bfloat %b, bfloat %c) nounwind {
 }
 
 define bfloat @fnmadd_s_contract(bfloat %a, bfloat %b, bfloat %c) nounwind {
-; CHECK-LABEL: fnmadd_s_contract:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    fmv.w.x fa4, zero
-; CHECK-NEXT:    fadd.s fa5, fa5, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa3, fa1
-; CHECK-NEXT:    fadd.s fa3, fa3, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa3, fa3
-; CHECK-NEXT:    fcvt.s.bf16 fa2, fa2
-; CHECK-NEXT:    fadd.s fa4, fa2, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa4, fa4
-; CHECK-NEXT:    fcvt.s.bf16 fa3, fa3
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fmul.s fa5, fa5, fa3
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fneg.s fa5, fa5
-; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
-; CHECK-NEXT:    fsub.s fa5, fa5, fa4
-; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK-NEXT:    ret
+; RV32IZFBFMIN-LABEL: fnmadd_s_contract:
+; RV32IZFBFMIN:       # %bb.0:
+; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV32IZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa1
+; RV32IZFBFMIN-NEXT:    fadd.s fa3, fa3, fa4
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa3, fa3
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa3
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV32IZFBFMIN-NEXT:    fmul.s fa5, fa5, fa3
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
+; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    flh fa3, 12(sp)
+; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa3
+; RV32IZFBFMIN-NEXT:    fsub.s fa5, fa4, fa5
+; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    ret
+;
+; RV64IZFBFMIN-LABEL: fnmadd_s_contract:
+; RV64IZFBFMIN:       # %bb.0:
+; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV64IZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa1
+; RV64IZFBFMIN-NEXT:    fadd.s fa3, fa3, fa4
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa3, fa3
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa3
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV64IZFBFMIN-NEXT:    fmul.s fa5, fa5, fa3
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
+; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
+; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    flh fa3, 8(sp)
+; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa3
+; RV64IZFBFMIN-NEXT:    fsub.s fa5, fa4, fa5
+; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV64IZFBFMIN-NEXT:    ret
   %a_ = fadd bfloat 0.0, %a ; avoid negation using xor
   %b_ = fadd bfloat 0.0, %b ; avoid negation using xor
   %c_ = fadd bfloat 0.0, %c ; avoid negation using xor
diff --git a/llvm/test/CodeGen/RISCV/half-arith-strict.ll b/llvm/test/CodeGen/RISCV/half-arith-strict.ll
index 02cd91c7075940..4c7096f4045e2b 100644
--- a/llvm/test/CodeGen/RISCV/half-arith-strict.ll
+++ b/llvm/test/CodeGen/RISCV/half-arith-strict.ll
@@ -11,16 +11,16 @@
 ; RUN:   | FileCheck -check-prefix=CHECK-ZHINX %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \
 ; RUN:   -disable-strictnode-mutation -target-abi ilp32f < %s \
-; RUN:   | FileCheck -check-prefix=CHECK-ZFHMIN %s
+; RUN:   | FileCheck -check-prefixes=CHECK-ZFHMIN,CHECK-ZFHMIN-RV32 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \
 ; RUN:   -disable-strictnode-mutation -target-abi lp64f < %s \
-; RUN:  | FileCheck -check-prefix=CHECK-ZFHMIN %s
+; RUN:  | FileCheck -check-prefixes=CHECK-ZFHMIN,CHECK-ZFHMIN-RV64 %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zhinxmin -verify-machineinstrs \
 ; RUN:   -disable-strictnode-mutation -target-abi ilp32 < %s \
-; RUN:   | FileCheck -check-prefix=CHECK-ZHINXMIN %s
+; RUN:   | FileCheck -check-prefixes=CHECK-ZHINXMIN,CHECK-ZHINXMIN-RV32 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zhinxmin -verify-machineinstrs \
 ; RUN:   -disable-strictnode-mutation -target-abi lp64 < %s \
-; RUN:   | FileCheck -check-prefix=CHECK-ZHINXMIN %s
+; RUN:   | FileCheck -check-prefixes=CHECK-ZHINXMIN,CHECK-ZHINXMIN-RV64 %s
 
 ; FIXME: We can't test without Zfh because soft promote legalization isn't
 ; implemented in SelectionDAG for STRICT nodes.
@@ -239,36 +239,83 @@ define half @fmsub_h(half %a, half %b, half %c) nounwind strictfp {
 ; CHECK-ZHINX-NEXT:    fmsub.h a0, a0, a1, a2
 ; CHECK-ZHINX-NEXT:    ret
 ;
-; CHECK-ZFHMIN-LABEL: fmsub_h:
-; CHECK-ZFHMIN:       # %bb.0:
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa2
-; CHECK-ZFHMIN-NEXT:    fmv.w.x fa4, zero
-; CHECK-ZFHMIN-NEXT:    fadd.s fa5, fa5, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fneg.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa4, fa1
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa3, fa0
-; CHECK-ZFHMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECK-ZFHMIN-NEXT:    ret
-;
-; CHECK-ZHINXMIN-LABEL: fmsub_h:
-; CHECK-ZHINXMIN:       # %bb.0:
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECK-ZHINXMIN-NEXT:    fadd.s a2, a2, zero
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a2, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECK-ZHINXMIN-NEXT:    fneg.s a2, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a2, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECK-ZHINXMIN-NEXT:    fmadd.s a0, a0, a1, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECK-ZHINXMIN-NEXT:    ret
+; CHECK-ZFHMIN-RV32-LABEL: fmsub_h:
+; CHECK-ZFHMIN-RV32:       # %bb.0:
+; CHECK-ZFHMIN-RV32-NEXT:    addi sp, sp, -16
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-ZFHMIN-RV32-NEXT:    fmv.w.x fa4, zero
+; CHECK-ZFHMIN-RV32-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    fsh fa5, 12(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    lbu a0, 13(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    xori a0, a0, 128
+; CHECK-ZFHMIN-RV32-NEXT:    sb a0, 13(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    flh fa5, 12(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa4, fa1
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    addi sp, sp, 16
+; CHECK-ZFHMIN-RV32-NEXT:    ret
+;
+; CHECK-ZFHMIN-RV64-LABEL: fmsub_h:
+; CHECK-ZFHMIN-RV64:       # %bb.0:
+; CHECK-ZFHMIN-RV64-NEXT:    addi sp, sp, -16
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-ZFHMIN-RV64-NEXT:    fmv.w.x fa4, zero
+; CHECK-ZFHMIN-RV64-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    fsh fa5, 8(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    lbu a0, 9(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    xori a0, a0, 128
+; CHECK-ZFHMIN-RV64-NEXT:    sb a0, 9(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    flh fa5, 8(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa4, fa1
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    addi sp, sp, 16
+; CHECK-ZFHMIN-RV64-NEXT:    ret
+;
+; CHECK-ZHINXMIN-RV32-LABEL: fmsub_h:
+; CHECK-ZHINXMIN-RV32:       # %bb.0:
+; CHECK-ZHINXMIN-RV32-NEXT:    addi sp, sp, -16
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    fadd.s a2, a2, zero
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.h.s a2, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    sh a2, 12(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    lbu a2, 13(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    xori a2, a2, 128
+; CHECK-ZHINXMIN-RV32-NEXT:    sb a2, 13(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    lh a2, 12(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a1, a1
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a0, a0
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    fmadd.s a0, a0, a1, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.h.s a0, a0
+; CHECK-ZHINXMIN-RV32-NEXT:    addi sp, sp, 16
+; CHECK-ZHINXMIN-RV32-NEXT:    ret
+;
+; CHECK-ZHINXMIN-RV64-LABEL: fmsub_h:
+; CHECK-ZHINXMIN-RV64:       # %bb.0:
+; CHECK-ZHINXMIN-RV64-NEXT:    addi sp, sp, -16
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    fadd.s a2, a2, zero
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.h.s a2, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    sh a2, 8(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    lbu a2, 9(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    xori a2, a2, 128
+; CHECK-ZHINXMIN-RV64-NEXT:    sb a2, 9(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    lh a2, 8(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a1, a1
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a0, a0
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    fmadd.s a0, a0, a1, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.h.s a0, a0
+; CHECK-ZHINXMIN-RV64-NEXT:    addi sp, sp, 16
+; CHECK-ZHINXMIN-RV64-NEXT:    ret
   %c_ = fadd half 0.0, %c ; avoid negation using xor
   %negc = fneg half %c_
   %1 = call half @llvm.experimental.constrained.fma.f16(half %a, half %b, half %negc, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
@@ -291,48 +338,115 @@ define half @fnmadd_h(half %a, half %b, half %c) nounwind strictfp {
 ; CHECK-ZHINX-NEXT:    fnmadd.h a0, a0, a1, a2
 ; CHECK-ZHINX-NEXT:    ret
 ;
-; CHECK-ZFHMIN-LABEL: fnmadd_h:
-; CHECK-ZFHMIN:       # %bb.0:
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; CHECK-ZFHMIN-NEXT:    fmv.w.x fa4, zero
-; CHECK-ZFHMIN-NEXT:    fadd.s fa5, fa5, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa3, fa2
-; CHECK-ZFHMIN-NEXT:    fadd.s fa4, fa3, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa4, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fneg.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa4, fa4
-; CHECK-ZFHMIN-NEXT:    fneg.s fa4, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa4, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa4, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa3, fa1
-; CHECK-ZFHMIN-NEXT:    fmadd.s fa5, fa5, fa3, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECK-ZFHMIN-NEXT:    ret
-;
-; CHECK-ZHINXMIN-LABEL: fnmadd_h:
-; CHECK-ZHINXMIN:       # %bb.0:
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECK-ZHINXMIN-NEXT:    fadd.s a0, a0, zero
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECK-ZHINXMIN-NEXT:    fadd.s a2, a2, zero
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a2, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECK-ZHINXMIN-NEXT:    fneg.s a0, a0
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECK-ZHINXMIN-NEXT:    fneg.s a2, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a2, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECK-ZHINXMIN-NEXT:    fmadd.s a0, a0, a1, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECK-ZHINXMIN-NEXT:    ret
+; CHECK-ZFHMIN-RV32-LABEL: fnmadd_h:
+; CHECK-ZFHMIN-RV32:       # %bb.0:
+; CHECK-ZFHMIN-RV32-NEXT:    addi sp, sp, -16
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
+; CHECK-ZFHMIN-RV32-NEXT:    fmv.w.x fa4, zero
+; CHECK-ZFHMIN-RV32-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    fsh fa5, 8(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    lbu a0, 9(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-ZFHMIN-RV32-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    xori a0, a0, 128
+; CHECK-ZFHMIN-RV32-NEXT:    sb a0, 9(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    flh fa4, 8(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    fsh fa5, 12(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    lbu a0, 13(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    xori a0, a0, 128
+; CHECK-ZFHMIN-RV32-NEXT:    sb a0, 13(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    flh fa5, 12(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa3, fa1
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa4, fa4
+; CHECK-ZFHMIN-RV32-NEXT:    fmadd.s fa5, fa4, fa3, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    addi sp, sp, 16
+; CHECK-ZFHMIN-RV32-NEXT:    ret
+;
+; CHECK-ZFHMIN-RV64-LABEL: fnmadd_h:
+; CHECK-ZFHMIN-RV64:       # %bb.0:
+; CHECK-ZFHMIN-RV64-NEXT:    addi sp, sp, -16
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
+; CHECK-ZFHMIN-RV64-NEXT:    fmv.w.x fa4, zero
+; CHECK-ZFHMIN-RV64-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    fsh fa5, 0(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    lbu a0, 1(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-ZFHMIN-RV64-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    xori a0, a0, 128
+; CHECK-ZFHMIN-RV64-NEXT:    sb a0, 1(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    flh fa4, 0(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    fsh fa5, 8(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    lbu a0, 9(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    xori a0, a0, 128
+; CHECK-ZFHMIN-RV64-NEXT:    sb a0, 9(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    flh fa5, 8(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa3, fa1
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa4, fa4
+; CHECK-ZFHMIN-RV64-NEXT:    fmadd.s fa5, fa4, fa3, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    addi sp, sp, 16
+; CHECK-ZFHMIN-RV64-NEXT:    ret
+;
+; CHECK-ZHINXMIN-RV32-LABEL: fnmadd_h:
+; CHECK-ZHINXMIN-RV32:       # %bb.0:
+; CHECK-ZHINXMIN-RV32-NEXT:    addi sp, sp, -16
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a0, a0
+; CHECK-ZHINXMIN-RV32-NEXT:    fadd.s a0, a0, zero
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.h.s a0, a0
+; CHECK-ZHINXMIN-RV32-NEXT:    sh a0, 8(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    lbu a0, 9(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    fadd.s a2, a2, zero
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.h.s a2, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    xori a0, a0, 128
+; CHECK-ZHINXMIN-RV32-NEXT:    sb a0, 9(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    lh a0, 8(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    sh a2, 12(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    lbu a2, 13(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    xori a2, a2, 128
+; CHECK-ZHINXMIN-RV32-NEXT:    sb a2, 13(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    lh a2, 12(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a1, a1
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a0, a0
+; CHECK-ZHINXMIN-RV32-NEXT:    fmadd.s a0, a0, a1, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.h.s a0, a0
+; CHECK-ZHINXMIN-RV32-NEXT:    addi sp, sp, 16
+; CHECK-ZHINXMIN-RV32-NEXT:    ret
+;
+; CHECK-ZHINXMIN-RV64-LABEL: fnmadd_h:
+; CHECK-ZHINXMIN-RV64:       # %bb.0:
+; CHECK-ZHINXMIN-RV64-NEXT:    addi sp, sp, -16
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a0, a0
+; CHECK-ZHINXMIN-RV64-NEXT:    fadd.s a0, a0, zero
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.h.s a0, a0
+; CHECK-ZHINXMIN-RV64-NEXT:    sh a0, 0(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    lbu a0, 1(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    fadd.s a2, a2, zero
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.h.s a2, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    xori a0, a0, 128
+; CHECK-ZHINXMIN-RV64-NEXT:    sb a0, 1(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    lh a0, 0(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    sh a2, 8(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    lbu a2, 9(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    xori a2, a2, 128
+; CHECK-ZHINXMIN-RV64-NEXT:    sb a2, 9(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    lh a2, 8(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a1, a1
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a0, a0
+; CHECK-ZHINXMIN-RV64-NEXT:    fmadd.s a0, a0, a1, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.h.s a0, a0
+; CHECK-ZHINXMIN-RV64-NEXT:    addi sp, sp, 16
+; CHECK-ZHINXMIN-RV64-NEXT:    ret
   %a_ = fadd half 0.0, %a
   %c_ = fadd half 0.0, %c
   %nega = fneg half %a_
@@ -357,48 +471,115 @@ define half @fnmadd_h_2(half %a, half %b, half %c) nounwind strictfp {
 ; CHECK-ZHINX-NEXT:    fnmadd.h a0, a1, a0, a2
 ; CHECK-ZHINX-NEXT:    ret
 ;
-; CHECK-ZFHMIN-LABEL: fnmadd_h_2:
-; CHECK-ZFHMIN:       # %bb.0:
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa1
-; CHECK-ZFHMIN-NEXT:    fmv.w.x fa4, zero
-; CHECK-ZFHMIN-NEXT:    fadd.s fa5, fa5, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa3, fa2
-; CHECK-ZFHMIN-NEXT:    fadd.s fa4, fa3, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa4, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fneg.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa4, fa4
-; CHECK-ZFHMIN-NEXT:    fneg.s fa4, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa4, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa4, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa3, fa0
-; CHECK-ZFHMIN-NEXT:    fmadd.s fa5, fa3, fa5, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECK-ZFHMIN-NEXT:    ret
-;
-; CHECK-ZHINXMIN-LABEL: fnmadd_h_2:
-; CHECK-ZHINXMIN:       # %bb.0:
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECK-ZHINXMIN-NEXT:    fadd.s a1, a1, zero
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a1, a1
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECK-ZHINXMIN-NEXT:    fadd.s a2, a2, zero
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a2, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECK-ZHINXMIN-NEXT:    fneg.s a1, a1
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a1, a1
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECK-ZHINXMIN-NEXT:    fneg.s a2, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a2, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECK-ZHINXMIN-NEXT:    fmadd.s a0, a0, a1, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECK-ZHINXMIN-NEXT:    ret
+; CHECK-ZFHMIN-RV32-LABEL: fnmadd_h_2:
+; CHECK-ZFHMIN-RV32:       # %bb.0:
+; CHECK-ZFHMIN-RV32-NEXT:    addi sp, sp, -16
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa1
+; CHECK-ZFHMIN-RV32-NEXT:    fmv.w.x fa4, zero
+; CHECK-ZFHMIN-RV32-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    fsh fa5, 8(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    lbu a0, 9(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-ZFHMIN-RV32-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    xori a0, a0, 128
+; CHECK-ZFHMIN-RV32-NEXT:    sb a0, 9(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    flh fa4, 8(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    fsh fa5, 12(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    lbu a0, 13(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    xori a0, a0, 128
+; CHECK-ZFHMIN-RV32-NEXT:    sb a0, 13(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    flh fa5, 12(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa4, fa4
+; CHECK-ZFHMIN-RV32-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    addi sp, sp, 16
+; CHECK-ZFHMIN-RV32-NEXT:    ret
+;
+; CHECK-ZFHMIN-RV64-LABEL: fnmadd_h_2:
+; CHECK-ZFHMIN-RV64:       # %bb.0:
+; CHECK-ZFHMIN-RV64-NEXT:    addi sp, sp, -16
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa1
+; CHECK-ZFHMIN-RV64-NEXT:    fmv.w.x fa4, zero
+; CHECK-ZFHMIN-RV64-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    fsh fa5, 0(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    lbu a0, 1(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-ZFHMIN-RV64-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    xori a0, a0, 128
+; CHECK-ZFHMIN-RV64-NEXT:    sb a0, 1(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    flh fa4, 0(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    fsh fa5, 8(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    lbu a0, 9(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    xori a0, a0, 128
+; CHECK-ZFHMIN-RV64-NEXT:    sb a0, 9(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    flh fa5, 8(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa4, fa4
+; CHECK-ZFHMIN-RV64-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    addi sp, sp, 16
+; CHECK-ZFHMIN-RV64-NEXT:    ret
+;
+; CHECK-ZHINXMIN-RV32-LABEL: fnmadd_h_2:
+; CHECK-ZHINXMIN-RV32:       # %bb.0:
+; CHECK-ZHINXMIN-RV32-NEXT:    addi sp, sp, -16
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a1, a1
+; CHECK-ZHINXMIN-RV32-NEXT:    fadd.s a1, a1, zero
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.h.s a1, a1
+; CHECK-ZHINXMIN-RV32-NEXT:    sh a1, 8(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    lbu a1, 9(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    fadd.s a2, a2, zero
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.h.s a2, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    xori a1, a1, 128
+; CHECK-ZHINXMIN-RV32-NEXT:    sb a1, 9(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    lh a1, 8(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    sh a2, 12(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    lbu a2, 13(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    xori a2, a2, 128
+; CHECK-ZHINXMIN-RV32-NEXT:    sb a2, 13(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    lh a2, 12(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a0, a0
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a1, a1
+; CHECK-ZHINXMIN-RV32-NEXT:    fmadd.s a0, a0, a1, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.h.s a0, a0
+; CHECK-ZHINXMIN-RV32-NEXT:    addi sp, sp, 16
+; CHECK-ZHINXMIN-RV32-NEXT:    ret
+;
+; CHECK-ZHINXMIN-RV64-LABEL: fnmadd_h_2:
+; CHECK-ZHINXMIN-RV64:       # %bb.0:
+; CHECK-ZHINXMIN-RV64-NEXT:    addi sp, sp, -16
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a1, a1
+; CHECK-ZHINXMIN-RV64-NEXT:    fadd.s a1, a1, zero
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.h.s a1, a1
+; CHECK-ZHINXMIN-RV64-NEXT:    sh a1, 0(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    lbu a1, 1(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    fadd.s a2, a2, zero
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.h.s a2, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    xori a1, a1, 128
+; CHECK-ZHINXMIN-RV64-NEXT:    sb a1, 1(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    lh a1, 0(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    sh a2, 8(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    lbu a2, 9(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    xori a2, a2, 128
+; CHECK-ZHINXMIN-RV64-NEXT:    sb a2, 9(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    lh a2, 8(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a0, a0
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a1, a1
+; CHECK-ZHINXMIN-RV64-NEXT:    fmadd.s a0, a0, a1, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.h.s a0, a0
+; CHECK-ZHINXMIN-RV64-NEXT:    addi sp, sp, 16
+; CHECK-ZHINXMIN-RV64-NEXT:    ret
   %b_ = fadd half 0.0, %b
   %c_ = fadd half 0.0, %c
   %negb = fneg half %b_
@@ -421,36 +602,83 @@ define half @fnmsub_h(half %a, half %b, half %c) nounwind strictfp {
 ; CHECK-ZHINX-NEXT:    fnmsub.h a0, a0, a1, a2
 ; CHECK-ZHINX-NEXT:    ret
 ;
-; CHECK-ZFHMIN-LABEL: fnmsub_h:
-; CHECK-ZFHMIN:       # %bb.0:
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; CHECK-ZFHMIN-NEXT:    fmv.w.x fa4, zero
-; CHECK-ZFHMIN-NEXT:    fadd.s fa5, fa5, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fneg.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa4, fa2
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa3, fa1
-; CHECK-ZFHMIN-NEXT:    fmadd.s fa5, fa5, fa3, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECK-ZFHMIN-NEXT:    ret
-;
-; CHECK-ZHINXMIN-LABEL: fnmsub_h:
-; CHECK-ZHINXMIN:       # %bb.0:
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECK-ZHINXMIN-NEXT:    fadd.s a0, a0, zero
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECK-ZHINXMIN-NEXT:    fneg.s a0, a0
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECK-ZHINXMIN-NEXT:    fmadd.s a0, a0, a1, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECK-ZHINXMIN-NEXT:    ret
+; CHECK-ZFHMIN-RV32-LABEL: fnmsub_h:
+; CHECK-ZFHMIN-RV32:       # %bb.0:
+; CHECK-ZFHMIN-RV32-NEXT:    addi sp, sp, -16
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
+; CHECK-ZFHMIN-RV32-NEXT:    fmv.w.x fa4, zero
+; CHECK-ZFHMIN-RV32-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    fsh fa5, 12(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    lbu a0, 13(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    xori a0, a0, 128
+; CHECK-ZFHMIN-RV32-NEXT:    sb a0, 13(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    flh fa5, 12(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa4, fa2
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa3, fa1
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    fmadd.s fa5, fa5, fa3, fa4
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    addi sp, sp, 16
+; CHECK-ZFHMIN-RV32-NEXT:    ret
+;
+; CHECK-ZFHMIN-RV64-LABEL: fnmsub_h:
+; CHECK-ZFHMIN-RV64:       # %bb.0:
+; CHECK-ZFHMIN-RV64-NEXT:    addi sp, sp, -16
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
+; CHECK-ZFHMIN-RV64-NEXT:    fmv.w.x fa4, zero
+; CHECK-ZFHMIN-RV64-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    fsh fa5, 8(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    lbu a0, 9(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    xori a0, a0, 128
+; CHECK-ZFHMIN-RV64-NEXT:    sb a0, 9(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    flh fa5, 8(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa4, fa2
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa3, fa1
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    fmadd.s fa5, fa5, fa3, fa4
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    addi sp, sp, 16
+; CHECK-ZFHMIN-RV64-NEXT:    ret
+;
+; CHECK-ZHINXMIN-RV32-LABEL: fnmsub_h:
+; CHECK-ZHINXMIN-RV32:       # %bb.0:
+; CHECK-ZHINXMIN-RV32-NEXT:    addi sp, sp, -16
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a0, a0
+; CHECK-ZHINXMIN-RV32-NEXT:    fadd.s a0, a0, zero
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.h.s a0, a0
+; CHECK-ZHINXMIN-RV32-NEXT:    sh a0, 12(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    lbu a0, 13(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    xori a0, a0, 128
+; CHECK-ZHINXMIN-RV32-NEXT:    sb a0, 13(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    lh a0, 12(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a1, a1
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a0, a0
+; CHECK-ZHINXMIN-RV32-NEXT:    fmadd.s a0, a0, a1, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.h.s a0, a0
+; CHECK-ZHINXMIN-RV32-NEXT:    addi sp, sp, 16
+; CHECK-ZHINXMIN-RV32-NEXT:    ret
+;
+; CHECK-ZHINXMIN-RV64-LABEL: fnmsub_h:
+; CHECK-ZHINXMIN-RV64:       # %bb.0:
+; CHECK-ZHINXMIN-RV64-NEXT:    addi sp, sp, -16
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a0, a0
+; CHECK-ZHINXMIN-RV64-NEXT:    fadd.s a0, a0, zero
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.h.s a0, a0
+; CHECK-ZHINXMIN-RV64-NEXT:    sh a0, 8(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    lbu a0, 9(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    xori a0, a0, 128
+; CHECK-ZHINXMIN-RV64-NEXT:    sb a0, 9(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    lh a0, 8(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a1, a1
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a0, a0
+; CHECK-ZHINXMIN-RV64-NEXT:    fmadd.s a0, a0, a1, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.h.s a0, a0
+; CHECK-ZHINXMIN-RV64-NEXT:    addi sp, sp, 16
+; CHECK-ZHINXMIN-RV64-NEXT:    ret
   %a_ = fadd half 0.0, %a
   %nega = fneg half %a_
   %1 = call half @llvm.experimental.constrained.fma.f16(half %nega, half %b, half %c, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
@@ -471,36 +699,83 @@ define half @fnmsub_h_2(half %a, half %b, half %c) nounwind strictfp {
 ; CHECK-ZHINX-NEXT:    fnmsub.h a0, a1, a0, a2
 ; CHECK-ZHINX-NEXT:    ret
 ;
-; CHECK-ZFHMIN-LABEL: fnmsub_h_2:
-; CHECK-ZFHMIN:       # %bb.0:
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa1
-; CHECK-ZFHMIN-NEXT:    fmv.w.x fa4, zero
-; CHECK-ZFHMIN-NEXT:    fadd.s fa5, fa5, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fneg.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa4, fa2
-; CHECK-ZFHMIN-NEXT:    fcvt.s.h fa3, fa0
-; CHECK-ZFHMIN-NEXT:    fmadd.s fa5, fa3, fa5, fa4
-; CHECK-ZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECK-ZFHMIN-NEXT:    ret
-;
-; CHECK-ZHINXMIN-LABEL: fnmsub_h_2:
-; CHECK-ZHINXMIN:       # %bb.0:
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECK-ZHINXMIN-NEXT:    fadd.s a1, a1, zero
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a1, a1
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECK-ZHINXMIN-NEXT:    fneg.s a1, a1
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a1, a1
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECK-ZHINXMIN-NEXT:    fmadd.s a0, a0, a1, a2
-; CHECK-ZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECK-ZHINXMIN-NEXT:    ret
+; CHECK-ZFHMIN-RV32-LABEL: fnmsub_h_2:
+; CHECK-ZFHMIN-RV32:       # %bb.0:
+; CHECK-ZFHMIN-RV32-NEXT:    addi sp, sp, -16
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa1
+; CHECK-ZFHMIN-RV32-NEXT:    fmv.w.x fa4, zero
+; CHECK-ZFHMIN-RV32-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    fsh fa5, 12(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    lbu a0, 13(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    xori a0, a0, 128
+; CHECK-ZFHMIN-RV32-NEXT:    sb a0, 13(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    flh fa5, 12(sp)
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa4, fa2
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    fmadd.s fa5, fa3, fa5, fa4
+; CHECK-ZFHMIN-RV32-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-ZFHMIN-RV32-NEXT:    addi sp, sp, 16
+; CHECK-ZFHMIN-RV32-NEXT:    ret
+;
+; CHECK-ZFHMIN-RV64-LABEL: fnmsub_h_2:
+; CHECK-ZFHMIN-RV64:       # %bb.0:
+; CHECK-ZFHMIN-RV64-NEXT:    addi sp, sp, -16
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa1
+; CHECK-ZFHMIN-RV64-NEXT:    fmv.w.x fa4, zero
+; CHECK-ZFHMIN-RV64-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    fsh fa5, 8(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    lbu a0, 9(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    xori a0, a0, 128
+; CHECK-ZFHMIN-RV64-NEXT:    sb a0, 9(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    flh fa5, 8(sp)
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa4, fa2
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    fmadd.s fa5, fa3, fa5, fa4
+; CHECK-ZFHMIN-RV64-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-ZFHMIN-RV64-NEXT:    addi sp, sp, 16
+; CHECK-ZFHMIN-RV64-NEXT:    ret
+;
+; CHECK-ZHINXMIN-RV32-LABEL: fnmsub_h_2:
+; CHECK-ZHINXMIN-RV32:       # %bb.0:
+; CHECK-ZHINXMIN-RV32-NEXT:    addi sp, sp, -16
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a1, a1
+; CHECK-ZHINXMIN-RV32-NEXT:    fadd.s a1, a1, zero
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.h.s a1, a1
+; CHECK-ZHINXMIN-RV32-NEXT:    sh a1, 12(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    lbu a1, 13(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    xori a1, a1, 128
+; CHECK-ZHINXMIN-RV32-NEXT:    sb a1, 13(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    lh a1, 12(sp)
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a0, a0
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.s.h a1, a1
+; CHECK-ZHINXMIN-RV32-NEXT:    fmadd.s a0, a0, a1, a2
+; CHECK-ZHINXMIN-RV32-NEXT:    fcvt.h.s a0, a0
+; CHECK-ZHINXMIN-RV32-NEXT:    addi sp, sp, 16
+; CHECK-ZHINXMIN-RV32-NEXT:    ret
+;
+; CHECK-ZHINXMIN-RV64-LABEL: fnmsub_h_2:
+; CHECK-ZHINXMIN-RV64:       # %bb.0:
+; CHECK-ZHINXMIN-RV64-NEXT:    addi sp, sp, -16
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a1, a1
+; CHECK-ZHINXMIN-RV64-NEXT:    fadd.s a1, a1, zero
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.h.s a1, a1
+; CHECK-ZHINXMIN-RV64-NEXT:    sh a1, 8(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    lbu a1, 9(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    xori a1, a1, 128
+; CHECK-ZHINXMIN-RV64-NEXT:    sb a1, 9(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    lh a1, 8(sp)
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a2, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a0, a0
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.s.h a1, a1
+; CHECK-ZHINXMIN-RV64-NEXT:    fmadd.s a0, a0, a1, a2
+; CHECK-ZHINXMIN-RV64-NEXT:    fcvt.h.s a0, a0
+; CHECK-ZHINXMIN-RV64-NEXT:    addi sp, sp, 16
+; CHECK-ZHINXMIN-RV64-NEXT:    ret
   %b_ = fadd half 0.0, %b
   %negb = fneg half %b_
   %1 = call half @llvm.experimental.constrained.fma.f16(half %a, half %negb, half %c, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll
index 10e63e3a9f7483..59981a282ab43e 100644
--- a/llvm/test/CodeGen/RISCV/half-arith.ll
+++ b/llvm/test/CodeGen/RISCV/half-arith.ll
@@ -630,29 +630,39 @@ define i32 @fneg_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
-; CHECKIZFHMIN-LABEL: fneg_s:
-; CHECKIZFHMIN:       # %bb.0:
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; CHECKIZFHMIN-NEXT:    fadd.s fa5, fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fneg.s fa4, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa4, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa4, fa4
-; CHECKIZFHMIN-NEXT:    feq.s a0, fa5, fa4
-; CHECKIZFHMIN-NEXT:    ret
+; CHECK-RV32-FSGNJ-LABEL: fneg_s:
+; CHECK-RV32-FSGNJ:       # %bb.0:
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa0
+; CHECK-RV32-FSGNJ-NEXT:    fadd.s fa5, fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fsh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    lbu a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV32-FSGNJ-NEXT:    sb a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    flh fa4, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa4, fa4
+; CHECK-RV32-FSGNJ-NEXT:    feq.s a0, fa5, fa4
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV32-FSGNJ-NEXT:    ret
 ;
-; CHECKZHINXMIN-LABEL: fneg_s:
-; CHECKZHINXMIN:       # %bb.0:
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fadd.s a0, a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fneg.s a1, a0
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a1, a1
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECKZHINXMIN-NEXT:    feq.s a0, a0, a1
-; CHECKZHINXMIN-NEXT:    ret
+; CHECK-RV64-FSGNJ-LABEL: fneg_s:
+; CHECK-RV64-FSGNJ:       # %bb.0:
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa0
+; CHECK-RV64-FSGNJ-NEXT:    fadd.s fa5, fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fsh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV64-FSGNJ-NEXT:    sb a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    flh fa4, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa4, fa4
+; CHECK-RV64-FSGNJ-NEXT:    feq.s a0, fa5, fa4
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV64-FSGNJ-NEXT:    ret
 ; CHECK-ZHINXMIN-LABEL: fneg_s:
 ; CHECK-ZHINXMIN:       # %bb.0:
 ; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a0, a0
@@ -771,9 +781,11 @@ define half @fsgnjn_s(half %a, half %b) nounwind {
 ; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa4, fa0
 ; CHECK-RV32-FSGNJ-NEXT:    fadd.s fa5, fa4, fa5
 ; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
-; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
-; CHECK-RV32-FSGNJ-NEXT:    fneg.s fa5, fa5
-; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fsh fa5, 4(sp)
+; CHECK-RV32-FSGNJ-NEXT:    lbu a0, 5(sp)
+; CHECK-RV32-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV32-FSGNJ-NEXT:    sb a0, 5(sp)
+; CHECK-RV32-FSGNJ-NEXT:    flh fa5, 4(sp)
 ; CHECK-RV32-FSGNJ-NEXT:    fsh fa0, 8(sp)
 ; CHECK-RV32-FSGNJ-NEXT:    fsh fa5, 12(sp)
 ; CHECK-RV32-FSGNJ-NEXT:    lbu a0, 9(sp)
@@ -788,24 +800,26 @@ define half @fsgnjn_s(half %a, half %b) nounwind {
 ;
 ; CHECK-RV64-FSGNJ-LABEL: fsgnjn_s:
 ; CHECK-RV64-FSGNJ:       # %bb.0:
-; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, -32
 ; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa1
 ; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa4, fa0
 ; CHECK-RV64-FSGNJ-NEXT:    fadd.s fa5, fa4, fa5
 ; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
-; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
-; CHECK-RV64-FSGNJ-NEXT:    fneg.s fa5, fa5
-; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
-; CHECK-RV64-FSGNJ-NEXT:    fsh fa0, 0(sp)
 ; CHECK-RV64-FSGNJ-NEXT:    fsh fa5, 8(sp)
-; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 1(sp)
-; CHECK-RV64-FSGNJ-NEXT:    lbu a1, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV64-FSGNJ-NEXT:    sb a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    flh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fsh fa0, 16(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fsh fa5, 24(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 17(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a1, 25(sp)
 ; CHECK-RV64-FSGNJ-NEXT:    andi a0, a0, 127
 ; CHECK-RV64-FSGNJ-NEXT:    andi a1, a1, 128
 ; CHECK-RV64-FSGNJ-NEXT:    or a0, a0, a1
-; CHECK-RV64-FSGNJ-NEXT:    sb a0, 1(sp)
-; CHECK-RV64-FSGNJ-NEXT:    flh fa0, 0(sp)
-; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV64-FSGNJ-NEXT:    sb a0, 17(sp)
+; CHECK-RV64-FSGNJ-NEXT:    flh fa0, 16(sp)
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, 32
 ; CHECK-RV64-FSGNJ-NEXT:    ret
 ; CHECK-ZHINXMIN-LABEL: fsgnjn_s:
 ; CHECK-ZHINXMIN:       # %bb.0:
@@ -971,33 +985,43 @@ define half @fabs_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
-; CHECKIZFHMIN-LABEL: fabs_s:
-; CHECKIZFHMIN:       # %bb.0:
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa1
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa4, fa0
-; CHECKIZFHMIN-NEXT:    fadd.s fa5, fa4, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fabs.s fa4, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa4, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa4, fa4
-; CHECKIZFHMIN-NEXT:    fadd.s fa5, fa4, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECKIZFHMIN-NEXT:    ret
+; CHECK-RV32-FSGNJ-LABEL: fabs_s:
+; CHECK-RV32-FSGNJ:       # %bb.0:
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa1
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa4, fa0
+; CHECK-RV32-FSGNJ-NEXT:    fadd.s fa5, fa4, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fsh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    lbu a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    andi a0, a0, 127
+; CHECK-RV32-FSGNJ-NEXT:    sb a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    flh fa4, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa4, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fadd.s fa5, fa4, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV32-FSGNJ-NEXT:    ret
 ;
-; CHECKZHINXMIN-LABEL: fabs_s:
-; CHECKZHINXMIN:       # %bb.0:
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fadd.s a0, a0, a1
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fabs.s a1, a0
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a1, a1
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECKZHINXMIN-NEXT:    fadd.s a0, a1, a0
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    ret
+; CHECK-RV64-FSGNJ-LABEL: fabs_s:
+; CHECK-RV64-FSGNJ:       # %bb.0:
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa1
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa4, fa0
+; CHECK-RV64-FSGNJ-NEXT:    fadd.s fa5, fa4, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fsh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    andi a0, a0, 127
+; CHECK-RV64-FSGNJ-NEXT:    sb a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    flh fa4, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa4, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fadd.s fa5, fa4, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV64-FSGNJ-NEXT:    ret
 ; CHECK-ZHINXMIN-LABEL: fabs_s:
 ; CHECK-ZHINXMIN:       # %bb.0:
 ; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a1, a1
@@ -1409,36 +1433,45 @@ define half @fmsub_s(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; CHECKIZFHMIN-LABEL: fmsub_s:
-; CHECKIZFHMIN:       # %bb.0:
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa2
-; CHECKIZFHMIN-NEXT:    fmv.w.x fa4, zero
-; CHECKIZFHMIN-NEXT:    fadd.s fa5, fa5, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fneg.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa4, fa1
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa3, fa0
-; CHECKIZFHMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECKIZFHMIN-NEXT:    ret
+; CHECK-RV32-FSGNJ-LABEL: fmsub_s:
+; CHECK-RV32-FSGNJ:       # %bb.0:
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-RV32-FSGNJ-NEXT:    fmv.w.x fa4, zero
+; CHECK-RV32-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fsh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    lbu a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV32-FSGNJ-NEXT:    sb a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    flh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa4, fa1
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV32-FSGNJ-NEXT:    ret
 ;
-; CHECKZHINXMIN-LABEL: fmsub_s:
-; CHECKZHINXMIN:       # %bb.0:
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECKZHINXMIN-NEXT:    fadd.s a2, a2, zero
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECKZHINXMIN-NEXT:    fneg.s a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fmadd.s a0, a0, a1, a2
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    ret
+; CHECK-RV64-FSGNJ-LABEL: fmsub_s:
+; CHECK-RV64-FSGNJ:       # %bb.0:
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-RV64-FSGNJ-NEXT:    fmv.w.x fa4, zero
+; CHECK-RV64-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fsh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV64-FSGNJ-NEXT:    sb a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    flh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa4, fa1
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV64-FSGNJ-NEXT:    ret
 ; CHECK-ZHINXMIN-LABEL: fmsub_s:
 ; CHECK-ZHINXMIN:       # %bb.0:
 ; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a2, a2
@@ -1591,48 +1624,61 @@ define half @fnmadd_s(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; CHECKIZFHMIN-LABEL: fnmadd_s:
-; CHECKIZFHMIN:       # %bb.0:
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; CHECKIZFHMIN-NEXT:    fmv.w.x fa4, zero
-; CHECKIZFHMIN-NEXT:    fadd.s fa5, fa5, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa3, fa2
-; CHECKIZFHMIN-NEXT:    fadd.s fa4, fa3, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa4, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fneg.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa4, fa4
-; CHECKIZFHMIN-NEXT:    fneg.s fa4, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa4, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa4, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa3, fa1
-; CHECKIZFHMIN-NEXT:    fmadd.s fa5, fa5, fa3, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECKIZFHMIN-NEXT:    ret
+; CHECK-RV32-FSGNJ-LABEL: fnmadd_s:
+; CHECK-RV32-FSGNJ:       # %bb.0:
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa0
+; CHECK-RV32-FSGNJ-NEXT:    fmv.w.x fa4, zero
+; CHECK-RV32-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fsh fa5, 8(sp)
+; CHECK-RV32-FSGNJ-NEXT:    lbu a0, 9(sp)
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-RV32-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV32-FSGNJ-NEXT:    sb a0, 9(sp)
+; CHECK-RV32-FSGNJ-NEXT:    flh fa4, 8(sp)
+; CHECK-RV32-FSGNJ-NEXT:    fsh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    lbu a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV32-FSGNJ-NEXT:    sb a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    flh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa3, fa1
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa4, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fmadd.s fa5, fa4, fa3, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV32-FSGNJ-NEXT:    ret
 ;
-; CHECKZHINXMIN-LABEL: fnmadd_s:
-; CHECKZHINXMIN:       # %bb.0:
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fadd.s a0, a0, zero
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECKZHINXMIN-NEXT:    fadd.s a2, a2, zero
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fneg.s a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECKZHINXMIN-NEXT:    fneg.s a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECKZHINXMIN-NEXT:    fmadd.s a0, a0, a1, a2
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    ret
+; CHECK-RV64-FSGNJ-LABEL: fnmadd_s:
+; CHECK-RV64-FSGNJ:       # %bb.0:
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa0
+; CHECK-RV64-FSGNJ-NEXT:    fmv.w.x fa4, zero
+; CHECK-RV64-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fsh fa5, 0(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 1(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-RV64-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV64-FSGNJ-NEXT:    sb a0, 1(sp)
+; CHECK-RV64-FSGNJ-NEXT:    flh fa4, 0(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fsh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV64-FSGNJ-NEXT:    sb a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    flh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa3, fa1
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa4, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fmadd.s fa5, fa4, fa3, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV64-FSGNJ-NEXT:    ret
 ; CHECK-ZHINXMIN-LABEL: fnmadd_s:
 ; CHECK-ZHINXMIN:       # %bb.0:
 ; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a0, a0
@@ -1793,48 +1839,61 @@ define half @fnmadd_s_2(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; CHECKIZFHMIN-LABEL: fnmadd_s_2:
-; CHECKIZFHMIN:       # %bb.0:
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa1
-; CHECKIZFHMIN-NEXT:    fmv.w.x fa4, zero
-; CHECKIZFHMIN-NEXT:    fadd.s fa5, fa5, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa3, fa2
-; CHECKIZFHMIN-NEXT:    fadd.s fa4, fa3, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa4, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fneg.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa4, fa4
-; CHECKIZFHMIN-NEXT:    fneg.s fa4, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa4, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa4, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa3, fa0
-; CHECKIZFHMIN-NEXT:    fmadd.s fa5, fa3, fa5, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECKIZFHMIN-NEXT:    ret
+; CHECK-RV32-FSGNJ-LABEL: fnmadd_s_2:
+; CHECK-RV32-FSGNJ:       # %bb.0:
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa1
+; CHECK-RV32-FSGNJ-NEXT:    fmv.w.x fa4, zero
+; CHECK-RV32-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fsh fa5, 8(sp)
+; CHECK-RV32-FSGNJ-NEXT:    lbu a0, 9(sp)
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-RV32-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV32-FSGNJ-NEXT:    sb a0, 9(sp)
+; CHECK-RV32-FSGNJ-NEXT:    flh fa4, 8(sp)
+; CHECK-RV32-FSGNJ-NEXT:    fsh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    lbu a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV32-FSGNJ-NEXT:    sb a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    flh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa4, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV32-FSGNJ-NEXT:    ret
 ;
-; CHECKZHINXMIN-LABEL: fnmadd_s_2:
-; CHECKZHINXMIN:       # %bb.0:
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECKZHINXMIN-NEXT:    fadd.s a1, a1, zero
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a1, a1
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECKZHINXMIN-NEXT:    fadd.s a2, a2, zero
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECKZHINXMIN-NEXT:    fneg.s a1, a1
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a1, a1
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECKZHINXMIN-NEXT:    fneg.s a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fmadd.s a0, a0, a1, a2
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    ret
+; CHECK-RV64-FSGNJ-LABEL: fnmadd_s_2:
+; CHECK-RV64-FSGNJ:       # %bb.0:
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa1
+; CHECK-RV64-FSGNJ-NEXT:    fmv.w.x fa4, zero
+; CHECK-RV64-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fsh fa5, 0(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 1(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-RV64-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV64-FSGNJ-NEXT:    sb a0, 1(sp)
+; CHECK-RV64-FSGNJ-NEXT:    flh fa4, 0(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fsh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV64-FSGNJ-NEXT:    sb a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    flh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa4, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV64-FSGNJ-NEXT:    ret
 ; CHECK-ZHINXMIN-LABEL: fnmadd_s_2:
 ; CHECK-ZHINXMIN:       # %bb.0:
 ; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a1, a1
@@ -1959,17 +2018,37 @@ define half @fnmadd_s_3(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; CHECKIZFHMIN-LABEL: fnmadd_s_3:
-; CHECKIZFHMIN:       # %bb.0:
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa2
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa4, fa1
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa3, fa0
-; CHECKIZFHMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fneg.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECKIZFHMIN-NEXT:    ret
+; CHECK-RV32-FSGNJ-LABEL: fnmadd_s_3:
+; CHECK-RV32-FSGNJ:       # %bb.0:
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa4, fa1
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-RV32-FSGNJ-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fsh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    lbu a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV32-FSGNJ-NEXT:    sb a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    flh fa0, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV32-FSGNJ-NEXT:    ret
+;
+; CHECK-RV64-FSGNJ-LABEL: fnmadd_s_3:
+; CHECK-RV64-FSGNJ:       # %bb.0:
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa4, fa1
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-RV64-FSGNJ-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fsh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV64-FSGNJ-NEXT:    sb a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    flh fa0, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV64-FSGNJ-NEXT:    ret
 ;
 ; CHECKZHINXMIN-LABEL: fnmadd_s_3:
 ; CHECKZHINXMIN:       # %bb.0:
@@ -2090,17 +2169,37 @@ define half @fnmadd_nsz(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; CHECKIZFHMIN-LABEL: fnmadd_nsz:
-; CHECKIZFHMIN:       # %bb.0:
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa2
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa4, fa1
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa3, fa0
-; CHECKIZFHMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fneg.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECKIZFHMIN-NEXT:    ret
+; CHECK-RV32-FSGNJ-LABEL: fnmadd_nsz:
+; CHECK-RV32-FSGNJ:       # %bb.0:
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa4, fa1
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-RV32-FSGNJ-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fsh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    lbu a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV32-FSGNJ-NEXT:    sb a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    flh fa0, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV32-FSGNJ-NEXT:    ret
+;
+; CHECK-RV64-FSGNJ-LABEL: fnmadd_nsz:
+; CHECK-RV64-FSGNJ:       # %bb.0:
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa4, fa1
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-RV64-FSGNJ-NEXT:    fmadd.s fa5, fa3, fa4, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fsh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV64-FSGNJ-NEXT:    sb a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    flh fa0, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV64-FSGNJ-NEXT:    ret
 ;
 ; CHECKZHINXMIN-LABEL: fnmadd_nsz:
 ; CHECKZHINXMIN:       # %bb.0:
@@ -2227,36 +2326,45 @@ define half @fnmsub_s(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; CHECKIZFHMIN-LABEL: fnmsub_s:
-; CHECKIZFHMIN:       # %bb.0:
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; CHECKIZFHMIN-NEXT:    fmv.w.x fa4, zero
-; CHECKIZFHMIN-NEXT:    fadd.s fa5, fa5, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fneg.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa4, fa2
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa3, fa1
-; CHECKIZFHMIN-NEXT:    fmadd.s fa5, fa5, fa3, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECKIZFHMIN-NEXT:    ret
+; CHECK-RV32-FSGNJ-LABEL: fnmsub_s:
+; CHECK-RV32-FSGNJ:       # %bb.0:
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa0
+; CHECK-RV32-FSGNJ-NEXT:    fmv.w.x fa4, zero
+; CHECK-RV32-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fsh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    lbu a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV32-FSGNJ-NEXT:    sb a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    flh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa4, fa2
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa3, fa1
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fmadd.s fa5, fa5, fa3, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV32-FSGNJ-NEXT:    ret
 ;
-; CHECKZHINXMIN-LABEL: fnmsub_s:
-; CHECKZHINXMIN:       # %bb.0:
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fadd.s a0, a0, zero
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fneg.s a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECKZHINXMIN-NEXT:    fmadd.s a0, a0, a1, a2
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    ret
+; CHECK-RV64-FSGNJ-LABEL: fnmsub_s:
+; CHECK-RV64-FSGNJ:       # %bb.0:
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa0
+; CHECK-RV64-FSGNJ-NEXT:    fmv.w.x fa4, zero
+; CHECK-RV64-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fsh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV64-FSGNJ-NEXT:    sb a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    flh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa4, fa2
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa3, fa1
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fmadd.s fa5, fa5, fa3, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV64-FSGNJ-NEXT:    ret
 ; CHECK-ZHINXMIN-LABEL: fnmsub_s:
 ; CHECK-ZHINXMIN:       # %bb.0:
 ; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a0, a0
@@ -2379,36 +2487,45 @@ define half @fnmsub_s_2(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; CHECKIZFHMIN-LABEL: fnmsub_s_2:
-; CHECKIZFHMIN:       # %bb.0:
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa1
-; CHECKIZFHMIN-NEXT:    fmv.w.x fa4, zero
-; CHECKIZFHMIN-NEXT:    fadd.s fa5, fa5, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fneg.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa4, fa2
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa3, fa0
-; CHECKIZFHMIN-NEXT:    fmadd.s fa5, fa3, fa5, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECKIZFHMIN-NEXT:    ret
+; CHECK-RV32-FSGNJ-LABEL: fnmsub_s_2:
+; CHECK-RV32-FSGNJ:       # %bb.0:
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa1
+; CHECK-RV32-FSGNJ-NEXT:    fmv.w.x fa4, zero
+; CHECK-RV32-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fsh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    lbu a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV32-FSGNJ-NEXT:    sb a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    flh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa4, fa2
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fmadd.s fa5, fa3, fa5, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV32-FSGNJ-NEXT:    ret
 ;
-; CHECKZHINXMIN-LABEL: fnmsub_s_2:
-; CHECKZHINXMIN:       # %bb.0:
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECKZHINXMIN-NEXT:    fadd.s a1, a1, zero
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a1, a1
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECKZHINXMIN-NEXT:    fneg.s a1, a1
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a1, a1
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fmadd.s a0, a0, a1, a2
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    ret
+; CHECK-RV64-FSGNJ-LABEL: fnmsub_s_2:
+; CHECK-RV64-FSGNJ:       # %bb.0:
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa1
+; CHECK-RV64-FSGNJ-NEXT:    fmv.w.x fa4, zero
+; CHECK-RV64-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fsh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV64-FSGNJ-NEXT:    sb a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    flh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa4, fa2
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa3, fa0
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fmadd.s fa5, fa3, fa5, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV64-FSGNJ-NEXT:    ret
 ; CHECK-ZHINXMIN-LABEL: fnmsub_s_2:
 ; CHECK-ZHINXMIN:       # %bb.0:
 ; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a1, a1
@@ -2847,54 +2964,63 @@ define half @fnmadd_s_contract(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; CHECKIZFHMIN-LABEL: fnmadd_s_contract:
-; CHECKIZFHMIN:       # %bb.0:
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; CHECKIZFHMIN-NEXT:    fmv.w.x fa4, zero
-; CHECKIZFHMIN-NEXT:    fadd.s fa5, fa5, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa3, fa1
-; CHECKIZFHMIN-NEXT:    fadd.s fa3, fa3, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa3, fa3
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa2, fa2
-; CHECKIZFHMIN-NEXT:    fadd.s fa4, fa2, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa4, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa3, fa3
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fmul.s fa5, fa5, fa3
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fneg.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa4, fa4
-; CHECKIZFHMIN-NEXT:    fsub.s fa5, fa5, fa4
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECKIZFHMIN-NEXT:    ret
+; CHECK-RV32-FSGNJ-LABEL: fnmadd_s_contract:
+; CHECK-RV32-FSGNJ:       # %bb.0:
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa0
+; CHECK-RV32-FSGNJ-NEXT:    fmv.w.x fa4, zero
+; CHECK-RV32-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa3, fa1
+; CHECK-RV32-FSGNJ-NEXT:    fadd.s fa3, fa3, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa3, fa3
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa3, fa3
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fmul.s fa5, fa5, fa3
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fsh fa5, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    lbu a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-RV32-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV32-FSGNJ-NEXT:    sb a0, 13(sp)
+; CHECK-RV32-FSGNJ-NEXT:    flh fa3, 12(sp)
+; CHECK-RV32-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.s.h fa4, fa3
+; CHECK-RV32-FSGNJ-NEXT:    fsub.s fa5, fa4, fa5
+; CHECK-RV32-FSGNJ-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-RV32-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV32-FSGNJ-NEXT:    ret
 ;
-; CHECKZHINXMIN-LABEL: fnmadd_s_contract:
-; CHECKZHINXMIN:       # %bb.0:
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fadd.s a0, a0, zero
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECKZHINXMIN-NEXT:    fadd.s a1, a1, zero
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a1, a1
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a2, a2
-; CHECKZHINXMIN-NEXT:    fadd.s a2, a2, zero
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a2, a2
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fmul.s a0, a0, a1
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fneg.s a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECKZHINXMIN-NEXT:    fcvt.s.h a1, a2
-; CHECKZHINXMIN-NEXT:    fsub.s a0, a0, a1
-; CHECKZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; CHECKZHINXMIN-NEXT:    ret
+; CHECK-RV64-FSGNJ-LABEL: fnmadd_s_contract:
+; CHECK-RV64-FSGNJ:       # %bb.0:
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, -16
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa0
+; CHECK-RV64-FSGNJ-NEXT:    fmv.w.x fa4, zero
+; CHECK-RV64-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa3, fa1
+; CHECK-RV64-FSGNJ-NEXT:    fadd.s fa3, fa3, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa3, fa3
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa3, fa3
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fmul.s fa5, fa5, fa3
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fsh fa5, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    lbu a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa2
+; CHECK-RV64-FSGNJ-NEXT:    xori a0, a0, 128
+; CHECK-RV64-FSGNJ-NEXT:    sb a0, 9(sp)
+; CHECK-RV64-FSGNJ-NEXT:    flh fa3, 8(sp)
+; CHECK-RV64-FSGNJ-NEXT:    fadd.s fa5, fa5, fa4
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa5, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.s.h fa4, fa3
+; CHECK-RV64-FSGNJ-NEXT:    fsub.s fa5, fa4, fa5
+; CHECK-RV64-FSGNJ-NEXT:    fcvt.h.s fa0, fa5
+; CHECK-RV64-FSGNJ-NEXT:    addi sp, sp, 16
+; CHECK-RV64-FSGNJ-NEXT:    ret
 ; CHECK-ZHINXMIN-LABEL: fnmadd_s_contract:
 ; CHECK-ZHINXMIN:       # %bb.0:
 ; CHECK-ZHINXMIN-NEXT:    fcvt.s.h a0, a0
diff --git a/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll
index a103a9e09d1498..c824e7f9845951 100644
--- a/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll
+++ b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll
@@ -208,13 +208,15 @@ define half @fcopysign_fneg(half %a, half %b) nounwind {
 ; RV32IZFHMIN-LABEL: fcopysign_fneg:
 ; RV32IZFHMIN:       # %bb.0:
 ; RV32IZFHMIN-NEXT:    addi sp, sp, -16
-; RV32IZFHMIN-NEXT:    fmv.h.x fa5, a0
-; RV32IZFHMIN-NEXT:    fmv.h.x fa4, a1
-; RV32IZFHMIN-NEXT:    fcvt.s.h fa4, fa4
-; RV32IZFHMIN-NEXT:    fneg.s fa4, fa4
-; RV32IZFHMIN-NEXT:    fcvt.h.s fa4, fa4
-; RV32IZFHMIN-NEXT:    fsh fa5, 8(sp)
-; RV32IZFHMIN-NEXT:    fsh fa4, 12(sp)
+; RV32IZFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32IZFHMIN-NEXT:    fsh fa5, 4(sp)
+; RV32IZFHMIN-NEXT:    lbu a1, 5(sp)
+; RV32IZFHMIN-NEXT:    xori a1, a1, 128
+; RV32IZFHMIN-NEXT:    sb a1, 5(sp)
+; RV32IZFHMIN-NEXT:    flh fa5, 4(sp)
+; RV32IZFHMIN-NEXT:    fmv.h.x fa4, a0
+; RV32IZFHMIN-NEXT:    fsh fa4, 8(sp)
+; RV32IZFHMIN-NEXT:    fsh fa5, 12(sp)
 ; RV32IZFHMIN-NEXT:    lbu a0, 9(sp)
 ; RV32IZFHMIN-NEXT:    lbu a1, 13(sp)
 ; RV32IZFHMIN-NEXT:    andi a0, a0, 127
@@ -228,31 +230,35 @@ define half @fcopysign_fneg(half %a, half %b) nounwind {
 ;
 ; RV64IZFHMIN-LABEL: fcopysign_fneg:
 ; RV64IZFHMIN:       # %bb.0:
-; RV64IZFHMIN-NEXT:    addi sp, sp, -16
-; RV64IZFHMIN-NEXT:    fmv.h.x fa5, a0
-; RV64IZFHMIN-NEXT:    fmv.h.x fa4, a1
-; RV64IZFHMIN-NEXT:    fcvt.s.h fa4, fa4
-; RV64IZFHMIN-NEXT:    fneg.s fa4, fa4
-; RV64IZFHMIN-NEXT:    fcvt.h.s fa4, fa4
-; RV64IZFHMIN-NEXT:    fsh fa5, 0(sp)
-; RV64IZFHMIN-NEXT:    fsh fa4, 8(sp)
-; RV64IZFHMIN-NEXT:    lbu a0, 1(sp)
+; RV64IZFHMIN-NEXT:    addi sp, sp, -32
+; RV64IZFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64IZFHMIN-NEXT:    fsh fa5, 8(sp)
 ; RV64IZFHMIN-NEXT:    lbu a1, 9(sp)
+; RV64IZFHMIN-NEXT:    xori a1, a1, 128
+; RV64IZFHMIN-NEXT:    sb a1, 9(sp)
+; RV64IZFHMIN-NEXT:    flh fa5, 8(sp)
+; RV64IZFHMIN-NEXT:    fmv.h.x fa4, a0
+; RV64IZFHMIN-NEXT:    fsh fa4, 16(sp)
+; RV64IZFHMIN-NEXT:    fsh fa5, 24(sp)
+; RV64IZFHMIN-NEXT:    lbu a0, 17(sp)
+; RV64IZFHMIN-NEXT:    lbu a1, 25(sp)
 ; RV64IZFHMIN-NEXT:    andi a0, a0, 127
 ; RV64IZFHMIN-NEXT:    andi a1, a1, 128
 ; RV64IZFHMIN-NEXT:    or a0, a0, a1
-; RV64IZFHMIN-NEXT:    sb a0, 1(sp)
-; RV64IZFHMIN-NEXT:    flh fa5, 0(sp)
+; RV64IZFHMIN-NEXT:    sb a0, 17(sp)
+; RV64IZFHMIN-NEXT:    flh fa5, 16(sp)
 ; RV64IZFHMIN-NEXT:    fmv.x.h a0, fa5
-; RV64IZFHMIN-NEXT:    addi sp, sp, 16
+; RV64IZFHMIN-NEXT:    addi sp, sp, 32
 ; RV64IZFHMIN-NEXT:    ret
 ;
 ; RV32IZHINXMIN-LABEL: fcopysign_fneg:
 ; RV32IZHINXMIN:       # %bb.0:
 ; RV32IZHINXMIN-NEXT:    addi sp, sp, -16
-; RV32IZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; RV32IZHINXMIN-NEXT:    fneg.s a1, a1
-; RV32IZHINXMIN-NEXT:    fcvt.h.s a1, a1
+; RV32IZHINXMIN-NEXT:    sh a1, 4(sp)
+; RV32IZHINXMIN-NEXT:    lbu a1, 5(sp)
+; RV32IZHINXMIN-NEXT:    xori a1, a1, 128
+; RV32IZHINXMIN-NEXT:    sb a1, 5(sp)
+; RV32IZHINXMIN-NEXT:    lh a1, 4(sp)
 ; RV32IZHINXMIN-NEXT:    sh a0, 8(sp)
 ; RV32IZHINXMIN-NEXT:    sh a1, 12(sp)
 ; RV32IZHINXMIN-NEXT:    lbu a0, 9(sp)
@@ -267,20 +273,22 @@ define half @fcopysign_fneg(half %a, half %b) nounwind {
 ;
 ; RV64IZHINXMIN-LABEL: fcopysign_fneg:
 ; RV64IZHINXMIN:       # %bb.0:
-; RV64IZHINXMIN-NEXT:    addi sp, sp, -16
-; RV64IZHINXMIN-NEXT:    fcvt.s.h a1, a1
-; RV64IZHINXMIN-NEXT:    fneg.s a1, a1
-; RV64IZHINXMIN-NEXT:    fcvt.h.s a1, a1
-; RV64IZHINXMIN-NEXT:    sh a0, 0(sp)
+; RV64IZHINXMIN-NEXT:    addi sp, sp, -32
 ; RV64IZHINXMIN-NEXT:    sh a1, 8(sp)
-; RV64IZHINXMIN-NEXT:    lbu a0, 1(sp)
 ; RV64IZHINXMIN-NEXT:    lbu a1, 9(sp)
+; RV64IZHINXMIN-NEXT:    xori a1, a1, 128
+; RV64IZHINXMIN-NEXT:    sb a1, 9(sp)
+; RV64IZHINXMIN-NEXT:    lh a1, 8(sp)
+; RV64IZHINXMIN-NEXT:    sh a0, 16(sp)
+; RV64IZHINXMIN-NEXT:    sh a1, 24(sp)
+; RV64IZHINXMIN-NEXT:    lbu a0, 17(sp)
+; RV64IZHINXMIN-NEXT:    lbu a1, 25(sp)
 ; RV64IZHINXMIN-NEXT:    andi a0, a0, 127
 ; RV64IZHINXMIN-NEXT:    andi a1, a1, 128
 ; RV64IZHINXMIN-NEXT:    or a0, a0, a1
-; RV64IZHINXMIN-NEXT:    sb a0, 1(sp)
-; RV64IZHINXMIN-NEXT:    lh a0, 0(sp)
-; RV64IZHINXMIN-NEXT:    addi sp, sp, 16
+; RV64IZHINXMIN-NEXT:    sb a0, 17(sp)
+; RV64IZHINXMIN-NEXT:    lh a0, 16(sp)
+; RV64IZHINXMIN-NEXT:    addi sp, sp, 32
 ; RV64IZHINXMIN-NEXT:    ret
   %1 = fneg half %b
   %2 = call half @llvm.copysign.f16(half %a, half %1)
diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
index 4587c442cda5b3..7f1eebdf64a551 100644
--- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
@@ -1821,12 +1821,27 @@ define half @fabs_f16(half %a) nounwind {
 ; RV64I-NEXT:    srli a0, a0, 49
 ; RV64I-NEXT:    ret
 ;
-; CHECKIZFHMIN-LABEL: fabs_f16:
-; CHECKIZFHMIN:       # %bb.0:
-; CHECKIZFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; CHECKIZFHMIN-NEXT:    fabs.s fa5, fa5
-; CHECKIZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; CHECKIZFHMIN-NEXT:    ret
+; RV32IZFHMIN-LABEL: fabs_f16:
+; RV32IZFHMIN:       # %bb.0:
+; RV32IZFHMIN-NEXT:    addi sp, sp, -16
+; RV32IZFHMIN-NEXT:    fsh fa0, 12(sp)
+; RV32IZFHMIN-NEXT:    lbu a0, 13(sp)
+; RV32IZFHMIN-NEXT:    andi a0, a0, 127
+; RV32IZFHMIN-NEXT:    sb a0, 13(sp)
+; RV32IZFHMIN-NEXT:    flh fa0, 12(sp)
+; RV32IZFHMIN-NEXT:    addi sp, sp, 16
+; RV32IZFHMIN-NEXT:    ret
+;
+; RV64IZFHMIN-LABEL: fabs_f16:
+; RV64IZFHMIN:       # %bb.0:
+; RV64IZFHMIN-NEXT:    addi sp, sp, -16
+; RV64IZFHMIN-NEXT:    fsh fa0, 8(sp)
+; RV64IZFHMIN-NEXT:    lbu a0, 9(sp)
+; RV64IZFHMIN-NEXT:    andi a0, a0, 127
+; RV64IZFHMIN-NEXT:    sb a0, 9(sp)
+; RV64IZFHMIN-NEXT:    flh fa0, 8(sp)
+; RV64IZFHMIN-NEXT:    addi sp, sp, 16
+; RV64IZFHMIN-NEXT:    ret
 ;
 ; RV32IZHINXMIN-LABEL: fabs_f16:
 ; RV32IZHINXMIN:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index cdbca0b874e607..fb9c0a57fd1bee 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -650,38 +650,165 @@ define void @fabs_v6f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-RV32-LABEL: fabs_v6f16:
-; ZVFHMIN-RV32:       # %bb.0:
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfabs.v v8, v9
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT:    ret
+; ZVFHMIN-ZFH-RV32-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfabs.v v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-ZFH-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
 ;
-; ZVFHMIN-RV64-LABEL: fabs_v6f16:
-; ZVFHMIN-RV64:       # %bb.0:
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfabs.v v8, v9
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    ret
+; ZVFHMIN-ZFH-RV64-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfabs.v v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-ZFH-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 54(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa2, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 48
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 46(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 42(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 40
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -80
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 80
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 74(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 70(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 66(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 64
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x

>From ebfddfd88d794efbe8834644685ba6c186ad8d52 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 29 Aug 2024 08:16:23 -0700
Subject: [PATCH 2/2] fixup! remove comment.

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 15ce730e23dd3b..5f18207262013f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -417,8 +417,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FREM, MVT::bf16, Promote);
     setOperationAction(ISD::FABS, MVT::bf16, Expand);
     setOperationAction(ISD::FNEG, MVT::bf16, Expand);
-    // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
-    // DAGCombiner::visitFP_ROUND probably needs improvements first.
     setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
   }
 
@@ -436,8 +434,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                          MVT::f16, Legal);
       setOperationAction(ISD::FABS, MVT::f16, Expand);
       setOperationAction(ISD::FNEG, MVT::f16, Expand);
-      // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
-      // DAGCombiner::visitFP_ROUND probably needs improvements first.
       setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
     }
 



More information about the llvm-commits mailing list