[llvm] bb5bbe5 - [RISCV][GISel] Support s32/s64 G_FSUB/FDIV/FNEG without F/D extensions.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 24 18:24:43 PST 2024


Author: Craig Topper
Date: 2024-11-24T18:22:12-08:00
New Revision: bb5bbe523d4437d72287f89fbaa277aaa71c0bd2

URL: https://github.com/llvm/llvm-project/commit/bb5bbe523d4437d72287f89fbaa277aaa71c0bd2
DIFF: https://github.com/llvm/llvm-project/commit/bb5bbe523d4437d72287f89fbaa277aaa71c0bd2.diff

LOG: [RISCV][GISel] Support s32/s64 G_FSUB/FDIV/FNEG without F/D extensions.

Use libcalls for G_FSUB and G_FDIV. Use integer operations (an xor of the sign bit) for G_FNEG.

Copy most of the IR tests for arithmetic from SelectionDAG.
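
For illustration only (not part of the patch), the effect of the new
legalization on a target without the D extension roughly amounts to the
following, based on the RV64I checks in the added double-arith.ll test.
The function and value names are made up for the example:

    ; fneg is legalized to an integer xor of the sign bit; fdiv becomes a
    ; libcall to the compiler-rt helper __divdf3.
    define double @example(double %a, double %b) nounwind {
      %na = fneg double %a      ; -> li/slli builds 1 << 63, then xor
      %q = fdiv double %na, %b  ; -> call __divdf3
      ret double %q
    }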

Added: 
    llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll
    llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll

Modified: 
    llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
    llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index c5bef80f8f48a6..a9294e76f8763f 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -491,20 +491,14 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
 
   // FP Operations
 
-  getActionDefinitionsBuilder({G_FSUB, G_FDIV, G_FNEG})
-      .legalFor(ST.hasStdExtF(), {s32})
-      .legalFor(ST.hasStdExtD(), {s64})
-      .legalFor(ST.hasStdExtZfh(), {s16});
-
-  // FIXME: Merge with FSUB/FDIV/etc when we use libcalls for them.
   getActionDefinitionsBuilder(
-      {G_FADD, G_FMUL, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM})
+      {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM})
       .legalFor(ST.hasStdExtF(), {s32})
       .legalFor(ST.hasStdExtD(), {s64})
       .legalFor(ST.hasStdExtZfh(), {s16})
       .libcallFor({s32, s64});
 
-  getActionDefinitionsBuilder(G_FABS)
+  getActionDefinitionsBuilder({G_FNEG, G_FABS})
       .legalFor(ST.hasStdExtF(), {s32})
       .legalFor(ST.hasStdExtD(), {s64})
       .legalFor(ST.hasStdExtZfh(), {s16})

diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll
new file mode 100644
index 00000000000000..eafc9c644bdbf0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll
@@ -0,0 +1,1324 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d -verify-machineinstrs < %s \
+; RUN:   -target-abi=ilp32d | FileCheck -check-prefixes=CHECKIFD,RV32IFD %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d -verify-machineinstrs < %s \
+; RUN:   -target-abi=lp64d | FileCheck -check-prefixes=CHECKIFD,RV64IFD %s
+; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+
+; These tests are each targeted at a particular RISC-V FPU instruction.
+; Compares and conversions can be found in double-fcmp.ll and double-convert.ll
+; respectively. Some other double-*.ll files in this folder exercise LLVM IR
+; instructions that don't directly match a RISC-V instruction.
+
+define double @fadd_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fadd_d:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fadd.d fa0, fa0, fa1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fadd_d:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fadd_d:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fadd double %a, %b
+  ret double %1
+}
+
+define double @fsub_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fsub_d:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fsub.d fa0, fa0, fa1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fsub_d:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __subdf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fsub_d:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __subdf3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fsub double %a, %b
+  ret double %1
+}
+
+define double @fmul_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fmul_d:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fmul.d fa0, fa0, fa1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fmul_d:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __muldf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmul_d:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __muldf3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fmul double %a, %b
+  ret double %1
+}
+
+define double @fdiv_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fdiv_d:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fdiv.d fa0, fa0, fa1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fdiv_d:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __divdf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fdiv_d:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __divdf3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fdiv double %a, %b
+  ret double %1
+}
+
+declare double @llvm.sqrt.f64(double)
+
+define double @fsqrt_d(double %a) nounwind {
+; CHECKIFD-LABEL: fsqrt_d:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fsqrt.d fa0, fa0
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fsqrt_d:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call sqrt
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fsqrt_d:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call sqrt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = call double @llvm.sqrt.f64(double %a)
+  ret double %1
+}
+
+declare double @llvm.copysign.f64(double, double)
+
+define double @fsgnj_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fsgnj_d:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fsgnj.d fa0, fa0, fa1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fsgnj_d:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a2, 524288
+; RV32I-NEXT:    addi a4, a2, -1
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    and a2, a3, a2
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fsgnj_d:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, -1
+; RV64I-NEXT:    slli a3, a2, 63
+; RV64I-NEXT:    srli a2, a2, 1
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    and a1, a1, a3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = call double @llvm.copysign.f64(double %a, double %b)
+  ret double %1
+}
+
+define double @fsgnjn_d(double %a, double %b) nounwind {
+; TODO: fsgnjn.d isn't selected on RV64 because DAGCombiner::visitBITCAST will
+; convert (bitconvert (fneg x)) to a xor.
+;
+; CHECKIFD-LABEL: fsgnjn_d:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fneg.d fa5, fa1
+; CHECKIFD-NEXT:    fsgnj.d fa0, fa0, fa5
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fsgnjn_d:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a2, 524288
+; RV32I-NEXT:    xor a3, a3, a2
+; RV32I-NEXT:    addi a4, a2, -1
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    and a2, a3, a2
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fsgnjn_d:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, -1
+; RV64I-NEXT:    slli a3, a2, 63
+; RV64I-NEXT:    srli a2, a2, 1
+; RV64I-NEXT:    xor a1, a1, a3
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    and a1, a1, a3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = fneg double %b
+  %2 = call double @llvm.copysign.f64(double %a, double %1)
+  ret double %2
+}
+
+declare double @llvm.fabs.f64(double)
+
+; This function performs extra work to ensure that
+; DAGCombiner::visitBITCAST doesn't replace the fabs with an and.
+define double @fabs_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fabs_d:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fadd.d fa5, fa0, fa1
+; CHECKIFD-NEXT:    fabs.d fa4, fa5
+; CHECKIFD-NEXT:    fadd.d fa0, fa4, fa5
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fabs_d:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    lui a1, 524288
+; RV32I-NEXT:    addi a1, a1, -1
+; RV32I-NEXT:    and a1, a3, a1
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fabs_d:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    li a0, -1
+; RV64I-NEXT:    srli a0, a0, 1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fadd double %a, %b
+  %2 = call double @llvm.fabs.f64(double %1)
+  %3 = fadd double %2, %1
+  ret double %3
+}
+
+declare double @llvm.minnum.f64(double, double)
+
+define double @fmin_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fmin_d:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fmin.d fa0, fa0, fa1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fmin_d:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call fmin
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmin_d:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call fmin
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = call double @llvm.minnum.f64(double %a, double %b)
+  ret double %1
+}
+
+declare double @llvm.maxnum.f64(double, double)
+
+define double @fmax_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fmax_d:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fmax.d fa0, fa0, fa1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fmax_d:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call fmax
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmax_d:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call fmax
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = call double @llvm.maxnum.f64(double %a, double %b)
+  ret double %1
+}
+
+declare double @llvm.fma.f64(double, double, double)
+
+define double @fmadd_d(double %a, double %b, double %c) nounwind {
+; CHECKIFD-LABEL: fmadd_d:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fmadd.d fa0, fa0, fa1, fa2
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fmadd_d:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call fma
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmadd_d:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call fma
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = call double @llvm.fma.f64(double %a, double %b, double %c)
+  ret double %1
+}
+
+define double @fmsub_d(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fmsub_d:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw zero, 8(sp)
+; RV32IFD-NEXT:    sw zero, 12(sp)
+; RV32IFD-NEXT:    fld fa5, 8(sp)
+; RV32IFD-NEXT:    fadd.d fa5, fa2, fa5
+; RV32IFD-NEXT:    fmsub.d fa0, fa0, fa1, fa5
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: fmsub_d:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    fmv.d.x fa5, zero
+; RV64IFD-NEXT:    fadd.d fa5, fa2, fa5
+; RV64IFD-NEXT:    fmsub.d fa0, fa0, fa1, fa5
+; RV64IFD-NEXT:    ret
+;
+; RV32I-LABEL: fmsub_d:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    mv s3, a3
+; RV32I-NEXT:    mv a0, a4
+; RV32I-NEXT:    lui a1, %hi(.LCPI11_0)
+; RV32I-NEXT:    addi a1, a1, %lo(.LCPI11_0)
+; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    lw a3, 4(a1)
+; RV32I-NEXT:    mv a1, a5
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    mv a4, a0
+; RV32I-NEXT:    lui a5, 524288
+; RV32I-NEXT:    xor a5, a1, a5
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s3
+; RV32I-NEXT:    call fma
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmsub_d:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI11_0)(a0)
+; RV64I-NEXT:    mv a0, a2
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a1, a1, 63
+; RV64I-NEXT:    xor a2, a0, a1
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call fma
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %c_ = fadd double 0.0, %c ; avoid negation using xor
+  %negc = fneg double %c_
+  %1 = call double @llvm.fma.f64(double %a, double %b, double %negc)
+  ret double %1
+}
+
+define double @fnmadd_d(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmadd_d:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw zero, 8(sp)
+; RV32IFD-NEXT:    sw zero, 12(sp)
+; RV32IFD-NEXT:    fld fa5, 8(sp)
+; RV32IFD-NEXT:    fadd.d fa4, fa0, fa5
+; RV32IFD-NEXT:    fadd.d fa5, fa2, fa5
+; RV32IFD-NEXT:    fnmadd.d fa0, fa4, fa1, fa5
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: fnmadd_d:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    fmv.d.x fa5, zero
+; RV64IFD-NEXT:    fadd.d fa4, fa0, fa5
+; RV64IFD-NEXT:    fadd.d fa5, fa2, fa5
+; RV64IFD-NEXT:    fnmadd.d fa0, fa4, fa1, fa5
+; RV64IFD-NEXT:    ret
+;
+; RV32I-LABEL: fnmadd_d:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a2
+; RV32I-NEXT:    mv s1, a3
+; RV32I-NEXT:    mv s2, a4
+; RV32I-NEXT:    lui a2, %hi(.LCPI12_0)
+; RV32I-NEXT:    addi a2, a2, %lo(.LCPI12_0)
+; RV32I-NEXT:    lw s3, 0(a2)
+; RV32I-NEXT:    lw s4, 4(a2)
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a2, s3
+; RV32I-NEXT:    mv a3, s4
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    mv s6, a0
+; RV32I-NEXT:    mv s7, a1
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s5
+; RV32I-NEXT:    mv a2, s3
+; RV32I-NEXT:    mv a3, s4
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    mv a4, a0
+; RV32I-NEXT:    lui a5, 524288
+; RV32I-NEXT:    xor a2, s7, a5
+; RV32I-NEXT:    xor a5, a1, a5
+; RV32I-NEXT:    mv a0, s6
+; RV32I-NEXT:    mv a1, a2
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    mv a3, s1
+; RV32I-NEXT:    call fma
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmadd_d:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI12_0)
+; RV64I-NEXT:    ld s1, %lo(.LCPI12_0)(a1)
+; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a2, a1, 63
+; RV64I-NEXT:    xor a1, s3, a2
+; RV64I-NEXT:    xor a2, a0, a2
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    call fma
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    ret
+  %a_ = fadd double 0.0, %a
+  %c_ = fadd double 0.0, %c
+  %nega = fneg double %a_
+  %negc = fneg double %c_
+  %1 = call double @llvm.fma.f64(double %nega, double %b, double %negc)
+  ret double %1
+}
+
+define double @fnmadd_d_2(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmadd_d_2:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw zero, 8(sp)
+; RV32IFD-NEXT:    sw zero, 12(sp)
+; RV32IFD-NEXT:    fld fa5, 8(sp)
+; RV32IFD-NEXT:    fadd.d fa4, fa1, fa5
+; RV32IFD-NEXT:    fadd.d fa5, fa2, fa5
+; RV32IFD-NEXT:    fnmadd.d fa0, fa4, fa0, fa5
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: fnmadd_d_2:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    fmv.d.x fa5, zero
+; RV64IFD-NEXT:    fadd.d fa4, fa1, fa5
+; RV64IFD-NEXT:    fadd.d fa5, fa2, fa5
+; RV64IFD-NEXT:    fnmadd.d fa0, fa4, fa0, fa5
+; RV64IFD-NEXT:    ret
+;
+; RV32I-LABEL: fnmadd_d_2:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    mv a1, a3
+; RV32I-NEXT:    mv s2, a4
+; RV32I-NEXT:    lui a2, %hi(.LCPI13_0)
+; RV32I-NEXT:    addi a2, a2, %lo(.LCPI13_0)
+; RV32I-NEXT:    lw s3, 0(a2)
+; RV32I-NEXT:    lw s4, 4(a2)
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a2, s3
+; RV32I-NEXT:    mv a3, s4
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    mv s6, a0
+; RV32I-NEXT:    mv s7, a1
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s5
+; RV32I-NEXT:    mv a2, s3
+; RV32I-NEXT:    mv a3, s4
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    mv a4, a0
+; RV32I-NEXT:    lui a5, 524288
+; RV32I-NEXT:    xor a3, s7, a5
+; RV32I-NEXT:    xor a5, a1, a5
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a2, s6
+; RV32I-NEXT:    call fma
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmadd_d_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI13_0)
+; RV64I-NEXT:    ld s1, %lo(.LCPI13_0)(a1)
+; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a2, a1, 63
+; RV64I-NEXT:    xor a1, s3, a2
+; RV64I-NEXT:    xor a2, a0, a2
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    call fma
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    ret
+  %b_ = fadd double 0.0, %b
+  %c_ = fadd double 0.0, %c
+  %negb = fneg double %b_
+  %negc = fneg double %c_
+  %1 = call double @llvm.fma.f64(double %a, double %negb, double %negc)
+  ret double %1
+}
+
+define double @fnmadd_d_3(double %a, double %b, double %c) nounwind {
+; CHECKIFD-LABEL: fnmadd_d_3:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fmadd.d fa5, fa0, fa1, fa2
+; CHECKIFD-NEXT:    fneg.d fa0, fa5
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fnmadd_d_3:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call fma
+; RV32I-NEXT:    lui a2, 524288
+; RV32I-NEXT:    xor a1, a1, a2
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmadd_d_3:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call fma
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a1, a1, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = call double @llvm.fma.f64(double %a, double %b, double %c)
+  %neg = fneg double %1
+  ret double %neg
+}
+
+
+define double @fnmadd_nsz(double %a, double %b, double %c) nounwind {
+; CHECKIFD-LABEL: fnmadd_nsz:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fmadd.d fa5, fa0, fa1, fa2
+; CHECKIFD-NEXT:    fneg.d fa0, fa5
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fnmadd_nsz:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call fma
+; RV32I-NEXT:    lui a2, 524288
+; RV32I-NEXT:    xor a1, a1, a2
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmadd_nsz:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call fma
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a1, a1, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = call nsz double @llvm.fma.f64(double %a, double %b, double %c)
+  %neg = fneg nsz double %1
+  ret double %neg
+}
+
+define double @fnmsub_d(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmsub_d:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw zero, 8(sp)
+; RV32IFD-NEXT:    sw zero, 12(sp)
+; RV32IFD-NEXT:    fld fa5, 8(sp)
+; RV32IFD-NEXT:    fadd.d fa5, fa0, fa5
+; RV32IFD-NEXT:    fnmsub.d fa0, fa5, fa1, fa2
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: fnmsub_d:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    fmv.d.x fa5, zero
+; RV64IFD-NEXT:    fadd.d fa5, fa0, fa5
+; RV64IFD-NEXT:    fnmsub.d fa0, fa5, fa1, fa2
+; RV64IFD-NEXT:    ret
+;
+; RV32I-LABEL: fnmsub_d:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a2
+; RV32I-NEXT:    mv s1, a3
+; RV32I-NEXT:    mv s2, a4
+; RV32I-NEXT:    lui a2, %hi(.LCPI16_0)
+; RV32I-NEXT:    addi a3, a2, %lo(.LCPI16_0)
+; RV32I-NEXT:    lw a2, 0(a3)
+; RV32I-NEXT:    lw a3, 4(a3)
+; RV32I-NEXT:    mv s3, a5
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    lui a2, 524288
+; RV32I-NEXT:    xor a1, a1, a2
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    mv a3, s1
+; RV32I-NEXT:    mv a4, s2
+; RV32I-NEXT:    mv a5, s3
+; RV32I-NEXT:    call fma
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmsub_d:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI16_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI16_0)(a1)
+; RV64I-NEXT:    mv s1, a2
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a1, a1, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    mv a2, s1
+; RV64I-NEXT:    call fma
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %a_ = fadd double 0.0, %a
+  %nega = fneg double %a_
+  %1 = call double @llvm.fma.f64(double %nega, double %b, double %c)
+  ret double %1
+}
+
+define double @fnmsub_d_2(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmsub_d_2:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw zero, 8(sp)
+; RV32IFD-NEXT:    sw zero, 12(sp)
+; RV32IFD-NEXT:    fld fa5, 8(sp)
+; RV32IFD-NEXT:    fadd.d fa5, fa1, fa5
+; RV32IFD-NEXT:    fnmsub.d fa0, fa5, fa0, fa2
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: fnmsub_d_2:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    fmv.d.x fa5, zero
+; RV64IFD-NEXT:    fadd.d fa5, fa1, fa5
+; RV64IFD-NEXT:    fnmsub.d fa0, fa5, fa0, fa2
+; RV64IFD-NEXT:    ret
+;
+; RV32I-LABEL: fnmsub_d_2:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    mv a1, a3
+; RV32I-NEXT:    mv s2, a4
+; RV32I-NEXT:    lui a2, %hi(.LCPI17_0)
+; RV32I-NEXT:    addi a3, a2, %lo(.LCPI17_0)
+; RV32I-NEXT:    lw a2, 0(a3)
+; RV32I-NEXT:    lw a3, 4(a3)
+; RV32I-NEXT:    mv s3, a5
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    lui a3, 524288
+; RV32I-NEXT:    xor a3, a1, a3
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a4, s2
+; RV32I-NEXT:    mv a5, s3
+; RV32I-NEXT:    call fma
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmsub_d_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI17_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI17_0)(a1)
+; RV64I-NEXT:    mv s1, a2
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a1, a1, 63
+; RV64I-NEXT:    xor a1, a0, a1
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a2, s1
+; RV64I-NEXT:    call fma
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %b_ = fadd double 0.0, %b
+  %negb = fneg double %b_
+  %1 = call double @llvm.fma.f64(double %a, double %negb, double %c)
+  ret double %1
+}
+
+define double @fmadd_d_contract(double %a, double %b, double %c) nounwind {
+; CHECKIFD-LABEL: fmadd_d_contract:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fmadd.d fa0, fa0, fa1, fa2
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fmadd_d_contract:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a4
+; RV32I-NEXT:    mv s1, a5
+; RV32I-NEXT:    call __muldf3
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    mv a3, s1
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmadd_d_contract:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a2
+; RV64I-NEXT:    call __muldf3
+; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fmul contract double %a, %b
+  %2 = fadd contract double %1, %c
+  ret double %2
+}
+
+define double @fmsub_d_contract(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fmsub_d_contract:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw zero, 8(sp)
+; RV32IFD-NEXT:    sw zero, 12(sp)
+; RV32IFD-NEXT:    fld fa5, 8(sp)
+; RV32IFD-NEXT:    fadd.d fa5, fa2, fa5
+; RV32IFD-NEXT:    fmul.d fa4, fa0, fa1
+; RV32IFD-NEXT:    fsub.d fa0, fa4, fa5
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: fmsub_d_contract:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    fmv.d.x fa5, zero
+; RV64IFD-NEXT:    fadd.d fa5, fa2, fa5
+; RV64IFD-NEXT:    fmul.d fa4, fa0, fa1
+; RV64IFD-NEXT:    fsub.d fa0, fa4, fa5
+; RV64IFD-NEXT:    ret
+;
+; RV32I-LABEL: fmsub_d_contract:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    mv s3, a3
+; RV32I-NEXT:    mv a0, a4
+; RV32I-NEXT:    lui a1, %hi(.LCPI19_0)
+; RV32I-NEXT:    addi a1, a1, %lo(.LCPI19_0)
+; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    lw a3, 4(a1)
+; RV32I-NEXT:    mv a1, a5
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv s5, a1
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s3
+; RV32I-NEXT:    call __muldf3
+; RV32I-NEXT:    mv a2, s4
+; RV32I-NEXT:    mv a3, s5
+; RV32I-NEXT:    call __subdf3
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmsub_d_contract:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    lui a0, %hi(.LCPI19_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI19_0)(a0)
+; RV64I-NEXT:    mv a0, a2
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __muldf3
+; RV64I-NEXT:    mv a1, s2
+; RV64I-NEXT:    call __subdf3
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %c_ = fadd double 0.0, %c ; avoid negation using xor
+  %1 = fmul contract double %a, %b
+  %2 = fsub contract double %1, %c_
+  ret double %2
+}
+
+define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmadd_d_contract:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw zero, 8(sp)
+; RV32IFD-NEXT:    sw zero, 12(sp)
+; RV32IFD-NEXT:    fld fa5, 8(sp)
+; RV32IFD-NEXT:    fadd.d fa4, fa0, fa5
+; RV32IFD-NEXT:    fadd.d fa3, fa1, fa5
+; RV32IFD-NEXT:    fadd.d fa5, fa2, fa5
+; RV32IFD-NEXT:    fmul.d fa4, fa4, fa3
+; RV32IFD-NEXT:    fneg.d fa4, fa4
+; RV32IFD-NEXT:    fsub.d fa0, fa4, fa5
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: fnmadd_d_contract:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    fmv.d.x fa5, zero
+; RV64IFD-NEXT:    fadd.d fa4, fa0, fa5
+; RV64IFD-NEXT:    fadd.d fa3, fa1, fa5
+; RV64IFD-NEXT:    fadd.d fa5, fa2, fa5
+; RV64IFD-NEXT:    fmul.d fa4, fa4, fa3
+; RV64IFD-NEXT:    fneg.d fa4, fa4
+; RV64IFD-NEXT:    fsub.d fa0, fa4, fa5
+; RV64IFD-NEXT:    ret
+;
+; RV32I-LABEL: fnmadd_d_contract:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a2
+; RV32I-NEXT:    mv s1, a3
+; RV32I-NEXT:    mv s2, a4
+; RV32I-NEXT:    lui a2, %hi(.LCPI20_0)
+; RV32I-NEXT:    addi a2, a2, %lo(.LCPI20_0)
+; RV32I-NEXT:    lw s3, 0(a2)
+; RV32I-NEXT:    lw s4, 4(a2)
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a2, s3
+; RV32I-NEXT:    mv a3, s4
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    mv s6, a0
+; RV32I-NEXT:    mv s7, a1
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a2, s3
+; RV32I-NEXT:    mv a3, s4
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s5
+; RV32I-NEXT:    mv a2, s3
+; RV32I-NEXT:    mv a3, s4
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s3, a1
+; RV32I-NEXT:    mv a0, s6
+; RV32I-NEXT:    mv a1, s7
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    mv a3, s1
+; RV32I-NEXT:    call __muldf3
+; RV32I-NEXT:    lui a2, 524288
+; RV32I-NEXT:    xor a1, a1, a2
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s3
+; RV32I-NEXT:    call __subdf3
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmadd_d_contract:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI20_0)
+; RV64I-NEXT:    ld s1, %lo(.LCPI20_0)(a1)
+; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    call __muldf3
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a1, a1, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __subdf3
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    ret
+  %a_ = fadd double 0.0, %a ; avoid negation using xor
+  %b_ = fadd double 0.0, %b ; avoid negation using xor
+  %c_ = fadd double 0.0, %c ; avoid negation using xor
+  %1 = fmul contract double %a_, %b_
+  %2 = fneg double %1
+  %3 = fsub contract double %2, %c_
+  ret double %3
+}
+
+define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmsub_d_contract:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw zero, 8(sp)
+; RV32IFD-NEXT:    sw zero, 12(sp)
+; RV32IFD-NEXT:    fld fa5, 8(sp)
+; RV32IFD-NEXT:    fadd.d fa4, fa0, fa5
+; RV32IFD-NEXT:    fadd.d fa5, fa1, fa5
+; RV32IFD-NEXT:    fnmsub.d fa0, fa4, fa5, fa2
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: fnmsub_d_contract:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    fmv.d.x fa5, zero
+; RV64IFD-NEXT:    fadd.d fa4, fa0, fa5
+; RV64IFD-NEXT:    fadd.d fa5, fa1, fa5
+; RV64IFD-NEXT:    fnmsub.d fa0, fa4, fa5, fa2
+; RV64IFD-NEXT:    ret
+;
+; RV32I-LABEL: fnmsub_d_contract:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a2
+; RV32I-NEXT:    mv s1, a3
+; RV32I-NEXT:    mv s2, a4
+; RV32I-NEXT:    lui a2, %hi(.LCPI21_0)
+; RV32I-NEXT:    addi a2, a2, %lo(.LCPI21_0)
+; RV32I-NEXT:    lw s3, 0(a2)
+; RV32I-NEXT:    lw s4, 4(a2)
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a2, s3
+; RV32I-NEXT:    mv a3, s4
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    mv s6, a0
+; RV32I-NEXT:    mv s7, a1
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a2, s3
+; RV32I-NEXT:    mv a3, s4
+; RV32I-NEXT:    call __adddf3
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    mv a0, s6
+; RV32I-NEXT:    mv a1, s7
+; RV32I-NEXT:    call __muldf3
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s5
+; RV32I-NEXT:    call __subdf3
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmsub_d_contract:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI21_0)
+; RV64I-NEXT:    ld s1, %lo(.LCPI21_0)(a1)
+; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __adddf3
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    call __muldf3
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    call __subdf3
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    ret
+  %a_ = fadd double 0.0, %a ; avoid negation using xor
+  %b_ = fadd double 0.0, %b ; avoid negation using xor
+  %1 = fmul contract double %a_, %b_
+  %2 = fsub contract double %c, %1
+  ret double %2
+}
+
+define double @fsgnjx_f64(double %x, double %y) nounwind {
+; RV32IFD-LABEL: fsgnjx_f64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    lui a0, 261888
+; RV32IFD-NEXT:    sw zero, 8(sp)
+; RV32IFD-NEXT:    sw a0, 12(sp)
+; RV32IFD-NEXT:    fld fa5, 8(sp)
+; RV32IFD-NEXT:    fsgnj.d fa5, fa5, fa0
+; RV32IFD-NEXT:    fmul.d fa0, fa5, fa1
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: fsgnjx_f64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    li a0, 1023
+; RV64IFD-NEXT:    slli a0, a0, 52
+; RV64IFD-NEXT:    fmv.d.x fa5, a0
+; RV64IFD-NEXT:    fsgnj.d fa5, fa5, fa0
+; RV64IFD-NEXT:    fmul.d fa0, fa5, fa1
+; RV64IFD-NEXT:    ret
+;
+; RV32I-LABEL: fsgnjx_f64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a0, 524288
+; RV32I-NEXT:    lui a4, 261888
+; RV32I-NEXT:    and a0, a1, a0
+; RV32I-NEXT:    or a1, a0, a4
+; RV32I-NEXT:    li a0, 0
+; RV32I-NEXT:    call __muldf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fsgnjx_f64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a2, -1
+; RV64I-NEXT:    li a3, 1023
+; RV64I-NEXT:    slli a2, a2, 63
+; RV64I-NEXT:    slli a3, a3, 52
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    call __muldf3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %z = call double @llvm.copysign.f64(double 1.0, double %x)
+  %mul = fmul double %z, %y
+  ret double %mul
+}

diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll
new file mode 100644
index 00000000000000..0d210890c41f9a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll
@@ -0,0 +1,1100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+f -verify-machineinstrs < %s \
+; RUN:   -target-abi=ilp32f | FileCheck -check-prefix=CHECKIF %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+f -verify-machineinstrs < %s \
+; RUN:   -target-abi=lp64f | FileCheck -check-prefix=CHECKIF %s
+; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+
+; These tests are each targeted at a particular RISC-V FPU instruction.
+; Compares and conversions can be found in float-fcmp.ll and float-convert.ll
+; respectively. Some other float-*.ll files in this folder exercise LLVM IR
+; instructions that don't directly match a RISC-V instruction.
+
+define float @fadd_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fadd_s:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fadd.s fa0, fa0, fa1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fadd_s:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fadd_s:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fadd float %a, %b
+  ret float %1
+}
+
+define float @fsub_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fsub_s:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fsub.s fa0, fa0, fa1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fsub_s:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __subsf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fsub_s:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __subsf3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fsub float %a, %b
+  ret float %1
+}
+
+define float @fmul_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fmul_s:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmul.s fa0, fa0, fa1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fmul_s:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __mulsf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmul_s:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __mulsf3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fmul float %a, %b
+  ret float %1
+}
+
+define float @fdiv_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fdiv_s:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fdiv.s fa0, fa0, fa1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fdiv_s:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __divsf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fdiv_s:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __divsf3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fdiv float %a, %b
+  ret float %1
+}
+
+declare float @llvm.sqrt.f32(float)
+
+define float @fsqrt_s(float %a) nounwind {
+; CHECKIF-LABEL: fsqrt_s:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fsqrt.s fa0, fa0
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fsqrt_s:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call sqrtf
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fsqrt_s:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call sqrtf
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = call float @llvm.sqrt.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.copysign.f32(float, float)
+
+define float @fsgnj_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fsgnj_s:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fsgnj.s fa0, fa0, fa1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fsgnj_s:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a2, 524288
+; RV32I-NEXT:    addi a3, a2, -1
+; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fsgnj_s:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 524288
+; RV64I-NEXT:    addiw a3, a2, -1
+; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = call float @llvm.copysign.f32(float %a, float %b)
+  ret float %1
+}
+
+define float @fsgnjn_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fsgnjn_s:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fadd.s fa5, fa0, fa1
+; CHECKIF-NEXT:    fneg.s fa5, fa5
+; CHECKIF-NEXT:    fsgnj.s fa0, fa0, fa5
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fsgnjn_s:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    lui a1, 524288
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    addi a2, a1, -1
+; RV32I-NEXT:    and a2, s0, a2
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fsgnjn_s:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    lui a1, 524288
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    addiw a2, a1, -1
+; RV64I-NEXT:    and a2, s0, a2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fadd float %a, %b
+  %2 = fneg float %1
+  %3 = call float @llvm.copysign.f32(float %a, float %2)
+  ret float %3
+}
+
+declare float @llvm.fabs.f32(float)
+
+define float @fabs_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fabs_s:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fadd.s fa5, fa0, fa1
+; CHECKIF-NEXT:    fabs.s fa4, fa5
+; CHECKIF-NEXT:    fadd.s fa0, fa4, fa5
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fabs_s:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    mv a1, a0
+; RV32I-NEXT:    lui a0, 524288
+; RV32I-NEXT:    addi a0, a0, -1
+; RV32I-NEXT:    and a0, a1, a0
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fabs_s:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    lui a0, 524288
+; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fadd float %a, %b
+  %2 = call float @llvm.fabs.f32(float %1)
+  %3 = fadd float %2, %1
+  ret float %3
+}
+
+declare float @llvm.minnum.f32(float, float)
+
+define float @fmin_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fmin_s:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmin.s fa0, fa0, fa1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fmin_s:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call fminf
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmin_s:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call fminf
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = call float @llvm.minnum.f32(float %a, float %b)
+  ret float %1
+}
+
+declare float @llvm.maxnum.f32(float, float)
+
+define float @fmax_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fmax_s:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmax.s fa0, fa0, fa1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fmax_s:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call fmaxf
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmax_s:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call fmaxf
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = call float @llvm.maxnum.f32(float %a, float %b)
+  ret float %1
+}
+
+declare float @llvm.fma.f32(float, float, float)
+
+define float @fmadd_s(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fmadd_s:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmadd.s fa0, fa0, fa1, fa2
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fmadd_s:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call fmaf
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmadd_s:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call fmaf
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
+  ret float %1
+}
+
+define float @fmsub_s(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fmsub_s:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmv.w.x fa5, zero
+; CHECKIF-NEXT:    fadd.s fa5, fa2, fa5
+; CHECKIF-NEXT:    fmsub.s fa0, fa0, fa1, fa5
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fmsub_s:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32I-NEXT:    lw a1, %lo(.LCPI11_0)(a0)
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    lui a2, 524288
+; RV32I-NEXT:    xor a2, a0, a2
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    call fmaf
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmsub_s:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV64I-NEXT:    lw a1, %lo(.LCPI11_0)(a0)
+; RV64I-NEXT:    mv a0, a2
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    lui a2, 524288
+; RV64I-NEXT:    xor a2, a0, a2
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call fmaf
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %c_ = fadd float 0.0, %c ; avoid negation using xor
+  %negc = fneg float %c_
+  %1 = call float @llvm.fma.f32(float %a, float %b, float %negc)
+  ret float %1
+}
+
+define float @fnmadd_s(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fnmadd_s:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmv.w.x fa5, zero
+; CHECKIF-NEXT:    fadd.s fa4, fa0, fa5
+; CHECKIF-NEXT:    fadd.s fa5, fa2, fa5
+; CHECKIF-NEXT:    fnmadd.s fa0, fa4, fa1, fa5
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fnmadd_s:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    lui a1, %hi(.LCPI12_0)
+; RV32I-NEXT:    lw s1, %lo(.LCPI12_0)(a1)
+; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    lui a2, 524288
+; RV32I-NEXT:    xor a1, s3, a2
+; RV32I-NEXT:    xor a2, a0, a2
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    call fmaf
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmadd_s:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI12_0)
+; RV64I-NEXT:    lw s1, %lo(.LCPI12_0)(a1)
+; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    lui a2, 524288
+; RV64I-NEXT:    xor a1, s3, a2
+; RV64I-NEXT:    xor a2, a0, a2
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    call fmaf
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    ret
+  %a_ = fadd float 0.0, %a
+  %c_ = fadd float 0.0, %c
+  %nega = fneg float %a_
+  %negc = fneg float %c_
+  %1 = call float @llvm.fma.f32(float %nega, float %b, float %negc)
+  ret float %1
+}
+
+define float @fnmadd_s_2(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fnmadd_s_2:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmv.w.x fa5, zero
+; CHECKIF-NEXT:    fadd.s fa4, fa1, fa5
+; CHECKIF-NEXT:    fadd.s fa5, fa2, fa5
+; CHECKIF-NEXT:    fnmadd.s fa0, fa4, fa0, fa5
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fnmadd_s_2:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    lui a1, %hi(.LCPI13_0)
+; RV32I-NEXT:    lw s1, %lo(.LCPI13_0)(a1)
+; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    lui a2, 524288
+; RV32I-NEXT:    xor a1, s3, a2
+; RV32I-NEXT:    xor a2, a0, a2
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    call fmaf
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmadd_s_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI13_0)
+; RV64I-NEXT:    lw s1, %lo(.LCPI13_0)(a1)
+; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    lui a2, 524288
+; RV64I-NEXT:    xor a1, s3, a2
+; RV64I-NEXT:    xor a2, a0, a2
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    call fmaf
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    ret
+  %b_ = fadd float 0.0, %b
+  %c_ = fadd float 0.0, %c
+  %negb = fneg float %b_
+  %negc = fneg float %c_
+  %1 = call float @llvm.fma.f32(float %a, float %negb, float %negc)
+  ret float %1
+}
+
+define float @fnmadd_s_3(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fnmadd_s_3:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    fmadd.s ft0, fa0, fa1, fa2
+; RV32IF-NEXT:    fneg.s fa0, ft0
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: fnmadd_s_3:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    fmadd.s ft0, fa0, fa1, fa2
+; RV64IF-NEXT:    fneg.s fa0, ft0
+; RV64IF-NEXT:    ret
+;
+; CHECKIF-LABEL: fnmadd_s_3:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmadd.s fa5, fa0, fa1, fa2
+; CHECKIF-NEXT:    fneg.s fa0, fa5
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fnmadd_s_3:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call fmaf
+; RV32I-NEXT:    lui a1, 524288
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmadd_s_3:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call fmaf
+; RV64I-NEXT:    lui a1, 524288
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
+  %neg = fneg float %1
+  ret float %neg
+}
+
+define float @fnmadd_nsz(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fnmadd_nsz:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    fnmadd.s fa0, fa0, fa1, fa2
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: fnmadd_nsz:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    fnmadd.s fa0, fa0, fa1, fa2
+; RV64IF-NEXT:    ret
+;
+; CHECKIF-LABEL: fnmadd_nsz:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmadd.s fa5, fa0, fa1, fa2
+; CHECKIF-NEXT:    fneg.s fa0, fa5
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fnmadd_nsz:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call fmaf
+; RV32I-NEXT:    lui a1, 524288
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmadd_nsz:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call fmaf
+; RV64I-NEXT:    lui a1, 524288
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = call nsz float @llvm.fma.f32(float %a, float %b, float %c)
+  %neg = fneg nsz float %1
+  ret float %neg
+}
+
+define float @fnmsub_s(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fnmsub_s:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmv.w.x fa5, zero
+; CHECKIF-NEXT:    fadd.s fa5, fa0, fa5
+; CHECKIF-NEXT:    fnmsub.s fa0, fa5, fa1, fa2
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fnmsub_s:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    lui a1, %hi(.LCPI16_0)
+; RV32I-NEXT:    lw a1, %lo(.LCPI16_0)(a1)
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    lui a1, 524288
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    call fmaf
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmsub_s:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI16_0)
+; RV64I-NEXT:    lw a1, %lo(.LCPI16_0)(a1)
+; RV64I-NEXT:    mv s1, a2
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    lui a1, 524288
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    mv a2, s1
+; RV64I-NEXT:    call fmaf
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %a_ = fadd float 0.0, %a
+  %nega = fneg float %a_
+  %1 = call float @llvm.fma.f32(float %nega, float %b, float %c)
+  ret float %1
+}
+
+define float @fnmsub_s_2(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fnmsub_s_2:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmv.w.x fa5, zero
+; CHECKIF-NEXT:    fadd.s fa5, fa1, fa5
+; CHECKIF-NEXT:    fnmsub.s fa0, fa5, fa0, fa2
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fnmsub_s_2:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    lui a1, %hi(.LCPI17_0)
+; RV32I-NEXT:    lw a1, %lo(.LCPI17_0)(a1)
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    lui a1, 524288
+; RV32I-NEXT:    xor a1, a0, a1
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    call fmaf
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmsub_s_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI17_0)
+; RV64I-NEXT:    lw a1, %lo(.LCPI17_0)(a1)
+; RV64I-NEXT:    mv s1, a2
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    lui a1, 524288
+; RV64I-NEXT:    xor a1, a0, a1
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a2, s1
+; RV64I-NEXT:    call fmaf
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %b_ = fadd float 0.0, %b
+  %negb = fneg float %b_
+  %1 = call float @llvm.fma.f32(float %a, float %negb, float %c)
+  ret float %1
+}
+
+define float @fmadd_s_contract(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fmadd_s_contract:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmadd.s fa0, fa0, fa1, fa2
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fmadd_s_contract:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a2
+; RV32I-NEXT:    call __mulsf3
+; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmadd_s_contract:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a2
+; RV64I-NEXT:    call __mulsf3
+; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fmul contract float %a, %b
+  %2 = fadd contract float %1, %c
+  ret float %2
+}
+
+define float @fmsub_s_contract(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fmsub_s_contract:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmv.w.x fa5, zero
+; CHECKIF-NEXT:    fadd.s fa5, fa2, fa5
+; CHECKIF-NEXT:    fmul.s fa4, fa0, fa1
+; CHECKIF-NEXT:    fsub.s fa0, fa4, fa5
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fmsub_s_contract:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    lui a0, %hi(.LCPI19_0)
+; RV32I-NEXT:    lw a1, %lo(.LCPI19_0)(a0)
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    call __mulsf3
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    call __subsf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmsub_s_contract:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    lui a0, %hi(.LCPI19_0)
+; RV64I-NEXT:    lw a1, %lo(.LCPI19_0)(a0)
+; RV64I-NEXT:    mv a0, a2
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __mulsf3
+; RV64I-NEXT:    mv a1, s2
+; RV64I-NEXT:    call __subsf3
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %c_ = fadd float 0.0, %c ; avoid negation using xor
+  %1 = fmul contract float %a, %b
+  %2 = fsub contract float %1, %c_
+  ret float %2
+}
+
+define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fnmadd_s_contract:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmv.w.x fa5, zero
+; CHECKIF-NEXT:    fadd.s fa4, fa0, fa5
+; CHECKIF-NEXT:    fadd.s fa3, fa1, fa5
+; CHECKIF-NEXT:    fadd.s fa5, fa2, fa5
+; CHECKIF-NEXT:    fmul.s fa4, fa4, fa3
+; CHECKIF-NEXT:    fneg.s fa4, fa4
+; CHECKIF-NEXT:    fsub.s fa0, fa4, fa5
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fnmadd_s_contract:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    lui a1, %hi(.LCPI20_0)
+; RV32I-NEXT:    lw s1, %lo(.LCPI20_0)(a1)
+; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    call __mulsf3
+; RV32I-NEXT:    lui a1, 524288
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    call __subsf3
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmadd_s_contract:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI20_0)
+; RV64I-NEXT:    lw s1, %lo(.LCPI20_0)(a1)
+; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    call __mulsf3
+; RV64I-NEXT:    lui a1, 524288
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __subsf3
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    ret
+  %a_ = fadd float 0.0, %a ; avoid negation using xor
+  %b_ = fadd float 0.0, %b ; avoid negation using xor
+  %c_ = fadd float 0.0, %c ; avoid negation using xor
+  %1 = fmul contract float %a_, %b_
+  %2 = fneg float %1
+  %3 = fsub contract float %2, %c_
+  ret float %3
+}
+
+define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fnmsub_s_contract:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmv.w.x fa5, zero
+; CHECKIF-NEXT:    fadd.s fa4, fa0, fa5
+; CHECKIF-NEXT:    fadd.s fa5, fa1, fa5
+; CHECKIF-NEXT:    fnmsub.s fa0, fa4, fa5, fa2
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fnmsub_s_contract:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    lui a1, %hi(.LCPI21_0)
+; RV32I-NEXT:    lw s1, %lo(.LCPI21_0)(a1)
+; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    call __addsf3
+; RV32I-NEXT:    mv a1, a0
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    call __mulsf3
+; RV32I-NEXT:    mv a1, a0
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    call __subsf3
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmsub_s_contract:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI21_0)
+; RV64I-NEXT:    lw s1, %lo(.LCPI21_0)(a1)
+; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    call __mulsf3
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    call __subsf3
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    ret
+  %a_ = fadd float 0.0, %a ; avoid negation using xor
+  %b_ = fadd float 0.0, %b ; avoid negation using xor
+  %1 = fmul contract float %a_, %b_
+  %2 = fsub contract float %c, %1
+  ret float %2
+}
+
+define float @fsgnjx_f32(float %x, float %y) nounwind {
+; CHECKIF-LABEL: fsgnjx_f32:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    lui a0, 260096
+; CHECKIF-NEXT:    fmv.w.x fa5, a0
+; CHECKIF-NEXT:    fsgnj.s fa5, fa5, fa0
+; CHECKIF-NEXT:    fmul.s fa0, fa5, fa1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fsgnjx_f32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a2, 524288
+; RV32I-NEXT:    lui a3, 260096
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    call __mulsf3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fsgnjx_f32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a2, 524288
+; RV64I-NEXT:    lui a3, 260096
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    call __mulsf3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %z = call float @llvm.copysign.f32(float 1.0, float %x)
+  %mul = fmul float %z, %y
+  ret float %mul
+}

diff  --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
index b5619075b6bfcb..74749d8f1944be 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
@@ -455,6 +455,7 @@
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_FSUB (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_FMUL (opcode {{[0-9]+}}): 1 type index, 0 imm indices
@@ -506,7 +507,6 @@
 # DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_FNEG (opcode {{[0-9]+}}): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_FPEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
@@ -536,6 +536,7 @@
 # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_FABS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_FCOPYSIGN (opcode {{[0-9]+}}): 2 type indices
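
For readers following the RV32I/RV64I check lines above: with neither F nor D available, fneg is lowered to an integer xor of the IEEE-754 sign bit (lui a1, 524288 materializes 0x80000000) and fabs to an and that clears it, while the remaining arithmetic goes through libcalls such as __addsf3, __subsf3, __mulsf3 and fmaf. Below is a minimal C sketch of the same sign-bit manipulation, purely illustrative; the helper names are made up and are not part of the patch.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Flip the f32 sign bit: the lui a1, 524288 / xor a0, a0, a1 pattern
       seen in the RV32I/RV64I checks. */
    static float fneg_bits(float x) {
      uint32_t bits;
      memcpy(&bits, &x, sizeof(bits));
      bits ^= 0x80000000u;               /* toggle only the sign bit */
      memcpy(&x, &bits, sizeof(bits));
      return x;
    }

    /* Clear the f32 sign bit: the lui 524288 / addi -1 / and pattern
       seen in the fabs_s checks. */
    static float fabs_bits(float x) {
      uint32_t bits;
      memcpy(&bits, &x, sizeof(bits));
      bits &= 0x7fffffffu;               /* keep exponent and mantissa */
      memcpy(&x, &bits, sizeof(bits));
      return x;
    }

    int main(void) {
      /* prints "-1.500000 2.250000" */
      printf("%f %f\n", fneg_bits(1.5f), fabs_bits(-2.25f));
      return 0;
    }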

More information about the llvm-commits mailing list