[llvm] bb5bbe5 - [RISCV][GISel] Support s32/s64 G_FSUB/FDIV/FNEG without F/D extensions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 24 18:24:43 PST 2024
Author: Craig Topper
Date: 2024-11-24T18:22:12-08:00
New Revision: bb5bbe523d4437d72287f89fbaa277aaa71c0bd2
URL: https://github.com/llvm/llvm-project/commit/bb5bbe523d4437d72287f89fbaa277aaa71c0bd2
DIFF: https://github.com/llvm/llvm-project/commit/bb5bbe523d4437d72287f89fbaa277aaa71c0bd2.diff
LOG: [RISCV][GISel] Support s32/s64 G_FSUB/FDIV/FNEG without F/D extensions.
Use libcalls for G_FSUB/FDIV. Use integer operations for G_FNEG.
Copy most of the IR tests for arithmetic from SelectionDAG.
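For illustration only (this snippet is not part of the patch): with this change, an fneg on a double without the D extension is legalized using plain integer operations that flip the IEEE sign bit, rather than a libcall. A minimal sketch, assuming the same sign-bit xor lowering the new double-arith.ll checks exercise (e.g. the xor against 1<<63 in fnmadd_d_3 below):

  define double @fneg_d(double %a) nounwind {
    %1 = fneg double %a
    ret double %1
  }

  ; Expected RV64 (no F/D) style code, following the pattern in the tests:
  ;   li   a1, -1
  ;   slli a1, a1, 63     ; materialize 0x8000000000000000
  ;   xor  a0, a0, a1     ; flip the sign bit of the f64 bits held in a0
  ;   ret

G_FSUB and G_FDIV, by contrast, now go to the compiler-rt libcalls (__subdf3/__divdf3 for f64, __subsf3/__divsf3 for f32), as the RV32I/RV64I check lines in the added tests show.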
Added:
llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll
llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll
Modified:
llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index c5bef80f8f48a6..a9294e76f8763f 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -491,20 +491,14 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
// FP Operations
- getActionDefinitionsBuilder({G_FSUB, G_FDIV, G_FNEG})
- .legalFor(ST.hasStdExtF(), {s32})
- .legalFor(ST.hasStdExtD(), {s64})
- .legalFor(ST.hasStdExtZfh(), {s16});
-
- // FIXME: Merge with FSUB/FDIV/etc when we use libcalls for them.
getActionDefinitionsBuilder(
- {G_FADD, G_FMUL, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM})
+ {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM})
.legalFor(ST.hasStdExtF(), {s32})
.legalFor(ST.hasStdExtD(), {s64})
.legalFor(ST.hasStdExtZfh(), {s16})
.libcallFor({s32, s64});
- getActionDefinitionsBuilder(G_FABS)
+ getActionDefinitionsBuilder({G_FNEG, G_FABS})
.legalFor(ST.hasStdExtF(), {s32})
.legalFor(ST.hasStdExtD(), {s64})
.legalFor(ST.hasStdExtZfh(), {s16})
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll
new file mode 100644
index 00000000000000..eafc9c644bdbf0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll
@@ -0,0 +1,1324 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d -verify-machineinstrs < %s \
+; RUN: -target-abi=ilp32d | FileCheck -check-prefixes=CHECKIFD,RV32IFD %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d -verify-machineinstrs < %s \
+; RUN: -target-abi=lp64d | FileCheck -check-prefixes=CHECKIFD,RV64IFD %s
+; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+
+; These tests are each targeted at a particular RISC-V FPU instruction.
+; Compares and conversions can be found in double-fcmp.ll and double-convert.ll
+; respectively. Some other double-*.ll files in this folder exercise LLVM IR
+; instructions that don't directly match a RISC-V instruction.
+
+define double @fadd_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fadd_d:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fadd.d fa0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fadd_d:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fadd_d:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fadd double %a, %b
+ ret double %1
+}
+
+define double @fsub_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fsub_d:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fsub.d fa0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fsub_d:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __subdf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fsub_d:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __subdf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fsub double %a, %b
+ ret double %1
+}
+
+define double @fmul_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fmul_d:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fmul.d fa0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fmul_d:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __muldf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fmul_d:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __muldf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fmul double %a, %b
+ ret double %1
+}
+
+define double @fdiv_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fdiv_d:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fdiv.d fa0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fdiv_d:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __divdf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fdiv_d:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __divdf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fdiv double %a, %b
+ ret double %1
+}
+
+declare double @llvm.sqrt.f64(double)
+
+define double @fsqrt_d(double %a) nounwind {
+; CHECKIFD-LABEL: fsqrt_d:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fsqrt.d fa0, fa0
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fsqrt_d:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call sqrt
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fsqrt_d:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call sqrt
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = call double @llvm.sqrt.f64(double %a)
+ ret double %1
+}
+
+declare double @llvm.copysign.f64(double, double)
+
+define double @fsgnj_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fsgnj_d:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fsgnj.d fa0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fsgnj_d:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: addi a4, a2, -1
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: and a2, a3, a2
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fsgnj_d:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, -1
+; RV64I-NEXT: slli a3, a2, 63
+; RV64I-NEXT: srli a2, a2, 1
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: ret
+ %1 = call double @llvm.copysign.f64(double %a, double %b)
+ ret double %1
+}
+
+define double @fsgnjn_d(double %a, double %b) nounwind {
+; TODO: fsgnjn.s isn't selected on RV64 because DAGCombiner::visitBITCAST will
+; convert (bitconvert (fneg x)) to a xor.
+;
+; CHECKIFD-LABEL: fsgnjn_d:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fneg.d fa5, fa1
+; CHECKIFD-NEXT: fsgnj.d fa0, fa0, fa5
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fsgnjn_d:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: xor a3, a3, a2
+; RV32I-NEXT: addi a4, a2, -1
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: and a2, a3, a2
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fsgnjn_d:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, -1
+; RV64I-NEXT: slli a3, a2, 63
+; RV64I-NEXT: srli a2, a2, 1
+; RV64I-NEXT: xor a1, a1, a3
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: ret
+ %1 = fneg double %b
+ %2 = call double @llvm.copysign.f64(double %a, double %1)
+ ret double %2
+}
+
+declare double @llvm.fabs.f64(double)
+
+; This function performs extra work to ensure that
+; DAGCombiner::visitBITCAST doesn't replace the fabs with an and.
+define double @fabs_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fabs_d:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fadd.d fa5, fa0, fa1
+; CHECKIFD-NEXT: fabs.d fa4, fa5
+; CHECKIFD-NEXT: fadd.d fa0, fa4, fa5
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fabs_d:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: mv a3, a1
+; RV32I-NEXT: lui a1, 524288
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a3, a1
+; RV32I-NEXT: mv a2, a0
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fabs_d:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: li a0, -1
+; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fadd double %a, %b
+ %2 = call double @llvm.fabs.f64(double %1)
+ %3 = fadd double %2, %1
+ ret double %3
+}
+
+declare double @llvm.minnum.f64(double, double)
+
+define double @fmin_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fmin_d:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fmin.d fa0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fmin_d:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call fmin
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fmin_d:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call fmin
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = call double @llvm.minnum.f64(double %a, double %b)
+ ret double %1
+}
+
+declare double @llvm.maxnum.f64(double, double)
+
+define double @fmax_d(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fmax_d:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fmax.d fa0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fmax_d:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call fmax
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fmax_d:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call fmax
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = call double @llvm.maxnum.f64(double %a, double %b)
+ ret double %1
+}
+
+declare double @llvm.fma.f64(double, double, double)
+
+define double @fmadd_d(double %a, double %b, double %c) nounwind {
+; CHECKIFD-LABEL: fmadd_d:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa2
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fmadd_d:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call fma
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fmadd_d:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call fma
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = call double @llvm.fma.f64(double %a, double %b, double %c)
+ ret double %1
+}
+
+define double @fmsub_d(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fmsub_d:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw zero, 8(sp)
+; RV32IFD-NEXT: sw zero, 12(sp)
+; RV32IFD-NEXT: fld fa5, 8(sp)
+; RV32IFD-NEXT: fadd.d fa5, fa2, fa5
+; RV32IFD-NEXT: fmsub.d fa0, fa0, fa1, fa5
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: fmsub_d:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: fmv.d.x fa5, zero
+; RV64IFD-NEXT: fadd.d fa5, fa2, fa5
+; RV64IFD-NEXT: fmsub.d fa0, fa0, fa1, fa5
+; RV64IFD-NEXT: ret
+;
+; RV32I-LABEL: fmsub_d:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv s3, a3
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: lui a1, %hi(.LCPI11_0)
+; RV32I-NEXT: addi a1, a1, %lo(.LCPI11_0)
+; RV32I-NEXT: lw a2, 0(a1)
+; RV32I-NEXT: lw a3, 4(a1)
+; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: mv a4, a0
+; RV32I-NEXT: lui a5, 524288
+; RV32I-NEXT: xor a5, a1, a5
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a2, s2
+; RV32I-NEXT: mv a3, s3
+; RV32I-NEXT: call fma
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fmsub_d:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: lui a0, %hi(.LCPI11_0)
+; RV64I-NEXT: ld a1, %lo(.LCPI11_0)(a0)
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: li a1, -1
+; RV64I-NEXT: slli a1, a1, 63
+; RV64I-NEXT: xor a2, a0, a1
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call fma
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+ %c_ = fadd double 0.0, %c ; avoid negation using xor
+ %negc = fneg double %c_
+ %1 = call double @llvm.fma.f64(double %a, double %b, double %negc)
+ ret double %1
+}
+
+define double @fnmadd_d(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmadd_d:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw zero, 8(sp)
+; RV32IFD-NEXT: sw zero, 12(sp)
+; RV32IFD-NEXT: fld fa5, 8(sp)
+; RV32IFD-NEXT: fadd.d fa4, fa0, fa5
+; RV32IFD-NEXT: fadd.d fa5, fa2, fa5
+; RV32IFD-NEXT: fnmadd.d fa0, fa4, fa1, fa5
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: fnmadd_d:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: fmv.d.x fa5, zero
+; RV64IFD-NEXT: fadd.d fa4, fa0, fa5
+; RV64IFD-NEXT: fadd.d fa5, fa2, fa5
+; RV64IFD-NEXT: fnmadd.d fa0, fa4, fa1, fa5
+; RV64IFD-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_d:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a3
+; RV32I-NEXT: mv s2, a4
+; RV32I-NEXT: lui a2, %hi(.LCPI12_0)
+; RV32I-NEXT: addi a2, a2, %lo(.LCPI12_0)
+; RV32I-NEXT: lw s3, 0(a2)
+; RV32I-NEXT: lw s4, 4(a2)
+; RV32I-NEXT: mv s5, a5
+; RV32I-NEXT: mv a2, s3
+; RV32I-NEXT: mv a3, s4
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: mv s6, a0
+; RV32I-NEXT: mv s7, a1
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a1, s5
+; RV32I-NEXT: mv a2, s3
+; RV32I-NEXT: mv a3, s4
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: mv a4, a0
+; RV32I-NEXT: lui a5, 524288
+; RV32I-NEXT: xor a2, s7, a5
+; RV32I-NEXT: xor a5, a1, a5
+; RV32I-NEXT: mv a0, s6
+; RV32I-NEXT: mv a1, a2
+; RV32I-NEXT: mv a2, s0
+; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: call fma
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_d:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a1
+; RV64I-NEXT: lui a1, %hi(.LCPI12_0)
+; RV64I-NEXT: ld s1, %lo(.LCPI12_0)(a1)
+; RV64I-NEXT: mv s2, a2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: mv s3, a0
+; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: li a1, -1
+; RV64I-NEXT: slli a2, a1, 63
+; RV64I-NEXT: xor a1, s3, a2
+; RV64I-NEXT: xor a2, a0, a2
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: mv a1, s0
+; RV64I-NEXT: call fma
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+ %a_ = fadd double 0.0, %a
+ %c_ = fadd double 0.0, %c
+ %nega = fneg double %a_
+ %negc = fneg double %c_
+ %1 = call double @llvm.fma.f64(double %nega, double %b, double %negc)
+ ret double %1
+}
+
+define double @fnmadd_d_2(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmadd_d_2:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw zero, 8(sp)
+; RV32IFD-NEXT: sw zero, 12(sp)
+; RV32IFD-NEXT: fld fa5, 8(sp)
+; RV32IFD-NEXT: fadd.d fa4, fa1, fa5
+; RV32IFD-NEXT: fadd.d fa5, fa2, fa5
+; RV32IFD-NEXT: fnmadd.d fa0, fa4, fa0, fa5
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: fnmadd_d_2:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: fmv.d.x fa5, zero
+; RV64IFD-NEXT: fadd.d fa4, fa1, fa5
+; RV64IFD-NEXT: fadd.d fa5, fa2, fa5
+; RV64IFD-NEXT: fnmadd.d fa0, fa4, fa0, fa5
+; RV64IFD-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_d_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: mv s2, a4
+; RV32I-NEXT: lui a2, %hi(.LCPI13_0)
+; RV32I-NEXT: addi a2, a2, %lo(.LCPI13_0)
+; RV32I-NEXT: lw s3, 0(a2)
+; RV32I-NEXT: lw s4, 4(a2)
+; RV32I-NEXT: mv s5, a5
+; RV32I-NEXT: mv a2, s3
+; RV32I-NEXT: mv a3, s4
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: mv s6, a0
+; RV32I-NEXT: mv s7, a1
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a1, s5
+; RV32I-NEXT: mv a2, s3
+; RV32I-NEXT: mv a3, s4
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: mv a4, a0
+; RV32I-NEXT: lui a5, 524288
+; RV32I-NEXT: xor a3, s7, a5
+; RV32I-NEXT: xor a5, a1, a5
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a2, s6
+; RV32I-NEXT: call fma
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_d_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: lui a1, %hi(.LCPI13_0)
+; RV64I-NEXT: ld s1, %lo(.LCPI13_0)(a1)
+; RV64I-NEXT: mv s2, a2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: mv s3, a0
+; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: li a1, -1
+; RV64I-NEXT: slli a2, a1, 63
+; RV64I-NEXT: xor a1, s3, a2
+; RV64I-NEXT: xor a2, a0, a2
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: call fma
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+ %b_ = fadd double 0.0, %b
+ %c_ = fadd double 0.0, %c
+ %negb = fneg double %b_
+ %negc = fneg double %c_
+ %1 = call double @llvm.fma.f64(double %a, double %negb, double %negc)
+ ret double %1
+}
+
+define double @fnmadd_d_3(double %a, double %b, double %c) nounwind {
+; CHECKIFD-LABEL: fnmadd_d_3:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fmadd.d fa5, fa0, fa1, fa2
+; CHECKIFD-NEXT: fneg.d fa0, fa5
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_d_3:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call fma
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_d_3:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call fma
+; RV64I-NEXT: li a1, -1
+; RV64I-NEXT: slli a1, a1, 63
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = call double @llvm.fma.f64(double %a, double %b, double %c)
+ %neg = fneg double %1
+ ret double %neg
+}
+
+
+define double @fnmadd_nsz(double %a, double %b, double %c) nounwind {
+; CHECKIFD-LABEL: fnmadd_nsz:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fmadd.d fa5, fa0, fa1, fa2
+; CHECKIFD-NEXT: fneg.d fa0, fa5
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_nsz:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call fma
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_nsz:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call fma
+; RV64I-NEXT: li a1, -1
+; RV64I-NEXT: slli a1, a1, 63
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = call nsz double @llvm.fma.f64(double %a, double %b, double %c)
+ %neg = fneg nsz double %1
+ ret double %neg
+}
+
+define double @fnmsub_d(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmsub_d:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw zero, 8(sp)
+; RV32IFD-NEXT: sw zero, 12(sp)
+; RV32IFD-NEXT: fld fa5, 8(sp)
+; RV32IFD-NEXT: fadd.d fa5, fa0, fa5
+; RV32IFD-NEXT: fnmsub.d fa0, fa5, fa1, fa2
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: fnmsub_d:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: fmv.d.x fa5, zero
+; RV64IFD-NEXT: fadd.d fa5, fa0, fa5
+; RV64IFD-NEXT: fnmsub.d fa0, fa5, fa1, fa2
+; RV64IFD-NEXT: ret
+;
+; RV32I-LABEL: fnmsub_d:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a3
+; RV32I-NEXT: mv s2, a4
+; RV32I-NEXT: lui a2, %hi(.LCPI16_0)
+; RV32I-NEXT: addi a3, a2, %lo(.LCPI16_0)
+; RV32I-NEXT: lw a2, 0(a3)
+; RV32I-NEXT: lw a3, 4(a3)
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: mv a2, s0
+; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a4, s2
+; RV32I-NEXT: mv a5, s3
+; RV32I-NEXT: call fma
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmsub_d:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a1
+; RV64I-NEXT: lui a1, %hi(.LCPI16_0)
+; RV64I-NEXT: ld a1, %lo(.LCPI16_0)(a1)
+; RV64I-NEXT: mv s1, a2
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: li a1, -1
+; RV64I-NEXT: slli a1, a1, 63
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: mv a1, s0
+; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: call fma
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+ %a_ = fadd double 0.0, %a
+ %nega = fneg double %a_
+ %1 = call double @llvm.fma.f64(double %nega, double %b, double %c)
+ ret double %1
+}
+
+define double @fnmsub_d_2(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmsub_d_2:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw zero, 8(sp)
+; RV32IFD-NEXT: sw zero, 12(sp)
+; RV32IFD-NEXT: fld fa5, 8(sp)
+; RV32IFD-NEXT: fadd.d fa5, fa1, fa5
+; RV32IFD-NEXT: fnmsub.d fa0, fa5, fa0, fa2
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: fnmsub_d_2:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: fmv.d.x fa5, zero
+; RV64IFD-NEXT: fadd.d fa5, fa1, fa5
+; RV64IFD-NEXT: fnmsub.d fa0, fa5, fa0, fa2
+; RV64IFD-NEXT: ret
+;
+; RV32I-LABEL: fnmsub_d_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: mv s2, a4
+; RV32I-NEXT: lui a2, %hi(.LCPI17_0)
+; RV32I-NEXT: addi a3, a2, %lo(.LCPI17_0)
+; RV32I-NEXT: lw a2, 0(a3)
+; RV32I-NEXT: lw a3, 4(a3)
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: mv a2, a0
+; RV32I-NEXT: lui a3, 524288
+; RV32I-NEXT: xor a3, a1, a3
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a4, s2
+; RV32I-NEXT: mv a5, s3
+; RV32I-NEXT: call fma
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmsub_d_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: lui a1, %hi(.LCPI17_0)
+; RV64I-NEXT: ld a1, %lo(.LCPI17_0)(a1)
+; RV64I-NEXT: mv s1, a2
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: li a1, -1
+; RV64I-NEXT: slli a1, a1, 63
+; RV64I-NEXT: xor a1, a0, a1
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: call fma
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+ %b_ = fadd double 0.0, %b
+ %negb = fneg double %b_
+ %1 = call double @llvm.fma.f64(double %a, double %negb, double %c)
+ ret double %1
+}
+
+define double @fmadd_d_contract(double %a, double %b, double %c) nounwind {
+; CHECKIFD-LABEL: fmadd_d_contract:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa2
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fmadd_d_contract:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a4
+; RV32I-NEXT: mv s1, a5
+; RV32I-NEXT: call __muldf3
+; RV32I-NEXT: mv a2, s0
+; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fmadd_d_contract:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: call __muldf3
+; RV64I-NEXT: mv a1, s0
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fmul contract double %a, %b
+ %2 = fadd contract double %1, %c
+ ret double %2
+}
+
+define double @fmsub_d_contract(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fmsub_d_contract:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw zero, 8(sp)
+; RV32IFD-NEXT: sw zero, 12(sp)
+; RV32IFD-NEXT: fld fa5, 8(sp)
+; RV32IFD-NEXT: fadd.d fa5, fa2, fa5
+; RV32IFD-NEXT: fmul.d fa4, fa0, fa1
+; RV32IFD-NEXT: fsub.d fa0, fa4, fa5
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: fmsub_d_contract:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: fmv.d.x fa5, zero
+; RV64IFD-NEXT: fadd.d fa5, fa2, fa5
+; RV64IFD-NEXT: fmul.d fa4, fa0, fa1
+; RV64IFD-NEXT: fsub.d fa0, fa4, fa5
+; RV64IFD-NEXT: ret
+;
+; RV32I-LABEL: fmsub_d_contract:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv s3, a3
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: lui a1, %hi(.LCPI19_0)
+; RV32I-NEXT: addi a1, a1, %lo(.LCPI19_0)
+; RV32I-NEXT: lw a2, 0(a1)
+; RV32I-NEXT: lw a3, 4(a1)
+; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: mv s5, a1
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a2, s2
+; RV32I-NEXT: mv a3, s3
+; RV32I-NEXT: call __muldf3
+; RV32I-NEXT: mv a2, s4
+; RV32I-NEXT: mv a3, s5
+; RV32I-NEXT: call __subdf3
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fmsub_d_contract:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: lui a0, %hi(.LCPI19_0)
+; RV64I-NEXT: ld a1, %lo(.LCPI19_0)(a0)
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: mv s2, a0
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __muldf3
+; RV64I-NEXT: mv a1, s2
+; RV64I-NEXT: call __subdf3
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+ %c_ = fadd double 0.0, %c ; avoid negation using xor
+ %1 = fmul contract double %a, %b
+ %2 = fsub contract double %1, %c_
+ ret double %2
+}
+
+define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmadd_d_contract:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw zero, 8(sp)
+; RV32IFD-NEXT: sw zero, 12(sp)
+; RV32IFD-NEXT: fld fa5, 8(sp)
+; RV32IFD-NEXT: fadd.d fa4, fa0, fa5
+; RV32IFD-NEXT: fadd.d fa3, fa1, fa5
+; RV32IFD-NEXT: fadd.d fa5, fa2, fa5
+; RV32IFD-NEXT: fmul.d fa4, fa4, fa3
+; RV32IFD-NEXT: fneg.d fa4, fa4
+; RV32IFD-NEXT: fsub.d fa0, fa4, fa5
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: fnmadd_d_contract:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: fmv.d.x fa5, zero
+; RV64IFD-NEXT: fadd.d fa4, fa0, fa5
+; RV64IFD-NEXT: fadd.d fa3, fa1, fa5
+; RV64IFD-NEXT: fadd.d fa5, fa2, fa5
+; RV64IFD-NEXT: fmul.d fa4, fa4, fa3
+; RV64IFD-NEXT: fneg.d fa4, fa4
+; RV64IFD-NEXT: fsub.d fa0, fa4, fa5
+; RV64IFD-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_d_contract:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a3
+; RV32I-NEXT: mv s2, a4
+; RV32I-NEXT: lui a2, %hi(.LCPI20_0)
+; RV32I-NEXT: addi a2, a2, %lo(.LCPI20_0)
+; RV32I-NEXT: lw s3, 0(a2)
+; RV32I-NEXT: lw s4, 4(a2)
+; RV32I-NEXT: mv s5, a5
+; RV32I-NEXT: mv a2, s3
+; RV32I-NEXT: mv a3, s4
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: mv s6, a0
+; RV32I-NEXT: mv s7, a1
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a2, s3
+; RV32I-NEXT: mv a3, s4
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a1, s5
+; RV32I-NEXT: mv a2, s3
+; RV32I-NEXT: mv a3, s4
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: mv s3, a1
+; RV32I-NEXT: mv a0, s6
+; RV32I-NEXT: mv a1, s7
+; RV32I-NEXT: mv a2, s0
+; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: call __muldf3
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: mv a2, s2
+; RV32I-NEXT: mv a3, s3
+; RV32I-NEXT: call __subdf3
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_d_contract:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a1
+; RV64I-NEXT: lui a1, %hi(.LCPI20_0)
+; RV64I-NEXT: ld s1, %lo(.LCPI20_0)(a1)
+; RV64I-NEXT: mv s2, a2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: mv s3, a0
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: mv a0, s3
+; RV64I-NEXT: mv a1, s0
+; RV64I-NEXT: call __muldf3
+; RV64I-NEXT: li a1, -1
+; RV64I-NEXT: slli a1, a1, 63
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __subdf3
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+ %a_ = fadd double 0.0, %a ; avoid negation using xor
+ %b_ = fadd double 0.0, %b ; avoid negation using xor
+ %c_ = fadd double 0.0, %c ; avoid negation using xor
+ %1 = fmul contract double %a_, %b_
+ %2 = fneg double %1
+ %3 = fsub contract double %2, %c_
+ ret double %3
+}
+
+define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmsub_d_contract:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw zero, 8(sp)
+; RV32IFD-NEXT: sw zero, 12(sp)
+; RV32IFD-NEXT: fld fa5, 8(sp)
+; RV32IFD-NEXT: fadd.d fa4, fa0, fa5
+; RV32IFD-NEXT: fadd.d fa5, fa1, fa5
+; RV32IFD-NEXT: fnmsub.d fa0, fa4, fa5, fa2
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: fnmsub_d_contract:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: fmv.d.x fa5, zero
+; RV64IFD-NEXT: fadd.d fa4, fa0, fa5
+; RV64IFD-NEXT: fadd.d fa5, fa1, fa5
+; RV64IFD-NEXT: fnmsub.d fa0, fa4, fa5, fa2
+; RV64IFD-NEXT: ret
+;
+; RV32I-LABEL: fnmsub_d_contract:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a3
+; RV32I-NEXT: mv s2, a4
+; RV32I-NEXT: lui a2, %hi(.LCPI21_0)
+; RV32I-NEXT: addi a2, a2, %lo(.LCPI21_0)
+; RV32I-NEXT: lw s3, 0(a2)
+; RV32I-NEXT: lw s4, 4(a2)
+; RV32I-NEXT: mv s5, a5
+; RV32I-NEXT: mv a2, s3
+; RV32I-NEXT: mv a3, s4
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: mv s6, a0
+; RV32I-NEXT: mv s7, a1
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a2, s3
+; RV32I-NEXT: mv a3, s4
+; RV32I-NEXT: call __adddf3
+; RV32I-NEXT: mv a2, a0
+; RV32I-NEXT: mv a3, a1
+; RV32I-NEXT: mv a0, s6
+; RV32I-NEXT: mv a1, s7
+; RV32I-NEXT: call __muldf3
+; RV32I-NEXT: mv a2, a0
+; RV32I-NEXT: mv a3, a1
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a1, s5
+; RV32I-NEXT: call __subdf3
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmsub_d_contract:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a1
+; RV64I-NEXT: lui a1, %hi(.LCPI21_0)
+; RV64I-NEXT: ld s1, %lo(.LCPI21_0)(a1)
+; RV64I-NEXT: mv s2, a2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: mv s3, a0
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: mv a0, s3
+; RV64I-NEXT: call __muldf3
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: call __subdf3
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+ %a_ = fadd double 0.0, %a ; avoid negation using xor
+ %b_ = fadd double 0.0, %b ; avoid negation using xor
+ %1 = fmul contract double %a_, %b_
+ %2 = fsub contract double %c, %1
+ ret double %2
+}
+
+define double @fsgnjx_f64(double %x, double %y) nounwind {
+; RV32IFD-LABEL: fsgnjx_f64:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: lui a0, 261888
+; RV32IFD-NEXT: sw zero, 8(sp)
+; RV32IFD-NEXT: sw a0, 12(sp)
+; RV32IFD-NEXT: fld fa5, 8(sp)
+; RV32IFD-NEXT: fsgnj.d fa5, fa5, fa0
+; RV32IFD-NEXT: fmul.d fa0, fa5, fa1
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: fsgnjx_f64:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: li a0, 1023
+; RV64IFD-NEXT: slli a0, a0, 52
+; RV64IFD-NEXT: fmv.d.x fa5, a0
+; RV64IFD-NEXT: fsgnj.d fa5, fa5, fa0
+; RV64IFD-NEXT: fmul.d fa0, fa5, fa1
+; RV64IFD-NEXT: ret
+;
+; RV32I-LABEL: fsgnjx_f64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a0, 524288
+; RV32I-NEXT: lui a4, 261888
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: or a1, a0, a4
+; RV32I-NEXT: li a0, 0
+; RV32I-NEXT: call __muldf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fsgnjx_f64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, -1
+; RV64I-NEXT: li a3, 1023
+; RV64I-NEXT: slli a2, a2, 63
+; RV64I-NEXT: slli a3, a3, 52
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: call __muldf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %z = call double @llvm.copysign.f64(double 1.0, double %x)
+ %mul = fmul double %z, %y
+ ret double %mul
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll
new file mode 100644
index 00000000000000..0d210890c41f9a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll
@@ -0,0 +1,1100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+f -verify-machineinstrs < %s \
+; RUN: -target-abi=ilp32f | FileCheck -check-prefix=CHECKIF %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+f -verify-machineinstrs < %s \
+; RUN: -target-abi=lp64f | FileCheck -check-prefix=CHECKIF %s
+; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+
+; These tests are each targeted at a particular RISC-V FPU instruction.
+; Compares and conversions can be found in float-fcmp.ll and float-convert.ll
+; respectively. Some other float-*.ll files in this folder exercise LLVM IR
+; instructions that don't directly match a RISC-V instruction.
+
+define float @fadd_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fadd_s:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fadd.s fa0, fa0, fa1
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fadd_s:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fadd_s:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fadd float %a, %b
+ ret float %1
+}
+
+define float @fsub_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fsub_s:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fsub.s fa0, fa0, fa1
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fsub_s:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __subsf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fsub_s:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __subsf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fsub float %a, %b
+ ret float %1
+}
+
+define float @fmul_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fmul_s:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmul.s fa0, fa0, fa1
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fmul_s:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __mulsf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fmul_s:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __mulsf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fmul float %a, %b
+ ret float %1
+}
+
+define float @fdiv_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fdiv_s:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fdiv.s fa0, fa0, fa1
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fdiv_s:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __divsf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fdiv_s:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __divsf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fdiv float %a, %b
+ ret float %1
+}
+
+declare float @llvm.sqrt.f32(float)
+
+define float @fsqrt_s(float %a) nounwind {
+; CHECKIF-LABEL: fsqrt_s:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fsqrt.s fa0, fa0
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fsqrt_s:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call sqrtf
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fsqrt_s:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call sqrtf
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = call float @llvm.sqrt.f32(float %a)
+ ret float %1
+}
+
+declare float @llvm.copysign.f32(float, float)
+
+define float @fsgnj_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fsgnj_s:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fsgnj.s fa0, fa0, fa1
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fsgnj_s:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: addi a3, a2, -1
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fsgnj_s:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a2, 524288
+; RV64I-NEXT: addiw a3, a2, -1
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: ret
+ %1 = call float @llvm.copysign.f32(float %a, float %b)
+ ret float %1
+}
+
+define float @fsgnjn_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fsgnjn_s:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fadd.s fa5, fa0, fa1
+; CHECKIF-NEXT: fneg.s fa5, fa5
+; CHECKIF-NEXT: fsgnj.s fa0, fa0, fa5
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fsgnjn_s:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: lui a1, 524288
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: addi a2, a1, -1
+; RV32I-NEXT: and a2, s0, a2
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: or a0, a2, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fsgnjn_s:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: lui a1, 524288
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: addiw a2, a1, -1
+; RV64I-NEXT: and a2, s0, a2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: or a0, a2, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fadd float %a, %b
+ %2 = fneg float %1
+ %3 = call float @llvm.copysign.f32(float %a, float %2)
+ ret float %3
+}
+
+declare float @llvm.fabs.f32(float)
+
+define float @fabs_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fabs_s:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fadd.s fa5, fa0, fa1
+; CHECKIF-NEXT: fabs.s fa4, fa5
+; CHECKIF-NEXT: fadd.s fa0, fa4, fa5
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fabs_s:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: lui a0, 524288
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fabs_s:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: lui a0, 524288
+; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fadd float %a, %b
+ %2 = call float @llvm.fabs.f32(float %1)
+ %3 = fadd float %2, %1
+ ret float %3
+}
+
+declare float @llvm.minnum.f32(float, float)
+
+define float @fmin_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fmin_s:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmin.s fa0, fa0, fa1
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fmin_s:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call fminf
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fmin_s:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call fminf
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = call float @llvm.minnum.f32(float %a, float %b)
+ ret float %1
+}
+
+declare float @llvm.maxnum.f32(float, float)
+
+define float @fmax_s(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fmax_s:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmax.s fa0, fa0, fa1
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fmax_s:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call fmaxf
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fmax_s:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call fmaxf
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = call float @llvm.maxnum.f32(float %a, float %b)
+ ret float %1
+}
+
+declare float @llvm.fma.f32(float, float, float)
+
+define float @fmadd_s(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fmadd_s:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmadd.s fa0, fa0, fa1, fa2
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fmadd_s:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call fmaf
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fmadd_s:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call fmaf
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
+ ret float %1
+}
+
+define float @fmsub_s(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fmsub_s:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmv.w.x fa5, zero
+; CHECKIF-NEXT: fadd.s fa5, fa2, fa5
+; CHECKIF-NEXT: fmsub.s fa0, fa0, fa1, fa5
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fmsub_s:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: lui a0, %hi(.LCPI11_0)
+; RV32I-NEXT: lw a1, %lo(.LCPI11_0)(a0)
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: xor a2, a0, a2
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call fmaf
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fmsub_s:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: lui a0, %hi(.LCPI11_0)
+; RV64I-NEXT: lw a1, %lo(.LCPI11_0)(a0)
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: lui a2, 524288
+; RV64I-NEXT: xor a2, a0, a2
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call fmaf
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+ %c_ = fadd float 0.0, %c ; avoid negation using xor
+ %negc = fneg float %c_
+ %1 = call float @llvm.fma.f32(float %a, float %b, float %negc)
+ ret float %1
+}
+
+define float @fnmadd_s(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fnmadd_s:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmv.w.x fa5, zero
+; CHECKIF-NEXT: fadd.s fa4, fa0, fa5
+; CHECKIF-NEXT: fadd.s fa5, fa2, fa5
+; CHECKIF-NEXT: fnmadd.s fa0, fa4, fa1, fa5
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_s:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a1
+; RV32I-NEXT: lui a1, %hi(.LCPI12_0)
+; RV32I-NEXT: lw s1, %lo(.LCPI12_0)(a1)
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: mv s3, a0
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: xor a1, s3, a2
+; RV32I-NEXT: xor a2, a0, a2
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: call fmaf
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_s:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a1
+; RV64I-NEXT: lui a1, %hi(.LCPI12_0)
+; RV64I-NEXT: lw s1, %lo(.LCPI12_0)(a1)
+; RV64I-NEXT: mv s2, a2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: mv s3, a0
+; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: lui a2, 524288
+; RV64I-NEXT: xor a1, s3, a2
+; RV64I-NEXT: xor a2, a0, a2
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: mv a1, s0
+; RV64I-NEXT: call fmaf
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+ %a_ = fadd float 0.0, %a
+ %c_ = fadd float 0.0, %c
+ %nega = fneg float %a_
+ %negc = fneg float %c_
+ %1 = call float @llvm.fma.f32(float %nega, float %b, float %negc)
+ ret float %1
+}
+
+define float @fnmadd_s_2(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fnmadd_s_2:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmv.w.x fa5, zero
+; CHECKIF-NEXT: fadd.s fa4, fa1, fa5
+; CHECKIF-NEXT: fadd.s fa5, fa2, fa5
+; CHECKIF-NEXT: fnmadd.s fa0, fa4, fa0, fa5
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_s_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: lui a1, %hi(.LCPI13_0)
+; RV32I-NEXT: lw s1, %lo(.LCPI13_0)(a1)
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: mv s3, a0
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: xor a1, s3, a2
+; RV32I-NEXT: xor a2, a0, a2
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call fmaf
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_s_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: lui a1, %hi(.LCPI13_0)
+; RV64I-NEXT: lw s1, %lo(.LCPI13_0)(a1)
+; RV64I-NEXT: mv s2, a2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: mv s3, a0
+; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: lui a2, 524288
+; RV64I-NEXT: xor a1, s3, a2
+; RV64I-NEXT: xor a2, a0, a2
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: call fmaf
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+ %b_ = fadd float 0.0, %b
+ %c_ = fadd float 0.0, %c
+ %negb = fneg float %b_
+ %negc = fneg float %c_
+ %1 = call float @llvm.fma.f32(float %a, float %negb, float %negc)
+ ret float %1
+}
+
+define float @fnmadd_s_3(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fnmadd_s_3:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fmadd.s ft0, fa0, fa1, fa2
+; RV32IF-NEXT: fneg.s fa0, ft0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: fnmadd_s_3:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fmadd.s ft0, fa0, fa1, fa2
+; RV64IF-NEXT: fneg.s fa0, ft0
+; RV64IF-NEXT: ret
+;
+; CHECKIF-LABEL: fnmadd_s_3:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmadd.s fa5, fa0, fa1, fa2
+; CHECKIF-NEXT: fneg.s fa0, fa5
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_s_3:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call fmaf
+; RV32I-NEXT: lui a1, 524288
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_s_3:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call fmaf
+; RV64I-NEXT: lui a1, 524288
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
+ %neg = fneg float %1
+ ret float %neg
+}
+
+define float @fnmadd_nsz(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fnmadd_nsz:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fnmadd.s fa0, fa0, fa1, fa2
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: fnmadd_nsz:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fnmadd.s fa0, fa0, fa1, fa2
+; RV64IF-NEXT: ret
+;
+; CHECKIF-LABEL: fnmadd_nsz:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmadd.s fa5, fa0, fa1, fa2
+; CHECKIF-NEXT: fneg.s fa0, fa5
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_nsz:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call fmaf
+; RV32I-NEXT: lui a1, 524288
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_nsz:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call fmaf
+; RV64I-NEXT: lui a1, 524288
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = call nsz float @llvm.fma.f32(float %a, float %b, float %c)
+ %neg = fneg nsz float %1
+ ret float %neg
+}
+
+define float @fnmsub_s(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fnmsub_s:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmv.w.x fa5, zero
+; CHECKIF-NEXT: fadd.s fa5, fa0, fa5
+; CHECKIF-NEXT: fnmsub.s fa0, fa5, fa1, fa2
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fnmsub_s:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a1
+; RV32I-NEXT: lui a1, %hi(.LCPI16_0)
+; RV32I-NEXT: lw a1, %lo(.LCPI16_0)(a1)
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: lui a1, 524288
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: mv a2, s1
+; RV32I-NEXT: call fmaf
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmsub_s:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a1
+; RV64I-NEXT: lui a1, %hi(.LCPI16_0)
+; RV64I-NEXT: lw a1, %lo(.LCPI16_0)(a1)
+; RV64I-NEXT: mv s1, a2
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: lui a1, 524288
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: mv a1, s0
+; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: call fmaf
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+ %a_ = fadd float 0.0, %a
+ %nega = fneg float %a_
+ %1 = call float @llvm.fma.f32(float %nega, float %b, float %c)
+ ret float %1
+}
+
+define float @fnmsub_s_2(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fnmsub_s_2:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmv.w.x fa5, zero
+; CHECKIF-NEXT: fadd.s fa5, fa1, fa5
+; CHECKIF-NEXT: fnmsub.s fa0, fa5, fa0, fa2
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fnmsub_s_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: lui a1, %hi(.LCPI17_0)
+; RV32I-NEXT: lw a1, %lo(.LCPI17_0)(a1)
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: lui a1, 524288
+; RV32I-NEXT: xor a1, a0, a1
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a2, s1
+; RV32I-NEXT: call fmaf
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmsub_s_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: lui a1, %hi(.LCPI17_0)
+; RV64I-NEXT: lw a1, %lo(.LCPI17_0)(a1)
+; RV64I-NEXT: mv s1, a2
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: lui a1, 524288
+; RV64I-NEXT: xor a1, a0, a1
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: call fmaf
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+ %b_ = fadd float 0.0, %b
+ %negb = fneg float %b_
+ %1 = call float @llvm.fma.f32(float %a, float %negb, float %c)
+ ret float %1
+}
+
+define float @fmadd_s_contract(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fmadd_s_contract:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmadd.s fa0, fa0, fa1, fa2
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fmadd_s_contract:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: call __mulsf3
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fmadd_s_contract:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: call __mulsf3
+; RV64I-NEXT: mv a1, s0
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fmul contract float %a, %b
+ %2 = fadd contract float %1, %c
+ ret float %2
+}
+
+define float @fmsub_s_contract(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fmsub_s_contract:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmv.w.x fa5, zero
+; CHECKIF-NEXT: fadd.s fa5, fa2, fa5
+; CHECKIF-NEXT: fmul.s fa4, fa0, fa1
+; CHECKIF-NEXT: fsub.s fa0, fa4, fa5
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fmsub_s_contract:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: lui a0, %hi(.LCPI19_0)
+; RV32I-NEXT: lw a1, %lo(.LCPI19_0)(a0)
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __mulsf3
+; RV32I-NEXT: mv a1, s2
+; RV32I-NEXT: call __subsf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fmsub_s_contract:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: lui a0, %hi(.LCPI19_0)
+; RV64I-NEXT: lw a1, %lo(.LCPI19_0)(a0)
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: mv s2, a0
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __mulsf3
+; RV64I-NEXT: mv a1, s2
+; RV64I-NEXT: call __subsf3
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+ %c_ = fadd float 0.0, %c ; avoid negation using xor
+ %1 = fmul contract float %a, %b
+ %2 = fsub contract float %1, %c_
+ ret float %2
+}
+
+define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fnmadd_s_contract:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmv.w.x fa5, zero
+; CHECKIF-NEXT: fadd.s fa4, fa0, fa5
+; CHECKIF-NEXT: fadd.s fa3, fa1, fa5
+; CHECKIF-NEXT: fadd.s fa5, fa2, fa5
+; CHECKIF-NEXT: fmul.s fa4, fa4, fa3
+; CHECKIF-NEXT: fneg.s fa4, fa4
+; CHECKIF-NEXT: fsub.s fa0, fa4, fa5
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_s_contract:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a1
+; RV32I-NEXT: lui a1, %hi(.LCPI20_0)
+; RV32I-NEXT: lw s1, %lo(.LCPI20_0)(a1)
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: mv s3, a0
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: mv a0, s3
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: call __mulsf3
+; RV32I-NEXT: lui a1, 524288
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __subsf3
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_s_contract:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a1
+; RV64I-NEXT: lui a1, %hi(.LCPI20_0)
+; RV64I-NEXT: lw s1, %lo(.LCPI20_0)(a1)
+; RV64I-NEXT: mv s2, a2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: mv s3, a0
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: mv a0, s3
+; RV64I-NEXT: mv a1, s0
+; RV64I-NEXT: call __mulsf3
+; RV64I-NEXT: lui a1, 524288
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __subsf3
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+ %a_ = fadd float 0.0, %a ; avoid negation using xor
+ %b_ = fadd float 0.0, %b ; avoid negation using xor
+ %c_ = fadd float 0.0, %c ; avoid negation using xor
+ %1 = fmul contract float %a_, %b_
+ %2 = fneg float %1
+ %3 = fsub contract float %2, %c_
+ ret float %3
+}
+
+define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind {
+; CHECKIF-LABEL: fnmsub_s_contract:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fmv.w.x fa5, zero
+; CHECKIF-NEXT: fadd.s fa4, fa0, fa5
+; CHECKIF-NEXT: fadd.s fa5, fa1, fa5
+; CHECKIF-NEXT: fnmsub.s fa0, fa4, fa5, fa2
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fnmsub_s_contract:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a1
+; RV32I-NEXT: lui a1, %hi(.LCPI21_0)
+; RV32I-NEXT: lw s1, %lo(.LCPI21_0)(a1)
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: mv s3, a0
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __addsf3
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: mv a0, s3
+; RV32I-NEXT: call __mulsf3
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: call __subsf3
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmsub_s_contract:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a1
+; RV64I-NEXT: lui a1, %hi(.LCPI21_0)
+; RV64I-NEXT: lw s1, %lo(.LCPI21_0)(a1)
+; RV64I-NEXT: mv s2, a2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: mv s3, a0
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: mv a0, s3
+; RV64I-NEXT: call __mulsf3
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: call __subsf3
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+ %a_ = fadd float 0.0, %a ; avoid negation using xor
+ %b_ = fadd float 0.0, %b ; avoid negation using xor
+ %1 = fmul contract float %a_, %b_
+ %2 = fsub contract float %c, %1
+ ret float %2
+}
+
+define float @fsgnjx_f32(float %x, float %y) nounwind {
+; CHECKIF-LABEL: fsgnjx_f32:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: lui a0, 260096
+; CHECKIF-NEXT: fmv.w.x fa5, a0
+; CHECKIF-NEXT: fsgnj.s fa5, fa5, fa0
+; CHECKIF-NEXT: fmul.s fa0, fa5, fa1
+; CHECKIF-NEXT: ret
+;
+; RV32I-LABEL: fsgnjx_f32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: lui a3, 260096
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: call __mulsf3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fsgnjx_f32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 524288
+; RV64I-NEXT: lui a3, 260096
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: call __mulsf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %z = call float @llvm.copysign.f32(float 1.0, float %x)
+ %mul = fmul float %z, %y
+ ret float %mul
+}
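+
+; NOTE (illustration only, not part of this patch): the RV32I/RV64I checks in
+; the functions above negate a float with plain integer instructions:
+; `lui a1, 524288` materializes 524288 << 12 == 0x80000000, the f32 sign-bit
+; mask, and the following `xor` flips just that bit. A minimal host-side C++
+; sketch of the same bit manipulation (function names here are made up for
+; the example):
+;
+;   #include <bit>
+;   #include <cstdint>
+;   #include <cstdio>
+;
+;   // Negate a float by XOR-ing its sign bit, mirroring the lui/xor sequence
+;   // in the RV32I/RV64I check lines above.
+;   static float fneg_via_int(float x) {
+;     uint32_t bits = std::bit_cast<uint32_t>(x);
+;     bits ^= 0x80000000u;               // flip only the sign bit
+;     return std::bit_cast<float>(bits);
+;   }
+;
+;   int main() {
+;     std::printf("%f\n", fneg_via_int(1.5f)); // prints -1.500000
+;     return 0;
+;   }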
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
index b5619075b6bfcb..74749d8f1944be 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
@@ -455,6 +455,7 @@
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FSUB (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FMUL (opcode {{[0-9]+}}): 1 type index, 0 imm indices
@@ -506,7 +507,6 @@
# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_FNEG (opcode {{[0-9]+}}): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FPEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
@@ -536,6 +536,7 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_FABS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FCOPYSIGN (opcode {{[0-9]+}}): 2 type indices