[llvm] bed4c58 - [NVPTX] Check 'contract' fast-math flag in addition to global options (#131372)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 25 09:20:46 PDT 2025
Author: Alex MacLean
Date: 2025-03-25T09:20:42-07:00
New Revision: bed4c581c2a73a437f13e278c0d654c01efb58a1
URL: https://github.com/llvm/llvm-project/commit/bed4c581c2a73a437f13e278c0d654c01efb58a1
DIFF: https://github.com/llvm/llvm-project/commit/bed4c581c2a73a437f13e278c0d654c01efb58a1.diff
LOG: [NVPTX] Check 'contract' fast-math flag in addition to global options (#131372)
Added:
Modified:
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/test/CodeGen/NVPTX/fp-contract.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 7b70cf0eaaa8a..06e221777b7ea 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -4950,7 +4950,9 @@ PerformFADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
if (N0.getOpcode() == ISD::FMUL) {
const auto *TLI = static_cast<const NVPTXTargetLowering *>(
&DCI.DAG.getTargetLoweringInfo());
- if (!TLI->allowFMA(DCI.DAG.getMachineFunction(), OptLevel))
+ if (!(TLI->allowFMA(DCI.DAG.getMachineFunction(), OptLevel) ||
+ (N->getFlags().hasAllowContract() &&
+ N0->getFlags().hasAllowContract())))
return SDValue();
// For floating point:
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index a65bd14ebfe5f..1786503a6dd4e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -150,8 +150,6 @@ def doRsqrtOpt : Predicate<"doRsqrtOpt()">;
def doMulWide : Predicate<"doMulWide">;
-def allowFMA : Predicate<"allowFMA()">;
-def noFMA : Predicate<"!allowFMA()">;
def allowUnsafeFPMath : Predicate<"allowUnsafeFPMath()">;
def noUnsafeFPMath : Predicate<"!allowUnsafeFPMath()">;
@@ -367,167 +365,89 @@ multiclass FMINIMUMMAXIMUM<string OpcStr, bit NaN, SDNode OpNode> {
// This multiclass should be used for nodes that can be folded to make fma ops.
// In this case, we use the ".rn" variant when FMA is disabled, as this behaves
// just like the non ".rn" op, but prevents ptxas from creating FMAs.
-multiclass F3_fma_component<string OpcStr, SDNode OpNode> {
- def f64rr :
- NVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, Float64Regs:$b),
- !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
- [(set f64:$dst, (OpNode f64:$a, f64:$b))]>,
- Requires<[allowFMA]>;
- def f64ri :
- NVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, f64imm:$b),
- !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
- [(set f64:$dst, (OpNode f64:$a, fpimm:$b))]>,
- Requires<[allowFMA]>;
- def f32rr_ftz :
- NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
- !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
- [(set f32:$dst, (OpNode f32:$a, f32:$b))]>,
- Requires<[allowFMA, doF32FTZ]>;
- def f32ri_ftz :
- NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
- !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
- [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>,
- Requires<[allowFMA, doF32FTZ]>;
- def f32rr :
- NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
- !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
- [(set f32:$dst, (OpNode f32:$a, f32:$b))]>,
- Requires<[allowFMA]>;
- def f32ri :
- NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
- !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
- [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>,
- Requires<[allowFMA]>;
-
- def f16rr_ftz :
- NVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"),
- [(set f16:$dst, (OpNode f16:$a, f16:$b))]>,
- Requires<[useFP16Math, allowFMA, doF32FTZ]>;
- def f16rr :
- NVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"),
- [(set f16:$dst, (OpNode f16:$a, f16:$b))]>,
- Requires<[useFP16Math, allowFMA]>;
+multiclass F3<string op_str, SDPatternOperator op_pat> {
+ def f64rr :
+ NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ op_str # ".f64 \t$dst, $a, $b;",
+ [(set f64:$dst, (op_pat f64:$a, f64:$b))]>;
+ def f64ri :
+ NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ op_str # ".f64 \t$dst, $a, $b;",
+ [(set f64:$dst, (op_pat f64:$a, fpimm:$b))]>;
+ def f32rr_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ op_str # ".ftz.f32 \t$dst, $a, $b;",
+ [(set f32:$dst, (op_pat f32:$a, f32:$b))]>,
+ Requires<[doF32FTZ]>;
+ def f32ri_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ op_str # ".ftz.f32 \t$dst, $a, $b;",
+ [(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>,
+ Requires<[doF32FTZ]>;
+ def f32rr :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ op_str # ".f32 \t$dst, $a, $b;",
+ [(set f32:$dst, (op_pat f32:$a, f32:$b))]>;
+ def f32ri :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ op_str # ".f32 \t$dst, $a, $b;",
+ [(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>;
+
+ def f16rr_ftz :
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ op_str # ".ftz.f16 \t$dst, $a, $b;",
+ [(set f16:$dst, (op_pat f16:$a, f16:$b))]>,
+ Requires<[useFP16Math, doF32FTZ]>;
+ def f16rr :
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ op_str # ".f16 \t$dst, $a, $b;",
+ [(set f16:$dst, (op_pat f16:$a, f16:$b))]>,
+ Requires<[useFP16Math]>;
+
+ def f16x2rr_ftz :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ op_str # ".ftz.f16x2 \t$dst, $a, $b;",
+ [(set v2f16:$dst, (op_pat v2f16:$a, v2f16:$b))]>,
+ Requires<[useFP16Math, doF32FTZ]>;
+ def f16x2rr :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ op_str # ".f16x2 \t$dst, $a, $b;",
+ [(set v2f16:$dst, (op_pat v2f16:$a, v2f16:$b))]>,
+ Requires<[useFP16Math]>;
+ def bf16rr :
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ op_str # ".bf16 \t$dst, $a, $b;",
+ [(set bf16:$dst, (op_pat bf16:$a, bf16:$b))]>,
+ Requires<[hasBF16Math]>;
+
+ def bf16x2rr :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ op_str # ".bf16x2 \t$dst, $a, $b;",
+ [(set v2bf16:$dst, (op_pat v2bf16:$a, v2bf16:$b))]>,
+ Requires<[hasBF16Math]>;
+}
- def f16x2rr_ftz :
- NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"),
- [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>,
- Requires<[useFP16Math, allowFMA, doF32FTZ]>;
- def f16x2rr :
- NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"),
- [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>,
- Requires<[useFP16Math, allowFMA]>;
- def bf16rr :
- NVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, ".bf16 \t$dst, $a, $b;"),
- [(set bf16:$dst, (OpNode bf16:$a, bf16:$b))]>,
- Requires<[hasBF16Math, allowFMA]>;
+class BinOpAllowsFMA<SDPatternOperator operator>
+ : PatFrag<(ops node:$A, node:$B),
+ (operator node:$A, node:$B), [{
+ return allowFMA() || N->getFlags().hasAllowContract();
+}]>;
- def bf16x2rr :
- NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, ".bf16x2 \t$dst, $a, $b;"),
- [(set v2bf16:$dst, (OpNode v2bf16:$a, v2bf16:$b))]>,
- Requires<[hasBF16Math, allowFMA]>;
- // These have strange names so we don't perturb existing mir tests.
- def _rnf64rr :
- NVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, Float64Regs:$b),
- !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
- [(set f64:$dst, (OpNode f64:$a, f64:$b))]>,
- Requires<[noFMA]>;
- def _rnf64ri :
- NVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, f64imm:$b),
- !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
- [(set f64:$dst, (OpNode f64:$a, fpimm:$b))]>,
- Requires<[noFMA]>;
- def _rnf32rr_ftz :
- NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
- !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
- [(set f32:$dst, (OpNode f32:$a, Float32Regs:$b))]>,
- Requires<[noFMA, doF32FTZ]>;
- def _rnf32ri_ftz :
- NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
- !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
- [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>,
- Requires<[noFMA, doF32FTZ]>;
- def _rnf32rr :
- NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
- !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
- [(set f32:$dst, (OpNode f32:$a, f32:$b))]>,
- Requires<[noFMA]>;
- def _rnf32ri :
- NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
- !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
- [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>,
- Requires<[noFMA]>;
- def _rnf16rr_ftz :
- NVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, ".rn.ftz.f16 \t$dst, $a, $b;"),
- [(set f16:$dst, (OpNode f16:$a, f16:$b))]>,
- Requires<[useFP16Math, noFMA, doF32FTZ]>;
- def _rnf16rr :
- NVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, ".rn.f16 \t$dst, $a, $b;"),
- [(set f16:$dst, (OpNode f16:$a, f16:$b))]>,
- Requires<[useFP16Math, noFMA]>;
- def _rnf16x2rr_ftz :
- NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, ".rn.ftz.f16x2 \t$dst, $a, $b;"),
- [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>,
- Requires<[useFP16Math, noFMA, doF32FTZ]>;
- def _rnf16x2rr :
- NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, ".rn.f16x2 \t$dst, $a, $b;"),
- [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>,
- Requires<[useFP16Math, noFMA]>;
- def _rnbf16rr_ftz :
- NVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, ".rn.ftz.bf16 \t$dst, $a, $b;"),
- [(set bf16:$dst, (OpNode bf16:$a, bf16:$b))]>,
- Requires<[hasBF16Math, noFMA, doF32FTZ]>;
- def _rnbf16rr :
- NVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, ".rn.bf16 \t$dst, $a, $b;"),
- [(set bf16:$dst, (OpNode bf16:$a, bf16:$b))]>,
- Requires<[hasBF16Math, noFMA]>;
- def _rnbf16x2rr_ftz :
- NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, ".rn.ftz.bf16x2 \t$dst, $a, $b;"),
- [(set v2bf16:$dst, (OpNode v2bf16:$a, v2bf16:$b))]>,
- Requires<[hasBF16Math, noFMA, doF32FTZ]>;
- def _rnbf16x2rr :
- NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, ".rn.bf16x2 \t$dst, $a, $b;"),
- [(set v2bf16:$dst, (OpNode v2bf16:$a, v2bf16:$b))]>,
- Requires<[hasBF16Math, noFMA]>;
+multiclass F3_fma_component<string op_str, SDNode op_node> {
+ defm "" : F3<op_str, BinOpAllowsFMA<op_node>>;
+ defm _rn : F3<op_str # ".rn", op_node>;
}
// Template for operations which take two f32 or f64 operands. Provides three
diff --git a/llvm/test/CodeGen/NVPTX/fp-contract.ll b/llvm/test/CodeGen/NVPTX/fp-contract.ll
index 9da9a8691098b..ea5da6ee57f65 100644
--- a/llvm/test/CodeGen/NVPTX/fp-contract.ll
+++ b/llvm/test/CodeGen/NVPTX/fp-contract.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s --check-prefix=FAST
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefix=DEFAULT
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,FAST
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefixes=CHECK,DEFAULT
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 -fp-contract=fast | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 | %ptxas-verify %}
target triple = "nvptx64-unknown-cuda"
@@ -11,25 +12,105 @@ target triple = "nvptx64-unknown-cuda"
;; is free to fuse with a multiply if it is able. If fusion is not allowed,
;; we do not form fma.rn at the PTX level and explicitly generate add.rn
;; for all adds to prevent ptxas from fusing the ops.
-
-;; FAST-LABEL: @t0
-;; DEFAULT-LABEL: @t0
define float @t0(float %a, float %b, float %c) {
-;; FAST: fma.rn.f32
-;; DEFAULT: mul.rn.f32
-;; DEFAULT: add.rn.f32
+; FAST-LABEL: t0(
+; FAST: {
+; FAST-NEXT: .reg .f32 %f<5>;
+; FAST-EMPTY:
+; FAST-NEXT: // %bb.0:
+; FAST-NEXT: ld.param.f32 %f1, [t0_param_0];
+; FAST-NEXT: ld.param.f32 %f2, [t0_param_1];
+; FAST-NEXT: ld.param.f32 %f3, [t0_param_2];
+; FAST-NEXT: fma.rn.f32 %f4, %f1, %f2, %f3;
+; FAST-NEXT: st.param.f32 [func_retval0], %f4;
+; FAST-NEXT: ret;
+;
+; DEFAULT-LABEL: t0(
+; DEFAULT: {
+; DEFAULT-NEXT: .reg .f32 %f<6>;
+; DEFAULT-EMPTY:
+; DEFAULT-NEXT: // %bb.0:
+; DEFAULT-NEXT: ld.param.f32 %f1, [t0_param_0];
+; DEFAULT-NEXT: ld.param.f32 %f2, [t0_param_1];
+; DEFAULT-NEXT: mul.rn.f32 %f3, %f1, %f2;
+; DEFAULT-NEXT: ld.param.f32 %f4, [t0_param_2];
+; DEFAULT-NEXT: add.rn.f32 %f5, %f3, %f4;
+; DEFAULT-NEXT: st.param.f32 [func_retval0], %f5;
+; DEFAULT-NEXT: ret;
%v0 = fmul float %a, %b
%v1 = fadd float %v0, %c
ret float %v1
}
-;; FAST-LABEL: @t1
-;; DEFAULT-LABEL: @t1
-define float @t1(float %a, float %b) {
;; We cannot form an fma here, but make sure we explicitly emit add.rn.f32
;; to prevent ptxas from fusing this with anything else.
-;; FAST: add.f32
-;; DEFAULT: add.rn.f32
+define float @t1(float %a, float %b) {
+; FAST-LABEL: t1(
+; FAST: {
+; FAST-NEXT: .reg .f32 %f<6>;
+; FAST-EMPTY:
+; FAST-NEXT: // %bb.0:
+; FAST-NEXT: ld.param.f32 %f1, [t1_param_0];
+; FAST-NEXT: ld.param.f32 %f2, [t1_param_1];
+; FAST-NEXT: add.f32 %f3, %f1, %f2;
+; FAST-NEXT: sub.f32 %f4, %f1, %f2;
+; FAST-NEXT: mul.f32 %f5, %f3, %f4;
+; FAST-NEXT: st.param.f32 [func_retval0], %f5;
+; FAST-NEXT: ret;
+;
+; DEFAULT-LABEL: t1(
+; DEFAULT: {
+; DEFAULT-NEXT: .reg .f32 %f<6>;
+; DEFAULT-EMPTY:
+; DEFAULT-NEXT: // %bb.0:
+; DEFAULT-NEXT: ld.param.f32 %f1, [t1_param_0];
+; DEFAULT-NEXT: ld.param.f32 %f2, [t1_param_1];
+; DEFAULT-NEXT: add.rn.f32 %f3, %f1, %f2;
+; DEFAULT-NEXT: sub.rn.f32 %f4, %f1, %f2;
+; DEFAULT-NEXT: mul.rn.f32 %f5, %f3, %f4;
+; DEFAULT-NEXT: st.param.f32 [func_retval0], %f5;
+; DEFAULT-NEXT: ret;
%v1 = fadd float %a, %b
+ %v2 = fsub float %a, %b
+ %v3 = fmul float %v1, %v2
+ ret float %v3
+}
+
+;; Make sure we generate the non ".rn" version when the "contract" flag is
+;; present on the instructions
+define float @t2(float %a, float %b) {
+; CHECK-LABEL: t2(
+; CHECK: {
+; CHECK-NEXT: .reg .f32 %f<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [t2_param_0];
+; CHECK-NEXT: ld.param.f32 %f2, [t2_param_1];
+; CHECK-NEXT: add.f32 %f3, %f1, %f2;
+; CHECK-NEXT: sub.f32 %f4, %f1, %f2;
+; CHECK-NEXT: mul.f32 %f5, %f3, %f4;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f5;
+; CHECK-NEXT: ret;
+ %v1 = fadd contract float %a, %b
+ %v2 = fsub contract float %a, %b
+ %v3 = fmul contract float %v1, %v2
+ ret float %v3
+}
+
+;; Make sure we always fold to fma when the "contract" flag is present
+define float @t3(float %a, float %b, float %c) {
+; CHECK-LABEL: t3(
+; CHECK: {
+; CHECK-NEXT: .reg .f32 %f<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [t3_param_0];
+; CHECK-NEXT: ld.param.f32 %f2, [t3_param_1];
+; CHECK-NEXT: ld.param.f32 %f3, [t3_param_2];
+; CHECK-NEXT: fma.rn.f32 %f4, %f1, %f2, %f3;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f4;
+; CHECK-NEXT: ret;
+ %v0 = fmul contract float %a, %b
+ %v1 = fadd contract float %v0, %c
ret float %v1
}
More information about the llvm-commits
mailing list