[llvm] [WebAssembly] Lower fmuladd to fma (PR #161355)
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 6 07:08:32 PDT 2025
https://github.com/sparker-arm updated https://github.com/llvm/llvm-project/pull/161355
>From 3ffead4a4c9d2a5f91d90148214da0ede0a3d48f Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker at arm.com>
Date: Mon, 6 Oct 2025 15:01:56 +0100
Subject: [PATCH] [WebAssembly] relaxed madd, nmadd from fmuladd.
Introduce an FMULADD ISD node as the equivalent of the llvm.fmuladd
intrinsic and lower it to madd and nmadd when relaxed-simd is available.
---
llvm/include/llvm/CodeGen/ISDOpcodes.h | 3 +
.../include/llvm/Target/TargetSelectionDAG.td | 1 +
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 17 +
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 6 +-
.../SelectionDAG/SelectionDAGBuilder.cpp | 6 +
.../SelectionDAG/SelectionDAGDumper.cpp | 1 +
.../CodeGen/SelectionDAG/TargetLowering.cpp | 1 +
llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 +-
.../WebAssembly/WebAssemblyISelLowering.cpp | 8 +
.../WebAssembly/WebAssemblyInstrSIMD.td | 14 +-
.../CodeGen/WebAssembly/simd-relaxed-fma.ll | 1240 ++++++++++++++++-
.../CodeGen/WebAssembly/simd-relaxed-fnma.ll | 63 +-
12 files changed, 1325 insertions(+), 38 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index c76c83d84b3c7..e963ec817a91f 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -514,6 +514,9 @@ enum NodeType {
/// separately rounded operations.
FMAD,
+ /// FMULADD - Performs a * b + c, with or without intermediate rounding.
+ FMULADD,
+
/// FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This
/// DAG node does not require that X and Y have the same type, just that
/// they are both floating point. X and the result must have the same type.
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 5e57dcaa303f3..be01675c694de 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -530,6 +530,7 @@ def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>;
def frem : SDNode<"ISD::FREM" , SDTFPBinOp>;
def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp, [SDNPCommutative]>;
def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp, [SDNPCommutative]>;
+def fmuladd : SDNode<"ISD::FMULADD" , SDTFPTernaryOp, [SDNPCommutative]>;
def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>;
def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 204e1f0c75e00..f3b3ada3aacce 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -509,6 +509,7 @@ namespace {
SDValue visitFMUL(SDNode *N);
template <class MatchContextClass> SDValue visitFMA(SDNode *N);
SDValue visitFMAD(SDNode *N);
+ SDValue visitFMULADD(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
@@ -1991,6 +1992,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FMUL: return visitFMUL(N);
case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
case ISD::FMAD: return visitFMAD(N);
+ case ISD::FMULADD: return visitFMULADD(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
@@ -18431,6 +18433,21 @@ SDValue DAGCombiner::visitFMAD(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFMULADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Constant fold FMULADD; the fold rounds the product, then adds (unfused).
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2}))
+ return C;
+
+ return SDValue();
+}
+
+
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8fc7eabf90ea8..2e2c3099d474f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5785,6 +5785,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::FCOPYSIGN:
case ISD::FMA:
case ISD::FMAD:
+ case ISD::FMULADD:
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
@@ -5903,6 +5904,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
case ISD::FCOSH:
case ISD::FTANH:
case ISD::FMA:
+ case ISD::FMULADD:
case ISD::FMAD: {
if (SNaN)
return true;
@@ -7230,7 +7232,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
// Handle fma/fmad special cases.
- if (Opcode == ISD::FMA || Opcode == ISD::FMAD) {
+ if (Opcode == ISD::FMA || Opcode == ISD::FMAD || Opcode == ISD::FMULADD) {
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
Ops[2].getValueType() == VT && "FMA types must match!");
@@ -7241,7 +7243,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
APFloat V1 = C1->getValueAPF();
const APFloat &V2 = C2->getValueAPF();
const APFloat &V3 = C3->getValueAPF();
- if (Opcode == ISD::FMAD) {
+ if (Opcode == ISD::FMAD || Opcode == ISD::FMULADD) {
V1.multiply(V2, APFloat::rmNearestTiesToEven);
V1.add(V3, APFloat::rmNearestTiesToEven);
} else
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index b5201a311c591..3ae233828d21c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6996,6 +6996,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)), Flags));
+ } else if (TLI.isOperationLegalOrCustom(ISD::FMULADD, VT)) {
+ setValue(&I, DAG.getNode(ISD::FMULADD, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)), Flags));
} else {
// TODO: Intrinsic calls should have fast-math-flags.
SDValue Mul = DAG.getNode(
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 4b2a00c2e2cfa..561c585ea449d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -310,6 +310,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FMA: return "fma";
case ISD::STRICT_FMA: return "strict_fma";
case ISD::FMAD: return "fmad";
+ case ISD::FMULADD: return "fmuladd";
case ISD::FREM: return "frem";
case ISD::STRICT_FREM: return "strict_frem";
case ISD::FCOPYSIGN: return "fcopysign";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cc503d324e74b..920dff935daed 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7676,6 +7676,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
break;
}
case ISD::FMA:
+ case ISD::FMULADD:
case ISD::FMAD: {
if (!Flags.hasNoSignedZeros())
break;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index c23281a820b2b..060b1ddc2ef39 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -815,7 +815,8 @@ void TargetLoweringBase::initActions() {
ISD::FTAN, ISD::FACOS,
ISD::FASIN, ISD::FATAN,
ISD::FCOSH, ISD::FSINH,
- ISD::FTANH, ISD::FATAN2},
+ ISD::FTANH, ISD::FATAN2,
+ ISD::FMULADD},
VT, Expand);
// Overflow operations default to expand
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 163bf9ba5b089..05af6fa002d86 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -317,6 +317,14 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
}
+ if (Subtarget->hasRelaxedSIMD()) {
+ if (Subtarget->hasFP16()) {
+ setOperationAction(ISD::FMULADD, MVT::v8f16, Legal);
+ }
+ setOperationAction(ISD::FMULADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMULADD, MVT::v2f64, Legal);
+ }
+
// Partial MLA reductions.
for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) {
setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 130602650d34e..6633d97605a9a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1597,16 +1597,20 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate>
vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c",
vec.prefix#".relaxed_nmadd", simdopS, reqs>;
- def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
- (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+ def : Pat<(fadd_contract (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b)), (vec.vt V128:$c)),
+ (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
+ def : Pat<(fmuladd (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)),
+ (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
- def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
- (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+ def : Pat<(fsub_contract (vec.vt V128:$c), (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b))),
+ (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
+ def : Pat<(fmuladd (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)),
+ (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
}
defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
defm "" : SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
-defm "" : SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>;
+defm "" : SIMDMADD<F16x8, 0x14e, 0x14f, [HasRelaxedSIMD, HasFP16]>;
//===----------------------------------------------------------------------===//
// Laneselect
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
index e065de38951b1..6bae35c3eb011 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
@@ -2,9 +2,278 @@
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s --check-prefix=RELAXED
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128, | FileCheck %s --check-prefix=STRICT
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefix=NOFP16
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefix=NOSIMD
target triple = "wasm32"
+define half @fadd_fmul_contract_f16(half %a, half %b, half %c) {
+; RELAXED-LABEL: fadd_fmul_contract_f16:
+; RELAXED: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: call $push0=, __truncsfhf2, $0
+; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0
+; RELAXED-NEXT: call $push2=, __truncsfhf2, $1
+; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2
+; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3
+; RELAXED-NEXT: call $push5=, __truncsfhf2, $2
+; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5
+; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6
+; RELAXED-NEXT: return $pop7
+;
+; STRICT-LABEL: fadd_fmul_contract_f16:
+; STRICT: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: call $push0=, __truncsfhf2, $0
+; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0
+; STRICT-NEXT: call $push2=, __truncsfhf2, $1
+; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2
+; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3
+; STRICT-NEXT: call $push5=, __truncsfhf2, $2
+; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5
+; STRICT-NEXT: f32.add $push7=, $pop4, $pop6
+; STRICT-NEXT: return $pop7
+;
+; NOFP16-LABEL: fadd_fmul_contract_f16:
+; NOFP16: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, __truncsfhf2, $0
+; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOFP16-NEXT: call $push2=, __truncsfhf2, $1
+; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOFP16-NEXT: call $push5=, __truncsfhf2, $2
+; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
+; NOFP16-NEXT: return $pop7
+;
+; NOSIMD-LABEL: fadd_fmul_contract_f16:
+; NOSIMD: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, __truncsfhf2, $0
+; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOSIMD-NEXT: call $push2=, __truncsfhf2, $1
+; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2
+; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
+; NOSIMD-NEXT: return $pop7
+ %mul = fmul contract half %b, %a
+ %add = fadd contract half %mul, %c
+ ret half %add
+}
+
+define half @fmuladd_contract_f16(half %a, half %b, half %c) {
+; RELAXED-LABEL: fmuladd_contract_f16:
+; RELAXED: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: call $push0=, __truncsfhf2, $1
+; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0
+; RELAXED-NEXT: call $push2=, __truncsfhf2, $0
+; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2
+; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3
+; RELAXED-NEXT: call $push5=, __truncsfhf2, $2
+; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5
+; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6
+; RELAXED-NEXT: return $pop7
+;
+; STRICT-LABEL: fmuladd_contract_f16:
+; STRICT: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: call $push0=, __truncsfhf2, $1
+; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0
+; STRICT-NEXT: call $push2=, __truncsfhf2, $0
+; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2
+; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3
+; STRICT-NEXT: call $push5=, __truncsfhf2, $2
+; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5
+; STRICT-NEXT: f32.add $push7=, $pop4, $pop6
+; STRICT-NEXT: return $pop7
+;
+; NOFP16-LABEL: fmuladd_contract_f16:
+; NOFP16: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, __truncsfhf2, $1
+; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOFP16-NEXT: call $push2=, __truncsfhf2, $0
+; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOFP16-NEXT: call $push5=, __truncsfhf2, $2
+; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
+; NOFP16-NEXT: return $pop7
+;
+; NOSIMD-LABEL: fmuladd_contract_f16:
+; NOSIMD: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, __truncsfhf2, $1
+; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOSIMD-NEXT: call $push2=, __truncsfhf2, $0
+; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2
+; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
+; NOSIMD-NEXT: return $pop7
+ %fma = call contract half @llvm.fmuladd(half %a, half %b, half %c)
+ ret half %fma
+}
+
+define half @fmuladd_f16(half %a, half %b, half %c) {
+; RELAXED-LABEL: fmuladd_f16:
+; RELAXED: .functype fmuladd_f16 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: call $push0=, __truncsfhf2, $1
+; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0
+; RELAXED-NEXT: call $push2=, __truncsfhf2, $0
+; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2
+; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3
+; RELAXED-NEXT: call $push5=, __truncsfhf2, $2
+; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5
+; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6
+; RELAXED-NEXT: return $pop7
+;
+; STRICT-LABEL: fmuladd_f16:
+; STRICT: .functype fmuladd_f16 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: call $push0=, __truncsfhf2, $1
+; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0
+; STRICT-NEXT: call $push2=, __truncsfhf2, $0
+; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2
+; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3
+; STRICT-NEXT: call $push5=, __truncsfhf2, $2
+; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5
+; STRICT-NEXT: f32.add $push7=, $pop4, $pop6
+; STRICT-NEXT: return $pop7
+;
+; NOFP16-LABEL: fmuladd_f16:
+; NOFP16: .functype fmuladd_f16 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, __truncsfhf2, $1
+; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOFP16-NEXT: call $push2=, __truncsfhf2, $0
+; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOFP16-NEXT: call $push5=, __truncsfhf2, $2
+; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
+; NOFP16-NEXT: return $pop7
+;
+; NOSIMD-LABEL: fmuladd_f16:
+; NOSIMD: .functype fmuladd_f16 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, __truncsfhf2, $1
+; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOSIMD-NEXT: call $push2=, __truncsfhf2, $0
+; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2
+; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
+; NOSIMD-NEXT: return $pop7
+ %fma = call half @llvm.fmuladd(half %a, half %b, half %c)
+ ret half %fma
+}
+
+
+define float @fadd_fmul_contract_f32(float %a, float %b, float %c) {
+; RELAXED-LABEL: fadd_fmul_contract_f32:
+; RELAXED: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f32.mul $push0=, $1, $0
+; RELAXED-NEXT: f32.add $push1=, $pop0, $2
+; RELAXED-NEXT: return $pop1
+;
+; STRICT-LABEL: fadd_fmul_contract_f32:
+; STRICT: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f32.mul $push0=, $1, $0
+; STRICT-NEXT: f32.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fadd_fmul_contract_f32:
+; NOFP16: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32.mul $push0=, $1, $0
+; NOFP16-NEXT: f32.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fadd_fmul_contract_f32:
+; NOSIMD: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $1, $0
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $2
+; NOSIMD-NEXT: return $pop1
+ %mul = fmul contract float %b, %a
+ %add = fadd contract float %mul, %c
+ ret float %add
+}
+
+define float @fmuladd_contract_f32(float %a, float %b, float %c) {
+; RELAXED-LABEL: fmuladd_contract_f32:
+; RELAXED: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f32.mul $push0=, $0, $1
+; RELAXED-NEXT: f32.add $push1=, $pop0, $2
+; RELAXED-NEXT: return $pop1
+;
+; STRICT-LABEL: fmuladd_contract_f32:
+; STRICT: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f32.mul $push0=, $0, $1
+; STRICT-NEXT: f32.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_contract_f32:
+; NOFP16: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32.mul $push0=, $0, $1
+; NOFP16-NEXT: f32.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_contract_f32:
+; NOSIMD: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $0, $1
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $2
+; NOSIMD-NEXT: return $pop1
+ %fma = call contract float @llvm.fmuladd(float %a, float %b, float %c)
+ ret float %fma
+}
+
+define float @fmuladd_f32(float %a, float %b, float %c) {
+; RELAXED-LABEL: fmuladd_f32:
+; RELAXED: .functype fmuladd_f32 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f32.mul $push0=, $0, $1
+; RELAXED-NEXT: f32.add $push1=, $pop0, $2
+; RELAXED-NEXT: return $pop1
+;
+; STRICT-LABEL: fmuladd_f32:
+; STRICT: .functype fmuladd_f32 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f32.mul $push0=, $0, $1
+; STRICT-NEXT: f32.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_f32:
+; NOFP16: .functype fmuladd_f32 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32.mul $push0=, $0, $1
+; NOFP16-NEXT: f32.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_f32:
+; NOSIMD: .functype fmuladd_f32 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $0, $1
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $2
+; NOSIMD-NEXT: return $pop1
+ %fma = call float @llvm.fmuladd(float %a, float %b, float %c)
+ ret float %fma
+}
+
define double @fadd_fmul_contract_f64(double %a, double %b, double %c) {
; RELAXED-LABEL: fadd_fmul_contract_f64:
; RELAXED: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
@@ -19,16 +288,94 @@ define double @fadd_fmul_contract_f64(double %a, double %b, double %c) {
; STRICT-NEXT: f64.mul $push0=, $1, $0
; STRICT-NEXT: f64.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fadd_fmul_contract_f64:
+; NOFP16: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f64.mul $push0=, $1, $0
+; NOFP16-NEXT: f64.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fadd_fmul_contract_f64:
+; NOSIMD: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f64.mul $push0=, $1, $0
+; NOSIMD-NEXT: f64.add $push1=, $pop0, $2
+; NOSIMD-NEXT: return $pop1
%mul = fmul contract double %b, %a
%add = fadd contract double %mul, %c
ret double %add
}
+define double @fmuladd_f64(double %a, double %b, double %c) {
+; RELAXED-LABEL: fmuladd_f64:
+; RELAXED: .functype fmuladd_f64 (f64, f64, f64) -> (f64)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f64.mul $push0=, $0, $1
+; RELAXED-NEXT: f64.add $push1=, $pop0, $2
+; RELAXED-NEXT: return $pop1
+;
+; STRICT-LABEL: fmuladd_f64:
+; STRICT: .functype fmuladd_f64 (f64, f64, f64) -> (f64)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f64.mul $push0=, $0, $1
+; STRICT-NEXT: f64.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_f64:
+; NOFP16: .functype fmuladd_f64 (f64, f64, f64) -> (f64)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f64.mul $push0=, $0, $1
+; NOFP16-NEXT: f64.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_f64:
+; NOSIMD: .functype fmuladd_f64 (f64, f64, f64) -> (f64)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f64.mul $push0=, $0, $1
+; NOSIMD-NEXT: f64.add $push1=, $pop0, $2
+; NOSIMD-NEXT: return $pop1
+ %fma = call double @llvm.fmuladd(double %a, double %b, double %c)
+ ret double %fma
+}
+
+define double @fmuladd_contract_f64(double %a, double %b, double %c) {
+; RELAXED-LABEL: fmuladd_contract_f64:
+; RELAXED: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f64.mul $push0=, $0, $1
+; RELAXED-NEXT: f64.add $push1=, $pop0, $2
+; RELAXED-NEXT: return $pop1
+;
+; STRICT-LABEL: fmuladd_contract_f64:
+; STRICT: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f64.mul $push0=, $0, $1
+; STRICT-NEXT: f64.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_contract_f64:
+; NOFP16: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f64.mul $push0=, $0, $1
+; NOFP16-NEXT: f64.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_contract_f64:
+; NOSIMD: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f64.mul $push0=, $0, $1
+; NOSIMD-NEXT: f64.add $push1=, $pop0, $2
+; NOSIMD-NEXT: return $pop1
+ %fma = call contract double @llvm.fmuladd(double %a, double %b, double %c)
+ ret double %fma
+}
+
define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fadd_fmul_contract_4xf32:
; RELAXED: .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $1, $0
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fadd_fmul_contract_4xf32:
@@ -37,17 +384,40 @@ define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4
; STRICT-NEXT: f32x4.mul $push0=, $1, $0
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fadd_fmul_contract_4xf32:
+; NOFP16: .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32x4.mul $push0=, $1, $0
+; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fadd_fmul_contract_4xf32:
+; NOSIMD: .functype fadd_fmul_contract_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $8, $4
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $12
+; NOSIMD-NEXT: f32.store 12($0), $pop1
+; NOSIMD-NEXT: f32.mul $push2=, $7, $3
+; NOSIMD-NEXT: f32.add $push3=, $pop2, $11
+; NOSIMD-NEXT: f32.store 8($0), $pop3
+; NOSIMD-NEXT: f32.mul $push4=, $6, $2
+; NOSIMD-NEXT: f32.add $push5=, $pop4, $10
+; NOSIMD-NEXT: f32.store 4($0), $pop5
+; NOSIMD-NEXT: f32.mul $push6=, $5, $1
+; NOSIMD-NEXT: f32.add $push7=, $pop6, $9
+; NOSIMD-NEXT: f32.store 0($0), $pop7
+; NOSIMD-NEXT: return
%mul = fmul contract <4 x float> %b, %a
%add = fadd contract <4 x float> %mul, %c
ret <4 x float> %add
}
-
define <8 x half> @fadd_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; RELAXED-LABEL: fadd_fmul_contract_8xf16:
; RELAXED: .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $2, $1, $0
+; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fadd_fmul_contract_8xf16:
@@ -56,12 +426,181 @@ define <8 x half> @fadd_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x h
; STRICT-NEXT: f16x8.mul $push0=, $1, $0
; STRICT-NEXT: f16x8.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fadd_fmul_contract_8xf16:
+; NOFP16: .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, __truncsfhf2, $8
+; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOFP16-NEXT: call $push2=, __truncsfhf2, $16
+; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOFP16-NEXT: call $push5=, __truncsfhf2, $24
+; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
+; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7
+; NOFP16-NEXT: i32.store16 14($0), $pop8
+; NOFP16-NEXT: call $push9=, __truncsfhf2, $7
+; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9
+; NOFP16-NEXT: call $push11=, __truncsfhf2, $15
+; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11
+; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12
+; NOFP16-NEXT: call $push14=, __truncsfhf2, $23
+; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14
+; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15
+; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16
+; NOFP16-NEXT: i32.store16 12($0), $pop17
+; NOFP16-NEXT: call $push18=, __truncsfhf2, $6
+; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18
+; NOFP16-NEXT: call $push20=, __truncsfhf2, $14
+; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20
+; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21
+; NOFP16-NEXT: call $push23=, __truncsfhf2, $22
+; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23
+; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24
+; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25
+; NOFP16-NEXT: i32.store16 10($0), $pop26
+; NOFP16-NEXT: call $push27=, __truncsfhf2, $5
+; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27
+; NOFP16-NEXT: call $push29=, __truncsfhf2, $13
+; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29
+; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30
+; NOFP16-NEXT: call $push32=, __truncsfhf2, $21
+; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32
+; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33
+; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34
+; NOFP16-NEXT: i32.store16 8($0), $pop35
+; NOFP16-NEXT: call $push36=, __truncsfhf2, $4
+; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36
+; NOFP16-NEXT: call $push38=, __truncsfhf2, $12
+; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38
+; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39
+; NOFP16-NEXT: call $push41=, __truncsfhf2, $20
+; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41
+; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42
+; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43
+; NOFP16-NEXT: i32.store16 6($0), $pop44
+; NOFP16-NEXT: call $push45=, __truncsfhf2, $3
+; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45
+; NOFP16-NEXT: call $push47=, __truncsfhf2, $11
+; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47
+; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48
+; NOFP16-NEXT: call $push50=, __truncsfhf2, $19
+; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50
+; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51
+; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52
+; NOFP16-NEXT: i32.store16 4($0), $pop53
+; NOFP16-NEXT: call $push54=, __truncsfhf2, $2
+; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54
+; NOFP16-NEXT: call $push56=, __truncsfhf2, $10
+; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56
+; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57
+; NOFP16-NEXT: call $push59=, __truncsfhf2, $18
+; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59
+; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60
+; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61
+; NOFP16-NEXT: i32.store16 2($0), $pop62
+; NOFP16-NEXT: call $push63=, __truncsfhf2, $1
+; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63
+; NOFP16-NEXT: call $push65=, __truncsfhf2, $9
+; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65
+; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66
+; NOFP16-NEXT: call $push68=, __truncsfhf2, $17
+; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68
+; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69
+; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70
+; NOFP16-NEXT: i32.store16 0($0), $pop71
+; NOFP16-NEXT: return
+;
+; NOSIMD-LABEL: fadd_fmul_contract_8xf16:
+; NOSIMD: .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, __truncsfhf2, $8
+; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOSIMD-NEXT: call $push2=, __truncsfhf2, $16
+; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24
+; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
+; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7
+; NOSIMD-NEXT: i32.store16 14($0), $pop8
+; NOSIMD-NEXT: call $push9=, __truncsfhf2, $7
+; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9
+; NOSIMD-NEXT: call $push11=, __truncsfhf2, $15
+; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11
+; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12
+; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23
+; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14
+; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15
+; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16
+; NOSIMD-NEXT: i32.store16 12($0), $pop17
+; NOSIMD-NEXT: call $push18=, __truncsfhf2, $6
+; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18
+; NOSIMD-NEXT: call $push20=, __truncsfhf2, $14
+; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20
+; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21
+; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22
+; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23
+; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24
+; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25
+; NOSIMD-NEXT: i32.store16 10($0), $pop26
+; NOSIMD-NEXT: call $push27=, __truncsfhf2, $5
+; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27
+; NOSIMD-NEXT: call $push29=, __truncsfhf2, $13
+; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29
+; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30
+; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21
+; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32
+; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33
+; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34
+; NOSIMD-NEXT: i32.store16 8($0), $pop35
+; NOSIMD-NEXT: call $push36=, __truncsfhf2, $4
+; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36
+; NOSIMD-NEXT: call $push38=, __truncsfhf2, $12
+; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38
+; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39
+; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20
+; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41
+; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42
+; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43
+; NOSIMD-NEXT: i32.store16 6($0), $pop44
+; NOSIMD-NEXT: call $push45=, __truncsfhf2, $3
+; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45
+; NOSIMD-NEXT: call $push47=, __truncsfhf2, $11
+; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47
+; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48
+; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19
+; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50
+; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51
+; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52
+; NOSIMD-NEXT: i32.store16 4($0), $pop53
+; NOSIMD-NEXT: call $push54=, __truncsfhf2, $2
+; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54
+; NOSIMD-NEXT: call $push56=, __truncsfhf2, $10
+; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56
+; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57
+; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18
+; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59
+; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60
+; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61
+; NOSIMD-NEXT: i32.store16 2($0), $pop62
+; NOSIMD-NEXT: call $push63=, __truncsfhf2, $1
+; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63
+; NOSIMD-NEXT: call $push65=, __truncsfhf2, $9
+; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65
+; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66
+; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17
+; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68
+; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69
+; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70
+; NOSIMD-NEXT: i32.store16 0($0), $pop71
+; NOSIMD-NEXT: return
%mul = fmul contract <8 x half> %b, %a
%add = fadd contract <8 x half> %mul, %c
ret <8 x half> %add
}
-
define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fadd_fmul_4xf32:
; RELAXED: .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128)
@@ -76,16 +615,414 @@ define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float>
; STRICT-NEXT: f32x4.mul $push0=, $1, $0
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fadd_fmul_4xf32:
+; NOFP16: .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32x4.mul $push0=, $1, $0
+; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fadd_fmul_4xf32:
+; NOSIMD: .functype fadd_fmul_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $8, $4
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $12
+; NOSIMD-NEXT: f32.store 12($0), $pop1
+; NOSIMD-NEXT: f32.mul $push2=, $7, $3
+; NOSIMD-NEXT: f32.add $push3=, $pop2, $11
+; NOSIMD-NEXT: f32.store 8($0), $pop3
+; NOSIMD-NEXT: f32.mul $push4=, $6, $2
+; NOSIMD-NEXT: f32.add $push5=, $pop4, $10
+; NOSIMD-NEXT: f32.store 4($0), $pop5
+; NOSIMD-NEXT: f32.mul $push6=, $5, $1
+; NOSIMD-NEXT: f32.add $push7=, $pop6, $9
+; NOSIMD-NEXT: f32.store 0($0), $pop7
+; NOSIMD-NEXT: return
%mul = fmul <4 x float> %b, %a
%add = fadd contract <4 x float> %mul, %c
ret <4 x float> %add
}
+define <8 x half> @fmuladd_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; RELAXED-LABEL: fmuladd_contract_8xf16:
+; RELAXED: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_contract_8xf16:
+; STRICT: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f16x8.mul $push0=, $0, $1
+; STRICT-NEXT: f16x8.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_contract_8xf16:
+; NOFP16: .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, __truncsfhf2, $16
+; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOFP16-NEXT: call $push2=, __truncsfhf2, $8
+; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOFP16-NEXT: call $push5=, __truncsfhf2, $24
+; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
+; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7
+; NOFP16-NEXT: i32.store16 14($0), $pop8
+; NOFP16-NEXT: call $push9=, __truncsfhf2, $15
+; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9
+; NOFP16-NEXT: call $push11=, __truncsfhf2, $7
+; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11
+; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12
+; NOFP16-NEXT: call $push14=, __truncsfhf2, $23
+; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14
+; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15
+; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16
+; NOFP16-NEXT: i32.store16 12($0), $pop17
+; NOFP16-NEXT: call $push18=, __truncsfhf2, $14
+; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18
+; NOFP16-NEXT: call $push20=, __truncsfhf2, $6
+; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20
+; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21
+; NOFP16-NEXT: call $push23=, __truncsfhf2, $22
+; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23
+; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24
+; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25
+; NOFP16-NEXT: i32.store16 10($0), $pop26
+; NOFP16-NEXT: call $push27=, __truncsfhf2, $13
+; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27
+; NOFP16-NEXT: call $push29=, __truncsfhf2, $5
+; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29
+; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30
+; NOFP16-NEXT: call $push32=, __truncsfhf2, $21
+; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32
+; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33
+; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34
+; NOFP16-NEXT: i32.store16 8($0), $pop35
+; NOFP16-NEXT: call $push36=, __truncsfhf2, $12
+; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36
+; NOFP16-NEXT: call $push38=, __truncsfhf2, $4
+; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38
+; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39
+; NOFP16-NEXT: call $push41=, __truncsfhf2, $20
+; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41
+; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42
+; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43
+; NOFP16-NEXT: i32.store16 6($0), $pop44
+; NOFP16-NEXT: call $push45=, __truncsfhf2, $11
+; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45
+; NOFP16-NEXT: call $push47=, __truncsfhf2, $3
+; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47
+; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48
+; NOFP16-NEXT: call $push50=, __truncsfhf2, $19
+; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50
+; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51
+; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52
+; NOFP16-NEXT: i32.store16 4($0), $pop53
+; NOFP16-NEXT: call $push54=, __truncsfhf2, $10
+; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54
+; NOFP16-NEXT: call $push56=, __truncsfhf2, $2
+; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56
+; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57
+; NOFP16-NEXT: call $push59=, __truncsfhf2, $18
+; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59
+; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60
+; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61
+; NOFP16-NEXT: i32.store16 2($0), $pop62
+; NOFP16-NEXT: call $push63=, __truncsfhf2, $9
+; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63
+; NOFP16-NEXT: call $push65=, __truncsfhf2, $1
+; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65
+; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66
+; NOFP16-NEXT: call $push68=, __truncsfhf2, $17
+; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68
+; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69
+; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70
+; NOFP16-NEXT: i32.store16 0($0), $pop71
+; NOFP16-NEXT: return
+;
+; NOSIMD-LABEL: fmuladd_contract_8xf16:
+; NOSIMD: .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, __truncsfhf2, $16
+; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOSIMD-NEXT: call $push2=, __truncsfhf2, $8
+; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24
+; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
+; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7
+; NOSIMD-NEXT: i32.store16 14($0), $pop8
+; NOSIMD-NEXT: call $push9=, __truncsfhf2, $15
+; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9
+; NOSIMD-NEXT: call $push11=, __truncsfhf2, $7
+; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11
+; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12
+; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23
+; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14
+; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15
+; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16
+; NOSIMD-NEXT: i32.store16 12($0), $pop17
+; NOSIMD-NEXT: call $push18=, __truncsfhf2, $14
+; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18
+; NOSIMD-NEXT: call $push20=, __truncsfhf2, $6
+; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20
+; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21
+; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22
+; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23
+; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24
+; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25
+; NOSIMD-NEXT: i32.store16 10($0), $pop26
+; NOSIMD-NEXT: call $push27=, __truncsfhf2, $13
+; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27
+; NOSIMD-NEXT: call $push29=, __truncsfhf2, $5
+; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29
+; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30
+; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21
+; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32
+; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33
+; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34
+; NOSIMD-NEXT: i32.store16 8($0), $pop35
+; NOSIMD-NEXT: call $push36=, __truncsfhf2, $12
+; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36
+; NOSIMD-NEXT: call $push38=, __truncsfhf2, $4
+; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38
+; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39
+; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20
+; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41
+; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42
+; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43
+; NOSIMD-NEXT: i32.store16 6($0), $pop44
+; NOSIMD-NEXT: call $push45=, __truncsfhf2, $11
+; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45
+; NOSIMD-NEXT: call $push47=, __truncsfhf2, $3
+; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47
+; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48
+; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19
+; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50
+; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51
+; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52
+; NOSIMD-NEXT: i32.store16 4($0), $pop53
+; NOSIMD-NEXT: call $push54=, __truncsfhf2, $10
+; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54
+; NOSIMD-NEXT: call $push56=, __truncsfhf2, $2
+; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56
+; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57
+; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18
+; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59
+; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60
+; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61
+; NOSIMD-NEXT: i32.store16 2($0), $pop62
+; NOSIMD-NEXT: call $push63=, __truncsfhf2, $9
+; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63
+; NOSIMD-NEXT: call $push65=, __truncsfhf2, $1
+; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65
+; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66
+; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17
+; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68
+; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69
+; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70
+; NOSIMD-NEXT: i32.store16 0($0), $pop71
+; NOSIMD-NEXT: return
+ %fma = call contract <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c)
+ ret <8 x half> %fma
+}
+
+define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; RELAXED-LABEL: fmuladd_8xf16:
+; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_8xf16:
+; STRICT: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f16x8.mul $push0=, $0, $1
+; STRICT-NEXT: f16x8.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_8xf16:
+; NOFP16: .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, __truncsfhf2, $16
+; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOFP16-NEXT: call $push2=, __truncsfhf2, $8
+; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOFP16-NEXT: call $push5=, __truncsfhf2, $24
+; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
+; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7
+; NOFP16-NEXT: i32.store16 14($0), $pop8
+; NOFP16-NEXT: call $push9=, __truncsfhf2, $15
+; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9
+; NOFP16-NEXT: call $push11=, __truncsfhf2, $7
+; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11
+; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12
+; NOFP16-NEXT: call $push14=, __truncsfhf2, $23
+; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14
+; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15
+; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16
+; NOFP16-NEXT: i32.store16 12($0), $pop17
+; NOFP16-NEXT: call $push18=, __truncsfhf2, $14
+; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18
+; NOFP16-NEXT: call $push20=, __truncsfhf2, $6
+; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20
+; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21
+; NOFP16-NEXT: call $push23=, __truncsfhf2, $22
+; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23
+; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24
+; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25
+; NOFP16-NEXT: i32.store16 10($0), $pop26
+; NOFP16-NEXT: call $push27=, __truncsfhf2, $13
+; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27
+; NOFP16-NEXT: call $push29=, __truncsfhf2, $5
+; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29
+; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30
+; NOFP16-NEXT: call $push32=, __truncsfhf2, $21
+; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32
+; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33
+; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34
+; NOFP16-NEXT: i32.store16 8($0), $pop35
+; NOFP16-NEXT: call $push36=, __truncsfhf2, $12
+; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36
+; NOFP16-NEXT: call $push38=, __truncsfhf2, $4
+; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38
+; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39
+; NOFP16-NEXT: call $push41=, __truncsfhf2, $20
+; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41
+; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42
+; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43
+; NOFP16-NEXT: i32.store16 6($0), $pop44
+; NOFP16-NEXT: call $push45=, __truncsfhf2, $11
+; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45
+; NOFP16-NEXT: call $push47=, __truncsfhf2, $3
+; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47
+; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48
+; NOFP16-NEXT: call $push50=, __truncsfhf2, $19
+; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50
+; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51
+; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52
+; NOFP16-NEXT: i32.store16 4($0), $pop53
+; NOFP16-NEXT: call $push54=, __truncsfhf2, $10
+; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54
+; NOFP16-NEXT: call $push56=, __truncsfhf2, $2
+; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56
+; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57
+; NOFP16-NEXT: call $push59=, __truncsfhf2, $18
+; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59
+; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60
+; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61
+; NOFP16-NEXT: i32.store16 2($0), $pop62
+; NOFP16-NEXT: call $push63=, __truncsfhf2, $9
+; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63
+; NOFP16-NEXT: call $push65=, __truncsfhf2, $1
+; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65
+; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66
+; NOFP16-NEXT: call $push68=, __truncsfhf2, $17
+; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68
+; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69
+; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70
+; NOFP16-NEXT: i32.store16 0($0), $pop71
+; NOFP16-NEXT: return
+;
+; NOSIMD-LABEL: fmuladd_8xf16:
+; NOSIMD: .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, __truncsfhf2, $16
+; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOSIMD-NEXT: call $push2=, __truncsfhf2, $8
+; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24
+; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
+; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7
+; NOSIMD-NEXT: i32.store16 14($0), $pop8
+; NOSIMD-NEXT: call $push9=, __truncsfhf2, $15
+; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9
+; NOSIMD-NEXT: call $push11=, __truncsfhf2, $7
+; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11
+; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12
+; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23
+; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14
+; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15
+; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16
+; NOSIMD-NEXT: i32.store16 12($0), $pop17
+; NOSIMD-NEXT: call $push18=, __truncsfhf2, $14
+; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18
+; NOSIMD-NEXT: call $push20=, __truncsfhf2, $6
+; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20
+; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21
+; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22
+; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23
+; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24
+; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25
+; NOSIMD-NEXT: i32.store16 10($0), $pop26
+; NOSIMD-NEXT: call $push27=, __truncsfhf2, $13
+; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27
+; NOSIMD-NEXT: call $push29=, __truncsfhf2, $5
+; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29
+; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30
+; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21
+; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32
+; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33
+; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34
+; NOSIMD-NEXT: i32.store16 8($0), $pop35
+; NOSIMD-NEXT: call $push36=, __truncsfhf2, $12
+; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36
+; NOSIMD-NEXT: call $push38=, __truncsfhf2, $4
+; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38
+; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39
+; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20
+; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41
+; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42
+; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43
+; NOSIMD-NEXT: i32.store16 6($0), $pop44
+; NOSIMD-NEXT: call $push45=, __truncsfhf2, $11
+; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45
+; NOSIMD-NEXT: call $push47=, __truncsfhf2, $3
+; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47
+; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48
+; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19
+; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50
+; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51
+; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52
+; NOSIMD-NEXT: i32.store16 4($0), $pop53
+; NOSIMD-NEXT: call $push54=, __truncsfhf2, $10
+; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54
+; NOSIMD-NEXT: call $push56=, __truncsfhf2, $2
+; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56
+; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57
+; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18
+; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59
+; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60
+; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61
+; NOSIMD-NEXT: i32.store16 2($0), $pop62
+; NOSIMD-NEXT: call $push63=, __truncsfhf2, $9
+; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63
+; NOSIMD-NEXT: call $push65=, __truncsfhf2, $1
+; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65
+; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66
+; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17
+; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68
+; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69
+; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70
+; NOSIMD-NEXT: i32.store16 0($0), $pop71
+; NOSIMD-NEXT: return
+ %fma = call <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c)
+ ret <8 x half> %fma
+}
+
define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fmuladd_contract_4xf32:
; RELAXED: .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $0, $1, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fmuladd_contract_4xf32:
@@ -94,18 +1031,40 @@ define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x
; STRICT-NEXT: f32x4.mul $push0=, $0, $1
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_contract_4xf32:
+; NOFP16: .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32x4.mul $push0=, $0, $1
+; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_contract_4xf32:
+; NOSIMD: .functype fmuladd_contract_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $4, $8
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $12
+; NOSIMD-NEXT: f32.store 12($0), $pop1
+; NOSIMD-NEXT: f32.mul $push2=, $3, $7
+; NOSIMD-NEXT: f32.add $push3=, $pop2, $11
+; NOSIMD-NEXT: f32.store 8($0), $pop3
+; NOSIMD-NEXT: f32.mul $push4=, $2, $6
+; NOSIMD-NEXT: f32.add $push5=, $pop4, $10
+; NOSIMD-NEXT: f32.store 4($0), $pop5
+; NOSIMD-NEXT: f32.mul $push6=, $1, $5
+; NOSIMD-NEXT: f32.add $push7=, $pop6, $9
+; NOSIMD-NEXT: f32.store 0($0), $pop7
+; NOSIMD-NEXT: return
%fma = call contract <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c)
ret <4 x float> %fma
}
-; TODO: This should also have relaxed_madd in RELAXED case
define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fmuladd_4xf32:
; RELAXED: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.mul $push0=, $0, $1
-; RELAXED-NEXT: f32x4.add $push1=, $pop0, $2
-; RELAXED-NEXT: return $pop1
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fmuladd_4xf32:
; STRICT: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
@@ -113,10 +1072,104 @@ define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c
; STRICT-NEXT: f32x4.mul $push0=, $0, $1
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_4xf32:
+; NOFP16: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32x4.mul $push0=, $0, $1
+; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_4xf32:
+; NOSIMD: .functype fmuladd_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $4, $8
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $12
+; NOSIMD-NEXT: f32.store 12($0), $pop1
+; NOSIMD-NEXT: f32.mul $push2=, $3, $7
+; NOSIMD-NEXT: f32.add $push3=, $pop2, $11
+; NOSIMD-NEXT: f32.store 8($0), $pop3
+; NOSIMD-NEXT: f32.mul $push4=, $2, $6
+; NOSIMD-NEXT: f32.add $push5=, $pop4, $10
+; NOSIMD-NEXT: f32.store 4($0), $pop5
+; NOSIMD-NEXT: f32.mul $push6=, $1, $5
+; NOSIMD-NEXT: f32.add $push7=, $pop6, $9
+; NOSIMD-NEXT: f32.store 0($0), $pop7
+; NOSIMD-NEXT: return
%fma = call <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c)
ret <4 x float> %fma
}
+define <2 x double> @fmuladd_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; RELAXED-LABEL: fmuladd_contract_2xf64:
+; RELAXED: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_contract_2xf64:
+; STRICT: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f64x2.mul $push0=, $0, $1
+; STRICT-NEXT: f64x2.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_contract_2xf64:
+; NOFP16: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f64x2.mul $push0=, $0, $1
+; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_contract_2xf64:
+; NOSIMD: .functype fmuladd_contract_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f64.mul $push0=, $2, $4
+; NOSIMD-NEXT: f64.add $push1=, $pop0, $6
+; NOSIMD-NEXT: f64.store 8($0), $pop1
+; NOSIMD-NEXT: f64.mul $push2=, $1, $3
+; NOSIMD-NEXT: f64.add $push3=, $pop2, $5
+; NOSIMD-NEXT: f64.store 0($0), $pop3
+; NOSIMD-NEXT: return
+ %fma = call contract <2 x double> @llvm.fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
+ ret <2 x double> %fma
+}
+
+define <2 x double> @fmuladd_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; RELAXED-LABEL: fmuladd_2xf64:
+; RELAXED: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_2xf64:
+; STRICT: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f64x2.mul $push0=, $0, $1
+; STRICT-NEXT: f64x2.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_2xf64:
+; NOFP16: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f64x2.mul $push0=, $0, $1
+; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_2xf64:
+; NOSIMD: .functype fmuladd_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f64.mul $push0=, $2, $4
+; NOSIMD-NEXT: f64.add $push1=, $pop0, $6
+; NOSIMD-NEXT: f64.store 8($0), $pop1
+; NOSIMD-NEXT: f64.mul $push2=, $1, $3
+; NOSIMD-NEXT: f64.add $push3=, $pop2, $5
+; NOSIMD-NEXT: f64.store 0($0), $pop3
+; NOSIMD-NEXT: return
+ %fma = call <2 x double> @llvm.fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
+ ret <2 x double> %fma
+}
+
define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fma_4xf32:
; RELAXED: .functype fma_4xf32 (v128, v128, v128) -> (v128)
@@ -167,6 +1220,44 @@ define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; STRICT-NEXT: call $push18=, fmaf, $pop17, $pop16, $pop15
; STRICT-NEXT: f32x4.replace_lane $push19=, $pop14, 3, $pop18
; STRICT-NEXT: return $pop19
+;
+; NOFP16-LABEL: fma_4xf32:
+; NOFP16: .functype fma_4xf32 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32x4.extract_lane $push2=, $0, 0
+; NOFP16-NEXT: f32x4.extract_lane $push1=, $1, 0
+; NOFP16-NEXT: f32x4.extract_lane $push0=, $2, 0
+; NOFP16-NEXT: call $push3=, fmaf, $pop2, $pop1, $pop0
+; NOFP16-NEXT: f32x4.splat $push4=, $pop3
+; NOFP16-NEXT: f32x4.extract_lane $push7=, $0, 1
+; NOFP16-NEXT: f32x4.extract_lane $push6=, $1, 1
+; NOFP16-NEXT: f32x4.extract_lane $push5=, $2, 1
+; NOFP16-NEXT: call $push8=, fmaf, $pop7, $pop6, $pop5
+; NOFP16-NEXT: f32x4.replace_lane $push9=, $pop4, 1, $pop8
+; NOFP16-NEXT: f32x4.extract_lane $push12=, $0, 2
+; NOFP16-NEXT: f32x4.extract_lane $push11=, $1, 2
+; NOFP16-NEXT: f32x4.extract_lane $push10=, $2, 2
+; NOFP16-NEXT: call $push13=, fmaf, $pop12, $pop11, $pop10
+; NOFP16-NEXT: f32x4.replace_lane $push14=, $pop9, 2, $pop13
+; NOFP16-NEXT: f32x4.extract_lane $push17=, $0, 3
+; NOFP16-NEXT: f32x4.extract_lane $push16=, $1, 3
+; NOFP16-NEXT: f32x4.extract_lane $push15=, $2, 3
+; NOFP16-NEXT: call $push18=, fmaf, $pop17, $pop16, $pop15
+; NOFP16-NEXT: f32x4.replace_lane $push19=, $pop14, 3, $pop18
+; NOFP16-NEXT: return $pop19
+;
+; NOSIMD-LABEL: fma_4xf32:
+; NOSIMD: .functype fma_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, fmaf, $4, $8, $12
+; NOSIMD-NEXT: f32.store 12($0), $pop0
+; NOSIMD-NEXT: call $push1=, fmaf, $3, $7, $11
+; NOSIMD-NEXT: f32.store 8($0), $pop1
+; NOSIMD-NEXT: call $push2=, fmaf, $2, $6, $10
+; NOSIMD-NEXT: f32.store 4($0), $pop2
+; NOSIMD-NEXT: call $push3=, fmaf, $1, $5, $9
+; NOSIMD-NEXT: f32.store 0($0), $pop3
+; NOSIMD-NEXT: return
%fma = call <4 x float> @llvm.fma(<4 x float> %a, <4 x float> %b, <4 x float> %c)
ret <4 x float> %fma
}
@@ -176,9 +1267,9 @@ define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8
; RELAXED-LABEL: fadd_fmul_contract_8xf32:
; RELAXED: .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $6, $4, $2
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $4, $2, $6
; RELAXED-NEXT: v128.store 16($0), $pop0
-; RELAXED-NEXT: f32x4.relaxed_madd $push1=, $5, $3, $1
+; RELAXED-NEXT: f32x4.relaxed_madd $push1=, $3, $1, $5
; RELAXED-NEXT: v128.store 0($0), $pop1
; RELAXED-NEXT: return
;
@@ -192,17 +1283,56 @@ define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8
; STRICT-NEXT: f32x4.add $push3=, $pop2, $5
; STRICT-NEXT: v128.store 0($0), $pop3
; STRICT-NEXT: return
+;
+; NOFP16-LABEL: fadd_fmul_contract_8xf32:
+; NOFP16: .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32x4.mul $push0=, $4, $2
+; NOFP16-NEXT: f32x4.add $push1=, $pop0, $6
+; NOFP16-NEXT: v128.store 16($0), $pop1
+; NOFP16-NEXT: f32x4.mul $push2=, $3, $1
+; NOFP16-NEXT: f32x4.add $push3=, $pop2, $5
+; NOFP16-NEXT: v128.store 0($0), $pop3
+; NOFP16-NEXT: return
+;
+; NOSIMD-LABEL: fadd_fmul_contract_8xf32:
+; NOSIMD: .functype fadd_fmul_contract_8xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $16, $8
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $24
+; NOSIMD-NEXT: f32.store 28($0), $pop1
+; NOSIMD-NEXT: f32.mul $push2=, $15, $7
+; NOSIMD-NEXT: f32.add $push3=, $pop2, $23
+; NOSIMD-NEXT: f32.store 24($0), $pop3
+; NOSIMD-NEXT: f32.mul $push4=, $14, $6
+; NOSIMD-NEXT: f32.add $push5=, $pop4, $22
+; NOSIMD-NEXT: f32.store 20($0), $pop5
+; NOSIMD-NEXT: f32.mul $push6=, $13, $5
+; NOSIMD-NEXT: f32.add $push7=, $pop6, $21
+; NOSIMD-NEXT: f32.store 16($0), $pop7
+; NOSIMD-NEXT: f32.mul $push8=, $12, $4
+; NOSIMD-NEXT: f32.add $push9=, $pop8, $20
+; NOSIMD-NEXT: f32.store 12($0), $pop9
+; NOSIMD-NEXT: f32.mul $push10=, $11, $3
+; NOSIMD-NEXT: f32.add $push11=, $pop10, $19
+; NOSIMD-NEXT: f32.store 8($0), $pop11
+; NOSIMD-NEXT: f32.mul $push12=, $10, $2
+; NOSIMD-NEXT: f32.add $push13=, $pop12, $18
+; NOSIMD-NEXT: f32.store 4($0), $pop13
+; NOSIMD-NEXT: f32.mul $push14=, $9, $1
+; NOSIMD-NEXT: f32.add $push15=, $pop14, $17
+; NOSIMD-NEXT: f32.store 0($0), $pop15
+; NOSIMD-NEXT: return
%mul = fmul contract <8 x float> %b, %a
%add = fadd contract <8 x float> %mul, %c
ret <8 x float> %add
}
-
define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; RELAXED-LABEL: fadd_fmul_contract_2xf64:
; RELAXED: .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $2, $1, $0
+; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fadd_fmul_contract_2xf64:
@@ -211,28 +1341,64 @@ define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b,
; STRICT-NEXT: f64x2.mul $push0=, $1, $0
; STRICT-NEXT: f64x2.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fadd_fmul_contract_2xf64:
+; NOFP16: .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f64x2.mul $push0=, $1, $0
+; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fadd_fmul_contract_2xf64:
+; NOSIMD: .functype fadd_fmul_contract_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f64.mul $push0=, $4, $2
+; NOSIMD-NEXT: f64.add $push1=, $pop0, $6
+; NOSIMD-NEXT: f64.store 8($0), $pop1
+; NOSIMD-NEXT: f64.mul $push2=, $3, $1
+; NOSIMD-NEXT: f64.add $push3=, $pop2, $5
+; NOSIMD-NEXT: f64.store 0($0), $pop3
+; NOSIMD-NEXT: return
%mul = fmul contract <2 x double> %b, %a
%add = fadd contract <2 x double> %mul, %c
ret <2 x double> %add
}
-define float @fadd_fmul_contract_f32(float %a, float %b, float %c) {
-; RELAXED-LABEL: fadd_fmul_contract_f32:
-; RELAXED: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
+define <2 x double> @fadd_fmul_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; RELAXED-LABEL: fadd_fmul_2xf64:
+; RELAXED: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32.mul $push0=, $1, $0
-; RELAXED-NEXT: f32.add $push1=, $pop0, $2
+; RELAXED-NEXT: f64x2.mul $push0=, $1, $0
+; RELAXED-NEXT: f64x2.add $push1=, $pop0, $2
; RELAXED-NEXT: return $pop1
;
-; STRICT-LABEL: fadd_fmul_contract_f32:
-; STRICT: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
+; STRICT-LABEL: fadd_fmul_2xf64:
+; STRICT: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128)
; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f32.mul $push0=, $1, $0
-; STRICT-NEXT: f32.add $push1=, $pop0, $2
+; STRICT-NEXT: f64x2.mul $push0=, $1, $0
+; STRICT-NEXT: f64x2.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
- %mul = fmul contract float %b, %a
- %add = fadd contract float %mul, %c
- ret float %add
+;
+; NOFP16-LABEL: fadd_fmul_2xf64:
+; NOFP16: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f64x2.mul $push0=, $1, $0
+; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fadd_fmul_2xf64:
+; NOSIMD: .functype fadd_fmul_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f64.mul $push0=, $4, $2
+; NOSIMD-NEXT: f64.add $push1=, $pop0, $6
+; NOSIMD-NEXT: f64.store 8($0), $pop1
+; NOSIMD-NEXT: f64.mul $push2=, $3, $1
+; NOSIMD-NEXT: f64.add $push3=, $pop2, $5
+; NOSIMD-NEXT: f64.store 0($0), $pop3
+; NOSIMD-NEXT: return
+ %mul = fmul <2 x double> %b, %a
+ %add = fadd <2 x double> %mul, %c
+ ret <2 x double> %add
}
define float @fma_f32(float %a, float %b, float %c) {
@@ -247,6 +1413,18 @@ define float @fma_f32(float %a, float %b, float %c) {
; STRICT-NEXT: # %bb.0:
; STRICT-NEXT: call $push0=, fmaf, $0, $1, $2
; STRICT-NEXT: return $pop0
+;
+; NOFP16-LABEL: fma_f32:
+; NOFP16: .functype fma_f32 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, fmaf, $0, $1, $2
+; NOFP16-NEXT: return $pop0
+;
+; NOSIMD-LABEL: fma_f32:
+; NOSIMD: .functype fma_f32 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, fmaf, $0, $1, $2
+; NOSIMD-NEXT: return $pop0
%fma = call float @llvm.fma(float %a, float %b, float %c)
ret float %fma
}
@@ -263,6 +1441,18 @@ define double @fma_f64(double %a, double %b, double %c) {
; STRICT-NEXT: # %bb.0:
; STRICT-NEXT: call $push0=, fma, $0, $1, $2
; STRICT-NEXT: return $pop0
+;
+; NOFP16-LABEL: fma_f64:
+; NOFP16: .functype fma_f64 (f64, f64, f64) -> (f64)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, fma, $0, $1, $2
+; NOFP16-NEXT: return $pop0
+;
+; NOSIMD-LABEL: fma_f64:
+; NOSIMD: .functype fma_f64 (f64, f64, f64) -> (f64)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, fma, $0, $1, $2
+; NOSIMD-NEXT: return $pop0
%fma = call double @llvm.fma(double %a, double %b, double %c)
ret double %fma
}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
index 6e2d860c3f152..e658ac87a30c0 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
@@ -27,7 +27,7 @@ define <4 x float> @fsub_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4
; RELAXED-LABEL: fsub_fmul_contract_4xf32:
; RELAXED: .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $2, $1, $0
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fsub_fmul_contract_4xf32:
@@ -46,7 +46,7 @@ define <8 x half> @fsub_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x h
; RELAXED-LABEL: fsub_fmul_contract_8xf16:
; RELAXED: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $2, $1, $0
+; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fsub_fmul_contract_8xf16:
@@ -84,9 +84,9 @@ define <8 x float> @fsub_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8
; RELAXED-LABEL: fsub_fmul_contract_8xf32:
; RELAXED: .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $6, $4, $2
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $4, $2, $6
; RELAXED-NEXT: v128.store 16($0), $pop0
-; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $5, $3, $1
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $3, $1, $5
; RELAXED-NEXT: v128.store 0($0), $pop1
; RELAXED-NEXT: return
;
@@ -110,7 +110,7 @@ define <2 x double> @fsub_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b,
; RELAXED-LABEL: fsub_fmul_contract_2xf64:
; RELAXED: .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $2, $1, $0
+; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fsub_fmul_contract_2xf64:
@@ -143,3 +143,56 @@ define float @fsub_fmul_contract_f32(float %a, float %b, float %c) {
ret float %sub
}
+define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; RELAXED-LABEL: fmuladd_8xf16:
+; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_8xf16:
+; STRICT: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f16x8.mul $push0=, $0, $1
+; STRICT-NEXT: f16x8.sub $push1=, $2, $pop0
+; STRICT-NEXT: return $pop1
+ %fneg = fneg <8 x half> %a
+ %fma = call <8 x half> @llvm.fmuladd(<8 x half> %fneg, <8 x half> %b, <8 x half> %c)
+ ret <8 x half> %fma
+}
+
+define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; RELAXED-LABEL: fmuladd_4xf32:
+; RELAXED: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_4xf32:
+; STRICT: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f32x4.mul $push0=, $0, $1
+; STRICT-NEXT: f32x4.sub $push1=, $2, $pop0
+; STRICT-NEXT: return $pop1
+ %fneg = fneg <4 x float> %a
+ %fma = call <4 x float> @llvm.fmuladd(<4 x float> %fneg, <4 x float> %b, <4 x float> %c)
+ ret <4 x float> %fma
+}
+
+define <2 x double> @fmuladd_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; RELAXED-LABEL: fmuladd_2xf64:
+; RELAXED: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_2xf64:
+; STRICT: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f64x2.mul $push0=, $0, $1
+; STRICT-NEXT: f64x2.sub $push1=, $2, $pop0
+; STRICT-NEXT: return $pop1
+ %fneg = fneg <2 x double> %a
+ %fma = call <2 x double> @llvm.fmuladd(<2 x double> %fneg, <2 x double> %b, <2 x double> %c)
+ ret <2 x double> %fma
+}
More information about the llvm-commits
mailing list