[llvm] 30d3441 - Revert "[WebAssembly] Lower fmuladd to madd and nmadd" (#163171)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 13 03:53:45 PDT 2025
Author: Sam Parker
Date: 2025-10-13T11:53:40+01:00
New Revision: 30d3441cf082c3672cd7b8495cb8cc1d6ca2c8e0
URL: https://github.com/llvm/llvm-project/commit/30d3441cf082c3672cd7b8495cb8cc1d6ca2c8e0
DIFF: https://github.com/llvm/llvm-project/commit/30d3441cf082c3672cd7b8495cb8cc1d6ca2c8e0.diff
LOG: Revert "[WebAssembly] Lower fmuladd to madd and nmadd" (#163171)
Reverts llvm/llvm-project#161355
Looks like I've broken code generation for some intrinsics.
Added:
Modified:
llvm/include/llvm/CodeGen/ISDOpcodes.h
llvm/include/llvm/Target/TargetSelectionDAG.td
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/CodeGen/TargetLoweringBase.cpp
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
llvm/test/MC/WebAssembly/simd-encodings.s
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index ff3dd0d4c3c51..c76c83d84b3c7 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -514,12 +514,6 @@ enum NodeType {
/// separately rounded operations.
FMAD,
- /// FMULADD - Performs a * b + c, with, or without, intermediate rounding.
- /// It is expected that this will be illegal for most targets, as it usually
- /// makes sense to split this or use an FMA. But some targets, such as
- /// WebAssembly, can directly support these semantics.
- FMULADD,
-
/// FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This
/// DAG node does not require that X and Y have the same type, just that
/// they are both floating point. X and the result must have the same type.
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 07a858fd682fc..632be7ad9e350 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -535,7 +535,6 @@ def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>;
def frem : SDNode<"ISD::FREM" , SDTFPBinOp>;
def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp, [SDNPCommutative]>;
def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp, [SDNPCommutative]>;
-def fmuladd : SDNode<"ISD::FMULADD" , SDTFPTernaryOp, [SDNPCommutative]>;
def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>;
def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e15384202f758..b1accdd066dfd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -509,7 +509,6 @@ namespace {
SDValue visitFMUL(SDNode *N);
template <class MatchContextClass> SDValue visitFMA(SDNode *N);
SDValue visitFMAD(SDNode *N);
- SDValue visitFMULADD(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
@@ -1992,7 +1991,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FMUL: return visitFMUL(N);
case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
case ISD::FMAD: return visitFMAD(N);
- case ISD::FMULADD: return visitFMULADD(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
@@ -18446,21 +18444,6 @@ SDValue DAGCombiner::visitFMAD(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitFMULADD(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDValue N2 = N->getOperand(2);
- EVT VT = N->getValueType(0);
- SDLoc DL(N);
-
- // Constant fold FMULADD.
- if (SDValue C =
- DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2}))
- return C;
-
- return SDValue();
-}
-
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c1fd052d01f31..08af74c258899 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5786,7 +5786,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::FCOPYSIGN:
case ISD::FMA:
case ISD::FMAD:
- case ISD::FMULADD:
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
@@ -5905,7 +5904,6 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
case ISD::FCOSH:
case ISD::FTANH:
case ISD::FMA:
- case ISD::FMULADD:
case ISD::FMAD: {
if (SNaN)
return true;
@@ -7233,7 +7231,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
// Handle fma/fmad special cases.
- if (Opcode == ISD::FMA || Opcode == ISD::FMAD || Opcode == ISD::FMULADD) {
+ if (Opcode == ISD::FMA || Opcode == ISD::FMAD) {
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
Ops[2].getValueType() == VT && "FMA types must match!");
@@ -7244,7 +7242,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
APFloat V1 = C1->getValueAPF();
const APFloat &V2 = C2->getValueAPF();
const APFloat &V3 = C3->getValueAPF();
- if (Opcode == ISD::FMAD || Opcode == ISD::FMULADD) {
+ if (Opcode == ISD::FMAD) {
V1.multiply(V2, APFloat::rmNearestTiesToEven);
V1.add(V3, APFloat::rmNearestTiesToEven);
} else
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0f2b5188fc10a..c21890a0d856f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6996,13 +6996,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)), Flags));
- } else if (TLI.isOperationLegalOrCustom(ISD::FMULADD, VT)) {
- // TODO: Support splitting the vector.
- setValue(&I, DAG.getNode(ISD::FMULADD, sdl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)),
- getValue(I.getArgOperand(2)), Flags));
} else {
// TODO: Intrinsic calls should have fast-math-flags.
SDValue Mul = DAG.getNode(
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 39cbfad6d0be1..fcfbfe6c461d3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -310,7 +310,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FMA: return "fma";
case ISD::STRICT_FMA: return "strict_fma";
case ISD::FMAD: return "fmad";
- case ISD::FMULADD: return "fmuladd";
case ISD::FREM: return "frem";
case ISD::STRICT_FREM: return "strict_frem";
case ISD::FCOPYSIGN: return "fcopysign";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 920dff935daed..cc503d324e74b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7676,7 +7676,6 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
break;
}
case ISD::FMA:
- case ISD::FMULADD:
case ISD::FMAD: {
if (!Flags.hasNoSignedZeros())
break;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 060b1ddc2ef39..c23281a820b2b 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -815,8 +815,7 @@ void TargetLoweringBase::initActions() {
ISD::FTAN, ISD::FACOS,
ISD::FASIN, ISD::FATAN,
ISD::FCOSH, ISD::FSINH,
- ISD::FTANH, ISD::FATAN2,
- ISD::FMULADD},
+ ISD::FTANH, ISD::FATAN2},
VT, Expand);
// Overflow operations default to expand
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 47c24fc27f1d6..64723340051b8 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -317,15 +317,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
}
- if (Subtarget->hasFP16()) {
- setOperationAction(ISD::FMA, MVT::v8f16, Legal);
- }
-
- if (Subtarget->hasRelaxedSIMD()) {
- setOperationAction(ISD::FMULADD, MVT::v4f32, Legal);
- setOperationAction(ISD::FMULADD, MVT::v2f64, Legal);
- }
-
// Partial MLA reductions.
for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) {
setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
@@ -1129,18 +1120,6 @@ WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
-bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
- const MachineFunction &MF, EVT VT) const {
- if (!Subtarget->hasFP16() || !VT.isVector())
- return false;
-
- EVT ScalarVT = VT.getScalarType();
- if (!ScalarVT.isSimple())
- return false;
-
- return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
-}
-
bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
SDValue Op, const TargetLoweringOpt &TLO) const {
// ISel process runs DAGCombiner after legalization; this step is called
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 472ec678534a4..b33a8530310be 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -81,8 +81,6 @@ class WebAssemblyTargetLowering final : public TargetLowering {
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const override;
- bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
- EVT VT) const override;
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 72835b3c6424e..49af78bce68c3 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1626,8 +1626,7 @@ defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero,
// Relaxed (Negative) Multiply-Add (madd/nmadd)
//===----------------------------------------------------------------------===//
-multiclass RELAXED_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
- list<Predicate> reqs> {
+multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> reqs> {
defm MADD_#vec :
SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
[(set (vec.vt V128:$dst), (int_wasm_relaxed_madd
@@ -1641,40 +1640,16 @@ multiclass RELAXED_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c",
vec.prefix#".relaxed_nmadd", simdopS, reqs>;
- def : Pat<(fadd_contract (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b)), (vec.vt V128:$c)),
- (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
- def : Pat<(fmuladd (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)),
- (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
+ def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
+ (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
- def : Pat<(fsub_contract (vec.vt V128:$c), (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b))),
- (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
- def : Pat<(fmuladd (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)),
- (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
+ def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
+ (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
}
-defm "" : RELAXED_SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
-defm "" : RELAXED_SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
-
-//===----------------------------------------------------------------------===//
-// FP16 (Negative) Multiply-Add (madd/nmadd)
-//===----------------------------------------------------------------------===//
-
-multiclass HALF_PRECISION_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
- list<Predicate> reqs> {
- defm MADD_#vec :
- SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
- [(set (vec.vt V128:$dst), (fma
- (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
- vec.prefix#".madd\t$dst, $a, $b, $c",
- vec.prefix#".madd", simdopA, reqs>;
- defm NMADD_#vec :
- SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
- [(set (vec.vt V128:$dst), (fma
- (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)))],
- vec.prefix#".nmadd\t$dst, $a, $b, $c",
- vec.prefix#".nmadd", simdopS, reqs>;
-}
-defm "" : HALF_PRECISION_SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>;
+defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
+defm "" : SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
+defm "" : SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>;
//===----------------------------------------------------------------------===//
// Laneselect
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
index 600241aef99d0..e065de38951b1 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
@@ -2,278 +2,9 @@
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s --check-prefix=RELAXED
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128, | FileCheck %s --check-prefix=STRICT
-; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefix=NOFP16
-; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefix=NOSIMD
target triple = "wasm32"
-define half @fadd_fmul_contract_f16(half %a, half %b, half %c) {
-; RELAXED-LABEL: fadd_fmul_contract_f16:
-; RELAXED: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: call $push0=, __truncsfhf2, $0
-; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0
-; RELAXED-NEXT: call $push2=, __truncsfhf2, $1
-; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2
-; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3
-; RELAXED-NEXT: call $push5=, __truncsfhf2, $2
-; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5
-; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6
-; RELAXED-NEXT: return $pop7
-;
-; STRICT-LABEL: fadd_fmul_contract_f16:
-; STRICT: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: call $push0=, __truncsfhf2, $0
-; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0
-; STRICT-NEXT: call $push2=, __truncsfhf2, $1
-; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2
-; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3
-; STRICT-NEXT: call $push5=, __truncsfhf2, $2
-; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5
-; STRICT-NEXT: f32.add $push7=, $pop4, $pop6
-; STRICT-NEXT: return $pop7
-;
-; NOFP16-LABEL: fadd_fmul_contract_f16:
-; NOFP16: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: call $push0=, __truncsfhf2, $0
-; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
-; NOFP16-NEXT: call $push2=, __truncsfhf2, $1
-; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
-; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
-; NOFP16-NEXT: call $push5=, __truncsfhf2, $2
-; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
-; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
-; NOFP16-NEXT: return $pop7
-;
-; NOSIMD-LABEL: fadd_fmul_contract_f16:
-; NOSIMD: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: call $push0=, __truncsfhf2, $0
-; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
-; NOSIMD-NEXT: call $push2=, __truncsfhf2, $1
-; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
-; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
-; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2
-; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
-; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
-; NOSIMD-NEXT: return $pop7
- %mul = fmul contract half %b, %a
- %add = fadd contract half %mul, %c
- ret half %add
-}
-
-define half @fmuladd_contract_f16(half %a, half %b, half %c) {
-; RELAXED-LABEL: fmuladd_contract_f16:
-; RELAXED: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: call $push0=, __truncsfhf2, $1
-; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0
-; RELAXED-NEXT: call $push2=, __truncsfhf2, $0
-; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2
-; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3
-; RELAXED-NEXT: call $push5=, __truncsfhf2, $2
-; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5
-; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6
-; RELAXED-NEXT: return $pop7
-;
-; STRICT-LABEL: fmuladd_contract_f16:
-; STRICT: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: call $push0=, __truncsfhf2, $1
-; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0
-; STRICT-NEXT: call $push2=, __truncsfhf2, $0
-; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2
-; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3
-; STRICT-NEXT: call $push5=, __truncsfhf2, $2
-; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5
-; STRICT-NEXT: f32.add $push7=, $pop4, $pop6
-; STRICT-NEXT: return $pop7
-;
-; NOFP16-LABEL: fmuladd_contract_f16:
-; NOFP16: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: call $push0=, __truncsfhf2, $1
-; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
-; NOFP16-NEXT: call $push2=, __truncsfhf2, $0
-; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
-; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
-; NOFP16-NEXT: call $push5=, __truncsfhf2, $2
-; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
-; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
-; NOFP16-NEXT: return $pop7
-;
-; NOSIMD-LABEL: fmuladd_contract_f16:
-; NOSIMD: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: call $push0=, __truncsfhf2, $1
-; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
-; NOSIMD-NEXT: call $push2=, __truncsfhf2, $0
-; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
-; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
-; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2
-; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
-; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
-; NOSIMD-NEXT: return $pop7
- %fma = call contract half @llvm.fmuladd(half %a, half %b, half %c)
- ret half %fma
-}
-
-define half @fmuladd_f16(half %a, half %b, half %c) {
-; RELAXED-LABEL: fmuladd_f16:
-; RELAXED: .functype fmuladd_f16 (f32, f32, f32) -> (f32)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: call $push0=, __truncsfhf2, $1
-; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0
-; RELAXED-NEXT: call $push2=, __truncsfhf2, $0
-; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2
-; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3
-; RELAXED-NEXT: call $push5=, __truncsfhf2, $2
-; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5
-; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6
-; RELAXED-NEXT: return $pop7
-;
-; STRICT-LABEL: fmuladd_f16:
-; STRICT: .functype fmuladd_f16 (f32, f32, f32) -> (f32)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: call $push0=, __truncsfhf2, $1
-; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0
-; STRICT-NEXT: call $push2=, __truncsfhf2, $0
-; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2
-; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3
-; STRICT-NEXT: call $push5=, __truncsfhf2, $2
-; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5
-; STRICT-NEXT: f32.add $push7=, $pop4, $pop6
-; STRICT-NEXT: return $pop7
-;
-; NOFP16-LABEL: fmuladd_f16:
-; NOFP16: .functype fmuladd_f16 (f32, f32, f32) -> (f32)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: call $push0=, __truncsfhf2, $1
-; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
-; NOFP16-NEXT: call $push2=, __truncsfhf2, $0
-; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
-; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
-; NOFP16-NEXT: call $push5=, __truncsfhf2, $2
-; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
-; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
-; NOFP16-NEXT: return $pop7
-;
-; NOSIMD-LABEL: fmuladd_f16:
-; NOSIMD: .functype fmuladd_f16 (f32, f32, f32) -> (f32)
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: call $push0=, __truncsfhf2, $1
-; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
-; NOSIMD-NEXT: call $push2=, __truncsfhf2, $0
-; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
-; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
-; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2
-; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
-; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
-; NOSIMD-NEXT: return $pop7
- %fma = call half @llvm.fmuladd(half %a, half %b, half %c)
- ret half %fma
-}
-
-
-define float @fadd_fmul_contract_f32(float %a, float %b, float %c) {
-; RELAXED-LABEL: fadd_fmul_contract_f32:
-; RELAXED: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32.mul $push0=, $1, $0
-; RELAXED-NEXT: f32.add $push1=, $pop0, $2
-; RELAXED-NEXT: return $pop1
-;
-; STRICT-LABEL: fadd_fmul_contract_f32:
-; STRICT: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f32.mul $push0=, $1, $0
-; STRICT-NEXT: f32.add $push1=, $pop0, $2
-; STRICT-NEXT: return $pop1
-;
-; NOFP16-LABEL: fadd_fmul_contract_f32:
-; NOFP16: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f32.mul $push0=, $1, $0
-; NOFP16-NEXT: f32.add $push1=, $pop0, $2
-; NOFP16-NEXT: return $pop1
-;
-; NOSIMD-LABEL: fadd_fmul_contract_f32:
-; NOSIMD: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f32.mul $push0=, $1, $0
-; NOSIMD-NEXT: f32.add $push1=, $pop0, $2
-; NOSIMD-NEXT: return $pop1
- %mul = fmul contract float %b, %a
- %add = fadd contract float %mul, %c
- ret float %add
-}
-
-define float @fmuladd_contract_f32(float %a, float %b, float %c) {
-; RELAXED-LABEL: fmuladd_contract_f32:
-; RELAXED: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32.mul $push0=, $0, $1
-; RELAXED-NEXT: f32.add $push1=, $pop0, $2
-; RELAXED-NEXT: return $pop1
-;
-; STRICT-LABEL: fmuladd_contract_f32:
-; STRICT: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f32.mul $push0=, $0, $1
-; STRICT-NEXT: f32.add $push1=, $pop0, $2
-; STRICT-NEXT: return $pop1
-;
-; NOFP16-LABEL: fmuladd_contract_f32:
-; NOFP16: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f32.mul $push0=, $0, $1
-; NOFP16-NEXT: f32.add $push1=, $pop0, $2
-; NOFP16-NEXT: return $pop1
-;
-; NOSIMD-LABEL: fmuladd_contract_f32:
-; NOSIMD: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f32.mul $push0=, $0, $1
-; NOSIMD-NEXT: f32.add $push1=, $pop0, $2
-; NOSIMD-NEXT: return $pop1
- %fma = call contract float @llvm.fmuladd(float %a, float %b, float %c)
- ret float %fma
-}
-
-define float @fmuladd_f32(float %a, float %b, float %c) {
-; RELAXED-LABEL: fmuladd_f32:
-; RELAXED: .functype fmuladd_f32 (f32, f32, f32) -> (f32)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32.mul $push0=, $0, $1
-; RELAXED-NEXT: f32.add $push1=, $pop0, $2
-; RELAXED-NEXT: return $pop1
-;
-; STRICT-LABEL: fmuladd_f32:
-; STRICT: .functype fmuladd_f32 (f32, f32, f32) -> (f32)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f32.mul $push0=, $0, $1
-; STRICT-NEXT: f32.add $push1=, $pop0, $2
-; STRICT-NEXT: return $pop1
-;
-; NOFP16-LABEL: fmuladd_f32:
-; NOFP16: .functype fmuladd_f32 (f32, f32, f32) -> (f32)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f32.mul $push0=, $0, $1
-; NOFP16-NEXT: f32.add $push1=, $pop0, $2
-; NOFP16-NEXT: return $pop1
-;
-; NOSIMD-LABEL: fmuladd_f32:
-; NOSIMD: .functype fmuladd_f32 (f32, f32, f32) -> (f32)
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f32.mul $push0=, $0, $1
-; NOSIMD-NEXT: f32.add $push1=, $pop0, $2
-; NOSIMD-NEXT: return $pop1
- %fma = call float @llvm.fmuladd(float %a, float %b, float %c)
- ret float %fma
-}
-
define double @fadd_fmul_contract_f64(double %a, double %b, double %c) {
; RELAXED-LABEL: fadd_fmul_contract_f64:
; RELAXED: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
@@ -288,94 +19,16 @@ define double @fadd_fmul_contract_f64(double %a, double %b, double %c) {
; STRICT-NEXT: f64.mul $push0=, $1, $0
; STRICT-NEXT: f64.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
-;
-; NOFP16-LABEL: fadd_fmul_contract_f64:
-; NOFP16: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f64.mul $push0=, $1, $0
-; NOFP16-NEXT: f64.add $push1=, $pop0, $2
-; NOFP16-NEXT: return $pop1
-;
-; NOSIMD-LABEL: fadd_fmul_contract_f64:
-; NOSIMD: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f64.mul $push0=, $1, $0
-; NOSIMD-NEXT: f64.add $push1=, $pop0, $2
-; NOSIMD-NEXT: return $pop1
%mul = fmul contract double %b, %a
%add = fadd contract double %mul, %c
ret double %add
}
-define double @fmuladd_f64(double %a, double %b, double %c) {
-; RELAXED-LABEL: fmuladd_f64:
-; RELAXED: .functype fmuladd_f64 (f64, f64, f64) -> (f64)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f64.mul $push0=, $0, $1
-; RELAXED-NEXT: f64.add $push1=, $pop0, $2
-; RELAXED-NEXT: return $pop1
-;
-; STRICT-LABEL: fmuladd_f64:
-; STRICT: .functype fmuladd_f64 (f64, f64, f64) -> (f64)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f64.mul $push0=, $0, $1
-; STRICT-NEXT: f64.add $push1=, $pop0, $2
-; STRICT-NEXT: return $pop1
-;
-; NOFP16-LABEL: fmuladd_f64:
-; NOFP16: .functype fmuladd_f64 (f64, f64, f64) -> (f64)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f64.mul $push0=, $0, $1
-; NOFP16-NEXT: f64.add $push1=, $pop0, $2
-; NOFP16-NEXT: return $pop1
-;
-; NOSIMD-LABEL: fmuladd_f64:
-; NOSIMD: .functype fmuladd_f64 (f64, f64, f64) -> (f64)
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f64.mul $push0=, $0, $1
-; NOSIMD-NEXT: f64.add $push1=, $pop0, $2
-; NOSIMD-NEXT: return $pop1
- %fma = call double @llvm.fmuladd(double %a, double %b, double %c)
- ret double %fma
-}
-
-define double @fmuladd_contract_f64(double %a, double %b, double %c) {
-; RELAXED-LABEL: fmuladd_contract_f64:
-; RELAXED: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f64.mul $push0=, $0, $1
-; RELAXED-NEXT: f64.add $push1=, $pop0, $2
-; RELAXED-NEXT: return $pop1
-;
-; STRICT-LABEL: fmuladd_contract_f64:
-; STRICT: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f64.mul $push0=, $0, $1
-; STRICT-NEXT: f64.add $push1=, $pop0, $2
-; STRICT-NEXT: return $pop1
-;
-; NOFP16-LABEL: fmuladd_contract_f64:
-; NOFP16: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f64.mul $push0=, $0, $1
-; NOFP16-NEXT: f64.add $push1=, $pop0, $2
-; NOFP16-NEXT: return $pop1
-;
-; NOSIMD-LABEL: fmuladd_contract_f64:
-; NOSIMD: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f64.mul $push0=, $0, $1
-; NOSIMD-NEXT: f64.add $push1=, $pop0, $2
-; NOSIMD-NEXT: return $pop1
- %fma = call contract double @llvm.fmuladd(double %a, double %b, double %c)
- ret double %fma
-}
-
define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fadd_fmul_contract_4xf32:
; RELAXED: .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $1, $0, $2
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $1, $0
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fadd_fmul_contract_4xf32:
@@ -384,222 +37,31 @@ define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4
; STRICT-NEXT: f32x4.mul $push0=, $1, $0
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
-;
-; NOFP16-LABEL: fadd_fmul_contract_4xf32:
-; NOFP16: .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f32x4.mul $push0=, $1, $0
-; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2
-; NOFP16-NEXT: return $pop1
-;
-; NOSIMD-LABEL: fadd_fmul_contract_4xf32:
-; NOSIMD: .functype fadd_fmul_contract_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f32.mul $push0=, $8, $4
-; NOSIMD-NEXT: f32.add $push1=, $pop0, $12
-; NOSIMD-NEXT: f32.store 12($0), $pop1
-; NOSIMD-NEXT: f32.mul $push2=, $7, $3
-; NOSIMD-NEXT: f32.add $push3=, $pop2, $11
-; NOSIMD-NEXT: f32.store 8($0), $pop3
-; NOSIMD-NEXT: f32.mul $push4=, $6, $2
-; NOSIMD-NEXT: f32.add $push5=, $pop4, $10
-; NOSIMD-NEXT: f32.store 4($0), $pop5
-; NOSIMD-NEXT: f32.mul $push6=, $5, $1
-; NOSIMD-NEXT: f32.add $push7=, $pop6, $9
-; NOSIMD-NEXT: f32.store 0($0), $pop7
-; NOSIMD-NEXT: return
%mul = fmul contract <4 x float> %b, %a
%add = fadd contract <4 x float> %mul, %c
ret <4 x float> %add
}
+
define <8 x half> @fadd_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; RELAXED-LABEL: fadd_fmul_contract_8xf16:
; RELAXED: .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f16x8.madd $push0=, $1, $0, $2
+; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $2, $1, $0
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fadd_fmul_contract_8xf16:
; STRICT: .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f16x8.madd $push0=, $1, $0, $2
-; STRICT-NEXT: return $pop0
-;
-; NOFP16-LABEL: fadd_fmul_contract_8xf16:
-; NOFP16: .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: call $push0=, __truncsfhf2, $8
-; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
-; NOFP16-NEXT: call $push2=, __truncsfhf2, $16
-; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
-; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
-; NOFP16-NEXT: call $push5=, __truncsfhf2, $24
-; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
-; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
-; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7
-; NOFP16-NEXT: i32.store16 14($0), $pop8
-; NOFP16-NEXT: call $push9=, __truncsfhf2, $7
-; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9
-; NOFP16-NEXT: call $push11=, __truncsfhf2, $15
-; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11
-; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12
-; NOFP16-NEXT: call $push14=, __truncsfhf2, $23
-; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14
-; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15
-; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16
-; NOFP16-NEXT: i32.store16 12($0), $pop17
-; NOFP16-NEXT: call $push18=, __truncsfhf2, $6
-; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18
-; NOFP16-NEXT: call $push20=, __truncsfhf2, $14
-; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20
-; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21
-; NOFP16-NEXT: call $push23=, __truncsfhf2, $22
-; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23
-; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24
-; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25
-; NOFP16-NEXT: i32.store16 10($0), $pop26
-; NOFP16-NEXT: call $push27=, __truncsfhf2, $5
-; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27
-; NOFP16-NEXT: call $push29=, __truncsfhf2, $13
-; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29
-; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30
-; NOFP16-NEXT: call $push32=, __truncsfhf2, $21
-; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32
-; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33
-; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34
-; NOFP16-NEXT: i32.store16 8($0), $pop35
-; NOFP16-NEXT: call $push36=, __truncsfhf2, $4
-; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36
-; NOFP16-NEXT: call $push38=, __truncsfhf2, $12
-; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38
-; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39
-; NOFP16-NEXT: call $push41=, __truncsfhf2, $20
-; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41
-; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42
-; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43
-; NOFP16-NEXT: i32.store16 6($0), $pop44
-; NOFP16-NEXT: call $push45=, __truncsfhf2, $3
-; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45
-; NOFP16-NEXT: call $push47=, __truncsfhf2, $11
-; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47
-; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48
-; NOFP16-NEXT: call $push50=, __truncsfhf2, $19
-; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50
-; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51
-; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52
-; NOFP16-NEXT: i32.store16 4($0), $pop53
-; NOFP16-NEXT: call $push54=, __truncsfhf2, $2
-; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54
-; NOFP16-NEXT: call $push56=, __truncsfhf2, $10
-; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56
-; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57
-; NOFP16-NEXT: call $push59=, __truncsfhf2, $18
-; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59
-; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60
-; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61
-; NOFP16-NEXT: i32.store16 2($0), $pop62
-; NOFP16-NEXT: call $push63=, __truncsfhf2, $1
-; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63
-; NOFP16-NEXT: call $push65=, __truncsfhf2, $9
-; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65
-; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66
-; NOFP16-NEXT: call $push68=, __truncsfhf2, $17
-; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68
-; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69
-; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70
-; NOFP16-NEXT: i32.store16 0($0), $pop71
-; NOFP16-NEXT: return
-;
-; NOSIMD-LABEL: fadd_fmul_contract_8xf16:
-; NOSIMD: .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: call $push0=, __truncsfhf2, $8
-; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
-; NOSIMD-NEXT: call $push2=, __truncsfhf2, $16
-; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
-; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
-; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24
-; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
-; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
-; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7
-; NOSIMD-NEXT: i32.store16 14($0), $pop8
-; NOSIMD-NEXT: call $push9=, __truncsfhf2, $7
-; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9
-; NOSIMD-NEXT: call $push11=, __truncsfhf2, $15
-; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11
-; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12
-; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23
-; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14
-; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15
-; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16
-; NOSIMD-NEXT: i32.store16 12($0), $pop17
-; NOSIMD-NEXT: call $push18=, __truncsfhf2, $6
-; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18
-; NOSIMD-NEXT: call $push20=, __truncsfhf2, $14
-; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20
-; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21
-; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22
-; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23
-; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24
-; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25
-; NOSIMD-NEXT: i32.store16 10($0), $pop26
-; NOSIMD-NEXT: call $push27=, __truncsfhf2, $5
-; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27
-; NOSIMD-NEXT: call $push29=, __truncsfhf2, $13
-; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29
-; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30
-; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21
-; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32
-; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33
-; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34
-; NOSIMD-NEXT: i32.store16 8($0), $pop35
-; NOSIMD-NEXT: call $push36=, __truncsfhf2, $4
-; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36
-; NOSIMD-NEXT: call $push38=, __truncsfhf2, $12
-; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38
-; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39
-; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20
-; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41
-; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42
-; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43
-; NOSIMD-NEXT: i32.store16 6($0), $pop44
-; NOSIMD-NEXT: call $push45=, __truncsfhf2, $3
-; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45
-; NOSIMD-NEXT: call $push47=, __truncsfhf2, $11
-; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47
-; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48
-; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19
-; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50
-; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51
-; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52
-; NOSIMD-NEXT: i32.store16 4($0), $pop53
-; NOSIMD-NEXT: call $push54=, __truncsfhf2, $2
-; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54
-; NOSIMD-NEXT: call $push56=, __truncsfhf2, $10
-; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56
-; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57
-; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18
-; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59
-; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60
-; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61
-; NOSIMD-NEXT: i32.store16 2($0), $pop62
-; NOSIMD-NEXT: call $push63=, __truncsfhf2, $1
-; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63
-; NOSIMD-NEXT: call $push65=, __truncsfhf2, $9
-; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65
-; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66
-; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17
-; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68
-; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69
-; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70
-; NOSIMD-NEXT: i32.store16 0($0), $pop71
-; NOSIMD-NEXT: return
+; STRICT-NEXT: f16x8.mul $push0=, $1, $0
+; STRICT-NEXT: f16x8.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
%mul = fmul contract <8 x half> %b, %a
%add = fadd contract <8 x half> %mul, %c
ret <8 x half> %add
}
+
define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fadd_fmul_4xf32:
; RELAXED: .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128)
@@ -614,412 +76,16 @@ define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float>
; STRICT-NEXT: f32x4.mul $push0=, $1, $0
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
-;
-; NOFP16-LABEL: fadd_fmul_4xf32:
-; NOFP16: .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f32x4.mul $push0=, $1, $0
-; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2
-; NOFP16-NEXT: return $pop1
-;
-; NOSIMD-LABEL: fadd_fmul_4xf32:
-; NOSIMD: .functype fadd_fmul_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f32.mul $push0=, $8, $4
-; NOSIMD-NEXT: f32.add $push1=, $pop0, $12
-; NOSIMD-NEXT: f32.store 12($0), $pop1
-; NOSIMD-NEXT: f32.mul $push2=, $7, $3
-; NOSIMD-NEXT: f32.add $push3=, $pop2, $11
-; NOSIMD-NEXT: f32.store 8($0), $pop3
-; NOSIMD-NEXT: f32.mul $push4=, $6, $2
-; NOSIMD-NEXT: f32.add $push5=, $pop4, $10
-; NOSIMD-NEXT: f32.store 4($0), $pop5
-; NOSIMD-NEXT: f32.mul $push6=, $5, $1
-; NOSIMD-NEXT: f32.add $push7=, $pop6, $9
-; NOSIMD-NEXT: f32.store 0($0), $pop7
-; NOSIMD-NEXT: return
%mul = fmul <4 x float> %b, %a
%add = fadd contract <4 x float> %mul, %c
ret <4 x float> %add
}
-define <8 x half> @fmuladd_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
-; RELAXED-LABEL: fmuladd_contract_8xf16:
-; RELAXED: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f16x8.madd $push0=, $0, $1, $2
-; RELAXED-NEXT: return $pop0
-;
-; STRICT-LABEL: fmuladd_contract_8xf16:
-; STRICT: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f16x8.madd $push0=, $0, $1, $2
-; STRICT-NEXT: return $pop0
-;
-; NOFP16-LABEL: fmuladd_contract_8xf16:
-; NOFP16: .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: call $push0=, __truncsfhf2, $16
-; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
-; NOFP16-NEXT: call $push2=, __truncsfhf2, $8
-; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
-; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
-; NOFP16-NEXT: call $push5=, __truncsfhf2, $24
-; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
-; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
-; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7
-; NOFP16-NEXT: i32.store16 14($0), $pop8
-; NOFP16-NEXT: call $push9=, __truncsfhf2, $15
-; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9
-; NOFP16-NEXT: call $push11=, __truncsfhf2, $7
-; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11
-; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12
-; NOFP16-NEXT: call $push14=, __truncsfhf2, $23
-; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14
-; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15
-; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16
-; NOFP16-NEXT: i32.store16 12($0), $pop17
-; NOFP16-NEXT: call $push18=, __truncsfhf2, $14
-; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18
-; NOFP16-NEXT: call $push20=, __truncsfhf2, $6
-; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20
-; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21
-; NOFP16-NEXT: call $push23=, __truncsfhf2, $22
-; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23
-; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24
-; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25
-; NOFP16-NEXT: i32.store16 10($0), $pop26
-; NOFP16-NEXT: call $push27=, __truncsfhf2, $13
-; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27
-; NOFP16-NEXT: call $push29=, __truncsfhf2, $5
-; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29
-; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30
-; NOFP16-NEXT: call $push32=, __truncsfhf2, $21
-; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32
-; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33
-; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34
-; NOFP16-NEXT: i32.store16 8($0), $pop35
-; NOFP16-NEXT: call $push36=, __truncsfhf2, $12
-; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36
-; NOFP16-NEXT: call $push38=, __truncsfhf2, $4
-; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38
-; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39
-; NOFP16-NEXT: call $push41=, __truncsfhf2, $20
-; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41
-; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42
-; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43
-; NOFP16-NEXT: i32.store16 6($0), $pop44
-; NOFP16-NEXT: call $push45=, __truncsfhf2, $11
-; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45
-; NOFP16-NEXT: call $push47=, __truncsfhf2, $3
-; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47
-; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48
-; NOFP16-NEXT: call $push50=, __truncsfhf2, $19
-; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50
-; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51
-; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52
-; NOFP16-NEXT: i32.store16 4($0), $pop53
-; NOFP16-NEXT: call $push54=, __truncsfhf2, $10
-; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54
-; NOFP16-NEXT: call $push56=, __truncsfhf2, $2
-; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56
-; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57
-; NOFP16-NEXT: call $push59=, __truncsfhf2, $18
-; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59
-; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60
-; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61
-; NOFP16-NEXT: i32.store16 2($0), $pop62
-; NOFP16-NEXT: call $push63=, __truncsfhf2, $9
-; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63
-; NOFP16-NEXT: call $push65=, __truncsfhf2, $1
-; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65
-; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66
-; NOFP16-NEXT: call $push68=, __truncsfhf2, $17
-; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68
-; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69
-; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70
-; NOFP16-NEXT: i32.store16 0($0), $pop71
-; NOFP16-NEXT: return
-;
-; NOSIMD-LABEL: fmuladd_contract_8xf16:
-; NOSIMD: .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: call $push0=, __truncsfhf2, $16
-; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
-; NOSIMD-NEXT: call $push2=, __truncsfhf2, $8
-; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
-; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
-; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24
-; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
-; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
-; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7
-; NOSIMD-NEXT: i32.store16 14($0), $pop8
-; NOSIMD-NEXT: call $push9=, __truncsfhf2, $15
-; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9
-; NOSIMD-NEXT: call $push11=, __truncsfhf2, $7
-; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11
-; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12
-; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23
-; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14
-; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15
-; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16
-; NOSIMD-NEXT: i32.store16 12($0), $pop17
-; NOSIMD-NEXT: call $push18=, __truncsfhf2, $14
-; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18
-; NOSIMD-NEXT: call $push20=, __truncsfhf2, $6
-; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20
-; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21
-; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22
-; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23
-; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24
-; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25
-; NOSIMD-NEXT: i32.store16 10($0), $pop26
-; NOSIMD-NEXT: call $push27=, __truncsfhf2, $13
-; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27
-; NOSIMD-NEXT: call $push29=, __truncsfhf2, $5
-; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29
-; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30
-; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21
-; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32
-; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33
-; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34
-; NOSIMD-NEXT: i32.store16 8($0), $pop35
-; NOSIMD-NEXT: call $push36=, __truncsfhf2, $12
-; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36
-; NOSIMD-NEXT: call $push38=, __truncsfhf2, $4
-; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38
-; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39
-; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20
-; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41
-; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42
-; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43
-; NOSIMD-NEXT: i32.store16 6($0), $pop44
-; NOSIMD-NEXT: call $push45=, __truncsfhf2, $11
-; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45
-; NOSIMD-NEXT: call $push47=, __truncsfhf2, $3
-; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47
-; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48
-; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19
-; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50
-; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51
-; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52
-; NOSIMD-NEXT: i32.store16 4($0), $pop53
-; NOSIMD-NEXT: call $push54=, __truncsfhf2, $10
-; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54
-; NOSIMD-NEXT: call $push56=, __truncsfhf2, $2
-; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56
-; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57
-; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18
-; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59
-; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60
-; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61
-; NOSIMD-NEXT: i32.store16 2($0), $pop62
-; NOSIMD-NEXT: call $push63=, __truncsfhf2, $9
-; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63
-; NOSIMD-NEXT: call $push65=, __truncsfhf2, $1
-; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65
-; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66
-; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17
-; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68
-; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69
-; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70
-; NOSIMD-NEXT: i32.store16 0($0), $pop71
-; NOSIMD-NEXT: return
- %fma = call contract <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c)
- ret <8 x half> %fma
-}
-
-define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
-; RELAXED-LABEL: fmuladd_8xf16:
-; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f16x8.madd $push0=, $0, $1, $2
-; RELAXED-NEXT: return $pop0
-;
-; STRICT-LABEL: fmuladd_8xf16:
-; STRICT: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f16x8.madd $push0=, $0, $1, $2
-; STRICT-NEXT: return $pop0
-;
-; NOFP16-LABEL: fmuladd_8xf16:
-; NOFP16: .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: call $push0=, __truncsfhf2, $16
-; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
-; NOFP16-NEXT: call $push2=, __truncsfhf2, $8
-; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
-; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
-; NOFP16-NEXT: call $push5=, __truncsfhf2, $24
-; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
-; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
-; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7
-; NOFP16-NEXT: i32.store16 14($0), $pop8
-; NOFP16-NEXT: call $push9=, __truncsfhf2, $15
-; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9
-; NOFP16-NEXT: call $push11=, __truncsfhf2, $7
-; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11
-; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12
-; NOFP16-NEXT: call $push14=, __truncsfhf2, $23
-; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14
-; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15
-; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16
-; NOFP16-NEXT: i32.store16 12($0), $pop17
-; NOFP16-NEXT: call $push18=, __truncsfhf2, $14
-; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18
-; NOFP16-NEXT: call $push20=, __truncsfhf2, $6
-; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20
-; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21
-; NOFP16-NEXT: call $push23=, __truncsfhf2, $22
-; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23
-; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24
-; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25
-; NOFP16-NEXT: i32.store16 10($0), $pop26
-; NOFP16-NEXT: call $push27=, __truncsfhf2, $13
-; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27
-; NOFP16-NEXT: call $push29=, __truncsfhf2, $5
-; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29
-; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30
-; NOFP16-NEXT: call $push32=, __truncsfhf2, $21
-; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32
-; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33
-; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34
-; NOFP16-NEXT: i32.store16 8($0), $pop35
-; NOFP16-NEXT: call $push36=, __truncsfhf2, $12
-; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36
-; NOFP16-NEXT: call $push38=, __truncsfhf2, $4
-; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38
-; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39
-; NOFP16-NEXT: call $push41=, __truncsfhf2, $20
-; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41
-; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42
-; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43
-; NOFP16-NEXT: i32.store16 6($0), $pop44
-; NOFP16-NEXT: call $push45=, __truncsfhf2, $11
-; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45
-; NOFP16-NEXT: call $push47=, __truncsfhf2, $3
-; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47
-; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48
-; NOFP16-NEXT: call $push50=, __truncsfhf2, $19
-; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50
-; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51
-; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52
-; NOFP16-NEXT: i32.store16 4($0), $pop53
-; NOFP16-NEXT: call $push54=, __truncsfhf2, $10
-; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54
-; NOFP16-NEXT: call $push56=, __truncsfhf2, $2
-; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56
-; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57
-; NOFP16-NEXT: call $push59=, __truncsfhf2, $18
-; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59
-; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60
-; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61
-; NOFP16-NEXT: i32.store16 2($0), $pop62
-; NOFP16-NEXT: call $push63=, __truncsfhf2, $9
-; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63
-; NOFP16-NEXT: call $push65=, __truncsfhf2, $1
-; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65
-; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66
-; NOFP16-NEXT: call $push68=, __truncsfhf2, $17
-; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68
-; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69
-; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70
-; NOFP16-NEXT: i32.store16 0($0), $pop71
-; NOFP16-NEXT: return
-;
-; NOSIMD-LABEL: fmuladd_8xf16:
-; NOSIMD: .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: call $push0=, __truncsfhf2, $16
-; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
-; NOSIMD-NEXT: call $push2=, __truncsfhf2, $8
-; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
-; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
-; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24
-; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
-; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
-; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7
-; NOSIMD-NEXT: i32.store16 14($0), $pop8
-; NOSIMD-NEXT: call $push9=, __truncsfhf2, $15
-; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9
-; NOSIMD-NEXT: call $push11=, __truncsfhf2, $7
-; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11
-; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12
-; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23
-; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14
-; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15
-; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16
-; NOSIMD-NEXT: i32.store16 12($0), $pop17
-; NOSIMD-NEXT: call $push18=, __truncsfhf2, $14
-; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18
-; NOSIMD-NEXT: call $push20=, __truncsfhf2, $6
-; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20
-; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21
-; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22
-; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23
-; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24
-; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25
-; NOSIMD-NEXT: i32.store16 10($0), $pop26
-; NOSIMD-NEXT: call $push27=, __truncsfhf2, $13
-; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27
-; NOSIMD-NEXT: call $push29=, __truncsfhf2, $5
-; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29
-; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30
-; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21
-; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32
-; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33
-; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34
-; NOSIMD-NEXT: i32.store16 8($0), $pop35
-; NOSIMD-NEXT: call $push36=, __truncsfhf2, $12
-; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36
-; NOSIMD-NEXT: call $push38=, __truncsfhf2, $4
-; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38
-; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39
-; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20
-; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41
-; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42
-; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43
-; NOSIMD-NEXT: i32.store16 6($0), $pop44
-; NOSIMD-NEXT: call $push45=, __truncsfhf2, $11
-; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45
-; NOSIMD-NEXT: call $push47=, __truncsfhf2, $3
-; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47
-; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48
-; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19
-; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50
-; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51
-; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52
-; NOSIMD-NEXT: i32.store16 4($0), $pop53
-; NOSIMD-NEXT: call $push54=, __truncsfhf2, $10
-; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54
-; NOSIMD-NEXT: call $push56=, __truncsfhf2, $2
-; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56
-; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57
-; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18
-; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59
-; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60
-; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61
-; NOSIMD-NEXT: i32.store16 2($0), $pop62
-; NOSIMD-NEXT: call $push63=, __truncsfhf2, $9
-; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63
-; NOSIMD-NEXT: call $push65=, __truncsfhf2, $1
-; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65
-; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66
-; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17
-; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68
-; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69
-; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70
-; NOSIMD-NEXT: i32.store16 0($0), $pop71
-; NOSIMD-NEXT: return
- %fma = call <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c)
- ret <8 x half> %fma
-}
-
define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fmuladd_contract_4xf32:
; RELAXED: .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $0, $1, $2
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $0, $1
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fmuladd_contract_4xf32:
@@ -1028,40 +94,18 @@ define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x
; STRICT-NEXT: f32x4.mul $push0=, $0, $1
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
-;
-; NOFP16-LABEL: fmuladd_contract_4xf32:
-; NOFP16: .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f32x4.mul $push0=, $0, $1
-; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2
-; NOFP16-NEXT: return $pop1
-;
-; NOSIMD-LABEL: fmuladd_contract_4xf32:
-; NOSIMD: .functype fmuladd_contract_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f32.mul $push0=, $4, $8
-; NOSIMD-NEXT: f32.add $push1=, $pop0, $12
-; NOSIMD-NEXT: f32.store 12($0), $pop1
-; NOSIMD-NEXT: f32.mul $push2=, $3, $7
-; NOSIMD-NEXT: f32.add $push3=, $pop2, $11
-; NOSIMD-NEXT: f32.store 8($0), $pop3
-; NOSIMD-NEXT: f32.mul $push4=, $2, $6
-; NOSIMD-NEXT: f32.add $push5=, $pop4, $10
-; NOSIMD-NEXT: f32.store 4($0), $pop5
-; NOSIMD-NEXT: f32.mul $push6=, $1, $5
-; NOSIMD-NEXT: f32.add $push7=, $pop6, $9
-; NOSIMD-NEXT: f32.store 0($0), $pop7
-; NOSIMD-NEXT: return
%fma = call contract <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c)
ret <4 x float> %fma
}
+; TODO: This should also have relaxed_madd in RELAXED case
define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fmuladd_4xf32:
; RELAXED: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $0, $1, $2
-; RELAXED-NEXT: return $pop0
+; RELAXED-NEXT: f32x4.mul $push0=, $0, $1
+; RELAXED-NEXT: f32x4.add $push1=, $pop0, $2
+; RELAXED-NEXT: return $pop1
;
; STRICT-LABEL: fmuladd_4xf32:
; STRICT: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
@@ -1069,170 +113,10 @@ define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c
; STRICT-NEXT: f32x4.mul $push0=, $0, $1
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
-;
-; NOFP16-LABEL: fmuladd_4xf32:
-; NOFP16: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f32x4.mul $push0=, $0, $1
-; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2
-; NOFP16-NEXT: return $pop1
-;
-; NOSIMD-LABEL: fmuladd_4xf32:
-; NOSIMD: .functype fmuladd_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f32.mul $push0=, $4, $8
-; NOSIMD-NEXT: f32.add $push1=, $pop0, $12
-; NOSIMD-NEXT: f32.store 12($0), $pop1
-; NOSIMD-NEXT: f32.mul $push2=, $3, $7
-; NOSIMD-NEXT: f32.add $push3=, $pop2, $11
-; NOSIMD-NEXT: f32.store 8($0), $pop3
-; NOSIMD-NEXT: f32.mul $push4=, $2, $6
-; NOSIMD-NEXT: f32.add $push5=, $pop4, $10
-; NOSIMD-NEXT: f32.store 4($0), $pop5
-; NOSIMD-NEXT: f32.mul $push6=, $1, $5
-; NOSIMD-NEXT: f32.add $push7=, $pop6, $9
-; NOSIMD-NEXT: f32.store 0($0), $pop7
-; NOSIMD-NEXT: return
%fma = call <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c)
ret <4 x float> %fma
}
-define <8 x float> @fmuladd_8xf32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
-; RELAXED-LABEL: fmuladd_8xf32:
-; RELAXED: .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.mul $push0=, $2, $4
-; RELAXED-NEXT: f32x4.add $push1=, $pop0, $6
-; RELAXED-NEXT: v128.store 16($0), $pop1
-; RELAXED-NEXT: f32x4.mul $push2=, $1, $3
-; RELAXED-NEXT: f32x4.add $push3=, $pop2, $5
-; RELAXED-NEXT: v128.store 0($0), $pop3
-; RELAXED-NEXT: return
-;
-; STRICT-LABEL: fmuladd_8xf32:
-; STRICT: .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f32x4.mul $push0=, $2, $4
-; STRICT-NEXT: f32x4.add $push1=, $pop0, $6
-; STRICT-NEXT: v128.store 16($0), $pop1
-; STRICT-NEXT: f32x4.mul $push2=, $1, $3
-; STRICT-NEXT: f32x4.add $push3=, $pop2, $5
-; STRICT-NEXT: v128.store 0($0), $pop3
-; STRICT-NEXT: return
-;
-; NOFP16-LABEL: fmuladd_8xf32:
-; NOFP16: .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f32x4.mul $push0=, $2, $4
-; NOFP16-NEXT: f32x4.add $push1=, $pop0, $6
-; NOFP16-NEXT: v128.store 16($0), $pop1
-; NOFP16-NEXT: f32x4.mul $push2=, $1, $3
-; NOFP16-NEXT: f32x4.add $push3=, $pop2, $5
-; NOFP16-NEXT: v128.store 0($0), $pop3
-; NOFP16-NEXT: return
-;
-; NOSIMD-LABEL: fmuladd_8xf32:
-; NOSIMD: .functype fmuladd_8xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f32.mul $push0=, $8, $16
-; NOSIMD-NEXT: f32.add $push1=, $pop0, $24
-; NOSIMD-NEXT: f32.store 28($0), $pop1
-; NOSIMD-NEXT: f32.mul $push2=, $7, $15
-; NOSIMD-NEXT: f32.add $push3=, $pop2, $23
-; NOSIMD-NEXT: f32.store 24($0), $pop3
-; NOSIMD-NEXT: f32.mul $push4=, $6, $14
-; NOSIMD-NEXT: f32.add $push5=, $pop4, $22
-; NOSIMD-NEXT: f32.store 20($0), $pop5
-; NOSIMD-NEXT: f32.mul $push6=, $5, $13
-; NOSIMD-NEXT: f32.add $push7=, $pop6, $21
-; NOSIMD-NEXT: f32.store 16($0), $pop7
-; NOSIMD-NEXT: f32.mul $push8=, $4, $12
-; NOSIMD-NEXT: f32.add $push9=, $pop8, $20
-; NOSIMD-NEXT: f32.store 12($0), $pop9
-; NOSIMD-NEXT: f32.mul $push10=, $3, $11
-; NOSIMD-NEXT: f32.add $push11=, $pop10, $19
-; NOSIMD-NEXT: f32.store 8($0), $pop11
-; NOSIMD-NEXT: f32.mul $push12=, $2, $10
-; NOSIMD-NEXT: f32.add $push13=, $pop12, $18
-; NOSIMD-NEXT: f32.store 4($0), $pop13
-; NOSIMD-NEXT: f32.mul $push14=, $1, $9
-; NOSIMD-NEXT: f32.add $push15=, $pop14, $17
-; NOSIMD-NEXT: f32.store 0($0), $pop15
-; NOSIMD-NEXT: return
- %fma = call <8 x float> @llvm.fmuladd(<8 x float> %a, <8 x float> %b, <8 x float> %c)
- ret <8 x float> %fma
-}
-
-define <2 x double> @fmuladd_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; RELAXED-LABEL: fmuladd_contract_2xf64:
-; RELAXED: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $0, $1, $2
-; RELAXED-NEXT: return $pop0
-;
-; STRICT-LABEL: fmuladd_contract_2xf64:
-; STRICT: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f64x2.mul $push0=, $0, $1
-; STRICT-NEXT: f64x2.add $push1=, $pop0, $2
-; STRICT-NEXT: return $pop1
-;
-; NOFP16-LABEL: fmuladd_contract_2xf64:
-; NOFP16: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f64x2.mul $push0=, $0, $1
-; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2
-; NOFP16-NEXT: return $pop1
-;
-; NOSIMD-LABEL: fmuladd_contract_2xf64:
-; NOSIMD: .functype fmuladd_contract_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f64.mul $push0=, $2, $4
-; NOSIMD-NEXT: f64.add $push1=, $pop0, $6
-; NOSIMD-NEXT: f64.store 8($0), $pop1
-; NOSIMD-NEXT: f64.mul $push2=, $1, $3
-; NOSIMD-NEXT: f64.add $push3=, $pop2, $5
-; NOSIMD-NEXT: f64.store 0($0), $pop3
-; NOSIMD-NEXT: return
- %fma = call contract <2 x double> @llvm.fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
- ret <2 x double> %fma
-}
-
-define <2 x double> @fmuladd_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; RELAXED-LABEL: fmuladd_2xf64:
-; RELAXED: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $0, $1, $2
-; RELAXED-NEXT: return $pop0
-;
-; STRICT-LABEL: fmuladd_2xf64:
-; STRICT: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f64x2.mul $push0=, $0, $1
-; STRICT-NEXT: f64x2.add $push1=, $pop0, $2
-; STRICT-NEXT: return $pop1
-;
-; NOFP16-LABEL: fmuladd_2xf64:
-; NOFP16: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f64x2.mul $push0=, $0, $1
-; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2
-; NOFP16-NEXT: return $pop1
-;
-; NOSIMD-LABEL: fmuladd_2xf64:
-; NOSIMD: .functype fmuladd_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f64.mul $push0=, $2, $4
-; NOSIMD-NEXT: f64.add $push1=, $pop0, $6
-; NOSIMD-NEXT: f64.store 8($0), $pop1
-; NOSIMD-NEXT: f64.mul $push2=, $1, $3
-; NOSIMD-NEXT: f64.add $push3=, $pop2, $5
-; NOSIMD-NEXT: f64.store 0($0), $pop3
-; NOSIMD-NEXT: return
- %fma = call <2 x double> @llvm.fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
- ret <2 x double> %fma
-}
-
define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fma_4xf32:
; RELAXED: .functype fma_4xf32 (v128, v128, v128) -> (v128)
@@ -1283,44 +167,6 @@ define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; STRICT-NEXT: call $push18=, fmaf, $pop17, $pop16, $pop15
; STRICT-NEXT: f32x4.replace_lane $push19=, $pop14, 3, $pop18
; STRICT-NEXT: return $pop19
-;
-; NOFP16-LABEL: fma_4xf32:
-; NOFP16: .functype fma_4xf32 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f32x4.extract_lane $push2=, $0, 0
-; NOFP16-NEXT: f32x4.extract_lane $push1=, $1, 0
-; NOFP16-NEXT: f32x4.extract_lane $push0=, $2, 0
-; NOFP16-NEXT: call $push3=, fmaf, $pop2, $pop1, $pop0
-; NOFP16-NEXT: f32x4.splat $push4=, $pop3
-; NOFP16-NEXT: f32x4.extract_lane $push7=, $0, 1
-; NOFP16-NEXT: f32x4.extract_lane $push6=, $1, 1
-; NOFP16-NEXT: f32x4.extract_lane $push5=, $2, 1
-; NOFP16-NEXT: call $push8=, fmaf, $pop7, $pop6, $pop5
-; NOFP16-NEXT: f32x4.replace_lane $push9=, $pop4, 1, $pop8
-; NOFP16-NEXT: f32x4.extract_lane $push12=, $0, 2
-; NOFP16-NEXT: f32x4.extract_lane $push11=, $1, 2
-; NOFP16-NEXT: f32x4.extract_lane $push10=, $2, 2
-; NOFP16-NEXT: call $push13=, fmaf, $pop12, $pop11, $pop10
-; NOFP16-NEXT: f32x4.replace_lane $push14=, $pop9, 2, $pop13
-; NOFP16-NEXT: f32x4.extract_lane $push17=, $0, 3
-; NOFP16-NEXT: f32x4.extract_lane $push16=, $1, 3
-; NOFP16-NEXT: f32x4.extract_lane $push15=, $2, 3
-; NOFP16-NEXT: call $push18=, fmaf, $pop17, $pop16, $pop15
-; NOFP16-NEXT: f32x4.replace_lane $push19=, $pop14, 3, $pop18
-; NOFP16-NEXT: return $pop19
-;
-; NOSIMD-LABEL: fma_4xf32:
-; NOSIMD: .functype fma_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: call $push0=, fmaf, $4, $8, $12
-; NOSIMD-NEXT: f32.store 12($0), $pop0
-; NOSIMD-NEXT: call $push1=, fmaf, $3, $7, $11
-; NOSIMD-NEXT: f32.store 8($0), $pop1
-; NOSIMD-NEXT: call $push2=, fmaf, $2, $6, $10
-; NOSIMD-NEXT: f32.store 4($0), $pop2
-; NOSIMD-NEXT: call $push3=, fmaf, $1, $5, $9
-; NOSIMD-NEXT: f32.store 0($0), $pop3
-; NOSIMD-NEXT: return
%fma = call <4 x float> @llvm.fma(<4 x float> %a, <4 x float> %b, <4 x float> %c)
ret <4 x float> %fma
}
@@ -1330,9 +176,9 @@ define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8
; RELAXED-LABEL: fadd_fmul_contract_8xf32:
; RELAXED: .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $4, $2, $6
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $6, $4, $2
; RELAXED-NEXT: v128.store 16($0), $pop0
-; RELAXED-NEXT: f32x4.relaxed_madd $push1=, $3, $1, $5
+; RELAXED-NEXT: f32x4.relaxed_madd $push1=, $5, $3, $1
; RELAXED-NEXT: v128.store 0($0), $pop1
; RELAXED-NEXT: return
;
@@ -1346,56 +192,17 @@ define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8
; STRICT-NEXT: f32x4.add $push3=, $pop2, $5
; STRICT-NEXT: v128.store 0($0), $pop3
; STRICT-NEXT: return
-;
-; NOFP16-LABEL: fadd_fmul_contract_8xf32:
-; NOFP16: .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f32x4.mul $push0=, $4, $2
-; NOFP16-NEXT: f32x4.add $push1=, $pop0, $6
-; NOFP16-NEXT: v128.store 16($0), $pop1
-; NOFP16-NEXT: f32x4.mul $push2=, $3, $1
-; NOFP16-NEXT: f32x4.add $push3=, $pop2, $5
-; NOFP16-NEXT: v128.store 0($0), $pop3
-; NOFP16-NEXT: return
-;
-; NOSIMD-LABEL: fadd_fmul_contract_8xf32:
-; NOSIMD: .functype fadd_fmul_contract_8xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f32.mul $push0=, $16, $8
-; NOSIMD-NEXT: f32.add $push1=, $pop0, $24
-; NOSIMD-NEXT: f32.store 28($0), $pop1
-; NOSIMD-NEXT: f32.mul $push2=, $15, $7
-; NOSIMD-NEXT: f32.add $push3=, $pop2, $23
-; NOSIMD-NEXT: f32.store 24($0), $pop3
-; NOSIMD-NEXT: f32.mul $push4=, $14, $6
-; NOSIMD-NEXT: f32.add $push5=, $pop4, $22
-; NOSIMD-NEXT: f32.store 20($0), $pop5
-; NOSIMD-NEXT: f32.mul $push6=, $13, $5
-; NOSIMD-NEXT: f32.add $push7=, $pop6, $21
-; NOSIMD-NEXT: f32.store 16($0), $pop7
-; NOSIMD-NEXT: f32.mul $push8=, $12, $4
-; NOSIMD-NEXT: f32.add $push9=, $pop8, $20
-; NOSIMD-NEXT: f32.store 12($0), $pop9
-; NOSIMD-NEXT: f32.mul $push10=, $11, $3
-; NOSIMD-NEXT: f32.add $push11=, $pop10, $19
-; NOSIMD-NEXT: f32.store 8($0), $pop11
-; NOSIMD-NEXT: f32.mul $push12=, $10, $2
-; NOSIMD-NEXT: f32.add $push13=, $pop12, $18
-; NOSIMD-NEXT: f32.store 4($0), $pop13
-; NOSIMD-NEXT: f32.mul $push14=, $9, $1
-; NOSIMD-NEXT: f32.add $push15=, $pop14, $17
-; NOSIMD-NEXT: f32.store 0($0), $pop15
-; NOSIMD-NEXT: return
%mul = fmul contract <8 x float> %b, %a
%add = fadd contract <8 x float> %mul, %c
ret <8 x float> %add
}
+
define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; RELAXED-LABEL: fadd_fmul_contract_2xf64:
; RELAXED: .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $1, $0, $2
+; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $2, $1, $0
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fadd_fmul_contract_2xf64:
@@ -1404,64 +211,28 @@ define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b,
; STRICT-NEXT: f64x2.mul $push0=, $1, $0
; STRICT-NEXT: f64x2.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
-;
-; NOFP16-LABEL: fadd_fmul_contract_2xf64:
-; NOFP16: .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f64x2.mul $push0=, $1, $0
-; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2
-; NOFP16-NEXT: return $pop1
-;
-; NOSIMD-LABEL: fadd_fmul_contract_2xf64:
-; NOSIMD: .functype fadd_fmul_contract_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f64.mul $push0=, $4, $2
-; NOSIMD-NEXT: f64.add $push1=, $pop0, $6
-; NOSIMD-NEXT: f64.store 8($0), $pop1
-; NOSIMD-NEXT: f64.mul $push2=, $3, $1
-; NOSIMD-NEXT: f64.add $push3=, $pop2, $5
-; NOSIMD-NEXT: f64.store 0($0), $pop3
-; NOSIMD-NEXT: return
%mul = fmul contract <2 x double> %b, %a
%add = fadd contract <2 x double> %mul, %c
ret <2 x double> %add
}
-define <2 x double> @fadd_fmul_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; RELAXED-LABEL: fadd_fmul_2xf64:
-; RELAXED: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128)
+define float @fadd_fmul_contract_f32(float %a, float %b, float %c) {
+; RELAXED-LABEL: fadd_fmul_contract_f32:
+; RELAXED: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f64x2.mul $push0=, $1, $0
-; RELAXED-NEXT: f64x2.add $push1=, $pop0, $2
+; RELAXED-NEXT: f32.mul $push0=, $1, $0
+; RELAXED-NEXT: f32.add $push1=, $pop0, $2
; RELAXED-NEXT: return $pop1
;
-; STRICT-LABEL: fadd_fmul_2xf64:
-; STRICT: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128)
+; STRICT-LABEL: fadd_fmul_contract_f32:
+; STRICT: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f64x2.mul $push0=, $1, $0
-; STRICT-NEXT: f64x2.add $push1=, $pop0, $2
+; STRICT-NEXT: f32.mul $push0=, $1, $0
+; STRICT-NEXT: f32.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
-;
-; NOFP16-LABEL: fadd_fmul_2xf64:
-; NOFP16: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: f64x2.mul $push0=, $1, $0
-; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2
-; NOFP16-NEXT: return $pop1
-;
-; NOSIMD-LABEL: fadd_fmul_2xf64:
-; NOSIMD: .functype fadd_fmul_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: f64.mul $push0=, $4, $2
-; NOSIMD-NEXT: f64.add $push1=, $pop0, $6
-; NOSIMD-NEXT: f64.store 8($0), $pop1
-; NOSIMD-NEXT: f64.mul $push2=, $3, $1
-; NOSIMD-NEXT: f64.add $push3=, $pop2, $5
-; NOSIMD-NEXT: f64.store 0($0), $pop3
-; NOSIMD-NEXT: return
- %mul = fmul <2 x double> %b, %a
- %add = fadd <2 x double> %mul, %c
- ret <2 x double> %add
+ %mul = fmul contract float %b, %a
+ %add = fadd contract float %mul, %c
+ ret float %add
}
define float @fma_f32(float %a, float %b, float %c) {
@@ -1476,18 +247,6 @@ define float @fma_f32(float %a, float %b, float %c) {
; STRICT-NEXT: # %bb.0:
; STRICT-NEXT: call $push0=, fmaf, $0, $1, $2
; STRICT-NEXT: return $pop0
-;
-; NOFP16-LABEL: fma_f32:
-; NOFP16: .functype fma_f32 (f32, f32, f32) -> (f32)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: call $push0=, fmaf, $0, $1, $2
-; NOFP16-NEXT: return $pop0
-;
-; NOSIMD-LABEL: fma_f32:
-; NOSIMD: .functype fma_f32 (f32, f32, f32) -> (f32)
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: call $push0=, fmaf, $0, $1, $2
-; NOSIMD-NEXT: return $pop0
%fma = call float @llvm.fma(float %a, float %b, float %c)
ret float %fma
}
@@ -1504,18 +263,6 @@ define double @fma_f64(double %a, double %b, double %c) {
; STRICT-NEXT: # %bb.0:
; STRICT-NEXT: call $push0=, fma, $0, $1, $2
; STRICT-NEXT: return $pop0
-;
-; NOFP16-LABEL: fma_f64:
-; NOFP16: .functype fma_f64 (f64, f64, f64) -> (f64)
-; NOFP16-NEXT: # %bb.0:
-; NOFP16-NEXT: call $push0=, fma, $0, $1, $2
-; NOFP16-NEXT: return $pop0
-;
-; NOSIMD-LABEL: fma_f64:
-; NOSIMD: .functype fma_f64 (f64, f64, f64) -> (f64)
-; NOSIMD-NEXT: # %bb.0:
-; NOSIMD-NEXT: call $push0=, fma, $0, $1, $2
-; NOSIMD-NEXT: return $pop0
%fma = call double @llvm.fma(double %a, double %b, double %c)
ret double %fma
}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
index b90c1dadd755f..6e2d860c3f152 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
@@ -27,7 +27,7 @@ define <4 x float> @fsub_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4
; RELAXED-LABEL: fsub_fmul_contract_4xf32:
; RELAXED: .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $1, $0, $2
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $2, $1, $0
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fsub_fmul_contract_4xf32:
@@ -46,14 +46,15 @@ define <8 x half> @fsub_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x h
; RELAXED-LABEL: fsub_fmul_contract_8xf16:
; RELAXED: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f16x8.nmadd $push0=, $1, $0, $2
+; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $2, $1, $0
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fsub_fmul_contract_8xf16:
; STRICT: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f16x8.nmadd $push0=, $1, $0, $2
-; STRICT-NEXT: return $pop0
+; STRICT-NEXT: f16x8.mul $push0=, $1, $0
+; STRICT-NEXT: f16x8.sub $push1=, $2, $pop0
+; STRICT-NEXT: return $pop1
%mul = fmul contract <8 x half> %b, %a
%sub = fsub contract <8 x half> %c, %mul
ret <8 x half> %sub
@@ -83,9 +84,9 @@ define <8 x float> @fsub_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8
; RELAXED-LABEL: fsub_fmul_contract_8xf32:
; RELAXED: .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $4, $2, $6
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $6, $4, $2
; RELAXED-NEXT: v128.store 16($0), $pop0
-; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $3, $1, $5
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $5, $3, $1
; RELAXED-NEXT: v128.store 0($0), $pop1
; RELAXED-NEXT: return
;
@@ -109,7 +110,7 @@ define <2 x double> @fsub_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b,
; RELAXED-LABEL: fsub_fmul_contract_2xf64:
; RELAXED: .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $1, $0, $2
+; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $2, $1, $0
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fsub_fmul_contract_2xf64:
@@ -142,55 +143,3 @@ define float @fsub_fmul_contract_f32(float %a, float %b, float %c) {
ret float %sub
}
-define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
-; RELAXED-LABEL: fmuladd_8xf16:
-; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f16x8.nmadd $push0=, $0, $1, $2
-; RELAXED-NEXT: return $pop0
-;
-; STRICT-LABEL: fmuladd_8xf16:
-; STRICT: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f16x8.nmadd $push0=, $0, $1, $2
-; STRICT-NEXT: return $pop0
- %fneg = fneg <8 x half> %a
- %fma = call <8 x half> @llvm.fmuladd(<8 x half> %fneg, <8 x half> %b, <8 x half> %c)
- ret <8 x half> %fma
-}
-
-define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; RELAXED-LABEL: fmuladd_4xf32:
-; RELAXED: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $0, $1, $2
-; RELAXED-NEXT: return $pop0
-;
-; STRICT-LABEL: fmuladd_4xf32:
-; STRICT: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f32x4.mul $push0=, $0, $1
-; STRICT-NEXT: f32x4.sub $push1=, $2, $pop0
-; STRICT-NEXT: return $pop1
- %fneg = fneg <4 x float> %a
- %fma = call <4 x float> @llvm.fmuladd(<4 x float> %fneg, <4 x float> %b, <4 x float> %c)
- ret <4 x float> %fma
-}
-
-define <2 x double> @fmuladd_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; RELAXED-LABEL: fmuladd_2xf64:
-; RELAXED: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
-; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $0, $1, $2
-; RELAXED-NEXT: return $pop0
-;
-; STRICT-LABEL: fmuladd_2xf64:
-; STRICT: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
-; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f64x2.mul $push0=, $0, $1
-; STRICT-NEXT: f64x2.sub $push1=, $2, $pop0
-; STRICT-NEXT: return $pop1
- %fneg = fneg <2 x double> %a
- %fma = call <2 x double> @llvm.fmuladd(<2 x double> %fneg, <2 x double> %b, <2 x double> %c)
- ret <2 x double> %fma
-}
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 57da338a9a95d..48aec4bc52a0c 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -917,11 +917,11 @@ main:
# CHECK: f16x8.nearest # encoding: [0xfd,0xb6,0x02]
f16x8.nearest
- # CHECK: f16x8.madd # encoding: [0xfd,0xce,0x02]
- f16x8.madd
+ # CHECK: f16x8.relaxed_madd # encoding: [0xfd,0xce,0x02]
+ f16x8.relaxed_madd
- # CHECK: f16x8.nmadd # encoding: [0xfd,0xcf,0x02]
- f16x8.nmadd
+ # CHECK: f16x8.relaxed_nmadd # encoding: [0xfd,0xcf,0x02]
+ f16x8.relaxed_nmadd
# CHECK: i16x8.trunc_sat_f16x8_s # encoding: [0xfd,0xc5,0x02]
i16x8.trunc_sat_f16x8_s
More information about the llvm-commits mailing list