[llvm] 30d3441 - Revert "[WebAssembly] Lower fmuladd to madd and nmadd" (#163171)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 13 03:53:45 PDT 2025


Author: Sam Parker
Date: 2025-10-13T11:53:40+01:00
New Revision: 30d3441cf082c3672cd7b8495cb8cc1d6ca2c8e0

URL: https://github.com/llvm/llvm-project/commit/30d3441cf082c3672cd7b8495cb8cc1d6ca2c8e0
DIFF: https://github.com/llvm/llvm-project/commit/30d3441cf082c3672cd7b8495cb8cc1d6ca2c8e0.diff

LOG: Revert "[WebAssembly] Lower fmuladd to madd and nmadd" (#163171)

Reverts llvm/llvm-project#161355

Looks like I've broken some intrinsic code generation.

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/ISDOpcodes.h
    llvm/include/llvm/Target/TargetSelectionDAG.td
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/lib/CodeGen/TargetLoweringBase.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
    llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
    llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
    llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
    llvm/test/MC/WebAssembly/simd-encodings.s

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index ff3dd0d4c3c51..c76c83d84b3c7 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -514,12 +514,6 @@ enum NodeType {
   /// separately rounded operations.
   FMAD,
 
-  /// FMULADD - Performs a * b + c, with, or without, intermediate rounding.
-  /// It is expected that this will be illegal for most targets, as it usually
-  /// makes sense to split this or use an FMA. But some targets, such as
-  /// WebAssembly, can directly support these semantics.
-  FMULADD,
-
   /// FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.  NOTE: This
   /// DAG node does not require that X and Y have the same type, just that
   /// they are both floating point.  X and the result must have the same type.

diff  --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 07a858fd682fc..632be7ad9e350 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -535,7 +535,6 @@ def fdiv       : SDNode<"ISD::FDIV"       , SDTFPBinOp>;
 def frem       : SDNode<"ISD::FREM"       , SDTFPBinOp>;
 def fma        : SDNode<"ISD::FMA"        , SDTFPTernaryOp, [SDNPCommutative]>;
 def fmad       : SDNode<"ISD::FMAD"       , SDTFPTernaryOp, [SDNPCommutative]>;
-def fmuladd    : SDNode<"ISD::FMULADD"    , SDTFPTernaryOp, [SDNPCommutative]>;
 def fabs       : SDNode<"ISD::FABS"       , SDTFPUnaryOp>;
 def fminnum    : SDNode<"ISD::FMINNUM"    , SDTFPBinOp,
                                   [SDNPCommutative, SDNPAssociative]>;

diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e15384202f758..b1accdd066dfd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -509,7 +509,6 @@ namespace {
     SDValue visitFMUL(SDNode *N);
     template <class MatchContextClass> SDValue visitFMA(SDNode *N);
     SDValue visitFMAD(SDNode *N);
-    SDValue visitFMULADD(SDNode *N);
     SDValue visitFDIV(SDNode *N);
     SDValue visitFREM(SDNode *N);
     SDValue visitFSQRT(SDNode *N);
@@ -1992,7 +1991,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::FMUL:               return visitFMUL(N);
   case ISD::FMA:                return visitFMA<EmptyMatchContext>(N);
   case ISD::FMAD:               return visitFMAD(N);
-  case ISD::FMULADD:            return visitFMULADD(N);
   case ISD::FDIV:               return visitFDIV(N);
   case ISD::FREM:               return visitFREM(N);
   case ISD::FSQRT:              return visitFSQRT(N);
@@ -18446,21 +18444,6 @@ SDValue DAGCombiner::visitFMAD(SDNode *N) {
   return SDValue();
 }
 
-SDValue DAGCombiner::visitFMULADD(SDNode *N) {
-  SDValue N0 = N->getOperand(0);
-  SDValue N1 = N->getOperand(1);
-  SDValue N2 = N->getOperand(2);
-  EVT VT = N->getValueType(0);
-  SDLoc DL(N);
-
-  // Constant fold FMULADD.
-  if (SDValue C =
-          DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2}))
-    return C;
-
-  return SDValue();
-}
-
 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
 // reciprocal.
 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c1fd052d01f31..08af74c258899 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5786,7 +5786,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
   case ISD::FCOPYSIGN:
   case ISD::FMA:
   case ISD::FMAD:
-  case ISD::FMULADD:
   case ISD::FP_EXTEND:
   case ISD::FP_TO_SINT_SAT:
   case ISD::FP_TO_UINT_SAT:
@@ -5905,7 +5904,6 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
   case ISD::FCOSH:
   case ISD::FTANH:
   case ISD::FMA:
-  case ISD::FMULADD:
   case ISD::FMAD: {
     if (SNaN)
       return true;
@@ -7233,7 +7231,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
   }
 
   // Handle fma/fmad special cases.
-  if (Opcode == ISD::FMA || Opcode == ISD::FMAD || Opcode == ISD::FMULADD) {
+  if (Opcode == ISD::FMA || Opcode == ISD::FMAD) {
     assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
     assert(Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
            Ops[2].getValueType() == VT && "FMA types must match!");
@@ -7244,7 +7242,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
       APFloat V1 = C1->getValueAPF();
       const APFloat &V2 = C2->getValueAPF();
       const APFloat &V3 = C3->getValueAPF();
-      if (Opcode == ISD::FMAD || Opcode == ISD::FMULADD) {
+      if (Opcode == ISD::FMAD) {
         V1.multiply(V2, APFloat::rmNearestTiesToEven);
         V1.add(V3, APFloat::rmNearestTiesToEven);
       } else

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0f2b5188fc10a..c21890a0d856f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6996,13 +6996,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
                                getValue(I.getArgOperand(0)),
                                getValue(I.getArgOperand(1)),
                                getValue(I.getArgOperand(2)), Flags));
-    } else if (TLI.isOperationLegalOrCustom(ISD::FMULADD, VT)) {
-      // TODO: Support splitting the vector.
-      setValue(&I, DAG.getNode(ISD::FMULADD, sdl,
-                               getValue(I.getArgOperand(0)).getValueType(),
-                               getValue(I.getArgOperand(0)),
-                               getValue(I.getArgOperand(1)),
-                               getValue(I.getArgOperand(2)), Flags));
     } else {
       // TODO: Intrinsic calls should have fast-math-flags.
       SDValue Mul = DAG.getNode(

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 39cbfad6d0be1..fcfbfe6c461d3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -310,7 +310,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::FMA:                        return "fma";
   case ISD::STRICT_FMA:                 return "strict_fma";
   case ISD::FMAD:                       return "fmad";
-  case ISD::FMULADD:                    return "fmuladd";
   case ISD::FREM:                       return "frem";
   case ISD::STRICT_FREM:                return "strict_frem";
   case ISD::FCOPYSIGN:                  return "fcopysign";

diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 920dff935daed..cc503d324e74b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7676,7 +7676,6 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
     break;
   }
   case ISD::FMA:
-  case ISD::FMULADD:
   case ISD::FMAD: {
     if (!Flags.hasNoSignedZeros())
       break;

diff  --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 060b1ddc2ef39..c23281a820b2b 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -815,8 +815,7 @@ void TargetLoweringBase::initActions() {
                         ISD::FTAN,           ISD::FACOS,
                         ISD::FASIN,          ISD::FATAN,
                         ISD::FCOSH,          ISD::FSINH,
-                        ISD::FTANH,          ISD::FATAN2,
-                        ISD::FMULADD},
+                        ISD::FTANH,          ISD::FATAN2},
                        VT, Expand);
 
     // Overflow operations default to expand

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 47c24fc27f1d6..64723340051b8 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -317,15 +317,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
     }
 
-    if (Subtarget->hasFP16()) {
-      setOperationAction(ISD::FMA, MVT::v8f16, Legal);
-    }
-
-    if (Subtarget->hasRelaxedSIMD()) {
-      setOperationAction(ISD::FMULADD, MVT::v4f32, Legal);
-      setOperationAction(ISD::FMULADD, MVT::v2f64, Legal);
-    }
-
     // Partial MLA reductions.
     for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) {
       setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
@@ -1129,18 +1120,6 @@ WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
   return TargetLoweringBase::getPreferredVectorAction(VT);
 }
 
-bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
-    const MachineFunction &MF, EVT VT) const {
-  if (!Subtarget->hasFP16() || !VT.isVector())
-    return false;
-
-  EVT ScalarVT = VT.getScalarType();
-  if (!ScalarVT.isSimple())
-    return false;
-
-  return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
-}
-
 bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
     SDValue Op, const TargetLoweringOpt &TLO) const {
   // ISel process runs DAGCombiner after legalization; this step is called

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 472ec678534a4..b33a8530310be 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -81,8 +81,6 @@ class WebAssemblyTargetLowering final : public TargetLowering {
 
   TargetLoweringBase::LegalizeTypeAction
   getPreferredVectorAction(MVT VT) const override;
-  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
-                                  EVT VT) const override;
 
   SDValue LowerCall(CallLoweringInfo &CLI,
                     SmallVectorImpl<SDValue> &InVals) const override;

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 72835b3c6424e..49af78bce68c3 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1626,8 +1626,7 @@ defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero,
 // Relaxed (Negative) Multiply-Add  (madd/nmadd)
 //===----------------------------------------------------------------------===//
 
-multiclass RELAXED_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
-                            list<Predicate> reqs> {
+multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> reqs> {
   defm MADD_#vec :
     SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
            [(set (vec.vt V128:$dst), (int_wasm_relaxed_madd
@@ -1641,40 +1640,16 @@ multiclass RELAXED_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
            vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c",
            vec.prefix#".relaxed_nmadd", simdopS, reqs>;
 
-  def : Pat<(fadd_contract (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b)), (vec.vt V128:$c)),
-            (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
-  def : Pat<(fmuladd (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)),
-             (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
+  def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
+             (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
 
-  def : Pat<(fsub_contract (vec.vt V128:$c), (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b))),
-            (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
-  def : Pat<(fmuladd (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)),
-             (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
+  def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
+             (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
 }
 
-defm "" : RELAXED_SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
-defm "" : RELAXED_SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
-
-//===----------------------------------------------------------------------===//
-// FP16 (Negative) Multiply-Add  (madd/nmadd)
-//===----------------------------------------------------------------------===//
-
-multiclass HALF_PRECISION_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
-                                   list<Predicate> reqs> {
-  defm MADD_#vec :
-    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
-           [(set (vec.vt V128:$dst), (fma
-             (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
-           vec.prefix#".madd\t$dst, $a, $b, $c",
-           vec.prefix#".madd", simdopA, reqs>;
-  defm NMADD_#vec :
-    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
-           [(set (vec.vt V128:$dst), (fma
-             (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)))],
-           vec.prefix#".nmadd\t$dst, $a, $b, $c",
-           vec.prefix#".nmadd", simdopS, reqs>;
-}
-defm "" : HALF_PRECISION_SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>;
+defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
+defm "" : SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
+defm "" : SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>;
 
 //===----------------------------------------------------------------------===//
 // Laneselect

diff  --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
index 600241aef99d0..e065de38951b1 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
@@ -2,278 +2,9 @@
 
 ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers  -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s --check-prefix=RELAXED
 ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers  -mattr=+fp16,+simd128,              | FileCheck %s --check-prefix=STRICT
-; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers  -mattr=+simd128                     | FileCheck %s --check-prefix=NOFP16
-; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers                                      | FileCheck %s --check-prefix=NOSIMD
 
 target triple = "wasm32"
 
-define half @fadd_fmul_contract_f16(half %a, half %b, half %c) {
-; RELAXED-LABEL: fadd_fmul_contract_f16:
-; RELAXED:         .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    call $push0=, __truncsfhf2, $0
-; RELAXED-NEXT:    call $push1=, __extendhfsf2, $pop0
-; RELAXED-NEXT:    call $push2=, __truncsfhf2, $1
-; RELAXED-NEXT:    call $push3=, __extendhfsf2, $pop2
-; RELAXED-NEXT:    f32.mul $push4=, $pop1, $pop3
-; RELAXED-NEXT:    call $push5=, __truncsfhf2, $2
-; RELAXED-NEXT:    call $push6=, __extendhfsf2, $pop5
-; RELAXED-NEXT:    f32.add $push7=, $pop4, $pop6
-; RELAXED-NEXT:    return $pop7
-;
-; STRICT-LABEL: fadd_fmul_contract_f16:
-; STRICT:         .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    call $push0=, __truncsfhf2, $0
-; STRICT-NEXT:    call $push1=, __extendhfsf2, $pop0
-; STRICT-NEXT:    call $push2=, __truncsfhf2, $1
-; STRICT-NEXT:    call $push3=, __extendhfsf2, $pop2
-; STRICT-NEXT:    f32.mul $push4=, $pop1, $pop3
-; STRICT-NEXT:    call $push5=, __truncsfhf2, $2
-; STRICT-NEXT:    call $push6=, __extendhfsf2, $pop5
-; STRICT-NEXT:    f32.add $push7=, $pop4, $pop6
-; STRICT-NEXT:    return $pop7
-;
-; NOFP16-LABEL: fadd_fmul_contract_f16:
-; NOFP16:         .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    call $push0=, __truncsfhf2, $0
-; NOFP16-NEXT:    call $push1=, __extendhfsf2, $pop0
-; NOFP16-NEXT:    call $push2=, __truncsfhf2, $1
-; NOFP16-NEXT:    call $push3=, __extendhfsf2, $pop2
-; NOFP16-NEXT:    f32.mul $push4=, $pop1, $pop3
-; NOFP16-NEXT:    call $push5=, __truncsfhf2, $2
-; NOFP16-NEXT:    call $push6=, __extendhfsf2, $pop5
-; NOFP16-NEXT:    f32.add $push7=, $pop4, $pop6
-; NOFP16-NEXT:    return $pop7
-;
-; NOSIMD-LABEL: fadd_fmul_contract_f16:
-; NOSIMD:         .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    call $push0=, __truncsfhf2, $0
-; NOSIMD-NEXT:    call $push1=, __extendhfsf2, $pop0
-; NOSIMD-NEXT:    call $push2=, __truncsfhf2, $1
-; NOSIMD-NEXT:    call $push3=, __extendhfsf2, $pop2
-; NOSIMD-NEXT:    f32.mul $push4=, $pop1, $pop3
-; NOSIMD-NEXT:    call $push5=, __truncsfhf2, $2
-; NOSIMD-NEXT:    call $push6=, __extendhfsf2, $pop5
-; NOSIMD-NEXT:    f32.add $push7=, $pop4, $pop6
-; NOSIMD-NEXT:    return $pop7
-  %mul = fmul contract half %b, %a
-  %add = fadd contract half %mul, %c
-  ret half %add
-}
-
-define half @fmuladd_contract_f16(half %a, half %b, half %c) {
-; RELAXED-LABEL: fmuladd_contract_f16:
-; RELAXED:         .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    call $push0=, __truncsfhf2, $1
-; RELAXED-NEXT:    call $push1=, __extendhfsf2, $pop0
-; RELAXED-NEXT:    call $push2=, __truncsfhf2, $0
-; RELAXED-NEXT:    call $push3=, __extendhfsf2, $pop2
-; RELAXED-NEXT:    f32.mul $push4=, $pop1, $pop3
-; RELAXED-NEXT:    call $push5=, __truncsfhf2, $2
-; RELAXED-NEXT:    call $push6=, __extendhfsf2, $pop5
-; RELAXED-NEXT:    f32.add $push7=, $pop4, $pop6
-; RELAXED-NEXT:    return $pop7
-;
-; STRICT-LABEL: fmuladd_contract_f16:
-; STRICT:         .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    call $push0=, __truncsfhf2, $1
-; STRICT-NEXT:    call $push1=, __extendhfsf2, $pop0
-; STRICT-NEXT:    call $push2=, __truncsfhf2, $0
-; STRICT-NEXT:    call $push3=, __extendhfsf2, $pop2
-; STRICT-NEXT:    f32.mul $push4=, $pop1, $pop3
-; STRICT-NEXT:    call $push5=, __truncsfhf2, $2
-; STRICT-NEXT:    call $push6=, __extendhfsf2, $pop5
-; STRICT-NEXT:    f32.add $push7=, $pop4, $pop6
-; STRICT-NEXT:    return $pop7
-;
-; NOFP16-LABEL: fmuladd_contract_f16:
-; NOFP16:         .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    call $push0=, __truncsfhf2, $1
-; NOFP16-NEXT:    call $push1=, __extendhfsf2, $pop0
-; NOFP16-NEXT:    call $push2=, __truncsfhf2, $0
-; NOFP16-NEXT:    call $push3=, __extendhfsf2, $pop2
-; NOFP16-NEXT:    f32.mul $push4=, $pop1, $pop3
-; NOFP16-NEXT:    call $push5=, __truncsfhf2, $2
-; NOFP16-NEXT:    call $push6=, __extendhfsf2, $pop5
-; NOFP16-NEXT:    f32.add $push7=, $pop4, $pop6
-; NOFP16-NEXT:    return $pop7
-;
-; NOSIMD-LABEL: fmuladd_contract_f16:
-; NOSIMD:         .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    call $push0=, __truncsfhf2, $1
-; NOSIMD-NEXT:    call $push1=, __extendhfsf2, $pop0
-; NOSIMD-NEXT:    call $push2=, __truncsfhf2, $0
-; NOSIMD-NEXT:    call $push3=, __extendhfsf2, $pop2
-; NOSIMD-NEXT:    f32.mul $push4=, $pop1, $pop3
-; NOSIMD-NEXT:    call $push5=, __truncsfhf2, $2
-; NOSIMD-NEXT:    call $push6=, __extendhfsf2, $pop5
-; NOSIMD-NEXT:    f32.add $push7=, $pop4, $pop6
-; NOSIMD-NEXT:    return $pop7
-  %fma = call contract half @llvm.fmuladd(half %a, half %b, half %c)
-  ret half %fma
-}
-
-define half @fmuladd_f16(half %a, half %b, half %c) {
-; RELAXED-LABEL: fmuladd_f16:
-; RELAXED:         .functype fmuladd_f16 (f32, f32, f32) -> (f32)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    call $push0=, __truncsfhf2, $1
-; RELAXED-NEXT:    call $push1=, __extendhfsf2, $pop0
-; RELAXED-NEXT:    call $push2=, __truncsfhf2, $0
-; RELAXED-NEXT:    call $push3=, __extendhfsf2, $pop2
-; RELAXED-NEXT:    f32.mul $push4=, $pop1, $pop3
-; RELAXED-NEXT:    call $push5=, __truncsfhf2, $2
-; RELAXED-NEXT:    call $push6=, __extendhfsf2, $pop5
-; RELAXED-NEXT:    f32.add $push7=, $pop4, $pop6
-; RELAXED-NEXT:    return $pop7
-;
-; STRICT-LABEL: fmuladd_f16:
-; STRICT:         .functype fmuladd_f16 (f32, f32, f32) -> (f32)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    call $push0=, __truncsfhf2, $1
-; STRICT-NEXT:    call $push1=, __extendhfsf2, $pop0
-; STRICT-NEXT:    call $push2=, __truncsfhf2, $0
-; STRICT-NEXT:    call $push3=, __extendhfsf2, $pop2
-; STRICT-NEXT:    f32.mul $push4=, $pop1, $pop3
-; STRICT-NEXT:    call $push5=, __truncsfhf2, $2
-; STRICT-NEXT:    call $push6=, __extendhfsf2, $pop5
-; STRICT-NEXT:    f32.add $push7=, $pop4, $pop6
-; STRICT-NEXT:    return $pop7
-;
-; NOFP16-LABEL: fmuladd_f16:
-; NOFP16:         .functype fmuladd_f16 (f32, f32, f32) -> (f32)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    call $push0=, __truncsfhf2, $1
-; NOFP16-NEXT:    call $push1=, __extendhfsf2, $pop0
-; NOFP16-NEXT:    call $push2=, __truncsfhf2, $0
-; NOFP16-NEXT:    call $push3=, __extendhfsf2, $pop2
-; NOFP16-NEXT:    f32.mul $push4=, $pop1, $pop3
-; NOFP16-NEXT:    call $push5=, __truncsfhf2, $2
-; NOFP16-NEXT:    call $push6=, __extendhfsf2, $pop5
-; NOFP16-NEXT:    f32.add $push7=, $pop4, $pop6
-; NOFP16-NEXT:    return $pop7
-;
-; NOSIMD-LABEL: fmuladd_f16:
-; NOSIMD:         .functype fmuladd_f16 (f32, f32, f32) -> (f32)
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    call $push0=, __truncsfhf2, $1
-; NOSIMD-NEXT:    call $push1=, __extendhfsf2, $pop0
-; NOSIMD-NEXT:    call $push2=, __truncsfhf2, $0
-; NOSIMD-NEXT:    call $push3=, __extendhfsf2, $pop2
-; NOSIMD-NEXT:    f32.mul $push4=, $pop1, $pop3
-; NOSIMD-NEXT:    call $push5=, __truncsfhf2, $2
-; NOSIMD-NEXT:    call $push6=, __extendhfsf2, $pop5
-; NOSIMD-NEXT:    f32.add $push7=, $pop4, $pop6
-; NOSIMD-NEXT:    return $pop7
-  %fma = call half @llvm.fmuladd(half %a, half %b, half %c)
-  ret half %fma
-}
-
-
-define float @fadd_fmul_contract_f32(float %a, float %b, float %c) {
-; RELAXED-LABEL: fadd_fmul_contract_f32:
-; RELAXED:         .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f32.mul $push0=, $1, $0
-; RELAXED-NEXT:    f32.add $push1=, $pop0, $2
-; RELAXED-NEXT:    return $pop1
-;
-; STRICT-LABEL: fadd_fmul_contract_f32:
-; STRICT:         .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f32.mul $push0=, $1, $0
-; STRICT-NEXT:    f32.add $push1=, $pop0, $2
-; STRICT-NEXT:    return $pop1
-;
-; NOFP16-LABEL: fadd_fmul_contract_f32:
-; NOFP16:         .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f32.mul $push0=, $1, $0
-; NOFP16-NEXT:    f32.add $push1=, $pop0, $2
-; NOFP16-NEXT:    return $pop1
-;
-; NOSIMD-LABEL: fadd_fmul_contract_f32:
-; NOSIMD:         .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f32.mul $push0=, $1, $0
-; NOSIMD-NEXT:    f32.add $push1=, $pop0, $2
-; NOSIMD-NEXT:    return $pop1
-  %mul = fmul contract float %b, %a
-  %add = fadd contract float %mul, %c
-  ret float %add
-}
-
-define float @fmuladd_contract_f32(float %a, float %b, float %c) {
-; RELAXED-LABEL: fmuladd_contract_f32:
-; RELAXED:         .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f32.mul $push0=, $0, $1
-; RELAXED-NEXT:    f32.add $push1=, $pop0, $2
-; RELAXED-NEXT:    return $pop1
-;
-; STRICT-LABEL: fmuladd_contract_f32:
-; STRICT:         .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f32.mul $push0=, $0, $1
-; STRICT-NEXT:    f32.add $push1=, $pop0, $2
-; STRICT-NEXT:    return $pop1
-;
-; NOFP16-LABEL: fmuladd_contract_f32:
-; NOFP16:         .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f32.mul $push0=, $0, $1
-; NOFP16-NEXT:    f32.add $push1=, $pop0, $2
-; NOFP16-NEXT:    return $pop1
-;
-; NOSIMD-LABEL: fmuladd_contract_f32:
-; NOSIMD:         .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f32.mul $push0=, $0, $1
-; NOSIMD-NEXT:    f32.add $push1=, $pop0, $2
-; NOSIMD-NEXT:    return $pop1
-  %fma = call contract float @llvm.fmuladd(float %a, float %b, float %c)
-  ret float %fma
-}
-
-define float @fmuladd_f32(float %a, float %b, float %c) {
-; RELAXED-LABEL: fmuladd_f32:
-; RELAXED:         .functype fmuladd_f32 (f32, f32, f32) -> (f32)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f32.mul $push0=, $0, $1
-; RELAXED-NEXT:    f32.add $push1=, $pop0, $2
-; RELAXED-NEXT:    return $pop1
-;
-; STRICT-LABEL: fmuladd_f32:
-; STRICT:         .functype fmuladd_f32 (f32, f32, f32) -> (f32)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f32.mul $push0=, $0, $1
-; STRICT-NEXT:    f32.add $push1=, $pop0, $2
-; STRICT-NEXT:    return $pop1
-;
-; NOFP16-LABEL: fmuladd_f32:
-; NOFP16:         .functype fmuladd_f32 (f32, f32, f32) -> (f32)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f32.mul $push0=, $0, $1
-; NOFP16-NEXT:    f32.add $push1=, $pop0, $2
-; NOFP16-NEXT:    return $pop1
-;
-; NOSIMD-LABEL: fmuladd_f32:
-; NOSIMD:         .functype fmuladd_f32 (f32, f32, f32) -> (f32)
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f32.mul $push0=, $0, $1
-; NOSIMD-NEXT:    f32.add $push1=, $pop0, $2
-; NOSIMD-NEXT:    return $pop1
-  %fma = call float @llvm.fmuladd(float %a, float %b, float %c)
-  ret float %fma
-}
-
 define double @fadd_fmul_contract_f64(double %a, double %b, double %c) {
 ; RELAXED-LABEL: fadd_fmul_contract_f64:
 ; RELAXED:         .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
@@ -288,94 +19,16 @@ define double @fadd_fmul_contract_f64(double %a, double %b, double %c) {
 ; STRICT-NEXT:    f64.mul $push0=, $1, $0
 ; STRICT-NEXT:    f64.add $push1=, $pop0, $2
 ; STRICT-NEXT:    return $pop1
-;
-; NOFP16-LABEL: fadd_fmul_contract_f64:
-; NOFP16:         .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f64.mul $push0=, $1, $0
-; NOFP16-NEXT:    f64.add $push1=, $pop0, $2
-; NOFP16-NEXT:    return $pop1
-;
-; NOSIMD-LABEL: fadd_fmul_contract_f64:
-; NOSIMD:         .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f64.mul $push0=, $1, $0
-; NOSIMD-NEXT:    f64.add $push1=, $pop0, $2
-; NOSIMD-NEXT:    return $pop1
   %mul = fmul contract double %b, %a
   %add = fadd contract double %mul, %c
   ret double %add
 }
 
-define double @fmuladd_f64(double %a, double %b, double %c) {
-; RELAXED-LABEL: fmuladd_f64:
-; RELAXED:         .functype fmuladd_f64 (f64, f64, f64) -> (f64)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f64.mul $push0=, $0, $1
-; RELAXED-NEXT:    f64.add $push1=, $pop0, $2
-; RELAXED-NEXT:    return $pop1
-;
-; STRICT-LABEL: fmuladd_f64:
-; STRICT:         .functype fmuladd_f64 (f64, f64, f64) -> (f64)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f64.mul $push0=, $0, $1
-; STRICT-NEXT:    f64.add $push1=, $pop0, $2
-; STRICT-NEXT:    return $pop1
-;
-; NOFP16-LABEL: fmuladd_f64:
-; NOFP16:         .functype fmuladd_f64 (f64, f64, f64) -> (f64)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f64.mul $push0=, $0, $1
-; NOFP16-NEXT:    f64.add $push1=, $pop0, $2
-; NOFP16-NEXT:    return $pop1
-;
-; NOSIMD-LABEL: fmuladd_f64:
-; NOSIMD:         .functype fmuladd_f64 (f64, f64, f64) -> (f64)
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f64.mul $push0=, $0, $1
-; NOSIMD-NEXT:    f64.add $push1=, $pop0, $2
-; NOSIMD-NEXT:    return $pop1
-  %fma = call double @llvm.fmuladd(double %a, double %b, double %c)
-  ret double %fma
-}
-
-define double @fmuladd_contract_f64(double %a, double %b, double %c) {
-; RELAXED-LABEL: fmuladd_contract_f64:
-; RELAXED:         .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f64.mul $push0=, $0, $1
-; RELAXED-NEXT:    f64.add $push1=, $pop0, $2
-; RELAXED-NEXT:    return $pop1
-;
-; STRICT-LABEL: fmuladd_contract_f64:
-; STRICT:         .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f64.mul $push0=, $0, $1
-; STRICT-NEXT:    f64.add $push1=, $pop0, $2
-; STRICT-NEXT:    return $pop1
-;
-; NOFP16-LABEL: fmuladd_contract_f64:
-; NOFP16:         .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f64.mul $push0=, $0, $1
-; NOFP16-NEXT:    f64.add $push1=, $pop0, $2
-; NOFP16-NEXT:    return $pop1
-;
-; NOSIMD-LABEL: fmuladd_contract_f64:
-; NOSIMD:         .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f64.mul $push0=, $0, $1
-; NOSIMD-NEXT:    f64.add $push1=, $pop0, $2
-; NOSIMD-NEXT:    return $pop1
-  %fma = call contract double @llvm.fmuladd(double %a, double %b, double %c)
-  ret double %fma
-}
-
 define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; RELAXED-LABEL: fadd_fmul_contract_4xf32:
 ; RELAXED:         .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
 ; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f32x4.relaxed_madd $push0=, $1, $0, $2
+; RELAXED-NEXT:    f32x4.relaxed_madd $push0=, $2, $1, $0
 ; RELAXED-NEXT:    return $pop0
 ;
 ; STRICT-LABEL: fadd_fmul_contract_4xf32:
@@ -384,222 +37,31 @@ define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4
 ; STRICT-NEXT:    f32x4.mul $push0=, $1, $0
 ; STRICT-NEXT:    f32x4.add $push1=, $pop0, $2
 ; STRICT-NEXT:    return $pop1
-;
-; NOFP16-LABEL: fadd_fmul_contract_4xf32:
-; NOFP16:         .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f32x4.mul $push0=, $1, $0
-; NOFP16-NEXT:    f32x4.add $push1=, $pop0, $2
-; NOFP16-NEXT:    return $pop1
-;
-; NOSIMD-LABEL: fadd_fmul_contract_4xf32:
-; NOSIMD:         .functype fadd_fmul_contract_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f32.mul $push0=, $8, $4
-; NOSIMD-NEXT:    f32.add $push1=, $pop0, $12
-; NOSIMD-NEXT:    f32.store 12($0), $pop1
-; NOSIMD-NEXT:    f32.mul $push2=, $7, $3
-; NOSIMD-NEXT:    f32.add $push3=, $pop2, $11
-; NOSIMD-NEXT:    f32.store 8($0), $pop3
-; NOSIMD-NEXT:    f32.mul $push4=, $6, $2
-; NOSIMD-NEXT:    f32.add $push5=, $pop4, $10
-; NOSIMD-NEXT:    f32.store 4($0), $pop5
-; NOSIMD-NEXT:    f32.mul $push6=, $5, $1
-; NOSIMD-NEXT:    f32.add $push7=, $pop6, $9
-; NOSIMD-NEXT:    f32.store 0($0), $pop7
-; NOSIMD-NEXT:    return
   %mul = fmul contract <4 x float> %b, %a
   %add = fadd contract <4 x float> %mul, %c
   ret <4 x float> %add
 }
 
+
 define <8 x half> @fadd_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; RELAXED-LABEL: fadd_fmul_contract_8xf16:
 ; RELAXED:         .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
 ; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f16x8.madd $push0=, $1, $0, $2
+; RELAXED-NEXT:    f16x8.relaxed_madd $push0=, $2, $1, $0
 ; RELAXED-NEXT:    return $pop0
 ;
 ; STRICT-LABEL: fadd_fmul_contract_8xf16:
 ; STRICT:         .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
 ; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f16x8.madd $push0=, $1, $0, $2
-; STRICT-NEXT:    return $pop0
-;
-; NOFP16-LABEL: fadd_fmul_contract_8xf16:
-; NOFP16:         .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    call $push0=, __truncsfhf2, $8
-; NOFP16-NEXT:    call $push1=, __extendhfsf2, $pop0
-; NOFP16-NEXT:    call $push2=, __truncsfhf2, $16
-; NOFP16-NEXT:    call $push3=, __extendhfsf2, $pop2
-; NOFP16-NEXT:    f32.mul $push4=, $pop1, $pop3
-; NOFP16-NEXT:    call $push5=, __truncsfhf2, $24
-; NOFP16-NEXT:    call $push6=, __extendhfsf2, $pop5
-; NOFP16-NEXT:    f32.add $push7=, $pop4, $pop6
-; NOFP16-NEXT:    call $push8=, __truncsfhf2, $pop7
-; NOFP16-NEXT:    i32.store16 14($0), $pop8
-; NOFP16-NEXT:    call $push9=, __truncsfhf2, $7
-; NOFP16-NEXT:    call $push10=, __extendhfsf2, $pop9
-; NOFP16-NEXT:    call $push11=, __truncsfhf2, $15
-; NOFP16-NEXT:    call $push12=, __extendhfsf2, $pop11
-; NOFP16-NEXT:    f32.mul $push13=, $pop10, $pop12
-; NOFP16-NEXT:    call $push14=, __truncsfhf2, $23
-; NOFP16-NEXT:    call $push15=, __extendhfsf2, $pop14
-; NOFP16-NEXT:    f32.add $push16=, $pop13, $pop15
-; NOFP16-NEXT:    call $push17=, __truncsfhf2, $pop16
-; NOFP16-NEXT:    i32.store16 12($0), $pop17
-; NOFP16-NEXT:    call $push18=, __truncsfhf2, $6
-; NOFP16-NEXT:    call $push19=, __extendhfsf2, $pop18
-; NOFP16-NEXT:    call $push20=, __truncsfhf2, $14
-; NOFP16-NEXT:    call $push21=, __extendhfsf2, $pop20
-; NOFP16-NEXT:    f32.mul $push22=, $pop19, $pop21
-; NOFP16-NEXT:    call $push23=, __truncsfhf2, $22
-; NOFP16-NEXT:    call $push24=, __extendhfsf2, $pop23
-; NOFP16-NEXT:    f32.add $push25=, $pop22, $pop24
-; NOFP16-NEXT:    call $push26=, __truncsfhf2, $pop25
-; NOFP16-NEXT:    i32.store16 10($0), $pop26
-; NOFP16-NEXT:    call $push27=, __truncsfhf2, $5
-; NOFP16-NEXT:    call $push28=, __extendhfsf2, $pop27
-; NOFP16-NEXT:    call $push29=, __truncsfhf2, $13
-; NOFP16-NEXT:    call $push30=, __extendhfsf2, $pop29
-; NOFP16-NEXT:    f32.mul $push31=, $pop28, $pop30
-; NOFP16-NEXT:    call $push32=, __truncsfhf2, $21
-; NOFP16-NEXT:    call $push33=, __extendhfsf2, $pop32
-; NOFP16-NEXT:    f32.add $push34=, $pop31, $pop33
-; NOFP16-NEXT:    call $push35=, __truncsfhf2, $pop34
-; NOFP16-NEXT:    i32.store16 8($0), $pop35
-; NOFP16-NEXT:    call $push36=, __truncsfhf2, $4
-; NOFP16-NEXT:    call $push37=, __extendhfsf2, $pop36
-; NOFP16-NEXT:    call $push38=, __truncsfhf2, $12
-; NOFP16-NEXT:    call $push39=, __extendhfsf2, $pop38
-; NOFP16-NEXT:    f32.mul $push40=, $pop37, $pop39
-; NOFP16-NEXT:    call $push41=, __truncsfhf2, $20
-; NOFP16-NEXT:    call $push42=, __extendhfsf2, $pop41
-; NOFP16-NEXT:    f32.add $push43=, $pop40, $pop42
-; NOFP16-NEXT:    call $push44=, __truncsfhf2, $pop43
-; NOFP16-NEXT:    i32.store16 6($0), $pop44
-; NOFP16-NEXT:    call $push45=, __truncsfhf2, $3
-; NOFP16-NEXT:    call $push46=, __extendhfsf2, $pop45
-; NOFP16-NEXT:    call $push47=, __truncsfhf2, $11
-; NOFP16-NEXT:    call $push48=, __extendhfsf2, $pop47
-; NOFP16-NEXT:    f32.mul $push49=, $pop46, $pop48
-; NOFP16-NEXT:    call $push50=, __truncsfhf2, $19
-; NOFP16-NEXT:    call $push51=, __extendhfsf2, $pop50
-; NOFP16-NEXT:    f32.add $push52=, $pop49, $pop51
-; NOFP16-NEXT:    call $push53=, __truncsfhf2, $pop52
-; NOFP16-NEXT:    i32.store16 4($0), $pop53
-; NOFP16-NEXT:    call $push54=, __truncsfhf2, $2
-; NOFP16-NEXT:    call $push55=, __extendhfsf2, $pop54
-; NOFP16-NEXT:    call $push56=, __truncsfhf2, $10
-; NOFP16-NEXT:    call $push57=, __extendhfsf2, $pop56
-; NOFP16-NEXT:    f32.mul $push58=, $pop55, $pop57
-; NOFP16-NEXT:    call $push59=, __truncsfhf2, $18
-; NOFP16-NEXT:    call $push60=, __extendhfsf2, $pop59
-; NOFP16-NEXT:    f32.add $push61=, $pop58, $pop60
-; NOFP16-NEXT:    call $push62=, __truncsfhf2, $pop61
-; NOFP16-NEXT:    i32.store16 2($0), $pop62
-; NOFP16-NEXT:    call $push63=, __truncsfhf2, $1
-; NOFP16-NEXT:    call $push64=, __extendhfsf2, $pop63
-; NOFP16-NEXT:    call $push65=, __truncsfhf2, $9
-; NOFP16-NEXT:    call $push66=, __extendhfsf2, $pop65
-; NOFP16-NEXT:    f32.mul $push67=, $pop64, $pop66
-; NOFP16-NEXT:    call $push68=, __truncsfhf2, $17
-; NOFP16-NEXT:    call $push69=, __extendhfsf2, $pop68
-; NOFP16-NEXT:    f32.add $push70=, $pop67, $pop69
-; NOFP16-NEXT:    call $push71=, __truncsfhf2, $pop70
-; NOFP16-NEXT:    i32.store16 0($0), $pop71
-; NOFP16-NEXT:    return
-;
-; NOSIMD-LABEL: fadd_fmul_contract_8xf16:
-; NOSIMD:         .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    call $push0=, __truncsfhf2, $8
-; NOSIMD-NEXT:    call $push1=, __extendhfsf2, $pop0
-; NOSIMD-NEXT:    call $push2=, __truncsfhf2, $16
-; NOSIMD-NEXT:    call $push3=, __extendhfsf2, $pop2
-; NOSIMD-NEXT:    f32.mul $push4=, $pop1, $pop3
-; NOSIMD-NEXT:    call $push5=, __truncsfhf2, $24
-; NOSIMD-NEXT:    call $push6=, __extendhfsf2, $pop5
-; NOSIMD-NEXT:    f32.add $push7=, $pop4, $pop6
-; NOSIMD-NEXT:    call $push8=, __truncsfhf2, $pop7
-; NOSIMD-NEXT:    i32.store16 14($0), $pop8
-; NOSIMD-NEXT:    call $push9=, __truncsfhf2, $7
-; NOSIMD-NEXT:    call $push10=, __extendhfsf2, $pop9
-; NOSIMD-NEXT:    call $push11=, __truncsfhf2, $15
-; NOSIMD-NEXT:    call $push12=, __extendhfsf2, $pop11
-; NOSIMD-NEXT:    f32.mul $push13=, $pop10, $pop12
-; NOSIMD-NEXT:    call $push14=, __truncsfhf2, $23
-; NOSIMD-NEXT:    call $push15=, __extendhfsf2, $pop14
-; NOSIMD-NEXT:    f32.add $push16=, $pop13, $pop15
-; NOSIMD-NEXT:    call $push17=, __truncsfhf2, $pop16
-; NOSIMD-NEXT:    i32.store16 12($0), $pop17
-; NOSIMD-NEXT:    call $push18=, __truncsfhf2, $6
-; NOSIMD-NEXT:    call $push19=, __extendhfsf2, $pop18
-; NOSIMD-NEXT:    call $push20=, __truncsfhf2, $14
-; NOSIMD-NEXT:    call $push21=, __extendhfsf2, $pop20
-; NOSIMD-NEXT:    f32.mul $push22=, $pop19, $pop21
-; NOSIMD-NEXT:    call $push23=, __truncsfhf2, $22
-; NOSIMD-NEXT:    call $push24=, __extendhfsf2, $pop23
-; NOSIMD-NEXT:    f32.add $push25=, $pop22, $pop24
-; NOSIMD-NEXT:    call $push26=, __truncsfhf2, $pop25
-; NOSIMD-NEXT:    i32.store16 10($0), $pop26
-; NOSIMD-NEXT:    call $push27=, __truncsfhf2, $5
-; NOSIMD-NEXT:    call $push28=, __extendhfsf2, $pop27
-; NOSIMD-NEXT:    call $push29=, __truncsfhf2, $13
-; NOSIMD-NEXT:    call $push30=, __extendhfsf2, $pop29
-; NOSIMD-NEXT:    f32.mul $push31=, $pop28, $pop30
-; NOSIMD-NEXT:    call $push32=, __truncsfhf2, $21
-; NOSIMD-NEXT:    call $push33=, __extendhfsf2, $pop32
-; NOSIMD-NEXT:    f32.add $push34=, $pop31, $pop33
-; NOSIMD-NEXT:    call $push35=, __truncsfhf2, $pop34
-; NOSIMD-NEXT:    i32.store16 8($0), $pop35
-; NOSIMD-NEXT:    call $push36=, __truncsfhf2, $4
-; NOSIMD-NEXT:    call $push37=, __extendhfsf2, $pop36
-; NOSIMD-NEXT:    call $push38=, __truncsfhf2, $12
-; NOSIMD-NEXT:    call $push39=, __extendhfsf2, $pop38
-; NOSIMD-NEXT:    f32.mul $push40=, $pop37, $pop39
-; NOSIMD-NEXT:    call $push41=, __truncsfhf2, $20
-; NOSIMD-NEXT:    call $push42=, __extendhfsf2, $pop41
-; NOSIMD-NEXT:    f32.add $push43=, $pop40, $pop42
-; NOSIMD-NEXT:    call $push44=, __truncsfhf2, $pop43
-; NOSIMD-NEXT:    i32.store16 6($0), $pop44
-; NOSIMD-NEXT:    call $push45=, __truncsfhf2, $3
-; NOSIMD-NEXT:    call $push46=, __extendhfsf2, $pop45
-; NOSIMD-NEXT:    call $push47=, __truncsfhf2, $11
-; NOSIMD-NEXT:    call $push48=, __extendhfsf2, $pop47
-; NOSIMD-NEXT:    f32.mul $push49=, $pop46, $pop48
-; NOSIMD-NEXT:    call $push50=, __truncsfhf2, $19
-; NOSIMD-NEXT:    call $push51=, __extendhfsf2, $pop50
-; NOSIMD-NEXT:    f32.add $push52=, $pop49, $pop51
-; NOSIMD-NEXT:    call $push53=, __truncsfhf2, $pop52
-; NOSIMD-NEXT:    i32.store16 4($0), $pop53
-; NOSIMD-NEXT:    call $push54=, __truncsfhf2, $2
-; NOSIMD-NEXT:    call $push55=, __extendhfsf2, $pop54
-; NOSIMD-NEXT:    call $push56=, __truncsfhf2, $10
-; NOSIMD-NEXT:    call $push57=, __extendhfsf2, $pop56
-; NOSIMD-NEXT:    f32.mul $push58=, $pop55, $pop57
-; NOSIMD-NEXT:    call $push59=, __truncsfhf2, $18
-; NOSIMD-NEXT:    call $push60=, __extendhfsf2, $pop59
-; NOSIMD-NEXT:    f32.add $push61=, $pop58, $pop60
-; NOSIMD-NEXT:    call $push62=, __truncsfhf2, $pop61
-; NOSIMD-NEXT:    i32.store16 2($0), $pop62
-; NOSIMD-NEXT:    call $push63=, __truncsfhf2, $1
-; NOSIMD-NEXT:    call $push64=, __extendhfsf2, $pop63
-; NOSIMD-NEXT:    call $push65=, __truncsfhf2, $9
-; NOSIMD-NEXT:    call $push66=, __extendhfsf2, $pop65
-; NOSIMD-NEXT:    f32.mul $push67=, $pop64, $pop66
-; NOSIMD-NEXT:    call $push68=, __truncsfhf2, $17
-; NOSIMD-NEXT:    call $push69=, __extendhfsf2, $pop68
-; NOSIMD-NEXT:    f32.add $push70=, $pop67, $pop69
-; NOSIMD-NEXT:    call $push71=, __truncsfhf2, $pop70
-; NOSIMD-NEXT:    i32.store16 0($0), $pop71
-; NOSIMD-NEXT:    return
+; STRICT-NEXT:    f16x8.mul $push0=, $1, $0
+; STRICT-NEXT:    f16x8.add $push1=, $pop0, $2
+; STRICT-NEXT:    return $pop1
   %mul = fmul contract <8 x half> %b, %a
   %add = fadd contract <8 x half> %mul, %c
   ret <8 x half> %add
 }
 
+
 define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; RELAXED-LABEL: fadd_fmul_4xf32:
 ; RELAXED:         .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128)
@@ -614,412 +76,16 @@ define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float>
 ; STRICT-NEXT:    f32x4.mul $push0=, $1, $0
 ; STRICT-NEXT:    f32x4.add $push1=, $pop0, $2
 ; STRICT-NEXT:    return $pop1
-;
-; NOFP16-LABEL: fadd_fmul_4xf32:
-; NOFP16:         .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f32x4.mul $push0=, $1, $0
-; NOFP16-NEXT:    f32x4.add $push1=, $pop0, $2
-; NOFP16-NEXT:    return $pop1
-;
-; NOSIMD-LABEL: fadd_fmul_4xf32:
-; NOSIMD:         .functype fadd_fmul_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f32.mul $push0=, $8, $4
-; NOSIMD-NEXT:    f32.add $push1=, $pop0, $12
-; NOSIMD-NEXT:    f32.store 12($0), $pop1
-; NOSIMD-NEXT:    f32.mul $push2=, $7, $3
-; NOSIMD-NEXT:    f32.add $push3=, $pop2, $11
-; NOSIMD-NEXT:    f32.store 8($0), $pop3
-; NOSIMD-NEXT:    f32.mul $push4=, $6, $2
-; NOSIMD-NEXT:    f32.add $push5=, $pop4, $10
-; NOSIMD-NEXT:    f32.store 4($0), $pop5
-; NOSIMD-NEXT:    f32.mul $push6=, $5, $1
-; NOSIMD-NEXT:    f32.add $push7=, $pop6, $9
-; NOSIMD-NEXT:    f32.store 0($0), $pop7
-; NOSIMD-NEXT:    return
   %mul = fmul <4 x float> %b, %a
   %add = fadd contract <4 x float> %mul, %c
   ret <4 x float> %add
 }
 
-define <8 x half> @fmuladd_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
-; RELAXED-LABEL: fmuladd_contract_8xf16:
-; RELAXED:         .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f16x8.madd $push0=, $0, $1, $2
-; RELAXED-NEXT:    return $pop0
-;
-; STRICT-LABEL: fmuladd_contract_8xf16:
-; STRICT:         .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f16x8.madd $push0=, $0, $1, $2
-; STRICT-NEXT:    return $pop0
-;
-; NOFP16-LABEL: fmuladd_contract_8xf16:
-; NOFP16:         .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    call $push0=, __truncsfhf2, $16
-; NOFP16-NEXT:    call $push1=, __extendhfsf2, $pop0
-; NOFP16-NEXT:    call $push2=, __truncsfhf2, $8
-; NOFP16-NEXT:    call $push3=, __extendhfsf2, $pop2
-; NOFP16-NEXT:    f32.mul $push4=, $pop1, $pop3
-; NOFP16-NEXT:    call $push5=, __truncsfhf2, $24
-; NOFP16-NEXT:    call $push6=, __extendhfsf2, $pop5
-; NOFP16-NEXT:    f32.add $push7=, $pop4, $pop6
-; NOFP16-NEXT:    call $push8=, __truncsfhf2, $pop7
-; NOFP16-NEXT:    i32.store16 14($0), $pop8
-; NOFP16-NEXT:    call $push9=, __truncsfhf2, $15
-; NOFP16-NEXT:    call $push10=, __extendhfsf2, $pop9
-; NOFP16-NEXT:    call $push11=, __truncsfhf2, $7
-; NOFP16-NEXT:    call $push12=, __extendhfsf2, $pop11
-; NOFP16-NEXT:    f32.mul $push13=, $pop10, $pop12
-; NOFP16-NEXT:    call $push14=, __truncsfhf2, $23
-; NOFP16-NEXT:    call $push15=, __extendhfsf2, $pop14
-; NOFP16-NEXT:    f32.add $push16=, $pop13, $pop15
-; NOFP16-NEXT:    call $push17=, __truncsfhf2, $pop16
-; NOFP16-NEXT:    i32.store16 12($0), $pop17
-; NOFP16-NEXT:    call $push18=, __truncsfhf2, $14
-; NOFP16-NEXT:    call $push19=, __extendhfsf2, $pop18
-; NOFP16-NEXT:    call $push20=, __truncsfhf2, $6
-; NOFP16-NEXT:    call $push21=, __extendhfsf2, $pop20
-; NOFP16-NEXT:    f32.mul $push22=, $pop19, $pop21
-; NOFP16-NEXT:    call $push23=, __truncsfhf2, $22
-; NOFP16-NEXT:    call $push24=, __extendhfsf2, $pop23
-; NOFP16-NEXT:    f32.add $push25=, $pop22, $pop24
-; NOFP16-NEXT:    call $push26=, __truncsfhf2, $pop25
-; NOFP16-NEXT:    i32.store16 10($0), $pop26
-; NOFP16-NEXT:    call $push27=, __truncsfhf2, $13
-; NOFP16-NEXT:    call $push28=, __extendhfsf2, $pop27
-; NOFP16-NEXT:    call $push29=, __truncsfhf2, $5
-; NOFP16-NEXT:    call $push30=, __extendhfsf2, $pop29
-; NOFP16-NEXT:    f32.mul $push31=, $pop28, $pop30
-; NOFP16-NEXT:    call $push32=, __truncsfhf2, $21
-; NOFP16-NEXT:    call $push33=, __extendhfsf2, $pop32
-; NOFP16-NEXT:    f32.add $push34=, $pop31, $pop33
-; NOFP16-NEXT:    call $push35=, __truncsfhf2, $pop34
-; NOFP16-NEXT:    i32.store16 8($0), $pop35
-; NOFP16-NEXT:    call $push36=, __truncsfhf2, $12
-; NOFP16-NEXT:    call $push37=, __extendhfsf2, $pop36
-; NOFP16-NEXT:    call $push38=, __truncsfhf2, $4
-; NOFP16-NEXT:    call $push39=, __extendhfsf2, $pop38
-; NOFP16-NEXT:    f32.mul $push40=, $pop37, $pop39
-; NOFP16-NEXT:    call $push41=, __truncsfhf2, $20
-; NOFP16-NEXT:    call $push42=, __extendhfsf2, $pop41
-; NOFP16-NEXT:    f32.add $push43=, $pop40, $pop42
-; NOFP16-NEXT:    call $push44=, __truncsfhf2, $pop43
-; NOFP16-NEXT:    i32.store16 6($0), $pop44
-; NOFP16-NEXT:    call $push45=, __truncsfhf2, $11
-; NOFP16-NEXT:    call $push46=, __extendhfsf2, $pop45
-; NOFP16-NEXT:    call $push47=, __truncsfhf2, $3
-; NOFP16-NEXT:    call $push48=, __extendhfsf2, $pop47
-; NOFP16-NEXT:    f32.mul $push49=, $pop46, $pop48
-; NOFP16-NEXT:    call $push50=, __truncsfhf2, $19
-; NOFP16-NEXT:    call $push51=, __extendhfsf2, $pop50
-; NOFP16-NEXT:    f32.add $push52=, $pop49, $pop51
-; NOFP16-NEXT:    call $push53=, __truncsfhf2, $pop52
-; NOFP16-NEXT:    i32.store16 4($0), $pop53
-; NOFP16-NEXT:    call $push54=, __truncsfhf2, $10
-; NOFP16-NEXT:    call $push55=, __extendhfsf2, $pop54
-; NOFP16-NEXT:    call $push56=, __truncsfhf2, $2
-; NOFP16-NEXT:    call $push57=, __extendhfsf2, $pop56
-; NOFP16-NEXT:    f32.mul $push58=, $pop55, $pop57
-; NOFP16-NEXT:    call $push59=, __truncsfhf2, $18
-; NOFP16-NEXT:    call $push60=, __extendhfsf2, $pop59
-; NOFP16-NEXT:    f32.add $push61=, $pop58, $pop60
-; NOFP16-NEXT:    call $push62=, __truncsfhf2, $pop61
-; NOFP16-NEXT:    i32.store16 2($0), $pop62
-; NOFP16-NEXT:    call $push63=, __truncsfhf2, $9
-; NOFP16-NEXT:    call $push64=, __extendhfsf2, $pop63
-; NOFP16-NEXT:    call $push65=, __truncsfhf2, $1
-; NOFP16-NEXT:    call $push66=, __extendhfsf2, $pop65
-; NOFP16-NEXT:    f32.mul $push67=, $pop64, $pop66
-; NOFP16-NEXT:    call $push68=, __truncsfhf2, $17
-; NOFP16-NEXT:    call $push69=, __extendhfsf2, $pop68
-; NOFP16-NEXT:    f32.add $push70=, $pop67, $pop69
-; NOFP16-NEXT:    call $push71=, __truncsfhf2, $pop70
-; NOFP16-NEXT:    i32.store16 0($0), $pop71
-; NOFP16-NEXT:    return
-;
-; NOSIMD-LABEL: fmuladd_contract_8xf16:
-; NOSIMD:         .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    call $push0=, __truncsfhf2, $16
-; NOSIMD-NEXT:    call $push1=, __extendhfsf2, $pop0
-; NOSIMD-NEXT:    call $push2=, __truncsfhf2, $8
-; NOSIMD-NEXT:    call $push3=, __extendhfsf2, $pop2
-; NOSIMD-NEXT:    f32.mul $push4=, $pop1, $pop3
-; NOSIMD-NEXT:    call $push5=, __truncsfhf2, $24
-; NOSIMD-NEXT:    call $push6=, __extendhfsf2, $pop5
-; NOSIMD-NEXT:    f32.add $push7=, $pop4, $pop6
-; NOSIMD-NEXT:    call $push8=, __truncsfhf2, $pop7
-; NOSIMD-NEXT:    i32.store16 14($0), $pop8
-; NOSIMD-NEXT:    call $push9=, __truncsfhf2, $15
-; NOSIMD-NEXT:    call $push10=, __extendhfsf2, $pop9
-; NOSIMD-NEXT:    call $push11=, __truncsfhf2, $7
-; NOSIMD-NEXT:    call $push12=, __extendhfsf2, $pop11
-; NOSIMD-NEXT:    f32.mul $push13=, $pop10, $pop12
-; NOSIMD-NEXT:    call $push14=, __truncsfhf2, $23
-; NOSIMD-NEXT:    call $push15=, __extendhfsf2, $pop14
-; NOSIMD-NEXT:    f32.add $push16=, $pop13, $pop15
-; NOSIMD-NEXT:    call $push17=, __truncsfhf2, $pop16
-; NOSIMD-NEXT:    i32.store16 12($0), $pop17
-; NOSIMD-NEXT:    call $push18=, __truncsfhf2, $14
-; NOSIMD-NEXT:    call $push19=, __extendhfsf2, $pop18
-; NOSIMD-NEXT:    call $push20=, __truncsfhf2, $6
-; NOSIMD-NEXT:    call $push21=, __extendhfsf2, $pop20
-; NOSIMD-NEXT:    f32.mul $push22=, $pop19, $pop21
-; NOSIMD-NEXT:    call $push23=, __truncsfhf2, $22
-; NOSIMD-NEXT:    call $push24=, __extendhfsf2, $pop23
-; NOSIMD-NEXT:    f32.add $push25=, $pop22, $pop24
-; NOSIMD-NEXT:    call $push26=, __truncsfhf2, $pop25
-; NOSIMD-NEXT:    i32.store16 10($0), $pop26
-; NOSIMD-NEXT:    call $push27=, __truncsfhf2, $13
-; NOSIMD-NEXT:    call $push28=, __extendhfsf2, $pop27
-; NOSIMD-NEXT:    call $push29=, __truncsfhf2, $5
-; NOSIMD-NEXT:    call $push30=, __extendhfsf2, $pop29
-; NOSIMD-NEXT:    f32.mul $push31=, $pop28, $pop30
-; NOSIMD-NEXT:    call $push32=, __truncsfhf2, $21
-; NOSIMD-NEXT:    call $push33=, __extendhfsf2, $pop32
-; NOSIMD-NEXT:    f32.add $push34=, $pop31, $pop33
-; NOSIMD-NEXT:    call $push35=, __truncsfhf2, $pop34
-; NOSIMD-NEXT:    i32.store16 8($0), $pop35
-; NOSIMD-NEXT:    call $push36=, __truncsfhf2, $12
-; NOSIMD-NEXT:    call $push37=, __extendhfsf2, $pop36
-; NOSIMD-NEXT:    call $push38=, __truncsfhf2, $4
-; NOSIMD-NEXT:    call $push39=, __extendhfsf2, $pop38
-; NOSIMD-NEXT:    f32.mul $push40=, $pop37, $pop39
-; NOSIMD-NEXT:    call $push41=, __truncsfhf2, $20
-; NOSIMD-NEXT:    call $push42=, __extendhfsf2, $pop41
-; NOSIMD-NEXT:    f32.add $push43=, $pop40, $pop42
-; NOSIMD-NEXT:    call $push44=, __truncsfhf2, $pop43
-; NOSIMD-NEXT:    i32.store16 6($0), $pop44
-; NOSIMD-NEXT:    call $push45=, __truncsfhf2, $11
-; NOSIMD-NEXT:    call $push46=, __extendhfsf2, $pop45
-; NOSIMD-NEXT:    call $push47=, __truncsfhf2, $3
-; NOSIMD-NEXT:    call $push48=, __extendhfsf2, $pop47
-; NOSIMD-NEXT:    f32.mul $push49=, $pop46, $pop48
-; NOSIMD-NEXT:    call $push50=, __truncsfhf2, $19
-; NOSIMD-NEXT:    call $push51=, __extendhfsf2, $pop50
-; NOSIMD-NEXT:    f32.add $push52=, $pop49, $pop51
-; NOSIMD-NEXT:    call $push53=, __truncsfhf2, $pop52
-; NOSIMD-NEXT:    i32.store16 4($0), $pop53
-; NOSIMD-NEXT:    call $push54=, __truncsfhf2, $10
-; NOSIMD-NEXT:    call $push55=, __extendhfsf2, $pop54
-; NOSIMD-NEXT:    call $push56=, __truncsfhf2, $2
-; NOSIMD-NEXT:    call $push57=, __extendhfsf2, $pop56
-; NOSIMD-NEXT:    f32.mul $push58=, $pop55, $pop57
-; NOSIMD-NEXT:    call $push59=, __truncsfhf2, $18
-; NOSIMD-NEXT:    call $push60=, __extendhfsf2, $pop59
-; NOSIMD-NEXT:    f32.add $push61=, $pop58, $pop60
-; NOSIMD-NEXT:    call $push62=, __truncsfhf2, $pop61
-; NOSIMD-NEXT:    i32.store16 2($0), $pop62
-; NOSIMD-NEXT:    call $push63=, __truncsfhf2, $9
-; NOSIMD-NEXT:    call $push64=, __extendhfsf2, $pop63
-; NOSIMD-NEXT:    call $push65=, __truncsfhf2, $1
-; NOSIMD-NEXT:    call $push66=, __extendhfsf2, $pop65
-; NOSIMD-NEXT:    f32.mul $push67=, $pop64, $pop66
-; NOSIMD-NEXT:    call $push68=, __truncsfhf2, $17
-; NOSIMD-NEXT:    call $push69=, __extendhfsf2, $pop68
-; NOSIMD-NEXT:    f32.add $push70=, $pop67, $pop69
-; NOSIMD-NEXT:    call $push71=, __truncsfhf2, $pop70
-; NOSIMD-NEXT:    i32.store16 0($0), $pop71
-; NOSIMD-NEXT:    return
-  %fma = call contract <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c)
-  ret <8 x half> %fma
-}
-
-define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
-; RELAXED-LABEL: fmuladd_8xf16:
-; RELAXED:         .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f16x8.madd $push0=, $0, $1, $2
-; RELAXED-NEXT:    return $pop0
-;
-; STRICT-LABEL: fmuladd_8xf16:
-; STRICT:         .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f16x8.madd $push0=, $0, $1, $2
-; STRICT-NEXT:    return $pop0
-;
-; NOFP16-LABEL: fmuladd_8xf16:
-; NOFP16:         .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    call $push0=, __truncsfhf2, $16
-; NOFP16-NEXT:    call $push1=, __extendhfsf2, $pop0
-; NOFP16-NEXT:    call $push2=, __truncsfhf2, $8
-; NOFP16-NEXT:    call $push3=, __extendhfsf2, $pop2
-; NOFP16-NEXT:    f32.mul $push4=, $pop1, $pop3
-; NOFP16-NEXT:    call $push5=, __truncsfhf2, $24
-; NOFP16-NEXT:    call $push6=, __extendhfsf2, $pop5
-; NOFP16-NEXT:    f32.add $push7=, $pop4, $pop6
-; NOFP16-NEXT:    call $push8=, __truncsfhf2, $pop7
-; NOFP16-NEXT:    i32.store16 14($0), $pop8
-; NOFP16-NEXT:    call $push9=, __truncsfhf2, $15
-; NOFP16-NEXT:    call $push10=, __extendhfsf2, $pop9
-; NOFP16-NEXT:    call $push11=, __truncsfhf2, $7
-; NOFP16-NEXT:    call $push12=, __extendhfsf2, $pop11
-; NOFP16-NEXT:    f32.mul $push13=, $pop10, $pop12
-; NOFP16-NEXT:    call $push14=, __truncsfhf2, $23
-; NOFP16-NEXT:    call $push15=, __extendhfsf2, $pop14
-; NOFP16-NEXT:    f32.add $push16=, $pop13, $pop15
-; NOFP16-NEXT:    call $push17=, __truncsfhf2, $pop16
-; NOFP16-NEXT:    i32.store16 12($0), $pop17
-; NOFP16-NEXT:    call $push18=, __truncsfhf2, $14
-; NOFP16-NEXT:    call $push19=, __extendhfsf2, $pop18
-; NOFP16-NEXT:    call $push20=, __truncsfhf2, $6
-; NOFP16-NEXT:    call $push21=, __extendhfsf2, $pop20
-; NOFP16-NEXT:    f32.mul $push22=, $pop19, $pop21
-; NOFP16-NEXT:    call $push23=, __truncsfhf2, $22
-; NOFP16-NEXT:    call $push24=, __extendhfsf2, $pop23
-; NOFP16-NEXT:    f32.add $push25=, $pop22, $pop24
-; NOFP16-NEXT:    call $push26=, __truncsfhf2, $pop25
-; NOFP16-NEXT:    i32.store16 10($0), $pop26
-; NOFP16-NEXT:    call $push27=, __truncsfhf2, $13
-; NOFP16-NEXT:    call $push28=, __extendhfsf2, $pop27
-; NOFP16-NEXT:    call $push29=, __truncsfhf2, $5
-; NOFP16-NEXT:    call $push30=, __extendhfsf2, $pop29
-; NOFP16-NEXT:    f32.mul $push31=, $pop28, $pop30
-; NOFP16-NEXT:    call $push32=, __truncsfhf2, $21
-; NOFP16-NEXT:    call $push33=, __extendhfsf2, $pop32
-; NOFP16-NEXT:    f32.add $push34=, $pop31, $pop33
-; NOFP16-NEXT:    call $push35=, __truncsfhf2, $pop34
-; NOFP16-NEXT:    i32.store16 8($0), $pop35
-; NOFP16-NEXT:    call $push36=, __truncsfhf2, $12
-; NOFP16-NEXT:    call $push37=, __extendhfsf2, $pop36
-; NOFP16-NEXT:    call $push38=, __truncsfhf2, $4
-; NOFP16-NEXT:    call $push39=, __extendhfsf2, $pop38
-; NOFP16-NEXT:    f32.mul $push40=, $pop37, $pop39
-; NOFP16-NEXT:    call $push41=, __truncsfhf2, $20
-; NOFP16-NEXT:    call $push42=, __extendhfsf2, $pop41
-; NOFP16-NEXT:    f32.add $push43=, $pop40, $pop42
-; NOFP16-NEXT:    call $push44=, __truncsfhf2, $pop43
-; NOFP16-NEXT:    i32.store16 6($0), $pop44
-; NOFP16-NEXT:    call $push45=, __truncsfhf2, $11
-; NOFP16-NEXT:    call $push46=, __extendhfsf2, $pop45
-; NOFP16-NEXT:    call $push47=, __truncsfhf2, $3
-; NOFP16-NEXT:    call $push48=, __extendhfsf2, $pop47
-; NOFP16-NEXT:    f32.mul $push49=, $pop46, $pop48
-; NOFP16-NEXT:    call $push50=, __truncsfhf2, $19
-; NOFP16-NEXT:    call $push51=, __extendhfsf2, $pop50
-; NOFP16-NEXT:    f32.add $push52=, $pop49, $pop51
-; NOFP16-NEXT:    call $push53=, __truncsfhf2, $pop52
-; NOFP16-NEXT:    i32.store16 4($0), $pop53
-; NOFP16-NEXT:    call $push54=, __truncsfhf2, $10
-; NOFP16-NEXT:    call $push55=, __extendhfsf2, $pop54
-; NOFP16-NEXT:    call $push56=, __truncsfhf2, $2
-; NOFP16-NEXT:    call $push57=, __extendhfsf2, $pop56
-; NOFP16-NEXT:    f32.mul $push58=, $pop55, $pop57
-; NOFP16-NEXT:    call $push59=, __truncsfhf2, $18
-; NOFP16-NEXT:    call $push60=, __extendhfsf2, $pop59
-; NOFP16-NEXT:    f32.add $push61=, $pop58, $pop60
-; NOFP16-NEXT:    call $push62=, __truncsfhf2, $pop61
-; NOFP16-NEXT:    i32.store16 2($0), $pop62
-; NOFP16-NEXT:    call $push63=, __truncsfhf2, $9
-; NOFP16-NEXT:    call $push64=, __extendhfsf2, $pop63
-; NOFP16-NEXT:    call $push65=, __truncsfhf2, $1
-; NOFP16-NEXT:    call $push66=, __extendhfsf2, $pop65
-; NOFP16-NEXT:    f32.mul $push67=, $pop64, $pop66
-; NOFP16-NEXT:    call $push68=, __truncsfhf2, $17
-; NOFP16-NEXT:    call $push69=, __extendhfsf2, $pop68
-; NOFP16-NEXT:    f32.add $push70=, $pop67, $pop69
-; NOFP16-NEXT:    call $push71=, __truncsfhf2, $pop70
-; NOFP16-NEXT:    i32.store16 0($0), $pop71
-; NOFP16-NEXT:    return
-;
-; NOSIMD-LABEL: fmuladd_8xf16:
-; NOSIMD:         .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    call $push0=, __truncsfhf2, $16
-; NOSIMD-NEXT:    call $push1=, __extendhfsf2, $pop0
-; NOSIMD-NEXT:    call $push2=, __truncsfhf2, $8
-; NOSIMD-NEXT:    call $push3=, __extendhfsf2, $pop2
-; NOSIMD-NEXT:    f32.mul $push4=, $pop1, $pop3
-; NOSIMD-NEXT:    call $push5=, __truncsfhf2, $24
-; NOSIMD-NEXT:    call $push6=, __extendhfsf2, $pop5
-; NOSIMD-NEXT:    f32.add $push7=, $pop4, $pop6
-; NOSIMD-NEXT:    call $push8=, __truncsfhf2, $pop7
-; NOSIMD-NEXT:    i32.store16 14($0), $pop8
-; NOSIMD-NEXT:    call $push9=, __truncsfhf2, $15
-; NOSIMD-NEXT:    call $push10=, __extendhfsf2, $pop9
-; NOSIMD-NEXT:    call $push11=, __truncsfhf2, $7
-; NOSIMD-NEXT:    call $push12=, __extendhfsf2, $pop11
-; NOSIMD-NEXT:    f32.mul $push13=, $pop10, $pop12
-; NOSIMD-NEXT:    call $push14=, __truncsfhf2, $23
-; NOSIMD-NEXT:    call $push15=, __extendhfsf2, $pop14
-; NOSIMD-NEXT:    f32.add $push16=, $pop13, $pop15
-; NOSIMD-NEXT:    call $push17=, __truncsfhf2, $pop16
-; NOSIMD-NEXT:    i32.store16 12($0), $pop17
-; NOSIMD-NEXT:    call $push18=, __truncsfhf2, $14
-; NOSIMD-NEXT:    call $push19=, __extendhfsf2, $pop18
-; NOSIMD-NEXT:    call $push20=, __truncsfhf2, $6
-; NOSIMD-NEXT:    call $push21=, __extendhfsf2, $pop20
-; NOSIMD-NEXT:    f32.mul $push22=, $pop19, $pop21
-; NOSIMD-NEXT:    call $push23=, __truncsfhf2, $22
-; NOSIMD-NEXT:    call $push24=, __extendhfsf2, $pop23
-; NOSIMD-NEXT:    f32.add $push25=, $pop22, $pop24
-; NOSIMD-NEXT:    call $push26=, __truncsfhf2, $pop25
-; NOSIMD-NEXT:    i32.store16 10($0), $pop26
-; NOSIMD-NEXT:    call $push27=, __truncsfhf2, $13
-; NOSIMD-NEXT:    call $push28=, __extendhfsf2, $pop27
-; NOSIMD-NEXT:    call $push29=, __truncsfhf2, $5
-; NOSIMD-NEXT:    call $push30=, __extendhfsf2, $pop29
-; NOSIMD-NEXT:    f32.mul $push31=, $pop28, $pop30
-; NOSIMD-NEXT:    call $push32=, __truncsfhf2, $21
-; NOSIMD-NEXT:    call $push33=, __extendhfsf2, $pop32
-; NOSIMD-NEXT:    f32.add $push34=, $pop31, $pop33
-; NOSIMD-NEXT:    call $push35=, __truncsfhf2, $pop34
-; NOSIMD-NEXT:    i32.store16 8($0), $pop35
-; NOSIMD-NEXT:    call $push36=, __truncsfhf2, $12
-; NOSIMD-NEXT:    call $push37=, __extendhfsf2, $pop36
-; NOSIMD-NEXT:    call $push38=, __truncsfhf2, $4
-; NOSIMD-NEXT:    call $push39=, __extendhfsf2, $pop38
-; NOSIMD-NEXT:    f32.mul $push40=, $pop37, $pop39
-; NOSIMD-NEXT:    call $push41=, __truncsfhf2, $20
-; NOSIMD-NEXT:    call $push42=, __extendhfsf2, $pop41
-; NOSIMD-NEXT:    f32.add $push43=, $pop40, $pop42
-; NOSIMD-NEXT:    call $push44=, __truncsfhf2, $pop43
-; NOSIMD-NEXT:    i32.store16 6($0), $pop44
-; NOSIMD-NEXT:    call $push45=, __truncsfhf2, $11
-; NOSIMD-NEXT:    call $push46=, __extendhfsf2, $pop45
-; NOSIMD-NEXT:    call $push47=, __truncsfhf2, $3
-; NOSIMD-NEXT:    call $push48=, __extendhfsf2, $pop47
-; NOSIMD-NEXT:    f32.mul $push49=, $pop46, $pop48
-; NOSIMD-NEXT:    call $push50=, __truncsfhf2, $19
-; NOSIMD-NEXT:    call $push51=, __extendhfsf2, $pop50
-; NOSIMD-NEXT:    f32.add $push52=, $pop49, $pop51
-; NOSIMD-NEXT:    call $push53=, __truncsfhf2, $pop52
-; NOSIMD-NEXT:    i32.store16 4($0), $pop53
-; NOSIMD-NEXT:    call $push54=, __truncsfhf2, $10
-; NOSIMD-NEXT:    call $push55=, __extendhfsf2, $pop54
-; NOSIMD-NEXT:    call $push56=, __truncsfhf2, $2
-; NOSIMD-NEXT:    call $push57=, __extendhfsf2, $pop56
-; NOSIMD-NEXT:    f32.mul $push58=, $pop55, $pop57
-; NOSIMD-NEXT:    call $push59=, __truncsfhf2, $18
-; NOSIMD-NEXT:    call $push60=, __extendhfsf2, $pop59
-; NOSIMD-NEXT:    f32.add $push61=, $pop58, $pop60
-; NOSIMD-NEXT:    call $push62=, __truncsfhf2, $pop61
-; NOSIMD-NEXT:    i32.store16 2($0), $pop62
-; NOSIMD-NEXT:    call $push63=, __truncsfhf2, $9
-; NOSIMD-NEXT:    call $push64=, __extendhfsf2, $pop63
-; NOSIMD-NEXT:    call $push65=, __truncsfhf2, $1
-; NOSIMD-NEXT:    call $push66=, __extendhfsf2, $pop65
-; NOSIMD-NEXT:    f32.mul $push67=, $pop64, $pop66
-; NOSIMD-NEXT:    call $push68=, __truncsfhf2, $17
-; NOSIMD-NEXT:    call $push69=, __extendhfsf2, $pop68
-; NOSIMD-NEXT:    f32.add $push70=, $pop67, $pop69
-; NOSIMD-NEXT:    call $push71=, __truncsfhf2, $pop70
-; NOSIMD-NEXT:    i32.store16 0($0), $pop71
-; NOSIMD-NEXT:    return
-  %fma = call <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c)
-  ret <8 x half> %fma
-}
-
 define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; RELAXED-LABEL: fmuladd_contract_4xf32:
 ; RELAXED:         .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128)
 ; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f32x4.relaxed_madd $push0=, $0, $1, $2
+; RELAXED-NEXT:    f32x4.relaxed_madd $push0=, $2, $0, $1
 ; RELAXED-NEXT:    return $pop0
 ;
 ; STRICT-LABEL: fmuladd_contract_4xf32:
@@ -1028,40 +94,18 @@ define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x
 ; STRICT-NEXT:    f32x4.mul $push0=, $0, $1
 ; STRICT-NEXT:    f32x4.add $push1=, $pop0, $2
 ; STRICT-NEXT:    return $pop1
-;
-; NOFP16-LABEL: fmuladd_contract_4xf32:
-; NOFP16:         .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f32x4.mul $push0=, $0, $1
-; NOFP16-NEXT:    f32x4.add $push1=, $pop0, $2
-; NOFP16-NEXT:    return $pop1
-;
-; NOSIMD-LABEL: fmuladd_contract_4xf32:
-; NOSIMD:         .functype fmuladd_contract_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f32.mul $push0=, $4, $8
-; NOSIMD-NEXT:    f32.add $push1=, $pop0, $12
-; NOSIMD-NEXT:    f32.store 12($0), $pop1
-; NOSIMD-NEXT:    f32.mul $push2=, $3, $7
-; NOSIMD-NEXT:    f32.add $push3=, $pop2, $11
-; NOSIMD-NEXT:    f32.store 8($0), $pop3
-; NOSIMD-NEXT:    f32.mul $push4=, $2, $6
-; NOSIMD-NEXT:    f32.add $push5=, $pop4, $10
-; NOSIMD-NEXT:    f32.store 4($0), $pop5
-; NOSIMD-NEXT:    f32.mul $push6=, $1, $5
-; NOSIMD-NEXT:    f32.add $push7=, $pop6, $9
-; NOSIMD-NEXT:    f32.store 0($0), $pop7
-; NOSIMD-NEXT:    return
   %fma = call contract <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c)
   ret <4 x float> %fma
 }
 
+; TODO: This should also have relaxed_madd in RELAXED case
 define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; RELAXED-LABEL: fmuladd_4xf32:
 ; RELAXED:         .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
 ; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f32x4.relaxed_madd $push0=, $0, $1, $2
-; RELAXED-NEXT:    return $pop0
+; RELAXED-NEXT:    f32x4.mul $push0=, $0, $1
+; RELAXED-NEXT:    f32x4.add $push1=, $pop0, $2
+; RELAXED-NEXT:    return $pop1
 ;
 ; STRICT-LABEL: fmuladd_4xf32:
 ; STRICT:         .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
@@ -1069,170 +113,10 @@ define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c
 ; STRICT-NEXT:    f32x4.mul $push0=, $0, $1
 ; STRICT-NEXT:    f32x4.add $push1=, $pop0, $2
 ; STRICT-NEXT:    return $pop1
-;
-; NOFP16-LABEL: fmuladd_4xf32:
-; NOFP16:         .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f32x4.mul $push0=, $0, $1
-; NOFP16-NEXT:    f32x4.add $push1=, $pop0, $2
-; NOFP16-NEXT:    return $pop1
-;
-; NOSIMD-LABEL: fmuladd_4xf32:
-; NOSIMD:         .functype fmuladd_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f32.mul $push0=, $4, $8
-; NOSIMD-NEXT:    f32.add $push1=, $pop0, $12
-; NOSIMD-NEXT:    f32.store 12($0), $pop1
-; NOSIMD-NEXT:    f32.mul $push2=, $3, $7
-; NOSIMD-NEXT:    f32.add $push3=, $pop2, $11
-; NOSIMD-NEXT:    f32.store 8($0), $pop3
-; NOSIMD-NEXT:    f32.mul $push4=, $2, $6
-; NOSIMD-NEXT:    f32.add $push5=, $pop4, $10
-; NOSIMD-NEXT:    f32.store 4($0), $pop5
-; NOSIMD-NEXT:    f32.mul $push6=, $1, $5
-; NOSIMD-NEXT:    f32.add $push7=, $pop6, $9
-; NOSIMD-NEXT:    f32.store 0($0), $pop7
-; NOSIMD-NEXT:    return
   %fma = call <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c)
   ret <4 x float> %fma
 }
 
-define <8 x float> @fmuladd_8xf32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
-; RELAXED-LABEL: fmuladd_8xf32:
-; RELAXED:         .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f32x4.mul $push0=, $2, $4
-; RELAXED-NEXT:    f32x4.add $push1=, $pop0, $6
-; RELAXED-NEXT:    v128.store 16($0), $pop1
-; RELAXED-NEXT:    f32x4.mul $push2=, $1, $3
-; RELAXED-NEXT:    f32x4.add $push3=, $pop2, $5
-; RELAXED-NEXT:    v128.store 0($0), $pop3
-; RELAXED-NEXT:    return
-;
-; STRICT-LABEL: fmuladd_8xf32:
-; STRICT:         .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f32x4.mul $push0=, $2, $4
-; STRICT-NEXT:    f32x4.add $push1=, $pop0, $6
-; STRICT-NEXT:    v128.store 16($0), $pop1
-; STRICT-NEXT:    f32x4.mul $push2=, $1, $3
-; STRICT-NEXT:    f32x4.add $push3=, $pop2, $5
-; STRICT-NEXT:    v128.store 0($0), $pop3
-; STRICT-NEXT:    return
-;
-; NOFP16-LABEL: fmuladd_8xf32:
-; NOFP16:         .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f32x4.mul $push0=, $2, $4
-; NOFP16-NEXT:    f32x4.add $push1=, $pop0, $6
-; NOFP16-NEXT:    v128.store 16($0), $pop1
-; NOFP16-NEXT:    f32x4.mul $push2=, $1, $3
-; NOFP16-NEXT:    f32x4.add $push3=, $pop2, $5
-; NOFP16-NEXT:    v128.store 0($0), $pop3
-; NOFP16-NEXT:    return
-;
-; NOSIMD-LABEL: fmuladd_8xf32:
-; NOSIMD:         .functype fmuladd_8xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f32.mul $push0=, $8, $16
-; NOSIMD-NEXT:    f32.add $push1=, $pop0, $24
-; NOSIMD-NEXT:    f32.store 28($0), $pop1
-; NOSIMD-NEXT:    f32.mul $push2=, $7, $15
-; NOSIMD-NEXT:    f32.add $push3=, $pop2, $23
-; NOSIMD-NEXT:    f32.store 24($0), $pop3
-; NOSIMD-NEXT:    f32.mul $push4=, $6, $14
-; NOSIMD-NEXT:    f32.add $push5=, $pop4, $22
-; NOSIMD-NEXT:    f32.store 20($0), $pop5
-; NOSIMD-NEXT:    f32.mul $push6=, $5, $13
-; NOSIMD-NEXT:    f32.add $push7=, $pop6, $21
-; NOSIMD-NEXT:    f32.store 16($0), $pop7
-; NOSIMD-NEXT:    f32.mul $push8=, $4, $12
-; NOSIMD-NEXT:    f32.add $push9=, $pop8, $20
-; NOSIMD-NEXT:    f32.store 12($0), $pop9
-; NOSIMD-NEXT:    f32.mul $push10=, $3, $11
-; NOSIMD-NEXT:    f32.add $push11=, $pop10, $19
-; NOSIMD-NEXT:    f32.store 8($0), $pop11
-; NOSIMD-NEXT:    f32.mul $push12=, $2, $10
-; NOSIMD-NEXT:    f32.add $push13=, $pop12, $18
-; NOSIMD-NEXT:    f32.store 4($0), $pop13
-; NOSIMD-NEXT:    f32.mul $push14=, $1, $9
-; NOSIMD-NEXT:    f32.add $push15=, $pop14, $17
-; NOSIMD-NEXT:    f32.store 0($0), $pop15
-; NOSIMD-NEXT:    return
-  %fma = call <8 x float> @llvm.fmuladd(<8 x float> %a, <8 x float> %b, <8 x float> %c)
-  ret <8 x float> %fma
-}
-
-define <2 x double> @fmuladd_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; RELAXED-LABEL: fmuladd_contract_2xf64:
-; RELAXED:         .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f64x2.relaxed_madd $push0=, $0, $1, $2
-; RELAXED-NEXT:    return $pop0
-;
-; STRICT-LABEL: fmuladd_contract_2xf64:
-; STRICT:         .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f64x2.mul $push0=, $0, $1
-; STRICT-NEXT:    f64x2.add $push1=, $pop0, $2
-; STRICT-NEXT:    return $pop1
-;
-; NOFP16-LABEL: fmuladd_contract_2xf64:
-; NOFP16:         .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f64x2.mul $push0=, $0, $1
-; NOFP16-NEXT:    f64x2.add $push1=, $pop0, $2
-; NOFP16-NEXT:    return $pop1
-;
-; NOSIMD-LABEL: fmuladd_contract_2xf64:
-; NOSIMD:         .functype fmuladd_contract_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f64.mul $push0=, $2, $4
-; NOSIMD-NEXT:    f64.add $push1=, $pop0, $6
-; NOSIMD-NEXT:    f64.store 8($0), $pop1
-; NOSIMD-NEXT:    f64.mul $push2=, $1, $3
-; NOSIMD-NEXT:    f64.add $push3=, $pop2, $5
-; NOSIMD-NEXT:    f64.store 0($0), $pop3
-; NOSIMD-NEXT:    return
-  %fma = call contract <2 x double> @llvm.fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
-  ret <2 x double> %fma
-}
-
-define <2 x double> @fmuladd_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; RELAXED-LABEL: fmuladd_2xf64:
-; RELAXED:         .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f64x2.relaxed_madd $push0=, $0, $1, $2
-; RELAXED-NEXT:    return $pop0
-;
-; STRICT-LABEL: fmuladd_2xf64:
-; STRICT:         .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f64x2.mul $push0=, $0, $1
-; STRICT-NEXT:    f64x2.add $push1=, $pop0, $2
-; STRICT-NEXT:    return $pop1
-;
-; NOFP16-LABEL: fmuladd_2xf64:
-; NOFP16:         .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f64x2.mul $push0=, $0, $1
-; NOFP16-NEXT:    f64x2.add $push1=, $pop0, $2
-; NOFP16-NEXT:    return $pop1
-;
-; NOSIMD-LABEL: fmuladd_2xf64:
-; NOSIMD:         .functype fmuladd_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f64.mul $push0=, $2, $4
-; NOSIMD-NEXT:    f64.add $push1=, $pop0, $6
-; NOSIMD-NEXT:    f64.store 8($0), $pop1
-; NOSIMD-NEXT:    f64.mul $push2=, $1, $3
-; NOSIMD-NEXT:    f64.add $push3=, $pop2, $5
-; NOSIMD-NEXT:    f64.store 0($0), $pop3
-; NOSIMD-NEXT:    return
-  %fma = call <2 x double> @llvm.fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
-  ret <2 x double> %fma
-}
-
 define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; RELAXED-LABEL: fma_4xf32:
 ; RELAXED:         .functype fma_4xf32 (v128, v128, v128) -> (v128)
@@ -1283,44 +167,6 @@ define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; STRICT-NEXT:    call $push18=, fmaf, $pop17, $pop16, $pop15
 ; STRICT-NEXT:    f32x4.replace_lane $push19=, $pop14, 3, $pop18
 ; STRICT-NEXT:    return $pop19
-;
-; NOFP16-LABEL: fma_4xf32:
-; NOFP16:         .functype fma_4xf32 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f32x4.extract_lane $push2=, $0, 0
-; NOFP16-NEXT:    f32x4.extract_lane $push1=, $1, 0
-; NOFP16-NEXT:    f32x4.extract_lane $push0=, $2, 0
-; NOFP16-NEXT:    call $push3=, fmaf, $pop2, $pop1, $pop0
-; NOFP16-NEXT:    f32x4.splat $push4=, $pop3
-; NOFP16-NEXT:    f32x4.extract_lane $push7=, $0, 1
-; NOFP16-NEXT:    f32x4.extract_lane $push6=, $1, 1
-; NOFP16-NEXT:    f32x4.extract_lane $push5=, $2, 1
-; NOFP16-NEXT:    call $push8=, fmaf, $pop7, $pop6, $pop5
-; NOFP16-NEXT:    f32x4.replace_lane $push9=, $pop4, 1, $pop8
-; NOFP16-NEXT:    f32x4.extract_lane $push12=, $0, 2
-; NOFP16-NEXT:    f32x4.extract_lane $push11=, $1, 2
-; NOFP16-NEXT:    f32x4.extract_lane $push10=, $2, 2
-; NOFP16-NEXT:    call $push13=, fmaf, $pop12, $pop11, $pop10
-; NOFP16-NEXT:    f32x4.replace_lane $push14=, $pop9, 2, $pop13
-; NOFP16-NEXT:    f32x4.extract_lane $push17=, $0, 3
-; NOFP16-NEXT:    f32x4.extract_lane $push16=, $1, 3
-; NOFP16-NEXT:    f32x4.extract_lane $push15=, $2, 3
-; NOFP16-NEXT:    call $push18=, fmaf, $pop17, $pop16, $pop15
-; NOFP16-NEXT:    f32x4.replace_lane $push19=, $pop14, 3, $pop18
-; NOFP16-NEXT:    return $pop19
-;
-; NOSIMD-LABEL: fma_4xf32:
-; NOSIMD:         .functype fma_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    call $push0=, fmaf, $4, $8, $12
-; NOSIMD-NEXT:    f32.store 12($0), $pop0
-; NOSIMD-NEXT:    call $push1=, fmaf, $3, $7, $11
-; NOSIMD-NEXT:    f32.store 8($0), $pop1
-; NOSIMD-NEXT:    call $push2=, fmaf, $2, $6, $10
-; NOSIMD-NEXT:    f32.store 4($0), $pop2
-; NOSIMD-NEXT:    call $push3=, fmaf, $1, $5, $9
-; NOSIMD-NEXT:    f32.store 0($0), $pop3
-; NOSIMD-NEXT:    return
   %fma = call <4 x float> @llvm.fma(<4 x float> %a, <4 x float> %b, <4 x float> %c)
   ret <4 x float> %fma
 }
@@ -1330,9 +176,9 @@ define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8
 ; RELAXED-LABEL: fadd_fmul_contract_8xf32:
 ; RELAXED:         .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
 ; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f32x4.relaxed_madd $push0=, $4, $2, $6
+; RELAXED-NEXT:    f32x4.relaxed_madd $push0=, $6, $4, $2
 ; RELAXED-NEXT:    v128.store 16($0), $pop0
-; RELAXED-NEXT:    f32x4.relaxed_madd $push1=, $3, $1, $5
+; RELAXED-NEXT:    f32x4.relaxed_madd $push1=, $5, $3, $1
 ; RELAXED-NEXT:    v128.store 0($0), $pop1
 ; RELAXED-NEXT:    return
 ;
@@ -1346,56 +192,17 @@ define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8
 ; STRICT-NEXT:    f32x4.add $push3=, $pop2, $5
 ; STRICT-NEXT:    v128.store 0($0), $pop3
 ; STRICT-NEXT:    return
-;
-; NOFP16-LABEL: fadd_fmul_contract_8xf32:
-; NOFP16:         .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f32x4.mul $push0=, $4, $2
-; NOFP16-NEXT:    f32x4.add $push1=, $pop0, $6
-; NOFP16-NEXT:    v128.store 16($0), $pop1
-; NOFP16-NEXT:    f32x4.mul $push2=, $3, $1
-; NOFP16-NEXT:    f32x4.add $push3=, $pop2, $5
-; NOFP16-NEXT:    v128.store 0($0), $pop3
-; NOFP16-NEXT:    return
-;
-; NOSIMD-LABEL: fadd_fmul_contract_8xf32:
-; NOSIMD:         .functype fadd_fmul_contract_8xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f32.mul $push0=, $16, $8
-; NOSIMD-NEXT:    f32.add $push1=, $pop0, $24
-; NOSIMD-NEXT:    f32.store 28($0), $pop1
-; NOSIMD-NEXT:    f32.mul $push2=, $15, $7
-; NOSIMD-NEXT:    f32.add $push3=, $pop2, $23
-; NOSIMD-NEXT:    f32.store 24($0), $pop3
-; NOSIMD-NEXT:    f32.mul $push4=, $14, $6
-; NOSIMD-NEXT:    f32.add $push5=, $pop4, $22
-; NOSIMD-NEXT:    f32.store 20($0), $pop5
-; NOSIMD-NEXT:    f32.mul $push6=, $13, $5
-; NOSIMD-NEXT:    f32.add $push7=, $pop6, $21
-; NOSIMD-NEXT:    f32.store 16($0), $pop7
-; NOSIMD-NEXT:    f32.mul $push8=, $12, $4
-; NOSIMD-NEXT:    f32.add $push9=, $pop8, $20
-; NOSIMD-NEXT:    f32.store 12($0), $pop9
-; NOSIMD-NEXT:    f32.mul $push10=, $11, $3
-; NOSIMD-NEXT:    f32.add $push11=, $pop10, $19
-; NOSIMD-NEXT:    f32.store 8($0), $pop11
-; NOSIMD-NEXT:    f32.mul $push12=, $10, $2
-; NOSIMD-NEXT:    f32.add $push13=, $pop12, $18
-; NOSIMD-NEXT:    f32.store 4($0), $pop13
-; NOSIMD-NEXT:    f32.mul $push14=, $9, $1
-; NOSIMD-NEXT:    f32.add $push15=, $pop14, $17
-; NOSIMD-NEXT:    f32.store 0($0), $pop15
-; NOSIMD-NEXT:    return
   %mul = fmul contract <8 x float> %b, %a
   %add = fadd contract <8 x float> %mul, %c
   ret <8 x float> %add
 }
 
+
 define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; RELAXED-LABEL: fadd_fmul_contract_2xf64:
 ; RELAXED:         .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
 ; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f64x2.relaxed_madd $push0=, $1, $0, $2
+; RELAXED-NEXT:    f64x2.relaxed_madd $push0=, $2, $1, $0
 ; RELAXED-NEXT:    return $pop0
 ;
 ; STRICT-LABEL: fadd_fmul_contract_2xf64:
@@ -1404,64 +211,28 @@ define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b,
 ; STRICT-NEXT:    f64x2.mul $push0=, $1, $0
 ; STRICT-NEXT:    f64x2.add $push1=, $pop0, $2
 ; STRICT-NEXT:    return $pop1
-;
-; NOFP16-LABEL: fadd_fmul_contract_2xf64:
-; NOFP16:         .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f64x2.mul $push0=, $1, $0
-; NOFP16-NEXT:    f64x2.add $push1=, $pop0, $2
-; NOFP16-NEXT:    return $pop1
-;
-; NOSIMD-LABEL: fadd_fmul_contract_2xf64:
-; NOSIMD:         .functype fadd_fmul_contract_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f64.mul $push0=, $4, $2
-; NOSIMD-NEXT:    f64.add $push1=, $pop0, $6
-; NOSIMD-NEXT:    f64.store 8($0), $pop1
-; NOSIMD-NEXT:    f64.mul $push2=, $3, $1
-; NOSIMD-NEXT:    f64.add $push3=, $pop2, $5
-; NOSIMD-NEXT:    f64.store 0($0), $pop3
-; NOSIMD-NEXT:    return
   %mul = fmul contract <2 x double> %b, %a
   %add = fadd contract <2 x double> %mul, %c
   ret <2 x double> %add
 }
 
-define <2 x double> @fadd_fmul_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; RELAXED-LABEL: fadd_fmul_2xf64:
-; RELAXED:         .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128)
+define float @fadd_fmul_contract_f32(float %a, float %b, float %c) {
+; RELAXED-LABEL: fadd_fmul_contract_f32:
+; RELAXED:         .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
 ; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f64x2.mul $push0=, $1, $0
-; RELAXED-NEXT:    f64x2.add $push1=, $pop0, $2
+; RELAXED-NEXT:    f32.mul $push0=, $1, $0
+; RELAXED-NEXT:    f32.add $push1=, $pop0, $2
 ; RELAXED-NEXT:    return $pop1
 ;
-; STRICT-LABEL: fadd_fmul_2xf64:
-; STRICT:         .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128)
+; STRICT-LABEL: fadd_fmul_contract_f32:
+; STRICT:         .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
 ; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f64x2.mul $push0=, $1, $0
-; STRICT-NEXT:    f64x2.add $push1=, $pop0, $2
+; STRICT-NEXT:    f32.mul $push0=, $1, $0
+; STRICT-NEXT:    f32.add $push1=, $pop0, $2
 ; STRICT-NEXT:    return $pop1
-;
-; NOFP16-LABEL: fadd_fmul_2xf64:
-; NOFP16:         .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    f64x2.mul $push0=, $1, $0
-; NOFP16-NEXT:    f64x2.add $push1=, $pop0, $2
-; NOFP16-NEXT:    return $pop1
-;
-; NOSIMD-LABEL: fadd_fmul_2xf64:
-; NOSIMD:         .functype fadd_fmul_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    f64.mul $push0=, $4, $2
-; NOSIMD-NEXT:    f64.add $push1=, $pop0, $6
-; NOSIMD-NEXT:    f64.store 8($0), $pop1
-; NOSIMD-NEXT:    f64.mul $push2=, $3, $1
-; NOSIMD-NEXT:    f64.add $push3=, $pop2, $5
-; NOSIMD-NEXT:    f64.store 0($0), $pop3
-; NOSIMD-NEXT:    return
-  %mul = fmul <2 x double> %b, %a
-  %add = fadd <2 x double> %mul, %c
-  ret <2 x double> %add
+  %mul = fmul contract float %b, %a
+  %add = fadd contract float %mul, %c
+  ret float %add
 }
 
 define float @fma_f32(float %a, float %b, float %c) {
@@ -1476,18 +247,6 @@ define float @fma_f32(float %a, float %b, float %c) {
 ; STRICT-NEXT:  # %bb.0:
 ; STRICT-NEXT:    call $push0=, fmaf, $0, $1, $2
 ; STRICT-NEXT:    return $pop0
-;
-; NOFP16-LABEL: fma_f32:
-; NOFP16:         .functype fma_f32 (f32, f32, f32) -> (f32)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    call $push0=, fmaf, $0, $1, $2
-; NOFP16-NEXT:    return $pop0
-;
-; NOSIMD-LABEL: fma_f32:
-; NOSIMD:         .functype fma_f32 (f32, f32, f32) -> (f32)
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    call $push0=, fmaf, $0, $1, $2
-; NOSIMD-NEXT:    return $pop0
   %fma = call float @llvm.fma(float %a, float %b, float %c)
   ret float %fma
 }
@@ -1504,18 +263,6 @@ define double @fma_f64(double %a, double %b, double %c) {
 ; STRICT-NEXT:  # %bb.0:
 ; STRICT-NEXT:    call $push0=, fma, $0, $1, $2
 ; STRICT-NEXT:    return $pop0
-;
-; NOFP16-LABEL: fma_f64:
-; NOFP16:         .functype fma_f64 (f64, f64, f64) -> (f64)
-; NOFP16-NEXT:  # %bb.0:
-; NOFP16-NEXT:    call $push0=, fma, $0, $1, $2
-; NOFP16-NEXT:    return $pop0
-;
-; NOSIMD-LABEL: fma_f64:
-; NOSIMD:         .functype fma_f64 (f64, f64, f64) -> (f64)
-; NOSIMD-NEXT:  # %bb.0:
-; NOSIMD-NEXT:    call $push0=, fma, $0, $1, $2
-; NOSIMD-NEXT:    return $pop0
   %fma = call double @llvm.fma(double %a, double %b, double %c)
   ret double %fma
 }

diff  --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
index b90c1dadd755f..6e2d860c3f152 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
@@ -27,7 +27,7 @@ define <4 x float> @fsub_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4
 ; RELAXED-LABEL: fsub_fmul_contract_4xf32:
 ; RELAXED:         .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
 ; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f32x4.relaxed_nmadd $push0=, $1, $0, $2
+; RELAXED-NEXT:    f32x4.relaxed_nmadd $push0=, $2, $1, $0
 ; RELAXED-NEXT:    return $pop0
 ;
 ; STRICT-LABEL: fsub_fmul_contract_4xf32:
@@ -46,14 +46,15 @@ define <8 x half> @fsub_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x h
 ; RELAXED-LABEL: fsub_fmul_contract_8xf16:
 ; RELAXED:         .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
 ; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f16x8.nmadd $push0=, $1, $0, $2
+; RELAXED-NEXT:    f16x8.relaxed_nmadd $push0=, $2, $1, $0
 ; RELAXED-NEXT:    return $pop0
 ;
 ; STRICT-LABEL: fsub_fmul_contract_8xf16:
 ; STRICT:         .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
 ; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f16x8.nmadd $push0=, $1, $0, $2
-; STRICT-NEXT:    return $pop0
+; STRICT-NEXT:    f16x8.mul $push0=, $1, $0
+; STRICT-NEXT:    f16x8.sub $push1=, $2, $pop0
+; STRICT-NEXT:    return $pop1
   %mul = fmul contract <8 x half> %b, %a
   %sub = fsub contract <8 x half> %c, %mul
   ret <8 x half> %sub
@@ -83,9 +84,9 @@ define <8 x float> @fsub_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8
 ; RELAXED-LABEL: fsub_fmul_contract_8xf32:
 ; RELAXED:         .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
 ; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f32x4.relaxed_nmadd $push0=, $4, $2, $6
+; RELAXED-NEXT:    f32x4.relaxed_nmadd $push0=, $6, $4, $2
 ; RELAXED-NEXT:    v128.store 16($0), $pop0
-; RELAXED-NEXT:    f32x4.relaxed_nmadd $push1=, $3, $1, $5
+; RELAXED-NEXT:    f32x4.relaxed_nmadd $push1=, $5, $3, $1
 ; RELAXED-NEXT:    v128.store 0($0), $pop1
 ; RELAXED-NEXT:    return
 ;
@@ -109,7 +110,7 @@ define <2 x double> @fsub_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b,
 ; RELAXED-LABEL: fsub_fmul_contract_2xf64:
 ; RELAXED:         .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
 ; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f64x2.relaxed_nmadd $push0=, $1, $0, $2
+; RELAXED-NEXT:    f64x2.relaxed_nmadd $push0=, $2, $1, $0
 ; RELAXED-NEXT:    return $pop0
 ;
 ; STRICT-LABEL: fsub_fmul_contract_2xf64:
@@ -142,55 +143,3 @@ define float @fsub_fmul_contract_f32(float %a, float %b, float %c) {
   ret float %sub
 }
 
-define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
-; RELAXED-LABEL: fmuladd_8xf16:
-; RELAXED:         .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f16x8.nmadd $push0=, $0, $1, $2
-; RELAXED-NEXT:    return $pop0
-;
-; STRICT-LABEL: fmuladd_8xf16:
-; STRICT:         .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f16x8.nmadd $push0=, $0, $1, $2
-; STRICT-NEXT:    return $pop0
-  %fneg = fneg <8 x half> %a
-  %fma = call <8 x half> @llvm.fmuladd(<8 x half> %fneg, <8 x half> %b, <8 x half> %c)
-  ret <8 x half> %fma
-}
-
-define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; RELAXED-LABEL: fmuladd_4xf32:
-; RELAXED:         .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f32x4.relaxed_nmadd $push0=, $0, $1, $2
-; RELAXED-NEXT:    return $pop0
-;
-; STRICT-LABEL: fmuladd_4xf32:
-; STRICT:         .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f32x4.mul $push0=, $0, $1
-; STRICT-NEXT:    f32x4.sub $push1=, $2, $pop0
-; STRICT-NEXT:    return $pop1
-  %fneg = fneg <4 x float> %a
-  %fma = call <4 x float> @llvm.fmuladd(<4 x float> %fneg, <4 x float> %b, <4 x float> %c)
-  ret <4 x float> %fma
-}
-
-define <2 x double> @fmuladd_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; RELAXED-LABEL: fmuladd_2xf64:
-; RELAXED:         .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
-; RELAXED-NEXT:  # %bb.0:
-; RELAXED-NEXT:    f64x2.relaxed_nmadd $push0=, $0, $1, $2
-; RELAXED-NEXT:    return $pop0
-;
-; STRICT-LABEL: fmuladd_2xf64:
-; STRICT:         .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
-; STRICT-NEXT:  # %bb.0:
-; STRICT-NEXT:    f64x2.mul $push0=, $0, $1
-; STRICT-NEXT:    f64x2.sub $push1=, $2, $pop0
-; STRICT-NEXT:    return $pop1
-  %fneg = fneg <2 x double> %a
-  %fma = call <2 x double> @llvm.fmuladd(<2 x double> %fneg, <2 x double> %b, <2 x double> %c)
-  ret <2 x double> %fma
-}

diff  --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 57da338a9a95d..48aec4bc52a0c 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -917,11 +917,11 @@ main:
     # CHECK: f16x8.nearest # encoding: [0xfd,0xb6,0x02]
     f16x8.nearest
 
-    # CHECK: f16x8.madd # encoding: [0xfd,0xce,0x02]
-    f16x8.madd
+    # CHECK: f16x8.relaxed_madd # encoding: [0xfd,0xce,0x02]
+    f16x8.relaxed_madd
 
-    # CHECK: f16x8.nmadd # encoding: [0xfd,0xcf,0x02]
-    f16x8.nmadd
+    # CHECK: f16x8.relaxed_nmadd # encoding: [0xfd,0xcf,0x02]
+    f16x8.relaxed_nmadd
 
     # CHECK: i16x8.trunc_sat_f16x8_s # encoding: [0xfd,0xc5,0x02]
     i16x8.trunc_sat_f16x8_s


        


More information about the llvm-commits mailing list