[llvm] [WebAssembly] Lower fmuladd to fma (PR #161355)
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 1 00:51:00 PDT 2025
https://github.com/sparker-arm updated https://github.com/llvm/llvm-project/pull/161355
>From 71a2d9659d232f55176bbd577be5047bf05a1348 Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker at arm.com>
Date: Tue, 30 Sep 2025 11:28:15 +0100
Subject: [PATCH 1/3] [WebAssembly] Lower fmuladd to fma
Lower v8f16, v4f32 and v2f64 fmuladd calls to fma when relaxed SIMD is
available. The v8f16 case additionally requires FP16 support.
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 31 ++++++++++
.../WebAssembly/WebAssemblyISelLowering.h | 2 +
.../WebAssembly/WebAssemblyInstrSIMD.td | 3 +
.../CodeGen/WebAssembly/simd-relaxed-fma.ll | 61 +++++++++++--------
4 files changed, 73 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 163bf9ba5b089..b88bdd662e123 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -158,6 +158,15 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setTruncStoreAction(T, MVT::f16, Expand);
}
+ // Overwrite settings for FMA, when we have relaxed simd.
+ if (Subtarget->hasRelaxedSIMD()) {
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+ if (Subtarget->hasFP16()) {
+ setOperationAction(ISD::FMA, MVT::v8f16, Legal);
+ }
+ }
+
// Expand unavailable integer operations.
for (auto Op :
{ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
@@ -992,6 +1001,28 @@ bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
(ExtT == MVT::v2i64 && MemT == MVT::v2i32);
}
+bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
+ const MachineFunction &MF, EVT VT) const {
+ if (!Subtarget->hasRelaxedSIMD() || !VT.isVector())
+ return false;
+
+ VT = VT.getScalarType();
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ break;
+ case MVT::f16:
+ return Subtarget->hasFP16();
+ case MVT::f32:
+ case MVT::f64:
+ return true;
+ }
+
+ return false;
+}
+
bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
// Wasm doesn't support function addresses with offsets
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index b33a8530310be..bfa968fdd2fac 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -67,6 +67,8 @@ class WebAssemblyTargetLowering final : public TargetLowering {
unsigned *Fast) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 130602650d34e..a3f007d960960 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1600,6 +1600,9 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate>
def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
(!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+ def : Pat<(fma (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)),
+ (!cast<Instruction>("MADD_"#vec) V128:$c, V128:$a, V128:$b)>, Requires<[HasRelaxedSIMD]>;
+
def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
(!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
index e065de38951b1..7a2929f2f41a3 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
@@ -61,6 +61,39 @@ define <8 x half> @fadd_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x h
ret <8 x half> %add
}
+define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; RELAXED-LABEL: fmuladd_8xf16:
+; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_8xf16:
+; STRICT: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f16x8.mul $push0=, $0, $1
+; STRICT-NEXT: f16x8.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+ %fma = call <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c)
+ ret <8 x half> %fma
+}
+
+define <8 x half> @fmuladd_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; RELAXED-LABEL: fmuladd_contract_8xf16:
+; RELAXED: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_contract_8xf16:
+; STRICT: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f16x8.mul $push0=, $0, $1
+; STRICT-NEXT: f16x8.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+ %fma = call contract <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c)
+ ret <8 x half> %fma
+}
define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fadd_fmul_4xf32:
@@ -103,9 +136,8 @@ define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c
; RELAXED-LABEL: fmuladd_4xf32:
; RELAXED: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.mul $push0=, $0, $1
-; RELAXED-NEXT: f32x4.add $push1=, $pop0, $2
-; RELAXED-NEXT: return $pop1
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fmuladd_4xf32:
; STRICT: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
@@ -121,27 +153,8 @@ define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fma_4xf32:
; RELAXED: .functype fma_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.extract_lane $push2=, $0, 0
-; RELAXED-NEXT: f32x4.extract_lane $push1=, $1, 0
-; RELAXED-NEXT: f32x4.extract_lane $push0=, $2, 0
-; RELAXED-NEXT: call $push3=, fmaf, $pop2, $pop1, $pop0
-; RELAXED-NEXT: f32x4.splat $push4=, $pop3
-; RELAXED-NEXT: f32x4.extract_lane $push7=, $0, 1
-; RELAXED-NEXT: f32x4.extract_lane $push6=, $1, 1
-; RELAXED-NEXT: f32x4.extract_lane $push5=, $2, 1
-; RELAXED-NEXT: call $push8=, fmaf, $pop7, $pop6, $pop5
-; RELAXED-NEXT: f32x4.replace_lane $push9=, $pop4, 1, $pop8
-; RELAXED-NEXT: f32x4.extract_lane $push12=, $0, 2
-; RELAXED-NEXT: f32x4.extract_lane $push11=, $1, 2
-; RELAXED-NEXT: f32x4.extract_lane $push10=, $2, 2
-; RELAXED-NEXT: call $push13=, fmaf, $pop12, $pop11, $pop10
-; RELAXED-NEXT: f32x4.replace_lane $push14=, $pop9, 2, $pop13
-; RELAXED-NEXT: f32x4.extract_lane $push17=, $0, 3
-; RELAXED-NEXT: f32x4.extract_lane $push16=, $1, 3
-; RELAXED-NEXT: f32x4.extract_lane $push15=, $2, 3
-; RELAXED-NEXT: call $push18=, fmaf, $pop17, $pop16, $pop15
-; RELAXED-NEXT: f32x4.replace_lane $push19=, $pop14, 3, $pop18
-; RELAXED-NEXT: return $pop19
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fma_4xf32:
; STRICT: .functype fma_4xf32 (v128, v128, v128) -> (v128)
>From e5ab2c98e798420c22a4bca8b858c8e58c408753 Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker at arm.com>
Date: Tue, 30 Sep 2025 13:21:00 +0100
Subject: [PATCH 2/3] add fnmadd pattern
---
llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index a3f007d960960..d0f4c8cab49db 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1605,6 +1605,10 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate>
def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
(!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+
+ def : Pat<(fma (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)),
+ (!cast<Instruction>("NMADD_"#vec) V128:$c, V128:$a, V128:$b)>, Requires<[HasRelaxedSIMD]>;
+
}
defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
>From b3d5aa298a27bf13c3c71f7065ada5ce13548142 Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker at arm.com>
Date: Wed, 1 Oct 2025 08:44:16 +0100
Subject: [PATCH 3/3] Remove old patterns and correct operand order.
---
.../include/llvm/Target/TargetSelectionDAG.td | 9 ---------
.../WebAssembly/WebAssemblyInstrSIMD.td | 10 ++--------
.../CodeGen/WebAssembly/simd-relaxed-fma.ll | 20 +++++++++----------
.../CodeGen/WebAssembly/simd-relaxed-fnma.ll | 10 +++++-----
4 files changed, 17 insertions(+), 32 deletions(-)
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 5e57dcaa303f3..cfd0d81355543 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -1144,20 +1144,11 @@ def immAllZerosV : SDPatternOperator; // ISD::isConstantSplatVectorAllZeros
// Other helper fragments.
-// An 'fmul' node which has contract flag
-def fmul_contract : PatFrag<(ops node:$a, node:$b), (fmul node:$a, node:$b),[{
- return N->getFlags().hasAllowContract();
-}]>;
-
// An 'fadd' node which can be contracted with fmul_contract into a fma or other relaxed instruction
def fadd_contract : PatFrag<(ops node:$a, node:$b), (fadd node:$a, node:$b),[{
return N->getFlags().hasAllowContract();
}]>;
-def fsub_contract : PatFrag<(ops node:$a, node:$b), (fsub node:$a, node:$b),[{
- return N->getFlags().hasAllowContract();
-}]>;
-
def not : PatFrag<(ops node:$in), (xor node:$in, -1)>;
def vnot : PatFrag<(ops node:$in), (xor node:$in, immAllOnesV)>;
def ineg : PatFrag<(ops node:$in), (sub 0, node:$in)>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index d0f4c8cab49db..398c51bbb13de 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1597,17 +1597,11 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate>
vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c",
vec.prefix#".relaxed_nmadd", simdopS, reqs>;
- def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
- (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
-
def : Pat<(fma (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)),
- (!cast<Instruction>("MADD_"#vec) V128:$c, V128:$a, V128:$b)>, Requires<[HasRelaxedSIMD]>;
-
- def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
- (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+ (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
def : Pat<(fma (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)),
- (!cast<Instruction>("NMADD_"#vec) V128:$c, V128:$a, V128:$b)>, Requires<[HasRelaxedSIMD]>;
+ (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
index 7a2929f2f41a3..c1242cdaa5a5e 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
@@ -28,7 +28,7 @@ define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4
; RELAXED-LABEL: fadd_fmul_contract_4xf32:
; RELAXED: .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $1, $0
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fadd_fmul_contract_4xf32:
@@ -47,7 +47,7 @@ define <8 x half> @fadd_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x h
; RELAXED-LABEL: fadd_fmul_contract_8xf16:
; RELAXED: .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $2, $1, $0
+; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fadd_fmul_contract_8xf16:
@@ -65,7 +65,7 @@ define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; RELAXED-LABEL: fmuladd_8xf16:
; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $0, $1, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fmuladd_8xf16:
@@ -82,7 +82,7 @@ define <8 x half> @fmuladd_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x hal
; RELAXED-LABEL: fmuladd_contract_8xf16:
; RELAXED: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $0, $1, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fmuladd_contract_8xf16:
@@ -118,7 +118,7 @@ define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x
; RELAXED-LABEL: fmuladd_contract_4xf32:
; RELAXED: .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $0, $1, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fmuladd_contract_4xf32:
@@ -136,7 +136,7 @@ define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c
; RELAXED-LABEL: fmuladd_4xf32:
; RELAXED: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $0, $1, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fmuladd_4xf32:
@@ -153,7 +153,7 @@ define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fma_4xf32:
; RELAXED: .functype fma_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $0, $1, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fma_4xf32:
@@ -189,9 +189,9 @@ define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8
; RELAXED-LABEL: fadd_fmul_contract_8xf32:
; RELAXED: .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $6, $4, $2
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $4, $2, $6
; RELAXED-NEXT: v128.store 16($0), $pop0
-; RELAXED-NEXT: f32x4.relaxed_madd $push1=, $5, $3, $1
+; RELAXED-NEXT: f32x4.relaxed_madd $push1=, $3, $1, $5
; RELAXED-NEXT: v128.store 0($0), $pop1
; RELAXED-NEXT: return
;
@@ -215,7 +215,7 @@ define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b,
; RELAXED-LABEL: fadd_fmul_contract_2xf64:
; RELAXED: .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $2, $1, $0
+; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fadd_fmul_contract_2xf64:
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
index 6e2d860c3f152..902598d68e132 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
@@ -27,7 +27,7 @@ define <4 x float> @fsub_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4
; RELAXED-LABEL: fsub_fmul_contract_4xf32:
; RELAXED: .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $2, $1, $0
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fsub_fmul_contract_4xf32:
@@ -46,7 +46,7 @@ define <8 x half> @fsub_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x h
; RELAXED-LABEL: fsub_fmul_contract_8xf16:
; RELAXED: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $2, $1, $0
+; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fsub_fmul_contract_8xf16:
@@ -84,9 +84,9 @@ define <8 x float> @fsub_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8
; RELAXED-LABEL: fsub_fmul_contract_8xf32:
; RELAXED: .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $6, $4, $2
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $4, $2, $6
; RELAXED-NEXT: v128.store 16($0), $pop0
-; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $5, $3, $1
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $3, $1, $5
; RELAXED-NEXT: v128.store 0($0), $pop1
; RELAXED-NEXT: return
;
@@ -110,7 +110,7 @@ define <2 x double> @fsub_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b,
; RELAXED-LABEL: fsub_fmul_contract_2xf64:
; RELAXED: .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $2, $1, $0
+; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fsub_fmul_contract_2xf64:
More information about the llvm-commits mailing list