[llvm] [WebAssembly] Lower fmuladd to fma (PR #161355)
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 30 05:25:57 PDT 2025
https://github.com/sparker-arm updated https://github.com/llvm/llvm-project/pull/161355
>From 71a2d9659d232f55176bbd577be5047bf05a1348 Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker at arm.com>
Date: Tue, 30 Sep 2025 11:28:15 +0100
Subject: [PATCH 1/2] [WebAssembly] Lower fmuladd to fma
Lower v4f32 and v2f64 fmuladd calls to fma when relaxed SIMD is
available; v8f16 is lowered as well when FP16 is also enabled.
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 31 ++++++++++
.../WebAssembly/WebAssemblyISelLowering.h | 2 +
.../WebAssembly/WebAssemblyInstrSIMD.td | 3 +
.../CodeGen/WebAssembly/simd-relaxed-fma.ll | 61 +++++++++++--------
4 files changed, 73 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 163bf9ba5b089..b88bdd662e123 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -158,6 +158,15 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setTruncStoreAction(T, MVT::f16, Expand);
}
+ // Overwrite settings for FMA, when we have relaxed simd.
+ if (Subtarget->hasRelaxedSIMD()) {
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+ if (Subtarget->hasFP16()) {
+ setOperationAction(ISD::FMA, MVT::v8f16, Legal);
+ }
+ }
+
// Expand unavailable integer operations.
for (auto Op :
{ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
@@ -992,6 +1001,28 @@ bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
(ExtT == MVT::v2i64 && MemT == MVT::v2i32);
}
+bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd( // True only for vector VTs with f32/f64 elements (or f16 with FP16) when relaxed SIMD is enabled.
+ const MachineFunction &MF, EVT VT) const { // MF is not consulted; the answer depends only on VT and Subtarget.
+ if (!Subtarget->hasRelaxedSIMD() || !VT.isVector()) // Scalars, and any type without relaxed SIMD, are rejected.
+ return false;
+
+ VT = VT.getScalarType(); // From here on VT is the element type, not the vector type.
+ if (!VT.isSimple()) // getSimpleVT() below is only used on simple types.
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: // Any other element type leaves the switch and reaches the final return false.
+ break;
+ case MVT::f16:
+ return Subtarget->hasFP16(); // Half-precision elements additionally depend on the FP16 feature.
+ case MVT::f32:
+ case MVT::f64:
+ return true;
+ }
+
+ return false;
+}
+
bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
// Wasm doesn't support function addresses with offsets
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index b33a8530310be..bfa968fdd2fac 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -67,6 +67,8 @@ class WebAssemblyTargetLowering final : public TargetLowering {
unsigned *Fast) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 130602650d34e..a3f007d960960 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1600,6 +1600,9 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate>
def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
(!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+ def : Pat<(fma (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)),
+ (!cast<Instruction>("MADD_"#vec) V128:$c, V128:$a, V128:$b)>, Requires<[HasRelaxedSIMD]>;
+
def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
(!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
index e065de38951b1..7a2929f2f41a3 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
@@ -61,6 +61,39 @@ define <8 x half> @fadd_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x h
ret <8 x half> %add
}
+define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; llvm.fmuladd on <8 x half> with no fast-math flags on the call.
+; RELAXED-LABEL: fmuladd_8xf16:
+; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_8xf16:
+; STRICT: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f16x8.mul $push0=, $0, $1
+; STRICT-NEXT: f16x8.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+ %fma = call <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c)
+ ret <8 x half> %fma
+}
+
+define <8 x half> @fmuladd_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; Same operation as @fmuladd_8xf16, but the call carries the 'contract' flag.
+; RELAXED-LABEL: fmuladd_contract_8xf16:
+; RELAXED: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_contract_8xf16:
+; STRICT: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f16x8.mul $push0=, $0, $1
+; STRICT-NEXT: f16x8.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+ %fma = call contract <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c)
+ ret <8 x half> %fma
+}
define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fadd_fmul_4xf32:
@@ -103,9 +136,8 @@ define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c
; RELAXED-LABEL: fmuladd_4xf32:
; RELAXED: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.mul $push0=, $0, $1
-; RELAXED-NEXT: f32x4.add $push1=, $pop0, $2
-; RELAXED-NEXT: return $pop1
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fmuladd_4xf32:
; STRICT: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
@@ -121,27 +153,8 @@ define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fma_4xf32:
; RELAXED: .functype fma_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.extract_lane $push2=, $0, 0
-; RELAXED-NEXT: f32x4.extract_lane $push1=, $1, 0
-; RELAXED-NEXT: f32x4.extract_lane $push0=, $2, 0
-; RELAXED-NEXT: call $push3=, fmaf, $pop2, $pop1, $pop0
-; RELAXED-NEXT: f32x4.splat $push4=, $pop3
-; RELAXED-NEXT: f32x4.extract_lane $push7=, $0, 1
-; RELAXED-NEXT: f32x4.extract_lane $push6=, $1, 1
-; RELAXED-NEXT: f32x4.extract_lane $push5=, $2, 1
-; RELAXED-NEXT: call $push8=, fmaf, $pop7, $pop6, $pop5
-; RELAXED-NEXT: f32x4.replace_lane $push9=, $pop4, 1, $pop8
-; RELAXED-NEXT: f32x4.extract_lane $push12=, $0, 2
-; RELAXED-NEXT: f32x4.extract_lane $push11=, $1, 2
-; RELAXED-NEXT: f32x4.extract_lane $push10=, $2, 2
-; RELAXED-NEXT: call $push13=, fmaf, $pop12, $pop11, $pop10
-; RELAXED-NEXT: f32x4.replace_lane $push14=, $pop9, 2, $pop13
-; RELAXED-NEXT: f32x4.extract_lane $push17=, $0, 3
-; RELAXED-NEXT: f32x4.extract_lane $push16=, $1, 3
-; RELAXED-NEXT: f32x4.extract_lane $push15=, $2, 3
-; RELAXED-NEXT: call $push18=, fmaf, $pop17, $pop16, $pop15
-; RELAXED-NEXT: f32x4.replace_lane $push19=, $pop14, 3, $pop18
-; RELAXED-NEXT: return $pop19
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fma_4xf32:
; STRICT: .functype fma_4xf32 (v128, v128, v128) -> (v128)
>From e5ab2c98e798420c22a4bca8b858c8e58c408753 Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker at arm.com>
Date: Tue, 30 Sep 2025 13:21:00 +0100
Subject: [PATCH 2/2] add fnmadd pattern
---
llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index a3f007d960960..d0f4c8cab49db 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1605,6 +1605,10 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate>
def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
(!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+
+ def : Pat<(fma (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)),
+ (!cast<Instruction>("NMADD_"#vec) V128:$c, V128:$a, V128:$b)>, Requires<[HasRelaxedSIMD]>;
+
}
defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
More information about the llvm-commits
mailing list