[llvm] [DAGCombiner] Fold freeze(fmul) + fadd/fsub into FMA combine (PR #142250)
Harrison Hao via llvm-commits
llvm-commits at lists.llvm.org
Fri May 30 20:49:00 PDT 2025
https://github.com/harrisonGPU created https://github.com/llvm/llvm-project/pull/142250
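A freeze between an fmul and its fadd/fsub user currently blocks FMA formation. Look through freeze(fmul x, y) when contraction is allowed (the `contract` fast-math flag or unsafe FP math), so that patterns like the following are combined: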
* freeze(x * y) + z → fma(x, y, z)
* z + freeze(x * y) → fma(x, y, z)
* freeze(x * y) - z → fma(x, y, -z)
* z - freeze(x * y) → fma(-x, y, z)
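Forming the FMA replaces a separate multiply and add/sub with a single fused instruction, which avoids the intermediate rounding step and improves performance in common numerical code.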
Closes: https://github.com/llvm/llvm-project/issues/141622
From 1309dbf25ee74fadae1eaf1ed251aa6c5af8b1a4 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sat, 31 May 2025 03:04:06 +0000
Subject: [PATCH] [DAGCombiner] Fold freeze(fmul) + fadd/fsub into FMA combine
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 46 ++++++++++++++++
.../CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 54 +++++++++++++++++++
2 files changed, 100 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index be2209a2f8faf..ea5b44da9e48b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16729,6 +16729,28 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
}
+ // fold (fadd (freeze (fmul x, y)), z) -> (fma x, y, z).
+ if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+ N0.getOpcode() == ISD::FREEZE) {
+ SDValue FrozenMul = N0.getOperand(0);
+ if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+ SDValue X = FrozenMul.getOperand(0);
+ SDValue Y = FrozenMul.getOperand(1);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N1);
+ }
+ }
+
+ // fold (fadd x, (freeze (fmul y, z))) -> (fma y, z, x)
+ if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+ N1.getOpcode() == ISD::FREEZE) {
+ SDValue FrozenMul = N1.getOperand(0);
+ if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+ SDValue X = FrozenMul.getOperand(0);
+ SDValue Y = FrozenMul.getOperand(1);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N0);
+ }
+ }
+
// More folding opportunities when target permits.
if (Aggressive) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
@@ -17006,6 +17028,30 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
}
+ // fold (fsub (freeze (fmul x, y)), z) -> (fma x, y, (fneg z))
+ if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+ N0.getOpcode() == ISD::FREEZE) {
+ SDValue FrozenMul = N0.getOperand(0);
+ if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+ SDValue X = FrozenMul.getOperand(0);
+ SDValue Y = FrozenMul.getOperand(1);
+ SDValue NegZ = matcher.getNode(ISD::FNEG, SL, VT, N1);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, NegZ);
+ }
+ }
+
+ // fold (fsub z, (freeze (fmul x, y))) -> (fma (fneg x), y, z)
+ if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+ N1.getOpcode() == ISD::FREEZE) {
+ SDValue FrozenMul = N1.getOperand(0);
+ if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+ SDValue X = FrozenMul.getOperand(0);
+ SDValue Y = FrozenMul.getOperand(1);
+ SDValue NegX = matcher.getNode(ISD::FNEG, SL, VT, X);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, NegX, Y, N0);
+ }
+ }
+
auto isReassociable = [&Options](SDNode *N) {
return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
};
diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
new file mode 100644
index 0000000000000..75fe67e743c03
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix GFX11
+
+define float @fma_from_freeze_mul_add_left(float %x, float %y) {
+; GFX11-LABEL: fma_from_freeze_mul_add_left:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %mul = fmul contract float %x, %y
+ %mul.fr = freeze float %mul
+ %add = fadd contract float %mul.fr, 1.000000e+00
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_add_right(float %x, float %y) {
+; GFX11-LABEL: fma_from_freeze_mul_add_right:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %mul = fmul contract float %x, %y
+ %mul.fr = freeze float %mul
+ %add = fadd contract float 1.000000e+00, %mul.fr
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_sub_left(float %x, float %y) {
+; GFX11-LABEL: fma_from_freeze_mul_sub_left:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f32 v0, v0, v1, -1.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %mul = fmul contract float %x, %y
+ %mul.fr = freeze float %mul
+ %sub = fsub contract float %mul.fr, 1.000000e+00
+ ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_right(float %x, float %y) {
+; GFX11-LABEL: fma_from_freeze_mul_sub_right:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f32 v0, -v0, v1, 1.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %mul = fmul contract float %x, %y
+ %mul.fr = freeze float %mul
+ %sub = fsub contract float 1.000000e+00, %mul.fr
+ ret float %sub
+}
More information about the llvm-commits mailing list