[llvm] [DAGCombiner] Fold freeze(fmul) + fadd/fsub into FMA combine (PR #142250)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 30 20:49:34 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-selectiondag
Author: Harrison Hao (harrisonGPU)
<details>
<summary>Changes</summary>
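Stop a `freeze` wrapped around an `fmul` from blocking FMA formation. When contraction is permitted (the `contract` fast-math flag or unsafe FP math), the DAG combiner can now fold patterns like: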
* freeze(x * y) + z → fma(x, y, z)
* z + freeze(x * y) → fma(x, y, z)
* freeze(x * y) - z → fma(x, y, -z)
* z - freeze(x * y) → fma(-x, y, z)
This improves precision and performance in common numerical code.
Closes: https://github.com/llvm/llvm-project/issues/141622
---
Full diff: https://github.com/llvm/llvm-project/pull/142250.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+46)
- (added) llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll (+54)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index be2209a2f8faf..ea5b44da9e48b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16729,6 +16729,28 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
}
+ // fold (fadd (freeze (fmul x, y)), z) -> (fma x, y, z).
+ if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+ N0.getOpcode() == ISD::FREEZE) {
+ SDValue FrozenMul = N0.getOperand(0);
+ if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+ SDValue X = FrozenMul.getOperand(0);
+ SDValue Y = FrozenMul.getOperand(1);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N1);
+ }
+ }
+
+ // fold (fadd x, (freeze (fmul y, z))) -> (fma y, z, x)
+ if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+ N1.getOpcode() == ISD::FREEZE) {
+ SDValue FrozenMul = N1.getOperand(0);
+ if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+ SDValue X = FrozenMul.getOperand(0);
+ SDValue Y = FrozenMul.getOperand(1);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N0);
+ }
+ }
+
// More folding opportunities when target permits.
if (Aggressive) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
@@ -17006,6 +17028,30 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
}
+ // fold (fsub (freeze (fmul x, y)), z) -> (fma x, y, (fneg z))
+ if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+ N0.getOpcode() == ISD::FREEZE) {
+ SDValue FrozenMul = N0.getOperand(0);
+ if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+ SDValue X = FrozenMul.getOperand(0);
+ SDValue Y = FrozenMul.getOperand(1);
+ SDValue NegZ = matcher.getNode(ISD::FNEG, SL, VT, N1);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, NegZ);
+ }
+ }
+
+ // fold (fsub z, (freeze(fmul x, y))) -> (fma (fneg x), y, z)
+ if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+ N1.getOpcode() == ISD::FREEZE) {
+ SDValue FrozenMul = N1.getOperand(0);
+ if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+ SDValue X = FrozenMul.getOperand(0);
+ SDValue Y = FrozenMul.getOperand(1);
+ SDValue NegX = matcher.getNode(ISD::FNEG, SL, VT, X);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, NegX, Y, N0);
+ }
+ }
+
auto isReassociable = [&Options](SDNode *N) {
return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
};
diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
new file mode 100644
index 0000000000000..75fe67e743c03
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix GFX11
+
+define float @fma_from_freeze_mul_add_left(float %x, float %y) {
+; GFX11-LABEL: fma_from_freeze_mul_add_left:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %mul = fmul contract float %x, %y
+ %mul.fr = freeze float %mul
+ %add = fadd contract float %mul.fr, 1.000000e+00
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_add_right(float %x, float %y) {
+; GFX11-LABEL: fma_from_freeze_mul_add_right:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %mul = fmul contract float %x, %y
+ %mul.fr = freeze float %mul
+ %add = fadd contract float 1.000000e+00, %mul.fr
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_sub_left(float %x, float %y) {
+; GFX11-LABEL: fma_from_freeze_mul_sub_left:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f32 v0, v0, v1, -1.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %mul = fmul contract float %x, %y
+ %mul.fr = freeze float %mul
+ %sub = fsub contract float %mul.fr, 1.000000e+00
+ ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_right(float %x, float %y) {
+; GFX11-LABEL: fma_from_freeze_mul_sub_right:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f32 v0, -v0, v1, 1.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %mul = fmul contract float %x, %y
+ %mul.fr = freeze float %mul
+ %sub = fsub contract float 1.000000e+00, %mul.fr
+ ret float %sub
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/142250
More information about the llvm-commits mailing list