[llvm] [DAGCombiner] Fold freeze(fmul) + fadd/fsub into FMA combine (PR #142250)
Harrison Hao via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 21:49:19 PDT 2025
https://github.com/harrisonGPU updated https://github.com/llvm/llvm-project/pull/142250
>From 2e74dd227c89d8c098635c79dbe9dc7d7a8587e0 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sat, 31 May 2025 03:04:06 +0000
Subject: [PATCH 1/3] [DAGCombiner] Fold freeze(fmul) + fadd/fsub into FMA combine
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 46 ++++++++++++++++
.../CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 54 +++++++++++++++++++
2 files changed, 100 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aba3c0f80a024..a37521e6b2690 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16736,6 +16736,28 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
}
+ // fold (fadd (freeze (fmul x, y)), z) -> (fma x, y, z).
+ if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+ N0.getOpcode() == ISD::FREEZE) {
+ SDValue FrozenMul = N0.getOperand(0);
+ if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+ SDValue X = FrozenMul.getOperand(0);
+ SDValue Y = FrozenMul.getOperand(1);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N1);
+ }
+ }
+
+ // fold (fadd x, (freeze (fmul y, z))) -> (fma y, z, x)
+ if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+ N1.getOpcode() == ISD::FREEZE) {
+ SDValue FrozenMul = N1.getOperand(0);
+ if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+ SDValue X = FrozenMul.getOperand(0);
+ SDValue Y = FrozenMul.getOperand(1);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N0);
+ }
+ }
+
// More folding opportunities when target permits.
if (Aggressive) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
@@ -17013,6 +17035,30 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
}
+ // fold (fsub (freeze (fmul x, y)), z) -> (fma x, y, (fneg z))
+ if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+ N0.getOpcode() == ISD::FREEZE) {
+ SDValue FrozenMul = N0.getOperand(0);
+ if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+ SDValue X = FrozenMul.getOperand(0);
+ SDValue Y = FrozenMul.getOperand(1);
+ SDValue NegZ = matcher.getNode(ISD::FNEG, SL, VT, N1);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, NegZ);
+ }
+ }
+
+ // fold (fsub z, (freeze(fmul x, y))) -> (fma (fneg x), y, z)
+ if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+ N1.getOpcode() == ISD::FREEZE) {
+ SDValue FrozenMul = N1.getOperand(0);
+ if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+ SDValue X = FrozenMul.getOperand(0);
+ SDValue Y = FrozenMul.getOperand(1);
+ SDValue NegX = matcher.getNode(ISD::FNEG, SL, VT, X);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, NegX, Y, N0);
+ }
+ }
+
auto isReassociable = [&Options](SDNode *N) {
return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
};
diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
new file mode 100644
index 0000000000000..75fe67e743c03
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix GFX11
+
+define float @fma_from_freeze_mul_add_left(float %x, float %y) {
+; GFX11-LABEL: fma_from_freeze_mul_add_left:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %mul = fmul contract float %x, %y
+ %mul.fr = freeze float %mul
+ %add = fadd contract float %mul.fr, 1.000000e+00
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_add_right(float %x, float %y) {
+; GFX11-LABEL: fma_from_freeze_mul_add_right:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %mul = fmul contract float %x, %y
+ %mul.fr = freeze float %mul
+ %add = fadd contract float 1.000000e+00, %mul.fr
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_sub_left(float %x, float %y) {
+; GFX11-LABEL: fma_from_freeze_mul_sub_left:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f32 v0, v0, v1, -1.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %mul = fmul contract float %x, %y
+ %mul.fr = freeze float %mul
+ %sub = fsub contract float %mul.fr, 1.000000e+00
+ ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_right(float %x, float %y) {
+; GFX11-LABEL: fma_from_freeze_mul_sub_right:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f32 v0, -v0, v1, 1.0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %mul = fmul contract float %x, %y
+ %mul.fr = freeze float %mul
+ %sub = fsub contract float 1.000000e+00, %mul.fr
+ ret float %sub
+}
>From e56ba95096055fcefe4d5b94cef92d18bce49a7c Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Wed, 4 Jun 2025 11:24:59 +0800
Subject: [PATCH 2/3] [DAGCombiner] Update lit test.
---
.../CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 118 ++++++++++++------
1 file changed, 81 insertions(+), 37 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
index 75fe67e743c03..840361473a157 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
@@ -1,54 +1,98 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix GFX11
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck %s
define float @fma_from_freeze_mul_add_left(float %x, float %y) {
-; GFX11-LABEL: fma_from_freeze_mul_add_left:
-; GFX11: ; %bb.0: ; %bb
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_fma_f32 v0, v0, v1, 1.0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-bb:
- %mul = fmul contract float %x, %y
- %mul.fr = freeze float %mul
- %add = fadd contract float %mul.fr, 1.000000e+00
+; CHECK-LABEL: fma_from_freeze_mul_add_left:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %add = fadd reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_add_left_with_nnan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %add = fadd reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00
ret float %add
}
define float @fma_from_freeze_mul_add_right(float %x, float %y) {
-; GFX11-LABEL: fma_from_freeze_mul_add_right:
-; GFX11: ; %bb.0: ; %bb
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_fma_f32 v0, v0, v1, 1.0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-bb:
- %mul = fmul contract float %x, %y
- %mul.fr = freeze float %mul
- %add = fadd contract float 1.000000e+00, %mul.fr
+; CHECK-LABEL: fma_from_freeze_mul_add_right:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %add = fadd reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_add_right_with_nnan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %add = fadd reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr
ret float %add
}
define float @fma_from_freeze_mul_sub_left(float %x, float %y) {
-; GFX11-LABEL: fma_from_freeze_mul_sub_left:
-; GFX11: ; %bb.0: ; %bb
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_fma_f32 v0, v0, v1, -1.0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-bb:
- %mul = fmul contract float %x, %y
- %mul.fr = freeze float %mul
- %sub = fsub contract float %mul.fr, 1.000000e+00
+; CHECK-LABEL: fma_from_freeze_mul_sub_left:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %sub = fsub reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00
+ ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_sub_left_with_nnan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %sub = fsub reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00
ret float %sub
}
define float @fma_from_freeze_mul_sub_right(float %x, float %y) {
-; GFX11-LABEL: fma_from_freeze_mul_sub_right:
-; GFX11: ; %bb.0: ; %bb
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_fma_f32 v0, -v0, v1, 1.0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-bb:
- %mul = fmul contract float %x, %y
- %mul.fr = freeze float %mul
- %sub = fsub contract float 1.000000e+00, %mul.fr
+; CHECK-LABEL: fma_from_freeze_mul_sub_right:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %sub = fsub reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr
+ ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_sub_right_with_nnan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %sub = fsub reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr
ret float %sub
}
>From cf30539ea49c6adc3d239f77472cb90202204bdf Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Wed, 4 Jun 2025 12:49:00 +0800
Subject: [PATCH 3/3] [DAGCombiner] Update for hasNoSignedZeros.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a37521e6b2690..f07edf5021e0a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16737,8 +16737,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// fold (fadd (freeze (fmul x, y)), z) -> (fma x, y, z).
- if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
- N0.getOpcode() == ISD::FREEZE) {
+ bool CanContract =
+ (Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+ (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros());
+ if (CanContract && N0.getOpcode() == ISD::FREEZE) {
SDValue FrozenMul = N0.getOperand(0);
if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
SDValue X = FrozenMul.getOperand(0);
@@ -16748,8 +16750,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// fold (fadd x, (freeze (fmul y, z))) -> (fma y, z, x)
- if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
- N1.getOpcode() == ISD::FREEZE) {
+ if (CanContract && N1.getOpcode() == ISD::FREEZE) {
SDValue FrozenMul = N1.getOperand(0);
if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
SDValue X = FrozenMul.getOperand(0);
@@ -17036,8 +17037,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// fold (fsub (freeze (fmul x, y)), z) -> (fma x, y, (fneg z))
- if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
- N0.getOpcode() == ISD::FREEZE) {
+ bool CanContract =
+ (Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+ (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros());
+ if (CanContract && N0.getOpcode() == ISD::FREEZE) {
SDValue FrozenMul = N0.getOperand(0);
if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
SDValue X = FrozenMul.getOperand(0);
@@ -17048,8 +17051,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// fold (fsub z, (freeze(fmul x, y))) -> (fma (fneg x), y, z)
- if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
- N1.getOpcode() == ISD::FREEZE) {
+ if (CanContract && N1.getOpcode() == ISD::FREEZE) {
SDValue FrozenMul = N1.getOperand(0);
if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
SDValue X = FrozenMul.getOperand(0);
More information about the llvm-commits mailing list