[llvm] 102dfa8 - [DAGCombiner] Allow freeze to sink through fmul by adding it to AllowMultipleMaybePoisonOperands (#142250)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 8 04:39:01 PDT 2025
Author: Harrison Hao
Date: 2025-06-08T19:38:57+08:00
New Revision: 102dfa8a487a1c44a95a225825039d282be712a0
URL: https://github.com/llvm/llvm-project/commit/102dfa8a487a1c44a95a225825039d282be712a0
DIFF: https://github.com/llvm/llvm-project/commit/102dfa8a487a1c44a95a225825039d282be712a0.diff
LOG: [DAGCombiner] Allow freeze to sink through fmul by adding it to AllowMultipleMaybePoisonOperands (#142250)
Allow freeze to sink through fmul by treating it as a
non-poison-generating op when operands are not poison.
Adding `ISD::FMUL` to `AllowMultipleMaybePoisonOperands` lets DAG combine
push freeze through fmul. This helps expose patterns like `fmul+fadd` for
`FMA` fusion.
When rebuilding the node, we drop flags like nnan/ninf/nsz that imply poison,
but keep contract, reassoc, afn, and arcp.
Closes: https://github.com/llvm/llvm-project/issues/141622
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ef084863cae3d..eff152812b7c1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16392,12 +16392,11 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
return SDValue();
bool AllowMultipleMaybePoisonOperands =
- N0.getOpcode() == ISD::SELECT_CC ||
- N0.getOpcode() == ISD::SETCC ||
+ N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
N0.getOpcode() == ISD::BUILD_VECTOR ||
N0.getOpcode() == ISD::BUILD_PAIR ||
N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
- N0.getOpcode() == ISD::CONCAT_VECTORS;
+ N0.getOpcode() == ISD::CONCAT_VECTORS || N0.getOpcode() == ISD::FMUL;
// Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
// ones" or "constant" into something that depends on FrozenUndef. We can
@@ -16495,7 +16494,17 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
SVN->getMask());
} else {
// NOTE: this strips poison generating flags.
- R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
+ // Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan,
+ // ninf, nsz, or fast.
+ // However, contract, reassoc, afn, and arcp should be preserved,
+ // as these fast-math flags do not introduce poison values.
+ SDNodeFlags SrcFlags = N0->getFlags();
+ SDNodeFlags SafeFlags;
+ SafeFlags.setAllowContract(SrcFlags.hasAllowContract());
+ SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation());
+ SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs());
+ SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal());
+ R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops, SafeFlags);
}
assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
"Can't create node that may be undef/poison!");
diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
index a1b2dbda687fb..9a4ab9865369f 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
@@ -17,9 +17,7 @@ define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) {
; CHECK-LABEL: fma_from_freeze_mul_add_left_with_nnan:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
-; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; CHECK-NEXT: v_add_f32_e32 v0, 1.0, v0
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%mul = fmul nnan contract afn float %x, %y
%mul.fr = freeze float %mul
@@ -43,9 +41,7 @@ define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) {
; CHECK-LABEL: fma_from_freeze_mul_add_right_with_nnan:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
-; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; CHECK-NEXT: v_add_f32_e32 v0, 1.0, v0
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%mul = fmul nnan contract float %x, %y
%mul.fr = freeze float %mul
@@ -69,9 +65,7 @@ define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) {
; CHECK-LABEL: fma_from_freeze_mul_sub_left_with_nnan:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
-; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; CHECK-NEXT: v_add_f32_e32 v0, -1.0, v0
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%mul = fmul nnan contract float %x, %y
%mul.fr = freeze float %mul
@@ -95,12 +89,42 @@ define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) {
; CHECK-LABEL: fma_from_freeze_mul_sub_right_with_nnan:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
-; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; CHECK-NEXT: v_sub_f32_e32 v0, 1.0, v0
+; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%mul = fmul nnan contract float %x, %y
%mul.fr = freeze float %mul
%sub = fsub nnan contract float 1.000000e+00, %mul.fr
ret float %sub
}
+
+define float @fma_freeze_sink_multiple_maybe_poison_nnan_add(float %x, float %y) {
+; CHECK-LABEL: fma_freeze_sink_multiple_maybe_poison_nnan_add:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_dual_subrev_f32 v0, 1.0, v0 :: v_dual_add_f32 v1, 1.0, v1
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %fsub_x = fsub nnan contract float %x, 1.000000e+00
+ %fadd_y = fadd nnan contract float %y, 1.000000e+00
+ %mul = fmul nnan contract float %fsub_x, %fadd_y
+ %mul.fr = freeze float %mul
+ %add = fadd nnan contract float %mul.fr, 1.000000e+00
+ ret float %add
+}
+
+define float @fma_freeze_sink_multiple_maybe_poison_nnan_sub(float %x, float %y) {
+; CHECK-LABEL: fma_freeze_sink_multiple_maybe_poison_nnan_sub:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_dual_add_f32 v0, 1.0, v0 :: v_dual_add_f32 v1, -1.0, v1
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %fadd_x = fadd nnan contract float %x, 1.000000e+00
+ %fsub_y = fsub nnan contract float %y, 1.000000e+00
+ %mul = fmul nnan contract float %fadd_x, %fsub_y
+ %mul.fr = freeze float %mul
+ %sub = fsub nnan contract float %mul.fr, 1.000000e+00
+ ret float %sub
+}
More information about the llvm-commits mailing list