[llvm] [DAGCombiner] Allow freeze to sink through fmul by adding it to AllowMultipleMaybePoisonOperands (PR #142250)

Harrison Hao via llvm-commits llvm-commits at lists.llvm.org
Sat Jun 7 05:23:52 PDT 2025


https://github.com/harrisonGPU updated https://github.com/llvm/llvm-project/pull/142250

From e64adf283b40638dbd4058b53276d47830a72e33 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sat, 31 May 2025 03:04:06 +0000
Subject: [PATCH 1/6] [DAGCombiner] Fold freeze(fmul) + fadd/fsub into FMA
 combine

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  46 ++++++++
 .../CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 102 +++++-------------
 2 files changed, 71 insertions(+), 77 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aba3c0f80a024..a37521e6b2690 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16736,6 +16736,28 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
     }
   }
 
+  // fold (fadd (freeze (fmul x, y)), z) -> (fma x, y, z).
+  if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+      N0.getOpcode() == ISD::FREEZE) {
+    SDValue FrozenMul = N0.getOperand(0);
+    if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+      SDValue X = FrozenMul.getOperand(0);
+      SDValue Y = FrozenMul.getOperand(1);
+      return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N1);
+    }
+  }
+
+  // fold (fadd x, (freeze (fmul y, z))) -> (fma y, z, x)
+  if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+      N1.getOpcode() == ISD::FREEZE) {
+    SDValue FrozenMul = N1.getOperand(0);
+    if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+      SDValue X = FrozenMul.getOperand(0);
+      SDValue Y = FrozenMul.getOperand(1);
+      return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N0);
+    }
+  }
+
   // More folding opportunities when target permits.
   if (Aggressive) {
     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
@@ -17013,6 +17035,30 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
     }
   }
 
+  // fold (fsub (freeze (fmul x, y)), z) -> (fma x, y, (fneg z))
+  if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+      N0.getOpcode() == ISD::FREEZE) {
+    SDValue FrozenMul = N0.getOperand(0);
+    if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+      SDValue X = FrozenMul.getOperand(0);
+      SDValue Y = FrozenMul.getOperand(1);
+      SDValue NegZ = matcher.getNode(ISD::FNEG, SL, VT, N1);
+      return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, NegZ);
+    }
+  }
+
+  // fold (fsub z, (freeze(fmul x, y))) -> (fma (fneg x), y, z)
+  if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+      N1.getOpcode() == ISD::FREEZE) {
+    SDValue FrozenMul = N1.getOperand(0);
+    if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
+      SDValue X = FrozenMul.getOperand(0);
+      SDValue Y = FrozenMul.getOperand(1);
+      SDValue NegX = matcher.getNode(ISD::FNEG, SL, VT, X);
+      return matcher.getNode(PreferredFusedOpcode, SL, VT, NegX, Y, N0);
+    }
+  }
+
   auto isReassociable = [&Options](SDNode *N) {
     return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
   };
diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
index a1b2dbda687fb..75fe67e743c03 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
@@ -1,106 +1,54 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix GFX11
 
 define float @fma_from_freeze_mul_add_left(float %x, float %y) {
-; CHECK-LABEL: fma_from_freeze_mul_add_left:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_fma_f32 v0, v0, v1, 1.0
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: fma_from_freeze_mul_add_left:
+; GFX11:       ; %bb.0: ; %bb
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_fma_f32 v0, v0, v1, 1.0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+bb:
   %mul = fmul contract float %x, %y
   %mul.fr = freeze float %mul
   %add = fadd contract float %mul.fr, 1.000000e+00
   ret float %add
 }
 
-define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) {
-; CHECK-LABEL: fma_from_freeze_mul_add_left_with_nnan:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
-; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; CHECK-NEXT:    v_add_f32_e32 v0, 1.0, v0
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %mul = fmul nnan contract afn float %x, %y
-  %mul.fr = freeze float %mul
-  %add = fadd nnan contract float %mul.fr, 1.000000e+00
-  ret float %add
-}
-
 define float @fma_from_freeze_mul_add_right(float %x, float %y) {
-; CHECK-LABEL: fma_from_freeze_mul_add_right:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_fma_f32 v0, v0, v1, 1.0
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: fma_from_freeze_mul_add_right:
+; GFX11:       ; %bb.0: ; %bb
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_fma_f32 v0, v0, v1, 1.0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+bb:
   %mul = fmul contract float %x, %y
   %mul.fr = freeze float %mul
   %add = fadd contract float 1.000000e+00, %mul.fr
   ret float %add
 }
 
-define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) {
-; CHECK-LABEL: fma_from_freeze_mul_add_right_with_nnan:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
-; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; CHECK-NEXT:    v_add_f32_e32 v0, 1.0, v0
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %mul = fmul nnan contract float %x, %y
-  %mul.fr = freeze float %mul
-  %add = fadd nnan contract float 1.000000e+00, %mul.fr
-  ret float %add
-}
-
 define float @fma_from_freeze_mul_sub_left(float %x, float %y) {
-; CHECK-LABEL: fma_from_freeze_mul_sub_left:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_fma_f32 v0, v0, v1, -1.0
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: fma_from_freeze_mul_sub_left:
+; GFX11:       ; %bb.0: ; %bb
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_fma_f32 v0, v0, v1, -1.0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+bb:
   %mul = fmul contract float %x, %y
   %mul.fr = freeze float %mul
   %sub = fsub contract float %mul.fr, 1.000000e+00
   ret float %sub
 }
 
-define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) {
-; CHECK-LABEL: fma_from_freeze_mul_sub_left_with_nnan:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
-; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; CHECK-NEXT:    v_add_f32_e32 v0, -1.0, v0
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %mul = fmul nnan contract float %x, %y
-  %mul.fr = freeze float %mul
-  %sub = fsub nnan contract float %mul.fr, 1.000000e+00
-  ret float %sub
-}
-
 define float @fma_from_freeze_mul_sub_right(float %x, float %y) {
-; CHECK-LABEL: fma_from_freeze_mul_sub_right:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_fma_f32 v0, -v0, v1, 1.0
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: fma_from_freeze_mul_sub_right:
+; GFX11:       ; %bb.0: ; %bb
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_fma_f32 v0, -v0, v1, 1.0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+bb:
   %mul = fmul contract float %x, %y
   %mul.fr = freeze float %mul
   %sub = fsub contract float 1.000000e+00, %mul.fr
   ret float %sub
 }
-
-define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) {
-; CHECK-LABEL: fma_from_freeze_mul_sub_right_with_nnan:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
-; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; CHECK-NEXT:    v_sub_f32_e32 v0, 1.0, v0
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %mul = fmul nnan contract float %x, %y
-  %mul.fr = freeze float %mul
-  %sub = fsub nnan contract float 1.000000e+00, %mul.fr
-  ret float %sub
-}

From 3811073658351b3107615febf8eaa2007054acc9 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Wed, 4 Jun 2025 11:24:59 +0800
Subject: [PATCH 2/6] [DAGCombiner] Update lit test.

---
 .../CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 118 ++++++++++++------
 1 file changed, 81 insertions(+), 37 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
index 75fe67e743c03..840361473a157 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
@@ -1,54 +1,98 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix GFX11
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck %s
 
 define float @fma_from_freeze_mul_add_left(float %x, float %y) {
-; GFX11-LABEL: fma_from_freeze_mul_add_left:
-; GFX11:       ; %bb.0: ; %bb
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_fma_f32 v0, v0, v1, 1.0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-bb:
-  %mul = fmul contract float %x, %y
-  %mul.fr = freeze float %mul
-  %add = fadd contract float %mul.fr, 1.000000e+00
+; CHECK-LABEL: fma_from_freeze_mul_add_left:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nsz arcp contract afn float %x, %y
+  %mul.fr = freeze float %mul
+  %add = fadd reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00
+  ret float %add
+}
+
+define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_add_left_with_nnan:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+  %mul.fr = freeze float %mul
+  %add = fadd reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00
   ret float %add
 }
 
 define float @fma_from_freeze_mul_add_right(float %x, float %y) {
-; GFX11-LABEL: fma_from_freeze_mul_add_right:
-; GFX11:       ; %bb.0: ; %bb
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_fma_f32 v0, v0, v1, 1.0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-bb:
-  %mul = fmul contract float %x, %y
-  %mul.fr = freeze float %mul
-  %add = fadd contract float 1.000000e+00, %mul.fr
+; CHECK-LABEL: fma_from_freeze_mul_add_right:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nsz arcp contract afn float %x, %y
+  %mul.fr = freeze float %mul
+  %add = fadd reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr
+  ret float %add
+}
+
+define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_add_right_with_nnan:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+  %mul.fr = freeze float %mul
+  %add = fadd reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr
   ret float %add
 }
 
 define float @fma_from_freeze_mul_sub_left(float %x, float %y) {
-; GFX11-LABEL: fma_from_freeze_mul_sub_left:
-; GFX11:       ; %bb.0: ; %bb
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_fma_f32 v0, v0, v1, -1.0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-bb:
-  %mul = fmul contract float %x, %y
-  %mul.fr = freeze float %mul
-  %sub = fsub contract float %mul.fr, 1.000000e+00
+; CHECK-LABEL: fma_from_freeze_mul_sub_left:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_fma_f32 v0, v0, v1, -1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nsz arcp contract afn float %x, %y
+  %mul.fr = freeze float %mul
+  %sub = fsub reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00
+  ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_sub_left_with_nnan:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_fma_f32 v0, v0, v1, -1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+  %mul.fr = freeze float %mul
+  %sub = fsub reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00
   ret float %sub
 }
 
 define float @fma_from_freeze_mul_sub_right(float %x, float %y) {
-; GFX11-LABEL: fma_from_freeze_mul_sub_right:
-; GFX11:       ; %bb.0: ; %bb
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_fma_f32 v0, -v0, v1, 1.0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-bb:
-  %mul = fmul contract float %x, %y
-  %mul.fr = freeze float %mul
-  %sub = fsub contract float 1.000000e+00, %mul.fr
+; CHECK-LABEL: fma_from_freeze_mul_sub_right:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_fma_f32 v0, -v0, v1, 1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nsz arcp contract afn float %x, %y
+  %mul.fr = freeze float %mul
+  %sub = fsub reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr
+  ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_sub_right_with_nnan:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_fma_f32 v0, -v0, v1, 1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+  %mul.fr = freeze float %mul
+  %sub = fsub reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr
   ret float %sub
 }

From 591717e6d4b23176b4278002a0572b13758c6dbe Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Wed, 4 Jun 2025 12:49:00 +0800
Subject: [PATCH 3/6] [DAGCombiner] Update for hasNoSignedZeros.

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a37521e6b2690..f07edf5021e0a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16737,8 +16737,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
   }
 
   // fold (fadd (freeze (fmul x, y)), z) -> (fma x, y, z).
-  if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
-      N0.getOpcode() == ISD::FREEZE) {
+  bool CanContract =
+      (Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+      (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros());
+  if (CanContract && N0.getOpcode() == ISD::FREEZE) {
     SDValue FrozenMul = N0.getOperand(0);
     if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
       SDValue X = FrozenMul.getOperand(0);
@@ -16748,8 +16750,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
   }
 
   // fold (fadd x, (freeze (fmul y, z))) -> (fma y, z, x)
-  if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
-      N1.getOpcode() == ISD::FREEZE) {
+  if (CanContract && N1.getOpcode() == ISD::FREEZE) {
     SDValue FrozenMul = N1.getOperand(0);
     if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
       SDValue X = FrozenMul.getOperand(0);
@@ -17036,8 +17037,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
   }
 
   // fold (fsub (freeze (fmul x, y)), z) -> (fma x, y, (fneg z))
-  if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
-      N0.getOpcode() == ISD::FREEZE) {
+  bool CanContract =
+      (Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
+      (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros());
+  if (CanContract && N0.getOpcode() == ISD::FREEZE) {
     SDValue FrozenMul = N0.getOperand(0);
     if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
       SDValue X = FrozenMul.getOperand(0);
@@ -17048,8 +17051,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
   }
 
   // fold (fsub z, (freeze(fmul x, y))) -> (fma (fneg x), y, z)
-  if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
-      N1.getOpcode() == ISD::FREEZE) {
+  if (CanContract && N1.getOpcode() == ISD::FREEZE) {
     SDValue FrozenMul = N1.getOperand(0);
     if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
       SDValue X = FrozenMul.getOperand(0);

From dd0ab154e5cc5d4c510c56bc23e86fc7405503eb Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sat, 7 Jun 2025 10:36:00 +0000
Subject: [PATCH 4/6] [DAGCombiner] Allow freeze to sink through fmul by adding
 it to AllowMultipleMaybePoisonOperands

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 65 ++++---------------
 1 file changed, 13 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f07edf5021e0a..fa8d1bb2d78a2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16392,12 +16392,11 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
     return SDValue();
 
   bool AllowMultipleMaybePoisonOperands =
-      N0.getOpcode() == ISD::SELECT_CC ||
-      N0.getOpcode() == ISD::SETCC ||
+      N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
       N0.getOpcode() == ISD::BUILD_VECTOR ||
       N0.getOpcode() == ISD::BUILD_PAIR ||
       N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
-      N0.getOpcode() == ISD::CONCAT_VECTORS;
+      N0.getOpcode() == ISD::CONCAT_VECTORS || N0.getOpcode() == ISD::FMUL;
 
   // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
   // ones" or "constant" into something that depends on FrozenUndef. We can
@@ -16495,7 +16494,17 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
                              SVN->getMask());
   } else {
     // NOTE: this strips poison generating flags.
-    R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
+    // Folding freeze(op(x, ...)) -> op(freeze(x), ...) must drop nnan and
+    // ninf (and therefore "fast"), which can produce poison; nsz is not
+    // kept either. However, contract, reassoc, afn, and arcp are preserved,
+    // as these fast-math flags do not introduce poison values.
+    SDNodeFlags SrcFlags = N0->getFlags();
+    SDNodeFlags SafeFlags;
+    SafeFlags.setAllowContract(SrcFlags.hasAllowContract());
+    SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation());
+    SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs());
+    SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal());
+    R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops, SafeFlags);
   }
   assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
          "Can't create node that may be undef/poison!");
@@ -16736,29 +16745,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
     }
   }
 
-  // fold (fadd (freeze (fmul x, y)), z) -> (fma x, y, z).
-  bool CanContract =
-      (Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
-      (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros());
-  if (CanContract && N0.getOpcode() == ISD::FREEZE) {
-    SDValue FrozenMul = N0.getOperand(0);
-    if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
-      SDValue X = FrozenMul.getOperand(0);
-      SDValue Y = FrozenMul.getOperand(1);
-      return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N1);
-    }
-  }
-
-  // fold (fadd x, (freeze (fmul y, z))) -> (fma y, z, x)
-  if (CanContract && N1.getOpcode() == ISD::FREEZE) {
-    SDValue FrozenMul = N1.getOperand(0);
-    if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
-      SDValue X = FrozenMul.getOperand(0);
-      SDValue Y = FrozenMul.getOperand(1);
-      return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N0);
-    }
-  }
-
   // More folding opportunities when target permits.
   if (Aggressive) {
     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
@@ -17036,31 +17022,6 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
     }
   }
 
-  // fold (fsub (freeze (fmul x, y)), z) -> (fma x, y, (fneg z))
-  bool CanContract =
-      (Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
-      (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros());
-  if (CanContract && N0.getOpcode() == ISD::FREEZE) {
-    SDValue FrozenMul = N0.getOperand(0);
-    if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
-      SDValue X = FrozenMul.getOperand(0);
-      SDValue Y = FrozenMul.getOperand(1);
-      SDValue NegZ = matcher.getNode(ISD::FNEG, SL, VT, N1);
-      return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, NegZ);
-    }
-  }
-
-  // fold (fsub z, (freeze(fmul x, y))) -> (fma (fneg x), y, z)
-  if (CanContract && N1.getOpcode() == ISD::FREEZE) {
-    SDValue FrozenMul = N1.getOperand(0);
-    if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
-      SDValue X = FrozenMul.getOperand(0);
-      SDValue Y = FrozenMul.getOperand(1);
-      SDValue NegX = matcher.getNode(ISD::FNEG, SL, VT, X);
-      return matcher.getNode(PreferredFusedOpcode, SL, VT, NegX, Y, N0);
-    }
-  }
-
   auto isReassociable = [&Options](SDNode *N) {
     return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
   };

From e731c530e17f07777cc77c9fd092f6e5d8cf49d1 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sat, 7 Jun 2025 10:38:10 +0000
Subject: [PATCH 5/6] [DAGCombiner] Update test.

---
 .../CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
index 840361473a157..a27b2920c73eb 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
@@ -7,9 +7,9 @@ define float @fma_from_freeze_mul_add_left(float %x, float %y) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_fma_f32 v0, v0, v1, 1.0
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %mul = fmul reassoc nsz arcp contract afn float %x, %y
+  %mul = fmul contract float %x, %y
   %mul.fr = freeze float %mul
-  %add = fadd reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00
+  %add = fadd contract float %mul.fr, 1.000000e+00
   ret float %add
 }
 
@@ -19,9 +19,9 @@ define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_fma_f32 v0, v0, v1, 1.0
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+  %mul = fmul nnan contract afn float %x, %y
   %mul.fr = freeze float %mul
-  %add = fadd reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00
+  %add = fadd nnan contract float %mul.fr, 1.000000e+00
   ret float %add
 }
 
@@ -31,9 +31,9 @@ define float @fma_from_freeze_mul_add_right(float %x, float %y) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_fma_f32 v0, v0, v1, 1.0
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %mul = fmul reassoc nsz arcp contract afn float %x, %y
+  %mul = fmul contract float %x, %y
   %mul.fr = freeze float %mul
-  %add = fadd reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr
+  %add = fadd contract float 1.000000e+00, %mul.fr
   ret float %add
 }
 
@@ -43,9 +43,9 @@ define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_fma_f32 v0, v0, v1, 1.0
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+  %mul = fmul nnan contract float %x, %y
   %mul.fr = freeze float %mul
-  %add = fadd reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr
+  %add = fadd nnan contract float 1.000000e+00, %mul.fr
   ret float %add
 }
 
@@ -55,9 +55,9 @@ define float @fma_from_freeze_mul_sub_left(float %x, float %y) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_fma_f32 v0, v0, v1, -1.0
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %mul = fmul reassoc nsz arcp contract afn float %x, %y
+  %mul = fmul contract float %x, %y
   %mul.fr = freeze float %mul
-  %sub = fsub reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00
+  %sub = fsub contract float %mul.fr, 1.000000e+00
   ret float %sub
 }
 
@@ -67,9 +67,9 @@ define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_fma_f32 v0, v0, v1, -1.0
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+  %mul = fmul nnan contract float %x, %y
   %mul.fr = freeze float %mul
-  %sub = fsub reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00
+  %sub = fsub nnan contract float %mul.fr, 1.000000e+00
   ret float %sub
 }
 
@@ -79,9 +79,9 @@ define float @fma_from_freeze_mul_sub_right(float %x, float %y) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_fma_f32 v0, -v0, v1, 1.0
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %mul = fmul reassoc nsz arcp contract afn float %x, %y
+  %mul = fmul contract float %x, %y
   %mul.fr = freeze float %mul
-  %sub = fsub reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr
+  %sub = fsub contract float 1.000000e+00, %mul.fr
   ret float %sub
 }
 
@@ -91,8 +91,8 @@ define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_fma_f32 v0, -v0, v1, 1.0
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+  %mul = fmul nnan contract float %x, %y
   %mul.fr = freeze float %mul
-  %sub = fsub reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr
+  %sub = fsub nnan contract float 1.000000e+00, %mul.fr
   ret float %sub
 }

From 728c44bfe088ca7feb8b8f8e34043ff2806ffbce Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sat, 7 Jun 2025 12:21:45 +0000
Subject: [PATCH 6/6] [DAGCombiner] Add a new test.

---
 .../CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
index a27b2920c73eb..d762f75ccad26 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
@@ -96,3 +96,35 @@ define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) {
   %sub = fsub nnan contract float 1.000000e+00, %mul.fr
   ret float %sub
 }
+
+define float @fma_freeze_sink_multiple_maybe_poison_nnan_add(float %x, float %y) {
+; CHECK-LABEL: fma_freeze_sink_multiple_maybe_poison_nnan_add:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_dual_subrev_f32 v0, 1.0, v0 :: v_dual_add_f32 v1, 1.0, v1
+; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT:    v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %fadd_x = fsub reassoc nnan nsz arcp contract float %x, 1.000000e+00
+  %fadd_y = fadd reassoc nnan nsz arcp contract float %y, 1.000000e+00
+  %mul = fmul reassoc nnan nsz arcp contract afn float %fadd_x, %fadd_y
+  %mul.fr = freeze float %mul
+  %sub = fadd reassoc nsz arcp contract afn contract float %mul.fr, 1.000000e+00
+  ret float %sub
+}
+
+define float @fma_freeze_sink_multiple_maybe_poison_nnan_sub(float %x, float %y) {
+; CHECK-LABEL: fma_freeze_sink_multiple_maybe_poison_nnan_sub:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_dual_add_f32 v0, 1.0, v0 :: v_dual_add_f32 v1, -1.0, v1
+; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT:    v_fma_f32 v0, v0, v1, -1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %fadd_x = fadd reassoc nnan nsz arcp contract float %x, 1.000000e+00
+  %fadd_y = fsub reassoc nnan nsz arcp contract float %y, 1.000000e+00
+  %mul = fmul reassoc nnan nsz arcp contract afn float %fadd_x, %fadd_y
+  %mul.fr = freeze float %mul
+  %sub = fsub reassoc nsz arcp contract afn contract float %mul.fr, 1.000000e+00
+  ret float %sub
+}



More information about the llvm-commits mailing list