[llvm] [DAG][AMDGPU] Add lit tests for FMA combining with freeze and nnan variants (PR #142628)

Tue Jun 3 08:09:25 PDT 2025

https://github.com/harrisonGPU created https://github.com/llvm/llvm-project/pull/142628

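After https://github.com/llvm/llvm-project/pull/142345, a `freeze` of an `fmul` (without `nnan`) followed by an `fadd` can be combined into a single `fma`.
This patch adds lit tests that pin the generated code for both the `nnan` and non-`nnan` variants, using `fadd` and `fsub` with the frozen product on either side: the non-`nnan` cases check for a single `v_fma_f32`, while the `nnan` cases check for the separate `v_mul_f32` plus `v_add_f32`/`v_sub_f32` sequence.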

Closes: https://github.com/llvm/llvm-project/issues/141622

>From 57491e321e5ae8a42eeec276447491beb6a35f9e Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Tue, 3 Jun 2025 15:02:36 +0000
Subject: [PATCH] [DAG][AMDGPU] Add lit tests for FMA combining with freeze and
 nnan variants

---
 .../CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 106 ++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll

diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
new file mode 100644
index 0000000000000..dbf5636ae03ed
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s
+
+define float @fma_from_freeze_mul_add_left(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_add_left:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nsz arcp contract afn float %x, %y ; fast-math flags without nnan
+  %mul.fr = freeze float %mul ; freeze sits between the multiply and the add
+  %add = fadd reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00 ; frozen product is the LHS; assertions above expect one v_fma_f32
+  ret float %add
+}
+
+define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_add_left_with_nnan:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y ; same flags as the non-nnan variant plus nnan
+  %mul.fr = freeze float %mul ; freeze sits between the multiply and the add
+  %add = fadd reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00 ; frozen product is the LHS; assertions above expect separate v_mul_f32 and v_add_f32
+  ret float %add
+}
+
+define float @fma_from_freeze_mul_add_right(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_add_right:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nsz arcp contract afn float %x, %y ; fast-math flags without nnan
+  %mul.fr = freeze float %mul ; freeze sits between the multiply and the add
+  %add = fadd reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr ; frozen product is the RHS; assertions above expect one v_fma_f32
+  ret float %add
+}
+
+define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_add_right_with_nnan:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y ; same flags as the non-nnan variant plus nnan
+  %mul.fr = freeze float %mul ; freeze sits between the multiply and the add
+  %add = fadd reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr ; frozen product is the RHS; assertions above expect separate v_mul_f32 and v_add_f32
+  ret float %add
+}
+
+define float @fma_from_freeze_mul_sub_left(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_sub_left:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_fma_f32 v0, v0, v1, -1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nsz arcp contract afn float %x, %y ; fast-math flags without nnan
+  %mul.fr = freeze float %mul ; freeze sits between the multiply and the subtract
+  %sub = fsub reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00 ; frozen product is the minuend; assertions above expect one v_fma_f32 with a -1.0 addend
+  ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_sub_left_with_nnan:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT:    v_add_f32_e32 v0, -1.0, v0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y ; same flags as the non-nnan variant plus nnan
+  %mul.fr = freeze float %mul ; freeze sits between the multiply and the subtract
+  %sub = fsub reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00 ; frozen product is the minuend; assertions above expect v_mul_f32 then v_add_f32 with -1.0
+  ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_right(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_sub_right:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_fma_f32 v0, -v0, v1, 1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nsz arcp contract afn float %x, %y ; fast-math flags without nnan
+  %mul.fr = freeze float %mul ; freeze sits between the multiply and the subtract
+  %sub = fsub reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr ; frozen product is the subtrahend; assertions above expect one v_fma_f32 with a negated multiplicand
+  ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_sub_right_with_nnan:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT:    v_sub_f32_e32 v0, 1.0, v0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y ; same flags as the non-nnan variant plus nnan
+  %mul.fr = freeze float %mul ; freeze sits between the multiply and the subtract
+  %sub = fsub reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr ; frozen product is the subtrahend; assertions above expect v_mul_f32 then v_sub_f32
+  ret float %sub
+}