[llvm] [DAG][AMDGPU] Add lit tests for FMA combining with freeze and nnan variants (PR #142628)
Harrison Hao via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 08:09:25 PDT 2025
https://github.com/harrisonGPU created https://github.com/llvm/llvm-project/pull/142628
Since https://github.com/llvm/llvm-project/pull/142345, an `fmul` (without `nnan`) whose result is passed through `freeze` and then used by an `fadd` can be combined into a single `fma`.
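This patch adds lit tests that record the generated code for both the `nnan` and non-`nnan` variants: in the tests without `nnan` the expected output is a single `v_fma_f32`, while in the tests with `nnan` the expected output is still a separate `v_mul_f32` followed by `v_add_f32`/`v_sub_f32`.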
Closes: https://github.com/llvm/llvm-project/issues/141622
>From 57491e321e5ae8a42eeec276447491beb6a35f9e Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Tue, 3 Jun 2025 15:02:36 +0000
Subject: [PATCH] [DAG][AMDGPU] Add lit tests for FMA combining with freeze and
nnan variants
---
.../CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 106 ++++++++++++++++++
1 file changed, 106 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
new file mode 100644
index 0000000000000..dbf5636ae03ed
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s
+
+define float @fma_from_freeze_mul_add_left(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_add_left:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %add = fadd reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_add_left_with_nnan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_add_f32_e32 v0, 1.0, v0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %add = fadd reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_add_right(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_add_right:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %add = fadd reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_add_right_with_nnan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_add_f32_e32 v0, 1.0, v0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %add = fadd reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_sub_left(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_sub_left:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %sub = fsub reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00
+ ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_sub_left_with_nnan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_add_f32_e32 v0, -1.0, v0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %sub = fsub reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00
+ ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_right(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_sub_right:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %sub = fsub reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr
+ ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) {
+; CHECK-LABEL: fma_from_freeze_mul_sub_right_with_nnan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_sub_f32_e32 v0, 1.0, v0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul
+ %sub = fsub reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr
+ ret float %sub
+}
More information about the llvm-commits mailing list