[llvm] [DAGCombiner] Remove NoSignedZerosFPMath in visitFNEG (PR #162052)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 6 21:48:24 PDT 2025
https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/162052
>From 67c7d1cda01b91a6ab76c50e38e3038e4af9d83c Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 6 Oct 2025 17:20:53 +0800
Subject: [PATCH 1/2] [DAGCombiner] Remove NoSignedZerosFPMath in visitFNEG
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 204e1f0c75e00..d104d8a2967b6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19301,9 +19301,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
// FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
// know it was called from a context with a nsz flag if the input fsub does
// not.
- if (N0.getOpcode() == ISD::FSUB &&
- (DAG.getTarget().Options.NoSignedZerosFPMath ||
- N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
+ N0.hasOneUse()) {
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
N0.getOperand(0));
}
>From 30b176a8f206d2f813b0e76c545eb76b74aa36dc Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 6 Oct 2025 19:53:44 +0800
Subject: [PATCH 2/2] fix tests
---
llvm/test/CodeGen/AMDGPU/fsub.ll | 5 +-
.../AMDGPU/select-fabs-fneg-extract.v2f16.ll | 160 ++++++++++++++----
2 files changed, 124 insertions(+), 41 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/fsub.ll b/llvm/test/CodeGen/AMDGPU/fsub.ll
index 743431c7e0e67..984690fd0ea16 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub.ll
@@ -100,7 +100,7 @@ define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(ptr addrspace(1) %out,
%a = load float, ptr addrspace(1) %in, align 4
%b = load float, ptr addrspace(1) %b_ptr, align 4
%result = fsub float %a, %b
- %neg.result = fsub float -0.0, %result
+ %neg.result = fsub nsz float -0.0, %result
store float %neg.result, ptr addrspace(1) %out, align 4
ret void
}
@@ -129,6 +129,3 @@ define amdgpu_kernel void @v_fsub_0_nsz_attribute_f32(ptr addrspace(1) %out, ptr
store float %result, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
-attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
index bb22144b815a1..eb69a53a72c90 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
@@ -4387,28 +4387,28 @@ define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> %
}
define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
-; CI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
-; CI-SAFE: ; %bb.0:
-; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_add_f32_e32 v3, -4.0, v3
-; CI-SAFE-NEXT: v_add_f32_e32 v2, -4.0, v2
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
-; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2
-; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
-; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
+; CI-LABEL: select_fneg_posk_src_sub_v2f16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_add_f32_e32 v3, -4.0, v3
+; CI-NEXT: v_add_f32_e32 v2, -4.0, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; CI-NEXT: v_or_b32_e32 v2, v2, v3
+; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
+; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
; VI-SAFE: ; %bb.0:
@@ -4468,21 +4468,6 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; CI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
-; CI-NSZ: ; %bb.0:
-; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NSZ-NEXT: v_sub_f32_e32 v2, 4.0, v2
-; CI-NSZ-NEXT: v_sub_f32_e32 v3, 4.0, v3
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
-; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
; VI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
; VI-NSZ: ; %bb.0:
; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4541,6 +4526,105 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
ret <2 x half> %select
}
+define <2 x half> @select_fneg_posk_src_sub_v2f16_nsz(<2 x i32> %c, <2 x half> %x) {
+; CI-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_sub_f32_e32 v2, 4.0, v2
+; CI-NEXT: v_sub_f32_e32 v3, 4.0, v3
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_mov_b32_e32 v1, 0x4400
+; VI-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_sub_f16_e32 v2, 4.0, v2
+; VI-NEXT: v_mov_b32_e32 v3, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX11-SAFE-TRUE16: ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX11-SAFE-FAKE16: ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX11-NSZ-TRUE16: ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX11-NSZ-FAKE16: ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq <2 x i32> %c, zeroinitializer
+ %add = fsub <2 x half> %x, <half 4.0, half 4.0>
+ %fneg = fneg nsz <2 x half> %add
+ %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
+ ret <2 x half> %select
+}
+
define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) {
; CI-LABEL: select_fneg_posk_src_mul_v2f16:
; CI: ; %bb.0:
@@ -5048,6 +5132,8 @@ declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CI-NSZ: {{.*}}
+; CI-SAFE: {{.*}}
; GFX11: {{.*}}
; GFX11-NSZ: {{.*}}
; GFX11-SAFE: {{.*}}
More information about the llvm-commits
mailing list