[llvm] 1e01c02 - [DAGCombiner] Remove `NoSignedZerosFPMath` uses in `visitFADD` (#160635)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 25 20:24:06 PDT 2025
Author: paperchalice
Date: 2025-09-26T11:24:02+08:00
New Revision: 1e01c029961c0dd3dd70b68e029581dc6ef9c17b
URL: https://github.com/llvm/llvm-project/commit/1e01c029961c0dd3dd70b68e029581dc6ef9c17b
DIFF: https://github.com/llvm/llvm-project/commit/1e01c029961c0dd3dd70b68e029581dc6ef9c17b.diff
LOG: [DAGCombiner] Remove `NoSignedZerosFPMath` uses in `visitFADD` (#160635)
Remove uses of the global `NoSignedZerosFPMath` flag and use node-level fast-math flags instead.
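To sketch the intent (an illustrative example, not part of the commit): the
`fadd X, +0.0 --> X` fold is only sound when signed zeros can be ignored,
because if X is -0.0 the add yields +0.0. With node-level flags, the fold
fires only when the fadd itself carries `nsz`:

  %a = fadd nsz float %x, 0.0  ; may fold to %x (nsz is on the node)
  %b = fadd float %x, 0.0      ; kept: %b is +0.0 when %x is -0.0

Previously the fold also fired whenever the global
-enable-no-signed-zeros-fp-math option was set, regardless of per-node flags.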
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
llvm/test/CodeGen/AMDGPU/fneg-combines.ll
llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
llvm/test/CodeGen/X86/fadd-combines.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a6ba6e518899f..c81568672de3c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17770,7 +17770,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
if (N1C && N1C->isZero())
- if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
+ if (N1C->isNegative() || Flags.hasNoSignedZeros())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -17823,11 +17823,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getConstantFP(0.0, DL, VT);
}
- // If 'unsafe math' or reassoc and nsz, fold lots of things.
+ // If reassoc and nsz, fold lots of things.
// TODO: break out portions of the transformations below for which Unsafe is
// considered and which do not require both nsz and reassoc
- if ((Options.NoSignedZerosFPMath ||
- (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+ if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
AllowNewConst) {
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
if (N1CFP && N0.getOpcode() == ISD::FADD &&
@@ -17911,10 +17910,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
DAG.getConstantFP(4.0, DL, VT));
}
}
- } // enable-unsafe-fp-math && AllowNewConst
+ } // reassoc && nsz && AllowNewConst
- if ((Options.NoSignedZerosFPMath ||
- (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))) {
+ if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()) {
// Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
VT, N0, N1, Flags))
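The reassociation folds guarded above now require both `reassoc` and `nsz`
on the nodes themselves; a minimal sketch of the constant fold named in the
code comment (mirroring the updated X86 tests below):

  %y = fadd reassoc nsz float %x, 1.0
  %z = fadd reassoc nsz float %y, 2.0  ; combines to one fadd of %x, 3.0

Without `reassoc` and `nsz` on both adds, the two additions are kept as
written.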
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index 462d7748b86cd..b14e8c44ffcce 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -581,145 +581,63 @@ define { half, half } @v_fneg_add_multi_use_fneg_x_f16(half %a, half %b, half %c
ret { half, half } %insert.1
}
-; This one asserted with -enable-no-signed-zeros-fp-math
-define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) #0 {
-; SI-SAFE-LABEL: fneg_fadd_0_f16:
-; SI-SAFE: ; %bb.0: ; %.entry
-; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
-; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, s1
-; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, s0
-; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SAFE-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
-; SI-SAFE-NEXT: v_rcp_f32_e32 v3, v2
-; SI-SAFE-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
-; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; SI-SAFE-NEXT: v_fma_f32 v5, -v2, v3, 1.0
-; SI-SAFE-NEXT: v_fma_f32 v3, v5, v3, v3
-; SI-SAFE-NEXT: v_mul_f32_e32 v5, v4, v3
-; SI-SAFE-NEXT: v_fma_f32 v6, -v2, v5, v4
-; SI-SAFE-NEXT: v_fma_f32 v5, v6, v3, v5
-; SI-SAFE-NEXT: v_fma_f32 v2, -v2, v5, v4
-; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; SI-SAFE-NEXT: v_div_fmas_f32 v2, v2, v3, v5
-; SI-SAFE-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
-; SI-SAFE-NEXT: v_mad_f32 v0, v0, 0, 0
-; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
-; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
-; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; SI-SAFE-NEXT: ; return to shader part epilog
-;
-; SI-NSZ-LABEL: fneg_fadd_0_f16:
-; SI-NSZ: ; %bb.0: ; %.entry
-; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
-; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v0, s1
-; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, s0
-; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-NSZ-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
-; SI-NSZ-NEXT: v_rcp_f32_e32 v3, v2
-; SI-NSZ-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
-; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; SI-NSZ-NEXT: v_fma_f32 v5, -v2, v3, 1.0
-; SI-NSZ-NEXT: v_fma_f32 v3, v5, v3, v3
-; SI-NSZ-NEXT: v_mul_f32_e32 v5, v4, v3
-; SI-NSZ-NEXT: v_fma_f32 v6, -v2, v5, v4
-; SI-NSZ-NEXT: v_fma_f32 v5, v6, v3, v5
-; SI-NSZ-NEXT: v_fma_f32 v2, -v2, v5, v4
-; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; SI-NSZ-NEXT: v_div_fmas_f32 v2, v2, v3, v5
-; SI-NSZ-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
-; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
-; SI-NSZ-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
-; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; SI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; SI-NSZ-NEXT: ; return to shader part epilog
-;
-; VI-SAFE-LABEL: fneg_fadd_0_f16:
-; VI-SAFE: ; %bb.0: ; %.entry
-; VI-SAFE-NEXT: v_rcp_f16_e32 v0, s1
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0
-; VI-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
-; VI-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
-; VI-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v0
-; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, s0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7e00
-; VI-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-SAFE-NEXT: ; return to shader part epilog
-;
-; VI-NSZ-LABEL: fneg_fadd_0_f16:
-; VI-NSZ: ; %bb.0: ; %.entry
-; VI-NSZ-NEXT: v_rcp_f16_e32 v0, s1
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, s0
-; VI-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
-; VI-NSZ-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
-; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7e00
-; VI-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-NSZ-NEXT: ; return to shader part epilog
-;
-; GFX11-SAFE-LABEL: fneg_fadd_0_f16:
-; GFX11-SAFE: ; %bb.0: ; %.entry
-; GFX11-SAFE-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-SAFE-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
-; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
-; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v0
-; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
-; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
-; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
-; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
-; GFX11-SAFE-NEXT: ; return to shader part epilog
-;
-; GFX11-NSZ-LABEL: fneg_fadd_0_f16:
-; GFX11-NSZ: ; %bb.0: ; %.entry
-; GFX11-NSZ-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-NSZ-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
-; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
-; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
-; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
-; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
-; GFX11-NSZ-NEXT: ; return to shader part epilog
-; GFX11-SAFE-TRUE16-LABEL: fneg_fadd_0_f16:
-; GFX11-SAFE-TRUE16: ; %bb.0: ; %.entry
-; GFX11-SAFE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0, v0.l
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_add_f16_e32 v0.l, 0, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0.l
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0/*Invalid register, operand has 'VS_16' register class*/, s0, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: ; return to shader part epilog
-; GFX11-NSZ-TRUE16-LABEL: fneg_fadd_0_f16:
-; GFX11-NSZ-TRUE16: ; %bb.0: ; %.entry
-; GFX11-NSZ-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x8000, v0.l
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, -v0.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, s0, s1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: ; return to shader part epilog
+define amdgpu_ps half @fneg_fadd_0_safe_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) #0 {
+; SI-LABEL: fneg_fadd_0_safe_f16:
+; SI: ; %bb.0: ; %.entry
+; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
+; SI-NEXT: v_cvt_f16_f32_e32 v0, s1
+; SI-NEXT: v_cvt_f16_f32_e32 v1, s0
+; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
+; SI-NEXT: v_rcp_f32_e32 v3, v2
+; SI-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
+; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; SI-NEXT: v_fma_f32 v5, -v2, v3, 1.0
+; SI-NEXT: v_fma_f32 v3, v5, v3, v3
+; SI-NEXT: v_mul_f32_e32 v5, v4, v3
+; SI-NEXT: v_fma_f32 v6, -v2, v5, v4
+; SI-NEXT: v_fma_f32 v5, v6, v3, v5
+; SI-NEXT: v_fma_f32 v2, -v2, v5, v4
+; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
+; SI-NEXT: v_div_fmas_f32 v2, v2, v3, v5
+; SI-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
+; SI-NEXT: v_mad_f32 v0, v0, 0, 0
+; SI-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
+; SI-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
+; SI-NEXT: v_mov_b32_e32 v1, 0x7fc00000
+; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
+; SI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-NEXT: ; return to shader part epilog
+;
+; VI-LABEL: fneg_fadd_0_safe_f16:
+; VI: ; %bb.0: ; %.entry
+; VI-NEXT: v_rcp_f16_e32 v0, s1
+; VI-NEXT: v_mov_b32_e32 v1, s0
+; VI-NEXT: v_mul_f16_e32 v0, 0, v0
+; VI-NEXT: v_add_f16_e32 v0, 0, v0
+; VI-NEXT: v_xor_b32_e32 v2, 0x8000, v0
+; VI-NEXT: v_cmp_ngt_f16_e32 vcc, s0, v0
+; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-NEXT: v_mov_b32_e32 v1, 0x7e00
+; VI-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: fneg_fadd_0_safe_f16:
+; GFX11: ; %bb.0: ; %.entry
+; GFX11-NEXT: v_rcp_f16_e32 v0, s1
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f16_e32 v0, 0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_add_f16_e32 v0, 0, v0
+; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v0
+; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
+; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
+; GFX11-NEXT: ; return to shader part epilog
.entry:
%tmp7 = fdiv half 1.000000e+00, %tmp6
%tmp8 = fmul half 0.000000e+00, %tmp7
@@ -733,108 +651,51 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
ret half %.i198
}
-; This is a workaround because -enable-no-signed-zeros-fp-math does not set up
-; function attribute unsafe-fp-math automatically. Combine with the previous test
-; when that is done.
define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) #2 {
-; SI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
-; SI-SAFE: ; %bb.0: ; %.entry
-; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, s0
-; SI-SAFE-NEXT: s_brev_b32 s0, 1
-; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, 0, v0
-; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; SI-SAFE-NEXT: ; return to shader part epilog
-;
-; SI-NSZ-LABEL: fneg_fadd_0_nsz_f16:
-; SI-NSZ: ; %bb.0: ; %.entry
-; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v0, s1
-; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, s0
-; SI-NSZ-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-NSZ-NEXT: v_rcp_f32_e32 v0, v0
-; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
-; SI-NSZ-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
-; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; SI-NSZ-NEXT: ; return to shader part epilog
-;
-; VI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
-; VI-SAFE: ; %bb.0: ; %.entry
-; VI-SAFE-NEXT: v_mov_b32_e32 v0, 0x8000
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0
-; VI-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc, s0, 0
-; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7e00
-; VI-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-SAFE-NEXT: ; return to shader part epilog
-;
-; VI-NSZ-LABEL: fneg_fadd_0_nsz_f16:
-; VI-NSZ: ; %bb.0: ; %.entry
-; VI-NSZ-NEXT: v_rcp_f16_e32 v0, s1
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, s0
-; VI-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
-; VI-NSZ-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
-; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7e00
-; VI-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-NSZ-NEXT: ; return to shader part epilog
-;
-; GFX11-SAFE-LABEL: fneg_fadd_0_nsz_f16:
-; GFX11-SAFE: ; %bb.0: ; %.entry
-; GFX11-SAFE-NEXT: v_mov_b32_e32 v0, s0
-; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc_lo, s0, 0
-; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x8000, v0, vcc_lo
-; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
-; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
-; GFX11-SAFE-NEXT: ; return to shader part epilog
-;
-; GFX11-NSZ-LABEL: fneg_fadd_0_nsz_f16:
-; GFX11-NSZ: ; %bb.0: ; %.entry
-; GFX11-NSZ-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-NSZ-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
-; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
-; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
-; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
-; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
-; GFX11-NSZ-NEXT: ; return to shader part epilog
-; GFX11-SAFE-TRUE16-LABEL: fneg_fadd_0_nsz_f16:
-; GFX11-SAFE-TRUE16: ; %bb.0: ; %.entry
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, s0, 0
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x8000
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, s0, s1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: ; return to shader part epilog
-; GFX11-NSZ-TRUE16-LABEL: fneg_fadd_0_nsz_f16:
-; GFX11-NSZ-TRUE16: ; %bb.0: ; %.entry
-; GFX11-NSZ-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x8000, v0.l
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, -v0.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, s0, s1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: ; return to shader part epilog
+; SI-LABEL: fneg_fadd_0_nsz_f16:
+; SI: ; %bb.0: ; %.entry
+; SI-NEXT: v_cvt_f16_f32_e32 v0, s1
+; SI-NEXT: v_cvt_f16_f32_e32 v1, s0
+; SI-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-NEXT: v_rcp_f32_e32 v0, v0
+; SI-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
+; SI-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
+; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
+; SI-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; SI-NEXT: ; return to shader part epilog
+;
+; VI-LABEL: fneg_fadd_0_nsz_f16:
+; VI: ; %bb.0: ; %.entry
+; VI-NEXT: v_rcp_f16_e32 v0, s1
+; VI-NEXT: v_mov_b32_e32 v1, s0
+; VI-NEXT: v_mul_f16_e32 v0, 0x8000, v0
+; VI-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
+; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-NEXT: v_mov_b32_e32 v1, 0x7e00
+; VI-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: fneg_fadd_0_nsz_f16:
+; GFX11: ; %bb.0: ; %.entry
+; GFX11-NEXT: v_rcp_f16_e32 v0, s1
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f16_e32 v0, 0x8000, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
+; GFX11-NEXT: ; return to shader part epilog
.entry:
%tmp7 = fdiv afn half 1.000000e+00, %tmp6
%tmp8 = fmul contract half 0.000000e+00, %tmp7
%tmp9 = fmul reassoc nnan arcp contract half 0.000000e+00, %tmp8
- %.i188 = fadd nnan ninf contract half %tmp9, 0.000000e+00
+ %.i188 = fadd nsz half %tmp9, 0.000000e+00
%tmp10 = fcmp uge half %.i188, %tmp2
%tmp11 = fneg half %.i188
%.i092 = select i1 %tmp10, half %tmp2, half %tmp11
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
index ba34e9245f39c..12e9888314fc1 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -880,102 +880,54 @@ define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(ptr addrspace(1) %out
}
; This one asserted with -enable-no-signed-zeros-fp-math
-define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 {
-; SI-SAFE-LABEL: fneg_fadd_0:
-; SI-SAFE: ; %bb.0: ; %.entry
-; SI-SAFE-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
-; SI-SAFE-NEXT: v_rcp_f32_e32 v1, v0
-; SI-SAFE-NEXT: v_div_scale_f32 v2, vcc, 1.0, s1, 1.0
-; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; SI-SAFE-NEXT: v_fma_f32 v3, -v0, v1, 1.0
-; SI-SAFE-NEXT: v_fma_f32 v1, v3, v1, v1
-; SI-SAFE-NEXT: v_mul_f32_e32 v3, v2, v1
-; SI-SAFE-NEXT: v_fma_f32 v4, -v0, v3, v2
-; SI-SAFE-NEXT: v_fma_f32 v3, v4, v1, v3
-; SI-SAFE-NEXT: v_fma_f32 v0, -v0, v3, v2
-; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; SI-SAFE-NEXT: v_div_fmas_f32 v0, v0, v1, v3
-; SI-SAFE-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
-; SI-SAFE-NEXT: v_mad_f32 v0, v0, 0, 0
-; SI-SAFE-NEXT: v_mov_b32_e32 v1, s0
-; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
-; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; SI-SAFE-NEXT: ; return to shader part epilog
-;
-; SI-NSZ-LABEL: fneg_fadd_0:
-; SI-NSZ: ; %bb.0: ; %.entry
-; SI-NSZ-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
-; SI-NSZ-NEXT: v_rcp_f32_e32 v1, v0
-; SI-NSZ-NEXT: v_div_scale_f32 v2, vcc, 1.0, s1, 1.0
-; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; SI-NSZ-NEXT: v_fma_f32 v3, -v0, v1, 1.0
-; SI-NSZ-NEXT: v_fma_f32 v1, v3, v1, v1
-; SI-NSZ-NEXT: v_mul_f32_e32 v3, v2, v1
-; SI-NSZ-NEXT: v_fma_f32 v4, -v0, v3, v2
-; SI-NSZ-NEXT: v_fma_f32 v3, v4, v1, v3
-; SI-NSZ-NEXT: v_fma_f32 v0, -v0, v3, v2
-; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; SI-NSZ-NEXT: v_div_fmas_f32 v0, v0, v1, v3
-; SI-NSZ-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
-; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0, v0
-; SI-NSZ-NEXT: v_mov_b32_e32 v1, s0
-; SI-NSZ-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
-; SI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; SI-NSZ-NEXT: ; return to shader part epilog
-;
-; VI-SAFE-LABEL: fneg_fadd_0:
-; VI-SAFE: ; %bb.0: ; %.entry
-; VI-SAFE-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
-; VI-SAFE-NEXT: v_div_scale_f32 v1, vcc, 1.0, s1, 1.0
-; VI-SAFE-NEXT: v_rcp_f32_e32 v2, v0
-; VI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; VI-SAFE-NEXT: v_fma_f32 v3, -v0, v2, 1.0
-; VI-SAFE-NEXT: v_fma_f32 v2, v3, v2, v2
-; VI-SAFE-NEXT: v_mul_f32_e32 v3, v1, v2
-; VI-SAFE-NEXT: v_fma_f32 v4, -v0, v3, v1
-; VI-SAFE-NEXT: v_fma_f32 v3, v4, v2, v3
-; VI-SAFE-NEXT: v_fma_f32 v0, -v0, v3, v1
-; VI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; VI-SAFE-NEXT: v_div_fmas_f32 v0, v0, v2, v3
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, s0
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; VI-SAFE-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
-; VI-SAFE-NEXT: v_mad_f32 v0, v0, 0, 0
-; VI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc
-; VI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-SAFE-NEXT: ; return to shader part epilog
-;
-; VI-NSZ-LABEL: fneg_fadd_0:
-; VI-NSZ: ; %bb.0: ; %.entry
-; VI-NSZ-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
-; VI-NSZ-NEXT: v_div_scale_f32 v1, vcc, 1.0, s1, 1.0
-; VI-NSZ-NEXT: v_rcp_f32_e32 v2, v0
-; VI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; VI-NSZ-NEXT: v_fma_f32 v3, -v0, v2, 1.0
-; VI-NSZ-NEXT: v_fma_f32 v2, v3, v2, v2
-; VI-NSZ-NEXT: v_mul_f32_e32 v3, v1, v2
-; VI-NSZ-NEXT: v_fma_f32 v4, -v0, v3, v1
-; VI-NSZ-NEXT: v_fma_f32 v3, v4, v2, v3
-; VI-NSZ-NEXT: v_fma_f32 v0, -v0, v3, v1
-; VI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; VI-NSZ-NEXT: v_div_fmas_f32 v0, v0, v2, v3
-; VI-NSZ-NEXT: v_mov_b32_e32 v2, s0
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; VI-NSZ-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
-; VI-NSZ-NEXT: v_mul_f32_e32 v0, 0, v0
-; VI-NSZ-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc
-; VI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-NSZ-NEXT: ; return to shader part epilog
+define amdgpu_ps float @fneg_fadd_0_safe(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 {
+; SI-LABEL: fneg_fadd_0_safe:
+; SI: ; %bb.0: ; %.entry
+; SI-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
+; SI-NEXT: v_rcp_f32_e32 v1, v0
+; SI-NEXT: v_div_scale_f32 v2, vcc, 1.0, s1, 1.0
+; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; SI-NEXT: v_fma_f32 v3, -v0, v1, 1.0
+; SI-NEXT: v_fma_f32 v1, v3, v1, v1
+; SI-NEXT: v_mul_f32_e32 v3, v2, v1
+; SI-NEXT: v_fma_f32 v4, -v0, v3, v2
+; SI-NEXT: v_fma_f32 v3, v4, v1, v3
+; SI-NEXT: v_fma_f32 v0, -v0, v3, v2
+; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
+; SI-NEXT: v_div_fmas_f32 v0, v0, v1, v3
+; SI-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
+; SI-NEXT: v_mad_f32 v0, v0, 0, 0
+; SI-NEXT: v_mov_b32_e32 v1, s0
+; SI-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
+; SI-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
+; SI-NEXT: v_mov_b32_e32 v1, 0x7fc00000
+; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
+; SI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-NEXT: ; return to shader part epilog
+;
+; VI-LABEL: fneg_fadd_0_safe:
+; VI: ; %bb.0: ; %.entry
+; VI-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
+; VI-NEXT: v_div_scale_f32 v1, vcc, 1.0, s1, 1.0
+; VI-NEXT: v_rcp_f32_e32 v2, v0
+; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; VI-NEXT: v_fma_f32 v3, -v0, v2, 1.0
+; VI-NEXT: v_fma_f32 v2, v3, v2, v2
+; VI-NEXT: v_mul_f32_e32 v3, v1, v2
+; VI-NEXT: v_fma_f32 v4, -v0, v3, v1
+; VI-NEXT: v_fma_f32 v3, v4, v2, v3
+; VI-NEXT: v_fma_f32 v0, -v0, v3, v1
+; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
+; VI-NEXT: v_div_fmas_f32 v0, v0, v2, v3
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: v_mov_b32_e32 v1, 0x7fc00000
+; VI-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
+; VI-NEXT: v_mad_f32 v0, v0, 0, 0
+; VI-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc
+; VI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-NEXT: ; return to shader part epilog
.entry:
%tmp7 = fdiv float 1.000000e+00, %tmp6
%tmp8 = fmul float 0.000000e+00, %tmp7
@@ -989,39 +941,23 @@ define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i
ret float %.i198
}
-; This is a workaround because -enable-no-signed-zeros-fp-math does not set up
-; function attribute unsafe-fp-math automatically. Combine with the previous test
-; when that is done.
-define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #2 {
-; GCN-SAFE-LABEL: fneg_fadd_0_nsz:
-; GCN-SAFE: ; %bb.0: ; %.entry
-; GCN-SAFE-NEXT: v_rcp_f32_e32 v0, s1
-; GCN-SAFE-NEXT: v_mov_b32_e32 v1, s0
-; GCN-SAFE-NEXT: v_mul_f32_e32 v0, 0, v0
-; GCN-SAFE-NEXT: v_add_f32_e32 v0, 0, v0
-; GCN-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; GCN-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
-; GCN-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; GCN-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; GCN-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; GCN-SAFE-NEXT: ; return to shader part epilog
-;
-; GCN-NSZ-LABEL: fneg_fadd_0_nsz:
-; GCN-NSZ: ; %bb.0: ; %.entry
-; GCN-NSZ-NEXT: v_rcp_f32_e32 v0, s1
-; GCN-NSZ-NEXT: v_mov_b32_e32 v1, s0
-; GCN-NSZ-NEXT: v_mul_f32_e32 v0, 0, v0
-; GCN-NSZ-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; GCN-NSZ-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
-; GCN-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; GCN-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; GCN-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; GCN-NSZ-NEXT: ; return to shader part epilog
+define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr {
+; GCN-LABEL: fneg_fadd_0_nsz:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_rcp_f32_e32 v0, s1
+; GCN-NEXT: v_mov_b32_e32 v1, s0
+; GCN-NEXT: v_mul_f32_e32 v0, 0, v0
+; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
+; GCN-NEXT: v_mov_b32_e32 v1, 0x7fc00000
+; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GCN-NEXT: ; return to shader part epilog
.entry:
%tmp7 = fdiv afn float 1.000000e+00, %tmp6
%tmp8 = fmul float 0.000000e+00, %tmp7
%tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
- %.i188 = fadd float %tmp9, 0.000000e+00
+ %.i188 = fadd nsz float %tmp9, 0.000000e+00
%tmp10 = fcmp uge float %.i188, %tmp2
%tmp11 = fneg float %.i188
%.i092 = select i1 %tmp10, float %tmp2, float %tmp11
@@ -8072,3 +8008,6 @@ attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "unsafe-fp-math"="true" }
attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN-NSZ: {{.*}}
+; GCN-SAFE: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index e687745469014..c4ca79dc85312 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -175,103 +175,54 @@ define { float, float } @v_fneg_add_multi_use_fneg_x_f32(float %a, float %b, flo
ret { float, float } %insert.1
}
-; This one asserted with -enable-no-signed-zeros-fp-math
-define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #0 {
-; SI-SAFE-LABEL: fneg_fadd_0_f32:
-; SI-SAFE: ; %bb.0: ; %.entry
-; SI-SAFE-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
-; SI-SAFE-NEXT: v_rcp_f32_e32 v1, v0
-; SI-SAFE-NEXT: v_div_scale_f32 v2, vcc, 1.0, s1, 1.0
-; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; SI-SAFE-NEXT: v_fma_f32 v3, -v0, v1, 1.0
-; SI-SAFE-NEXT: v_fma_f32 v1, v3, v1, v1
-; SI-SAFE-NEXT: v_mul_f32_e32 v3, v2, v1
-; SI-SAFE-NEXT: v_fma_f32 v4, -v0, v3, v2
-; SI-SAFE-NEXT: v_fma_f32 v3, v4, v1, v3
-; SI-SAFE-NEXT: v_fma_f32 v0, -v0, v3, v2
-; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; SI-SAFE-NEXT: v_div_fmas_f32 v0, v0, v1, v3
-; SI-SAFE-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
-; SI-SAFE-NEXT: v_mad_f32 v0, v0, 0, 0
-; SI-SAFE-NEXT: v_mov_b32_e32 v1, s0
-; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
-; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; SI-SAFE-NEXT: ; return to shader part epilog
-;
-; SI-NSZ-LABEL: fneg_fadd_0_f32:
-; SI-NSZ: ; %bb.0: ; %.entry
-; SI-NSZ-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
-; SI-NSZ-NEXT: v_rcp_f32_e32 v1, v0
-; SI-NSZ-NEXT: v_div_scale_f32 v2, vcc, 1.0, s1, 1.0
-; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; SI-NSZ-NEXT: v_fma_f32 v3, -v0, v1, 1.0
-; SI-NSZ-NEXT: v_fma_f32 v1, v3, v1, v1
-; SI-NSZ-NEXT: v_mul_f32_e32 v3, v2, v1
-; SI-NSZ-NEXT: v_fma_f32 v4, -v0, v3, v2
-; SI-NSZ-NEXT: v_fma_f32 v3, v4, v1, v3
-; SI-NSZ-NEXT: v_fma_f32 v0, -v0, v3, v2
-; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; SI-NSZ-NEXT: v_div_fmas_f32 v0, v0, v1, v3
-; SI-NSZ-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
-; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0, v0
-; SI-NSZ-NEXT: v_mov_b32_e32 v1, s0
-; SI-NSZ-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
-; SI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; SI-NSZ-NEXT: ; return to shader part epilog
-;
-; VI-SAFE-LABEL: fneg_fadd_0_f32:
-; VI-SAFE: ; %bb.0: ; %.entry
-; VI-SAFE-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
-; VI-SAFE-NEXT: v_div_scale_f32 v1, vcc, 1.0, s1, 1.0
-; VI-SAFE-NEXT: v_rcp_f32_e32 v2, v0
-; VI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; VI-SAFE-NEXT: v_fma_f32 v3, -v0, v2, 1.0
-; VI-SAFE-NEXT: v_fma_f32 v2, v3, v2, v2
-; VI-SAFE-NEXT: v_mul_f32_e32 v3, v1, v2
-; VI-SAFE-NEXT: v_fma_f32 v4, -v0, v3, v1
-; VI-SAFE-NEXT: v_fma_f32 v3, v4, v2, v3
-; VI-SAFE-NEXT: v_fma_f32 v0, -v0, v3, v1
-; VI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; VI-SAFE-NEXT: v_div_fmas_f32 v0, v0, v2, v3
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, s0
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; VI-SAFE-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
-; VI-SAFE-NEXT: v_mad_f32 v0, v0, 0, 0
-; VI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc
-; VI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-SAFE-NEXT: ; return to shader part epilog
+define amdgpu_ps float @fneg_fadd_0_safe_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #0 {
+; SI-LABEL: fneg_fadd_0_safe_f32:
+; SI: ; %bb.0: ; %.entry
+; SI-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
+; SI-NEXT: v_rcp_f32_e32 v1, v0
+; SI-NEXT: v_div_scale_f32 v2, vcc, 1.0, s1, 1.0
+; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; SI-NEXT: v_fma_f32 v3, -v0, v1, 1.0
+; SI-NEXT: v_fma_f32 v1, v3, v1, v1
+; SI-NEXT: v_mul_f32_e32 v3, v2, v1
+; SI-NEXT: v_fma_f32 v4, -v0, v3, v2
+; SI-NEXT: v_fma_f32 v3, v4, v1, v3
+; SI-NEXT: v_fma_f32 v0, -v0, v3, v2
+; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
+; SI-NEXT: v_div_fmas_f32 v0, v0, v1, v3
+; SI-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
+; SI-NEXT: v_mad_f32 v0, v0, 0, 0
+; SI-NEXT: v_mov_b32_e32 v1, s0
+; SI-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
+; SI-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
+; SI-NEXT: v_mov_b32_e32 v1, 0x7fc00000
+; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
+; SI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-NEXT: ; return to shader part epilog
;
-; VI-NSZ-LABEL: fneg_fadd_0_f32:
-; VI-NSZ: ; %bb.0: ; %.entry
-; VI-NSZ-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
-; VI-NSZ-NEXT: v_div_scale_f32 v1, vcc, 1.0, s1, 1.0
-; VI-NSZ-NEXT: v_rcp_f32_e32 v2, v0
-; VI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; VI-NSZ-NEXT: v_fma_f32 v3, -v0, v2, 1.0
-; VI-NSZ-NEXT: v_fma_f32 v2, v3, v2, v2
-; VI-NSZ-NEXT: v_mul_f32_e32 v3, v1, v2
-; VI-NSZ-NEXT: v_fma_f32 v4, -v0, v3, v1
-; VI-NSZ-NEXT: v_fma_f32 v3, v4, v2, v3
-; VI-NSZ-NEXT: v_fma_f32 v0, -v0, v3, v1
-; VI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; VI-NSZ-NEXT: v_div_fmas_f32 v0, v0, v2, v3
-; VI-NSZ-NEXT: v_mov_b32_e32 v2, s0
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; VI-NSZ-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
-; VI-NSZ-NEXT: v_mul_f32_e32 v0, 0, v0
-; VI-NSZ-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc
-; VI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-NSZ-NEXT: ; return to shader part epilog
+; VI-LABEL: fneg_fadd_0_safe_f32:
+; VI: ; %bb.0: ; %.entry
+; VI-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
+; VI-NEXT: v_div_scale_f32 v1, vcc, 1.0, s1, 1.0
+; VI-NEXT: v_rcp_f32_e32 v2, v0
+; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; VI-NEXT: v_fma_f32 v3, -v0, v2, 1.0
+; VI-NEXT: v_fma_f32 v2, v3, v2, v2
+; VI-NEXT: v_mul_f32_e32 v3, v1, v2
+; VI-NEXT: v_fma_f32 v4, -v0, v3, v1
+; VI-NEXT: v_fma_f32 v3, v4, v2, v3
+; VI-NEXT: v_fma_f32 v0, -v0, v3, v1
+; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
+; VI-NEXT: v_div_fmas_f32 v0, v0, v2, v3
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: v_mov_b32_e32 v1, 0x7fc00000
+; VI-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
+; VI-NEXT: v_mad_f32 v0, v0, 0, 0
+; VI-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc
+; VI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-NEXT: ; return to shader part epilog
.entry:
%tmp7 = fdiv float 1.000000e+00, %tmp6
%tmp8 = fmul float 0.000000e+00, %tmp7
@@ -289,35 +240,22 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4
; function attribute unsafe-fp-math automatically. Combine with the previous test
; when that is done.
define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #2 {
-; GCN-SAFE-LABEL: fneg_fadd_0_nsz_f32:
-; GCN-SAFE: ; %bb.0: ; %.entry
-; GCN-SAFE-NEXT: v_rcp_f32_e32 v0, s1
-; GCN-SAFE-NEXT: v_mov_b32_e32 v1, s0
-; GCN-SAFE-NEXT: v_mul_f32_e32 v0, 0, v0
-; GCN-SAFE-NEXT: v_add_f32_e32 v0, 0, v0
-; GCN-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; GCN-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
-; GCN-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; GCN-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; GCN-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; GCN-SAFE-NEXT: ; return to shader part epilog
-;
-; GCN-NSZ-LABEL: fneg_fadd_0_nsz_f32:
-; GCN-NSZ: ; %bb.0: ; %.entry
-; GCN-NSZ-NEXT: v_rcp_f32_e32 v0, s1
-; GCN-NSZ-NEXT: v_mov_b32_e32 v1, s0
-; GCN-NSZ-NEXT: v_mul_f32_e32 v0, 0, v0
-; GCN-NSZ-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; GCN-NSZ-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
-; GCN-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; GCN-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; GCN-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; GCN-NSZ-NEXT: ; return to shader part epilog
+; GCN-LABEL: fneg_fadd_0_nsz_f32:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_rcp_f32_e32 v0, s1
+; GCN-NEXT: v_mov_b32_e32 v1, s0
+; GCN-NEXT: v_mul_f32_e32 v0, 0, v0
+; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
+; GCN-NEXT: v_mov_b32_e32 v1, 0x7fc00000
+; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GCN-NEXT: ; return to shader part epilog
.entry:
%tmp7 = fdiv afn float 1.000000e+00, %tmp6
%tmp8 = fmul float 0.000000e+00, %tmp7
%tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
- %.i188 = fadd float %tmp9, 0.000000e+00
+ %.i188 = fadd nsz float %tmp9, 0.000000e+00
%tmp10 = fcmp uge float %.i188, %tmp2
%tmp11 = fneg float %.i188
%.i092 = select i1 %tmp10, float %tmp2, float %tmp11
@@ -569,8 +507,6 @@ define amdgpu_ps double @fneg_fadd_0_f64(double inreg %tmp2, double inreg %tmp6,
; SI-NSZ-LABEL: fneg_fadd_0_f64:
; SI-NSZ: ; %bb.0: ; %.entry
; SI-NSZ-NEXT: v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0
-; SI-NSZ-NEXT: s_mov_b32 s4, 0
-; SI-NSZ-NEXT: s_brev_b32 s5, 1
; SI-NSZ-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
; SI-NSZ-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; SI-NSZ-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
@@ -583,7 +519,10 @@ define amdgpu_ps double @fneg_fadd_0_f64(double inreg %tmp2, double inreg %tmp6,
; SI-NSZ-NEXT: v_mov_b32_e32 v2, s1
; SI-NSZ-NEXT: v_mov_b32_e32 v3, s0
; SI-NSZ-NEXT: v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0
-; SI-NSZ-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; SI-NSZ-NEXT: s_mov_b32 s2, 0
+; SI-NSZ-NEXT: v_mul_f64 v[0:1], v[0:1], 0
+; SI-NSZ-NEXT: s_brev_b32 s3, 1
+; SI-NSZ-NEXT: v_fma_f64 v[0:1], v[0:1], s[2:3], s[2:3]
; SI-NSZ-NEXT: v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
; SI-NSZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
@@ -637,7 +576,8 @@ define amdgpu_ps double @fneg_fadd_0_f64(double inreg %tmp2, double inreg %tmp6,
; VI-NSZ-NEXT: v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0
; VI-NSZ-NEXT: s_mov_b32 s2, 0
; VI-NSZ-NEXT: s_brev_b32 s3, 1
-; VI-NSZ-NEXT: v_mul_f64 v[0:1], v[0:1], s[2:3]
+; VI-NSZ-NEXT: v_mul_f64 v[0:1], v[0:1], 0
+; VI-NSZ-NEXT: v_fma_f64 v[0:1], v[0:1], s[2:3], s[2:3]
; VI-NSZ-NEXT: v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
; VI-NSZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
@@ -663,102 +603,56 @@ define amdgpu_ps double @fneg_fadd_0_f64(double inreg %tmp2, double inreg %tmp6,
; function attribute unsafe-fp-math automatically. Combine with the previous test
; when that is done.
define amdgpu_ps double @fneg_fadd_0_nsz_f64(double inreg %tmp2, double inreg %tmp6, <4 x i32> %arg) #2 {
-; SI-SAFE-LABEL: fneg_fadd_0_nsz_f64:
-; SI-SAFE: ; %bb.0: ; %.entry
-; SI-SAFE-NEXT: v_rcp_f64_e32 v[0:1], s[2:3]
-; SI-SAFE-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
-; SI-SAFE-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; SI-SAFE-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
-; SI-SAFE-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; SI-SAFE-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
-; SI-SAFE-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; SI-SAFE-NEXT: v_mov_b32_e32 v2, s1
-; SI-SAFE-NEXT: v_mul_f64 v[0:1], v[0:1], 0
-; SI-SAFE-NEXT: v_mov_b32_e32 v3, s0
-; SI-SAFE-NEXT: v_add_f64 v[0:1], v[0:1], 0
-; SI-SAFE-NEXT: v_cmp_ngt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-SAFE-NEXT: v_xor_b32_e32 v4, 0x80000000, v1
-; SI-SAFE-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
-; SI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; SI-SAFE-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
-; SI-SAFE-NEXT: s_and_b64 s[0:1], vcc, exec
-; SI-SAFE-NEXT: s_cselect_b32 s1, 0, 0x7ff80000
-; SI-SAFE-NEXT: s_mov_b32 s0, 0
-; SI-SAFE-NEXT: ; return to shader part epilog
-;
-; SI-NSZ-LABEL: fneg_fadd_0_nsz_f64:
-; SI-NSZ: ; %bb.0: ; %.entry
-; SI-NSZ-NEXT: v_rcp_f64_e32 v[0:1], s[2:3]
-; SI-NSZ-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
-; SI-NSZ-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; SI-NSZ-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
-; SI-NSZ-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; SI-NSZ-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
-; SI-NSZ-NEXT: s_mov_b32 s2, 0
-; SI-NSZ-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; SI-NSZ-NEXT: s_brev_b32 s3, 1
-; SI-NSZ-NEXT: v_mul_f64 v[0:1], v[0:1], s[2:3]
-; SI-NSZ-NEXT: v_mov_b32_e32 v2, s1
-; SI-NSZ-NEXT: v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
-; SI-NSZ-NEXT: v_mov_b32_e32 v3, s0
-; SI-NSZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; SI-NSZ-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
-; SI-NSZ-NEXT: s_and_b64 s[0:1], vcc, exec
-; SI-NSZ-NEXT: s_cselect_b32 s1, 0, 0x7ff80000
-; SI-NSZ-NEXT: s_mov_b32 s0, 0
-; SI-NSZ-NEXT: ; return to shader part epilog
-;
-; VI-SAFE-LABEL: fneg_fadd_0_nsz_f64:
-; VI-SAFE: ; %bb.0: ; %.entry
-; VI-SAFE-NEXT: v_rcp_f64_e32 v[0:1], s[2:3]
-; VI-SAFE-NEXT: v_mov_b32_e32 v4, s0
-; VI-SAFE-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
-; VI-SAFE-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; VI-SAFE-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
-; VI-SAFE-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; VI-SAFE-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
-; VI-SAFE-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, s1
-; VI-SAFE-NEXT: v_mul_f64 v[0:1], v[0:1], 0
-; VI-SAFE-NEXT: v_add_f64 v[0:1], v[0:1], 0
-; VI-SAFE-NEXT: v_cmp_ngt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SAFE-NEXT: v_xor_b32_e32 v3, 0x80000000, v1
-; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
-; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; VI-SAFE-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
-; VI-SAFE-NEXT: s_and_b64 s[0:1], vcc, exec
-; VI-SAFE-NEXT: s_cselect_b32 s1, 0, 0x7ff80000
-; VI-SAFE-NEXT: s_mov_b32 s0, 0
-; VI-SAFE-NEXT: ; return to shader part epilog
+; SI-LABEL: fneg_fadd_0_nsz_f64:
+; SI: ; %bb.0: ; %.entry
+; SI-NEXT: v_rcp_f64_e32 v[0:1], s[2:3]
+; SI-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; SI-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
+; SI-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; SI-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
+; SI-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; SI-NEXT: s_mov_b32 s2, 0
+; SI-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
+; SI-NEXT: s_brev_b32 s3, 1
+; SI-NEXT: v_mul_f64 v[0:1], v[0:1], s[2:3]
+; SI-NEXT: v_mov_b32_e32 v2, s1
+; SI-NEXT: v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
+; SI-NEXT: v_mov_b32_e32 v3, s0
+; SI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; SI-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; SI-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
+; SI-NEXT: s_and_b64 s[0:1], vcc, exec
+; SI-NEXT: s_cselect_b32 s1, 0, 0x7ff80000
+; SI-NEXT: s_mov_b32 s0, 0
+; SI-NEXT: ; return to shader part epilog
;
-; VI-NSZ-LABEL: fneg_fadd_0_nsz_f64:
-; VI-NSZ: ; %bb.0: ; %.entry
-; VI-NSZ-NEXT: v_rcp_f64_e32 v[0:1], s[2:3]
-; VI-NSZ-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
-; VI-NSZ-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; VI-NSZ-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
-; VI-NSZ-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; VI-NSZ-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
-; VI-NSZ-NEXT: s_mov_b32 s2, 0
-; VI-NSZ-NEXT: s_brev_b32 s3, 1
-; VI-NSZ-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; VI-NSZ-NEXT: v_mov_b32_e32 v2, s1
-; VI-NSZ-NEXT: v_mov_b32_e32 v3, s0
-; VI-NSZ-NEXT: v_mul_f64 v[0:1], v[0:1], s[2:3]
-; VI-NSZ-NEXT: v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
-; VI-NSZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; VI-NSZ-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
-; VI-NSZ-NEXT: s_and_b64 s[0:1], vcc, exec
-; VI-NSZ-NEXT: s_cselect_b32 s1, 0, 0x7ff80000
-; VI-NSZ-NEXT: s_mov_b32 s0, 0
-; VI-NSZ-NEXT: ; return to shader part epilog
+; VI-LABEL: fneg_fadd_0_nsz_f64:
+; VI: ; %bb.0: ; %.entry
+; VI-NEXT: v_rcp_f64_e32 v[0:1], s[2:3]
+; VI-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; VI-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
+; VI-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; VI-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
+; VI-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; VI-NEXT: s_mov_b32 s2, 0
+; VI-NEXT: s_brev_b32 s3, 1
+; VI-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
+; VI-NEXT: v_mov_b32_e32 v2, s1
+; VI-NEXT: v_mov_b32_e32 v3, s0
+; VI-NEXT: v_mul_f64 v[0:1], v[0:1], s[2:3]
+; VI-NEXT: v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
+; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; VI-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; VI-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
+; VI-NEXT: s_and_b64 s[0:1], vcc, exec
+; VI-NEXT: s_cselect_b32 s1, 0, 0x7ff80000
+; VI-NEXT: s_mov_b32 s0, 0
+; VI-NEXT: ; return to shader part epilog
.entry:
%tmp7 = fdiv afn double 1.000000e+00, %tmp6
%tmp8 = fmul double 0.000000e+00, %tmp7
%tmp9 = fmul reassoc nnan arcp contract double 0.000000e+00, %tmp8
- %.i188 = fadd double %tmp9, 0.000000e+00
+ %.i188 = fadd nsz double %tmp9, 0.000000e+00
%tmp10 = fcmp uge double %.i188, %tmp2
%tmp11 = fneg double %.i188
%.i092 = select i1 %tmp10, double %tmp2, double %tmp11
diff --git a/llvm/test/CodeGen/X86/fadd-combines.ll b/llvm/test/CodeGen/X86/fadd-combines.ll
index 1082177e3da19..2c06c538ae10d 100644
--- a/llvm/test/CodeGen/X86/fadd-combines.ll
+++ b/llvm/test/CodeGen/X86/fadd-combines.ll
@@ -5,7 +5,7 @@ define float @fadd_zero_f32(float %x) #0 {
; CHECK-LABEL: fadd_zero_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
- %y = fadd float %x, 0.0
+ %y = fadd nsz float %x, 0.0
ret float %y
}
@@ -13,7 +13,7 @@ define <4 x float> @fadd_zero_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_zero_4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
- %y = fadd <4 x float> %x, zeroinitializer
+ %y = fadd nsz <4 x float> %x, zeroinitializer
ret <4 x float> %y
}
@@ -31,8 +31,8 @@ define float @fadd_2const_f32(float %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fadd float %x, 1.0
- %z = fadd float %y, 2.0
+ %y = fadd reassoc nsz float %x, 1.0
+ %z = fadd reassoc nsz float %y, 2.0
ret float %z
}
@@ -45,8 +45,8 @@ define <4 x float> @fadd_2const_4f32(<4 x float> %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: addps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
- %z = fadd <4 x float> %y, <float 4.0, float 3.0, float 2.0, float 1.0>
+ %y = fadd reassoc nsz <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %z = fadd reassoc nsz <4 x float> %y, <float 4.0, float 3.0, float 2.0, float 1.0>
ret <4 x float> %z
}
@@ -56,8 +56,8 @@ define float @fadd_x_fmul_x_c_f32(float %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fmul float %x, 2.0
- %z = fadd float %x, %y
+ %y = fmul reassoc nsz float %x, 2.0
+ %z = fadd reassoc nsz float %x, %y
ret float %z
}
@@ -70,8 +70,8 @@ define <4 x float> @fadd_x_fmul_x_c_4f32(<4 x float> %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
- %z = fadd <4 x float> %x, %y
+ %y = fmul reassoc nsz <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %z = fadd reassoc nsz <4 x float> %x, %y
ret <4 x float> %z
}
@@ -81,8 +81,8 @@ define float @fadd_fmul_x_c_x_f32(float %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fmul float %x, 2.0
- %z = fadd float %y, %x
+ %y = fmul reassoc nsz float %x, 2.0
+ %z = fadd reassoc nsz float %y, %x
ret float %z
}
@@ -95,8 +95,8 @@ define <4 x float> @fadd_fmul_x_c_x_4f32(<4 x float> %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
- %z = fadd <4 x float> %y, %x
+ %y = fmul reassoc nsz <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %z = fadd reassoc nsz <4 x float> %y, %x
ret <4 x float> %z
}
@@ -106,9 +106,9 @@ define float @fadd_fadd_x_x_fmul_x_c_f32(float %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fadd float %x, %x
- %z = fmul float %x, 2.0
- %w = fadd float %y, %z
+ %y = fadd reassoc nsz float %x, %x
+ %z = fmul reassoc nsz float %x, 2.0
+ %w = fadd reassoc nsz float %y, %z
ret float %w
}
@@ -121,9 +121,9 @@ define <4 x float> @fadd_fadd_x_x_fmul_x_c_4f32(<4 x float> %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fadd <4 x float> %x, %x
- %z = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
- %w = fadd <4 x float> %y, %z
+ %y = fadd reassoc nsz <4 x float> %x, %x
+ %z = fmul reassoc nsz <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %w = fadd reassoc nsz <4 x float> %y, %z
ret <4 x float> %w
}
@@ -133,9 +133,9 @@ define float @fadd_fmul_x_c_fadd_x_x_f32(float %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fadd float %x, %x
- %z = fmul float %x, 2.0
- %w = fadd float %z, %y
+ %y = fadd reassoc nsz float %x, %x
+ %z = fmul reassoc nsz float %x, 2.0
+ %w = fadd reassoc nsz float %z, %y
ret float %w
}
@@ -148,9 +148,9 @@ define <4 x float> @fadd_fmul_x_c_fadd_x_x_4f32(<4 x float> %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fadd <4 x float> %x, %x
- %z = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
- %w = fadd <4 x float> %z, %y
+ %y = fadd reassoc nsz <4 x float> %x, %x
+ %z = fmul reassoc nsz <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %w = fadd reassoc nsz <4 x float> %z, %y
ret <4 x float> %w
}
@@ -160,8 +160,8 @@ define float @fadd_x_fadd_x_x_f32(float %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fadd float %x, %x
- %z = fadd float %x, %y
+ %y = fadd reassoc nsz float %x, %x
+ %z = fadd reassoc nsz float %x, %y
ret float %z
}
@@ -174,8 +174,8 @@ define <4 x float> @fadd_x_fadd_x_x_4f32(<4 x float> %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fadd <4 x float> %x, %x
- %z = fadd <4 x float> %x, %y
+ %y = fadd reassoc nsz <4 x float> %x, %x
+ %z = fadd reassoc nsz <4 x float> %x, %y
ret <4 x float> %z
}
@@ -185,8 +185,8 @@ define float @fadd_fadd_x_x_x_f32(float %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fadd float %x, %x
- %z = fadd float %y, %x
+ %y = fadd reassoc nsz float %x, %x
+ %z = fadd reassoc nsz float %y, %x
ret float %z
}
@@ -199,8 +199,8 @@ define <4 x float> @fadd_fadd_x_x_x_4f32(<4 x float> %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fadd <4 x float> %x, %x
- %z = fadd <4 x float> %y, %x
+ %y = fadd reassoc nsz <4 x float> %x, %x
+ %z = fadd reassoc nsz <4 x float> %y, %x
ret <4 x float> %z
}
@@ -210,8 +210,8 @@ define float @fadd_fadd_x_x_fadd_x_x_f32(float %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fadd float %x, %x
- %z = fadd float %y, %y
+ %y = fadd reassoc nsz float %x, %x
+ %z = fadd reassoc nsz float %y, %y
ret float %z
}
@@ -224,8 +224,8 @@ define <4 x float> @fadd_fadd_x_x_fadd_x_x_4f32(<4 x float> %x) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
- %y = fadd <4 x float> %x, %x
- %z = fadd <4 x float> %y, %y
+ %y = fadd reassoc nsz <4 x float> %x, %x
+ %z = fadd reassoc nsz <4 x float> %y, %y
ret <4 x float> %z
}
@@ -241,9 +241,9 @@ define float @fadd_const_multiuse_attr(float %x) #0 {
; CHECK-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: retq
- %a1 = fadd float %x, 42.0
- %a2 = fadd float %a1, 17.0
- %a3 = fadd float %a1, %a2
+ %a1 = fadd reassoc nsz float %x, 42.0
+ %a2 = fadd reassoc nsz float %a1, 17.0
+ %a3 = fadd reassoc nsz float %a1, %a2
ret float %a3
}
@@ -275,4 +275,4 @@ define <2 x double> @fmul2_negated_vec(<2 x double> %a, <2 x double> %b, <2 x do
ret <2 x double> %sub
}
-attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "no-signed-zeros-fp-math"="true" }
+attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
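For context, the dropped "no-signed-zeros-fp-math"="true" attribute used to
feed TargetOptions::NoSignedZerosFPMath, which DAGCombiner consulted
globally; after this change only per-instruction flags matter here. A hedged
before/after sketch in IR, assuming the simple zero fold:

  ; Before: the function attribute enabled the fold for every fadd.
  define float @f(float %x) "no-signed-zeros-fp-math"="true" {
    %y = fadd float %x, 0.0
    ret float %y
  }

  ; After: the flag must be stated on the instruction itself.
  define float @g(float %x) {
    %y = fadd nsz float %x, 0.0
    ret float %y
  }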