[llvm] [TargetLowering] Remove NoSignedZerosFPMath uses (PR #160975)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 26 22:45:41 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: None (paperchalice)
<details>
<summary>Changes</summary>
---
Patch is 45.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160975.diff
6 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+5-7)
- (modified) llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll (+255-86)
- (modified) llvm/test/CodeGen/AMDGPU/v_mac.ll (+4-5)
- (modified) llvm/test/CodeGen/AMDGPU/v_mac_f16.ll (+7-7)
- (modified) llvm/test/CodeGen/PowerPC/scalar_cmp.ll (+156-70)
- (modified) llvm/test/CodeGen/X86/negative-sin.ll (+5-10)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4145c8a54a6fe..db3b21b4f83f4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7492,7 +7492,6 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
// Pre-increment recursion depth for use in recursive calls.
++Depth;
const SDNodeFlags Flags = Op->getFlags();
- const TargetOptions &Options = DAG.getTarget().Options;
EVT VT = Op.getValueType();
unsigned Opcode = Op.getOpcode();
@@ -7572,7 +7571,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
return DAG.getBuildVector(VT, DL, Ops);
}
case ISD::FADD: {
- if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ if (!Flags.hasNoSignedZeros())
break;
// After operation legalization, it might not be legal to create new FSUBs.
@@ -7617,7 +7616,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
case ISD::FSUB: {
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ if (!Flags.hasNoSignedZeros())
break;
SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
@@ -7678,7 +7677,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
case ISD::FMA:
case ISD::FMAD: {
- if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ if (!Flags.hasNoSignedZeros())
break;
SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
@@ -8797,7 +8796,6 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
EVT VT = Node->getValueType(0);
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
bool IsMax = Opc == ISD::FMAXIMUMNUM;
- const TargetOptions &Options = DAG.getTarget().Options;
SDNodeFlags Flags = Node->getFlags();
unsigned NewOp =
@@ -8856,8 +8854,8 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
// TODO: We need quiet sNaN if strictfp.
// Fixup signed zero behavior.
- if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
- DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
+ if (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
+ DAG.isKnownNeverZeroFloat(RHS)) {
return MinMax;
}
SDValue TestZero =
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
index 92d3277d5d3e3..bb22144b815a1 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
@@ -4148,28 +4148,28 @@ define <2 x half> @mul_select_negk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; --------------------------------------------------------------------------------
define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
-; CI-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
-; CI-SAFE: ; %bb.0:
-; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_add_f32_e32 v3, 4.0, v3
-; CI-SAFE-NEXT: v_add_f32_e32 v2, 4.0, v2
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
-; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2
-; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
-; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
+; CI-LABEL: select_fneg_posk_src_add_v2f16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_add_f32_e32 v3, 4.0, v3
+; CI-NEXT: v_add_f32_e32 v2, 4.0, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; CI-NEXT: v_or_b32_e32 v2, v2, v3
+; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
+; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
; VI-SAFE: ; %bb.0:
@@ -4229,21 +4229,6 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <
; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; CI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
-; CI-NSZ: ; %bb.0:
-; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NSZ-NEXT: v_sub_f32_e32 v2, -4.0, v2
-; CI-NSZ-NEXT: v_sub_f32_e32 v3, -4.0, v3
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
-; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
; VI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
; VI-NSZ: ; %bb.0:
; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4302,6 +4287,105 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <
ret <2 x half> %select
}
+define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
+; CI-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_sub_f32_e32 v2, -4.0, v2
+; CI-NEXT: v_sub_f32_e32 v3, -4.0, v3
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_mov_b32_e32 v1, 0xc400
+; VI-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_sub_f16_e32 v2, -4.0, v2
+; VI-NEXT: v_mov_b32_e32 v3, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-SAFE-TRUE16: ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-SAFE-FAKE16: ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-NSZ-TRUE16: ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-NSZ-FAKE16: ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq <2 x i32> %c, zeroinitializer
+ %add = fadd nsz <2 x half> %x, <half 4.0, half 4.0>
+ %fneg = fneg <2 x half> %add
+ %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
+ ret <2 x half> %select
+}
+
define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
; CI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
; CI-SAFE: ; %bb.0:
@@ -4704,34 +4788,34 @@ define <2 x half> @select_fneg_posk_src_fma_v2f16(<2 x i32> %c, <2 x half> %x, <
}
define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %z) {
-; CI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
-; CI-SAFE: ; %bb.0:
-; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-SAFE-NEXT: v_mul_f32_e32 v3, 4.0, v3
-; CI-SAFE-NEXT: v_add_f32_e32 v3, v3, v5
-; CI-SAFE-NEXT: v_mul_f32_e32 v2, 4.0, v2
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_add_f32_e32 v2, v2, v4
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
-; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2
-; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
-; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
+; CI-LABEL: select_fneg_posk_src_fmad_v2f16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; CI-NEXT: v_mul_f32_e32 v3, 4.0, v3
+; CI-NEXT: v_add_f32_e32 v3, v3, v5
+; CI-NEXT: v_mul_f32_e32 v2, 4.0, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_add_f32_e32 v2, v2, v4
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; CI-NEXT: v_or_b32_e32 v2, v2, v3
+; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
+; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
; VI-SAFE: ; %bb.0:
@@ -4793,27 +4877,6 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; CI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
-; CI-NSZ: ; %bb.0:
-; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-NSZ-NEXT: v_mul_f32_e32 v2, -4.0, v2
-; CI-NSZ-NEXT: v_mul_f32_e32 v3, -4.0, v3
-; CI-NSZ-NEXT: v_sub_f32_e32 v2, v2, v4
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-NSZ-NEXT: v_sub_f32_e32 v3, v3, v5
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
-; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
; VI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
; VI-NSZ: ; %bb.0:
; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4873,6 +4936,112 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
ret <2 x half> %select
}
+define <2 x half> @select_fneg_posk_src_fmad_v2f16_nsz(<2 x i32> %c, <2 x half> %x, <2 x half> %z) {
+; CI-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; CI-NEXT: v_mul_f32_e32 v2, -4.0, v2
+; CI-NEXT: v_mul_f32_e32 v3, -4.0, v3
+; CI-NEXT: v_sub_f32_e32 v2, v2, v4
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_sub_f32_e32 v3, v3, v5
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3
+; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; VI-NEXT: v_fma_f16 v1, v4, -4.0, -v1
+; VI-NEXT: v_fma_f16 v2, v2, -4.0, -v3
+; VI-NEXT: v_mov_b32_e32 v3, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-SAFE-TRUE16: ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-SAFE-FAKE16: ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-NSZ-TRUE16: ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/160975
More information about the llvm-commits
mailing list