[llvm] b0a755b - [TargetLowering] Remove NoSignedZerosFPMath uses (#160975)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 28 23:34:00 PDT 2025
Author: paperchalice
Date: 2025-09-29T14:33:56+08:00
New Revision: b0a755b2bfac0a82383dcc31eca82d9e132f1afc
URL: https://github.com/llvm/llvm-project/commit/b0a755b2bfac0a82383dcc31eca82d9e132f1afc
DIFF: https://github.com/llvm/llvm-project/commit/b0a755b2bfac0a82383dcc31eca82d9e132f1afc.diff
LOG: [TargetLowering] Remove NoSignedZerosFPMath uses (#160975)
Remove NoSignedZerosFPMath in TargetLowering part, users should always
use instruction level fast math flags.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
llvm/test/CodeGen/AMDGPU/v_mac.ll
llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
llvm/test/CodeGen/PowerPC/scalar_cmp.ll
llvm/test/CodeGen/X86/negative-sin.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index dba5a8c0a7315..cc503d324e74b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7492,7 +7492,6 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
// Pre-increment recursion depth for use in recursive calls.
++Depth;
const SDNodeFlags Flags = Op->getFlags();
- const TargetOptions &Options = DAG.getTarget().Options;
EVT VT = Op.getValueType();
unsigned Opcode = Op.getOpcode();
@@ -7572,7 +7571,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
return DAG.getBuildVector(VT, DL, Ops);
}
case ISD::FADD: {
- if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ if (!Flags.hasNoSignedZeros())
break;
// After operation legalization, it might not be legal to create new FSUBs.
@@ -7617,7 +7616,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
case ISD::FSUB: {
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ if (!Flags.hasNoSignedZeros())
break;
SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
@@ -7678,7 +7677,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
case ISD::FMA:
case ISD::FMAD: {
- if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ if (!Flags.hasNoSignedZeros())
break;
SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
@@ -8797,7 +8796,6 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
EVT VT = Node->getValueType(0);
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
bool IsMax = Opc == ISD::FMAXIMUMNUM;
- const TargetOptions &Options = DAG.getTarget().Options;
SDNodeFlags Flags = Node->getFlags();
unsigned NewOp =
@@ -8858,8 +8856,8 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
// TODO: We need quiet sNaN if strictfp.
// Fixup signed zero behavior.
- if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
- DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
+ if (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
+ DAG.isKnownNeverZeroFloat(RHS)) {
return MinMax;
}
SDValue TestZero =
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
index 92d3277d5d3e3..bb22144b815a1 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
@@ -4148,28 +4148,28 @@ define <2 x half> @mul_select_negk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; --------------------------------------------------------------------------------
define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
-; CI-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
-; CI-SAFE: ; %bb.0:
-; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_add_f32_e32 v3, 4.0, v3
-; CI-SAFE-NEXT: v_add_f32_e32 v2, 4.0, v2
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
-; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2
-; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
-; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
+; CI-LABEL: select_fneg_posk_src_add_v2f16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_add_f32_e32 v3, 4.0, v3
+; CI-NEXT: v_add_f32_e32 v2, 4.0, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; CI-NEXT: v_or_b32_e32 v2, v2, v3
+; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
+; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
; VI-SAFE: ; %bb.0:
@@ -4229,21 +4229,6 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <
; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; CI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
-; CI-NSZ: ; %bb.0:
-; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NSZ-NEXT: v_sub_f32_e32 v2, -4.0, v2
-; CI-NSZ-NEXT: v_sub_f32_e32 v3, -4.0, v3
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
-; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
; VI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
; VI-NSZ: ; %bb.0:
; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4302,6 +4287,105 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <
ret <2 x half> %select
}
+define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
+; CI-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_sub_f32_e32 v2, -4.0, v2
+; CI-NEXT: v_sub_f32_e32 v3, -4.0, v3
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_mov_b32_e32 v1, 0xc400
+; VI-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_sub_f16_e32 v2, -4.0, v2
+; VI-NEXT: v_mov_b32_e32 v3, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-SAFE-TRUE16: ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-SAFE-FAKE16: ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-NSZ-TRUE16: ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-NSZ-FAKE16: ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq <2 x i32> %c, zeroinitializer
+ %add = fadd nsz <2 x half> %x, <half 4.0, half 4.0>
+ %fneg = fneg <2 x half> %add
+ %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
+ ret <2 x half> %select
+}
+
define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
; CI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
; CI-SAFE: ; %bb.0:
@@ -4704,34 +4788,34 @@ define <2 x half> @select_fneg_posk_src_fma_v2f16(<2 x i32> %c, <2 x half> %x, <
}
define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %z) {
-; CI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
-; CI-SAFE: ; %bb.0:
-; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-SAFE-NEXT: v_mul_f32_e32 v3, 4.0, v3
-; CI-SAFE-NEXT: v_add_f32_e32 v3, v3, v5
-; CI-SAFE-NEXT: v_mul_f32_e32 v2, 4.0, v2
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_add_f32_e32 v2, v2, v4
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
-; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2
-; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
-; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
+; CI-LABEL: select_fneg_posk_src_fmad_v2f16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; CI-NEXT: v_mul_f32_e32 v3, 4.0, v3
+; CI-NEXT: v_add_f32_e32 v3, v3, v5
+; CI-NEXT: v_mul_f32_e32 v2, 4.0, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_add_f32_e32 v2, v2, v4
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; CI-NEXT: v_or_b32_e32 v2, v2, v3
+; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
+; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
; VI-SAFE: ; %bb.0:
@@ -4793,27 +4877,6 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; CI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
-; CI-NSZ: ; %bb.0:
-; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-NSZ-NEXT: v_mul_f32_e32 v2, -4.0, v2
-; CI-NSZ-NEXT: v_mul_f32_e32 v3, -4.0, v3
-; CI-NSZ-NEXT: v_sub_f32_e32 v2, v2, v4
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-NSZ-NEXT: v_sub_f32_e32 v3, v3, v5
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
-; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
; VI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
; VI-NSZ: ; %bb.0:
; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4873,6 +4936,112 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
ret <2 x half> %select
}
+define <2 x half> @select_fneg_posk_src_fmad_v2f16_nsz(<2 x i32> %c, <2 x half> %x, <2 x half> %z) {
+; CI-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; CI-NEXT: v_mul_f32_e32 v2, -4.0, v2
+; CI-NEXT: v_mul_f32_e32 v3, -4.0, v3
+; CI-NEXT: v_sub_f32_e32 v2, v2, v4
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_sub_f32_e32 v3, v3, v5
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3
+; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; VI-NEXT: v_fma_f16 v1, v4, -4.0, -v1
+; VI-NEXT: v_fma_f16 v2, v2, -4.0, -v3
+; VI-NEXT: v_mov_b32_e32 v3, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-SAFE-TRUE16: ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-SAFE-FAKE16: ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-NSZ-TRUE16: ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-NSZ-FAKE16: ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq <2 x i32> %c, zeroinitializer
+ %fmad = call nsz <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
+ %fneg = fneg <2 x half> %fmad
+ %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
+ ret <2 x half> %select
+}
+
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll
index c12871536bafa..f5dc824aae35f 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -116,7 +116,7 @@ entry:
; GCN-LABEL: {{^}}nsz_mad_sub0_src0:
; GCN-NOT: v_mac_f32
; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-define amdgpu_kernel void @nsz_mad_sub0_src0(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @nsz_mad_sub0_src0(ptr addrspace(1) %out, ptr addrspace(1) %in) {
entry:
%b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
%c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
@@ -125,7 +125,7 @@ entry:
%b = load float, ptr addrspace(1) %b_ptr
%c = load float, ptr addrspace(1) %c_ptr
- %neg_a = fsub float 0.0, %a
+ %neg_a = fsub nsz float 0.0, %a
%tmp0 = fmul float %neg_a, %b
%tmp1 = fadd float %tmp0, %c
@@ -176,7 +176,7 @@ entry:
; GCN-LABEL: {{^}}nsz_mad_sub0_src1:
; GCN-NOT: v_mac_f32
; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-define amdgpu_kernel void @nsz_mad_sub0_src1(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @nsz_mad_sub0_src1(ptr addrspace(1) %out, ptr addrspace(1) %in) {
entry:
%b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
%c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
@@ -185,7 +185,7 @@ entry:
%b = load float, ptr addrspace(1) %b_ptr
%c = load float, ptr addrspace(1) %c_ptr
- %neg_b = fsub float 0.0, %b
+ %neg_b = fsub nsz float 0.0, %b
%tmp0 = fmul float %a, %neg_b
%tmp1 = fadd float %tmp0, %c
@@ -310,6 +310,5 @@ define float @v_mac_f32_dynamic_ftz(float %a, float %b, float %c) "denormal-fp-m
declare i32 @llvm.amdgcn.workitem.id.x() #2
attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
-attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
index bcc60b06db291..8da6f2348690a 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -236,7 +236,7 @@ entry:
%b.val = load half, ptr addrspace(1) %b
%c.val = load half, ptr addrspace(1) %c
- %a.neg = fsub half 0.0, %a.val
+ %a.neg = fsub nsz half 0.0, %a.val
%t.val = fmul half %a.neg, %b.val
%r.val = fadd half %t.val, %c.val
@@ -263,7 +263,7 @@ entry:
%b.val = load half, ptr addrspace(1) %b
%c.val = load half, ptr addrspace(1) %c
- %b.neg = fsub half 0.0, %b.val
+ %b.neg = fsub nsz half 0.0, %b.val
%t.val = fmul half %a.val, %b.neg
%r.val = fadd half %t.val, %c.val
@@ -290,7 +290,7 @@ entry:
%b.val = load half, ptr addrspace(1) %b
%c.val = load half, ptr addrspace(1) %c
- %c.neg = fsub half 0.0, %c.val
+ %c.neg = fsub nsz half 0.0, %c.val
%t.val = fmul half %a.val, %b.val
%r.val = fadd half %t.val, %c.neg
@@ -601,7 +601,7 @@ entry:
%b.val = load <2 x half>, ptr addrspace(1) %b
%c.val = load <2 x half>, ptr addrspace(1) %c
- %a.neg = fsub <2 x half> <half 0.0, half 0.0>, %a.val
+ %a.neg = fsub nsz <2 x half> <half 0.0, half 0.0>, %a.val
%t.val = fmul <2 x half> %a.neg, %b.val
%r.val = fadd <2 x half> %t.val, %c.val
@@ -634,7 +634,7 @@ entry:
%b.val = load <2 x half>, ptr addrspace(1) %b
%c.val = load <2 x half>, ptr addrspace(1) %c
- %b.neg = fsub <2 x half> <half 0.0, half 0.0>, %b.val
+ %b.neg = fsub nsz <2 x half> <half 0.0, half 0.0>, %b.val
%t.val = fmul <2 x half> %a.val, %b.neg
%r.val = fadd <2 x half> %t.val, %c.val
@@ -667,7 +667,7 @@ entry:
%b.val = load <2 x half>, ptr addrspace(1) %b
%c.val = load <2 x half>, ptr addrspace(1) %c
- %c.neg = fsub <2 x half> <half 0.0, half 0.0>, %c.val
+ %c.neg = fsub nsz <2 x half> <half 0.0, half 0.0>, %c.val
%t.val = fmul <2 x half> %a.val, %b.val
%r.val = fadd <2 x half> %t.val, %c.neg
@@ -678,5 +678,5 @@ entry:
declare void @llvm.amdgcn.s.barrier() #2
attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #1 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
attributes #2 = { nounwind convergent }
diff --git a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll
index aaabd76e163bb..fd0b494d57677 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll
@@ -20,18 +20,18 @@
define float @select_oeq_float(float %a, float %b, float %c, float %d) {
; FAST-P8-LABEL: select_oeq_float:
; FAST-P8: # %bb.0: # %entry
-; FAST-P8-NEXT: xssubsp f0, f2, f1
-; FAST-P8-NEXT: xssubsp f1, f1, f2
-; FAST-P8-NEXT: fsel f1, f1, f3, f4
-; FAST-P8-NEXT: fsel f1, f0, f1, f4
+; FAST-P8-NEXT: xssubsp f0, f1, f2
+; FAST-P8-NEXT: xsnegdp f1, f0
+; FAST-P8-NEXT: fsel f0, f0, f3, f4
+; FAST-P8-NEXT: fsel f1, f1, f0, f4
; FAST-P8-NEXT: blr
;
; FAST-P9-LABEL: select_oeq_float:
; FAST-P9: # %bb.0: # %entry
-; FAST-P9-NEXT: xssubsp f0, f2, f1
-; FAST-P9-NEXT: xssubsp f1, f1, f2
-; FAST-P9-NEXT: fsel f1, f1, f3, f4
-; FAST-P9-NEXT: fsel f1, f0, f1, f4
+; FAST-P9-NEXT: xssubsp f0, f1, f2
+; FAST-P9-NEXT: xsnegdp f1, f0
+; FAST-P9-NEXT: fsel f0, f0, f3, f4
+; FAST-P9-NEXT: fsel f1, f1, f0, f4
; FAST-P9-NEXT: blr
;
; NO-FAST-P8-LABEL: select_oeq_float:
@@ -59,6 +59,48 @@ entry:
ret float %cond
}
+define float @select_oeq_float_nsz(float %a, float %b, float %c, float %d) {
+; FAST-P8-LABEL: select_oeq_float_nsz:
+; FAST-P8: # %bb.0: # %entry
+; FAST-P8-NEXT: xssubsp f0, f2, f1
+; FAST-P8-NEXT: xssubsp f1, f1, f2
+; FAST-P8-NEXT: fsel f1, f1, f3, f4
+; FAST-P8-NEXT: fsel f1, f0, f1, f4
+; FAST-P8-NEXT: blr
+;
+; FAST-P9-LABEL: select_oeq_float_nsz:
+; FAST-P9: # %bb.0: # %entry
+; FAST-P9-NEXT: xssubsp f0, f2, f1
+; FAST-P9-NEXT: xssubsp f1, f1, f2
+; FAST-P9-NEXT: fsel f1, f1, f3, f4
+; FAST-P9-NEXT: fsel f1, f0, f1, f4
+; FAST-P9-NEXT: blr
+;
+; NO-FAST-P8-LABEL: select_oeq_float_nsz:
+; NO-FAST-P8: # %bb.0: # %entry
+; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2
+; NO-FAST-P8-NEXT: beq cr0, .LBB1_2
+; NO-FAST-P8-NEXT: # %bb.1: # %entry
+; NO-FAST-P8-NEXT: fmr f3, f4
+; NO-FAST-P8-NEXT: .LBB1_2: # %entry
+; NO-FAST-P8-NEXT: fmr f1, f3
+; NO-FAST-P8-NEXT: blr
+;
+; NO-FAST-P9-LABEL: select_oeq_float_nsz:
+; NO-FAST-P9: # %bb.0: # %entry
+; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2
+; NO-FAST-P9-NEXT: beq cr0, .LBB1_2
+; NO-FAST-P9-NEXT: # %bb.1: # %entry
+; NO-FAST-P9-NEXT: fmr f3, f4
+; NO-FAST-P9-NEXT: .LBB1_2: # %entry
+; NO-FAST-P9-NEXT: fmr f1, f3
+; NO-FAST-P9-NEXT: blr
+entry:
+ %cmp = fcmp nsz oeq float %a, %b
+ %cond = select i1 %cmp, float %c, float %d
+ ret float %cond
+}
+
define double @select_oeq_double(double %a, double %b, double %c, double %d) {
; FAST-P8-LABEL: select_oeq_double:
; FAST-P8: # %bb.0: # %entry
@@ -79,20 +121,20 @@ define double @select_oeq_double(double %a, double %b, double %c, double %d) {
; NO-FAST-P8-LABEL: select_oeq_double:
; NO-FAST-P8: # %bb.0: # %entry
; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f2
-; NO-FAST-P8-NEXT: beq cr0, .LBB1_2
+; NO-FAST-P8-NEXT: beq cr0, .LBB2_2
; NO-FAST-P8-NEXT: # %bb.1: # %entry
; NO-FAST-P8-NEXT: fmr f3, f4
-; NO-FAST-P8-NEXT: .LBB1_2: # %entry
+; NO-FAST-P8-NEXT: .LBB2_2: # %entry
; NO-FAST-P8-NEXT: fmr f1, f3
; NO-FAST-P8-NEXT: blr
;
; NO-FAST-P9-LABEL: select_oeq_double:
; NO-FAST-P9: # %bb.0: # %entry
; NO-FAST-P9-NEXT: xscmpudp cr0, f1, f2
-; NO-FAST-P9-NEXT: beq cr0, .LBB1_2
+; NO-FAST-P9-NEXT: beq cr0, .LBB2_2
; NO-FAST-P9-NEXT: # %bb.1: # %entry
; NO-FAST-P9-NEXT: fmr f3, f4
-; NO-FAST-P9-NEXT: .LBB1_2: # %entry
+; NO-FAST-P9-NEXT: .LBB2_2: # %entry
; NO-FAST-P9-NEXT: fmr f1, f3
; NO-FAST-P9-NEXT: blr
entry:
@@ -182,13 +224,57 @@ entry:
define float @select_one_float(float %a, float %b, float %c, float %d) {
; FAST-P8-LABEL: select_one_float:
; FAST-P8: # %bb.0: # %entry
+; FAST-P8-NEXT: xssubsp f0, f1, f2
+; FAST-P8-NEXT: xsnegdp f1, f0
+; FAST-P8-NEXT: fsel f0, f0, f4, f3
+; FAST-P8-NEXT: fsel f1, f1, f0, f3
+; FAST-P8-NEXT: blr
+;
+; FAST-P9-LABEL: select_one_float:
+; FAST-P9: # %bb.0: # %entry
+; FAST-P9-NEXT: xssubsp f0, f1, f2
+; FAST-P9-NEXT: xsnegdp f1, f0
+; FAST-P9-NEXT: fsel f0, f0, f4, f3
+; FAST-P9-NEXT: fsel f1, f1, f0, f3
+; FAST-P9-NEXT: blr
+;
+; NO-FAST-P8-LABEL: select_one_float:
+; NO-FAST-P8: # %bb.0: # %entry
+; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2
+; NO-FAST-P8-NEXT: crnor 4*cr5+lt, un, eq
+; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB5_2
+; NO-FAST-P8-NEXT: # %bb.1: # %entry
+; NO-FAST-P8-NEXT: fmr f3, f4
+; NO-FAST-P8-NEXT: .LBB5_2: # %entry
+; NO-FAST-P8-NEXT: fmr f1, f3
+; NO-FAST-P8-NEXT: blr
+;
+; NO-FAST-P9-LABEL: select_one_float:
+; NO-FAST-P9: # %bb.0: # %entry
+; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2
+; NO-FAST-P9-NEXT: crnor 4*cr5+lt, un, eq
+; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB5_2
+; NO-FAST-P9-NEXT: # %bb.1: # %entry
+; NO-FAST-P9-NEXT: fmr f3, f4
+; NO-FAST-P9-NEXT: .LBB5_2: # %entry
+; NO-FAST-P9-NEXT: fmr f1, f3
+; NO-FAST-P9-NEXT: blr
+entry:
+ %cmp = fcmp one float %a, %b
+ %cond = select i1 %cmp, float %c, float %d
+ ret float %cond
+}
+
+define float @select_one_float_nsz(float %a, float %b, float %c, float %d) {
+; FAST-P8-LABEL: select_one_float_nsz:
+; FAST-P8: # %bb.0: # %entry
; FAST-P8-NEXT: xssubsp f0, f2, f1
; FAST-P8-NEXT: xssubsp f1, f1, f2
; FAST-P8-NEXT: fsel f1, f1, f4, f3
; FAST-P8-NEXT: fsel f1, f0, f1, f3
; FAST-P8-NEXT: blr
;
-; FAST-P9-LABEL: select_one_float:
+; FAST-P9-LABEL: select_one_float_nsz:
; FAST-P9: # %bb.0: # %entry
; FAST-P9-NEXT: xssubsp f0, f2, f1
; FAST-P9-NEXT: xssubsp f1, f1, f2
@@ -196,29 +282,29 @@ define float @select_one_float(float %a, float %b, float %c, float %d) {
; FAST-P9-NEXT: fsel f1, f0, f1, f3
; FAST-P9-NEXT: blr
;
-; NO-FAST-P8-LABEL: select_one_float:
+; NO-FAST-P8-LABEL: select_one_float_nsz:
; NO-FAST-P8: # %bb.0: # %entry
; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2
; NO-FAST-P8-NEXT: crnor 4*cr5+lt, un, eq
-; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB4_2
+; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB6_2
; NO-FAST-P8-NEXT: # %bb.1: # %entry
; NO-FAST-P8-NEXT: fmr f3, f4
-; NO-FAST-P8-NEXT: .LBB4_2: # %entry
+; NO-FAST-P8-NEXT: .LBB6_2: # %entry
; NO-FAST-P8-NEXT: fmr f1, f3
; NO-FAST-P8-NEXT: blr
;
-; NO-FAST-P9-LABEL: select_one_float:
+; NO-FAST-P9-LABEL: select_one_float_nsz:
; NO-FAST-P9: # %bb.0: # %entry
; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2
; NO-FAST-P9-NEXT: crnor 4*cr5+lt, un, eq
-; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB4_2
+; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB6_2
; NO-FAST-P9-NEXT: # %bb.1: # %entry
; NO-FAST-P9-NEXT: fmr f3, f4
-; NO-FAST-P9-NEXT: .LBB4_2: # %entry
+; NO-FAST-P9-NEXT: .LBB6_2: # %entry
; NO-FAST-P9-NEXT: fmr f1, f3
; NO-FAST-P9-NEXT: blr
entry:
- %cmp = fcmp one float %a, %b
+ %cmp = fcmp nsz one float %a, %b
%cond = select i1 %cmp, float %c, float %d
ret float %cond
}
@@ -244,10 +330,10 @@ define double @select_one_double(double %a, double %b, double %c, double %d) {
; NO-FAST-P8: # %bb.0: # %entry
; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2
; NO-FAST-P8-NEXT: crnor 4*cr5+lt, un, eq
-; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB5_2
+; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB7_2
; NO-FAST-P8-NEXT: # %bb.1: # %entry
; NO-FAST-P8-NEXT: fmr f3, f4
-; NO-FAST-P8-NEXT: .LBB5_2: # %entry
+; NO-FAST-P8-NEXT: .LBB7_2: # %entry
; NO-FAST-P8-NEXT: fmr f1, f3
; NO-FAST-P8-NEXT: blr
;
@@ -255,10 +341,10 @@ define double @select_one_double(double %a, double %b, double %c, double %d) {
; NO-FAST-P9: # %bb.0: # %entry
; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2
; NO-FAST-P9-NEXT: crnor 4*cr5+lt, un, eq
-; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB5_2
+; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB7_2
; NO-FAST-P9-NEXT: # %bb.1: # %entry
; NO-FAST-P9-NEXT: fmr f3, f4
-; NO-FAST-P9-NEXT: .LBB5_2: # %entry
+; NO-FAST-P9-NEXT: .LBB7_2: # %entry
; NO-FAST-P9-NEXT: fmr f1, f3
; NO-FAST-P9-NEXT: blr
entry:
@@ -362,10 +448,10 @@ define float @select_oge_float(float %a, float %b, float %c, float %d) {
; NO-FAST-P8: # %bb.0: # %entry
; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2
; NO-FAST-P8-NEXT: crnor 4*cr5+lt, un, lt
-; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB8_2
+; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB10_2
; NO-FAST-P8-NEXT: # %bb.1: # %entry
; NO-FAST-P8-NEXT: fmr f3, f4
-; NO-FAST-P8-NEXT: .LBB8_2: # %entry
+; NO-FAST-P8-NEXT: .LBB10_2: # %entry
; NO-FAST-P8-NEXT: fmr f1, f3
; NO-FAST-P8-NEXT: blr
;
@@ -373,10 +459,10 @@ define float @select_oge_float(float %a, float %b, float %c, float %d) {
; NO-FAST-P9: # %bb.0: # %entry
; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2
; NO-FAST-P9-NEXT: crnor 4*cr5+lt, un, lt
-; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB8_2
+; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB10_2
; NO-FAST-P9-NEXT: # %bb.1: # %entry
; NO-FAST-P9-NEXT: fmr f3, f4
-; NO-FAST-P9-NEXT: .LBB8_2: # %entry
+; NO-FAST-P9-NEXT: .LBB10_2: # %entry
; NO-FAST-P9-NEXT: fmr f1, f3
; NO-FAST-P9-NEXT: blr
entry:
@@ -402,10 +488,10 @@ define double @select_oge_double(double %a, double %b, double %c, double %d) {
; NO-FAST-P8: # %bb.0: # %entry
; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2
; NO-FAST-P8-NEXT: crnor 4*cr5+lt, un, lt
-; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB9_2
+; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB11_2
; NO-FAST-P8-NEXT: # %bb.1: # %entry
; NO-FAST-P8-NEXT: fmr f3, f4
-; NO-FAST-P8-NEXT: .LBB9_2: # %entry
+; NO-FAST-P8-NEXT: .LBB11_2: # %entry
; NO-FAST-P8-NEXT: fmr f1, f3
; NO-FAST-P8-NEXT: blr
;
@@ -413,10 +499,10 @@ define double @select_oge_double(double %a, double %b, double %c, double %d) {
; NO-FAST-P9: # %bb.0: # %entry
; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2
; NO-FAST-P9-NEXT: crnor 4*cr5+lt, un, lt
-; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB9_2
+; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB11_2
; NO-FAST-P9-NEXT: # %bb.1: # %entry
; NO-FAST-P9-NEXT: fmr f3, f4
-; NO-FAST-P9-NEXT: .LBB9_2: # %entry
+; NO-FAST-P9-NEXT: .LBB11_2: # %entry
; NO-FAST-P9-NEXT: fmr f1, f3
; NO-FAST-P9-NEXT: blr
entry:
@@ -503,20 +589,20 @@ define float @select_olt_float(float %a, float %b, float %c, float %d) {
; NO-FAST-P8-LABEL: select_olt_float:
; NO-FAST-P8: # %bb.0: # %entry
; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2
-; NO-FAST-P8-NEXT: blt cr0, .LBB12_2
+; NO-FAST-P8-NEXT: blt cr0, .LBB14_2
; NO-FAST-P8-NEXT: # %bb.1: # %entry
; NO-FAST-P8-NEXT: fmr f3, f4
-; NO-FAST-P8-NEXT: .LBB12_2: # %entry
+; NO-FAST-P8-NEXT: .LBB14_2: # %entry
; NO-FAST-P8-NEXT: fmr f1, f3
; NO-FAST-P8-NEXT: blr
;
; NO-FAST-P9-LABEL: select_olt_float:
; NO-FAST-P9: # %bb.0: # %entry
; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2
-; NO-FAST-P9-NEXT: blt cr0, .LBB12_2
+; NO-FAST-P9-NEXT: blt cr0, .LBB14_2
; NO-FAST-P9-NEXT: # %bb.1: # %entry
; NO-FAST-P9-NEXT: fmr f3, f4
-; NO-FAST-P9-NEXT: .LBB12_2: # %entry
+; NO-FAST-P9-NEXT: .LBB14_2: # %entry
; NO-FAST-P9-NEXT: fmr f1, f3
; NO-FAST-P9-NEXT: blr
entry:
@@ -541,20 +627,20 @@ define double @select_olt_double(double %a, double %b, double %c, double %d) {
; NO-FAST-P8-LABEL: select_olt_double:
; NO-FAST-P8: # %bb.0: # %entry
; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f2
-; NO-FAST-P8-NEXT: blt cr0, .LBB13_2
+; NO-FAST-P8-NEXT: blt cr0, .LBB15_2
; NO-FAST-P8-NEXT: # %bb.1: # %entry
; NO-FAST-P8-NEXT: fmr f3, f4
-; NO-FAST-P8-NEXT: .LBB13_2: # %entry
+; NO-FAST-P8-NEXT: .LBB15_2: # %entry
; NO-FAST-P8-NEXT: fmr f1, f3
; NO-FAST-P8-NEXT: blr
;
; NO-FAST-P9-LABEL: select_olt_double:
; NO-FAST-P9: # %bb.0: # %entry
; NO-FAST-P9-NEXT: xscmpudp cr0, f1, f2
-; NO-FAST-P9-NEXT: blt cr0, .LBB13_2
+; NO-FAST-P9-NEXT: blt cr0, .LBB15_2
; NO-FAST-P9-NEXT: # %bb.1: # %entry
; NO-FAST-P9-NEXT: fmr f3, f4
-; NO-FAST-P9-NEXT: .LBB13_2: # %entry
+; NO-FAST-P9-NEXT: .LBB15_2: # %entry
; NO-FAST-P9-NEXT: fmr f1, f3
; NO-FAST-P9-NEXT: blr
entry:
@@ -641,20 +727,20 @@ define float @select_ogt_float(float %a, float %b, float %c, float %d) {
; NO-FAST-P8-LABEL: select_ogt_float:
; NO-FAST-P8: # %bb.0: # %entry
; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2
-; NO-FAST-P8-NEXT: bgt cr0, .LBB16_2
+; NO-FAST-P8-NEXT: bgt cr0, .LBB18_2
; NO-FAST-P8-NEXT: # %bb.1: # %entry
; NO-FAST-P8-NEXT: fmr f3, f4
-; NO-FAST-P8-NEXT: .LBB16_2: # %entry
+; NO-FAST-P8-NEXT: .LBB18_2: # %entry
; NO-FAST-P8-NEXT: fmr f1, f3
; NO-FAST-P8-NEXT: blr
;
; NO-FAST-P9-LABEL: select_ogt_float:
; NO-FAST-P9: # %bb.0: # %entry
; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2
-; NO-FAST-P9-NEXT: bgt cr0, .LBB16_2
+; NO-FAST-P9-NEXT: bgt cr0, .LBB18_2
; NO-FAST-P9-NEXT: # %bb.1: # %entry
; NO-FAST-P9-NEXT: fmr f3, f4
-; NO-FAST-P9-NEXT: .LBB16_2: # %entry
+; NO-FAST-P9-NEXT: .LBB18_2: # %entry
; NO-FAST-P9-NEXT: fmr f1, f3
; NO-FAST-P9-NEXT: blr
entry:
@@ -679,20 +765,20 @@ define double @select_ogt_double(double %a, double %b, double %c, double %d) {
; NO-FAST-P8-LABEL: select_ogt_double:
; NO-FAST-P8: # %bb.0: # %entry
; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f2
-; NO-FAST-P8-NEXT: bgt cr0, .LBB17_2
+; NO-FAST-P8-NEXT: bgt cr0, .LBB19_2
; NO-FAST-P8-NEXT: # %bb.1: # %entry
; NO-FAST-P8-NEXT: fmr f3, f4
-; NO-FAST-P8-NEXT: .LBB17_2: # %entry
+; NO-FAST-P8-NEXT: .LBB19_2: # %entry
; NO-FAST-P8-NEXT: fmr f1, f3
; NO-FAST-P8-NEXT: blr
;
; NO-FAST-P9-LABEL: select_ogt_double:
; NO-FAST-P9: # %bb.0: # %entry
; NO-FAST-P9-NEXT: xscmpudp cr0, f1, f2
-; NO-FAST-P9-NEXT: bgt cr0, .LBB17_2
+; NO-FAST-P9-NEXT: bgt cr0, .LBB19_2
; NO-FAST-P9-NEXT: # %bb.1: # %entry
; NO-FAST-P9-NEXT: fmr f3, f4
-; NO-FAST-P9-NEXT: .LBB17_2: # %entry
+; NO-FAST-P9-NEXT: .LBB19_2: # %entry
; NO-FAST-P9-NEXT: fmr f1, f3
; NO-FAST-P9-NEXT: blr
entry:
@@ -780,10 +866,10 @@ define float @select_ole_float(float %a, float %b, float %c, float %d) {
; NO-FAST-P8: # %bb.0: # %entry
; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2
; NO-FAST-P8-NEXT: crnor 4*cr5+lt, un, gt
-; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB20_2
+; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB22_2
; NO-FAST-P8-NEXT: # %bb.1: # %entry
; NO-FAST-P8-NEXT: fmr f3, f4
-; NO-FAST-P8-NEXT: .LBB20_2: # %entry
+; NO-FAST-P8-NEXT: .LBB22_2: # %entry
; NO-FAST-P8-NEXT: fmr f1, f3
; NO-FAST-P8-NEXT: blr
;
@@ -791,10 +877,10 @@ define float @select_ole_float(float %a, float %b, float %c, float %d) {
; NO-FAST-P9: # %bb.0: # %entry
; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2
; NO-FAST-P9-NEXT: crnor 4*cr5+lt, un, gt
-; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB20_2
+; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB22_2
; NO-FAST-P9-NEXT: # %bb.1: # %entry
; NO-FAST-P9-NEXT: fmr f3, f4
-; NO-FAST-P9-NEXT: .LBB20_2: # %entry
+; NO-FAST-P9-NEXT: .LBB22_2: # %entry
; NO-FAST-P9-NEXT: fmr f1, f3
; NO-FAST-P9-NEXT: blr
entry:
@@ -820,10 +906,10 @@ define double @select_ole_double(double %a, double %b, double %c, double %d) {
; NO-FAST-P8: # %bb.0: # %entry
; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2
; NO-FAST-P8-NEXT: crnor 4*cr5+lt, un, gt
-; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB21_2
+; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB23_2
; NO-FAST-P8-NEXT: # %bb.1: # %entry
; NO-FAST-P8-NEXT: fmr f3, f4
-; NO-FAST-P8-NEXT: .LBB21_2: # %entry
+; NO-FAST-P8-NEXT: .LBB23_2: # %entry
; NO-FAST-P8-NEXT: fmr f1, f3
; NO-FAST-P8-NEXT: blr
;
@@ -831,10 +917,10 @@ define double @select_ole_double(double %a, double %b, double %c, double %d) {
; NO-FAST-P9: # %bb.0: # %entry
; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2
; NO-FAST-P9-NEXT: crnor 4*cr5+lt, un, gt
-; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB21_2
+; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB23_2
; NO-FAST-P9-NEXT: # %bb.1: # %entry
; NO-FAST-P9-NEXT: fmr f3, f4
-; NO-FAST-P9-NEXT: .LBB21_2: # %entry
+; NO-FAST-P9-NEXT: .LBB23_2: # %entry
; NO-FAST-P9-NEXT: fmr f1, f3
; NO-FAST-P9-NEXT: blr
entry:
@@ -926,13 +1012,13 @@ define double @onecmp1(double %a, double %y, double %z) {
; NO-FAST-P8-NEXT: vspltisw v2, 1
; NO-FAST-P8-NEXT: xvcvsxwdp vs0, vs34
; NO-FAST-P8-NEXT: fcmpu cr0, f1, f0
-; NO-FAST-P8-NEXT: bc 12, lt, .LBB24_3
+; NO-FAST-P8-NEXT: bc 12, lt, .LBB26_3
; NO-FAST-P8-NEXT: # %bb.1: # %entry
; NO-FAST-P8-NEXT: fcmpu cr0, f1, f1
-; NO-FAST-P8-NEXT: bc 12, un, .LBB24_3
+; NO-FAST-P8-NEXT: bc 12, un, .LBB26_3
; NO-FAST-P8-NEXT: # %bb.2: # %entry
; NO-FAST-P8-NEXT: fmr f3, f2
-; NO-FAST-P8-NEXT: .LBB24_3: # %entry
+; NO-FAST-P8-NEXT: .LBB26_3: # %entry
; NO-FAST-P8-NEXT: fmr f1, f3
; NO-FAST-P8-NEXT: blr
;
@@ -941,13 +1027,13 @@ define double @onecmp1(double %a, double %y, double %z) {
; NO-FAST-P9-NEXT: vspltisw v2, 1
; NO-FAST-P9-NEXT: xvcvsxwdp vs0, vs34
; NO-FAST-P9-NEXT: fcmpu cr0, f1, f0
-; NO-FAST-P9-NEXT: bc 12, lt, .LBB24_3
+; NO-FAST-P9-NEXT: bc 12, lt, .LBB26_3
; NO-FAST-P9-NEXT: # %bb.1: # %entry
; NO-FAST-P9-NEXT: fcmpu cr0, f1, f1
-; NO-FAST-P9-NEXT: bc 12, un, .LBB24_3
+; NO-FAST-P9-NEXT: bc 12, un, .LBB26_3
; NO-FAST-P9-NEXT: # %bb.2: # %entry
; NO-FAST-P9-NEXT: fmr f3, f2
-; NO-FAST-P9-NEXT: .LBB24_3: # %entry
+; NO-FAST-P9-NEXT: .LBB26_3: # %entry
; NO-FAST-P9-NEXT: fmr f1, f3
; NO-FAST-P9-NEXT: blr
entry:
@@ -978,10 +1064,10 @@ define double @onecmp2(double %a, double %y, double %z) {
; NO-FAST-P8-NEXT: vspltisw v2, 1
; NO-FAST-P8-NEXT: xvcvsxwdp vs0, vs34
; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f0
-; NO-FAST-P8-NEXT: bgt cr0, .LBB25_2
+; NO-FAST-P8-NEXT: bgt cr0, .LBB27_2
; NO-FAST-P8-NEXT: # %bb.1: # %entry
; NO-FAST-P8-NEXT: fmr f2, f3
-; NO-FAST-P8-NEXT: .LBB25_2: # %entry
+; NO-FAST-P8-NEXT: .LBB27_2: # %entry
; NO-FAST-P8-NEXT: fmr f1, f2
; NO-FAST-P8-NEXT: blr
;
@@ -990,10 +1076,10 @@ define double @onecmp2(double %a, double %y, double %z) {
; NO-FAST-P9-NEXT: vspltisw v2, 1
; NO-FAST-P9-NEXT: xvcvsxwdp vs0, vs34
; NO-FAST-P9-NEXT: xscmpudp cr0, f1, f0
-; NO-FAST-P9-NEXT: bgt cr0, .LBB25_2
+; NO-FAST-P9-NEXT: bgt cr0, .LBB27_2
; NO-FAST-P9-NEXT: # %bb.1: # %entry
; NO-FAST-P9-NEXT: fmr f2, f3
-; NO-FAST-P9-NEXT: .LBB25_2: # %entry
+; NO-FAST-P9-NEXT: .LBB27_2: # %entry
; NO-FAST-P9-NEXT: fmr f1, f2
; NO-FAST-P9-NEXT: blr
entry:
@@ -1028,10 +1114,10 @@ define double @onecmp3(double %a, double %y, double %z) {
; NO-FAST-P8-NEXT: vspltisw v2, 1
; NO-FAST-P8-NEXT: xvcvsxwdp vs0, vs34
; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f0
-; NO-FAST-P8-NEXT: beq cr0, .LBB26_2
+; NO-FAST-P8-NEXT: beq cr0, .LBB28_2
; NO-FAST-P8-NEXT: # %bb.1: # %entry
; NO-FAST-P8-NEXT: fmr f2, f3
-; NO-FAST-P8-NEXT: .LBB26_2: # %entry
+; NO-FAST-P8-NEXT: .LBB28_2: # %entry
; NO-FAST-P8-NEXT: fmr f1, f2
; NO-FAST-P8-NEXT: blr
;
@@ -1040,10 +1126,10 @@ define double @onecmp3(double %a, double %y, double %z) {
; NO-FAST-P9-NEXT: vspltisw v2, 1
; NO-FAST-P9-NEXT: xvcvsxwdp vs0, vs34
; NO-FAST-P9-NEXT: xscmpudp cr0, f1, f0
-; NO-FAST-P9-NEXT: beq cr0, .LBB26_2
+; NO-FAST-P9-NEXT: beq cr0, .LBB28_2
; NO-FAST-P9-NEXT: # %bb.1: # %entry
; NO-FAST-P9-NEXT: fmr f2, f3
-; NO-FAST-P9-NEXT: .LBB26_2: # %entry
+; NO-FAST-P9-NEXT: .LBB28_2: # %entry
; NO-FAST-P9-NEXT: fmr f1, f2
; NO-FAST-P9-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/X86/negative-sin.ll b/llvm/test/CodeGen/X86/negative-sin.ll
index f24507d3a4f38..4836da2ad7797 100644
--- a/llvm/test/CodeGen/X86/negative-sin.ll
+++ b/llvm/test/CodeGen/X86/negative-sin.ll
@@ -82,18 +82,13 @@ define double @semi_strict2(double %e) nounwind {
ret double %h
}
-; FIXME:
-; Auto-upgrade function attribute to IR-level fast-math-flags.
-
-define double @fn_attr(double %e) nounwind #0 {
-; CHECK-LABEL: fn_attr:
+define double @nsz_flag(double %e) nounwind {
+; CHECK-LABEL: nsz_flag:
; CHECK: # %bb.0:
; CHECK-NEXT: jmp sin at PLT # TAILCALL
- %f = fsub double 0.0, %e
- %g = call double @sin(double %f) readonly
- %h = fsub double 0.0, %g
+ %f = fsub nsz double 0.0, %e
+ %g = call nsz double @sin(double %f) readonly
+ %h = fsub nsz double 0.0, %g
ret double %h
}
-attributes #0 = { "unsafe-fp-math"="true" "no-signed-zeros-fp-math"="true" }
-
More information about the llvm-commits
mailing list