[llvm] 9356ec1 - CodeGen: Reorder case handling for is.fpclass legalization
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 17 08:29:58 PDT 2023
Author: Matt Arsenault
Date: 2023-03-17T11:29:50-04:00
New Revision: 9356ec1516cae84837629ef19b7158c0e1155852
URL: https://github.com/llvm/llvm-project/commit/9356ec1516cae84837629ef19b7158c0e1155852
DIFF: https://github.com/llvm/llvm-project/commit/9356ec1516cae84837629ef19b7158c0e1155852.diff
LOG: CodeGen: Reorder case handling for is.fpclass legalization
Subnormal and zero checks can be combined into one, so move the
subnormal handling next to the zero handling to reduce the diff in a future change.
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
llvm/test/CodeGen/X86/is_fpclass.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 1406fecca1a2a..f0a5b2ebcb3ae 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7438,6 +7438,20 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
AsInt, SignBitC));
}
+ if (FPClassTest PartialCheck = Mask & fcSubnormal) {
+ // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
+ // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
+ auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
+ auto OneC = MIRBuilder.buildConstant(IntTy, 1);
+ auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
+ auto SubnormalRes =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
+ MIRBuilder.buildConstant(IntTy, AllOneMantissa));
+ if (PartialCheck == fcNegSubnormal)
+ SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
+ appendToRes(SubnormalRes);
+ }
+
if (FPClassTest PartialCheck = Mask & fcInf) {
if (PartialCheck == fcPosInf)
appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
@@ -7474,20 +7488,6 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
}
}
- if (FPClassTest PartialCheck = Mask & fcSubnormal) {
- // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
- // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
- auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
- auto OneC = MIRBuilder.buildConstant(IntTy, 1);
- auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
- auto SubnormalRes =
- MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
- MIRBuilder.buildConstant(IntTy, AllOneMantissa));
- if (PartialCheck == fcNegSubnormal)
- SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
- appendToRes(SubnormalRes);
- }
-
if (FPClassTest PartialCheck = Mask & fcNormal) {
// isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
// (max_exp-1))
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 362330cd0ae20..6d0de75579a60 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8149,6 +8149,19 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
appendResult(PartialRes);
}
+ if (unsigned PartialCheck = Test & fcSubnormal) {
+ // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
+ // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
+ SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
+ SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
+ SDValue VMinusOneV =
+ DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
+ PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
+ if (PartialCheck == fcNegSubnormal)
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
+ appendResult(PartialRes);
+ }
+
if (unsigned PartialCheck = Test & fcInf) {
if (PartialCheck == fcPosInf)
PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
@@ -8193,19 +8206,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
appendResult(PartialRes);
}
- if (unsigned PartialCheck = Test & fcSubnormal) {
- // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
- // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
- SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
- SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
- SDValue VMinusOneV =
- DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
- PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
- if (PartialCheck == fcNegSubnormal)
- PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
- appendResult(PartialRes);
- }
-
if (unsigned PartialCheck = Test & fcNormal) {
// isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index 03d9f77acdf2f..35090ec6cef7f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -1758,16 +1758,16 @@ define i1 @not_isnormal_f16(half %x) {
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
-; GFX7GLISEL-NEXT: s_movk_i32 s6, 0x7c00
+; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v1
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v2
+; GFX7GLISEL-NEXT: s_movk_i32 s6, 0x7c00
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v1
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v1
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 1, v0
-; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v1
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1827,19 +1827,19 @@ define i1 @not_is_plus_normal_f16(half %x) {
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16
-; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], s8, v2
+; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, v2
+; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], 1, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v3, 0x3ff
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v0, v3
+; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2
-; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, v2
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s8, v2
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 1, v1
-; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v2
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -1905,19 +1905,19 @@ define i1 @not_is_neg_normal_f16(half %x) {
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16
-; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], s8, v2
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2
+; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], 1, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v3, 0x3ff
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v0, v3
+; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s8, v2
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 1, v1
-; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v2
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -2155,15 +2155,15 @@ define i1 @not_iszero_f16(half %x) {
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
-; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], s4, v1
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
+; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: s_movk_i32 s6, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v1
+; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v1
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -2659,7 +2659,7 @@ define i1 @not_iszero_or_nan_f16(half %x) {
; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff
; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1
-; GFX7SELDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
@@ -2671,13 +2671,13 @@ define i1 @not_iszero_or_nan_f16(half %x) {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT: v_subrev_i32_e64 v1, s[4:5], 1, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v1, v2
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
+; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -2735,7 +2735,7 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff
; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1
-; GFX7SELDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
@@ -2747,13 +2747,13 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT: v_subrev_i32_e64 v1, s[4:5], 1, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v1, v2
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
+; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -2811,7 +2811,7 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff
; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1
-; GFX7SELDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
@@ -2823,13 +2823,13 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT: v_subrev_i32_e64 v1, s[4:5], 1, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v1, v2
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
+; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -3009,18 +3009,18 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7e00
-; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7c00
+; GFX7SELDAG-NEXT: s_movk_i32 s8, 0x7c00
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
-; GFX7SELDAG-NEXT: v_cmp_lt_i32_e64 s[4:5], s6, v0
-; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s6, v0
-; GFX7SELDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7SELDAG-NEXT: v_add_i32_e32 v1, vcc, -1, v0
-; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x3ff
-; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v1
+; GFX7SELDAG-NEXT: v_cmp_lt_i32_e64 s[4:5], s8, v0
+; GFX7SELDAG-NEXT: s_and_b64 s[6:7], s[4:5], vcc
+; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0
+; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff
+; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s8, v0
+; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1
; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
+; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800
; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
@@ -3032,22 +3032,22 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
-; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], s4, v1
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v1, v2
-; GFX7GLISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
+; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s8, v1
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[4:5]
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v1
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v1, v2
+; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
+; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -3094,15 +3094,15 @@ define i1 @not_iszero_or_snan_f16(half %x) {
; GFX7SELDAG: ; %bb.0: ; %entry
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7dff
-; GFX7SELDAG-NEXT: s_movk_i32 s5, 0x7c00
-; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x3ff
+; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
+; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7dff
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
-; GFX7SELDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s5, v0
+; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0
+; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff
+; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1
; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX7SELDAG-NEXT: v_add_i32_e32 v1, vcc, -1, v0
-; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v1
+; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s6, v0
; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -3116,16 +3116,16 @@ define i1 @not_iszero_or_snan_f16(half %x) {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX7GLISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
+; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GFX7GLISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index 18a5b2737bbe8..a12efe7aa8a25 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -1922,6 +1922,9 @@ entry:
define i1 @not_iszero_or_qnan_f(float %x) {
; CHECK-32-LABEL: not_iszero_or_qnan_f:
; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: pushl %esi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 8
+; CHECK-32-NEXT: .cfi_offset %esi, -8
; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
@@ -1931,15 +1934,17 @@ define i1 @not_iszero_or_qnan_f(float %x) {
; CHECK-32-NEXT: andb %cl, %dl
; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
; CHECK-32-NEXT: sete %cl
-; CHECK-32-NEXT: orb %dl, %cl
-; CHECK-32-NEXT: leal -1(%eax), %edx
-; CHECK-32-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-32-NEXT: setb %dl
+; CHECK-32-NEXT: leal -1(%eax), %esi
+; CHECK-32-NEXT: cmpl $8388607, %esi # imm = 0x7FFFFF
+; CHECK-32-NEXT: setb %ch
+; CHECK-32-NEXT: orb %cl, %ch
; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000
; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000
; CHECK-32-NEXT: setb %al
; CHECK-32-NEXT: orb %dl, %al
-; CHECK-32-NEXT: orb %cl, %al
+; CHECK-32-NEXT: orb %ch, %al
+; CHECK-32-NEXT: popl %esi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 4
; CHECK-32-NEXT: retl
;
; CHECK-64-LABEL: not_iszero_or_qnan_f:
@@ -1953,15 +1958,15 @@ define i1 @not_iszero_or_qnan_f(float %x) {
; CHECK-64-NEXT: andb %cl, %dl
; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
; CHECK-64-NEXT: sete %cl
-; CHECK-64-NEXT: orb %dl, %cl
-; CHECK-64-NEXT: leal -1(%rax), %edx
-; CHECK-64-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-64-NEXT: setb %dl
+; CHECK-64-NEXT: leal -1(%rax), %esi
+; CHECK-64-NEXT: cmpl $8388607, %esi # imm = 0x7FFFFF
+; CHECK-64-NEXT: setb %sil
+; CHECK-64-NEXT: orb %cl, %sil
; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000
; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000
; CHECK-64-NEXT: setb %al
; CHECK-64-NEXT: orb %dl, %al
-; CHECK-64-NEXT: orb %cl, %al
+; CHECK-64-NEXT: orb %sil, %al
; CHECK-64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 925) ; ~(0x60|0x2) = "~(zero|qnan)"
@@ -1973,38 +1978,38 @@ define i1 @not_iszero_or_snan_f(float %x) {
; CHECK-32: # %bb.0: # %entry
; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax
-; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; CHECK-32-NEXT: setge %cl
; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; CHECK-32-NEXT: sete %dl
-; CHECK-32-NEXT: orb %cl, %dl
-; CHECK-32-NEXT: leal -1(%eax), %ecx
-; CHECK-32-NEXT: cmpl $8388607, %ecx # imm = 0x7FFFFF
-; CHECK-32-NEXT: setb %cl
-; CHECK-32-NEXT: orb %dl, %cl
+; CHECK-32-NEXT: sete %cl
+; CHECK-32-NEXT: leal -1(%eax), %edx
+; CHECK-32-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF
+; CHECK-32-NEXT: setb %dl
+; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; CHECK-32-NEXT: setge %ch
+; CHECK-32-NEXT: orb %cl, %ch
+; CHECK-32-NEXT: orb %dl, %ch
; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000
; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000
; CHECK-32-NEXT: setb %al
-; CHECK-32-NEXT: orb %cl, %al
+; CHECK-32-NEXT: orb %ch, %al
; CHECK-32-NEXT: retl
;
; CHECK-64-LABEL: not_iszero_or_snan_f:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: movd %xmm0, %eax
; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; CHECK-64-NEXT: setge %cl
; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; CHECK-64-NEXT: sete %dl
-; CHECK-64-NEXT: orb %cl, %dl
-; CHECK-64-NEXT: leal -1(%rax), %ecx
-; CHECK-64-NEXT: cmpl $8388607, %ecx # imm = 0x7FFFFF
-; CHECK-64-NEXT: setb %cl
-; CHECK-64-NEXT: orb %dl, %cl
+; CHECK-64-NEXT: sete %cl
+; CHECK-64-NEXT: leal -1(%rax), %edx
+; CHECK-64-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF
+; CHECK-64-NEXT: setb %dl
+; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; CHECK-64-NEXT: setge %sil
+; CHECK-64-NEXT: orb %cl, %sil
+; CHECK-64-NEXT: orb %dl, %sil
; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000
; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000
; CHECK-64-NEXT: setb %al
-; CHECK-64-NEXT: orb %cl, %al
+; CHECK-64-NEXT: orb %sil, %al
; CHECK-64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 926) ; ~(0x60|0x1) = "~(zero|snan)"
More information about the llvm-commits
mailing list