[llvm] 9356ec1 - CodeGen: Reorder case handling for is.fpclass legalization

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 17 08:29:58 PDT 2023


Author: Matt Arsenault
Date: 2023-03-17T11:29:50-04:00
New Revision: 9356ec1516cae84837629ef19b7158c0e1155852

URL: https://github.com/llvm/llvm-project/commit/9356ec1516cae84837629ef19b7158c0e1155852
DIFF: https://github.com/llvm/llvm-project/commit/9356ec1516cae84837629ef19b7158c0e1155852.diff

LOG: CodeGen: Reorder case handling for is.fpclass legalization

Subnormal and zero checks can be combined into one, so move the
subnormal handling next to the zero handling to reduce the diff in
a future change.

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
    llvm/test/CodeGen/X86/is_fpclass.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 1406fecca1a2a..f0a5b2ebcb3ae 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7438,6 +7438,20 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
                                        AsInt, SignBitC));
   }
 
+  if (FPClassTest PartialCheck = Mask & fcSubnormal) {
+    // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
+    // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
+    auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
+    auto OneC = MIRBuilder.buildConstant(IntTy, 1);
+    auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
+    auto SubnormalRes =
+        MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
+                             MIRBuilder.buildConstant(IntTy, AllOneMantissa));
+    if (PartialCheck == fcNegSubnormal)
+      SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
+    appendToRes(SubnormalRes);
+  }
+
   if (FPClassTest PartialCheck = Mask & fcInf) {
     if (PartialCheck == fcPosInf)
       appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
@@ -7474,20 +7488,6 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
     }
   }
 
-  if (FPClassTest PartialCheck = Mask & fcSubnormal) {
-    // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
-    // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
-    auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
-    auto OneC = MIRBuilder.buildConstant(IntTy, 1);
-    auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
-    auto SubnormalRes =
-        MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
-                             MIRBuilder.buildConstant(IntTy, AllOneMantissa));
-    if (PartialCheck == fcNegSubnormal)
-      SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
-    appendToRes(SubnormalRes);
-  }
-
   if (FPClassTest PartialCheck = Mask & fcNormal) {
     // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
     // (max_exp-1))

diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 362330cd0ae20..6d0de75579a60 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8149,6 +8149,19 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
     appendResult(PartialRes);
   }
 
+  if (unsigned PartialCheck = Test & fcSubnormal) {
+    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
+    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
+    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
+    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
+    SDValue VMinusOneV =
+        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
+    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
+    if (PartialCheck == fcNegSubnormal)
+      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
+    appendResult(PartialRes);
+  }
+
   if (unsigned PartialCheck = Test & fcInf) {
     if (PartialCheck == fcPosInf)
       PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
@@ -8193,19 +8206,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
     appendResult(PartialRes);
   }
 
-  if (unsigned PartialCheck = Test & fcSubnormal) {
-    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
-    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
-    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
-    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
-    SDValue VMinusOneV =
-        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
-    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
-    if (PartialCheck == fcNegSubnormal)
-      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
-    appendResult(PartialRes);
-  }
-
   if (unsigned PartialCheck = Test & fcNormal) {
     // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
     APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index 03d9f77acdf2f..35090ec6cef7f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -1758,16 +1758,16 @@ define i1 @not_isnormal_f16(half %x) {
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7GLISEL-NEXT:    v_bfe_u32 v1, v0, 0, 16
-; GFX7GLISEL-NEXT:    s_movk_i32 s6, 0x7c00
+; GFX7GLISEL-NEXT:    v_subrev_i32_e64 v0, s[4:5], 1, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v1
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[4:5], v0, v2
+; GFX7GLISEL-NEXT:    s_movk_i32 s6, 0x7c00
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v1
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v1
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 1, v0
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x3ff
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v1
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7GLISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7GLISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -1827,19 +1827,19 @@ define i1 @not_is_plus_normal_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_bfe_u32 v2, v1, 0, 16
-; GFX7GLISEL-NEXT:    s_movk_i32 s8, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], s8, v2
+; GFX7GLISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], v0, v2
+; GFX7GLISEL-NEXT:    v_subrev_i32_e64 v0, s[6:7], 1, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v3, 0x3ff
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[6:7], v0, v3
+; GFX7GLISEL-NEXT:    s_movk_i32 s8, 0x7c00
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v2
-; GFX7GLISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], v0, v2
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s8, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 1, v1
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v2
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], s[6:7], vcc
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -1905,19 +1905,19 @@ define i1 @not_is_neg_normal_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_bfe_u32 v2, v1, 0, 16
-; GFX7GLISEL-NEXT:    s_movk_i32 s8, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], s8, v2
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v0, v2
+; GFX7GLISEL-NEXT:    v_subrev_i32_e64 v0, s[6:7], 1, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v3, 0x3ff
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[6:7], v0, v3
+; GFX7GLISEL-NEXT:    s_movk_i32 s8, 0x7c00
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v2
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v0, v2
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s8, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 1, v1
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v2
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], s[6:7], vcc
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -2155,15 +2155,15 @@ define i1 @not_iszero_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_bfe_u32 v1, v0, 0, 16
-; GFX7GLISEL-NEXT:    s_movk_i32 s4, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[4:5], s4, v1
-; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
+; GFX7GLISEL-NEXT:    v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT:    s_movk_i32 s6, 0x7c00
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v1
+; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v1
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -2659,7 +2659,7 @@ define i1 @not_iszero_or_nan_f16(half %x) {
 ; GFX7SELDAG-NEXT:    v_add_i32_e64 v1, s[4:5], -1, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x3ff
 ; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], s4, v1
-; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
@@ -2671,13 +2671,13 @@ define i1 @not_iszero_or_nan_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0: ; %entry
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_bfe_u32 v1, v0, 0, 16
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT:    v_subrev_i32_e64 v1, s[4:5], 1, v0
+; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[4:5], v1, v2
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
+; GFX7GLISEL-NEXT:    v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -2735,7 +2735,7 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
 ; GFX7SELDAG-NEXT:    v_add_i32_e64 v1, s[4:5], -1, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x3ff
 ; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], s4, v1
-; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
@@ -2747,13 +2747,13 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
 ; GFX7GLISEL:       ; %bb.0: ; %entry
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_bfe_u32 v1, v0, 0, 16
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT:    v_subrev_i32_e64 v1, s[4:5], 1, v0
+; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[4:5], v1, v2
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
+; GFX7GLISEL-NEXT:    v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -2811,7 +2811,7 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
 ; GFX7SELDAG-NEXT:    v_add_i32_e64 v1, s[4:5], -1, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x3ff
 ; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], s4, v1
-; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
@@ -2823,13 +2823,13 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
 ; GFX7GLISEL:       ; %bb.0: ; %entry
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_bfe_u32 v1, v0, 0, 16
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT:    v_subrev_i32_e64 v1, s[4:5], 1, v0
+; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[4:5], v1, v2
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
+; GFX7GLISEL-NEXT:    v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -3009,18 +3009,18 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
 ; GFX7SELDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7e00
-; GFX7SELDAG-NEXT:    s_movk_i32 s6, 0x7c00
+; GFX7SELDAG-NEXT:    s_movk_i32 s8, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e64 s[4:5], s6, v0
-; GFX7SELDAG-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
-; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v0
-; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7SELDAG-NEXT:    v_add_i32_e32 v1, vcc, -1, v0
-; GFX7SELDAG-NEXT:    s_movk_i32 s6, 0x3ff
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v1
+; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e64 s[4:5], s8, v0
+; GFX7SELDAG-NEXT:    s_and_b64 s[6:7], s[4:5], vcc
+; GFX7SELDAG-NEXT:    v_add_i32_e64 v1, s[4:5], -1, v0
+; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x3ff
+; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s8, v0
+; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], s4, v1
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
+; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s6, 0x7800
 ; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
@@ -3032,22 +3032,22 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0: ; %entry
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_bfe_u32 v1, v0, 0, 16
-; GFX7GLISEL-NEXT:    s_movk_i32 s4, 0x7c00
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[4:5], s4, v1
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[6:7], v1, v2
-; GFX7GLISEL-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
-; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
+; GFX7GLISEL-NEXT:    v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT:    s_movk_i32 s8, 0x7c00
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s8, v1
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7e00
+; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], vcc, s[4:5]
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v1
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[4:5], v1, v2
+; GFX7GLISEL-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
+; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7GLISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -3094,15 +3094,15 @@ define i1 @not_iszero_or_snan_f16(half %x) {
 ; GFX7SELDAG:       ; %bb.0: ; %entry
 ; GFX7SELDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7dff
-; GFX7SELDAG-NEXT:    s_movk_i32 s5, 0x7c00
-; GFX7SELDAG-NEXT:    s_movk_i32 s6, 0x3ff
+; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c00
+; GFX7SELDAG-NEXT:    s_movk_i32 s6, 0x7dff
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
-; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s5, v0
+; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_add_i32_e64 v1, s[4:5], -1, v0
+; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x3ff
+; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], s4, v1
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GFX7SELDAG-NEXT:    v_add_i32_e32 v1, vcc, -1, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v1
+; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s6, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -3116,16 +3116,16 @@ define i1 @not_iszero_or_snan_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0: ; %entry
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_bfe_u32 v1, v0, 0, 16
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX7GLISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v2
-; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
+; GFX7GLISEL-NEXT:    v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v2
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7e00
+; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX7GLISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0

diff  --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index 18a5b2737bbe8..a12efe7aa8a25 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -1922,6 +1922,9 @@ entry:
 define i1 @not_iszero_or_qnan_f(float %x) {
 ; CHECK-32-LABEL: not_iszero_or_qnan_f:
 ; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    pushl %esi
+; CHECK-32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-32-NEXT:    .cfi_offset %esi, -8
 ; CHECK-32-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
 ; CHECK-32-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; CHECK-32-NEXT:    cmpl $2143289344, %eax # imm = 0x7FC00000
@@ -1931,15 +1934,17 @@ define i1 @not_iszero_or_qnan_f(float %x) {
 ; CHECK-32-NEXT:    andb %cl, %dl
 ; CHECK-32-NEXT:    cmpl $2139095040, %eax # imm = 0x7F800000
 ; CHECK-32-NEXT:    sete %cl
-; CHECK-32-NEXT:    orb %dl, %cl
-; CHECK-32-NEXT:    leal -1(%eax), %edx
-; CHECK-32-NEXT:    cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-32-NEXT:    setb %dl
+; CHECK-32-NEXT:    leal -1(%eax), %esi
+; CHECK-32-NEXT:    cmpl $8388607, %esi # imm = 0x7FFFFF
+; CHECK-32-NEXT:    setb %ch
+; CHECK-32-NEXT:    orb %cl, %ch
 ; CHECK-32-NEXT:    addl $-8388608, %eax # imm = 0xFF800000
 ; CHECK-32-NEXT:    cmpl $2130706432, %eax # imm = 0x7F000000
 ; CHECK-32-NEXT:    setb %al
 ; CHECK-32-NEXT:    orb %dl, %al
-; CHECK-32-NEXT:    orb %cl, %al
+; CHECK-32-NEXT:    orb %ch, %al
+; CHECK-32-NEXT:    popl %esi
+; CHECK-32-NEXT:    .cfi_def_cfa_offset 4
 ; CHECK-32-NEXT:    retl
 ;
 ; CHECK-64-LABEL: not_iszero_or_qnan_f:
@@ -1953,15 +1958,15 @@ define i1 @not_iszero_or_qnan_f(float %x) {
 ; CHECK-64-NEXT:    andb %cl, %dl
 ; CHECK-64-NEXT:    cmpl $2139095040, %eax # imm = 0x7F800000
 ; CHECK-64-NEXT:    sete %cl
-; CHECK-64-NEXT:    orb %dl, %cl
-; CHECK-64-NEXT:    leal -1(%rax), %edx
-; CHECK-64-NEXT:    cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-64-NEXT:    setb %dl
+; CHECK-64-NEXT:    leal -1(%rax), %esi
+; CHECK-64-NEXT:    cmpl $8388607, %esi # imm = 0x7FFFFF
+; CHECK-64-NEXT:    setb %sil
+; CHECK-64-NEXT:    orb %cl, %sil
 ; CHECK-64-NEXT:    addl $-8388608, %eax # imm = 0xFF800000
 ; CHECK-64-NEXT:    cmpl $2130706432, %eax # imm = 0x7F000000
 ; CHECK-64-NEXT:    setb %al
 ; CHECK-64-NEXT:    orb %dl, %al
-; CHECK-64-NEXT:    orb %cl, %al
+; CHECK-64-NEXT:    orb %sil, %al
 ; CHECK-64-NEXT:    retq
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 925)  ; ~(0x60|0x2) = "~(zero|qnan)"
@@ -1973,38 +1978,38 @@ define i1 @not_iszero_or_snan_f(float %x) {
 ; CHECK-32:       # %bb.0: # %entry
 ; CHECK-32-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
 ; CHECK-32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-32-NEXT:    cmpl $2143289344, %eax # imm = 0x7FC00000
-; CHECK-32-NEXT:    setge %cl
 ; CHECK-32-NEXT:    cmpl $2139095040, %eax # imm = 0x7F800000
-; CHECK-32-NEXT:    sete %dl
-; CHECK-32-NEXT:    orb %cl, %dl
-; CHECK-32-NEXT:    leal -1(%eax), %ecx
-; CHECK-32-NEXT:    cmpl $8388607, %ecx # imm = 0x7FFFFF
-; CHECK-32-NEXT:    setb %cl
-; CHECK-32-NEXT:    orb %dl, %cl
+; CHECK-32-NEXT:    sete %cl
+; CHECK-32-NEXT:    leal -1(%eax), %edx
+; CHECK-32-NEXT:    cmpl $8388607, %edx # imm = 0x7FFFFF
+; CHECK-32-NEXT:    setb %dl
+; CHECK-32-NEXT:    cmpl $2143289344, %eax # imm = 0x7FC00000
+; CHECK-32-NEXT:    setge %ch
+; CHECK-32-NEXT:    orb %cl, %ch
+; CHECK-32-NEXT:    orb %dl, %ch
 ; CHECK-32-NEXT:    addl $-8388608, %eax # imm = 0xFF800000
 ; CHECK-32-NEXT:    cmpl $2130706432, %eax # imm = 0x7F000000
 ; CHECK-32-NEXT:    setb %al
-; CHECK-32-NEXT:    orb %cl, %al
+; CHECK-32-NEXT:    orb %ch, %al
 ; CHECK-32-NEXT:    retl
 ;
 ; CHECK-64-LABEL: not_iszero_or_snan_f:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    movd %xmm0, %eax
 ; CHECK-64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-64-NEXT:    cmpl $2143289344, %eax # imm = 0x7FC00000
-; CHECK-64-NEXT:    setge %cl
 ; CHECK-64-NEXT:    cmpl $2139095040, %eax # imm = 0x7F800000
-; CHECK-64-NEXT:    sete %dl
-; CHECK-64-NEXT:    orb %cl, %dl
-; CHECK-64-NEXT:    leal -1(%rax), %ecx
-; CHECK-64-NEXT:    cmpl $8388607, %ecx # imm = 0x7FFFFF
-; CHECK-64-NEXT:    setb %cl
-; CHECK-64-NEXT:    orb %dl, %cl
+; CHECK-64-NEXT:    sete %cl
+; CHECK-64-NEXT:    leal -1(%rax), %edx
+; CHECK-64-NEXT:    cmpl $8388607, %edx # imm = 0x7FFFFF
+; CHECK-64-NEXT:    setb %dl
+; CHECK-64-NEXT:    cmpl $2143289344, %eax # imm = 0x7FC00000
+; CHECK-64-NEXT:    setge %sil
+; CHECK-64-NEXT:    orb %cl, %sil
+; CHECK-64-NEXT:    orb %dl, %sil
 ; CHECK-64-NEXT:    addl $-8388608, %eax # imm = 0xFF800000
 ; CHECK-64-NEXT:    cmpl $2130706432, %eax # imm = 0x7F000000
 ; CHECK-64-NEXT:    setb %al
-; CHECK-64-NEXT:    orb %cl, %al
+; CHECK-64-NEXT:    orb %sil, %al
 ; CHECK-64-NEXT:    retq
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 926)  ; ~(0x60|0x1) = "~(zero|snan)"


        


More information about the llvm-commits mailing list