[llvm] 61820f8 - CodeGen: Optimize lowering of is.fpclass fcZero|fcSubnormal

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 6 10:04:02 PDT 2023


Author: Matt Arsenault
Date: 2023-07-06T13:03:57-04:00
New Revision: 61820f8b5dac779d136a342d22f5e66f99c7b648

URL: https://github.com/llvm/llvm-project/commit/61820f8b5dac779d136a342d22f5e66f99c7b648
DIFF: https://github.com/llvm/llvm-project/commit/61820f8b5dac779d136a342d22f5e66f99c7b648.diff

LOG: CodeGen: Optimize lowering of is.fpclass fcZero|fcSubnormal

Combine the two checks into a check if the exponent bits are 0. The
inverted case isn't reachable until a future change, and GlobalISel
currently doesn't attempt the inversion optimization.

https://reviews.llvm.org/D143182

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
    llvm/test/CodeGen/X86/is_fpclass.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index b4963384702726..266fa37e969d40 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7274,6 +7274,9 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
     return Legalized;
   }
 
+  // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
+  // version
+
   unsigned BitSize = SrcTy.getScalarSizeInBits();
   const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
 
@@ -7329,6 +7332,18 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
     Mask &= ~fcNegFinite;
   }
 
+  if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
+    // fcZero | fcSubnormal => test all exponent bits are 0
+    // TODO: Handle sign bit specific cases
+    // TODO: Handle inverted case
+    if (PartialCheck == (fcZero | fcSubnormal)) {
+      auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
+      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+                                       ExpBits, ZeroC));
+      Mask &= ~PartialCheck;
+    }
+  }
+
   // Check for individual classes.
   if (FPClassTest PartialCheck = Mask & fcZero) {
     if (PartialCheck == fcPosZero)

diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b09f2508022129..ef723fd4079499 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8108,12 +8108,8 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
   // exceptions are ignored.
   if (Flags.hasNoFPExcept() &&
       isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
-    // Even if the condition isn't legal, we're probably better off expanding it
-    // if it's the combined 0 || denormal compare.
-
     if (isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction()) &&
-        (Test != fcZero ||
-         isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
+        (isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
                                  OperandVT.getScalarType().getSimpleVT()))) {
       // If denormals could be implicitly treated as 0, this is not equivalent
       // to a compare with 0 since it will also be true for denormals.
@@ -8207,6 +8203,20 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
   }
   appendResult(PartialRes);
 
+  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
+    // fcZero | fcSubnormal => test all exponent bits are 0
+    // TODO: Handle sign bit specific cases
+    if (PartialCheck == (fcZero | fcSubnormal)) {
+      assert(!IsInverted && "should handle inverted case");
+
+      SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
+      SDValue ExpIsZero =
+          DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
+      appendResult(ExpIsZero);
+      Test &= ~PartialCheck & fcAllFlags;
+    }
+  }
+
   // Check for individual classes.
 
   if (unsigned PartialCheck = Test & fcZero) {

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index 0c9f073c9384ce..4a08b083ff12a9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -1504,27 +1504,18 @@ define i1 @issubnormal_or_zero_f16(half %x) {
 ; GFX7SELDAG:       ; %bb.0: ; %entry
 ; GFX7SELDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7SELDAG-NEXT:    s_movk_i32 s6, 0x3ff
-; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7c00, v0
 ; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX7SELDAG-NEXT:    v_add_i32_e64 v0, s[4:5], -1, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], s6, v0
-; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7GLISEL-LABEL: issubnormal_or_zero_f16:
 ; GFX7GLISEL:       ; %bb.0: ; %entry
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_subrev_i32_e64 v0, s[4:5], 1, v0
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7c00, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x3ff
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[4:5], v0, v1
-; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7GLISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7GLISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8CHECK-LABEL: issubnormal_or_zero_f16:
@@ -1701,18 +1692,15 @@ define i1 @not_isnormal_f16(half %x) {
 ; GFX7GLISEL-LABEL: not_isnormal_f16:
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_subrev_i32_e64 v0, s[4:5], 1, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7c00, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[4:5], v0, v2
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v1
 ; GFX7GLISEL-NEXT:    s_movk_i32 s6, 0x7c00
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v0
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v1
-; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v1
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v0
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7GLISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7GLISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -1770,23 +1758,20 @@ define i1 @not_is_plus_normal_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7c00, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX7GLISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], v0, v2
-; GFX7GLISEL-NEXT:    v_subrev_i32_e64 v0, s[6:7], 1, v1
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v3, 0x3ff
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[6:7], v0, v3
 ; GFX7GLISEL-NEXT:    s_movk_i32 s8, 0x7c00
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], s8, v3
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s8, v2
-; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v2
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v3
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], s[6:7], vcc
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
+; GFX7GLISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], v2, v3
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
 ; GFX7GLISEL-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
@@ -1846,23 +1831,20 @@ define i1 @not_is_neg_normal_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7c00, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v0, v2
-; GFX7GLISEL-NEXT:    v_subrev_i32_e64 v0, s[6:7], 1, v1
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v3, 0x3ff
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[6:7], v0, v3
 ; GFX7GLISEL-NEXT:    s_movk_i32 s8, 0x7c00
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], s8, v3
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s8, v2
-; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v2
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v3
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], s[6:7], vcc
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v2, v3
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
 ; GFX7GLISEL-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]

diff  --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index 76d0997ca6485a..0e80a4dc7aeba7 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -792,24 +792,15 @@ entry:
 define i1 @issubnormal_or_zero_f(float %x) {
 ; CHECK-32-LABEL: issubnormal_or_zero_f:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-32-NEXT:    sete %cl
-; CHECK-32-NEXT:    decl %eax
-; CHECK-32-NEXT:    cmpl $8388607, %eax # imm = 0x7FFFFF
-; CHECK-32-NEXT:    setb %al
-; CHECK-32-NEXT:    orb %cl, %al
+; CHECK-32-NEXT:    testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; CHECK-32-NEXT:    sete %al
 ; CHECK-32-NEXT:    retl
 ;
 ; CHECK-64-LABEL: issubnormal_or_zero_f:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    movd %xmm0, %eax
-; CHECK-64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-64-NEXT:    sete %cl
-; CHECK-64-NEXT:    decl %eax
-; CHECK-64-NEXT:    cmpl $8388607, %eax # imm = 0x7FFFFF
-; CHECK-64-NEXT:    setb %al
-; CHECK-64-NEXT:    orb %cl, %al
+; CHECK-64-NEXT:    testl $2139095040, %eax # imm = 0x7F800000
+; CHECK-64-NEXT:    sete %al
 ; CHECK-64-NEXT:    retq
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 240)  ; 0xf0 = "subnormal|zero"
@@ -819,24 +810,15 @@ entry:
 define i1 @issubnormal_or_zero_f_daz(float %x) #0 {
 ; CHECK-32-LABEL: issubnormal_or_zero_f_daz:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    flds {{[0-9]+}}(%esp)
-; CHECK-32-NEXT:    fldz
-; CHECK-32-NEXT:    fucompp
-; CHECK-32-NEXT:    fnstsw %ax
-; CHECK-32-NEXT:    # kill: def $ah killed $ah killed $ax
-; CHECK-32-NEXT:    sahf
-; CHECK-32-NEXT:    setnp %cl
+; CHECK-32-NEXT:    testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
 ; CHECK-32-NEXT:    sete %al
-; CHECK-32-NEXT:    andb %cl, %al
 ; CHECK-32-NEXT:    retl
 ;
 ; CHECK-64-LABEL: issubnormal_or_zero_f_daz:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    xorps %xmm1, %xmm1
-; CHECK-64-NEXT:    cmpeqss %xmm0, %xmm1
-; CHECK-64-NEXT:    movd %xmm1, %eax
-; CHECK-64-NEXT:    andl $1, %eax
-; CHECK-64-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-64-NEXT:    movd %xmm0, %eax
+; CHECK-64-NEXT:    testl $2139095040, %eax # imm = 0x7F800000
+; CHECK-64-NEXT:    sete %al
 ; CHECK-64-NEXT:    retq
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 240)  ; 0xf0 = "subnormal|zero"
@@ -846,24 +828,15 @@ entry:
 define i1 @issubnormal_or_zero_f_maybe_daz(float %x) #1 {
 ; CHECK-32-LABEL: issubnormal_or_zero_f_maybe_daz:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-32-NEXT:    sete %cl
-; CHECK-32-NEXT:    decl %eax
-; CHECK-32-NEXT:    cmpl $8388607, %eax # imm = 0x7FFFFF
-; CHECK-32-NEXT:    setb %al
-; CHECK-32-NEXT:    orb %cl, %al
+; CHECK-32-NEXT:    testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; CHECK-32-NEXT:    sete %al
 ; CHECK-32-NEXT:    retl
 ;
 ; CHECK-64-LABEL: issubnormal_or_zero_f_maybe_daz:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    movd %xmm0, %eax
-; CHECK-64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-64-NEXT:    sete %cl
-; CHECK-64-NEXT:    decl %eax
-; CHECK-64-NEXT:    cmpl $8388607, %eax # imm = 0x7FFFFF
-; CHECK-64-NEXT:    setb %al
-; CHECK-64-NEXT:    orb %cl, %al
+; CHECK-64-NEXT:    testl $2139095040, %eax # imm = 0x7F800000
+; CHECK-64-NEXT:    sete %al
 ; CHECK-64-NEXT:    retq
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 240)  ; 0xf0 = "subnormal|zero"
@@ -2475,30 +2448,24 @@ define i1 @issubnormal_or_nan_f(float %x) {
 define i1 @issubnormal_or_zero_or_nan_f(float %x) {
 ; CHECK-32-LABEL: issubnormal_or_zero_or_nan_f:
 ; CHECK-32:       # %bb.0:
-; CHECK-32-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-32-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT:    testl $2139095040, %eax # imm = 0x7F800000
 ; CHECK-32-NEXT:    sete %cl
-; CHECK-32-NEXT:    leal -1(%eax), %edx
-; CHECK-32-NEXT:    cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-32-NEXT:    setb %dl
+; CHECK-32-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
 ; CHECK-32-NEXT:    cmpl $2139095041, %eax # imm = 0x7F800001
 ; CHECK-32-NEXT:    setge %al
 ; CHECK-32-NEXT:    orb %cl, %al
-; CHECK-32-NEXT:    orb %dl, %al
 ; CHECK-32-NEXT:    retl
 ;
 ; CHECK-64-LABEL: issubnormal_or_zero_or_nan_f:
 ; CHECK-64:       # %bb.0:
 ; CHECK-64-NEXT:    movd %xmm0, %eax
-; CHECK-64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
+; CHECK-64-NEXT:    testl $2139095040, %eax # imm = 0x7F800000
 ; CHECK-64-NEXT:    sete %cl
-; CHECK-64-NEXT:    leal -1(%rax), %edx
-; CHECK-64-NEXT:    cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-64-NEXT:    setb %dl
+; CHECK-64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
 ; CHECK-64-NEXT:    cmpl $2139095041, %eax # imm = 0x7F800001
 ; CHECK-64-NEXT:    setge %al
 ; CHECK-64-NEXT:    orb %cl, %al
-; CHECK-64-NEXT:    orb %dl, %al
 ; CHECK-64-NEXT:    retq
   %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 243)  ; 0xf0|0x3 = "subnormal|zero|nan"
   ret i1 %class
@@ -2507,30 +2474,24 @@ define i1 @issubnormal_or_zero_or_nan_f(float %x) {
 define i1 @issubnormal_or_zero_or_nan_f_daz(float %x) #0 {
 ; CHECK-32-LABEL: issubnormal_or_zero_or_nan_f_daz:
 ; CHECK-32:       # %bb.0:
-; CHECK-32-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-32-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT:    testl $2139095040, %eax # imm = 0x7F800000
 ; CHECK-32-NEXT:    sete %cl
-; CHECK-32-NEXT:    leal -1(%eax), %edx
-; CHECK-32-NEXT:    cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-32-NEXT:    setb %dl
+; CHECK-32-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
 ; CHECK-32-NEXT:    cmpl $2139095041, %eax # imm = 0x7F800001
 ; CHECK-32-NEXT:    setge %al
 ; CHECK-32-NEXT:    orb %cl, %al
-; CHECK-32-NEXT:    orb %dl, %al
 ; CHECK-32-NEXT:    retl
 ;
 ; CHECK-64-LABEL: issubnormal_or_zero_or_nan_f_daz:
 ; CHECK-64:       # %bb.0:
 ; CHECK-64-NEXT:    movd %xmm0, %eax
-; CHECK-64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
+; CHECK-64-NEXT:    testl $2139095040, %eax # imm = 0x7F800000
 ; CHECK-64-NEXT:    sete %cl
-; CHECK-64-NEXT:    leal -1(%rax), %edx
-; CHECK-64-NEXT:    cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-64-NEXT:    setb %dl
+; CHECK-64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
 ; CHECK-64-NEXT:    cmpl $2139095041, %eax # imm = 0x7F800001
 ; CHECK-64-NEXT:    setge %al
 ; CHECK-64-NEXT:    orb %cl, %al
-; CHECK-64-NEXT:    orb %dl, %al
 ; CHECK-64-NEXT:    retq
   %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 243)  ; 0xf0|0x3 = "subnormal|zero|nan"
   ret i1 %class
@@ -2539,38 +2500,32 @@ define i1 @issubnormal_or_zero_or_nan_f_daz(float %x) #0 {
 define i1 @issubnormal_or_zero_or_snan_f(float %x) {
 ; CHECK-32-LABEL: issubnormal_or_zero_or_snan_f:
 ; CHECK-32:       # %bb.0:
-; CHECK-32-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-32-NEXT:    cmpl $2143289344, %eax # imm = 0x7FC00000
-; CHECK-32-NEXT:    setl %cl
-; CHECK-32-NEXT:    cmpl $2139095041, %eax # imm = 0x7F800001
-; CHECK-32-NEXT:    setge %dl
-; CHECK-32-NEXT:    andb %cl, %dl
-; CHECK-32-NEXT:    testl %eax, %eax
-; CHECK-32-NEXT:    sete %cl
-; CHECK-32-NEXT:    decl %eax
-; CHECK-32-NEXT:    cmpl $8388607, %eax # imm = 0x7FFFFF
-; CHECK-32-NEXT:    setb %al
+; CHECK-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT:    movl %eax, %ecx
+; CHECK-32-NEXT:    andl $2147483647, %ecx # imm = 0x7FFFFFFF
+; CHECK-32-NEXT:    cmpl $2143289344, %ecx # imm = 0x7FC00000
+; CHECK-32-NEXT:    setl %dl
+; CHECK-32-NEXT:    cmpl $2139095041, %ecx # imm = 0x7F800001
+; CHECK-32-NEXT:    setge %cl
+; CHECK-32-NEXT:    andb %dl, %cl
+; CHECK-32-NEXT:    testl $2139095040, %eax # imm = 0x7F800000
+; CHECK-32-NEXT:    sete %al
 ; CHECK-32-NEXT:    orb %cl, %al
-; CHECK-32-NEXT:    orb %dl, %al
 ; CHECK-32-NEXT:    retl
 ;
 ; CHECK-64-LABEL: issubnormal_or_zero_or_snan_f:
 ; CHECK-64:       # %bb.0:
 ; CHECK-64-NEXT:    movd %xmm0, %eax
-; CHECK-64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-64-NEXT:    cmpl $2143289344, %eax # imm = 0x7FC00000
-; CHECK-64-NEXT:    setl %cl
-; CHECK-64-NEXT:    cmpl $2139095041, %eax # imm = 0x7F800001
-; CHECK-64-NEXT:    setge %dl
-; CHECK-64-NEXT:    andb %cl, %dl
-; CHECK-64-NEXT:    testl %eax, %eax
-; CHECK-64-NEXT:    sete %cl
-; CHECK-64-NEXT:    decl %eax
-; CHECK-64-NEXT:    cmpl $8388607, %eax # imm = 0x7FFFFF
-; CHECK-64-NEXT:    setb %al
+; CHECK-64-NEXT:    movl %eax, %ecx
+; CHECK-64-NEXT:    andl $2147483647, %ecx # imm = 0x7FFFFFFF
+; CHECK-64-NEXT:    cmpl $2143289344, %ecx # imm = 0x7FC00000
+; CHECK-64-NEXT:    setl %dl
+; CHECK-64-NEXT:    cmpl $2139095041, %ecx # imm = 0x7F800001
+; CHECK-64-NEXT:    setge %cl
+; CHECK-64-NEXT:    andb %dl, %cl
+; CHECK-64-NEXT:    testl $2139095040, %eax # imm = 0x7F800000
+; CHECK-64-NEXT:    sete %al
 ; CHECK-64-NEXT:    orb %cl, %al
-; CHECK-64-NEXT:    orb %dl, %al
 ; CHECK-64-NEXT:    retq
   %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 241)  ; 0x90|0x1 = "subnormal|snan"
   ret i1 %class
@@ -2579,30 +2534,24 @@ define i1 @issubnormal_or_zero_or_snan_f(float %x) {
 define i1 @issubnormal_or_zero_or_qnan_f(float %x) {
 ; CHECK-32-LABEL: issubnormal_or_zero_or_qnan_f:
 ; CHECK-32:       # %bb.0:
-; CHECK-32-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-32-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT:    testl $2139095040, %eax # imm = 0x7F800000
 ; CHECK-32-NEXT:    sete %cl
-; CHECK-32-NEXT:    leal -1(%eax), %edx
-; CHECK-32-NEXT:    cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-32-NEXT:    setb %dl
+; CHECK-32-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
 ; CHECK-32-NEXT:    cmpl $2143289344, %eax # imm = 0x7FC00000
 ; CHECK-32-NEXT:    setge %al
 ; CHECK-32-NEXT:    orb %cl, %al
-; CHECK-32-NEXT:    orb %dl, %al
 ; CHECK-32-NEXT:    retl
 ;
 ; CHECK-64-LABEL: issubnormal_or_zero_or_qnan_f:
 ; CHECK-64:       # %bb.0:
 ; CHECK-64-NEXT:    movd %xmm0, %eax
-; CHECK-64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
+; CHECK-64-NEXT:    testl $2139095040, %eax # imm = 0x7F800000
 ; CHECK-64-NEXT:    sete %cl
-; CHECK-64-NEXT:    leal -1(%rax), %edx
-; CHECK-64-NEXT:    cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-64-NEXT:    setb %dl
+; CHECK-64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
 ; CHECK-64-NEXT:    cmpl $2143289344, %eax # imm = 0x7FC00000
 ; CHECK-64-NEXT:    setge %al
 ; CHECK-64-NEXT:    orb %cl, %al
-; CHECK-64-NEXT:    orb %dl, %al
 ; CHECK-64-NEXT:    retq
   %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 242)  ; 0x90|0x2 = "subnormal|qnan"
   ret i1 %class


        


More information about the llvm-commits mailing list