[llvm] 61820f8 - CodeGen: Optimize lowering of is.fpclass fcZero|fcSubnormal
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 6 10:04:02 PDT 2023
Author: Matt Arsenault
Date: 2023-07-06T13:03:57-04:00
New Revision: 61820f8b5dac779d136a342d22f5e66f99c7b648
URL: https://github.com/llvm/llvm-project/commit/61820f8b5dac779d136a342d22f5e66f99c7b648
DIFF: https://github.com/llvm/llvm-project/commit/61820f8b5dac779d136a342d22f5e66f99c7b648.diff
LOG: CodeGen: Optimize lowering of is.fpclass fcZero|fcSubnormal
Combine the two checks into a single check that all exponent bits are 0. The
inverted case isn't reachable until a future change, and GlobalISel
currently doesn't attempt the inversion optimization.
https://reviews.llvm.org/D143182
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
llvm/test/CodeGen/X86/is_fpclass.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index b4963384702726..266fa37e969d40 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7274,6 +7274,9 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
return Legalized;
}
+ // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
+ // version
+
unsigned BitSize = SrcTy.getScalarSizeInBits();
const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
@@ -7329,6 +7332,18 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
Mask &= ~fcNegFinite;
}
+ if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
+ // fcZero | fcSubnormal => test all exponent bits are 0
+ // TODO: Handle sign bit specific cases
+ // TODO: Handle inverted case
+ if (PartialCheck == (fcZero | fcSubnormal)) {
+ auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ ExpBits, ZeroC));
+ Mask &= ~PartialCheck;
+ }
+ }
+
// Check for individual classes.
if (FPClassTest PartialCheck = Mask & fcZero) {
if (PartialCheck == fcPosZero)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b09f2508022129..ef723fd4079499 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8108,12 +8108,8 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
// exceptions are ignored.
if (Flags.hasNoFPExcept() &&
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
- // Even if the condition isn't legal, we're probably better off expanding it
- // if it's the combined 0 || denormal compare.
-
if (isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction()) &&
- (Test != fcZero ||
- isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
+ (isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
OperandVT.getScalarType().getSimpleVT()))) {
// If denormals could be implicitly treated as 0, this is not equivalent
// to a compare with 0 since it will also be true for denormals.
@@ -8207,6 +8203,20 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
}
appendResult(PartialRes);
+ if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
+ // fcZero | fcSubnormal => test all exponent bits are 0
+ // TODO: Handle sign bit specific cases
+ if (PartialCheck == (fcZero | fcSubnormal)) {
+ assert(!IsInverted && "should handle inverted case");
+
+ SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
+ SDValue ExpIsZero =
+ DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
+ appendResult(ExpIsZero);
+ Test &= ~PartialCheck & fcAllFlags;
+ }
+ }
+
// Check for individual classes.
if (unsigned PartialCheck = Test & fcZero) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index 0c9f073c9384ce..4a08b083ff12a9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -1504,27 +1504,18 @@ define i1 @issubnormal_or_zero_f16(half %x) {
; GFX7SELDAG: ; %bb.0: ; %entry
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x3ff
-; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7c00, v0
; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX7SELDAG-NEXT: v_add_i32_e64 v0, s[4:5], -1, v0
-; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s6, v0
-; GFX7SELDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7GLISEL-LABEL: issubnormal_or_zero_f16:
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
-; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v0
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: issubnormal_or_zero_f16:
@@ -1701,18 +1692,15 @@ define i1 @not_isnormal_f16(half %x) {
; GFX7GLISEL-LABEL: not_isnormal_f16:
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
-; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v2
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1
; GFX7GLISEL-NEXT: s_movk_i32 s6, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v0
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v1
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v1
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1770,23 +1758,20 @@ define i1 @not_is_plus_normal_f16(half %x) {
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v3, 0xffff, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, v2
-; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], 1, v1
-; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v3, 0x3ff
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v0, v3
; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], s8, v3
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s8, v2
-; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v3
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
+; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v2, v3
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
@@ -1846,23 +1831,20 @@ define i1 @not_is_neg_normal_f16(half %x) {
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v3, 0xffff, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2
-; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], 1, v1
-; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v3, 0x3ff
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v0, v3
; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], s8, v3
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s8, v2
-; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v3
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v2, v3
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index 76d0997ca6485a..0e80a4dc7aeba7 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -792,24 +792,15 @@ entry:
define i1 @issubnormal_or_zero_f(float %x) {
; CHECK-32-LABEL: issubnormal_or_zero_f:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax
-; CHECK-32-NEXT: sete %cl
-; CHECK-32-NEXT: decl %eax
-; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF
-; CHECK-32-NEXT: setb %al
-; CHECK-32-NEXT: orb %cl, %al
+; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; CHECK-32-NEXT: sete %al
; CHECK-32-NEXT: retl
;
; CHECK-64-LABEL: issubnormal_or_zero_f:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: movd %xmm0, %eax
-; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-64-NEXT: sete %cl
-; CHECK-64-NEXT: decl %eax
-; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF
-; CHECK-64-NEXT: setb %al
-; CHECK-64-NEXT: orb %cl, %al
+; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
+; CHECK-64-NEXT: sete %al
; CHECK-64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 240) ; 0xf0 = "subnormal|zero"
@@ -819,24 +810,15 @@ entry:
define i1 @issubnormal_or_zero_f_daz(float %x) #0 {
; CHECK-32-LABEL: issubnormal_or_zero_f_daz:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: flds {{[0-9]+}}(%esp)
-; CHECK-32-NEXT: fldz
-; CHECK-32-NEXT: fucompp
-; CHECK-32-NEXT: fnstsw %ax
-; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax
-; CHECK-32-NEXT: sahf
-; CHECK-32-NEXT: setnp %cl
+; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
; CHECK-32-NEXT: sete %al
-; CHECK-32-NEXT: andb %cl, %al
; CHECK-32-NEXT: retl
;
; CHECK-64-LABEL: issubnormal_or_zero_f_daz:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: xorps %xmm1, %xmm1
-; CHECK-64-NEXT: cmpeqss %xmm0, %xmm1
-; CHECK-64-NEXT: movd %xmm1, %eax
-; CHECK-64-NEXT: andl $1, %eax
-; CHECK-64-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-64-NEXT: movd %xmm0, %eax
+; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
+; CHECK-64-NEXT: sete %al
; CHECK-64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 240) ; 0xf0 = "subnormal|zero"
@@ -846,24 +828,15 @@ entry:
define i1 @issubnormal_or_zero_f_maybe_daz(float %x) #1 {
; CHECK-32-LABEL: issubnormal_or_zero_f_maybe_daz:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax
-; CHECK-32-NEXT: sete %cl
-; CHECK-32-NEXT: decl %eax
-; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF
-; CHECK-32-NEXT: setb %al
-; CHECK-32-NEXT: orb %cl, %al
+; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; CHECK-32-NEXT: sete %al
; CHECK-32-NEXT: retl
;
; CHECK-64-LABEL: issubnormal_or_zero_f_maybe_daz:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: movd %xmm0, %eax
-; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-64-NEXT: sete %cl
-; CHECK-64-NEXT: decl %eax
-; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF
-; CHECK-64-NEXT: setb %al
-; CHECK-64-NEXT: orb %cl, %al
+; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
+; CHECK-64-NEXT: sete %al
; CHECK-64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 240) ; 0xf0 = "subnormal|zero"
@@ -2475,30 +2448,24 @@ define i1 @issubnormal_or_nan_f(float %x) {
define i1 @issubnormal_or_zero_or_nan_f(float %x) {
; CHECK-32-LABEL: issubnormal_or_zero_or_nan_f:
; CHECK-32: # %bb.0:
-; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT: testl $2139095040, %eax # imm = 0x7F800000
; CHECK-32-NEXT: sete %cl
-; CHECK-32-NEXT: leal -1(%eax), %edx
-; CHECK-32-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-32-NEXT: setb %dl
+; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
; CHECK-32-NEXT: setge %al
; CHECK-32-NEXT: orb %cl, %al
-; CHECK-32-NEXT: orb %dl, %al
; CHECK-32-NEXT: retl
;
; CHECK-64-LABEL: issubnormal_or_zero_or_nan_f:
; CHECK-64: # %bb.0:
; CHECK-64-NEXT: movd %xmm0, %eax
-; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
; CHECK-64-NEXT: sete %cl
-; CHECK-64-NEXT: leal -1(%rax), %edx
-; CHECK-64-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-64-NEXT: setb %dl
+; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
; CHECK-64-NEXT: setge %al
; CHECK-64-NEXT: orb %cl, %al
-; CHECK-64-NEXT: orb %dl, %al
; CHECK-64-NEXT: retq
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 243) ; 0xf0|0x3 = "subnormal|zero|nan"
ret i1 %class
@@ -2507,30 +2474,24 @@ define i1 @issubnormal_or_zero_or_nan_f(float %x) {
define i1 @issubnormal_or_zero_or_nan_f_daz(float %x) #0 {
; CHECK-32-LABEL: issubnormal_or_zero_or_nan_f_daz:
; CHECK-32: # %bb.0:
-; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT: testl $2139095040, %eax # imm = 0x7F800000
; CHECK-32-NEXT: sete %cl
-; CHECK-32-NEXT: leal -1(%eax), %edx
-; CHECK-32-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-32-NEXT: setb %dl
+; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
; CHECK-32-NEXT: setge %al
; CHECK-32-NEXT: orb %cl, %al
-; CHECK-32-NEXT: orb %dl, %al
; CHECK-32-NEXT: retl
;
; CHECK-64-LABEL: issubnormal_or_zero_or_nan_f_daz:
; CHECK-64: # %bb.0:
; CHECK-64-NEXT: movd %xmm0, %eax
-; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
; CHECK-64-NEXT: sete %cl
-; CHECK-64-NEXT: leal -1(%rax), %edx
-; CHECK-64-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-64-NEXT: setb %dl
+; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
; CHECK-64-NEXT: setge %al
; CHECK-64-NEXT: orb %cl, %al
-; CHECK-64-NEXT: orb %dl, %al
; CHECK-64-NEXT: retq
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 243) ; 0xf0|0x3 = "subnormal|zero|nan"
ret i1 %class
@@ -2539,38 +2500,32 @@ define i1 @issubnormal_or_zero_or_nan_f_daz(float %x) #0 {
define i1 @issubnormal_or_zero_or_snan_f(float %x) {
; CHECK-32-LABEL: issubnormal_or_zero_or_snan_f:
; CHECK-32: # %bb.0:
-; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax
-; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; CHECK-32-NEXT: setl %cl
-; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
-; CHECK-32-NEXT: setge %dl
-; CHECK-32-NEXT: andb %cl, %dl
-; CHECK-32-NEXT: testl %eax, %eax
-; CHECK-32-NEXT: sete %cl
-; CHECK-32-NEXT: decl %eax
-; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF
-; CHECK-32-NEXT: setb %al
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT: movl %eax, %ecx
+; CHECK-32-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
+; CHECK-32-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000
+; CHECK-32-NEXT: setl %dl
+; CHECK-32-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001
+; CHECK-32-NEXT: setge %cl
+; CHECK-32-NEXT: andb %dl, %cl
+; CHECK-32-NEXT: testl $2139095040, %eax # imm = 0x7F800000
+; CHECK-32-NEXT: sete %al
; CHECK-32-NEXT: orb %cl, %al
-; CHECK-32-NEXT: orb %dl, %al
; CHECK-32-NEXT: retl
;
; CHECK-64-LABEL: issubnormal_or_zero_or_snan_f:
; CHECK-64: # %bb.0:
; CHECK-64-NEXT: movd %xmm0, %eax
-; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; CHECK-64-NEXT: setl %cl
-; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
-; CHECK-64-NEXT: setge %dl
-; CHECK-64-NEXT: andb %cl, %dl
-; CHECK-64-NEXT: testl %eax, %eax
-; CHECK-64-NEXT: sete %cl
-; CHECK-64-NEXT: decl %eax
-; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF
-; CHECK-64-NEXT: setb %al
+; CHECK-64-NEXT: movl %eax, %ecx
+; CHECK-64-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
+; CHECK-64-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000
+; CHECK-64-NEXT: setl %dl
+; CHECK-64-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001
+; CHECK-64-NEXT: setge %cl
+; CHECK-64-NEXT: andb %dl, %cl
+; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
+; CHECK-64-NEXT: sete %al
; CHECK-64-NEXT: orb %cl, %al
-; CHECK-64-NEXT: orb %dl, %al
; CHECK-64-NEXT: retq
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 241) ; 0xf0|0x1 = "subnormal|zero|snan"
ret i1 %class
@@ -2579,30 +2534,24 @@ define i1 @issubnormal_or_zero_or_snan_f(float %x) {
define i1 @issubnormal_or_zero_or_qnan_f(float %x) {
; CHECK-32-LABEL: issubnormal_or_zero_or_qnan_f:
; CHECK-32: # %bb.0:
-; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT: testl $2139095040, %eax # imm = 0x7F800000
; CHECK-32-NEXT: sete %cl
-; CHECK-32-NEXT: leal -1(%eax), %edx
-; CHECK-32-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-32-NEXT: setb %dl
+; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
; CHECK-32-NEXT: setge %al
; CHECK-32-NEXT: orb %cl, %al
-; CHECK-32-NEXT: orb %dl, %al
; CHECK-32-NEXT: retl
;
; CHECK-64-LABEL: issubnormal_or_zero_or_qnan_f:
; CHECK-64: # %bb.0:
; CHECK-64-NEXT: movd %xmm0, %eax
-; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
; CHECK-64-NEXT: sete %cl
-; CHECK-64-NEXT: leal -1(%rax), %edx
-; CHECK-64-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF
-; CHECK-64-NEXT: setb %dl
+; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
; CHECK-64-NEXT: setge %al
; CHECK-64-NEXT: orb %cl, %al
-; CHECK-64-NEXT: orb %dl, %al
; CHECK-64-NEXT: retq
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 242) ; 0xf0|0x2 = "subnormal|zero|qnan"
ret i1 %class
More information about the llvm-commits
mailing list