[llvm] DAG: Handle lowering unordered compare with inf (PR #100378)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 26 11:47:41 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100378
From 1246ce40eebc1ba1c39525f686ec15e784d28232 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 7 Feb 2023 12:22:05 -0400
Subject: [PATCH] DAG: Handle lowering unordered compare with inf
Try to take advantage of the nan check behavior of fcmp.
x86_64 looks better, x86_32 looks worse.
---
llvm/include/llvm/CodeGen/CodeGenCommonISel.h | 7 +-
llvm/lib/CodeGen/CodeGenCommonISel.cpp | 11 ++-
.../CodeGen/SelectionDAG/TargetLowering.cpp | 53 ++++++-----
llvm/test/CodeGen/AMDGPU/fp-classify.ll | 2 +-
.../CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll | 20 ++--
.../CodeGen/AMDGPU/llvm.is.fpclass.f16.ll | 4 +-
llvm/test/CodeGen/PowerPC/fp-classify.ll | 22 +++--
llvm/test/CodeGen/PowerPC/is_fpclass.ll | 10 +-
llvm/test/CodeGen/X86/is_fpclass-fp80.ll | 4 +-
llvm/test/CodeGen/X86/is_fpclass.ll | 92 +++++++++----------
10 files changed, 126 insertions(+), 99 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
index 90ef890f22d1b..e4b2e20babc07 100644
--- a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
+++ b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
@@ -218,10 +218,15 @@ findSplitPointForStackProtector(MachineBasicBlock *BB,
/// Evaluates if the specified FP class test is better performed as the inverse
/// (i.e. fewer instructions should be required to lower it). An example is the
/// test "inf|normal|subnormal|zero", which is an inversion of "nan".
+///
/// \param Test The test as specified in 'is_fpclass' intrinsic invocation.
+///
+/// \param UseFCmp The intention is to perform the comparison using
+/// floating-point compare instructions which check for nan.
+///
/// \returns The inverted test, or fcNone, if inversion does not produce a
/// simpler test.
-FPClassTest invertFPClassTestIfSimpler(FPClassTest Test);
+FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp);
/// Assuming the instruction \p MI is going to be deleted, attempt to salvage
/// debug users of \p MI by writing the effect of \p MI in a DIExpression.
diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index fe144d3c18203..88c643c568027 100644
--- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -173,8 +173,9 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
return SplitPoint;
}
-FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
+FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp) {
FPClassTest InvertedTest = ~Test;
+
// Pick the direction with fewer tests
// TODO: Handle more combinations of cases that can be handled together
switch (static_cast<unsigned>(InvertedTest)) {
@@ -200,6 +201,14 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
case fcSubnormal | fcZero:
case fcSubnormal | fcZero | fcNan:
return InvertedTest;
+ case fcInf | fcNan:
+ // If we're trying to use fcmp, we can take advantage of the nan check
+ // behavior of the compare (but this is more instructions in the integer
+ // expansion).
+ return UseFCmp ? InvertedTest : fcNone;
+ case fcFinite | fcNan:
+ // Inversion of fcInf, which can be done in a combined check.
+ return fcInf;
default:
return fcNone;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 6fd23b5ab9f5f..1e12d7937ba79 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8566,7 +8566,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
// Degenerated cases.
if (Test == fcNone)
return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
- if ((Test & fcAllFlags) == fcAllFlags)
+ if (Test == fcAllFlags)
return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
// PPC double double is a pair of doubles, of which the higher part determines
@@ -8577,14 +8577,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
OperandVT = MVT::f64;
}
- // Some checks may be represented as inversion of simpler check, for example
- // "inf|normal|subnormal|zero" => !"nan".
- bool IsInverted = false;
- if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
- IsInverted = true;
- Test = InvertedCheck;
- }
-
// Floating-point type properties.
EVT ScalarFloatVT = OperandVT.getScalarType();
const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
@@ -8596,9 +8588,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
if (Flags.hasNoFPExcept() &&
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
FPClassTest FPTestMask = Test;
+ bool IsInvertedFP = false;
+
+ if (FPClassTest InvertedFPCheck =
+ invertFPClassTestIfSimpler(FPTestMask, true)) {
+ FPTestMask = InvertedFPCheck;
+ IsInvertedFP = true;
+ }
- ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
- ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
+ ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
+ ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
// See if we can fold an | fcNan into an unordered compare.
FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
@@ -8611,7 +8610,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
const bool IsOrdered = FPTestMask == OrderedFPTestMask;
if (std::optional<bool> IsCmp0 =
- isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
+ isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
IsCmp0 && (isCondCodeLegalOrCustom(
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
OperandVT.getScalarType().getSimpleVT()))) {
@@ -8623,15 +8622,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
}
- if (Test == fcNan &&
- isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
- OperandVT.getScalarType().getSimpleVT())) {
+ if (FPTestMask == fcNan &&
+ isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
+ OperandVT.getScalarType().getSimpleVT()))
return DAG.getSetCC(DL, ResultVT, Op, Op,
- IsInverted ? ISD::SETO : ISD::SETUO);
- }
+ IsInvertedFP ? ISD::SETO : ISD::SETUO);
- if (Test == fcInf &&
- isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
+ bool IsOrderedInf = FPTestMask == fcInf;
+ if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
+ isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
+ : UnorderedCmpOpcode,
OperandVT.getScalarType().getSimpleVT()) &&
isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
// isinf(x) --> fabs(x) == inf
@@ -8639,15 +8639,15 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
SDValue Inf =
DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
return DAG.getSetCC(DL, ResultVT, Abs, Inf,
- IsInverted ? ISD::SETUNE : ISD::SETOEQ);
+ IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
}
if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
// TODO: Could handle ordered case, but it produces worse code for
// x86. Maybe handle ordered if fabs is free?
- ISD::CondCode OrderedOp = IsInverted ? ISD::SETUGE : ISD::SETOLT;
- ISD::CondCode UnorderedOp = IsInverted ? ISD::SETOGE : ISD::SETULT;
+ ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
+ ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
OperandVT.getScalarType().getSimpleVT())) {
@@ -8664,6 +8664,15 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
}
}
+ // Some checks may be represented as inversion of simpler check, for example
+ // "inf|normal|subnormal|zero" => !"nan".
+ bool IsInverted = false;
+
+ if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
+ Test = InvertedCheck;
+ IsInverted = true;
+ }
+
// In the general case use integer operations.
unsigned BitSize = OperandVT.getScalarSizeInBits();
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
diff --git a/llvm/test/CodeGen/AMDGPU/fp-classify.ll b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
index fb731cc00d3f0..1439ed56055f8 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-classify.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
@@ -624,7 +624,7 @@ define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %ou
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_and_b32 s4, s4, 0x7fff
-; SI-NEXT: s_cmpk_eq_i32 s4, 0x7c00
+; SI-NEXT: s_cmpk_lg_i32 s4, 0x7c00
; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
index 2e8049e9765e1..86c0f8d1c1e2f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
@@ -1130,7 +1130,7 @@ define i1 @isinf_bf16(bfloat %x) nounwind {
; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s4, v0
; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
;
@@ -1139,7 +1139,7 @@ define i1 @isinf_bf16(bfloat %x) nounwind {
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
-; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
+; GFX8CHECK-NEXT: v_cmp_ne_u16_e32 vcc, s4, v0
; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
;
@@ -1148,7 +1148,7 @@ define i1 @isinf_bf16(bfloat %x) nounwind {
; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
-; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
+; GFX9CHECK-NEXT: v_cmp_ne_u16_e32 vcc, s4, v0
; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
;
@@ -1156,7 +1156,7 @@ define i1 @isinf_bf16(bfloat %x) nounwind {
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
+; GFX10CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
;
@@ -1164,7 +1164,7 @@ define i1 @isinf_bf16(bfloat %x) nounwind {
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
+; GFX11CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
%1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 516) ; 0x204 = "inf"
@@ -2856,7 +2856,7 @@ define i1 @not_isfinite_or_nan_f(bfloat %x) {
; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s4, v0
; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
;
@@ -2865,7 +2865,7 @@ define i1 @not_isfinite_or_nan_f(bfloat %x) {
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
-; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
+; GFX8CHECK-NEXT: v_cmp_ne_u16_e32 vcc, s4, v0
; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
;
@@ -2874,7 +2874,7 @@ define i1 @not_isfinite_or_nan_f(bfloat %x) {
; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
-; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
+; GFX9CHECK-NEXT: v_cmp_ne_u16_e32 vcc, s4, v0
; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
;
@@ -2882,7 +2882,7 @@ define i1 @not_isfinite_or_nan_f(bfloat %x) {
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
+; GFX10CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
;
@@ -2890,7 +2890,7 @@ define i1 @not_isfinite_or_nan_f(bfloat %x) {
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
+; GFX11CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index 9c248bd6e8b2a..fc1574b460cd9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -1368,7 +1368,7 @@ define i1 @isinf_f16(half %x) nounwind {
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT: v_cmp_ne_u32_e32 vcc, s4, v0
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -3159,7 +3159,7 @@ define i1 @not_isfinite_or_nan_f(half %x) {
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT: v_cmp_ne_u32_e32 vcc, s4, v0
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/PowerPC/fp-classify.ll b/llvm/test/CodeGen/PowerPC/fp-classify.ll
index f527b3c48040e..3a670c7c6581a 100644
--- a/llvm/test/CodeGen/PowerPC/fp-classify.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-classify.ll
@@ -9,11 +9,10 @@ define zeroext i1 @abs_isinff(float %x) {
; P8: # %bb.0: # %entry
; P8-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; P8-NEXT: xsabsdp 0, 1
-; P8-NEXT: li 4, 1
; P8-NEXT: lfs 1, .LCPI0_0@toc@l(3)
-; P8-NEXT: li 3, 0
+; P8-NEXT: li 3, 1
; P8-NEXT: fcmpu 0, 0, 1
-; P8-NEXT: iseleq 3, 4, 3
+; P8-NEXT: iseleq 3, 0, 3
; P8-NEXT: blr
;
; P9-LABEL: abs_isinff:
@@ -34,11 +33,10 @@ define zeroext i1 @abs_isinf(double %x) {
; P8: # %bb.0: # %entry
; P8-NEXT: addis 3, 2, .LCPI1_0@toc@ha
; P8-NEXT: xsabsdp 0, 1
-; P8-NEXT: li 4, 1
; P8-NEXT: lfs 1, .LCPI1_0@toc@l(3)
-; P8-NEXT: li 3, 0
+; P8-NEXT: li 3, 1
; P8-NEXT: fcmpu 0, 0, 1
-; P8-NEXT: iseleq 3, 4, 3
+; P8-NEXT: iseleq 3, 0, 3
; P8-NEXT: blr
;
; P9-LABEL: abs_isinf:
@@ -67,8 +65,8 @@ define zeroext i1 @abs_isinfq(fp128 %x) {
; P8-NEXT: clrldi 4, 4, 1
; P8-NEXT: xor 4, 4, 5
; P8-NEXT: or 3, 3, 4
-; P8-NEXT: cntlzd 3, 3
-; P8-NEXT: rldicl 3, 3, 58, 63
+; P8-NEXT: addic 4, 3, -1
+; P8-NEXT: subfe 3, 4, 3
; P8-NEXT: blr
;
; P9-LABEL: abs_isinfq:
@@ -173,7 +171,8 @@ define <4 x i1> @abs_isinfv4f32(<4 x float> %x) {
; P8-NEXT: xvabssp 0, 34
; P8-NEXT: addi 3, 3, .LCPI6_0@toc@l
; P8-NEXT: lxvd2x 1, 0, 3
-; P8-NEXT: xvcmpeqsp 34, 0, 1
+; P8-NEXT: xvcmpeqsp 0, 0, 1
+; P8-NEXT: xxlnor 34, 0, 0
; P8-NEXT: blr
;
; P9-LABEL: abs_isinfv4f32:
@@ -182,7 +181,8 @@ define <4 x i1> @abs_isinfv4f32(<4 x float> %x) {
; P9-NEXT: xvabssp 0, 34
; P9-NEXT: addi 3, 3, .LCPI6_0@toc@l
; P9-NEXT: lxv 1, 0(3)
-; P9-NEXT: xvcmpeqsp 34, 0, 1
+; P9-NEXT: xvcmpeqsp 0, 0, 1
+; P9-NEXT: xxlnor 34, 0, 0
; P9-NEXT: blr
entry:
%0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
@@ -198,6 +198,7 @@ define <2 x i1> @abs_isinfv2f64(<2 x double> %x) {
; P8-NEXT: addi 3, 3, .LCPI7_0@toc@l
; P8-NEXT: lxvd2x 1, 0, 3
; P8-NEXT: xvcmpeqdp 34, 0, 1
+; P8-NEXT: xxlnor 34, 34, 34
; P8-NEXT: blr
;
; P9-LABEL: abs_isinfv2f64:
@@ -207,6 +208,7 @@ define <2 x i1> @abs_isinfv2f64(<2 x double> %x) {
; P9-NEXT: addi 3, 3, .LCPI7_0@toc@l
; P9-NEXT: lxv 1, 0(3)
; P9-NEXT: xvcmpeqdp 34, 0, 1
+; P9-NEXT: xxlnor 34, 34, 34
; P9-NEXT: blr
entry:
%0 = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
diff --git a/llvm/test/CodeGen/PowerPC/is_fpclass.ll b/llvm/test/CodeGen/PowerPC/is_fpclass.ll
index 57f457553a540..b4ca2ab5294ec 100644
--- a/llvm/test/CodeGen/PowerPC/is_fpclass.ll
+++ b/llvm/test/CodeGen/PowerPC/is_fpclass.ll
@@ -117,10 +117,12 @@ define i1 @isinf_float(float %x) nounwind {
define i1 @isinf_ppc_fp128(ppc_fp128 %x) nounwind {
; CHECK-LABEL: isinf_ppc_fp128:
; CHECK: # %bb.0:
-; CHECK-NEXT: xststdcdp 0, 1, 48
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: li 4, 1
-; CHECK-NEXT: iseleq 3, 4, 3
+; CHECK-NEXT: addis 3, 2, .LCPI9_0@toc@ha
+; CHECK-NEXT: xsabsdp 0, 1
+; CHECK-NEXT: lfs 1, .LCPI9_0@toc@l(3)
+; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: fcmpu 0, 0, 1
+; CHECK-NEXT: iseleq 3, 0, 3
; CHECK-NEXT: blr
%1 = call i1 @llvm.is.fpclass.ppcf128(ppc_fp128 %x, i32 516) ; 0x204 = "inf"
ret i1 %1
diff --git a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
index 52d294ca01720..cc162c2d43d67 100644
--- a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
@@ -244,7 +244,7 @@ define i1 @is_inf_f80(x86_fp80 %x) nounwind {
; X86-NEXT: andl $32767, %eax # imm = 0x7FFF
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: sete %al
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: is_inf_f80:
@@ -255,7 +255,7 @@ define i1 @is_inf_f80(x86_fp80 %x) nounwind {
; X64-NEXT: xorq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT: andl $32767, %eax # imm = 0x7FFF
; X64-NEXT: orq %rcx, %rax
-; X64-NEXT: sete %al
+; X64-NEXT: setne %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 516) ; 0x204 = "inf"
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index cc4d4c4543a51..9636c35a8a69f 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -147,7 +147,7 @@ define i1 @isinf_f(float %x) {
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: sete %al
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: isinf_f:
@@ -155,7 +155,7 @@ define i1 @isinf_f(float %x) {
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: sete %al
+; X64-NEXT: setne %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf"
@@ -240,18 +240,22 @@ entry:
define i1 @isfinite_f(float %x) {
; X86-LABEL: isfinite_f:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setl %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fabs
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: isfinite_f:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: setl %al
+; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: setne %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
@@ -1120,7 +1124,7 @@ define i1 @isinf_d(double %x) {
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl $2146435072, %eax # imm = 0x7FF00000
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sete %al
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-GENERIC-LABEL: isinf_d:
@@ -1130,7 +1134,7 @@ define i1 @isinf_d(double %x) {
; X64-GENERIC-NEXT: andq %rax, %rcx
; X64-GENERIC-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000
; X64-GENERIC-NEXT: cmpq %rax, %rcx
-; X64-GENERIC-NEXT: sete %al
+; X64-GENERIC-NEXT: setne %al
; X64-GENERIC-NEXT: retq
;
; X64-NDD-LABEL: isinf_d:
@@ -1140,7 +1144,7 @@ define i1 @isinf_d(double %x) {
; X64-NDD-NEXT: andq %rcx, %rax
; X64-NDD-NEXT: movabsq $9218868437227405312, %rcx # imm = 0x7FF0000000000000
; X64-NDD-NEXT: cmpq %rcx, %rax
-; X64-NDD-NEXT: sete %al
+; X64-NDD-NEXT: setne %al
; X64-NDD-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 516) ; 0x204 = "inf"
@@ -1150,31 +1154,23 @@ entry:
define i1 @isfinite_d(double %x) {
; X86-LABEL: isfinite_d:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2146435072, %eax # imm = 0x7FF00000
-; X86-NEXT: setl %al
+; X86-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NEXT: fabs
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
-; X64-GENERIC-LABEL: isfinite_d:
-; X64-GENERIC: # %bb.0: # %entry
-; X64-GENERIC-NEXT: movq %xmm0, %rax
-; X64-GENERIC-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-GENERIC-NEXT: andq %rax, %rcx
-; X64-GENERIC-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000
-; X64-GENERIC-NEXT: cmpq %rax, %rcx
-; X64-GENERIC-NEXT: setl %al
-; X64-GENERIC-NEXT: retq
-;
-; X64-NDD-LABEL: isfinite_d:
-; X64-NDD: # %bb.0: # %entry
-; X64-NDD-NEXT: movq %xmm0, %rax
-; X64-NDD-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: movabsq $9218868437227405312, %rcx # imm = 0x7FF0000000000000
-; X64-NDD-NEXT: cmpq %rcx, %rax
-; X64-NDD-NEXT: setl %al
-; X64-NDD-NEXT: retq
+; X64-LABEL: isfinite_d:
+; X64: # %bb.0: # %entry
+; X64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 504) ; 0x1f8 = "finite"
ret i1 %0
@@ -2053,18 +2049,22 @@ entry:
define i1 @not_isinf_or_nan_f(float %x) {
; X86-LABEL: not_isinf_or_nan_f:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setl %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fabs
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: not_isinf_or_nan_f:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: setl %al
+; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: setne %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; ~(0x204|0x3) = "~(inf|nan)"
@@ -2098,7 +2098,7 @@ define i1 @not_isfinite_or_nan_f(float %x) {
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: sete %al
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: not_isfinite_or_nan_f:
@@ -2106,7 +2106,7 @@ define i1 @not_isfinite_or_nan_f(float %x) {
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: sete %al
+; X64-NEXT: setne %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; ~(0x1f8|0x3) = "~(finite|nan)"
More information about the llvm-commits
mailing list