[llvm] fc3e6a8 - DAG: Handle lowering unordered compare with inf (#100378)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 5 08:54:36 PDT 2024
Author: Matt Arsenault
Date: 2024-09-05T19:54:32+04:00
New Revision: fc3e6a81868a0c84e405622a64756e57f020ca37
URL: https://github.com/llvm/llvm-project/commit/fc3e6a81868a0c84e405622a64756e57f020ca37
DIFF: https://github.com/llvm/llvm-project/commit/fc3e6a81868a0c84e405622a64756e57f020ca37.diff
LOG: DAG: Handle lowering unordered compare with inf (#100378)
Try to take advantage of the nan check behavior of fcmp.
x86_64 looks better, x86_32 looks worse.
Added:
Modified:
llvm/include/llvm/CodeGen/CodeGenCommonISel.h
llvm/lib/CodeGen/CodeGenCommonISel.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AArch64/isinf.ll
llvm/test/CodeGen/PowerPC/fp-classify.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
index 90ef890f22d1b1..4c22be94507866 100644
--- a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
+++ b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
@@ -218,10 +218,14 @@ findSplitPointForStackProtector(MachineBasicBlock *BB,
/// Evaluates if the specified FP class test is better performed as the inverse
/// (i.e. fewer instructions should be required to lower it). An example is the
/// test "inf|normal|subnormal|zero", which is an inversion of "nan".
+///
/// \param Test The test as specified in 'is_fpclass' intrinsic invocation.
+/// \param UseFCmp The intention is to perform the comparison using
+/// floating-point compare instructions which check for nan.
+///
/// \returns The inverted test, or fcNone, if inversion does not produce a
/// simpler test.
-FPClassTest invertFPClassTestIfSimpler(FPClassTest Test);
+FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp);
/// Assuming the instruction \p MI is going to be deleted, attempt to salvage
/// debug users of \p MI by writing the effect of \p MI in a DIExpression.
diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index fe144d3c182039..d985751e2be0be 100644
--- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -173,8 +173,9 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
return SplitPoint;
}
-FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
+FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp) {
FPClassTest InvertedTest = ~Test;
+
// Pick the direction with fewer tests
// TODO: Handle more combinations of cases that can be handled together
switch (static_cast<unsigned>(InvertedTest)) {
@@ -200,6 +201,11 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
case fcSubnormal | fcZero:
case fcSubnormal | fcZero | fcNan:
return InvertedTest;
+ case fcInf | fcNan:
+ // If we're trying to use fcmp, we can take advantage of the nan check
+ // behavior of the compare (but this is more instructions in the integer
+ // expansion).
+ return UseFCmp ? InvertedTest : fcNone;
default:
return fcNone;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 01feec0c435edf..c3affabb19d375 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8675,7 +8675,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
// Degenerated cases.
if (Test == fcNone)
return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
- if ((Test & fcAllFlags) == fcAllFlags)
+ if (Test == fcAllFlags)
return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
// PPC double double is a pair of doubles, of which the higher part determines
@@ -8686,14 +8686,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
OperandVT = MVT::f64;
}
- // Some checks may be represented as inversion of simpler check, for example
- // "inf|normal|subnormal|zero" => !"nan".
- bool IsInverted = false;
- if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
- IsInverted = true;
- Test = InvertedCheck;
- }
-
// Floating-point type properties.
EVT ScalarFloatVT = OperandVT.getScalarType();
const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
@@ -8705,9 +8697,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
if (Flags.hasNoFPExcept() &&
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
FPClassTest FPTestMask = Test;
+ bool IsInvertedFP = false;
+
+ if (FPClassTest InvertedFPCheck =
+ invertFPClassTestIfSimpler(FPTestMask, true)) {
+ FPTestMask = InvertedFPCheck;
+ IsInvertedFP = true;
+ }
- ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
- ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
+ ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
+ ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
// See if we can fold an | fcNan into an unordered compare.
FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
@@ -8720,7 +8719,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
const bool IsOrdered = FPTestMask == OrderedFPTestMask;
if (std::optional<bool> IsCmp0 =
- isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
+ isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
IsCmp0 && (isCondCodeLegalOrCustom(
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
OperandVT.getScalarType().getSimpleVT()))) {
@@ -8732,31 +8731,35 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
}
- if (Test == fcNan &&
- isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
- OperandVT.getScalarType().getSimpleVT())) {
+ if (FPTestMask == fcNan &&
+ isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
+ OperandVT.getScalarType().getSimpleVT()))
return DAG.getSetCC(DL, ResultVT, Op, Op,
- IsInverted ? ISD::SETO : ISD::SETUO);
- }
+ IsInvertedFP ? ISD::SETO : ISD::SETUO);
- if (Test == fcInf &&
- isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
+ bool IsOrderedInf = FPTestMask == fcInf;
+ if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
+ isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
+ : UnorderedCmpOpcode,
OperandVT.getScalarType().getSimpleVT()) &&
- isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
+ isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
+ (isOperationLegal(ISD::ConstantFP, OperandVT.getScalarType()) ||
+ (OperandVT.isVector() &&
+ isOperationLegalOrCustom(ISD::BUILD_VECTOR, OperandVT)))) {
// isinf(x) --> fabs(x) == inf
SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
SDValue Inf =
DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
return DAG.getSetCC(DL, ResultVT, Abs, Inf,
- IsInverted ? ISD::SETUNE : ISD::SETOEQ);
+ IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
}
if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
// TODO: Could handle ordered case, but it produces worse code for
// x86. Maybe handle ordered if fabs is free?
- ISD::CondCode OrderedOp = IsInverted ? ISD::SETUGE : ISD::SETOLT;
- ISD::CondCode UnorderedOp = IsInverted ? ISD::SETOGE : ISD::SETULT;
+ ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
+ ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
OperandVT.getScalarType().getSimpleVT())) {
@@ -8773,6 +8776,15 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
}
}
+ // Some checks may be represented as inversion of simpler check, for example
+ // "inf|normal|subnormal|zero" => !"nan".
+ bool IsInverted = false;
+
+ if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
+ Test = InvertedCheck;
+ IsInverted = true;
+ }
+
// In the general case use integer operations.
unsigned BitSize = OperandVT.getScalarSizeInBits();
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll
index 834417b98743a8..e68539bcf07d9c 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -26,10 +26,10 @@ define i32 @replace_isinf_call_f16(half %x) {
define i32 @replace_isinf_call_f32(float %x) {
; CHECK-LABEL: replace_isinf_call_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: and w9, w9, #0x7fffffff
+; CHECK-NEXT: cmp w9, w8
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%abs = tail call float @llvm.fabs.f32(float %x)
@@ -42,10 +42,10 @@ define i32 @replace_isinf_call_f32(float %x) {
define i32 @replace_isinf_call_f64(double %x) {
; CHECK-LABEL: replace_isinf_call_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fabs d0, d0
+; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%abs = tail call double @llvm.fabs.f64(double %x)
diff --git a/llvm/test/CodeGen/PowerPC/fp-classify.ll b/llvm/test/CodeGen/PowerPC/fp-classify.ll
index f527b3c48040e7..dc9853ff2e3014 100644
--- a/llvm/test/CodeGen/PowerPC/fp-classify.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-classify.ll
@@ -7,13 +7,13 @@
define zeroext i1 @abs_isinff(float %x) {
; P8-LABEL: abs_isinff:
; P8: # %bb.0: # %entry
-; P8-NEXT: addis 3, 2, .LCPI0_0@toc@ha
-; P8-NEXT: xsabsdp 0, 1
-; P8-NEXT: li 4, 1
-; P8-NEXT: lfs 1, .LCPI0_0@toc@l(3)
-; P8-NEXT: li 3, 0
-; P8-NEXT: fcmpu 0, 0, 1
-; P8-NEXT: iseleq 3, 4, 3
+; P8-NEXT: xscvdpspn 0, 1
+; P8-NEXT: lis 4, 32640
+; P8-NEXT: mffprwz 3, 0
+; P8-NEXT: clrlwi 3, 3, 1
+; P8-NEXT: xor 3, 3, 4
+; P8-NEXT: cntlzw 3, 3
+; P8-NEXT: srwi 3, 3, 5
; P8-NEXT: blr
;
; P9-LABEL: abs_isinff:
@@ -32,13 +32,13 @@ entry:
define zeroext i1 @abs_isinf(double %x) {
; P8-LABEL: abs_isinf:
; P8: # %bb.0: # %entry
-; P8-NEXT: addis 3, 2, .LCPI1_0@toc@ha
-; P8-NEXT: xsabsdp 0, 1
-; P8-NEXT: li 4, 1
-; P8-NEXT: lfs 1, .LCPI1_0@toc@l(3)
-; P8-NEXT: li 3, 0
-; P8-NEXT: fcmpu 0, 0, 1
-; P8-NEXT: iseleq 3, 4, 3
+; P8-NEXT: mffprd 3, 1
+; P8-NEXT: li 4, 2047
+; P8-NEXT: rldic 4, 4, 52, 1
+; P8-NEXT: clrldi 3, 3, 1
+; P8-NEXT: xor 3, 3, 4
+; P8-NEXT: cntlzd 3, 3
+; P8-NEXT: rldicl 3, 3, 58, 63
; P8-NEXT: blr
;
; P9-LABEL: abs_isinf:
More information about the llvm-commits
mailing list