[llvm] expandIS_FPCLASS: Support fcNegative and fcPositive (PR #184788)
YunQiang Su via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 10 22:49:29 PDT 2026
https://github.com/wzssyqa updated https://github.com/llvm/llvm-project/pull/184788
>From b40a2c2068cb8dd5f1bb3ca40dae514660322688 Mon Sep 17 00:00:00 2001
From: YunQiang Su <syq at debian.org>
Date: Thu, 5 Mar 2026 21:03:15 +0800
Subject: [PATCH 1/4] expandIS_FPCLASS: Support fcNegative and fcPositive
This can be used by expandFMINIMUM_FMAXIMUM in future patches.
For 32-bit systems with FP64 support, we use FP_ROUND to truncate
FP64 to FP32 and then read the sign bit. Without the truncation, if
IS_FPCLASS is used in an expandF* function, LegalOps will hit an
assertion failure when assigning an i64 value to an i32 variable.
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 57 ++
llvm/test/CodeGen/Mips/is_fpclass.ll | 668 ++++++++++++++----
llvm/test/CodeGen/X86/is_fpclass.ll | 231 ++++++
3 files changed, 815 insertions(+), 141 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 088e6726fea58..89d4ae01ba3ea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9306,6 +9306,63 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
// Tests that involve more than one class should be processed first.
SDValue PartialRes;
+ // Handle sign bit tests first (fcPositive/fcNegative).
+ // These test only the sign bit, if not NaN.
+ // On 32-bit platforms with 64-bit floats, we need to be careful about
+ // integer comparisons. We use FP_ROUND to convert to a smaller float type
+ // that matches ResultVT's size, then compare with 0.
+ if (Test == fcPositive || Test == fcNegative) {
+ SDValue SignBitResult;
+ unsigned MaxLegalIntBits = 32;
+ if (isTypeLegal(MVT::i64))
+ MaxLegalIntBits = 64;
+
+ unsigned IntVTBits = IntVT.getScalarSizeInBits();
+ bool NeedFPCompare = IntVTBits > MaxLegalIntBits;
+
+ if (NeedFPCompare) {
+ // Truncate to the largest legal float type.
+ EVT TruncFloatEltVT = (MaxLegalIntBits == 64) ? MVT::f64 : MVT::f32;
+
+ EVT TruncFloatVT = TruncFloatEltVT;
+ if (ResultVT.isVector() && TruncFloatEltVT != MVT::Other) {
+ TruncFloatVT = EVT::getVectorVT(*DAG.getContext(), TruncFloatEltVT,
+ ResultVT.getVectorElementCount());
+ }
+ if (TruncFloatVT != MVT::Other &&
+ isOperationLegalOrCustom(ISD::FP_ROUND, TruncFloatVT)) {
+ // Round to smaller float type, then bitcast to integer for sign check.
+ // Use TargetConstant for the truncation flag.
+ EVT PointerVT =
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ SDValue OpTrunc = DAG.getNode(ISD::FP_ROUND, DL, TruncFloatVT, Op,
+ DAG.getTargetConstant(0, DL, PointerVT));
+ EVT TruncIntVT = TruncFloatVT.changeTypeToInteger();
+ SDValue OpTruncInt = DAG.getBitcast(TruncIntVT, OpTrunc);
+ SignBitResult =
+ DAG.getSetCC(DL, ResultVT, OpTruncInt,
+ DAG.getConstant(0, DL, TruncIntVT), ISD::SETLT);
+ } else {
+ // Fall back to original integer comparison.
+ SignBitResult = SignV;
+ }
+ } else {
+ SignBitResult = SignV;
+ }
+
+ if (!DAG.isKnownNeverNaN(Op)) {
+ SDValue NotNaN = DAG.getSetCC(DL, ResultVT, Op, Op, ISD::SETO);
+ SignBitResult =
+ DAG.getNode(ISD::AND, DL, ResultVT, NotNaN, SignBitResult);
+ }
+
+ if (Test == fcNegative)
+ return SignBitResult;
+ else
+ return DAG.getNode(ISD::XOR, DL, ResultVT, SignBitResult,
+ ResultInversionMask);
+ }
+
if (IsF80)
; // Detect finite numbers of f80 by checking individual classes because
// they have different settings of the explicit integer bit.
diff --git a/llvm/test/CodeGen/Mips/is_fpclass.ll b/llvm/test/CodeGen/Mips/is_fpclass.ll
index 9454a064c5312..44b706e5dcbac 100644
--- a/llvm/test/CodeGen/Mips/is_fpclass.ll
+++ b/llvm/test/CodeGen/Mips/is_fpclass.ll
@@ -1,246 +1,632 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=mipsisa32r6-unknown-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=mipsisa32r6-unknown-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=R6
+; RUN: llc -mtriple=mipsel-unknown-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=R2
define i1 @isnan_float(float %x) nounwind {
-; CHECK-LABEL: isnan_float:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.s $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 3
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isnan_float:
+; R6: # %bb.0:
+; R6-NEXT: class.s $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 3
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isnan_float:
+; R2: # %bb.0:
+; R2-NEXT: addiu $2, $zero, 1
+; R2-NEXT: c.un.s $f12, $f12
+; R2-NEXT: jr $ra
+; R2-NEXT: movf $2, $zero, $fcc0
%1 = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan
ret i1 %1
}
define i1 @isnan_double(double %x) nounwind {
-; CHECK-LABEL: isnan_double:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.d $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 3
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isnan_double:
+; R6: # %bb.0:
+; R6-NEXT: class.d $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 3
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isnan_double:
+; R2: # %bb.0:
+; R2-NEXT: addiu $2, $zero, 1
+; R2-NEXT: c.un.d $f12, $f12
+; R2-NEXT: jr $ra
+; R2-NEXT: movf $2, $zero, $fcc0
%1 = call i1 @llvm.is.fpclass.f64(double %x, i32 3) ; nan
ret i1 %1
}
define i1 @isnan_float_strictfp(float %x) strictfp nounwind {
-; CHECK-LABEL: isnan_float_strictfp:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.s $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 3
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isnan_float_strictfp:
+; R6: # %bb.0:
+; R6-NEXT: class.s $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 3
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isnan_float_strictfp:
+; R2: # %bb.0:
+; R2-NEXT: lui $1, 32767
+; R2-NEXT: ori $1, $1, 65535
+; R2-NEXT: mfc1 $2, $f12
+; R2-NEXT: and $1, $2, $1
+; R2-NEXT: lui $2, 32640
+; R2-NEXT: jr $ra
+; R2-NEXT: slt $2, $2, $1
%1 = call i1 @llvm.is.fpclass.f32(float %x, i32 3) strictfp ; nan
ret i1 %1
}
define i1 @isnan_double_strictfp(double %x) strictfp nounwind {
-; CHECK-LABEL: isnan_double_strictfp:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.d $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 3
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isnan_double_strictfp:
+; R6: # %bb.0:
+; R6-NEXT: class.d $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 3
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isnan_double_strictfp:
+; R2: # %bb.0:
+; R2-NEXT: addiu $sp, $sp, -8
+; R2-NEXT: sdc1 $f12, 0($sp)
+; R2-NEXT: lui $1, 32767
+; R2-NEXT: ori $1, $1, 65535
+; R2-NEXT: lw $2, 4($sp)
+; R2-NEXT: and $1, $2, $1
+; R2-NEXT: lui $3, 32752
+; R2-NEXT: slt $2, $3, $1
+; R2-NEXT: xor $1, $1, $3
+; R2-NEXT: lw $3, 0($sp)
+; R2-NEXT: sltu $3, $zero, $3
+; R2-NEXT: movz $2, $3, $1
+; R2-NEXT: jr $ra
+; R2-NEXT: addiu $sp, $sp, 8
%1 = call i1 @llvm.is.fpclass.f64(double %x, i32 3) strictfp ; nan
ret i1 %1
}
define i1 @isinf_float(float %x) nounwind {
-; CHECK-LABEL: isinf_float:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.s $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 68
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isinf_float:
+; R6: # %bb.0:
+; R6-NEXT: class.s $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 68
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isinf_float:
+; R2: # %bb.0:
+; R2-NEXT: lui $1, 32767
+; R2-NEXT: ori $1, $1, 65535
+; R2-NEXT: mfc1 $2, $f12
+; R2-NEXT: and $1, $2, $1
+; R2-NEXT: lui $2, 32640
+; R2-NEXT: xor $1, $1, $2
+; R2-NEXT: jr $ra
+; R2-NEXT: sltiu $2, $1, 1
%1 = call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf"
ret i1 %1
}
define i1 @isfinite_float(float %x) nounwind {
-; CHECK-LABEL: isfinite_float:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.s $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 952
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isfinite_float:
+; R6: # %bb.0:
+; R6-NEXT: class.s $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 952
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isfinite_float:
+; R2: # %bb.0:
+; R2-NEXT: lui $1, 32767
+; R2-NEXT: ori $1, $1, 65535
+; R2-NEXT: mfc1 $2, $f12
+; R2-NEXT: and $1, $2, $1
+; R2-NEXT: lui $2, 32640
+; R2-NEXT: jr $ra
+; R2-NEXT: slt $2, $1, $2
%1 = call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
ret i1 %1
}
define i1 @isnormal_float(float %x) nounwind {
-; CHECK-LABEL: isnormal_float:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.s $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 136
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isnormal_float:
+; R6: # %bb.0:
+; R6-NEXT: class.s $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 136
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isnormal_float:
+; R2: # %bb.0:
+; R2-NEXT: lui $1, 32767
+; R2-NEXT: ori $1, $1, 65535
+; R2-NEXT: mfc1 $2, $f12
+; R2-NEXT: and $1, $2, $1
+; R2-NEXT: lui $2, 65408
+; R2-NEXT: addu $1, $1, $2
+; R2-NEXT: srl $1, $1, 24
+; R2-NEXT: jr $ra
+; R2-NEXT: sltiu $2, $1, 127
%1 = call i1 @llvm.is.fpclass.f32(float %x, i32 264) ; 0x108 = "normal"
ret i1 %1
}
define i1 @issubnormal_float(float %x) nounwind {
-; CHECK-LABEL: issubnormal_float:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.s $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 272
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: issubnormal_float:
+; R6: # %bb.0:
+; R6-NEXT: class.s $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 272
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: issubnormal_float:
+; R2: # %bb.0:
+; R2-NEXT: lui $1, 32767
+; R2-NEXT: ori $1, $1, 65535
+; R2-NEXT: mfc1 $2, $f12
+; R2-NEXT: and $1, $2, $1
+; R2-NEXT: addiu $1, $1, -1
+; R2-NEXT: lui $2, 127
+; R2-NEXT: ori $2, $2, 65535
+; R2-NEXT: jr $ra
+; R2-NEXT: sltu $2, $1, $2
%1 = call i1 @llvm.is.fpclass.f32(float %x, i32 144) ; 0x90 = "subnormal"
ret i1 %1
}
define i1 @iszero_float(float %x) nounwind {
-; CHECK-LABEL: iszero_float:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.s $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 544
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: iszero_float:
+; R6: # %bb.0:
+; R6-NEXT: class.s $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 544
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: iszero_float:
+; R2: # %bb.0:
+; R2-NEXT: mtc1 $zero, $f0
+; R2-NEXT: addiu $2, $zero, 1
+; R2-NEXT: c.eq.s $f12, $f0
+; R2-NEXT: jr $ra
+; R2-NEXT: movf $2, $zero, $fcc0
%1 = call i1 @llvm.is.fpclass.f32(float %x, i32 96) ; 0x60 = "zero"
ret i1 %1
}
define i1 @issnan_float(float %x) nounwind {
-; CHECK-LABEL: issnan_float:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.s $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 1
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: issnan_float:
+; R6: # %bb.0:
+; R6-NEXT: class.s $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 1
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: issnan_float:
+; R2: # %bb.0:
+; R2-NEXT: lui $1, 32767
+; R2-NEXT: ori $1, $1, 65535
+; R2-NEXT: mfc1 $2, $f12
+; R2-NEXT: and $1, $2, $1
+; R2-NEXT: lui $2, 32704
+; R2-NEXT: slt $2, $1, $2
+; R2-NEXT: lui $3, 32640
+; R2-NEXT: slt $1, $3, $1
+; R2-NEXT: jr $ra
+; R2-NEXT: and $2, $1, $2
%1 = call i1 @llvm.is.fpclass.f32(float %x, i32 1)
ret i1 %1
}
define i1 @issnan_double(double %x) nounwind {
-; CHECK-LABEL: issnan_double:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.d $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 1
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: issnan_double:
+; R6: # %bb.0:
+; R6-NEXT: class.d $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 1
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: issnan_double:
+; R2: # %bb.0:
+; R2-NEXT: addiu $sp, $sp, -8
+; R2-NEXT: sdc1 $f12, 0($sp)
+; R2-NEXT: lui $1, 32767
+; R2-NEXT: ori $1, $1, 65535
+; R2-NEXT: lw $2, 4($sp)
+; R2-NEXT: and $1, $2, $1
+; R2-NEXT: lui $2, 32752
+; R2-NEXT: slt $3, $2, $1
+; R2-NEXT: xor $2, $1, $2
+; R2-NEXT: lw $4, 0($sp)
+; R2-NEXT: sltu $4, $zero, $4
+; R2-NEXT: movz $3, $4, $2
+; R2-NEXT: lui $2, 32760
+; R2-NEXT: slt $1, $1, $2
+; R2-NEXT: and $2, $3, $1
+; R2-NEXT: jr $ra
+; R2-NEXT: addiu $sp, $sp, 8
%1 = call i1 @llvm.is.fpclass.f64(double %x, i32 1)
ret i1 %1
}
define i1 @isqnan_float(float %x) nounwind {
-; CHECK-LABEL: isqnan_float:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.s $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 2
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isqnan_float:
+; R6: # %bb.0:
+; R6-NEXT: class.s $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 2
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isqnan_float:
+; R2: # %bb.0:
+; R2-NEXT: lui $1, 32767
+; R2-NEXT: ori $1, $1, 65535
+; R2-NEXT: mfc1 $2, $f12
+; R2-NEXT: and $1, $2, $1
+; R2-NEXT: lui $2, 32703
+; R2-NEXT: ori $2, $2, 65535
+; R2-NEXT: jr $ra
+; R2-NEXT: slt $2, $2, $1
%1 = call i1 @llvm.is.fpclass.f32(float %x, i32 2)
ret i1 %1
}
define i1 @isqnan_double(double %x) nounwind {
-; CHECK-LABEL: isqnan_double:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.d $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 2
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isqnan_double:
+; R6: # %bb.0:
+; R6-NEXT: class.d $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 2
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isqnan_double:
+; R2: # %bb.0:
+; R2-NEXT: addiu $sp, $sp, -8
+; R2-NEXT: sdc1 $f12, 0($sp)
+; R2-NEXT: lui $1, 32767
+; R2-NEXT: ori $1, $1, 65535
+; R2-NEXT: lw $2, 4($sp)
+; R2-NEXT: and $1, $2, $1
+; R2-NEXT: lui $2, 32759
+; R2-NEXT: ori $2, $2, 65535
+; R2-NEXT: slt $2, $2, $1
+; R2-NEXT: jr $ra
+; R2-NEXT: addiu $sp, $sp, 8
%1 = call i1 @llvm.is.fpclass.f64(double %x, i32 2)
ret i1 %1
}
define i1 @isposzero_double(double %x) nounwind {
-; CHECK-LABEL: isposzero_double:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.d $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 512
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isposzero_double:
+; R6: # %bb.0:
+; R6-NEXT: class.d $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 512
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isposzero_double:
+; R2: # %bb.0:
+; R2-NEXT: addiu $sp, $sp, -8
+; R2-NEXT: sdc1 $f12, 0($sp)
+; R2-NEXT: lw $1, 4($sp)
+; R2-NEXT: lw $2, 0($sp)
+; R2-NEXT: or $1, $2, $1
+; R2-NEXT: sltiu $2, $1, 1
+; R2-NEXT: jr $ra
+; R2-NEXT: addiu $sp, $sp, 8
%1 = call i1 @llvm.is.fpclass.f64(double %x, i32 64)
ret i1 %1
}
define i1 @isnegzero_double(double %x) nounwind {
-; CHECK-LABEL: isnegzero_double:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.d $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 32
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isnegzero_double:
+; R6: # %bb.0:
+; R6-NEXT: class.d $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 32
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isnegzero_double:
+; R2: # %bb.0:
+; R2-NEXT: addiu $sp, $sp, -8
+; R2-NEXT: sdc1 $f12, 0($sp)
+; R2-NEXT: lui $1, 32768
+; R2-NEXT: lw $2, 4($sp)
+; R2-NEXT: xor $1, $2, $1
+; R2-NEXT: lw $2, 0($sp)
+; R2-NEXT: or $1, $2, $1
+; R2-NEXT: sltiu $2, $1, 1
+; R2-NEXT: jr $ra
+; R2-NEXT: addiu $sp, $sp, 8
%1 = call i1 @llvm.is.fpclass.f64(double %x, i32 32)
ret i1 %1
}
define i1 @isposnormal_double(double %x) nounwind {
-; CHECK-LABEL: isposnormal_double:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.d $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 128
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isposnormal_double:
+; R6: # %bb.0:
+; R6-NEXT: class.d $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 128
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isposnormal_double:
+; R2: # %bb.0:
+; R2-NEXT: addiu $sp, $sp, -8
+; R2-NEXT: sdc1 $f12, 0($sp)
+; R2-NEXT: addiu $1, $zero, -1
+; R2-NEXT: lw $2, 4($sp)
+; R2-NEXT: slt $1, $1, $2
+; R2-NEXT: lui $3, 32767
+; R2-NEXT: ori $3, $3, 65535
+; R2-NEXT: and $2, $2, $3
+; R2-NEXT: lui $3, 65520
+; R2-NEXT: addu $2, $2, $3
+; R2-NEXT: srl $2, $2, 21
+; R2-NEXT: sltiu $2, $2, 1023
+; R2-NEXT: and $2, $2, $1
+; R2-NEXT: jr $ra
+; R2-NEXT: addiu $sp, $sp, 8
%1 = call i1 @llvm.is.fpclass.f64(double %x, i32 256)
ret i1 %1
}
define i1 @isnegnormal_double(double %x) nounwind {
-; CHECK-LABEL: isnegnormal_double:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.d $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 8
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isnegnormal_double:
+; R6: # %bb.0:
+; R6-NEXT: class.d $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 8
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isnegnormal_double:
+; R2: # %bb.0:
+; R2-NEXT: addiu $sp, $sp, -8
+; R2-NEXT: sdc1 $f12, 0($sp)
+; R2-NEXT: lui $1, 32767
+; R2-NEXT: ori $1, $1, 65535
+; R2-NEXT: lw $2, 4($sp)
+; R2-NEXT: and $1, $2, $1
+; R2-NEXT: lui $3, 65520
+; R2-NEXT: addu $1, $1, $3
+; R2-NEXT: srl $1, $1, 21
+; R2-NEXT: sltiu $1, $1, 1023
+; R2-NEXT: slti $2, $2, 0
+; R2-NEXT: and $2, $1, $2
+; R2-NEXT: jr $ra
+; R2-NEXT: addiu $sp, $sp, 8
%1 = call i1 @llvm.is.fpclass.f64(double %x, i32 8)
ret i1 %1
}
define i1 @isnormal_double(double %x) nounwind {
-; CHECK-LABEL: isnormal_double:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.d $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 136
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isnormal_double:
+; R6: # %bb.0:
+; R6-NEXT: class.d $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 136
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isnormal_double:
+; R2: # %bb.0:
+; R2-NEXT: addiu $sp, $sp, -8
+; R2-NEXT: sdc1 $f12, 0($sp)
+; R2-NEXT: lui $1, 32767
+; R2-NEXT: ori $1, $1, 65535
+; R2-NEXT: lw $2, 4($sp)
+; R2-NEXT: and $1, $2, $1
+; R2-NEXT: lui $2, 65520
+; R2-NEXT: addu $1, $1, $2
+; R2-NEXT: srl $1, $1, 21
+; R2-NEXT: sltiu $2, $1, 1023
+; R2-NEXT: jr $ra
+; R2-NEXT: addiu $sp, $sp, 8
%1 = call i1 @llvm.is.fpclass.f64(double %x, i32 264)
ret i1 %1
}
define i1 @isclass_00d_double(double %x) nounwind {
-; CHECK-LABEL: isclass_00d_double:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.d $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 13
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isclass_00d_double:
+; R6: # %bb.0:
+; R6-NEXT: class.d $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 13
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isclass_00d_double:
+; R2: # %bb.0:
+; R2-NEXT: addiu $sp, $sp, -8
+; R2-NEXT: sdc1 $f12, 0($sp)
+; R2-NEXT: lui $1, 32767
+; R2-NEXT: ori $1, $1, 65535
+; R2-NEXT: lw $2, 4($sp)
+; R2-NEXT: and $1, $2, $1
+; R2-NEXT: lui $3, 32752
+; R2-NEXT: slt $4, $3, $1
+; R2-NEXT: xor $3, $1, $3
+; R2-NEXT: lw $5, 0($sp)
+; R2-NEXT: sltu $6, $zero, $5
+; R2-NEXT: movz $4, $6, $3
+; R2-NEXT: lui $3, 32760
+; R2-NEXT: slt $3, $1, $3
+; R2-NEXT: and $3, $4, $3
+; R2-NEXT: lui $4, 65520
+; R2-NEXT: xor $6, $2, $4
+; R2-NEXT: or $5, $5, $6
+; R2-NEXT: sltiu $5, $5, 1
+; R2-NEXT: or $3, $5, $3
+; R2-NEXT: addu $1, $1, $4
+; R2-NEXT: srl $1, $1, 21
+; R2-NEXT: sltiu $1, $1, 1023
+; R2-NEXT: slti $2, $2, 0
+; R2-NEXT: and $1, $1, $2
+; R2-NEXT: or $2, $3, $1
+; R2-NEXT: jr $ra
+; R2-NEXT: addiu $sp, $sp, 8
%1 = call i1 @llvm.is.fpclass.f64(double %x, i32 13)
ret i1 %1
}
define i1 @isclass_1c0_float(float %x) nounwind {
-; CHECK-LABEL: isclass_1c0_float:
-; CHECK: # %bb.0:
-; CHECK-NEXT: class.s $f0, $f12
-; CHECK-NEXT: mfc1 $1, $f0
-; CHECK-NEXT: andi $1, $1, 896
-; CHECK-NEXT: jr $ra
-; CHECK-NEXT: sltu $2, $zero, $1
+; R6-LABEL: isclass_1c0_float:
+; R6: # %bb.0:
+; R6-NEXT: class.s $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 896
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isclass_1c0_float:
+; R2: # %bb.0:
+; R2-NEXT: mfc1 $1, $f12
+; R2-NEXT: srl $1, $1, 23
+; R2-NEXT: jr $ra
+; R2-NEXT: sltiu $2, $1, 255
%1 = call i1 @llvm.is.fpclass.f32(float %x, i32 448)
ret i1 %1
}
+define i1 @isclass_positive_float(float %x) nounwind {
+; R6-LABEL: isclass_positive_float:
+; R6: # %bb.0:
+; R6-NEXT: class.s $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 960
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isclass_positive_float:
+; R2: # %bb.0:
+; R2-NEXT: addiu $1, $zero, 1
+; R2-NEXT: c.un.s $f12, $f12
+; R2-NEXT: movf $1, $zero, $fcc0
+; R2-NEXT: mfc1 $2, $f12
+; R2-NEXT: addiu $3, $zero, -1
+; R2-NEXT: slt $2, $3, $2
+; R2-NEXT: jr $ra
+; R2-NEXT: or $2, $1, $2
+ %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 960)
+ ret i1 %1
+}
+
+define i1 @isclass_positive_nnan_float(float nofpclass(nan) %x) nounwind {
+; R6-LABEL: isclass_positive_nnan_float:
+; R6: # %bb.0:
+; R6-NEXT: class.s $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 960
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isclass_positive_nnan_float:
+; R2: # %bb.0:
+; R2-NEXT: mfc1 $1, $f12
+; R2-NEXT: addiu $2, $zero, -1
+; R2-NEXT: jr $ra
+; R2-NEXT: slt $2, $2, $1
+ %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 960)
+ ret i1 %1
+}
+
+define i1 @isclass_positive_nnan_double(double nofpclass(nan) %x) nounwind {
+; R6-LABEL: isclass_positive_nnan_double:
+; R6: # %bb.0:
+; R6-NEXT: class.d $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 960
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isclass_positive_nnan_double:
+; R2: # %bb.0:
+; R2-NEXT: cvt.s.d $f0, $f12
+; R2-NEXT: mfc1 $1, $f0
+; R2-NEXT: addiu $2, $zero, -1
+; R2-NEXT: jr $ra
+; R2-NEXT: slt $2, $2, $1
+ %1 = call i1 @llvm.is.fpclass.f64(double %x, i32 960)
+ ret i1 %1
+}
+
+define i1 @isclass_negative_float(float %x) nounwind {
+; R6-LABEL: isclass_negative_float:
+; R6: # %bb.0:
+; R6-NEXT: class.s $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 60
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isclass_negative_float:
+; R2: # %bb.0:
+; R2-NEXT: addiu $1, $zero, 1
+; R2-NEXT: c.un.s $f12, $f12
+; R2-NEXT: movt $1, $zero, $fcc0
+; R2-NEXT: mfc1 $2, $f12
+; R2-NEXT: slti $2, $2, 0
+; R2-NEXT: jr $ra
+; R2-NEXT: and $2, $1, $2
+ %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 60)
+ ret i1 %1
+}
+
+define i1 @isclass_negative_nnan_float(float nofpclass(nan) %x) nounwind {
+; R6-LABEL: isclass_negative_nnan_float:
+; R6: # %bb.0:
+; R6-NEXT: class.s $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 60
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isclass_negative_nnan_float:
+; R2: # %bb.0:
+; R2-NEXT: mfc1 $1, $f12
+; R2-NEXT: jr $ra
+; R2-NEXT: srl $2, $1, 31
+ %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 60)
+ ret i1 %1
+}
+
+define i1 @isclass_negative_nnan_double(double nofpclass(nan) %x) nounwind {
+; R6-LABEL: isclass_negative_nnan_double:
+; R6: # %bb.0:
+; R6-NEXT: class.d $f0, $f12
+; R6-NEXT: mfc1 $1, $f0
+; R6-NEXT: andi $1, $1, 60
+; R6-NEXT: jr $ra
+; R6-NEXT: sltu $2, $zero, $1
+;
+; R2-LABEL: isclass_negative_nnan_double:
+; R2: # %bb.0:
+; R2-NEXT: cvt.s.d $f0, $f12
+; R2-NEXT: mfc1 $1, $f0
+; R2-NEXT: jr $ra
+; R2-NEXT: srl $2, $1, 31
+ %1 = call i1 @llvm.is.fpclass.f64(double %x, i32 60)
+ ret i1 %1
+}
+
declare i1 @llvm.is.fpclass.f32(float, i32)
declare i1 @llvm.is.fpclass.f64(double, i32)
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index 70062b245f36d..c2d6b7ac9b2d7 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -2901,6 +2901,237 @@ define i1 @not_issubnormal_or_zero_or_qnan_f(float %x) {
ret i1 %class
}
+define <4 x i1> @isclass_positive_nnan(<4 x float> nofpclass(nan) %x) nounwind {
+; X86-LABEL: isclass_positive_nnan:
+; X86: # %bb.0:
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: setns %cl
+; X86-NEXT: shlb $2, %cl
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: setns %dl
+; X86-NEXT: shlb $3, %dl
+; X86-NEXT: orb %cl, %dl
+; X86-NEXT: cmpl $0, (%esp)
+; X86-NEXT: setns %cl
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: setns %ch
+; X86-NEXT: addb %ch, %ch
+; X86-NEXT: orb %cl, %ch
+; X86-NEXT: orb %dl, %ch
+; X86-NEXT: movb %ch, (%eax)
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl $4
+;
+; X64-LABEL: isclass_positive_nnan:
+; X64: # %bb.0:
+; X64-NEXT: pxor %xmm1, %xmm1
+; X64-NEXT: pcmpgtd %xmm0, %xmm1
+; X64-NEXT: pcmpeqd %xmm0, %xmm0
+; X64-NEXT: pxor %xmm1, %xmm0
+; X64-NEXT: retq
+ %1 = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 960)
+ ret <4 x i1> %1
+}
+
+define <4 x i1> @isclass_positive(<4 x float> %x) nounwind {
+; X86-LABEL: isclass_positive:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts (%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets %dl
+; X86-NEXT: fxch %st(3)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %dh
+; X86-NEXT: andb %dl, %dh
+; X86-NEXT: xorb $1, %dh
+; X86-NEXT: shlb $2, %dh
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets %bl
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %dl
+; X86-NEXT: andb %bl, %dl
+; X86-NEXT: notb %dl
+; X86-NEXT: shlb $3, %dl
+; X86-NEXT: orb %dh, %dl
+; X86-NEXT: cmpl $0, (%esp)
+; X86-NEXT: sets %bl
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %dh
+; X86-NEXT: andb %bl, %dh
+; X86-NEXT: xorb $1, %dh
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets %bl
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %al
+; X86-NEXT: andb %bl, %al
+; X86-NEXT: xorb $1, %al
+; X86-NEXT: addb %al, %al
+; X86-NEXT: orb %dh, %al
+; X86-NEXT: orb %dl, %al
+; X86-NEXT: andb $15, %al
+; X86-NEXT: movb %al, (%ecx)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl $4
+;
+; X64-LABEL: isclass_positive:
+; X64: # %bb.0:
+; X64-NEXT: pxor %xmm1, %xmm1
+; X64-NEXT: pcmpgtd %xmm0, %xmm1
+; X64-NEXT: cmpordps %xmm0, %xmm0
+; X64-NEXT: andps %xmm1, %xmm0
+; X64-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: retq
+ %1 = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 960)
+ ret <4 x i1> %1
+}
+
+define <4 x i1> @isclass_negative_nnan(<4 x float> nofpclass(nan) %x) nounwind {
+; X86-LABEL: isclass_negative_nnan:
+; X86: # %bb.0:
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets %cl
+; X86-NEXT: shlb $2, %cl
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets %dl
+; X86-NEXT: shlb $3, %dl
+; X86-NEXT: orb %cl, %dl
+; X86-NEXT: cmpl $0, (%esp)
+; X86-NEXT: sets %cl
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets %ch
+; X86-NEXT: addb %ch, %ch
+; X86-NEXT: orb %cl, %ch
+; X86-NEXT: orb %dl, %ch
+; X86-NEXT: movb %ch, (%eax)
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl $4
+;
+; X64-LABEL: isclass_negative_nnan:
+; X64: # %bb.0:
+; X64-NEXT: pxor %xmm1, %xmm1
+; X64-NEXT: pcmpgtd %xmm0, %xmm1
+; X64-NEXT: movdqa %xmm1, %xmm0
+; X64-NEXT: retq
+ %1 = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 60)
+ ret <4 x i1> %1
+}
+
+define <4 x i1> @isclass_negative(<4 x float> %x) nounwind {
+; X86-LABEL: isclass_negative:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts (%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets %dl
+; X86-NEXT: fxch %st(3)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %dh
+; X86-NEXT: andb %dl, %dh
+; X86-NEXT: shlb $2, %dh
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets %bl
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %dl
+; X86-NEXT: andb %bl, %dl
+; X86-NEXT: shlb $3, %dl
+; X86-NEXT: orb %dh, %dl
+; X86-NEXT: cmpl $0, (%esp)
+; X86-NEXT: sets %bl
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %dh
+; X86-NEXT: andb %bl, %dh
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets %bl
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %al
+; X86-NEXT: andb %bl, %al
+; X86-NEXT: addb %al, %al
+; X86-NEXT: orb %dh, %al
+; X86-NEXT: orb %dl, %al
+; X86-NEXT: movb %al, (%ecx)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl $4
+;
+; X64-LABEL: isclass_negative:
+; X64: # %bb.0:
+; X64-NEXT: pxor %xmm1, %xmm1
+; X64-NEXT: pcmpgtd %xmm0, %xmm1
+; X64-NEXT: cmpordps %xmm0, %xmm0
+; X64-NEXT: andps %xmm1, %xmm0
+; X64-NEXT: retq
+ %1 = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 60)
+ ret <4 x i1> %1
+}
+
declare i1 @llvm.is.fpclass.f32(float, i32)
declare i1 @llvm.is.fpclass.f64(double, i32)
declare <1 x i1> @llvm.is.fpclass.v1f32(<1 x float>, i32)
>From cdcbb4331d83582a82d2f1de78009a932c890ea8 Mon Sep 17 00:00:00 2001
From: YunQiang Su <syq at debian.org>
Date: Fri, 6 Mar 2026 12:46:22 +0800
Subject: [PATCH 2/4] some fixes
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 12 +-
.../CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll | 50 ++--
.../CodeGen/AMDGPU/llvm.is.fpclass.f16.ll | 13 +-
llvm/test/CodeGen/Mips/is_fpclass.ll | 13 +-
llvm/test/CodeGen/X86/is_fpclass.ll | 213 +++++++++++-------
5 files changed, 163 insertions(+), 138 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 89d4ae01ba3ea..71275d61e7fc1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9322,7 +9322,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
if (NeedFPCompare) {
// Truncate to the largest legal float type.
- EVT TruncFloatEltVT = (MaxLegalIntBits == 64) ? MVT::f64 : MVT::f32;
+ EVT TruncFloatEltVT = EVT::getFloatingPointVT(MaxLegalIntBits);
EVT TruncFloatVT = TruncFloatEltVT;
if (ResultVT.isVector() && TruncFloatEltVT != MVT::Other) {
@@ -9333,8 +9333,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
isOperationLegalOrCustom(ISD::FP_ROUND, TruncFloatVT)) {
// Round to smaller float type, then bitcast to integer for sign check.
// Use TargetConstant for the truncation flag.
- EVT PointerVT =
- DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ EVT PointerVT = getPointerTy(DAG.getDataLayout());
SDValue OpTrunc = DAG.getNode(ISD::FP_ROUND, DL, TruncFloatVT, Op,
DAG.getTargetConstant(0, DL, PointerVT));
EVT TruncIntVT = TruncFloatVT.changeTypeToInteger();
@@ -9356,7 +9355,12 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
DAG.getNode(ISD::AND, DL, ResultVT, NotNaN, SignBitResult);
}
- if (Test == fcNegative)
+ bool IsICmpImmLegal =
+ isLegalICmpImmediate(APInt::getAllOnes(IntVTBits).getZExtValue());
+ if (!NeedFPCompare && (!DAG.isKnownNeverNaN(Op) || IsICmpImmLegal) &&
+ Test == fcPositive) {
+ ; // (fcPosInf | fcFinite) has better performance.
+ } else if (Test == fcNegative)
return SignBitResult;
else
return DAG.getNode(ISD::XOR, DL, ResultVT, SignBitResult,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
index d80fb6d8ea108..80ec6fa104b31 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
@@ -2123,80 +2123,62 @@ define i1 @isnegative_bf16(bfloat %x) {
; GFX7CHECK-LABEL: isnegative_bf16:
; GFX7CHECK: ; %bb.0:
; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v0
-; GFX7CHECK-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2
-; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], s4, v0
-; GFX7CHECK-NEXT: s_mov_b32 s6, 0xff80
+; GFX7CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v0
+; GFX7CHECK-NEXT: v_cmp_o_f32_e64 s[4:5], v0, v0
; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s6, v1
-; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: isnegative_bf16:
; GFX8CHECK: ; %bb.0:
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
-; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
-; GFX8CHECK-NEXT: v_cmp_gt_i16_e64 s[4:5], s4, v1
-; GFX8CHECK-NEXT: s_movk_i32 s6, 0xff80
+; GFX8CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8CHECK-NEXT: v_cmp_o_f32_e64 s[4:5], v0, v0
; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s6, v0
-; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: isnegative_bf16:
; GFX9CHECK: ; %bb.0:
; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
-; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
-; GFX9CHECK-NEXT: v_cmp_gt_i16_e64 s[4:5], s4, v1
-; GFX9CHECK-NEXT: s_movk_i32 s6, 0xff80
+; GFX9CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9CHECK-NEXT: v_cmp_o_f32_e64 s[4:5], v0, v0
; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s6, v0
-; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: isnegative_bf16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX10CHECK-NEXT: v_lshlrev_b32_e32 v1, 16, v0
; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
-; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s5, 0xff80, v0
-; GFX10CHECK-NEXT: v_cmp_gt_i16_e64 s4, 0x7f80, v1
+; GFX10CHECK-NEXT: v_cmp_o_f32_e64 s4, v1, v1
; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
-; GFX10CHECK-NEXT: s_or_b32 s4, s4, s5
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-TRUE16-LABEL: isnegative_bf16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_and_b16 v0.h, 0x7fff, v0.l
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0.l
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_eq_u16_e64 s1, 0xff80, v0.l
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_gt_i16_e64 s0, 0x7f80, v0.h
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_o_f32_e64 s0, v2, v2
; GFX11SELDAG-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
-; GFX11SELDAG-TRUE16-NEXT: s_or_b32 s0, s0, s1
; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: isnegative_bf16:
; GFX11SELDAG-FAKE16: ; %bb.0:
; GFX11SELDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
; GFX11SELDAG-FAKE16-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
-; GFX11SELDAG-FAKE16-NEXT: v_cmp_eq_u16_e64 s1, 0xff80, v0
-; GFX11SELDAG-FAKE16-NEXT: v_cmp_gt_i16_e64 s0, 0x7f80, v1
+; GFX11SELDAG-FAKE16-NEXT: v_cmp_o_f32_e64 s0, v1, v1
; GFX11SELDAG-FAKE16-NEXT: s_and_b32 s0, s0, vcc_lo
-; GFX11SELDAG-FAKE16-NEXT: s_or_b32 s0, s0, s1
; GFX11SELDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11SELDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
%class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 60) ; fcNegative
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index 76958e63d36c9..1b3527a0b5d49 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -3053,16 +3053,11 @@ define i1 @isnegative_f16(half %x) {
; GFX7SELDAG-LABEL: isnegative_f16:
; GFX7SELDAG: ; %bb.0:
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7SELDAG-NEXT: v_and_b32_e32 v1, 0xffff, v0
-; GFX7SELDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
-; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2
-; GFX7SELDAG-NEXT: v_cmp_gt_i32_e64 s[4:5], s4, v0
-; GFX7SELDAG-NEXT: s_mov_b32 s6, 0xfc00
+; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX7SELDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, 0, v0
+; GFX7SELDAG-NEXT: v_cmp_o_f32_e64 s[4:5], v1, v1
; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s6, v1
-; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/Mips/is_fpclass.ll b/llvm/test/CodeGen/Mips/is_fpclass.ll
index 44b706e5dcbac..66c3e3bee09a2 100644
--- a/llvm/test/CodeGen/Mips/is_fpclass.ll
+++ b/llvm/test/CodeGen/Mips/is_fpclass.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=mipsisa32r6-unknown-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=R6
-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=R2
+; RUN: llc -mtriple=mipsisa32r6-unknown-linux-gnu -o - %s | FileCheck %s --check-prefix=R6
+; RUN: llc -mtriple=mipsel-unknown-linux-gnu -o - %s | FileCheck %s --check-prefix=R2
define i1 @isnan_float(float %x) nounwind {
@@ -518,14 +518,11 @@ define i1 @isclass_positive_float(float %x) nounwind {
;
; R2-LABEL: isclass_positive_float:
; R2: # %bb.0:
-; R2-NEXT: addiu $1, $zero, 1
-; R2-NEXT: c.un.s $f12, $f12
-; R2-NEXT: movf $1, $zero, $fcc0
+; R2-NEXT: lui $1, 32640
+; R2-NEXT: ori $1, $1, 1
; R2-NEXT: mfc1 $2, $f12
-; R2-NEXT: addiu $3, $zero, -1
-; R2-NEXT: slt $2, $3, $2
; R2-NEXT: jr $ra
-; R2-NEXT: or $2, $1, $2
+; R2-NEXT: sltu $2, $2, $1
%1 = call i1 @llvm.is.fpclass.f32(float %x, i32 960)
ret i1 %1
}
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index c2d6b7ac9b2d7..c933bba790954 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -2901,9 +2901,101 @@ define i1 @not_issubnormal_or_zero_or_qnan_f(float %x) {
ret i1 %class
}
-define <4 x i1> @isclass_positive_nnan(<4 x float> nofpclass(nan) %x) nounwind {
+define i1 @isclass_positive_nnan(float nofpclass(nan) %x) nounwind {
; X86-LABEL: isclass_positive_nnan:
; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: cmpl $0, (%esp)
+; X86-NEXT: setns %al
+; X86-NEXT: popl %ecx
+; X86-NEXT: retl
+;
+; X64-LABEL: isclass_positive_nnan:
+; X64: # %bb.0:
+; X64-NEXT: movmskps %xmm0, %eax
+; X64-NEXT: testb $1, %al
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %ispos = call i1 @llvm.is.fpclass.f32(float %x, i32 960)
+ ret i1 %ispos
+}
+
+define i1 @isclass_positive(float %x) nounwind {
+; X86-LABEL: isclass_positive:
+; X86: # %bb.0:
+; X86-NEXT: cmpl $2139095041, {{[0-9]+}}(%esp) # imm = 0x7F800001
+; X86-NEXT: setb %al
+; X86-NEXT: retl
+;
+; X64-LABEL: isclass_positive:
+; X64: # %bb.0:
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+ %ispos = call i1 @llvm.is.fpclass.f32(float %x, i32 960)
+ ret i1 %ispos
+}
+
+define i1 @isclass_negative_nnan(float nofpclass(nan) %x) nounwind {
+; X86-LABEL: isclass_negative_nnan:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: popl %ecx
+; X86-NEXT: retl
+;
+; X64-LABEL: isclass_negative_nnan:
+; X64: # %bb.0:
+; X64-NEXT: movmskps %xmm0, %eax
+; X64-NEXT: andl $1, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+ %isneg = call i1 @llvm.is.fpclass.f32(float %x, i32 60)
+ ret i1 %isneg
+}
+
+define i1 @isclass_negative(float %x) nounwind {
+; X86-LABEL: isclass_negative:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts (%esp)
+; X86-NEXT: cmpl $0, (%esp)
+; X86-NEXT: sets %cl
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %al
+; X86-NEXT: andb %cl, %al
+; X86-NEXT: popl %ecx
+; X86-NEXT: retl
+;
+; X64-LABEL: isclass_negative:
+; X64: # %bb.0:
+; X64-NEXT: movmskps %xmm0, %eax
+; X64-NEXT: ucomiss %xmm0, %xmm0
+; X64-NEXT: setnp %cl
+; X64-NEXT: testb $1, %al
+; X64-NEXT: setne %al
+; X64-NEXT: andb %cl, %al
+; X64-NEXT: retq
+ %isneg = call i1 @llvm.is.fpclass.f32(float %x, i32 60)
+ ret i1 %isneg
+}
+
+
+
+define <4 x i1> @isclass_positive_nnan_v4f32(<4 x float> nofpclass(nan) %x) nounwind {
+; X86-LABEL: isclass_positive_nnan_v4f32:
+; X86: # %bb.0:
; X86-NEXT: subl $16, %esp
; X86-NEXT: flds {{[0-9]+}}(%esp)
; X86-NEXT: fstps {{[0-9]+}}(%esp)
@@ -2932,96 +3024,51 @@ define <4 x i1> @isclass_positive_nnan(<4 x float> nofpclass(nan) %x) nounwind {
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl $4
;
-; X64-LABEL: isclass_positive_nnan:
+; X64-LABEL: isclass_positive_nnan_v4f32:
; X64: # %bb.0:
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: pcmpgtd %xmm0, %xmm1
; X64-NEXT: pcmpeqd %xmm0, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
- %1 = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 960)
- ret <4 x i1> %1
+ %ispos = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 960)
+ ret <4 x i1> %ispos
}
-define <4 x i1> @isclass_positive(<4 x float> %x) nounwind {
-; X86-LABEL: isclass_positive:
+define <4 x i1> @isclass_positive_v4f32(<4 x float> %x) nounwind {
+; X86-LABEL: isclass_positive_v4f32:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: flds {{[0-9]+}}(%esp)
-; X86-NEXT: fsts {{[0-9]+}}(%esp)
-; X86-NEXT: flds {{[0-9]+}}(%esp)
-; X86-NEXT: fsts {{[0-9]+}}(%esp)
-; X86-NEXT: flds {{[0-9]+}}(%esp)
-; X86-NEXT: fsts (%esp)
-; X86-NEXT: flds {{[0-9]+}}(%esp)
-; X86-NEXT: fsts {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: sets %dl
-; X86-NEXT: fxch %st(3)
-; X86-NEXT: fucomp %st(0)
-; X86-NEXT: fnstsw %ax
-; X86-NEXT: # kill: def $ah killed $ah killed $ax
-; X86-NEXT: sahf
-; X86-NEXT: setnp %dh
-; X86-NEXT: andb %dl, %dh
-; X86-NEXT: xorb $1, %dh
-; X86-NEXT: shlb $2, %dh
-; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: sets %bl
-; X86-NEXT: fxch %st(1)
-; X86-NEXT: fucomp %st(0)
-; X86-NEXT: fnstsw %ax
-; X86-NEXT: # kill: def $ah killed $ah killed $ax
-; X86-NEXT: sahf
-; X86-NEXT: setnp %dl
-; X86-NEXT: andb %bl, %dl
-; X86-NEXT: notb %dl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2139095041, {{[0-9]+}}(%esp) # imm = 0x7F800001
+; X86-NEXT: setb %cl
+; X86-NEXT: shlb $2, %cl
+; X86-NEXT: cmpl $2139095041, {{[0-9]+}}(%esp) # imm = 0x7F800001
+; X86-NEXT: setb %dl
; X86-NEXT: shlb $3, %dl
-; X86-NEXT: orb %dh, %dl
-; X86-NEXT: cmpl $0, (%esp)
-; X86-NEXT: sets %bl
-; X86-NEXT: fucomp %st(0)
-; X86-NEXT: fnstsw %ax
-; X86-NEXT: # kill: def $ah killed $ah killed $ax
-; X86-NEXT: sahf
-; X86-NEXT: setnp %dh
-; X86-NEXT: andb %bl, %dh
-; X86-NEXT: xorb $1, %dh
-; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: sets %bl
-; X86-NEXT: fucomp %st(0)
-; X86-NEXT: fnstsw %ax
-; X86-NEXT: # kill: def $ah killed $ah killed $ax
-; X86-NEXT: sahf
-; X86-NEXT: setnp %al
-; X86-NEXT: andb %bl, %al
-; X86-NEXT: xorb $1, %al
-; X86-NEXT: addb %al, %al
-; X86-NEXT: orb %dh, %al
-; X86-NEXT: orb %dl, %al
-; X86-NEXT: andb $15, %al
-; X86-NEXT: movb %al, (%ecx)
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: addl $16, %esp
-; X86-NEXT: popl %ebx
+; X86-NEXT: orb %cl, %dl
+; X86-NEXT: cmpl $2139095041, {{[0-9]+}}(%esp) # imm = 0x7F800001
+; X86-NEXT: setb %cl
+; X86-NEXT: cmpl $2139095041, {{[0-9]+}}(%esp) # imm = 0x7F800001
+; X86-NEXT: setb %ch
+; X86-NEXT: addb %ch, %ch
+; X86-NEXT: orb %cl, %ch
+; X86-NEXT: orb %dl, %ch
+; X86-NEXT: movb %ch, (%eax)
; X86-NEXT: retl $4
;
-; X64-LABEL: isclass_positive:
+; X64-LABEL: isclass_positive_v4f32:
; X64: # %bb.0:
-; X64-NEXT: pxor %xmm1, %xmm1
-; X64-NEXT: pcmpgtd %xmm0, %xmm1
-; X64-NEXT: cmpordps %xmm0, %xmm0
-; X64-NEXT: andps %xmm1, %xmm0
-; X64-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: pcmpeqd %xmm1, %xmm1
+; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
- %1 = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 960)
- ret <4 x i1> %1
+ %ispos = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 960)
+ ret <4 x i1> %ispos
}
-define <4 x i1> @isclass_negative_nnan(<4 x float> nofpclass(nan) %x) nounwind {
-; X86-LABEL: isclass_negative_nnan:
+define <4 x i1> @isclass_negative_nnan_v4f32(<4 x float> nofpclass(nan) %x) nounwind {
+; X86-LABEL: isclass_negative_nnan_v4f32:
; X86: # %bb.0:
; X86-NEXT: subl $16, %esp
; X86-NEXT: flds {{[0-9]+}}(%esp)
@@ -3051,18 +3098,18 @@ define <4 x i1> @isclass_negative_nnan(<4 x float> nofpclass(nan) %x) nounwind {
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl $4
;
-; X64-LABEL: isclass_negative_nnan:
+; X64-LABEL: isclass_negative_nnan_v4f32:
; X64: # %bb.0:
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: pcmpgtd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
- %1 = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 60)
- ret <4 x i1> %1
+ %isneg = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 60)
+ ret <4 x i1> %isneg
}
-define <4 x i1> @isclass_negative(<4 x float> %x) nounwind {
-; X86-LABEL: isclass_negative:
+define <4 x i1> @isclass_negative_v4f32(<4 x float> %x) nounwind {
+; X86-LABEL: isclass_negative_v4f32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: subl $16, %esp
@@ -3121,15 +3168,15 @@ define <4 x i1> @isclass_negative(<4 x float> %x) nounwind {
; X86-NEXT: popl %ebx
; X86-NEXT: retl $4
;
-; X64-LABEL: isclass_negative:
+; X64-LABEL: isclass_negative_v4f32:
; X64: # %bb.0:
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: pcmpgtd %xmm0, %xmm1
; X64-NEXT: cmpordps %xmm0, %xmm0
; X64-NEXT: andps %xmm1, %xmm0
; X64-NEXT: retq
- %1 = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 60)
- ret <4 x i1> %1
+ %isneg = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 60)
+ ret <4 x i1> %isneg
}
declare i1 @llvm.is.fpclass.f32(float, i32)
>From 6359b8b2eecd1c159cc481d97a9a14a63944e39e Mon Sep 17 00:00:00 2001
From: YunQiang Su <syq at debian.org>
Date: Mon, 9 Mar 2026 20:37:00 +0800
Subject: [PATCH 3/4] expandIS_FPCLASS: do not hardcode the legal integer width
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 84 +++++---
.../CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll | 68 ++----
.../CodeGen/AMDGPU/llvm.is.fpclass.f16.ll | 15 +-
llvm/test/CodeGen/X86/is_fpclass.ll | 200 +++++++++++++++++-
4 files changed, 273 insertions(+), 94 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 71275d61e7fc1..a22dc360b46fc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9311,32 +9311,50 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
// On 32-bit platforms with 64-bit floats, we need to be careful about
// integer comparisons. We use FP_ROUND to convert to a smaller float type
// that matches ResultVT's size, then compare with 0.
- if (Test == fcPositive || Test == fcNegative) {
+ FPClassTest FPTestSign = Test & (~fcNan);
+ FPClassTest FPTestNaN = Test & fcNan;
+ if ((FPTestSign == fcPositive || FPTestSign == fcNegative) &&
+ (FPTestNaN == fcNan || FPTestNaN == fcNone)) {
SDValue SignBitResult;
- unsigned MaxLegalIntBits = 32;
- if (isTypeLegal(MVT::i64))
- MaxLegalIntBits = 64;
-
- unsigned IntVTBits = IntVT.getScalarSizeInBits();
- bool NeedFPCompare = IntVTBits > MaxLegalIntBits;
-
- if (NeedFPCompare) {
- // Truncate to the largest legal float type.
- EVT TruncFloatEltVT = EVT::getFloatingPointVT(MaxLegalIntBits);
-
- EVT TruncFloatVT = TruncFloatEltVT;
- if (ResultVT.isVector() && TruncFloatEltVT != MVT::Other) {
- TruncFloatVT = EVT::getVectorVT(*DAG.getContext(), TruncFloatEltVT,
- ResultVT.getVectorElementCount());
+ bool testNegative = FPTestSign == fcNegative;
+ bool testNaN = FPTestNaN == fcNan;
+
+ bool NeedFPTrunc = false;
+ EVT TruncFloatVT = OperandVT;
+ EVT TruncIntVT = IntVT;
+ do {
+ unsigned TruncFloatEltBitsize = TruncFloatVT.getScalarSizeInBits();
+ unsigned TruncIntEltBitsize = TruncIntVT.getScalarSizeInBits();
+ if (isTypeLegal(TruncIntVT) && isTypeLegal(TruncFloatVT) &&
+ TruncFloatEltBitsize == TruncIntEltBitsize) {
+ if (TruncFloatVT != OperandVT)
+ NeedFPTrunc = true;
+ break;
}
- if (TruncFloatVT != MVT::Other &&
- isOperationLegalOrCustom(ISD::FP_ROUND, TruncFloatVT)) {
+ EVT TruncFloatEltVT = TruncFloatVT.getScalarType();
+ EVT TruncIntEltVT0 =
+ EVT::getIntegerVT(*DAG.getContext(), TruncFloatEltBitsize);
+ unsigned TruncIntEltVT0SizeInBits = TruncIntEltVT0.getScalarSizeInBits();
+ EVT TruncIntEltVT =
+ getTypeToTransformTo(*DAG.getContext(), TruncIntEltVT0);
+ unsigned TruncIntEltVTSizeInBits = TruncIntEltVT.getScalarSizeInBits();
+ TruncFloatEltVT =
+ EVT::getFloatingPointVT(TruncIntEltVT.getScalarSizeInBits());
+ if (TruncIntEltVTSizeInBits >= TruncIntEltVT0SizeInBits)
+ break;
+ TruncFloatVT =
+ TruncFloatVT.changeElementType(*DAG.getContext(), TruncFloatEltVT);
+ TruncIntVT =
+ TruncIntVT.changeElementType(*DAG.getContext(), TruncIntEltVT);
+ } while (true);
+
+ if (NeedFPTrunc) {
+ if (isOperationLegalOrCustom(ISD::FP_ROUND, TruncFloatVT)) {
// Round to smaller float type, then bitcast to integer for sign check.
// Use TargetConstant for the truncation flag.
EVT PointerVT = getPointerTy(DAG.getDataLayout());
SDValue OpTrunc = DAG.getNode(ISD::FP_ROUND, DL, TruncFloatVT, Op,
DAG.getTargetConstant(0, DL, PointerVT));
- EVT TruncIntVT = TruncFloatVT.changeTypeToInteger();
SDValue OpTruncInt = DAG.getBitcast(TruncIntVT, OpTrunc);
SignBitResult =
DAG.getSetCC(DL, ResultVT, OpTruncInt,
@@ -9351,20 +9369,26 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
if (!DAG.isKnownNeverNaN(Op)) {
SDValue NotNaN = DAG.getSetCC(DL, ResultVT, Op, Op, ISD::SETO);
- SignBitResult =
- DAG.getNode(ISD::AND, DL, ResultVT, NotNaN, SignBitResult);
+ SDValue IsNaN = DAG.getSetCC(DL, ResultVT, Op, Op, ISD::SETUO);
+ if (testNaN)
+ SignBitResult =
+ DAG.getNode(ISD::OR, DL, ResultVT, IsNaN, SignBitResult);
+ else
+ SignBitResult =
+ DAG.getNode(ISD::AND, DL, ResultVT, NotNaN, SignBitResult);
}
bool IsICmpImmLegal =
- isLegalICmpImmediate(APInt::getAllOnes(IntVTBits).getZExtValue());
- if (!NeedFPCompare && (!DAG.isKnownNeverNaN(Op) || IsICmpImmLegal) &&
- Test == fcPositive) {
- ; // (fcPosInf | fcFinite) has better performance.
- } else if (Test == fcNegative)
- return SignBitResult;
- else
- return DAG.getNode(ISD::XOR, DL, ResultVT, SignBitResult,
- ResultInversionMask);
+ isLegalICmpImmediate(APInt::getAllOnes(BitSize).getSExtValue());
+ if (NeedFPTrunc || DAG.isKnownNeverNaN(Op) ||
+ (OperandVT.isVector() && isTypeLegal(OperandVT)) || testNegative ||
+ !IsICmpImmLegal) {
+ if (testNegative)
+ return SignBitResult;
+ else
+ return DAG.getNode(ISD::XOR, DL, ResultVT, SignBitResult,
+ ResultInversionMask);
+ }
}
if (IsF80)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
index 80ec6fa104b31..39634183197e8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
@@ -2027,17 +2027,9 @@ define i1 @not_ispositive_bf16(bfloat %x) {
; GFX7CHECK-LABEL: not_ispositive_bf16:
; GFX7CHECK: ; %bb.0:
; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v0
-; GFX7CHECK-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7f80
-; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2
-; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], s6, v0
-; GFX7CHECK-NEXT: s_mov_b32 s7, 0xff80
-; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s7, v1
-; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s6, v0
+; GFX7CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v0
+; GFX7CHECK-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0
; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -2045,15 +2037,9 @@ define i1 @not_ispositive_bf16(bfloat %x) {
; GFX8CHECK-LABEL: not_ispositive_bf16:
; GFX8CHECK: ; %bb.0:
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
-; GFX8CHECK-NEXT: s_movk_i32 s6, 0x7f80
; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
-; GFX8CHECK-NEXT: v_cmp_gt_i16_e64 s[4:5], s6, v1
-; GFX8CHECK-NEXT: s_movk_i32 s7, 0xff80
-; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s7, v0
-; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s6, v1
+; GFX8CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8CHECK-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0
; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -2061,15 +2047,9 @@ define i1 @not_ispositive_bf16(bfloat %x) {
; GFX9CHECK-LABEL: not_ispositive_bf16:
; GFX9CHECK: ; %bb.0:
; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
-; GFX9CHECK-NEXT: s_movk_i32 s6, 0x7f80
; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
-; GFX9CHECK-NEXT: v_cmp_gt_i16_e64 s[4:5], s6, v1
-; GFX9CHECK-NEXT: s_movk_i32 s7, 0xff80
-; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s7, v0
-; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s6, v1
+; GFX9CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9CHECK-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0
; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -2077,42 +2057,32 @@ define i1 @not_ispositive_bf16(bfloat %x) {
; GFX10CHECK-LABEL: not_ispositive_bf16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX10CHECK-NEXT: v_lshlrev_b32_e32 v1, 16, v0
; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
-; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s5, 0xff80, v0
-; GFX10CHECK-NEXT: v_cmp_gt_i16_e64 s4, 0x7f80, v1
-; GFX10CHECK-NEXT: v_cmp_lt_i16_e64 s6, 0x7f80, v1
-; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
-; GFX10CHECK-NEXT: s_or_b32 s4, s4, s5
-; GFX10CHECK-NEXT: s_or_b32 s4, s4, s6
+; GFX10CHECK-NEXT: v_cmp_u_f32_e64 s4, v1, v1
+; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-TRUE16-LABEL: not_ispositive_bf16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_and_b16 v0.h, 0x7fff, v0.l
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0.l
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_eq_u16_e64 s1, 0xff80, v0.l
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_gt_i16_e64 s0, 0x7f80, v0.h
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_lt_i16_e64 s2, 0x7f80, v0.h
-; GFX11SELDAG-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
-; GFX11SELDAG-TRUE16-NEXT: s_or_b32 s0, s0, s1
-; GFX11SELDAG-TRUE16-NEXT: s_or_b32 s0, s0, s2
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v2, v2
+; GFX11SELDAG-TRUE16-NEXT: s_or_b32 s0, s0, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_ispositive_bf16:
; GFX11SELDAG-FAKE16: ; %bb.0:
; GFX11SELDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
; GFX11SELDAG-FAKE16-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
-; GFX11SELDAG-FAKE16-NEXT: v_cmp_eq_u16_e64 s1, 0xff80, v0
-; GFX11SELDAG-FAKE16-NEXT: v_cmp_gt_i16_e64 s0, 0x7f80, v1
-; GFX11SELDAG-FAKE16-NEXT: v_cmp_lt_i16_e64 s2, 0x7f80, v1
-; GFX11SELDAG-FAKE16-NEXT: s_and_b32 s0, s0, vcc_lo
-; GFX11SELDAG-FAKE16-NEXT: s_or_b32 s0, s0, s1
-; GFX11SELDAG-FAKE16-NEXT: s_or_b32 s0, s0, s2
+; GFX11SELDAG-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v1, v1
+; GFX11SELDAG-FAKE16-NEXT: s_or_b32 s0, s0, vcc_lo
; GFX11SELDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11SELDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
%class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 63) ; ~fcPositive
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index 1b3527a0b5d49..6b186cc5a21ec 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -2962,17 +2962,10 @@ define i1 @not_ispositive_f16(half %x) {
; GFX7SELDAG-LABEL: not_ispositive_f16:
; GFX7SELDAG: ; %bb.0:
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7SELDAG-NEXT: v_and_b32_e32 v1, 0xffff, v0
-; GFX7SELDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7c00
-; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2
-; GFX7SELDAG-NEXT: v_cmp_gt_i32_e64 s[4:5], s6, v0
-; GFX7SELDAG-NEXT: s_mov_b32 s7, 0xfc00
-; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s7, v1
-; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s6, v0
+; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX7SELDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, 0, v0
+; GFX7SELDAG-NEXT: v_cmp_u_f32_e64 s[4:5], v1, v1
; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index c933bba790954..d07c699a4b822 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -3058,10 +3058,11 @@ define <4 x i1> @isclass_positive_v4f32(<4 x float> %x) nounwind {
;
; X64-LABEL: isclass_positive_v4f32:
; X64: # %bb.0:
-; X64-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: pcmpeqd %xmm1, %xmm1
-; X64-NEXT: pxor %xmm1, %xmm0
+; X64-NEXT: pxor %xmm1, %xmm1
+; X64-NEXT: pcmpgtd %xmm0, %xmm1
+; X64-NEXT: cmpordps %xmm0, %xmm0
+; X64-NEXT: andps %xmm1, %xmm0
+; X64-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: retq
%ispos = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 960)
ret <4 x i1> %ispos
@@ -3179,6 +3180,197 @@ define <4 x i1> @isclass_negative_v4f32(<4 x float> %x) nounwind {
ret <4 x i1> %isneg
}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+define i1 @isclass_positive_fcnan(float %x) nounwind {
+; X86-LABEL: isclass_positive_fcnan:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X86-NEXT: setb %cl
+; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X86-NEXT: setge %al
+; X86-NEXT: orb %cl, %al
+; X86-NEXT: retl
+;
+; X64-LABEL: isclass_positive_fcnan:
+; X64: # %bb.0:
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X64-NEXT: setb %cl
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X64-NEXT: setge %al
+; X64-NEXT: orb %cl, %al
+; X64-NEXT: retq
+ %ispos = call i1 @llvm.is.fpclass.f32(float %x, i32 963)
+ ret i1 %ispos
+}
+
+define i1 @isclass_negative_fcnan(float %x) nounwind {
+; X86-LABEL: isclass_negative_fcnan:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts (%esp)
+; X86-NEXT: cmpl $0, (%esp)
+; X86-NEXT: sets %cl
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setp %al
+; X86-NEXT: orb %cl, %al
+; X86-NEXT: popl %ecx
+; X86-NEXT: retl
+;
+; X64-LABEL: isclass_negative_fcnan:
+; X64: # %bb.0:
+; X64-NEXT: movmskps %xmm0, %eax
+; X64-NEXT: ucomiss %xmm0, %xmm0
+; X64-NEXT: setp %cl
+; X64-NEXT: testb $1, %al
+; X64-NEXT: setne %al
+; X64-NEXT: orb %cl, %al
+; X64-NEXT: retq
+ %isneg = call i1 @llvm.is.fpclass.f32(float %x, i32 63)
+ ret i1 %isneg
+}
+
+define <4 x i1> @isclass_positive_fcnan_v4f32(<4 x float> %x) nounwind {
+; X86-LABEL: isclass_positive_fcnan_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: cmpl $2139095041, %edi # imm = 0x7F800001
+; X86-NEXT: setb %bl
+; X86-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF
+; X86-NEXT: cmpl $2139095041, %edi # imm = 0x7F800001
+; X86-NEXT: setge %bh
+; X86-NEXT: orb %bl, %bh
+; X86-NEXT: shlb $2, %bh
+; X86-NEXT: cmpl $2139095041, %esi # imm = 0x7F800001
+; X86-NEXT: setb %al
+; X86-NEXT: andl $2147483647, %esi # imm = 0x7FFFFFFF
+; X86-NEXT: cmpl $2139095041, %esi # imm = 0x7F800001
+; X86-NEXT: setge %bl
+; X86-NEXT: orb %al, %bl
+; X86-NEXT: shlb $3, %bl
+; X86-NEXT: orb %bh, %bl
+; X86-NEXT: cmpl $2139095041, %edx # imm = 0x7F800001
+; X86-NEXT: setb %al
+; X86-NEXT: andl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: cmpl $2139095041, %edx # imm = 0x7F800001
+; X86-NEXT: setge %dl
+; X86-NEXT: orb %al, %dl
+; X86-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001
+; X86-NEXT: setb %al
+; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X86-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001
+; X86-NEXT: setge %cl
+; X86-NEXT: orb %al, %cl
+; X86-NEXT: addb %cl, %cl
+; X86-NEXT: orb %dl, %cl
+; X86-NEXT: orb %bl, %cl
+; X86-NEXT: movb %cl, (%ebp)
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+;
+; X64-LABEL: isclass_positive_fcnan_v4f32:
+; X64: # %bb.0:
+; X64-NEXT: pxor %xmm1, %xmm1
+; X64-NEXT: pcmpgtd %xmm0, %xmm1
+; X64-NEXT: cmpunordps %xmm0, %xmm0
+; X64-NEXT: orps %xmm1, %xmm0
+; X64-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: retq
+ %ispos = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 963)
+ ret <4 x i1> %ispos
+}
+
+define <4 x i1> @isclass_negative_fcnan_v4f32(<4 x float> %x) nounwind {
+; X86-LABEL: isclass_negative_fcnan_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts (%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets %dl
+; X86-NEXT: fxch %st(3)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setp %dh
+; X86-NEXT: orb %dl, %dh
+; X86-NEXT: shlb $2, %dh
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets %bl
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setp %dl
+; X86-NEXT: orb %bl, %dl
+; X86-NEXT: shlb $3, %dl
+; X86-NEXT: orb %dh, %dl
+; X86-NEXT: cmpl $0, (%esp)
+; X86-NEXT: sets %bl
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setp %dh
+; X86-NEXT: orb %bl, %dh
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets %bl
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setp %al
+; X86-NEXT: orb %bl, %al
+; X86-NEXT: addb %al, %al
+; X86-NEXT: orb %dh, %al
+; X86-NEXT: orb %dl, %al
+; X86-NEXT: movb %al, (%ecx)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl $4
+;
+; X64-LABEL: isclass_negative_fcnan_v4f32:
+; X64: # %bb.0:
+; X64-NEXT: pxor %xmm1, %xmm1
+; X64-NEXT: pcmpgtd %xmm0, %xmm1
+; X64-NEXT: cmpunordps %xmm0, %xmm0
+; X64-NEXT: orps %xmm1, %xmm0
+; X64-NEXT: retq
+ %isneg = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 63)
+ ret <4 x i1> %isneg
+}
+
declare i1 @llvm.is.fpclass.f32(float, i32)
declare i1 @llvm.is.fpclass.f64(double, i32)
declare <1 x i1> @llvm.is.fpclass.v1f32(<1 x float>, i32)
>From f0c8b2823d5d64eec5d1b8e6cb708700ad14aeaa Mon Sep 17 00:00:00 2001
From: YunQiang Su <syq at debian.org>
Date: Wed, 11 Mar 2026 13:47:37 +0800
Subject: [PATCH 4/4] new function getFloatSign
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 114 ++++++++++--------
1 file changed, 62 insertions(+), 52 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a22dc360b46fc..c5ea88dadf0e7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9088,6 +9088,61 @@ static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
return std::nullopt;
}
+static SDValue getFloatSign(EVT ResultVT, bool &NeedFPTrunc, SDValue Op, // Builds a ResultVT setcc testing whether Op's sign bit is set.
+ SelectionDAG &DAG, const SDLoc &DL,
+ const TargetLowering &TLI) { // NeedFPTrunc is an output flag, assigned below.
+ EVT OperandVT = Op.getValueType();
+ unsigned BitSize = OperandVT.getScalarSizeInBits();
+ EVT IntVT = OperandVT.changeElementType(
+ *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), BitSize)); // integer type with the same element width as Op
+ SDValue OpAsInt = DAG.getBitcast(IntVT, Op); // reinterpret Op's bits as that integer type
+ SDValue SignBitResult = DAG.getSetCC(
+ DL, ResultVT, OpAsInt, DAG.getConstant(0, DL, IntVT), ISD::SETLT); // default result: signed "bits < 0"
+
+ NeedFPTrunc = false; // becomes true only if the loop settles on a type pair different from OperandVT
+ EVT TruncFloatVT = OperandVT;
+ EVT TruncIntVT = IntVT;
+ do { // search for a legal float/int type pair with equal element width
+ unsigned TruncFloatEltBitsize = TruncFloatVT.getScalarSizeInBits();
+ unsigned TruncIntEltBitsize = TruncIntVT.getScalarSizeInBits();
+ if (TLI.isTypeLegal(TruncIntVT) && TLI.isTypeLegal(TruncFloatVT) &&
+ TruncFloatEltBitsize == TruncIntEltBitsize) {
+ if (TruncFloatVT != OperandVT)
+ NeedFPTrunc = true;
+ break; // both types legal and same width: stop searching
+ }
+ EVT TruncFloatEltVT = TruncFloatVT.getScalarType();
+ EVT TruncIntEltVT0 =
+ EVT::getIntegerVT(*DAG.getContext(), TruncFloatEltBitsize); // integer element as wide as the current float element
+ unsigned TruncIntEltVT0SizeInBits = TruncIntEltVT0.getScalarSizeInBits();
+ EVT TruncIntEltVT =
+ TLI.getTypeToTransformTo(*DAG.getContext(), TruncIntEltVT0); // type the target reports for that integer element
+ unsigned TruncIntEltVTSizeInBits = TruncIntEltVT.getScalarSizeInBits();
+ TruncFloatEltVT =
+ EVT::getFloatingPointVT(TruncIntEltVT.getScalarSizeInBits()); // float element of the reported integer's width
+ if (TruncIntEltVTSizeInBits >= TruncIntEltVT0SizeInBits)
+ break; // reported integer type is not narrower: stop, NeedFPTrunc stays false
+ TruncFloatVT =
+ TruncFloatVT.changeElementType(*DAG.getContext(), TruncFloatEltVT);
+ TruncIntVT = TruncIntVT.changeElementType(*DAG.getContext(), TruncIntEltVT); // retry with the narrower element types
+ } while (true);
+
+ if (NeedFPTrunc &&
+ TLI.isOperationLegalOrCustom(ISD::FP_ROUND, TruncFloatVT)) {
+ // Round Op to the narrower float type, then sign-test its integer bits.
+ // The second FP_ROUND operand is a pointer-typed target constant 0.
+ EVT PointerVT = TLI.getPointerTy(DAG.getDataLayout());
+ SDValue OpTrunc = DAG.getNode(ISD::FP_ROUND, DL, TruncFloatVT, Op,
+ DAG.getTargetConstant(0, DL, PointerVT));
+ SDValue OpTruncInt = DAG.getBitcast(TruncIntVT, OpTrunc);
+ SignBitResult =
+ DAG.getSetCC(DL, ResultVT, OpTruncInt,
+ DAG.getConstant(0, DL, TruncIntVT), ISD::SETLT); // replaces the full-width compare built above
+ } // NOTE(review): if FP_ROUND is not legal/custom the full-width compare is returned while NeedFPTrunc is still true - confirm callers expect that
+
+ return SignBitResult;
+}
+
+
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
const FPClassTest OrigTestMask,
SDNodeFlags Flags, const SDLoc &DL,
@@ -9315,67 +9370,22 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
FPClassTest FPTestNaN = Test & fcNan;
if ((FPTestSign == fcPositive || FPTestSign == fcNegative) &&
(FPTestNaN == fcNan || FPTestNaN == fcNone)) {
- SDValue SignBitResult;
bool testNegative = FPTestSign == fcNegative;
bool testNaN = FPTestNaN == fcNan;
-
bool NeedFPTrunc = false;
- EVT TruncFloatVT = OperandVT;
- EVT TruncIntVT = IntVT;
- do {
- unsigned TruncFloatEltBitsize = TruncFloatVT.getScalarSizeInBits();
- unsigned TruncIntEltBitsize = TruncIntVT.getScalarSizeInBits();
- if (isTypeLegal(TruncIntVT) && isTypeLegal(TruncFloatVT) &&
- TruncFloatEltBitsize == TruncIntEltBitsize) {
- if (TruncFloatVT != OperandVT)
- NeedFPTrunc = true;
- break;
- }
- EVT TruncFloatEltVT = TruncFloatVT.getScalarType();
- EVT TruncIntEltVT0 =
- EVT::getIntegerVT(*DAG.getContext(), TruncFloatEltBitsize);
- unsigned TruncIntEltVT0SizeInBits = TruncIntEltVT0.getScalarSizeInBits();
- EVT TruncIntEltVT =
- getTypeToTransformTo(*DAG.getContext(), TruncIntEltVT0);
- unsigned TruncIntEltVTSizeInBits = TruncIntEltVT.getScalarSizeInBits();
- TruncFloatEltVT =
- EVT::getFloatingPointVT(TruncIntEltVT.getScalarSizeInBits());
- if (TruncIntEltVTSizeInBits >= TruncIntEltVT0SizeInBits)
- break;
- TruncFloatVT =
- TruncFloatVT.changeElementType(*DAG.getContext(), TruncFloatEltVT);
- TruncIntVT =
- TruncIntVT.changeElementType(*DAG.getContext(), TruncIntEltVT);
- } while (true);
-
- if (NeedFPTrunc) {
- if (isOperationLegalOrCustom(ISD::FP_ROUND, TruncFloatVT)) {
- // Round to smaller float type, then bitcast to integer for sign check.
- // Use TargetConstant for the truncation flag.
- EVT PointerVT = getPointerTy(DAG.getDataLayout());
- SDValue OpTrunc = DAG.getNode(ISD::FP_ROUND, DL, TruncFloatVT, Op,
- DAG.getTargetConstant(0, DL, PointerVT));
- SDValue OpTruncInt = DAG.getBitcast(TruncIntVT, OpTrunc);
- SignBitResult =
- DAG.getSetCC(DL, ResultVT, OpTruncInt,
- DAG.getConstant(0, DL, TruncIntVT), ISD::SETLT);
- } else {
- // Fall back to original integer comparison.
- SignBitResult = SignV;
- }
- } else {
- SignBitResult = SignV;
- }
+ SDValue SignBitResult =
+ getFloatSign(ResultVT, NeedFPTrunc, Op, DAG, DL, *this);
if (!DAG.isKnownNeverNaN(Op)) {
- SDValue NotNaN = DAG.getSetCC(DL, ResultVT, Op, Op, ISD::SETO);
- SDValue IsNaN = DAG.getSetCC(DL, ResultVT, Op, Op, ISD::SETUO);
- if (testNaN)
+ if (testNaN) {
+ SDValue IsNaN = DAG.getSetCC(DL, ResultVT, Op, Op, ISD::SETUO);
SignBitResult =
DAG.getNode(ISD::OR, DL, ResultVT, IsNaN, SignBitResult);
- else
+ } else {
+ SDValue NotNaN = DAG.getSetCC(DL, ResultVT, Op, Op, ISD::SETO);
SignBitResult =
DAG.getNode(ISD::AND, DL, ResultVT, NotNaN, SignBitResult);
+ }
}
bool IsICmpImmLegal =
More information about the llvm-commits
mailing list