[llvm] [RISCV][GISel] Use libcalls for f32/f64 G_FCMP without F/D extensions. (PR #117660)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 25 17:57:30 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
LegalizerHelper only supported f128 libcalls and incorrectly assumed that the destination register for the G_FCMP was s32.
---
Patch is 42.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117660.diff
4 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+42-22)
- (modified) llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp (+2-1)
- (added) llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll (+584)
- (added) llvm/test/CodeGen/RISCV/GlobalISel/float-fcmp.ll (+568)
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 321760ef822bc2..29c7d805ea81e2 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1001,23 +1001,36 @@ LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
/// the ICMP predicate that should be generated to compare with #0
/// after the libcall.
static std::pair<RTLIB::Libcall, CmpInst::Predicate>
-getFCMPLibcallDesc(const CmpInst::Predicate Pred) {
+getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
+#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
+ do { \
+ switch (Size) { \
+ case 32: \
+ return {RTLIB::LibcallPrefix##32, ICmpPred}; \
+ case 64: \
+ return {RTLIB::LibcallPrefix##64, ICmpPred}; \
+ case 128: \
+ return {RTLIB::LibcallPrefix##128, ICmpPred}; \
+ default: \
+ llvm_unreachable("unexpected size"); \
+ } \
+ } while (0)
switch (Pred) {
case CmpInst::FCMP_OEQ:
- return {RTLIB::OEQ_F128, CmpInst::ICMP_EQ};
+ RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
case CmpInst::FCMP_UNE:
- return {RTLIB::UNE_F128, CmpInst::ICMP_NE};
+ RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
case CmpInst::FCMP_OGE:
- return {RTLIB::OGE_F128, CmpInst::ICMP_SGE};
+ RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
case CmpInst::FCMP_OLT:
- return {RTLIB::OLT_F128, CmpInst::ICMP_SLT};
+ RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
case CmpInst::FCMP_OLE:
- return {RTLIB::OLE_F128, CmpInst::ICMP_SLE};
+ RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
case CmpInst::FCMP_OGT:
- return {RTLIB::OGT_F128, CmpInst::ICMP_SGT};
+ RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
case CmpInst::FCMP_UNO:
- return {RTLIB::UO_F128, CmpInst::ICMP_NE};
+ RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
default:
return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
}
@@ -1032,21 +1045,24 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
const GFCmp *Cmp = cast<GFCmp>(&MI);
LLT OpLLT = MRI.getType(Cmp->getLHSReg());
- if (OpLLT != LLT::scalar(128) || OpLLT != MRI.getType(Cmp->getRHSReg()))
+ unsigned Size = OpLLT.getSizeInBits();
+ if ((Size != 32 && Size != 64 && Size != 128) ||
+ OpLLT != MRI.getType(Cmp->getRHSReg()))
return UnableToLegalize;
Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
// DstReg type is s32
const Register DstReg = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(DstReg);
const auto Cond = Cmp->getCond();
// Reference:
// https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
// Generates a libcall followed by ICMP.
- const auto BuildLibcall =
- [&](const RTLIB::Libcall Libcall, const CmpInst::Predicate ICmpPred,
- const DstOp &Res = LLT::scalar(32)) -> Register {
+ const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
+ const CmpInst::Predicate ICmpPred,
+ const DstOp &Res) -> Register {
// FCMP libcall always returns an i32, and needs an ICMP with #0.
constexpr LLT TempLLT = LLT::scalar(32);
Register Temp = MRI.createGenericVirtualRegister(TempLLT);
@@ -1065,7 +1081,7 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
};
// Simple case if we have a direct mapping from predicate to libcall
- if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond);
+ if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
Libcall != RTLIB::UNKNOWN_LIBCALL &&
ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
@@ -1081,11 +1097,13 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
// FCMP_UEQ: unordered or equal
// Convert into (FCMP_OEQ || FCMP_UNO).
- const auto [OeqLibcall, OeqPred] = getFCMPLibcallDesc(CmpInst::FCMP_OEQ);
- const auto Oeq = BuildLibcall(OeqLibcall, OeqPred);
+ const auto [OeqLibcall, OeqPred] =
+ getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
+ const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
- const auto [UnoLibcall, UnoPred] = getFCMPLibcallDesc(CmpInst::FCMP_UNO);
- const auto Uno = BuildLibcall(UnoLibcall, UnoPred);
+ const auto [UnoLibcall, UnoPred] =
+ getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
+ const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
if (Oeq && Uno)
MIRBuilder.buildOr(DstReg, Oeq, Uno);
else
@@ -1100,13 +1118,15 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
// We inverse the predicate instead of generating a NOT
// to save one instruction.
// On AArch64 isel can even select two cmp into a single ccmp.
- const auto [OeqLibcall, OeqPred] = getFCMPLibcallDesc(CmpInst::FCMP_OEQ);
+ const auto [OeqLibcall, OeqPred] =
+ getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
const auto NotOeq =
- BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred));
+ BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
- const auto [UnoLibcall, UnoPred] = getFCMPLibcallDesc(CmpInst::FCMP_UNO);
+ const auto [UnoLibcall, UnoPred] =
+ getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
const auto NotUno =
- BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred));
+ BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
if (NotOeq && NotUno)
MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
@@ -1128,7 +1148,7 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
// MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
// Op1, Op2));
const auto [InversedLibcall, InversedPred] =
- getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond));
+ getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
if (!BuildLibcall(InversedLibcall,
CmpInst::getInversePredicate(InversedPred), DstReg))
return UnableToLegalize;
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index a9294e76f8763f..e7166fdab8c268 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -530,7 +530,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.legalFor(ST.hasStdExtF(), {{sXLen, s32}})
.legalFor(ST.hasStdExtD(), {{sXLen, s64}})
.legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
- .clampScalar(ST.hasStdExtF(), 0, sXLen, sXLen);
+ .clampScalar(0, sXLen, sXLen)
+ .libcallFor({{sXLen, s32}, {sXLen, s64}});
// TODO: Support vector version of G_IS_FPCLASS.
getActionDefinitionsBuilder(G_IS_FPCLASS)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll
new file mode 100644
index 00000000000000..ce3bd794477302
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll
@@ -0,0 +1,584 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d -verify-machineinstrs < %s \
+; RUN: -target-abi=ilp32d | FileCheck -check-prefix=CHECKIFD %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d -verify-machineinstrs < %s \
+; RUN: -target-abi=lp64d | FileCheck -check-prefix=CHECKIFD %s
+; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+
+define i32 @fcmp_false(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_false:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: li a0, 0
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_false:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a0, 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_false:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, 0
+; RV64I-NEXT: ret
+ %1 = fcmp false double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_oeq(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_oeq:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: feq.d a0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_oeq:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_oeq:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __eqdf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp oeq double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ogt(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ogt:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: flt.d a0, fa1, fa0
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ogt:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __gtdf2
+; RV32I-NEXT: sgtz a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ogt:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __gtdf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sgtz a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp ogt double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_oge(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_oge:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fle.d a0, fa1, fa0
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_oge:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __gedf2
+; RV32I-NEXT: slti a0, a0, 0
+; RV32I-NEXT: xori a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_oge:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __gedf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slti a0, a0, 0
+; RV64I-NEXT: xori a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp oge double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_olt(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_olt:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: flt.d a0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_olt:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __ltdf2
+; RV32I-NEXT: slti a0, a0, 0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_olt:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __ltdf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slti a0, a0, 0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp olt double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ole(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ole:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fle.d a0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ole:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __ledf2
+; RV32I-NEXT: sgtz a0, a0
+; RV32I-NEXT: xori a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ole:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __ledf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sgtz a0, a0
+; RV64I-NEXT: xori a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp ole double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_one(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_one:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: flt.d a0, fa0, fa1
+; CHECKIFD-NEXT: flt.d a1, fa1, fa0
+; CHECKIFD-NEXT: or a0, a0, a1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_one:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv s3, a3
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: snez s4, a0
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a2, s2
+; RV32I-NEXT: mv a3, s3
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a0, s4, a0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_one:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: call __eqdf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: snez s2, a0
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __unorddf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: and a0, s2, a0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+ %1 = fcmp one double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ord(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ord:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: feq.d a0, fa0, fa0
+; CHECKIFD-NEXT: feq.d a1, fa1, fa1
+; CHECKIFD-NEXT: and a0, a0, a1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ord:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ord:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __unorddf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp ord double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ueq(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ueq:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: flt.d a0, fa0, fa1
+; CHECKIFD-NEXT: flt.d a1, fa1, fa0
+; CHECKIFD-NEXT: or a0, a0, a1
+; CHECKIFD-NEXT: xori a0, a0, 1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ueq:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv s3, a3
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: seqz s4, a0
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a2, s2
+; RV32I-NEXT: mv a3, s3
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: or a0, s4, a0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ueq:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: call __eqdf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: seqz s2, a0
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __unorddf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: or a0, s2, a0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+ %1 = fcmp ueq double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ugt(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ugt:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fle.d a0, fa0, fa1
+; CHECKIFD-NEXT: xori a0, a0, 1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ugt:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __ledf2
+; RV32I-NEXT: sgtz a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ugt:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __ledf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sgtz a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-N...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/117660
More information about the llvm-commits
mailing list